Diffstat (limited to 'net')
718 files changed, 51989 insertions, 27458 deletions
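A reading aid, not part of the patch: the largest piece of this diff, the new net/802/mrp.c, builds and parses MRP VectorAttribute fields by packing three attribute events into each Vector byte using base-6 positional encoding (there are six vector events, so __MRP_VECATTR_EVENT_MAX is 6 in the new net/mrp.h). The following standalone userspace sketch mirrors that arithmetic; it is an illustration only, and the helper names are invented:

	#include <assert.h>
	#include <stdint.h>

	#define VECATTR_EVENT_MAX 6	/* NEW, JOIN_IN, IN, JOIN_MT, MT, LV */

	/* Pack three events (each 0..5) into one Vector byte, mirroring
	 * the switch on "pos" in mrp_pdu_append_vecattr_event(). */
	static uint8_t pack_events(uint8_t e0, uint8_t e1, uint8_t e2)
	{
		return e0 * (VECATTR_EVENT_MAX * VECATTR_EVENT_MAX) +
		       e1 * VECATTR_EVENT_MAX + e2;
	}

	/* Unpack one Vector byte, mirroring mrp_pdu_parse_vecattr(). */
	static void unpack_events(uint8_t b, uint8_t e[3])
	{
		e[0] = b / (VECATTR_EVENT_MAX * VECATTR_EVENT_MAX);
		b   %= VECATTR_EVENT_MAX * VECATTR_EVENT_MAX;
		e[1] = b / VECATTR_EVENT_MAX;
		e[2] = b % VECATTR_EVENT_MAX;
	}

	int main(void)
	{
		uint8_t e[3];

		unpack_events(pack_events(1, 4, 5), e);	/* JoinIn, Mt, Lv */
		assert(e[0] == 1 && e[1] == 4 && e[2] == 5);
		return 0;
	}

Since the maximum packed value is 5*36 + 5*6 + 5 = 215, three events always fit in one byte with room to detect malformed values (>= 216), which is exactly the check the parser performs on the first event.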
diff --git a/net/802/Kconfig b/net/802/Kconfig index be33d27c8e69..80d4bf78905d 100644 --- a/net/802/Kconfig +++ b/net/802/Kconfig @@ -5,3 +5,6 @@ config STP config GARP tristate select STP + +config MRP + tristate diff --git a/net/802/Makefile b/net/802/Makefile index a30d6e385aed..37e654d6615e 100644 --- a/net/802/Makefile +++ b/net/802/Makefile @@ -11,3 +11,4 @@ obj-$(CONFIG_IPX) += p8022.o psnap.o p8023.o obj-$(CONFIG_ATALK) += p8022.o psnap.o obj-$(CONFIG_STP) += stp.o obj-$(CONFIG_GARP) += garp.o +obj-$(CONFIG_MRP) += mrp.o diff --git a/net/802/garp.c b/net/802/garp.c index 8456f5d98b85..5d9630a0eb93 100644 --- a/net/802/garp.c +++ b/net/802/garp.c @@ -609,8 +609,12 @@ void garp_uninit_applicant(struct net_device *dev, struct garp_application *appl /* Delete timer and generate a final TRANSMIT_PDU event to flush out * all pending messages before the applicant is gone. */ del_timer_sync(&app->join_timer); + + spin_lock_bh(&app->lock); garp_gid_event(app, GARP_EVENT_TRANSMIT_PDU); garp_pdu_queue(app); + spin_unlock_bh(&app->lock); + garp_queue_xmit(app); dev_mc_del(dev, appl->proto.group_address); diff --git a/net/802/mrp.c b/net/802/mrp.c new file mode 100644 index 000000000000..e085bcc754f6 --- /dev/null +++ b/net/802/mrp.c @@ -0,0 +1,899 @@ +/* + * IEEE 802.1Q Multiple Registration Protocol (MRP) + * + * Copyright (c) 2012 Massachusetts Institute of Technology + * + * Adapted from code in net/802/garp.c + * Copyright (c) 2008 Patrick McHardy <kaber@trash.net> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + */ +#include <linux/kernel.h> +#include <linux/timer.h> +#include <linux/skbuff.h> +#include <linux/netdevice.h> +#include <linux/etherdevice.h> +#include <linux/rtnetlink.h> +#include <linux/slab.h> +#include <linux/module.h> +#include <net/mrp.h> +#include <asm/unaligned.h> + +static unsigned int mrp_join_time __read_mostly = 200; +module_param(mrp_join_time, uint, 0644); +MODULE_PARM_DESC(mrp_join_time, "Join time in ms (default 200ms)"); +MODULE_LICENSE("GPL"); + +static const u8 +mrp_applicant_state_table[MRP_APPLICANT_MAX + 1][MRP_EVENT_MAX + 1] = { + [MRP_APPLICANT_VO] = { + [MRP_EVENT_NEW] = MRP_APPLICANT_VN, + [MRP_EVENT_JOIN] = MRP_APPLICANT_VP, + [MRP_EVENT_LV] = MRP_APPLICANT_VO, + [MRP_EVENT_TX] = MRP_APPLICANT_VO, + [MRP_EVENT_R_NEW] = MRP_APPLICANT_VO, + [MRP_EVENT_R_JOIN_IN] = MRP_APPLICANT_AO, + [MRP_EVENT_R_IN] = MRP_APPLICANT_VO, + [MRP_EVENT_R_JOIN_MT] = MRP_APPLICANT_VO, + [MRP_EVENT_R_MT] = MRP_APPLICANT_VO, + [MRP_EVENT_R_LV] = MRP_APPLICANT_VO, + [MRP_EVENT_R_LA] = MRP_APPLICANT_VO, + [MRP_EVENT_REDECLARE] = MRP_APPLICANT_VO, + [MRP_EVENT_PERIODIC] = MRP_APPLICANT_VO, + }, + [MRP_APPLICANT_VP] = { + [MRP_EVENT_NEW] = MRP_APPLICANT_VN, + [MRP_EVENT_JOIN] = MRP_APPLICANT_VP, + [MRP_EVENT_LV] = MRP_APPLICANT_VO, + [MRP_EVENT_TX] = MRP_APPLICANT_AA, + [MRP_EVENT_R_NEW] = MRP_APPLICANT_VP, + [MRP_EVENT_R_JOIN_IN] = MRP_APPLICANT_AP, + [MRP_EVENT_R_IN] = MRP_APPLICANT_VP, + [MRP_EVENT_R_JOIN_MT] = MRP_APPLICANT_VP, + [MRP_EVENT_R_MT] = MRP_APPLICANT_VP, + [MRP_EVENT_R_LV] = MRP_APPLICANT_VP, + [MRP_EVENT_R_LA] = MRP_APPLICANT_VP, + [MRP_EVENT_REDECLARE] = MRP_APPLICANT_VP, + [MRP_EVENT_PERIODIC] = MRP_APPLICANT_VP, + }, + [MRP_APPLICANT_VN] = { + [MRP_EVENT_NEW] = MRP_APPLICANT_VN, + [MRP_EVENT_JOIN] = MRP_APPLICANT_VN, + [MRP_EVENT_LV] = MRP_APPLICANT_LA, + [MRP_EVENT_TX] = MRP_APPLICANT_AN, + [MRP_EVENT_R_NEW] = 
MRP_APPLICANT_VN, + [MRP_EVENT_R_JOIN_IN] = MRP_APPLICANT_VN, + [MRP_EVENT_R_IN] = MRP_APPLICANT_VN, + [MRP_EVENT_R_JOIN_MT] = MRP_APPLICANT_VN, + [MRP_EVENT_R_MT] = MRP_APPLICANT_VN, + [MRP_EVENT_R_LV] = MRP_APPLICANT_VN, + [MRP_EVENT_R_LA] = MRP_APPLICANT_VN, + [MRP_EVENT_REDECLARE] = MRP_APPLICANT_VN, + [MRP_EVENT_PERIODIC] = MRP_APPLICANT_VN, + }, + [MRP_APPLICANT_AN] = { + [MRP_EVENT_NEW] = MRP_APPLICANT_AN, + [MRP_EVENT_JOIN] = MRP_APPLICANT_AN, + [MRP_EVENT_LV] = MRP_APPLICANT_LA, + [MRP_EVENT_TX] = MRP_APPLICANT_QA, + [MRP_EVENT_R_NEW] = MRP_APPLICANT_AN, + [MRP_EVENT_R_JOIN_IN] = MRP_APPLICANT_AN, + [MRP_EVENT_R_IN] = MRP_APPLICANT_AN, + [MRP_EVENT_R_JOIN_MT] = MRP_APPLICANT_AN, + [MRP_EVENT_R_MT] = MRP_APPLICANT_AN, + [MRP_EVENT_R_LV] = MRP_APPLICANT_VN, + [MRP_EVENT_R_LA] = MRP_APPLICANT_VN, + [MRP_EVENT_REDECLARE] = MRP_APPLICANT_VN, + [MRP_EVENT_PERIODIC] = MRP_APPLICANT_AN, + }, + [MRP_APPLICANT_AA] = { + [MRP_EVENT_NEW] = MRP_APPLICANT_VN, + [MRP_EVENT_JOIN] = MRP_APPLICANT_AA, + [MRP_EVENT_LV] = MRP_APPLICANT_LA, + [MRP_EVENT_TX] = MRP_APPLICANT_QA, + [MRP_EVENT_R_NEW] = MRP_APPLICANT_AA, + [MRP_EVENT_R_JOIN_IN] = MRP_APPLICANT_QA, + [MRP_EVENT_R_IN] = MRP_APPLICANT_AA, + [MRP_EVENT_R_JOIN_MT] = MRP_APPLICANT_AA, + [MRP_EVENT_R_MT] = MRP_APPLICANT_AA, + [MRP_EVENT_R_LV] = MRP_APPLICANT_VP, + [MRP_EVENT_R_LA] = MRP_APPLICANT_VP, + [MRP_EVENT_REDECLARE] = MRP_APPLICANT_VP, + [MRP_EVENT_PERIODIC] = MRP_APPLICANT_AA, + }, + [MRP_APPLICANT_QA] = { + [MRP_EVENT_NEW] = MRP_APPLICANT_VN, + [MRP_EVENT_JOIN] = MRP_APPLICANT_QA, + [MRP_EVENT_LV] = MRP_APPLICANT_LA, + [MRP_EVENT_TX] = MRP_APPLICANT_QA, + [MRP_EVENT_R_NEW] = MRP_APPLICANT_QA, + [MRP_EVENT_R_JOIN_IN] = MRP_APPLICANT_QA, + [MRP_EVENT_R_IN] = MRP_APPLICANT_QA, + [MRP_EVENT_R_JOIN_MT] = MRP_APPLICANT_AA, + [MRP_EVENT_R_MT] = MRP_APPLICANT_AA, + [MRP_EVENT_R_LV] = MRP_APPLICANT_VP, + [MRP_EVENT_R_LA] = MRP_APPLICANT_VP, + [MRP_EVENT_REDECLARE] = MRP_APPLICANT_VP, + [MRP_EVENT_PERIODIC] = MRP_APPLICANT_AA, + }, + [MRP_APPLICANT_LA] = { + [MRP_EVENT_NEW] = MRP_APPLICANT_VN, + [MRP_EVENT_JOIN] = MRP_APPLICANT_AA, + [MRP_EVENT_LV] = MRP_APPLICANT_LA, + [MRP_EVENT_TX] = MRP_APPLICANT_VO, + [MRP_EVENT_R_NEW] = MRP_APPLICANT_LA, + [MRP_EVENT_R_JOIN_IN] = MRP_APPLICANT_LA, + [MRP_EVENT_R_IN] = MRP_APPLICANT_LA, + [MRP_EVENT_R_JOIN_MT] = MRP_APPLICANT_LA, + [MRP_EVENT_R_MT] = MRP_APPLICANT_LA, + [MRP_EVENT_R_LV] = MRP_APPLICANT_LA, + [MRP_EVENT_R_LA] = MRP_APPLICANT_LA, + [MRP_EVENT_REDECLARE] = MRP_APPLICANT_LA, + [MRP_EVENT_PERIODIC] = MRP_APPLICANT_LA, + }, + [MRP_APPLICANT_AO] = { + [MRP_EVENT_NEW] = MRP_APPLICANT_VN, + [MRP_EVENT_JOIN] = MRP_APPLICANT_AP, + [MRP_EVENT_LV] = MRP_APPLICANT_AO, + [MRP_EVENT_TX] = MRP_APPLICANT_AO, + [MRP_EVENT_R_NEW] = MRP_APPLICANT_AO, + [MRP_EVENT_R_JOIN_IN] = MRP_APPLICANT_QO, + [MRP_EVENT_R_IN] = MRP_APPLICANT_AO, + [MRP_EVENT_R_JOIN_MT] = MRP_APPLICANT_AO, + [MRP_EVENT_R_MT] = MRP_APPLICANT_AO, + [MRP_EVENT_R_LV] = MRP_APPLICANT_VO, + [MRP_EVENT_R_LA] = MRP_APPLICANT_VO, + [MRP_EVENT_REDECLARE] = MRP_APPLICANT_VO, + [MRP_EVENT_PERIODIC] = MRP_APPLICANT_AO, + }, + [MRP_APPLICANT_QO] = { + [MRP_EVENT_NEW] = MRP_APPLICANT_VN, + [MRP_EVENT_JOIN] = MRP_APPLICANT_QP, + [MRP_EVENT_LV] = MRP_APPLICANT_QO, + [MRP_EVENT_TX] = MRP_APPLICANT_QO, + [MRP_EVENT_R_NEW] = MRP_APPLICANT_QO, + [MRP_EVENT_R_JOIN_IN] = MRP_APPLICANT_QO, + [MRP_EVENT_R_IN] = MRP_APPLICANT_QO, + [MRP_EVENT_R_JOIN_MT] = MRP_APPLICANT_AO, + [MRP_EVENT_R_MT] = MRP_APPLICANT_AO, + [MRP_EVENT_R_LV] = MRP_APPLICANT_VO, + [MRP_EVENT_R_LA] 
= MRP_APPLICANT_VO, + [MRP_EVENT_REDECLARE] = MRP_APPLICANT_VO, + [MRP_EVENT_PERIODIC] = MRP_APPLICANT_QO, + }, + [MRP_APPLICANT_AP] = { + [MRP_EVENT_NEW] = MRP_APPLICANT_VN, + [MRP_EVENT_JOIN] = MRP_APPLICANT_AP, + [MRP_EVENT_LV] = MRP_APPLICANT_AO, + [MRP_EVENT_TX] = MRP_APPLICANT_QA, + [MRP_EVENT_R_NEW] = MRP_APPLICANT_AP, + [MRP_EVENT_R_JOIN_IN] = MRP_APPLICANT_QP, + [MRP_EVENT_R_IN] = MRP_APPLICANT_AP, + [MRP_EVENT_R_JOIN_MT] = MRP_APPLICANT_AP, + [MRP_EVENT_R_MT] = MRP_APPLICANT_AP, + [MRP_EVENT_R_LV] = MRP_APPLICANT_VP, + [MRP_EVENT_R_LA] = MRP_APPLICANT_VP, + [MRP_EVENT_REDECLARE] = MRP_APPLICANT_VP, + [MRP_EVENT_PERIODIC] = MRP_APPLICANT_AP, + }, + [MRP_APPLICANT_QP] = { + [MRP_EVENT_NEW] = MRP_APPLICANT_VN, + [MRP_EVENT_JOIN] = MRP_APPLICANT_QP, + [MRP_EVENT_LV] = MRP_APPLICANT_QO, + [MRP_EVENT_TX] = MRP_APPLICANT_QP, + [MRP_EVENT_R_NEW] = MRP_APPLICANT_QP, + [MRP_EVENT_R_JOIN_IN] = MRP_APPLICANT_QP, + [MRP_EVENT_R_IN] = MRP_APPLICANT_QP, + [MRP_EVENT_R_JOIN_MT] = MRP_APPLICANT_AP, + [MRP_EVENT_R_MT] = MRP_APPLICANT_AP, + [MRP_EVENT_R_LV] = MRP_APPLICANT_VP, + [MRP_EVENT_R_LA] = MRP_APPLICANT_VP, + [MRP_EVENT_REDECLARE] = MRP_APPLICANT_VP, + [MRP_EVENT_PERIODIC] = MRP_APPLICANT_AP, + }, +}; + +static const u8 +mrp_tx_action_table[MRP_APPLICANT_MAX + 1] = { + [MRP_APPLICANT_VO] = MRP_TX_ACTION_S_IN_OPTIONAL, + [MRP_APPLICANT_VP] = MRP_TX_ACTION_S_JOIN_IN, + [MRP_APPLICANT_VN] = MRP_TX_ACTION_S_NEW, + [MRP_APPLICANT_AN] = MRP_TX_ACTION_S_NEW, + [MRP_APPLICANT_AA] = MRP_TX_ACTION_S_JOIN_IN, + [MRP_APPLICANT_QA] = MRP_TX_ACTION_S_JOIN_IN_OPTIONAL, + [MRP_APPLICANT_LA] = MRP_TX_ACTION_S_LV, + [MRP_APPLICANT_AO] = MRP_TX_ACTION_S_IN_OPTIONAL, + [MRP_APPLICANT_QO] = MRP_TX_ACTION_S_IN_OPTIONAL, + [MRP_APPLICANT_AP] = MRP_TX_ACTION_S_JOIN_IN, + [MRP_APPLICANT_QP] = MRP_TX_ACTION_S_IN_OPTIONAL, +}; + +static void mrp_attrvalue_inc(void *value, u8 len) +{ + u8 *v = (u8 *)value; + + /* Add 1 to the last byte. If it becomes zero, + * go to the previous byte and repeat. + */ + while (len > 0 && !++v[--len]) + ; +} + +static int mrp_attr_cmp(const struct mrp_attr *attr, + const void *value, u8 len, u8 type) +{ + if (attr->type != type) + return attr->type - type; + if (attr->len != len) + return attr->len - len; + return memcmp(attr->value, value, len); +} + +static struct mrp_attr *mrp_attr_lookup(const struct mrp_applicant *app, + const void *value, u8 len, u8 type) +{ + struct rb_node *parent = app->mad.rb_node; + struct mrp_attr *attr; + int d; + + while (parent) { + attr = rb_entry(parent, struct mrp_attr, node); + d = mrp_attr_cmp(attr, value, len, type); + if (d > 0) + parent = parent->rb_left; + else if (d < 0) + parent = parent->rb_right; + else + return attr; + } + return NULL; +} + +static struct mrp_attr *mrp_attr_create(struct mrp_applicant *app, + const void *value, u8 len, u8 type) +{ + struct rb_node *parent = NULL, **p = &app->mad.rb_node; + struct mrp_attr *attr; + int d; + + while (*p) { + parent = *p; + attr = rb_entry(parent, struct mrp_attr, node); + d = mrp_attr_cmp(attr, value, len, type); + if (d > 0) + p = &parent->rb_left; + else if (d < 0) + p = &parent->rb_right; + else { + /* The attribute already exists; re-use it. 
*/ + return attr; + } + } + attr = kmalloc(sizeof(*attr) + len, GFP_ATOMIC); + if (!attr) + return attr; + attr->state = MRP_APPLICANT_VO; + attr->type = type; + attr->len = len; + memcpy(attr->value, value, len); + + rb_link_node(&attr->node, parent, p); + rb_insert_color(&attr->node, &app->mad); + return attr; +} + +static void mrp_attr_destroy(struct mrp_applicant *app, struct mrp_attr *attr) +{ + rb_erase(&attr->node, &app->mad); + kfree(attr); +} + +static int mrp_pdu_init(struct mrp_applicant *app) +{ + struct sk_buff *skb; + struct mrp_pdu_hdr *ph; + + skb = alloc_skb(app->dev->mtu + LL_RESERVED_SPACE(app->dev), + GFP_ATOMIC); + if (!skb) + return -ENOMEM; + + skb->dev = app->dev; + skb->protocol = app->app->pkttype.type; + skb_reserve(skb, LL_RESERVED_SPACE(app->dev)); + skb_reset_network_header(skb); + skb_reset_transport_header(skb); + + ph = (struct mrp_pdu_hdr *)__skb_put(skb, sizeof(*ph)); + ph->version = app->app->version; + + app->pdu = skb; + return 0; +} + +static int mrp_pdu_append_end_mark(struct mrp_applicant *app) +{ + __be16 *endmark; + + if (skb_tailroom(app->pdu) < sizeof(*endmark)) + return -1; + endmark = (__be16 *)__skb_put(app->pdu, sizeof(*endmark)); + put_unaligned(MRP_END_MARK, endmark); + return 0; +} + +static void mrp_pdu_queue(struct mrp_applicant *app) +{ + if (!app->pdu) + return; + + if (mrp_cb(app->pdu)->mh) + mrp_pdu_append_end_mark(app); + mrp_pdu_append_end_mark(app); + + dev_hard_header(app->pdu, app->dev, ntohs(app->app->pkttype.type), + app->app->group_address, app->dev->dev_addr, + app->pdu->len); + + skb_queue_tail(&app->queue, app->pdu); + app->pdu = NULL; +} + +static void mrp_queue_xmit(struct mrp_applicant *app) +{ + struct sk_buff *skb; + + while ((skb = skb_dequeue(&app->queue))) + dev_queue_xmit(skb); +} + +static int mrp_pdu_append_msg_hdr(struct mrp_applicant *app, + u8 attrtype, u8 attrlen) +{ + struct mrp_msg_hdr *mh; + + if (mrp_cb(app->pdu)->mh) { + if (mrp_pdu_append_end_mark(app) < 0) + return -1; + mrp_cb(app->pdu)->mh = NULL; + mrp_cb(app->pdu)->vah = NULL; + } + + if (skb_tailroom(app->pdu) < sizeof(*mh)) + return -1; + mh = (struct mrp_msg_hdr *)__skb_put(app->pdu, sizeof(*mh)); + mh->attrtype = attrtype; + mh->attrlen = attrlen; + mrp_cb(app->pdu)->mh = mh; + return 0; +} + +static int mrp_pdu_append_vecattr_hdr(struct mrp_applicant *app, + const void *firstattrvalue, u8 attrlen) +{ + struct mrp_vecattr_hdr *vah; + + if (skb_tailroom(app->pdu) < sizeof(*vah) + attrlen) + return -1; + vah = (struct mrp_vecattr_hdr *)__skb_put(app->pdu, + sizeof(*vah) + attrlen); + put_unaligned(0, &vah->lenflags); + memcpy(vah->firstattrvalue, firstattrvalue, attrlen); + mrp_cb(app->pdu)->vah = vah; + memcpy(mrp_cb(app->pdu)->attrvalue, firstattrvalue, attrlen); + return 0; +} + +static int mrp_pdu_append_vecattr_event(struct mrp_applicant *app, + const struct mrp_attr *attr, + enum mrp_vecattr_event vaevent) +{ + u16 len, pos; + u8 *vaevents; + int err; +again: + if (!app->pdu) { + err = mrp_pdu_init(app); + if (err < 0) + return err; + } + + /* If there is no Message header in the PDU, or the Message header is + * for a different attribute type, add an EndMark (if necessary) and a + * new Message header to the PDU. 
+ */ + if (!mrp_cb(app->pdu)->mh || + mrp_cb(app->pdu)->mh->attrtype != attr->type || + mrp_cb(app->pdu)->mh->attrlen != attr->len) { + if (mrp_pdu_append_msg_hdr(app, attr->type, attr->len) < 0) + goto queue; + } + + /* If there is no VectorAttribute header for this Message in the PDU, + * or this attribute's value does not sequentially follow the previous + * attribute's value, add a new VectorAttribute header to the PDU. + */ + if (!mrp_cb(app->pdu)->vah || + memcmp(mrp_cb(app->pdu)->attrvalue, attr->value, attr->len)) { + if (mrp_pdu_append_vecattr_hdr(app, attr->value, attr->len) < 0) + goto queue; + } + + len = be16_to_cpu(get_unaligned(&mrp_cb(app->pdu)->vah->lenflags)); + pos = len % 3; + + /* Events are packed into Vectors in the PDU, three to a byte. Add a + * byte to the end of the Vector if necessary. + */ + if (!pos) { + if (skb_tailroom(app->pdu) < sizeof(u8)) + goto queue; + vaevents = (u8 *)__skb_put(app->pdu, sizeof(u8)); + } else { + vaevents = (u8 *)(skb_tail_pointer(app->pdu) - sizeof(u8)); + } + + switch (pos) { + case 0: + *vaevents = vaevent * (__MRP_VECATTR_EVENT_MAX * + __MRP_VECATTR_EVENT_MAX); + break; + case 1: + *vaevents += vaevent * __MRP_VECATTR_EVENT_MAX; + break; + case 2: + *vaevents += vaevent; + break; + default: + WARN_ON(1); + } + + /* Increment the length of the VectorAttribute in the PDU, as well as + * the value of the next attribute that would continue its Vector. + */ + put_unaligned(cpu_to_be16(++len), &mrp_cb(app->pdu)->vah->lenflags); + mrp_attrvalue_inc(mrp_cb(app->pdu)->attrvalue, attr->len); + + return 0; + +queue: + mrp_pdu_queue(app); + goto again; +} + +static void mrp_attr_event(struct mrp_applicant *app, + struct mrp_attr *attr, enum mrp_event event) +{ + enum mrp_applicant_state state; + + state = mrp_applicant_state_table[attr->state][event]; + if (state == MRP_APPLICANT_INVALID) { + WARN_ON(1); + return; + } + + if (event == MRP_EVENT_TX) { + /* When appending the attribute fails, don't update its state + * in order to retry at the next TX event. + */ + + switch (mrp_tx_action_table[attr->state]) { + case MRP_TX_ACTION_NONE: + case MRP_TX_ACTION_S_JOIN_IN_OPTIONAL: + case MRP_TX_ACTION_S_IN_OPTIONAL: + break; + case MRP_TX_ACTION_S_NEW: + if (mrp_pdu_append_vecattr_event( + app, attr, MRP_VECATTR_EVENT_NEW) < 0) + return; + break; + case MRP_TX_ACTION_S_JOIN_IN: + if (mrp_pdu_append_vecattr_event( + app, attr, MRP_VECATTR_EVENT_JOIN_IN) < 0) + return; + break; + case MRP_TX_ACTION_S_LV: + if (mrp_pdu_append_vecattr_event( + app, attr, MRP_VECATTR_EVENT_LV) < 0) + return; + /* As a pure applicant, sending a leave message + * implies that the attribute was unregistered and + * can be destroyed. 
+ */ + mrp_attr_destroy(app, attr); + return; + default: + WARN_ON(1); + } + } + + attr->state = state; +} + +int mrp_request_join(const struct net_device *dev, + const struct mrp_application *appl, + const void *value, u8 len, u8 type) +{ + struct mrp_port *port = rtnl_dereference(dev->mrp_port); + struct mrp_applicant *app = rtnl_dereference( + port->applicants[appl->type]); + struct mrp_attr *attr; + + if (sizeof(struct mrp_skb_cb) + len > + FIELD_SIZEOF(struct sk_buff, cb)) + return -ENOMEM; + + spin_lock_bh(&app->lock); + attr = mrp_attr_create(app, value, len, type); + if (!attr) { + spin_unlock_bh(&app->lock); + return -ENOMEM; + } + mrp_attr_event(app, attr, MRP_EVENT_JOIN); + spin_unlock_bh(&app->lock); + return 0; +} +EXPORT_SYMBOL_GPL(mrp_request_join); + +void mrp_request_leave(const struct net_device *dev, + const struct mrp_application *appl, + const void *value, u8 len, u8 type) +{ + struct mrp_port *port = rtnl_dereference(dev->mrp_port); + struct mrp_applicant *app = rtnl_dereference( + port->applicants[appl->type]); + struct mrp_attr *attr; + + if (sizeof(struct mrp_skb_cb) + len > + FIELD_SIZEOF(struct sk_buff, cb)) + return; + + spin_lock_bh(&app->lock); + attr = mrp_attr_lookup(app, value, len, type); + if (!attr) { + spin_unlock_bh(&app->lock); + return; + } + mrp_attr_event(app, attr, MRP_EVENT_LV); + spin_unlock_bh(&app->lock); +} +EXPORT_SYMBOL_GPL(mrp_request_leave); + +static void mrp_mad_event(struct mrp_applicant *app, enum mrp_event event) +{ + struct rb_node *node, *next; + struct mrp_attr *attr; + + for (node = rb_first(&app->mad); + next = node ? rb_next(node) : NULL, node != NULL; + node = next) { + attr = rb_entry(node, struct mrp_attr, node); + mrp_attr_event(app, attr, event); + } +} + +static void mrp_join_timer_arm(struct mrp_applicant *app) +{ + unsigned long delay; + + delay = (u64)msecs_to_jiffies(mrp_join_time) * net_random() >> 32; + mod_timer(&app->join_timer, jiffies + delay); +} + +static void mrp_join_timer(unsigned long data) +{ + struct mrp_applicant *app = (struct mrp_applicant *)data; + + spin_lock(&app->lock); + mrp_mad_event(app, MRP_EVENT_TX); + mrp_pdu_queue(app); + spin_unlock(&app->lock); + + mrp_queue_xmit(app); + mrp_join_timer_arm(app); +} + +static int mrp_pdu_parse_end_mark(struct sk_buff *skb, int *offset) +{ + __be16 endmark; + + if (skb_copy_bits(skb, *offset, &endmark, sizeof(endmark)) < 0) + return -1; + if (endmark == MRP_END_MARK) { + *offset += sizeof(endmark); + return -1; + } + return 0; +} + +static void mrp_pdu_parse_vecattr_event(struct mrp_applicant *app, + struct sk_buff *skb, + enum mrp_vecattr_event vaevent) +{ + struct mrp_attr *attr; + enum mrp_event event; + + attr = mrp_attr_lookup(app, mrp_cb(skb)->attrvalue, + mrp_cb(skb)->mh->attrlen, + mrp_cb(skb)->mh->attrtype); + if (attr == NULL) + return; + + switch (vaevent) { + case MRP_VECATTR_EVENT_NEW: + event = MRP_EVENT_R_NEW; + break; + case MRP_VECATTR_EVENT_JOIN_IN: + event = MRP_EVENT_R_JOIN_IN; + break; + case MRP_VECATTR_EVENT_IN: + event = MRP_EVENT_R_IN; + break; + case MRP_VECATTR_EVENT_JOIN_MT: + event = MRP_EVENT_R_JOIN_MT; + break; + case MRP_VECATTR_EVENT_MT: + event = MRP_EVENT_R_MT; + break; + case MRP_VECATTR_EVENT_LV: + event = MRP_EVENT_R_LV; + break; + default: + return; + } + + mrp_attr_event(app, attr, event); +} + +static int mrp_pdu_parse_vecattr(struct mrp_applicant *app, + struct sk_buff *skb, int *offset) +{ + struct mrp_vecattr_hdr _vah; + u16 valen; + u8 vaevents, vaevent; + + mrp_cb(skb)->vah = skb_header_pointer(skb, *offset, 
sizeof(_vah), + &_vah); + if (!mrp_cb(skb)->vah) + return -1; + *offset += sizeof(_vah); + + if (get_unaligned(&mrp_cb(skb)->vah->lenflags) & + MRP_VECATTR_HDR_FLAG_LA) + mrp_mad_event(app, MRP_EVENT_R_LA); + valen = be16_to_cpu(get_unaligned(&mrp_cb(skb)->vah->lenflags) & + MRP_VECATTR_HDR_LEN_MASK); + + /* The VectorAttribute structure in a PDU carries event information + * about one or more attributes having consecutive values. Only the + * value for the first attribute is contained in the structure. So + * we make a copy of that value, and then increment it each time we + * advance to the next event in its Vector. + */ + if (sizeof(struct mrp_skb_cb) + mrp_cb(skb)->mh->attrlen > + FIELD_SIZEOF(struct sk_buff, cb)) + return -1; + if (skb_copy_bits(skb, *offset, mrp_cb(skb)->attrvalue, + mrp_cb(skb)->mh->attrlen) < 0) + return -1; + *offset += mrp_cb(skb)->mh->attrlen; + + /* In a VectorAttribute, the Vector contains events which are packed + * three to a byte. We process one byte of the Vector at a time. + */ + while (valen > 0) { + if (skb_copy_bits(skb, *offset, &vaevents, + sizeof(vaevents)) < 0) + return -1; + *offset += sizeof(vaevents); + + /* Extract and process the first event. */ + vaevent = vaevents / (__MRP_VECATTR_EVENT_MAX * + __MRP_VECATTR_EVENT_MAX); + if (vaevent >= __MRP_VECATTR_EVENT_MAX) { + /* The byte is malformed; stop processing. */ + return -1; + } + mrp_pdu_parse_vecattr_event(app, skb, vaevent); + + /* If present, extract and process the second event. */ + if (!--valen) + break; + mrp_attrvalue_inc(mrp_cb(skb)->attrvalue, + mrp_cb(skb)->mh->attrlen); + vaevents %= (__MRP_VECATTR_EVENT_MAX * + __MRP_VECATTR_EVENT_MAX); + vaevent = vaevents / __MRP_VECATTR_EVENT_MAX; + mrp_pdu_parse_vecattr_event(app, skb, vaevent); + + /* If present, extract and process the third event. */ + if (!--valen) + break; + mrp_attrvalue_inc(mrp_cb(skb)->attrvalue, + mrp_cb(skb)->mh->attrlen); + vaevents %= __MRP_VECATTR_EVENT_MAX; + vaevent = vaevents; + mrp_pdu_parse_vecattr_event(app, skb, vaevent); + } + return 0; +} + +static int mrp_pdu_parse_msg(struct mrp_applicant *app, struct sk_buff *skb, + int *offset) +{ + struct mrp_msg_hdr _mh; + + mrp_cb(skb)->mh = skb_header_pointer(skb, *offset, sizeof(_mh), &_mh); + if (!mrp_cb(skb)->mh) + return -1; + *offset += sizeof(_mh); + + if (mrp_cb(skb)->mh->attrtype == 0 || + mrp_cb(skb)->mh->attrtype > app->app->maxattr || + mrp_cb(skb)->mh->attrlen == 0) + return -1; + + while (skb->len > *offset) { + if (mrp_pdu_parse_end_mark(skb, offset) < 0) + break; + if (mrp_pdu_parse_vecattr(app, skb, offset) < 0) + return -1; + } + return 0; +} + +static int mrp_rcv(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pt, struct net_device *orig_dev) +{ + struct mrp_application *appl = container_of(pt, struct mrp_application, + pkttype); + struct mrp_port *port; + struct mrp_applicant *app; + struct mrp_pdu_hdr _ph; + const struct mrp_pdu_hdr *ph; + int offset = skb_network_offset(skb); + + /* If the interface is in promiscuous mode, drop the packet if + * it was unicast to another host. 
+ */ + if (unlikely(skb->pkt_type == PACKET_OTHERHOST)) + goto out; + skb = skb_share_check(skb, GFP_ATOMIC); + if (unlikely(!skb)) + goto out; + port = rcu_dereference(dev->mrp_port); + if (unlikely(!port)) + goto out; + app = rcu_dereference(port->applicants[appl->type]); + if (unlikely(!app)) + goto out; + + ph = skb_header_pointer(skb, offset, sizeof(_ph), &_ph); + if (!ph) + goto out; + offset += sizeof(_ph); + + if (ph->version != app->app->version) + goto out; + + spin_lock(&app->lock); + while (skb->len > offset) { + if (mrp_pdu_parse_end_mark(skb, &offset) < 0) + break; + if (mrp_pdu_parse_msg(app, skb, &offset) < 0) + break; + } + spin_unlock(&app->lock); +out: + kfree_skb(skb); + return 0; +} + +static int mrp_init_port(struct net_device *dev) +{ + struct mrp_port *port; + + port = kzalloc(sizeof(*port), GFP_KERNEL); + if (!port) + return -ENOMEM; + rcu_assign_pointer(dev->mrp_port, port); + return 0; +} + +static void mrp_release_port(struct net_device *dev) +{ + struct mrp_port *port = rtnl_dereference(dev->mrp_port); + unsigned int i; + + for (i = 0; i <= MRP_APPLICATION_MAX; i++) { + if (rtnl_dereference(port->applicants[i])) + return; + } + RCU_INIT_POINTER(dev->mrp_port, NULL); + kfree_rcu(port, rcu); +} + +int mrp_init_applicant(struct net_device *dev, struct mrp_application *appl) +{ + struct mrp_applicant *app; + int err; + + ASSERT_RTNL(); + + if (!rtnl_dereference(dev->mrp_port)) { + err = mrp_init_port(dev); + if (err < 0) + goto err1; + } + + err = -ENOMEM; + app = kzalloc(sizeof(*app), GFP_KERNEL); + if (!app) + goto err2; + + err = dev_mc_add(dev, appl->group_address); + if (err < 0) + goto err3; + + app->dev = dev; + app->app = appl; + app->mad = RB_ROOT; + spin_lock_init(&app->lock); + skb_queue_head_init(&app->queue); + rcu_assign_pointer(dev->mrp_port->applicants[appl->type], app); + setup_timer(&app->join_timer, mrp_join_timer, (unsigned long)app); + mrp_join_timer_arm(app); + return 0; + +err3: + kfree(app); +err2: + mrp_release_port(dev); +err1: + return err; +} +EXPORT_SYMBOL_GPL(mrp_init_applicant); + +void mrp_uninit_applicant(struct net_device *dev, struct mrp_application *appl) +{ + struct mrp_port *port = rtnl_dereference(dev->mrp_port); + struct mrp_applicant *app = rtnl_dereference( + port->applicants[appl->type]); + + ASSERT_RTNL(); + + RCU_INIT_POINTER(port->applicants[appl->type], NULL); + + /* Delete timer and generate a final TX event to flush out + * all pending messages before the applicant is gone. + */ + del_timer_sync(&app->join_timer); + + spin_lock(&app->lock); + mrp_mad_event(app, MRP_EVENT_TX); + mrp_pdu_queue(app); + spin_unlock(&app->lock); + + mrp_queue_xmit(app); + + dev_mc_del(dev, appl->group_address); + kfree_rcu(app, rcu); + mrp_release_port(dev); +} +EXPORT_SYMBOL_GPL(mrp_uninit_applicant); + +int mrp_register_application(struct mrp_application *appl) +{ + appl->pkttype.func = mrp_rcv; + dev_add_pack(&appl->pkttype); + return 0; +} +EXPORT_SYMBOL_GPL(mrp_register_application); + +void mrp_unregister_application(struct mrp_application *appl) +{ + dev_remove_pack(&appl->pkttype); +} +EXPORT_SYMBOL_GPL(mrp_unregister_application); diff --git a/net/8021q/Kconfig b/net/8021q/Kconfig index fa073a54963e..b85a91fa61f1 100644 --- a/net/8021q/Kconfig +++ b/net/8021q/Kconfig @@ -3,7 +3,7 @@ # config VLAN_8021Q - tristate "802.1Q VLAN Support" + tristate "802.1Q/802.1ad VLAN Support" ---help--- Select this and you will be able to create 802.1Q VLAN interfaces on your ethernet interfaces. 
802.1Q VLAN supports almost @@ -27,3 +27,14 @@ config VLAN_8021Q_GVRP automatic propagation of registered VLANs to switches. If unsure, say N. + +config VLAN_8021Q_MVRP + bool "MVRP (Multiple VLAN Registration Protocol) support" + depends on VLAN_8021Q + select MRP + help + Select this to enable MVRP end-system support. MVRP is used for + automatic propagation of registered VLANs to switches; it + supersedes GVRP and is not backwards-compatible. + + If unsure, say N. diff --git a/net/8021q/Makefile b/net/8021q/Makefile index 9f4f174ead1c..7bc8db08d7ef 100644 --- a/net/8021q/Makefile +++ b/net/8021q/Makefile @@ -6,5 +6,6 @@ obj-$(CONFIG_VLAN_8021Q) += 8021q.o 8021q-y := vlan.o vlan_dev.o vlan_netlink.o 8021q-$(CONFIG_VLAN_8021Q_GVRP) += vlan_gvrp.o +8021q-$(CONFIG_VLAN_8021Q_MVRP) += vlan_mvrp.o 8021q-$(CONFIG_PROC_FS) += vlanproc.o diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index a292e8050ef2..9424f3718ea7 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -51,14 +51,18 @@ const char vlan_version[] = DRV_VERSION; /* End of global variables definitions. */ -static int vlan_group_prealloc_vid(struct vlan_group *vg, u16 vlan_id) +static int vlan_group_prealloc_vid(struct vlan_group *vg, + __be16 vlan_proto, u16 vlan_id) { struct net_device **array; + unsigned int pidx, vidx; unsigned int size; ASSERT_RTNL(); - array = vg->vlan_devices_arrays[vlan_id / VLAN_GROUP_ARRAY_PART_LEN]; + pidx = vlan_proto_idx(vlan_proto); + vidx = vlan_id / VLAN_GROUP_ARRAY_PART_LEN; + array = vg->vlan_devices_arrays[pidx][vidx]; if (array != NULL) return 0; @@ -67,7 +71,7 @@ static int vlan_group_prealloc_vid(struct vlan_group *vg, u16 vlan_id) if (array == NULL) return -ENOBUFS; - vg->vlan_devices_arrays[vlan_id / VLAN_GROUP_ARRAY_PART_LEN] = array; + vg->vlan_devices_arrays[pidx][vidx] = array; return 0; } @@ -86,49 +90,49 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head) grp = &vlan_info->grp; - /* Take it out of our own structures, but be sure to interlock with - * HW accelerating devices or SW vlan input packet processing if - * VLAN is not 0 (leave it there for 802.1p). - */ - if (vlan_id) - vlan_vid_del(real_dev, vlan_id); - grp->nr_vlan_devs--; + if (vlan->flags & VLAN_FLAG_MVRP) + vlan_mvrp_request_leave(dev); if (vlan->flags & VLAN_FLAG_GVRP) vlan_gvrp_request_leave(dev); - vlan_group_set_device(grp, vlan_id, NULL); + vlan_group_set_device(grp, vlan->vlan_proto, vlan_id, NULL); /* Because unregister_netdevice_queue() makes sure at least one rcu * grace period is respected before device freeing, * we dont need to call synchronize_net() here. */ unregister_netdevice_queue(dev, head); - if (grp->nr_vlan_devs == 0) + netdev_upper_dev_unlink(real_dev, dev); + + if (grp->nr_vlan_devs == 0) { + vlan_mvrp_uninit_applicant(real_dev); vlan_gvrp_uninit_applicant(real_dev); + } + + /* Take it out of our own structures, but be sure to interlock with + * HW accelerating devices or SW vlan input packet processing if + * VLAN is not 0 (leave it there for 802.1p). 
+ */ + if (vlan_id) + vlan_vid_del(real_dev, vlan->vlan_proto, vlan_id); /* Get rid of the vlan's reference to real_dev */ dev_put(real_dev); } -int vlan_check_real_dev(struct net_device *real_dev, u16 vlan_id) +int vlan_check_real_dev(struct net_device *real_dev, + __be16 protocol, u16 vlan_id) { const char *name = real_dev->name; - const struct net_device_ops *ops = real_dev->netdev_ops; if (real_dev->features & NETIF_F_VLAN_CHALLENGED) { pr_info("VLANs not supported on %s\n", name); return -EOPNOTSUPP; } - if ((real_dev->features & NETIF_F_HW_VLAN_FILTER) && - (!ops->ndo_vlan_rx_add_vid || !ops->ndo_vlan_rx_kill_vid)) { - pr_info("Device %s has buggy VLAN hw accel\n", name); - return -EOPNOTSUPP; - } - - if (vlan_find_dev(real_dev, vlan_id) != NULL) + if (vlan_find_dev(real_dev, protocol, vlan_id) != NULL) return -EEXIST; return 0; @@ -143,7 +147,7 @@ int register_vlan_dev(struct net_device *dev) struct vlan_group *grp; int err; - err = vlan_vid_add(real_dev, vlan_id); + err = vlan_vid_add(real_dev, vlan->vlan_proto, vlan_id); if (err) return err; @@ -156,15 +160,22 @@ int register_vlan_dev(struct net_device *dev) err = vlan_gvrp_init_applicant(real_dev); if (err < 0) goto out_vid_del; + err = vlan_mvrp_init_applicant(real_dev); + if (err < 0) + goto out_uninit_gvrp; } - err = vlan_group_prealloc_vid(grp, vlan_id); + err = vlan_group_prealloc_vid(grp, vlan->vlan_proto, vlan_id); if (err < 0) - goto out_uninit_applicant; + goto out_uninit_mvrp; + + err = netdev_upper_dev_link(real_dev, dev); + if (err) + goto out_uninit_mvrp; err = register_netdevice(dev); if (err < 0) - goto out_uninit_applicant; + goto out_upper_dev_unlink; /* Account for reference in struct vlan_dev_priv */ dev_hold(real_dev); @@ -175,16 +186,21 @@ int register_vlan_dev(struct net_device *dev) /* So, got the sucker initialized, now lets place * it into our local structure. 
*/ - vlan_group_set_device(grp, vlan_id, dev); + vlan_group_set_device(grp, vlan->vlan_proto, vlan_id, dev); grp->nr_vlan_devs++; return 0; -out_uninit_applicant: +out_upper_dev_unlink: + netdev_upper_dev_unlink(real_dev, dev); +out_uninit_mvrp: + if (grp->nr_vlan_devs == 0) + vlan_mvrp_uninit_applicant(real_dev); +out_uninit_gvrp: if (grp->nr_vlan_devs == 0) vlan_gvrp_uninit_applicant(real_dev); out_vid_del: - vlan_vid_del(real_dev, vlan_id); + vlan_vid_del(real_dev, vlan->vlan_proto, vlan_id); return err; } @@ -202,7 +218,7 @@ static int register_vlan_device(struct net_device *real_dev, u16 vlan_id) if (vlan_id >= VLAN_VID_MASK) return -ERANGE; - err = vlan_check_real_dev(real_dev, vlan_id); + err = vlan_check_real_dev(real_dev, htons(ETH_P_8021Q), vlan_id); if (err < 0) return err; @@ -244,6 +260,7 @@ static int register_vlan_device(struct net_device *real_dev, u16 vlan_id) new_dev->mtu = real_dev->mtu; new_dev->priv_flags |= (real_dev->priv_flags & IFF_UNICAST_FLT); + vlan_dev_priv(new_dev)->vlan_proto = htons(ETH_P_8021Q); vlan_dev_priv(new_dev)->vlan_id = vlan_id; vlan_dev_priv(new_dev)->real_dev = real_dev; vlan_dev_priv(new_dev)->dent = NULL; @@ -290,7 +307,7 @@ static void vlan_transfer_features(struct net_device *dev, { vlandev->gso_max_size = dev->gso_max_size; - if (dev->features & NETIF_F_HW_VLAN_TX) + if (dev->features & NETIF_F_HW_VLAN_CTAG_TX) vlandev->hard_header_len = dev->hard_header_len; else vlandev->hard_header_len = dev->hard_header_len + VLAN_HLEN; @@ -330,16 +347,17 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event, int i, flgs; struct net_device *vlandev; struct vlan_dev_priv *vlan; + bool last = false; LIST_HEAD(list); if (is_vlan_dev(dev)) __vlan_device_event(dev, event); if ((event == NETDEV_UP) && - (dev->features & NETIF_F_HW_VLAN_FILTER)) { + (dev->features & NETIF_F_HW_VLAN_CTAG_FILTER)) { pr_info("adding VLAN 0 to HW filter on device %s\n", dev->name); - vlan_vid_add(dev, 0); + vlan_vid_add(dev, htons(ETH_P_8021Q), 0); } vlan_info = rtnl_dereference(dev->vlan_info); @@ -354,22 +372,13 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event, switch (event) { case NETDEV_CHANGE: /* Propagate real device state to vlan devices */ - for (i = 0; i < VLAN_N_VID; i++) { - vlandev = vlan_group_get_device(grp, i); - if (!vlandev) - continue; - + vlan_group_for_each_dev(grp, i, vlandev) netif_stacked_transfer_operstate(dev, vlandev); - } break; case NETDEV_CHANGEADDR: /* Adjust unicast filters on underlying device */ - for (i = 0; i < VLAN_N_VID; i++) { - vlandev = vlan_group_get_device(grp, i); - if (!vlandev) - continue; - + vlan_group_for_each_dev(grp, i, vlandev) { flgs = vlandev->flags; if (!(flgs & IFF_UP)) continue; @@ -379,11 +388,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event, break; case NETDEV_CHANGEMTU: - for (i = 0; i < VLAN_N_VID; i++) { - vlandev = vlan_group_get_device(grp, i); - if (!vlandev) - continue; - + vlan_group_for_each_dev(grp, i, vlandev) { if (vlandev->mtu <= dev->mtu) continue; @@ -393,26 +398,16 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event, case NETDEV_FEAT_CHANGE: /* Propagate device features to underlying device */ - for (i = 0; i < VLAN_N_VID; i++) { - vlandev = vlan_group_get_device(grp, i); - if (!vlandev) - continue; - + vlan_group_for_each_dev(grp, i, vlandev) vlan_transfer_features(dev, vlandev); - } - break; case NETDEV_DOWN: - if (dev->features & NETIF_F_HW_VLAN_FILTER) - vlan_vid_del(dev, 0); 
+ if (dev->features & NETIF_F_HW_VLAN_CTAG_FILTER) + vlan_vid_del(dev, htons(ETH_P_8021Q), 0); /* Put all VLANs for this dev in the down state too. */ - for (i = 0; i < VLAN_N_VID; i++) { - vlandev = vlan_group_get_device(grp, i); - if (!vlandev) - continue; - + vlan_group_for_each_dev(grp, i, vlandev) { flgs = vlandev->flags; if (!(flgs & IFF_UP)) continue; @@ -426,11 +421,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event, case NETDEV_UP: /* Put all VLANs for this dev in the up state too. */ - for (i = 0; i < VLAN_N_VID; i++) { - vlandev = vlan_group_get_device(grp, i); - if (!vlandev) - continue; - + vlan_group_for_each_dev(grp, i, vlandev) { flgs = vlandev->flags; if (flgs & IFF_UP) continue; @@ -447,17 +438,15 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event, if (dev->reg_state != NETREG_UNREGISTERING) break; - for (i = 0; i < VLAN_N_VID; i++) { - vlandev = vlan_group_get_device(grp, i); - if (!vlandev) - continue; - + vlan_group_for_each_dev(grp, i, vlandev) { /* removal of last vid destroys vlan_info, abort * afterwards */ if (vlan_info->nr_vids == 1) - i = VLAN_N_VID; + last = true; unregister_vlan_dev(vlandev, &list); + if (last) + break; } unregister_netdevice_many(&list); break; @@ -471,13 +460,8 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event, case NETDEV_NOTIFY_PEERS: case NETDEV_BONDING_FAILOVER: /* Propagate to vlan devices */ - for (i = 0; i < VLAN_N_VID; i++) { - vlandev = vlan_group_get_device(grp, i); - if (!vlandev) - continue; - + vlan_group_for_each_dev(grp, i, vlandev) call_netdevice_notifiers(event, vlandev); - } break; } @@ -654,13 +638,19 @@ static int __init vlan_proto_init(void) if (err < 0) goto err3; - err = vlan_netlink_init(); + err = vlan_mvrp_init(); if (err < 0) goto err4; + err = vlan_netlink_init(); + if (err < 0) + goto err5; + vlan_ioctl_set(vlan_ioctl_handler); return 0; +err5: + vlan_mvrp_uninit(); err4: vlan_gvrp_uninit(); err3: @@ -681,6 +671,7 @@ static void __exit vlan_cleanup_module(void) unregister_pernet_subsys(&vlan_net_ops); rcu_barrier(); /* Wait for completion of call_rcu()'s */ + vlan_mvrp_uninit(); vlan_gvrp_uninit(); } diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h index a4886d94c40c..ba5983f34c42 100644 --- a/net/8021q/vlan.h +++ b/net/8021q/vlan.h @@ -49,6 +49,7 @@ struct netpoll; * @ingress_priority_map: ingress priority mappings * @nr_egress_mappings: number of egress priority mappings * @egress_priority_map: hash of egress priority mappings + * @vlan_proto: VLAN encapsulation protocol * @vlan_id: VLAN identifier * @flags: device flags * @real_dev: underlying netdevice @@ -62,6 +63,7 @@ struct vlan_dev_priv { unsigned int nr_egress_mappings; struct vlan_priority_tci_mapping *egress_priority_map[16]; + __be16 vlan_proto; u16 vlan_id; u16 flags; @@ -87,10 +89,17 @@ static inline struct vlan_dev_priv *vlan_dev_priv(const struct net_device *dev) #define VLAN_GROUP_ARRAY_SPLIT_PARTS 8 #define VLAN_GROUP_ARRAY_PART_LEN (VLAN_N_VID/VLAN_GROUP_ARRAY_SPLIT_PARTS) +enum vlan_protos { + VLAN_PROTO_8021Q = 0, + VLAN_PROTO_8021AD, + VLAN_PROTO_NUM, +}; + struct vlan_group { unsigned int nr_vlan_devs; struct hlist_node hlist; /* linked list */ - struct net_device **vlan_devices_arrays[VLAN_GROUP_ARRAY_SPLIT_PARTS]; + struct net_device **vlan_devices_arrays[VLAN_PROTO_NUM] + [VLAN_GROUP_ARRAY_SPLIT_PARTS]; }; struct vlan_info { @@ -103,37 +112,67 @@ struct vlan_info { struct rcu_head rcu; }; -static inline struct net_device 
*vlan_group_get_device(struct vlan_group *vg, - u16 vlan_id) +static inline unsigned int vlan_proto_idx(__be16 proto) +{ + switch (proto) { + case __constant_htons(ETH_P_8021Q): + return VLAN_PROTO_8021Q; + case __constant_htons(ETH_P_8021AD): + return VLAN_PROTO_8021AD; + default: + BUG(); + return 0; + } +} + +static inline struct net_device *__vlan_group_get_device(struct vlan_group *vg, + unsigned int pidx, + u16 vlan_id) { struct net_device **array; - array = vg->vlan_devices_arrays[vlan_id / VLAN_GROUP_ARRAY_PART_LEN]; + + array = vg->vlan_devices_arrays[pidx] + [vlan_id / VLAN_GROUP_ARRAY_PART_LEN]; return array ? array[vlan_id % VLAN_GROUP_ARRAY_PART_LEN] : NULL; } +static inline struct net_device *vlan_group_get_device(struct vlan_group *vg, + __be16 vlan_proto, + u16 vlan_id) +{ + return __vlan_group_get_device(vg, vlan_proto_idx(vlan_proto), vlan_id); +} + static inline void vlan_group_set_device(struct vlan_group *vg, - u16 vlan_id, + __be16 vlan_proto, u16 vlan_id, struct net_device *dev) { struct net_device **array; if (!vg) return; - array = vg->vlan_devices_arrays[vlan_id / VLAN_GROUP_ARRAY_PART_LEN]; + array = vg->vlan_devices_arrays[vlan_proto_idx(vlan_proto)] + [vlan_id / VLAN_GROUP_ARRAY_PART_LEN]; array[vlan_id % VLAN_GROUP_ARRAY_PART_LEN] = dev; } /* Must be invoked with rcu_read_lock or with RTNL. */ static inline struct net_device *vlan_find_dev(struct net_device *real_dev, - u16 vlan_id) + __be16 vlan_proto, u16 vlan_id) { struct vlan_info *vlan_info = rcu_dereference_rtnl(real_dev->vlan_info); if (vlan_info) - return vlan_group_get_device(&vlan_info->grp, vlan_id); + return vlan_group_get_device(&vlan_info->grp, + vlan_proto, vlan_id); return NULL; } +#define vlan_group_for_each_dev(grp, i, dev) \ + for ((i) = 0; i < VLAN_PROTO_NUM * VLAN_N_VID; i++) \ + if (((dev) = __vlan_group_get_device((grp), (i) / VLAN_N_VID, \ + (i) % VLAN_N_VID))) + /* found in vlan_dev.c */ void vlan_dev_set_ingress_priority(const struct net_device *dev, u32 skb_prio, u16 vlan_prio); @@ -142,7 +181,8 @@ int vlan_dev_set_egress_priority(const struct net_device *dev, int vlan_dev_change_flags(const struct net_device *dev, u32 flag, u32 mask); void vlan_dev_get_realdev_name(const struct net_device *dev, char *result); -int vlan_check_real_dev(struct net_device *real_dev, u16 vlan_id); +int vlan_check_real_dev(struct net_device *real_dev, + __be16 protocol, u16 vlan_id); void vlan_setup(struct net_device *dev); int register_vlan_dev(struct net_device *dev); void unregister_vlan_dev(struct net_device *dev, struct list_head *head); @@ -171,6 +211,22 @@ static inline int vlan_gvrp_init(void) { return 0; } static inline void vlan_gvrp_uninit(void) {} #endif +#ifdef CONFIG_VLAN_8021Q_MVRP +extern int vlan_mvrp_request_join(const struct net_device *dev); +extern void vlan_mvrp_request_leave(const struct net_device *dev); +extern int vlan_mvrp_init_applicant(struct net_device *dev); +extern void vlan_mvrp_uninit_applicant(struct net_device *dev); +extern int vlan_mvrp_init(void); +extern void vlan_mvrp_uninit(void); +#else +static inline int vlan_mvrp_request_join(const struct net_device *dev) { return 0; } +static inline void vlan_mvrp_request_leave(const struct net_device *dev) {} +static inline int vlan_mvrp_init_applicant(struct net_device *dev) { return 0; } +static inline void vlan_mvrp_uninit_applicant(struct net_device *dev) {} +static inline int vlan_mvrp_init(void) { return 0; } +static inline void vlan_mvrp_uninit(void) {} +#endif + extern const char vlan_fullname[]; extern const char 
vlan_version[]; extern int vlan_netlink_init(void); diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c index 65e06abe023f..8a15eaadc4bd 100644 --- a/net/8021q/vlan_core.c +++ b/net/8021q/vlan_core.c @@ -8,11 +8,12 @@ bool vlan_do_receive(struct sk_buff **skbp) { struct sk_buff *skb = *skbp; + __be16 vlan_proto = skb->vlan_proto; u16 vlan_id = skb->vlan_tci & VLAN_VID_MASK; struct net_device *vlan_dev; struct vlan_pcpu_stats *rx_stats; - vlan_dev = vlan_find_dev(skb->dev, vlan_id); + vlan_dev = vlan_find_dev(skb->dev, vlan_proto, vlan_id); if (!vlan_dev) return false; @@ -38,7 +39,8 @@ bool vlan_do_receive(struct sk_buff **skbp) * original position later */ skb_push(skb, offset); - skb = *skbp = vlan_insert_tag(skb, skb->vlan_tci); + skb = *skbp = vlan_insert_tag(skb, skb->vlan_proto, + skb->vlan_tci); if (!skb) return false; skb_pull(skb, offset + VLAN_HLEN); @@ -60,21 +62,27 @@ bool vlan_do_receive(struct sk_buff **skbp) return true; } -/* Must be invoked with rcu_read_lock or with RTNL. */ -struct net_device *__vlan_find_dev_deep(struct net_device *real_dev, - u16 vlan_id) +/* Must be invoked with rcu_read_lock. */ +struct net_device *__vlan_find_dev_deep(struct net_device *dev, + __be16 vlan_proto, u16 vlan_id) { - struct vlan_info *vlan_info = rcu_dereference_rtnl(real_dev->vlan_info); + struct vlan_info *vlan_info = rcu_dereference(dev->vlan_info); if (vlan_info) { - return vlan_group_get_device(&vlan_info->grp, vlan_id); + return vlan_group_get_device(&vlan_info->grp, + vlan_proto, vlan_id); } else { /* - * Bonding slaves do not have grp assigned to themselves. - * Grp is assigned to bonding master instead. + * Lower devices of master uppers (bonding, team) do not have + * grp assigned to themselves. Grp is assigned to upper device + * instead. 
*/ - if (netif_is_bond_slave(real_dev)) - return __vlan_find_dev_deep(real_dev->master, vlan_id); + struct net_device *upper_dev; + + upper_dev = netdev_master_upper_dev_get_rcu(dev); + if (upper_dev) + return __vlan_find_dev_deep(upper_dev, + vlan_proto, vlan_id); } return NULL; @@ -121,7 +129,7 @@ struct sk_buff *vlan_untag(struct sk_buff *skb) vhdr = (struct vlan_hdr *) skb->data; vlan_tci = ntohs(vhdr->h_vlan_TCI); - __vlan_hwaccel_put_tag(skb, vlan_tci); + __vlan_hwaccel_put_tag(skb, skb->protocol, vlan_tci); skb_pull_rcsum(skb, VLAN_HLEN); vlan_set_encap_proto(skb, vhdr); @@ -140,6 +148,7 @@ err_free: kfree_skb(skb); return NULL; } +EXPORT_SYMBOL(vlan_untag); /* @@ -148,10 +157,11 @@ err_free: static void vlan_group_free(struct vlan_group *grp) { - int i; + int i, j; - for (i = 0; i < VLAN_GROUP_ARRAY_SPLIT_PARTS; i++) - kfree(grp->vlan_devices_arrays[i]); + for (i = 0; i < VLAN_PROTO_NUM; i++) + for (j = 0; j < VLAN_GROUP_ARRAY_SPLIT_PARTS; j++) + kfree(grp->vlan_devices_arrays[i][j]); } static void vlan_info_free(struct vlan_info *vlan_info) @@ -180,35 +190,49 @@ static struct vlan_info *vlan_info_alloc(struct net_device *dev) struct vlan_vid_info { struct list_head list; - unsigned short vid; + __be16 proto; + u16 vid; int refcount; }; +static bool vlan_hw_filter_capable(const struct net_device *dev, + const struct vlan_vid_info *vid_info) +{ + if (vid_info->proto == htons(ETH_P_8021Q) && + dev->features & NETIF_F_HW_VLAN_CTAG_FILTER) + return true; + if (vid_info->proto == htons(ETH_P_8021AD) && + dev->features & NETIF_F_HW_VLAN_STAG_FILTER) + return true; + return false; +} + static struct vlan_vid_info *vlan_vid_info_get(struct vlan_info *vlan_info, - unsigned short vid) + __be16 proto, u16 vid) { struct vlan_vid_info *vid_info; list_for_each_entry(vid_info, &vlan_info->vid_list, list) { - if (vid_info->vid == vid) + if (vid_info->proto == proto && vid_info->vid == vid) return vid_info; } return NULL; } -static struct vlan_vid_info *vlan_vid_info_alloc(unsigned short vid) +static struct vlan_vid_info *vlan_vid_info_alloc(__be16 proto, u16 vid) { struct vlan_vid_info *vid_info; vid_info = kzalloc(sizeof(struct vlan_vid_info), GFP_KERNEL); if (!vid_info) return NULL; + vid_info->proto = proto; vid_info->vid = vid; return vid_info; } -static int __vlan_vid_add(struct vlan_info *vlan_info, unsigned short vid, +static int __vlan_vid_add(struct vlan_info *vlan_info, __be16 proto, u16 vid, struct vlan_vid_info **pvid_info) { struct net_device *dev = vlan_info->real_dev; @@ -216,13 +240,12 @@ static int __vlan_vid_add(struct vlan_info *vlan_info, unsigned short vid, struct vlan_vid_info *vid_info; int err; - vid_info = vlan_vid_info_alloc(vid); + vid_info = vlan_vid_info_alloc(proto, vid); if (!vid_info) return -ENOMEM; - if ((dev->features & NETIF_F_HW_VLAN_FILTER) && - ops->ndo_vlan_rx_add_vid) { - err = ops->ndo_vlan_rx_add_vid(dev, vid); + if (vlan_hw_filter_capable(dev, vid_info)) { + err = ops->ndo_vlan_rx_add_vid(dev, proto, vid); if (err) { kfree(vid_info); return err; @@ -234,7 +257,7 @@ static int __vlan_vid_add(struct vlan_info *vlan_info, unsigned short vid, return 0; } -int vlan_vid_add(struct net_device *dev, unsigned short vid) +int vlan_vid_add(struct net_device *dev, __be16 proto, u16 vid) { struct vlan_info *vlan_info; struct vlan_vid_info *vid_info; @@ -250,9 +273,9 @@ int vlan_vid_add(struct net_device *dev, unsigned short vid) return -ENOMEM; vlan_info_created = true; } - vid_info = vlan_vid_info_get(vlan_info, vid); + vid_info = vlan_vid_info_get(vlan_info, proto, 
vid); if (!vid_info) { - err = __vlan_vid_add(vlan_info, vid, &vid_info); + err = __vlan_vid_add(vlan_info, proto, vid, &vid_info); if (err) goto out_free_vlan_info; } @@ -275,15 +298,15 @@ static void __vlan_vid_del(struct vlan_info *vlan_info, { struct net_device *dev = vlan_info->real_dev; const struct net_device_ops *ops = dev->netdev_ops; - unsigned short vid = vid_info->vid; + __be16 proto = vid_info->proto; + u16 vid = vid_info->vid; int err; - if ((dev->features & NETIF_F_HW_VLAN_FILTER) && - ops->ndo_vlan_rx_kill_vid) { - err = ops->ndo_vlan_rx_kill_vid(dev, vid); + if (vlan_hw_filter_capable(dev, vid_info)) { + err = ops->ndo_vlan_rx_kill_vid(dev, proto, vid); if (err) { - pr_warn("failed to kill vid %d for device %s\n", - vid, dev->name); + pr_warn("failed to kill vid %04x/%d for device %s\n", + proto, vid, dev->name); } } list_del(&vid_info->list); @@ -291,7 +314,7 @@ static void __vlan_vid_del(struct vlan_info *vlan_info, vlan_info->nr_vids--; } -void vlan_vid_del(struct net_device *dev, unsigned short vid) +void vlan_vid_del(struct net_device *dev, __be16 proto, u16 vid) { struct vlan_info *vlan_info; struct vlan_vid_info *vid_info; @@ -302,7 +325,7 @@ void vlan_vid_del(struct net_device *dev, unsigned short vid) if (!vlan_info) return; - vid_info = vlan_vid_info_get(vlan_info, vid); + vid_info = vlan_vid_info_get(vlan_info, proto, vid); if (!vid_info) return; vid_info->refcount--; @@ -330,7 +353,7 @@ int vlan_vids_add_by_dev(struct net_device *dev, return 0; list_for_each_entry(vid_info, &vlan_info->vid_list, list) { - err = vlan_vid_add(dev, vid_info->vid); + err = vlan_vid_add(dev, vid_info->proto, vid_info->vid); if (err) goto unwind; } @@ -340,7 +363,7 @@ unwind: list_for_each_entry_continue_reverse(vid_info, &vlan_info->vid_list, list) { - vlan_vid_del(dev, vid_info->vid); + vlan_vid_del(dev, vid_info->proto, vid_info->vid); } return err; @@ -360,7 +383,7 @@ void vlan_vids_del_by_dev(struct net_device *dev, return; list_for_each_entry(vid_info, &vlan_info->vid_list, list) - vlan_vid_del(dev, vid_info->vid); + vlan_vid_del(dev, vid_info->proto, vid_info->vid); } EXPORT_SYMBOL(vlan_vids_del_by_dev); diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 4a6d31a082b9..3a8c8fd63c88 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -99,6 +99,7 @@ static int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev, const void *daddr, const void *saddr, unsigned int len) { + struct vlan_dev_priv *vlan = vlan_dev_priv(dev); struct vlan_hdr *vhdr; unsigned int vhdrlen = 0; u16 vlan_tci = 0; @@ -120,8 +121,8 @@ static int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev, else vhdr->h_vlan_encapsulated_proto = htons(len); - skb->protocol = htons(ETH_P_8021Q); - type = ETH_P_8021Q; + skb->protocol = vlan->vlan_proto; + type = ntohs(vlan->vlan_proto); vhdrlen = VLAN_HLEN; } @@ -161,12 +162,12 @@ static netdev_tx_t vlan_dev_hard_start_xmit(struct sk_buff *skb, * NOTE: THIS ASSUMES DIX ETHERNET, SPECIFICALLY NOT SUPPORTING * OTHER THINGS LIKE FDDI/TokenRing/802.3 SNAPs... 
*/ - if (veth->h_vlan_proto != htons(ETH_P_8021Q) || + if (veth->h_vlan_proto != vlan->vlan_proto || vlan->flags & VLAN_FLAG_REORDER_HDR) { u16 vlan_tci; vlan_tci = vlan->vlan_id; vlan_tci |= vlan_dev_get_egress_qos_mask(dev, skb); - skb = __vlan_hwaccel_put_tag(skb, vlan_tci); + skb = __vlan_hwaccel_put_tag(skb, vlan->vlan_proto, vlan_tci); } skb->dev = vlan->real_dev; @@ -261,7 +262,7 @@ int vlan_dev_change_flags(const struct net_device *dev, u32 flags, u32 mask) u32 old_flags = vlan->flags; if (mask & ~(VLAN_FLAG_REORDER_HDR | VLAN_FLAG_GVRP | - VLAN_FLAG_LOOSE_BINDING)) + VLAN_FLAG_LOOSE_BINDING | VLAN_FLAG_MVRP)) return -EINVAL; vlan->flags = (old_flags & ~mask) | (flags & mask); @@ -272,6 +273,13 @@ int vlan_dev_change_flags(const struct net_device *dev, u32 flags, u32 mask) else vlan_gvrp_request_leave(dev); } + + if (netif_running(dev) && (vlan->flags ^ old_flags) & VLAN_FLAG_MVRP) { + if (vlan->flags & VLAN_FLAG_MVRP) + vlan_mvrp_request_join(dev); + else + vlan_mvrp_request_leave(dev); + } return 0; } @@ -312,6 +320,9 @@ static int vlan_dev_open(struct net_device *dev) if (vlan->flags & VLAN_FLAG_GVRP) vlan_gvrp_request_join(dev); + if (vlan->flags & VLAN_FLAG_MVRP) + vlan_mvrp_request_join(dev); + if (netif_carrier_ok(real_dev)) netif_carrier_on(dev); return 0; @@ -573,7 +584,7 @@ static int vlan_dev_init(struct net_device *dev) #endif dev->needed_headroom = real_dev->needed_headroom; - if (real_dev->features & NETIF_F_HW_VLAN_TX) { + if (real_dev->features & NETIF_F_HW_VLAN_CTAG_TX) { dev->header_ops = real_dev->header_ops; dev->hard_header_len = real_dev->hard_header_len; } else { @@ -617,7 +628,7 @@ static netdev_features_t vlan_dev_fix_features(struct net_device *dev, netdev_features_t features) { struct net_device *real_dev = vlan_dev_priv(dev)->real_dev; - u32 old_features = features; + netdev_features_t old_features = features; features &= real_dev->vlan_features; features |= NETIF_F_RXCSUM; @@ -640,9 +651,9 @@ static int vlan_ethtool_get_settings(struct net_device *dev, static void vlan_ethtool_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info) { - strcpy(info->driver, vlan_fullname); - strcpy(info->version, vlan_version); - strcpy(info->fw_version, "N/A"); + strlcpy(info->driver, vlan_fullname, sizeof(info->driver)); + strlcpy(info->version, vlan_version, sizeof(info->version)); + strlcpy(info->fw_version, "N/A", sizeof(info->fw_version)); } static struct rtnl_link_stats64 *vlan_dev_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) @@ -723,7 +734,7 @@ static void vlan_dev_netpoll_cleanup(struct net_device *dev) vlan->netpoll = NULL; - __netpoll_free_rcu(netpoll); + __netpoll_free_async(netpoll); } #endif /* CONFIG_NET_POLL_CONTROLLER */ diff --git a/net/8021q/vlan_gvrp.c b/net/8021q/vlan_gvrp.c index 6f9755352760..66a80320b032 100644 --- a/net/8021q/vlan_gvrp.c +++ b/net/8021q/vlan_gvrp.c @@ -32,6 +32,8 @@ int vlan_gvrp_request_join(const struct net_device *dev) const struct vlan_dev_priv *vlan = vlan_dev_priv(dev); __be16 vlan_id = htons(vlan->vlan_id); + if (vlan->vlan_proto != htons(ETH_P_8021Q)) + return 0; return garp_request_join(vlan->real_dev, &vlan_gvrp_app, &vlan_id, sizeof(vlan_id), GVRP_ATTR_VID); } @@ -41,6 +43,8 @@ void vlan_gvrp_request_leave(const struct net_device *dev) const struct vlan_dev_priv *vlan = vlan_dev_priv(dev); __be16 vlan_id = htons(vlan->vlan_id); + if (vlan->vlan_proto != htons(ETH_P_8021Q)) + return; garp_request_leave(vlan->real_dev, &vlan_gvrp_app, &vlan_id, sizeof(vlan_id), GVRP_ATTR_VID); } diff 
--git a/net/8021q/vlan_mvrp.c b/net/8021q/vlan_mvrp.c new file mode 100644 index 000000000000..e0fe091801b0 --- /dev/null +++ b/net/8021q/vlan_mvrp.c @@ -0,0 +1,76 @@ +/* + * IEEE 802.1Q Multiple VLAN Registration Protocol (MVRP) + * + * Copyright (c) 2012 Massachusetts Institute of Technology + * + * Adapted from code in net/8021q/vlan_gvrp.c + * Copyright (c) 2008 Patrick McHardy <kaber@trash.net> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + */ +#include <linux/types.h> +#include <linux/if_ether.h> +#include <linux/if_vlan.h> +#include <net/mrp.h> +#include "vlan.h" + +#define MRP_MVRP_ADDRESS { 0x01, 0x80, 0xc2, 0x00, 0x00, 0x21 } + +enum mvrp_attributes { + MVRP_ATTR_INVALID, + MVRP_ATTR_VID, + __MVRP_ATTR_MAX +}; +#define MVRP_ATTR_MAX (__MVRP_ATTR_MAX - 1) + +static struct mrp_application vlan_mrp_app __read_mostly = { + .type = MRP_APPLICATION_MVRP, + .maxattr = MVRP_ATTR_MAX, + .pkttype.type = htons(ETH_P_MVRP), + .group_address = MRP_MVRP_ADDRESS, + .version = 0, +}; + +int vlan_mvrp_request_join(const struct net_device *dev) +{ + const struct vlan_dev_priv *vlan = vlan_dev_priv(dev); + __be16 vlan_id = htons(vlan->vlan_id); + + if (vlan->vlan_proto != htons(ETH_P_8021Q)) + return 0; + return mrp_request_join(vlan->real_dev, &vlan_mrp_app, + &vlan_id, sizeof(vlan_id), MVRP_ATTR_VID); +} + +void vlan_mvrp_request_leave(const struct net_device *dev) +{ + const struct vlan_dev_priv *vlan = vlan_dev_priv(dev); + __be16 vlan_id = htons(vlan->vlan_id); + + if (vlan->vlan_proto != htons(ETH_P_8021Q)) + return; + mrp_request_leave(vlan->real_dev, &vlan_mrp_app, + &vlan_id, sizeof(vlan_id), MVRP_ATTR_VID); +} + +int vlan_mvrp_init_applicant(struct net_device *dev) +{ + return mrp_init_applicant(dev, &vlan_mrp_app); +} + +void vlan_mvrp_uninit_applicant(struct net_device *dev) +{ + mrp_uninit_applicant(dev, &vlan_mrp_app); +} + +int __init vlan_mvrp_init(void) +{ + return mrp_register_application(&vlan_mrp_app); +} + +void vlan_mvrp_uninit(void) +{ + mrp_unregister_application(&vlan_mrp_app); +} diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c index 708c80ea1874..309129732285 100644 --- a/net/8021q/vlan_netlink.c +++ b/net/8021q/vlan_netlink.c @@ -23,6 +23,7 @@ static const struct nla_policy vlan_policy[IFLA_VLAN_MAX + 1] = { [IFLA_VLAN_FLAGS] = { .len = sizeof(struct ifla_vlan_flags) }, [IFLA_VLAN_EGRESS_QOS] = { .type = NLA_NESTED }, [IFLA_VLAN_INGRESS_QOS] = { .type = NLA_NESTED }, + [IFLA_VLAN_PROTOCOL] = { .type = NLA_U16 }, }; static const struct nla_policy vlan_map_policy[IFLA_VLAN_QOS_MAX + 1] = { @@ -53,6 +54,16 @@ static int vlan_validate(struct nlattr *tb[], struct nlattr *data[]) if (!data) return -EINVAL; + if (data[IFLA_VLAN_PROTOCOL]) { + switch (nla_get_be16(data[IFLA_VLAN_PROTOCOL])) { + case __constant_htons(ETH_P_8021Q): + case __constant_htons(ETH_P_8021AD): + break; + default: + return -EPROTONOSUPPORT; + } + } + if (data[IFLA_VLAN_ID]) { id = nla_get_u16(data[IFLA_VLAN_ID]); if (id >= VLAN_VID_MASK) @@ -62,7 +73,7 @@ static int vlan_validate(struct nlattr *tb[], struct nlattr *data[]) flags = nla_data(data[IFLA_VLAN_FLAGS]); if ((flags->flags & flags->mask) & ~(VLAN_FLAG_REORDER_HDR | VLAN_FLAG_GVRP | - VLAN_FLAG_LOOSE_BINDING)) + VLAN_FLAG_LOOSE_BINDING | VLAN_FLAG_MVRP)) return -EINVAL; } @@ -107,6 +118,7 @@ static int vlan_newlink(struct net *src_net, struct net_device *dev, { struct vlan_dev_priv 
*vlan = vlan_dev_priv(dev); struct net_device *real_dev; + __be16 proto; int err; if (!data[IFLA_VLAN_ID]) @@ -118,11 +130,17 @@ static int vlan_newlink(struct net *src_net, struct net_device *dev, if (!real_dev) return -ENODEV; - vlan->vlan_id = nla_get_u16(data[IFLA_VLAN_ID]); - vlan->real_dev = real_dev; - vlan->flags = VLAN_FLAG_REORDER_HDR; + if (data[IFLA_VLAN_PROTOCOL]) + proto = nla_get_be16(data[IFLA_VLAN_PROTOCOL]); + else + proto = htons(ETH_P_8021Q); + + vlan->vlan_proto = proto; + vlan->vlan_id = nla_get_u16(data[IFLA_VLAN_ID]); + vlan->real_dev = real_dev; + vlan->flags = VLAN_FLAG_REORDER_HDR; - err = vlan_check_real_dev(real_dev, vlan->vlan_id); + err = vlan_check_real_dev(real_dev, vlan->vlan_proto, vlan->vlan_id); if (err < 0) return err; @@ -151,7 +169,8 @@ static size_t vlan_get_size(const struct net_device *dev) { struct vlan_dev_priv *vlan = vlan_dev_priv(dev); - return nla_total_size(2) + /* IFLA_VLAN_ID */ + return nla_total_size(2) + /* IFLA_VLAN_PROTOCOL */ + nla_total_size(2) + /* IFLA_VLAN_ID */ sizeof(struct ifla_vlan_flags) + /* IFLA_VLAN_FLAGS */ vlan_qos_map_size(vlan->nr_ingress_mappings) + vlan_qos_map_size(vlan->nr_egress_mappings); @@ -166,7 +185,8 @@ static int vlan_fill_info(struct sk_buff *skb, const struct net_device *dev) struct nlattr *nest; unsigned int i; - if (nla_put_u16(skb, IFLA_VLAN_ID, vlan_dev_priv(dev)->vlan_id)) + if (nla_put_be16(skb, IFLA_VLAN_PROTOCOL, vlan->vlan_proto) || + nla_put_u16(skb, IFLA_VLAN_ID, vlan->vlan_id)) goto nla_put_failure; if (vlan->flags) { f.flags = vlan->flags; diff --git a/net/8021q/vlanproc.c b/net/8021q/vlanproc.c index 4de77ea5fa37..1d0e89213a28 100644 --- a/net/8021q/vlanproc.c +++ b/net/8021q/vlanproc.c @@ -93,7 +93,7 @@ static const struct file_operations vlan_fops = { static int vlandev_seq_open(struct inode *inode, struct file *file) { - return single_open(file, vlandev_seq_show, PDE(inode)->data); + return single_open(file, vlandev_seq_show, PDE_DATA(inode)); } static const struct file_operations vlandev_fops = { @@ -131,7 +131,7 @@ void vlan_proc_cleanup(struct net *net) remove_proc_entry(name_conf, vn->proc_vlan_dir); if (vn->proc_vlan_dir) - proc_net_remove(net, name_root); + remove_proc_entry(name_root, net->proc_net); /* Dynamically added entries should be cleaned up as their vlan_device * is removed, so we should not have to take care of it here... @@ -184,14 +184,9 @@ int vlan_proc_add_dev(struct net_device *vlandev) */ int vlan_proc_rem_dev(struct net_device *vlandev) { - struct vlan_net *vn = net_generic(dev_net(vlandev), vlan_net_id); - /** NOTE: This will consume the memory pointed to by dent, it seems. */ - if (vlan_dev_priv(vlandev)->dent) { - remove_proc_entry(vlan_dev_priv(vlandev)->dent->name, - vn->proc_vlan_dir); - vlan_dev_priv(vlandev)->dent = NULL; - } + proc_remove(vlan_dev_priv(vlandev)->dent); + vlan_dev_priv(vlandev)->dent = NULL; return 0; } diff --git a/net/9p/Kconfig b/net/9p/Kconfig index d9ea09b11cf8..a75174a33723 100644 --- a/net/9p/Kconfig +++ b/net/9p/Kconfig @@ -23,7 +23,7 @@ config NET_9P_VIRTIO guest partitions and a host partition. config NET_9P_RDMA - depends on INET && INFINIBAND && INFINIBAND_ADDR_TRANS && EXPERIMENTAL + depends on INET && INFINIBAND && INFINIBAND_ADDR_TRANS tristate "9P RDMA Transport (Experimental)" help This builds support for an RDMA transport. 
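The new net/8021q/vlan_mvrp.c above doubles as a template for wiring any MRP application: everything protocol-specific sits in one static struct mrp_application, while the net/802/mrp.c core owns the applicant state machine and the join timer that batches MRPDUs. A minimal sketch of that wiring, with hypothetical demo_* names (the field values are copied from the MVRP definition purely for illustration; a real second application would need its own type, PDU ethertype and group address):

#include <linux/if_ether.h>
#include <linux/netdevice.h>
#include <net/mrp.h>

static struct mrp_application demo_app __read_mostly = {
	.type		= MRP_APPLICATION_MVRP,
	.maxattr	= 1,
	.pkttype.type	= htons(ETH_P_MVRP),
	.group_address	= { 0x01, 0x80, 0xc2, 0x00, 0x00, 0x21 },
	.version	= 0,
};

/* Assumes mrp_register_application(&demo_app) already succeeded at module
 * init, mirroring vlan_mvrp_init() above. */
static int demo_attach(struct net_device *dev, __be16 vid)
{
	int err;

	/* Per-device applicant state; pairs with mrp_uninit_applicant(). */
	err = mrp_init_applicant(dev, &demo_app);
	if (err)
		return err;
	/* Declare one attribute (attrtype 1, like MVRP_ATTR_VID above);
	 * the core's join timer takes care of emitting the actual PDUs. */
	return mrp_request_join(dev, &demo_app, &vid, sizeof(vid), 1);
}

The vlan_proto guards added to vlan_gvrp.c and vlan_mvrp.c belong to the same series: G/MVRP registration only makes sense for plain 802.1Q tags, so 802.1ad (QinQ) devices created via the new IFLA_VLAN_PROTOCOL attribute simply skip the join.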
diff --git a/net/9p/client.c b/net/9p/client.c index 34d417670935..8eb75425e6e6 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -1100,7 +1100,7 @@ void p9_client_begin_disconnect(struct p9_client *clnt) EXPORT_SYMBOL(p9_client_begin_disconnect); struct p9_fid *p9_client_attach(struct p9_client *clnt, struct p9_fid *afid, - char *uname, u32 n_uname, char *aname) + char *uname, kuid_t n_uname, char *aname) { int err = 0; struct p9_req_t *req; @@ -1117,7 +1117,7 @@ struct p9_fid *p9_client_attach(struct p9_client *clnt, struct p9_fid *afid, goto error; } - req = p9_client_rpc(clnt, P9_TATTACH, "ddss?d", fid->fid, + req = p9_client_rpc(clnt, P9_TATTACH, "ddss?u", fid->fid, afid ? afid->fid : P9_NOFID, uname, aname, n_uname); if (IS_ERR(req)) { err = PTR_ERR(req); @@ -1270,7 +1270,7 @@ error: EXPORT_SYMBOL(p9_client_open); int p9_client_create_dotl(struct p9_fid *ofid, char *name, u32 flags, u32 mode, - gid_t gid, struct p9_qid *qid) + kgid_t gid, struct p9_qid *qid) { int err = 0; struct p9_client *clnt; @@ -1279,13 +1279,14 @@ int p9_client_create_dotl(struct p9_fid *ofid, char *name, u32 flags, u32 mode, p9_debug(P9_DEBUG_9P, ">>> TLCREATE fid %d name %s flags %d mode %d gid %d\n", - ofid->fid, name, flags, mode, gid); + ofid->fid, name, flags, mode, + from_kgid(&init_user_ns, gid)); clnt = ofid->clnt; if (ofid->mode != -1) return -EINVAL; - req = p9_client_rpc(clnt, P9_TLCREATE, "dsddd", ofid->fid, name, flags, + req = p9_client_rpc(clnt, P9_TLCREATE, "dsddg", ofid->fid, name, flags, mode, gid); if (IS_ERR(req)) { err = PTR_ERR(req); @@ -1358,7 +1359,7 @@ error: } EXPORT_SYMBOL(p9_client_fcreate); -int p9_client_symlink(struct p9_fid *dfid, char *name, char *symtgt, gid_t gid, +int p9_client_symlink(struct p9_fid *dfid, char *name, char *symtgt, kgid_t gid, struct p9_qid *qid) { int err = 0; @@ -1369,7 +1370,7 @@ int p9_client_symlink(struct p9_fid *dfid, char *name, char *symtgt, gid_t gid, dfid->fid, name, symtgt); clnt = dfid->clnt; - req = p9_client_rpc(clnt, P9_TSYMLINK, "dssd", dfid->fid, name, symtgt, + req = p9_client_rpc(clnt, P9_TSYMLINK, "dssg", dfid->fid, name, symtgt, gid); if (IS_ERR(req)) { err = PTR_ERR(req); @@ -1710,7 +1711,9 @@ struct p9_wstat *p9_client_stat(struct p9_fid *fid) (unsigned long long)ret->qid.path, ret->qid.version, ret->mode, ret->atime, ret->mtime, (unsigned long long)ret->length, ret->name, ret->uid, ret->gid, ret->muid, ret->extension, - ret->n_uid, ret->n_gid, ret->n_muid); + from_kuid(&init_user_ns, ret->n_uid), + from_kgid(&init_user_ns, ret->n_gid), + from_kuid(&init_user_ns, ret->n_muid)); p9_free_req(clnt, req); return ret; @@ -1764,8 +1767,10 @@ struct p9_stat_dotl *p9_client_getattr_dotl(struct p9_fid *fid, "<<< st_btime_sec=%lld st_btime_nsec=%lld\n" "<<< st_gen=%lld st_data_version=%lld", ret->st_result_mask, ret->qid.type, ret->qid.path, - ret->qid.version, ret->st_mode, ret->st_nlink, ret->st_uid, - ret->st_gid, ret->st_rdev, ret->st_size, ret->st_blksize, + ret->qid.version, ret->st_mode, ret->st_nlink, + from_kuid(&init_user_ns, ret->st_uid), + from_kgid(&init_user_ns, ret->st_gid), + ret->st_rdev, ret->st_size, ret->st_blksize, ret->st_blocks, ret->st_atime_sec, ret->st_atime_nsec, ret->st_mtime_sec, ret->st_mtime_nsec, ret->st_ctime_sec, ret->st_ctime_nsec, ret->st_btime_sec, ret->st_btime_nsec, @@ -1828,7 +1833,9 @@ int p9_client_wstat(struct p9_fid *fid, struct p9_wstat *wst) (unsigned long long)wst->qid.path, wst->qid.version, wst->mode, wst->atime, wst->mtime, (unsigned long long)wst->length, wst->name, wst->uid, wst->gid, 
wst->muid, wst->extension, - wst->n_uid, wst->n_gid, wst->n_muid); + from_kuid(&init_user_ns, wst->n_uid), + from_kgid(&init_user_ns, wst->n_gid), + from_kuid(&init_user_ns, wst->n_muid)); req = p9_client_rpc(clnt, P9_TWSTAT, "dwS", fid->fid, wst->size+2, wst); if (IS_ERR(req)) { @@ -1857,7 +1864,9 @@ int p9_client_setattr(struct p9_fid *fid, struct p9_iattr_dotl *p9attr) " valid=%x mode=%x uid=%d gid=%d size=%lld\n" " atime_sec=%lld atime_nsec=%lld\n" " mtime_sec=%lld mtime_nsec=%lld\n", - p9attr->valid, p9attr->mode, p9attr->uid, p9attr->gid, + p9attr->valid, p9attr->mode, + from_kuid(&init_user_ns, p9attr->uid), + from_kgid(&init_user_ns, p9attr->gid), p9attr->size, p9attr->atime_sec, p9attr->atime_nsec, p9attr->mtime_sec, p9attr->mtime_nsec); @@ -2106,7 +2115,7 @@ error: EXPORT_SYMBOL(p9_client_readdir); int p9_client_mknod_dotl(struct p9_fid *fid, char *name, int mode, - dev_t rdev, gid_t gid, struct p9_qid *qid) + dev_t rdev, kgid_t gid, struct p9_qid *qid) { int err; struct p9_client *clnt; @@ -2116,7 +2125,7 @@ int p9_client_mknod_dotl(struct p9_fid *fid, char *name, int mode, clnt = fid->clnt; p9_debug(P9_DEBUG_9P, ">>> TMKNOD fid %d name %s mode %d major %d " "minor %d\n", fid->fid, name, mode, MAJOR(rdev), MINOR(rdev)); - req = p9_client_rpc(clnt, P9_TMKNOD, "dsdddd", fid->fid, name, mode, + req = p9_client_rpc(clnt, P9_TMKNOD, "dsdddg", fid->fid, name, mode, MAJOR(rdev), MINOR(rdev), gid); if (IS_ERR(req)) return PTR_ERR(req); @@ -2137,7 +2146,7 @@ error: EXPORT_SYMBOL(p9_client_mknod_dotl); int p9_client_mkdir_dotl(struct p9_fid *fid, char *name, int mode, - gid_t gid, struct p9_qid *qid) + kgid_t gid, struct p9_qid *qid) { int err; struct p9_client *clnt; @@ -2146,8 +2155,8 @@ int p9_client_mkdir_dotl(struct p9_fid *fid, char *name, int mode, err = 0; clnt = fid->clnt; p9_debug(P9_DEBUG_9P, ">>> TMKDIR fid %d name %s mode %d gid %d\n", - fid->fid, name, mode, gid); - req = p9_client_rpc(clnt, P9_TMKDIR, "dsdd", fid->fid, name, mode, + fid->fid, name, mode, from_kgid(&init_user_ns, gid)); + req = p9_client_rpc(clnt, P9_TMKDIR, "dsdg", fid->fid, name, mode, gid); if (IS_ERR(req)) return PTR_ERR(req); diff --git a/net/9p/error.c b/net/9p/error.c index 2ab2de76010f..126fd0dceea2 100644 --- a/net/9p/error.c +++ b/net/9p/error.c @@ -221,15 +221,13 @@ EXPORT_SYMBOL(p9_error_init); int p9_errstr2errno(char *errstr, int len) { int errno; - struct hlist_node *p; struct errormap *c; int bucket; errno = 0; - p = NULL; c = NULL; bucket = jhash(errstr, len, 0) % ERRHASHSZ; - hlist_for_each_entry(c, p, &hash_errmap[bucket], list) { + hlist_for_each_entry(c, &hash_errmap[bucket], list) { if (c->namelen == len && !memcmp(c->name, errstr, len)) { errno = c->val; break; diff --git a/net/9p/protocol.c b/net/9p/protocol.c index 3d33ecf13327..ab9127ec5b7a 100644 --- a/net/9p/protocol.c +++ b/net/9p/protocol.c @@ -85,6 +85,8 @@ pdu_write_u(struct p9_fcall *pdu, const char __user *udata, size_t size) d - int32_t q - int64_t s - string + u - numeric uid + g - numeric gid S - stat Q - qid D - data blob (int32_t size followed by void *, results are not freed) @@ -163,6 +165,26 @@ p9pdu_vreadf(struct p9_fcall *pdu, int proto_version, const char *fmt, (*sptr)[len] = 0; } break; + case 'u': { + kuid_t *uid = va_arg(ap, kuid_t *); + __le32 le_val; + if (pdu_read(pdu, &le_val, sizeof(le_val))) { + errcode = -EFAULT; + break; + } + *uid = make_kuid(&init_user_ns, + le32_to_cpu(le_val)); + } break; + case 'g': { + kgid_t *gid = va_arg(ap, kgid_t *); + __le32 le_val; + if (pdu_read(pdu, &le_val, 
sizeof(le_val))) { + errcode = -EFAULT; + break; + } + *gid = make_kgid(&init_user_ns, + le32_to_cpu(le_val)); + } break; case 'Q':{ struct p9_qid *qid = va_arg(ap, struct p9_qid *); @@ -177,11 +199,12 @@ p9pdu_vreadf(struct p9_fcall *pdu, int proto_version, const char *fmt, va_arg(ap, struct p9_wstat *); memset(stbuf, 0, sizeof(struct p9_wstat)); - stbuf->n_uid = stbuf->n_gid = stbuf->n_muid = - -1; + stbuf->n_uid = stbuf->n_muid = INVALID_UID; + stbuf->n_gid = INVALID_GID; + errcode = p9pdu_readf(pdu, proto_version, - "wwdQdddqssss?sddd", + "wwdQdddqssss?sugu", &stbuf->size, &stbuf->type, &stbuf->dev, &stbuf->qid, &stbuf->mode, &stbuf->atime, @@ -294,7 +317,7 @@ p9pdu_vreadf(struct p9_fcall *pdu, int proto_version, const char *fmt, memset(stbuf, 0, sizeof(struct p9_stat_dotl)); errcode = p9pdu_readf(pdu, proto_version, - "qQdddqqqqqqqqqqqqqqq", + "qQdugqqqqqqqqqqqqqqq", &stbuf->st_result_mask, &stbuf->qid, &stbuf->st_mode, @@ -377,6 +400,20 @@ p9pdu_vwritef(struct p9_fcall *pdu, int proto_version, const char *fmt, errcode = -EFAULT; } break; + case 'u': { + kuid_t uid = va_arg(ap, kuid_t); + __le32 val = cpu_to_le32( + from_kuid(&init_user_ns, uid)); + if (pdu_write(pdu, &val, sizeof(val))) + errcode = -EFAULT; + } break; + case 'g': { + kgid_t gid = va_arg(ap, kgid_t); + __le32 val = cpu_to_le32( + from_kgid(&init_user_ns, gid)); + if (pdu_write(pdu, &val, sizeof(val))) + errcode = -EFAULT; + } break; case 'Q':{ const struct p9_qid *qid = va_arg(ap, const struct p9_qid *); @@ -390,7 +427,7 @@ p9pdu_vwritef(struct p9_fcall *pdu, int proto_version, const char *fmt, va_arg(ap, const struct p9_wstat *); errcode = p9pdu_writef(pdu, proto_version, - "wwdQdddqssss?sddd", + "wwdQdddqssss?sugu", stbuf->size, stbuf->type, stbuf->dev, &stbuf->qid, stbuf->mode, stbuf->atime, @@ -468,7 +505,7 @@ p9pdu_vwritef(struct p9_fcall *pdu, int proto_version, const char *fmt, struct p9_iattr_dotl *); errcode = p9pdu_writef(pdu, proto_version, - "ddddqqqqq", + "ddugqqqqq", p9attr->valid, p9attr->mode, p9attr->uid, diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c index fd05c81cb348..e1c26b101830 100644 --- a/net/9p/trans_virtio.c +++ b/net/9p/trans_virtio.c @@ -87,7 +87,7 @@ struct virtio_chan { /* This is global limit. Since we don't have a global structure, * will be placing it in each channel. */ - int p9_max_pages; + unsigned long p9_max_pages; /* Scatterlist: can be too big for stack. */ struct scatterlist sg[VIRTQUEUE_NUM]; @@ -194,11 +194,14 @@ static int pack_sg_list(struct scatterlist *sg, int start, if (s > count) s = count; BUG_ON(index > limit); + /* Make sure we don't terminate early. */ + sg_unmark_end(&sg[index]); sg_set_buf(&sg[index++], data, s); count -= s; data += s; } - + if (index-start) + sg_mark_end(&sg[index - 1]); return index-start; } @@ -236,12 +239,17 @@ pack_sg_list_p(struct scatterlist *sg, int start, int limit, s = rest_of_page(data); if (s > count) s = count; + /* Make sure we don't terminate early. 
*/ + sg_unmark_end(&sg[index]); sg_set_page(&sg[index++], pdata[i++], s, data_off); data_off = 0; data += s; count -= s; nr_pages--; } + + if (index-start) + sg_mark_end(&sg[index - 1]); return index - start; } @@ -256,9 +264,10 @@ static int p9_virtio_request(struct p9_client *client, struct p9_req_t *req) { int err; - int in, out; + int in, out, out_sgs, in_sgs; unsigned long flags; struct virtio_chan *chan = client->trans; + struct scatterlist *sgs[2]; p9_debug(P9_DEBUG_TRANS, "9p debug: virtio request\n"); @@ -266,14 +275,19 @@ p9_virtio_request(struct p9_client *client, struct p9_req_t *req) req_retry: spin_lock_irqsave(&chan->lock, flags); + out_sgs = in_sgs = 0; /* Handle out VirtIO ring buffers */ out = pack_sg_list(chan->sg, 0, VIRTQUEUE_NUM, req->tc->sdata, req->tc->size); + if (out) + sgs[out_sgs++] = chan->sg; in = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM, req->rc->sdata, req->rc->capacity); + if (in) + sgs[out_sgs + in_sgs++] = chan->sg + out; - err = virtqueue_add_buf(chan->vq, chan->sg, out, in, req->tc, + err = virtqueue_add_sgs(chan->vq, sgs, out_sgs, in_sgs, req->tc, GFP_ATOMIC); if (err < 0) { if (err == -ENOSPC) { @@ -289,7 +303,7 @@ req_retry: } else { spin_unlock_irqrestore(&chan->lock, flags); p9_debug(P9_DEBUG_TRANS, - "virtio rpc add_buf returned failure\n"); + "virtio rpc add_sgs returned failure\n"); return -EIO; } } @@ -351,11 +365,12 @@ p9_virtio_zc_request(struct p9_client *client, struct p9_req_t *req, char *uidata, char *uodata, int inlen, int outlen, int in_hdr_len, int kern_buf) { - int in, out, err; + int in, out, err, out_sgs, in_sgs; unsigned long flags; int in_nr_pages = 0, out_nr_pages = 0; struct page **in_pages = NULL, **out_pages = NULL; struct virtio_chan *chan = client->trans; + struct scatterlist *sgs[4]; p9_debug(P9_DEBUG_TRANS, "virtio request\n"); @@ -396,13 +411,22 @@ p9_virtio_zc_request(struct p9_client *client, struct p9_req_t *req, req->status = REQ_STATUS_SENT; req_retry_pinned: spin_lock_irqsave(&chan->lock, flags); + + out_sgs = in_sgs = 0; + /* out data */ out = pack_sg_list(chan->sg, 0, VIRTQUEUE_NUM, req->tc->sdata, req->tc->size); - if (out_pages) + if (out) + sgs[out_sgs++] = chan->sg; + + if (out_pages) { + sgs[out_sgs++] = chan->sg + out; out += pack_sg_list_p(chan->sg, out, VIRTQUEUE_NUM, out_pages, out_nr_pages, uodata, outlen); + } + /* * Take care of in data * For example TREAD have 11. 
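Two patterns in the 9p hunks above deserve a note. First, the new 'u' and 'g' format codes: the wire format still carries numeric ids as a little-endian u32, and translation to the kernel's namespace-aware kuid_t/kgid_t happens exactly once, at the (de)serialization boundary. A round-trip sketch with hypothetical demo_* helpers (the real conversions sit inline in p9pdu_vreadf()/p9pdu_vwritef()):

#include <linux/kernel.h>
#include <linux/uidgid.h>

static __le32 demo_uid_to_wire(kuid_t uid)
{
	/* Always resolves against init_user_ns, as the patch does. */
	return cpu_to_le32(from_kuid(&init_user_ns, uid));
}

static kuid_t demo_uid_from_wire(__le32 val)
{
	return make_kuid(&init_user_ns, le32_to_cpu(val));
}

Second, the sg_unmark_end()/sg_mark_end() pairing in pack_sg_list() and pack_sg_list_p(): the channel's scatterlist table is reused for every request, so an end mark left behind by a longer earlier request would make a shorter new list appear truncated. The rule, restated as a self-contained sketch (hypothetical fill_sg(), flattened to one linear buffer):

#include <linux/kernel.h>
#include <linux/scatterlist.h>

static int fill_sg(struct scatterlist *sg, int limit, char *buf, size_t len)
{
	int i = 0;

	while (len && i < limit) {
		size_t chunk = min_t(size_t, len, PAGE_SIZE);

		sg_unmark_end(&sg[i]);		/* clear any stale end mark */
		sg_set_buf(&sg[i], buf, chunk);
		buf += chunk;
		len -= chunk;
		i++;
	}
	if (i)
		sg_mark_end(&sg[i - 1]);	/* terminate this segment only */
	return i;
}

virtqueue_add_sgs() then takes the sgs[] array as out_sgs write segments followed by in_sgs read segments, which is why the request paths count the two separately before queuing.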
@@ -412,11 +436,17 @@ req_retry_pinned: */ in = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM, req->rc->sdata, in_hdr_len); - if (in_pages) + if (in) + sgs[out_sgs + in_sgs++] = chan->sg + out; + + if (in_pages) { + sgs[out_sgs + in_sgs++] = chan->sg + out + in; in += pack_sg_list_p(chan->sg, out + in, VIRTQUEUE_NUM, in_pages, in_nr_pages, uidata, inlen); + } - err = virtqueue_add_buf(chan->vq, chan->sg, out, in, req->tc, + BUG_ON(out_sgs + in_sgs > ARRAY_SIZE(sgs)); + err = virtqueue_add_sgs(chan->vq, sgs, out_sgs, in_sgs, req->tc, GFP_ATOMIC); if (err < 0) { if (err == -ENOSPC) { @@ -432,7 +462,7 @@ req_retry_pinned: } else { spin_unlock_irqrestore(&chan->lock, flags); p9_debug(P9_DEBUG_TRANS, - "virtio rpc add_buf returned failure\n"); + "virtio rpc add_sgs returned failure\n"); err = -EIO; goto err_out; } diff --git a/net/9p/util.c b/net/9p/util.c index 6ceeeb384de7..59f278e64f58 100644 --- a/net/9p/util.c +++ b/net/9p/util.c @@ -87,23 +87,18 @@ EXPORT_SYMBOL(p9_idpool_destroy); int p9_idpool_get(struct p9_idpool *p) { - int i = 0; - int error; + int i; unsigned long flags; -retry: - if (idr_pre_get(&p->pool, GFP_NOFS) == 0) - return -1; - + idr_preload(GFP_NOFS); spin_lock_irqsave(&p->lock, flags); /* no need to store exactly p, we just need something non-null */ - error = idr_get_new(&p->pool, p, &i); - spin_unlock_irqrestore(&p->lock, flags); + i = idr_alloc(&p->pool, p, 0, 0, GFP_NOWAIT); - if (error == -EAGAIN) - goto retry; - else if (error) + spin_unlock_irqrestore(&p->lock, flags); + idr_preload_end(); + if (i < 0) return -1; p9_debug(P9_DEBUG_MUX, " id %d pool %p\n", i, p); diff --git a/net/Kconfig b/net/Kconfig index 30b48f523135..2ddc9046868e 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -90,7 +90,6 @@ config NETWORK_SECMARK config NETWORK_PHY_TIMESTAMPING bool "Timestamping in PHY devices" - depends on EXPERIMENTAL help This allows timestamping of network packets by PHYs with hardware timestamping capabilities. This option adds some @@ -209,7 +208,6 @@ source "net/ipx/Kconfig" source "drivers/net/appletalk/Kconfig" source "net/x25/Kconfig" source "net/lapb/Kconfig" -source "net/wanrouter/Kconfig" source "net/phonet/Kconfig" source "net/ieee802154/Kconfig" source "net/mac802154/Kconfig" @@ -218,6 +216,8 @@ source "net/dcb/Kconfig" source "net/dns_resolver/Kconfig" source "net/batman-adv/Kconfig" source "net/openvswitch/Kconfig" +source "net/vmw_vsock/Kconfig" +source "net/netlink/Kconfig" config RPS boolean @@ -232,7 +232,7 @@ config RFS_ACCEL config XPS boolean - depends on SMP && SYSFS && USE_GENERIC_SMP_HELPERS + depends on SMP && USE_GENERIC_SMP_HELPERS default y config NETPRIO_CGROUP @@ -278,7 +278,7 @@ config NET_PKTGEN config NET_TCPPROBE tristate "TCP connection probing" - depends on INET && EXPERIMENTAL && PROC_FS && KPROBES + depends on INET && PROC_FS && KPROBES ---help--- This module allows for capturing the changes to TCP connection state in response to incoming packets. It is used for debugging @@ -295,7 +295,7 @@ config NET_TCPPROBE config NET_DROP_MONITOR tristate "Network packet drop alerting service" - depends on INET && EXPERIMENTAL && TRACEPOINTS + depends on INET && TRACEPOINTS ---help--- This feature provides an alerting service to userspace in the event that packets are discarded in the network stack. 
Alerts diff --git a/net/Makefile b/net/Makefile index 4f4ee083064c..091e7b04f301 100644 --- a/net/Makefile +++ b/net/Makefile @@ -26,7 +26,6 @@ obj-$(CONFIG_BRIDGE) += bridge/ obj-$(CONFIG_NET_DSA) += dsa/ obj-$(CONFIG_IPX) += ipx/ obj-$(CONFIG_ATALK) += appletalk/ -obj-$(CONFIG_WAN_ROUTER) += wanrouter/ obj-$(CONFIG_X25) += x25/ obj-$(CONFIG_LAPB) += lapb/ obj-$(CONFIG_NETROM) += netrom/ @@ -70,3 +69,4 @@ obj-$(CONFIG_CEPH_LIB) += ceph/ obj-$(CONFIG_BATMAN_ADV) += batman-adv/ obj-$(CONFIG_NFC) += nfc/ obj-$(CONFIG_OPENVSWITCH) += openvswitch/ +obj-$(CONFIG_VSOCKETS) += vmw_vsock/ diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index 33475291c9c1..ef12839a7cfe 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -93,10 +93,9 @@ static struct sock *atalk_search_socket(struct sockaddr_at *to, struct atalk_iface *atif) { struct sock *s; - struct hlist_node *node; read_lock_bh(&atalk_sockets_lock); - sk_for_each(s, node, &atalk_sockets) { + sk_for_each(s, &atalk_sockets) { struct atalk_sock *at = at_sk(s); if (to->sat_port != at->src_port) @@ -141,11 +140,10 @@ static struct sock *atalk_find_or_insert_socket(struct sock *sk, struct sockaddr_at *sat) { struct sock *s; - struct hlist_node *node; struct atalk_sock *at; write_lock_bh(&atalk_sockets_lock); - sk_for_each(s, node, &atalk_sockets) { + sk_for_each(s, &atalk_sockets) { at = at_sk(s); if (at->src_net == sat->sat_addr.s_net && @@ -1084,9 +1082,8 @@ static int atalk_pick_and_bind_port(struct sock *sk, struct sockaddr_at *sat) sat->sat_port < ATPORT_LAST; sat->sat_port++) { struct sock *s; - struct hlist_node *node; - sk_for_each(s, node, &atalk_sockets) { + sk_for_each(s, &atalk_sockets) { struct atalk_sock *at = at_sk(s); if (at->src_net == sat->sat_addr.s_net && @@ -1256,7 +1253,7 @@ static int atalk_getname(struct socket *sock, struct sockaddr *uaddr, goto out; *uaddr_len = sizeof(struct sockaddr_at); - memset(&sat.sat_zero, 0, sizeof(sat.sat_zero)); + memset(&sat, 0, sizeof(sat)); if (peer) { err = -ENOTCONN; diff --git a/net/atm/common.c b/net/atm/common.c index 806fc0a40051..737bef59ce89 100644 --- a/net/atm/common.c +++ b/net/atm/common.c @@ -270,11 +270,11 @@ void atm_dev_release_vccs(struct atm_dev *dev) write_lock_irq(&vcc_sklist_lock); for (i = 0; i < VCC_HTABLE_SIZE; i++) { struct hlist_head *head = &vcc_hash[i]; - struct hlist_node *node, *tmp; + struct hlist_node *tmp; struct sock *s; struct atm_vcc *vcc; - sk_for_each_safe(s, node, tmp, head) { + sk_for_each_safe(s, tmp, head) { vcc = atm_sk(s); if (vcc->dev == dev) { vcc_release_async(vcc, -EPIPE); @@ -317,11 +317,10 @@ static int adjust_tp(struct atm_trafprm *tp, unsigned char aal) static int check_ci(const struct atm_vcc *vcc, short vpi, int vci) { struct hlist_head *head = &vcc_hash[vci & (VCC_HTABLE_SIZE - 1)]; - struct hlist_node *node; struct sock *s; struct atm_vcc *walk; - sk_for_each(s, node, head) { + sk_for_each(s, head) { walk = atm_sk(s); if (walk->dev != vcc->dev) continue; @@ -532,6 +531,8 @@ int vcc_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, struct sk_buff *skb; int copied, error = -EINVAL; + msg->msg_namelen = 0; + if (sock->state != SS_CONNECTED) return -ENOTCONN; diff --git a/net/atm/lec.c b/net/atm/lec.c index 2e3d942e77f1..f23916be18fb 100644 --- a/net/atm/lec.c +++ b/net/atm/lec.c @@ -842,7 +842,9 @@ static void *lec_tbl_walk(struct lec_state *state, struct hlist_head *tbl, --*l; } - hlist_for_each_entry_from(tmp, e, next) { + tmp = container_of(e, struct lec_arp_table, next); + + hlist_for_each_entry_from(tmp, 
next) { if (--*l < 0) break; } @@ -1307,7 +1309,6 @@ lec_arp_add(struct lec_priv *priv, struct lec_arp_table *entry) static int lec_arp_remove(struct lec_priv *priv, struct lec_arp_table *to_remove) { - struct hlist_node *node; struct lec_arp_table *entry; int i, remove_vcc = 1; @@ -1326,7 +1327,7 @@ lec_arp_remove(struct lec_priv *priv, struct lec_arp_table *to_remove) * ESI_FLUSH_PENDING, ESI_FORWARD_DIRECT */ for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) { - hlist_for_each_entry(entry, node, + hlist_for_each_entry(entry, &priv->lec_arp_tables[i], next) { if (memcmp(to_remove->atm_addr, entry->atm_addr, ATM_ESA_LEN) == 0) { @@ -1364,14 +1365,13 @@ static const char *get_status_string(unsigned char st) static void dump_arp_table(struct lec_priv *priv) { - struct hlist_node *node; struct lec_arp_table *rulla; char buf[256]; int i, j, offset; pr_info("Dump %p:\n", priv); for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) { - hlist_for_each_entry(rulla, node, + hlist_for_each_entry(rulla, &priv->lec_arp_tables[i], next) { offset = 0; offset += sprintf(buf, "%d: %p\n", i, rulla); @@ -1403,7 +1403,7 @@ static void dump_arp_table(struct lec_priv *priv) if (!hlist_empty(&priv->lec_no_forward)) pr_info("No forward\n"); - hlist_for_each_entry(rulla, node, &priv->lec_no_forward, next) { + hlist_for_each_entry(rulla, &priv->lec_no_forward, next) { offset = 0; offset += sprintf(buf + offset, "Mac: %pM", rulla->mac_addr); offset += sprintf(buf + offset, " Atm:"); @@ -1428,7 +1428,7 @@ static void dump_arp_table(struct lec_priv *priv) if (!hlist_empty(&priv->lec_arp_empty_ones)) pr_info("Empty ones\n"); - hlist_for_each_entry(rulla, node, &priv->lec_arp_empty_ones, next) { + hlist_for_each_entry(rulla, &priv->lec_arp_empty_ones, next) { offset = 0; offset += sprintf(buf + offset, "Mac: %pM", rulla->mac_addr); offset += sprintf(buf + offset, " Atm:"); @@ -1453,7 +1453,7 @@ static void dump_arp_table(struct lec_priv *priv) if (!hlist_empty(&priv->mcast_fwds)) pr_info("Multicast Forward VCCs\n"); - hlist_for_each_entry(rulla, node, &priv->mcast_fwds, next) { + hlist_for_each_entry(rulla, &priv->mcast_fwds, next) { offset = 0; offset += sprintf(buf + offset, "Mac: %pM", rulla->mac_addr); offset += sprintf(buf + offset, " Atm:"); @@ -1487,7 +1487,7 @@ static void dump_arp_table(struct lec_priv *priv) static void lec_arp_destroy(struct lec_priv *priv) { unsigned long flags; - struct hlist_node *node, *next; + struct hlist_node *next; struct lec_arp_table *entry; int i; @@ -1499,7 +1499,7 @@ static void lec_arp_destroy(struct lec_priv *priv) spin_lock_irqsave(&priv->lec_arp_lock, flags); for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) { - hlist_for_each_entry_safe(entry, node, next, + hlist_for_each_entry_safe(entry, next, &priv->lec_arp_tables[i], next) { lec_arp_remove(priv, entry); lec_arp_put(entry); @@ -1507,7 +1507,7 @@ static void lec_arp_destroy(struct lec_priv *priv) INIT_HLIST_HEAD(&priv->lec_arp_tables[i]); } - hlist_for_each_entry_safe(entry, node, next, + hlist_for_each_entry_safe(entry, next, &priv->lec_arp_empty_ones, next) { del_timer_sync(&entry->timer); lec_arp_clear_vccs(entry); @@ -1516,7 +1516,7 @@ static void lec_arp_destroy(struct lec_priv *priv) } INIT_HLIST_HEAD(&priv->lec_arp_empty_ones); - hlist_for_each_entry_safe(entry, node, next, + hlist_for_each_entry_safe(entry, next, &priv->lec_no_forward, next) { del_timer_sync(&entry->timer); lec_arp_clear_vccs(entry); @@ -1525,7 +1525,7 @@ static void lec_arp_destroy(struct lec_priv *priv) } INIT_HLIST_HEAD(&priv->lec_no_forward); - 
hlist_for_each_entry_safe(entry, node, next, &priv->mcast_fwds, next) { + hlist_for_each_entry_safe(entry, next, &priv->mcast_fwds, next) { /* No timer, LANEv2 7.1.20 and 2.3.5.3 */ lec_arp_clear_vccs(entry); hlist_del(&entry->next); @@ -1542,14 +1542,13 @@ static void lec_arp_destroy(struct lec_priv *priv) static struct lec_arp_table *lec_arp_find(struct lec_priv *priv, const unsigned char *mac_addr) { - struct hlist_node *node; struct hlist_head *head; struct lec_arp_table *entry; pr_debug("%pM\n", mac_addr); head = &priv->lec_arp_tables[HASH(mac_addr[ETH_ALEN - 1])]; - hlist_for_each_entry(entry, node, head, next) { + hlist_for_each_entry(entry, head, next) { if (ether_addr_equal(mac_addr, entry->mac_addr)) return entry; } @@ -1686,7 +1685,7 @@ static void lec_arp_check_expire(struct work_struct *work) unsigned long flags; struct lec_priv *priv = container_of(work, struct lec_priv, lec_arp_work.work); - struct hlist_node *node, *next; + struct hlist_node *next; struct lec_arp_table *entry; unsigned long now; int i; @@ -1696,7 +1695,7 @@ static void lec_arp_check_expire(struct work_struct *work) restart: spin_lock_irqsave(&priv->lec_arp_lock, flags); for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) { - hlist_for_each_entry_safe(entry, node, next, + hlist_for_each_entry_safe(entry, next, &priv->lec_arp_tables[i], next) { if (__lec_arp_check_expire(entry, now, priv)) { struct sk_buff *skb; @@ -1823,14 +1822,14 @@ lec_addr_delete(struct lec_priv *priv, const unsigned char *atm_addr, unsigned long permanent) { unsigned long flags; - struct hlist_node *node, *next; + struct hlist_node *next; struct lec_arp_table *entry; int i; pr_debug("\n"); spin_lock_irqsave(&priv->lec_arp_lock, flags); for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) { - hlist_for_each_entry_safe(entry, node, next, + hlist_for_each_entry_safe(entry, next, &priv->lec_arp_tables[i], next) { if (!memcmp(atm_addr, entry->atm_addr, ATM_ESA_LEN) && (permanent || @@ -1855,7 +1854,7 @@ lec_arp_update(struct lec_priv *priv, const unsigned char *mac_addr, unsigned int targetless_le_arp) { unsigned long flags; - struct hlist_node *node, *next; + struct hlist_node *next; struct lec_arp_table *entry, *tmp; int i; @@ -1870,7 +1869,7 @@ lec_arp_update(struct lec_priv *priv, const unsigned char *mac_addr, * we have no entry in the cache. 
7.1.30 */ if (!hlist_empty(&priv->lec_arp_empty_ones)) { - hlist_for_each_entry_safe(entry, node, next, + hlist_for_each_entry_safe(entry, next, &priv->lec_arp_empty_ones, next) { if (memcmp(entry->atm_addr, atm_addr, ATM_ESA_LEN) == 0) { hlist_del(&entry->next); @@ -1915,7 +1914,7 @@ lec_arp_update(struct lec_priv *priv, const unsigned char *mac_addr, memcpy(entry->atm_addr, atm_addr, ATM_ESA_LEN); del_timer(&entry->timer); for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) { - hlist_for_each_entry(tmp, node, + hlist_for_each_entry(tmp, &priv->lec_arp_tables[i], next) { if (entry != tmp && !memcmp(tmp->atm_addr, atm_addr, ATM_ESA_LEN)) { @@ -1956,7 +1955,6 @@ lec_vcc_added(struct lec_priv *priv, const struct atmlec_ioc *ioc_data, void (*old_push) (struct atm_vcc *vcc, struct sk_buff *skb)) { unsigned long flags; - struct hlist_node *node; struct lec_arp_table *entry; int i, found_entry = 0; @@ -2026,7 +2024,7 @@ lec_vcc_added(struct lec_priv *priv, const struct atmlec_ioc *ioc_data, ioc_data->atm_addr[16], ioc_data->atm_addr[17], ioc_data->atm_addr[18], ioc_data->atm_addr[19]); for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) { - hlist_for_each_entry(entry, node, + hlist_for_each_entry(entry, &priv->lec_arp_tables[i], next) { if (memcmp (ioc_data->atm_addr, entry->atm_addr, @@ -2103,7 +2101,6 @@ out: static void lec_flush_complete(struct lec_priv *priv, unsigned long tran_id) { unsigned long flags; - struct hlist_node *node; struct lec_arp_table *entry; int i; @@ -2111,7 +2108,7 @@ static void lec_flush_complete(struct lec_priv *priv, unsigned long tran_id) restart: spin_lock_irqsave(&priv->lec_arp_lock, flags); for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) { - hlist_for_each_entry(entry, node, + hlist_for_each_entry(entry, &priv->lec_arp_tables[i], next) { if (entry->flush_tran_id == tran_id && entry->status == ESI_FLUSH_PENDING) { @@ -2140,13 +2137,12 @@ lec_set_flush_tran_id(struct lec_priv *priv, const unsigned char *atm_addr, unsigned long tran_id) { unsigned long flags; - struct hlist_node *node; struct lec_arp_table *entry; int i; spin_lock_irqsave(&priv->lec_arp_lock, flags); for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) - hlist_for_each_entry(entry, node, + hlist_for_each_entry(entry, &priv->lec_arp_tables[i], next) { if (!memcmp(atm_addr, entry->atm_addr, ATM_ESA_LEN)) { entry->flush_tran_id = tran_id; @@ -2198,7 +2194,7 @@ out: static void lec_vcc_close(struct lec_priv *priv, struct atm_vcc *vcc) { unsigned long flags; - struct hlist_node *node, *next; + struct hlist_node *next; struct lec_arp_table *entry; int i; @@ -2208,7 +2204,7 @@ static void lec_vcc_close(struct lec_priv *priv, struct atm_vcc *vcc) spin_lock_irqsave(&priv->lec_arp_lock, flags); for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) { - hlist_for_each_entry_safe(entry, node, next, + hlist_for_each_entry_safe(entry, next, &priv->lec_arp_tables[i], next) { if (vcc == entry->vcc) { lec_arp_remove(priv, entry); @@ -2219,7 +2215,7 @@ static void lec_vcc_close(struct lec_priv *priv, struct atm_vcc *vcc) } } - hlist_for_each_entry_safe(entry, node, next, + hlist_for_each_entry_safe(entry, next, &priv->lec_arp_empty_ones, next) { if (entry->vcc == vcc) { lec_arp_clear_vccs(entry); @@ -2229,7 +2225,7 @@ static void lec_vcc_close(struct lec_priv *priv, struct atm_vcc *vcc) } } - hlist_for_each_entry_safe(entry, node, next, + hlist_for_each_entry_safe(entry, next, &priv->lec_no_forward, next) { if (entry->recv_vcc == vcc) { lec_arp_clear_vccs(entry); @@ -2239,7 +2235,7 @@ static void lec_vcc_close(struct lec_priv *priv, struct atm_vcc *vcc) } } - 
hlist_for_each_entry_safe(entry, node, next, &priv->mcast_fwds, next) { + hlist_for_each_entry_safe(entry, next, &priv->mcast_fwds, next) { if (entry->recv_vcc == vcc) { lec_arp_clear_vccs(entry); /* No timer, LANEv2 7.1.20 and 2.3.5.3 */ @@ -2257,13 +2253,13 @@ lec_arp_check_empties(struct lec_priv *priv, struct atm_vcc *vcc, struct sk_buff *skb) { unsigned long flags; - struct hlist_node *node, *next; + struct hlist_node *next; struct lec_arp_table *entry, *tmp; struct lecdatahdr_8023 *hdr = (struct lecdatahdr_8023 *)skb->data; unsigned char *src = hdr->h_source; spin_lock_irqsave(&priv->lec_arp_lock, flags); - hlist_for_each_entry_safe(entry, node, next, + hlist_for_each_entry_safe(entry, next, &priv->lec_arp_empty_ones, next) { if (vcc == entry->vcc) { del_timer(&entry->timer); diff --git a/net/atm/lec.h b/net/atm/lec.h index a86aff9a3c04..4149db1b7885 100644 --- a/net/atm/lec.h +++ b/net/atm/lec.h @@ -58,7 +58,7 @@ struct lane2_ops { * field in h_type field. Data follows immediately after header. * 2. LLC Data frames whose total length, including LLC field and data, * but not padding required to meet the minimum data frame length, - * is less than 1536(0x0600) MUST be encoded by placing that length + * is less than ETH_P_802_3_MIN MUST be encoded by placing that length * in the h_type field. The LLC field follows header immediately. * 3. LLC data frames longer than this maximum MUST be encoded by placing * the value 0 in the h_type field. diff --git a/net/atm/proc.c b/net/atm/proc.c index 0d020de8d233..bbb6461a4b7f 100644 --- a/net/atm/proc.c +++ b/net/atm/proc.c @@ -385,7 +385,7 @@ static ssize_t proc_dev_atm_read(struct file *file, char __user *buf, page = get_zeroed_page(GFP_KERNEL); if (!page) return -ENOMEM; - dev = PDE(file->f_path.dentry->d_inode)->data; + dev = PDE_DATA(file_inode(file)); if (!dev->ops->proc_read) length = -EINVAL; else { @@ -460,7 +460,7 @@ static void atm_proc_dirs_remove(void) if (e->dirent) remove_proc_entry(e->name, atm_proc_root); } - proc_net_remove(&init_net, "atm"); + remove_proc_entry("atm", init_net.proc_net); } int __init atm_proc_init(void) diff --git a/net/atm/signaling.c b/net/atm/signaling.c index 86767ca908a3..4176887e72eb 100644 --- a/net/atm/signaling.c +++ b/net/atm/signaling.c @@ -217,7 +217,6 @@ static void purge_vcc(struct atm_vcc *vcc) static void sigd_close(struct atm_vcc *vcc) { - struct hlist_node *node; struct sock *s; int i; @@ -231,7 +230,7 @@ static void sigd_close(struct atm_vcc *vcc) for (i = 0; i < VCC_HTABLE_SIZE; ++i) { struct hlist_head *head = &vcc_hash[i]; - sk_for_each(s, node, head) { + sk_for_each(s, head) { vcc = atm_sk(s); purge_vcc(vcc); diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index 779095ded689..e277e38f736b 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -81,14 +81,13 @@ static void ax25_kill_by_device(struct net_device *dev) { ax25_dev *ax25_dev; ax25_cb *s; - struct hlist_node *node; if ((ax25_dev = ax25_dev_ax25dev(dev)) == NULL) return; spin_lock_bh(&ax25_list_lock); again: - ax25_for_each(s, node, &ax25_list) { + ax25_for_each(s, &ax25_list) { if (s->ax25_dev == ax25_dev) { s->ax25_dev = NULL; spin_unlock_bh(&ax25_list_lock); @@ -158,10 +157,9 @@ struct sock *ax25_find_listener(ax25_address *addr, int digi, struct net_device *dev, int type) { ax25_cb *s; - struct hlist_node *node; spin_lock(&ax25_list_lock); - ax25_for_each(s, node, &ax25_list) { + ax25_for_each(s, &ax25_list) { if ((s->iamdigi && !digi) || (!s->iamdigi && digi)) continue; if (s->sk && !ax25cmp(&s->source_addr, addr) 
&& @@ -187,10 +185,9 @@ struct sock *ax25_get_socket(ax25_address *my_addr, ax25_address *dest_addr, { struct sock *sk = NULL; ax25_cb *s; - struct hlist_node *node; spin_lock(&ax25_list_lock); - ax25_for_each(s, node, &ax25_list) { + ax25_for_each(s, &ax25_list) { if (s->sk && !ax25cmp(&s->source_addr, my_addr) && !ax25cmp(&s->dest_addr, dest_addr) && s->sk->sk_type == type) { @@ -213,10 +210,9 @@ ax25_cb *ax25_find_cb(ax25_address *src_addr, ax25_address *dest_addr, ax25_digi *digi, struct net_device *dev) { ax25_cb *s; - struct hlist_node *node; spin_lock_bh(&ax25_list_lock); - ax25_for_each(s, node, &ax25_list) { + ax25_for_each(s, &ax25_list) { if (s->sk && s->sk->sk_type != SOCK_SEQPACKET) continue; if (s->ax25_dev == NULL) @@ -248,10 +244,9 @@ void ax25_send_to_raw(ax25_address *addr, struct sk_buff *skb, int proto) { ax25_cb *s; struct sk_buff *copy; - struct hlist_node *node; spin_lock(&ax25_list_lock); - ax25_for_each(s, node, &ax25_list) { + ax25_for_each(s, &ax25_list) { if (s->sk != NULL && ax25cmp(&s->source_addr, addr) == 0 && s->sk->sk_type == SOCK_RAW && s->sk->sk_protocol == proto && @@ -1647,6 +1642,7 @@ static int ax25_recvmsg(struct kiocb *iocb, struct socket *sock, ax25_address src; const unsigned char *mac = skb_mac_header(skb); + memset(sax, 0, sizeof(struct full_sockaddr_ax25)); ax25_addr_parse(mac + 1, skb->data - mac - 1, &src, NULL, &digi, NULL, NULL); sax->sax25_family = AF_AX25; @@ -1992,9 +1988,10 @@ static int __init ax25_init(void) dev_add_pack(&ax25_packet_type); register_netdevice_notifier(&ax25_dev_notifier); - proc_net_fops_create(&init_net, "ax25_route", S_IRUGO, &ax25_route_fops); - proc_net_fops_create(&init_net, "ax25", S_IRUGO, &ax25_info_fops); - proc_net_fops_create(&init_net, "ax25_calls", S_IRUGO, &ax25_uid_fops); + proc_create("ax25_route", S_IRUGO, init_net.proc_net, + &ax25_route_fops); + proc_create("ax25", S_IRUGO, init_net.proc_net, &ax25_info_fops); + proc_create("ax25_calls", S_IRUGO, init_net.proc_net, &ax25_uid_fops); out: return rc; } @@ -2008,9 +2005,9 @@ MODULE_ALIAS_NETPROTO(PF_AX25); static void __exit ax25_exit(void) { - proc_net_remove(&init_net, "ax25_route"); - proc_net_remove(&init_net, "ax25"); - proc_net_remove(&init_net, "ax25_calls"); + remove_proc_entry("ax25_route", init_net.proc_net); + remove_proc_entry("ax25", init_net.proc_net); + remove_proc_entry("ax25_calls", init_net.proc_net); unregister_netdevice_notifier(&ax25_dev_notifier); diff --git a/net/ax25/ax25_ds_subr.c b/net/ax25/ax25_ds_subr.c index 5ea7fd3e2af9..e05bd57b5afd 100644 --- a/net/ax25/ax25_ds_subr.c +++ b/net/ax25/ax25_ds_subr.c @@ -39,7 +39,6 @@ void ax25_ds_nr_error_recovery(ax25_cb *ax25) void ax25_ds_enquiry_response(ax25_cb *ax25) { ax25_cb *ax25o; - struct hlist_node *node; /* Please note that neither DK4EG's nor DG2FEF's * DAMA spec mention the following behaviour as seen @@ -80,7 +79,7 @@ void ax25_ds_enquiry_response(ax25_cb *ax25) ax25_ds_set_timer(ax25->ax25_dev); spin_lock(&ax25_list_lock); - ax25_for_each(ax25o, node, &ax25_list) { + ax25_for_each(ax25o, &ax25_list) { if (ax25o == ax25) continue; @@ -159,10 +158,9 @@ static int ax25_check_dama_slave(ax25_dev *ax25_dev) { ax25_cb *ax25; int res = 0; - struct hlist_node *node; spin_lock(&ax25_list_lock); - ax25_for_each(ax25, node, &ax25_list) + ax25_for_each(ax25, &ax25_list) if (ax25->ax25_dev == ax25_dev && (ax25->condition & AX25_COND_DAMA_MODE) && ax25->state > AX25_STATE_1) { res = 1; break; diff --git a/net/ax25/ax25_ds_timer.c b/net/ax25/ax25_ds_timer.c index 
993c439b4f71..951cd57bb07d 100644 --- a/net/ax25/ax25_ds_timer.c +++ b/net/ax25/ax25_ds_timer.c @@ -70,7 +70,6 @@ static void ax25_ds_timeout(unsigned long arg) { ax25_dev *ax25_dev = (struct ax25_dev *) arg; ax25_cb *ax25; - struct hlist_node *node; if (ax25_dev == NULL || !ax25_dev->dama.slave) return; /* Yikes! */ @@ -81,7 +80,7 @@ static void ax25_ds_timeout(unsigned long arg) } spin_lock(&ax25_list_lock); - ax25_for_each(ax25, node, &ax25_list) { + ax25_for_each(ax25, &ax25_list) { if (ax25->ax25_dev != ax25_dev || !(ax25->condition & AX25_COND_DAMA_MODE)) continue; diff --git a/net/ax25/ax25_iface.c b/net/ax25/ax25_iface.c index 7d5f24b82cc8..7f16e8a931b2 100644 --- a/net/ax25/ax25_iface.c +++ b/net/ax25/ax25_iface.c @@ -193,10 +193,9 @@ int ax25_listen_mine(ax25_address *callsign, struct net_device *dev) void ax25_link_failed(ax25_cb *ax25, int reason) { struct ax25_linkfail *lf; - struct hlist_node *node; spin_lock_bh(&linkfail_lock); - hlist_for_each_entry(lf, node, &ax25_linkfail_list, lf_node) + hlist_for_each_entry(lf, &ax25_linkfail_list, lf_node) lf->func(ax25, reason); spin_unlock_bh(&linkfail_lock); } diff --git a/net/ax25/ax25_uid.c b/net/ax25/ax25_uid.c index 957999e43ff7..71c4badbc807 100644 --- a/net/ax25/ax25_uid.c +++ b/net/ax25/ax25_uid.c @@ -54,10 +54,9 @@ EXPORT_SYMBOL(ax25_uid_policy); ax25_uid_assoc *ax25_findbyuid(kuid_t uid) { ax25_uid_assoc *ax25_uid, *res = NULL; - struct hlist_node *node; read_lock(&ax25_uid_lock); - ax25_uid_for_each(ax25_uid, node, &ax25_uid_list) { + ax25_uid_for_each(ax25_uid, &ax25_uid_list) { if (uid_eq(ax25_uid->uid, uid)) { ax25_uid_hold(ax25_uid); res = ax25_uid; @@ -74,7 +73,6 @@ EXPORT_SYMBOL(ax25_findbyuid); int ax25_uid_ioctl(int cmd, struct sockaddr_ax25 *sax) { ax25_uid_assoc *ax25_uid; - struct hlist_node *node; ax25_uid_assoc *user; unsigned long res; @@ -82,7 +80,7 @@ int ax25_uid_ioctl(int cmd, struct sockaddr_ax25 *sax) case SIOCAX25GETUID: res = -ENOENT; read_lock(&ax25_uid_lock); - ax25_uid_for_each(ax25_uid, node, &ax25_uid_list) { + ax25_uid_for_each(ax25_uid, &ax25_uid_list) { if (ax25cmp(&sax->sax25_call, &ax25_uid->call) == 0) { res = from_kuid_munged(current_user_ns(), ax25_uid->uid); break; @@ -126,7 +124,7 @@ int ax25_uid_ioctl(int cmd, struct sockaddr_ax25 *sax) ax25_uid = NULL; write_lock(&ax25_uid_lock); - ax25_uid_for_each(ax25_uid, node, &ax25_uid_list) { + ax25_uid_for_each(ax25_uid, &ax25_uid_list) { if (ax25cmp(&sax->sax25_call, &ax25_uid->call) == 0) break; } @@ -212,11 +210,10 @@ const struct file_operations ax25_uid_fops = { void __exit ax25_uid_free(void) { ax25_uid_assoc *ax25_uid; - struct hlist_node *node; write_lock(&ax25_uid_lock); again: - ax25_uid_for_each(ax25_uid, node, &ax25_uid_list) { + ax25_uid_for_each(ax25_uid, &ax25_uid_list) { hlist_del_init(&ax25_uid->uid_node); ax25_uid_put(ax25_uid); goto again; diff --git a/net/batman-adv/Kconfig b/net/batman-adv/Kconfig index 8d8afb134b3a..fa780b76630e 100644 --- a/net/batman-adv/Kconfig +++ b/net/batman-adv/Kconfig @@ -36,6 +36,20 @@ config BATMAN_ADV_DAT mesh networks. If you think that your network does not need this option you can safely remove it and save some space. +config BATMAN_ADV_NC + bool "Network Coding" + depends on BATMAN_ADV + default n + help + This option enables network coding, a mechanism that aims to + increase the overall network throughput by fusing multiple + packets in one transmission. 
+ Note that interfaces controlled by batman-adv must be manually + configured to have promiscuous mode enabled in order to make + network coding work. + If you think that your network does not need this feature you + can safely disable it and save some space. + config BATMAN_ADV_DEBUG bool "B.A.T.M.A.N. debugging" depends on BATMAN_ADV diff --git a/net/batman-adv/Makefile b/net/batman-adv/Makefile index e45e3b4e32e3..acbac2a9c62f 100644 --- a/net/batman-adv/Makefile +++ b/net/batman-adv/Makefile @@ -1,5 +1,5 @@ # -# Copyright (C) 2007-2012 B.A.T.M.A.N. contributors: +# Copyright (C) 2007-2013 B.A.T.M.A.N. contributors: # # Marek Lindner, Simon Wunderlich # @@ -30,6 +30,7 @@ batman-adv-y += hard-interface.o batman-adv-y += hash.o batman-adv-y += icmp_socket.o batman-adv-y += main.o +batman-adv-$(CONFIG_BATMAN_ADV_NC) += network-coding.o batman-adv-y += originator.o batman-adv-y += ring_buffer.o batman-adv-y += routing.o diff --git a/net/batman-adv/bat_algo.h b/net/batman-adv/bat_algo.h index a0ba3bff9b36..a4808c29ea3d 100644 --- a/net/batman-adv/bat_algo.h +++ b/net/batman-adv/bat_algo.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2011-2012 B.A.T.M.A.N. contributors: +/* Copyright (C) 2011-2013 B.A.T.M.A.N. contributors: * * Marek Lindner * diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index 7d02ebd11a7f..071f288b77a8 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2007-2012 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2013 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * @@ -27,6 +27,7 @@ #include "hard-interface.h" #include "send.h" #include "bat_algo.h" +#include "network-coding.h" static struct batadv_neigh_node * batadv_iv_ogm_neigh_new(struct batadv_hard_iface *hard_iface, @@ -123,7 +124,7 @@ batadv_iv_ogm_emit_send_time(const struct batadv_priv *bat_priv) unsigned int msecs; msecs = atomic_read(&bat_priv->orig_interval) - BATADV_JITTER; - msecs += random32() % (2 * BATADV_JITTER); + msecs += prandom_u32() % (2 * BATADV_JITTER); return jiffies + msecs_to_jiffies(msecs); } @@ -131,7 +132,7 @@ batadv_iv_ogm_emit_send_time(const struct batadv_priv *bat_priv) /* when do we schedule a ogm packet to be sent */ static unsigned long batadv_iv_ogm_fwd_send_time(void) { - return jiffies + msecs_to_jiffies(random32() % (BATADV_JITTER / 2)); + return jiffies + msecs_to_jiffies(prandom_u32() % (BATADV_JITTER / 2)); } /* apply hop penalty for a normal link */ @@ -183,7 +184,6 @@ static void batadv_iv_ogm_send_to_if(struct batadv_forw_packet *forw_packet, /* adjust all flags and log packets */ while (batadv_iv_ogm_aggr_packet(buff_pos, forw_packet->packet_len, batadv_ogm_packet->tt_num_changes)) { - /* we might have aggregated direct link packets with an * ordinary base packet */ @@ -261,7 +261,6 @@ static void batadv_iv_ogm_emit(struct batadv_forw_packet *forw_packet) */ if ((directlink && (batadv_ogm_packet->header.ttl == 1)) || (forw_packet->own && (forw_packet->if_incoming != primary_if))) { - /* FIXME: what about aggregated packets ? 
*/ batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "%s packet (originator %pM, seqno %u, TTL %d) on interface %s [%pM]\n", @@ -325,7 +324,6 @@ batadv_iv_ogm_can_aggregate(const struct batadv_ogm_packet *new_bat_ogm_packet, if (time_before(send_time, forw_packet->send_time) && time_after_eq(aggregation_end_time, forw_packet->send_time) && (aggregated_bytes <= BATADV_MAX_AGGREGATION_BYTES)) { - /* check aggregation compatibility * -> direct link packets are broadcasted on * their interface only @@ -490,7 +488,6 @@ static void batadv_iv_ogm_queue_add(struct batadv_priv *bat_priv, */ struct batadv_forw_packet *forw_packet_aggr = NULL; struct batadv_forw_packet *forw_packet_pos = NULL; - struct hlist_node *tmp_node; struct batadv_ogm_packet *batadv_ogm_packet; bool direct_link; unsigned long max_aggregation_jiffies; @@ -503,7 +500,7 @@ static void batadv_iv_ogm_queue_add(struct batadv_priv *bat_priv, spin_lock_bh(&bat_priv->forw_bat_list_lock); /* own packets are not to be aggregated */ if ((atomic_read(&bat_priv->aggregated_ogms)) && (!own_packet)) { - hlist_for_each_entry(forw_packet_pos, tmp_node, + hlist_for_each_entry(forw_packet_pos, &bat_priv->forw_bat_list, list) { if (batadv_iv_ogm_can_aggregate(batadv_ogm_packet, bat_priv, packet_len, @@ -658,7 +655,6 @@ batadv_iv_ogm_orig_update(struct batadv_priv *bat_priv, struct batadv_neigh_node *neigh_node = NULL, *tmp_neigh_node = NULL; struct batadv_neigh_node *router = NULL; struct batadv_orig_node *orig_node_tmp; - struct hlist_node *node; int if_num; uint8_t sum_orig, sum_neigh; uint8_t *neigh_addr; @@ -668,7 +664,7 @@ batadv_iv_ogm_orig_update(struct batadv_priv *bat_priv, "update_originator(): Searching and updating originator entry of received packet\n"); rcu_read_lock(); - hlist_for_each_entry_rcu(tmp_neigh_node, node, + hlist_for_each_entry_rcu(tmp_neigh_node, &orig_node->neigh_list, list) { neigh_addr = tmp_neigh_node->addr; if (batadv_compare_eth(neigh_addr, ethhdr->h_source) && @@ -804,7 +800,6 @@ static int batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node, { struct batadv_priv *bat_priv = netdev_priv(if_incoming->soft_iface); struct batadv_neigh_node *neigh_node = NULL, *tmp_neigh_node; - struct hlist_node *node; uint8_t total_count; uint8_t orig_eq_count, neigh_rq_count, neigh_rq_inv, tq_own; unsigned int neigh_rq_inv_cube, neigh_rq_max_cube; @@ -813,9 +808,8 @@ static int batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node, /* find corresponding one hop neighbor */ rcu_read_lock(); - hlist_for_each_entry_rcu(tmp_neigh_node, node, + hlist_for_each_entry_rcu(tmp_neigh_node, &orig_neigh_node->neigh_list, list) { - if (!batadv_compare_eth(tmp_neigh_node->addr, orig_neigh_node->orig)) continue; @@ -924,7 +918,6 @@ batadv_iv_ogm_update_seqnos(const struct ethhdr *ethhdr, struct batadv_priv *bat_priv = netdev_priv(if_incoming->soft_iface); struct batadv_orig_node *orig_node; struct batadv_neigh_node *tmp_neigh_node; - struct hlist_node *node; int is_duplicate = 0; int32_t seq_diff; int need_update = 0; @@ -947,9 +940,8 @@ batadv_iv_ogm_update_seqnos(const struct ethhdr *ethhdr, goto out; rcu_read_lock(); - hlist_for_each_entry_rcu(tmp_neigh_node, node, + hlist_for_each_entry_rcu(tmp_neigh_node, &orig_node->neigh_list, list) { - is_duplicate |= batadv_test_bit(tmp_neigh_node->real_bits, orig_node->last_real_seqno, seqno); @@ -1033,7 +1025,7 @@ static void batadv_iv_ogm_process(const struct ethhdr *ethhdr, is_single_hop_neigh = true; batadv_dbg(BATADV_DBG_BATMAN, bat_priv, - "Received BATMAN packet via NB: %pM, IF: %s [%pM] (from OG: 
%pM, via prev OG: %pM, seqno %u, ttvn %u, crc %u, changes %u, td %d, TTL %d, V %d, IDF %d)\n", + "Received BATMAN packet via NB: %pM, IF: %s [%pM] (from OG: %pM, via prev OG: %pM, seqno %u, ttvn %u, crc %#.4x, changes %u, tq %d, TTL %d, V %d, IDF %d)\n", ethhdr->h_source, if_incoming->net_dev->name, if_incoming->net_dev->dev_addr, batadv_ogm_packet->orig, batadv_ogm_packet->prev_sender, @@ -1194,6 +1186,10 @@ static void batadv_iv_ogm_process(const struct ethhdr *ethhdr, if (!orig_neigh_node) goto out; + /* Update nc_nodes of the originator */ + batadv_nc_update_nc_node(bat_priv, orig_node, orig_neigh_node, + batadv_ogm_packet, is_single_hop_neigh); + orig_neigh_router = batadv_orig_node_get_router(orig_neigh_node); /* drop packet if sender is not a direct neighbor and if we @@ -1223,7 +1219,6 @@ static void batadv_iv_ogm_process(const struct ethhdr *ethhdr, /* is single hop (direct) neighbor */ if (is_single_hop_neigh) { - /* mark direct link on incoming interface */ batadv_iv_ogm_forward(orig_node, ethhdr, batadv_ogm_packet, is_single_hop_neigh, @@ -1298,7 +1293,8 @@ static int batadv_iv_ogm_receive(struct sk_buff *skb, batadv_ogm_packet = (struct batadv_ogm_packet *)packet_buff; /* unpack the aggregated packets and process them one by one */ - do { + while (batadv_iv_ogm_aggr_packet(buff_pos, packet_len, + batadv_ogm_packet->tt_num_changes)) { tt_buff = packet_buff + buff_pos + BATADV_OGM_HLEN; batadv_iv_ogm_process(ethhdr, batadv_ogm_packet, tt_buff, @@ -1309,8 +1305,7 @@ static int batadv_iv_ogm_receive(struct sk_buff *skb, packet_pos = packet_buff + buff_pos; batadv_ogm_packet = (struct batadv_ogm_packet *)packet_pos; - } while (batadv_iv_ogm_aggr_packet(buff_pos, packet_len, - batadv_ogm_packet->tt_num_changes)); + } kfree_skb(skb); return NET_RX_SUCCESS; diff --git a/net/batman-adv/bitarray.c b/net/batman-adv/bitarray.c index 5453b17d8df2..973982414d58 100644 --- a/net/batman-adv/bitarray.c +++ b/net/batman-adv/bitarray.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2006-2012 B.A.T.M.A.N. contributors: +/* Copyright (C) 2006-2013 B.A.T.M.A.N. contributors: * * Simon Wunderlich, Marek Lindner * diff --git a/net/batman-adv/bitarray.h b/net/batman-adv/bitarray.h index cebaae7e148b..a81b9322e382 100644 --- a/net/batman-adv/bitarray.h +++ b/net/batman-adv/bitarray.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2006-2012 B.A.T.M.A.N. contributors: +/* Copyright (C) 2006-2013 B.A.T.M.A.N. contributors: * * Simon Wunderlich, Marek Lindner * diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c index 5aebe9327d68..379061c72549 100644 --- a/net/batman-adv/bridge_loop_avoidance.c +++ b/net/batman-adv/bridge_loop_avoidance.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2011-2012 B.A.T.M.A.N. contributors: +/* Copyright (C) 2011-2013 B.A.T.M.A.N. 
contributors: * * Simon Wunderlich * @@ -34,13 +34,14 @@ static const uint8_t batadv_announce_mac[4] = {0x43, 0x05, 0x43, 0x05}; static void batadv_bla_periodic_work(struct work_struct *work); -static void batadv_bla_send_announce(struct batadv_priv *bat_priv, - struct batadv_backbone_gw *backbone_gw); +static void +batadv_bla_send_announce(struct batadv_priv *bat_priv, + struct batadv_bla_backbone_gw *backbone_gw); /* return the index of the claim */ static inline uint32_t batadv_choose_claim(const void *data, uint32_t size) { - struct batadv_claim *claim = (struct batadv_claim *)data; + struct batadv_bla_claim *claim = (struct batadv_bla_claim *)data; uint32_t hash = 0; hash = batadv_hash_bytes(hash, &claim->addr, sizeof(claim->addr)); @@ -57,7 +58,7 @@ static inline uint32_t batadv_choose_claim(const void *data, uint32_t size) static inline uint32_t batadv_choose_backbone_gw(const void *data, uint32_t size) { - struct batadv_claim *claim = (struct batadv_claim *)data; + const struct batadv_bla_claim *claim = (struct batadv_bla_claim *)data; uint32_t hash = 0; hash = batadv_hash_bytes(hash, &claim->addr, sizeof(claim->addr)); @@ -75,9 +76,9 @@ static inline uint32_t batadv_choose_backbone_gw(const void *data, static int batadv_compare_backbone_gw(const struct hlist_node *node, const void *data2) { - const void *data1 = container_of(node, struct batadv_backbone_gw, + const void *data1 = container_of(node, struct batadv_bla_backbone_gw, hash_entry); - const struct batadv_backbone_gw *gw1 = data1, *gw2 = data2; + const struct batadv_bla_backbone_gw *gw1 = data1, *gw2 = data2; if (!batadv_compare_eth(gw1->orig, gw2->orig)) return 0; @@ -92,9 +93,9 @@ static int batadv_compare_backbone_gw(const struct hlist_node *node, static int batadv_compare_claim(const struct hlist_node *node, const void *data2) { - const void *data1 = container_of(node, struct batadv_claim, + const void *data1 = container_of(node, struct batadv_bla_claim, hash_entry); - const struct batadv_claim *cl1 = data1, *cl2 = data2; + const struct batadv_bla_claim *cl1 = data1, *cl2 = data2; if (!batadv_compare_eth(cl1->addr, cl2->addr)) return 0; @@ -106,7 +107,8 @@ static int batadv_compare_claim(const struct hlist_node *node, } /* free a backbone gw */ -static void batadv_backbone_gw_free_ref(struct batadv_backbone_gw *backbone_gw) +static void +batadv_backbone_gw_free_ref(struct batadv_bla_backbone_gw *backbone_gw) { if (atomic_dec_and_test(&backbone_gw->refcount)) kfree_rcu(backbone_gw, rcu); @@ -115,16 +117,16 @@ static void batadv_backbone_gw_free_ref(struct batadv_backbone_gw *backbone_gw) /* finally deinitialize the claim */ static void batadv_claim_free_rcu(struct rcu_head *rcu) { - struct batadv_claim *claim; + struct batadv_bla_claim *claim; - claim = container_of(rcu, struct batadv_claim, rcu); + claim = container_of(rcu, struct batadv_bla_claim, rcu); batadv_backbone_gw_free_ref(claim->backbone_gw); kfree(claim); } /* free a claim, call claim_free_rcu if its the last reference */ -static void batadv_claim_free_ref(struct batadv_claim *claim) +static void batadv_claim_free_ref(struct batadv_bla_claim *claim) { if (atomic_dec_and_test(&claim->refcount)) call_rcu(&claim->rcu, batadv_claim_free_rcu); @@ -136,14 +138,14 @@ static void batadv_claim_free_ref(struct batadv_claim *claim) * looks for a claim in the hash, and returns it if found * or NULL otherwise. 
*/ -static struct batadv_claim *batadv_claim_hash_find(struct batadv_priv *bat_priv, - struct batadv_claim *data) +static struct batadv_bla_claim +*batadv_claim_hash_find(struct batadv_priv *bat_priv, + struct batadv_bla_claim *data) { struct batadv_hashtable *hash = bat_priv->bla.claim_hash; struct hlist_head *head; - struct hlist_node *node; - struct batadv_claim *claim; - struct batadv_claim *claim_tmp = NULL; + struct batadv_bla_claim *claim; + struct batadv_bla_claim *claim_tmp = NULL; int index; if (!hash) @@ -153,7 +155,7 @@ static struct batadv_claim *batadv_claim_hash_find(struct batadv_priv *bat_priv, head = &hash->table[index]; rcu_read_lock(); - hlist_for_each_entry_rcu(claim, node, head, hash_entry) { + hlist_for_each_entry_rcu(claim, head, hash_entry) { if (!batadv_compare_claim(&claim->hash_entry, data)) continue; @@ -176,15 +178,14 @@ static struct batadv_claim *batadv_claim_hash_find(struct batadv_priv *bat_priv, * * Returns claim if found or NULL otherwise. */ -static struct batadv_backbone_gw * +static struct batadv_bla_backbone_gw * batadv_backbone_hash_find(struct batadv_priv *bat_priv, uint8_t *addr, short vid) { struct batadv_hashtable *hash = bat_priv->bla.backbone_hash; struct hlist_head *head; - struct hlist_node *node; - struct batadv_backbone_gw search_entry, *backbone_gw; - struct batadv_backbone_gw *backbone_gw_tmp = NULL; + struct batadv_bla_backbone_gw search_entry, *backbone_gw; + struct batadv_bla_backbone_gw *backbone_gw_tmp = NULL; int index; if (!hash) @@ -197,7 +198,7 @@ batadv_backbone_hash_find(struct batadv_priv *bat_priv, head = &hash->table[index]; rcu_read_lock(); - hlist_for_each_entry_rcu(backbone_gw, node, head, hash_entry) { + hlist_for_each_entry_rcu(backbone_gw, head, hash_entry) { if (!batadv_compare_backbone_gw(&backbone_gw->hash_entry, &search_entry)) continue; @@ -215,12 +216,12 @@ batadv_backbone_hash_find(struct batadv_priv *bat_priv, /* delete all claims for a backbone */ static void -batadv_bla_del_backbone_claims(struct batadv_backbone_gw *backbone_gw) +batadv_bla_del_backbone_claims(struct batadv_bla_backbone_gw *backbone_gw) { struct batadv_hashtable *hash; - struct hlist_node *node, *node_tmp; + struct hlist_node *node_tmp; struct hlist_head *head; - struct batadv_claim *claim; + struct batadv_bla_claim *claim; int i; spinlock_t *list_lock; /* protects write access to the hash lists */ @@ -233,14 +234,13 @@ batadv_bla_del_backbone_claims(struct batadv_backbone_gw *backbone_gw) list_lock = &hash->list_locks[i]; spin_lock_bh(list_lock); - hlist_for_each_entry_safe(claim, node, node_tmp, + hlist_for_each_entry_safe(claim, node_tmp, head, hash_entry) { - if (claim->backbone_gw != backbone_gw) continue; batadv_claim_free_ref(claim); - hlist_del_rcu(node); + hlist_del_rcu(&claim->hash_entry); } spin_unlock_bh(list_lock); } @@ -338,11 +338,10 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, uint8_t *mac, "bla_send_claim(): REQUEST of %pM to %pMon vid %d\n", ethhdr->h_source, ethhdr->h_dest, vid); break; - } if (vid != -1) - skb = vlan_insert_tag(skb, vid); + skb = vlan_insert_tag(skb, htons(ETH_P_8021Q), vid); skb_reset_mac_header(skb); skb->protocol = eth_type_trans(skb, soft_iface); @@ -366,11 +365,11 @@ out: * searches for the backbone gw or creates a new one if it could not * be found. 
*/ -static struct batadv_backbone_gw * +static struct batadv_bla_backbone_gw * batadv_bla_get_backbone_gw(struct batadv_priv *bat_priv, uint8_t *orig, short vid, bool own_backbone) { - struct batadv_backbone_gw *entry; + struct batadv_bla_backbone_gw *entry; struct batadv_orig_node *orig_node; int hash_added; @@ -437,7 +436,7 @@ batadv_bla_update_own_backbone_gw(struct batadv_priv *bat_priv, struct batadv_hard_iface *primary_if, short vid) { - struct batadv_backbone_gw *backbone_gw; + struct batadv_bla_backbone_gw *backbone_gw; backbone_gw = batadv_bla_get_backbone_gw(bat_priv, primary_if->net_dev->dev_addr, @@ -459,11 +458,10 @@ static void batadv_bla_answer_request(struct batadv_priv *bat_priv, struct batadv_hard_iface *primary_if, short vid) { - struct hlist_node *node; struct hlist_head *head; struct batadv_hashtable *hash; - struct batadv_claim *claim; - struct batadv_backbone_gw *backbone_gw; + struct batadv_bla_claim *claim; + struct batadv_bla_backbone_gw *backbone_gw; int i; batadv_dbg(BATADV_DBG_BLA, bat_priv, @@ -480,7 +478,7 @@ static void batadv_bla_answer_request(struct batadv_priv *bat_priv, head = &hash->table[i]; rcu_read_lock(); - hlist_for_each_entry_rcu(claim, node, head, hash_entry) { + hlist_for_each_entry_rcu(claim, head, hash_entry) { /* only own claims are interesting */ if (claim->backbone_gw != backbone_gw) continue; @@ -502,7 +500,7 @@ static void batadv_bla_answer_request(struct batadv_priv *bat_priv, * After the request, it will repeat all of its own claims and finally * send an announcement claim with which we can check again. */ -static void batadv_bla_send_request(struct batadv_backbone_gw *backbone_gw) +static void batadv_bla_send_request(struct batadv_bla_backbone_gw *backbone_gw) { /* first, remove all old entries */ batadv_bla_del_backbone_claims(backbone_gw); @@ -528,7 +526,7 @@ static void batadv_bla_send_request(struct batadv_backbone_gw *backbone_gw) * places.
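The lookup helpers converted above (batadv_claim_hash_find, batadv_backbone_hash_find) all share one RCU idiom: walk the bucket under rcu_read_lock() and only hand out an entry whose refcount can still be raised from non-zero, so a reader can never resurrect an object that a concurrent free_ref() has already doomed. A minimal sketch of that idiom, with hypothetical names (entry, found, compare_fn, search_data):

	rcu_read_lock();
	hlist_for_each_entry_rcu(entry, head, hash_entry) {
		if (!compare_fn(&entry->hash_entry, search_data))
			continue;

		/* skip entries whose refcount already dropped to zero;
		 * they are on their way to being freed
		 */
		if (!atomic_inc_not_zero(&entry->refcount))
			continue;

		found = entry;
		break;
	}
	rcu_read_unlock();

The atomic_inc_not_zero() step is what makes plain RCU traversal safe to combine with reference counting; the caller owns one reference on the returned entry and must drop it with the matching free_ref helper.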
*/ static void batadv_bla_send_announce(struct batadv_priv *bat_priv, - struct batadv_backbone_gw *backbone_gw) + struct batadv_bla_backbone_gw *backbone_gw) { uint8_t mac[ETH_ALEN]; __be16 crc; @@ -539,7 +537,6 @@ static void batadv_bla_send_announce(struct batadv_priv *bat_priv, batadv_bla_send_claim(bat_priv, mac, backbone_gw->vid, BATADV_CLAIM_TYPE_ANNOUNCE); - } /** @@ -551,10 +548,10 @@ static void batadv_bla_send_announce(struct batadv_priv *bat_priv, */ static void batadv_bla_add_claim(struct batadv_priv *bat_priv, const uint8_t *mac, const short vid, - struct batadv_backbone_gw *backbone_gw) + struct batadv_bla_backbone_gw *backbone_gw) { - struct batadv_claim *claim; - struct batadv_claim search_claim; + struct batadv_bla_claim *claim; + struct batadv_bla_claim search_claim; int hash_added; memcpy(search_claim.addr, mac, ETH_ALEN); @@ -598,7 +595,6 @@ static void batadv_bla_add_claim(struct batadv_priv *bat_priv, claim->backbone_gw->crc ^= crc16(0, claim->addr, ETH_ALEN); batadv_backbone_gw_free_ref(claim->backbone_gw); - } /* set (new) backbone gw */ atomic_inc(&backbone_gw->refcount); @@ -617,7 +613,7 @@ claim_free_ref: static void batadv_bla_del_claim(struct batadv_priv *bat_priv, const uint8_t *mac, const short vid) { - struct batadv_claim search_claim, *claim; + struct batadv_bla_claim search_claim, *claim; memcpy(search_claim.addr, mac, ETH_ALEN); search_claim.vid = vid; @@ -643,7 +639,7 @@ static int batadv_handle_announce(struct batadv_priv *bat_priv, uint8_t *an_addr, uint8_t *backbone_addr, short vid) { - struct batadv_backbone_gw *backbone_gw; + struct batadv_bla_backbone_gw *backbone_gw; uint16_t crc; if (memcmp(an_addr, batadv_announce_mac, 4) != 0) @@ -661,12 +657,12 @@ static int batadv_handle_announce(struct batadv_priv *bat_priv, crc = ntohs(*((__be16 *)(&an_addr[4]))); batadv_dbg(BATADV_DBG_BLA, bat_priv, - "handle_announce(): ANNOUNCE vid %d (sent by %pM)... CRC = %04x\n", + "handle_announce(): ANNOUNCE vid %d (sent by %pM)... CRC = %#.4x\n", vid, backbone_gw->orig, crc); if (backbone_gw->crc != crc) { batadv_dbg(BATADV_DBG_BLA, backbone_gw->bat_priv, - "handle_announce(): CRC FAILED for %pM/%d (my = %04x, sent = %04x)\n", + "handle_announce(): CRC FAILED for %pM/%d (my = %#.4x, sent = %#.4x)\n", backbone_gw->orig, backbone_gw->vid, backbone_gw->crc, crc); @@ -715,7 +711,7 @@ static int batadv_handle_unclaim(struct batadv_priv *bat_priv, uint8_t *backbone_addr, uint8_t *claim_addr, short vid) { - struct batadv_backbone_gw *backbone_gw; + struct batadv_bla_backbone_gw *backbone_gw; /* unclaim in any case if it is our own */ if (primary_if && batadv_compare_eth(backbone_addr, @@ -744,7 +740,7 @@ static int batadv_handle_claim(struct batadv_priv *bat_priv, uint8_t *backbone_addr, uint8_t *claim_addr, short vid) { - struct batadv_backbone_gw *backbone_gw; + struct batadv_bla_backbone_gw *backbone_gw; /* register the gateway if not yet available, and add the claim. */ @@ -835,7 +831,7 @@ static int batadv_check_claim_group(struct batadv_priv *bat_priv, /* if our mesh friends mac is bigger, use it for ourselves. 
*/ if (ntohs(bla_dst->group) > ntohs(bla_dst_own->group)) { batadv_dbg(BATADV_DBG_BLA, bat_priv, - "taking other backbones claim group: %04x\n", + "taking other backbones claim group: %#.4x\n", ntohs(bla_dst->group)); bla_dst_own->group = bla_dst->group; } @@ -958,8 +954,8 @@ static int batadv_bla_process_claim(struct batadv_priv *bat_priv, */ static void batadv_bla_purge_backbone_gw(struct batadv_priv *bat_priv, int now) { - struct batadv_backbone_gw *backbone_gw; - struct hlist_node *node, *node_tmp; + struct batadv_bla_backbone_gw *backbone_gw; + struct hlist_node *node_tmp; struct hlist_head *head; struct batadv_hashtable *hash; spinlock_t *list_lock; /* protects write access to the hash lists */ @@ -974,7 +970,7 @@ static void batadv_bla_purge_backbone_gw(struct batadv_priv *bat_priv, int now) list_lock = &hash->list_locks[i]; spin_lock_bh(list_lock); - hlist_for_each_entry_safe(backbone_gw, node, node_tmp, + hlist_for_each_entry_safe(backbone_gw, node_tmp, head, hash_entry) { if (now) goto purge_now; @@ -993,7 +989,7 @@ purge_now: batadv_bla_del_backbone_claims(backbone_gw); - hlist_del_rcu(node); + hlist_del_rcu(&backbone_gw->hash_entry); batadv_backbone_gw_free_ref(backbone_gw); } spin_unlock_bh(list_lock); @@ -1013,8 +1009,7 @@ static void batadv_bla_purge_claims(struct batadv_priv *bat_priv, struct batadv_hard_iface *primary_if, int now) { - struct batadv_claim *claim; - struct hlist_node *node; + struct batadv_bla_claim *claim; struct hlist_head *head; struct batadv_hashtable *hash; int i; @@ -1027,7 +1022,7 @@ static void batadv_bla_purge_claims(struct batadv_priv *bat_priv, head = &hash->table[i]; rcu_read_lock(); - hlist_for_each_entry_rcu(claim, node, head, hash_entry) { + hlist_for_each_entry_rcu(claim, head, hash_entry) { if (now) goto purge_now; if (!batadv_compare_eth(claim->backbone_gw->orig, @@ -1062,8 +1057,7 @@ void batadv_bla_update_orig_address(struct batadv_priv *bat_priv, struct batadv_hard_iface *primary_if, struct batadv_hard_iface *oldif) { - struct batadv_backbone_gw *backbone_gw; - struct hlist_node *node; + struct batadv_bla_backbone_gw *backbone_gw; struct hlist_head *head; struct batadv_hashtable *hash; __be16 group; @@ -1087,7 +1081,7 @@ void batadv_bla_update_orig_address(struct batadv_priv *bat_priv, head = &hash->table[i]; rcu_read_lock(); - hlist_for_each_entry_rcu(backbone_gw, node, head, hash_entry) { + hlist_for_each_entry_rcu(backbone_gw, head, hash_entry) { /* own orig still holds the old value. 
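The purge loops above also show the shape that hash removal takes after the iterator conversion: with the separate struct hlist_node cursor gone, hlist_del_rcu() is fed the node embedded in the entry itself. A minimal sketch of the pattern under the per-bucket lock (should_remove and entry_free_ref are hypothetical):

	spin_lock_bh(list_lock);
	hlist_for_each_entry_safe(entry, node_tmp, head, hash_entry) {
		if (!should_remove(entry))
			continue;

		/* the hlist_node is derived from the entry, not a cursor */
		hlist_del_rcu(&entry->hash_entry);
		entry_free_ref(entry);
	}
	spin_unlock_bh(list_lock);

This matches the tree-wide hlist cleanup in this kernel cycle, where the iterator macros derive the node pointer from the entry type instead of taking it as an extra argument.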
*/ if (!batadv_compare_eth(backbone_gw->orig, oldif->net_dev->dev_addr)) @@ -1104,16 +1098,6 @@ void batadv_bla_update_orig_address(struct batadv_priv *bat_priv, } } - - -/* (re)start the timer */ -static void batadv_bla_start_timer(struct batadv_priv *bat_priv) -{ - INIT_DELAYED_WORK(&bat_priv->bla.work, batadv_bla_periodic_work); - queue_delayed_work(batadv_event_workqueue, &bat_priv->bla.work, - msecs_to_jiffies(BATADV_BLA_PERIOD_LENGTH)); -} - /* periodic work to do: * * purge structures when they are too old * * send announcements @@ -1123,9 +1107,8 @@ static void batadv_bla_periodic_work(struct work_struct *work) struct delayed_work *delayed_work; struct batadv_priv *bat_priv; struct batadv_priv_bla *priv_bla; - struct hlist_node *node; struct hlist_head *head; - struct batadv_backbone_gw *backbone_gw; + struct batadv_bla_backbone_gw *backbone_gw; struct batadv_hashtable *hash; struct batadv_hard_iface *primary_if; int i; @@ -1151,7 +1134,7 @@ static void batadv_bla_periodic_work(struct work_struct *work) head = &hash->table[i]; rcu_read_lock(); - hlist_for_each_entry_rcu(backbone_gw, node, head, hash_entry) { + hlist_for_each_entry_rcu(backbone_gw, head, hash_entry) { if (!batadv_compare_eth(backbone_gw->orig, primary_if->net_dev->dev_addr)) continue; @@ -1184,7 +1167,8 @@ out: if (primary_if) batadv_hardif_free_ref(primary_if); - batadv_bla_start_timer(bat_priv); + queue_delayed_work(batadv_event_workqueue, &bat_priv->bla.work, + msecs_to_jiffies(BATADV_BLA_PERIOD_LENGTH)); } /* The hash for claim and backbone hash receive the same key because they @@ -1242,7 +1226,10 @@ int batadv_bla_init(struct batadv_priv *bat_priv) batadv_dbg(BATADV_DBG_BLA, bat_priv, "bla hashes initialized\n"); - batadv_bla_start_timer(bat_priv); + INIT_DELAYED_WORK(&bat_priv->bla.work, batadv_bla_periodic_work); + + queue_delayed_work(batadv_event_workqueue, &bat_priv->bla.work, + msecs_to_jiffies(BATADV_BLA_PERIOD_LENGTH)); return 0; } @@ -1329,8 +1316,7 @@ int batadv_bla_is_backbone_gw_orig(struct batadv_priv *bat_priv, uint8_t *orig) { struct batadv_hashtable *hash = bat_priv->bla.backbone_hash; struct hlist_head *head; - struct hlist_node *node; - struct batadv_backbone_gw *backbone_gw; + struct batadv_bla_backbone_gw *backbone_gw; int i; if (!atomic_read(&bat_priv->bridge_loop_avoidance)) @@ -1343,7 +1329,7 @@ int batadv_bla_is_backbone_gw_orig(struct batadv_priv *bat_priv, uint8_t *orig) head = &hash->table[i]; rcu_read_lock(); - hlist_for_each_entry_rcu(backbone_gw, node, head, hash_entry) { + hlist_for_each_entry_rcu(backbone_gw, head, hash_entry) { if (batadv_compare_eth(backbone_gw->orig, orig)) { rcu_read_unlock(); return 1; @@ -1371,7 +1357,7 @@ int batadv_bla_is_backbone_gw(struct sk_buff *skb, { struct ethhdr *ethhdr; struct vlan_ethhdr *vhdr; - struct batadv_backbone_gw *backbone_gw; + struct batadv_bla_backbone_gw *backbone_gw; short vid = -1; if (!atomic_read(&orig_node->bat_priv->bridge_loop_avoidance)) @@ -1442,7 +1428,7 @@ int batadv_bla_rx(struct batadv_priv *bat_priv, struct sk_buff *skb, short vid, bool is_bcast) { struct ethhdr *ethhdr; - struct batadv_claim search_claim, *claim = NULL; + struct batadv_bla_claim search_claim, *claim = NULL; struct batadv_hard_iface *primary_if; int ret; @@ -1536,7 +1522,7 @@ out: int batadv_bla_tx(struct batadv_priv *bat_priv, struct sk_buff *skb, short vid) { struct ethhdr *ethhdr; - struct batadv_claim search_claim, *claim = NULL; + struct batadv_bla_claim search_claim, *claim = NULL; struct batadv_hard_iface *primary_if; int ret = 0; @@ -1612,9 
+1598,8 @@ int batadv_bla_claim_table_seq_print_text(struct seq_file *seq, void *offset) struct net_device *net_dev = (struct net_device *)seq->private; struct batadv_priv *bat_priv = netdev_priv(net_dev); struct batadv_hashtable *hash = bat_priv->bla.claim_hash; - struct batadv_claim *claim; + struct batadv_bla_claim *claim; struct batadv_hard_iface *primary_if; - struct hlist_node *node; struct hlist_head *head; uint32_t i; bool is_own; @@ -1626,19 +1611,19 @@ int batadv_bla_claim_table_seq_print_text(struct seq_file *seq, void *offset) primary_addr = primary_if->net_dev->dev_addr; seq_printf(seq, - "Claims announced for the mesh %s (orig %pM, group id %04x)\n", + "Claims announced for the mesh %s (orig %pM, group id %#.4x)\n", net_dev->name, primary_addr, ntohs(bat_priv->bla.claim_dest.group)); - seq_printf(seq, " %-17s %-5s %-17s [o] (%-4s)\n", + seq_printf(seq, " %-17s %-5s %-17s [o] (%-6s)\n", "Client", "VID", "Originator", "CRC"); for (i = 0; i < hash->size; i++) { head = &hash->table[i]; rcu_read_lock(); - hlist_for_each_entry_rcu(claim, node, head, hash_entry) { + hlist_for_each_entry_rcu(claim, head, hash_entry) { is_own = batadv_compare_eth(claim->backbone_gw->orig, primary_addr); - seq_printf(seq, " * %pM on % 5d by %pM [%c] (%04x)\n", + seq_printf(seq, " * %pM on % 5d by %pM [%c] (%#.4x)\n", claim->addr, claim->vid, claim->backbone_gw->orig, (is_own ? 'x' : ' '), @@ -1657,9 +1642,8 @@ int batadv_bla_backbone_table_seq_print_text(struct seq_file *seq, void *offset) struct net_device *net_dev = (struct net_device *)seq->private; struct batadv_priv *bat_priv = netdev_priv(net_dev); struct batadv_hashtable *hash = bat_priv->bla.backbone_hash; - struct batadv_backbone_gw *backbone_gw; + struct batadv_bla_backbone_gw *backbone_gw; struct batadv_hard_iface *primary_if; - struct hlist_node *node; struct hlist_head *head; int secs, msecs; uint32_t i; @@ -1672,16 +1656,16 @@ int batadv_bla_backbone_table_seq_print_text(struct seq_file *seq, void *offset) primary_addr = primary_if->net_dev->dev_addr; seq_printf(seq, - "Backbones announced for the mesh %s (orig %pM, group id %04x)\n", + "Backbones announced for the mesh %s (orig %pM, group id %#.4x)\n", net_dev->name, primary_addr, ntohs(bat_priv->bla.claim_dest.group)); - seq_printf(seq, " %-17s %-5s %-9s (%-4s)\n", + seq_printf(seq, " %-17s %-5s %-9s (%-6s)\n", "Originator", "VID", "last seen", "CRC"); for (i = 0; i < hash->size; i++) { head = &hash->table[i]; rcu_read_lock(); - hlist_for_each_entry_rcu(backbone_gw, node, head, hash_entry) { + hlist_for_each_entry_rcu(backbone_gw, head, hash_entry) { msecs = jiffies_to_msecs(jiffies - backbone_gw->lasttime); secs = msecs / 1000; @@ -1693,7 +1677,7 @@ int batadv_bla_backbone_table_seq_print_text(struct seq_file *seq, void *offset) continue; seq_printf(seq, - " * %pM on % 5d % 4i.%03is (%04x)\n", + " * %pM on % 5d % 4i.%03is (%#.4x)\n", backbone_gw->orig, backbone_gw->vid, secs, msecs, backbone_gw->crc); } diff --git a/net/batman-adv/bridge_loop_avoidance.h b/net/batman-adv/bridge_loop_avoidance.h index 196d9a0254bc..dea2fbc5d98d 100644 --- a/net/batman-adv/bridge_loop_avoidance.h +++ b/net/batman-adv/bridge_loop_avoidance.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2011-2012 B.A.T.M.A.N. contributors: +/* Copyright (C) 2011-2013 B.A.T.M.A.N. 
contributors: * * Simon Wunderlich * diff --git a/net/batman-adv/debugfs.c b/net/batman-adv/debugfs.c index 6f58ddd53bff..f186a55b23c3 100644 --- a/net/batman-adv/debugfs.c +++ b/net/batman-adv/debugfs.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2010-2012 B.A.T.M.A.N. contributors: +/* Copyright (C) 2010-2013 B.A.T.M.A.N. contributors: * * Marek Lindner * @@ -32,6 +32,7 @@ #include "icmp_socket.h" #include "bridge_loop_avoidance.h" #include "distributed-arp-table.h" +#include "network-coding.h" static struct dentry *batadv_debugfs; @@ -40,13 +41,14 @@ static struct dentry *batadv_debugfs; static const int batadv_log_buff_len = BATADV_LOG_BUF_LEN; -static char *batadv_log_char_addr(struct batadv_debug_log *debug_log, +static char *batadv_log_char_addr(struct batadv_priv_debug_log *debug_log, size_t idx) { return &debug_log->log_buff[idx & BATADV_LOG_BUFF_MASK]; } -static void batadv_emit_log_char(struct batadv_debug_log *debug_log, char c) +static void batadv_emit_log_char(struct batadv_priv_debug_log *debug_log, + char c) { char *char_addr; @@ -59,7 +61,7 @@ static void batadv_emit_log_char(struct batadv_debug_log *debug_log, char c) } __printf(2, 3) -static int batadv_fdebug_log(struct batadv_debug_log *debug_log, +static int batadv_fdebug_log(struct batadv_priv_debug_log *debug_log, const char *fmt, ...) { va_list args; @@ -114,7 +116,7 @@ static int batadv_log_release(struct inode *inode, struct file *file) return 0; } -static int batadv_log_empty(struct batadv_debug_log *debug_log) +static int batadv_log_empty(struct batadv_priv_debug_log *debug_log) { return !(debug_log->log_start - debug_log->log_end); } @@ -123,7 +125,7 @@ static ssize_t batadv_log_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { struct batadv_priv *bat_priv = file->private_data; - struct batadv_debug_log *debug_log = bat_priv->debug_log; + struct batadv_priv_debug_log *debug_log = bat_priv->debug_log; int error, i = 0; char *char_addr; char c; @@ -164,7 +166,6 @@ static ssize_t batadv_log_read(struct file *file, char __user *buf, buf++; i++; - } spin_unlock_bh(&debug_log->lock); @@ -178,7 +179,7 @@ static ssize_t batadv_log_read(struct file *file, char __user *buf, static unsigned int batadv_log_poll(struct file *file, poll_table *wait) { struct batadv_priv *bat_priv = file->private_data; - struct batadv_debug_log *debug_log = bat_priv->debug_log; + struct batadv_priv_debug_log *debug_log = bat_priv->debug_log; poll_wait(file, &debug_log->queue_wait, wait); @@ -230,7 +231,6 @@ static void batadv_debug_log_cleanup(struct batadv_priv *bat_priv) #else /* CONFIG_BATMAN_ADV_DEBUG */ static int batadv_debug_log_setup(struct batadv_priv *bat_priv) { - bat_priv->debug_log = NULL; return 0; } @@ -311,6 +311,14 @@ struct batadv_debuginfo { const struct file_operations fops; }; +#ifdef CONFIG_BATMAN_ADV_NC +static int batadv_nc_nodes_open(struct inode *inode, struct file *file) +{ + struct net_device *net_dev = (struct net_device *)inode->i_private; + return single_open(file, batadv_nc_nodes_seq_print_text, net_dev); +} +#endif + #define BATADV_DEBUGINFO(_name, _mode, _open) \ struct batadv_debuginfo batadv_debuginfo_##_name = { \ .attr = { .name = __stringify(_name), \ @@ -349,6 +357,9 @@ static BATADV_DEBUGINFO(dat_cache, S_IRUGO, batadv_dat_cache_open); static BATADV_DEBUGINFO(transtable_local, S_IRUGO, batadv_transtable_local_open); static BATADV_DEBUGINFO(vis_data, S_IRUGO, batadv_vis_data_open); +#ifdef CONFIG_BATMAN_ADV_NC +static BATADV_DEBUGINFO(nc_nodes, S_IRUGO, batadv_nc_nodes_open); +#endif static 
struct batadv_debuginfo *batadv_mesh_debuginfos[] = { &batadv_debuginfo_originators, @@ -363,6 +374,9 @@ static struct batadv_debuginfo *batadv_mesh_debuginfos[] = { #endif &batadv_debuginfo_transtable_local, &batadv_debuginfo_vis_data, +#ifdef CONFIG_BATMAN_ADV_NC + &batadv_debuginfo_nc_nodes, +#endif NULL, }; @@ -397,10 +411,8 @@ err: void batadv_debugfs_destroy(void) { - if (batadv_debugfs) { - debugfs_remove_recursive(batadv_debugfs); - batadv_debugfs = NULL; - } + debugfs_remove_recursive(batadv_debugfs); + batadv_debugfs = NULL; } int batadv_debugfs_add_meshif(struct net_device *dev) @@ -434,6 +446,9 @@ int batadv_debugfs_add_meshif(struct net_device *dev) } } + if (batadv_nc_init_debugfs(bat_priv) < 0) + goto rem_attr; + return 0; rem_attr: debugfs_remove_recursive(bat_priv->debug_dir); diff --git a/net/batman-adv/debugfs.h b/net/batman-adv/debugfs.h index 3319e1f21f55..f8c3849edff4 100644 --- a/net/batman-adv/debugfs.h +++ b/net/batman-adv/debugfs.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2010-2012 B.A.T.M.A.N. contributors: +/* Copyright (C) 2010-2013 B.A.T.M.A.N. contributors: * * Marek Lindner * diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c index 553921511e4e..8e15d966d9b0 100644 --- a/net/batman-adv/distributed-arp-table.c +++ b/net/batman-adv/distributed-arp-table.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2011-2012 B.A.T.M.A.N. contributors: +/* Copyright (C) 2011-2013 B.A.T.M.A.N. contributors: * * Antonio Quartulli * @@ -83,7 +83,7 @@ static void __batadv_dat_purge(struct batadv_priv *bat_priv, { spinlock_t *list_lock; /* protects write access to the hash lists */ struct batadv_dat_entry *dat_entry; - struct hlist_node *node, *node_tmp; + struct hlist_node *node_tmp; struct hlist_head *head; uint32_t i; @@ -95,7 +95,7 @@ static void __batadv_dat_purge(struct batadv_priv *bat_priv, list_lock = &bat_priv->dat.hash->list_locks[i]; spin_lock_bh(list_lock); - hlist_for_each_entry_safe(dat_entry, node, node_tmp, head, + hlist_for_each_entry_safe(dat_entry, node_tmp, head, hash_entry) { /* if an helper function has been passed as parameter, * ask it if the entry has to be purged or not @@ -103,7 +103,7 @@ static void __batadv_dat_purge(struct batadv_priv *bat_priv, if (to_purge && !to_purge(dat_entry)) continue; - hlist_del_rcu(node); + hlist_del_rcu(&dat_entry->hash_entry); batadv_dat_entry_free_ref(dat_entry); } spin_unlock_bh(list_lock); @@ -235,7 +235,6 @@ static struct batadv_dat_entry * batadv_dat_entry_hash_find(struct batadv_priv *bat_priv, __be32 ip) { struct hlist_head *head; - struct hlist_node *node; struct batadv_dat_entry *dat_entry, *dat_entry_tmp = NULL; struct batadv_hashtable *hash = bat_priv->dat.hash; uint32_t index; @@ -247,7 +246,7 @@ batadv_dat_entry_hash_find(struct batadv_priv *bat_priv, __be32 ip) head = &hash->table[index]; rcu_read_lock(); - hlist_for_each_entry_rcu(dat_entry, node, head, hash_entry) { + hlist_for_each_entry_rcu(dat_entry, head, hash_entry) { if (dat_entry->ip != ip) continue; @@ -465,7 +464,6 @@ static void batadv_choose_next_candidate(struct batadv_priv *bat_priv, batadv_dat_addr_t max = 0, tmp_max = 0; struct batadv_orig_node *orig_node, *max_orig_node = NULL; struct batadv_hashtable *hash = bat_priv->orig_hash; - struct hlist_node *node; struct hlist_head *head; int i; @@ -481,7 +479,7 @@ static void batadv_choose_next_candidate(struct batadv_priv *bat_priv, head = &hash->table[i]; rcu_read_lock(); - hlist_for_each_entry_rcu(orig_node, node, head, hash_entry) { + hlist_for_each_entry_rcu(orig_node, 
head, hash_entry) { /* the dht space is a ring and addresses are unsigned */ tmp_max = BATADV_DAT_ADDR_MAX - orig_node->dat_addr + ip_key; @@ -686,7 +684,6 @@ int batadv_dat_cache_seq_print_text(struct seq_file *seq, void *offset) struct batadv_hashtable *hash = bat_priv->dat.hash; struct batadv_dat_entry *dat_entry; struct batadv_hard_iface *primary_if; - struct hlist_node *node; struct hlist_head *head; unsigned long last_seen_jiffies; int last_seen_msecs, last_seen_secs, last_seen_mins; @@ -704,7 +701,7 @@ int batadv_dat_cache_seq_print_text(struct seq_file *seq, void *offset) head = &hash->table[i]; rcu_read_lock(); - hlist_for_each_entry_rcu(dat_entry, node, head, hash_entry) { + hlist_for_each_entry_rcu(dat_entry, head, hash_entry) { last_seen_jiffies = jiffies - dat_entry->last_update; last_seen_msecs = jiffies_to_msecs(last_seen_jiffies); last_seen_mins = last_seen_msecs / 60000; @@ -819,7 +816,6 @@ bool batadv_dat_snoop_outgoing_arp_request(struct batadv_priv *bat_priv, bool ret = false; struct batadv_dat_entry *dat_entry = NULL; struct sk_buff *skb_new; - struct batadv_hard_iface *primary_if = NULL; if (!atomic_read(&bat_priv->distributed_arp_table)) goto out; @@ -841,22 +837,18 @@ bool batadv_dat_snoop_outgoing_arp_request(struct batadv_priv *bat_priv, dat_entry = batadv_dat_entry_hash_find(bat_priv, ip_dst); if (dat_entry) { - primary_if = batadv_primary_if_get_selected(bat_priv); - if (!primary_if) - goto out; - skb_new = arp_create(ARPOP_REPLY, ETH_P_ARP, ip_src, - primary_if->soft_iface, ip_dst, hw_src, + bat_priv->soft_iface, ip_dst, hw_src, dat_entry->mac_addr, hw_src); if (!skb_new) goto out; skb_reset_mac_header(skb_new); skb_new->protocol = eth_type_trans(skb_new, - primary_if->soft_iface); + bat_priv->soft_iface); bat_priv->stats.rx_packets++; bat_priv->stats.rx_bytes += skb->len + ETH_HLEN; - primary_if->soft_iface->last_rx = jiffies; + bat_priv->soft_iface->last_rx = jiffies; netif_rx(skb_new); batadv_dbg(BATADV_DBG_DAT, bat_priv, "ARP request replied locally\n"); @@ -869,8 +861,6 @@ bool batadv_dat_snoop_outgoing_arp_request(struct batadv_priv *bat_priv, out: if (dat_entry) batadv_dat_entry_free_ref(dat_entry); - if (primary_if) - batadv_hardif_free_ref(primary_if); return ret; } @@ -890,7 +880,6 @@ bool batadv_dat_snoop_incoming_arp_request(struct batadv_priv *bat_priv, __be32 ip_src, ip_dst; uint8_t *hw_src; struct sk_buff *skb_new; - struct batadv_hard_iface *primary_if = NULL; struct batadv_dat_entry *dat_entry = NULL; bool ret = false; int err; @@ -915,12 +904,8 @@ bool batadv_dat_snoop_incoming_arp_request(struct batadv_priv *bat_priv, if (!dat_entry) goto out; - primary_if = batadv_primary_if_get_selected(bat_priv); - if (!primary_if) - goto out; - skb_new = arp_create(ARPOP_REPLY, ETH_P_ARP, ip_src, - primary_if->soft_iface, ip_dst, hw_src, + bat_priv->soft_iface, ip_dst, hw_src, dat_entry->mac_addr, hw_src); if (!skb_new) @@ -944,8 +929,6 @@ bool batadv_dat_snoop_incoming_arp_request(struct batadv_priv *bat_priv, out: if (dat_entry) batadv_dat_entry_free_ref(dat_entry); - if (primary_if) - batadv_hardif_free_ref(primary_if); if (ret) kfree_skb(skb); return ret; diff --git a/net/batman-adv/distributed-arp-table.h b/net/batman-adv/distributed-arp-table.h index d060c033e7de..125c8c6fcfad 100644 --- a/net/batman-adv/distributed-arp-table.h +++ b/net/batman-adv/distributed-arp-table.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2011-2012 B.A.T.M.A.N. contributors: +/* Copyright (C) 2011-2013 B.A.T.M.A.N. 
contributors: * * Antonio Quartulli * diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c index dd07c7e3654f..f105219f4a4b 100644 --- a/net/batman-adv/gateway_client.c +++ b/net/batman-adv/gateway_client.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2009-2012 B.A.T.M.A.N. contributors: +/* Copyright (C) 2009-2013 B.A.T.M.A.N. contributors: * * Marek Lindner * @@ -114,7 +114,6 @@ static struct batadv_gw_node * batadv_gw_get_best_gw_node(struct batadv_priv *bat_priv) { struct batadv_neigh_node *router; - struct hlist_node *node; struct batadv_gw_node *gw_node, *curr_gw = NULL; uint32_t max_gw_factor = 0, tmp_gw_factor = 0; uint32_t gw_divisor; @@ -127,7 +126,7 @@ batadv_gw_get_best_gw_node(struct batadv_priv *bat_priv) gw_divisor *= 64; rcu_read_lock(); - hlist_for_each_entry_rcu(gw_node, node, &bat_priv->gw.list, list) { + hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.list, list) { if (gw_node->deleted) continue; @@ -344,7 +343,6 @@ void batadv_gw_node_update(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node, uint8_t new_gwflags) { - struct hlist_node *node; struct batadv_gw_node *gw_node, *curr_gw; /* Note: We don't need a NULL check here, since curr_gw never gets @@ -355,7 +353,7 @@ void batadv_gw_node_update(struct batadv_priv *bat_priv, curr_gw = batadv_gw_get_selected_gw_node(bat_priv); rcu_read_lock(); - hlist_for_each_entry_rcu(gw_node, node, &bat_priv->gw.list, list) { + hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.list, list) { if (gw_node->orig_node != orig_node) continue; @@ -403,7 +401,7 @@ void batadv_gw_node_delete(struct batadv_priv *bat_priv, void batadv_gw_node_purge(struct batadv_priv *bat_priv) { struct batadv_gw_node *gw_node, *curr_gw; - struct hlist_node *node, *node_tmp; + struct hlist_node *node_tmp; unsigned long timeout = msecs_to_jiffies(2 * BATADV_PURGE_TIMEOUT); int do_deselect = 0; @@ -411,7 +409,7 @@ void batadv_gw_node_purge(struct batadv_priv *bat_priv) spin_lock_bh(&bat_priv->gw.list_lock); - hlist_for_each_entry_safe(gw_node, node, node_tmp, + hlist_for_each_entry_safe(gw_node, node_tmp, &bat_priv->gw.list, list) { if (((!gw_node->deleted) || (time_before(jiffies, gw_node->deleted + timeout))) && @@ -476,7 +474,6 @@ int batadv_gw_client_seq_print_text(struct seq_file *seq, void *offset) struct batadv_priv *bat_priv = netdev_priv(net_dev); struct batadv_hard_iface *primary_if; struct batadv_gw_node *gw_node; - struct hlist_node *node; int gw_count = 0; primary_if = batadv_seq_print_text_primary_if_get(seq); @@ -490,7 +487,7 @@ int batadv_gw_client_seq_print_text(struct seq_file *seq, void *offset) primary_if->net_dev->dev_addr, net_dev->name); rcu_read_lock(); - hlist_for_each_entry_rcu(gw_node, node, &bat_priv->gw.list, list) { + hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.list, list) { if (gw_node->deleted) continue; @@ -503,7 +500,7 @@ int batadv_gw_client_seq_print_text(struct seq_file *seq, void *offset) rcu_read_unlock(); if (gw_count == 0) - seq_printf(seq, "No gateways in range ...\n"); + seq_puts(seq, "No gateways in range ...\n"); out: if (primary_if) diff --git a/net/batman-adv/gateway_client.h b/net/batman-adv/gateway_client.h index f0d129e323c8..039902dca4a6 100644 --- a/net/batman-adv/gateway_client.h +++ b/net/batman-adv/gateway_client.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2009-2012 B.A.T.M.A.N. contributors: +/* Copyright (C) 2009-2013 B.A.T.M.A.N. 
contributors: * * Marek Lindner * diff --git a/net/batman-adv/gateway_common.c b/net/batman-adv/gateway_common.c index 9001208d1752..84bb2b18d711 100644 --- a/net/batman-adv/gateway_common.c +++ b/net/batman-adv/gateway_common.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2009-2012 B.A.T.M.A.N. contributors: +/* Copyright (C) 2009-2013 B.A.T.M.A.N. contributors: * * Marek Lindner * diff --git a/net/batman-adv/gateway_common.h b/net/batman-adv/gateway_common.h index 13697f6e7113..509b2bf8c2f4 100644 --- a/net/batman-adv/gateway_common.h +++ b/net/batman-adv/gateway_common.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2009-2012 B.A.T.M.A.N. contributors: +/* Copyright (C) 2009-2013 B.A.T.M.A.N. contributors: * * Marek Lindner * diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index f1d37cd81815..522243aff2f3 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2007-2012 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2013 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * @@ -307,11 +307,35 @@ batadv_hardif_deactivate_interface(struct batadv_hard_iface *hard_iface) batadv_update_min_mtu(hard_iface->soft_iface); } +/** + * batadv_master_del_slave - remove hard_iface from the current master interface + * @slave: the interface enslaved in another master + * @master: the master from which slave has to be removed + * + * Invoke ndo_del_slave on master passing slave as argument. In this way slave + * is free'd and master can correctly change its internal state. + * Return 0 on success, a negative value representing the error otherwise + */ +static int batadv_master_del_slave(struct batadv_hard_iface *slave, + struct net_device *master) +{ + int ret; + + if (!master) + return 0; + + ret = -EBUSY; + if (master->netdev_ops->ndo_del_slave) + ret = master->netdev_ops->ndo_del_slave(master, slave->net_dev); + + return ret; +} + int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface, const char *iface_name) { struct batadv_priv *bat_priv; - struct net_device *soft_iface; + struct net_device *soft_iface, *master; __be16 ethertype = __constant_htons(ETH_P_BATMAN); int ret; @@ -321,11 +345,6 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface, if (!atomic_inc_not_zero(&hard_iface->refcount)) goto out; - /* hard-interface is part of a bridge */ - if (hard_iface->net_dev->priv_flags & IFF_BRIDGE_PORT) - pr_err("You are about to enable batman-adv on '%s' which already is part of a bridge. 
Unless you know exactly what you are doing this is probably wrong and won't work the way you think it would.\n", - hard_iface->net_dev->name); - soft_iface = dev_get_by_name(&init_net, iface_name); if (!soft_iface) { @@ -347,12 +366,24 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface, goto err_dev; } + /* check if the interface is enslaved in another virtual one and + * in that case unlink it first + */ + master = netdev_master_upper_dev_get(hard_iface->net_dev); + ret = batadv_master_del_slave(hard_iface, master); + if (ret) + goto err_dev; + hard_iface->soft_iface = soft_iface; bat_priv = netdev_priv(hard_iface->soft_iface); + ret = netdev_master_upper_dev_link(hard_iface->net_dev, soft_iface); + if (ret) + goto err_dev; + ret = bat_priv->bat_algo_ops->bat_iface_enable(hard_iface); if (ret < 0) - goto err_dev; + goto err_upper; hard_iface->if_num = bat_priv->num_ifaces; bat_priv->num_ifaces++; @@ -362,7 +393,7 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface, bat_priv->bat_algo_ops->bat_iface_disable(hard_iface); bat_priv->num_ifaces--; hard_iface->if_status = BATADV_IF_NOT_IN_USE; - goto err_dev; + goto err_upper; } hard_iface->batman_adv_ptype.type = ethertype; @@ -401,14 +432,18 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface, out: return 0; +err_upper: + netdev_upper_dev_unlink(hard_iface->net_dev, soft_iface); err_dev: + hard_iface->soft_iface = NULL; dev_put(soft_iface); err: batadv_hardif_free_ref(hard_iface); return ret; } -void batadv_hardif_disable_interface(struct batadv_hard_iface *hard_iface) +void batadv_hardif_disable_interface(struct batadv_hard_iface *hard_iface, + enum batadv_hard_if_cleanup autodel) { struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface); struct batadv_hard_iface *primary_if = NULL; @@ -446,9 +481,10 @@ void batadv_hardif_disable_interface(struct batadv_hard_iface *hard_iface) dev_put(hard_iface->soft_iface); /* nobody uses this interface anymore */ - if (!bat_priv->num_ifaces) - batadv_softif_destroy(hard_iface->soft_iface); + if (!bat_priv->num_ifaces && autodel == BATADV_IF_CLEANUP_AUTO) + batadv_softif_destroy_sysfs(hard_iface->soft_iface); + netdev_upper_dev_unlink(hard_iface->net_dev, hard_iface->soft_iface); hard_iface->soft_iface = NULL; batadv_hardif_free_ref(hard_iface); @@ -457,6 +493,24 @@ out: batadv_hardif_free_ref(primary_if); } +/** + * batadv_hardif_remove_interface_finish - cleans up the remains of a hardif + * @work: work queue item + * + * Free the parts of the hard interface which can not be removed under + * rtnl lock (to prevent deadlock situations). 
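The new autodel parameter of batadv_hardif_disable_interface() lets a caller choose whether removing the last slave also destroys the soft interface. A short sketch of the two call styles (caller context hypothetical; the remove path above really does pass BATADV_IF_CLEANUP_AUTO):

	/* keep the soft interface alive even after its last slave is gone */
	batadv_hardif_disable_interface(hard_iface, BATADV_IF_CLEANUP_KEEP);

	/* teardown path: destroy the soft interface once it has no slaves,
	 * as batadv_hardif_remove_interface() does
	 */
	batadv_hardif_disable_interface(hard_iface, BATADV_IF_CLEANUP_AUTO);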
+ */ +static void batadv_hardif_remove_interface_finish(struct work_struct *work) +{ + struct batadv_hard_iface *hard_iface; + + hard_iface = container_of(work, struct batadv_hard_iface, + cleanup_work); + + batadv_sysfs_del_hardif(&hard_iface->hardif_obj); + batadv_hardif_free_ref(hard_iface); +} + static struct batadv_hard_iface * batadv_hardif_add_interface(struct net_device *net_dev) { @@ -484,6 +538,9 @@ batadv_hardif_add_interface(struct net_device *net_dev) hard_iface->soft_iface = NULL; hard_iface->if_status = BATADV_IF_NOT_IN_USE; INIT_LIST_HEAD(&hard_iface->list); + INIT_WORK(&hard_iface->cleanup_work, + batadv_hardif_remove_interface_finish); + /* extra reference for return */ atomic_set(&hard_iface->refcount, 2); @@ -512,14 +569,14 @@ static void batadv_hardif_remove_interface(struct batadv_hard_iface *hard_iface) /* first deactivate interface */ if (hard_iface->if_status != BATADV_IF_NOT_IN_USE) - batadv_hardif_disable_interface(hard_iface); + batadv_hardif_disable_interface(hard_iface, + BATADV_IF_CLEANUP_AUTO); if (hard_iface->if_status != BATADV_IF_NOT_IN_USE) return; hard_iface->if_status = BATADV_IF_TO_BE_REMOVED; - batadv_sysfs_del_hardif(&hard_iface->hardif_obj); - batadv_hardif_free_ref(hard_iface); + queue_work(batadv_event_workqueue, &hard_iface->cleanup_work); } void batadv_hardif_remove_interfaces(void) @@ -543,6 +600,11 @@ static int batadv_hard_if_event(struct notifier_block *this, struct batadv_hard_iface *primary_if = NULL; struct batadv_priv *bat_priv; + if (batadv_softif_is_valid(net_dev) && event == NETDEV_REGISTER) { + batadv_sysfs_add_meshif(net_dev); + return NOTIFY_DONE; + } + hard_iface = batadv_hardif_get_by_netdev(net_dev); if (!hard_iface && event == NETDEV_REGISTER) hard_iface = batadv_hardif_add_interface(net_dev); diff --git a/net/batman-adv/hard-interface.h b/net/batman-adv/hard-interface.h index 3732366e7445..49892881a7c5 100644 --- a/net/batman-adv/hard-interface.h +++ b/net/batman-adv/hard-interface.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2007-2012 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2013 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * @@ -29,13 +29,24 @@ enum batadv_hard_if_state { BATADV_IF_I_WANT_YOU, }; +/** + * enum batadv_hard_if_cleanup - Cleanup modi for soft_iface after slave removal + * @BATADV_IF_CLEANUP_KEEP: Don't automatically delete soft-interface + * @BATADV_IF_CLEANUP_AUTO: Delete soft-interface after last slave was removed + */ +enum batadv_hard_if_cleanup { + BATADV_IF_CLEANUP_KEEP, + BATADV_IF_CLEANUP_AUTO, +}; + extern struct notifier_block batadv_hard_if_notifier; struct batadv_hard_iface* batadv_hardif_get_by_netdev(const struct net_device *net_dev); int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface, const char *iface_name); -void batadv_hardif_disable_interface(struct batadv_hard_iface *hard_iface); +void batadv_hardif_disable_interface(struct batadv_hard_iface *hard_iface, + enum batadv_hard_if_cleanup autodel); void batadv_hardif_remove_interfaces(void); int batadv_hardif_min_mtu(struct net_device *soft_iface); void batadv_update_min_mtu(struct net_device *soft_iface); diff --git a/net/batman-adv/hash.c b/net/batman-adv/hash.c index 15a849c2d414..7198dafd3bf3 100644 --- a/net/batman-adv/hash.c +++ b/net/batman-adv/hash.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2006-2012 B.A.T.M.A.N. contributors: +/* Copyright (C) 2006-2013 B.A.T.M.A.N. 
contributors: * * Simon Wunderlich, Marek Lindner * diff --git a/net/batman-adv/hash.h b/net/batman-adv/hash.h index e05333905afd..1b4da72f2093 100644 --- a/net/batman-adv/hash.h +++ b/net/batman-adv/hash.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2006-2012 B.A.T.M.A.N. contributors: +/* Copyright (C) 2006-2013 B.A.T.M.A.N. contributors: * * Simon Wunderlich, Marek Lindner * @@ -89,7 +89,7 @@ static inline void batadv_hash_delete(struct batadv_hashtable *hash, * * Returns the new hash value. */ -static inline uint32_t batadv_hash_bytes(uint32_t hash, void *data, +static inline uint32_t batadv_hash_bytes(uint32_t hash, const void *data, uint32_t size) { const unsigned char *key = data; diff --git a/net/batman-adv/icmp_socket.c b/net/batman-adv/icmp_socket.c index 87ca8095b011..0ba6c899b2d3 100644 --- a/net/batman-adv/icmp_socket.c +++ b/net/batman-adv/icmp_socket.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2007-2012 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2013 B.A.T.M.A.N. contributors: * * Marek Lindner * diff --git a/net/batman-adv/icmp_socket.h b/net/batman-adv/icmp_socket.h index 29443a1dbb5c..1fcca37b6223 100644 --- a/net/batman-adv/icmp_socket.h +++ b/net/batman-adv/icmp_socket.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2007-2012 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2013 B.A.T.M.A.N. contributors: * * Marek Lindner * diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c index f65a222b7b83..3e30a0f1b908 100644 --- a/net/batman-adv/main.c +++ b/net/batman-adv/main.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2007-2012 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2013 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * @@ -35,6 +35,7 @@ #include "vis.h" #include "hash.h" #include "bat_algo.h" +#include "network-coding.h" /* List manipulations on hardif_list have to be rtnl_lock()'ed, @@ -70,6 +71,7 @@ static int __init batadv_init(void) batadv_debugfs_init(); register_netdevice_notifier(&batadv_hard_if_notifier); + rtnl_link_register(&batadv_link_ops); pr_info("B.A.T.M.A.N. 
advanced %s (compatibility version %i) loaded\n", BATADV_SOURCE_VERSION, BATADV_COMPAT_VERSION); @@ -80,6 +82,7 @@ static int __init batadv_init(void) static void __exit batadv_exit(void) { batadv_debugfs_destroy(); + rtnl_link_unregister(&batadv_link_ops); unregister_netdevice_notifier(&batadv_hard_if_notifier); batadv_hardif_remove_interfaces(); @@ -135,6 +138,10 @@ int batadv_mesh_init(struct net_device *soft_iface) if (ret < 0) goto err; + ret = batadv_nc_init(bat_priv); + if (ret < 0) + goto err; + atomic_set(&bat_priv->gw.reselect, 0); atomic_set(&bat_priv->mesh_state, BATADV_MESH_ACTIVE); @@ -157,6 +164,7 @@ void batadv_mesh_free(struct net_device *soft_iface) batadv_gw_node_purge(bat_priv); batadv_originator_free(bat_priv); + batadv_nc_free(bat_priv); batadv_tt_free(bat_priv); @@ -169,7 +177,13 @@ void batadv_mesh_free(struct net_device *soft_iface) atomic_set(&bat_priv->mesh_state, BATADV_MESH_INACTIVE); } -int batadv_is_my_mac(const uint8_t *addr) +/** + * batadv_is_my_mac - check if the given mac address belongs to any of the real + * interfaces in the current mesh + * @bat_priv: the bat priv with all the soft interface information + * @addr: the address to check + */ +int batadv_is_my_mac(struct batadv_priv *bat_priv, const uint8_t *addr) { const struct batadv_hard_iface *hard_iface; @@ -178,6 +192,9 @@ int batadv_is_my_mac(const uint8_t *addr) if (hard_iface->if_status != BATADV_IF_ACTIVE) continue; + if (hard_iface->soft_iface != bat_priv->soft_iface) + continue; + if (batadv_compare_eth(hard_iface->net_dev->dev_addr, addr)) { rcu_read_unlock(); return 1; @@ -345,9 +362,8 @@ void batadv_recv_handler_unregister(uint8_t packet_type) static struct batadv_algo_ops *batadv_algo_get(char *name) { struct batadv_algo_ops *bat_algo_ops = NULL, *bat_algo_ops_tmp; - struct hlist_node *node; - hlist_for_each_entry(bat_algo_ops_tmp, node, &batadv_algo_list, list) { + hlist_for_each_entry(bat_algo_ops_tmp, &batadv_algo_list, list) { if (strcmp(bat_algo_ops_tmp->name, name) != 0) continue; @@ -411,11 +427,10 @@ out: int batadv_algo_seq_print_text(struct seq_file *seq, void *offset) { struct batadv_algo_ops *bat_algo_ops; - struct hlist_node *node; - seq_printf(seq, "Available routing algorithms:\n"); + seq_puts(seq, "Available routing algorithms:\n"); - hlist_for_each_entry(bat_algo_ops, node, &batadv_algo_list, list) { + hlist_for_each_entry(bat_algo_ops, &batadv_algo_list, list) { seq_printf(seq, "%s\n", bat_algo_ops->name); } diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h index 2f85577086a7..59a0d6af15c8 100644 --- a/net/batman-adv/main.h +++ b/net/batman-adv/main.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2007-2012 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2013 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * @@ -26,7 +26,7 @@ #define BATADV_DRIVER_DEVICE "batman-adv" #ifndef BATADV_SOURCE_VERSION -#define BATADV_SOURCE_VERSION "2012.5.0" +#define BATADV_SOURCE_VERSION "2013.2.0" #endif /* B.A.T.M.A.N. 
parameters */ @@ -41,9 +41,11 @@ * -> TODO: check influence on BATADV_TQ_LOCAL_WINDOW_SIZE */ #define BATADV_PURGE_TIMEOUT 200000 /* 200 seconds */ -#define BATADV_TT_LOCAL_TIMEOUT 3600000 /* in milliseconds */ +#define BATADV_TT_LOCAL_TIMEOUT 600000 /* in milliseconds */ #define BATADV_TT_CLIENT_ROAM_TIMEOUT 600000 /* in milliseconds */ #define BATADV_TT_CLIENT_TEMP_TIMEOUT 600000 /* in milliseconds */ +#define BATADV_TT_WORK_PERIOD 5000 /* 5 seconds */ +#define BATADV_ORIG_WORK_PERIOD 1000 /* 1 second */ #define BATADV_DAT_ENTRY_TIMEOUT (5*60000) /* 5 mins in milliseconds */ /* sliding packet range of received originator messages in sequence numbers * (should be a multiple of our word size) @@ -103,6 +105,8 @@ #define BATADV_RESET_PROTECTION_MS 30000 #define BATADV_EXPECTED_SEQNO_RANGE 65536 +#define BATADV_NC_NODE_TIMEOUT 10000 /* Milliseconds */ + enum batadv_mesh_state { BATADV_MESH_INACTIVE, BATADV_MESH_ACTIVE, @@ -148,6 +152,7 @@ enum batadv_uev_type { #include <linux/percpu.h> #include <linux/slab.h> #include <net/sock.h> /* struct sock */ +#include <net/rtnetlink.h> #include <linux/jiffies.h> #include <linux/seq_file.h> #include "types.h" @@ -160,7 +165,7 @@ extern struct workqueue_struct *batadv_event_workqueue; int batadv_mesh_init(struct net_device *soft_iface); void batadv_mesh_free(struct net_device *soft_iface); -int batadv_is_my_mac(const uint8_t *addr); +int batadv_is_my_mac(struct batadv_priv *bat_priv, const uint8_t *addr); struct batadv_hard_iface * batadv_seq_print_text_primary_if_get(struct seq_file *seq); int batadv_batman_skb_recv(struct sk_buff *skb, struct net_device *dev, @@ -183,6 +188,7 @@ __be32 batadv_skb_crc32(struct sk_buff *skb, u8 *payload_ptr); * @BATADV_DBG_TT: translation table messages * @BATADV_DBG_BLA: bridge loop avoidance messages * @BATADV_DBG_DAT: ARP snooping and DAT related messages + * @BATADV_DBG_NC: network coding related messages * @BATADV_DBG_ALL: the union of all the above log levels */ enum batadv_dbg_level { @@ -191,7 +197,8 @@ enum batadv_dbg_level { BATADV_DBG_TT = BIT(2), BATADV_DBG_BLA = BIT(3), BATADV_DBG_DAT = BIT(4), - BATADV_DBG_ALL = 31, + BATADV_DBG_NC = BIT(5), + BATADV_DBG_ALL = 63, }; #ifdef CONFIG_BATMAN_ADV_DEBUG @@ -276,9 +283,7 @@ static inline bool batadv_has_timed_out(unsigned long timestamp, static inline void batadv_add_counter(struct batadv_priv *bat_priv, size_t idx, size_t count) { - int cpu = get_cpu(); - per_cpu_ptr(bat_priv->bat_counters, cpu)[idx] += count; - put_cpu(); + this_cpu_add(bat_priv->bat_counters[idx], count); } #define batadv_inc_counter(b, i) batadv_add_counter(b, i, 1) @@ -298,4 +303,10 @@ static inline uint64_t batadv_sum_counter(struct batadv_priv *bat_priv, return sum; } +/* Define a macro to reach the control buffer of the skb. The members of the + * control buffer are defined in struct batadv_skb_cb in types.h. + * The macro is inspired by the similar macro TCP_SKB_CB() in tcp.h. + */ +#define BATADV_SKB_CB(__skb) ((struct batadv_skb_cb *)&((__skb)->cb[0])) + #endif /* _NET_BATMAN_ADV_MAIN_H_ */ diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c new file mode 100644 index 000000000000..f7c54305a918 --- /dev/null +++ b/net/batman-adv/network-coding.c @@ -0,0 +1,1822 @@ +/* Copyright (C) 2012-2013 B.A.T.M.A.N. contributors: + * + * Martin Hundebøll, Jeppe Ledet-Pedersen + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA + */ + +#include <linux/debugfs.h> + +#include "main.h" +#include "hash.h" +#include "network-coding.h" +#include "send.h" +#include "originator.h" +#include "hard-interface.h" +#include "routing.h" + +static struct lock_class_key batadv_nc_coding_hash_lock_class_key; +static struct lock_class_key batadv_nc_decoding_hash_lock_class_key; + +static void batadv_nc_worker(struct work_struct *work); +static int batadv_nc_recv_coded_packet(struct sk_buff *skb, + struct batadv_hard_iface *recv_if); + +/** + * batadv_nc_start_timer - initialise the nc periodic worker + * @bat_priv: the bat priv with all the soft interface information + */ +static void batadv_nc_start_timer(struct batadv_priv *bat_priv) +{ + queue_delayed_work(batadv_event_workqueue, &bat_priv->nc.work, + msecs_to_jiffies(10)); +} + +/** + * batadv_nc_init - initialise coding hash table and start housekeeping + * @bat_priv: the bat priv with all the soft interface information + */ +int batadv_nc_init(struct batadv_priv *bat_priv) +{ + bat_priv->nc.timestamp_fwd_flush = jiffies; + bat_priv->nc.timestamp_sniffed_purge = jiffies; + + if (bat_priv->nc.coding_hash || bat_priv->nc.decoding_hash) + return 0; + + bat_priv->nc.coding_hash = batadv_hash_new(128); + if (!bat_priv->nc.coding_hash) + goto err; + + batadv_hash_set_lock_class(bat_priv->nc.coding_hash, + &batadv_nc_coding_hash_lock_class_key); + + bat_priv->nc.decoding_hash = batadv_hash_new(128); + if (!bat_priv->nc.decoding_hash) + goto err; + + batadv_hash_set_lock_class(bat_priv->nc.decoding_hash, + &batadv_nc_decoding_hash_lock_class_key); + + /* Register our packet type */ + if (batadv_recv_handler_register(BATADV_CODED, + batadv_nc_recv_coded_packet) < 0) + goto err; + + INIT_DELAYED_WORK(&bat_priv->nc.work, batadv_nc_worker); + batadv_nc_start_timer(bat_priv); + + return 0; + +err: + return -ENOMEM; +} + +/** + * batadv_nc_init_bat_priv - initialise the nc specific bat_priv variables + * @bat_priv: the bat priv with all the soft interface information + */ +void batadv_nc_init_bat_priv(struct batadv_priv *bat_priv) +{ + atomic_set(&bat_priv->network_coding, 1); + bat_priv->nc.min_tq = 200; + bat_priv->nc.max_fwd_delay = 10; + bat_priv->nc.max_buffer_time = 200; +} + +/** + * batadv_nc_init_orig - initialise the nc fields of an orig_node + * @orig_node: the orig_node which is going to be initialised + */ +void batadv_nc_init_orig(struct batadv_orig_node *orig_node) +{ + INIT_LIST_HEAD(&orig_node->in_coding_list); + INIT_LIST_HEAD(&orig_node->out_coding_list); + spin_lock_init(&orig_node->in_coding_list_lock); + spin_lock_init(&orig_node->out_coding_list_lock); +} + +/** + * batadv_nc_node_free_rcu - rcu callback to free an nc node and drop + * its reference on the orig_node + * @rcu: rcu pointer of the nc node + */ +static void batadv_nc_node_free_rcu(struct rcu_head *rcu) +{ + struct batadv_nc_node *nc_node; + + nc_node = container_of(rcu, struct batadv_nc_node, rcu); + batadv_orig_node_free_ref(nc_node->orig_node); + kfree(nc_node); +} + +/** + * batadv_nc_node_free_ref - decrements the nc node refcounter
and possibly + * frees it + * @nc_node: the nc node to free + */ +static void batadv_nc_node_free_ref(struct batadv_nc_node *nc_node) +{ + if (atomic_dec_and_test(&nc_node->refcount)) + call_rcu(&nc_node->rcu, batadv_nc_node_free_rcu); +} + +/** + * batadv_nc_path_free_ref - decrements the nc path refcounter and possibly + * frees it + * @nc_path: the nc path to free + */ +static void batadv_nc_path_free_ref(struct batadv_nc_path *nc_path) +{ + if (atomic_dec_and_test(&nc_path->refcount)) + kfree_rcu(nc_path, rcu); +} + +/** + * batadv_nc_packet_free - frees nc packet + * @nc_packet: the nc packet to free + */ +static void batadv_nc_packet_free(struct batadv_nc_packet *nc_packet) +{ + if (nc_packet->skb) + kfree_skb(nc_packet->skb); + + batadv_nc_path_free_ref(nc_packet->nc_path); + kfree(nc_packet); +} + +/** + * batadv_nc_to_purge_nc_node - checks whether an nc node has to be purged + * @bat_priv: the bat priv with all the soft interface information + * @nc_node: the nc node to check + * + * Returns true if the entry has to be purged now, false otherwise + */ +static bool batadv_nc_to_purge_nc_node(struct batadv_priv *bat_priv, + struct batadv_nc_node *nc_node) +{ + if (atomic_read(&bat_priv->mesh_state) != BATADV_MESH_ACTIVE) + return true; + + return batadv_has_timed_out(nc_node->last_seen, BATADV_NC_NODE_TIMEOUT); +} + +/** + * batadv_nc_to_purge_nc_path_coding - checks whether an nc path has timed out + * @bat_priv: the bat priv with all the soft interface information + * @nc_path: the nc path to check + * + * Returns true if the entry has to be purged now, false otherwise + */ +static bool batadv_nc_to_purge_nc_path_coding(struct batadv_priv *bat_priv, + struct batadv_nc_path *nc_path) +{ + if (atomic_read(&bat_priv->mesh_state) != BATADV_MESH_ACTIVE) + return true; + + /* purge the path when no packet has been added for 10 times the + * max_fwd_delay time + */ + return batadv_has_timed_out(nc_path->last_valid, + bat_priv->nc.max_fwd_delay * 10); +} + +/** + * batadv_nc_to_purge_nc_path_decoding - checks whether an nc path has timed out + * @bat_priv: the bat priv with all the soft interface information + * @nc_path: the nc path to check + * + * Returns true if the entry has to be purged now, false otherwise + */ +static bool batadv_nc_to_purge_nc_path_decoding(struct batadv_priv *bat_priv, + struct batadv_nc_path *nc_path) +{ + if (atomic_read(&bat_priv->mesh_state) != BATADV_MESH_ACTIVE) + return true; + + /* purge the path when no packet has been added for 10 times the + * max_buffer time + */ + return batadv_has_timed_out(nc_path->last_valid, + bat_priv->nc.max_buffer_time * 10); +} + +/** + * batadv_nc_purge_orig_nc_nodes - go through list of nc nodes and purge stale + * entries + * @bat_priv: the bat priv with all the soft interface information + * @list: list of nc nodes + * @lock: nc node list lock + * @to_purge: function in charge of deciding whether an entry has to be purged or + * not.
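All of the to_purge predicates above reduce to the same jiffies comparison; batadv_has_timed_out() in main.h is essentially the following (a sketch with a hypothetical name, timeout given in milliseconds):

	static inline bool example_has_timed_out(unsigned long timestamp,
						 unsigned int timeout_ms)
	{
		return time_is_before_jiffies(timestamp +
					      msecs_to_jiffies(timeout_ms));
	}

So, for instance, a coding-side nc_path is considered stale once nc_path->last_valid is older than 10 * max_fwd_delay milliseconds, i.e. 100 ms with the defaults set in batadv_nc_init_bat_priv().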
This function takes the nc node as argument and has to return + * a boolean value: true if the entry has to be deleted, false + * otherwise + */ +static void +batadv_nc_purge_orig_nc_nodes(struct batadv_priv *bat_priv, + struct list_head *list, + spinlock_t *lock, + bool (*to_purge)(struct batadv_priv *, + struct batadv_nc_node *)) +{ + struct batadv_nc_node *nc_node, *nc_node_tmp; + + /* For each nc_node in list */ + spin_lock_bh(lock); + list_for_each_entry_safe(nc_node, nc_node_tmp, list, list) { + /* if a helper function has been passed as parameter, + * ask it if the entry has to be purged or not + */ + if (to_purge && !to_purge(bat_priv, nc_node)) + continue; + + batadv_dbg(BATADV_DBG_NC, bat_priv, + "Removing nc_node %pM -> %pM\n", + nc_node->addr, nc_node->orig_node->orig); + list_del_rcu(&nc_node->list); + batadv_nc_node_free_ref(nc_node); + } + spin_unlock_bh(lock); +} + +/** + * batadv_nc_purge_orig - purges all nc node data attached to the given + * originator + * @bat_priv: the bat priv with all the soft interface information + * @orig_node: orig_node with the nc node entries to be purged + * @to_purge: function in charge of deciding whether an entry has to be purged or + * not. This function takes the nc node as argument and has to return + * a boolean value: true if the entry has to be deleted, false + * otherwise + */ +void batadv_nc_purge_orig(struct batadv_priv *bat_priv, + struct batadv_orig_node *orig_node, + bool (*to_purge)(struct batadv_priv *, + struct batadv_nc_node *)) +{ + /* Check incoming nc_nodes of this orig_node */ + batadv_nc_purge_orig_nc_nodes(bat_priv, &orig_node->in_coding_list, + &orig_node->in_coding_list_lock, + to_purge); + + /* Check outgoing nc_nodes of this orig_node */ + batadv_nc_purge_orig_nc_nodes(bat_priv, &orig_node->out_coding_list, + &orig_node->out_coding_list_lock, + to_purge); +} + +/** + * batadv_nc_purge_orig_hash - traverse the entire originator hash and purge + * timed out nc nodes + * @bat_priv: the bat priv with all the soft interface information + */ +static void batadv_nc_purge_orig_hash(struct batadv_priv *bat_priv) +{ + struct batadv_hashtable *hash = bat_priv->orig_hash; + struct hlist_head *head; + struct batadv_orig_node *orig_node; + uint32_t i; + + if (!hash) + return; + + /* For each orig_node */ + for (i = 0; i < hash->size; i++) { + head = &hash->table[i]; + + rcu_read_lock(); + hlist_for_each_entry_rcu(orig_node, head, hash_entry) + batadv_nc_purge_orig(bat_priv, orig_node, + batadv_nc_to_purge_nc_node); + rcu_read_unlock(); + } +} + +/** + * batadv_nc_purge_paths - traverse all nc paths in the hash and remove + * unused ones + * @bat_priv: the bat priv with all the soft interface information + * @hash: hash table containing the nc paths to check + * @to_purge: function in charge of deciding whether an entry has to be purged or + * not.
This function takes the nc path as argument and has to return + * a boolean value: true if the entry has to be deleted, false + * otherwise + */ +static void batadv_nc_purge_paths(struct batadv_priv *bat_priv, + struct batadv_hashtable *hash, + bool (*to_purge)(struct batadv_priv *, + struct batadv_nc_path *)) +{ + struct hlist_head *head; + struct hlist_node *node_tmp; + struct batadv_nc_path *nc_path; + spinlock_t *lock; /* Protects lists in hash */ + uint32_t i; + + for (i = 0; i < hash->size; i++) { + head = &hash->table[i]; + lock = &hash->list_locks[i]; + + /* For each nc_path in this bin */ + spin_lock_bh(lock); + hlist_for_each_entry_safe(nc_path, node_tmp, head, hash_entry) { + /* if a helper function has been passed as parameter, + * ask it if the entry has to be purged or not + */ + if (to_purge && !to_purge(bat_priv, nc_path)) + continue; + + /* purging a non-empty nc_path should never happen, but + * is observed under high CPU load. Delay the purging + * until next iteration to allow the packet_list to be + * emptied first. + */ + if (unlikely(!list_empty(&nc_path->packet_list))) { + net_ratelimited_function(printk, + KERN_WARNING + "Skipping free of non-empty nc_path (%pM -> %pM)!\n", + nc_path->prev_hop, + nc_path->next_hop); + continue; + } + + /* nc_path is unused, so remove it */ + batadv_dbg(BATADV_DBG_NC, bat_priv, + "Remove nc_path %pM -> %pM\n", + nc_path->prev_hop, nc_path->next_hop); + hlist_del_rcu(&nc_path->hash_entry); + batadv_nc_path_free_ref(nc_path); + } + spin_unlock_bh(lock); + } +} + +/** + * batadv_nc_hash_key_gen - computes the nc_path hash key + * @key: buffer to hold the final hash key + * @src: source ethernet mac address going into the hash key + * @dst: destination ethernet mac address going into the hash key + */ +static void batadv_nc_hash_key_gen(struct batadv_nc_path *key, const char *src, + const char *dst) +{ + memcpy(key->prev_hop, src, sizeof(key->prev_hop)); + memcpy(key->next_hop, dst, sizeof(key->next_hop)); +} + +/** + * batadv_nc_hash_choose - compute the hash value for an nc path + * @data: data to hash + * @size: size of the hash table + * + * Returns the selected index in the hash table for the given data.
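Since an nc_path is keyed purely by its (prev_hop, next_hop) MAC pair, a lookup needs nothing more than a stack-allocated search key. A hypothetical caller of batadv_nc_hash_key_gen() and of batadv_nc_hash_find() defined just below (ethhdr and neigh_addr are assumed to exist in the caller):

	struct batadv_nc_path search_key, *nc_path;

	batadv_nc_hash_key_gen(&search_key, ethhdr->h_source, neigh_addr);
	nc_path = batadv_nc_hash_find(bat_priv->nc.coding_hash, &search_key);
	if (nc_path) {
		/* ... use the path ... */
		batadv_nc_path_free_ref(nc_path);	/* drop lookup ref */
	}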
+static uint32_t batadv_nc_hash_choose(const void *data, uint32_t size)
+{
+	const struct batadv_nc_path *nc_path = data;
+	uint32_t hash = 0;
+
+	hash = batadv_hash_bytes(hash, &nc_path->prev_hop,
+				 sizeof(nc_path->prev_hop));
+	hash = batadv_hash_bytes(hash, &nc_path->next_hop,
+				 sizeof(nc_path->next_hop));
+
+	hash += (hash << 3);
+	hash ^= (hash >> 11);
+	hash += (hash << 15);
+
+	return hash % size;
+}
+
+/**
+ * batadv_nc_hash_compare - comparing function used in the network coding hash
+ *  tables
+ * @node: node in the local table
+ * @data2: second object to compare the node to
+ *
+ * Returns 1 if the two entries are the same, 0 otherwise
+ */
+static int batadv_nc_hash_compare(const struct hlist_node *node,
+				  const void *data2)
+{
+	const struct batadv_nc_path *nc_path1, *nc_path2;
+
+	nc_path1 = container_of(node, struct batadv_nc_path, hash_entry);
+	nc_path2 = data2;
+
+	/* Return 1 if the two keys are identical */
+	if (memcmp(nc_path1->prev_hop, nc_path2->prev_hop,
+		   sizeof(nc_path1->prev_hop)) != 0)
+		return 0;
+
+	if (memcmp(nc_path1->next_hop, nc_path2->next_hop,
+		   sizeof(nc_path1->next_hop)) != 0)
+		return 0;
+
+	return 1;
+}
+
+/**
+ * batadv_nc_hash_find - search for an existing nc path and return it
+ * @hash: hash table containing the nc path
+ * @data: search key
+ *
+ * Returns the nc_path if found, NULL otherwise.
+ */
+static struct batadv_nc_path *
+batadv_nc_hash_find(struct batadv_hashtable *hash,
+		    void *data)
+{
+	struct hlist_head *head;
+	struct batadv_nc_path *nc_path, *nc_path_tmp = NULL;
+	int index;
+
+	if (!hash)
+		return NULL;
+
+	index = batadv_nc_hash_choose(data, hash->size);
+	head = &hash->table[index];
+
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(nc_path, head, hash_entry) {
+		if (!batadv_nc_hash_compare(&nc_path->hash_entry, data))
+			continue;
+
+		if (!atomic_inc_not_zero(&nc_path->refcount))
+			continue;
+
+		nc_path_tmp = nc_path;
+		break;
+	}
+	rcu_read_unlock();
+
+	return nc_path_tmp;
+}
+
+/**
+ * batadv_nc_send_packet - send non-coded packet and free nc_packet struct
+ * @nc_packet: the nc packet to send
+ */
+static void batadv_nc_send_packet(struct batadv_nc_packet *nc_packet)
+{
+	batadv_send_skb_packet(nc_packet->skb,
+			       nc_packet->neigh_node->if_incoming,
+			       nc_packet->nc_path->next_hop);
+	nc_packet->skb = NULL;
+	batadv_nc_packet_free(nc_packet);
+}
+
+/**
+ * batadv_nc_sniffed_purge - Checks timestamp of given sniffed nc_packet.
+ * @bat_priv: the bat priv with all the soft interface information
+ * @nc_path: the nc path the packet belongs to
+ * @nc_packet: the nc packet to be checked
+ *
+ * Checks whether the given sniffed (overheard) nc_packet has hit its buffering
+ * timeout. If so, the packet is no longer kept and the entry deleted from the
+ * queue. Has to be called with the appropriate locks.
+ *
+ * Returns false if the entry in the FIFO queue has not yet timed out,
+ * true otherwise.
+ */
+static bool batadv_nc_sniffed_purge(struct batadv_priv *bat_priv,
+				    struct batadv_nc_path *nc_path,
+				    struct batadv_nc_packet *nc_packet)
+{
+	unsigned long timeout = bat_priv->nc.max_buffer_time;
+	bool res = false;
+
+	/* Packets are added to tail, so the remaining packets did not time
+	 * out and we can stop processing the current queue
+	 */
+	if (atomic_read(&bat_priv->mesh_state) == BATADV_MESH_ACTIVE &&
+	    !batadv_has_timed_out(nc_packet->timestamp, timeout))
+		goto out;
+
+	/* purge nc packet */
+	list_del(&nc_packet->list);
+	batadv_nc_packet_free(nc_packet);
+
+	res = true;
+
+out:
+	return res;
+}
+
+/**
+ * batadv_nc_fwd_flush - Checks the timestamp of the given nc packet.
+ * @bat_priv: the bat priv with all the soft interface information
+ * @nc_path: the nc path the packet belongs to
+ * @nc_packet: the nc packet to be checked
+ *
+ * Checks whether the given nc packet has hit its forward timeout. If so, the
+ * packet is no longer delayed, immediately sent and the entry deleted from the
+ * queue. Has to be called with the appropriate locks.
+ *
+ * Returns false if the entry in the FIFO queue has not yet timed out,
+ * true otherwise.
+ */
+static bool batadv_nc_fwd_flush(struct batadv_priv *bat_priv,
+				struct batadv_nc_path *nc_path,
+				struct batadv_nc_packet *nc_packet)
+{
+	unsigned long timeout = bat_priv->nc.max_fwd_delay;
+
+	/* Packets are added to tail, so the remaining packets did not time
+	 * out and we can stop processing the current queue
+	 */
+	if (atomic_read(&bat_priv->mesh_state) == BATADV_MESH_ACTIVE &&
+	    !batadv_has_timed_out(nc_packet->timestamp, timeout))
+		return false;
+
+	/* Send packet */
+	batadv_inc_counter(bat_priv, BATADV_CNT_FORWARD);
+	batadv_add_counter(bat_priv, BATADV_CNT_FORWARD_BYTES,
+			   nc_packet->skb->len + ETH_HLEN);
+	list_del(&nc_packet->list);
+	batadv_nc_send_packet(nc_packet);
+
+	return true;
+}
+
+/**
+ * batadv_nc_process_nc_paths - traverse given nc packet pool and free timed
+ *  out nc packets
+ * @bat_priv: the bat priv with all the soft interface information
+ * @hash: to be processed hash table
+ * @process_fn: function called to process given nc packet. Should return true
+ *	       if this function is to proceed with the next packet; otherwise
+ *	       the rest of the current queue is skipped.
+ */
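Both flush helpers above rely on the same queue invariant: packets are appended at the tail, so entries are ordered by age and a walk from the head can stop at the first entry that has not yet timed out. A minimal user-space model of that walk (list and timing helpers are simplified stand-ins, not kernel API):

	#include <stdbool.h>
	#include <stdio.h>
	#include <time.h>

	struct pkt {
		time_t timestamp;	/* time of enqueue */
		struct pkt *next;
	};

	static bool has_timed_out(time_t timestamp, time_t timeout)
	{
		return time(NULL) > timestamp + timeout;
	}

	/* Pop expired packets from the head; stop at the first fresh entry,
	 * because everything behind it is newer.
	 */
	static struct pkt *flush_expired(struct pkt *head, time_t timeout)
	{
		while (head && has_timed_out(head->timestamp, timeout)) {
			struct pkt *expired = head;

			head = head->next;
			printf("flushing packet queued at %ld\n",
			       (long)expired->timestamp);
			/* the kernel code would send or free the packet here */
		}
		return head;
	}

	int main(void)
	{
		struct pkt p2 = { time(NULL), NULL };
		struct pkt p1 = { time(NULL) - 10, &p2 };	/* older, at head */

		flush_expired(&p1, 5);	/* flushes p1, keeps p2 */
		return 0;
	}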
+static void
+batadv_nc_process_nc_paths(struct batadv_priv *bat_priv,
+			   struct batadv_hashtable *hash,
+			   bool (*process_fn)(struct batadv_priv *,
+					      struct batadv_nc_path *,
+					      struct batadv_nc_packet *))
+{
+	struct hlist_head *head;
+	struct batadv_nc_packet *nc_packet, *nc_packet_tmp;
+	struct batadv_nc_path *nc_path;
+	bool ret;
+	int i;
+
+	if (!hash)
+		return;
+
+	/* Loop hash table bins */
+	for (i = 0; i < hash->size; i++) {
+		head = &hash->table[i];
+
+		/* Loop coding paths */
+		rcu_read_lock();
+		hlist_for_each_entry_rcu(nc_path, head, hash_entry) {
+			/* Loop packets */
+			spin_lock_bh(&nc_path->packet_list_lock);
+			list_for_each_entry_safe(nc_packet, nc_packet_tmp,
+						 &nc_path->packet_list, list) {
+				ret = process_fn(bat_priv, nc_path, nc_packet);
+				if (!ret)
+					break;
+			}
+			spin_unlock_bh(&nc_path->packet_list_lock);
+		}
+		rcu_read_unlock();
+	}
+}
+
+/**
+ * batadv_nc_worker - periodic task for housekeeping related to network coding
+ * @work: kernel work struct
+ */
+static void batadv_nc_worker(struct work_struct *work)
+{
+	struct delayed_work *delayed_work;
+	struct batadv_priv_nc *priv_nc;
+	struct batadv_priv *bat_priv;
+	unsigned long timeout;
+
+	delayed_work = container_of(work, struct delayed_work, work);
+	priv_nc = container_of(delayed_work, struct batadv_priv_nc, work);
+	bat_priv = container_of(priv_nc, struct batadv_priv, nc);
+
+	batadv_nc_purge_orig_hash(bat_priv);
+	batadv_nc_purge_paths(bat_priv, bat_priv->nc.coding_hash,
+			      batadv_nc_to_purge_nc_path_coding);
+	batadv_nc_purge_paths(bat_priv, bat_priv->nc.decoding_hash,
+			      batadv_nc_to_purge_nc_path_decoding);
+
+	timeout = bat_priv->nc.max_fwd_delay;
+
+	if (batadv_has_timed_out(bat_priv->nc.timestamp_fwd_flush, timeout)) {
+		batadv_nc_process_nc_paths(bat_priv, bat_priv->nc.coding_hash,
+					   batadv_nc_fwd_flush);
+		bat_priv->nc.timestamp_fwd_flush = jiffies;
+	}
+
+	if (batadv_has_timed_out(bat_priv->nc.timestamp_sniffed_purge,
+				 bat_priv->nc.max_buffer_time)) {
+		batadv_nc_process_nc_paths(bat_priv, bat_priv->nc.decoding_hash,
+					   batadv_nc_sniffed_purge);
+		bat_priv->nc.timestamp_sniffed_purge = jiffies;
+	}
+
+	/* Schedule a new check */
+	batadv_nc_start_timer(bat_priv);
+}
+
+/**
+ * batadv_can_nc_with_orig - checks whether the given orig node is suitable for
+ *  coding or not
+ * @bat_priv: the bat priv with all the soft interface information
+ * @orig_node: neighboring orig node which may be used as nc candidate
+ * @ogm_packet: incoming ogm packet also used for the checks
+ *
+ * Returns true if all of the following conditions hold:
+ * 1) the OGM has the most recent sequence number;
+ * 2) the TTL was decremented by exactly one;
+ * 3) the OGM was received from the first hop from orig_node;
+ * 4) the TQ value of the OGM is above bat_priv->nc.min_tq.
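Written as a stand-alone predicate, the four acceptance checks look like this (the struct fields are simplified stand-ins for the kernel types; byte-order conversion is omitted):

	#include <stdbool.h>
	#include <stdint.h>
	#include <string.h>

	struct ogm {
		uint32_t seqno;		/* already in host byte order here */
		uint8_t ttl;
		uint8_t tq;
		uint8_t orig[6];
		uint8_t prev_sender[6];
	};

	struct orig_info {
		uint32_t last_real_seqno;
		uint8_t last_ttl;
	};

	static bool can_code_with(const struct orig_info *o, const struct ogm *p,
				  uint8_t min_tq)
	{
		if (o->last_real_seqno != p->seqno)		/* condition 1 */
			return false;
		if (o->last_ttl != p->ttl + 1)			/* condition 2 */
			return false;
		if (memcmp(p->orig, p->prev_sender, 6) != 0)	/* condition 3 */
			return false;
		return p->tq >= min_tq;				/* condition 4 */
	}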
+ */ +static bool batadv_can_nc_with_orig(struct batadv_priv *bat_priv, + struct batadv_orig_node *orig_node, + struct batadv_ogm_packet *ogm_packet) +{ + if (orig_node->last_real_seqno != ntohl(ogm_packet->seqno)) + return false; + if (orig_node->last_ttl != ogm_packet->header.ttl + 1) + return false; + if (!batadv_compare_eth(ogm_packet->orig, ogm_packet->prev_sender)) + return false; + if (ogm_packet->tq < bat_priv->nc.min_tq) + return false; + + return true; +} + +/** + * batadv_nc_find_nc_node - search for an existing nc node and return it + * @orig_node: orig node originating the ogm packet + * @orig_neigh_node: neighboring orig node from which we received the ogm packet + * (can be equal to orig_node) + * @in_coding: traverse incoming or outgoing network coding list + * + * Returns the nc_node if found, NULL otherwise. + */ +static struct batadv_nc_node +*batadv_nc_find_nc_node(struct batadv_orig_node *orig_node, + struct batadv_orig_node *orig_neigh_node, + bool in_coding) +{ + struct batadv_nc_node *nc_node, *nc_node_out = NULL; + struct list_head *list; + + if (in_coding) + list = &orig_neigh_node->in_coding_list; + else + list = &orig_neigh_node->out_coding_list; + + /* Traverse list of nc_nodes to orig_node */ + rcu_read_lock(); + list_for_each_entry_rcu(nc_node, list, list) { + if (!batadv_compare_eth(nc_node->addr, orig_node->orig)) + continue; + + if (!atomic_inc_not_zero(&nc_node->refcount)) + continue; + + /* Found a match */ + nc_node_out = nc_node; + break; + } + rcu_read_unlock(); + + return nc_node_out; +} + +/** + * batadv_nc_get_nc_node - retrieves an nc node or creates the entry if it was + * not found + * @bat_priv: the bat priv with all the soft interface information + * @orig_node: orig node originating the ogm packet + * @orig_neigh_node: neighboring orig node from which we received the ogm packet + * (can be equal to orig_node) + * @in_coding: traverse incoming or outgoing network coding list + * + * Returns the nc_node if found or created, NULL in case of an error. 
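Both the lookup in batadv_nc_find_nc_node() above and the creation path below follow the usual refcount discipline: under rcu_read_lock(), a reference may only be taken via atomic_inc_not_zero(), i.e. only while nobody is concurrently tearing the object down. The same pattern in portable C11 (a sketch, not the kernel implementation):

	#include <stdatomic.h>
	#include <stdbool.h>

	struct obj {
		atomic_int refcount;	/* zero means: being destroyed */
	};

	/* Take a reference only while the object is still alive; mirrors
	 * atomic_inc_not_zero() as used in the RCU lookup above.
	 */
	static bool obj_get(struct obj *o)
	{
		int old = atomic_load(&o->refcount);

		while (old != 0) {
			if (atomic_compare_exchange_weak(&o->refcount, &old,
							 old + 1))
				return true;
			/* old was reloaded by the failed exchange; retry */
		}
		return false;
	}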
+ */ +static struct batadv_nc_node +*batadv_nc_get_nc_node(struct batadv_priv *bat_priv, + struct batadv_orig_node *orig_node, + struct batadv_orig_node *orig_neigh_node, + bool in_coding) +{ + struct batadv_nc_node *nc_node; + spinlock_t *lock; /* Used to lock list selected by "int in_coding" */ + struct list_head *list; + + /* Check if nc_node is already added */ + nc_node = batadv_nc_find_nc_node(orig_node, orig_neigh_node, in_coding); + + /* Node found */ + if (nc_node) + return nc_node; + + nc_node = kzalloc(sizeof(*nc_node), GFP_ATOMIC); + if (!nc_node) + return NULL; + + if (!atomic_inc_not_zero(&orig_neigh_node->refcount)) + goto free; + + /* Initialize nc_node */ + INIT_LIST_HEAD(&nc_node->list); + memcpy(nc_node->addr, orig_node->orig, ETH_ALEN); + nc_node->orig_node = orig_neigh_node; + atomic_set(&nc_node->refcount, 2); + + /* Select ingoing or outgoing coding node */ + if (in_coding) { + lock = &orig_neigh_node->in_coding_list_lock; + list = &orig_neigh_node->in_coding_list; + } else { + lock = &orig_neigh_node->out_coding_list_lock; + list = &orig_neigh_node->out_coding_list; + } + + batadv_dbg(BATADV_DBG_NC, bat_priv, "Adding nc_node %pM -> %pM\n", + nc_node->addr, nc_node->orig_node->orig); + + /* Add nc_node to orig_node */ + spin_lock_bh(lock); + list_add_tail_rcu(&nc_node->list, list); + spin_unlock_bh(lock); + + return nc_node; + +free: + kfree(nc_node); + return NULL; +} + +/** + * batadv_nc_update_nc_node - updates stored incoming and outgoing nc node structs + * (best called on incoming OGMs) + * @bat_priv: the bat priv with all the soft interface information + * @orig_node: orig node originating the ogm packet + * @orig_neigh_node: neighboring orig node from which we received the ogm packet + * (can be equal to orig_node) + * @ogm_packet: incoming ogm packet + * @is_single_hop_neigh: orig_node is a single hop neighbor + */ +void batadv_nc_update_nc_node(struct batadv_priv *bat_priv, + struct batadv_orig_node *orig_node, + struct batadv_orig_node *orig_neigh_node, + struct batadv_ogm_packet *ogm_packet, + int is_single_hop_neigh) +{ + struct batadv_nc_node *in_nc_node = NULL, *out_nc_node = NULL; + + /* Check if network coding is enabled */ + if (!atomic_read(&bat_priv->network_coding)) + goto out; + + /* accept ogms from 'good' neighbors and single hop neighbors */ + if (!batadv_can_nc_with_orig(bat_priv, orig_node, ogm_packet) && + !is_single_hop_neigh) + goto out; + + /* Add orig_node as in_nc_node on hop */ + in_nc_node = batadv_nc_get_nc_node(bat_priv, orig_node, + orig_neigh_node, true); + if (!in_nc_node) + goto out; + + in_nc_node->last_seen = jiffies; + + /* Add hop as out_nc_node on orig_node */ + out_nc_node = batadv_nc_get_nc_node(bat_priv, orig_neigh_node, + orig_node, false); + if (!out_nc_node) + goto out; + + out_nc_node->last_seen = jiffies; + +out: + if (in_nc_node) + batadv_nc_node_free_ref(in_nc_node); + if (out_nc_node) + batadv_nc_node_free_ref(out_nc_node); +} + +/** + * batadv_nc_get_path - get existing nc_path or allocate a new one + * @bat_priv: the bat priv with all the soft interface information + * @hash: hash table containing the nc path + * @src: ethernet source address - first half of the nc path search key + * @dst: ethernet destination address - second half of the nc path search key + * + * Returns pointer to nc_path if the path was found or created, returns NULL + * on error. 
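batadv_nc_update_nc_node() above records every accepted OGM twice: the sender orig_node lands in the neighbor's ingoing list, and the neighbor lands in the sender's outgoing list, each entry stamped with last_seen. A compact model of that symmetric bookkeeping, using fixed arrays instead of the kernel's RCU lists (all names and types here are illustrative only):

	#include <stdint.h>
	#include <string.h>
	#include <time.h>

	#define MAX_CODING_NEIGHBORS 8

	struct nc_peer {
		uint8_t addr[6];
		time_t last_seen;
	};

	struct node {
		uint8_t addr[6];
		struct nc_peer in[MAX_CODING_NEIGHBORS];  /* who codes towards us */
		struct nc_peer out[MAX_CODING_NEIGHBORS]; /* whom we code towards */
		int n_in, n_out;
	};

	static void record(struct nc_peer *list, int *n, const uint8_t *addr)
	{
		int i;

		for (i = 0; i < *n; i++)
			if (memcmp(list[i].addr, addr, 6) == 0)
				break;
		if (i == *n) {
			if (*n == MAX_CODING_NEIGHBORS)
				return;	/* table full; the kernel lists grow instead */
			memcpy(list[i].addr, addr, 6);
			(*n)++;
		}
		list[i].last_seen = time(NULL);	/* refresh, as done for nc_node */
	}

	/* OGM from `orig`, heard via direct neighbor `hop` */
	static void update_nc_node(struct node *orig, struct node *hop)
	{
		record(hop->in, &hop->n_in, orig->addr);
		record(orig->out, &orig->n_out, hop->addr);
	}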
+ */
+static struct batadv_nc_path *batadv_nc_get_path(struct batadv_priv *bat_priv,
+						 struct batadv_hashtable *hash,
+						 uint8_t *src,
+						 uint8_t *dst)
+{
+	int hash_added;
+	struct batadv_nc_path *nc_path, nc_path_key;
+
+	batadv_nc_hash_key_gen(&nc_path_key, src, dst);
+
+	/* Search for existing nc_path */
+	nc_path = batadv_nc_hash_find(hash, (void *)&nc_path_key);
+
+	if (nc_path) {
+		/* Set timestamp to delay removal of nc_path */
+		nc_path->last_valid = jiffies;
+		return nc_path;
+	}
+
+	/* No existing nc_path was found; create a new one */
+	nc_path = kzalloc(sizeof(*nc_path), GFP_ATOMIC);
+
+	if (!nc_path)
+		return NULL;
+
+	/* Initialize nc_path */
+	INIT_LIST_HEAD(&nc_path->packet_list);
+	spin_lock_init(&nc_path->packet_list_lock);
+	atomic_set(&nc_path->refcount, 2);
+	nc_path->last_valid = jiffies;
+	memcpy(nc_path->next_hop, dst, ETH_ALEN);
+	memcpy(nc_path->prev_hop, src, ETH_ALEN);
+
+	batadv_dbg(BATADV_DBG_NC, bat_priv, "Adding nc_path %pM -> %pM\n",
+		   nc_path->prev_hop,
+		   nc_path->next_hop);
+
+	/* Add nc_path to hash table */
+	hash_added = batadv_hash_add(hash, batadv_nc_hash_compare,
+				     batadv_nc_hash_choose, &nc_path_key,
+				     &nc_path->hash_entry);
+
+	if (hash_added < 0) {
+		kfree(nc_path);
+		return NULL;
+	}
+
+	return nc_path;
+}
+
+/**
+ * batadv_nc_random_weight_tq - scale the receiver's TQ-value to avoid unfair
+ *  selection of a receiver with slightly lower TQ than the other
+ * @tq: to be weighted tq value
+ */
+static uint8_t batadv_nc_random_weight_tq(uint8_t tq)
+{
+	uint8_t rand_val;
+	uint32_t rand_tq;
+
+	get_random_bytes(&rand_val, sizeof(rand_val));
+
+	/* randomize the estimated packet loss (max TQ - estimated TQ);
+	 * the product needs more than 8 bits, so it must not be stored
+	 * in a uint8_t or it is truncated
+	 */
+	rand_tq = rand_val * (BATADV_TQ_MAX_VALUE - tq);
+
+	/* normalize the randomized packet loss */
+	rand_tq /= BATADV_TQ_MAX_VALUE;
+
+	/* convert to (randomized) estimated tq again */
+	return BATADV_TQ_MAX_VALUE - rand_tq;
+}
+
+/**
+ * batadv_nc_memxor - XOR destination with source
+ * @dst: byte array to XOR into
+ * @src: byte array to XOR from
+ * @len: length of destination array
+ */
+static void batadv_nc_memxor(char *dst, const char *src, unsigned int len)
+{
+	unsigned int i;
+
+	for (i = 0; i < len; ++i)
+		dst[i] ^= src[i];
+}
+
+/**
+ * batadv_nc_code_packets - code a received unicast_packet with an nc packet
+ *  into a coded_packet and send it
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: data skb to forward
+ * @ethhdr: pointer to the ethernet header inside the skb
+ * @nc_packet: structure containing the packet the skb can be coded with
+ * @neigh_node: next hop to forward packet to
+ *
+ * Returns true if both packets are consumed, false otherwise.
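The weighting keeps the result in the interval [tq, 255]: the random byte scales only the loss part (255 - tq), so a receiver with a higher TQ still tends to win, just not deterministically. On average the weighted TQ comes out near (tq + 255) / 2. A quick user-space check of that expectation (rand() stands in for get_random_bytes(); the wide intermediate type matters, as noted above):

	#include <stdint.h>
	#include <stdio.h>
	#include <stdlib.h>

	#define TQ_MAX_VALUE 255

	static uint8_t random_weight_tq(uint8_t tq)
	{
		uint8_t rand_val = rand() & 0xff;
		uint32_t rand_tq = rand_val * (TQ_MAX_VALUE - tq); /* > 8 bits */

		rand_tq /= TQ_MAX_VALUE;
		return TQ_MAX_VALUE - rand_tq;
	}

	int main(void)
	{
		unsigned long sum = 0;
		int i, n = 1000000;

		for (i = 0; i < n; i++)
			sum += random_weight_tq(200);

		/* expected average: (200 + 255) / 2 = 227.5 */
		printf("average weighted tq: %.1f\n", (double)sum / n);
		return 0;
	}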
+ */ +static bool batadv_nc_code_packets(struct batadv_priv *bat_priv, + struct sk_buff *skb, + struct ethhdr *ethhdr, + struct batadv_nc_packet *nc_packet, + struct batadv_neigh_node *neigh_node) +{ + uint8_t tq_weighted_neigh, tq_weighted_coding; + struct sk_buff *skb_dest, *skb_src; + struct batadv_unicast_packet *packet1; + struct batadv_unicast_packet *packet2; + struct batadv_coded_packet *coded_packet; + struct batadv_neigh_node *neigh_tmp, *router_neigh; + struct batadv_neigh_node *router_coding = NULL; + uint8_t *first_source, *first_dest, *second_source, *second_dest; + __be32 packet_id1, packet_id2; + size_t count; + bool res = false; + int coding_len; + int unicast_size = sizeof(*packet1); + int coded_size = sizeof(*coded_packet); + int header_add = coded_size - unicast_size; + + router_neigh = batadv_orig_node_get_router(neigh_node->orig_node); + if (!router_neigh) + goto out; + + neigh_tmp = nc_packet->neigh_node; + router_coding = batadv_orig_node_get_router(neigh_tmp->orig_node); + if (!router_coding) + goto out; + + tq_weighted_neigh = batadv_nc_random_weight_tq(router_neigh->tq_avg); + tq_weighted_coding = batadv_nc_random_weight_tq(router_coding->tq_avg); + + /* Select one destination for the MAC-header dst-field based on + * weighted TQ-values. + */ + if (tq_weighted_neigh >= tq_weighted_coding) { + /* Destination from nc_packet is selected for MAC-header */ + first_dest = nc_packet->nc_path->next_hop; + first_source = nc_packet->nc_path->prev_hop; + second_dest = neigh_node->addr; + second_source = ethhdr->h_source; + packet1 = (struct batadv_unicast_packet *)nc_packet->skb->data; + packet2 = (struct batadv_unicast_packet *)skb->data; + packet_id1 = nc_packet->packet_id; + packet_id2 = batadv_skb_crc32(skb, + skb->data + sizeof(*packet2)); + } else { + /* Destination for skb is selected for MAC-header */ + first_dest = neigh_node->addr; + first_source = ethhdr->h_source; + second_dest = nc_packet->nc_path->next_hop; + second_source = nc_packet->nc_path->prev_hop; + packet1 = (struct batadv_unicast_packet *)skb->data; + packet2 = (struct batadv_unicast_packet *)nc_packet->skb->data; + packet_id1 = batadv_skb_crc32(skb, + skb->data + sizeof(*packet1)); + packet_id2 = nc_packet->packet_id; + } + + /* Instead of zero padding the smallest data buffer, we + * code into the largest. 
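Coding into the larger of the two buffers (chosen right below) works because XOR against implicit zero padding is the identity: the tail of the longer packet passes through unmodified, and applying the same XOR again recovers the original data. A round-trip check with a user-space copy of the memxor helper:

	#include <assert.h>
	#include <stdio.h>
	#include <string.h>

	static void memxor(char *dst, const char *src, unsigned int len)
	{
		unsigned int i;

		for (i = 0; i < len; i++)
			dst[i] ^= src[i];
	}

	int main(void)
	{
		char longer[8] = "ABCDEFG";	/* 7 bytes + NUL */
		char shorter[4] = "xyz";	/* 3 bytes + NUL */
		char coded[8];

		memcpy(coded, longer, sizeof(coded));
		memxor(coded, shorter, sizeof(shorter));	/* code */

		memxor(coded, shorter, sizeof(shorter));	/* decode again */
		assert(memcmp(coded, longer, sizeof(longer)) == 0);
		puts("decoded OK");
		return 0;
	}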
+	 */
+	if (skb->len <= nc_packet->skb->len) {
+		skb_dest = nc_packet->skb;
+		skb_src = skb;
+	} else {
+		skb_dest = skb;
+		skb_src = nc_packet->skb;
+	}
+
+	/* coding_len is used when decoding the shorter packet */
+	coding_len = skb_src->len - unicast_size;
+
+	if (skb_linearize(skb_dest) < 0 || skb_linearize(skb_src) < 0)
+		goto out;
+
+	skb_push(skb_dest, header_add);
+
+	coded_packet = (struct batadv_coded_packet *)skb_dest->data;
+	skb_reset_mac_header(skb_dest);
+
+	coded_packet->header.packet_type = BATADV_CODED;
+	coded_packet->header.version = BATADV_COMPAT_VERSION;
+	coded_packet->header.ttl = packet1->header.ttl;
+
+	/* Info about first unicast packet */
+	memcpy(coded_packet->first_source, first_source, ETH_ALEN);
+	memcpy(coded_packet->first_orig_dest, packet1->dest, ETH_ALEN);
+	coded_packet->first_crc = packet_id1;
+	coded_packet->first_ttvn = packet1->ttvn;
+
+	/* Info about second unicast packet */
+	memcpy(coded_packet->second_dest, second_dest, ETH_ALEN);
+	memcpy(coded_packet->second_source, second_source, ETH_ALEN);
+	memcpy(coded_packet->second_orig_dest, packet2->dest, ETH_ALEN);
+	coded_packet->second_crc = packet_id2;
+	coded_packet->second_ttl = packet2->header.ttl;
+	coded_packet->second_ttvn = packet2->ttvn;
+	coded_packet->coded_len = htons(coding_len);
+
+	/* This is where the magic happens: Code skb_src into skb_dest */
+	batadv_nc_memxor(skb_dest->data + coded_size,
+			 skb_src->data + unicast_size, coding_len);
+
+	/* Update counters accordingly */
+	if (BATADV_SKB_CB(skb_src)->decoded &&
+	    BATADV_SKB_CB(skb_dest)->decoded) {
+		/* Both packets are recoded */
+		count = skb_src->len + ETH_HLEN;
+		count += skb_dest->len + ETH_HLEN;
+		batadv_add_counter(bat_priv, BATADV_CNT_NC_RECODE, 2);
+		batadv_add_counter(bat_priv, BATADV_CNT_NC_RECODE_BYTES, count);
+	} else if (!BATADV_SKB_CB(skb_src)->decoded &&
+		   !BATADV_SKB_CB(skb_dest)->decoded) {
+		/* Both packets are newly coded */
+		count = skb_src->len + ETH_HLEN;
+		count += skb_dest->len + ETH_HLEN;
+		batadv_add_counter(bat_priv, BATADV_CNT_NC_CODE, 2);
+		batadv_add_counter(bat_priv, BATADV_CNT_NC_CODE_BYTES, count);
+	} else if (BATADV_SKB_CB(skb_src)->decoded &&
+		   !BATADV_SKB_CB(skb_dest)->decoded) {
+		/* skb_src recoded and skb_dest is newly coded */
+		batadv_inc_counter(bat_priv, BATADV_CNT_NC_RECODE);
+		batadv_add_counter(bat_priv, BATADV_CNT_NC_RECODE_BYTES,
+				   skb_src->len + ETH_HLEN);
+		batadv_inc_counter(bat_priv, BATADV_CNT_NC_CODE);
+		batadv_add_counter(bat_priv, BATADV_CNT_NC_CODE_BYTES,
+				   skb_dest->len + ETH_HLEN);
+	} else if (!BATADV_SKB_CB(skb_src)->decoded &&
+		   BATADV_SKB_CB(skb_dest)->decoded) {
+		/* skb_src is newly coded and skb_dest is recoded */
+		batadv_inc_counter(bat_priv, BATADV_CNT_NC_CODE);
+		batadv_add_counter(bat_priv, BATADV_CNT_NC_CODE_BYTES,
+				   skb_src->len + ETH_HLEN);
+		batadv_inc_counter(bat_priv, BATADV_CNT_NC_RECODE);
+		batadv_add_counter(bat_priv, BATADV_CNT_NC_RECODE_BYTES,
+				   skb_dest->len + ETH_HLEN);
+	}
+
+	/* skb_src is now coded into skb_dest, so free it */
+	kfree_skb(skb_src);
+
+	/* avoid duplicate free of skb from nc_packet */
+	nc_packet->skb = NULL;
+	batadv_nc_packet_free(nc_packet);
+
+	/* Send the coded packet and return true */
+	batadv_send_skb_packet(skb_dest, neigh_node->if_incoming, first_dest);
+	res = true;
+out:
+	if (router_neigh)
+		batadv_neigh_node_free_ref(router_neigh);
+	if (router_coding)
+		batadv_neigh_node_free_ref(router_coding);
+	return res;
+}
+
+/**
+ * batadv_nc_skb_coding_possible - true if a decoded skb is available at dst.
+ * @skb: data skb to forward
+ * @dst: destination mac address of the other skb to code with
+ * @src: source mac address of skb
+ *
+ * Whenever we network code a packet we have to check whether we received it in
+ * a network coded form. If so, we may not be able to use it for coding because
+ * some neighbors may also have received (overheard) the packet in the network
+ * coded form without being able to decode it. It is hard to know which of the
+ * neighboring nodes was able to decode the packet, therefore we can only
+ * re-code the packet if the source of the previous encoded packet is involved.
+ * Since the source encoded the packet we can be certain it has all necessary
+ * decode information.
+ *
+ * Returns true if coding of a decoded packet is allowed.
+ */
+static bool batadv_nc_skb_coding_possible(struct sk_buff *skb,
+					  uint8_t *dst, uint8_t *src)
+{
+	if (BATADV_SKB_CB(skb)->decoded && !batadv_compare_eth(dst, src))
+		return false;
+	else
+		return true;
+}
+
+/**
+ * batadv_nc_path_search - Find the coding path matching in_nc_node and
+ *  out_nc_node to retrieve a buffered packet that can be used for coding.
+ * @bat_priv: the bat priv with all the soft interface information
+ * @in_nc_node: pointer to skb next hop's neighbor nc node
+ * @out_nc_node: pointer to skb source's neighbor nc node
+ * @skb: data skb to forward
+ * @eth_dst: next hop mac address of skb
+ *
+ * Returns a buffered nc packet that can be coded with the skb, NULL otherwise.
+ */
+static struct batadv_nc_packet *
+batadv_nc_path_search(struct batadv_priv *bat_priv,
+		      struct batadv_nc_node *in_nc_node,
+		      struct batadv_nc_node *out_nc_node,
+		      struct sk_buff *skb,
+		      uint8_t *eth_dst)
+{
+	struct batadv_nc_path *nc_path, nc_path_key;
+	struct batadv_nc_packet *nc_packet_out = NULL;
+	struct batadv_nc_packet *nc_packet, *nc_packet_tmp;
+	struct batadv_hashtable *hash = bat_priv->nc.coding_hash;
+	int idx;
+
+	if (!hash)
+		return NULL;
+
+	/* Create the search key out of the ethernet addresses */
+	batadv_nc_hash_key_gen(&nc_path_key, in_nc_node->addr,
+			       out_nc_node->addr);
+	idx = batadv_nc_hash_choose(&nc_path_key, hash->size);
+
+	/* Check for coding opportunities in this nc_path */
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(nc_path, &hash->table[idx], hash_entry) {
+		if (!batadv_compare_eth(nc_path->prev_hop, in_nc_node->addr))
+			continue;
+
+		if (!batadv_compare_eth(nc_path->next_hop, out_nc_node->addr))
+			continue;
+
+		spin_lock_bh(&nc_path->packet_list_lock);
+		if (list_empty(&nc_path->packet_list)) {
+			spin_unlock_bh(&nc_path->packet_list_lock);
+			continue;
+		}
+
+		list_for_each_entry_safe(nc_packet, nc_packet_tmp,
+					 &nc_path->packet_list, list) {
+			if (!batadv_nc_skb_coding_possible(nc_packet->skb,
+							   eth_dst,
+							   in_nc_node->addr))
+				continue;
+
+			/* Coding opportunity is found! */
+			list_del(&nc_packet->list);
+			nc_packet_out = nc_packet;
+			break;
+		}
+
+		spin_unlock_bh(&nc_path->packet_list_lock);
+		break;
+	}
+	rcu_read_unlock();
+
+	return nc_packet_out;
+}
+
+/**
+ * batadv_nc_skb_src_search - Loops through the list of neighboring nodes of
+ *  the skb's sender (may be equal to the originator).
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: data skb to forward
+ * @eth_dst: next hop mac address of skb
+ * @eth_src: source mac address of skb
+ * @in_nc_node: pointer to skb next hop's neighbor nc node
+ *
+ * Returns an nc packet if a suitable coding packet was found, NULL otherwise.
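The re-coding rule explained above reduces to a two-clause predicate: a packet that had to be decoded on this node may only take part in coding again if the other packet's destination is exactly the node that produced the earlier coded packet. In isolation:

	#include <stdbool.h>
	#include <stdint.h>
	#include <string.h>

	/* decoded: packet arrived in coded form and was decoded locally
	 * dst:     destination of the other packet we want to code with
	 * src:     source of this packet (the earlier encoder)
	 */
	static bool coding_possible(bool decoded, const uint8_t *dst,
				    const uint8_t *src)
	{
		return !decoded || memcmp(dst, src, 6) == 0;
	}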
+ */ +static struct batadv_nc_packet * +batadv_nc_skb_src_search(struct batadv_priv *bat_priv, + struct sk_buff *skb, + uint8_t *eth_dst, + uint8_t *eth_src, + struct batadv_nc_node *in_nc_node) +{ + struct batadv_orig_node *orig_node; + struct batadv_nc_node *out_nc_node; + struct batadv_nc_packet *nc_packet = NULL; + + orig_node = batadv_orig_hash_find(bat_priv, eth_src); + if (!orig_node) + return NULL; + + rcu_read_lock(); + list_for_each_entry_rcu(out_nc_node, + &orig_node->out_coding_list, list) { + /* Check if the skb is decoded and if recoding is possible */ + if (!batadv_nc_skb_coding_possible(skb, + out_nc_node->addr, eth_src)) + continue; + + /* Search for an opportunity in this nc_path */ + nc_packet = batadv_nc_path_search(bat_priv, in_nc_node, + out_nc_node, skb, eth_dst); + if (nc_packet) + break; + } + rcu_read_unlock(); + + batadv_orig_node_free_ref(orig_node); + return nc_packet; +} + +/** + * batadv_nc_skb_store_before_coding - set the ethernet src and dst of the + * unicast skb before it is stored for use in later decoding + * @bat_priv: the bat priv with all the soft interface information + * @skb: data skb to store + * @eth_dst_new: new destination mac address of skb + */ +static void batadv_nc_skb_store_before_coding(struct batadv_priv *bat_priv, + struct sk_buff *skb, + uint8_t *eth_dst_new) +{ + struct ethhdr *ethhdr; + + /* Copy skb header to change the mac header */ + skb = pskb_copy(skb, GFP_ATOMIC); + if (!skb) + return; + + /* Set the mac header as if we actually sent the packet uncoded */ + ethhdr = (struct ethhdr *)skb_mac_header(skb); + memcpy(ethhdr->h_source, ethhdr->h_dest, ETH_ALEN); + memcpy(ethhdr->h_dest, eth_dst_new, ETH_ALEN); + + /* Set data pointer to MAC header to mimic packets from our tx path */ + skb_push(skb, ETH_HLEN); + + /* Add the packet to the decoding packet pool */ + batadv_nc_skb_store_for_decoding(bat_priv, skb); + + /* batadv_nc_skb_store_for_decoding() clones the skb, so we must free + * our ref + */ + kfree_skb(skb); +} + +/** + * batadv_nc_skb_dst_search - Loops through list of neighboring nodes to dst. + * @skb: data skb to forward + * @neigh_node: next hop to forward packet to + * @ethhdr: pointer to the ethernet header inside the skb + * + * Loops through list of neighboring nodes the next hop has a good connection to + * (receives OGMs with a sufficient quality). We need to find a neighbor of our + * next hop that potentially sent a packet which our next hop also received + * (overheard) and has stored for later decoding. 
+ * + * Returns true if the skb was consumed (encoded packet sent) or false otherwise + */ +static bool batadv_nc_skb_dst_search(struct sk_buff *skb, + struct batadv_neigh_node *neigh_node, + struct ethhdr *ethhdr) +{ + struct net_device *netdev = neigh_node->if_incoming->soft_iface; + struct batadv_priv *bat_priv = netdev_priv(netdev); + struct batadv_orig_node *orig_node = neigh_node->orig_node; + struct batadv_nc_node *nc_node; + struct batadv_nc_packet *nc_packet = NULL; + + rcu_read_lock(); + list_for_each_entry_rcu(nc_node, &orig_node->in_coding_list, list) { + /* Search for coding opportunity with this in_nc_node */ + nc_packet = batadv_nc_skb_src_search(bat_priv, skb, + neigh_node->addr, + ethhdr->h_source, nc_node); + + /* Opportunity was found, so stop searching */ + if (nc_packet) + break; + } + rcu_read_unlock(); + + if (!nc_packet) + return false; + + /* Save packets for later decoding */ + batadv_nc_skb_store_before_coding(bat_priv, skb, + neigh_node->addr); + batadv_nc_skb_store_before_coding(bat_priv, nc_packet->skb, + nc_packet->neigh_node->addr); + + /* Code and send packets */ + if (batadv_nc_code_packets(bat_priv, skb, ethhdr, nc_packet, + neigh_node)) + return true; + + /* out of mem ? Coding failed - we have to free the buffered packet + * to avoid memleaks. The skb passed as argument will be dealt with + * by the calling function. + */ + batadv_nc_send_packet(nc_packet); + return false; +} + +/** + * batadv_nc_skb_add_to_path - buffer skb for later encoding / decoding + * @skb: skb to add to path + * @nc_path: path to add skb to + * @neigh_node: next hop to forward packet to + * @packet_id: checksum to identify packet + * + * Returns true if the packet was buffered or false in case of an error. + */ +static bool batadv_nc_skb_add_to_path(struct sk_buff *skb, + struct batadv_nc_path *nc_path, + struct batadv_neigh_node *neigh_node, + __be32 packet_id) +{ + struct batadv_nc_packet *nc_packet; + + nc_packet = kzalloc(sizeof(*nc_packet), GFP_ATOMIC); + if (!nc_packet) + return false; + + /* Initialize nc_packet */ + nc_packet->timestamp = jiffies; + nc_packet->packet_id = packet_id; + nc_packet->skb = skb; + nc_packet->neigh_node = neigh_node; + nc_packet->nc_path = nc_path; + + /* Add coding packet to list */ + spin_lock_bh(&nc_path->packet_list_lock); + list_add_tail(&nc_packet->list, &nc_path->packet_list); + spin_unlock_bh(&nc_path->packet_list_lock); + + return true; +} + +/** + * batadv_nc_skb_forward - try to code a packet or add it to the coding packet + * buffer + * @skb: data skb to forward + * @neigh_node: next hop to forward packet to + * @ethhdr: pointer to the ethernet header inside the skb + * + * Returns true if the skb was consumed (encoded packet sent) or false otherwise + */ +bool batadv_nc_skb_forward(struct sk_buff *skb, + struct batadv_neigh_node *neigh_node, + struct ethhdr *ethhdr) +{ + const struct net_device *netdev = neigh_node->if_incoming->soft_iface; + struct batadv_priv *bat_priv = netdev_priv(netdev); + struct batadv_unicast_packet *packet; + struct batadv_nc_path *nc_path; + __be32 packet_id; + u8 *payload; + + /* Check if network coding is enabled */ + if (!atomic_read(&bat_priv->network_coding)) + goto out; + + /* We only handle unicast packets */ + payload = skb_network_header(skb); + packet = (struct batadv_unicast_packet *)payload; + if (packet->header.packet_type != BATADV_UNICAST) + goto out; + + /* Try to find a coding opportunity and send the skb if one is found */ + if (batadv_nc_skb_dst_search(skb, neigh_node, ethhdr)) + return 
true; + + /* Find or create a nc_path for this src-dst pair */ + nc_path = batadv_nc_get_path(bat_priv, + bat_priv->nc.coding_hash, + ethhdr->h_source, + neigh_node->addr); + + if (!nc_path) + goto out; + + /* Add skb to nc_path */ + packet_id = batadv_skb_crc32(skb, payload + sizeof(*packet)); + if (!batadv_nc_skb_add_to_path(skb, nc_path, neigh_node, packet_id)) + goto free_nc_path; + + /* Packet is consumed */ + return true; + +free_nc_path: + batadv_nc_path_free_ref(nc_path); +out: + /* Packet is not consumed */ + return false; +} + +/** + * batadv_nc_skb_store_for_decoding - save a clone of the skb which can be used + * when decoding coded packets + * @bat_priv: the bat priv with all the soft interface information + * @skb: data skb to store + */ +void batadv_nc_skb_store_for_decoding(struct batadv_priv *bat_priv, + struct sk_buff *skb) +{ + struct batadv_unicast_packet *packet; + struct batadv_nc_path *nc_path; + struct ethhdr *ethhdr = (struct ethhdr *)skb_mac_header(skb); + __be32 packet_id; + u8 *payload; + + /* Check if network coding is enabled */ + if (!atomic_read(&bat_priv->network_coding)) + goto out; + + /* Check for supported packet type */ + payload = skb_network_header(skb); + packet = (struct batadv_unicast_packet *)payload; + if (packet->header.packet_type != BATADV_UNICAST) + goto out; + + /* Find existing nc_path or create a new */ + nc_path = batadv_nc_get_path(bat_priv, + bat_priv->nc.decoding_hash, + ethhdr->h_source, + ethhdr->h_dest); + + if (!nc_path) + goto out; + + /* Clone skb and adjust skb->data to point at batman header */ + skb = skb_clone(skb, GFP_ATOMIC); + if (unlikely(!skb)) + goto free_nc_path; + + if (unlikely(!pskb_may_pull(skb, ETH_HLEN))) + goto free_skb; + + if (unlikely(!skb_pull_rcsum(skb, ETH_HLEN))) + goto free_skb; + + /* Add skb to nc_path */ + packet_id = batadv_skb_crc32(skb, payload + sizeof(*packet)); + if (!batadv_nc_skb_add_to_path(skb, nc_path, NULL, packet_id)) + goto free_skb; + + batadv_inc_counter(bat_priv, BATADV_CNT_NC_BUFFER); + return; + +free_skb: + kfree_skb(skb); +free_nc_path: + batadv_nc_path_free_ref(nc_path); +out: + return; +} + +/** + * batadv_nc_skb_store_sniffed_unicast - check if a received unicast packet + * should be saved in the decoding buffer and, if so, store it there + * @bat_priv: the bat priv with all the soft interface information + * @skb: unicast skb to store + */ +void batadv_nc_skb_store_sniffed_unicast(struct batadv_priv *bat_priv, + struct sk_buff *skb) +{ + struct ethhdr *ethhdr = (struct ethhdr *)skb_mac_header(skb); + + if (batadv_is_my_mac(bat_priv, ethhdr->h_dest)) + return; + + /* Set data pointer to MAC header to mimic packets from our tx path */ + skb_push(skb, ETH_HLEN); + + batadv_nc_skb_store_for_decoding(bat_priv, skb); +} + +/** + * batadv_nc_skb_decode_packet - decode given skb using the decode data stored + * in nc_packet + * @bat_priv: the bat priv with all the soft interface information + * @skb: unicast skb to decode + * @nc_packet: decode data needed to decode the skb + * + * Returns pointer to decoded unicast packet if the packet was decoded or NULL + * in case of an error. 
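The packet id used here comes from batadv_skb_crc32(), which (defined elsewhere in this patchset) computes a CRC32C over the skb contents behind the batman header. Assuming that, the same id can be reproduced in user space with a plain bitwise CRC32C (Castagnoli polynomial, reflected form):

	#include <stdint.h>
	#include <stdio.h>
	#include <string.h>

	static uint32_t crc32c(uint32_t crc, const uint8_t *buf, size_t len)
	{
		int k;

		crc = ~crc;
		while (len--) {
			crc ^= *buf++;
			for (k = 0; k < 8; k++)
				crc = (crc >> 1) ^ (0x82f63b78 & -(crc & 1));
		}
		return ~crc;
	}

	int main(void)
	{
		const char *payload = "example payload";
		uint32_t packet_id = crc32c(0, (const uint8_t *)payload,
					    strlen(payload));

		printf("packet id: 0x%08x\n", packet_id);
		return 0;
	}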
+ */
+static struct batadv_unicast_packet *
+batadv_nc_skb_decode_packet(struct batadv_priv *bat_priv, struct sk_buff *skb,
+			    struct batadv_nc_packet *nc_packet)
+{
+	const int h_size = sizeof(struct batadv_unicast_packet);
+	const int h_diff = sizeof(struct batadv_coded_packet) - h_size;
+	struct batadv_unicast_packet *unicast_packet;
+	struct batadv_coded_packet coded_packet_tmp;
+	struct ethhdr *ethhdr, ethhdr_tmp;
+	uint8_t *orig_dest, ttl, ttvn;
+	unsigned int coding_len;
+
+	/* Save headers temporarily */
+	memcpy(&coded_packet_tmp, skb->data, sizeof(coded_packet_tmp));
+	memcpy(&ethhdr_tmp, skb_mac_header(skb), sizeof(ethhdr_tmp));
+
+	if (skb_cow(skb, 0) < 0)
+		return NULL;
+
+	if (unlikely(!skb_pull_rcsum(skb, h_diff)))
+		return NULL;
+
+	/* Data points to batman header, so set mac header 14 bytes before
+	 * and network to data
+	 */
+	skb_set_mac_header(skb, -ETH_HLEN);
+	skb_reset_network_header(skb);
+
+	/* Reconstruct original mac header */
+	ethhdr = (struct ethhdr *)skb_mac_header(skb);
+	memcpy(ethhdr, &ethhdr_tmp, sizeof(*ethhdr));
+
+	/* Select the correct unicast header information based on the location
+	 * of our mac address in the coded_packet header
+	 */
+	if (batadv_is_my_mac(bat_priv, coded_packet_tmp.second_dest)) {
+		/* If we are the second destination the packet was overheard,
+		 * so the Ethernet address must be copied to h_dest and
+		 * pkt_type changed from PACKET_OTHERHOST to PACKET_HOST
+		 */
+		memcpy(ethhdr->h_dest, coded_packet_tmp.second_dest, ETH_ALEN);
+		skb->pkt_type = PACKET_HOST;
+
+		orig_dest = coded_packet_tmp.second_orig_dest;
+		ttl = coded_packet_tmp.second_ttl;
+		ttvn = coded_packet_tmp.second_ttvn;
+	} else {
+		orig_dest = coded_packet_tmp.first_orig_dest;
+		ttl = coded_packet_tmp.header.ttl;
+		ttvn = coded_packet_tmp.first_ttvn;
+	}
+
+	coding_len = ntohs(coded_packet_tmp.coded_len);
+
+	if (coding_len > skb->len)
+		return NULL;
+
+	/* Here the magic is reversed:
+	 * extract the missing packet from the received coded packet
+	 */
+	batadv_nc_memxor(skb->data + h_size,
+			 nc_packet->skb->data + h_size,
+			 coding_len);
+
+	/* Resize decoded skb if decoded with a larger packet */
+	if (nc_packet->skb->len > coding_len + h_size)
+		pskb_trim_rcsum(skb, coding_len + h_size);
+
+	/* Create decoded unicast packet */
+	unicast_packet = (struct batadv_unicast_packet *)skb->data;
+	unicast_packet->header.packet_type = BATADV_UNICAST;
+	unicast_packet->header.version = BATADV_COMPAT_VERSION;
+	unicast_packet->header.ttl = ttl;
+	memcpy(unicast_packet->dest, orig_dest, ETH_ALEN);
+	unicast_packet->ttvn = ttvn;
+
+	batadv_nc_packet_free(nc_packet);
+	return unicast_packet;
+}
+
+/**
+ * batadv_nc_find_decoding_packet - search through buffered decoding data to
+ *  find the data needed to decode the coded packet
+ * @bat_priv: the bat priv with all the soft interface information
+ * @ethhdr: pointer to the ethernet header inside the coded packet
+ * @coded: coded packet we try to find decode data for
+ *
+ * Returns pointer to nc packet if the needed data was found or NULL otherwise.
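The lookup below first selects which of the two coded halves this node must reconstruct (depending on whether it is the second destination), then searches the matching (source, destination) path for a buffered packet carrying the right CRC id. The queue scan itself is a plain id match with an unlink, sketched here outside the kernel list API:

	#include <stddef.h>
	#include <stdint.h>

	struct buffered_pkt {
		uint32_t packet_id;
		struct buffered_pkt *next;
	};

	/* Unlink and return the first buffered packet matching `packet_id`,
	 * or NULL when no decode data is available.
	 */
	static struct buffered_pkt *
	find_decoding_packet(struct buffered_pkt **head, uint32_t packet_id)
	{
		struct buffered_pkt **pp, *p;

		for (pp = head; (p = *pp) != NULL; pp = &p->next) {
			if (p->packet_id == packet_id) {
				*pp = p->next;	/* unlink; caller consumes it */
				return p;
			}
		}
		return NULL;
	}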
+ */ +static struct batadv_nc_packet * +batadv_nc_find_decoding_packet(struct batadv_priv *bat_priv, + struct ethhdr *ethhdr, + struct batadv_coded_packet *coded) +{ + struct batadv_hashtable *hash = bat_priv->nc.decoding_hash; + struct batadv_nc_packet *tmp_nc_packet, *nc_packet = NULL; + struct batadv_nc_path *nc_path, nc_path_key; + uint8_t *dest, *source; + __be32 packet_id; + int index; + + if (!hash) + return NULL; + + /* Select the correct packet id based on the location of our mac-addr */ + dest = ethhdr->h_source; + if (!batadv_is_my_mac(bat_priv, coded->second_dest)) { + source = coded->second_source; + packet_id = coded->second_crc; + } else { + source = coded->first_source; + packet_id = coded->first_crc; + } + + batadv_nc_hash_key_gen(&nc_path_key, source, dest); + index = batadv_nc_hash_choose(&nc_path_key, hash->size); + + /* Search for matching coding path */ + rcu_read_lock(); + hlist_for_each_entry_rcu(nc_path, &hash->table[index], hash_entry) { + /* Find matching nc_packet */ + spin_lock_bh(&nc_path->packet_list_lock); + list_for_each_entry(tmp_nc_packet, + &nc_path->packet_list, list) { + if (packet_id == tmp_nc_packet->packet_id) { + list_del(&tmp_nc_packet->list); + + nc_packet = tmp_nc_packet; + break; + } + } + spin_unlock_bh(&nc_path->packet_list_lock); + + if (nc_packet) + break; + } + rcu_read_unlock(); + + if (!nc_packet) + batadv_dbg(BATADV_DBG_NC, bat_priv, + "No decoding packet found for %u\n", packet_id); + + return nc_packet; +} + +/** + * batadv_nc_recv_coded_packet - try to decode coded packet and enqueue the + * resulting unicast packet + * @skb: incoming coded packet + * @recv_if: pointer to interface this packet was received on + */ +static int batadv_nc_recv_coded_packet(struct sk_buff *skb, + struct batadv_hard_iface *recv_if) +{ + struct batadv_priv *bat_priv = netdev_priv(recv_if->soft_iface); + struct batadv_unicast_packet *unicast_packet; + struct batadv_coded_packet *coded_packet; + struct batadv_nc_packet *nc_packet; + struct ethhdr *ethhdr; + int hdr_size = sizeof(*coded_packet); + + /* Check if network coding is enabled */ + if (!atomic_read(&bat_priv->network_coding)) + return NET_RX_DROP; + + /* Make sure we can access (and remove) header */ + if (unlikely(!pskb_may_pull(skb, hdr_size))) + return NET_RX_DROP; + + coded_packet = (struct batadv_coded_packet *)skb->data; + ethhdr = (struct ethhdr *)skb_mac_header(skb); + + /* Verify frame is destined for us */ + if (!batadv_is_my_mac(bat_priv, ethhdr->h_dest) && + !batadv_is_my_mac(bat_priv, coded_packet->second_dest)) + return NET_RX_DROP; + + /* Update stat counter */ + if (batadv_is_my_mac(bat_priv, coded_packet->second_dest)) + batadv_inc_counter(bat_priv, BATADV_CNT_NC_SNIFFED); + + nc_packet = batadv_nc_find_decoding_packet(bat_priv, ethhdr, + coded_packet); + if (!nc_packet) { + batadv_inc_counter(bat_priv, BATADV_CNT_NC_DECODE_FAILED); + return NET_RX_DROP; + } + + /* Make skb's linear, because decoding accesses the entire buffer */ + if (skb_linearize(skb) < 0) + goto free_nc_packet; + + if (skb_linearize(nc_packet->skb) < 0) + goto free_nc_packet; + + /* Decode the packet */ + unicast_packet = batadv_nc_skb_decode_packet(bat_priv, skb, nc_packet); + if (!unicast_packet) { + batadv_inc_counter(bat_priv, BATADV_CNT_NC_DECODE_FAILED); + goto free_nc_packet; + } + + /* Mark packet as decoded to do correct recoding when forwarding */ + BATADV_SKB_CB(skb)->decoded = true; + batadv_inc_counter(bat_priv, BATADV_CNT_NC_DECODE); + batadv_add_counter(bat_priv, BATADV_CNT_NC_DECODE_BYTES, + 
skb->len + ETH_HLEN); + return batadv_recv_unicast_packet(skb, recv_if); + +free_nc_packet: + batadv_nc_packet_free(nc_packet); + return NET_RX_DROP; +} + +/** + * batadv_nc_free - clean up network coding memory + * @bat_priv: the bat priv with all the soft interface information + */ +void batadv_nc_free(struct batadv_priv *bat_priv) +{ + batadv_recv_handler_unregister(BATADV_CODED); + cancel_delayed_work_sync(&bat_priv->nc.work); + + batadv_nc_purge_paths(bat_priv, bat_priv->nc.coding_hash, NULL); + batadv_hash_destroy(bat_priv->nc.coding_hash); + batadv_nc_purge_paths(bat_priv, bat_priv->nc.decoding_hash, NULL); + batadv_hash_destroy(bat_priv->nc.decoding_hash); +} + +/** + * batadv_nc_nodes_seq_print_text - print the nc node information + * @seq: seq file to print on + * @offset: not used + */ +int batadv_nc_nodes_seq_print_text(struct seq_file *seq, void *offset) +{ + struct net_device *net_dev = (struct net_device *)seq->private; + struct batadv_priv *bat_priv = netdev_priv(net_dev); + struct batadv_hashtable *hash = bat_priv->orig_hash; + struct batadv_hard_iface *primary_if; + struct hlist_head *head; + struct batadv_orig_node *orig_node; + struct batadv_nc_node *nc_node; + int i; + + primary_if = batadv_seq_print_text_primary_if_get(seq); + if (!primary_if) + goto out; + + /* Traverse list of originators */ + for (i = 0; i < hash->size; i++) { + head = &hash->table[i]; + + /* For each orig_node in this bin */ + rcu_read_lock(); + hlist_for_each_entry_rcu(orig_node, head, hash_entry) { + seq_printf(seq, "Node: %pM\n", orig_node->orig); + + seq_puts(seq, " Ingoing: "); + /* For each in_nc_node to this orig_node */ + list_for_each_entry_rcu(nc_node, + &orig_node->in_coding_list, + list) + seq_printf(seq, "%pM ", + nc_node->addr); + seq_puts(seq, "\n"); + + seq_puts(seq, " Outgoing: "); + /* For out_nc_node to this orig_node */ + list_for_each_entry_rcu(nc_node, + &orig_node->out_coding_list, + list) + seq_printf(seq, "%pM ", + nc_node->addr); + seq_puts(seq, "\n\n"); + } + rcu_read_unlock(); + } + +out: + if (primary_if) + batadv_hardif_free_ref(primary_if); + return 0; +} + +/** + * batadv_nc_init_debugfs - create nc folder and related files in debugfs + * @bat_priv: the bat priv with all the soft interface information + */ +int batadv_nc_init_debugfs(struct batadv_priv *bat_priv) +{ + struct dentry *nc_dir, *file; + + nc_dir = debugfs_create_dir("nc", bat_priv->debug_dir); + if (!nc_dir) + goto out; + + file = debugfs_create_u8("min_tq", S_IRUGO | S_IWUSR, nc_dir, + &bat_priv->nc.min_tq); + if (!file) + goto out; + + file = debugfs_create_u32("max_fwd_delay", S_IRUGO | S_IWUSR, nc_dir, + &bat_priv->nc.max_fwd_delay); + if (!file) + goto out; + + file = debugfs_create_u32("max_buffer_time", S_IRUGO | S_IWUSR, nc_dir, + &bat_priv->nc.max_buffer_time); + if (!file) + goto out; + + return 0; + +out: + return -ENOMEM; +} diff --git a/net/batman-adv/network-coding.h b/net/batman-adv/network-coding.h new file mode 100644 index 000000000000..4fa6d0caddbd --- /dev/null +++ b/net/batman-adv/network-coding.h @@ -0,0 +1,123 @@ +/* Copyright (C) 2012-2013 B.A.T.M.A.N. contributors: + * + * Martin Hundebøll, Jeppe Ledet-Pedersen + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA + */ + +#ifndef _NET_BATMAN_ADV_NETWORK_CODING_H_ +#define _NET_BATMAN_ADV_NETWORK_CODING_H_ + +#ifdef CONFIG_BATMAN_ADV_NC + +int batadv_nc_init(struct batadv_priv *bat_priv); +void batadv_nc_free(struct batadv_priv *bat_priv); +void batadv_nc_update_nc_node(struct batadv_priv *bat_priv, + struct batadv_orig_node *orig_node, + struct batadv_orig_node *orig_neigh_node, + struct batadv_ogm_packet *ogm_packet, + int is_single_hop_neigh); +void batadv_nc_purge_orig(struct batadv_priv *bat_priv, + struct batadv_orig_node *orig_node, + bool (*to_purge)(struct batadv_priv *, + struct batadv_nc_node *)); +void batadv_nc_init_bat_priv(struct batadv_priv *bat_priv); +void batadv_nc_init_orig(struct batadv_orig_node *orig_node); +bool batadv_nc_skb_forward(struct sk_buff *skb, + struct batadv_neigh_node *neigh_node, + struct ethhdr *ethhdr); +void batadv_nc_skb_store_for_decoding(struct batadv_priv *bat_priv, + struct sk_buff *skb); +void batadv_nc_skb_store_sniffed_unicast(struct batadv_priv *bat_priv, + struct sk_buff *skb); +int batadv_nc_nodes_seq_print_text(struct seq_file *seq, void *offset); +int batadv_nc_init_debugfs(struct batadv_priv *bat_priv); + +#else /* ifdef CONFIG_BATMAN_ADV_NC */ + +static inline int batadv_nc_init(struct batadv_priv *bat_priv) +{ + return 0; +} + +static inline void batadv_nc_free(struct batadv_priv *bat_priv) +{ + return; +} + +static inline void +batadv_nc_update_nc_node(struct batadv_priv *bat_priv, + struct batadv_orig_node *orig_node, + struct batadv_orig_node *orig_neigh_node, + struct batadv_ogm_packet *ogm_packet, + int is_single_hop_neigh) +{ + return; +} + +static inline void +batadv_nc_purge_orig(struct batadv_priv *bat_priv, + struct batadv_orig_node *orig_node, + bool (*to_purge)(struct batadv_priv *, + struct batadv_nc_node *)) +{ + return; +} + +static inline void batadv_nc_init_bat_priv(struct batadv_priv *bat_priv) +{ + return; +} + +static inline void batadv_nc_init_orig(struct batadv_orig_node *orig_node) +{ + return; +} + +static inline bool batadv_nc_skb_forward(struct sk_buff *skb, + struct batadv_neigh_node *neigh_node, + struct ethhdr *ethhdr) +{ + return false; +} + +static inline void +batadv_nc_skb_store_for_decoding(struct batadv_priv *bat_priv, + struct sk_buff *skb) +{ + return; +} + +static inline void +batadv_nc_skb_store_sniffed_unicast(struct batadv_priv *bat_priv, + struct sk_buff *skb) +{ + return; +} + +static inline int batadv_nc_nodes_seq_print_text(struct seq_file *seq, + void *offset) +{ + return 0; +} + +static inline int batadv_nc_init_debugfs(struct batadv_priv *bat_priv) +{ + return 0; +} + +#endif /* ifdef CONFIG_BATMAN_ADV_NC */ + +#endif /* _NET_BATMAN_ADV_NETWORK_CODING_H_ */ diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index 8c32cf1c2dec..2f3452546636 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2009-2012 B.A.T.M.A.N. contributors: +/* Copyright (C) 2009-2013 B.A.T.M.A.N. 
contributors: * * Marek Lindner, Simon Wunderlich * @@ -28,15 +28,12 @@ #include "unicast.h" #include "soft-interface.h" #include "bridge_loop_avoidance.h" +#include "network-coding.h" -static void batadv_purge_orig(struct work_struct *work); +/* hash class keys */ +static struct lock_class_key batadv_orig_hash_lock_class_key; -static void batadv_start_purge_timer(struct batadv_priv *bat_priv) -{ - INIT_DELAYED_WORK(&bat_priv->orig_work, batadv_purge_orig); - queue_delayed_work(batadv_event_workqueue, - &bat_priv->orig_work, msecs_to_jiffies(1000)); -} +static void batadv_purge_orig(struct work_struct *work); /* returns 1 if they are the same originator */ static int batadv_compare_orig(const struct hlist_node *node, const void *data2) @@ -57,7 +54,14 @@ int batadv_originator_init(struct batadv_priv *bat_priv) if (!bat_priv->orig_hash) goto err; - batadv_start_purge_timer(bat_priv); + batadv_hash_set_lock_class(bat_priv->orig_hash, + &batadv_orig_hash_lock_class_key); + + INIT_DELAYED_WORK(&bat_priv->orig_work, batadv_purge_orig); + queue_delayed_work(batadv_event_workqueue, + &bat_priv->orig_work, + msecs_to_jiffies(BATADV_ORIG_WORK_PERIOD)); + return 0; err: @@ -115,7 +119,7 @@ out: static void batadv_orig_node_free_rcu(struct rcu_head *rcu) { - struct hlist_node *node, *node_tmp; + struct hlist_node *node_tmp; struct batadv_neigh_node *neigh_node, *tmp_neigh_node; struct batadv_orig_node *orig_node; @@ -131,7 +135,7 @@ static void batadv_orig_node_free_rcu(struct rcu_head *rcu) } /* for all neighbors towards this originator ... */ - hlist_for_each_entry_safe(neigh_node, node, node_tmp, + hlist_for_each_entry_safe(neigh_node, node_tmp, &orig_node->neigh_list, list) { hlist_del_rcu(&neigh_node->list); batadv_neigh_node_free_ref(neigh_node); @@ -139,6 +143,9 @@ static void batadv_orig_node_free_rcu(struct rcu_head *rcu) spin_unlock_bh(&orig_node->neigh_list_lock); + /* Free nc_nodes */ + batadv_nc_purge_orig(orig_node->bat_priv, orig_node, NULL); + batadv_frag_list_free(&orig_node->frag_list); batadv_tt_global_del_orig(orig_node->bat_priv, orig_node, "originator timed out"); @@ -158,7 +165,7 @@ void batadv_orig_node_free_ref(struct batadv_orig_node *orig_node) void batadv_originator_free(struct batadv_priv *bat_priv) { struct batadv_hashtable *hash = bat_priv->orig_hash; - struct hlist_node *node, *node_tmp; + struct hlist_node *node_tmp; struct hlist_head *head; spinlock_t *list_lock; /* spinlock to protect write access */ struct batadv_orig_node *orig_node; @@ -176,10 +183,9 @@ void batadv_originator_free(struct batadv_priv *bat_priv) list_lock = &hash->list_locks[i]; spin_lock_bh(list_lock); - hlist_for_each_entry_safe(orig_node, node, node_tmp, + hlist_for_each_entry_safe(orig_node, node_tmp, head, hash_entry) { - - hlist_del_rcu(node); + hlist_del_rcu(&orig_node->hash_entry); batadv_orig_node_free_ref(orig_node); } spin_unlock_bh(list_lock); @@ -217,6 +223,8 @@ struct batadv_orig_node *batadv_get_orig_node(struct batadv_priv *bat_priv, spin_lock_init(&orig_node->neigh_list_lock); spin_lock_init(&orig_node->tt_buff_lock); + batadv_nc_init_orig(orig_node); + /* extra reference for return */ atomic_set(&orig_node->refcount, 2); @@ -272,7 +280,7 @@ batadv_purge_orig_neighbors(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node, struct batadv_neigh_node **best_neigh_node) { - struct hlist_node *node, *node_tmp; + struct hlist_node *node_tmp; struct batadv_neigh_node *neigh_node; bool neigh_purged = false; unsigned long last_seen; @@ -283,9 +291,8 @@ 
batadv_purge_orig_neighbors(struct batadv_priv *bat_priv, spin_lock_bh(&orig_node->neigh_list_lock); /* for all neighbors towards this originator ... */ - hlist_for_each_entry_safe(neigh_node, node, node_tmp, + hlist_for_each_entry_safe(neigh_node, node_tmp, &orig_node->neigh_list, list) { - last_seen = neigh_node->last_seen; if_incoming = neigh_node->if_incoming; @@ -293,7 +300,6 @@ batadv_purge_orig_neighbors(struct batadv_priv *bat_priv, (if_incoming->if_status == BATADV_IF_INACTIVE) || (if_incoming->if_status == BATADV_IF_NOT_IN_USE) || (if_incoming->if_status == BATADV_IF_TO_BE_REMOVED)) { - if ((if_incoming->if_status == BATADV_IF_INACTIVE) || (if_incoming->if_status == BATADV_IF_NOT_IN_USE) || (if_incoming->if_status == BATADV_IF_TO_BE_REMOVED)) @@ -348,7 +354,7 @@ static bool batadv_purge_orig_node(struct batadv_priv *bat_priv, static void _batadv_purge_orig(struct batadv_priv *bat_priv) { struct batadv_hashtable *hash = bat_priv->orig_hash; - struct hlist_node *node, *node_tmp; + struct hlist_node *node_tmp; struct hlist_head *head; spinlock_t *list_lock; /* spinlock to protect write access */ struct batadv_orig_node *orig_node; @@ -363,13 +369,13 @@ static void _batadv_purge_orig(struct batadv_priv *bat_priv) list_lock = &hash->list_locks[i]; spin_lock_bh(list_lock); - hlist_for_each_entry_safe(orig_node, node, node_tmp, + hlist_for_each_entry_safe(orig_node, node_tmp, head, hash_entry) { if (batadv_purge_orig_node(bat_priv, orig_node)) { if (orig_node->gw_flags) batadv_gw_node_delete(bat_priv, orig_node); - hlist_del_rcu(node); + hlist_del_rcu(&orig_node->hash_entry); batadv_orig_node_free_ref(orig_node); continue; } @@ -393,7 +399,9 @@ static void batadv_purge_orig(struct work_struct *work) delayed_work = container_of(work, struct delayed_work, work); bat_priv = container_of(delayed_work, struct batadv_priv, orig_work); _batadv_purge_orig(bat_priv); - batadv_start_purge_timer(bat_priv); + queue_delayed_work(batadv_event_workqueue, + &bat_priv->orig_work, + msecs_to_jiffies(BATADV_ORIG_WORK_PERIOD)); } void batadv_purge_orig_ref(struct batadv_priv *bat_priv) @@ -406,7 +414,6 @@ int batadv_orig_seq_print_text(struct seq_file *seq, void *offset) struct net_device *net_dev = (struct net_device *)seq->private; struct batadv_priv *bat_priv = netdev_priv(net_dev); struct batadv_hashtable *hash = bat_priv->orig_hash; - struct hlist_node *node, *node_tmp; struct hlist_head *head; struct batadv_hard_iface *primary_if; struct batadv_orig_node *orig_node; @@ -432,7 +439,7 @@ int batadv_orig_seq_print_text(struct seq_file *seq, void *offset) head = &hash->table[i]; rcu_read_lock(); - hlist_for_each_entry_rcu(orig_node, node, head, hash_entry) { + hlist_for_each_entry_rcu(orig_node, head, hash_entry) { neigh_node = batadv_orig_node_get_router(orig_node); if (!neigh_node) continue; @@ -451,14 +458,14 @@ int batadv_orig_seq_print_text(struct seq_file *seq, void *offset) neigh_node->addr, neigh_node->if_incoming->net_dev->name); - hlist_for_each_entry_rcu(neigh_node_tmp, node_tmp, + hlist_for_each_entry_rcu(neigh_node_tmp, &orig_node->neigh_list, list) { seq_printf(seq, " %pM (%3i)", neigh_node_tmp->addr, neigh_node_tmp->tq_avg); } - seq_printf(seq, "\n"); + seq_puts(seq, "\n"); batman_count++; next: @@ -468,7 +475,7 @@ next: } if (batman_count == 0) - seq_printf(seq, "No batman nodes in range ...\n"); + seq_puts(seq, "No batman nodes in range ...\n"); out: if (primary_if) @@ -509,7 +516,6 @@ int batadv_orig_hash_add_if(struct batadv_hard_iface *hard_iface, { struct batadv_priv *bat_priv = 
netdev_priv(hard_iface->soft_iface); struct batadv_hashtable *hash = bat_priv->orig_hash; - struct hlist_node *node; struct hlist_head *head; struct batadv_orig_node *orig_node; uint32_t i; @@ -522,7 +528,7 @@ int batadv_orig_hash_add_if(struct batadv_hard_iface *hard_iface, head = &hash->table[i]; rcu_read_lock(); - hlist_for_each_entry_rcu(orig_node, node, head, hash_entry) { + hlist_for_each_entry_rcu(orig_node, head, hash_entry) { spin_lock_bh(&orig_node->ogm_cnt_lock); ret = batadv_orig_node_add_if(orig_node, max_if_num); spin_unlock_bh(&orig_node->ogm_cnt_lock); @@ -593,7 +599,6 @@ int batadv_orig_hash_del_if(struct batadv_hard_iface *hard_iface, { struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface); struct batadv_hashtable *hash = bat_priv->orig_hash; - struct hlist_node *node; struct hlist_head *head; struct batadv_hard_iface *hard_iface_tmp; struct batadv_orig_node *orig_node; @@ -607,7 +612,7 @@ int batadv_orig_hash_del_if(struct batadv_hard_iface *hard_iface, head = &hash->table[i]; rcu_read_lock(); - hlist_for_each_entry_rcu(orig_node, node, head, hash_entry) { + hlist_for_each_entry_rcu(orig_node, head, hash_entry) { spin_lock_bh(&orig_node->ogm_cnt_lock); ret = batadv_orig_node_del_if(orig_node, max_if_num, hard_iface->if_num); diff --git a/net/batman-adv/originator.h b/net/batman-adv/originator.h index 9778e656dec7..7df48fa7669d 100644 --- a/net/batman-adv/originator.h +++ b/net/batman-adv/originator.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2007-2012 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2013 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * @@ -68,7 +68,6 @@ batadv_orig_hash_find(struct batadv_priv *bat_priv, const void *data) { struct batadv_hashtable *hash = bat_priv->orig_hash; struct hlist_head *head; - struct hlist_node *node; struct batadv_orig_node *orig_node, *orig_node_tmp = NULL; int index; @@ -79,7 +78,7 @@ batadv_orig_hash_find(struct batadv_priv *bat_priv, const void *data) head = &hash->table[index]; rcu_read_lock(); - hlist_for_each_entry_rcu(orig_node, node, head, hash_entry) { + hlist_for_each_entry_rcu(orig_node, head, hash_entry) { if (!batadv_compare_eth(orig_node, data)) continue; diff --git a/net/batman-adv/packet.h b/net/batman-adv/packet.h index cb6405bf755c..a51ccfc39da4 100644 --- a/net/batman-adv/packet.h +++ b/net/batman-adv/packet.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2007-2012 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2013 B.A.T.M.A.N. 
contributors: * * Marek Lindner, Simon Wunderlich * @@ -30,6 +30,7 @@ enum batadv_packettype { BATADV_TT_QUERY = 0x07, BATADV_ROAM_ADV = 0x08, BATADV_UNICAST_4ADDR = 0x09, + BATADV_CODED = 0x0a, }; /** @@ -278,4 +279,36 @@ struct batadv_tt_change { uint8_t addr[ETH_ALEN]; } __packed; +/** + * struct batadv_coded_packet - network coded packet + * @header: common batman packet header and ttl of first included packet + * @reserved: Align following fields to 2-byte boundaries + * @first_source: original source of first included packet + * @first_orig_dest: original destination of first included packet + * @first_crc: checksum of first included packet + * @first_ttvn: tt-version number of first included packet + * @second_ttl: ttl of second packet + * @second_dest: second receiver of this coded packet + * @second_source: original source of second included packet + * @second_orig_dest: original destination of second included packet + * @second_crc: checksum of second included packet + * @second_ttvn: tt version number of second included packet + * @coded_len: length of network coded part of the payload + */ +struct batadv_coded_packet { + struct batadv_header header; + uint8_t first_ttvn; + /* uint8_t first_dest[ETH_ALEN]; - saved in mac header destination */ + uint8_t first_source[ETH_ALEN]; + uint8_t first_orig_dest[ETH_ALEN]; + __be32 first_crc; + uint8_t second_ttl; + uint8_t second_ttvn; + uint8_t second_dest[ETH_ALEN]; + uint8_t second_source[ETH_ALEN]; + uint8_t second_orig_dest[ETH_ALEN]; + __be32 second_crc; + __be16 coded_len; +}; + #endif /* _NET_BATMAN_ADV_PACKET_H_ */ diff --git a/net/batman-adv/ring_buffer.c b/net/batman-adv/ring_buffer.c index c8f61e395b74..ccab0bbdbb59 100644 --- a/net/batman-adv/ring_buffer.c +++ b/net/batman-adv/ring_buffer.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2007-2012 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2013 B.A.T.M.A.N. contributors: * * Marek Lindner * diff --git a/net/batman-adv/ring_buffer.h b/net/batman-adv/ring_buffer.h index fda8c17df273..3f92ae248e83 100644 --- a/net/batman-adv/ring_buffer.h +++ b/net/batman-adv/ring_buffer.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2007-2012 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2013 B.A.T.M.A.N. contributors: * * Marek Lindner * diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index 1aa1722d0187..b27a4d792d15 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2007-2012 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2013 B.A.T.M.A.N.
contributors: * * Marek Lindner, Simon Wunderlich * @@ -29,6 +29,7 @@ #include "unicast.h" #include "bridge_loop_avoidance.h" #include "distributed-arp-table.h" +#include "network-coding.h" static int batadv_route_unicast_packet(struct sk_buff *skb, struct batadv_hard_iface *recv_if); @@ -37,7 +38,6 @@ void batadv_slide_own_bcast_window(struct batadv_hard_iface *hard_iface) { struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface); struct batadv_hashtable *hash = bat_priv->orig_hash; - struct hlist_node *node; struct hlist_head *head; struct batadv_orig_node *orig_node; unsigned long *word; @@ -49,7 +49,7 @@ void batadv_slide_own_bcast_window(struct batadv_hard_iface *hard_iface) head = &hash->table[i]; rcu_read_lock(); - hlist_for_each_entry_rcu(orig_node, node, head, hash_entry) { + hlist_for_each_entry_rcu(orig_node, head, hash_entry) { spin_lock_bh(&orig_node->ogm_cnt_lock); word_index = hard_iface->if_num * BATADV_NUM_WORDS; word = &(orig_node->bcast_own[word_index]); @@ -80,7 +80,6 @@ static void _batadv_update_route(struct batadv_priv *bat_priv, /* route added */ } else if ((!curr_router) && (neigh_node)) { - batadv_dbg(BATADV_DBG_ROUTES, bat_priv, "Adding route towards: %pM (via %pM)\n", orig_node->orig, neigh_node->addr); @@ -147,7 +146,6 @@ out: void batadv_bonding_candidate_add(struct batadv_orig_node *orig_node, struct batadv_neigh_node *neigh_node) { - struct hlist_node *node; struct batadv_neigh_node *tmp_neigh_node, *router = NULL; uint8_t interference_candidate = 0; @@ -170,9 +168,8 @@ void batadv_bonding_candidate_add(struct batadv_orig_node *orig_node, * interface. If we do, we won't select this candidate because of * possible interference. */ - hlist_for_each_entry_rcu(tmp_neigh_node, node, + hlist_for_each_entry_rcu(tmp_neigh_node, &orig_node->neigh_list, list) { - if (tmp_neigh_node == neigh_node) continue; @@ -406,7 +403,7 @@ int batadv_recv_icmp_packet(struct sk_buff *skb, goto out; /* not for me */ - if (!batadv_is_my_mac(ethhdr->h_dest)) + if (!batadv_is_my_mac(bat_priv, ethhdr->h_dest)) goto out; icmp_packet = (struct batadv_icmp_packet_rr *)skb->data; @@ -420,7 +417,7 @@ int batadv_recv_icmp_packet(struct sk_buff *skb, } /* packet for me */ - if (batadv_is_my_mac(icmp_packet->dst)) + if (batadv_is_my_mac(bat_priv, icmp_packet->dst)) return batadv_recv_my_icmp_packet(bat_priv, skb, hdr_size); /* TTL exceeded */ @@ -552,27 +549,39 @@ batadv_find_ifalter_router(struct batadv_orig_node *primary_orig, return router; } -static int batadv_check_unicast_packet(struct sk_buff *skb, int hdr_size) +/** + * batadv_check_unicast_packet - Check for malformed unicast packets + * @bat_priv: the bat priv with all the soft interface information + * @skb: packet to check + * @hdr_size: size of header to pull + * + * Check for short header and bad addresses in given packet. Returns negative + * value when check fails and 0 otherwise. The negative value depends on the + * reason: -ENODATA for bad header, -EBADR for broadcast destination or source, + * and -EREMOTE for non-local (other host) destination. 
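For callers, the point of the distinct codes is that a failed check is no longer all-or-nothing; a minimal sketch of the consumption pattern, mirroring the batadv_recv_unicast_packet() hunk further below (all names as they appear in this diff):

	int check;

	check = batadv_check_unicast_packet(bat_priv, skb, hdr_size);

	/* not addressed to us, but possibly useful later for decoding
	 * a network-coded packet
	 */
	if (check == -EREMOTE)
		batadv_nc_skb_store_sniffed_unicast(bat_priv, skb);

	/* any negative code (-ENODATA, -EBADR, -EREMOTE) still means:
	 * do not process this packet further
	 */
	if (check < 0)
		return NET_RX_DROP;
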
+ */ +static int batadv_check_unicast_packet(struct batadv_priv *bat_priv, + struct sk_buff *skb, int hdr_size) { struct ethhdr *ethhdr; /* drop packet if it does not have the necessary minimum size */ if (unlikely(!pskb_may_pull(skb, hdr_size))) - return -1; + return -ENODATA; ethhdr = (struct ethhdr *)skb_mac_header(skb); /* packet with unicast indication but broadcast recipient */ if (is_broadcast_ether_addr(ethhdr->h_dest)) - return -1; + return -EBADR; /* packet with broadcast sender address */ if (is_broadcast_ether_addr(ethhdr->h_source)) - return -1; + return -EBADR; /* not for me */ - if (!batadv_is_my_mac(ethhdr->h_dest)) - return -1; + if (!batadv_is_my_mac(bat_priv, ethhdr->h_dest)) + return -EREMOTE; return 0; } @@ -586,7 +595,7 @@ int batadv_recv_tt_query(struct sk_buff *skb, struct batadv_hard_iface *recv_if) char tt_flag; size_t packet_size; - if (batadv_check_unicast_packet(skb, hdr_size) < 0) + if (batadv_check_unicast_packet(bat_priv, skb, hdr_size) < 0) return NET_RX_DROP; /* I might need to modify it */ @@ -618,7 +627,7 @@ int batadv_recv_tt_query(struct sk_buff *skb, struct batadv_hard_iface *recv_if) case BATADV_TT_RESPONSE: batadv_inc_counter(bat_priv, BATADV_CNT_TT_RESPONSE_RX); - if (batadv_is_my_mac(tt_query->dst)) { + if (batadv_is_my_mac(bat_priv, tt_query->dst)) { /* packet needs to be linearized to access the TT * changes */ @@ -661,14 +670,15 @@ int batadv_recv_roam_adv(struct sk_buff *skb, struct batadv_hard_iface *recv_if) struct batadv_roam_adv_packet *roam_adv_packet; struct batadv_orig_node *orig_node; - if (batadv_check_unicast_packet(skb, sizeof(*roam_adv_packet)) < 0) + if (batadv_check_unicast_packet(bat_priv, skb, + sizeof(*roam_adv_packet)) < 0) goto out; batadv_inc_counter(bat_priv, BATADV_CNT_TT_ROAM_ADV_RX); roam_adv_packet = (struct batadv_roam_adv_packet *)skb->data; - if (!batadv_is_my_mac(roam_adv_packet->dst)) + if (!batadv_is_my_mac(bat_priv, roam_adv_packet->dst)) return batadv_route_unicast_packet(skb, recv_if); /* check if it is a backbone gateway.
we don't accept @@ -836,7 +846,6 @@ static int batadv_route_unicast_packet(struct sk_buff *skb, if (unicast_packet->header.packet_type == BATADV_UNICAST_FRAG && batadv_frag_can_reassemble(skb, neigh_node->if_incoming->net_dev->mtu)) { - ret = batadv_frag_reassemble_skb(skb, bat_priv, &new_skb); if (ret == NET_RX_DROP) @@ -855,14 +864,17 @@ /* decrement ttl */ unicast_packet->header.ttl--; - /* Update stats counter */ - batadv_inc_counter(bat_priv, BATADV_CNT_FORWARD); - batadv_add_counter(bat_priv, BATADV_CNT_FORWARD_BYTES, - skb->len + ETH_HLEN); - - /* route it */ - if (batadv_send_skb_to_orig(skb, orig_node, recv_if)) + /* network code packet if possible */ + if (batadv_nc_skb_forward(skb, neigh_node, ethhdr)) { ret = NET_RX_SUCCESS; + } else if (batadv_send_skb_to_orig(skb, orig_node, recv_if)) { + ret = NET_RX_SUCCESS; + + /* Update stats counter */ + batadv_inc_counter(bat_priv, BATADV_CNT_FORWARD); + batadv_add_counter(bat_priv, BATADV_CNT_FORWARD_BYTES, + skb->len + ETH_HLEN); + } out: if (neigh_node) @@ -927,7 +939,7 @@ out: } static int batadv_check_unicast_ttvn(struct batadv_priv *bat_priv, - struct sk_buff *skb) { + struct sk_buff *skb, int hdr_len) { uint8_t curr_ttvn, old_ttvn; struct batadv_orig_node *orig_node; struct ethhdr *ethhdr; @@ -936,7 +948,7 @@ static int batadv_check_unicast_ttvn(struct batadv_priv *bat_priv, int is_old_ttvn; /* check if there is enough data before accessing it */ - if (pskb_may_pull(skb, sizeof(*unicast_packet) + ETH_HLEN) < 0) + if (pskb_may_pull(skb, hdr_len + ETH_HLEN) < 0) return 0; /* create a copy of the skb (in case of re-routing) to modify it. */ if (skb_cow(skb, sizeof(*unicast_packet)) < 0) return 0; unicast_packet = (struct batadv_unicast_packet *)skb->data; - ethhdr = (struct ethhdr *)(skb->data + sizeof(*unicast_packet)); + ethhdr = (struct ethhdr *)(skb->data + hdr_len); /* check if the destination client was served by this node and it is now * roaming.
In this case, it means that the node has got a ROAM_ADV @@ -972,7 +984,7 @@ static int batadv_check_unicast_ttvn(struct batadv_priv *bat_priv, * last time) the packet had an updated information or not */ curr_ttvn = (uint8_t)atomic_read(&bat_priv->tt.vn); - if (!batadv_is_my_mac(unicast_packet->dest)) { + if (!batadv_is_my_mac(bat_priv, unicast_packet->dest)) { orig_node = batadv_orig_hash_find(bat_priv, unicast_packet->dest); /* if it is not possible to find the orig_node representing the @@ -1038,7 +1050,7 @@ int batadv_recv_unicast_packet(struct sk_buff *skb, struct batadv_unicast_4addr_packet *unicast_4addr_packet; uint8_t *orig_addr; struct batadv_orig_node *orig_node = NULL; - int hdr_size = sizeof(*unicast_packet); + int check, hdr_size = sizeof(*unicast_packet); bool is4addr; unicast_packet = (struct batadv_unicast_packet *)skb->data; @@ -1049,14 +1061,22 @@ int batadv_recv_unicast_packet(struct sk_buff *skb, if (is4addr) hdr_size = sizeof(*unicast_4addr_packet); - if (batadv_check_unicast_packet(skb, hdr_size) < 0) - return NET_RX_DROP; + /* function returns -EREMOTE for promiscuous packets */ + check = batadv_check_unicast_packet(bat_priv, skb, hdr_size); - if (!batadv_check_unicast_ttvn(bat_priv, skb)) + /* Even though the packet is not for us, we might save it to use for + * decoding a later received coded packet + */ + if (check == -EREMOTE) + batadv_nc_skb_store_sniffed_unicast(bat_priv, skb); + + if (check < 0) + return NET_RX_DROP; + if (!batadv_check_unicast_ttvn(bat_priv, skb, hdr_size)) return NET_RX_DROP; /* packet for me */ - if (batadv_is_my_mac(unicast_packet->dest)) { + if (batadv_is_my_mac(bat_priv, unicast_packet->dest)) { if (is4addr) { batadv_dat_inc_counter(bat_priv, unicast_4addr_packet->subtype); @@ -1093,17 +1113,16 @@ int batadv_recv_ucast_frag_packet(struct sk_buff *skb, struct sk_buff *new_skb = NULL; int ret; - if (batadv_check_unicast_packet(skb, hdr_size) < 0) + if (batadv_check_unicast_packet(bat_priv, skb, hdr_size) < 0) return NET_RX_DROP; - if (!batadv_check_unicast_ttvn(bat_priv, skb)) + if (!batadv_check_unicast_ttvn(bat_priv, skb, hdr_size)) return NET_RX_DROP; unicast_packet = (struct batadv_unicast_frag_packet *)skb->data; /* packet for me */ - if (batadv_is_my_mac(unicast_packet->dest)) { - + if (batadv_is_my_mac(bat_priv, unicast_packet->dest)) { ret = batadv_frag_reassemble_skb(skb, bat_priv, &new_skb); if (ret == NET_RX_DROP) @@ -1157,13 +1176,13 @@ int batadv_recv_bcast_packet(struct sk_buff *skb, goto out; /* ignore broadcasts sent by myself */ - if (batadv_is_my_mac(ethhdr->h_source)) + if (batadv_is_my_mac(bat_priv, ethhdr->h_source)) goto out; bcast_packet = (struct batadv_bcast_packet *)skb->data; /* ignore broadcasts originated by myself */ - if (batadv_is_my_mac(bcast_packet->orig)) + if (batadv_is_my_mac(bat_priv, bcast_packet->orig)) goto out; if (bcast_packet->header.ttl < 2) @@ -1249,14 +1268,14 @@ int batadv_recv_vis_packet(struct sk_buff *skb, ethhdr = (struct ethhdr *)skb_mac_header(skb); /* not for me */ - if (!batadv_is_my_mac(ethhdr->h_dest)) + if (!batadv_is_my_mac(bat_priv, ethhdr->h_dest)) return NET_RX_DROP; /* ignore own packets */ - if (batadv_is_my_mac(vis_packet->vis_orig)) + if (batadv_is_my_mac(bat_priv, vis_packet->vis_orig)) return NET_RX_DROP; - if (batadv_is_my_mac(vis_packet->sender_orig)) + if (batadv_is_my_mac(bat_priv, vis_packet->sender_orig)) return NET_RX_DROP; switch (vis_packet->vis_type) { diff --git a/net/batman-adv/routing.h b/net/batman-adv/routing.h index 9262279ea667..99eeafaba407 100644 --- 
a/net/batman-adv/routing.h +++ b/net/batman-adv/routing.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2007-2012 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2013 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c index 4425af9dad40..263cfd1ccee7 100644 --- a/net/batman-adv/send.c +++ b/net/batman-adv/send.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2007-2012 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2013 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * @@ -27,6 +27,7 @@ #include "vis.h" #include "gateway_common.h" #include "originator.h" +#include "network-coding.h" #include <linux/if_ether.h> @@ -39,6 +40,7 @@ int batadv_send_skb_packet(struct sk_buff *skb, struct batadv_hard_iface *hard_iface, const uint8_t *dst_addr) { + struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface); struct ethhdr *ethhdr; if (hard_iface->if_status != BATADV_IF_ACTIVE) @@ -70,6 +72,9 @@ int batadv_send_skb_packet(struct sk_buff *skb, skb->dev = hard_iface->net_dev; + /* Save a clone of the skb to use when decoding coded packets */ + batadv_nc_skb_store_for_decoding(bat_priv, skb); + /* dev_queue_xmit() returns a negative result on error. However on * congestion and traffic shaping, it drops and returns NET_XMIT_DROP * (which is > 0). This will not be treated as an error. @@ -155,8 +160,6 @@ _batadv_add_bcast_packet_to_list(struct batadv_priv *bat_priv, spin_unlock_bh(&bat_priv->forw_bcast_list_lock); /* start timer for this packet */ - INIT_DELAYED_WORK(&forw_packet->delayed_work, - batadv_send_outstanding_bcast_packet); queue_delayed_work(batadv_event_workqueue, &forw_packet->delayed_work, send_time); } @@ -210,6 +213,9 @@ int batadv_add_bcast_packet_to_list(struct batadv_priv *bat_priv, /* how often did we send the bcast packet ? */ forw_packet->num_packets = 0; + INIT_DELAYED_WORK(&forw_packet->delayed_work, + batadv_send_outstanding_bcast_packet); + _batadv_add_bcast_packet_to_list(bat_priv, forw_packet, delay); return NETDEV_TX_OK; @@ -315,7 +321,7 @@ batadv_purge_outstanding_packets(struct batadv_priv *bat_priv, const struct batadv_hard_iface *hard_iface) { struct batadv_forw_packet *forw_packet; - struct hlist_node *tmp_node, *safe_tmp_node; + struct hlist_node *safe_tmp_node; bool pending; if (hard_iface) @@ -328,9 +334,8 @@ batadv_purge_outstanding_packets(struct batadv_priv *bat_priv, /* free bcast list */ spin_lock_bh(&bat_priv->forw_bcast_list_lock); - hlist_for_each_entry_safe(forw_packet, tmp_node, safe_tmp_node, + hlist_for_each_entry_safe(forw_packet, safe_tmp_node, &bat_priv->forw_bcast_list, list) { - /* if purge_outstanding_packets() was called with an argument * we delete only packets belonging to the given interface */ @@ -355,9 +360,8 @@ batadv_purge_outstanding_packets(struct batadv_priv *bat_priv, /* free batman packet list */ spin_lock_bh(&bat_priv->forw_bat_list_lock); - hlist_for_each_entry_safe(forw_packet, tmp_node, safe_tmp_node, + hlist_for_each_entry_safe(forw_packet, safe_tmp_node, &bat_priv->forw_bat_list, list) { - /* if purge_outstanding_packets() was called with an argument * we delete only packets belonging to the given interface */ diff --git a/net/batman-adv/send.h b/net/batman-adv/send.h index 0078dece1abc..38e662f619ac 100644 --- a/net/batman-adv/send.h +++ b/net/batman-adv/send.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2007-2012 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2013 B.A.T.M.A.N. 
contributors: * * Marek Lindner, Simon Wunderlich * diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index 6b548fde8e04..6f20d339e33a 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2007-2012 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2013 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * @@ -37,6 +37,7 @@ #include <linux/if_ether.h> #include "unicast.h" #include "bridge_loop_avoidance.h" +#include "network-coding.h" static int batadv_get_settings(struct net_device *dev, struct ethtool_cmd *cmd); @@ -124,7 +125,6 @@ static int batadv_interface_set_mac_addr(struct net_device *dev, void *p) batadv_tt_local_add(dev, addr->sa_data, BATADV_NULL_IFINDEX); } - dev->addr_assign_type &= ~NET_ADDR_RANDOM; return 0; } @@ -181,7 +181,8 @@ static int batadv_interface_tx(struct sk_buff *skb, goto dropped; /* Register the client MAC in the transtable */ - batadv_tt_local_add(soft_iface, ethhdr->h_source, skb->skb_iif); + if (!is_multicast_ether_addr(ethhdr->h_source)) + batadv_tt_local_add(soft_iface, ethhdr->h_source, skb->skb_iif); /* don't accept stp packets. STP does not help in meshes. * better use the bridge loop avoidance ... @@ -401,86 +402,58 @@ static void batadv_set_lockdep_class(struct net_device *dev) } /** - * batadv_softif_init - Late stage initialization of soft interface - * @dev: registered network device to modify + * batadv_softif_destroy_finish - cleans up the remains of a softif + * @work: work queue item * - * Returns error code on failures + * Free the parts of the soft interface which can not be removed under + * rtnl lock (to prevent deadlock situations). */ -static int batadv_softif_init(struct net_device *dev) -{ - batadv_set_lockdep_class(dev); - - return 0; -} - -static const struct net_device_ops batadv_netdev_ops = { - .ndo_init = batadv_softif_init, - .ndo_open = batadv_interface_open, - .ndo_stop = batadv_interface_release, - .ndo_get_stats = batadv_interface_stats, - .ndo_set_mac_address = batadv_interface_set_mac_addr, - .ndo_change_mtu = batadv_interface_change_mtu, - .ndo_start_xmit = batadv_interface_tx, - .ndo_validate_addr = eth_validate_addr -}; - -static void batadv_interface_setup(struct net_device *dev) +static void batadv_softif_destroy_finish(struct work_struct *work) { - struct batadv_priv *priv = netdev_priv(dev); - - ether_setup(dev); - - dev->netdev_ops = &batadv_netdev_ops; - dev->destructor = free_netdev; - dev->tx_queue_len = 0; + struct batadv_priv *bat_priv; + struct net_device *soft_iface; - /* can't call min_mtu, because the needed variables - * have not been initialized yet - */ - dev->mtu = ETH_DATA_LEN; - /* reserve more space in the skbuff for our header */ - dev->hard_header_len = BATADV_HEADER_LEN; + bat_priv = container_of(work, struct batadv_priv, + cleanup_work); + soft_iface = bat_priv->soft_iface; - /* generate random address */ - eth_hw_addr_random(dev); - - SET_ETHTOOL_OPS(dev, &batadv_ethtool_ops); + batadv_sysfs_del_meshif(soft_iface); - memset(priv, 0, sizeof(*priv)); + rtnl_lock(); + unregister_netdevice(soft_iface); + rtnl_unlock(); } -struct net_device *batadv_softif_create(const char *name) +/** + * batadv_softif_init_late - late stage initialization of soft interface + * @dev: registered network device to modify + * + * Returns error code on failures + */ +static int batadv_softif_init_late(struct net_device *dev) { - struct net_device *soft_iface; struct batadv_priv *bat_priv; int ret; size_t cnt_len = 
sizeof(uint64_t) * BATADV_CNT_NUM; - soft_iface = alloc_netdev(sizeof(*bat_priv), name, - batadv_interface_setup); - - if (!soft_iface) - goto out; + batadv_set_lockdep_class(dev); - bat_priv = netdev_priv(soft_iface); + bat_priv = netdev_priv(dev); + bat_priv->soft_iface = dev; + INIT_WORK(&bat_priv->cleanup_work, batadv_softif_destroy_finish); /* batadv_interface_stats() needs to be available as soon as * register_netdevice() has been called */ bat_priv->bat_counters = __alloc_percpu(cnt_len, __alignof__(uint64_t)); if (!bat_priv->bat_counters) - goto free_soft_iface; - - ret = register_netdevice(soft_iface); - if (ret < 0) { - pr_err("Unable to register the batman interface '%s': %i\n", - name, ret); - goto free_bat_counters; - } + return -ENOMEM; atomic_set(&bat_priv->aggregated_ogms, 1); atomic_set(&bat_priv->bonding, 0); +#ifdef CONFIG_BATMAN_ADV_BLA atomic_set(&bat_priv->bridge_loop_avoidance, 0); +#endif #ifdef CONFIG_BATMAN_ADV_DAT atomic_set(&bat_priv->distributed_arp_table, 1); #endif @@ -491,7 +464,9 @@ struct net_device *batadv_softif_create(const char *name) atomic_set(&bat_priv->gw_bandwidth, 41); atomic_set(&bat_priv->orig_interval, 1000); atomic_set(&bat_priv->hop_penalty, 30); +#ifdef CONFIG_BATMAN_ADV_DEBUG atomic_set(&bat_priv->log_level, 0); +#endif atomic_set(&bat_priv->fragmentation, 1); atomic_set(&bat_priv->bcast_queue_left, BATADV_BCAST_QUEUE_LEN); atomic_set(&bat_priv->batman_queue_left, BATADV_BATMAN_QUEUE_LEN); @@ -510,47 +485,194 @@ struct net_device *batadv_softif_create(const char *name) bat_priv->primary_if = NULL; bat_priv->num_ifaces = 0; - ret = batadv_algo_select(bat_priv, batadv_routing_algo); - if (ret < 0) - goto unreg_soft_iface; + batadv_nc_init_bat_priv(bat_priv); - ret = batadv_sysfs_add_meshif(soft_iface); + ret = batadv_algo_select(bat_priv, batadv_routing_algo); if (ret < 0) - goto unreg_soft_iface; + goto free_bat_counters; - ret = batadv_debugfs_add_meshif(soft_iface); + ret = batadv_debugfs_add_meshif(dev); if (ret < 0) - goto unreg_sysfs; + goto free_bat_counters; - ret = batadv_mesh_init(soft_iface); + ret = batadv_mesh_init(dev); if (ret < 0) goto unreg_debugfs; - return soft_iface; + return 0; unreg_debugfs: - batadv_debugfs_del_meshif(soft_iface); -unreg_sysfs: - batadv_sysfs_del_meshif(soft_iface); -unreg_soft_iface: - free_percpu(bat_priv->bat_counters); - unregister_netdevice(soft_iface); - return NULL; - + batadv_debugfs_del_meshif(dev); free_bat_counters: free_percpu(bat_priv->bat_counters); -free_soft_iface: - free_netdev(soft_iface); + + return ret; +} + +/** + * batadv_softif_slave_add - Add a slave interface to a batadv_soft_interface + * @dev: batadv_soft_interface used as master interface + * @slave_dev: net_device which should become the slave interface + * + * Return 0 if successful or error otherwise. 
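With ndo_add_slave/ndo_del_slave wired up (see the batadv_netdev_ops table below), a hard interface can be enslaved from userspace, e.g. with iproute2's "ip link set dev eth0 master bat0". A rough sketch of how such a handler is reached; the device names, error handling and locking shown here are illustrative assumptions, not part of this diff:

	/* roughly what the rtnl "set master" path does */
	struct net_device *master = dev_get_by_name(&init_net, "bat0");
	struct net_device *slave = dev_get_by_name(&init_net, "eth0");
	int err = -EOPNOTSUPP;

	rtnl_lock();
	if (master && slave && master->netdev_ops->ndo_add_slave)
		err = master->netdev_ops->ndo_add_slave(master, slave);
	rtnl_unlock();

	/* dev_get_by_name() took a reference on each device */
	if (master)
		dev_put(master);
	if (slave)
		dev_put(slave);
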
+ */ +static int batadv_softif_slave_add(struct net_device *dev, + struct net_device *slave_dev) +{ + struct batadv_hard_iface *hard_iface; + int ret = -EINVAL; + + hard_iface = batadv_hardif_get_by_netdev(slave_dev); + if (!hard_iface || hard_iface->soft_iface != NULL) + goto out; + + ret = batadv_hardif_enable_interface(hard_iface, dev->name); + out: - return NULL; + if (hard_iface) + batadv_hardif_free_ref(hard_iface); + return ret; } -void batadv_softif_destroy(struct net_device *soft_iface) +/** + * batadv_softif_slave_del - Delete a slave iface from a batadv_soft_interface + * @dev: batadv_soft_interface used as master interface + * @slave_dev: net_device which should be removed from the master interface + * + * Return 0 if successful or error otherwise. + */ +static int batadv_softif_slave_del(struct net_device *dev, + struct net_device *slave_dev) { - batadv_debugfs_del_meshif(soft_iface); + struct batadv_hard_iface *hard_iface; + int ret = -EINVAL; + + hard_iface = batadv_hardif_get_by_netdev(slave_dev); + + if (!hard_iface || hard_iface->soft_iface != dev) + goto out; + + batadv_hardif_disable_interface(hard_iface, BATADV_IF_CLEANUP_KEEP); + ret = 0; + +out: + if (hard_iface) + batadv_hardif_free_ref(hard_iface); + return ret; +} + +static const struct net_device_ops batadv_netdev_ops = { + .ndo_init = batadv_softif_init_late, + .ndo_open = batadv_interface_open, + .ndo_stop = batadv_interface_release, + .ndo_get_stats = batadv_interface_stats, + .ndo_set_mac_address = batadv_interface_set_mac_addr, + .ndo_change_mtu = batadv_interface_change_mtu, + .ndo_start_xmit = batadv_interface_tx, + .ndo_validate_addr = eth_validate_addr, + .ndo_add_slave = batadv_softif_slave_add, + .ndo_del_slave = batadv_softif_slave_del, +}; + +/** + * batadv_softif_free - Deconstructor of batadv_soft_interface + * @dev: Device to cleanup and remove + */ +static void batadv_softif_free(struct net_device *dev) +{ + batadv_debugfs_del_meshif(dev); + batadv_mesh_free(dev); + + /* some scheduled RCU callbacks need the bat_priv struct to accomplish + * their tasks. 
Wait for them all to be finished before freeing the + * netdev and its private data (bat_priv) + */ + rcu_barrier(); + + free_netdev(dev); +} + +/** + * batadv_softif_init_early - early stage initialization of soft interface + * @dev: registered network device to modify + */ +static void batadv_softif_init_early(struct net_device *dev) +{ + struct batadv_priv *priv = netdev_priv(dev); + + ether_setup(dev); + + dev->netdev_ops = &batadv_netdev_ops; + dev->destructor = batadv_softif_free; + dev->tx_queue_len = 0; + + /* can't call min_mtu, because the needed variables + * have not been initialized yet + */ + dev->mtu = ETH_DATA_LEN; + /* reserve more space in the skbuff for our header */ + dev->hard_header_len = BATADV_HEADER_LEN; + + /* generate random address */ + eth_hw_addr_random(dev); + + SET_ETHTOOL_OPS(dev, &batadv_ethtool_ops); + + memset(priv, 0, sizeof(*priv)); +} + +struct net_device *batadv_softif_create(const char *name) +{ + struct net_device *soft_iface; + int ret; + + soft_iface = alloc_netdev(sizeof(struct batadv_priv), name, + batadv_softif_init_early); + if (!soft_iface) + return NULL; + + soft_iface->rtnl_link_ops = &batadv_link_ops; + + ret = register_netdevice(soft_iface); + if (ret < 0) { + pr_err("Unable to register the batman interface '%s': %i\n", + name, ret); + free_netdev(soft_iface); + return NULL; + } + + return soft_iface; +} + +/** + * batadv_softif_destroy_sysfs - deletion of batadv_soft_interface via sysfs + * @soft_iface: the to-be-removed batman-adv interface + */ +void batadv_softif_destroy_sysfs(struct net_device *soft_iface) +{ + struct batadv_priv *bat_priv = netdev_priv(soft_iface); + + queue_work(batadv_event_workqueue, &bat_priv->cleanup_work); +} + +/** + * batadv_softif_destroy_netlink - deletion of batadv_soft_interface via netlink + * @soft_iface: the to-be-removed batman-adv interface + * @head: list pointer + */ +static void batadv_softif_destroy_netlink(struct net_device *soft_iface, + struct list_head *head) +{ + struct batadv_hard_iface *hard_iface; + + list_for_each_entry(hard_iface, &batadv_hardif_list, list) { + if (hard_iface->soft_iface == soft_iface) + batadv_hardif_disable_interface(hard_iface, + BATADV_IF_CLEANUP_KEEP); + } + batadv_sysfs_del_meshif(soft_iface); - batadv_mesh_free(soft_iface); - unregister_netdevice(soft_iface); + unregister_netdevice_queue(soft_iface, head); } int batadv_softif_is_valid(const struct net_device *net_dev) @@ -561,6 +683,13 @@ int batadv_softif_is_valid(const struct net_device *net_dev) return 0; } +struct rtnl_link_ops batadv_link_ops __read_mostly = { + .kind = "batadv", + .priv_size = sizeof(struct batadv_priv), + .setup = batadv_softif_init_early, + .dellink = batadv_softif_destroy_netlink, +}; + /* ethtool */ static int batadv_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) { @@ -581,10 +710,10 @@ static int batadv_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) static void batadv_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info) { - strcpy(info->driver, "B.A.T.M.A.N. advanced"); - strcpy(info->version, BATADV_SOURCE_VERSION); - strcpy(info->fw_version, "N/A"); - strcpy(info->bus_info, "batman"); + strlcpy(info->driver, "B.A.T.M.A.N. 
advanced", sizeof(info->driver)); + strlcpy(info->version, BATADV_SOURCE_VERSION, sizeof(info->version)); + strlcpy(info->fw_version, "N/A", sizeof(info->fw_version)); + strlcpy(info->bus_info, "batman", sizeof(info->bus_info)); } static u32 batadv_get_msglevel(struct net_device *dev) @@ -632,6 +761,17 @@ static const struct { { "dat_put_rx" }, { "dat_cached_reply_tx" }, #endif +#ifdef CONFIG_BATMAN_ADV_NC + { "nc_code" }, + { "nc_code_bytes" }, + { "nc_recode" }, + { "nc_recode_bytes" }, + { "nc_buffer" }, + { "nc_decode" }, + { "nc_decode_bytes" }, + { "nc_decode_failed" }, + { "nc_sniffed" }, +#endif }; static void batadv_get_strings(struct net_device *dev, uint32_t stringset, diff --git a/net/batman-adv/soft-interface.h b/net/batman-adv/soft-interface.h index 07a08fed28b9..2f2472c2ea0d 100644 --- a/net/batman-adv/soft-interface.h +++ b/net/batman-adv/soft-interface.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2007-2012 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2013 B.A.T.M.A.N. contributors: * * Marek Lindner * @@ -25,7 +25,8 @@ void batadv_interface_rx(struct net_device *soft_iface, struct sk_buff *skb, struct batadv_hard_iface *recv_if, int hdr_size, struct batadv_orig_node *orig_node); struct net_device *batadv_softif_create(const char *name); -void batadv_softif_destroy(struct net_device *soft_iface); +void batadv_softif_destroy_sysfs(struct net_device *soft_iface); int batadv_softif_is_valid(const struct net_device *net_dev); +extern struct rtnl_link_ops batadv_link_ops; #endif /* _NET_BATMAN_ADV_SOFT_INTERFACE_H_ */ diff --git a/net/batman-adv/sysfs.c b/net/batman-adv/sysfs.c index 84a55cb19b0b..15a22efa9a67 100644 --- a/net/batman-adv/sysfs.c +++ b/net/batman-adv/sysfs.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2010-2012 B.A.T.M.A.N. contributors: +/* Copyright (C) 2010-2013 B.A.T.M.A.N. 
contributors: * * Marek Lindner * @@ -442,6 +442,9 @@ static BATADV_ATTR(gw_bandwidth, S_IRUGO | S_IWUSR, batadv_show_gw_bwidth, #ifdef CONFIG_BATMAN_ADV_DEBUG BATADV_ATTR_SIF_UINT(log_level, S_IRUGO | S_IWUSR, 0, BATADV_DBG_ALL, NULL); #endif +#ifdef CONFIG_BATMAN_ADV_NC +BATADV_ATTR_SIF_BOOL(network_coding, S_IRUGO | S_IWUSR, NULL); +#endif static struct batadv_attribute *batadv_mesh_attrs[] = { &batadv_attr_aggregated_ogms, @@ -464,6 +467,9 @@ static struct batadv_attribute *batadv_mesh_attrs[] = { #ifdef CONFIG_BATMAN_ADV_DEBUG &batadv_attr_log_level, #endif +#ifdef CONFIG_BATMAN_ADV_NC + &batadv_attr_network_coding, +#endif NULL, }; @@ -582,13 +588,15 @@ static ssize_t batadv_store_mesh_iface(struct kobject *kobj, } if (status_tmp == BATADV_IF_NOT_IN_USE) { - batadv_hardif_disable_interface(hard_iface); + batadv_hardif_disable_interface(hard_iface, + BATADV_IF_CLEANUP_AUTO); goto unlock; } /* if the interface already is in use */ if (hard_iface->if_status != BATADV_IF_NOT_IN_USE) - batadv_hardif_disable_interface(hard_iface); + batadv_hardif_disable_interface(hard_iface, + BATADV_IF_CLEANUP_AUTO); ret = batadv_hardif_enable_interface(hard_iface, buff); @@ -688,15 +696,10 @@ int batadv_throw_uevent(struct batadv_priv *bat_priv, enum batadv_uev_type type, enum batadv_uev_action action, const char *data) { int ret = -ENOMEM; - struct batadv_hard_iface *primary_if; struct kobject *bat_kobj; char *uevent_env[4] = { NULL, NULL, NULL, NULL }; - primary_if = batadv_primary_if_get_selected(bat_priv); - if (!primary_if) - goto out; - - bat_kobj = &primary_if->soft_iface->dev.kobj; + bat_kobj = &bat_priv->soft_iface->dev.kobj; uevent_env[0] = kmalloc(strlen(BATADV_UEV_TYPE_VAR) + strlen(batadv_uev_type_str[type]) + 1, @@ -732,9 +735,6 @@ out: kfree(uevent_env[1]); kfree(uevent_env[2]); - if (primary_if) - batadv_hardif_free_ref(primary_if); - if (ret) batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "Impossible to send uevent for (%s,%s,%s) event (err: %d)\n", diff --git a/net/batman-adv/sysfs.h b/net/batman-adv/sysfs.h index 3fd1412b0620..479acf4c16f4 100644 --- a/net/batman-adv/sysfs.h +++ b/net/batman-adv/sysfs.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2010-2012 B.A.T.M.A.N. contributors: +/* Copyright (C) 2010-2013 B.A.T.M.A.N. contributors: * * Marek Lindner * diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index 22457a7952ba..5e89deeb9542 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2007-2012 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2013 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich, Antonio Quartulli * @@ -29,6 +29,10 @@ #include <linux/crc16.h> +/* hash class keys */ +static struct lock_class_key batadv_tt_local_hash_lock_class_key; +static struct lock_class_key batadv_tt_global_hash_lock_class_key; + static void batadv_send_roam_adv(struct batadv_priv *bat_priv, uint8_t *client, struct batadv_orig_node *orig_node); static void batadv_tt_purge(struct work_struct *work); @@ -48,18 +52,10 @@ static int batadv_compare_tt(const struct hlist_node *node, const void *data2) return (memcmp(data1, data2, ETH_ALEN) == 0 ? 
1 : 0); } -static void batadv_tt_start_timer(struct batadv_priv *bat_priv) -{ - INIT_DELAYED_WORK(&bat_priv->tt.work, batadv_tt_purge); - queue_delayed_work(batadv_event_workqueue, &bat_priv->tt.work, - msecs_to_jiffies(5000)); -} - static struct batadv_tt_common_entry * batadv_tt_hash_find(struct batadv_hashtable *hash, const void *data) { struct hlist_head *head; - struct hlist_node *node; struct batadv_tt_common_entry *tt_common_entry; struct batadv_tt_common_entry *tt_common_entry_tmp = NULL; uint32_t index; @@ -71,7 +67,7 @@ batadv_tt_hash_find(struct batadv_hashtable *hash, const void *data) head = &hash->table[index]; rcu_read_lock(); - hlist_for_each_entry_rcu(tt_common_entry, node, head, hash_entry) { + hlist_for_each_entry_rcu(tt_common_entry, head, hash_entry) { if (!batadv_compare_eth(tt_common_entry, data)) continue; @@ -112,7 +108,6 @@ batadv_tt_global_hash_find(struct batadv_priv *bat_priv, const void *data) struct batadv_tt_global_entry, common); return tt_global_entry; - } static void @@ -235,6 +230,9 @@ static int batadv_tt_local_init(struct batadv_priv *bat_priv) if (!bat_priv->tt.local_hash) return -ENOMEM; + batadv_hash_set_lock_class(bat_priv->tt.local_hash, + &batadv_tt_local_hash_lock_class_key); + return 0; } @@ -249,7 +247,6 @@ static void batadv_tt_global_free(struct batadv_priv *bat_priv, batadv_hash_remove(bat_priv->tt.global_hash, batadv_compare_tt, batadv_choose_orig, tt_global->common.addr); batadv_tt_global_entry_free_ref(tt_global); - } void batadv_tt_local_add(struct net_device *soft_iface, const uint8_t *addr, @@ -259,7 +256,6 @@ void batadv_tt_local_add(struct net_device *soft_iface, const uint8_t *addr, struct batadv_priv *bat_priv = netdev_priv(soft_iface); struct batadv_tt_local_entry *tt_local; struct batadv_tt_global_entry *tt_global; struct hlist_head *head; - struct hlist_node *node; struct batadv_tt_orig_list_entry *orig_entry; int hash_added; bool roamed_back = false; @@ -305,7 +301,11 @@ void batadv_tt_local_add(struct net_device *soft_iface, const uint8_t *addr, (uint8_t)atomic_read(&bat_priv->tt.vn)); memcpy(tt_local->common.addr, addr, ETH_ALEN); - tt_local->common.flags = BATADV_NO_FLAGS; + /* The local entry has to be marked as NEW to avoid to send it in + * a full table response going out before the next ttvn increment + * (consistency check) + */ + tt_local->common.flags = BATADV_TT_CLIENT_NEW; if (batadv_is_wifi_iface(ifindex)) tt_local->common.flags |= BATADV_TT_CLIENT_WIFI; atomic_set(&tt_local->common.refcount, 2); @@ -316,12 +316,6 @@ void batadv_tt_local_add(struct net_device *soft_iface, const uint8_t *addr, if (batadv_compare_eth(addr, soft_iface->dev_addr)) tt_local->common.flags |= BATADV_TT_CLIENT_NOPURGE; - /* The local entry has to be marked as NEW to avoid to send it in - * a full table response going out before the next ttvn increment - * (consistency check) - */ - tt_local->common.flags |= BATADV_TT_CLIENT_NEW; - hash_added = batadv_hash_add(bat_priv->tt.local_hash, batadv_compare_tt, batadv_choose_orig, &tt_local->common, &tt_local->common.hash_entry); @@ -343,7 +337,7 @@ check_roaming: /* These nodes are probably going to update their tt table */ head = &tt_global->orig_list; rcu_read_lock(); - hlist_for_each_entry_rcu(orig_entry, node, + hlist_for_each_entry_rcu(orig_entry, head, list) { batadv_send_roam_adv(bat_priv, tt_global->common.addr, orig_entry->orig_node); } @@ -391,25 +385,19 @@ static void batadv_tt_prepare_packet_buff(struct batadv_priv *bat_priv, int *packet_buff_len, int min_packet_len) { - struct batadv_hard_iface *primary_if; int req_len; -
primary_if = batadv_primary_if_get_selected(bat_priv); - req_len = min_packet_len; req_len += batadv_tt_len(atomic_read(&bat_priv->tt.local_changes)); /* if we have too many changes for one packet don't send any * and wait for the tt table request which will be fragmented */ - if ((!primary_if) || (req_len > primary_if->soft_iface->mtu)) + if (req_len > bat_priv->soft_iface->mtu) req_len = min_packet_len; batadv_tt_realloc_packet_buff(packet_buff, packet_buff_len, min_packet_len, req_len); - - if (primary_if) - batadv_hardif_free_ref(primary_if); } static int batadv_tt_changes_fill_buff(struct batadv_priv *bat_priv, @@ -472,37 +460,56 @@ int batadv_tt_local_seq_print_text(struct seq_file *seq, void *offset) struct batadv_priv *bat_priv = netdev_priv(net_dev); struct batadv_hashtable *hash = bat_priv->tt.local_hash; struct batadv_tt_common_entry *tt_common_entry; + struct batadv_tt_local_entry *tt_local; struct batadv_hard_iface *primary_if; - struct hlist_node *node; struct hlist_head *head; uint32_t i; + int last_seen_secs; + int last_seen_msecs; + unsigned long last_seen_jiffies; + bool no_purge; + uint16_t np_flag = BATADV_TT_CLIENT_NOPURGE; primary_if = batadv_seq_print_text_primary_if_get(seq); if (!primary_if) goto out; seq_printf(seq, - "Locally retrieved addresses (from %s) announced via TT (TTVN: %u):\n", - net_dev->name, (uint8_t)atomic_read(&bat_priv->tt.vn)); + "Locally retrieved addresses (from %s) announced via TT (TTVN: %u CRC: %#.4x):\n", + net_dev->name, (uint8_t)atomic_read(&bat_priv->tt.vn), + bat_priv->tt.local_crc); + seq_printf(seq, " %-13s %-7s %-10s\n", "Client", "Flags", + "Last seen"); for (i = 0; i < hash->size; i++) { head = &hash->table[i]; rcu_read_lock(); - hlist_for_each_entry_rcu(tt_common_entry, node, + hlist_for_each_entry_rcu(tt_common_entry, head, hash_entry) { - seq_printf(seq, " * %pM [%c%c%c%c%c]\n", + tt_local = container_of(tt_common_entry, + struct batadv_tt_local_entry, + common); + last_seen_jiffies = jiffies - tt_local->last_seen; + last_seen_msecs = jiffies_to_msecs(last_seen_jiffies); + last_seen_secs = last_seen_msecs / 1000; + last_seen_msecs = last_seen_msecs % 1000; + + no_purge = tt_common_entry->flags & np_flag; + + seq_printf(seq, " * %pM [%c%c%c%c%c] %3u.%03u\n", tt_common_entry->addr, (tt_common_entry->flags & BATADV_TT_CLIENT_ROAM ? 'R' : '.'), - (tt_common_entry->flags & - BATADV_TT_CLIENT_NOPURGE ? 'P' : '.'), + no_purge ? 'P' : '.', (tt_common_entry->flags & BATADV_TT_CLIENT_NEW ? 'N' : '.'), (tt_common_entry->flags & BATADV_TT_CLIENT_PENDING ? 'X' : '.'), (tt_common_entry->flags & - BATADV_TT_CLIENT_WIFI ? 'W' : '.')); + BATADV_TT_CLIENT_WIFI ? 'W' : '.'), + no_purge ? 0 : last_seen_secs, + no_purge ? 
0 : last_seen_msecs); } rcu_read_unlock(); } @@ -589,9 +596,9 @@ static void batadv_tt_local_purge_list(struct batadv_priv *bat_priv, { struct batadv_tt_local_entry *tt_local_entry; struct batadv_tt_common_entry *tt_common_entry; - struct hlist_node *node, *node_tmp; + struct hlist_node *node_tmp; - hlist_for_each_entry_safe(tt_common_entry, node, node_tmp, head, + hlist_for_each_entry_safe(tt_common_entry, node_tmp, head, hash_entry) { tt_local_entry = container_of(tt_common_entry, struct batadv_tt_local_entry, @@ -627,7 +634,6 @@ static void batadv_tt_local_purge(struct batadv_priv *bat_priv) batadv_tt_local_purge_list(bat_priv, head); spin_unlock_bh(list_lock); } - } static void batadv_tt_local_table_free(struct batadv_priv *bat_priv) @@ -636,7 +642,7 @@ static void batadv_tt_local_table_free(struct batadv_priv *bat_priv) spinlock_t *list_lock; /* protects write access to the hash lists */ struct batadv_tt_common_entry *tt_common_entry; struct batadv_tt_local_entry *tt_local; - struct hlist_node *node, *node_tmp; + struct hlist_node *node_tmp; struct hlist_head *head; uint32_t i; @@ -650,9 +656,9 @@ static void batadv_tt_local_table_free(struct batadv_priv *bat_priv) list_lock = &hash->list_locks[i]; spin_lock_bh(list_lock); - hlist_for_each_entry_safe(tt_common_entry, node, node_tmp, + hlist_for_each_entry_safe(tt_common_entry, node_tmp, head, hash_entry) { - hlist_del_rcu(node); + hlist_del_rcu(&tt_common_entry->hash_entry); tt_local = container_of(tt_common_entry, struct batadv_tt_local_entry, common); @@ -676,6 +682,9 @@ static int batadv_tt_global_init(struct batadv_priv *bat_priv) if (!bat_priv->tt.global_hash) return -ENOMEM; + batadv_hash_set_lock_class(bat_priv->tt.global_hash, + &batadv_tt_global_hash_lock_class_key); + return 0; } @@ -706,11 +715,10 @@ batadv_tt_global_orig_entry_find(const struct batadv_tt_global_entry *entry, { struct batadv_tt_orig_list_entry *tmp_orig_entry, *orig_entry = NULL; const struct hlist_head *head; - struct hlist_node *node; rcu_read_lock(); head = &entry->orig_list; - hlist_for_each_entry_rcu(tmp_orig_entry, node, head, list) { + hlist_for_each_entry_rcu(tmp_orig_entry, head, list) { if (tmp_orig_entry->orig_node != orig_node) continue; if (!atomic_inc_not_zero(&tmp_orig_entry->refcount)) @@ -894,7 +902,7 @@ out_remove: /* remove address from local hash if present */ local_flags = batadv_tt_local_remove(bat_priv, tt_addr, "global tt received", - !!(flags & BATADV_TT_CLIENT_ROAM)); + flags & BATADV_TT_CLIENT_ROAM); tt_global_entry->common.flags |= local_flags & BATADV_TT_CLIENT_WIFI; if (!(flags & BATADV_TT_CLIENT_ROAM)) @@ -922,12 +930,11 @@ batadv_transtable_best_orig(struct batadv_tt_global_entry *tt_global_entry) { struct batadv_neigh_node *router = NULL; struct hlist_head *head; - struct hlist_node *node; struct batadv_tt_orig_list_entry *orig_entry, *best_entry = NULL; int best_tq = 0; head = &tt_global_entry->orig_list; - hlist_for_each_entry_rcu(orig_entry, node, head, list) { + hlist_for_each_entry_rcu(orig_entry, head, list) { router = batadv_orig_node_get_router(orig_entry->orig_node); if (!router) continue; @@ -955,7 +962,6 @@ batadv_tt_global_print_entry(struct batadv_tt_global_entry *tt_global_entry, struct seq_file *seq) { struct hlist_head *head; - struct hlist_node *node; struct batadv_tt_orig_list_entry *orig_entry, *best_entry; struct batadv_tt_common_entry *tt_common_entry; uint16_t flags; @@ -967,10 +973,11 @@ batadv_tt_global_print_entry(struct batadv_tt_global_entry *tt_global_entry, best_entry = 
batadv_transtable_best_orig(tt_global_entry); if (best_entry) { last_ttvn = atomic_read(&best_entry->orig_node->last_ttvn); - seq_printf(seq, " %c %pM (%3u) via %pM (%3u) [%c%c%c]\n", + seq_printf(seq, + " %c %pM (%3u) via %pM (%3u) (%#.4x) [%c%c%c]\n", '*', tt_global_entry->common.addr, best_entry->ttvn, best_entry->orig_node->orig, - last_ttvn, + last_ttvn, best_entry->orig_node->tt_crc, (flags & BATADV_TT_CLIENT_ROAM ? 'R' : '.'), (flags & BATADV_TT_CLIENT_WIFI ? 'W' : '.'), (flags & BATADV_TT_CLIENT_TEMP ? 'T' : '.')); @@ -978,7 +985,7 @@ batadv_tt_global_print_entry(struct batadv_tt_global_entry *tt_global_entry, head = &tt_global_entry->orig_list; - hlist_for_each_entry_rcu(orig_entry, node, head, list) { + hlist_for_each_entry_rcu(orig_entry, head, list) { if (best_entry == orig_entry) continue; @@ -1001,7 +1008,6 @@ int batadv_tt_global_seq_print_text(struct seq_file *seq, void *offset) struct batadv_tt_common_entry *tt_common_entry; struct batadv_tt_global_entry *tt_global; struct batadv_hard_iface *primary_if; - struct hlist_node *node; struct hlist_head *head; uint32_t i; @@ -1012,14 +1018,15 @@ int batadv_tt_global_seq_print_text(struct seq_file *seq, void *offset) seq_printf(seq, "Globally announced TT entries received via the mesh %s\n", net_dev->name); - seq_printf(seq, " %-13s %s %-15s %s %s\n", - "Client", "(TTVN)", "Originator", "(Curr TTVN)", "Flags"); + seq_printf(seq, " %-13s %s %-15s %s (%-6s) %s\n", + "Client", "(TTVN)", "Originator", "(Curr TTVN)", "CRC", + "Flags"); for (i = 0; i < hash->size; i++) { head = &hash->table[i]; rcu_read_lock(); - hlist_for_each_entry_rcu(tt_common_entry, node, + hlist_for_each_entry_rcu(tt_common_entry, head, hash_entry) { tt_global = container_of(tt_common_entry, struct batadv_tt_global_entry, @@ -1039,17 +1046,16 @@ static void batadv_tt_global_del_orig_list(struct batadv_tt_global_entry *tt_global_entry) { struct hlist_head *head; - struct hlist_node *node, *safe; + struct hlist_node *safe; struct batadv_tt_orig_list_entry *orig_entry; spin_lock_bh(&tt_global_entry->list_lock); head = &tt_global_entry->orig_list; - hlist_for_each_entry_safe(orig_entry, node, safe, head, list) { - hlist_del_rcu(node); + hlist_for_each_entry_safe(orig_entry, safe, head, list) { + hlist_del_rcu(&orig_entry->list); batadv_tt_orig_list_entry_free_ref(orig_entry); } spin_unlock_bh(&tt_global_entry->list_lock); - } static void @@ -1059,18 +1065,18 @@ batadv_tt_global_del_orig_entry(struct batadv_priv *bat_priv, const char *message) { struct hlist_head *head; - struct hlist_node *node, *safe; + struct hlist_node *safe; struct batadv_tt_orig_list_entry *orig_entry; spin_lock_bh(&tt_global_entry->list_lock); head = &tt_global_entry->orig_list; - hlist_for_each_entry_safe(orig_entry, node, safe, head, list) { + hlist_for_each_entry_safe(orig_entry, safe, head, list) { if (orig_entry->orig_node == orig_node) { batadv_dbg(BATADV_DBG_TT, bat_priv, "Deleting %pM from global tt entry %pM: %s\n", orig_node->orig, tt_global_entry->common.addr, message); - hlist_del_rcu(node); + hlist_del_rcu(&orig_entry->list); batadv_tt_orig_list_entry_free_ref(orig_entry); } } @@ -1089,7 +1095,6 @@ batadv_tt_global_del_roaming(struct batadv_priv *bat_priv, { bool last_entry = true; struct hlist_head *head; - struct hlist_node *node; struct batadv_tt_orig_list_entry *orig_entry; /* no local entry exists, case 1: @@ -1098,7 +1103,7 @@ batadv_tt_global_del_roaming(struct batadv_priv *bat_priv, rcu_read_lock(); head = &tt_global_entry->orig_list; - hlist_for_each_entry_rcu(orig_entry, 
node, head, list) { + hlist_for_each_entry_rcu(orig_entry, head, list) { if (orig_entry->orig_node != orig_node) { last_entry = false; break; @@ -1183,7 +1188,7 @@ void batadv_tt_global_del_orig(struct batadv_priv *bat_priv, struct batadv_tt_common_entry *tt_common_entry; uint32_t i; struct batadv_hashtable *hash = bat_priv->tt.global_hash; - struct hlist_node *node, *safe; + struct hlist_node *safe; struct hlist_head *head; spinlock_t *list_lock; /* protects write access to the hash lists */ @@ -1195,7 +1200,7 @@ void batadv_tt_global_del_orig(struct batadv_priv *bat_priv, list_lock = &hash->list_locks[i]; spin_lock_bh(list_lock); - hlist_for_each_entry_safe(tt_common_entry, node, safe, + hlist_for_each_entry_safe(tt_common_entry, safe, head, hash_entry) { tt_global = container_of(tt_common_entry, struct batadv_tt_global_entry, @@ -1208,7 +1213,7 @@ void batadv_tt_global_del_orig(struct batadv_priv *bat_priv, batadv_dbg(BATADV_DBG_TT, bat_priv, "Deleting global tt entry %pM: %s\n", tt_global->common.addr, message); - hlist_del_rcu(node); + hlist_del_rcu(&tt_common_entry->hash_entry); batadv_tt_global_entry_free_ref(tt_global); } } @@ -1243,7 +1248,7 @@ static void batadv_tt_global_purge(struct batadv_priv *bat_priv) { struct batadv_hashtable *hash = bat_priv->tt.global_hash; struct hlist_head *head; - struct hlist_node *node, *node_tmp; + struct hlist_node *node_tmp; spinlock_t *list_lock; /* protects write access to the hash lists */ uint32_t i; char *msg = NULL; @@ -1255,7 +1260,7 @@ static void batadv_tt_global_purge(struct batadv_priv *bat_priv) list_lock = &hash->list_locks[i]; spin_lock_bh(list_lock); - hlist_for_each_entry_safe(tt_common, node, node_tmp, head, + hlist_for_each_entry_safe(tt_common, node_tmp, head, hash_entry) { tt_global = container_of(tt_common, struct batadv_tt_global_entry, @@ -1268,7 +1273,7 @@ static void batadv_tt_global_purge(struct batadv_priv *bat_priv) "Deleting global tt entry (%pM): %s\n", tt_global->common.addr, msg); - hlist_del_rcu(node); + hlist_del_rcu(&tt_common->hash_entry); batadv_tt_global_entry_free_ref(tt_global); } @@ -1282,7 +1287,7 @@ static void batadv_tt_global_table_free(struct batadv_priv *bat_priv) spinlock_t *list_lock; /* protects write access to the hash lists */ struct batadv_tt_common_entry *tt_common_entry; struct batadv_tt_global_entry *tt_global; - struct hlist_node *node, *node_tmp; + struct hlist_node *node_tmp; struct hlist_head *head; uint32_t i; @@ -1296,9 +1301,9 @@ static void batadv_tt_global_table_free(struct batadv_priv *bat_priv) list_lock = &hash->list_locks[i]; spin_lock_bh(list_lock); - hlist_for_each_entry_safe(tt_common_entry, node, node_tmp, + hlist_for_each_entry_safe(tt_common_entry, node_tmp, head, hash_entry) { - hlist_del_rcu(node); + hlist_del_rcu(&tt_common_entry->hash_entry); tt_global = container_of(tt_common_entry, struct batadv_tt_global_entry, common); @@ -1378,7 +1383,6 @@ static uint16_t batadv_tt_global_crc(struct batadv_priv *bat_priv, struct batadv_hashtable *hash = bat_priv->tt.global_hash; struct batadv_tt_common_entry *tt_common; struct batadv_tt_global_entry *tt_global; - struct hlist_node *node; struct hlist_head *head; uint32_t i; int j; @@ -1387,7 +1391,7 @@ static uint16_t batadv_tt_global_crc(struct batadv_priv *bat_priv, head = &hash->table[i]; rcu_read_lock(); - hlist_for_each_entry_rcu(tt_common, node, head, hash_entry) { + hlist_for_each_entry_rcu(tt_common, head, hash_entry) { tt_global = container_of(tt_common, struct batadv_tt_global_entry, common); @@ -1430,7 +1434,6 @@ static 
uint16_t batadv_tt_local_crc(struct batadv_priv *bat_priv) uint16_t total = 0, total_one; struct batadv_hashtable *hash = bat_priv->tt.local_hash; struct batadv_tt_common_entry *tt_common; - struct hlist_node *node; struct hlist_head *head; uint32_t i; int j; @@ -1439,7 +1442,7 @@ static uint16_t batadv_tt_local_crc(struct batadv_priv *bat_priv) head = &hash->table[i]; rcu_read_lock(); - hlist_for_each_entry_rcu(tt_common, node, head, hash_entry) { + hlist_for_each_entry_rcu(tt_common, head, hash_entry) { /* not yet committed clients have not to be taken into * account while computing the CRC */ @@ -1571,14 +1574,13 @@ static int batadv_tt_global_valid(const void *entry_ptr, static struct sk_buff * batadv_tt_response_fill_table(uint16_t tt_len, uint8_t ttvn, struct batadv_hashtable *hash, - struct batadv_hard_iface *primary_if, + struct batadv_priv *bat_priv, int (*valid_cb)(const void *, const void *), void *cb_data) { struct batadv_tt_common_entry *tt_common_entry; struct batadv_tt_query_packet *tt_response; struct batadv_tt_change *tt_change; - struct hlist_node *node; struct hlist_head *head; struct sk_buff *skb = NULL; uint16_t tt_tot, tt_count; @@ -1586,8 +1588,8 @@ batadv_tt_response_fill_table(uint16_t tt_len, uint8_t ttvn, uint32_t i; size_t len; - if (tt_query_size + tt_len > primary_if->soft_iface->mtu) { - tt_len = primary_if->soft_iface->mtu - tt_query_size; + if (tt_query_size + tt_len > bat_priv->soft_iface->mtu) { + tt_len = bat_priv->soft_iface->mtu - tt_query_size; tt_len -= tt_len % sizeof(struct batadv_tt_change); } tt_tot = tt_len / sizeof(struct batadv_tt_change); @@ -1608,7 +1610,7 @@ batadv_tt_response_fill_table(uint16_t tt_len, uint8_t ttvn, for (i = 0; i < hash->size; i++) { head = &hash->table[i]; - hlist_for_each_entry_rcu(tt_common_entry, node, + hlist_for_each_entry_rcu(tt_common_entry, head, hash_entry) { if (tt_count == tt_tot) break; @@ -1707,7 +1709,6 @@ batadv_send_other_tt_response(struct batadv_priv *bat_priv, { struct batadv_orig_node *req_dst_orig_node; struct batadv_orig_node *res_dst_orig_node = NULL; - struct batadv_hard_iface *primary_if = NULL; uint8_t orig_ttvn, req_ttvn, ttvn; int ret = false; unsigned char *tt_buff; @@ -1732,10 +1733,6 @@ batadv_send_other_tt_response(struct batadv_priv *bat_priv, if (!res_dst_orig_node) goto out; - primary_if = batadv_primary_if_get_selected(bat_priv); - if (!primary_if) - goto out; - orig_ttvn = (uint8_t)atomic_read(&req_dst_orig_node->last_ttvn); req_ttvn = tt_request->ttvn; @@ -1783,7 +1780,7 @@ batadv_send_other_tt_response(struct batadv_priv *bat_priv, skb = batadv_tt_response_fill_table(tt_len, ttvn, bat_priv->tt.global_hash, - primary_if, + bat_priv, batadv_tt_global_valid, req_dst_orig_node); if (!skb) @@ -1820,12 +1817,9 @@ out: batadv_orig_node_free_ref(res_dst_orig_node); if (req_dst_orig_node) batadv_orig_node_free_ref(req_dst_orig_node); - if (primary_if) - batadv_hardif_free_ref(primary_if); if (!ret) kfree_skb(skb); return ret; - } static bool @@ -1900,7 +1894,7 @@ batadv_send_my_tt_response(struct batadv_priv *bat_priv, skb = batadv_tt_response_fill_table(tt_len, ttvn, bat_priv->tt.local_hash, - primary_if, + bat_priv, batadv_tt_local_valid_entry, NULL); if (!skb) @@ -1946,7 +1940,7 @@ out: bool batadv_send_tt_response(struct batadv_priv *bat_priv, struct batadv_tt_query_packet *tt_request) { - if (batadv_is_my_mac(tt_request->dst)) { + if (batadv_is_my_mac(bat_priv, tt_request->dst)) { /* don't answer backbone gws! 
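Throughout this diff batadv_is_my_mac() gains a bat_priv argument, so "is this my MAC?" is answered per mesh instance rather than across all batman-adv interfaces on the host. A plausible shape of such a helper; its body is not part of this diff, and ether_addr_equal() stands in for whatever comparison helper the real code uses:

bool batadv_is_my_mac(struct batadv_priv *bat_priv, const uint8_t *addr)
{
	const struct batadv_hard_iface *hard_iface;
	bool my_mac = false;

	rcu_read_lock();
	list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) {
		if (hard_iface->if_status != BATADV_IF_ACTIVE)
			continue;

		/* only interfaces assigned to this mesh may match */
		if (hard_iface->soft_iface != bat_priv->soft_iface)
			continue;

		if (ether_addr_equal(hard_iface->net_dev->dev_addr, addr)) {
			my_mac = true;
			break;
		}
	}
	rcu_read_unlock();

	return my_mac;
}
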
*/ if (batadv_bla_is_backbone_gw_orig(bat_priv, tt_request->src)) return true; @@ -2111,7 +2105,9 @@ int batadv_tt_init(struct batadv_priv *bat_priv) if (ret < 0) return ret; - batadv_tt_start_timer(bat_priv); + INIT_DELAYED_WORK(&bat_priv->tt.work, batadv_tt_purge); + queue_delayed_work(batadv_event_workqueue, &bat_priv->tt.work, + msecs_to_jiffies(BATADV_TT_WORK_PERIOD)); return 1; } @@ -2261,7 +2257,8 @@ static void batadv_tt_purge(struct work_struct *work) batadv_tt_req_purge(bat_priv); batadv_tt_roam_purge(bat_priv); - batadv_tt_start_timer(bat_priv); + queue_delayed_work(batadv_event_workqueue, &bat_priv->tt.work, + msecs_to_jiffies(BATADV_TT_WORK_PERIOD)); } void batadv_tt_free(struct batadv_priv *bat_priv) @@ -2286,7 +2283,6 @@ static uint16_t batadv_tt_set_flags(struct batadv_hashtable *hash, uint32_t i; uint16_t changed_num = 0; struct hlist_head *head; - struct hlist_node *node; struct batadv_tt_common_entry *tt_common_entry; if (!hash) @@ -2296,7 +2292,7 @@ static uint16_t batadv_tt_set_flags(struct batadv_hashtable *hash, head = &hash->table[i]; rcu_read_lock(); - hlist_for_each_entry_rcu(tt_common_entry, node, + hlist_for_each_entry_rcu(tt_common_entry, head, hash_entry) { if (enable) { if ((tt_common_entry->flags & flags) == flags) @@ -2321,7 +2317,7 @@ static void batadv_tt_local_purge_pending_clients(struct batadv_priv *bat_priv) struct batadv_hashtable *hash = bat_priv->tt.local_hash; struct batadv_tt_common_entry *tt_common; struct batadv_tt_local_entry *tt_local; - struct hlist_node *node, *node_tmp; + struct hlist_node *node_tmp; struct hlist_head *head; spinlock_t *list_lock; /* protects write access to the hash lists */ uint32_t i; @@ -2334,7 +2330,7 @@ static void batadv_tt_local_purge_pending_clients(struct batadv_priv *bat_priv) list_lock = &hash->list_locks[i]; spin_lock_bh(list_lock); - hlist_for_each_entry_safe(tt_common, node, node_tmp, head, + hlist_for_each_entry_safe(tt_common, node_tmp, head, hash_entry) { if (!(tt_common->flags & BATADV_TT_CLIENT_PENDING)) continue; @@ -2344,7 +2340,7 @@ static void batadv_tt_local_purge_pending_clients(struct batadv_priv *bat_priv) tt_common->addr); atomic_dec(&bat_priv->tt.local_entry_num); - hlist_del_rcu(node); + hlist_del_rcu(&tt_common->hash_entry); tt_local = container_of(tt_common, struct batadv_tt_local_entry, common); @@ -2352,7 +2348,6 @@ static void batadv_tt_local_purge_pending_clients(struct batadv_priv *bat_priv) } spin_unlock_bh(list_lock); } - } static int batadv_tt_commit_changes(struct batadv_priv *bat_priv, @@ -2496,7 +2491,7 @@ void batadv_tt_update_orig(struct batadv_priv *bat_priv, orig_node->tt_crc != tt_crc) { request_table: batadv_dbg(BATADV_DBG_TT, bat_priv, - "TT inconsistency for %pM. Need to retrieve the correct information (ttvn: %u last_ttvn: %u crc: %u last_crc: %u num_changes: %u)\n", + "TT inconsistency for %pM. 
Need to retrieve the correct information (ttvn: %u last_ttvn: %u crc: %#.4x last_crc: %#.4x num_changes: %u)\n", orig_node->orig, ttvn, orig_ttvn, tt_crc, orig_node->tt_crc, tt_num_changes); batadv_send_tt_request(bat_priv, orig_node, ttvn, @@ -2520,7 +2515,7 @@ bool batadv_tt_global_client_is_roaming(struct batadv_priv *bat_priv, if (!tt_global_entry) goto out; - ret = !!(tt_global_entry->common.flags & BATADV_TT_CLIENT_ROAM); + ret = tt_global_entry->common.flags & BATADV_TT_CLIENT_ROAM; batadv_tt_global_entry_free_ref(tt_global_entry); out: return ret; @@ -2549,7 +2544,6 @@ bool batadv_tt_local_client_is_roaming(struct batadv_priv *bat_priv, batadv_tt_local_entry_free_ref(tt_local_entry); out: return ret; - } bool batadv_tt_add_temporary_global_entry(struct batadv_priv *bat_priv, diff --git a/net/batman-adv/translation-table.h b/net/batman-adv/translation-table.h index 46d4451a59ee..ab8e683b402f 100644 --- a/net/batman-adv/translation-table.h +++ b/net/batman-adv/translation-table.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2007-2012 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2013 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich, Antonio Quartulli * diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index ae9ac9aca8c5..aba8364c3689 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2007-2012 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2013 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * @@ -24,6 +24,9 @@ #include "bitarray.h" #include <linux/kernel.h> +/** + * Maximum overhead for the encapsulation for a payload packet + */ #define BATADV_HEADER_LEN \ (ETH_HLEN + max(sizeof(struct batadv_unicast_packet), \ sizeof(struct batadv_bcast_packet))) @@ -51,6 +54,22 @@ struct batadv_hard_iface_bat_iv { atomic_t ogm_seqno; }; +/** + * struct batadv_hard_iface - network device known to batman-adv + * @list: list node for batadv_hardif_list + * @if_num: identifier of the interface + * @if_status: status of the interface for batman-adv + * @net_dev: pointer to the net_device + * @frag_seqno: last fragment sequence number sent by this interface + * @hardif_obj: kobject of the per interface sysfs "mesh" directory + * @refcount: number of contexts the object is used + * @batman_adv_ptype: packet type describing packets that should be processed by + * batman-adv for this interface + * @soft_iface: the batman-adv interface which uses this network interface + * @rcu: struct used for freeing in an RCU-safe manner + * @bat_iv: BATMAN IV specific per hard interface data + * @cleanup_work: work queue callback item for hard interface deinit + */ struct batadv_hard_iface { struct list_head list; int16_t if_num; @@ -63,22 +82,56 @@ struct batadv_hard_iface { struct net_device *soft_iface; struct rcu_head rcu; struct batadv_hard_iface_bat_iv bat_iv; + struct work_struct cleanup_work; }; /** - * struct batadv_orig_node - structure for orig_list maintaining nodes of mesh - * @primary_addr: host's primary interface address - * @last_seen: when last packet from this node was received - * @bcast_seqno_reset: time when the broadcast seqno window was reset - * @batman_seqno_reset: time when the batman seqno window was reset - * @gw_flags: flags related to gateway class - * @flags: for now only VIS_SERVER flag - * @last_real_seqno: last and best known sequence number - * @last_ttl: ttl of last received packet - * @last_bcast_seqno: last broadcast sequence number received by this host - * - * @candidates: how many candidates
are available - * @selected: next bonding candidate + * struct batadv_orig_node - structure for orig_list maintaining nodes of mesh + * @orig: originator ethernet address + * @primary_addr: hosts primary interface address + * @router: router that should be used to reach this originator + * @batadv_dat_addr_t: address of the orig node in the distributed hash + * @bcast_own: bitfield containing the number of our OGMs this orig_node + * rebroadcasted "back" to us (relative to last_real_seqno) + * @bcast_own_sum: counted result of bcast_own + * @last_seen: time when last packet from this node was received + * @bcast_seqno_reset: time when the broadcast seqno window was reset + * @batman_seqno_reset: time when the batman seqno window was reset + * @gw_flags: flags related to gateway class + * @flags: for now only VIS_SERVER flag + * @last_ttvn: last seen translation table version number + * @tt_crc: CRC of the translation table + * @tt_buff: last tt changeset this node received from the orig node + * @tt_buff_len: length of the last tt changeset this node received from the + * orig node + * @tt_buff_lock: lock that protects tt_buff and tt_buff_len + * @tt_size: number of global TT entries announced by the orig node + * @tt_initialised: bool keeping track of whether or not this node have received + * any translation table information from the orig node yet + * @last_real_seqno: last and best known sequence number + * @last_ttl: ttl of last received packet + * @bcast_bits: bitfield containing the info which payload broadcast originated + * from this orig node this host already has seen (relative to + * last_bcast_seqno) + * @last_bcast_seqno: last broadcast sequence number received by this host + * @neigh_list: list of potential next hop neighbor towards this orig node + * @frag_list: fragmentation buffer list for fragment re-assembly + * @last_frag_packet: time when last fragmented packet from this node was + * received + * @neigh_list_lock: lock protecting neigh_list, router and bonding_list + * @hash_entry: hlist node for batadv_priv::orig_hash + * @bat_priv: pointer to soft_iface this orig node belongs to + * @ogm_cnt_lock: lock protecting bcast_own, bcast_own_sum, + * neigh_node->real_bits & neigh_node->real_packet_count + * @bcast_seqno_lock: lock protecting bcast_bits & last_bcast_seqno + * @bond_candidates: how many candidates are available + * @bond_list: list of bonding candidates + * @refcount: number of contexts the object is used + * @rcu: struct used for freeing in an RCU-safe manner + * @in_coding_list: list of nodes this orig can hear + * @out_coding_list: list of nodes that can hear this orig + * @in_coding_list_lock: protects in_coding_list + * @out_coding_list_lock: protects out_coding_list */ struct batadv_orig_node { uint8_t orig[ETH_ALEN]; @@ -94,11 +147,11 @@ struct batadv_orig_node { unsigned long batman_seqno_reset; uint8_t gw_flags; uint8_t flags; - atomic_t last_ttvn; /* last seen translation table version number */ + atomic_t last_ttvn; uint16_t tt_crc; unsigned char *tt_buff; int16_t tt_buff_len; - spinlock_t tt_buff_lock; /* protects tt_buff */ + spinlock_t tt_buff_lock; /* protects tt_buff & tt_buff_len */ atomic_t tt_size; bool tt_initialised; uint32_t last_real_seqno; @@ -107,23 +160,37 @@ struct batadv_orig_node { uint32_t last_bcast_seqno; struct hlist_head neigh_list; struct list_head frag_list; - spinlock_t neigh_list_lock; /* protects neigh_list and router */ - atomic_t refcount; - struct rcu_head rcu; + unsigned long last_frag_packet; + /* neigh_list_lock 
protects: neigh_list, router & bonding_list */ + spinlock_t neigh_list_lock; struct hlist_node hash_entry; struct batadv_priv *bat_priv; - unsigned long last_frag_packet; /* ogm_cnt_lock protects: bcast_own, bcast_own_sum, - * neigh_node->real_bits, neigh_node->real_packet_count + * neigh_node->real_bits & neigh_node->real_packet_count */ spinlock_t ogm_cnt_lock; - /* bcast_seqno_lock protects bcast_bits, last_bcast_seqno */ + /* bcast_seqno_lock protects: bcast_bits & last_bcast_seqno */ spinlock_t bcast_seqno_lock; - spinlock_t tt_list_lock; /* protects tt_list */ atomic_t bond_candidates; struct list_head bond_list; + atomic_t refcount; + struct rcu_head rcu; +#ifdef CONFIG_BATMAN_ADV_NC + struct list_head in_coding_list; + struct list_head out_coding_list; + spinlock_t in_coding_list_lock; /* Protects in_coding_list */ + spinlock_t out_coding_list_lock; /* Protects out_coding_list */ +#endif }; +/** + * struct batadv_gw_node - structure for orig nodes announcing gw capabilities + * @list: list node for batadv_priv_gw::list + * @orig_node: pointer to corresponding orig node + * @deleted: this struct is scheduled for deletion + * @refcount: number of contexts the object is used + * @rcu: struct used for freeing in an RCU-safe manner + */ struct batadv_gw_node { struct hlist_node list; struct batadv_orig_node *orig_node; @@ -132,13 +199,28 @@ struct batadv_gw_node { struct rcu_head rcu; }; -/* batadv_neigh_node - * @last_seen: when last packet via this neighbor was received +/** + * struct batadv_neigh_node - structure for single hop neighbors + * @list: list node for batadv_orig_node::neigh_list + * @addr: mac address of neigh node + * @tq_recv: ring buffer of received TQ values from this neigh node + * @tq_index: ring buffer index + * @tq_avg: averaged tq of all tq values in the ring buffer (tq_recv) + * @last_ttl: last received ttl from this neigh node + * @bonding_list: list node for batadv_orig_node::bond_list + * @last_seen: when last packet via this neighbor was received + * @real_bits: bitfield containing the number of OGMs received from this neigh + * node (relative to orig_node->last_real_seqno) + * @real_packet_count: counted result of real_bits + * @orig_node: pointer to corresponding orig_node + * @if_incoming: pointer to incoming hard interface + * @lq_update_lock: lock protecting tq_recv & tq_index + * @refcount: number of contexts the object is used + * @rcu: struct used for freeing in an RCU-safe manner */ struct batadv_neigh_node { struct hlist_node list; uint8_t addr[ETH_ALEN]; - uint8_t real_packet_count; uint8_t tq_recv[BATADV_TQ_GLOBAL_WINDOW_SIZE]; uint8_t tq_index; uint8_t tq_avg; @@ -146,13 +228,20 @@ struct batadv_neigh_node { struct list_head bonding_list; unsigned long last_seen; DECLARE_BITMAP(real_bits, BATADV_TQ_LOCAL_WINDOW_SIZE); - atomic_t refcount; - struct rcu_head rcu; + uint8_t real_packet_count; struct batadv_orig_node *orig_node; struct batadv_hard_iface *if_incoming; - spinlock_t lq_update_lock; /* protects: tq_recv, tq_index */ + spinlock_t lq_update_lock; /* protects tq_recv & tq_index */ + atomic_t refcount; + struct rcu_head rcu; }; +/** + * struct batadv_bcast_duplist_entry - structure for LAN broadcast suppression + * @orig[ETH_ALEN]: mac address of orig node orginating the broadcast + * @crc: crc32 checksum of broadcast payload + * @entrytime: time when the broadcast packet was received + */ #ifdef CONFIG_BATMAN_ADV_BLA struct batadv_bcast_duplist_entry { uint8_t orig[ETH_ALEN]; @@ -161,6 +250,44 @@ struct batadv_bcast_duplist_entry { }; 
#endif +/** + * enum batadv_counters - indices for traffic counters + * @BATADV_CNT_TX: transmitted payload traffic packet counter + * @BATADV_CNT_TX_BYTES: transmitted payload traffic bytes counter + * @BATADV_CNT_TX_DROPPED: dropped transmission payload traffic packet counter + * @BATADV_CNT_RX: received payload traffic packet counter + * @BATADV_CNT_RX_BYTES: received payload traffic bytes counter + * @BATADV_CNT_FORWARD: forwarded payload traffic packet counter + * @BATADV_CNT_FORWARD_BYTES: forwarded payload traffic bytes counter + * @BATADV_CNT_MGMT_TX: transmitted routing protocol traffic packet counter + * @BATADV_CNT_MGMT_TX_BYTES: transmitted routing protocol traffic bytes counter + * @BATADV_CNT_MGMT_RX: received routing protocol traffic packet counter + * @BATADV_CNT_MGMT_RX_BYTES: received routing protocol traffic bytes counter + * @BATADV_CNT_TT_REQUEST_TX: transmitted tt req traffic packet counter + * @BATADV_CNT_TT_REQUEST_RX: received tt req traffic packet counter + * @BATADV_CNT_TT_RESPONSE_TX: transmitted tt resp traffic packet counter + * @BATADV_CNT_TT_RESPONSE_RX: received tt resp traffic packet counter + * @BATADV_CNT_TT_ROAM_ADV_TX: transmitted tt roam traffic packet counter + * @BATADV_CNT_TT_ROAM_ADV_RX: received tt roam traffic packet counter + * @BATADV_CNT_DAT_GET_TX: transmitted dht GET traffic packet counter + * @BATADV_CNT_DAT_GET_RX: received dht GET traffic packet counter + * @BATADV_CNT_DAT_PUT_TX: transmitted dht PUT traffic packet counter + * @BATADV_CNT_DAT_PUT_RX: received dht PUT traffic packet counter + * @BATADV_CNT_DAT_CACHED_REPLY_TX: transmitted dat cache reply traffic packet + * counter + * @BATADV_CNT_NC_CODE: transmitted nc-combined traffic packet counter + * @BATADV_CNT_NC_CODE_BYTES: transmitted nc-combined traffic bytes counter + * @BATADV_CNT_NC_RECODE: transmitted nc-recombined traffic packet counter + * @BATADV_CNT_NC_RECODE_BYTES: transmitted nc-recombined traffic bytes counter + * @BATADV_CNT_NC_BUFFER: counter for packets buffered for later nc decoding + * @BATADV_CNT_NC_DECODE: received and nc-decoded traffic packet counter + * @BATADV_CNT_NC_DECODE_BYTES: received and nc-decoded traffic bytes counter + * @BATADV_CNT_NC_DECODE_FAILED: received and decode-failed traffic packet + * counter + * @BATADV_CNT_NC_SNIFFED: counter for nc-decoded packets received in promisc + * mode. + * @BATADV_CNT_NUM: number of traffic counters + */ enum batadv_counters { BATADV_CNT_TX, BATADV_CNT_TX_BYTES, @@ -186,20 +313,40 @@ enum batadv_counters { BATADV_CNT_DAT_PUT_RX, BATADV_CNT_DAT_CACHED_REPLY_TX, #endif +#ifdef CONFIG_BATMAN_ADV_NC + BATADV_CNT_NC_CODE, + BATADV_CNT_NC_CODE_BYTES, + BATADV_CNT_NC_RECODE, + BATADV_CNT_NC_RECODE_BYTES, + BATADV_CNT_NC_BUFFER, + BATADV_CNT_NC_DECODE, + BATADV_CNT_NC_DECODE_BYTES, + BATADV_CNT_NC_DECODE_FAILED, + BATADV_CNT_NC_SNIFFED, +#endif BATADV_CNT_NUM, }; /** * struct batadv_priv_tt - per mesh interface translation table data * @vn: translation table version number + * @ogm_append_cnt: counter of number of OGMs containing the local tt diff * @local_changes: changes registered in an originator interval - * @poss_change: Detect an ongoing roaming phase. If true, then this node - * received a roaming_adv and has to inspect every packet directed to it to - * check whether it still is the true destination or not. 
This flag will be - * reset to false as soon as this node's ttvn is increased * @changes_list: tracks tt local changes within an originator interval - * @req_list: list of pending tt_requests + * @local_hash: local translation table hash table + * @global_hash: global translation table hash table + * @req_list: list of pending & unanswered tt_requests + * @roam_list: list of the last roaming events of each client limiting the + * number of roaming events to avoid route flapping + * @changes_list_lock: lock protecting changes_list + * @req_list_lock: lock protecting req_list + * @roam_list_lock: lock protecting roam_list + * @local_entry_num: number of entries in the local hash table * @local_crc: Checksum of the local table, recomputed before sending a new OGM + * @last_changeset: last tt changeset this host has generated + * @last_changeset_len: length of last tt changeset this host has generated + * @last_changeset_lock: lock protecting last_changeset & last_changeset_len + * @work: work queue callback item for translation table purging */ struct batadv_priv_tt { atomic_t vn; @@ -217,36 +364,83 @@ struct batadv_priv_tt { uint16_t local_crc; unsigned char *last_changeset; int16_t last_changeset_len; - spinlock_t last_changeset_lock; /* protects last_changeset */ + /* protects last_changeset & last_changeset_len */ + spinlock_t last_changeset_lock; struct delayed_work work; }; +/** + * struct batadv_priv_bla - per mesh interface bridge loop avoidance data + * @num_requests: number of bla requests in flight + * @claim_hash: hash table containing mesh nodes this host has claimed + * @backbone_hash: hash table containing all detected backbone gateways + * @bcast_duplist: recently received broadcast packets array (for broadcast + * duplicate suppression) + * @bcast_duplist_curr: index of last broadcast packet added to bcast_duplist + * @bcast_duplist_lock: lock protecting bcast_duplist & bcast_duplist_curr + * @claim_dest: local claim data (e.g. claim group) + * @work: work queue callback item for cleanups & bla announcements + */ #ifdef CONFIG_BATMAN_ADV_BLA struct batadv_priv_bla { - atomic_t num_requests; /* number of bla requests in flight */ + atomic_t num_requests; struct batadv_hashtable *claim_hash; struct batadv_hashtable *backbone_hash; struct batadv_bcast_duplist_entry bcast_duplist[BATADV_DUPLIST_SIZE]; int bcast_duplist_curr; - /* protects bcast_duplist and bcast_duplist_curr */ + /* protects bcast_duplist & bcast_duplist_curr */ spinlock_t bcast_duplist_lock; struct batadv_bla_claim_dst claim_dest; struct delayed_work work; }; #endif +/** + * struct batadv_priv_debug_log - debug logging data + * @log_buff: buffer holding the logs (ring buffer) + * @log_start: index of next character to read + * @log_end: index of next character to write + * @lock: lock protecting log_buff, log_start & log_end + * @queue_wait: log reader's wait queue + */ +#ifdef CONFIG_BATMAN_ADV_DEBUG +struct batadv_priv_debug_log { + char log_buff[BATADV_LOG_BUF_LEN]; + unsigned long log_start; + unsigned long log_end; + spinlock_t lock; /* protects log_buff, log_start and log_end */ + wait_queue_head_t queue_wait; +}; +#endif + +/** + * struct batadv_priv_gw - per mesh interface gateway data + * @list: list of available gateway nodes + * @list_lock: lock protecting gw_list & curr_gw + * @curr_gw: pointer to currently selected gateway node + * @reselect: bool indicating a gateway re-selection is in progress + */ struct batadv_priv_gw { struct hlist_head list; - spinlock_t list_lock; /* protects gw_list and curr_gw */ + spinlock_t list_lock; /* protects gw_list & curr_gw */ struct batadv_gw_node __rcu *curr_gw; /* rcu protected pointer */ atomic_t reselect; }; +/** + * struct batadv_priv_vis - per mesh interface vis data + * @send_list: list of batadv_vis_info packets to be sent + * @hash: hash table containing vis data from other nodes in the network + * @hash_lock: lock protecting the hash table + * @list_lock: lock protecting my_info::recv_list + * @work: work queue callback item for vis packet sending + * @my_info: holds this node's vis data sent on a regular basis */ struct batadv_priv_vis { struct list_head send_list; struct batadv_hashtable *hash; spinlock_t hash_lock; /* protects hash */ - spinlock_t list_lock; /* protects info::recv_list */ + spinlock_t list_lock; /* protects my_info::recv_list */ struct delayed_work work; struct batadv_vis_info *my_info; }; @@ -265,30 +459,109 @@ struct batadv_priv_dat { }; #endif
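
Several of the structures here (batadv_priv_tt, batadv_priv_bla, batadv_priv_vis) embed a struct delayed_work, and the hunks at the top of this section drop the batadv_tt_start_timer()/batadv_start_vis_timer() helpers in favour of open-coded INIT_DELAYED_WORK() plus queue_delayed_work(), with the handler re-arming itself. A minimal sketch of that self-rearming pattern; foo_priv and the period constant are illustrative, not taken from this patch:

#include <linux/workqueue.h>
#include <linux/jiffies.h>

#define FOO_WORK_PERIOD_MS 1000

struct foo_priv {
	struct delayed_work work;
};

static void foo_periodic(struct work_struct *work)
{
	struct delayed_work *dwork = to_delayed_work(work);
	struct foo_priv *priv = container_of(dwork, struct foo_priv, work);

	/* ... periodic purging would happen here ... */

	/* re-arm from inside the handler, as batadv_tt_purge() and
	 * batadv_send_vis_packets() now do directly
	 */
	queue_delayed_work(system_wq, &priv->work,
			   msecs_to_jiffies(FOO_WORK_PERIOD_MS));
}

static void foo_init(struct foo_priv *priv)
{
	INIT_DELAYED_WORK(&priv->work, foo_periodic);
	queue_delayed_work(system_wq, &priv->work,
			   msecs_to_jiffies(FOO_WORK_PERIOD_MS));
}

static void foo_cleanup(struct foo_priv *priv)
{
	/* cancels a pending run and waits for a running one; safe
	 * against the self-requeueing handler above
	 */
	cancel_delayed_work_sync(&priv->work);
}
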
+/** + * struct batadv_priv_nc - per mesh interface network coding private data + * @work: work queue callback item for cleanup + * @debug_dir: dentry for nc subdir in batman-adv directory in debugfs + * @min_tq: only consider neighbors for encoding if neigh_tq > min_tq + * @max_fwd_delay: maximum packet forward delay to allow coding of packets + * @max_buffer_time: buffer time for sniffed packets used for decoding + * @timestamp_fwd_flush: timestamp of last forward packet queue flush + * @timestamp_sniffed_purge: timestamp of last sniffed packet queue purge + * @coding_hash: Hash table used to buffer skbs while waiting for another + * incoming skb to code it with. Skbs are added to the buffer just before being + * forwarded in routing.c + * @decoding_hash: Hash table used to buffer skbs that might be needed to decode + * a received coded skb. The buffer is used for 1) skbs arriving on the + * soft-interface; 2) skbs overheard on the hard-interface; and 3) skbs + * forwarded by batman-adv. + */ +struct batadv_priv_nc { + struct delayed_work work; + struct dentry *debug_dir; + u8 min_tq; + u32 max_fwd_delay; + u32 max_buffer_time; + unsigned long timestamp_fwd_flush; + unsigned long timestamp_sniffed_purge; + struct batadv_hashtable *coding_hash; + struct batadv_hashtable *decoding_hash; +};
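
The comment blocks added above and below follow the kernel-doc convention: a /** opener directly above the definition, one @member: line per field, and continuation lines aligned under their tag. In miniature (a hypothetical struct, not part of this patch):

#include <linux/types.h>
#include <linux/if_ether.h>
#include <linux/atomic.h>

/**
 * struct foo_entry - one tracked peer
 * @addr: mac address of the peer
 * @last_seen: jiffies timestamp of the last packet from @addr, used for
 *  purging stale entries
 * @refcount: number of contexts the object is used
 */
struct foo_entry {
	uint8_t addr[ETH_ALEN];
	unsigned long last_seen;
	atomic_t refcount;
};
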
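struct batadv_priv below keeps its traffic statistics in @bat_counters, a __percpu array with one u64 slot per index of enum batadv_counters, so the hot path can count without locks or cache-line bouncing. Roughly how such counters are allocated, bumped, and folded for reporting; the foo_* names are illustrative, not the batman-adv helpers themselves:

#include <linux/percpu.h>
#include <linux/cpumask.h>

/* one u64 slot per counter index, replicated for every CPU */
static u64 __percpu *foo_alloc_counters(size_t num_counters)
{
	return __alloc_percpu(num_counters * sizeof(u64),
			      __alignof__(u64));
}

/* lock-less: only this CPU's replica is touched */
static void foo_add_counter(u64 __percpu *counters, size_t idx, size_t count)
{
	this_cpu_add(counters[idx], count);
}

/* fold all per-CPU replicas into a single total for reporting */
static u64 foo_sum_counter(u64 __percpu *counters, size_t idx)
{
	u64 sum = 0;
	int cpu;

	for_each_possible_cpu(cpu)
		sum += per_cpu_ptr(counters, cpu)[idx];

	return sum;
}
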
+/** + * struct batadv_priv - per mesh interface data + * @mesh_state: current status of the mesh (inactive/active/deactivating) + * @soft_iface: net device which holds this struct as private data + * @stats: structure holding the data for the ndo_get_stats() call + * @bat_counters: mesh internal traffic statistic counters (see batadv_counters) + * @aggregated_ogms: bool indicating whether OGM aggregation is enabled + * @bonding: bool indicating whether traffic bonding is enabled + * @fragmentation: bool indicating whether traffic fragmentation is enabled + * @ap_isolation: bool indicating whether ap isolation is enabled + * @bridge_loop_avoidance: bool indicating whether bridge loop avoidance is + * enabled + * @distributed_arp_table: bool indicating whether distributed ARP table is + * enabled + * @vis_mode: vis operation: client or server (see batadv_vis_packettype) + * @gw_mode: gateway operation: off, client or server (see batadv_gw_modes) + * @gw_sel_class: gateway selection class (applies if gw_mode client) + * @gw_bandwidth: gateway announced bandwidth (applies if gw_mode server) + * @orig_interval: OGM broadcast interval in milliseconds + * @hop_penalty: penalty which will be applied to an OGM's tq-field on every hop + * @log_level: configured log level (see batadv_dbg_level) + * @bcast_seqno: last sent broadcast packet sequence number + * @bcast_queue_left: number of remaining buffered broadcast packet slots + * @batman_queue_left: number of remaining OGM packet slots + * @num_ifaces: number of interfaces assigned to this mesh interface + * @mesh_obj: kobject for sysfs mesh subdirectory + * @debug_dir: dentry for debugfs batman-adv subdirectory + * @forw_bat_list: list of aggregated OGMs that will be forwarded + * @forw_bcast_list: list of broadcast packets that will be rebroadcasted + * @orig_hash: hash table containing mesh participants (orig nodes) + * @forw_bat_list_lock: lock protecting forw_bat_list + * @forw_bcast_list_lock: lock protecting forw_bcast_list + * @orig_work: work queue callback item for orig node purging + * @cleanup_work: work queue callback item for soft interface deinit + * @primary_if: one of the hard interfaces assigned to this mesh interface + * becomes the primary interface + * @bat_algo_ops: routing algorithm used by this mesh interface + * @bla: bridge loop avoidance data + * @debug_log: debug logging data + * @gw: gateway data + * @tt: translation table data + * @vis: vis data + * @dat: distributed arp table data + * @network_coding: bool indicating whether network coding is enabled + * @nc: network coding data + */ struct batadv_priv { atomic_t mesh_state; + struct net_device *soft_iface; struct net_device_stats stats; uint64_t __percpu *bat_counters; /* Per cpu counters */ - atomic_t aggregated_ogms; /* boolean */ - atomic_t bonding; /* boolean */ - atomic_t fragmentation; /* boolean */ - atomic_t ap_isolation; /* boolean */ - atomic_t bridge_loop_avoidance; /* boolean */ + atomic_t aggregated_ogms; + atomic_t bonding; + atomic_t fragmentation; + atomic_t ap_isolation; +#ifdef CONFIG_BATMAN_ADV_BLA + atomic_t bridge_loop_avoidance; +#endif #ifdef CONFIG_BATMAN_ADV_DAT - atomic_t distributed_arp_table; /* boolean */ + atomic_t distributed_arp_table; +#endif + atomic_t vis_mode; + atomic_t gw_mode; + atomic_t gw_sel_class; + atomic_t gw_bandwidth; + atomic_t orig_interval; + atomic_t hop_penalty; +#ifdef CONFIG_BATMAN_ADV_DEBUG + atomic_t log_level; +#endif - atomic_t vis_mode; /* VIS_TYPE_* */ - atomic_t gw_mode; /* GW_MODE_* */ - atomic_t gw_sel_class; /* uint */ - atomic_t gw_bandwidth; /* gw bandwidth */ - atomic_t orig_interval; /* uint */ - atomic_t hop_penalty; /* uint */ - atomic_t log_level; /* uint */ atomic_t bcast_seqno; atomic_t bcast_queue_left; atomic_t batman_queue_left; char num_ifaces; - struct batadv_debug_log *debug_log; struct kobject *mesh_obj; struct dentry *debug_dir; struct hlist_head forw_bat_list; @@ -297,34 +570,118 @@ struct batadv_priv { spinlock_t forw_bat_list_lock; /* protects forw_bat_list */ spinlock_t forw_bcast_list_lock; /* protects forw_bcast_list */ struct delayed_work orig_work; + struct work_struct cleanup_work; struct batadv_hard_iface __rcu *primary_if; /* rcu protected pointer */ struct batadv_algo_ops *bat_algo_ops; #ifdef CONFIG_BATMAN_ADV_BLA struct batadv_priv_bla bla; #endif +#ifdef CONFIG_BATMAN_ADV_DEBUG + struct batadv_priv_debug_log *debug_log; +#endif struct batadv_priv_gw gw; struct batadv_priv_tt tt; struct batadv_priv_vis vis; #ifdef CONFIG_BATMAN_ADV_DAT struct batadv_priv_dat dat; #endif +#ifdef CONFIG_BATMAN_ADV_NC + atomic_t network_coding; + struct batadv_priv_nc nc; +#endif /* CONFIG_BATMAN_ADV_NC */ }; +/** + * struct batadv_socket_client - layer2 icmp socket client data + * @queue_list: packet queue for packets destined for this socket client + * @queue_len: number of packets in the packet queue (queue_list) + * @index: socket client's index in the batadv_socket_client_hash + * @lock: lock protecting queue_list, queue_len & index + * @queue_wait: socket client's wait queue + * @bat_priv: pointer to soft_iface this client belongs to + */ struct batadv_socket_client { struct list_head queue_list; unsigned int queue_len; unsigned char index; - spinlock_t lock; /* protects queue_list, queue_len, index */ + spinlock_t lock; /* protects queue_list, queue_len & index */ wait_queue_head_t queue_wait; struct batadv_priv *bat_priv; }; +/** + * struct batadv_socket_packet - layer2 icmp packet for socket client + * @list: list node for batadv_socket_client::queue_list + * @icmp_len: size of the layer2 icmp packet + * @icmp_packet: layer2 icmp packet + */ struct batadv_socket_packet { struct list_head list; size_t icmp_len; struct batadv_icmp_packet_rr icmp_packet; }; +/** + * struct batadv_bla_backbone_gw - batman-adv gateway bridged into the LAN + * @orig: originator address of backbone node (mac address of primary iface) + * @vid: vlan id this gateway was detected on + * @hash_entry: hlist node for batadv_priv_bla::backbone_hash + * @bat_priv: pointer to soft_iface this backbone gateway belongs to + * @lasttime: last time we heard of this backbone gw + * @wait_periods: grace time for bridge forward delays and bla group forming at + * bootup phase - no bcast traffic is forwarded until it has elapsed + * @request_sent: if this bool is set to true we are out of sync with this + * backbone gateway - no bcast traffic is forwarded until the situation is + * resolved + * @crc: crc16 checksum over all claims + * @refcount: number of contexts the object is used + * @rcu: struct used for freeing in an RCU-safe manner + */ +#ifdef CONFIG_BATMAN_ADV_BLA +struct batadv_bla_backbone_gw { + uint8_t orig[ETH_ALEN]; + short vid; + struct hlist_node hash_entry; +
struct batadv_priv *bat_priv; + unsigned long lasttime; + atomic_t wait_periods; + atomic_t request_sent; + uint16_t crc; + atomic_t refcount; + struct rcu_head rcu; +}; + +/** + * struct batadv_bla_claim - claimed non-mesh client structure + * @addr: mac address of claimed non-mesh client + * @vid: vlan id this client was detected on + * @batadv_bla_backbone_gw: pointer to backbone gw claiming this client + * @lasttime: last time we heard of claim (locals only) + * @hash_entry: hlist node for batadv_priv_bla::claim_hash + * @refcount: number of contexts the object is used + * @rcu: struct used for freeing in an RCU-safe manner + */ +struct batadv_bla_claim { + uint8_t addr[ETH_ALEN]; + short vid; + struct batadv_bla_backbone_gw *backbone_gw; + unsigned long lasttime; + struct hlist_node hash_entry; + struct rcu_head rcu; + atomic_t refcount; +}; +#endif + +/** + * struct batadv_tt_common_entry - tt local & tt global common data + * @addr: mac address of non-mesh client + * @hash_entry: hlist node for batadv_priv_tt::local_hash or for + * batadv_priv_tt::global_hash + * @flags: various state handling flags (see batadv_tt_client_flags) + * @added_at: timestamp used for purging stale tt common entries + * @refcount: number of contexts the object is used + * @rcu: struct used for freeing in an RCU-safe manner + */ struct batadv_tt_common_entry { uint8_t addr[ETH_ALEN]; struct hlist_node hash_entry; @@ -334,62 +691,76 @@ struct batadv_tt_common_entry { struct rcu_head rcu; }; +/** + * struct batadv_tt_local_entry - translation table local entry data + * @common: general translation table data + * @last_seen: timestamp used for purging stale tt local entries + */ struct batadv_tt_local_entry { struct batadv_tt_common_entry common; unsigned long last_seen; }; +/** + * struct batadv_tt_global_entry - translation table global entry data + * @common: general translation table data + * @orig_list: list of orig nodes announcing this non-mesh client + * @list_lock: lock protecting orig_list + * @roam_at: time at which TT_GLOBAL_ROAM was set + */ struct batadv_tt_global_entry { struct batadv_tt_common_entry common; struct hlist_head orig_list; - spinlock_t list_lock; /* protects the list */ - unsigned long roam_at; /* time at which TT_GLOBAL_ROAM was set */ + spinlock_t list_lock; /* protects orig_list */ + unsigned long roam_at; }; +/** + * struct batadv_tt_orig_list_entry - orig node announcing a non-mesh client + * @orig_node: pointer to orig node announcing this non-mesh client + * @ttvn: translation table version number which added the non-mesh client + * @list: list node for batadv_tt_global_entry::orig_list + * @refcount: number of contexts the object is used + * @rcu: struct used for freeing in an RCU-safe manner + */ struct batadv_tt_orig_list_entry { struct batadv_orig_node *orig_node; uint8_t ttvn; - atomic_t refcount; - struct rcu_head rcu; struct hlist_node list; -}; - -#ifdef CONFIG_BATMAN_ADV_BLA -struct batadv_backbone_gw { - uint8_t orig[ETH_ALEN]; - short vid; /* used VLAN ID */ - struct hlist_node hash_entry; - struct batadv_priv *bat_priv; - unsigned long lasttime; /* last time we heard of this backbone gw */ - atomic_t wait_periods; - atomic_t request_sent; atomic_t refcount; struct rcu_head rcu; - uint16_t crc; /* crc checksum over all claims */ }; -struct batadv_claim { - uint8_t addr[ETH_ALEN]; - short vid; - struct batadv_backbone_gw *backbone_gw; - unsigned long lasttime; /* last time we heard of claim (locals only) */ - struct rcu_head rcu; - atomic_t refcount; - struct 
hlist_node hash_entry; -}; -#endif - +/** + * struct batadv_tt_change_node - structure for tt changes that occurred + * @list: list node for batadv_priv_tt::changes_list + * @change: holds the actual translation table diff data + */ struct batadv_tt_change_node { struct list_head list; struct batadv_tt_change change; }; +/** + * struct batadv_tt_req_node - data to keep track of the tt requests in flight + * @addr: mac address of the originator this request was sent to + * @issued_at: timestamp used for purging stale tt requests + * @list: list node for batadv_priv_tt::req_list + */ struct batadv_tt_req_node { uint8_t addr[ETH_ALEN]; unsigned long issued_at; struct list_head list; }; +/** + * struct batadv_tt_roam_node - roaming client data + * @addr: mac address of the client in the roaming phase + * @counter: number of allowed roaming events per client within a single + * OGM interval (changes are committed with each OGM) + * @first_time: timestamp used for purging stale roaming node entries + * @list: list node for batadv_priv_tt::roam_list + */ struct batadv_tt_roam_node { uint8_t addr[ETH_ALEN]; atomic_t counter; @@ -397,8 +768,88 @@ struct batadv_tt_roam_node { struct list_head list; }; -/* forw_packet - structure for forw_list maintaining packets to be - * send/forwarded +/** + * struct batadv_nc_node - network coding node + * @list: next and prev pointer for the list handling + * @addr: the node's mac address + * @refcount: number of contexts the object is used by + * @rcu: struct used for freeing in an RCU-safe manner + * @orig_node: pointer to corresponding orig node struct + * @last_seen: timestamp of last ogm received from this node + */ +struct batadv_nc_node { + struct list_head list; + uint8_t addr[ETH_ALEN]; + atomic_t refcount; + struct rcu_head rcu; + struct batadv_orig_node *orig_node; + unsigned long last_seen; +}; + +/** + * struct batadv_nc_path - network coding path + * @hash_entry: next and prev pointer for the list handling + * @rcu: struct used for freeing in an RCU-safe manner + * @refcount: number of contexts the object is used by + * @packet_list: list of buffered packets for this path + * @packet_list_lock: access lock for packet list + * @next_hop: next hop (destination) of path + * @prev_hop: previous hop (source) of path + * @last_valid: timestamp for last validation of path + */ +struct batadv_nc_path { + struct hlist_node hash_entry; + struct rcu_head rcu; + atomic_t refcount; + struct list_head packet_list; + spinlock_t packet_list_lock; /* Protects packet_list */ + uint8_t next_hop[ETH_ALEN]; + uint8_t prev_hop[ETH_ALEN]; + unsigned long last_valid; +}; + +/** + * struct batadv_nc_packet - network coding packet used when coding and + * decoding packets + * @list: next and prev pointer for the list handling + * @packet_id: crc32 checksum of skb data + * @timestamp: field containing the info when the packet was added to path + * @neigh_node: pointer to original next hop neighbor of skb + * @skb: skb which can be encoded or used for decoding + * @nc_path: pointer to path this nc packet is attached to + */ +struct batadv_nc_packet { + struct list_head list; + __be32 packet_id; + unsigned long timestamp; + struct batadv_neigh_node *neigh_node; + struct sk_buff *skb; + struct batadv_nc_path *nc_path; +}; +
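
batadv_nc_node and batadv_nc_path above combine an atomic_t refcount with a struct rcu_head, the same lifetime scheme the existing tt and orig structures use: lookups under rcu_read_lock() take a reference only while the count is still non-zero, and the final put defers the kfree() past a grace period so concurrent RCU walkers never touch freed memory. A generic sketch of the pattern; foo_node is illustrative, not from this patch:

#include <linux/atomic.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct foo_node {
	struct hlist_node hash_entry;
	atomic_t refcount;
	struct rcu_head rcu;
};

/* called under rcu_read_lock(): must not revive an object whose
 * count has already dropped to zero
 */
static struct foo_node *foo_node_get(struct foo_node *node)
{
	if (!atomic_inc_not_zero(&node->refcount))
		return NULL;
	return node;
}

static void foo_node_put(struct foo_node *node)
{
	/* last put: free only after a grace period, so pending
	 * hlist_for_each_entry_rcu() walkers can finish safely
	 */
	if (atomic_dec_and_test(&node->refcount))
		kfree_rcu(node, rcu);
}
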
+/** + * batadv_skb_cb - control buffer structure used to store private data relevant + * to batman-adv in the skb->cb buffer in skbs. + * @decoded: Marks a skb as decoded, which is checked when searching for coding + * opportunities in network-coding.c + */ +struct batadv_skb_cb { + bool decoded; +}; + +/** + * struct batadv_forw_packet - structure for bcast packets to be sent/forwarded + * @list: list node for batadv_socket_client::queue_list + * @send_time: execution time for delayed_work (packet sending) + * @own: bool for locally generated packets (local OGMs are re-scheduled after + * sending) + * @skb: bcast packet's skb buffer + * @packet_len: size of aggregated OGM packet inside the skb buffer + * @direct_link_flags: direct link flags for aggregated OGM packets + * @num_packets: counter for bcast packet retransmission + * @delayed_work: work queue callback item for packet sending + * @if_incoming: pointer to incoming hard-iface or primary iface if locally + * generated packet */ struct batadv_forw_packet { struct hlist_node list; @@ -412,72 +863,98 @@ struct batadv_forw_packet { struct batadv_hard_iface *if_incoming; }; -/* While scanning for vis-entries of a particular vis-originator - * this list collects its interfaces to create a subgraph/cluster - * out of them later +/** + * struct batadv_frag_packet_list_entry - storage for fragment packet + * @list: list node for orig_node::frag_list + * @seqno: sequence number of the fragment + * @skb: fragment's skb buffer */ -struct batadv_if_list_entry { - uint8_t addr[ETH_ALEN]; - bool primary; - struct hlist_node list; -}; - -struct batadv_debug_log { - char log_buff[BATADV_LOG_BUF_LEN]; - unsigned long log_start; - unsigned long log_end; - spinlock_t lock; /* protects log_buff, log_start and log_end */ - wait_queue_head_t queue_wait; -}; - struct batadv_frag_packet_list_entry { struct list_head list; uint16_t seqno; struct sk_buff *skb; }; +/** + * struct batadv_vis_info - local data for vis information + * @first_seen: timestamp used for purging stale vis info entries + * @recv_list: List of server-neighbors we have received this packet from. This + * packet should not be re-forwarded to them again. List elements are struct + * batadv_vis_recvlist_node + * @send_list: list of packets to be forwarded + * @refcount: number of contexts the object is used + * @hash_entry: hlist node for batadv_priv_vis::hash + * @bat_priv: pointer to soft_iface this orig node belongs to + * @skb_packet: contains the vis packet + */ struct batadv_vis_info { unsigned long first_seen; - /* list of server-neighbors we received a vis-packet - * from. we should not reply to them. - */ struct list_head recv_list; struct list_head send_list; struct kref refcount; struct hlist_node hash_entry; struct batadv_priv *bat_priv; - /* this packet might be part of the vis send queue. */ struct sk_buff *skb_packet; - /* vis_info may follow here */ } __packed; +/** + * struct batadv_vis_info_entry - contains link information for vis + * @src: source MAC of the link, all zero for local TT entry + * @dest: destination MAC of the link, client mac address for local TT entry + * @quality: transmission quality of the link, or 0 for local TT entry + */ struct batadv_vis_info_entry { uint8_t src[ETH_ALEN]; uint8_t dest[ETH_ALEN]; - uint8_t quality; /* quality = 0 client */ + uint8_t quality; } __packed; -struct batadv_recvlist_node { +/** + * struct batadv_vis_recvlist_node - list entry for batadv_vis_info::recv_list + * @list: list node for batadv_vis_info::recv_list + * @mac: MAC address of the originator from where the vis_info was received + */ +struct batadv_vis_recvlist_node { struct list_head list; uint8_t mac[ETH_ALEN]; }; +/** + * struct batadv_vis_if_list_entry - auxiliary data for vis data generation + * @addr: MAC address of the interface + * @primary: true if this interface is the primary interface + * @list: list node for the interface list + * + * While scanning for vis-entries of a particular vis-originator + * this list collects its interfaces to create a subgraph/cluster + * out of them later + */ struct batadv_vis_if_list_entry { uint8_t addr[ETH_ALEN]; bool primary; struct hlist_node list; }; +/** + * struct batadv_algo_ops - mesh algorithm callbacks + * @list: list node for the batadv_algo_list + * @name: name of the algorithm + * @bat_iface_enable: init routing info when hard-interface is enabled + * @bat_iface_disable: de-init routing info when hard-interface is disabled + * @bat_iface_update_mac: (re-)init mac addresses of the protocol information + * belonging to this hard-interface + * @bat_primary_iface_set: called when primary interface is selected / changed + * @bat_ogm_schedule: prepare a new outgoing OGM for the send queue + * @bat_ogm_emit: send scheduled OGM + */ struct batadv_algo_ops { struct hlist_node list; char *name; - /* init routing info when hard-interface is enabled */ int (*bat_iface_enable)(struct batadv_hard_iface *hard_iface); - /* de-init routing info when hard-interface is disabled */ void (*bat_iface_disable)(struct batadv_hard_iface *hard_iface); - /* (re-)init mac addresses of the protocol information - * belonging to this hard-interface - */ void (*bat_iface_update_mac)(struct batadv_hard_iface *hard_iface); - /* called when primary interface is selected / changed */ void (*bat_primary_iface_set)(struct batadv_hard_iface *hard_iface); - /* prepare a new outgoing OGM for the send queue */ void (*bat_ogm_schedule)(struct batadv_hard_iface *hard_iface); - /* send scheduled OGM */ void (*bat_ogm_emit)(struct batadv_forw_packet *forw_packet); }; diff --git a/net/batman-adv/unicast.c b/net/batman-adv/unicast.c index 10aff49fcf25..0bb3b5982f94 100644 --- a/net/batman-adv/unicast.c +++ b/net/batman-adv/unicast.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2010-2012 B.A.T.M.A.N. contributors: +/* Copyright (C) 2010-2013 B.A.T.M.A.N.
contributors: * * Andreas Langer * @@ -122,7 +122,7 @@ batadv_frag_search_packet(struct list_head *head, { struct batadv_frag_packet_list_entry *tfp; struct batadv_unicast_frag_packet *tmp_up = NULL; - int is_head_tmp, is_head; + bool is_head_tmp, is_head; uint16_t search_seqno; if (up->flags & BATADV_UNI_FRAG_HEAD) @@ -130,10 +130,9 @@ batadv_frag_search_packet(struct list_head *head, else search_seqno = ntohs(up->seqno)-1; - is_head = !!(up->flags & BATADV_UNI_FRAG_HEAD); + is_head = up->flags & BATADV_UNI_FRAG_HEAD; list_for_each_entry(tfp, head, list) { - if (!tfp->skb) continue; @@ -143,7 +142,7 @@ batadv_frag_search_packet(struct list_head *head, tmp_up = (struct batadv_unicast_frag_packet *)tfp->skb->data; if (tfp->seqno == search_seqno) { - is_head_tmp = !!(tmp_up->flags & BATADV_UNI_FRAG_HEAD); + is_head_tmp = tmp_up->flags & BATADV_UNI_FRAG_HEAD; if (is_head_tmp != is_head) return tfp; else @@ -162,7 +161,6 @@ void batadv_frag_list_free(struct list_head *head) struct batadv_frag_packet_list_entry *pf, *tmp_pf; if (!list_empty(head)) { - list_for_each_entry_safe(pf, tmp_pf, head, list) { kfree_skb(pf->skb); list_del(&pf->list); diff --git a/net/batman-adv/unicast.h b/net/batman-adv/unicast.h index 61abba58bd8f..429cf8a4a31e 100644 --- a/net/batman-adv/unicast.h +++ b/net/batman-adv/unicast.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2010-2012 B.A.T.M.A.N. contributors: +/* Copyright (C) 2010-2013 B.A.T.M.A.N. contributors: * * Andreas Langer * diff --git a/net/batman-adv/vis.c b/net/batman-adv/vis.c index 0f65a9de5f74..1625e5793a89 100644 --- a/net/batman-adv/vis.c +++ b/net/batman-adv/vis.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2008-2012 B.A.T.M.A.N. contributors: +/* Copyright (C) 2008-2013 B.A.T.M.A.N. contributors: * * Simon Wunderlich * @@ -28,14 +28,15 @@ #define BATADV_MAX_VIS_PACKET_SIZE 1000 -static void batadv_start_vis_timer(struct batadv_priv *bat_priv); +/* hash class keys */ +static struct lock_class_key batadv_vis_hash_lock_class_key; /* free the info */ static void batadv_free_info(struct kref *ref) { struct batadv_vis_info *info; struct batadv_priv *bat_priv; - struct batadv_recvlist_node *entry, *tmp; + struct batadv_vis_recvlist_node *entry, *tmp; info = container_of(ref, struct batadv_vis_info, refcount); bat_priv = info->bat_priv; @@ -96,7 +97,6 @@ batadv_vis_hash_find(struct batadv_priv *bat_priv, const void *data) { struct batadv_hashtable *hash = bat_priv->vis.hash; struct hlist_head *head; - struct hlist_node *node; struct batadv_vis_info *vis_info, *vis_info_tmp = NULL; uint32_t index; @@ -107,8 +107,8 @@ batadv_vis_hash_find(struct batadv_priv *bat_priv, const void *data) head = &hash->table[index]; rcu_read_lock(); - hlist_for_each_entry_rcu(vis_info, node, head, hash_entry) { - if (!batadv_vis_info_cmp(node, data)) + hlist_for_each_entry_rcu(vis_info, head, hash_entry) { + if (!batadv_vis_info_cmp(&vis_info->hash_entry, data)) continue; vis_info_tmp = vis_info; @@ -126,10 +126,9 @@ static void batadv_vis_data_insert_interface(const uint8_t *interface, struct hlist_head *if_list, bool primary) { - struct batadv_if_list_entry *entry; - struct hlist_node *pos; + struct batadv_vis_if_list_entry *entry; - hlist_for_each_entry(entry, pos, if_list, list) { + hlist_for_each_entry(entry, if_list, list) { if (batadv_compare_eth(entry->addr, interface)) return; } @@ -146,12 +145,11 @@ static void batadv_vis_data_insert_interface(const uint8_t *interface, static void batadv_vis_data_read_prim_sec(struct seq_file *seq, const struct hlist_head *if_list) { - struct 
batadv_if_list_entry *entry; - struct hlist_node *pos; + struct batadv_vis_if_list_entry *entry; - hlist_for_each_entry(entry, pos, if_list, list) { + hlist_for_each_entry(entry, if_list, list) { if (entry->primary) - seq_printf(seq, "PRIMARY, "); + seq_puts(seq, "PRIMARY, "); else seq_printf(seq, "SEC %pM, ", entry->addr); } @@ -196,10 +194,9 @@ static void batadv_vis_data_read_entries(struct seq_file *seq, struct batadv_vis_info_entry *entries) { int i; - struct batadv_if_list_entry *entry; - struct hlist_node *pos; + struct batadv_vis_if_list_entry *entry; - hlist_for_each_entry(entry, pos, list, list) { + hlist_for_each_entry(entry, list, list) { seq_printf(seq, "%pM,", entry->addr); for (i = 0; i < packet->entries; i++) @@ -210,24 +207,23 @@ static void batadv_vis_data_read_entries(struct seq_file *seq, if (batadv_compare_eth(entry->addr, packet->vis_orig)) batadv_vis_data_read_prim_sec(seq, list); - seq_printf(seq, "\n"); + seq_puts(seq, "\n"); } } static void batadv_vis_seq_print_text_bucket(struct seq_file *seq, const struct hlist_head *head) { - struct hlist_node *node; struct batadv_vis_info *info; struct batadv_vis_packet *packet; uint8_t *entries_pos; struct batadv_vis_info_entry *entries; - struct batadv_if_list_entry *entry; - struct hlist_node *pos, *n; + struct batadv_vis_if_list_entry *entry; + struct hlist_node *n; HLIST_HEAD(vis_if_list); - hlist_for_each_entry_rcu(info, node, head, hash_entry) { + hlist_for_each_entry_rcu(info, head, hash_entry) { packet = (struct batadv_vis_packet *)info->skb_packet->data; entries_pos = (uint8_t *)packet + sizeof(*packet); entries = (struct batadv_vis_info_entry *)entries_pos; @@ -239,7 +235,7 @@ static void batadv_vis_seq_print_text_bucket(struct seq_file *seq, batadv_vis_data_read_entries(seq, &vis_if_list, packet, entries); - hlist_for_each_entry_safe(entry, pos, n, &vis_if_list, list) { + hlist_for_each_entry_safe(entry, n, &vis_if_list, list) { hlist_del(&entry->list); kfree(entry); } @@ -304,7 +300,7 @@ static void batadv_send_list_del(struct batadv_vis_info *info) static void batadv_recv_list_add(struct batadv_priv *bat_priv, struct list_head *recv_list, const char *mac) { - struct batadv_recvlist_node *entry; + struct batadv_vis_recvlist_node *entry; entry = kmalloc(sizeof(*entry), GFP_ATOMIC); if (!entry) @@ -321,7 +317,7 @@ static int batadv_recv_list_is_in(struct batadv_priv *bat_priv, const struct list_head *recv_list, const char *mac) { - const struct batadv_recvlist_node *entry; + const struct batadv_vis_recvlist_node *entry; spin_lock_bh(&bat_priv->vis.list_lock); list_for_each_entry(entry, recv_list, list) { @@ -481,7 +477,7 @@ void batadv_receive_client_update_packet(struct batadv_priv *bat_priv, /* Are we the target for this VIS packet? */ if (vis_server == BATADV_VIS_TYPE_SERVER_SYNC && - batadv_is_my_mac(vis_packet->target_orig)) + batadv_is_my_mac(bat_priv, vis_packet->target_orig)) are_target = 1; spin_lock_bh(&bat_priv->vis.hash_lock); @@ -500,7 +496,7 @@ void batadv_receive_client_update_packet(struct batadv_priv *bat_priv, batadv_send_list_add(bat_priv, info); /* ... we're not the recipient (and thus need to forward). 
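
Most of the mechanical churn in vis.c here (and in translation-table.c earlier) is the hlist iterator API change of this kernel generation: the macros now derive the next element from the entry itself, so the separate struct hlist_node cursor argument and its local variable disappear, and deletions name the embedded node explicitly. New-style usage in a self-contained form; foo_entry is illustrative, not from this patch:

#include <linux/rculist.h>

struct foo_entry {
	struct hlist_node hash_entry;
	int key;
};

static struct foo_entry *foo_find_rcu(struct hlist_head *head, int key)
{
	struct foo_entry *entry;

	/* three arguments now: entry cursor, head, member name */
	hlist_for_each_entry_rcu(entry, head, hash_entry) {
		if (entry->key == key)
			return entry;
	}

	return NULL;
}

static void foo_del(struct foo_entry *entry)
{
	/* no cursor to pass: deletion takes the embedded node */
	hlist_del_rcu(&entry->hash_entry);
}
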
*/ - } else if (!batadv_is_my_mac(packet->target_orig)) { + } else if (!batadv_is_my_mac(bat_priv, packet->target_orig)) { batadv_send_list_add(bat_priv, info); } @@ -518,7 +514,6 @@ static int batadv_find_best_vis_server(struct batadv_priv *bat_priv, { struct batadv_hashtable *hash = bat_priv->orig_hash; struct batadv_neigh_node *router; - struct hlist_node *node; struct hlist_head *head; struct batadv_orig_node *orig_node; struct batadv_vis_packet *packet; @@ -531,7 +526,7 @@ static int batadv_find_best_vis_server(struct batadv_priv *bat_priv, head = &hash->table[i]; rcu_read_lock(); - hlist_for_each_entry_rcu(orig_node, node, head, hash_entry) { + hlist_for_each_entry_rcu(orig_node, head, hash_entry) { router = batadv_orig_node_get_router(orig_node); if (!router) continue; @@ -570,7 +565,6 @@ static bool batadv_vis_packet_full(const struct batadv_vis_info *info) static int batadv_generate_vis_packet(struct batadv_priv *bat_priv) { struct batadv_hashtable *hash = bat_priv->orig_hash; - struct hlist_node *node; struct hlist_head *head; struct batadv_orig_node *orig_node; struct batadv_neigh_node *router; @@ -604,7 +598,7 @@ static int batadv_generate_vis_packet(struct batadv_priv *bat_priv) head = &hash->table[i]; rcu_read_lock(); - hlist_for_each_entry_rcu(orig_node, node, head, hash_entry) { + hlist_for_each_entry_rcu(orig_node, head, hash_entry) { router = batadv_orig_node_get_router(orig_node); if (!router) continue; @@ -643,7 +637,7 @@ next: head = &hash->table[i]; rcu_read_lock(); - hlist_for_each_entry_rcu(tt_common_entry, node, head, + hlist_for_each_entry_rcu(tt_common_entry, head, hash_entry) { packet_pos = skb_put(info->skb_packet, sizeof(*entry)); entry = (struct batadv_vis_info_entry *)packet_pos; @@ -672,14 +666,14 @@ static void batadv_purge_vis_packets(struct batadv_priv *bat_priv) { uint32_t i; struct batadv_hashtable *hash = bat_priv->vis.hash; - struct hlist_node *node, *node_tmp; + struct hlist_node *node_tmp; struct hlist_head *head; struct batadv_vis_info *info; for (i = 0; i < hash->size; i++) { head = &hash->table[i]; - hlist_for_each_entry_safe(info, node, node_tmp, + hlist_for_each_entry_safe(info, node_tmp, head, hash_entry) { /* never purge own data. */ if (info == bat_priv->vis.my_info) @@ -687,7 +681,7 @@ static void batadv_purge_vis_packets(struct batadv_priv *bat_priv) if (batadv_has_timed_out(info->first_seen, BATADV_VIS_TIMEOUT)) { - hlist_del(node); + hlist_del(&info->hash_entry); batadv_send_list_del(info); kref_put(&info->refcount, batadv_free_info); } @@ -699,7 +693,6 @@ static void batadv_broadcast_vis_packet(struct batadv_priv *bat_priv, struct batadv_vis_info *info) { struct batadv_hashtable *hash = bat_priv->orig_hash; - struct hlist_node *node; struct hlist_head *head; struct batadv_orig_node *orig_node; struct batadv_vis_packet *packet; @@ -714,7 +707,7 @@ static void batadv_broadcast_vis_packet(struct batadv_priv *bat_priv, head = &hash->table[i]; rcu_read_lock(); - hlist_for_each_entry_rcu(orig_node, node, head, hash_entry) { + hlist_for_each_entry_rcu(orig_node, head, hash_entry) { /* if it's a vis server and reachable, send it. */ if (!(orig_node->flags & BATADV_VIS_SERVER)) continue; @@ -827,7 +820,9 @@ static void batadv_send_vis_packets(struct work_struct *work) kref_put(&info->refcount, batadv_free_info); } spin_unlock_bh(&bat_priv->vis.hash_lock); - batadv_start_vis_timer(bat_priv); + + queue_delayed_work(batadv_event_workqueue, &bat_priv->vis.work, + msecs_to_jiffies(BATADV_VIS_INTERVAL)); } /* init the vis server. 
this may only be called when if_list is already @@ -852,6 +847,9 @@ int batadv_vis_init(struct batadv_priv *bat_priv) goto err; } + batadv_hash_set_lock_class(bat_priv->vis.hash, + &batadv_vis_hash_lock_class_key); + bat_priv->vis.my_info = kmalloc(BATADV_MAX_VIS_PACKET_SIZE, GFP_ATOMIC); if (!bat_priv->vis.my_info) goto err; @@ -894,7 +892,11 @@ int batadv_vis_init(struct batadv_priv *bat_priv) } spin_unlock_bh(&bat_priv->vis.hash_lock); - batadv_start_vis_timer(bat_priv); + + INIT_DELAYED_WORK(&bat_priv->vis.work, batadv_send_vis_packets); + queue_delayed_work(batadv_event_workqueue, &bat_priv->vis.work, + msecs_to_jiffies(BATADV_VIS_INTERVAL)); + return 0; free_info: @@ -931,11 +933,3 @@ void batadv_vis_quit(struct batadv_priv *bat_priv) bat_priv->vis.my_info = NULL; spin_unlock_bh(&bat_priv->vis.hash_lock); } - -/* schedule packets for (re)transmission */ -static void batadv_start_vis_timer(struct batadv_priv *bat_priv) -{ - INIT_DELAYED_WORK(&bat_priv->vis.work, batadv_send_vis_packets); - queue_delayed_work(batadv_event_workqueue, &bat_priv->vis.work, - msecs_to_jiffies(BATADV_VIS_INTERVAL)); -} diff --git a/net/batman-adv/vis.h b/net/batman-adv/vis.h index 873282fa86da..ad92b0e3c230 100644 --- a/net/batman-adv/vis.h +++ b/net/batman-adv/vis.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2008-2012 B.A.T.M.A.N. contributors: +/* Copyright (C) 2008-2013 B.A.T.M.A.N. contributors: * * Simon Wunderlich, Marek Lindner * diff --git a/net/bluetooth/a2mp.c b/net/bluetooth/a2mp.c index 2f67d5ecc907..17f33a62f6db 100644 --- a/net/bluetooth/a2mp.c +++ b/net/bluetooth/a2mp.c @@ -290,7 +290,7 @@ static int a2mp_getinfo_req(struct amp_mgr *mgr, struct sk_buff *skb, goto done; } - mgr->state = READ_LOC_AMP_INFO; + set_bit(READ_LOC_AMP_INFO, &mgr->state); hci_send_cmd(hdev, HCI_OP_READ_LOCAL_AMP_INFO, 0, NULL); done: @@ -397,13 +397,12 @@ static int a2mp_getampassoc_rsp(struct amp_mgr *mgr, struct sk_buff *skb, if (ctrl) { u8 *assoc; - assoc = kzalloc(assoc_len, GFP_KERNEL); + assoc = kmemdup(rsp->amp_assoc, assoc_len, GFP_KERNEL); if (!assoc) { amp_ctrl_put(ctrl); return -ENOMEM; } - memcpy(assoc, rsp->amp_assoc, assoc_len); ctrl->assoc = assoc; ctrl->assoc_len = assoc_len; ctrl->assoc_rem_len = assoc_len; @@ -472,13 +471,12 @@ static int a2mp_createphyslink_req(struct amp_mgr *mgr, struct sk_buff *skb, size_t assoc_len = le16_to_cpu(hdr->len) - sizeof(*req); u8 *assoc; - assoc = kzalloc(assoc_len, GFP_KERNEL); + assoc = kmemdup(req->amp_assoc, assoc_len, GFP_KERNEL); if (!assoc) { amp_ctrl_put(ctrl); return -ENOMEM; } - memcpy(assoc, req->amp_assoc, assoc_len); ctrl->assoc = assoc; ctrl->assoc_len = assoc_len; ctrl->assoc_rem_len = assoc_len; @@ -499,8 +497,16 @@ send_rsp: if (hdev) hci_dev_put(hdev); - a2mp_send(mgr, A2MP_CREATEPHYSLINK_RSP, hdr->ident, sizeof(rsp), - &rsp); + /* Reply error now and success after HCI Write Remote AMP Assoc + command complete with success status + */ + if (rsp.status != A2MP_STATUS_SUCCESS) { + a2mp_send(mgr, A2MP_CREATEPHYSLINK_RSP, hdr->ident, + sizeof(rsp), &rsp); + } else { + set_bit(WRITE_REMOTE_AMP_ASSOC, &mgr->state); + mgr->ident = hdr->ident; + } skb_pull(skb, le16_to_cpu(hdr->len)); return 0; @@ -840,7 +846,7 @@ struct amp_mgr *amp_mgr_lookup_by_state(u8 state) mutex_lock(&_mgr_list_lock); list_for_each_entry(mgr, &_mgr_list, list) { - if (mgr->state == state) { + if (test_and_clear_bit(state, &mgr->state)) { amp_mgr_get(mgr); mutex_unlock(&_mgr_list_lock); return mgr; @@ -949,6 +955,32 @@ clean: kfree(req); } +void a2mp_send_create_phy_link_rsp(struct hci_dev 
*hdev, u8 status) +{ + struct amp_mgr *mgr; + struct a2mp_physlink_rsp rsp; + struct hci_conn *hs_hcon; + + mgr = amp_mgr_lookup_by_state(WRITE_REMOTE_AMP_ASSOC); + if (!mgr) + return; + + hs_hcon = hci_conn_hash_lookup_state(hdev, AMP_LINK, BT_CONNECT); + if (!hs_hcon) { + rsp.status = A2MP_STATUS_UNABLE_START_LINK_CREATION; + } else { + rsp.remote_id = hs_hcon->remote_id; + rsp.status = A2MP_STATUS_SUCCESS; + } + + BT_DBG("%s mgr %p hs_hcon %p status %u", hdev->name, mgr, hs_hcon, + status); + + rsp.local_id = hdev->id; + a2mp_send(mgr, A2MP_CREATEPHYSLINK_RSP, mgr->ident, sizeof(rsp), &rsp); + amp_mgr_put(mgr); +} + void a2mp_discover_amp(struct l2cap_chan *chan) { struct l2cap_conn *conn = chan->conn; diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index 5355df63d39b..9096137c889c 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -92,23 +92,14 @@ int bt_sock_register(int proto, const struct net_proto_family *ops) } EXPORT_SYMBOL(bt_sock_register); -int bt_sock_unregister(int proto) +void bt_sock_unregister(int proto) { - int err = 0; - if (proto < 0 || proto >= BT_MAX_PROTO) - return -EINVAL; + return; write_lock(&bt_proto_lock); - - if (!bt_proto[proto]) - err = -ENOENT; - else - bt_proto[proto] = NULL; - + bt_proto[proto] = NULL; write_unlock(&bt_proto_lock); - - return err; } EXPORT_SYMBOL(bt_sock_unregister); @@ -230,6 +221,8 @@ int bt_sock_recvmsg(struct kiocb *iocb, struct socket *sock, if (flags & (MSG_OOB)) return -EOPNOTSUPP; + msg->msg_namelen = 0; + skb = skb_recv_datagram(sk, flags, noblock, &err); if (!skb) { if (sk->sk_shutdown & RCV_SHUTDOWN) @@ -237,8 +230,6 @@ int bt_sock_recvmsg(struct kiocb *iocb, struct socket *sock, return err; } - msg->msg_namelen = 0; - copied = skb->len; if (len < copied) { msg->msg_flags |= MSG_TRUNC; @@ -422,7 +413,8 @@ unsigned int bt_sock_poll(struct file *file, struct socket *sock, return bt_accept_poll(sk); if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) - mask |= POLLERR; + mask |= POLLERR | + (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? 
POLLPRI : 0); if (sk->sk_shutdown & RCV_SHUTDOWN) mask |= POLLRDHUP | POLLIN | POLLRDNORM; @@ -617,7 +609,7 @@ static int bt_seq_open(struct inode *inode, struct file *file) struct bt_sock_list *sk_list; struct bt_seq_state *s; - sk_list = PDE(inode)->data; + sk_list = PDE_DATA(inode); s = __seq_open_private(file, &bt_seq_ops, sizeof(struct bt_seq_state)); if (!s) @@ -627,35 +619,30 @@ static int bt_seq_open(struct inode *inode, struct file *file) return 0; } -int bt_procfs_init(struct module* module, struct net *net, const char *name, +static const struct file_operations bt_fops = { + .open = bt_seq_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release_private +}; + +int bt_procfs_init(struct net *net, const char *name, struct bt_sock_list* sk_list, int (* seq_show)(struct seq_file *, void *)) { - struct proc_dir_entry * pde; - sk_list->custom_seq_show = seq_show; - sk_list->fops.owner = module; - sk_list->fops.open = bt_seq_open; - sk_list->fops.read = seq_read; - sk_list->fops.llseek = seq_lseek; - sk_list->fops.release = seq_release_private; - - pde = proc_net_fops_create(net, name, 0, &sk_list->fops); - if (!pde) + if (!proc_create_data(name, 0, net->proc_net, &bt_fops, sk_list)) return -ENOMEM; - - pde->data = sk_list; - return 0; } void bt_procfs_cleanup(struct net *net, const char *name) { - proc_net_remove(net, name); + remove_proc_entry(name, net->proc_net); } #else -int bt_procfs_init(struct module* module, struct net *net, const char *name, +int bt_procfs_init(struct net *net, const char *name, struct bt_sock_list* sk_list, int (* seq_show)(struct seq_file *, void *)) { diff --git a/net/bluetooth/amp.c b/net/bluetooth/amp.c index 1b0d92c0643a..d459ed43c779 100644 --- a/net/bluetooth/amp.c +++ b/net/bluetooth/amp.c @@ -236,7 +236,7 @@ void amp_read_loc_assoc(struct hci_dev *hdev, struct amp_mgr *mgr) cp.max_len = cpu_to_le16(hdev->amp_assoc_size); - mgr->state = READ_LOC_AMP_ASSOC; + set_bit(READ_LOC_AMP_ASSOC, &mgr->state); hci_send_cmd(hdev, HCI_OP_READ_LOCAL_AMP_ASSOC, sizeof(cp), &cp); } @@ -250,7 +250,7 @@ void amp_read_loc_assoc_final_data(struct hci_dev *hdev, cp.len_so_far = cpu_to_le16(0); cp.max_len = cpu_to_le16(hdev->amp_assoc_size); - mgr->state = READ_LOC_AMP_ASSOC_FINAL; + set_bit(READ_LOC_AMP_ASSOC_FINAL, &mgr->state); /* Read Local AMP Assoc final link information data */ hci_send_cmd(hdev, HCI_OP_READ_LOCAL_AMP_ASSOC, sizeof(cp), &cp); @@ -317,7 +317,9 @@ void amp_write_rem_assoc_continue(struct hci_dev *hdev, u8 handle) if (!hcon) return; - amp_write_rem_assoc_frag(hdev, hcon); + /* Send A2MP create phylink rsp when all fragments are written */ + if (amp_write_rem_assoc_frag(hdev, hcon)) + a2mp_send_create_phy_link_rsp(hdev, 0); } void amp_write_remote_assoc(struct hci_dev *hdev, u8 handle) @@ -403,26 +405,20 @@ void amp_physical_cfm(struct hci_conn *bredr_hcon, struct hci_conn *hs_hcon) void amp_create_logical_link(struct l2cap_chan *chan) { + struct hci_conn *hs_hcon = chan->hs_hcon; struct hci_cp_create_accept_logical_link cp; - struct hci_conn *hcon; struct hci_dev *hdev; - BT_DBG("chan %p", chan); + BT_DBG("chan %p hs_hcon %p dst %pMR", chan, hs_hcon, chan->conn->dst); - if (!chan->hs_hcon) + if (!hs_hcon) return; hdev = hci_dev_hold(chan->hs_hcon->hdev); if (!hdev) return; - BT_DBG("chan %p dst %pMR", chan, chan->conn->dst); - - hcon = hci_conn_hash_lookup_ba(hdev, AMP_LINK, chan->conn->dst); - if (!hcon) - goto done; - - cp.phy_handle = hcon->handle; + cp.phy_handle = hs_hcon->handle; cp.tx_flow_spec.id = chan->local_id; 
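
The a2mp.c and amp.c hunks in this area stop assigning a single enum value to mgr->state and instead treat it as an unsigned long bitfield manipulated with set_bit(), while amp_mgr_lookup_by_state() now uses test_and_clear_bit() so a matching manager is found and its flag consumed in one atomic step. The pattern in isolation; foo_mgr and the bit name are illustrative, not from this patch:

#include <linux/bitops.h>
#include <linux/types.h>

#define FOO_READ_INFO_PENDING	0	/* bit number, not a mask */

struct foo_mgr {
	unsigned long state;
};

static void foo_mark_pending(struct foo_mgr *mgr)
{
	/* atomic; leaves any other pending bits in state untouched */
	set_bit(FOO_READ_INFO_PENDING, &mgr->state);
}

static bool foo_take_pending(struct foo_mgr *mgr)
{
	/* atomically test and clear: exactly one caller can win,
	 * which is what makes the lookup-and-consume idiom safe
	 */
	return test_and_clear_bit(FOO_READ_INFO_PENDING, &mgr->state);
}
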
cp.tx_flow_spec.stype = chan->local_stype; @@ -438,14 +434,13 @@ void amp_create_logical_link(struct l2cap_chan *chan) cp.rx_flow_spec.acc_lat = cpu_to_le32(chan->remote_acc_lat); cp.rx_flow_spec.flush_to = cpu_to_le32(chan->remote_flush_to); - if (hcon->out) + if (hs_hcon->out) hci_send_cmd(hdev, HCI_OP_CREATE_LOGICAL_LINK, sizeof(cp), &cp); else hci_send_cmd(hdev, HCI_OP_ACCEPT_LOGICAL_LINK, sizeof(cp), &cp); -done: hci_dev_put(hdev); } diff --git a/net/bluetooth/bnep/core.c b/net/bluetooth/bnep/core.c index a5b639702637..e430b1abcd2f 100644 --- a/net/bluetooth/bnep/core.c +++ b/net/bluetooth/bnep/core.c @@ -33,7 +33,6 @@ #include <net/bluetooth/bluetooth.h> #include <net/bluetooth/hci_core.h> -#include <net/bluetooth/l2cap.h> #include "bnep.h" diff --git a/net/bluetooth/bnep/netdev.c b/net/bluetooth/bnep/netdev.c index e58c8b32589c..4b488ec26105 100644 --- a/net/bluetooth/bnep/netdev.c +++ b/net/bluetooth/bnep/netdev.c @@ -136,7 +136,7 @@ static u16 bnep_net_eth_proto(struct sk_buff *skb) struct ethhdr *eh = (void *) skb->data; u16 proto = ntohs(eh->h_proto); - if (proto >= 1536) + if (proto >= ETH_P_802_3_MIN) return proto; if (get_unaligned((__be16 *) skb->data) == htons(0xFFFF)) diff --git a/net/bluetooth/bnep/sock.c b/net/bluetooth/bnep/sock.c index e7154a58465f..5f051290daba 100644 --- a/net/bluetooth/bnep/sock.c +++ b/net/bluetooth/bnep/sock.c @@ -234,7 +234,7 @@ int __init bnep_sock_init(void) goto error; } - err = bt_procfs_init(THIS_MODULE, &init_net, "bnep", &bnep_sk_list, NULL); + err = bt_procfs_init(&init_net, "bnep", &bnep_sk_list, NULL); if (err < 0) { BT_ERR("Failed to create BNEP proc file"); bt_sock_unregister(BTPROTO_BNEP); @@ -253,8 +253,6 @@ error: void __exit bnep_sock_cleanup(void) { bt_procfs_cleanup(&init_net, "bnep"); - if (bt_sock_unregister(BTPROTO_BNEP) < 0) - BT_ERR("Can't unregister BNEP socket"); - + bt_sock_unregister(BTPROTO_BNEP); proto_unregister(&bnep_proto); } diff --git a/net/bluetooth/cmtp/capi.c b/net/bluetooth/cmtp/capi.c index a4a9d4b6816c..cd75e4d64b90 100644 --- a/net/bluetooth/cmtp/capi.c +++ b/net/bluetooth/cmtp/capi.c @@ -539,7 +539,7 @@ static int cmtp_proc_show(struct seq_file *m, void *v) static int cmtp_proc_open(struct inode *inode, struct file *file) { - return single_open(file, cmtp_proc_show, PDE(inode)->data); + return single_open(file, cmtp_proc_show, PDE_DATA(inode)); } static const struct file_operations cmtp_proc_fops = { diff --git a/net/bluetooth/cmtp/sock.c b/net/bluetooth/cmtp/sock.c index 1c57482112b6..d82787d417bd 100644 --- a/net/bluetooth/cmtp/sock.c +++ b/net/bluetooth/cmtp/sock.c @@ -245,7 +245,7 @@ int cmtp_init_sockets(void) goto error; } - err = bt_procfs_init(THIS_MODULE, &init_net, "cmtp", &cmtp_sk_list, NULL); + err = bt_procfs_init(&init_net, "cmtp", &cmtp_sk_list, NULL); if (err < 0) { BT_ERR("Failed to create CMTP proc file"); bt_sock_unregister(BTPROTO_HIDP); @@ -264,8 +264,6 @@ error: void cmtp_cleanup_sockets(void) { bt_procfs_cleanup(&init_net, "cmtp"); - if (bt_sock_unregister(BTPROTO_CMTP) < 0) - BT_ERR("Can't unregister CMTP socket"); - + bt_sock_unregister(BTPROTO_CMTP); proto_unregister(&cmtp_proto); } diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 4925a02ae7e4..6c7f36379722 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -117,7 +117,17 @@ static void hci_acl_create_connection_cancel(struct hci_conn *conn) hci_send_cmd(conn->hdev, HCI_OP_CREATE_CONN_CANCEL, sizeof(cp), &cp); } -void hci_acl_disconn(struct hci_conn *conn, __u8 reason) +static void 
hci_reject_sco(struct hci_conn *conn) +{ + struct hci_cp_reject_sync_conn_req cp; + + cp.reason = HCI_ERROR_REMOTE_USER_TERM; + bacpy(&cp.bdaddr, &conn->dst); + + hci_send_cmd(conn->hdev, HCI_OP_REJECT_SYNC_CONN_REQ, sizeof(cp), &cp); +} + +void hci_disconnect(struct hci_conn *conn, __u8 reason) { struct hci_cp_disconnect cp; @@ -253,7 +263,7 @@ static void hci_conn_disconnect(struct hci_conn *conn) hci_amp_disconn(conn, reason); break; default: - hci_acl_disconn(conn, reason); + hci_disconnect(conn, reason); break; } } @@ -276,6 +286,8 @@ static void hci_conn_timeout(struct work_struct *work) hci_acl_create_connection_cancel(conn); else if (conn->type == LE_LINK) hci_le_create_connection_cancel(conn); + } else if (conn->type == SCO_LINK || conn->type == ESCO_LINK) { + hci_reject_sco(conn); } break; case BT_CONFIG: @@ -398,8 +410,6 @@ struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst) if (hdev->notify) hdev->notify(hdev, HCI_NOTIFY_CONN_ADD); - atomic_set(&conn->devref, 0); - hci_conn_init_sysfs(conn); return conn; @@ -433,7 +443,7 @@ int hci_conn_del(struct hci_conn *conn) struct hci_conn *acl = conn->link; if (acl) { acl->link = NULL; - hci_conn_put(acl); + hci_conn_drop(acl); } } @@ -448,12 +458,11 @@ int hci_conn_del(struct hci_conn *conn) skb_queue_purge(&conn->data_q); - hci_conn_put_device(conn); + hci_conn_del_sysfs(conn); hci_dev_put(hdev); - if (conn->handle == 0) - kfree(conn); + hci_conn_put(conn); return 0; } @@ -565,7 +574,7 @@ static struct hci_conn *hci_connect_sco(struct hci_dev *hdev, int type, if (!sco) { sco = hci_conn_add(hdev, type, dst); if (!sco) { - hci_conn_put(acl); + hci_conn_drop(acl); return ERR_PTR(-ENOMEM); } } @@ -835,19 +844,6 @@ void hci_conn_check_pending(struct hci_dev *hdev) hci_dev_unlock(hdev); } -void hci_conn_hold_device(struct hci_conn *conn) -{ - atomic_inc(&conn->devref); -} -EXPORT_SYMBOL(hci_conn_hold_device); - -void hci_conn_put_device(struct hci_conn *conn) -{ - if (atomic_dec_and_test(&conn->devref)) - hci_conn_del_sysfs(conn); -} -EXPORT_SYMBOL(hci_conn_put_device); - int hci_get_conn_list(void __user *arg) { struct hci_conn *c; @@ -980,7 +976,7 @@ void hci_chan_del(struct hci_chan *chan) synchronize_rcu(); - hci_conn_put(conn); + hci_conn_drop(conn); skb_queue_purge(&chan->data_q); kfree(chan); diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 0f78e34220c9..33843c5c4939 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -57,36 +57,9 @@ static void hci_notify(struct hci_dev *hdev, int event) /* ---- HCI requests ---- */ -void hci_req_complete(struct hci_dev *hdev, __u16 cmd, int result) +static void hci_req_sync_complete(struct hci_dev *hdev, u8 result) { - BT_DBG("%s command 0x%4.4x result 0x%2.2x", hdev->name, cmd, result); - - /* If this is the init phase check if the completed command matches - * the last init command, and if not just return. - */ - if (test_bit(HCI_INIT, &hdev->flags) && hdev->init_last_cmd != cmd) { - struct hci_command_hdr *sent = (void *) hdev->sent_cmd->data; - u16 opcode = __le16_to_cpu(sent->opcode); - struct sk_buff *skb; - - /* Some CSR based controllers generate a spontaneous - * reset complete event during init and any pending - * command will never be completed. In such a case we - * need to resend whatever was the last sent - * command. 
- */ - - if (cmd != HCI_OP_RESET || opcode == HCI_OP_RESET) - return; - - skb = skb_clone(hdev->sent_cmd, GFP_ATOMIC); - if (skb) { - skb_queue_head(&hdev->cmd_q, skb); - queue_work(hdev->workqueue, &hdev->cmd_work); - } - - return; - } + BT_DBG("%s result 0x%2.2x", hdev->name, result); if (hdev->req_status == HCI_REQ_PEND) { hdev->req_result = result; @@ -106,22 +79,158 @@ static void hci_req_cancel(struct hci_dev *hdev, int err) } } +static struct sk_buff *hci_get_cmd_complete(struct hci_dev *hdev, u16 opcode, + u8 event) +{ + struct hci_ev_cmd_complete *ev; + struct hci_event_hdr *hdr; + struct sk_buff *skb; + + hci_dev_lock(hdev); + + skb = hdev->recv_evt; + hdev->recv_evt = NULL; + + hci_dev_unlock(hdev); + + if (!skb) + return ERR_PTR(-ENODATA); + + if (skb->len < sizeof(*hdr)) { + BT_ERR("Too short HCI event"); + goto failed; + } + + hdr = (void *) skb->data; + skb_pull(skb, HCI_EVENT_HDR_SIZE); + + if (event) { + if (hdr->evt != event) + goto failed; + return skb; + } + + if (hdr->evt != HCI_EV_CMD_COMPLETE) { + BT_DBG("Last event is not cmd complete (0x%2.2x)", hdr->evt); + goto failed; + } + + if (skb->len < sizeof(*ev)) { + BT_ERR("Too short cmd_complete event"); + goto failed; + } + + ev = (void *) skb->data; + skb_pull(skb, sizeof(*ev)); + + if (opcode == __le16_to_cpu(ev->opcode)) + return skb; + + BT_DBG("opcode doesn't match (0x%2.2x != 0x%2.2x)", opcode, + __le16_to_cpu(ev->opcode)); + +failed: + kfree_skb(skb); + return ERR_PTR(-ENODATA); +} + +struct sk_buff *__hci_cmd_sync_ev(struct hci_dev *hdev, u16 opcode, u32 plen, + const void *param, u8 event, u32 timeout) +{ + DECLARE_WAITQUEUE(wait, current); + struct hci_request req; + int err = 0; + + BT_DBG("%s", hdev->name); + + hci_req_init(&req, hdev); + + hci_req_add_ev(&req, opcode, plen, param, event); + + hdev->req_status = HCI_REQ_PEND; + + err = hci_req_run(&req, hci_req_sync_complete); + if (err < 0) + return ERR_PTR(err); + + add_wait_queue(&hdev->req_wait_q, &wait); + set_current_state(TASK_INTERRUPTIBLE); + + schedule_timeout(timeout); + + remove_wait_queue(&hdev->req_wait_q, &wait); + + if (signal_pending(current)) + return ERR_PTR(-EINTR); + + switch (hdev->req_status) { + case HCI_REQ_DONE: + err = -bt_to_errno(hdev->req_result); + break; + + case HCI_REQ_CANCELED: + err = -hdev->req_result; + break; + + default: + err = -ETIMEDOUT; + break; + } + + hdev->req_status = hdev->req_result = 0; + + BT_DBG("%s end: err %d", hdev->name, err); + + if (err < 0) + return ERR_PTR(err); + + return hci_get_cmd_complete(hdev, opcode, event); +} +EXPORT_SYMBOL(__hci_cmd_sync_ev); + +struct sk_buff *__hci_cmd_sync(struct hci_dev *hdev, u16 opcode, u32 plen, + const void *param, u32 timeout) +{ + return __hci_cmd_sync_ev(hdev, opcode, plen, param, 0, timeout); +} +EXPORT_SYMBOL(__hci_cmd_sync); + /* Execute request and wait for completion. */ -static int __hci_request(struct hci_dev *hdev, - void (*req)(struct hci_dev *hdev, unsigned long opt), - unsigned long opt, __u32 timeout) +static int __hci_req_sync(struct hci_dev *hdev, + void (*func)(struct hci_request *req, + unsigned long opt), + unsigned long opt, __u32 timeout) { + struct hci_request req; DECLARE_WAITQUEUE(wait, current); int err = 0; BT_DBG("%s start", hdev->name); + hci_req_init(&req, hdev); + hdev->req_status = HCI_REQ_PEND; + func(&req, opt); + + err = hci_req_run(&req, hci_req_sync_complete); + if (err < 0) { + hdev->req_status = 0; + + /* ENODATA means the HCI request command queue is empty. 
+ * This can happen when a request with conditionals doesn't + * trigger any commands to be sent. This is normal behavior + * and should not trigger an error return. + */ + if (err == -ENODATA) + return 0; + + return err; + } + add_wait_queue(&hdev->req_wait_q, &wait); set_current_state(TASK_INTERRUPTIBLE); - req(hdev, opt); schedule_timeout(timeout); remove_wait_queue(&hdev->req_wait_q, &wait); @@ -150,9 +259,10 @@ static int __hci_request(struct hci_dev *hdev, return err; } -static int hci_request(struct hci_dev *hdev, - void (*req)(struct hci_dev *hdev, unsigned long opt), - unsigned long opt, __u32 timeout) +static int hci_req_sync(struct hci_dev *hdev, + void (*req)(struct hci_request *req, + unsigned long opt), + unsigned long opt, __u32 timeout) { int ret; @@ -161,75 +271,66 @@ static int hci_request(struct hci_dev *hdev, /* Serialize all requests */ hci_req_lock(hdev); - ret = __hci_request(hdev, req, opt, timeout); + ret = __hci_req_sync(hdev, req, opt, timeout); hci_req_unlock(hdev); return ret; } -static void hci_reset_req(struct hci_dev *hdev, unsigned long opt) +static void hci_reset_req(struct hci_request *req, unsigned long opt) { - BT_DBG("%s %ld", hdev->name, opt); + BT_DBG("%s %ld", req->hdev->name, opt); /* Reset device */ - set_bit(HCI_RESET, &hdev->flags); - hci_send_cmd(hdev, HCI_OP_RESET, 0, NULL); + set_bit(HCI_RESET, &req->hdev->flags); + hci_req_add(req, HCI_OP_RESET, 0, NULL); } -static void bredr_init(struct hci_dev *hdev) +static void bredr_init(struct hci_request *req) { - hdev->flow_ctl_mode = HCI_FLOW_CTL_MODE_PACKET_BASED; + req->hdev->flow_ctl_mode = HCI_FLOW_CTL_MODE_PACKET_BASED; /* Read Local Supported Features */ - hci_send_cmd(hdev, HCI_OP_READ_LOCAL_FEATURES, 0, NULL); + hci_req_add(req, HCI_OP_READ_LOCAL_FEATURES, 0, NULL); /* Read Local Version */ - hci_send_cmd(hdev, HCI_OP_READ_LOCAL_VERSION, 0, NULL); + hci_req_add(req, HCI_OP_READ_LOCAL_VERSION, 0, NULL); + + /* Read BD Address */ + hci_req_add(req, HCI_OP_READ_BD_ADDR, 0, NULL); } -static void amp_init(struct hci_dev *hdev) +static void amp_init(struct hci_request *req) { - hdev->flow_ctl_mode = HCI_FLOW_CTL_MODE_BLOCK_BASED; + req->hdev->flow_ctl_mode = HCI_FLOW_CTL_MODE_BLOCK_BASED; /* Read Local Version */ - hci_send_cmd(hdev, HCI_OP_READ_LOCAL_VERSION, 0, NULL); + hci_req_add(req, HCI_OP_READ_LOCAL_VERSION, 0, NULL); /* Read Local AMP Info */ - hci_send_cmd(hdev, HCI_OP_READ_LOCAL_AMP_INFO, 0, NULL); + hci_req_add(req, HCI_OP_READ_LOCAL_AMP_INFO, 0, NULL); /* Read Data Blk size */ - hci_send_cmd(hdev, HCI_OP_READ_DATA_BLOCK_SIZE, 0, NULL); + hci_req_add(req, HCI_OP_READ_DATA_BLOCK_SIZE, 0, NULL); } -static void hci_init_req(struct hci_dev *hdev, unsigned long opt) +static void hci_init1_req(struct hci_request *req, unsigned long opt) { - struct sk_buff *skb; + struct hci_dev *hdev = req->hdev; BT_DBG("%s %ld", hdev->name, opt); - /* Driver initialization */ - - /* Special commands */ - while ((skb = skb_dequeue(&hdev->driver_init))) { - bt_cb(skb)->pkt_type = HCI_COMMAND_PKT; - skb->dev = (void *) hdev; - - skb_queue_tail(&hdev->cmd_q, skb); - queue_work(hdev->workqueue, &hdev->cmd_work); - } - skb_queue_purge(&hdev->driver_init); - /* Reset */ if (!test_bit(HCI_QUIRK_RESET_ON_CLOSE, &hdev->quirks)) - hci_reset_req(hdev, 0); + hci_reset_req(req, 0); switch (hdev->dev_type) { case HCI_BREDR: - bredr_init(hdev); + bredr_init(req); break; case HCI_AMP: - amp_init(hdev); + amp_init(req); break; default: @@ -238,44 +339,347 @@ static void hci_init_req(struct hci_dev *hdev, unsigned long opt) } 
} -static void hci_scan_req(struct hci_dev *hdev, unsigned long opt) +static void bredr_setup(struct hci_request *req) +{ + struct hci_cp_delete_stored_link_key cp; + __le16 param; + __u8 flt_type; + + /* Read Buffer Size (ACL mtu, max pkt, etc.) */ + hci_req_add(req, HCI_OP_READ_BUFFER_SIZE, 0, NULL); + + /* Read Class of Device */ + hci_req_add(req, HCI_OP_READ_CLASS_OF_DEV, 0, NULL); + + /* Read Local Name */ + hci_req_add(req, HCI_OP_READ_LOCAL_NAME, 0, NULL); + + /* Read Voice Setting */ + hci_req_add(req, HCI_OP_READ_VOICE_SETTING, 0, NULL); + + /* Clear Event Filters */ + flt_type = HCI_FLT_CLEAR_ALL; + hci_req_add(req, HCI_OP_SET_EVENT_FLT, 1, &flt_type); + + /* Connection accept timeout ~20 secs */ + param = __constant_cpu_to_le16(0x7d00); + hci_req_add(req, HCI_OP_WRITE_CA_TIMEOUT, 2, &param); + + bacpy(&cp.bdaddr, BDADDR_ANY); + cp.delete_all = 0x01; + hci_req_add(req, HCI_OP_DELETE_STORED_LINK_KEY, sizeof(cp), &cp); + + /* Read page scan parameters */ + if (req->hdev->hci_ver > BLUETOOTH_VER_1_1) { + hci_req_add(req, HCI_OP_READ_PAGE_SCAN_ACTIVITY, 0, NULL); + hci_req_add(req, HCI_OP_READ_PAGE_SCAN_TYPE, 0, NULL); + } +} + +static void le_setup(struct hci_request *req) +{ + struct hci_dev *hdev = req->hdev; + + /* Read LE Buffer Size */ + hci_req_add(req, HCI_OP_LE_READ_BUFFER_SIZE, 0, NULL); + + /* Read LE Local Supported Features */ + hci_req_add(req, HCI_OP_LE_READ_LOCAL_FEATURES, 0, NULL); + + /* Read LE Advertising Channel TX Power */ + hci_req_add(req, HCI_OP_LE_READ_ADV_TX_POWER, 0, NULL); + + /* Read LE White List Size */ + hci_req_add(req, HCI_OP_LE_READ_WHITE_LIST_SIZE, 0, NULL); + + /* Read LE Supported States */ + hci_req_add(req, HCI_OP_LE_READ_SUPPORTED_STATES, 0, NULL); + + /* LE-only controllers have LE implicitly enabled */ + if (!lmp_bredr_capable(hdev)) + set_bit(HCI_LE_ENABLED, &hdev->dev_flags); +} + +static u8 hci_get_inquiry_mode(struct hci_dev *hdev) +{ + if (lmp_ext_inq_capable(hdev)) + return 0x02; + + if (lmp_inq_rssi_capable(hdev)) + return 0x01; + + if (hdev->manufacturer == 11 && hdev->hci_rev == 0x00 && + hdev->lmp_subver == 0x0757) + return 0x01; + + if (hdev->manufacturer == 15) { + if (hdev->hci_rev == 0x03 && hdev->lmp_subver == 0x6963) + return 0x01; + if (hdev->hci_rev == 0x09 && hdev->lmp_subver == 0x6963) + return 0x01; + if (hdev->hci_rev == 0x00 && hdev->lmp_subver == 0x6965) + return 0x01; + } + + if (hdev->manufacturer == 31 && hdev->hci_rev == 0x2005 && + hdev->lmp_subver == 0x1805) + return 0x01; + + return 0x00; +} + +static void hci_setup_inquiry_mode(struct hci_request *req) +{ + u8 mode; + + mode = hci_get_inquiry_mode(req->hdev); + + hci_req_add(req, HCI_OP_WRITE_INQUIRY_MODE, 1, &mode); +} + +static void hci_setup_event_mask(struct hci_request *req) +{ + struct hci_dev *hdev = req->hdev; + + /* The second byte is 0xff instead of 0x9f (two reserved bits + * disabled) since a Broadcom 1.2 dongle doesn't respond to the + * command otherwise. + */ + u8 events[8] = { 0xff, 0xff, 0xfb, 0xff, 0x00, 0x00, 0x00, 0x00 }; + + /* CSR 1.1 dongles do not accept any bitfield so don't try to set + * any event mask for pre 1.2 devices. 
+ */ + if (hdev->hci_ver < BLUETOOTH_VER_1_2) + return; + + if (lmp_bredr_capable(hdev)) { + events[4] |= 0x01; /* Flow Specification Complete */ + events[4] |= 0x02; /* Inquiry Result with RSSI */ + events[4] |= 0x04; /* Read Remote Extended Features Complete */ + events[5] |= 0x08; /* Synchronous Connection Complete */ + events[5] |= 0x10; /* Synchronous Connection Changed */ + } + + if (lmp_inq_rssi_capable(hdev)) + events[4] |= 0x02; /* Inquiry Result with RSSI */ + + if (lmp_sniffsubr_capable(hdev)) + events[5] |= 0x20; /* Sniff Subrating */ + + if (lmp_pause_enc_capable(hdev)) + events[5] |= 0x80; /* Encryption Key Refresh Complete */ + + if (lmp_ext_inq_capable(hdev)) + events[5] |= 0x40; /* Extended Inquiry Result */ + + if (lmp_no_flush_capable(hdev)) + events[7] |= 0x01; /* Enhanced Flush Complete */ + + if (lmp_lsto_capable(hdev)) + events[6] |= 0x80; /* Link Supervision Timeout Changed */ + + if (lmp_ssp_capable(hdev)) { + events[6] |= 0x01; /* IO Capability Request */ + events[6] |= 0x02; /* IO Capability Response */ + events[6] |= 0x04; /* User Confirmation Request */ + events[6] |= 0x08; /* User Passkey Request */ + events[6] |= 0x10; /* Remote OOB Data Request */ + events[6] |= 0x20; /* Simple Pairing Complete */ + events[7] |= 0x04; /* User Passkey Notification */ + events[7] |= 0x08; /* Keypress Notification */ + events[7] |= 0x10; /* Remote Host Supported + * Features Notification + */ + } + + if (lmp_le_capable(hdev)) + events[7] |= 0x20; /* LE Meta-Event */ + + hci_req_add(req, HCI_OP_SET_EVENT_MASK, sizeof(events), events); + + if (lmp_le_capable(hdev)) { + memset(events, 0, sizeof(events)); + events[0] = 0x1f; + hci_req_add(req, HCI_OP_LE_SET_EVENT_MASK, + sizeof(events), events); + } +} + +static void hci_init2_req(struct hci_request *req, unsigned long opt) +{ + struct hci_dev *hdev = req->hdev; + + if (lmp_bredr_capable(hdev)) + bredr_setup(req); + + if (lmp_le_capable(hdev)) + le_setup(req); + + hci_setup_event_mask(req); + + if (hdev->hci_ver > BLUETOOTH_VER_1_1) + hci_req_add(req, HCI_OP_READ_LOCAL_COMMANDS, 0, NULL); + + if (lmp_ssp_capable(hdev)) { + if (test_bit(HCI_SSP_ENABLED, &hdev->dev_flags)) { + u8 mode = 0x01; + hci_req_add(req, HCI_OP_WRITE_SSP_MODE, + sizeof(mode), &mode); + } else { + struct hci_cp_write_eir cp; + + memset(hdev->eir, 0, sizeof(hdev->eir)); + memset(&cp, 0, sizeof(cp)); + + hci_req_add(req, HCI_OP_WRITE_EIR, sizeof(cp), &cp); + } + } + + if (lmp_inq_rssi_capable(hdev)) + hci_setup_inquiry_mode(req); + + if (lmp_inq_tx_pwr_capable(hdev)) + hci_req_add(req, HCI_OP_READ_INQ_RSP_TX_POWER, 0, NULL); + + if (lmp_ext_feat_capable(hdev)) { + struct hci_cp_read_local_ext_features cp; + + cp.page = 0x01; + hci_req_add(req, HCI_OP_READ_LOCAL_EXT_FEATURES, + sizeof(cp), &cp); + } + + if (test_bit(HCI_LINK_SECURITY, &hdev->dev_flags)) { + u8 enable = 1; + hci_req_add(req, HCI_OP_WRITE_AUTH_ENABLE, sizeof(enable), + &enable); + } +} + +static void hci_setup_link_policy(struct hci_request *req) +{ + struct hci_dev *hdev = req->hdev; + struct hci_cp_write_def_link_policy cp; + u16 link_policy = 0; + + if (lmp_rswitch_capable(hdev)) + link_policy |= HCI_LP_RSWITCH; + if (lmp_hold_capable(hdev)) + link_policy |= HCI_LP_HOLD; + if (lmp_sniff_capable(hdev)) + link_policy |= HCI_LP_SNIFF; + if (lmp_park_capable(hdev)) + link_policy |= HCI_LP_PARK; + + cp.policy = cpu_to_le16(link_policy); + hci_req_add(req, HCI_OP_WRITE_DEF_LINK_POLICY, sizeof(cp), &cp); +} + +static void hci_set_le_support(struct hci_request *req) +{ + struct hci_dev *hdev = req->hdev; 
+ struct hci_cp_write_le_host_supported cp; + + /* LE-only devices do not support explicit enablement */ + if (!lmp_bredr_capable(hdev)) + return; + + memset(&cp, 0, sizeof(cp)); + + if (test_bit(HCI_LE_ENABLED, &hdev->dev_flags)) { + cp.le = 0x01; + cp.simul = lmp_le_br_capable(hdev); + } + + if (cp.le != lmp_host_le_capable(hdev)) + hci_req_add(req, HCI_OP_WRITE_LE_HOST_SUPPORTED, sizeof(cp), + &cp); +} + +static void hci_init3_req(struct hci_request *req, unsigned long opt) +{ + struct hci_dev *hdev = req->hdev; + u8 p; + + if (hdev->commands[5] & 0x10) + hci_setup_link_policy(req); + + if (lmp_le_capable(hdev)) { + hci_set_le_support(req); + hci_update_ad(req); + } + + /* Read features beyond page 1 if available */ + for (p = 2; p < HCI_MAX_PAGES && p <= hdev->max_page; p++) { + struct hci_cp_read_local_ext_features cp; + + cp.page = p; + hci_req_add(req, HCI_OP_READ_LOCAL_EXT_FEATURES, + sizeof(cp), &cp); + } +} + +static int __hci_init(struct hci_dev *hdev) +{ + int err; + + err = __hci_req_sync(hdev, hci_init1_req, 0, HCI_INIT_TIMEOUT); + if (err < 0) + return err; + + /* HCI_BREDR covers both single-mode LE, BR/EDR and dual-mode + * BR/EDR/LE type controllers. AMP controllers only need the + * first stage init. + */ + if (hdev->dev_type != HCI_BREDR) + return 0; + + err = __hci_req_sync(hdev, hci_init2_req, 0, HCI_INIT_TIMEOUT); + if (err < 0) + return err; + + return __hci_req_sync(hdev, hci_init3_req, 0, HCI_INIT_TIMEOUT); +} + +static void hci_scan_req(struct hci_request *req, unsigned long opt) { __u8 scan = opt; - BT_DBG("%s %x", hdev->name, scan); + BT_DBG("%s %x", req->hdev->name, scan); /* Inquiry and Page scans */ - hci_send_cmd(hdev, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan); + hci_req_add(req, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan); } -static void hci_auth_req(struct hci_dev *hdev, unsigned long opt) +static void hci_auth_req(struct hci_request *req, unsigned long opt) { __u8 auth = opt; - BT_DBG("%s %x", hdev->name, auth); + BT_DBG("%s %x", req->hdev->name, auth); /* Authentication */ - hci_send_cmd(hdev, HCI_OP_WRITE_AUTH_ENABLE, 1, &auth); + hci_req_add(req, HCI_OP_WRITE_AUTH_ENABLE, 1, &auth); } -static void hci_encrypt_req(struct hci_dev *hdev, unsigned long opt) +static void hci_encrypt_req(struct hci_request *req, unsigned long opt) { __u8 encrypt = opt; - BT_DBG("%s %x", hdev->name, encrypt); + BT_DBG("%s %x", req->hdev->name, encrypt); /* Encryption */ - hci_send_cmd(hdev, HCI_OP_WRITE_ENCRYPT_MODE, 1, &encrypt); + hci_req_add(req, HCI_OP_WRITE_ENCRYPT_MODE, 1, &encrypt); } -static void hci_linkpol_req(struct hci_dev *hdev, unsigned long opt) +static void hci_linkpol_req(struct hci_request *req, unsigned long opt) { __le16 policy = cpu_to_le16(opt); - BT_DBG("%s %x", hdev->name, policy); + BT_DBG("%s %x", req->hdev->name, policy); /* Default link policy */ - hci_send_cmd(hdev, HCI_OP_WRITE_DEF_LINK_POLICY, 2, &policy); + hci_req_add(req, HCI_OP_WRITE_DEF_LINK_POLICY, 2, &policy); } /* Get HCI device by index. 
@@ -512,9 +916,10 @@ static int inquiry_cache_dump(struct hci_dev *hdev, int num, __u8 *buf) return copied; } -static void hci_inq_req(struct hci_dev *hdev, unsigned long opt) +static void hci_inq_req(struct hci_request *req, unsigned long opt) { struct hci_inquiry_req *ir = (struct hci_inquiry_req *) opt; + struct hci_dev *hdev = req->hdev; struct hci_cp_inquiry cp; BT_DBG("%s", hdev->name); @@ -526,7 +931,13 @@ static void hci_inq_req(struct hci_dev *hdev, unsigned long opt) memcpy(&cp.lap, &ir->lap, 3); cp.length = ir->length; cp.num_rsp = ir->num_rsp; - hci_send_cmd(hdev, HCI_OP_INQUIRY, sizeof(cp), &cp); + hci_req_add(req, HCI_OP_INQUIRY, sizeof(cp), &cp); +} + +static int wait_inquiry(void *word) +{ + schedule(); + return signal_pending(current); } int hci_inquiry(void __user *arg) @@ -556,9 +967,17 @@ int hci_inquiry(void __user *arg) timeo = ir.length * msecs_to_jiffies(2000); if (do_inquiry) { - err = hci_request(hdev, hci_inq_req, (unsigned long)&ir, timeo); + err = hci_req_sync(hdev, hci_inq_req, (unsigned long) &ir, + timeo); if (err < 0) goto done; + + /* Wait until Inquiry procedure finishes (HCI_INQUIRY flag is + * cleared). If it is interrupted by a signal, return -EINTR. + */ + if (wait_on_bit(&hdev->flags, HCI_INQUIRY, wait_inquiry, + TASK_INTERRUPTIBLE)) + return -EINTR; } /* for unlimited number of responses we will use buffer with @@ -654,39 +1073,29 @@ static u8 create_ad(struct hci_dev *hdev, u8 *ptr) return ad_len; } -int hci_update_ad(struct hci_dev *hdev) +void hci_update_ad(struct hci_request *req) { + struct hci_dev *hdev = req->hdev; struct hci_cp_le_set_adv_data cp; u8 len; - int err; - hci_dev_lock(hdev); - - if (!lmp_le_capable(hdev)) { - err = -EINVAL; - goto unlock; - } + if (!lmp_le_capable(hdev)) + return; memset(&cp, 0, sizeof(cp)); len = create_ad(hdev, cp.data); if (hdev->adv_data_len == len && - memcmp(cp.data, hdev->adv_data, len) == 0) { - err = 0; - goto unlock; - } + memcmp(cp.data, hdev->adv_data, len) == 0) + return; memcpy(hdev->adv_data, cp.data, sizeof(cp.data)); hdev->adv_data_len = len; cp.length = len; - err = hci_send_cmd(hdev, HCI_OP_LE_SET_ADV_DATA, sizeof(cp), &cp); - -unlock: - hci_dev_unlock(hdev); - return err; + hci_req_add(req, HCI_OP_LE_SET_ADV_DATA, sizeof(cp), &cp); } /* ---- HCI ioctl helpers ---- */ @@ -719,34 +1128,37 @@ int hci_dev_open(__u16 dev) goto done; } - if (test_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks)) - set_bit(HCI_RAW, &hdev->flags); - - /* Treat all non BR/EDR controllers as raw devices if - enable_hs is not set */ - if (hdev->dev_type != HCI_BREDR && !enable_hs) - set_bit(HCI_RAW, &hdev->flags); - if (hdev->open(hdev)) { ret = -EIO; goto done; } - if (!test_bit(HCI_RAW, &hdev->flags)) { - atomic_set(&hdev->cmd_cnt, 1); - set_bit(HCI_INIT, &hdev->flags); - hdev->init_last_cmd = 0; + atomic_set(&hdev->cmd_cnt, 1); + set_bit(HCI_INIT, &hdev->flags); - ret = __hci_request(hdev, hci_init_req, 0, HCI_INIT_TIMEOUT); + if (hdev->setup && test_bit(HCI_SETUP, &hdev->dev_flags)) + ret = hdev->setup(hdev); - clear_bit(HCI_INIT, &hdev->flags); + if (!ret) { + /* Treat all non BR/EDR controllers as raw devices if + * enable_hs is not set. 
+ */ + if (hdev->dev_type != HCI_BREDR && !enable_hs) + set_bit(HCI_RAW, &hdev->flags); + + if (test_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks)) + set_bit(HCI_RAW, &hdev->flags); + + if (!test_bit(HCI_RAW, &hdev->flags)) + ret = __hci_init(hdev); } + clear_bit(HCI_INIT, &hdev->flags); + if (!ret) { hci_dev_hold(hdev); set_bit(HCI_UP, &hdev->flags); hci_notify(hdev, HCI_DEV_UP); - hci_update_ad(hdev); if (!test_bit(HCI_SETUP, &hdev->dev_flags) && mgmt_valid_hdev(hdev)) { hci_dev_lock(hdev); @@ -828,7 +1240,7 @@ static int hci_dev_do_close(struct hci_dev *hdev) if (!test_bit(HCI_RAW, &hdev->flags) && test_bit(HCI_QUIRK_RESET_ON_CLOSE, &hdev->quirks)) { set_bit(HCI_INIT, &hdev->flags); - __hci_request(hdev, hci_reset_req, 0, HCI_CMD_TIMEOUT); + __hci_req_sync(hdev, hci_reset_req, 0, HCI_CMD_TIMEOUT); clear_bit(HCI_INIT, &hdev->flags); } @@ -847,10 +1259,17 @@ static int hci_dev_do_close(struct hci_dev *hdev) hdev->sent_cmd = NULL; } + kfree_skb(hdev->recv_evt); + hdev->recv_evt = NULL; + /* After this point our queues are empty * and no tasks are scheduled. */ hdev->close(hdev); + /* Clear flags */ + hdev->flags = 0; + hdev->dev_flags &= ~HCI_PERSISTENT_MASK; + if (!test_and_clear_bit(HCI_AUTO_OFF, &hdev->dev_flags) && mgmt_valid_hdev(hdev)) { hci_dev_lock(hdev); @@ -858,9 +1277,6 @@ static int hci_dev_do_close(struct hci_dev *hdev) hci_dev_unlock(hdev); } - /* Clear flags */ - hdev->flags = 0; - /* Controller radio is available but is currently powered down */ hdev->amp_status = 0; @@ -921,7 +1337,7 @@ int hci_dev_reset(__u16 dev) hdev->acl_cnt = 0; hdev->sco_cnt = 0; hdev->le_cnt = 0; if (!test_bit(HCI_RAW, &hdev->flags)) - ret = __hci_request(hdev, hci_reset_req, 0, HCI_INIT_TIMEOUT); + ret = __hci_req_sync(hdev, hci_reset_req, 0, HCI_INIT_TIMEOUT); done: hci_req_unlock(hdev); @@ -960,8 +1376,8 @@ int hci_dev_cmd(unsigned int cmd, void __user *arg) switch (cmd) { case HCISETAUTH: - err = hci_request(hdev, hci_auth_req, dr.dev_opt, - HCI_INIT_TIMEOUT); + err = hci_req_sync(hdev, hci_auth_req, dr.dev_opt, + HCI_INIT_TIMEOUT); break; case HCISETENCRYPT: @@ -972,24 +1388,24 @@ int hci_dev_cmd(unsigned int cmd, void __user *arg) if (!test_bit(HCI_AUTH, &hdev->flags)) { /* Auth must be enabled first */ - err = hci_request(hdev, hci_auth_req, dr.dev_opt, - HCI_INIT_TIMEOUT); + err = hci_req_sync(hdev, hci_auth_req, dr.dev_opt, + HCI_INIT_TIMEOUT); if (err) break; } - err = hci_request(hdev, hci_encrypt_req, dr.dev_opt, - HCI_INIT_TIMEOUT); + err = hci_req_sync(hdev, hci_encrypt_req, dr.dev_opt, + HCI_INIT_TIMEOUT); break; case HCISETSCAN: - err = hci_request(hdev, hci_scan_req, dr.dev_opt, - HCI_INIT_TIMEOUT); + err = hci_req_sync(hdev, hci_scan_req, dr.dev_opt, + HCI_INIT_TIMEOUT); break; case HCISETLINKPOL: - err = hci_request(hdev, hci_linkpol_req, dr.dev_opt, - HCI_INIT_TIMEOUT); + err = hci_req_sync(hdev, hci_linkpol_req, dr.dev_opt, + HCI_INIT_TIMEOUT); break; case HCISETLINKMODE: @@ -1146,7 +1562,8 @@ static void hci_power_on(struct work_struct *work) return; if (test_bit(HCI_AUTO_OFF, &hdev->dev_flags)) - schedule_delayed_work(&hdev->power_off, HCI_AUTO_OFF_TIMEOUT); + queue_delayed_work(hdev->req_workqueue, &hdev->power_off, + HCI_AUTO_OFF_TIMEOUT); if (test_and_clear_bit(HCI_SETUP, &hdev->dev_flags)) mgmt_index_added(hdev); @@ -1182,14 +1599,10 @@ static void hci_discov_off(struct work_struct *work) int hci_uuids_clear(struct hci_dev *hdev) { - struct list_head *p, *n; - - list_for_each_safe(p, n, &hdev->uuids) { - struct bt_uuid *uuid; + struct bt_uuid *uuid, *tmp; - uuid = list_entry(p, 
struct bt_uuid, list); - - list_del(p); + list_for_each_entry_safe(uuid, tmp, &hdev->uuids, list) { + list_del(&uuid->list); kfree(uuid); } @@ -1569,7 +1982,7 @@ int hci_blacklist_del(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 type) return mgmt_device_unblocked(hdev, bdaddr, type); } -static void le_scan_param_req(struct hci_dev *hdev, unsigned long opt) +static void le_scan_param_req(struct hci_request *req, unsigned long opt) { struct le_scan_params *param = (struct le_scan_params *) opt; struct hci_cp_le_set_scan_param cp; @@ -1579,18 +1992,18 @@ static void le_scan_param_req(struct hci_dev *hdev, unsigned long opt) cp.interval = cpu_to_le16(param->interval); cp.window = cpu_to_le16(param->window); - hci_send_cmd(hdev, HCI_OP_LE_SET_SCAN_PARAM, sizeof(cp), &cp); + hci_req_add(req, HCI_OP_LE_SET_SCAN_PARAM, sizeof(cp), &cp); } -static void le_scan_enable_req(struct hci_dev *hdev, unsigned long opt) +static void le_scan_enable_req(struct hci_request *req, unsigned long opt) { struct hci_cp_le_set_scan_enable cp; memset(&cp, 0, sizeof(cp)); - cp.enable = 1; - cp.filter_dup = 1; + cp.enable = LE_SCAN_ENABLE; + cp.filter_dup = LE_SCAN_FILTER_DUP_ENABLE; - hci_send_cmd(hdev, HCI_OP_LE_SET_SCAN_ENABLE, sizeof(cp), &cp); + hci_req_add(req, HCI_OP_LE_SET_SCAN_ENABLE, sizeof(cp), &cp); } static int hci_do_le_scan(struct hci_dev *hdev, u8 type, u16 interval, @@ -1611,18 +2024,18 @@ static int hci_do_le_scan(struct hci_dev *hdev, u8 type, u16 interval, hci_req_lock(hdev); - err = __hci_request(hdev, le_scan_param_req, (unsigned long) &param, - timeo); + err = __hci_req_sync(hdev, le_scan_param_req, (unsigned long) &param, + timeo); if (!err) - err = __hci_request(hdev, le_scan_enable_req, 0, timeo); + err = __hci_req_sync(hdev, le_scan_enable_req, 0, timeo); hci_req_unlock(hdev); if (err < 0) return err; - schedule_delayed_work(&hdev->le_scan_disable, - msecs_to_jiffies(timeout)); + queue_delayed_work(hdev->workqueue, &hdev->le_scan_disable, + timeout); return 0; } @@ -1732,7 +2145,6 @@ struct hci_dev *hci_alloc_dev(void) INIT_DELAYED_WORK(&hdev->discov_off, hci_discov_off); INIT_DELAYED_WORK(&hdev->le_scan_disable, le_scan_disable_work); - skb_queue_head_init(&hdev->driver_init); skb_queue_head_init(&hdev->rx_q); skb_queue_head_init(&hdev->cmd_q); skb_queue_head_init(&hdev->raw_q); @@ -1751,8 +2163,6 @@ EXPORT_SYMBOL(hci_alloc_dev); /* Free HCI device */ void hci_free_dev(struct hci_dev *hdev) { - skb_queue_purge(&hdev->driver_init); - /* will free via device release */ put_device(&hdev->dev); } @@ -1799,6 +2209,15 @@ int hci_register_dev(struct hci_dev *hdev) goto err; } + hdev->req_workqueue = alloc_workqueue(hdev->name, + WQ_HIGHPRI | WQ_UNBOUND | + WQ_MEM_RECLAIM, 1); + if (!hdev->req_workqueue) { + destroy_workqueue(hdev->workqueue); + error = -ENOMEM; + goto err; + } + error = hci_add_sysfs(hdev); if (error < 0) goto err_wqueue; @@ -1821,12 +2240,13 @@ int hci_register_dev(struct hci_dev *hdev) hci_notify(hdev, HCI_DEV_REG); hci_dev_hold(hdev); - schedule_work(&hdev->power_on); + queue_work(hdev->req_workqueue, &hdev->power_on); return id; err_wqueue: destroy_workqueue(hdev->workqueue); + destroy_workqueue(hdev->req_workqueue); err: ida_simple_remove(&hci_index_ida, hdev->id); write_lock(&hci_dev_list_lock); @@ -1880,6 +2300,7 @@ void hci_unregister_dev(struct hci_dev *hdev) hci_del_sysfs(hdev); destroy_workqueue(hdev->workqueue); + destroy_workqueue(hdev->req_workqueue); hci_dev_lock(hdev); hci_blacklist_clear(hdev); @@ -1921,7 +2342,7 @@ int hci_recv_frame(struct sk_buff *skb) return -ENXIO; } - 
/* Incomming skb */ + /* Incoming skb */ bt_cb(skb)->incoming = 1; /* Time stamp */ @@ -2152,20 +2573,55 @@ static int hci_send_frame(struct sk_buff *skb) return hdev->send(skb); } -/* Send HCI command */ -int hci_send_cmd(struct hci_dev *hdev, __u16 opcode, __u32 plen, void *param) +void hci_req_init(struct hci_request *req, struct hci_dev *hdev) +{ + skb_queue_head_init(&req->cmd_q); + req->hdev = hdev; + req->err = 0; +} + +int hci_req_run(struct hci_request *req, hci_req_complete_t complete) +{ + struct hci_dev *hdev = req->hdev; + struct sk_buff *skb; + unsigned long flags; + + BT_DBG("length %u", skb_queue_len(&req->cmd_q)); + + /* If an error occurred during request building, remove all HCI + * commands queued on the HCI request queue. + */ + if (req->err) { + skb_queue_purge(&req->cmd_q); + return req->err; + } + + /* Do not allow empty requests */ + if (skb_queue_empty(&req->cmd_q)) + return -ENODATA; + + skb = skb_peek_tail(&req->cmd_q); + bt_cb(skb)->req.complete = complete; + + spin_lock_irqsave(&hdev->cmd_q.lock, flags); + skb_queue_splice_tail(&req->cmd_q, &hdev->cmd_q); + spin_unlock_irqrestore(&hdev->cmd_q.lock, flags); + + queue_work(hdev->workqueue, &hdev->cmd_work); + + return 0; +} + +static struct sk_buff *hci_prepare_cmd(struct hci_dev *hdev, u16 opcode, + u32 plen, const void *param) { int len = HCI_COMMAND_HDR_SIZE + plen; struct hci_command_hdr *hdr; struct sk_buff *skb; - BT_DBG("%s opcode 0x%4.4x plen %d", hdev->name, opcode, plen); - skb = bt_skb_alloc(len, GFP_ATOMIC); - if (!skb) { - BT_ERR("%s no memory for command", hdev->name); - return -ENOMEM; - } + if (!skb) + return NULL; hdr = (struct hci_command_hdr *) skb_put(skb, HCI_COMMAND_HDR_SIZE); hdr->opcode = cpu_to_le16(opcode); @@ -2179,8 +2635,27 @@ int hci_send_cmd(struct hci_dev *hdev, __u16 opcode, __u32 plen, void *param) bt_cb(skb)->pkt_type = HCI_COMMAND_PKT; skb->dev = (void *) hdev; - if (test_bit(HCI_INIT, &hdev->flags)) - hdev->init_last_cmd = opcode; + return skb; +} + +/* Send HCI command */ +int hci_send_cmd(struct hci_dev *hdev, __u16 opcode, __u32 plen, + const void *param) +{ + struct sk_buff *skb; + + BT_DBG("%s opcode 0x%4.4x plen %d", hdev->name, opcode, plen); + + skb = hci_prepare_cmd(hdev, opcode, plen, param); + if (!skb) { + BT_ERR("%s no memory for command", hdev->name); + return -ENOMEM; + } + + /* Stand-alone HCI commands must be flagged as + * single-command requests. + */ + bt_cb(skb)->req.start = true; skb_queue_tail(&hdev->cmd_q, skb); queue_work(hdev->workqueue, &hdev->cmd_work); @@ -2188,6 +2663,43 @@ int hci_send_cmd(struct hci_dev *hdev, __u16 opcode, __u32 plen, void *param) return 0; } +/* Queue a command to an asynchronous HCI request */ +void hci_req_add_ev(struct hci_request *req, u16 opcode, u32 plen, + const void *param, u8 event) +{ + struct hci_dev *hdev = req->hdev; + struct sk_buff *skb; + + BT_DBG("%s opcode 0x%4.4x plen %d", hdev->name, opcode, plen); + + /* If an error occurred during request building, there is no point in + * queueing the HCI command. We can simply return. 
+ */ + if (req->err) + return; + + skb = hci_prepare_cmd(hdev, opcode, plen, param); + if (!skb) { + BT_ERR("%s no memory for command (opcode 0x%4.4x)", + hdev->name, opcode); + req->err = -ENOMEM; + return; + } + + if (skb_queue_empty(&req->cmd_q)) + bt_cb(skb)->req.start = true; + + bt_cb(skb)->req.event = event; + + skb_queue_tail(&req->cmd_q, skb); +} + +void hci_req_add(struct hci_request *req, u16 opcode, u32 plen, + const void *param) +{ + hci_req_add_ev(req, opcode, plen, param, 0); +} + /* Get data from the previously sent command */ void *hci_sent_cmd_data(struct hci_dev *hdev, __u16 opcode) { @@ -2390,7 +2902,7 @@ static void hci_link_tx_to(struct hci_dev *hdev, __u8 type) if (c->type == type && c->sent) { BT_ERR("%s killing stalled connection %pMR", hdev->name, &c->dst); - hci_acl_disconn(c, HCI_ERROR_REMOTE_USER_TERM); + hci_disconnect(c, HCI_ERROR_REMOTE_USER_TERM); } } @@ -2852,6 +3364,97 @@ static void hci_scodata_packet(struct hci_dev *hdev, struct sk_buff *skb) kfree_skb(skb); } +static bool hci_req_is_complete(struct hci_dev *hdev) +{ + struct sk_buff *skb; + + skb = skb_peek(&hdev->cmd_q); + if (!skb) + return true; + + return bt_cb(skb)->req.start; +} + +static void hci_resend_last(struct hci_dev *hdev) +{ + struct hci_command_hdr *sent; + struct sk_buff *skb; + u16 opcode; + + if (!hdev->sent_cmd) + return; + + sent = (void *) hdev->sent_cmd->data; + opcode = __le16_to_cpu(sent->opcode); + if (opcode == HCI_OP_RESET) + return; + + skb = skb_clone(hdev->sent_cmd, GFP_KERNEL); + if (!skb) + return; + + skb_queue_head(&hdev->cmd_q, skb); + queue_work(hdev->workqueue, &hdev->cmd_work); +} + +void hci_req_cmd_complete(struct hci_dev *hdev, u16 opcode, u8 status) +{ + hci_req_complete_t req_complete = NULL; + struct sk_buff *skb; + unsigned long flags; + + BT_DBG("opcode 0x%04x status 0x%02x", opcode, status); + + /* If the completed command doesn't match the last one that was + * sent we need to do special handling of it. + */ + if (!hci_sent_cmd_data(hdev, opcode)) { + /* Some CSR based controllers generate a spontaneous + * reset complete event during init and any pending + * command will never be completed. In such a case we + * need to resend whatever was the last sent + * command. + */ + if (test_bit(HCI_INIT, &hdev->flags) && opcode == HCI_OP_RESET) + hci_resend_last(hdev); + + return; + } + + /* If the command succeeded and there's still more commands in + * this request the request is not yet complete. + */ + if (!status && !hci_req_is_complete(hdev)) + return; + + /* If this was the last command in a request the complete + * callback would be found in hdev->sent_cmd instead of the + * command queue (hdev->cmd_q). 
+ */ + if (hdev->sent_cmd) { + req_complete = bt_cb(hdev->sent_cmd)->req.complete; + if (req_complete) + goto call_complete; + } + + /* Remove all pending commands belonging to this request */ + spin_lock_irqsave(&hdev->cmd_q.lock, flags); + while ((skb = __skb_dequeue(&hdev->cmd_q))) { + if (bt_cb(skb)->req.start) { + __skb_queue_head(&hdev->cmd_q, skb); + break; + } + + req_complete = bt_cb(skb)->req.complete; + kfree_skb(skb); + } + spin_unlock_irqrestore(&hdev->cmd_q.lock, flags); + +call_complete: + if (req_complete) + req_complete(hdev, status); +} + static void hci_rx_work(struct work_struct *work) { struct hci_dev *hdev = container_of(work, struct hci_dev, rx_work); diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 81b44481d0d9..b93cd2eb5d58 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -48,13 +48,13 @@ static void hci_cc_inquiry_cancel(struct hci_dev *hdev, struct sk_buff *skb) } clear_bit(HCI_INQUIRY, &hdev->flags); + smp_mb__after_clear_bit(); /* wake_up_bit advises about this barrier */ + wake_up_bit(&hdev->flags, HCI_INQUIRY); hci_dev_lock(hdev); hci_discovery_set_state(hdev, DISCOVERY_STOPPED); hci_dev_unlock(hdev); - hci_req_complete(hdev, HCI_OP_INQUIRY_CANCEL, status); - hci_conn_check_pending(hdev); } @@ -183,8 +183,6 @@ static void hci_cc_write_def_link_policy(struct hci_dev *hdev, if (!status) hdev->link_policy = get_unaligned_le16(sent); - - hci_req_complete(hdev, HCI_OP_WRITE_DEF_LINK_POLICY, status); } static void hci_cc_reset(struct hci_dev *hdev, struct sk_buff *skb) @@ -195,11 +193,8 @@ static void hci_cc_reset(struct hci_dev *hdev, struct sk_buff *skb) clear_bit(HCI_RESET, &hdev->flags); - hci_req_complete(hdev, HCI_OP_RESET, status); - /* Reset all non-persistent flags */ - hdev->dev_flags &= ~(BIT(HCI_LE_SCAN) | BIT(HCI_PENDING_CLASS) | - BIT(HCI_PERIODIC_INQ)); + hdev->dev_flags &= ~HCI_PERSISTENT_MASK; hdev->discovery.state = DISCOVERY_STOPPED; hdev->inq_tx_power = HCI_TX_POWER_INVALID; @@ -228,11 +223,6 @@ static void hci_cc_write_local_name(struct hci_dev *hdev, struct sk_buff *skb) memcpy(hdev->dev_name, sent, HCI_MAX_NAME_LENGTH); hci_dev_unlock(hdev); - - if (!status && !test_bit(HCI_INIT, &hdev->flags)) - hci_update_ad(hdev); - - hci_req_complete(hdev, HCI_OP_WRITE_LOCAL_NAME, status); } static void hci_cc_read_local_name(struct hci_dev *hdev, struct sk_buff *skb) @@ -270,8 +260,6 @@ static void hci_cc_write_auth_enable(struct hci_dev *hdev, struct sk_buff *skb) if (test_bit(HCI_MGMT, &hdev->dev_flags)) mgmt_auth_enable_complete(hdev, status); - - hci_req_complete(hdev, HCI_OP_WRITE_AUTH_ENABLE, status); } static void hci_cc_write_encrypt_mode(struct hci_dev *hdev, struct sk_buff *skb) @@ -293,8 +281,6 @@ static void hci_cc_write_encrypt_mode(struct hci_dev *hdev, struct sk_buff *skb) else clear_bit(HCI_ENCRYPT, &hdev->flags); } - - hci_req_complete(hdev, HCI_OP_WRITE_ENCRYPT_MODE, status); } static void hci_cc_write_scan_enable(struct hci_dev *hdev, struct sk_buff *skb) @@ -343,7 +329,6 @@ static void hci_cc_write_scan_enable(struct hci_dev *hdev, struct sk_buff *skb) done: hci_dev_unlock(hdev); - hci_req_complete(hdev, HCI_OP_WRITE_SCAN_ENABLE, status); } static void hci_cc_read_class_of_dev(struct hci_dev *hdev, struct sk_buff *skb) @@ -435,15 +420,6 @@ static void hci_cc_write_voice_setting(struct hci_dev *hdev, hdev->notify(hdev, HCI_NOTIFY_VOICE_SETTING); } -static void hci_cc_host_buffer_size(struct hci_dev *hdev, struct sk_buff *skb) -{ - __u8 status = *((__u8 *) skb->data); - - BT_DBG("%s 
status 0x%2.2x", hdev->name, status); - - hci_req_complete(hdev, HCI_OP_HOST_BUFFER_SIZE, status); -} - static void hci_cc_write_ssp_mode(struct hci_dev *hdev, struct sk_buff *skb) { __u8 status = *((__u8 *) skb->data); @@ -457,9 +433,9 @@ static void hci_cc_write_ssp_mode(struct hci_dev *hdev, struct sk_buff *skb) if (!status) { if (sent->mode) - hdev->host_features[0] |= LMP_HOST_SSP; + hdev->features[1][0] |= LMP_HOST_SSP; else - hdev->host_features[0] &= ~LMP_HOST_SSP; + hdev->features[1][0] &= ~LMP_HOST_SSP; } if (test_bit(HCI_MGMT, &hdev->dev_flags)) @@ -472,202 +448,6 @@ static void hci_cc_write_ssp_mode(struct hci_dev *hdev, struct sk_buff *skb) } } -static u8 hci_get_inquiry_mode(struct hci_dev *hdev) -{ - if (lmp_ext_inq_capable(hdev)) - return 2; - - if (lmp_inq_rssi_capable(hdev)) - return 1; - - if (hdev->manufacturer == 11 && hdev->hci_rev == 0x00 && - hdev->lmp_subver == 0x0757) - return 1; - - if (hdev->manufacturer == 15) { - if (hdev->hci_rev == 0x03 && hdev->lmp_subver == 0x6963) - return 1; - if (hdev->hci_rev == 0x09 && hdev->lmp_subver == 0x6963) - return 1; - if (hdev->hci_rev == 0x00 && hdev->lmp_subver == 0x6965) - return 1; - } - - if (hdev->manufacturer == 31 && hdev->hci_rev == 0x2005 && - hdev->lmp_subver == 0x1805) - return 1; - - return 0; -} - -static void hci_setup_inquiry_mode(struct hci_dev *hdev) -{ - u8 mode; - - mode = hci_get_inquiry_mode(hdev); - - hci_send_cmd(hdev, HCI_OP_WRITE_INQUIRY_MODE, 1, &mode); -} - -static void hci_setup_event_mask(struct hci_dev *hdev) -{ - /* The second byte is 0xff instead of 0x9f (two reserved bits - * disabled) since a Broadcom 1.2 dongle doesn't respond to the - * command otherwise */ - u8 events[8] = { 0xff, 0xff, 0xfb, 0xff, 0x00, 0x00, 0x00, 0x00 }; - - /* CSR 1.1 dongles does not accept any bitfield so don't try to set - * any event mask for pre 1.2 devices */ - if (hdev->hci_ver < BLUETOOTH_VER_1_2) - return; - - if (lmp_bredr_capable(hdev)) { - events[4] |= 0x01; /* Flow Specification Complete */ - events[4] |= 0x02; /* Inquiry Result with RSSI */ - events[4] |= 0x04; /* Read Remote Extended Features Complete */ - events[5] |= 0x08; /* Synchronous Connection Complete */ - events[5] |= 0x10; /* Synchronous Connection Changed */ - } - - if (lmp_inq_rssi_capable(hdev)) - events[4] |= 0x02; /* Inquiry Result with RSSI */ - - if (lmp_sniffsubr_capable(hdev)) - events[5] |= 0x20; /* Sniff Subrating */ - - if (lmp_pause_enc_capable(hdev)) - events[5] |= 0x80; /* Encryption Key Refresh Complete */ - - if (lmp_ext_inq_capable(hdev)) - events[5] |= 0x40; /* Extended Inquiry Result */ - - if (lmp_no_flush_capable(hdev)) - events[7] |= 0x01; /* Enhanced Flush Complete */ - - if (lmp_lsto_capable(hdev)) - events[6] |= 0x80; /* Link Supervision Timeout Changed */ - - if (lmp_ssp_capable(hdev)) { - events[6] |= 0x01; /* IO Capability Request */ - events[6] |= 0x02; /* IO Capability Response */ - events[6] |= 0x04; /* User Confirmation Request */ - events[6] |= 0x08; /* User Passkey Request */ - events[6] |= 0x10; /* Remote OOB Data Request */ - events[6] |= 0x20; /* Simple Pairing Complete */ - events[7] |= 0x04; /* User Passkey Notification */ - events[7] |= 0x08; /* Keypress Notification */ - events[7] |= 0x10; /* Remote Host Supported - * Features Notification */ - } - - if (lmp_le_capable(hdev)) - events[7] |= 0x20; /* LE Meta-Event */ - - hci_send_cmd(hdev, HCI_OP_SET_EVENT_MASK, sizeof(events), events); - - if (lmp_le_capable(hdev)) { - memset(events, 0, sizeof(events)); - events[0] = 0x1f; - hci_send_cmd(hdev, 
HCI_OP_LE_SET_EVENT_MASK, - sizeof(events), events); - } -} - -static void bredr_setup(struct hci_dev *hdev) -{ - struct hci_cp_delete_stored_link_key cp; - __le16 param; - __u8 flt_type; - - /* Read Buffer Size (ACL mtu, max pkt, etc.) */ - hci_send_cmd(hdev, HCI_OP_READ_BUFFER_SIZE, 0, NULL); - - /* Read Class of Device */ - hci_send_cmd(hdev, HCI_OP_READ_CLASS_OF_DEV, 0, NULL); - - /* Read Local Name */ - hci_send_cmd(hdev, HCI_OP_READ_LOCAL_NAME, 0, NULL); - - /* Read Voice Setting */ - hci_send_cmd(hdev, HCI_OP_READ_VOICE_SETTING, 0, NULL); - - /* Clear Event Filters */ - flt_type = HCI_FLT_CLEAR_ALL; - hci_send_cmd(hdev, HCI_OP_SET_EVENT_FLT, 1, &flt_type); - - /* Connection accept timeout ~20 secs */ - param = __constant_cpu_to_le16(0x7d00); - hci_send_cmd(hdev, HCI_OP_WRITE_CA_TIMEOUT, 2, &param); - - bacpy(&cp.bdaddr, BDADDR_ANY); - cp.delete_all = 1; - hci_send_cmd(hdev, HCI_OP_DELETE_STORED_LINK_KEY, sizeof(cp), &cp); -} - -static void le_setup(struct hci_dev *hdev) -{ - /* Read LE Buffer Size */ - hci_send_cmd(hdev, HCI_OP_LE_READ_BUFFER_SIZE, 0, NULL); - - /* Read LE Advertising Channel TX Power */ - hci_send_cmd(hdev, HCI_OP_LE_READ_ADV_TX_POWER, 0, NULL); -} - -static void hci_setup(struct hci_dev *hdev) -{ - if (hdev->dev_type != HCI_BREDR) - return; - - /* Read BD Address */ - hci_send_cmd(hdev, HCI_OP_READ_BD_ADDR, 0, NULL); - - if (lmp_bredr_capable(hdev)) - bredr_setup(hdev); - - if (lmp_le_capable(hdev)) - le_setup(hdev); - - hci_setup_event_mask(hdev); - - if (hdev->hci_ver > BLUETOOTH_VER_1_1) - hci_send_cmd(hdev, HCI_OP_READ_LOCAL_COMMANDS, 0, NULL); - - if (lmp_ssp_capable(hdev)) { - if (test_bit(HCI_SSP_ENABLED, &hdev->dev_flags)) { - u8 mode = 0x01; - hci_send_cmd(hdev, HCI_OP_WRITE_SSP_MODE, - sizeof(mode), &mode); - } else { - struct hci_cp_write_eir cp; - - memset(hdev->eir, 0, sizeof(hdev->eir)); - memset(&cp, 0, sizeof(cp)); - - hci_send_cmd(hdev, HCI_OP_WRITE_EIR, sizeof(cp), &cp); - } - } - - if (lmp_inq_rssi_capable(hdev)) - hci_setup_inquiry_mode(hdev); - - if (lmp_inq_tx_pwr_capable(hdev)) - hci_send_cmd(hdev, HCI_OP_READ_INQ_RSP_TX_POWER, 0, NULL); - - if (lmp_ext_feat_capable(hdev)) { - struct hci_cp_read_local_ext_features cp; - - cp.page = 0x01; - hci_send_cmd(hdev, HCI_OP_READ_LOCAL_EXT_FEATURES, sizeof(cp), - &cp); - } - - if (test_bit(HCI_LINK_SECURITY, &hdev->dev_flags)) { - u8 enable = 1; - hci_send_cmd(hdev, HCI_OP_WRITE_AUTH_ENABLE, sizeof(enable), - &enable); - } -} - static void hci_cc_read_local_version(struct hci_dev *hdev, struct sk_buff *skb) { struct hci_rp_read_local_version *rp = (void *) skb->data; @@ -675,7 +455,7 @@ static void hci_cc_read_local_version(struct hci_dev *hdev, struct sk_buff *skb) BT_DBG("%s status 0x%2.2x", hdev->name, rp->status); if (rp->status) - goto done; + return; hdev->hci_ver = rp->hci_ver; hdev->hci_rev = __le16_to_cpu(rp->hci_rev); @@ -685,30 +465,6 @@ static void hci_cc_read_local_version(struct hci_dev *hdev, struct sk_buff *skb) BT_DBG("%s manufacturer 0x%4.4x hci ver %d:%d", hdev->name, hdev->manufacturer, hdev->hci_ver, hdev->hci_rev); - - if (test_bit(HCI_INIT, &hdev->flags)) - hci_setup(hdev); - -done: - hci_req_complete(hdev, HCI_OP_READ_LOCAL_VERSION, rp->status); -} - -static void hci_setup_link_policy(struct hci_dev *hdev) -{ - struct hci_cp_write_def_link_policy cp; - u16 link_policy = 0; - - if (lmp_rswitch_capable(hdev)) - link_policy |= HCI_LP_RSWITCH; - if (lmp_hold_capable(hdev)) - link_policy |= HCI_LP_HOLD; - if (lmp_sniff_capable(hdev)) - link_policy |= HCI_LP_SNIFF; - if 
(lmp_park_capable(hdev)) - link_policy |= HCI_LP_PARK; - - cp.policy = cpu_to_le16(link_policy); - hci_send_cmd(hdev, HCI_OP_WRITE_DEF_LINK_POLICY, sizeof(cp), &cp); } static void hci_cc_read_local_commands(struct hci_dev *hdev, @@ -718,16 +474,8 @@ static void hci_cc_read_local_commands(struct hci_dev *hdev, BT_DBG("%s status 0x%2.2x", hdev->name, rp->status); - if (rp->status) - goto done; - - memcpy(hdev->commands, rp->commands, sizeof(hdev->commands)); - - if (test_bit(HCI_INIT, &hdev->flags) && (hdev->commands[5] & 0x10)) - hci_setup_link_policy(hdev); - -done: - hci_req_complete(hdev, HCI_OP_READ_LOCAL_COMMANDS, rp->status); + if (!rp->status) + memcpy(hdev->commands, rp->commands, sizeof(hdev->commands)); } static void hci_cc_read_local_features(struct hci_dev *hdev, @@ -745,18 +493,18 @@ static void hci_cc_read_local_features(struct hci_dev *hdev, /* Adjust default settings according to features * supported by device. */ - if (hdev->features[0] & LMP_3SLOT) + if (hdev->features[0][0] & LMP_3SLOT) hdev->pkt_type |= (HCI_DM3 | HCI_DH3); - if (hdev->features[0] & LMP_5SLOT) + if (hdev->features[0][0] & LMP_5SLOT) hdev->pkt_type |= (HCI_DM5 | HCI_DH5); - if (hdev->features[1] & LMP_HV2) { + if (hdev->features[0][1] & LMP_HV2) { hdev->pkt_type |= (HCI_HV2); hdev->esco_type |= (ESCO_HV2); } - if (hdev->features[1] & LMP_HV3) { + if (hdev->features[0][1] & LMP_HV3) { hdev->pkt_type |= (HCI_HV3); hdev->esco_type |= (ESCO_HV3); } @@ -764,42 +512,26 @@ static void hci_cc_read_local_features(struct hci_dev *hdev, if (lmp_esco_capable(hdev)) hdev->esco_type |= (ESCO_EV3); - if (hdev->features[4] & LMP_EV4) + if (hdev->features[0][4] & LMP_EV4) hdev->esco_type |= (ESCO_EV4); - if (hdev->features[4] & LMP_EV5) + if (hdev->features[0][4] & LMP_EV5) hdev->esco_type |= (ESCO_EV5); - if (hdev->features[5] & LMP_EDR_ESCO_2M) + if (hdev->features[0][5] & LMP_EDR_ESCO_2M) hdev->esco_type |= (ESCO_2EV3); - if (hdev->features[5] & LMP_EDR_ESCO_3M) + if (hdev->features[0][5] & LMP_EDR_ESCO_3M) hdev->esco_type |= (ESCO_3EV3); - if (hdev->features[5] & LMP_EDR_3S_ESCO) + if (hdev->features[0][5] & LMP_EDR_3S_ESCO) hdev->esco_type |= (ESCO_2EV5 | ESCO_3EV5); BT_DBG("%s features 0x%.2x%.2x%.2x%.2x%.2x%.2x%.2x%.2x", hdev->name, - hdev->features[0], hdev->features[1], - hdev->features[2], hdev->features[3], - hdev->features[4], hdev->features[5], - hdev->features[6], hdev->features[7]); -} - -static void hci_set_le_support(struct hci_dev *hdev) -{ - struct hci_cp_write_le_host_supported cp; - - memset(&cp, 0, sizeof(cp)); - - if (test_bit(HCI_LE_ENABLED, &hdev->dev_flags)) { - cp.le = 1; - cp.simul = lmp_le_br_capable(hdev); - } - - if (cp.le != lmp_host_le_capable(hdev)) - hci_send_cmd(hdev, HCI_OP_WRITE_LE_HOST_SUPPORTED, sizeof(cp), - &cp); + hdev->features[0][0], hdev->features[0][1], + hdev->features[0][2], hdev->features[0][3], + hdev->features[0][4], hdev->features[0][5], + hdev->features[0][6], hdev->features[0][7]); } static void hci_cc_read_local_ext_features(struct hci_dev *hdev, @@ -810,22 +542,12 @@ static void hci_cc_read_local_ext_features(struct hci_dev *hdev, BT_DBG("%s status 0x%2.2x", hdev->name, rp->status); if (rp->status) - goto done; - - switch (rp->page) { - case 0: - memcpy(hdev->features, rp->features, 8); - break; - case 1: - memcpy(hdev->host_features, rp->features, 8); - break; - } + return; - if (test_bit(HCI_INIT, &hdev->flags) && lmp_le_capable(hdev)) - hci_set_le_support(hdev); + hdev->max_page = rp->max_page; -done: - hci_req_complete(hdev, HCI_OP_READ_LOCAL_EXT_FEATURES, 
rp->status); + if (rp->page < HCI_MAX_PAGES) + memcpy(hdev->features[rp->page], rp->features, 8); } static void hci_cc_read_flow_control_mode(struct hci_dev *hdev, @@ -835,12 +557,8 @@ static void hci_cc_read_flow_control_mode(struct hci_dev *hdev, BT_DBG("%s status 0x%2.2x", hdev->name, rp->status); - if (rp->status) - return; - - hdev->flow_ctl_mode = rp->mode; - - hci_req_complete(hdev, HCI_OP_READ_FLOW_CONTROL_MODE, rp->status); + if (!rp->status) + hdev->flow_ctl_mode = rp->mode; } static void hci_cc_read_buffer_size(struct hci_dev *hdev, struct sk_buff *skb) @@ -877,8 +595,65 @@ static void hci_cc_read_bd_addr(struct hci_dev *hdev, struct sk_buff *skb) if (!rp->status) bacpy(&hdev->bdaddr, &rp->bdaddr); +} + +static void hci_cc_read_page_scan_activity(struct hci_dev *hdev, + struct sk_buff *skb) +{ + struct hci_rp_read_page_scan_activity *rp = (void *) skb->data; + + BT_DBG("%s status 0x%2.2x", hdev->name, rp->status); - hci_req_complete(hdev, HCI_OP_READ_BD_ADDR, rp->status); + if (test_bit(HCI_INIT, &hdev->flags) && !rp->status) { + hdev->page_scan_interval = __le16_to_cpu(rp->interval); + hdev->page_scan_window = __le16_to_cpu(rp->window); + } +} + +static void hci_cc_write_page_scan_activity(struct hci_dev *hdev, + struct sk_buff *skb) +{ + u8 status = *((u8 *) skb->data); + struct hci_cp_write_page_scan_activity *sent; + + BT_DBG("%s status 0x%2.2x", hdev->name, status); + + if (status) + return; + + sent = hci_sent_cmd_data(hdev, HCI_OP_WRITE_PAGE_SCAN_ACTIVITY); + if (!sent) + return; + + hdev->page_scan_interval = __le16_to_cpu(sent->interval); + hdev->page_scan_window = __le16_to_cpu(sent->window); +} + +static void hci_cc_read_page_scan_type(struct hci_dev *hdev, + struct sk_buff *skb) +{ + struct hci_rp_read_page_scan_type *rp = (void *) skb->data; + + BT_DBG("%s status 0x%2.2x", hdev->name, rp->status); + + if (test_bit(HCI_INIT, &hdev->flags) && !rp->status) + hdev->page_scan_type = rp->type; +} + +static void hci_cc_write_page_scan_type(struct hci_dev *hdev, + struct sk_buff *skb) +{ + u8 status = *((u8 *) skb->data); + u8 *type; + + BT_DBG("%s status 0x%2.2x", hdev->name, status); + + if (status) + return; + + type = hci_sent_cmd_data(hdev, HCI_OP_WRITE_PAGE_SCAN_TYPE); + if (type) + hdev->page_scan_type = *type; } static void hci_cc_read_data_block_size(struct hci_dev *hdev, @@ -899,17 +674,6 @@ static void hci_cc_read_data_block_size(struct hci_dev *hdev, BT_DBG("%s blk mtu %d cnt %d len %d", hdev->name, hdev->block_mtu, hdev->block_cnt, hdev->block_len); - - hci_req_complete(hdev, HCI_OP_READ_DATA_BLOCK_SIZE, rp->status); -} - -static void hci_cc_write_ca_timeout(struct hci_dev *hdev, struct sk_buff *skb) -{ - __u8 status = *((__u8 *) skb->data); - - BT_DBG("%s status 0x%2.2x", hdev->name, status); - - hci_req_complete(hdev, HCI_OP_WRITE_CA_TIMEOUT, status); } static void hci_cc_read_local_amp_info(struct hci_dev *hdev, @@ -933,8 +697,6 @@ static void hci_cc_read_local_amp_info(struct hci_dev *hdev, hdev->amp_be_flush_to = __le32_to_cpu(rp->be_flush_to); hdev->amp_max_flush_to = __le32_to_cpu(rp->max_flush_to); - hci_req_complete(hdev, HCI_OP_READ_LOCAL_AMP_INFO, rp->status); - a2mp_rsp: a2mp_send_getinfo_rsp(hdev); } @@ -976,35 +738,6 @@ a2mp_rsp: a2mp_send_create_phy_link_req(hdev, rp->status); } -static void hci_cc_delete_stored_link_key(struct hci_dev *hdev, - struct sk_buff *skb) -{ - __u8 status = *((__u8 *) skb->data); - - BT_DBG("%s status 0x%2.2x", hdev->name, status); - - hci_req_complete(hdev, HCI_OP_DELETE_STORED_LINK_KEY, status); -} - -static void 
hci_cc_set_event_mask(struct hci_dev *hdev, struct sk_buff *skb) -{ - __u8 status = *((__u8 *) skb->data); - - BT_DBG("%s status 0x%2.2x", hdev->name, status); - - hci_req_complete(hdev, HCI_OP_SET_EVENT_MASK, status); -} - -static void hci_cc_write_inquiry_mode(struct hci_dev *hdev, - struct sk_buff *skb) -{ - __u8 status = *((__u8 *) skb->data); - - BT_DBG("%s status 0x%2.2x", hdev->name, status); - - hci_req_complete(hdev, HCI_OP_WRITE_INQUIRY_MODE, status); -} - static void hci_cc_read_inq_rsp_tx_power(struct hci_dev *hdev, struct sk_buff *skb) { @@ -1014,17 +747,6 @@ static void hci_cc_read_inq_rsp_tx_power(struct hci_dev *hdev, if (!rp->status) hdev->inq_tx_power = rp->tx_power; - - hci_req_complete(hdev, HCI_OP_READ_INQ_RSP_TX_POWER, rp->status); -} - -static void hci_cc_set_event_flt(struct hci_dev *hdev, struct sk_buff *skb) -{ - __u8 status = *((__u8 *) skb->data); - - BT_DBG("%s status 0x%2.2x", hdev->name, status); - - hci_req_complete(hdev, HCI_OP_SET_EVENT_FLT, status); } static void hci_cc_pin_code_reply(struct hci_dev *hdev, struct sk_buff *skb) @@ -1086,33 +808,28 @@ static void hci_cc_le_read_buffer_size(struct hci_dev *hdev, hdev->le_cnt = hdev->le_pkts; BT_DBG("%s le mtu %d:%d", hdev->name, hdev->le_mtu, hdev->le_pkts); - - hci_req_complete(hdev, HCI_OP_LE_READ_BUFFER_SIZE, rp->status); } -static void hci_cc_le_read_adv_tx_power(struct hci_dev *hdev, - struct sk_buff *skb) +static void hci_cc_le_read_local_features(struct hci_dev *hdev, + struct sk_buff *skb) { - struct hci_rp_le_read_adv_tx_power *rp = (void *) skb->data; + struct hci_rp_le_read_local_features *rp = (void *) skb->data; BT_DBG("%s status 0x%2.2x", hdev->name, rp->status); - if (!rp->status) { - hdev->adv_tx_power = rp->tx_power; - if (!test_bit(HCI_INIT, &hdev->flags)) - hci_update_ad(hdev); - } - - hci_req_complete(hdev, HCI_OP_LE_READ_ADV_TX_POWER, rp->status); + if (!rp->status) + memcpy(hdev->le_features, rp->features, 8); } -static void hci_cc_le_set_event_mask(struct hci_dev *hdev, struct sk_buff *skb) +static void hci_cc_le_read_adv_tx_power(struct hci_dev *hdev, + struct sk_buff *skb) { - __u8 status = *((__u8 *) skb->data); + struct hci_rp_le_read_adv_tx_power *rp = (void *) skb->data; - BT_DBG("%s status 0x%2.2x", hdev->name, status); + BT_DBG("%s status 0x%2.2x", hdev->name, rp->status); - hci_req_complete(hdev, HCI_OP_LE_SET_EVENT_MASK, status); + if (!rp->status) + hdev->adv_tx_power = rp->tx_power; } static void hci_cc_user_confirm_reply(struct hci_dev *hdev, struct sk_buff *skb) @@ -1209,12 +926,15 @@ static void hci_cc_le_set_adv_enable(struct hci_dev *hdev, struct sk_buff *skb) clear_bit(HCI_LE_PERIPHERAL, &hdev->dev_flags); } - hci_dev_unlock(hdev); + if (!test_bit(HCI_INIT, &hdev->flags)) { + struct hci_request req; - if (!test_bit(HCI_INIT, &hdev->flags)) - hci_update_ad(hdev); + hci_req_init(&req, hdev); + hci_update_ad(&req); + hci_req_run(&req, NULL); + } - hci_req_complete(hdev, HCI_OP_LE_SET_ADV_ENABLE, status); + hci_dev_unlock(hdev); } static void hci_cc_le_set_scan_param(struct hci_dev *hdev, struct sk_buff *skb) @@ -1223,8 +943,6 @@ static void hci_cc_le_set_scan_param(struct hci_dev *hdev, struct sk_buff *skb) BT_DBG("%s status 0x%2.2x", hdev->name, status); - hci_req_complete(hdev, HCI_OP_LE_SET_SCAN_PARAM, status); - if (status) { hci_dev_lock(hdev); mgmt_start_discovery_failed(hdev, status); @@ -1246,9 +964,7 @@ static void hci_cc_le_set_scan_enable(struct hci_dev *hdev, return; switch (cp->enable) { - case LE_SCANNING_ENABLED: - hci_req_complete(hdev, 
HCI_OP_LE_SET_SCAN_ENABLE, status); - + case LE_SCAN_ENABLE: if (status) { hci_dev_lock(hdev); mgmt_start_discovery_failed(hdev, status); @@ -1263,7 +979,7 @@ static void hci_cc_le_set_scan_enable(struct hci_dev *hdev, hci_dev_unlock(hdev); break; - case LE_SCANNING_DISABLED: + case LE_SCAN_DISABLE: if (status) { hci_dev_lock(hdev); mgmt_stop_discovery_failed(hdev, status); @@ -1290,28 +1006,26 @@ static void hci_cc_le_set_scan_enable(struct hci_dev *hdev, } } -static void hci_cc_le_ltk_reply(struct hci_dev *hdev, struct sk_buff *skb) +static void hci_cc_le_read_white_list_size(struct hci_dev *hdev, + struct sk_buff *skb) { - struct hci_rp_le_ltk_reply *rp = (void *) skb->data; + struct hci_rp_le_read_white_list_size *rp = (void *) skb->data; - BT_DBG("%s status 0x%2.2x", hdev->name, rp->status); + BT_DBG("%s status 0x%2.2x size %u", hdev->name, rp->status, rp->size); - if (rp->status) - return; - - hci_req_complete(hdev, HCI_OP_LE_LTK_REPLY, rp->status); + if (!rp->status) + hdev->le_white_list_size = rp->size; } -static void hci_cc_le_ltk_neg_reply(struct hci_dev *hdev, struct sk_buff *skb) +static void hci_cc_le_read_supported_states(struct hci_dev *hdev, + struct sk_buff *skb) { - struct hci_rp_le_ltk_neg_reply *rp = (void *) skb->data; + struct hci_rp_le_read_supported_states *rp = (void *) skb->data; BT_DBG("%s status 0x%2.2x", hdev->name, rp->status); - if (rp->status) - return; - - hci_req_complete(hdev, HCI_OP_LE_LTK_NEG_REPLY, rp->status); + if (!rp->status) + memcpy(hdev->le_states, rp->le_states, 8); } static void hci_cc_write_le_host_supported(struct hci_dev *hdev, @@ -1328,21 +1042,19 @@ static void hci_cc_write_le_host_supported(struct hci_dev *hdev, if (!status) { if (sent->le) - hdev->host_features[0] |= LMP_HOST_LE; + hdev->features[1][0] |= LMP_HOST_LE; else - hdev->host_features[0] &= ~LMP_HOST_LE; + hdev->features[1][0] &= ~LMP_HOST_LE; if (sent->simul) - hdev->host_features[0] |= LMP_HOST_LE_BREDR; + hdev->features[1][0] |= LMP_HOST_LE_BREDR; else - hdev->host_features[0] &= ~LMP_HOST_LE_BREDR; + hdev->features[1][0] &= ~LMP_HOST_LE_BREDR; } if (test_bit(HCI_MGMT, &hdev->dev_flags) && !test_bit(HCI_INIT, &hdev->flags)) mgmt_le_enable_complete(hdev, sent->le, status); - - hci_req_complete(hdev, HCI_OP_WRITE_LE_HOST_SUPPORTED, status); } static void hci_cc_write_remote_amp_assoc(struct hci_dev *hdev, @@ -1364,7 +1076,6 @@ static void hci_cs_inquiry(struct hci_dev *hdev, __u8 status) BT_DBG("%s status 0x%2.2x", hdev->name, status); if (status) { - hci_req_complete(hdev, HCI_OP_INQUIRY, status); hci_conn_check_pending(hdev); hci_dev_lock(hdev); if (test_bit(HCI_MGMT, &hdev->dev_flags)) @@ -1475,7 +1186,7 @@ static void hci_cs_auth_requested(struct hci_dev *hdev, __u8 status) if (conn) { if (conn->state == BT_CONFIG) { hci_proto_connect_cfm(conn, status); - hci_conn_put(conn); + hci_conn_drop(conn); } } @@ -1502,7 +1213,7 @@ static void hci_cs_set_conn_encrypt(struct hci_dev *hdev, __u8 status) if (conn) { if (conn->state == BT_CONFIG) { hci_proto_connect_cfm(conn, status); - hci_conn_put(conn); + hci_conn_drop(conn); } } @@ -1664,7 +1375,7 @@ static void hci_cs_read_remote_features(struct hci_dev *hdev, __u8 status) if (conn) { if (conn->state == BT_CONFIG) { hci_proto_connect_cfm(conn, status); - hci_conn_put(conn); + hci_conn_drop(conn); } } @@ -1691,7 +1402,7 @@ static void hci_cs_read_remote_ext_features(struct hci_dev *hdev, __u8 status) if (conn) { if (conn->state == BT_CONFIG) { hci_proto_connect_cfm(conn, status); - hci_conn_put(conn); + hci_conn_drop(conn); } } 
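The hunks above drop the per-opcode hci_req_complete() calls from the command-complete handlers: each handler now only records the controller's reply, and completion is reported centrally once hci_cmd_complete_evt()/hci_cmd_status_evt() invoke hci_req_cmd_complete() (see the dispatch changes further below). For callers, the pattern visible in hci_cc_le_set_adv_enable() generalizes to the sketch below. It is illustrative only: it uses just the helpers shown in this patch (hci_req_init(), hci_update_ad(), hci_req_run()), and the completion-callback signature is inferred from the NULL argument passed to hci_req_run() in the hunk above.

static void adv_update_complete(struct hci_dev *hdev, u8 status)
{
	/* Runs once hci_req_cmd_complete() has matched the Command
	 * Complete event of the last queued command. */
	BT_DBG("%s status 0x%2.2x", hdev->name, status);
}

static void adv_update(struct hci_dev *hdev)
{
	struct hci_request req;

	/* Collect commands on a request object instead of sending them
	 * individually and hooking per-opcode completion handlers. */
	hci_req_init(&req, hdev);
	hci_update_ad(&req);

	/* Submit the queued commands; pass NULL instead of a callback
	 * for fire-and-forget, as hci_cc_le_set_adv_enable() does. */
	hci_req_run(&req, adv_update_complete);
}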
@@ -1836,11 +1547,6 @@ static void hci_cs_le_create_conn(struct hci_dev *hdev, __u8 status) } } -static void hci_cs_le_start_enc(struct hci_dev *hdev, u8 status) -{ - BT_DBG("%s status 0x%2.2x", hdev->name, status); -} - static void hci_cs_create_phylink(struct hci_dev *hdev, u8 status) { struct hci_cp_create_phy_link *cp; @@ -1882,11 +1588,6 @@ static void hci_cs_accept_phylink(struct hci_dev *hdev, u8 status) amp_write_remote_assoc(hdev, cp->phy_handle); } -static void hci_cs_create_logical_link(struct hci_dev *hdev, u8 status) -{ - BT_DBG("%s status 0x%2.2x", hdev->name, status); -} - static void hci_inquiry_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) { __u8 status = *((__u8 *) skb->data); @@ -1895,13 +1596,14 @@ static void hci_inquiry_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) BT_DBG("%s status 0x%2.2x", hdev->name, status); - hci_req_complete(hdev, HCI_OP_INQUIRY, status); - hci_conn_check_pending(hdev); if (!test_and_clear_bit(HCI_INQUIRY, &hdev->flags)) return; + smp_mb__after_clear_bit(); /* wake_up_bit advises about this barrier */ + wake_up_bit(&hdev->flags, HCI_INQUIRY); + if (!test_bit(HCI_MGMT, &hdev->dev_flags)) return; @@ -2000,7 +1702,6 @@ static void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) } else conn->state = BT_CONNECTED; - hci_conn_hold_device(conn); hci_conn_add_sysfs(conn); if (test_bit(HCI_AUTH, &hdev->flags)) @@ -2047,42 +1748,6 @@ unlock: hci_conn_check_pending(hdev); } -void hci_conn_accept(struct hci_conn *conn, int mask) -{ - struct hci_dev *hdev = conn->hdev; - - BT_DBG("conn %p", conn); - - conn->state = BT_CONFIG; - - if (!lmp_esco_capable(hdev)) { - struct hci_cp_accept_conn_req cp; - - bacpy(&cp.bdaddr, &conn->dst); - - if (lmp_rswitch_capable(hdev) && (mask & HCI_LM_MASTER)) - cp.role = 0x00; /* Become master */ - else - cp.role = 0x01; /* Remain slave */ - - hci_send_cmd(hdev, HCI_OP_ACCEPT_CONN_REQ, sizeof(cp), &cp); - } else /* lmp_esco_capable(hdev)) */ { - struct hci_cp_accept_sync_conn_req cp; - - bacpy(&cp.bdaddr, &conn->dst); - cp.pkt_type = cpu_to_le16(conn->pkt_type); - - cp.tx_bandwidth = __constant_cpu_to_le32(0x00001f40); - cp.rx_bandwidth = __constant_cpu_to_le32(0x00001f40); - cp.max_latency = __constant_cpu_to_le16(0xffff); - cp.content_format = cpu_to_le16(hdev->voice_setting); - cp.retrans_effort = 0xff; - - hci_send_cmd(hdev, HCI_OP_ACCEPT_SYNC_CONN_REQ, - sizeof(cp), &cp); - } -} - static void hci_conn_request_evt(struct hci_dev *hdev, struct sk_buff *skb) { struct hci_ev_conn_request *ev = (void *) skb->data; @@ -2154,7 +1819,6 @@ static void hci_conn_request_evt(struct hci_dev *hdev, struct sk_buff *skb) } else { conn->state = BT_CONNECT2; hci_proto_connect_cfm(conn, 0); - hci_conn_put(conn); } } else { /* Connection rejected */ @@ -2261,14 +1925,14 @@ static void hci_auth_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) } else { conn->state = BT_CONNECTED; hci_proto_connect_cfm(conn, ev->status); - hci_conn_put(conn); + hci_conn_drop(conn); } } else { hci_auth_cfm(conn, ev->status); hci_conn_hold(conn); conn->disc_timeout = HCI_DISCONN_TIMEOUT; - hci_conn_put(conn); + hci_conn_drop(conn); } if (test_bit(HCI_CONN_ENCRYPT_PEND, &conn->flags)) { @@ -2351,8 +2015,8 @@ static void hci_encrypt_change_evt(struct hci_dev *hdev, struct sk_buff *skb) clear_bit(HCI_CONN_ENCRYPT_PEND, &conn->flags); if (ev->status && conn->state == BT_CONNECTED) { - hci_acl_disconn(conn, HCI_ERROR_AUTH_FAILURE); - hci_conn_put(conn); + hci_disconnect(conn, HCI_ERROR_AUTH_FAILURE); + hci_conn_drop(conn); 
goto unlock; } @@ -2361,7 +2025,7 @@ static void hci_encrypt_change_evt(struct hci_dev *hdev, struct sk_buff *skb) conn->state = BT_CONNECTED; hci_proto_connect_cfm(conn, ev->status); - hci_conn_put(conn); + hci_conn_drop(conn); } else hci_encrypt_cfm(conn, ev->status, ev->encrypt); } @@ -2408,7 +2072,7 @@ static void hci_remote_features_evt(struct hci_dev *hdev, goto unlock; if (!ev->status) - memcpy(conn->features, ev->features, 8); + memcpy(conn->features[0], ev->features, 8); if (conn->state != BT_CONFIG) goto unlock; @@ -2436,27 +2100,17 @@ static void hci_remote_features_evt(struct hci_dev *hdev, if (!hci_outgoing_auth_needed(hdev, conn)) { conn->state = BT_CONNECTED; hci_proto_connect_cfm(conn, ev->status); - hci_conn_put(conn); + hci_conn_drop(conn); } unlock: hci_dev_unlock(hdev); } -static void hci_remote_version_evt(struct hci_dev *hdev, struct sk_buff *skb) -{ - BT_DBG("%s", hdev->name); -} - -static void hci_qos_setup_complete_evt(struct hci_dev *hdev, - struct sk_buff *skb) -{ - BT_DBG("%s", hdev->name); -} - static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) { struct hci_ev_cmd_complete *ev = (void *) skb->data; + u8 status = skb->data[sizeof(*ev)]; __u16 opcode; skb_pull(skb, sizeof(*ev)); @@ -2540,10 +2194,6 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) hci_cc_write_voice_setting(hdev, skb); break; - case HCI_OP_HOST_BUFFER_SIZE: - hci_cc_host_buffer_size(hdev, skb); - break; - case HCI_OP_WRITE_SSP_MODE: hci_cc_write_ssp_mode(hdev, skb); break; @@ -2572,46 +2222,42 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) hci_cc_read_bd_addr(hdev, skb); break; - case HCI_OP_READ_DATA_BLOCK_SIZE: - hci_cc_read_data_block_size(hdev, skb); + case HCI_OP_READ_PAGE_SCAN_ACTIVITY: + hci_cc_read_page_scan_activity(hdev, skb); break; - case HCI_OP_WRITE_CA_TIMEOUT: - hci_cc_write_ca_timeout(hdev, skb); + case HCI_OP_WRITE_PAGE_SCAN_ACTIVITY: + hci_cc_write_page_scan_activity(hdev, skb); break; - case HCI_OP_READ_FLOW_CONTROL_MODE: - hci_cc_read_flow_control_mode(hdev, skb); + case HCI_OP_READ_PAGE_SCAN_TYPE: + hci_cc_read_page_scan_type(hdev, skb); break; - case HCI_OP_READ_LOCAL_AMP_INFO: - hci_cc_read_local_amp_info(hdev, skb); + case HCI_OP_WRITE_PAGE_SCAN_TYPE: + hci_cc_write_page_scan_type(hdev, skb); break; - case HCI_OP_READ_LOCAL_AMP_ASSOC: - hci_cc_read_local_amp_assoc(hdev, skb); + case HCI_OP_READ_DATA_BLOCK_SIZE: + hci_cc_read_data_block_size(hdev, skb); break; - case HCI_OP_DELETE_STORED_LINK_KEY: - hci_cc_delete_stored_link_key(hdev, skb); + case HCI_OP_READ_FLOW_CONTROL_MODE: + hci_cc_read_flow_control_mode(hdev, skb); break; - case HCI_OP_SET_EVENT_MASK: - hci_cc_set_event_mask(hdev, skb); + case HCI_OP_READ_LOCAL_AMP_INFO: + hci_cc_read_local_amp_info(hdev, skb); break; - case HCI_OP_WRITE_INQUIRY_MODE: - hci_cc_write_inquiry_mode(hdev, skb); + case HCI_OP_READ_LOCAL_AMP_ASSOC: + hci_cc_read_local_amp_assoc(hdev, skb); break; case HCI_OP_READ_INQ_RSP_TX_POWER: hci_cc_read_inq_rsp_tx_power(hdev, skb); break; - case HCI_OP_SET_EVENT_FLT: - hci_cc_set_event_flt(hdev, skb); - break; - case HCI_OP_PIN_CODE_REPLY: hci_cc_pin_code_reply(hdev, skb); break; @@ -2628,12 +2274,12 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) hci_cc_le_read_buffer_size(hdev, skb); break; - case HCI_OP_LE_READ_ADV_TX_POWER: - hci_cc_le_read_adv_tx_power(hdev, skb); + case HCI_OP_LE_READ_LOCAL_FEATURES: + hci_cc_le_read_local_features(hdev, skb); break; - case 
HCI_OP_LE_SET_EVENT_MASK: - hci_cc_le_set_event_mask(hdev, skb); + case HCI_OP_LE_READ_ADV_TX_POWER: + hci_cc_le_read_adv_tx_power(hdev, skb); break; case HCI_OP_USER_CONFIRM_REPLY: @@ -2664,12 +2310,12 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) hci_cc_le_set_scan_enable(hdev, skb); break; - case HCI_OP_LE_LTK_REPLY: - hci_cc_le_ltk_reply(hdev, skb); + case HCI_OP_LE_READ_WHITE_LIST_SIZE: + hci_cc_le_read_white_list_size(hdev, skb); break; - case HCI_OP_LE_LTK_NEG_REPLY: - hci_cc_le_ltk_neg_reply(hdev, skb); + case HCI_OP_LE_READ_SUPPORTED_STATES: + hci_cc_le_read_supported_states(hdev, skb); break; case HCI_OP_WRITE_LE_HOST_SUPPORTED: @@ -2685,9 +2331,11 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) break; } - if (ev->opcode != HCI_OP_NOP) + if (opcode != HCI_OP_NOP) del_timer(&hdev->cmd_timer); + hci_req_cmd_complete(hdev, opcode, status); + if (ev->ncmd && !test_bit(HCI_RESET, &hdev->flags)) { atomic_set(&hdev->cmd_cnt, 1); if (!skb_queue_empty(&hdev->cmd_q)) @@ -2757,10 +2405,6 @@ static void hci_cmd_status_evt(struct hci_dev *hdev, struct sk_buff *skb) hci_cs_le_create_conn(hdev, ev->status); break; - case HCI_OP_LE_START_ENC: - hci_cs_le_start_enc(hdev, ev->status); - break; - case HCI_OP_CREATE_PHY_LINK: hci_cs_create_phylink(hdev, ev->status); break; @@ -2769,18 +2413,18 @@ static void hci_cmd_status_evt(struct hci_dev *hdev, struct sk_buff *skb) hci_cs_accept_phylink(hdev, ev->status); break; - case HCI_OP_CREATE_LOGICAL_LINK: - hci_cs_create_logical_link(hdev, ev->status); - break; - default: BT_DBG("%s opcode 0x%4.4x", hdev->name, opcode); break; } - if (ev->opcode != HCI_OP_NOP) + if (opcode != HCI_OP_NOP) del_timer(&hdev->cmd_timer); + if (ev->status || + (hdev->sent_cmd && !bt_cb(hdev->sent_cmd)->req.event)) + hci_req_cmd_complete(hdev, opcode, ev->status); + if (ev->ncmd && !test_bit(HCI_RESET, &hdev->flags)) { atomic_set(&hdev->cmd_cnt, 1); if (!skb_queue_empty(&hdev->cmd_q)) @@ -2996,7 +2640,7 @@ static void hci_pin_code_request_evt(struct hci_dev *hdev, struct sk_buff *skb) if (conn->state == BT_CONNECTED) { hci_conn_hold(conn); conn->disc_timeout = HCI_PAIRING_TIMEOUT; - hci_conn_put(conn); + hci_conn_drop(conn); } if (!test_bit(HCI_PAIRABLE, &hdev->dev_flags)) @@ -3099,7 +2743,7 @@ static void hci_link_key_notify_evt(struct hci_dev *hdev, struct sk_buff *skb) if (ev->key_type != HCI_LK_CHANGED_COMBINATION) conn->key_type = ev->key_type; - hci_conn_put(conn); + hci_conn_drop(conn); } if (test_bit(HCI_LINK_KEYS, &hdev->dev_flags)) @@ -3240,6 +2884,9 @@ static void hci_remote_ext_features_evt(struct hci_dev *hdev, if (!conn) goto unlock; + if (ev->page < HCI_MAX_PAGES) + memcpy(conn->features[ev->page], ev->features, 8); + if (!ev->status && ev->page == 0x01) { struct inquiry_entry *ie; @@ -3247,8 +2894,19 @@ static void hci_remote_ext_features_evt(struct hci_dev *hdev, if (ie) ie->data.ssp_mode = (ev->features[0] & LMP_HOST_SSP); - if (ev->features[0] & LMP_HOST_SSP) + if (ev->features[0] & LMP_HOST_SSP) { set_bit(HCI_CONN_SSP_ENABLED, &conn->flags); + } else { + /* It is mandatory by the Bluetooth specification that + * Extended Inquiry Results are only used when Secure + * Simple Pairing is enabled, but some devices violate + * this. 
+ * + * To make these devices work, the internal SSP + * enabled flag needs to be cleared if the remote host + * features do not indicate SSP support */ + clear_bit(HCI_CONN_SSP_ENABLED, &conn->flags); + } } if (conn->state != BT_CONFIG) @@ -3268,7 +2926,7 @@ static void hci_remote_ext_features_evt(struct hci_dev *hdev, if (!hci_outgoing_auth_needed(hdev, conn)) { conn->state = BT_CONNECTED; hci_proto_connect_cfm(conn, ev->status); - hci_conn_put(conn); + hci_conn_drop(conn); } unlock: @@ -3302,7 +2960,6 @@ static void hci_sync_conn_complete_evt(struct hci_dev *hdev, conn->handle = __le16_to_cpu(ev->handle); conn->state = BT_CONNECTED; - hci_conn_hold_device(conn); hci_conn_add_sysfs(conn); break; @@ -3331,18 +2988,6 @@ unlock: hci_dev_unlock(hdev); } -static void hci_sync_conn_changed_evt(struct hci_dev *hdev, struct sk_buff *skb) -{ - BT_DBG("%s", hdev->name); -} - -static void hci_sniff_subrate_evt(struct hci_dev *hdev, struct sk_buff *skb) -{ - struct hci_ev_sniff_subrate *ev = (void *) skb->data; - - BT_DBG("%s status 0x%2.2x", hdev->name, ev->status); -} - static void hci_extended_inquiry_result_evt(struct hci_dev *hdev, struct sk_buff *skb) { @@ -3412,8 +3057,8 @@ static void hci_key_refresh_complete_evt(struct hci_dev *hdev, clear_bit(HCI_CONN_ENCRYPT_PEND, &conn->flags); if (ev->status && conn->state == BT_CONNECTED) { - hci_acl_disconn(conn, HCI_ERROR_AUTH_FAILURE); - hci_conn_put(conn); + hci_disconnect(conn, HCI_ERROR_AUTH_FAILURE); + hci_conn_drop(conn); goto unlock; } @@ -3422,13 +3067,13 @@ static void hci_key_refresh_complete_evt(struct hci_dev *hdev, conn->state = BT_CONNECTED; hci_proto_connect_cfm(conn, ev->status); - hci_conn_put(conn); + hci_conn_drop(conn); } else { hci_auth_cfm(conn, ev->status); hci_conn_hold(conn); conn->disc_timeout = HCI_DISCONN_TIMEOUT; - hci_conn_put(conn); + hci_conn_drop(conn); } unlock: @@ -3689,7 +3334,7 @@ static void hci_simple_pair_complete_evt(struct hci_dev *hdev, mgmt_auth_failed(hdev, &conn->dst, conn->type, conn->dst_type, ev->status); - hci_conn_put(conn); + hci_conn_drop(conn); unlock: hci_dev_unlock(hdev); @@ -3700,11 +3345,16 @@ static void hci_remote_host_features_evt(struct hci_dev *hdev, { struct hci_ev_remote_host_features *ev = (void *) skb->data; struct inquiry_entry *ie; + struct hci_conn *conn; BT_DBG("%s", hdev->name); hci_dev_lock(hdev); + conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &ev->bdaddr); + if (conn) + memcpy(conn->features[1], ev->features, 8); + ie = hci_inquiry_cache_lookup(hdev, &ev->bdaddr); if (ie) ie->data.ssp_mode = (ev->features[0] & LMP_HOST_SSP); @@ -3777,9 +3427,8 @@ static void hci_phy_link_complete_evt(struct hci_dev *hdev, hci_conn_hold(hcon); hcon->disc_timeout = HCI_DISCONN_TIMEOUT; - hci_conn_put(hcon); + hci_conn_drop(hcon); - hci_conn_hold_device(hcon); hci_conn_add_sysfs(hcon); amp_physical_cfm(bredr_hcon, hcon); @@ -3913,7 +3562,6 @@ static void hci_le_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) conn->handle = __le16_to_cpu(ev->handle); conn->state = BT_CONNECTED; - hci_conn_hold_device(conn); hci_conn_add_sysfs(conn); hci_proto_connect_cfm(conn, ev->status); @@ -3928,8 +3576,6 @@ static void hci_le_adv_report_evt(struct hci_dev *hdev, struct sk_buff *skb) void *ptr = &skb->data[1]; s8 rssi; - hci_dev_lock(hdev); - while (num_reports--) { struct hci_ev_le_advertising_info *ev = ptr; @@ -3939,8 +3585,6 @@ static void hci_le_adv_report_evt(struct hci_dev *hdev, struct sk_buff *skb) ptr += sizeof(*ev) + ev->length + 1; } - - hci_dev_unlock(hdev); } static void 
hci_le_ltk_request_evt(struct hci_dev *hdev, struct sk_buff *skb) @@ -4031,8 +3675,27 @@ void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb) struct hci_event_hdr *hdr = (void *) skb->data; __u8 event = hdr->evt; + hci_dev_lock(hdev); + + /* Received events are (currently) only needed when a request is + * ongoing so avoid unnecessary memory allocation. + */ + if (hdev->req_status == HCI_REQ_PEND) { + kfree_skb(hdev->recv_evt); + hdev->recv_evt = skb_clone(skb, GFP_KERNEL); + } + + hci_dev_unlock(hdev); + skb_pull(skb, HCI_EVENT_HDR_SIZE); + if (hdev->sent_cmd && bt_cb(hdev->sent_cmd)->req.event == event) { + struct hci_command_hdr *hdr = (void *) hdev->sent_cmd->data; + u16 opcode = __le16_to_cpu(hdr->opcode); + + hci_req_cmd_complete(hdev, opcode, 0); + } + switch (event) { case HCI_EV_INQUIRY_COMPLETE: hci_inquiry_complete_evt(hdev, skb); @@ -4074,14 +3737,6 @@ void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb) hci_remote_features_evt(hdev, skb); break; - case HCI_EV_REMOTE_VERSION: - hci_remote_version_evt(hdev, skb); - break; - - case HCI_EV_QOS_SETUP_COMPLETE: - hci_qos_setup_complete_evt(hdev, skb); - break; - case HCI_EV_CMD_COMPLETE: hci_cmd_complete_evt(hdev, skb); break; @@ -4138,14 +3793,6 @@ void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb) hci_sync_conn_complete_evt(hdev, skb); break; - case HCI_EV_SYNC_CONN_CHANGED: - hci_sync_conn_changed_evt(hdev, skb); - break; - - case HCI_EV_SNIFF_SUBRATE: - hci_sniff_subrate_evt(hdev, skb); - break; - case HCI_EV_EXTENDED_INQUIRY_RESULT: hci_extended_inquiry_result_evt(hdev, skb); break; diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 07f073935811..9bd7d959e384 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -70,14 +70,13 @@ static struct bt_sock_list hci_sk_list = { void hci_send_to_sock(struct hci_dev *hdev, struct sk_buff *skb) { struct sock *sk; - struct hlist_node *node; struct sk_buff *skb_copy = NULL; BT_DBG("hdev %p len %d", hdev, skb->len); read_lock(&hci_sk_list.lock); - sk_for_each(sk, node, &hci_sk_list.head) { + sk_for_each(sk, &hci_sk_list.head) { struct hci_filter *flt; struct sk_buff *nskb; @@ -142,13 +141,12 @@ void hci_send_to_sock(struct hci_dev *hdev, struct sk_buff *skb) void hci_send_to_control(struct sk_buff *skb, struct sock *skip_sk) { struct sock *sk; - struct hlist_node *node; BT_DBG("len %d", skb->len); read_lock(&hci_sk_list.lock); - sk_for_each(sk, node, &hci_sk_list.head) { + sk_for_each(sk, &hci_sk_list.head) { struct sk_buff *nskb; /* Skip the original socket */ @@ -176,7 +174,6 @@ void hci_send_to_control(struct sk_buff *skb, struct sock *skip_sk) void hci_send_to_monitor(struct hci_dev *hdev, struct sk_buff *skb) { struct sock *sk; - struct hlist_node *node; struct sk_buff *skb_copy = NULL; __le16 opcode; @@ -210,7 +207,7 @@ void hci_send_to_monitor(struct hci_dev *hdev, struct sk_buff *skb) read_lock(&hci_sk_list.lock); - sk_for_each(sk, node, &hci_sk_list.head) { + sk_for_each(sk, &hci_sk_list.head) { struct sk_buff *nskb; if (sk->sk_state != BT_BOUND) @@ -251,13 +248,12 @@ void hci_send_to_monitor(struct hci_dev *hdev, struct sk_buff *skb) static void send_monitor_event(struct sk_buff *skb) { struct sock *sk; - struct hlist_node *node; BT_DBG("len %d", skb->len); read_lock(&hci_sk_list.lock); - sk_for_each(sk, node, &hci_sk_list.head) { + sk_for_each(sk, &hci_sk_list.head) { struct sk_buff *nskb; if (sk->sk_state != BT_BOUND) @@ -393,11 +389,10 @@ void hci_sock_dev_event(struct hci_dev *hdev, int event) if 
(event == HCI_DEV_UNREG) { struct sock *sk; - struct hlist_node *node; /* Detach sockets from device */ read_lock(&hci_sk_list.lock); - sk_for_each(sk, node, &hci_sk_list.head) { + sk_for_each(sk, &hci_sk_list.head) { bh_lock_sock_nested(sk); if (hci_pi(sk)->hdev == hdev) { hci_pi(sk)->hdev = NULL; @@ -859,6 +854,11 @@ static int hci_sock_sendmsg(struct kiocb *iocb, struct socket *sock, skb_queue_tail(&hdev->raw_q, skb); queue_work(hdev->workqueue, &hdev->tx_work); } else { + /* Stand-alone HCI commands must be flagged as + * single-command requests. + */ + bt_cb(skb)->req.start = true; + skb_queue_tail(&hdev->cmd_q, skb); queue_work(hdev->workqueue, &hdev->cmd_work); } @@ -1107,7 +1107,7 @@ int __init hci_sock_init(void) goto error; } - err = bt_procfs_init(THIS_MODULE, &init_net, "hci", &hci_sk_list, NULL); + err = bt_procfs_init(&init_net, "hci", &hci_sk_list, NULL); if (err < 0) { BT_ERR("Failed to create HCI proc file"); bt_sock_unregister(BTPROTO_HCI); @@ -1126,8 +1126,6 @@ error: void hci_sock_cleanup(void) { bt_procfs_cleanup(&init_net, "hci"); - if (bt_sock_unregister(BTPROTO_HCI) < 0) - BT_ERR("HCI socket unregistration failed"); - + bt_sock_unregister(BTPROTO_HCI); proto_unregister(&hci_sk_proto); } diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c index 55cceee02a84..7ad6ecf36f20 100644 --- a/net/bluetooth/hci_sysfs.c +++ b/net/bluetooth/hci_sysfs.c @@ -2,6 +2,7 @@ #include <linux/debugfs.h> #include <linux/module.h> +#include <asm/unaligned.h> #include <net/bluetooth/bluetooth.h> #include <net/bluetooth/hci_core.h> @@ -47,10 +48,10 @@ static ssize_t show_link_features(struct device *dev, struct hci_conn *conn = to_hci_conn(dev); return sprintf(buf, "0x%02x%02x%02x%02x%02x%02x%02x%02x\n", - conn->features[0], conn->features[1], - conn->features[2], conn->features[3], - conn->features[4], conn->features[5], - conn->features[6], conn->features[7]); + conn->features[0][0], conn->features[0][1], + conn->features[0][2], conn->features[0][3], + conn->features[0][4], conn->features[0][5], + conn->features[0][6], conn->features[0][7]); } #define LINK_ATTR(_name, _mode, _show, _store) \ @@ -145,7 +146,6 @@ void hci_conn_del_sysfs(struct hci_conn *conn) } device_del(&conn->dev); - put_device(&conn->dev); hci_dev_put(hdev); } @@ -233,10 +233,10 @@ static ssize_t show_features(struct device *dev, struct hci_dev *hdev = to_hci_dev(dev); return sprintf(buf, "0x%02x%02x%02x%02x%02x%02x%02x%02x\n", - hdev->features[0], hdev->features[1], - hdev->features[2], hdev->features[3], - hdev->features[4], hdev->features[5], - hdev->features[6], hdev->features[7]); + hdev->features[0][0], hdev->features[0][1], + hdev->features[0][2], hdev->features[0][3], + hdev->features[0][4], hdev->features[0][5], + hdev->features[0][6], hdev->features[0][7]); } static ssize_t show_manufacturer(struct device *dev, @@ -461,19 +461,18 @@ static const struct file_operations blacklist_fops = { static void print_bt_uuid(struct seq_file *f, u8 *uuid) { - __be32 data0, data4; - __be16 data1, data2, data3, data5; + u32 data0, data5; + u16 data1, data2, data3, data4; - memcpy(&data0, &uuid[0], 4); - memcpy(&data1, &uuid[4], 2); - memcpy(&data2, &uuid[6], 2); - memcpy(&data3, &uuid[8], 2); - memcpy(&data4, &uuid[10], 4); - memcpy(&data5, &uuid[14], 2); + data5 = get_unaligned_le32(uuid); + data4 = get_unaligned_le16(uuid + 4); + data3 = get_unaligned_le16(uuid + 6); + data2 = get_unaligned_le16(uuid + 8); + data1 = get_unaligned_le16(uuid + 10); + data0 = get_unaligned_le32(uuid + 12); - seq_printf(f, 
"%.8x-%.4x-%.4x-%.4x-%.8x%.4x\n", - ntohl(data0), ntohs(data1), ntohs(data2), ntohs(data3), - ntohl(data4), ntohs(data5)); + seq_printf(f, "%.8x-%.4x-%.4x-%.4x-%.4x%.8x\n", + data0, data1, data2, data3, data4, data5); } static int uuids_show(struct seq_file *f, void *p) @@ -590,10 +589,8 @@ int __init bt_sysfs_init(void) bt_debugfs = debugfs_create_dir("bluetooth", NULL); bt_class = class_create(THIS_MODULE, "bluetooth"); - if (IS_ERR(bt_class)) - return PTR_ERR(bt_class); - return 0; + return PTR_RET(bt_class); } void bt_sysfs_cleanup(void) diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c index a7352ff3fd1e..940f5acb6694 100644 --- a/net/bluetooth/hidp/core.c +++ b/net/bluetooth/hidp/core.c @@ -1,6 +1,7 @@ /* HIDP implementation for Linux Bluetooth stack (BlueZ). Copyright (C) 2003-2004 Marcel Holtmann <marcel@holtmann.org> + Copyright (C) 2013 David Herrmann <dh.herrmann@gmail.com> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License version 2 as @@ -20,6 +21,7 @@ SOFTWARE IS DISCLAIMED. */ +#include <linux/kref.h> #include <linux/module.h> #include <linux/file.h> #include <linux/kthread.h> @@ -59,39 +61,20 @@ static unsigned char hidp_keycode[256] = { static unsigned char hidp_mkeyspat[] = { 0x01, 0x01, 0x01, 0x01, 0x01, 0x01 }; -static struct hidp_session *__hidp_get_session(bdaddr_t *bdaddr) -{ - struct hidp_session *session; - - BT_DBG(""); +static int hidp_session_probe(struct l2cap_conn *conn, + struct l2cap_user *user); +static void hidp_session_remove(struct l2cap_conn *conn, + struct l2cap_user *user); +static int hidp_session_thread(void *arg); +static void hidp_session_terminate(struct hidp_session *s); - list_for_each_entry(session, &hidp_session_list, list) { - if (!bacmp(bdaddr, &session->bdaddr)) - return session; - } - - return NULL; -} - -static void __hidp_link_session(struct hidp_session *session) -{ - list_add(&session->list, &hidp_session_list); -} - -static void __hidp_unlink_session(struct hidp_session *session) -{ - hci_conn_put_device(session->conn); - - list_del(&session->list); -} - -static void __hidp_copy_session(struct hidp_session *session, struct hidp_conninfo *ci) +static void hidp_copy_session(struct hidp_session *session, struct hidp_conninfo *ci) { memset(ci, 0, sizeof(*ci)); bacpy(&ci->bdaddr, &session->bdaddr); ci->flags = session->flags; - ci->state = session->state; + ci->state = BT_CONNECTED; ci->vendor = 0x0000; ci->product = 0x0000; @@ -115,58 +98,80 @@ static void __hidp_copy_session(struct hidp_session *session, struct hidp_connin } } -static int hidp_queue_event(struct hidp_session *session, struct input_dev *dev, - unsigned int type, unsigned int code, int value) +/* assemble skb, queue message on @transmit and wake up the session thread */ +static int hidp_send_message(struct hidp_session *session, struct socket *sock, + struct sk_buff_head *transmit, unsigned char hdr, + const unsigned char *data, int size) { - unsigned char newleds; struct sk_buff *skb; + struct sock *sk = sock->sk; - BT_DBG("session %p type %d code %d value %d", session, type, code, value); - - if (type != EV_LED) - return -1; - - newleds = (!!test_bit(LED_KANA, dev->led) << 3) | - (!!test_bit(LED_COMPOSE, dev->led) << 3) | - (!!test_bit(LED_SCROLLL, dev->led) << 2) | - (!!test_bit(LED_CAPSL, dev->led) << 1) | - (!!test_bit(LED_NUML, dev->led)); - - if (session->leds == newleds) - return 0; + BT_DBG("session %p data %p size %d", session, data, size); - session->leds = newleds; + if 
(atomic_read(&session->terminate)) + return -EIO; - skb = alloc_skb(3, GFP_ATOMIC); + skb = alloc_skb(size + 1, GFP_ATOMIC); if (!skb) { BT_ERR("Can't allocate memory for new frame"); return -ENOMEM; } - *skb_put(skb, 1) = HIDP_TRANS_DATA | HIDP_DATA_RTYPE_OUPUT; - *skb_put(skb, 1) = 0x01; - *skb_put(skb, 1) = newleds; - - skb_queue_tail(&session->intr_transmit, skb); + *skb_put(skb, 1) = hdr; + if (data && size > 0) + memcpy(skb_put(skb, size), data, size); - hidp_schedule(session); + skb_queue_tail(transmit, skb); + wake_up_interruptible(sk_sleep(sk)); return 0; } -static int hidp_hidinput_event(struct input_dev *dev, unsigned int type, unsigned int code, int value) +static int hidp_send_ctrl_message(struct hidp_session *session, + unsigned char hdr, const unsigned char *data, + int size) { - struct hid_device *hid = input_get_drvdata(dev); - struct hidp_session *session = hid->driver_data; + return hidp_send_message(session, session->ctrl_sock, + &session->ctrl_transmit, hdr, data, size); +} - return hidp_queue_event(session, dev, type, code, value); +static int hidp_send_intr_message(struct hidp_session *session, + unsigned char hdr, const unsigned char *data, + int size) +{ + return hidp_send_message(session, session->intr_sock, + &session->intr_transmit, hdr, data, size); } -static int hidp_input_event(struct input_dev *dev, unsigned int type, unsigned int code, int value) +static int hidp_input_event(struct input_dev *dev, unsigned int type, + unsigned int code, int value) { struct hidp_session *session = input_get_drvdata(dev); + unsigned char newleds; + unsigned char hdr, data[2]; + + BT_DBG("session %p type %d code %d value %d", + session, type, code, value); + + if (type != EV_LED) + return -1; + + newleds = (!!test_bit(LED_KANA, dev->led) << 3) | + (!!test_bit(LED_COMPOSE, dev->led) << 3) | + (!!test_bit(LED_SCROLLL, dev->led) << 2) | + (!!test_bit(LED_CAPSL, dev->led) << 1) | + (!!test_bit(LED_NUML, dev->led)); - return hidp_queue_event(session, dev, type, code, value); + if (session->leds == newleds) + return 0; + + session->leds = newleds; + + hdr = HIDP_TRANS_DATA | HIDP_DATA_RTYPE_OUPUT; + data[0] = 0x01; + data[1] = newleds; + + return hidp_send_intr_message(session, hdr, data, 2); } static void hidp_input_report(struct hidp_session *session, struct sk_buff *skb) @@ -224,71 +229,9 @@ static void hidp_input_report(struct hidp_session *session, struct sk_buff *skb) input_sync(dev); } -static int __hidp_send_ctrl_message(struct hidp_session *session, - unsigned char hdr, unsigned char *data, - int size) -{ - struct sk_buff *skb; - - BT_DBG("session %p data %p size %d", session, data, size); - - if (atomic_read(&session->terminate)) - return -EIO; - - skb = alloc_skb(size + 1, GFP_ATOMIC); - if (!skb) { - BT_ERR("Can't allocate memory for new frame"); - return -ENOMEM; - } - - *skb_put(skb, 1) = hdr; - if (data && size > 0) - memcpy(skb_put(skb, size), data, size); - - skb_queue_tail(&session->ctrl_transmit, skb); - - return 0; -} - -static int hidp_send_ctrl_message(struct hidp_session *session, - unsigned char hdr, unsigned char *data, int size) -{ - int err; - - err = __hidp_send_ctrl_message(session, hdr, data, size); - - hidp_schedule(session); - - return err; -} - -static int hidp_queue_report(struct hidp_session *session, - unsigned char *data, int size) -{ - struct sk_buff *skb; - - BT_DBG("session %p hid %p data %p size %d", session, session->hid, data, size); - - skb = alloc_skb(size + 1, GFP_ATOMIC); - if (!skb) { - BT_ERR("Can't allocate memory for new frame"); - 
return -ENOMEM; - } - - *skb_put(skb, 1) = 0xa2; - if (size > 0) - memcpy(skb_put(skb, size), data, size); - - skb_queue_tail(&session->intr_transmit, skb); - - hidp_schedule(session); - - return 0; -} - static int hidp_send_report(struct hidp_session *session, struct hid_report *report) { - unsigned char buf[32]; + unsigned char buf[32], hdr; int rsize; rsize = ((report->size - 1) >> 3) + 1 + (report->id > 0); @@ -296,8 +239,9 @@ static int hidp_send_report(struct hidp_session *session, struct hid_report *rep return -EIO; hid_output_report(report, buf); + hdr = HIDP_TRANS_DATA | HIDP_DATA_RTYPE_OUPUT; - return hidp_queue_report(session, buf, rsize); + return hidp_send_intr_message(session, hdr, buf, rsize); } static int hidp_get_raw_report(struct hid_device *hid, @@ -311,6 +255,9 @@ static int hidp_get_raw_report(struct hid_device *hid, int numbered_reports = hid->report_enum[report_type].numbered; int ret; + if (atomic_read(&session->terminate)) + return -EIO; + switch (report_type) { case HID_FEATURE_REPORT: report_type = HIDP_TRANS_GET_REPORT | HIDP_DATA_RTYPE_FEATURE; @@ -333,17 +280,19 @@ static int hidp_get_raw_report(struct hid_device *hid, session->waiting_report_number = numbered_reports ? report_number : -1; set_bit(HIDP_WAITING_FOR_RETURN, &session->flags); data[0] = report_number; - ret = hidp_send_ctrl_message(hid->driver_data, report_type, data, 1); + ret = hidp_send_ctrl_message(session, report_type, data, 1); if (ret) goto err; /* Wait for the return of the report. The returned report gets put in session->report_return. */ - while (test_bit(HIDP_WAITING_FOR_RETURN, &session->flags)) { + while (test_bit(HIDP_WAITING_FOR_RETURN, &session->flags) && + !atomic_read(&session->terminate)) { int res; res = wait_event_interruptible_timeout(session->report_queue, - !test_bit(HIDP_WAITING_FOR_RETURN, &session->flags), + !test_bit(HIDP_WAITING_FOR_RETURN, &session->flags) + || atomic_read(&session->terminate), 5*HZ); if (res == 0) { /* timeout */ @@ -386,14 +335,11 @@ static int hidp_output_raw_report(struct hid_device *hid, unsigned char *data, s struct hidp_session *session = hid->driver_data; int ret; - switch (report_type) { - case HID_FEATURE_REPORT: - report_type = HIDP_TRANS_SET_REPORT | HIDP_DATA_RTYPE_FEATURE; - break; - case HID_OUTPUT_REPORT: - report_type = HIDP_TRANS_SET_REPORT | HIDP_DATA_RTYPE_OUPUT; - break; - default: + if (report_type == HID_OUTPUT_REPORT) { + report_type = HIDP_TRANS_DATA | HIDP_DATA_RTYPE_OUPUT; + return hidp_send_intr_message(session, report_type, + data, count); + } else if (report_type != HID_FEATURE_REPORT) { return -EINVAL; } @@ -402,17 +348,19 @@ static int hidp_output_raw_report(struct hid_device *hid, unsigned char *data, s /* Set up our wait, and send the report request to the device. */ set_bit(HIDP_WAITING_FOR_SEND_ACK, &session->flags); - ret = hidp_send_ctrl_message(hid->driver_data, report_type, data, - count); + report_type = HIDP_TRANS_SET_REPORT | HIDP_DATA_RTYPE_FEATURE; + ret = hidp_send_ctrl_message(session, report_type, data, count); if (ret) goto err; /* Wait for the ACK from the device. 
*/ - while (test_bit(HIDP_WAITING_FOR_SEND_ACK, &session->flags)) { + while (test_bit(HIDP_WAITING_FOR_SEND_ACK, &session->flags) && + !atomic_read(&session->terminate)) { int res; res = wait_event_interruptible_timeout(session->report_queue, - !test_bit(HIDP_WAITING_FOR_SEND_ACK, &session->flags), + !test_bit(HIDP_WAITING_FOR_SEND_ACK, &session->flags) + || atomic_read(&session->terminate), 10*HZ); if (res == 0) { /* timeout */ @@ -443,8 +391,7 @@ static void hidp_idle_timeout(unsigned long arg) { struct hidp_session *session = (struct hidp_session *) arg; - atomic_inc(&session->terminate); - wake_up_process(session->task); + hidp_session_terminate(session); } static void hidp_set_timer(struct hidp_session *session) @@ -487,12 +434,12 @@ static void hidp_process_handshake(struct hidp_session *session, case HIDP_HSHK_ERR_FATAL: /* Device requests a reboot, as this is the only way this error * can be recovered. */ - __hidp_send_ctrl_message(session, + hidp_send_ctrl_message(session, HIDP_TRANS_HID_CONTROL | HIDP_CTRL_SOFT_RESET, NULL, 0); break; default: - __hidp_send_ctrl_message(session, + hidp_send_ctrl_message(session, HIDP_TRANS_HANDSHAKE | HIDP_HSHK_ERR_INVALID_PARAMETER, NULL, 0); break; } @@ -512,8 +459,7 @@ static void hidp_process_hid_control(struct hidp_session *session, skb_queue_purge(&session->ctrl_transmit); skb_queue_purge(&session->intr_transmit); - atomic_inc(&session->terminate); - wake_up_process(current); + hidp_session_terminate(session); } } @@ -541,7 +487,7 @@ static int hidp_process_data(struct hidp_session *session, struct sk_buff *skb, break; default: - __hidp_send_ctrl_message(session, + hidp_send_ctrl_message(session, HIDP_TRANS_HANDSHAKE | HIDP_HSHK_ERR_INVALID_PARAMETER, NULL, 0); } @@ -588,7 +534,7 @@ static void hidp_recv_ctrl_frame(struct hidp_session *session, break; default: - __hidp_send_ctrl_message(session, + hidp_send_ctrl_message(session, HIDP_TRANS_HANDSHAKE | HIDP_HSHK_ERR_UNSUPPORTED_REQUEST, NULL, 0); break; } @@ -639,32 +585,24 @@ static int hidp_send_frame(struct socket *sock, unsigned char *data, int len) return kernel_sendmsg(sock, &msg, &iv, 1, len); } -static void hidp_process_intr_transmit(struct hidp_session *session) +/* dequeue message from @transmit and send via @sock */ +static void hidp_process_transmit(struct hidp_session *session, + struct sk_buff_head *transmit, + struct socket *sock) { struct sk_buff *skb; + int ret; BT_DBG("session %p", session); - while ((skb = skb_dequeue(&session->intr_transmit))) { - if (hidp_send_frame(session->intr_sock, skb->data, skb->len) < 0) { - skb_queue_head(&session->intr_transmit, skb); + while ((skb = skb_dequeue(transmit))) { + ret = hidp_send_frame(sock, skb->data, skb->len); + if (ret == -EAGAIN) { + skb_queue_head(transmit, skb); break; - } - - hidp_set_timer(session); - kfree_skb(skb); - } -} - -static void hidp_process_ctrl_transmit(struct hidp_session *session) -{ - struct sk_buff *skb; - - BT_DBG("session %p", session); - - while ((skb = skb_dequeue(&session->ctrl_transmit))) { - if (hidp_send_frame(session->ctrl_sock, skb->data, skb->len) < 0) { - skb_queue_head(&session->ctrl_transmit, skb); + } else if (ret < 0) { + hidp_session_terminate(session); + kfree_skb(skb); break; } @@ -673,121 +611,6 @@ static void hidp_process_ctrl_transmit(struct hidp_session *session) } } -static int hidp_session(void *arg) -{ - struct hidp_session *session = arg; - struct sock *ctrl_sk = session->ctrl_sock->sk; - struct sock *intr_sk = session->intr_sock->sk; - struct sk_buff *skb; - wait_queue_t 
ctrl_wait, intr_wait; - - BT_DBG("session %p", session); - - __module_get(THIS_MODULE); - set_user_nice(current, -15); - - init_waitqueue_entry(&ctrl_wait, current); - init_waitqueue_entry(&intr_wait, current); - add_wait_queue(sk_sleep(ctrl_sk), &ctrl_wait); - add_wait_queue(sk_sleep(intr_sk), &intr_wait); - session->waiting_for_startup = 0; - wake_up_interruptible(&session->startup_queue); - set_current_state(TASK_INTERRUPTIBLE); - while (!atomic_read(&session->terminate)) { - if (ctrl_sk->sk_state != BT_CONNECTED || - intr_sk->sk_state != BT_CONNECTED) - break; - - while ((skb = skb_dequeue(&intr_sk->sk_receive_queue))) { - skb_orphan(skb); - if (!skb_linearize(skb)) - hidp_recv_intr_frame(session, skb); - else - kfree_skb(skb); - } - - hidp_process_intr_transmit(session); - - while ((skb = skb_dequeue(&ctrl_sk->sk_receive_queue))) { - skb_orphan(skb); - if (!skb_linearize(skb)) - hidp_recv_ctrl_frame(session, skb); - else - kfree_skb(skb); - } - - hidp_process_ctrl_transmit(session); - - schedule(); - set_current_state(TASK_INTERRUPTIBLE); - } - set_current_state(TASK_RUNNING); - remove_wait_queue(sk_sleep(intr_sk), &intr_wait); - remove_wait_queue(sk_sleep(ctrl_sk), &ctrl_wait); - - clear_bit(HIDP_WAITING_FOR_SEND_ACK, &session->flags); - clear_bit(HIDP_WAITING_FOR_RETURN, &session->flags); - wake_up_interruptible(&session->report_queue); - - down_write(&hidp_session_sem); - - hidp_del_timer(session); - - if (session->input) { - input_unregister_device(session->input); - session->input = NULL; - } - - if (session->hid) { - hid_destroy_device(session->hid); - session->hid = NULL; - } - - /* Wakeup user-space polling for socket errors */ - session->intr_sock->sk->sk_err = EUNATCH; - session->ctrl_sock->sk->sk_err = EUNATCH; - - hidp_schedule(session); - - fput(session->intr_sock->file); - - wait_event_timeout(*(sk_sleep(ctrl_sk)), - (ctrl_sk->sk_state == BT_CLOSED), msecs_to_jiffies(500)); - - fput(session->ctrl_sock->file); - - __hidp_unlink_session(session); - - up_write(&hidp_session_sem); - - kfree(session->rd_data); - kfree(session); - module_put_and_exit(0); - return 0; -} - -static struct hci_conn *hidp_get_connection(struct hidp_session *session) -{ - bdaddr_t *src = &bt_sk(session->ctrl_sock->sk)->src; - bdaddr_t *dst = &bt_sk(session->ctrl_sock->sk)->dst; - struct hci_conn *conn; - struct hci_dev *hdev; - - hdev = hci_get_route(dst, src); - if (!hdev) - return NULL; - - hci_dev_lock(hdev); - conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, dst); - if (conn) - hci_conn_hold_device(conn); - hci_dev_unlock(hdev); - - hci_dev_put(hdev); - - return conn; -} - static int hidp_setup_input(struct hidp_session *session, struct hidp_connadd_req *req) { @@ -835,7 +658,7 @@ static int hidp_setup_input(struct hidp_session *session, input->relbit[0] |= BIT_MASK(REL_WHEEL); } - input->dev.parent = &session->conn->dev; + input->dev.parent = &session->conn->hcon->dev; input->event = hidp_input_event; @@ -894,7 +717,6 @@ static struct hid_ll_driver hidp_hid_driver = { .stop = hidp_stop, .open = hidp_open, .close = hidp_close, - .hidinput_input_event = hidp_hidinput_event, }; /* This function sets up the hid device. 
It does not add it @@ -939,7 +761,7 @@ static int hidp_setup_hid(struct hidp_session *session, snprintf(hid->uniq, sizeof(hid->uniq), "%pMR", &bt_sk(session->ctrl_sock->sk)->dst); - hid->dev.parent = &session->conn->dev; + hid->dev.parent = &session->conn->hcon->dev; hid->ll_driver = &hidp_hid_driver; hid->hid_get_raw_report = hidp_get_raw_report; @@ -961,80 +783,217 @@ fault: return err; } -int hidp_add_connection(struct hidp_connadd_req *req, struct socket *ctrl_sock, struct socket *intr_sock) +/* initialize session devices */ +static int hidp_session_dev_init(struct hidp_session *session, + struct hidp_connadd_req *req) { - struct hidp_session *session, *s; - int vendor, product; - int err; + int ret; - BT_DBG(""); + if (req->rd_size > 0) { + ret = hidp_setup_hid(session, req); + if (ret && ret != -ENODEV) + return ret; + } - if (bacmp(&bt_sk(ctrl_sock->sk)->src, &bt_sk(intr_sock->sk)->src) || - bacmp(&bt_sk(ctrl_sock->sk)->dst, &bt_sk(intr_sock->sk)->dst)) - return -ENOTUNIQ; + if (!session->hid) { + ret = hidp_setup_input(session, req); + if (ret < 0) + return ret; + } - BT_DBG("rd_data %p rd_size %d", req->rd_data, req->rd_size); + return 0; +} - down_write(&hidp_session_sem); +/* destroy session devices */ +static void hidp_session_dev_destroy(struct hidp_session *session) +{ + if (session->hid) + put_device(&session->hid->dev); + else if (session->input) + input_put_device(session->input); - s = __hidp_get_session(&bt_sk(ctrl_sock->sk)->dst); - if (s && s->state == BT_CONNECTED) { - up_write(&hidp_session_sem); - return -EEXIST; - } + kfree(session->rd_data); + session->rd_data = NULL; +} - session = kzalloc(sizeof(struct hidp_session), GFP_KERNEL); - if (!session) { - up_write(&hidp_session_sem); - return -ENOMEM; - } +/* add HID/input devices to their underlying bus systems */ +static int hidp_session_dev_add(struct hidp_session *session) +{ + int ret; - bacpy(&session->bdaddr, &bt_sk(ctrl_sock->sk)->dst); + /* Both HID and input systems drop a ref-count when unregistering the + * device but they don't take a ref-count when registering them. Work + * around this by explicitly taking a refcount during registration + * which is dropped automatically by unregistering the devices. */ - session->ctrl_mtu = min_t(uint, l2cap_pi(ctrl_sock->sk)->chan->omtu, - l2cap_pi(ctrl_sock->sk)->chan->imtu); - session->intr_mtu = min_t(uint, l2cap_pi(intr_sock->sk)->chan->omtu, - l2cap_pi(intr_sock->sk)->chan->imtu); + if (session->hid) { + ret = hid_add_device(session->hid); + if (ret) + return ret; + get_device(&session->hid->dev); + } else if (session->input) { + ret = input_register_device(session->input); + if (ret) + return ret; + input_get_device(session->input); + } - BT_DBG("ctrl mtu %d intr mtu %d", session->ctrl_mtu, session->intr_mtu); + return 0; +} - session->ctrl_sock = ctrl_sock; - session->intr_sock = intr_sock; - session->state = BT_CONNECTED; +/* remove HID/input devices from their bus systems */ +static void hidp_session_dev_del(struct hidp_session *session) +{ + if (session->hid) + hid_destroy_device(session->hid); + else if (session->input) + input_unregister_device(session->input); +} - session->conn = hidp_get_connection(session); - if (!session->conn) { - err = -ENOTCONN; - goto failed; - } +/* + * Create new session object + * Allocate session object, initialize static fields, copy input data into the + * object and take a reference to all sub-objects. + * This returns 0 on success and puts a pointer to the new session object in + * \out. 
Otherwise, an error code is returned. + * The new session object has an initial ref-count of 1. + */ +static int hidp_session_new(struct hidp_session **out, const bdaddr_t *bdaddr, + struct socket *ctrl_sock, + struct socket *intr_sock, + struct hidp_connadd_req *req, + struct l2cap_conn *conn) +{ + struct hidp_session *session; + int ret; + struct bt_sock *ctrl, *intr; + + ctrl = bt_sk(ctrl_sock->sk); + intr = bt_sk(intr_sock->sk); - setup_timer(&session->timer, hidp_idle_timeout, (unsigned long)session); + session = kzalloc(sizeof(*session), GFP_KERNEL); + if (!session) + return -ENOMEM; + /* object and runtime management */ + kref_init(&session->ref); + atomic_set(&session->state, HIDP_SESSION_IDLING); + init_waitqueue_head(&session->state_queue); + session->flags = req->flags & (1 << HIDP_BLUETOOTH_VENDOR_ID); + + /* connection management */ + bacpy(&session->bdaddr, bdaddr); + session->conn = conn; + session->user.probe = hidp_session_probe; + session->user.remove = hidp_session_remove; + session->ctrl_sock = ctrl_sock; + session->intr_sock = intr_sock; skb_queue_head_init(&session->ctrl_transmit); skb_queue_head_init(&session->intr_transmit); + session->ctrl_mtu = min_t(uint, l2cap_pi(ctrl)->chan->omtu, + l2cap_pi(ctrl)->chan->imtu); + session->intr_mtu = min_t(uint, l2cap_pi(intr)->chan->omtu, + l2cap_pi(intr)->chan->imtu); + session->idle_to = req->idle_to; + /* device management */ + setup_timer(&session->timer, hidp_idle_timeout, + (unsigned long)session); + + /* session data */ mutex_init(&session->report_mutex); init_waitqueue_head(&session->report_queue); - init_waitqueue_head(&session->startup_queue); - session->waiting_for_startup = 1; - session->flags = req->flags & (1 << HIDP_BLUETOOTH_VENDOR_ID); - session->idle_to = req->idle_to; - __hidp_link_session(session); + ret = hidp_session_dev_init(session, req); + if (ret) + goto err_free; - if (req->rd_size > 0) { - err = hidp_setup_hid(session, req); - if (err && err != -ENODEV) - goto purge; - } + l2cap_conn_get(session->conn); + get_file(session->intr_sock->file); + get_file(session->ctrl_sock->file); + *out = session; + return 0; - if (!session->hid) { - err = hidp_setup_input(session, req); - if (err < 0) - goto purge; +err_free: + kfree(session); + return ret; +} + +/* increase ref-count of the given session by one */ +static void hidp_session_get(struct hidp_session *session) +{ + kref_get(&session->ref); +} + +/* release callback */ +static void session_free(struct kref *ref) +{ + struct hidp_session *session = container_of(ref, struct hidp_session, + ref); + + hidp_session_dev_destroy(session); + skb_queue_purge(&session->ctrl_transmit); + skb_queue_purge(&session->intr_transmit); + fput(session->intr_sock->file); + fput(session->ctrl_sock->file); + l2cap_conn_put(session->conn); + kfree(session); +} + +/* decrease ref-count of the given session by one */ +static void hidp_session_put(struct hidp_session *session) +{ + kref_put(&session->ref, session_free); +} + +/* + * Search the list of active sessions for a session with target address + * \bdaddr. You must hold at least a read-lock on \hidp_session_sem. As long as + * you do not release this lock, the session objects cannot vanish and you can + * safely take a reference to the session yourself. 
+ */ +static struct hidp_session *__hidp_session_find(const bdaddr_t *bdaddr) +{ + struct hidp_session *session; + + list_for_each_entry(session, &hidp_session_list, list) { + if (!bacmp(bdaddr, &session->bdaddr)) + return session; } - hidp_set_timer(session); + return NULL; +} + +/* + * Same as __hidp_session_find() but no locks must be held. This also takes a + * reference of the returned session (if non-NULL) so you must drop this + * reference if you no longer use the object. + */ +static struct hidp_session *hidp_session_find(const bdaddr_t *bdaddr) +{ + struct hidp_session *session; + + down_read(&hidp_session_sem); + + session = __hidp_session_find(bdaddr); + if (session) + hidp_session_get(session); + + up_read(&hidp_session_sem); + + return session; +} + +/* + * Start session synchronously + * This starts a session thread and waits until initialization + * is done or returns an error if it couldn't be started. + * If this returns 0 the session thread is up and running. You must call + * hidp_session_stop_sync() before deleting any runtime resources. + */ +static int hidp_session_start_sync(struct hidp_session *session) +{ + unsigned int vendor, product; if (session->hid) { vendor = session->hid->vendor; @@ -1047,98 +1006,320 @@ int hidp_add_connection(struct hidp_connadd_req *req, struct socket *ctrl_sock, product = 0x0000; } - session->task = kthread_run(hidp_session, session, "khidpd_%04x%04x", - vendor, product); - if (IS_ERR(session->task)) { - err = PTR_ERR(session->task); - goto unlink; - } + session->task = kthread_run(hidp_session_thread, session, + "khidpd_%04x%04x", vendor, product); + if (IS_ERR(session->task)) + return PTR_ERR(session->task); - while (session->waiting_for_startup) { - wait_event_interruptible(session->startup_queue, - !session->waiting_for_startup); - } + while (atomic_read(&session->state) <= HIDP_SESSION_IDLING) + wait_event(session->state_queue, + atomic_read(&session->state) > HIDP_SESSION_IDLING); - if (session->hid) - err = hid_add_device(session->hid); - else - err = input_register_device(session->input); + return 0; +} - if (err < 0) { - atomic_inc(&session->terminate); - wake_up_process(session->task); - up_write(&hidp_session_sem); - return err; - } +/* + * Terminate session thread + * Wake up session thread and notify it to stop. This is asynchronous and + * returns immediately. Call this whenever a runtime error occurs and you want + * the session to stop. + * Note: wake_up_process() performs any necessary memory-barriers for us. + */ +static void hidp_session_terminate(struct hidp_session *session) +{ + atomic_inc(&session->terminate); + wake_up_process(session->task); +} - if (session->input) { - hidp_send_ctrl_message(session, - HIDP_TRANS_SET_PROTOCOL | HIDP_PROTO_BOOT, NULL, 0); - session->flags |= (1 << HIDP_BOOT_PROTOCOL_MODE); +/* + * Probe HIDP session + * This is called from the l2cap_conn core when our l2cap_user object is bound + * to the hci-connection. We get the session via the \user object and can now + * start the session thread, register the HID/input devices and link it into + * the global session list. + * The global session-list owns its own reference to the session object so you + * can drop your own reference after registering the l2cap_user object. 
+ */ +static int hidp_session_probe(struct l2cap_conn *conn, + struct l2cap_user *user) +{ + struct hidp_session *session = container_of(user, + struct hidp_session, + user); + struct hidp_session *s; + int ret; - session->leds = 0xff; - hidp_input_event(session->input, EV_LED, 0, 0); + down_write(&hidp_session_sem); + + /* check that no other session for this device exists */ + s = __hidp_session_find(&session->bdaddr); + if (s) { + ret = -EEXIST; + goto out_unlock; } + ret = hidp_session_start_sync(session); + if (ret) + goto out_unlock; + + ret = hidp_session_dev_add(session); + if (ret) + goto out_stop; + + hidp_session_get(session); + list_add(&session->list, &hidp_session_list); + ret = 0; + goto out_unlock; + +out_stop: + hidp_session_terminate(session); +out_unlock: up_write(&hidp_session_sem); - return 0; + return ret; +} -unlink: +/* + * Remove HIDP session + * Called from the l2cap_conn core when either we explicitly unregistered + * the l2cap_user object or if the underlying connection is shut down. + * We signal the hidp-session thread to shut down, unregister the HID/input + * devices and unlink the session from the global list. + * This drops the reference to the session that is owned by the global + * session-list. + * Note: We _must_ not synchronously wait for the session-thread to shut down. + * This is because the session-thread might be waiting for an HCI lock that is + * held while we are called. Therefore, we only unregister the devices and + * notify the session-thread to terminate. The thread itself owns a reference + * to the session object so it can safely shut down. + */ +static void hidp_session_remove(struct l2cap_conn *conn, + struct l2cap_user *user) +{ + struct hidp_session *session = container_of(user, + struct hidp_session, + user); + + down_write(&hidp_session_sem); + + hidp_session_terminate(session); + hidp_session_dev_del(session); + list_del(&session->list); + + up_write(&hidp_session_sem); + + hidp_session_put(session); +} + +/* + * Session Worker + * This performs the actual main-loop of the HIDP worker. We first check + * whether the underlying connection is still alive, then parse all pending + * messages and finally send all outstanding messages. + */ +static void hidp_session_run(struct hidp_session *session) +{ + struct sock *ctrl_sk = session->ctrl_sock->sk; + struct sock *intr_sk = session->intr_sock->sk; + struct sk_buff *skb; + + for (;;) { + /* + * This thread can be woken up two ways: + * - You call hidp_session_terminate() which sets the + * session->terminate flag and wakes this thread up. + * - Via modifying the socket state of ctrl/intr_sock. This + * thread is woken up by ->sk_state_changed(). + * + * Note: set_current_state() performs any necessary + * memory-barriers for us. 
+ */ + set_current_state(TASK_INTERRUPTIBLE); + + if (atomic_read(&session->terminate)) + break; + + if (ctrl_sk->sk_state != BT_CONNECTED || + intr_sk->sk_state != BT_CONNECTED) + break; + + /* parse incoming intr-skbs */ + while ((skb = skb_dequeue(&intr_sk->sk_receive_queue))) { + skb_orphan(skb); + if (!skb_linearize(skb)) + hidp_recv_intr_frame(session, skb); + else + kfree_skb(skb); + } + + /* send pending intr-skbs */ + hidp_process_transmit(session, &session->intr_transmit, + session->intr_sock); + + /* parse incoming ctrl-skbs */ + while ((skb = skb_dequeue(&ctrl_sk->sk_receive_queue))) { + skb_orphan(skb); + if (!skb_linearize(skb)) + hidp_recv_ctrl_frame(session, skb); + else + kfree_skb(skb); + } + + /* send pending ctrl-skbs */ + hidp_process_transmit(session, &session->ctrl_transmit, + session->ctrl_sock); + + schedule(); + } + + atomic_inc(&session->terminate); + set_current_state(TASK_RUNNING); +} + +/* + * HIDP session thread + * This thread runs the I/O for a single HIDP session. Startup is synchronous + * which allows us to take references to ourselves here instead of doing that + * in the caller. + * When we are ready to run we notify the caller and call hidp_session_run(). + */ +static int hidp_session_thread(void *arg) +{ + struct hidp_session *session = arg; + wait_queue_t ctrl_wait, intr_wait; + + BT_DBG("session %p", session); + + /* initialize runtime environment */ + hidp_session_get(session); + __module_get(THIS_MODULE); + set_user_nice(current, -15); + hidp_set_timer(session); + + init_waitqueue_entry(&ctrl_wait, current); + init_waitqueue_entry(&intr_wait, current); + add_wait_queue(sk_sleep(session->ctrl_sock->sk), &ctrl_wait); + add_wait_queue(sk_sleep(session->intr_sock->sk), &intr_wait); + /* This memory barrier is paired with wq_has_sleeper(). See + * sock_poll_wait() for more information why this is needed. */ + smp_mb(); + + /* notify synchronous startup that we're ready */ + atomic_inc(&session->state); + wake_up(&session->state_queue); + + /* run session */ + hidp_session_run(session); + + /* cleanup runtime environment */ + remove_wait_queue(sk_sleep(session->intr_sock->sk), &intr_wait); + remove_wait_queue(sk_sleep(session->ctrl_sock->sk), &ctrl_wait); + wake_up_interruptible(&session->report_queue); hidp_del_timer(session); + /* + * If we stopped ourselves due to any internal signal, we should try to + * unregister our own session here to avoid having it linger until the + * parent l2cap_conn dies or user-space cleans it up. + * This does not deadlock as we don't do any synchronous shutdown. + * Instead, this call has the same semantics as if user-space tried to + * delete the session. 
+ */ + l2cap_unregister_user(session->conn, &session->user); + hidp_session_put(session); + + module_put_and_exit(0); + return 0; +} + +static int hidp_verify_sockets(struct socket *ctrl_sock, + struct socket *intr_sock) +{ + struct bt_sock *ctrl, *intr; + struct hidp_session *session; + + if (!l2cap_is_socket(ctrl_sock) || !l2cap_is_socket(intr_sock)) + return -EINVAL; + + ctrl = bt_sk(ctrl_sock->sk); + intr = bt_sk(intr_sock->sk); + + if (bacmp(&ctrl->src, &intr->src) || bacmp(&ctrl->dst, &intr->dst)) + return -ENOTUNIQ; + if (ctrl->sk.sk_state != BT_CONNECTED || + intr->sk.sk_state != BT_CONNECTED) + return -EBADFD; + + /* early session check, we check again during session registration */ + session = hidp_session_find(&ctrl->dst); + if (session) { + hidp_session_put(session); + return -EEXIST; } - if (session->hid) { - hid_destroy_device(session->hid); - session->hid = NULL; + return 0; +} + +int hidp_connection_add(struct hidp_connadd_req *req, + struct socket *ctrl_sock, + struct socket *intr_sock) +{ + struct hidp_session *session; + struct l2cap_conn *conn; + struct l2cap_chan *chan = l2cap_pi(ctrl_sock->sk)->chan; + int ret; + + ret = hidp_verify_sockets(ctrl_sock, intr_sock); + if (ret) + return ret; + + conn = NULL; + l2cap_chan_lock(chan); + if (chan->conn) { + l2cap_conn_get(chan->conn); + conn = chan->conn; } + l2cap_chan_unlock(chan); - kfree(session->rd_data); - session->rd_data = NULL; + if (!conn) + return -EBADFD; -purge: - __hidp_unlink_session(session); + ret = hidp_session_new(&session, &bt_sk(ctrl_sock->sk)->dst, ctrl_sock, + intr_sock, req, conn); + if (ret) + goto out_conn; - skb_queue_purge(&session->ctrl_transmit); - skb_queue_purge(&session->intr_transmit); + ret = l2cap_register_user(conn, &session->user); + if (ret) + goto out_session; -failed: - up_write(&hidp_session_sem); + ret = 0; - kfree(session); - return err; +out_session: + hidp_session_put(session); +out_conn: + l2cap_conn_put(conn); + return ret; } -int hidp_del_connection(struct hidp_conndel_req *req) +int hidp_connection_del(struct hidp_conndel_req *req) { struct hidp_session *session; - int err = 0; - BT_DBG(""); + session = hidp_session_find(&req->bdaddr); + if (!session) + return -ENOENT; - down_read(&hidp_session_sem); + if (req->flags & (1 << HIDP_VIRTUAL_CABLE_UNPLUG)) + hidp_send_ctrl_message(session, + HIDP_TRANS_HID_CONTROL | + HIDP_CTRL_VIRTUAL_CABLE_UNPLUG, + NULL, 0); + else + l2cap_unregister_user(session->conn, &session->user); - session = __hidp_get_session(&req->bdaddr); - if (session) { - if (req->flags & (1 << HIDP_VIRTUAL_CABLE_UNPLUG)) { - hidp_send_ctrl_message(session, - HIDP_TRANS_HID_CONTROL | HIDP_CTRL_VIRTUAL_CABLE_UNPLUG, NULL, 0); - } else { - /* Flush the transmit queues */ - skb_queue_purge(&session->ctrl_transmit); - skb_queue_purge(&session->intr_transmit); - - atomic_inc(&session->terminate); - wake_up_process(session->task); - } - } else - err = -ENOENT; + hidp_session_put(session); - up_read(&hidp_session_sem); - return err; + return 0; } int hidp_get_connlist(struct hidp_connlist_req *req) @@ -1153,7 +1334,7 @@ int hidp_get_connlist(struct hidp_connlist_req *req) list_for_each_entry(session, &hidp_session_list, list) { struct hidp_conninfo ci; - __hidp_copy_session(session, &ci); + hidp_copy_session(session, &ci); if (copy_to_user(req->ci, &ci, sizeof(ci))) { err = -EFAULT; @@ -1174,18 +1355,14 @@ int hidp_get_connlist(struct hidp_connlist_req *req) int hidp_get_conninfo(struct hidp_conninfo *ci) { struct hidp_session *session; - int err = 0; - - 
down_read(&hidp_session_sem); - session = __hidp_get_session(&ci->bdaddr); - if (session) - __hidp_copy_session(session, ci); - else - err = -ENOENT; + session = hidp_session_find(&ci->bdaddr); + if (session) { + hidp_copy_session(session, ci); + hidp_session_put(session); + } - up_read(&hidp_session_sem); - return err; + return session ? 0 : -ENOENT; } static int __init hidp_init(void) @@ -1204,6 +1381,7 @@ module_init(hidp_init); module_exit(hidp_exit); MODULE_AUTHOR("Marcel Holtmann <marcel@holtmann.org>"); +MODULE_AUTHOR("David Herrmann <dh.herrmann@gmail.com>"); MODULE_DESCRIPTION("Bluetooth HIDP ver " VERSION); MODULE_VERSION(VERSION); MODULE_LICENSE("GPL"); diff --git a/net/bluetooth/hidp/hidp.h b/net/bluetooth/hidp/hidp.h index af1bcc823f26..6162ce8606ac 100644 --- a/net/bluetooth/hidp/hidp.h +++ b/net/bluetooth/hidp/hidp.h @@ -24,7 +24,9 @@ #define __HIDP_H #include <linux/types.h> +#include <linux/kref.h> #include <net/bluetooth/bluetooth.h> +#include <net/bluetooth/l2cap.h> /* HIDP header masks */ #define HIDP_HEADER_TRANS_MASK 0xf0 @@ -119,43 +121,52 @@ struct hidp_connlist_req { struct hidp_conninfo __user *ci; }; -int hidp_add_connection(struct hidp_connadd_req *req, struct socket *ctrl_sock, struct socket *intr_sock); -int hidp_del_connection(struct hidp_conndel_req *req); +int hidp_connection_add(struct hidp_connadd_req *req, struct socket *ctrl_sock, struct socket *intr_sock); +int hidp_connection_del(struct hidp_conndel_req *req); int hidp_get_connlist(struct hidp_connlist_req *req); int hidp_get_conninfo(struct hidp_conninfo *ci); +enum hidp_session_state { + HIDP_SESSION_IDLING, + HIDP_SESSION_RUNNING, +}; + /* HIDP session defines */ struct hidp_session { struct list_head list; + struct kref ref; - struct hci_conn *conn; + /* runtime management */ + atomic_t state; + wait_queue_head_t state_queue; + atomic_t terminate; + struct task_struct *task; + unsigned long flags; + /* connection management */ + bdaddr_t bdaddr; + struct l2cap_conn *conn; + struct l2cap_user user; struct socket *ctrl_sock; struct socket *intr_sock; - - bdaddr_t bdaddr; - - unsigned long state; - unsigned long flags; - unsigned long idle_to; - + struct sk_buff_head ctrl_transmit; + struct sk_buff_head intr_transmit; uint ctrl_mtu; uint intr_mtu; + unsigned long idle_to; - atomic_t terminate; - struct task_struct *task; - - unsigned char keys[8]; - unsigned char leds; - + /* device management */ struct input_dev *input; - struct hid_device *hid; - struct timer_list timer; - struct sk_buff_head ctrl_transmit; - struct sk_buff_head intr_transmit; + /* Report descriptor */ + __u8 *rd_data; + uint rd_size; + + /* session data */ + unsigned char keys[8]; + unsigned char leds; /* Used in hidp_get_raw_report() */ int waiting_report_type; /* HIDP_DATA_RTYPE_* */ @@ -166,24 +177,8 @@ struct hidp_session { /* Used in hidp_output_raw_report() */ int output_report_success; /* boolean */ - - /* Report descriptor */ - __u8 *rd_data; - uint rd_size; - - wait_queue_head_t startup_queue; - int waiting_for_startup; }; -static inline void hidp_schedule(struct hidp_session *session) -{ - struct sock *ctrl_sk = session->ctrl_sock->sk; - struct sock *intr_sk = session->intr_sock->sk; - - wake_up_interruptible(sk_sleep(ctrl_sk)); - wake_up_interruptible(sk_sleep(intr_sk)); -} - /* HIDP init defines */ extern int __init hidp_init_sockets(void); extern void __exit hidp_cleanup_sockets(void); diff --git a/net/bluetooth/hidp/sock.c b/net/bluetooth/hidp/sock.c index 82a829d90b0f..cb3fdde1968a 100644 --- 
a/net/bluetooth/hidp/sock.c +++ b/net/bluetooth/hidp/sock.c @@ -77,21 +77,12 @@ static int hidp_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long return err; } - if (csock->sk->sk_state != BT_CONNECTED || - isock->sk->sk_state != BT_CONNECTED) { - sockfd_put(csock); - sockfd_put(isock); - return -EBADFD; - } + err = hidp_connection_add(&ca, csock, isock); + if (!err && copy_to_user(argp, &ca, sizeof(ca))) + err = -EFAULT; - err = hidp_add_connection(&ca, csock, isock); - if (!err) { - if (copy_to_user(argp, &ca, sizeof(ca))) - err = -EFAULT; - } else { - sockfd_put(csock); - sockfd_put(isock); - } + sockfd_put(csock); + sockfd_put(isock); return err; @@ -102,7 +93,7 @@ static int hidp_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long if (copy_from_user(&cd, argp, sizeof(cd))) return -EFAULT; - return hidp_del_connection(&cd); + return hidp_connection_del(&cd); case HIDPGETCONNLIST: if (copy_from_user(&cl, argp, sizeof(cl))) @@ -284,7 +275,7 @@ int __init hidp_init_sockets(void) goto error; } - err = bt_procfs_init(THIS_MODULE, &init_net, "hidp", &hidp_sk_list, NULL); + err = bt_procfs_init(&init_net, "hidp", &hidp_sk_list, NULL); if (err < 0) { BT_ERR("Failed to create HIDP proc file"); bt_sock_unregister(BTPROTO_HIDP); @@ -296,7 +287,6 @@ int __init hidp_init_sockets(void) return 0; error: - BT_ERR("Can't register HIDP socket"); proto_unregister(&hidp_proto); return err; } @@ -304,8 +294,6 @@ error: void __exit hidp_cleanup_sockets(void) { bt_procfs_cleanup(&init_net, "hidp"); - if (bt_sock_unregister(BTPROTO_HIDP) < 0) - BT_ERR("Can't unregister HIDP socket"); - + bt_sock_unregister(BTPROTO_HIDP); proto_unregister(&hidp_proto); } diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 22e658322845..a76d1ac0321b 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -571,7 +571,7 @@ void l2cap_chan_del(struct l2cap_chan *chan, int err) chan->conn = NULL; if (chan->chan_type != L2CAP_CHAN_CONN_FIX_A2MP) - hci_conn_put(conn->hcon); + hci_conn_drop(conn->hcon); if (mgr && mgr->bredr_chan == chan) mgr->bredr_chan = NULL; @@ -1446,6 +1446,89 @@ static void l2cap_info_timeout(struct work_struct *work) l2cap_conn_start(conn); } +/* + * l2cap_user + * External modules can register l2cap_user objects on l2cap_conn. The ->probe + * callback is called during registration. The ->remove callback is called + * during unregistration. + * An l2cap_user object is either explicitly unregistered or removed + * automatically when the underlying l2cap_conn object is deleted. This + * guarantees that l2cap->hcon, l2cap->hchan, .. are valid as long as the + * remove callback hasn't been called. + * External modules must own a reference to the l2cap_conn object if they intend + * to call l2cap_unregister_user(). The l2cap_conn object might get destroyed at + * any time if they don't. + */ + +int l2cap_register_user(struct l2cap_conn *conn, struct l2cap_user *user) +{ + struct hci_dev *hdev = conn->hcon->hdev; + int ret; + + /* We need to check whether l2cap_conn is registered. If it is not, we + * must not register the l2cap_user. l2cap_conn_del() unregisters + * l2cap_conn objects, but doesn't provide its own locking. Instead, it + * relies on the parent hci_conn object to be locked. This itself relies + * on the hci_dev object to be locked. So we must lock the hci device + * here, too.
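+ * Holding hci_dev_lock() here also makes registration mutually + * exclusive with l2cap_conn_del(), so ->probe can never race with + * connection teardown.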
*/ + + hci_dev_lock(hdev); + + if (user->list.next || user->list.prev) { + ret = -EINVAL; + goto out_unlock; + } + + /* conn->hchan is NULL after l2cap_conn_del() was called */ + if (!conn->hchan) { + ret = -ENODEV; + goto out_unlock; + } + + ret = user->probe(conn, user); + if (ret) + goto out_unlock; + + list_add(&user->list, &conn->users); + ret = 0; + +out_unlock: + hci_dev_unlock(hdev); + return ret; +} +EXPORT_SYMBOL(l2cap_register_user); + +void l2cap_unregister_user(struct l2cap_conn *conn, struct l2cap_user *user) +{ + struct hci_dev *hdev = conn->hcon->hdev; + + hci_dev_lock(hdev); + + if (!user->list.next || !user->list.prev) + goto out_unlock; + + list_del(&user->list); + user->list.next = NULL; + user->list.prev = NULL; + user->remove(conn, user); + +out_unlock: + hci_dev_unlock(hdev); +} +EXPORT_SYMBOL(l2cap_unregister_user); + +static void l2cap_unregister_all_users(struct l2cap_conn *conn) +{ + struct l2cap_user *user; + + while (!list_empty(&conn->users)) { + user = list_first_entry(&conn->users, struct l2cap_user, list); + list_del(&user->list); + user->list.next = NULL; + user->list.prev = NULL; + user->remove(conn, user); + } +} + static void l2cap_conn_del(struct hci_conn *hcon, int err) { struct l2cap_conn *conn = hcon->l2cap_data; @@ -1458,6 +1541,8 @@ static void l2cap_conn_del(struct hci_conn *hcon, int err) kfree_skb(conn->rx_skb); + l2cap_unregister_all_users(conn); + mutex_lock(&conn->chan_lock); /* Kill channels */ @@ -1486,7 +1571,8 @@ static void l2cap_conn_del(struct hci_conn *hcon, int err) } hcon->l2cap_data = NULL; - kfree(conn); + conn->hchan = NULL; + l2cap_conn_put(conn); } static void security_timeout(struct work_struct *work) @@ -1502,12 +1588,12 @@ static void security_timeout(struct work_struct *work) } } -static struct l2cap_conn *l2cap_conn_add(struct hci_conn *hcon, u8 status) +static struct l2cap_conn *l2cap_conn_add(struct hci_conn *hcon) { struct l2cap_conn *conn = hcon->l2cap_data; struct hci_chan *hchan; - if (conn || status) + if (conn) return conn; hchan = hci_chan_create(hcon); @@ -1520,24 +1606,21 @@ static struct l2cap_conn *l2cap_conn_add(struct hci_conn *hcon, u8 status) return NULL; } + kref_init(&conn->ref); hcon->l2cap_data = conn; conn->hcon = hcon; + hci_conn_get(conn->hcon); conn->hchan = hchan; BT_DBG("hcon %p conn %p hchan %p", hcon, conn, hchan); switch (hcon->type) { - case AMP_LINK: - conn->mtu = hcon->hdev->block_mtu; - break; - case LE_LINK: if (hcon->hdev->le_mtu) { conn->mtu = hcon->hdev->le_mtu; break; } /* fall through */ - default: conn->mtu = hcon->hdev->acl_mtu; break; @@ -1552,6 +1635,7 @@ static struct l2cap_conn *l2cap_conn_add(struct hci_conn *hcon, u8 status) mutex_init(&conn->chan_lock); INIT_LIST_HEAD(&conn->chan_l); + INIT_LIST_HEAD(&conn->users); if (hcon->type == LE_LINK) INIT_DELAYED_WORK(&conn->security_timer, security_timeout); @@ -1563,6 +1647,26 @@ static struct l2cap_conn *l2cap_conn_add(struct hci_conn *hcon, u8 status) return conn; } +static void l2cap_conn_free(struct kref *ref) +{ + struct l2cap_conn *conn = container_of(ref, struct l2cap_conn, ref); + + hci_conn_put(conn->hcon); + kfree(conn); +} + +void l2cap_conn_get(struct l2cap_conn *conn) +{ + kref_get(&conn->ref); +} +EXPORT_SYMBOL(l2cap_conn_get); + +void l2cap_conn_put(struct l2cap_conn *conn) +{ + kref_put(&conn->ref, l2cap_conn_free); +} +EXPORT_SYMBOL(l2cap_conn_put); + /* ---- Socket interface ---- */ /* Find socket with psm and source / destination bdaddr. 
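Taken together, l2cap_register_user() and l2cap_conn_get()/l2cap_conn_put() give external code a safe attach/detach protocol. A minimal sketch of a hypothetical user follows (my_probe, my_remove and my_attach are illustrative names, not part of this patch):

static int my_probe(struct l2cap_conn *conn, struct l2cap_user *user)
{
	/* conn->hcon and conn->hchan are guaranteed valid until ->remove */
	return 0;	/* returning non-zero aborts l2cap_register_user() */
}

static void my_remove(struct l2cap_conn *conn, struct l2cap_user *user)
{
	/* release whatever ->probe acquired; may run from conn teardown */
}

static struct l2cap_user my_user = {
	.probe	= my_probe,
	.remove	= my_remove,
};

static int my_attach(struct l2cap_conn *conn)
{
	int err;

	/* hold a conn reference for as long as we might unregister */
	l2cap_conn_get(conn);
	err = l2cap_register_user(conn, &my_user);
	if (err)
		l2cap_conn_put(conn);
	return err;
}

hidp_connection_add() and hidp_session_remove() above are the in-tree instance of this pattern.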
@@ -1700,9 +1804,9 @@ int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid, goto done; } - conn = l2cap_conn_add(hcon, 0); + conn = l2cap_conn_add(hcon); if (!conn) { - hci_conn_put(hcon); + hci_conn_drop(hcon); err = -ENOMEM; goto done; } @@ -1712,7 +1816,7 @@ int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid, if (!list_empty(&conn->chan_l)) { err = -EBUSY; - hci_conn_put(hcon); + hci_conn_drop(hcon); } if (err) @@ -6210,12 +6314,13 @@ drop: kfree_skb(skb); } -static void l2cap_att_channel(struct l2cap_conn *conn, u16 cid, +static void l2cap_att_channel(struct l2cap_conn *conn, struct sk_buff *skb) { struct l2cap_chan *chan; - chan = l2cap_global_chan_by_scid(0, cid, conn->src, conn->dst); + chan = l2cap_global_chan_by_scid(0, L2CAP_CID_LE_DATA, + conn->src, conn->dst); if (!chan) goto drop; @@ -6264,7 +6369,7 @@ static void l2cap_recv_frame(struct l2cap_conn *conn, struct sk_buff *skb) break; case L2CAP_CID_LE_DATA: - l2cap_att_channel(conn, cid, skb); + l2cap_att_channel(conn, skb); break; case L2CAP_CID_SMP: @@ -6318,7 +6423,7 @@ void l2cap_connect_cfm(struct hci_conn *hcon, u8 status) BT_DBG("hcon %p bdaddr %pMR status %d", hcon, &hcon->dst, status); if (!status) { - conn = l2cap_conn_add(hcon, status); + conn = l2cap_conn_add(hcon); if (conn) l2cap_conn_ready(conn); } else { @@ -6487,7 +6592,7 @@ int l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 flags) goto drop; if (!conn) - conn = l2cap_conn_add(hcon, 0); + conn = l2cap_conn_add(hcon); if (!conn) goto drop; diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index 1bcfb8422fdc..36fed40c162c 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -43,6 +43,12 @@ static void l2cap_sock_init(struct sock *sk, struct sock *parent); static struct sock *l2cap_sock_alloc(struct net *net, struct socket *sock, int proto, gfp_t prio); +bool l2cap_is_socket(struct socket *sock) +{ + return sock && sock->ops == &l2cap_sock_ops; +} +EXPORT_SYMBOL(l2cap_is_socket); + static int l2cap_sock_bind(struct socket *sock, struct sockaddr *addr, int alen) { struct sock *sk = sock->sk; @@ -1292,7 +1298,7 @@ int __init l2cap_init_sockets(void) goto error; } - err = bt_procfs_init(THIS_MODULE, &init_net, "l2cap", &l2cap_sk_list, + err = bt_procfs_init(&init_net, "l2cap", &l2cap_sk_list, NULL); if (err < 0) { BT_ERR("Failed to create L2CAP proc file"); @@ -1312,8 +1318,6 @@ error: void l2cap_cleanup_sockets(void) { bt_procfs_cleanup(&init_net, "l2cap"); - if (bt_sock_unregister(BTPROTO_L2CAP) < 0) - BT_ERR("L2CAP socket unregistration failed"); - + bt_sock_unregister(BTPROTO_L2CAP); proto_unregister(&l2cap_proto); } diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index f559b966279c..35fef22703e9 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -35,7 +35,7 @@ bool enable_hs; #define MGMT_VERSION 1 -#define MGMT_REVISION 2 +#define MGMT_REVISION 3 static const u16 mgmt_commands[] = { MGMT_OP_READ_INDEX_LIST, @@ -106,11 +106,10 @@ static const u16 mgmt_events[] = { * These LE scan and inquiry parameters were chosen according to LE General * Discovery Procedure specification. 
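 * For scale: LE_SCAN_INT/LE_SCAN_WIN below are in 0.625 ms units, so * 0x12 is 18 * 0.625 ms = 11.25 ms, while the timeouts are plain * milliseconds converted with msecs_to_jiffies().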
*/ -#define LE_SCAN_TYPE 0x01 #define LE_SCAN_WIN 0x12 #define LE_SCAN_INT 0x12 -#define LE_SCAN_TIMEOUT_LE_ONLY 10240 /* TGAP(gen_disc_scan_min) */ -#define LE_SCAN_TIMEOUT_BREDR_LE 5120 /* TGAP(100)/2 */ +#define LE_SCAN_TIMEOUT_LE_ONLY msecs_to_jiffies(10240) +#define LE_SCAN_TIMEOUT_BREDR_LE msecs_to_jiffies(5120) #define INQUIRY_LEN_BREDR 0x08 /* TGAP(100) */ #define INQUIRY_LEN_BREDR_LE 0x04 /* TGAP(100)/2 */ @@ -384,7 +383,8 @@ static u32 get_supported_settings(struct hci_dev *hdev) if (lmp_bredr_capable(hdev)) { settings |= MGMT_SETTING_CONNECTABLE; - settings |= MGMT_SETTING_FAST_CONNECTABLE; + if (hdev->hci_ver >= BLUETOOTH_VER_1_2) + settings |= MGMT_SETTING_FAST_CONNECTABLE; settings |= MGMT_SETTING_DISCOVERABLE; settings |= MGMT_SETTING_BREDR; settings |= MGMT_SETTING_LINK_SECURITY; @@ -409,6 +409,9 @@ static u32 get_current_settings(struct hci_dev *hdev) if (test_bit(HCI_CONNECTABLE, &hdev->dev_flags)) settings |= MGMT_SETTING_CONNECTABLE; + if (test_bit(HCI_FAST_CONNECTABLE, &hdev->dev_flags)) + settings |= MGMT_SETTING_FAST_CONNECTABLE; + if (test_bit(HCI_DISCOVERABLE, &hdev->dev_flags)) settings |= MGMT_SETTING_DISCOVERABLE; @@ -435,35 +438,117 @@ static u32 get_current_settings(struct hci_dev *hdev) #define PNP_INFO_SVCLASS_ID 0x1200 -static u8 bluetooth_base_uuid[] = { - 0xFB, 0x34, 0x9B, 0x5F, 0x80, 0x00, 0x00, 0x80, - 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -}; +static u8 *create_uuid16_list(struct hci_dev *hdev, u8 *data, ptrdiff_t len) +{ + u8 *ptr = data, *uuids_start = NULL; + struct bt_uuid *uuid; + + if (len < 4) + return ptr; + + list_for_each_entry(uuid, &hdev->uuids, list) { + u16 uuid16; -static u16 get_uuid16(u8 *uuid128) + if (uuid->size != 16) + continue; + + uuid16 = get_unaligned_le16(&uuid->uuid[12]); + if (uuid16 < 0x1100) + continue; + + if (uuid16 == PNP_INFO_SVCLASS_ID) + continue; + + if (!uuids_start) { + uuids_start = ptr; + uuids_start[0] = 1; + uuids_start[1] = EIR_UUID16_ALL; + ptr += 2; + } + + /* Stop if not enough space to put next UUID */ + if ((ptr - data) + sizeof(u16) > len) { + uuids_start[1] = EIR_UUID16_SOME; + break; + } + + *ptr++ = (uuid16 & 0x00ff); + *ptr++ = (uuid16 & 0xff00) >> 8; + uuids_start[0] += sizeof(uuid16); + } + + return ptr; +} + +static u8 *create_uuid32_list(struct hci_dev *hdev, u8 *data, ptrdiff_t len) { - u32 val; - int i; + u8 *ptr = data, *uuids_start = NULL; + struct bt_uuid *uuid; - for (i = 0; i < 12; i++) { - if (bluetooth_base_uuid[i] != uuid128[i]) - return 0; + if (len < 6) + return ptr; + + list_for_each_entry(uuid, &hdev->uuids, list) { + if (uuid->size != 32) + continue; + + if (!uuids_start) { + uuids_start = ptr; + uuids_start[0] = 1; + uuids_start[1] = EIR_UUID32_ALL; + ptr += 2; + } + + /* Stop if not enough space to put next UUID */ + if ((ptr - data) + sizeof(u32) > len) { + uuids_start[1] = EIR_UUID32_SOME; + break; + } + + memcpy(ptr, &uuid->uuid[12], sizeof(u32)); + ptr += sizeof(u32); + uuids_start[0] += sizeof(u32); } - val = get_unaligned_le32(&uuid128[12]); - if (val > 0xffff) - return 0; + return ptr; +} + +static u8 *create_uuid128_list(struct hci_dev *hdev, u8 *data, ptrdiff_t len) +{ + u8 *ptr = data, *uuids_start = NULL; + struct bt_uuid *uuid; + + if (len < 18) + return ptr; + + list_for_each_entry(uuid, &hdev->uuids, list) { + if (uuid->size != 128) + continue; - return (u16) val; + if (!uuids_start) { + uuids_start = ptr; + uuids_start[0] = 1; + uuids_start[1] = EIR_UUID128_ALL; + ptr += 2; + } + + /* Stop if not enough space to put next UUID */ + if ((ptr - data) + 16 > 
len) { + uuids_start[1] = EIR_UUID128_SOME; + break; + } + + memcpy(ptr, uuid->uuid, 16); + ptr += 16; + uuids_start[0] += 16; + } + + return ptr; } static void create_eir(struct hci_dev *hdev, u8 *data) { u8 *ptr = data; - u16 eir_len = 0; - u16 uuid16_list[HCI_MAX_EIR_LENGTH / sizeof(u16)]; - int i, truncated = 0; - struct bt_uuid *uuid; size_t name_len; name_len = strlen(hdev->dev_name); @@ -481,7 +566,6 @@ static void create_eir(struct hci_dev *hdev, u8 *data) memcpy(ptr + 2, hdev->dev_name, name_len); - eir_len += (name_len + 2); ptr += (name_len + 2); } @@ -490,7 +574,6 @@ static void create_eir(struct hci_dev *hdev, u8 *data) ptr[1] = EIR_TX_POWER; ptr[2] = (u8) hdev->inq_tx_power; - eir_len += 3; ptr += 3; } @@ -503,88 +586,41 @@ static void create_eir(struct hci_dev *hdev, u8 *data) put_unaligned_le16(hdev->devid_product, ptr + 6); put_unaligned_le16(hdev->devid_version, ptr + 8); - eir_len += 10; ptr += 10; } - memset(uuid16_list, 0, sizeof(uuid16_list)); - - /* Group all UUID16 types */ - list_for_each_entry(uuid, &hdev->uuids, list) { - u16 uuid16; - - uuid16 = get_uuid16(uuid->uuid); - if (uuid16 == 0) - return; - - if (uuid16 < 0x1100) - continue; - - if (uuid16 == PNP_INFO_SVCLASS_ID) - continue; - - /* Stop if not enough space to put next UUID */ - if (eir_len + 2 + sizeof(u16) > HCI_MAX_EIR_LENGTH) { - truncated = 1; - break; - } - - /* Check for duplicates */ - for (i = 0; uuid16_list[i] != 0; i++) - if (uuid16_list[i] == uuid16) - break; - - if (uuid16_list[i] == 0) { - uuid16_list[i] = uuid16; - eir_len += sizeof(u16); - } - } - - if (uuid16_list[0] != 0) { - u8 *length = ptr; - - /* EIR Data type */ - ptr[1] = truncated ? EIR_UUID16_SOME : EIR_UUID16_ALL; - - ptr += 2; - eir_len += 2; - - for (i = 0; uuid16_list[i] != 0; i++) { - *ptr++ = (uuid16_list[i] & 0x00ff); - *ptr++ = (uuid16_list[i] & 0xff00) >> 8; - } - - /* EIR Data length */ - *length = (i * sizeof(u16)) + 1; - } + ptr = create_uuid16_list(hdev, ptr, HCI_MAX_EIR_LENGTH - (ptr - data)); + ptr = create_uuid32_list(hdev, ptr, HCI_MAX_EIR_LENGTH - (ptr - data)); + ptr = create_uuid128_list(hdev, ptr, HCI_MAX_EIR_LENGTH - (ptr - data)); } -static int update_eir(struct hci_dev *hdev) +static void update_eir(struct hci_request *req) { + struct hci_dev *hdev = req->hdev; struct hci_cp_write_eir cp; if (!hdev_is_powered(hdev)) - return 0; + return; if (!lmp_ext_inq_capable(hdev)) - return 0; + return; if (!test_bit(HCI_SSP_ENABLED, &hdev->dev_flags)) - return 0; + return; if (test_bit(HCI_SERVICE_CACHE, &hdev->dev_flags)) - return 0; + return; memset(&cp, 0, sizeof(cp)); create_eir(hdev, cp.data); if (memcmp(cp.data, hdev->eir, sizeof(cp.data)) == 0) - return 0; + return; memcpy(hdev->eir, cp.data, sizeof(cp.data)); - return hci_send_cmd(hdev, HCI_OP_WRITE_EIR, sizeof(cp), &cp); + hci_req_add(req, HCI_OP_WRITE_EIR, sizeof(cp), &cp); } static u8 get_service_classes(struct hci_dev *hdev) @@ -598,47 +634,48 @@ static u8 get_service_classes(struct hci_dev *hdev) return val; } -static int update_class(struct hci_dev *hdev) +static void update_class(struct hci_request *req) { + struct hci_dev *hdev = req->hdev; u8 cod[3]; - int err; BT_DBG("%s", hdev->name); if (!hdev_is_powered(hdev)) - return 0; + return; if (test_bit(HCI_SERVICE_CACHE, &hdev->dev_flags)) - return 0; + return; cod[0] = hdev->minor_class; cod[1] = hdev->major_class; cod[2] = get_service_classes(hdev); if (memcmp(cod, hdev->dev_class, 3) == 0) - return 0; - - err = hci_send_cmd(hdev, HCI_OP_WRITE_CLASS_OF_DEV, sizeof(cod), cod); - if (err == 0) - 
set_bit(HCI_PENDING_CLASS, &hdev->dev_flags); + return; - return err; + hci_req_add(req, HCI_OP_WRITE_CLASS_OF_DEV, sizeof(cod), cod); } static void service_cache_off(struct work_struct *work) { struct hci_dev *hdev = container_of(work, struct hci_dev, service_cache.work); + struct hci_request req; if (!test_and_clear_bit(HCI_SERVICE_CACHE, &hdev->dev_flags)) return; + hci_req_init(&req, hdev); + hci_dev_lock(hdev); - update_eir(hdev); - update_class(hdev); + update_eir(&req); + update_class(&req); hci_dev_unlock(hdev); + + hci_req_run(&req, NULL); } static void mgmt_init_hdev(struct sock *sk, struct hci_dev *hdev) @@ -728,13 +765,9 @@ static void mgmt_pending_foreach(u16 opcode, struct hci_dev *hdev, void *data), void *data) { - struct list_head *p, *n; - - list_for_each_safe(p, n, &hdev->mgmt_pending) { - struct pending_cmd *cmd; - - cmd = list_entry(p, struct pending_cmd, list); + struct pending_cmd *cmd, *tmp; + list_for_each_entry_safe(cmd, tmp, &hdev->mgmt_pending, list) { if (opcode > 0 && cmd->opcode != opcode) continue; @@ -777,14 +810,19 @@ static int set_powered(struct sock *sk, struct hci_dev *hdev, void *data, BT_DBG("request for %s", hdev->name); + if (cp->val != 0x00 && cp->val != 0x01) + return cmd_status(sk, hdev->id, MGMT_OP_SET_POWERED, + MGMT_STATUS_INVALID_PARAMS); + hci_dev_lock(hdev); if (test_and_clear_bit(HCI_AUTO_OFF, &hdev->dev_flags)) { cancel_delayed_work(&hdev->power_off); if (cp->val) { - err = send_settings_rsp(sk, MGMT_OP_SET_POWERED, hdev); - mgmt_powered(hdev, 1); + mgmt_pending_add(sk, MGMT_OP_SET_POWERED, hdev, + data, len); + err = mgmt_powered(hdev, 1); goto failed; } } @@ -807,9 +845,9 @@ static int set_powered(struct sock *sk, struct hci_dev *hdev, void *data, } if (cp->val) - schedule_work(&hdev->power_on); + queue_work(hdev->req_workqueue, &hdev->power_on); else - schedule_work(&hdev->power_off.work); + queue_work(hdev->req_workqueue, &hdev->power_off.work); err = 0; @@ -872,6 +910,10 @@ static int set_discoverable(struct sock *sk, struct hci_dev *hdev, void *data, return cmd_status(sk, hdev->id, MGMT_OP_SET_DISCOVERABLE, MGMT_STATUS_NOT_SUPPORTED); + if (cp->val != 0x00 && cp->val != 0x01) + return cmd_status(sk, hdev->id, MGMT_OP_SET_DISCOVERABLE, + MGMT_STATUS_INVALID_PARAMS); + timeout = __le16_to_cpu(cp->timeout); if (!cp->val && timeout > 0) return cmd_status(sk, hdev->id, MGMT_OP_SET_DISCOVERABLE, @@ -957,11 +999,64 @@ failed: return err; } +static void write_fast_connectable(struct hci_request *req, bool enable) +{ + struct hci_dev *hdev = req->hdev; + struct hci_cp_write_page_scan_activity acp; + u8 type; + + if (hdev->hci_ver < BLUETOOTH_VER_1_2) + return; + + if (enable) { + type = PAGE_SCAN_TYPE_INTERLACED; + + /* 160 msec page scan interval */ + acp.interval = __constant_cpu_to_le16(0x0100); + } else { + type = PAGE_SCAN_TYPE_STANDARD; /* default */ + + /* default 1.28 sec page scan */ + acp.interval = __constant_cpu_to_le16(0x0800); + } + + acp.window = __constant_cpu_to_le16(0x0012); + + if (__cpu_to_le16(hdev->page_scan_interval) != acp.interval || + __cpu_to_le16(hdev->page_scan_window) != acp.window) + hci_req_add(req, HCI_OP_WRITE_PAGE_SCAN_ACTIVITY, + sizeof(acp), &acp); + + if (hdev->page_scan_type != type) + hci_req_add(req, HCI_OP_WRITE_PAGE_SCAN_TYPE, 1, &type); +} + +static void set_connectable_complete(struct hci_dev *hdev, u8 status) +{ + struct pending_cmd *cmd; + + BT_DBG("status 0x%02x", status); + + hci_dev_lock(hdev); + + cmd = mgmt_pending_find(MGMT_OP_SET_CONNECTABLE, hdev); + if (!cmd) + goto unlock; + + 
send_settings_rsp(cmd->sk, MGMT_OP_SET_CONNECTABLE, hdev); + + mgmt_pending_remove(cmd); + +unlock: + hci_dev_unlock(hdev); +} + static int set_connectable(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_mode *cp = data; struct pending_cmd *cmd; + struct hci_request req; u8 scan; int err; @@ -971,6 +1066,10 @@ static int set_connectable(struct sock *sk, struct hci_dev *hdev, void *data, return cmd_status(sk, hdev->id, MGMT_OP_SET_CONNECTABLE, MGMT_STATUS_NOT_SUPPORTED); + if (cp->val != 0x00 && cp->val != 0x01) + return cmd_status(sk, hdev->id, MGMT_OP_SET_CONNECTABLE, + MGMT_STATUS_INVALID_PARAMS); + hci_dev_lock(hdev); if (!hdev_is_powered(hdev)) { @@ -1024,7 +1123,20 @@ static int set_connectable(struct sock *sk, struct hci_dev *hdev, void *data, cancel_delayed_work(&hdev->discov_off); } - err = hci_send_cmd(hdev, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan); + hci_req_init(&req, hdev); + + hci_req_add(&req, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan); + + /* If we're going from non-connectable to connectable or + * vice-versa when fast connectable is enabled ensure that fast + * connectable gets disabled. write_fast_connectable won't do + * anything if the page scan parameters are already what they + * should be. + */ + if (cp->val || test_bit(HCI_FAST_CONNECTABLE, &hdev->dev_flags)) + write_fast_connectable(&req, false); + + err = hci_req_run(&req, set_connectable_complete); if (err < 0) mgmt_pending_remove(cmd); @@ -1041,6 +1153,10 @@ static int set_pairable(struct sock *sk, struct hci_dev *hdev, void *data, BT_DBG("request for %s", hdev->name); + if (cp->val != 0x00 && cp->val != 0x01) + return cmd_status(sk, hdev->id, MGMT_OP_SET_PAIRABLE, + MGMT_STATUS_INVALID_PARAMS); + hci_dev_lock(hdev); if (cp->val) @@ -1073,6 +1189,10 @@ static int set_link_security(struct sock *sk, struct hci_dev *hdev, void *data, return cmd_status(sk, hdev->id, MGMT_OP_SET_LINK_SECURITY, MGMT_STATUS_NOT_SUPPORTED); + if (cp->val != 0x00 && cp->val != 0x01) + return cmd_status(sk, hdev->id, MGMT_OP_SET_LINK_SECURITY, + MGMT_STATUS_INVALID_PARAMS); + hci_dev_lock(hdev); if (!hdev_is_powered(hdev)) { @@ -1133,13 +1253,15 @@ static int set_ssp(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) BT_DBG("request for %s", hdev->name); - hci_dev_lock(hdev); + if (!lmp_ssp_capable(hdev)) + return cmd_status(sk, hdev->id, MGMT_OP_SET_SSP, + MGMT_STATUS_NOT_SUPPORTED); - if (!lmp_ssp_capable(hdev)) { - err = cmd_status(sk, hdev->id, MGMT_OP_SET_SSP, - MGMT_STATUS_NOT_SUPPORTED); - goto failed; - } + if (cp->val != 0x00 && cp->val != 0x01) + return cmd_status(sk, hdev->id, MGMT_OP_SET_SSP, + MGMT_STATUS_INVALID_PARAMS); + + hci_dev_lock(hdev); val = !!cp->val; @@ -1199,6 +1321,10 @@ static int set_hs(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) return cmd_status(sk, hdev->id, MGMT_OP_SET_HS, MGMT_STATUS_NOT_SUPPORTED); + if (cp->val != 0x00 && cp->val != 0x01) + return cmd_status(sk, hdev->id, MGMT_OP_SET_HS, + MGMT_STATUS_INVALID_PARAMS); + if (cp->val) set_bit(HCI_HS_ENABLED, &hdev->dev_flags); else @@ -1217,13 +1343,20 @@ static int set_le(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) BT_DBG("request for %s", hdev->name); - hci_dev_lock(hdev); + if (!lmp_le_capable(hdev)) + return cmd_status(sk, hdev->id, MGMT_OP_SET_LE, + MGMT_STATUS_NOT_SUPPORTED); - if (!lmp_le_capable(hdev)) { - err = cmd_status(sk, hdev->id, MGMT_OP_SET_LE, - MGMT_STATUS_NOT_SUPPORTED); - goto unlock; - } + if (cp->val != 0x00 && cp->val != 0x01) + return cmd_status(sk, hdev->id, MGMT_OP_SET_LE, + 
MGMT_STATUS_INVALID_PARAMS); + + /* LE-only devices do not allow toggling LE on/off */ + if (!lmp_bredr_capable(hdev)) + return cmd_status(sk, hdev->id, MGMT_OP_SET_LE, + MGMT_STATUS_REJECTED); + + hci_dev_lock(hdev); val = !!cp->val; enabled = lmp_host_le_capable(hdev); @@ -1275,10 +1408,79 @@ unlock: return err; } +/* This is a helper function to test for pending mgmt commands that can + * cause CoD or EIR HCI commands. We can only allow one such pending + * mgmt command at a time since otherwise we cannot easily track what + * the current values are, will be, and based on that calculate if a new + * HCI command needs to be sent and if yes with what value. + */ +static bool pending_eir_or_class(struct hci_dev *hdev) +{ + struct pending_cmd *cmd; + + list_for_each_entry(cmd, &hdev->mgmt_pending, list) { + switch (cmd->opcode) { + case MGMT_OP_ADD_UUID: + case MGMT_OP_REMOVE_UUID: + case MGMT_OP_SET_DEV_CLASS: + case MGMT_OP_SET_POWERED: + return true; + } + } + + return false; +} + +static const u8 bluetooth_base_uuid[] = { + 0xfb, 0x34, 0x9b, 0x5f, 0x80, 0x00, 0x00, 0x80, + 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +}; + +static u8 get_uuid_size(const u8 *uuid) +{ + u32 val; + + if (memcmp(uuid, bluetooth_base_uuid, 12)) + return 128; + + val = get_unaligned_le32(&uuid[12]); + if (val > 0xffff) + return 32; + + return 16; +} + +static void mgmt_class_complete(struct hci_dev *hdev, u16 mgmt_op, u8 status) +{ + struct pending_cmd *cmd; + + hci_dev_lock(hdev); + + cmd = mgmt_pending_find(mgmt_op, hdev); + if (!cmd) + goto unlock; + + cmd_complete(cmd->sk, cmd->index, cmd->opcode, mgmt_status(status), + hdev->dev_class, 3); + + mgmt_pending_remove(cmd); + +unlock: + hci_dev_unlock(hdev); +} + +static void add_uuid_complete(struct hci_dev *hdev, u8 status) +{ + BT_DBG("status 0x%02x", status); + + mgmt_class_complete(hdev, MGMT_OP_ADD_UUID, status); +} + static int add_uuid(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_cp_add_uuid *cp = data; struct pending_cmd *cmd; + struct hci_request req; struct bt_uuid *uuid; int err; @@ -1286,7 +1488,7 @@ static int add_uuid(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) hci_dev_lock(hdev); - if (test_bit(HCI_PENDING_CLASS, &hdev->dev_flags)) { + if (pending_eir_or_class(hdev)) { err = cmd_status(sk, hdev->id, MGMT_OP_ADD_UUID, MGMT_STATUS_BUSY); goto failed; @@ -1300,26 +1502,32 @@ static int add_uuid(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) memcpy(uuid->uuid, cp->uuid, 16); uuid->svc_hint = cp->svc_hint; + uuid->size = get_uuid_size(cp->uuid); - list_add(&uuid->list, &hdev->uuids); + list_add_tail(&uuid->list, &hdev->uuids); - err = update_class(hdev); - if (err < 0) - goto failed; + hci_req_init(&req, hdev); - err = update_eir(hdev); - if (err < 0) - goto failed; + update_class(&req); + update_eir(&req); + + err = hci_req_run(&req, add_uuid_complete); + if (err < 0) { + if (err != -ENODATA) + goto failed; - if (!test_bit(HCI_PENDING_CLASS, &hdev->dev_flags)) { err = cmd_complete(sk, hdev->id, MGMT_OP_ADD_UUID, 0, hdev->dev_class, 3); goto failed; } cmd = mgmt_pending_add(sk, MGMT_OP_ADD_UUID, hdev, data, len); - if (!cmd) + if (!cmd) { err = -ENOMEM; + goto failed; + } + + err = 0; failed: hci_dev_unlock(hdev); @@ -1332,27 +1540,36 @@ static bool enable_service_cache(struct hci_dev *hdev) return false; if (!test_and_set_bit(HCI_SERVICE_CACHE, &hdev->dev_flags)) { - schedule_delayed_work(&hdev->service_cache, CACHE_TIMEOUT); + queue_delayed_work(hdev->workqueue, &hdev->service_cache, + 
CACHE_TIMEOUT); return true; } return false; } +static void remove_uuid_complete(struct hci_dev *hdev, u8 status) +{ + BT_DBG("status 0x%02x", status); + + mgmt_class_complete(hdev, MGMT_OP_REMOVE_UUID, status); +} + static int remove_uuid(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_cp_remove_uuid *cp = data; struct pending_cmd *cmd; - struct list_head *p, *n; + struct bt_uuid *match, *tmp; u8 bt_uuid_any[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + struct hci_request req; int err, found; BT_DBG("request for %s", hdev->name); hci_dev_lock(hdev); - if (test_bit(HCI_PENDING_CLASS, &hdev->dev_flags)) { + if (pending_eir_or_class(hdev)) { err = cmd_status(sk, hdev->id, MGMT_OP_REMOVE_UUID, MGMT_STATUS_BUSY); goto unlock; @@ -1372,9 +1589,7 @@ static int remove_uuid(struct sock *sk, struct hci_dev *hdev, void *data, found = 0; - list_for_each_safe(p, n, &hdev->uuids) { - struct bt_uuid *match = list_entry(p, struct bt_uuid, list); - + list_for_each_entry_safe(match, tmp, &hdev->uuids, list) { if (memcmp(match->uuid, cp->uuid, 16) != 0) continue; @@ -1390,46 +1605,69 @@ static int remove_uuid(struct sock *sk, struct hci_dev *hdev, void *data, } update_class: - err = update_class(hdev); - if (err < 0) - goto unlock; + hci_req_init(&req, hdev); - err = update_eir(hdev); - if (err < 0) - goto unlock; + update_class(&req); + update_eir(&req); + + err = hci_req_run(&req, remove_uuid_complete); + if (err < 0) { + if (err != -ENODATA) + goto unlock; - if (!test_bit(HCI_PENDING_CLASS, &hdev->dev_flags)) { err = cmd_complete(sk, hdev->id, MGMT_OP_REMOVE_UUID, 0, hdev->dev_class, 3); goto unlock; } cmd = mgmt_pending_add(sk, MGMT_OP_REMOVE_UUID, hdev, data, len); - if (!cmd) + if (!cmd) { err = -ENOMEM; + goto unlock; + } + + err = 0; unlock: hci_dev_unlock(hdev); return err; } +static void set_class_complete(struct hci_dev *hdev, u8 status) +{ + BT_DBG("status 0x%02x", status); + + mgmt_class_complete(hdev, MGMT_OP_SET_DEV_CLASS, status); +} + static int set_dev_class(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_cp_set_dev_class *cp = data; struct pending_cmd *cmd; + struct hci_request req; int err; BT_DBG("request for %s", hdev->name); + if (!lmp_bredr_capable(hdev)) + return cmd_status(sk, hdev->id, MGMT_OP_SET_DEV_CLASS, + MGMT_STATUS_NOT_SUPPORTED); + hci_dev_lock(hdev); - if (test_bit(HCI_PENDING_CLASS, &hdev->dev_flags)) { + if (pending_eir_or_class(hdev)) { err = cmd_status(sk, hdev->id, MGMT_OP_SET_DEV_CLASS, MGMT_STATUS_BUSY); goto unlock; } + if ((cp->minor & 0x03) != 0 || (cp->major & 0xe0) != 0) { + err = cmd_status(sk, hdev->id, MGMT_OP_SET_DEV_CLASS, + MGMT_STATUS_INVALID_PARAMS); + goto unlock; + } + hdev->major_class = cp->major; hdev->minor_class = cp->minor; @@ -1439,26 +1677,34 @@ static int set_dev_class(struct sock *sk, struct hci_dev *hdev, void *data, goto unlock; } + hci_req_init(&req, hdev); + if (test_and_clear_bit(HCI_SERVICE_CACHE, &hdev->dev_flags)) { hci_dev_unlock(hdev); cancel_delayed_work_sync(&hdev->service_cache); hci_dev_lock(hdev); - update_eir(hdev); + update_eir(&req); } - err = update_class(hdev); - if (err < 0) - goto unlock; + update_class(&req); + + err = hci_req_run(&req, set_class_complete); + if (err < 0) { + if (err != -ENODATA) + goto unlock; - if (!test_bit(HCI_PENDING_CLASS, &hdev->dev_flags)) { err = cmd_complete(sk, hdev->id, MGMT_OP_SET_DEV_CLASS, 0, hdev->dev_class, 3); goto unlock; } cmd = mgmt_pending_add(sk, MGMT_OP_SET_DEV_CLASS, hdev, data, len); - if (!cmd) + if (!cmd) { err 
= -ENOMEM; + goto unlock; + } + + err = 0; unlock: hci_dev_unlock(hdev); @@ -1483,9 +1729,21 @@ static int load_link_keys(struct sock *sk, struct hci_dev *hdev, void *data, MGMT_STATUS_INVALID_PARAMS); } + if (cp->debug_keys != 0x00 && cp->debug_keys != 0x01) + return cmd_status(sk, hdev->id, MGMT_OP_LOAD_LINK_KEYS, + MGMT_STATUS_INVALID_PARAMS); + BT_DBG("%s debug_keys %u key_count %u", hdev->name, cp->debug_keys, key_count); + for (i = 0; i < key_count; i++) { + struct mgmt_link_key_info *key = &cp->keys[i]; + + if (key->addr.type != BDADDR_BREDR) + return cmd_status(sk, hdev->id, MGMT_OP_LOAD_LINK_KEYS, + MGMT_STATUS_INVALID_PARAMS); + } + hci_dev_lock(hdev); hci_link_keys_clear(hdev); @@ -1533,12 +1791,22 @@ static int unpair_device(struct sock *sk, struct hci_dev *hdev, void *data, struct hci_conn *conn; int err; - hci_dev_lock(hdev); - memset(&rp, 0, sizeof(rp)); bacpy(&rp.addr.bdaddr, &cp->addr.bdaddr); rp.addr.type = cp->addr.type; + if (!bdaddr_type_is_valid(cp->addr.type)) + return cmd_complete(sk, hdev->id, MGMT_OP_UNPAIR_DEVICE, + MGMT_STATUS_INVALID_PARAMS, + &rp, sizeof(rp)); + + if (cp->disconnect != 0x00 && cp->disconnect != 0x01) + return cmd_complete(sk, hdev->id, MGMT_OP_UNPAIR_DEVICE, + MGMT_STATUS_INVALID_PARAMS, + &rp, sizeof(rp)); + + hci_dev_lock(hdev); + if (!hdev_is_powered(hdev)) { err = cmd_complete(sk, hdev->id, MGMT_OP_UNPAIR_DEVICE, MGMT_STATUS_NOT_POWERED, &rp, sizeof(rp)); @@ -1596,6 +1864,7 @@ static int disconnect(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_cp_disconnect *cp = data; + struct mgmt_rp_disconnect rp; struct hci_cp_disconnect dc; struct pending_cmd *cmd; struct hci_conn *conn; @@ -1603,17 +1872,26 @@ static int disconnect(struct sock *sk, struct hci_dev *hdev, void *data, BT_DBG(""); + memset(&rp, 0, sizeof(rp)); + bacpy(&rp.addr.bdaddr, &cp->addr.bdaddr); + rp.addr.type = cp->addr.type; + + if (!bdaddr_type_is_valid(cp->addr.type)) + return cmd_complete(sk, hdev->id, MGMT_OP_DISCONNECT, + MGMT_STATUS_INVALID_PARAMS, + &rp, sizeof(rp)); + hci_dev_lock(hdev); if (!test_bit(HCI_UP, &hdev->flags)) { - err = cmd_status(sk, hdev->id, MGMT_OP_DISCONNECT, - MGMT_STATUS_NOT_POWERED); + err = cmd_complete(sk, hdev->id, MGMT_OP_DISCONNECT, + MGMT_STATUS_NOT_POWERED, &rp, sizeof(rp)); goto failed; } if (mgmt_pending_find(MGMT_OP_DISCONNECT, hdev)) { - err = cmd_status(sk, hdev->id, MGMT_OP_DISCONNECT, - MGMT_STATUS_BUSY); + err = cmd_complete(sk, hdev->id, MGMT_OP_DISCONNECT, + MGMT_STATUS_BUSY, &rp, sizeof(rp)); goto failed; } @@ -1624,8 +1902,8 @@ static int disconnect(struct sock *sk, struct hci_dev *hdev, void *data, conn = hci_conn_hash_lookup_ba(hdev, LE_LINK, &cp->addr.bdaddr); if (!conn || conn->state == BT_OPEN || conn->state == BT_CLOSED) { - err = cmd_status(sk, hdev->id, MGMT_OP_DISCONNECT, - MGMT_STATUS_NOT_CONNECTED); + err = cmd_complete(sk, hdev->id, MGMT_OP_DISCONNECT, + MGMT_STATUS_NOT_CONNECTED, &rp, sizeof(rp)); goto failed; } @@ -1857,7 +2135,7 @@ static void pairing_complete(struct pending_cmd *cmd, u8 status) conn->security_cfm_cb = NULL; conn->disconn_cfm_cb = NULL; - hci_conn_put(conn); + hci_conn_drop(conn); mgmt_pending_remove(cmd); } @@ -1903,11 +2181,20 @@ static int pair_device(struct sock *sk, struct hci_dev *hdev, void *data, BT_DBG(""); + memset(&rp, 0, sizeof(rp)); + bacpy(&rp.addr.bdaddr, &cp->addr.bdaddr); + rp.addr.type = cp->addr.type; + + if (!bdaddr_type_is_valid(cp->addr.type)) + return cmd_complete(sk, hdev->id, MGMT_OP_PAIR_DEVICE, + MGMT_STATUS_INVALID_PARAMS, + &rp, sizeof(rp)); 
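Each of these address checks funnels through bdaddr_type_is_valid(); a minimal sketch of such a helper (the in-tree version lives in the bluetooth headers, so treat this shape as an assumption):

static inline bool bdaddr_type_is_valid(__u8 type)
{
	switch (type) {
	case BDADDR_BREDR:
	case BDADDR_LE_PUBLIC:
	case BDADDR_LE_RANDOM:
		return true;
	}

	/* anything else is rejected with MGMT_STATUS_INVALID_PARAMS */
	return false;
}

ltk_is_valid() below relies on the analogous LE-only variant, bdaddr_type_is_le(), which accepts just the two LE address types.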
+ hci_dev_lock(hdev); if (!hdev_is_powered(hdev)) { - err = cmd_status(sk, hdev->id, MGMT_OP_PAIR_DEVICE, - MGMT_STATUS_NOT_POWERED); + err = cmd_complete(sk, hdev->id, MGMT_OP_PAIR_DEVICE, + MGMT_STATUS_NOT_POWERED, &rp, sizeof(rp)); goto unlock; } @@ -1924,10 +2211,6 @@ static int pair_device(struct sock *sk, struct hci_dev *hdev, void *data, conn = hci_connect(hdev, LE_LINK, &cp->addr.bdaddr, cp->addr.type, sec_level, auth_type); - memset(&rp, 0, sizeof(rp)); - bacpy(&rp.addr.bdaddr, &cp->addr.bdaddr); - rp.addr.type = cp->addr.type; - if (IS_ERR(conn)) { int status; @@ -1943,7 +2226,7 @@ static int pair_device(struct sock *sk, struct hci_dev *hdev, void *data, } if (conn->connect_cfm_cb) { - hci_conn_put(conn); + hci_conn_drop(conn); err = cmd_complete(sk, hdev->id, MGMT_OP_PAIR_DEVICE, MGMT_STATUS_BUSY, &rp, sizeof(rp)); goto unlock; @@ -1952,7 +2235,7 @@ static int pair_device(struct sock *sk, struct hci_dev *hdev, void *data, cmd = mgmt_pending_add(sk, MGMT_OP_PAIR_DEVICE, hdev, data, len); if (!cmd) { err = -ENOMEM; - hci_conn_put(conn); + hci_conn_drop(conn); goto unlock; } @@ -2021,7 +2304,7 @@ unlock: } static int user_pairing_resp(struct sock *sk, struct hci_dev *hdev, - bdaddr_t *bdaddr, u8 type, u16 mgmt_op, + struct mgmt_addr_info *addr, u16 mgmt_op, u16 hci_op, __le32 passkey) { struct pending_cmd *cmd; @@ -2031,37 +2314,41 @@ static int user_pairing_resp(struct sock *sk, struct hci_dev *hdev, hci_dev_lock(hdev); if (!hdev_is_powered(hdev)) { - err = cmd_status(sk, hdev->id, mgmt_op, - MGMT_STATUS_NOT_POWERED); + err = cmd_complete(sk, hdev->id, mgmt_op, + MGMT_STATUS_NOT_POWERED, addr, + sizeof(*addr)); goto done; } - if (type == BDADDR_BREDR) - conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, bdaddr); + if (addr->type == BDADDR_BREDR) + conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &addr->bdaddr); else - conn = hci_conn_hash_lookup_ba(hdev, LE_LINK, bdaddr); + conn = hci_conn_hash_lookup_ba(hdev, LE_LINK, &addr->bdaddr); if (!conn) { - err = cmd_status(sk, hdev->id, mgmt_op, - MGMT_STATUS_NOT_CONNECTED); + err = cmd_complete(sk, hdev->id, mgmt_op, + MGMT_STATUS_NOT_CONNECTED, addr, + sizeof(*addr)); goto done; } - if (type == BDADDR_LE_PUBLIC || type == BDADDR_LE_RANDOM) { + if (addr->type == BDADDR_LE_PUBLIC || addr->type == BDADDR_LE_RANDOM) { /* Continue with pairing via SMP */ err = smp_user_confirm_reply(conn, mgmt_op, passkey); if (!err) - err = cmd_status(sk, hdev->id, mgmt_op, - MGMT_STATUS_SUCCESS); + err = cmd_complete(sk, hdev->id, mgmt_op, + MGMT_STATUS_SUCCESS, addr, + sizeof(*addr)); else - err = cmd_status(sk, hdev->id, mgmt_op, - MGMT_STATUS_FAILED); + err = cmd_complete(sk, hdev->id, mgmt_op, + MGMT_STATUS_FAILED, addr, + sizeof(*addr)); goto done; } - cmd = mgmt_pending_add(sk, mgmt_op, hdev, bdaddr, sizeof(*bdaddr)); + cmd = mgmt_pending_add(sk, mgmt_op, hdev, addr, sizeof(*addr)); if (!cmd) { err = -ENOMEM; goto done; @@ -2071,11 +2358,12 @@ static int user_pairing_resp(struct sock *sk, struct hci_dev *hdev, if (hci_op == HCI_OP_USER_PASSKEY_REPLY) { struct hci_cp_user_passkey_reply cp; - bacpy(&cp.bdaddr, bdaddr); + bacpy(&cp.bdaddr, &addr->bdaddr); cp.passkey = passkey; err = hci_send_cmd(hdev, hci_op, sizeof(cp), &cp); } else - err = hci_send_cmd(hdev, hci_op, sizeof(*bdaddr), bdaddr); + err = hci_send_cmd(hdev, hci_op, sizeof(addr->bdaddr), + &addr->bdaddr); if (err < 0) mgmt_pending_remove(cmd); @@ -2092,7 +2380,7 @@ static int pin_code_neg_reply(struct sock *sk, struct hci_dev *hdev, BT_DBG(""); - return user_pairing_resp(sk, hdev, 
&cp->addr.bdaddr, cp->addr.type, + return user_pairing_resp(sk, hdev, &cp->addr, MGMT_OP_PIN_CODE_NEG_REPLY, HCI_OP_PIN_CODE_NEG_REPLY, 0); } @@ -2108,7 +2396,7 @@ static int user_confirm_reply(struct sock *sk, struct hci_dev *hdev, void *data, return cmd_status(sk, hdev->id, MGMT_OP_USER_CONFIRM_REPLY, MGMT_STATUS_INVALID_PARAMS); - return user_pairing_resp(sk, hdev, &cp->addr.bdaddr, cp->addr.type, + return user_pairing_resp(sk, hdev, &cp->addr, MGMT_OP_USER_CONFIRM_REPLY, HCI_OP_USER_CONFIRM_REPLY, 0); } @@ -2120,7 +2408,7 @@ static int user_confirm_neg_reply(struct sock *sk, struct hci_dev *hdev, BT_DBG(""); - return user_pairing_resp(sk, hdev, &cp->addr.bdaddr, cp->addr.type, + return user_pairing_resp(sk, hdev, &cp->addr, MGMT_OP_USER_CONFIRM_NEG_REPLY, HCI_OP_USER_CONFIRM_NEG_REPLY, 0); } @@ -2132,7 +2420,7 @@ static int user_passkey_reply(struct sock *sk, struct hci_dev *hdev, void *data, BT_DBG(""); - return user_pairing_resp(sk, hdev, &cp->addr.bdaddr, cp->addr.type, + return user_pairing_resp(sk, hdev, &cp->addr, MGMT_OP_USER_PASSKEY_REPLY, HCI_OP_USER_PASSKEY_REPLY, cp->passkey); } @@ -2144,18 +2432,47 @@ static int user_passkey_neg_reply(struct sock *sk, struct hci_dev *hdev, BT_DBG(""); - return user_pairing_resp(sk, hdev, &cp->addr.bdaddr, cp->addr.type, + return user_pairing_resp(sk, hdev, &cp->addr, MGMT_OP_USER_PASSKEY_NEG_REPLY, HCI_OP_USER_PASSKEY_NEG_REPLY, 0); } -static int update_name(struct hci_dev *hdev, const char *name) +static void update_name(struct hci_request *req) { + struct hci_dev *hdev = req->hdev; struct hci_cp_write_local_name cp; - memcpy(cp.name, name, sizeof(cp.name)); + memcpy(cp.name, hdev->dev_name, sizeof(cp.name)); - return hci_send_cmd(hdev, HCI_OP_WRITE_LOCAL_NAME, sizeof(cp), &cp); + hci_req_add(req, HCI_OP_WRITE_LOCAL_NAME, sizeof(cp), &cp); +} + +static void set_name_complete(struct hci_dev *hdev, u8 status) +{ + struct mgmt_cp_set_local_name *cp; + struct pending_cmd *cmd; + + BT_DBG("status 0x%02x", status); + + hci_dev_lock(hdev); + + cmd = mgmt_pending_find(MGMT_OP_SET_LOCAL_NAME, hdev); + if (!cmd) + goto unlock; + + cp = cmd->param; + + if (status) + cmd_status(cmd->sk, hdev->id, MGMT_OP_SET_LOCAL_NAME, + mgmt_status(status)); + else + cmd_complete(cmd->sk, hdev->id, MGMT_OP_SET_LOCAL_NAME, 0, + cp, sizeof(*cp)); + + mgmt_pending_remove(cmd); + +unlock: + hci_dev_unlock(hdev); } static int set_local_name(struct sock *sk, struct hci_dev *hdev, void *data, @@ -2163,12 +2480,24 @@ static int set_local_name(struct sock *sk, struct hci_dev *hdev, void *data, { struct mgmt_cp_set_local_name *cp = data; struct pending_cmd *cmd; + struct hci_request req; int err; BT_DBG(""); hci_dev_lock(hdev); + /* If the old values are the same as the new ones just return a + * direct command complete event. 
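+ * E.g. a second Set Local Name request carrying the same name and + * short_name simply gets cmd_complete(..., 0, data, len) back without + * queueing any HCI commands.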
+ */ + if (!memcmp(hdev->dev_name, cp->name, sizeof(hdev->dev_name)) && + !memcmp(hdev->short_name, cp->short_name, + sizeof(hdev->short_name))) { + err = cmd_complete(sk, hdev->id, MGMT_OP_SET_LOCAL_NAME, 0, + data, len); + goto failed; + } + memcpy(hdev->short_name, cp->short_name, sizeof(hdev->short_name)); if (!hdev_is_powered(hdev)) { @@ -2191,7 +2520,19 @@ static int set_local_name(struct sock *sk, struct hci_dev *hdev, void *data, goto failed; } - err = update_name(hdev, cp->name); + memcpy(hdev->dev_name, cp->name, sizeof(hdev->dev_name)); + + hci_req_init(&req, hdev); + + if (lmp_bredr_capable(hdev)) { + update_name(&req); + update_eir(&req); + } + + if (lmp_le_capable(hdev)) + hci_update_ad(&req); + + err = hci_req_run(&req, set_name_complete); if (err < 0) mgmt_pending_remove(cmd); @@ -2254,24 +2595,16 @@ static int add_remote_oob_data(struct sock *sk, struct hci_dev *hdev, hci_dev_lock(hdev); - if (!hdev_is_powered(hdev)) { - err = cmd_complete(sk, hdev->id, MGMT_OP_ADD_REMOTE_OOB_DATA, - MGMT_STATUS_NOT_POWERED, &cp->addr, - sizeof(cp->addr)); - goto unlock; - } - err = hci_add_remote_oob_data(hdev, &cp->addr.bdaddr, cp->hash, cp->randomizer); if (err < 0) status = MGMT_STATUS_FAILED; else - status = 0; + status = MGMT_STATUS_SUCCESS; err = cmd_complete(sk, hdev->id, MGMT_OP_ADD_REMOTE_OOB_DATA, status, &cp->addr, sizeof(cp->addr)); -unlock: hci_dev_unlock(hdev); return err; } @@ -2287,24 +2620,15 @@ static int remove_remote_oob_data(struct sock *sk, struct hci_dev *hdev, hci_dev_lock(hdev); - if (!hdev_is_powered(hdev)) { - err = cmd_complete(sk, hdev->id, - MGMT_OP_REMOVE_REMOTE_OOB_DATA, - MGMT_STATUS_NOT_POWERED, &cp->addr, - sizeof(cp->addr)); - goto unlock; - } - err = hci_remove_remote_oob_data(hdev, &cp->addr.bdaddr); if (err < 0) status = MGMT_STATUS_INVALID_PARAMS; else - status = 0; + status = MGMT_STATUS_SUCCESS; err = cmd_complete(sk, hdev->id, MGMT_OP_REMOVE_REMOTE_OOB_DATA, status, &cp->addr, sizeof(cp->addr)); -unlock: hci_dev_unlock(hdev); return err; } @@ -2365,31 +2689,45 @@ static int start_discovery(struct sock *sk, struct hci_dev *hdev, switch (hdev->discovery.type) { case DISCOV_TYPE_BREDR: - if (lmp_bredr_capable(hdev)) - err = hci_do_inquiry(hdev, INQUIRY_LEN_BREDR); - else - err = -ENOTSUPP; + if (!lmp_bredr_capable(hdev)) { + err = cmd_status(sk, hdev->id, MGMT_OP_START_DISCOVERY, + MGMT_STATUS_NOT_SUPPORTED); + mgmt_pending_remove(cmd); + goto failed; + } + + err = hci_do_inquiry(hdev, INQUIRY_LEN_BREDR); break; case DISCOV_TYPE_LE: - if (lmp_host_le_capable(hdev)) - err = hci_le_scan(hdev, LE_SCAN_TYPE, LE_SCAN_INT, - LE_SCAN_WIN, LE_SCAN_TIMEOUT_LE_ONLY); - else - err = -ENOTSUPP; + if (!lmp_host_le_capable(hdev)) { + err = cmd_status(sk, hdev->id, MGMT_OP_START_DISCOVERY, + MGMT_STATUS_NOT_SUPPORTED); + mgmt_pending_remove(cmd); + goto failed; + } + + err = hci_le_scan(hdev, LE_SCAN_ACTIVE, LE_SCAN_INT, + LE_SCAN_WIN, LE_SCAN_TIMEOUT_LE_ONLY); break; case DISCOV_TYPE_INTERLEAVED: - if (lmp_host_le_capable(hdev) && lmp_bredr_capable(hdev)) - err = hci_le_scan(hdev, LE_SCAN_TYPE, LE_SCAN_INT, - LE_SCAN_WIN, - LE_SCAN_TIMEOUT_BREDR_LE); - else - err = -ENOTSUPP; + if (!lmp_host_le_capable(hdev) || !lmp_bredr_capable(hdev)) { + err = cmd_status(sk, hdev->id, MGMT_OP_START_DISCOVERY, + MGMT_STATUS_NOT_SUPPORTED); + mgmt_pending_remove(cmd); + goto failed; + } + + err = hci_le_scan(hdev, LE_SCAN_ACTIVE, LE_SCAN_INT, + LE_SCAN_WIN, LE_SCAN_TIMEOUT_BREDR_LE); break; default: - err = -EINVAL; + err = cmd_status(sk, hdev->id, MGMT_OP_START_DISCOVERY, + 
MGMT_STATUS_INVALID_PARAMS); + mgmt_pending_remove(cmd); + goto failed; } if (err < 0) @@ -2510,7 +2848,8 @@ static int confirm_name(struct sock *sk, struct hci_dev *hdev, void *data, hci_inquiry_cache_update_resolve(hdev, e); } - err = 0; + err = cmd_complete(sk, hdev->id, MGMT_OP_CONFIRM_NAME, 0, &cp->addr, + sizeof(cp->addr)); failed: hci_dev_unlock(hdev); @@ -2526,13 +2865,18 @@ static int block_device(struct sock *sk, struct hci_dev *hdev, void *data, BT_DBG("%s", hdev->name); + if (!bdaddr_type_is_valid(cp->addr.type)) + return cmd_complete(sk, hdev->id, MGMT_OP_BLOCK_DEVICE, + MGMT_STATUS_INVALID_PARAMS, + &cp->addr, sizeof(cp->addr)); + hci_dev_lock(hdev); err = hci_blacklist_add(hdev, &cp->addr.bdaddr, cp->addr.type); if (err < 0) status = MGMT_STATUS_FAILED; else - status = 0; + status = MGMT_STATUS_SUCCESS; err = cmd_complete(sk, hdev->id, MGMT_OP_BLOCK_DEVICE, status, &cp->addr, sizeof(cp->addr)); @@ -2551,13 +2895,18 @@ static int unblock_device(struct sock *sk, struct hci_dev *hdev, void *data, BT_DBG("%s", hdev->name); + if (!bdaddr_type_is_valid(cp->addr.type)) + return cmd_complete(sk, hdev->id, MGMT_OP_UNBLOCK_DEVICE, + MGMT_STATUS_INVALID_PARAMS, + &cp->addr, sizeof(cp->addr)); + hci_dev_lock(hdev); err = hci_blacklist_del(hdev, &cp->addr.bdaddr, cp->addr.type); if (err < 0) status = MGMT_STATUS_INVALID_PARAMS; else - status = 0; + status = MGMT_STATUS_SUCCESS; err = cmd_complete(sk, hdev->id, MGMT_OP_UNBLOCK_DEVICE, status, &cp->addr, sizeof(cp->addr)); @@ -2571,6 +2920,7 @@ static int set_device_id(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_cp_set_device_id *cp = data; + struct hci_request req; int err; __u16 source; @@ -2591,27 +2941,66 @@ static int set_device_id(struct sock *sk, struct hci_dev *hdev, void *data, err = cmd_complete(sk, hdev->id, MGMT_OP_SET_DEVICE_ID, 0, NULL, 0); - update_eir(hdev); + hci_req_init(&req, hdev); + update_eir(&req); + hci_req_run(&req, NULL); hci_dev_unlock(hdev); return err; } +static void fast_connectable_complete(struct hci_dev *hdev, u8 status) +{ + struct pending_cmd *cmd; + + BT_DBG("status 0x%02x", status); + + hci_dev_lock(hdev); + + cmd = mgmt_pending_find(MGMT_OP_SET_FAST_CONNECTABLE, hdev); + if (!cmd) + goto unlock; + + if (status) { + cmd_status(cmd->sk, hdev->id, MGMT_OP_SET_FAST_CONNECTABLE, + mgmt_status(status)); + } else { + struct mgmt_mode *cp = cmd->param; + + if (cp->val) + set_bit(HCI_FAST_CONNECTABLE, &hdev->dev_flags); + else + clear_bit(HCI_FAST_CONNECTABLE, &hdev->dev_flags); + + send_settings_rsp(cmd->sk, MGMT_OP_SET_FAST_CONNECTABLE, hdev); + new_settings(hdev, cmd->sk); + } + + mgmt_pending_remove(cmd); + +unlock: + hci_dev_unlock(hdev); +} + static int set_fast_connectable(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_mode *cp = data; - struct hci_cp_write_page_scan_activity acp; - u8 type; + struct pending_cmd *cmd; + struct hci_request req; int err; BT_DBG("%s", hdev->name); - if (!lmp_bredr_capable(hdev)) + if (!lmp_bredr_capable(hdev) || hdev->hci_ver < BLUETOOTH_VER_1_2) return cmd_status(sk, hdev->id, MGMT_OP_SET_FAST_CONNECTABLE, MGMT_STATUS_NOT_SUPPORTED); + if (cp->val != 0x00 && cp->val != 0x01) + return cmd_status(sk, hdev->id, MGMT_OP_SET_FAST_CONNECTABLE, + MGMT_STATUS_INVALID_PARAMS); + if (!hdev_is_powered(hdev)) return cmd_status(sk, hdev->id, MGMT_OP_SET_FAST_CONNECTABLE, MGMT_STATUS_NOT_POWERED); @@ -2622,49 +3011,59 @@ static int set_fast_connectable(struct sock *sk, struct hci_dev *hdev, hci_dev_lock(hdev); - if (cp->val) { - 
type = PAGE_SCAN_TYPE_INTERLACED; + if (mgmt_pending_find(MGMT_OP_SET_FAST_CONNECTABLE, hdev)) { + err = cmd_status(sk, hdev->id, MGMT_OP_SET_FAST_CONNECTABLE, + MGMT_STATUS_BUSY); + goto unlock; + } - /* 160 msec page scan interval */ - acp.interval = __constant_cpu_to_le16(0x0100); - } else { - type = PAGE_SCAN_TYPE_STANDARD; /* default */ + if (!!cp->val == test_bit(HCI_FAST_CONNECTABLE, &hdev->dev_flags)) { + err = send_settings_rsp(sk, MGMT_OP_SET_FAST_CONNECTABLE, + hdev); + goto unlock; + } - /* default 1.28 sec page scan */ - acp.interval = __constant_cpu_to_le16(0x0800); + cmd = mgmt_pending_add(sk, MGMT_OP_SET_FAST_CONNECTABLE, hdev, + data, len); + if (!cmd) { + err = -ENOMEM; + goto unlock; } - /* default 11.25 msec page scan window */ - acp.window = __constant_cpu_to_le16(0x0012); + hci_req_init(&req, hdev); - err = hci_send_cmd(hdev, HCI_OP_WRITE_PAGE_SCAN_ACTIVITY, sizeof(acp), - &acp); - if (err < 0) { - err = cmd_status(sk, hdev->id, MGMT_OP_SET_FAST_CONNECTABLE, - MGMT_STATUS_FAILED); - goto done; - } + write_fast_connectable(&req, cp->val); - err = hci_send_cmd(hdev, HCI_OP_WRITE_PAGE_SCAN_TYPE, 1, &type); + err = hci_req_run(&req, fast_connectable_complete); if (err < 0) { err = cmd_status(sk, hdev->id, MGMT_OP_SET_FAST_CONNECTABLE, MGMT_STATUS_FAILED); - goto done; + mgmt_pending_remove(cmd); } - err = cmd_complete(sk, hdev->id, MGMT_OP_SET_FAST_CONNECTABLE, 0, - NULL, 0); -done: +unlock: hci_dev_unlock(hdev); + return err; } +static bool ltk_is_valid(struct mgmt_ltk_info *key) +{ + if (key->authenticated != 0x00 && key->authenticated != 0x01) + return false; + if (key->master != 0x00 && key->master != 0x01) + return false; + if (!bdaddr_type_is_le(key->addr.type)) + return false; + return true; +} + static int load_long_term_keys(struct sock *sk, struct hci_dev *hdev, void *cp_data, u16 len) { struct mgmt_cp_load_long_term_keys *cp = cp_data; u16 key_count, expected_len; - int i; + int i, err; key_count = __le16_to_cpu(cp->key_count); @@ -2674,11 +3073,20 @@ static int load_long_term_keys(struct sock *sk, struct hci_dev *hdev, BT_ERR("load_keys: expected %u bytes, got %u bytes", len, expected_len); return cmd_status(sk, hdev->id, MGMT_OP_LOAD_LONG_TERM_KEYS, - EINVAL); + MGMT_STATUS_INVALID_PARAMS); } BT_DBG("%s key_count %u", hdev->name, key_count); + for (i = 0; i < key_count; i++) { + struct mgmt_ltk_info *key = &cp->keys[i]; + + if (!ltk_is_valid(key)) + return cmd_status(sk, hdev->id, + MGMT_OP_LOAD_LONG_TERM_KEYS, + MGMT_STATUS_INVALID_PARAMS); + } + hci_dev_lock(hdev); hci_smp_ltks_clear(hdev); @@ -2698,9 +3106,12 @@ static int load_long_term_keys(struct sock *sk, struct hci_dev *hdev, key->enc_size, key->ediv, key->rand); } + err = cmd_complete(sk, hdev->id, MGMT_OP_LOAD_LONG_TERM_KEYS, 0, + NULL, 0); + hci_dev_unlock(hdev); - return 0; + return err; } static const struct mgmt_handler { @@ -2889,66 +3300,116 @@ static void settings_rsp(struct pending_cmd *cmd, void *data) mgmt_pending_free(cmd); } -static int set_bredr_scan(struct hci_dev *hdev) +static void set_bredr_scan(struct hci_request *req) { + struct hci_dev *hdev = req->hdev; u8 scan = 0; + /* Ensure that fast connectable is disabled. This function will + * not do anything if the page scan parameters are already what + * they should be. 
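+ * (Fast connectable means an interlaced scan with a + * 0x0100 * 0.625 ms = 160 ms interval; the standard default is a + * 0x0800 * 0.625 ms = 1.28 s interval with an 11.25 ms window.)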
+ */ + write_fast_connectable(req, false); + if (test_bit(HCI_CONNECTABLE, &hdev->dev_flags)) scan |= SCAN_PAGE; if (test_bit(HCI_DISCOVERABLE, &hdev->dev_flags)) scan |= SCAN_INQUIRY; - if (!scan) - return 0; + if (scan) + hci_req_add(req, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan); +} + +static void powered_complete(struct hci_dev *hdev, u8 status) +{ + struct cmd_lookup match = { NULL, hdev }; - return hci_send_cmd(hdev, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan); + BT_DBG("status 0x%02x", status); + + hci_dev_lock(hdev); + + mgmt_pending_foreach(MGMT_OP_SET_POWERED, hdev, settings_rsp, &match); + + new_settings(hdev, match.sk); + + hci_dev_unlock(hdev); + + if (match.sk) + sock_put(match.sk); +} + +static int powered_update_hci(struct hci_dev *hdev) +{ + struct hci_request req; + u8 link_sec; + + hci_req_init(&req, hdev); + + if (test_bit(HCI_SSP_ENABLED, &hdev->dev_flags) && + !lmp_host_ssp_capable(hdev)) { + u8 ssp = 1; + + hci_req_add(&req, HCI_OP_WRITE_SSP_MODE, 1, &ssp); + } + + if (test_bit(HCI_LE_ENABLED, &hdev->dev_flags) && + lmp_bredr_capable(hdev)) { + struct hci_cp_write_le_host_supported cp; + + cp.le = 1; + cp.simul = lmp_le_br_capable(hdev); + + /* Check first if we already have the right + * host state (host features set) + */ + if (cp.le != lmp_host_le_capable(hdev) || + cp.simul != lmp_host_le_br_capable(hdev)) + hci_req_add(&req, HCI_OP_WRITE_LE_HOST_SUPPORTED, + sizeof(cp), &cp); + } + + link_sec = test_bit(HCI_LINK_SECURITY, &hdev->dev_flags); + if (link_sec != test_bit(HCI_AUTH, &hdev->flags)) + hci_req_add(&req, HCI_OP_WRITE_AUTH_ENABLE, + sizeof(link_sec), &link_sec); + + if (lmp_bredr_capable(hdev)) { + set_bredr_scan(&req); + update_class(&req); + update_name(&req); + update_eir(&req); + } + + return hci_req_run(&req, powered_complete); } int mgmt_powered(struct hci_dev *hdev, u8 powered) { struct cmd_lookup match = { NULL, hdev }; + u8 status_not_powered = MGMT_STATUS_NOT_POWERED; + u8 zero_cod[] = { 0, 0, 0 }; int err; if (!test_bit(HCI_MGMT, &hdev->dev_flags)) return 0; - mgmt_pending_foreach(MGMT_OP_SET_POWERED, hdev, settings_rsp, &match); - if (powered) { - if (test_bit(HCI_SSP_ENABLED, &hdev->dev_flags) && - !lmp_host_ssp_capable(hdev)) { - u8 ssp = 1; - - hci_send_cmd(hdev, HCI_OP_WRITE_SSP_MODE, 1, &ssp); - } + if (powered_update_hci(hdev) == 0) + return 0; - if (test_bit(HCI_LE_ENABLED, &hdev->dev_flags)) { - struct hci_cp_write_le_host_supported cp; + mgmt_pending_foreach(MGMT_OP_SET_POWERED, hdev, settings_rsp, + &match); + goto new_settings; + } - cp.le = 1; - cp.simul = lmp_le_br_capable(hdev); + mgmt_pending_foreach(MGMT_OP_SET_POWERED, hdev, settings_rsp, &match); + mgmt_pending_foreach(0, hdev, cmd_status_rsp, &status_not_powered); - /* Check first if we already have the right - * host state (host features set) - */ - if (cp.le != lmp_host_le_capable(hdev) || - cp.simul != lmp_host_le_br_capable(hdev)) - hci_send_cmd(hdev, - HCI_OP_WRITE_LE_HOST_SUPPORTED, - sizeof(cp), &cp); - } - - if (lmp_bredr_capable(hdev)) { - set_bredr_scan(hdev); - update_class(hdev); - update_name(hdev, hdev->dev_name); - update_eir(hdev); - } - } else { - u8 status = MGMT_STATUS_NOT_POWERED; - mgmt_pending_foreach(0, hdev, cmd_status_rsp, &status); - } + if (memcmp(hdev->dev_class, zero_cod, sizeof(zero_cod)) != 0) + mgmt_event(MGMT_EV_CLASS_OF_DEV_CHANGED, hdev, + zero_cod, sizeof(zero_cod), NULL); +new_settings: err = new_settings(hdev, match.sk); if (match.sk) @@ -2985,7 +3446,7 @@ int mgmt_discoverable(struct hci_dev *hdev, u8 discoverable) int mgmt_connectable(struct hci_dev 
*hdev, u8 connectable) { - struct cmd_lookup match = { NULL, hdev }; + struct pending_cmd *cmd; bool changed = false; int err = 0; @@ -2997,14 +3458,10 @@ int mgmt_connectable(struct hci_dev *hdev, u8 connectable) changed = true; } - mgmt_pending_foreach(MGMT_OP_SET_CONNECTABLE, hdev, settings_rsp, - &match); + cmd = mgmt_pending_find(MGMT_OP_SET_CONNECTABLE, hdev); if (changed) - err = new_settings(hdev, match.sk); - - if (match.sk) - sock_put(match.sk); + err = new_settings(hdev, cmd ? cmd->sk : NULL); return err; } @@ -3388,23 +3845,25 @@ int mgmt_auth_enable_complete(struct hci_dev *hdev, u8 status) return err; } -static int clear_eir(struct hci_dev *hdev) +static void clear_eir(struct hci_request *req) { + struct hci_dev *hdev = req->hdev; struct hci_cp_write_eir cp; if (!lmp_ext_inq_capable(hdev)) - return 0; + return; memset(hdev->eir, 0, sizeof(hdev->eir)); memset(&cp, 0, sizeof(cp)); - return hci_send_cmd(hdev, HCI_OP_WRITE_EIR, sizeof(cp), &cp); + hci_req_add(req, HCI_OP_WRITE_EIR, sizeof(cp), &cp); } int mgmt_ssp_enable_complete(struct hci_dev *hdev, u8 enable, u8 status) { struct cmd_lookup match = { NULL, hdev }; + struct hci_request req; bool changed = false; int err = 0; @@ -3437,29 +3896,26 @@ int mgmt_ssp_enable_complete(struct hci_dev *hdev, u8 enable, u8 status) if (match.sk) sock_put(match.sk); + hci_req_init(&req, hdev); + if (test_bit(HCI_SSP_ENABLED, &hdev->dev_flags)) - update_eir(hdev); + update_eir(&req); else - clear_eir(hdev); + clear_eir(&req); + + hci_req_run(&req, NULL); return err; } -static void class_rsp(struct pending_cmd *cmd, void *data) +static void sk_lookup(struct pending_cmd *cmd, void *data) { struct cmd_lookup *match = data; - cmd_complete(cmd->sk, cmd->index, cmd->opcode, match->mgmt_status, - match->hdev->dev_class, 3); - - list_del(&cmd->list); - if (match->sk == NULL) { match->sk = cmd->sk; sock_hold(match->sk); } - - mgmt_pending_free(cmd); } int mgmt_set_class_of_dev_complete(struct hci_dev *hdev, u8 *dev_class, @@ -3468,11 +3924,9 @@ int mgmt_set_class_of_dev_complete(struct hci_dev *hdev, u8 *dev_class, struct cmd_lookup match = { NULL, hdev, mgmt_status(status) }; int err = 0; - clear_bit(HCI_PENDING_CLASS, &hdev->dev_flags); - - mgmt_pending_foreach(MGMT_OP_SET_DEV_CLASS, hdev, class_rsp, &match); - mgmt_pending_foreach(MGMT_OP_ADD_UUID, hdev, class_rsp, &match); - mgmt_pending_foreach(MGMT_OP_REMOVE_UUID, hdev, class_rsp, &match); + mgmt_pending_foreach(MGMT_OP_SET_DEV_CLASS, hdev, sk_lookup, &match); + mgmt_pending_foreach(MGMT_OP_ADD_UUID, hdev, sk_lookup, &match); + mgmt_pending_foreach(MGMT_OP_REMOVE_UUID, hdev, sk_lookup, &match); if (!status) err = mgmt_event(MGMT_EV_CLASS_OF_DEV_CHANGED, hdev, dev_class, @@ -3486,55 +3940,29 @@ int mgmt_set_class_of_dev_complete(struct hci_dev *hdev, u8 *dev_class, int mgmt_set_local_name_complete(struct hci_dev *hdev, u8 *name, u8 status) { - struct pending_cmd *cmd; struct mgmt_cp_set_local_name ev; - bool changed = false; - int err = 0; + struct pending_cmd *cmd; - if (memcmp(name, hdev->dev_name, sizeof(hdev->dev_name)) != 0) { - memcpy(hdev->dev_name, name, sizeof(hdev->dev_name)); - changed = true; - } + if (status) + return 0; memset(&ev, 0, sizeof(ev)); memcpy(ev.name, name, HCI_MAX_NAME_LENGTH); memcpy(ev.short_name, hdev->short_name, HCI_MAX_SHORT_NAME_LENGTH); cmd = mgmt_pending_find(MGMT_OP_SET_LOCAL_NAME, hdev); - if (!cmd) - goto send_event; - - /* Always assume that either the short or the complete name has - * changed if there was a pending mgmt command */ - changed = true; + if 
(!cmd) { + memcpy(hdev->dev_name, name, sizeof(hdev->dev_name)); - if (status) { - err = cmd_status(cmd->sk, hdev->id, MGMT_OP_SET_LOCAL_NAME, - mgmt_status(status)); - goto failed; + /* If this is an HCI command related to powering on the + * HCI dev, don't send any mgmt signals. + */ + if (mgmt_pending_find(MGMT_OP_SET_POWERED, hdev)) + return 0; } - err = cmd_complete(cmd->sk, hdev->id, MGMT_OP_SET_LOCAL_NAME, 0, &ev, - sizeof(ev)); - if (err < 0) - goto failed; - -send_event: - if (changed) - err = mgmt_event(MGMT_EV_LOCAL_NAME_CHANGED, hdev, &ev, - sizeof(ev), cmd ? cmd->sk : NULL); - - /* EIR is taken care of separately when powering on the - * adapter so only update them here if this is a name change - * unrelated to power on. - */ - if (!test_bit(HCI_INIT, &hdev->flags)) - update_eir(hdev); - -failed: - if (cmd) - mgmt_pending_remove(cmd); - return err; + return mgmt_event(MGMT_EV_LOCAL_NAME_CHANGED, hdev, &ev, sizeof(ev), + cmd ? cmd->sk : NULL); } int mgmt_read_local_oob_data_reply_complete(struct hci_dev *hdev, u8 *hash, diff --git a/net/bluetooth/rfcomm/Kconfig b/net/bluetooth/rfcomm/Kconfig index 22e718b554e4..18d352ea2bc7 100644 --- a/net/bluetooth/rfcomm/Kconfig +++ b/net/bluetooth/rfcomm/Kconfig @@ -12,6 +12,7 @@ config BT_RFCOMM config BT_RFCOMM_TTY bool "RFCOMM TTY support" depends on BT_RFCOMM + depends on TTY help This option enables TTY emulation support for RFCOMM channels. diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index 201fdf737209..ca957d34b0c8 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -69,7 +69,7 @@ static struct rfcomm_session *rfcomm_session_create(bdaddr_t *src, u8 sec_level, int *err); static struct rfcomm_session *rfcomm_session_get(bdaddr_t *src, bdaddr_t *dst); -static void rfcomm_session_del(struct rfcomm_session *s); +static struct rfcomm_session *rfcomm_session_del(struct rfcomm_session *s); /* ---- RFCOMM frame parsing macros ---- */ #define __get_dlci(b) ((b & 0xfc) >> 2) @@ -108,12 +108,6 @@ static void rfcomm_schedule(void) wake_up_process(rfcomm_thread); } -static void rfcomm_session_put(struct rfcomm_session *s) -{ - if (atomic_dec_and_test(&s->refcnt)) - rfcomm_session_del(s); -} - /* ---- RFCOMM FCS computation ---- */ /* reversed, 8-bit, poly=0x07 */ @@ -249,16 +243,14 @@ static void rfcomm_session_set_timer(struct rfcomm_session *s, long timeout) { BT_DBG("session %p state %ld timeout %ld", s, s->state, timeout); - if (!mod_timer(&s->timer, jiffies + timeout)) - rfcomm_session_hold(s); + mod_timer(&s->timer, jiffies + timeout); } static void rfcomm_session_clear_timer(struct rfcomm_session *s) { BT_DBG("session %p state %ld", s, s->state); - if (timer_pending(&s->timer) && del_timer(&s->timer)) - rfcomm_session_put(s); + del_timer_sync(&s->timer); } /* ---- RFCOMM DLCs ---- */ @@ -285,7 +277,7 @@ static void rfcomm_dlc_clear_timer(struct rfcomm_dlc *d) { BT_DBG("dlc %p state %ld", d, d->state); - if (timer_pending(&d->timer) && del_timer(&d->timer)) + if (del_timer(&d->timer)) rfcomm_dlc_put(d); } @@ -336,8 +328,6 @@ static void rfcomm_dlc_link(struct rfcomm_session *s, struct rfcomm_dlc *d) { BT_DBG("dlc %p session %p", d, s); - rfcomm_session_hold(s); - rfcomm_session_clear_timer(s); rfcomm_dlc_hold(d); list_add(&d->list, &s->dlcs); @@ -356,8 +346,6 @@ static void rfcomm_dlc_unlink(struct rfcomm_dlc *d) if (list_empty(&s->dlcs)) rfcomm_session_set_timer(s, RFCOMM_IDLE_TIMEOUT); - - rfcomm_session_put(s); } static struct rfcomm_dlc *rfcomm_dlc_get(struct rfcomm_session *s, u8 dlci) 
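The rfcomm/core.c hunks above and below drop the session hold/put refcounting (rfcomm_session_hold, rfcomm_session_put) in favour of an ownership convention: any function that can free a session returns the surviving pointer, or NULL once the session has been deleted, and callers must continue only with the returned value. This is why rfcomm_session_del, rfcomm_session_close, rfcomm_recv_frame and rfcomm_process_rx all change their return type to struct rfcomm_session * in the hunks that follow. A minimal userspace sketch of that convention; session_del and session_process are illustrative names, not the kernel API.

/* Sketch of the "return the surviving pointer" lifetime rule adopted
 * by the rfcomm session rework; illustrative userspace code only. */
#include <stdio.h>
#include <stdlib.h>

struct session {
	int state;			/* illustrative: 1 = connected */
};

/* Frees the session and returns NULL so the caller can overwrite its
 * now-stale copy in a single assignment. */
static struct session *session_del(struct session *s)
{
	free(s);
	return NULL;
}

/* May or may not free the session; only the returned pointer is still
 * valid afterwards. */
static struct session *session_process(struct session *s, int event)
{
	if (event < 0)			/* fatal event: tear it down */
		return session_del(s);
	s->state = event;
	return s;
}

int main(void)
{
	struct session *s = calloc(1, sizeof(*s));

	if (!s)
		return 1;
	s = session_process(s, 1);	/* session survives */
	s = session_process(s, -1);	/* session freed; s becomes NULL */
	if (!s)
		printf("session gone; caller holds no stale pointer\n");
	return 0;
}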
@@ -493,12 +481,34 @@ static int __rfcomm_dlc_close(struct rfcomm_dlc *d, int err) int rfcomm_dlc_close(struct rfcomm_dlc *d, int err) { - int r; + int r = 0; + struct rfcomm_dlc *d_list; + struct rfcomm_session *s, *s_list; + + BT_DBG("dlc %p state %ld dlci %d err %d", d, d->state, d->dlci, err); rfcomm_lock(); - r = __rfcomm_dlc_close(d, err); + s = d->session; + if (!s) + goto no_session; + + /* after waiting on the mutex check the session still exists + * then check the dlc still exists + */ + list_for_each_entry(s_list, &session_list, list) { + if (s_list == s) { + list_for_each_entry(d_list, &s->dlcs, list) { + if (d_list == d) { + r = __rfcomm_dlc_close(d, err); + break; + } + } + break; + } + } +no_session: rfcomm_unlock(); return r; } @@ -609,7 +619,7 @@ static struct rfcomm_session *rfcomm_session_add(struct socket *sock, int state) return s; } -static void rfcomm_session_del(struct rfcomm_session *s) +static struct rfcomm_session *rfcomm_session_del(struct rfcomm_session *s) { int state = s->state; @@ -617,15 +627,14 @@ static void rfcomm_session_del(struct rfcomm_session *s) list_del(&s->list); - if (state == BT_CONNECTED) - rfcomm_send_disc(s, 0); - rfcomm_session_clear_timer(s); sock_release(s->sock); kfree(s); if (state != BT_LISTEN) module_put(THIS_MODULE); + + return NULL; } static struct rfcomm_session *rfcomm_session_get(bdaddr_t *src, bdaddr_t *dst) @@ -644,17 +653,16 @@ static struct rfcomm_session *rfcomm_session_get(bdaddr_t *src, bdaddr_t *dst) return NULL; } -static void rfcomm_session_close(struct rfcomm_session *s, int err) +static struct rfcomm_session *rfcomm_session_close(struct rfcomm_session *s, + int err) { struct rfcomm_dlc *d; struct list_head *p, *n; - BT_DBG("session %p state %ld err %d", s, s->state, err); - - rfcomm_session_hold(s); - s->state = BT_CLOSED; + BT_DBG("session %p state %ld err %d", s, s->state, err); + /* Close all dlcs */ list_for_each_safe(p, n, &s->dlcs) { d = list_entry(p, struct rfcomm_dlc, list); @@ -663,7 +671,7 @@ static void rfcomm_session_close(struct rfcomm_session *s, int err) } rfcomm_session_clear_timer(s); - rfcomm_session_put(s); + return rfcomm_session_del(s); } static struct rfcomm_session *rfcomm_session_create(bdaddr_t *src, @@ -715,8 +723,7 @@ static struct rfcomm_session *rfcomm_session_create(bdaddr_t *src, if (*err == 0 || *err == -EINPROGRESS) return s; - rfcomm_session_del(s); - return NULL; + return rfcomm_session_del(s); failed: sock_release(sock); @@ -1105,7 +1112,7 @@ static void rfcomm_make_uih(struct sk_buff *skb, u8 addr) } /* ---- RFCOMM frame reception ---- */ -static int rfcomm_recv_ua(struct rfcomm_session *s, u8 dlci) +static struct rfcomm_session *rfcomm_recv_ua(struct rfcomm_session *s, u8 dlci) { BT_DBG("session %p state %ld dlci %d", s, s->state, dlci); @@ -1114,7 +1121,7 @@ static int rfcomm_recv_ua(struct rfcomm_session *s, u8 dlci) struct rfcomm_dlc *d = rfcomm_dlc_get(s, dlci); if (!d) { rfcomm_send_dm(s, dlci); - return 0; + return s; } switch (d->state) { @@ -1150,25 +1157,14 @@ static int rfcomm_recv_ua(struct rfcomm_session *s, u8 dlci) break; case BT_DISCONN: - /* rfcomm_session_put is called later so don't do - * anything here otherwise we will mess up the session - * reference counter: - * - * (a) when we are the initiator dlc_unlink will drive - * the reference counter to 0 (there is no initial put - * after session_add) - * - * (b) when we are not the initiator rfcomm_rx_process - * will explicitly call put to balance the initial hold - * done after session add. 
- */ + s = rfcomm_session_close(s, ECONNRESET); break; } } - return 0; + return s; } -static int rfcomm_recv_dm(struct rfcomm_session *s, u8 dlci) +static struct rfcomm_session *rfcomm_recv_dm(struct rfcomm_session *s, u8 dlci) { int err = 0; @@ -1192,13 +1188,13 @@ static int rfcomm_recv_dm(struct rfcomm_session *s, u8 dlci) else err = ECONNRESET; - s->state = BT_CLOSED; - rfcomm_session_close(s, err); + s = rfcomm_session_close(s, err); } - return 0; + return s; } -static int rfcomm_recv_disc(struct rfcomm_session *s, u8 dlci) +static struct rfcomm_session *rfcomm_recv_disc(struct rfcomm_session *s, + u8 dlci) { int err = 0; @@ -1227,11 +1223,9 @@ static int rfcomm_recv_disc(struct rfcomm_session *s, u8 dlci) else err = ECONNRESET; - s->state = BT_CLOSED; - rfcomm_session_close(s, err); + s = rfcomm_session_close(s, err); } - - return 0; + return s; } void rfcomm_dlc_accept(struct rfcomm_dlc *d) @@ -1652,11 +1646,18 @@ drop: return 0; } -static int rfcomm_recv_frame(struct rfcomm_session *s, struct sk_buff *skb) +static struct rfcomm_session *rfcomm_recv_frame(struct rfcomm_session *s, + struct sk_buff *skb) { struct rfcomm_hdr *hdr = (void *) skb->data; u8 type, dlci, fcs; + if (!s) { + /* no session, so free socket data */ + kfree_skb(skb); + return s; + } + dlci = __get_dlci(hdr->addr); type = __get_type(hdr->ctrl); @@ -1667,7 +1668,7 @@ static int rfcomm_recv_frame(struct rfcomm_session *s, struct sk_buff *skb) if (__check_fcs(skb->data, type, fcs)) { BT_ERR("bad checksum in packet"); kfree_skb(skb); - return -EILSEQ; + return s; } if (__test_ea(hdr->len)) @@ -1683,22 +1684,23 @@ static int rfcomm_recv_frame(struct rfcomm_session *s, struct sk_buff *skb) case RFCOMM_DISC: if (__test_pf(hdr->ctrl)) - rfcomm_recv_disc(s, dlci); + s = rfcomm_recv_disc(s, dlci); break; case RFCOMM_UA: if (__test_pf(hdr->ctrl)) - rfcomm_recv_ua(s, dlci); + s = rfcomm_recv_ua(s, dlci); break; case RFCOMM_DM: - rfcomm_recv_dm(s, dlci); + s = rfcomm_recv_dm(s, dlci); break; case RFCOMM_UIH: - if (dlci) - return rfcomm_recv_data(s, dlci, __test_pf(hdr->ctrl), skb); - + if (dlci) { + rfcomm_recv_data(s, dlci, __test_pf(hdr->ctrl), skb); + return s; + } rfcomm_recv_mcc(s, skb); break; @@ -1707,7 +1709,7 @@ static int rfcomm_recv_frame(struct rfcomm_session *s, struct sk_buff *skb) break; } kfree_skb(skb); - return 0; + return s; } /* ---- Connection and data processing ---- */ @@ -1844,7 +1846,7 @@ static void rfcomm_process_dlcs(struct rfcomm_session *s) } } -static void rfcomm_process_rx(struct rfcomm_session *s) +static struct rfcomm_session *rfcomm_process_rx(struct rfcomm_session *s) { struct socket *sock = s->sock; struct sock *sk = sock->sk; @@ -1856,17 +1858,15 @@ static void rfcomm_process_rx(struct rfcomm_session *s) while ((skb = skb_dequeue(&sk->sk_receive_queue))) { skb_orphan(skb); if (!skb_linearize(skb)) - rfcomm_recv_frame(s, skb); + s = rfcomm_recv_frame(s, skb); else kfree_skb(skb); } - if (sk->sk_state == BT_CLOSED) { - if (!s->initiator) - rfcomm_session_put(s); + if (s && (sk->sk_state == BT_CLOSED)) + s = rfcomm_session_close(s, sk->sk_err); - rfcomm_session_close(s, sk->sk_err); - } + return s; } static void rfcomm_accept_connection(struct rfcomm_session *s) @@ -1891,8 +1891,6 @@ static void rfcomm_accept_connection(struct rfcomm_session *s) s = rfcomm_session_add(nsock, BT_OPEN); if (s) { - rfcomm_session_hold(s); - /* We should adjust MTU on incoming sessions. * L2CAP MTU minus UIH header and FCS. 
*/ s->mtu = min(l2cap_pi(nsock->sk)->chan->omtu, @@ -1903,7 +1901,7 @@ static void rfcomm_accept_connection(struct rfcomm_session *s) sock_release(nsock); } -static void rfcomm_check_connection(struct rfcomm_session *s) +static struct rfcomm_session *rfcomm_check_connection(struct rfcomm_session *s) { struct sock *sk = s->sock->sk; @@ -1921,10 +1919,10 @@ static void rfcomm_check_connection(struct rfcomm_session *s) break; case BT_CLOSED: - s->state = BT_CLOSED; - rfcomm_session_close(s, sk->sk_err); + s = rfcomm_session_close(s, sk->sk_err); break; } + return s; } static void rfcomm_process_sessions(void) @@ -1940,7 +1938,6 @@ static void rfcomm_process_sessions(void) if (test_and_clear_bit(RFCOMM_TIMED_OUT, &s->flags)) { s->state = BT_DISCONN; rfcomm_send_disc(s, 0); - rfcomm_session_put(s); continue; } @@ -1949,21 +1946,18 @@ static void rfcomm_process_sessions(void) continue; } - rfcomm_session_hold(s); - switch (s->state) { case BT_BOUND: - rfcomm_check_connection(s); + s = rfcomm_check_connection(s); break; default: - rfcomm_process_rx(s); + s = rfcomm_process_rx(s); break; } - rfcomm_process_dlcs(s); - - rfcomm_session_put(s); + if (s) + rfcomm_process_dlcs(s); } rfcomm_unlock(); @@ -2010,10 +2004,11 @@ static int rfcomm_add_listener(bdaddr_t *ba) /* Add listening session */ s = rfcomm_session_add(sock, BT_LISTEN); - if (!s) + if (!s) { + err = -ENOMEM; goto failed; + } - rfcomm_session_hold(s); return 0; failed: sock_release(sock); @@ -2071,8 +2066,6 @@ static void rfcomm_security_cfm(struct hci_conn *conn, u8 status, u8 encrypt) if (!s) return; - rfcomm_session_hold(s); - list_for_each_safe(p, n, &s->dlcs) { d = list_entry(p, struct rfcomm_dlc, list); @@ -2104,8 +2097,6 @@ static void rfcomm_security_cfm(struct hci_conn *conn, u8 status, u8 encrypt) set_bit(RFCOMM_AUTH_REJECT, &d->flags); } - rfcomm_session_put(s); - rfcomm_schedule(); } diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index ce3f6658f4b2..30b3721dc6d7 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -107,15 +107,14 @@ static void rfcomm_sk_state_change(struct rfcomm_dlc *d, int err) static struct sock *__rfcomm_get_sock_by_addr(u8 channel, bdaddr_t *src) { struct sock *sk = NULL; - struct hlist_node *node; - sk_for_each(sk, node, &rfcomm_sk_list.head) { + sk_for_each(sk, &rfcomm_sk_list.head) { if (rfcomm_pi(sk)->channel == channel && !bacmp(&bt_sk(sk)->src, src)) break; } - return node ? sk : NULL; + return sk ? sk : NULL; } /* Find socket with channel and source bdaddr. @@ -124,11 +123,10 @@ static struct sock *__rfcomm_get_sock_by_addr(u8 channel, bdaddr_t *src) static struct sock *rfcomm_get_sock_by_channel(int state, u8 channel, bdaddr_t *src) { struct sock *sk = NULL, *sk1 = NULL; - struct hlist_node *node; read_lock(&rfcomm_sk_list.lock); - sk_for_each(sk, node, &rfcomm_sk_list.head) { + sk_for_each(sk, &rfcomm_sk_list.head) { if (state && sk->sk_state != state) continue; @@ -145,7 +143,7 @@ static struct sock *rfcomm_get_sock_by_channel(int state, u8 channel, bdaddr_t * read_unlock(&rfcomm_sk_list.lock); - return node ? sk : sk1; + return sk ? 
sk : sk1; } static void rfcomm_sock_destruct(struct sock *sk) @@ -610,6 +608,7 @@ static int rfcomm_sock_recvmsg(struct kiocb *iocb, struct socket *sock, if (test_and_clear_bit(RFCOMM_DEFER_SETUP, &d->flags)) { rfcomm_dlc_accept(d); + msg->msg_namelen = 0; return 0; } @@ -970,11 +969,10 @@ done: static int rfcomm_sock_debugfs_show(struct seq_file *f, void *p) { struct sock *sk; - struct hlist_node *node; read_lock(&rfcomm_sk_list.lock); - sk_for_each(sk, node, &rfcomm_sk_list.head) { + sk_for_each(sk, &rfcomm_sk_list.head) { seq_printf(f, "%pMR %pMR %d %d\n", &bt_sk(sk)->src, &bt_sk(sk)->dst, sk->sk_state, rfcomm_pi(sk)->channel); @@ -1039,7 +1037,7 @@ int __init rfcomm_init_sockets(void) goto error; } - err = bt_procfs_init(THIS_MODULE, &init_net, "rfcomm", &rfcomm_sk_list, NULL); + err = bt_procfs_init(&init_net, "rfcomm", &rfcomm_sk_list, NULL); if (err < 0) { BT_ERR("Failed to create RFCOMM proc file"); bt_sock_unregister(BTPROTO_RFCOMM); @@ -1068,8 +1066,7 @@ void __exit rfcomm_cleanup_sockets(void) debugfs_remove(rfcomm_sock_debugfs); - if (bt_sock_unregister(BTPROTO_RFCOMM) < 0) - BT_ERR("RFCOMM socket layer unregistration failed"); + bt_sock_unregister(BTPROTO_RFCOMM); proto_unregister(&rfcomm_proto); } diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c index bd6fd0f43d2b..b6e44ad6cca6 100644 --- a/net/bluetooth/rfcomm/tty.c +++ b/net/bluetooth/rfcomm/tty.c @@ -541,23 +541,21 @@ int rfcomm_dev_ioctl(struct sock *sk, unsigned int cmd, void __user *arg) static void rfcomm_dev_data_ready(struct rfcomm_dlc *dlc, struct sk_buff *skb) { struct rfcomm_dev *dev = dlc->owner; - struct tty_struct *tty; if (!dev) { kfree_skb(skb); return; } - tty = dev->port.tty; - if (!tty || !skb_queue_empty(&dev->pending)) { + if (!skb_queue_empty(&dev->pending)) { skb_queue_tail(&dev->pending, skb); return; } - BT_DBG("dlc %p tty %p len %d", dlc, tty, skb->len); + BT_DBG("dlc %p len %d", dlc, skb->len); - tty_insert_flip_string(tty, skb->data, skb->len); - tty_flip_buffer_push(tty); + tty_insert_flip_string(&dev->port, skb->data, skb->len); + tty_flip_buffer_push(&dev->port); kfree_skb(skb); } @@ -621,26 +619,23 @@ static void rfcomm_dev_modem_status(struct rfcomm_dlc *dlc, u8 v24_sig) /* ---- TTY functions ---- */ static void rfcomm_tty_copy_pending(struct rfcomm_dev *dev) { - struct tty_struct *tty = dev->port.tty; struct sk_buff *skb; int inserted = 0; - if (!tty) - return; - - BT_DBG("dev %p tty %p", dev, tty); + BT_DBG("dev %p", dev); rfcomm_dlc_lock(dev->dlc); while ((skb = skb_dequeue(&dev->pending))) { - inserted += tty_insert_flip_string(tty, skb->data, skb->len); + inserted += tty_insert_flip_string(&dev->port, skb->data, + skb->len); kfree_skb(skb); } rfcomm_dlc_unlock(dev->dlc); if (inserted > 0) - tty_flip_buffer_push(tty); + tty_flip_buffer_push(&dev->port); } static int rfcomm_tty_open(struct tty_struct *tty, struct file *filp) diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 57f250c20e39..e7bd4eea575c 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -83,7 +83,7 @@ static struct sco_conn *sco_conn_add(struct hci_conn *hcon) if (conn) return conn; - conn = kzalloc(sizeof(struct sco_conn), GFP_ATOMIC); + conn = kzalloc(sizeof(struct sco_conn), GFP_KERNEL); if (!conn) return NULL; @@ -185,7 +185,7 @@ static int sco_connect(struct sock *sk) conn = sco_conn_add(hcon); if (!conn) { - hci_conn_put(hcon); + hci_conn_drop(hcon); err = -ENOMEM; goto done; } @@ -259,10 +259,9 @@ drop: /* -------- Socket interface ---------- */ static struct sock 
*__sco_get_sock_listen_by_addr(bdaddr_t *ba) { - struct hlist_node *node; struct sock *sk; - sk_for_each(sk, node, &sco_sk_list.head) { + sk_for_each(sk, &sco_sk_list.head) { if (sk->sk_state != BT_LISTEN) continue; @@ -279,11 +278,10 @@ static struct sock *__sco_get_sock_listen_by_addr(bdaddr_t *ba) static struct sock *sco_get_sock_listen(bdaddr_t *src) { struct sock *sk = NULL, *sk1 = NULL; - struct hlist_node *node; read_lock(&sco_sk_list.lock); - sk_for_each(sk, node, &sco_sk_list.head) { + sk_for_each(sk, &sco_sk_list.head) { if (sk->sk_state != BT_LISTEN) continue; @@ -298,7 +296,7 @@ static struct sock *sco_get_sock_listen(bdaddr_t *src) read_unlock(&sco_sk_list.lock); - return node ? sk : sk1; + return sk ? sk : sk1; } static void sco_sock_destruct(struct sock *sk) @@ -355,12 +353,13 @@ static void __sco_sock_close(struct sock *sk) if (sco_pi(sk)->conn->hcon) { sk->sk_state = BT_DISCONN; sco_sock_set_timer(sk, SCO_DISCONN_TIMEOUT); - hci_conn_put(sco_pi(sk)->conn->hcon); + hci_conn_drop(sco_pi(sk)->conn->hcon); sco_pi(sk)->conn->hcon = NULL; } else sco_chan_del(sk, ECONNRESET); break; + case BT_CONNECT2: case BT_CONNECT: case BT_DISCONN: sco_chan_del(sk, ECONNRESET); @@ -482,8 +481,7 @@ static int sco_sock_connect(struct socket *sock, struct sockaddr *addr, int alen { struct sockaddr_sco *sa = (struct sockaddr_sco *) addr; struct sock *sk = sock->sk; - int err = 0; - + int err; BT_DBG("sk %p", sk); @@ -654,6 +652,42 @@ static int sco_sock_sendmsg(struct kiocb *iocb, struct socket *sock, return err; } +static void sco_conn_defer_accept(struct hci_conn *conn, int mask) +{ + struct hci_dev *hdev = conn->hdev; + + BT_DBG("conn %p", conn); + + conn->state = BT_CONFIG; + + if (!lmp_esco_capable(hdev)) { + struct hci_cp_accept_conn_req cp; + + bacpy(&cp.bdaddr, &conn->dst); + + if (lmp_rswitch_capable(hdev) && (mask & HCI_LM_MASTER)) + cp.role = 0x00; /* Become master */ + else + cp.role = 0x01; /* Remain slave */ + + hci_send_cmd(hdev, HCI_OP_ACCEPT_CONN_REQ, sizeof(cp), &cp); + } else { + struct hci_cp_accept_sync_conn_req cp; + + bacpy(&cp.bdaddr, &conn->dst); + cp.pkt_type = cpu_to_le16(conn->pkt_type); + + cp.tx_bandwidth = __constant_cpu_to_le32(0x00001f40); + cp.rx_bandwidth = __constant_cpu_to_le32(0x00001f40); + cp.max_latency = __constant_cpu_to_le16(0xffff); + cp.content_format = cpu_to_le16(hdev->voice_setting); + cp.retrans_effort = 0xff; + + hci_send_cmd(hdev, HCI_OP_ACCEPT_SYNC_CONN_REQ, + sizeof(cp), &cp); + } +} + static int sco_sock_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, size_t len, int flags) { @@ -664,8 +698,9 @@ static int sco_sock_recvmsg(struct kiocb *iocb, struct socket *sock, if (sk->sk_state == BT_CONNECT2 && test_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags)) { - hci_conn_accept(pi->conn->hcon, 0); + sco_conn_defer_accept(pi->conn->hcon, 0); sk->sk_state = BT_CONFIG; + msg->msg_namelen = 0; release_sock(sk); return 0; @@ -883,7 +918,7 @@ static void sco_chan_del(struct sock *sk, int err) sco_conn_unlock(conn); if (conn->hcon) - hci_conn_put(conn->hcon); + hci_conn_drop(conn->hcon); } sk->sk_state = BT_CLOSED; @@ -900,8 +935,6 @@ static void sco_conn_ready(struct sco_conn *conn) BT_DBG("conn %p", conn); - sco_conn_lock(conn); - if (sk) { sco_sock_clear_timer(sk); bh_lock_sock(sk); @@ -909,9 +942,13 @@ static void sco_conn_ready(struct sco_conn *conn) sk->sk_state_change(sk); bh_unlock_sock(sk); } else { + sco_conn_lock(conn); + parent = sco_get_sock_listen(conn->src); - if (!parent) - goto done; + if (!parent) { + sco_conn_unlock(conn); 
+ return; + } bh_lock_sock(parent); @@ -919,7 +956,8 @@ BTPROTO_SCO, GFP_ATOMIC); if (!sk) { bh_unlock_sock(parent); - goto done; + sco_conn_unlock(conn); + return; } sco_sock_init(sk, parent); @@ -939,24 +977,22 @@ parent->sk_data_ready(parent, 1); bh_unlock_sock(parent); - } -done: - sco_conn_unlock(conn); + sco_conn_unlock(conn); + } } /* ----- SCO interface with lower layer (HCI) ----- */ int sco_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags) { struct sock *sk; - struct hlist_node *node; int lm = 0; BT_DBG("hdev %s, bdaddr %pMR", hdev->name, bdaddr); /* Find listening sockets */ read_lock(&sco_sk_list.lock); - sk_for_each(sk, node, &sco_sk_list.head) { + sk_for_each(sk, &sco_sk_list.head) { if (sk->sk_state != BT_LISTEN) continue; @@ -1016,11 +1052,10 @@ drop: static int sco_debugfs_show(struct seq_file *f, void *p) { struct sock *sk; - struct hlist_node *node; read_lock(&sco_sk_list.lock); - sk_for_each(sk, node, &sco_sk_list.head) { + sk_for_each(sk, &sco_sk_list.head) { seq_printf(f, "%pMR %pMR %d\n", &bt_sk(sk)->src, &bt_sk(sk)->dst, sk->sk_state); } @@ -1084,7 +1119,7 @@ int __init sco_init(void) goto error; } - err = bt_procfs_init(THIS_MODULE, &init_net, "sco", &sco_sk_list, NULL); + err = bt_procfs_init(&init_net, "sco", &sco_sk_list, NULL); if (err < 0) { BT_ERR("Failed to create SCO proc file"); bt_sock_unregister(BTPROTO_SCO); @@ -1113,8 +1148,7 @@ void __exit sco_exit(void) debugfs_remove(sco_debugfs); - if (bt_sock_unregister(BTPROTO_SCO) < 0) - BT_ERR("SCO socket unregistration failed"); + bt_sock_unregister(BTPROTO_SCO); proto_unregister(&sco_proto); } diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c index 5abefb12891d..b2296d3857a0 100644 --- a/net/bluetooth/smp.c +++ b/net/bluetooth/smp.c @@ -522,7 +522,7 @@ void smp_chan_destroy(struct l2cap_conn *conn) kfree(smp); conn->smp_chan = NULL; conn->hcon->smp_conn = NULL; - hci_conn_put(conn->hcon); + hci_conn_drop(conn->hcon); } int smp_user_confirm_reply(struct hci_conn *hcon, u16 mgmt_op, __le32 passkey) diff --git a/net/bridge/Kconfig b/net/bridge/Kconfig index 6dee7bf648a9..aa0d3b2f1bb7 100644 --- a/net/bridge/Kconfig +++ b/net/bridge/Kconfig @@ -46,3 +46,17 @@ config BRIDGE_IGMP_SNOOPING Say N to exclude this support and reduce the binary size. If unsure, say Y. + +config BRIDGE_VLAN_FILTERING + bool "VLAN filtering" + depends on BRIDGE + depends on VLAN_8021Q + default n + ---help--- + If you say Y here, then the Ethernet bridge will be able to selectively + receive and forward traffic based on VLAN information in the packet and + any VLAN information configured on the bridge port or bridge device. + + Say N to exclude this support and reduce the binary size. + + If unsure, say Y. 
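The new BRIDGE_VLAN_FILTERING option gates the per-port VLAN bitmap that the br_fdb.c, br_forward.c and br_input.c hunks below consult through test_bit() and for_each_set_bit() on pv->vlan_bitmap. A compact userspace model of that check, assuming only what the diff shows (one bit per VID, VLAN_N_VID of 4096); the names here are illustrative, not the kernel's struct net_port_vlans API.

/* Userspace model (illustrative only) of a per-port VLAN bitmap:
 * one bit per possible VLAN id; a frame is admitted only if the
 * bit for its VID is set. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define VLAN_N_VID	4096
#define BITS_PER_WORD	64

struct port_vlans {
	uint64_t bitmap[VLAN_N_VID / BITS_PER_WORD];
};

static void vlan_add(struct port_vlans *pv, uint16_t vid)
{
	pv->bitmap[vid / BITS_PER_WORD] |= 1ULL << (vid % BITS_PER_WORD);
}

/* Ingress/egress check: forward only if the VID is configured. */
static bool vlan_allowed(const struct port_vlans *pv, uint16_t vid)
{
	return (pv->bitmap[vid / BITS_PER_WORD] >> (vid % BITS_PER_WORD)) & 1;
}

int main(void)
{
	struct port_vlans pv = { { 0 } };

	vlan_add(&pv, 100);
	printf("vid 100: %s\n", vlan_allowed(&pv, 100) ? "forward" : "drop");
	printf("vid 200: %s\n", vlan_allowed(&pv, 200) ? "forward" : "drop");
	return 0;
}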
diff --git a/net/bridge/Makefile b/net/bridge/Makefile index e859098f5ee9..e85498b2f166 100644 --- a/net/bridge/Makefile +++ b/net/bridge/Makefile @@ -14,4 +14,6 @@ bridge-$(CONFIG_BRIDGE_NETFILTER) += br_netfilter.o bridge-$(CONFIG_BRIDGE_IGMP_SNOOPING) += br_multicast.o br_mdb.o +bridge-$(CONFIG_BRIDGE_VLAN_FILTERING) += br_vlan.o + obj-$(CONFIG_BRIDGE_NF_EBTABLES) += netfilter/ diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 7c78e2640190..967312803e41 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -30,6 +30,7 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) struct net_bridge_fdb_entry *dst; struct net_bridge_mdb_entry *mdst; struct br_cpu_netstats *brstats = this_cpu_ptr(br->stats); + u16 vid = 0; rcu_read_lock(); #ifdef CONFIG_BRIDGE_NETFILTER @@ -45,6 +46,9 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) brstats->tx_bytes += skb->len; u64_stats_update_end(&brstats->syncp); + if (!br_allowed_ingress(br, br_get_vlan_info(br), skb, &vid)) + goto out; + BR_INPUT_SKB_CB(skb)->brdev = dev; skb_reset_mac_header(skb); @@ -62,12 +66,12 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) goto out; } - mdst = br_mdb_get(br, skb); + mdst = br_mdb_get(br, skb, vid); if (mdst || BR_INPUT_SKB_CB_MROUTERS_ONLY(skb)) br_multicast_deliver(mdst, skb); else br_flood_deliver(br, skb); - } else if ((dst = __br_fdb_get(br, dest)) != NULL) + } else if ((dst = __br_fdb_get(br, dest, vid)) != NULL) br_deliver(dst->dst, skb); else br_flood_deliver(br, skb); @@ -172,12 +176,10 @@ static int br_set_mac_address(struct net_device *dev, void *p) spin_lock_bh(&br->lock); if (!ether_addr_equal(dev->dev_addr, addr->sa_data)) { - dev->addr_assign_type &= ~NET_ADDR_RANDOM; memcpy(dev->dev_addr, addr->sa_data, ETH_ALEN); br_fdb_change_mac_address(br, addr->sa_data); br_stp_change_bridge_id(br, addr->sa_data); } - br->flags |= BR_SET_MAC_ADDR; spin_unlock_bh(&br->lock); return 0; @@ -185,10 +187,10 @@ static int br_set_mac_address(struct net_device *dev, void *p) static void br_getinfo(struct net_device *dev, struct ethtool_drvinfo *info) { - strcpy(info->driver, "bridge"); - strcpy(info->version, BR_VERSION); - strcpy(info->fw_version, "N/A"); - strcpy(info->bus_info, "N/A"); + strlcpy(info->driver, "bridge", sizeof(info->driver)); + strlcpy(info->version, BR_VERSION, sizeof(info->version)); + strlcpy(info->fw_version, "N/A", sizeof(info->fw_version)); + strlcpy(info->bus_info, "N/A", sizeof(info->bus_info)); } static netdev_features_t br_fix_features(struct net_device *dev, @@ -267,7 +269,7 @@ void br_netpoll_disable(struct net_bridge_port *p) p->np = NULL; - __netpoll_free_rcu(np); + __netpoll_free_async(np); } #endif @@ -315,6 +317,7 @@ static const struct net_device_ops br_netdev_ops = { .ndo_fdb_dump = br_fdb_dump, .ndo_bridge_getlink = br_getlink, .ndo_bridge_setlink = br_setlink, + .ndo_bridge_dellink = br_dellink, }; static void br_dev_free(struct net_device *dev) @@ -345,10 +348,10 @@ void br_dev_setup(struct net_device *dev) dev->features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA | NETIF_F_GSO_MASK | NETIF_F_HW_CSUM | NETIF_F_LLTX | - NETIF_F_NETNS_LOCAL | NETIF_F_HW_VLAN_TX; + NETIF_F_NETNS_LOCAL | NETIF_F_HW_VLAN_CTAG_TX; dev->hw_features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA | NETIF_F_GSO_MASK | NETIF_F_HW_CSUM | - NETIF_F_HW_VLAN_TX; + NETIF_F_HW_VLAN_CTAG_TX; br->dev = dev; spin_lock_init(&br->lock); diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index 
d9576e6de2b8..ebfa4443c69b 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -23,11 +23,12 @@ #include <linux/slab.h> #include <linux/atomic.h> #include <asm/unaligned.h> +#include <linux/if_vlan.h> #include "br_private.h" static struct kmem_cache *br_fdb_cache __read_mostly; static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source, - const unsigned char *addr); + const unsigned char *addr, u16 vid); static void fdb_notify(struct net_bridge *br, const struct net_bridge_fdb_entry *, int); @@ -67,11 +68,11 @@ static inline int has_expired(const struct net_bridge *br, time_before_eq(fdb->updated + hold_time(br), jiffies); } -static inline int br_mac_hash(const unsigned char *mac) +static inline int br_mac_hash(const unsigned char *mac, __u16 vid) { - /* use 1 byte of OUI cnd 3 bytes of NIC */ + /* use 1 byte of OUI and 3 bytes of NIC */ u32 key = get_unaligned((u32 *)(mac + 2)); - return jhash_1word(key, fdb_salt) & (BR_HASH_SIZE - 1); + return jhash_2words(key, vid, fdb_salt) & (BR_HASH_SIZE - 1); } static void fdb_rcu_free(struct rcu_head *head) @@ -91,6 +92,7 @@ static void fdb_delete(struct net_bridge *br, struct net_bridge_fdb_entry *f) void br_fdb_changeaddr(struct net_bridge_port *p, const unsigned char *newaddr) { struct net_bridge *br = p->br; + bool no_vlan = (nbp_get_vlan_info(p) == NULL) ? true : false; int i; spin_lock_bh(&br->hash_lock); @@ -105,10 +107,12 @@ void br_fdb_changeaddr(struct net_bridge_port *p, const unsigned char *newaddr) if (f->dst == p && f->is_local) { /* maybe another port has same hw addr? */ struct net_bridge_port *op; + u16 vid = f->vlan_id; list_for_each_entry(op, &br->port_list, list) { if (op != p && ether_addr_equal(op->dev->dev_addr, - f->addr.addr)) { + f->addr.addr) && + nbp_vlan_find(op, vid)) { f->dst = op; goto insert; } @@ -116,27 +120,53 @@ void br_fdb_changeaddr(struct net_bridge_port *p, const unsigned char *newaddr) /* delete old one */ fdb_delete(br, f); - goto insert; +insert: + /* insert new address, may fail if invalid + * address or dup. + */ + fdb_insert(br, p, newaddr, vid); + + /* if this port has no vlan information + * configured, we can safely be done at + * this point. + */ + if (no_vlan) + goto done; } } } - insert: - /* insert new address, may fail if invalid address or dup. */ - fdb_insert(br, p, newaddr); +done: spin_unlock_bh(&br->hash_lock); } void br_fdb_change_mac_address(struct net_bridge *br, const u8 *newaddr) { struct net_bridge_fdb_entry *f; + struct net_port_vlans *pv; + u16 vid = 0; /* If old entry was unassociated with any port, then delete it. */ - f = __br_fdb_get(br, br->dev->dev_addr); + f = __br_fdb_get(br, br->dev->dev_addr, 0); if (f && f->is_local && !f->dst) fdb_delete(br, f); - fdb_insert(br, NULL, newaddr); + fdb_insert(br, NULL, newaddr, 0); + + /* Now remove and add entries for every VLAN configured on the + * bridge. This function runs under RTNL so the bitmap will not + * change from under us. 
+ */ + pv = br_get_vlan_info(br); + if (!pv) + return; + + for_each_set_bit_from(vid, pv->vlan_bitmap, BR_VLAN_BITMAP_LEN) { + f = __br_fdb_get(br, br->dev->dev_addr, vid); + if (f && f->is_local && !f->dst) + fdb_delete(br, f); + fdb_insert(br, NULL, newaddr, vid); + } } void br_fdb_cleanup(unsigned long _data) @@ -149,9 +179,9 @@ void br_fdb_cleanup(unsigned long _data) spin_lock(&br->hash_lock); for (i = 0; i < BR_HASH_SIZE; i++) { struct net_bridge_fdb_entry *f; - struct hlist_node *h, *n; + struct hlist_node *n; - hlist_for_each_entry_safe(f, h, n, &br->hash[i], hlist) { + hlist_for_each_entry_safe(f, n, &br->hash[i], hlist) { unsigned long this_timer; if (f->is_static) continue; @@ -175,8 +205,8 @@ void br_fdb_flush(struct net_bridge *br) spin_lock_bh(&br->hash_lock); for (i = 0; i < BR_HASH_SIZE; i++) { struct net_bridge_fdb_entry *f; - struct hlist_node *h, *n; - hlist_for_each_entry_safe(f, h, n, &br->hash[i], hlist) { + struct hlist_node *n; + hlist_for_each_entry_safe(f, n, &br->hash[i], hlist) { if (!f->is_static) fdb_delete(br, f); } @@ -231,13 +261,15 @@ void br_fdb_delete_by_port(struct net_bridge *br, /* No locking or refcounting, assumes caller has rcu_read_lock */ struct net_bridge_fdb_entry *__br_fdb_get(struct net_bridge *br, - const unsigned char *addr) + const unsigned char *addr, + __u16 vid) { - struct hlist_node *h; struct net_bridge_fdb_entry *fdb; - hlist_for_each_entry_rcu(fdb, h, &br->hash[br_mac_hash(addr)], hlist) { - if (ether_addr_equal(fdb->addr.addr, addr)) { + hlist_for_each_entry_rcu(fdb, + &br->hash[br_mac_hash(addr, vid)], hlist) { + if (ether_addr_equal(fdb->addr.addr, addr) && + fdb->vlan_id == vid) { if (unlikely(has_expired(br, fdb))) break; return fdb; @@ -261,7 +293,7 @@ int br_fdb_test_addr(struct net_device *dev, unsigned char *addr) if (!port) ret = 0; else { - fdb = __br_fdb_get(port->br, addr); + fdb = __br_fdb_get(port->br, addr, 0); ret = fdb && fdb->dst && fdb->dst->dev != dev && fdb->dst->state == BR_STATE_FORWARDING; } @@ -280,14 +312,13 @@ int br_fdb_fillbuf(struct net_bridge *br, void *buf, { struct __fdb_entry *fe = buf; int i, num = 0; - struct hlist_node *h; struct net_bridge_fdb_entry *f; memset(buf, 0, maxnum*sizeof(struct __fdb_entry)); rcu_read_lock(); for (i = 0; i < BR_HASH_SIZE; i++) { - hlist_for_each_entry_rcu(f, h, &br->hash[i], hlist) { + hlist_for_each_entry_rcu(f, &br->hash[i], hlist) { if (num >= maxnum) goto out; @@ -325,26 +356,28 @@ int br_fdb_fillbuf(struct net_bridge *br, void *buf, } static struct net_bridge_fdb_entry *fdb_find(struct hlist_head *head, - const unsigned char *addr) + const unsigned char *addr, + __u16 vid) { - struct hlist_node *h; struct net_bridge_fdb_entry *fdb; - hlist_for_each_entry(fdb, h, head, hlist) { - if (ether_addr_equal(fdb->addr.addr, addr)) + hlist_for_each_entry(fdb, head, hlist) { + if (ether_addr_equal(fdb->addr.addr, addr) && + fdb->vlan_id == vid) return fdb; } return NULL; } static struct net_bridge_fdb_entry *fdb_find_rcu(struct hlist_head *head, - const unsigned char *addr) + const unsigned char *addr, + __u16 vid) { - struct hlist_node *h; struct net_bridge_fdb_entry *fdb; - hlist_for_each_entry_rcu(fdb, h, head, hlist) { - if (ether_addr_equal(fdb->addr.addr, addr)) + hlist_for_each_entry_rcu(fdb, head, hlist) { + if (ether_addr_equal(fdb->addr.addr, addr) && + fdb->vlan_id == vid) return fdb; } return NULL; @@ -352,7 +385,8 @@ static struct net_bridge_fdb_entry *fdb_find_rcu(struct hlist_head *head, static struct net_bridge_fdb_entry *fdb_create(struct hlist_head *head, 
struct net_bridge_port *source, - const unsigned char *addr) + const unsigned char *addr, + __u16 vid) { struct net_bridge_fdb_entry *fdb; @@ -360,6 +394,7 @@ static struct net_bridge_fdb_entry *fdb_create(struct hlist_head *head, if (fdb) { memcpy(fdb->addr.addr, addr, ETH_ALEN); fdb->dst = source; + fdb->vlan_id = vid; fdb->is_local = 0; fdb->is_static = 0; fdb->updated = fdb->used = jiffies; @@ -369,15 +404,15 @@ static struct net_bridge_fdb_entry *fdb_create(struct hlist_head *head, } static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source, - const unsigned char *addr) + const unsigned char *addr, u16 vid) { - struct hlist_head *head = &br->hash[br_mac_hash(addr)]; + struct hlist_head *head = &br->hash[br_mac_hash(addr, vid)]; struct net_bridge_fdb_entry *fdb; if (!is_valid_ether_addr(addr)) return -EINVAL; - fdb = fdb_find(head, addr); + fdb = fdb_find(head, addr, vid); if (fdb) { /* it is okay to have multiple ports with same * address, just use the first one. @@ -386,11 +421,11 @@ static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source, return 0; br_warn(br, "adding interface %s with same address " "as a received packet\n", - source->dev->name); + source ? source->dev->name : br->dev->name); fdb_delete(br, fdb); } - fdb = fdb_create(head, source, addr); + fdb = fdb_create(head, source, addr, vid); if (!fdb) return -ENOMEM; @@ -401,20 +436,20 @@ static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source, /* Add entry for local address of interface */ int br_fdb_insert(struct net_bridge *br, struct net_bridge_port *source, - const unsigned char *addr) + const unsigned char *addr, u16 vid) { int ret; spin_lock_bh(&br->hash_lock); - ret = fdb_insert(br, source, addr); + ret = fdb_insert(br, source, addr, vid); spin_unlock_bh(&br->hash_lock); return ret; } void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source, - const unsigned char *addr) + const unsigned char *addr, u16 vid) { - struct hlist_head *head = &br->hash[br_mac_hash(addr)]; + struct hlist_head *head = &br->hash[br_mac_hash(addr, vid)]; struct net_bridge_fdb_entry *fdb; /* some users want to always flood. 
*/ @@ -426,7 +461,7 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source, source->state == BR_STATE_FORWARDING)) return; - fdb = fdb_find_rcu(head, addr); + fdb = fdb_find_rcu(head, addr, vid); if (likely(fdb)) { /* attempt to update an entry for a local interface */ if (unlikely(fdb->is_local)) { @@ -441,8 +476,8 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source, } } else { spin_lock(&br->hash_lock); - if (likely(!fdb_find(head, addr))) { - fdb = fdb_create(head, source, addr); + if (likely(!fdb_find(head, addr, vid))) { + fdb = fdb_create(head, source, addr, vid); if (fdb) fdb_notify(br, fdb, RTM_NEWNEIGH); } @@ -495,6 +530,10 @@ static int fdb_fill_info(struct sk_buff *skb, const struct net_bridge *br, ci.ndm_refcnt = 0; if (nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci)) goto nla_put_failure; + + if (nla_put(skb, NDA_VLAN, sizeof(u16), &fdb->vlan_id)) + goto nla_put_failure; + return nlmsg_end(skb, nlh); nla_put_failure: @@ -506,6 +545,7 @@ static inline size_t fdb_nlmsg_size(void) { return NLMSG_ALIGN(sizeof(struct ndmsg)) + nla_total_size(ETH_ALEN) /* NDA_LLADDR */ + + nla_total_size(sizeof(u16)) /* NDA_VLAN */ + nla_total_size(sizeof(struct nda_cacheinfo)); } @@ -547,10 +587,9 @@ int br_fdb_dump(struct sk_buff *skb, goto out; for (i = 0; i < BR_HASH_SIZE; i++) { - struct hlist_node *h; struct net_bridge_fdb_entry *f; - hlist_for_each_entry_rcu(f, h, &br->hash[i], hlist) { + hlist_for_each_entry_rcu(f, &br->hash[i], hlist) { if (idx < cb->args[0]) goto skip; @@ -571,24 +610,31 @@ out: /* Update (create or replace) forwarding database entry */ static int fdb_add_entry(struct net_bridge_port *source, const __u8 *addr, - __u16 state, __u16 flags) + __u16 state, __u16 flags, __u16 vid) { struct net_bridge *br = source->br; - struct hlist_head *head = &br->hash[br_mac_hash(addr)]; + struct hlist_head *head = &br->hash[br_mac_hash(addr, vid)]; struct net_bridge_fdb_entry *fdb; + bool modified = false; - fdb = fdb_find(head, addr); + fdb = fdb_find(head, addr, vid); if (fdb == NULL) { if (!(flags & NLM_F_CREATE)) return -ENOENT; - fdb = fdb_create(head, source, addr); + fdb = fdb_create(head, source, addr, vid); if (!fdb) return -ENOMEM; - fdb_notify(br, fdb, RTM_NEWNEIGH); + + modified = true; } else { if (flags & NLM_F_EXCL) return -EEXIST; + + if (fdb->dst != source) { + fdb->dst = source; + modified = true; + } } if (fdb_to_nud(fdb) != state) { @@ -600,13 +646,37 @@ static int fdb_add_entry(struct net_bridge_port *source, const __u8 *addr, } else fdb->is_local = fdb->is_static = 0; - fdb->updated = fdb->used = jiffies; + modified = true; + } + + fdb->used = jiffies; + if (modified) { + fdb->updated = jiffies; fdb_notify(br, fdb, RTM_NEWNEIGH); } return 0; } +static int __br_fdb_add(struct ndmsg *ndm, struct net_bridge_port *p, + const unsigned char *addr, u16 nlh_flags, u16 vid) +{ + int err = 0; + + if (ndm->ndm_flags & NTF_USE) { + rcu_read_lock(); + br_fdb_update(p->br, p, addr, vid); + rcu_read_unlock(); + } else { + spin_lock_bh(&p->br->hash_lock); + err = fdb_add_entry(p, addr, ndm->ndm_state, + nlh_flags, vid); + spin_unlock_bh(&p->br->hash_lock); + } + + return err; +} + /* Add new permanent fdb entry with RTM_NEWNEIGH */ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, @@ -614,12 +684,29 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], { struct net_bridge_port *p; int err = 0; + struct net_port_vlans *pv; + unsigned short vid = VLAN_N_VID; if (!(ndm->ndm_state & 
(NUD_PERMANENT|NUD_NOARP|NUD_REACHABLE))) { pr_info("bridge: RTM_NEWNEIGH with invalid state %#x\n", ndm->ndm_state); return -EINVAL; } + if (tb[NDA_VLAN]) { + if (nla_len(tb[NDA_VLAN]) != sizeof(unsigned short)) { + pr_info("bridge: RTM_NEWNEIGH with invalid vlan\n"); + return -EINVAL; + } + + vid = nla_get_u16(tb[NDA_VLAN]); + + if (vid >= VLAN_N_VID) { + pr_info("bridge: RTM_NEWNEIGH with invalid vlan id %d\n", + vid); + return -EINVAL; + } + } + p = br_port_get_rtnl(dev); if (p == NULL) { pr_info("bridge: RTM_NEWNEIGH %s not a bridge port\n", @@ -627,40 +714,87 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], return -EINVAL; } - if (ndm->ndm_flags & NTF_USE) { - rcu_read_lock(); - br_fdb_update(p->br, p, addr); - rcu_read_unlock(); + pv = nbp_get_vlan_info(p); + if (vid != VLAN_N_VID) { + if (!pv || !test_bit(vid, pv->vlan_bitmap)) { + pr_info("bridge: RTM_NEWNEIGH with unconfigured " + "vlan %d on port %s\n", vid, dev->name); + return -EINVAL; + } + + /* VID was specified, so use it. */ + err = __br_fdb_add(ndm, p, addr, nlh_flags, vid); } else { - spin_lock_bh(&p->br->hash_lock); - err = fdb_add_entry(p, addr, ndm->ndm_state, nlh_flags); - spin_unlock_bh(&p->br->hash_lock); + if (!pv || bitmap_empty(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN)) { + err = __br_fdb_add(ndm, p, addr, nlh_flags, 0); + goto out; + } + + /* We have vlans configured on this port and user didn't + * specify a VLAN. To be nice, add/update entry for every + * vlan on this port. + */ + for_each_set_bit(vid, pv->vlan_bitmap, BR_VLAN_BITMAP_LEN) { + err = __br_fdb_add(ndm, p, addr, nlh_flags, vid); + if (err) + goto out; + } } +out: return err; } -static int fdb_delete_by_addr(struct net_bridge_port *p, const u8 *addr) +int fdb_delete_by_addr(struct net_bridge *br, const u8 *addr, + u16 vlan) { - struct net_bridge *br = p->br; - struct hlist_head *head = &br->hash[br_mac_hash(addr)]; + struct hlist_head *head = &br->hash[br_mac_hash(addr, vlan)]; struct net_bridge_fdb_entry *fdb; - fdb = fdb_find(head, addr); + fdb = fdb_find(head, addr, vlan); if (!fdb) return -ENOENT; - fdb_delete(p->br, fdb); + fdb_delete(br, fdb); return 0; } +static int __br_fdb_delete(struct net_bridge_port *p, + const unsigned char *addr, u16 vid) +{ + int err; + + spin_lock_bh(&p->br->hash_lock); + err = fdb_delete_by_addr(p->br, addr, vid); + spin_unlock_bh(&p->br->hash_lock); + + return err; +} + /* Remove neighbor entry with RTM_DELNEIGH */ -int br_fdb_delete(struct ndmsg *ndm, struct net_device *dev, +int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[], + struct net_device *dev, const unsigned char *addr) { struct net_bridge_port *p; int err; + struct net_port_vlans *pv; + unsigned short vid = VLAN_N_VID; + + if (tb[NDA_VLAN]) { + if (nla_len(tb[NDA_VLAN]) != sizeof(unsigned short)) { + pr_info("bridge: RTM_DELNEIGH with invalid vlan\n"); + return -EINVAL; + } + + vid = nla_get_u16(tb[NDA_VLAN]); + if (vid >= VLAN_N_VID) { + pr_info("bridge: RTM_DELNEIGH with invalid vlan id %d\n", + vid); + return -EINVAL; + } + } p = br_port_get_rtnl(dev); if (p == NULL) { pr_info("bridge: RTM_DELNEIGH %s not a bridge port\n", @@ -668,9 +802,30 @@ int br_fdb_delete(struct ndmsg *ndm, struct net_device *dev, return 
-EINVAL; + } + + err = __br_fdb_delete(p, addr, vid); + } else { + if (!pv || bitmap_empty(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN)) { + err = __br_fdb_delete(p, addr, 0); + goto out; + } + /* We have vlans configured on this port and user didn't + * specify a VLAN. To be nice, delete the entry for every + * vlan on this port. + */ + err = -ENOENT; + for_each_set_bit(vid, pv->vlan_bitmap, BR_VLAN_BITMAP_LEN) { + err &= __br_fdb_delete(p, addr, vid); + } + } +out: return err; } diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index 02015a505d2a..092b20e4ee4c 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -31,6 +31,7 @@ static inline int should_deliver(const struct net_bridge_port *p, const struct sk_buff *skb) { return (((p->flags & BR_HAIRPIN_MODE) || skb->dev != p->dev) && + br_allowed_egress(p->br, nbp_get_vlan_info(p), skb) && p->state == BR_STATE_FORWARDING); } @@ -63,6 +64,10 @@ int br_forward_finish(struct sk_buff *skb) static void __br_deliver(const struct net_bridge_port *to, struct sk_buff *skb) { + skb = br_handle_vlan(to->br, nbp_get_vlan_info(to), skb); + if (!skb) + return; + skb->dev = to->dev; if (unlikely(netpoll_tx_running(to->br->dev))) { @@ -88,6 +93,10 @@ static void __br_forward(const struct net_bridge_port *to, struct sk_buff *skb) return; } + skb = br_handle_vlan(to->br, nbp_get_vlan_info(to), skb); + if (!skb) + return; + indev = skb->dev; skb->dev = to->dev; skb_forward_csum(skb); diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 37fe693471a8..4cdba60926ff 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -23,6 +23,7 @@ #include <linux/if_ether.h> #include <linux/slab.h> #include <net/sock.h> +#include <linux/if_vlan.h> #include "br_private.h" @@ -66,14 +67,15 @@ void br_port_carrier_check(struct net_bridge_port *p) struct net_device *dev = p->dev; struct net_bridge *br = p->br; - if (netif_running(dev) && netif_carrier_ok(dev)) + if (!(p->flags & BR_ADMIN_COST) && + netif_running(dev) && netif_oper_up(dev)) p->path_cost = port_cost(dev); if (!netif_running(br->dev)) return; spin_lock_bh(&br->lock); - if (netif_running(dev) && netif_carrier_ok(dev)) { + if (netif_running(dev) && netif_oper_up(dev)) { if (p->state == BR_STATE_DISABLED) br_stp_enable_port(p); } else { @@ -139,6 +141,7 @@ static void del_nbp(struct net_bridge_port *p) br_ifinfo_notify(RTM_DELLINK, p); + nbp_vlan_flush(p); br_fdb_delete_by_port(br, p, 1); list_del_rcu(&p->list); @@ -146,9 +149,8 @@ static void del_nbp(struct net_bridge_port *p) dev->priv_flags &= ~IFF_BRIDGE_PORT; netdev_rx_handler_unregister(dev); - synchronize_net(); - netdev_set_master(dev, NULL); + netdev_upper_dev_unlink(dev, br->dev); br_multicast_del_port(p); @@ -364,7 +366,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) if (br_netpoll_info(br) && ((err = br_netpoll_enable(p, GFP_KERNEL)))) goto err3; - err = netdev_set_master(dev, br->dev); + err = netdev_master_upper_dev_link(dev, br->dev); if (err) goto err4; @@ -383,7 +385,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) spin_lock_bh(&br->lock); changed_addr = br_stp_recalculate_bridge_id(br); - if ((dev->flags & IFF_UP) && netif_carrier_ok(dev) && + if (netif_running(dev) && netif_oper_up(dev) && (br->dev->flags & IFF_UP)) br_stp_enable_port(p); spin_unlock_bh(&br->lock); @@ -395,7 +397,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) dev_set_mtu(br->dev, br_min_mtu(br)); - if (br_fdb_insert(br, p, dev->dev_addr)) + if (br_fdb_insert(br, p, dev->dev_addr, 0)) 
netdev_err(dev, "failed insert local address bridge forwarding table\n"); kobject_uevent(&p->kobj, KOBJ_ADD); @@ -403,7 +405,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) return 0; err5: - netdev_set_master(dev, NULL); + netdev_upper_dev_unlink(dev, br->dev); err4: br_netpoll_disable(p); err3: diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 4b34207419b1..828e2bcc1f52 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -17,6 +17,7 @@ #include <linux/etherdevice.h> #include <linux/netfilter_bridge.h> #include <linux/export.h> +#include <linux/rculist.h> #include "br_private.h" /* Hook for brouter */ @@ -34,6 +35,20 @@ static int br_pass_frame_up(struct sk_buff *skb) brstats->rx_bytes += skb->len; u64_stats_update_end(&brstats->syncp); + /* Bridge is just like any other port. Make sure the + * packet is allowed except in promisc mode when someone + * may be running packet capture. + */ + if (!(brdev->flags & IFF_PROMISC) && + !br_allowed_egress(br, br_get_vlan_info(br), skb)) { + kfree_skb(skb); + return NET_RX_DROP; + } + + skb = br_handle_vlan(br, br_get_vlan_info(br), skb); + if (!skb) + return NET_RX_DROP; + indev = skb->dev; skb->dev = brdev; @@ -50,13 +65,17 @@ int br_handle_frame_finish(struct sk_buff *skb) struct net_bridge_fdb_entry *dst; struct net_bridge_mdb_entry *mdst; struct sk_buff *skb2; + u16 vid = 0; if (!p || p->state == BR_STATE_DISABLED) goto drop; + if (!br_allowed_ingress(p->br, nbp_get_vlan_info(p), skb, &vid)) + goto drop; + /* insert into forwarding database after filtering to avoid spoofing */ br = p->br; - br_fdb_update(br, p, eth_hdr(skb)->h_source); + br_fdb_update(br, p, eth_hdr(skb)->h_source, vid); if (!is_broadcast_ether_addr(dest) && is_multicast_ether_addr(dest) && br_multicast_rcv(br, p, skb)) @@ -78,7 +97,7 @@ int br_handle_frame_finish(struct sk_buff *skb) if (is_broadcast_ether_addr(dest)) skb2 = skb; else if (is_multicast_ether_addr(dest)) { - mdst = br_mdb_get(br, skb); + mdst = br_mdb_get(br, skb, vid); if (mdst || BR_INPUT_SKB_CB_MROUTERS_ONLY(skb)) { if ((mdst && mdst->mglist) || br_multicast_is_router(br)) @@ -91,7 +110,8 @@ int br_handle_frame_finish(struct sk_buff *skb) skb2 = skb; br->dev->stats.multicast++; - } else if ((dst = __br_fdb_get(br, dest, vid)) && + dst->is_local) { skb2 = skb; /* Do not forward the packet since it's local. 
*/ skb = NULL; @@ -119,8 +139,10 @@ drop: static int br_handle_local_finish(struct sk_buff *skb) { struct net_bridge_port *p = br_port_get_rcu(skb->dev); + u16 vid = 0; - br_fdb_update(p->br, p, eth_hdr(skb)->h_source); + br_vlan_get_tag(skb, &vid); + br_fdb_update(p->br, p, eth_hdr(skb)->h_source, vid); return 0; /* process further */ } diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c index acc9f4cc18f7..19942e38fd2d 100644 --- a/net/bridge/br_mdb.c +++ b/net/bridge/br_mdb.c @@ -18,7 +18,6 @@ static int br_rports_fill_info(struct sk_buff *skb, struct netlink_callback *cb, { struct net_bridge *br = netdev_priv(dev); struct net_bridge_port *p; - struct hlist_node *n; struct nlattr *nest; if (!br->multicast_router || hlist_empty(&br->router_list)) @@ -28,7 +27,7 @@ static int br_rports_fill_info(struct sk_buff *skb, struct netlink_callback *cb, if (nest == NULL) return -EMSGSIZE; - hlist_for_each_entry_rcu(p, n, &br->router_list, rlist) { + hlist_for_each_entry_rcu(p, &br->router_list, rlist) { if (p && nla_put_u32(skb, MDBA_ROUTER_PORT, p->dev->ifindex)) goto fail; } @@ -61,12 +60,11 @@ static int br_mdb_fill_info(struct sk_buff *skb, struct netlink_callback *cb, return -EMSGSIZE; for (i = 0; i < mdb->max; i++) { - struct hlist_node *h; struct net_bridge_mdb_entry *mp; struct net_bridge_port_group *p, **pp; struct net_bridge_port *port; - hlist_for_each_entry_rcu(mp, h, &mdb->mhash[i], hlist[mdb->ver]) { + hlist_for_each_entry_rcu(mp, &mdb->mhash[i], hlist[mdb->ver]) { if (idx < s_idx) goto skip; @@ -82,6 +80,7 @@ static int br_mdb_fill_info(struct sk_buff *skb, struct netlink_callback *cb, port = p->port; if (port) { struct br_mdb_entry e; + memset(&e, 0, sizeof(e)); e.ifindex = port->dev->ifindex; e.state = p->state; if (p->addr.proto == htons(ETH_P_IP)) @@ -138,6 +137,7 @@ static int br_mdb_dump(struct sk_buff *skb, struct netlink_callback *cb) break; bpm = nlmsg_data(nlh); + memset(bpm, 0, sizeof(*bpm)); bpm->ifindex = dev->ifindex; if (br_mdb_fill_info(skb, cb, dev) < 0) goto out; @@ -173,6 +173,7 @@ static int nlmsg_populate_mdb_fill(struct sk_buff *skb, return -EMSGSIZE; bpm = nlmsg_data(nlh); + memset(bpm, 0, sizeof(*bpm)); bpm->family = AF_BRIDGE; bpm->ifindex = dev->ifindex; nest = nla_nest_start(skb, MDBA_MDB); @@ -230,6 +231,7 @@ void br_mdb_notify(struct net_device *dev, struct net_bridge_port *port, { struct br_mdb_entry entry; + memset(&entry, 0, sizeof(entry)); entry.ifindex = port->dev->ifindex; entry.addr.proto = group->proto; entry.addr.u.ip4 = group->u.ip4; @@ -272,9 +274,6 @@ static int br_mdb_parse(struct sk_buff *skb, struct nlmsghdr *nlh, struct net_device *dev; int err; - if (!capable(CAP_NET_ADMIN)) - return -EPERM; - err = nlmsg_parse(nlh, sizeof(*bpm), tb, MDBA_SET_ENTRY, NULL); if (err < 0) return err; @@ -383,7 +382,7 @@ static int __br_mdb_add(struct net *net, struct net_bridge *br, return ret; } -static int br_mdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +static int br_mdb_add(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); struct br_mdb_entry *entry; @@ -459,7 +458,7 @@ unlock: return err; } -static int br_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +static int br_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net_device *dev; struct br_mdb_entry *entry; diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 6d6f26531de2..81f2389f78eb 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -39,6 +39,8 @@ static inline int 
br_ip_equal(const struct br_ip *a, const struct br_ip *b) { if (a->proto != b->proto) return 0; + if (a->vid != b->vid) + return 0; switch (a->proto) { case htons(ETH_P_IP): return a->u.ip4 == b->u.ip4; @@ -50,16 +52,19 @@ static inline int br_ip_equal(const struct br_ip *a, const struct br_ip *b) return 0; } -static inline int __br_ip4_hash(struct net_bridge_mdb_htable *mdb, __be32 ip) +static inline int __br_ip4_hash(struct net_bridge_mdb_htable *mdb, __be32 ip, + __u16 vid) { - return jhash_1word(mdb->secret, (__force u32)ip) & (mdb->max - 1); + return jhash_2words((__force u32)ip, vid, mdb->secret) & (mdb->max - 1); } #if IS_ENABLED(CONFIG_IPV6) static inline int __br_ip6_hash(struct net_bridge_mdb_htable *mdb, - const struct in6_addr *ip) + const struct in6_addr *ip, + __u16 vid) { - return jhash2((__force u32 *)ip->s6_addr32, 4, mdb->secret) & (mdb->max - 1); + return jhash_2words(ipv6_addr_hash(ip), vid, + mdb->secret) & (mdb->max - 1); } #endif @@ -68,10 +73,10 @@ static inline int br_ip_hash(struct net_bridge_mdb_htable *mdb, { switch (ip->proto) { case htons(ETH_P_IP): - return __br_ip4_hash(mdb, ip->u.ip4); + return __br_ip4_hash(mdb, ip->u.ip4, ip->vid); #if IS_ENABLED(CONFIG_IPV6) case htons(ETH_P_IPV6): - return __br_ip6_hash(mdb, &ip->u.ip6); + return __br_ip6_hash(mdb, &ip->u.ip6, ip->vid); #endif } return 0; @@ -81,9 +86,8 @@ static struct net_bridge_mdb_entry *__br_mdb_ip_get( struct net_bridge_mdb_htable *mdb, struct br_ip *dst, int hash) { struct net_bridge_mdb_entry *mp; - struct hlist_node *p; - hlist_for_each_entry_rcu(mp, p, &mdb->mhash[hash], hlist[mdb->ver]) { + hlist_for_each_entry_rcu(mp, &mdb->mhash[hash], hlist[mdb->ver]) { if (br_ip_equal(&mp->addr, dst)) return mp; } @@ -101,31 +105,34 @@ struct net_bridge_mdb_entry *br_mdb_ip_get(struct net_bridge_mdb_htable *mdb, } static struct net_bridge_mdb_entry *br_mdb_ip4_get( - struct net_bridge_mdb_htable *mdb, __be32 dst) + struct net_bridge_mdb_htable *mdb, __be32 dst, __u16 vid) { struct br_ip br_dst; br_dst.u.ip4 = dst; br_dst.proto = htons(ETH_P_IP); + br_dst.vid = vid; return br_mdb_ip_get(mdb, &br_dst); } #if IS_ENABLED(CONFIG_IPV6) static struct net_bridge_mdb_entry *br_mdb_ip6_get( - struct net_bridge_mdb_htable *mdb, const struct in6_addr *dst) + struct net_bridge_mdb_htable *mdb, const struct in6_addr *dst, + __u16 vid) { struct br_ip br_dst; br_dst.u.ip6 = *dst; br_dst.proto = htons(ETH_P_IPV6); + br_dst.vid = vid; return br_mdb_ip_get(mdb, &br_dst); } #endif struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge *br, - struct sk_buff *skb) + struct sk_buff *skb, u16 vid) { struct net_bridge_mdb_htable *mdb = rcu_dereference(br->mdb); struct br_ip ip; @@ -137,6 +144,7 @@ struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge *br, return NULL; ip.proto = skb->protocol; + ip.vid = vid; switch (skb->protocol) { case htons(ETH_P_IP): @@ -170,13 +178,12 @@ static int br_mdb_copy(struct net_bridge_mdb_htable *new, int elasticity) { struct net_bridge_mdb_entry *mp; - struct hlist_node *p; int maxlen; int len; int i; for (i = 0; i < old->max; i++) - hlist_for_each_entry(mp, p, &old->mhash[i], hlist[old->ver]) + hlist_for_each_entry(mp, &old->mhash[i], hlist[old->ver]) hlist_add_head(&mp->hlist[new->ver], &new->mhash[br_ip_hash(new, &mp->addr)]); @@ -186,7 +193,7 @@ static int br_mdb_copy(struct net_bridge_mdb_htable *new, maxlen = 0; for (i = 0; i < new->max; i++) { len = 0; - hlist_for_each_entry(mp, p, &new->mhash[i], hlist[new->ver]) + hlist_for_each_entry(mp, &new->mhash[i], hlist[new->ver]) len++; if 
(len > maxlen) maxlen = len; @@ -502,14 +509,13 @@ static struct net_bridge_mdb_entry *br_multicast_get_group( { struct net_bridge_mdb_htable *mdb; struct net_bridge_mdb_entry *mp; - struct hlist_node *p; unsigned int count = 0; unsigned int max; int elasticity; int err; mdb = rcu_dereference_protected(br->mdb, 1); - hlist_for_each_entry(mp, p, &mdb->mhash[hash], hlist[mdb->ver]) { + hlist_for_each_entry(mp, &mdb->mhash[hash], hlist[mdb->ver]) { count++; if (unlikely(br_ip_equal(group, &mp->addr))) return mp; @@ -694,7 +700,8 @@ err: static int br_ip4_multicast_add_group(struct net_bridge *br, struct net_bridge_port *port, - __be32 group) + __be32 group, + __u16 vid) { struct br_ip br_group; @@ -703,6 +710,7 @@ static int br_ip4_multicast_add_group(struct net_bridge *br, br_group.u.ip4 = group; br_group.proto = htons(ETH_P_IP); + br_group.vid = vid; return br_multicast_add_group(br, port, &br_group); } @@ -710,7 +718,8 @@ static int br_ip4_multicast_add_group(struct net_bridge *br, #if IS_ENABLED(CONFIG_IPV6) static int br_ip6_multicast_add_group(struct net_bridge *br, struct net_bridge_port *port, - const struct in6_addr *group) + const struct in6_addr *group, + __u16 vid) { struct br_ip br_group; @@ -719,6 +728,7 @@ static int br_ip6_multicast_add_group(struct net_bridge *br, br_group.u.ip6 = *group; br_group.proto = htons(ETH_P_IPV6); + br_group.vid = vid; return br_multicast_add_group(br, port, &br_group); } @@ -870,10 +880,10 @@ void br_multicast_disable_port(struct net_bridge_port *port) { struct net_bridge *br = port->br; struct net_bridge_port_group *pg; - struct hlist_node *p, *n; + struct hlist_node *n; spin_lock(&br->multicast_lock); - hlist_for_each_entry_safe(pg, p, n, &port->mglist, mglist) + hlist_for_each_entry_safe(pg, n, &port->mglist, mglist) br_multicast_del_pg(br, pg); if (!hlist_unhashed(&port->rlist)) @@ -895,10 +905,12 @@ static int br_ip4_multicast_igmp3_report(struct net_bridge *br, int type; int err = 0; __be32 group; + u16 vid = 0; if (!pskb_may_pull(skb, sizeof(*ih))) return -EINVAL; + br_vlan_get_tag(skb, &vid); ih = igmpv3_report_hdr(skb); num = ntohs(ih->ngrec); len = sizeof(*ih); @@ -930,7 +942,7 @@ static int br_ip4_multicast_igmp3_report(struct net_bridge *br, continue; } - err = br_ip4_multicast_add_group(br, port, group); + err = br_ip4_multicast_add_group(br, port, group, vid); if (err) break; } @@ -949,10 +961,12 @@ static int br_ip6_multicast_mld2_report(struct net_bridge *br, int len; int num; int err = 0; + u16 vid = 0; if (!pskb_may_pull(skb, sizeof(*icmp6h))) return -EINVAL; + br_vlan_get_tag(skb, &vid); icmp6h = icmp6_hdr(skb); num = ntohs(icmp6h->icmp6_dataun.un_data16[1]); len = sizeof(*icmp6h); @@ -990,7 +1004,8 @@ static int br_ip6_multicast_mld2_report(struct net_bridge *br, continue; } - err = br_ip6_multicast_add_group(br, port, &grec->grec_mca); + err = br_ip6_multicast_add_group(br, port, &grec->grec_mca, + vid); if (!err) break; } @@ -1008,12 +1023,12 @@ static void br_multicast_add_router(struct net_bridge *br, struct net_bridge_port *port) { struct net_bridge_port *p; - struct hlist_node *n, *slot = NULL; + struct hlist_node *slot = NULL; - hlist_for_each_entry(p, n, &br->router_list, rlist) { + hlist_for_each_entry(p, &br->router_list, rlist) { if ((unsigned long) port >= (unsigned long) p) break; - slot = n; + slot = &p->rlist; } if (slot) @@ -1074,6 +1089,7 @@ static int br_ip4_multicast_query(struct net_bridge *br, unsigned long now = jiffies; __be32 group; int err = 0; + u16 vid = 0; spin_lock(&br->multicast_lock); if 
(!netif_running(br->dev) || @@ -1108,7 +1124,8 @@ static int br_ip4_multicast_query(struct net_bridge *br, if (!group) goto out; - mp = br_mdb_ip4_get(mlock_dereference(br->mdb, br), group); + br_vlan_get_tag(skb, &vid); + mp = br_mdb_ip4_get(mlock_dereference(br->mdb, br), group, vid); if (!mp) goto out; @@ -1149,6 +1166,7 @@ static int br_ip6_multicast_query(struct net_bridge *br, unsigned long now = jiffies; const struct in6_addr *group = NULL; int err = 0; + u16 vid = 0; spin_lock(&br->multicast_lock); if (!netif_running(br->dev) || @@ -1180,7 +1198,8 @@ static int br_ip6_multicast_query(struct net_bridge *br, if (!group) goto out; - mp = br_mdb_ip6_get(mlock_dereference(br->mdb, br), group); + br_vlan_get_tag(skb, &vid); + mp = br_mdb_ip6_get(mlock_dereference(br->mdb, br), group, vid); if (!mp) goto out; @@ -1286,7 +1305,8 @@ out: static void br_ip4_multicast_leave_group(struct net_bridge *br, struct net_bridge_port *port, - __be32 group) + __be32 group, + __u16 vid) { struct br_ip br_group; @@ -1295,6 +1315,7 @@ static void br_ip4_multicast_leave_group(struct net_bridge *br, br_group.u.ip4 = group; br_group.proto = htons(ETH_P_IP); + br_group.vid = vid; br_multicast_leave_group(br, port, &br_group); } @@ -1302,7 +1323,8 @@ static void br_ip4_multicast_leave_group(struct net_bridge *br, #if IS_ENABLED(CONFIG_IPV6) static void br_ip6_multicast_leave_group(struct net_bridge *br, struct net_bridge_port *port, - const struct in6_addr *group) + const struct in6_addr *group, + __u16 vid) { struct br_ip br_group; @@ -1311,6 +1333,7 @@ static void br_ip6_multicast_leave_group(struct net_bridge *br, br_group.u.ip6 = *group; br_group.proto = htons(ETH_P_IPV6); + br_group.vid = vid; br_multicast_leave_group(br, port, &br_group); } @@ -1326,6 +1349,7 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br, unsigned int len; unsigned int offset; int err; + u16 vid = 0; /* We treat OOM as packet loss for now. 
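* (A non-zero return from this function propagates to br_handle_frame_finish(), which then drops the frame.)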
*/ if (!pskb_may_pull(skb, sizeof(*iph))) @@ -1345,7 +1369,7 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br, return -EINVAL; if (iph->protocol != IPPROTO_IGMP) { - if ((iph->daddr & IGMP_LOCAL_GROUP_MASK) != IGMP_LOCAL_GROUP) + if (!ipv4_is_local_multicast(iph->daddr)) BR_INPUT_SKB_CB(skb)->mrouters_only = 1; return 0; } @@ -1386,6 +1410,7 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br, err = 0; + br_vlan_get_tag(skb2, &vid); BR_INPUT_SKB_CB(skb)->igmp = 1; ih = igmp_hdr(skb2); @@ -1393,7 +1418,7 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br, case IGMP_HOST_MEMBERSHIP_REPORT: case IGMPV2_HOST_MEMBERSHIP_REPORT: BR_INPUT_SKB_CB(skb)->mrouters_only = 1; - err = br_ip4_multicast_add_group(br, port, ih->group); + err = br_ip4_multicast_add_group(br, port, ih->group, vid); break; case IGMPV3_HOST_MEMBERSHIP_REPORT: err = br_ip4_multicast_igmp3_report(br, port, skb2); @@ -1402,7 +1427,7 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br, err = br_ip4_multicast_query(br, port, skb2); break; case IGMP_HOST_LEAVE_MESSAGE: - br_ip4_multicast_leave_group(br, port, ih->group); + br_ip4_multicast_leave_group(br, port, ih->group, vid); break; } @@ -1427,6 +1452,7 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br, unsigned int len; int offset; int err; + u16 vid = 0; if (!pskb_may_pull(skb, sizeof(*ip6h))) return -EINVAL; @@ -1510,6 +1536,7 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br, err = 0; + br_vlan_get_tag(skb, &vid); BR_INPUT_SKB_CB(skb)->igmp = 1; switch (icmp6_type) { @@ -1522,7 +1549,7 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br, } mld = (struct mld_msg *)skb_transport_header(skb2); BR_INPUT_SKB_CB(skb)->mrouters_only = 1; - err = br_ip6_multicast_add_group(br, port, &mld->mld_mca); + err = br_ip6_multicast_add_group(br, port, &mld->mld_mca, vid); break; } case ICMPV6_MLD2_REPORT: @@ -1539,7 +1566,7 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br, goto out; } mld = (struct mld_msg *)skb_transport_header(skb2); - br_ip6_multicast_leave_group(br, port, &mld->mld_mca); + br_ip6_multicast_leave_group(br, port, &mld->mld_mca, vid); } } @@ -1624,7 +1651,7 @@ void br_multicast_stop(struct net_bridge *br) { struct net_bridge_mdb_htable *mdb; struct net_bridge_mdb_entry *mp; - struct hlist_node *p, *n; + struct hlist_node *n; u32 ver; int i; @@ -1641,7 +1668,7 @@ void br_multicast_stop(struct net_bridge *br) ver = mdb->ver; for (i = 0; i < mdb->max; i++) { - hlist_for_each_entry_safe(mp, p, n, &mdb->mhash[i], + hlist_for_each_entry_safe(mp, n, &mdb->mhash[i], hlist[ver]) { del_timer(&mp->timer); call_rcu_bh(&mp->rcu, br_multicast_free_group); diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index fe43bc7b063f..1ed75bfd8d1d 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -535,7 +535,8 @@ static struct net_device *brnf_get_logical_dev(struct sk_buff *skb, const struct if (brnf_pass_vlan_indev == 0 || !vlan_tx_tag_present(skb)) return br; - vlan = __vlan_find_dev_deep(br, vlan_tx_tag_get(skb) & VLAN_VID_MASK); + vlan = __vlan_find_dev_deep(br, skb->vlan_proto, + vlan_tx_tag_get(skb) & VLAN_VID_MASK); return vlan ? 
vlan : br; } diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 5dc66abcc9e2..8e3abf564798 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -16,6 +16,7 @@ #include <net/rtnetlink.h> #include <net/net_namespace.h> #include <net/sock.h> +#include <uapi/linux/if_bridge.h> #include "br_private.h" #include "br_private_stp.h" @@ -28,6 +29,7 @@ static inline size_t br_port_info_size(void) + nla_total_size(1) /* IFLA_BRPORT_MODE */ + nla_total_size(1) /* IFLA_BRPORT_GUARD */ + nla_total_size(1) /* IFLA_BRPORT_PROTECT */ + + nla_total_size(1) /* IFLA_BRPORT_FAST_LEAVE */ + 0; } @@ -64,15 +66,21 @@ static int br_port_fill_attrs(struct sk_buff *skb, * Create one netlink message for one interface * Contains port and master info as well as carrier and bridge state. */ -static int br_fill_ifinfo(struct sk_buff *skb, const struct net_bridge_port *port, - u32 pid, u32 seq, int event, unsigned int flags) +static int br_fill_ifinfo(struct sk_buff *skb, + const struct net_bridge_port *port, + u32 pid, u32 seq, int event, unsigned int flags, + u32 filter_mask, const struct net_device *dev) { - const struct net_bridge *br = port->br; - const struct net_device *dev = port->dev; + const struct net_bridge *br; struct ifinfomsg *hdr; struct nlmsghdr *nlh; u8 operstate = netif_running(dev) ? dev->operstate : IF_OPER_DOWN; + if (port) + br = port->br; + else + br = netdev_priv(dev); + br_debug(br, "br_fill_info event %d port %s master %s\n", event, dev->name, br->dev->name); @@ -98,7 +106,7 @@ static int br_fill_ifinfo(struct sk_buff *skb, const struct net_bridge_port *por nla_put_u32(skb, IFLA_LINK, dev->iflink))) goto nla_put_failure; - if (event == RTM_NEWLINK) { + if (event == RTM_NEWLINK && port) { struct nlattr *nest = nla_nest_start(skb, IFLA_PROTINFO | NLA_F_NESTED); @@ -107,6 +115,45 @@ static int br_fill_ifinfo(struct sk_buff *skb, const struct net_bridge_port *por nla_nest_end(skb, nest); } + /* Check if the VID information is requested */ + if (filter_mask & RTEXT_FILTER_BRVLAN) { + struct nlattr *af; + const struct net_port_vlans *pv; + struct bridge_vlan_info vinfo; + u16 vid; + u16 pvid; + + if (port) + pv = nbp_get_vlan_info(port); + else + pv = br_get_vlan_info(br); + + if (!pv || bitmap_empty(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN)) + goto done; + + af = nla_nest_start(skb, IFLA_AF_SPEC); + if (!af) + goto nla_put_failure; + + pvid = br_get_pvid(pv); + for_each_set_bit(vid, pv->vlan_bitmap, BR_VLAN_BITMAP_LEN) { + vinfo.vid = vid; + vinfo.flags = 0; + if (vid == pvid) + vinfo.flags |= BRIDGE_VLAN_INFO_PVID; + + if (test_bit(vid, pv->untagged_bitmap)) + vinfo.flags |= BRIDGE_VLAN_INFO_UNTAGGED; + + if (nla_put(skb, IFLA_BRIDGE_VLAN_INFO, + sizeof(vinfo), &vinfo)) + goto nla_put_failure; + } + + nla_nest_end(skb, af); + } + +done: return nlmsg_end(skb, nlh); nla_put_failure: @@ -119,10 +166,14 @@ nla_put_failure: */ void br_ifinfo_notify(int event, struct net_bridge_port *port) { - struct net *net = dev_net(port->dev); + struct net *net; struct sk_buff *skb; int err = -ENOBUFS; + if (!port) + return; + + net = dev_net(port->dev); br_debug(port->br, "port %u(%s) event %d\n", (unsigned int)port->port_no, port->dev->name, event); @@ -130,7 +181,7 @@ void br_ifinfo_notify(int event, struct net_bridge_port *port) if (skb == NULL) goto errout; - err = br_fill_ifinfo(skb, port, 0, 0, event, 0); + err = br_fill_ifinfo(skb, port, 0, 0, event, 0, 0, port->dev); if (err < 0) { /* -EMSGSIZE implies BUG in br_nlmsg_size() */ WARN_ON(err == -EMSGSIZE); @@ -144,24 +195,85 @@ 
errout: rtnl_set_sk_err(net, RTNLGRP_LINK, err); } + /* * Dump information about all ports, in response to GETLINK */ int br_getlink(struct sk_buff *skb, u32 pid, u32 seq, - struct net_device *dev) + struct net_device *dev, u32 filter_mask) { int err = 0; struct net_bridge_port *port = br_port_get_rcu(dev); - /* not a bridge port */ - if (!port) + /* not a bridge port and no vlan info was requested */ + if (!port && !(filter_mask & RTEXT_FILTER_BRVLAN)) goto out; - err = br_fill_ifinfo(skb, port, pid, seq, RTM_NEWLINK, NLM_F_MULTI); + err = br_fill_ifinfo(skb, port, pid, seq, RTM_NEWLINK, NLM_F_MULTI, + filter_mask, dev); out: return err; } +static const struct nla_policy ifla_br_policy[IFLA_MAX+1] = { + [IFLA_BRIDGE_FLAGS] = { .type = NLA_U16 }, + [IFLA_BRIDGE_MODE] = { .type = NLA_U16 }, + [IFLA_BRIDGE_VLAN_INFO] = { .type = NLA_BINARY, + .len = sizeof(struct bridge_vlan_info), }, +}; + +static int br_afspec(struct net_bridge *br, + struct net_bridge_port *p, + struct nlattr *af_spec, + int cmd) +{ + struct nlattr *tb[IFLA_BRIDGE_MAX+1]; + int err = 0; + + err = nla_parse_nested(tb, IFLA_BRIDGE_MAX, af_spec, ifla_br_policy); + if (err) + return err; + + if (tb[IFLA_BRIDGE_VLAN_INFO]) { + struct bridge_vlan_info *vinfo; + + vinfo = nla_data(tb[IFLA_BRIDGE_VLAN_INFO]); + + if (vinfo->vid >= VLAN_N_VID) + return -EINVAL; + + switch (cmd) { + case RTM_SETLINK: + if (p) { + err = nbp_vlan_add(p, vinfo->vid, vinfo->flags); + if (err) + break; + + if (vinfo->flags & BRIDGE_VLAN_INFO_MASTER) + err = br_vlan_add(p->br, vinfo->vid, + vinfo->flags); + } else + err = br_vlan_add(br, vinfo->vid, vinfo->flags); + + if (err) + break; + + break; + + case RTM_DELLINK: + if (p) { + nbp_vlan_delete(p, vinfo->vid); + if (vinfo->flags & BRIDGE_VLAN_INFO_MASTER) + br_vlan_delete(p->br, vinfo->vid); + } else + br_vlan_delete(br, vinfo->vid); + break; + } + } + + return err; +} + static const struct nla_policy ifla_brport_policy[IFLA_BRPORT_MAX + 1] = { [IFLA_BRPORT_STATE] = { .type = NLA_U8 }, [IFLA_BRPORT_COST] = { .type = NLA_U32 }, @@ -181,8 +293,11 @@ static int br_set_port_state(struct net_bridge_port *p, u8 state) if (p->br->stp_enabled == BR_KERNEL_STP) return -EBUSY; + /* if device is not up, change is not allowed + * if link is not present, only allowable state is disabled + */ if (!netif_running(p->dev) || - (!netif_carrier_ok(p->dev) && state != BR_STATE_DISABLED)) + (!netif_oper_up(p->dev) && state != BR_STATE_DISABLED)) return -ENETDOWN; p->state = state; @@ -212,6 +327,7 @@ static int br_setport(struct net_bridge_port *p, struct nlattr *tb[]) br_set_port_flag(p, tb, IFLA_BRPORT_MODE, BR_HAIRPIN_MODE); br_set_port_flag(p, tb, IFLA_BRPORT_GUARD, BR_BPDU_GUARD); br_set_port_flag(p, tb, IFLA_BRPORT_FAST_LEAVE, BR_MULTICAST_FAST_LEAVE); + br_set_port_flag(p, tb, IFLA_BRPORT_PROTECT, BR_ROOT_BLOCK); if (tb[IFLA_BRPORT_COST]) { err = br_stp_set_path_cost(p, nla_get_u32(tb[IFLA_BRPORT_COST])); @@ -236,47 +352,80 @@ static int br_setport(struct net_bridge_port *p, struct nlattr *tb[]) /* Change state and parameters on port. 
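* IFLA_PROTINFO carries per-port attributes handled by br_setport(), or by br_set_port_state() for the old binary RSTP interface; IFLA_AF_SPEC carries bridge_vlan_info entries handled by br_afspec() above.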
*/ int br_setlink(struct net_device *dev, struct nlmsghdr *nlh) { - struct ifinfomsg *ifm; struct nlattr *protinfo; + struct nlattr *afspec; struct net_bridge_port *p; struct nlattr *tb[IFLA_BRPORT_MAX + 1]; - int err; - - ifm = nlmsg_data(nlh); + int err = 0; - protinfo = nlmsg_find_attr(nlh, sizeof(*ifm), IFLA_PROTINFO); - if (!protinfo) + protinfo = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_PROTINFO); + afspec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC); + if (!protinfo && !afspec) return 0; p = br_port_get_rtnl(dev); + /* We want to accept dev as bridge itself if the AF_SPEC + * is set to see if someone is setting vlan info on the bridge + */ + if (!p && !afspec) return -EINVAL; - if (protinfo->nla_type & NLA_F_NESTED) { - err = nla_parse_nested(tb, IFLA_BRPORT_MAX, - protinfo, ifla_brport_policy); + if (p && protinfo) { + if (protinfo->nla_type & NLA_F_NESTED) { + err = nla_parse_nested(tb, IFLA_BRPORT_MAX, + protinfo, ifla_brport_policy); + if (err) + return err; + + spin_lock_bh(&p->br->lock); + err = br_setport(p, tb); + spin_unlock_bh(&p->br->lock); + } else { + /* Binary compatibility with old RSTP */ + if (nla_len(protinfo) < sizeof(u8)) + return -EINVAL; + + spin_lock_bh(&p->br->lock); + err = br_set_port_state(p, nla_get_u8(protinfo)); + spin_unlock_bh(&p->br->lock); + } if (err) - return err; - - spin_lock_bh(&p->br->lock); - err = br_setport(p, tb); - spin_unlock_bh(&p->br->lock); - } else { - /* Binary compatibility with old RSTP */ - if (nla_len(protinfo) < sizeof(u8)) - return -EINVAL; + goto out; + } - spin_lock_bh(&p->br->lock); - err = br_set_port_state(p, nla_get_u8(protinfo)); - spin_unlock_bh(&p->br->lock); + if (afspec) { + err = br_afspec((struct net_bridge *)netdev_priv(dev), p, + afspec, RTM_SETLINK); } if (err == 0) br_ifinfo_notify(RTM_NEWLINK, p); +out: return err; } +/* Delete port information */ +int br_dellink(struct net_device *dev, struct nlmsghdr *nlh) +{ + struct nlattr *afspec; + struct net_bridge_port *p; + int err; + + afspec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC); + if (!afspec) + return 0; + + p = br_port_get_rtnl(dev); + /* We want to accept dev as bridge itself as well */ + if (!p && !(dev->priv_flags & IFF_EBRIDGE)) + return -EINVAL; + + err = br_afspec((struct net_bridge *)netdev_priv(dev), p, + afspec, RTM_DELLINK); + + return err; +} static int br_validate(struct nlattr *tb[], struct nlattr *data[]) { if (tb[IFLA_ADDRESS]) { @@ -289,6 +438,29 @@ static int br_validate(struct nlattr *tb[], struct nlattr *data[]) return 0; } +static size_t br_get_link_af_size(const struct net_device *dev) +{ + struct net_port_vlans *pv; + + if (br_port_exists(dev)) + pv = nbp_get_vlan_info(br_port_get_rcu(dev)); + else if (dev->priv_flags & IFF_EBRIDGE) + pv = br_get_vlan_info((struct net_bridge *)netdev_priv(dev)); + else + return 0; + + if (!pv) + return 0; + + /* Each VLAN is returned in bridge_vlan_info along with flags */ + return pv->num_vlans * nla_total_size(sizeof(struct bridge_vlan_info)); +} + +static struct rtnl_af_ops br_af_ops = { + .family = AF_BRIDGE, + .get_link_af_size = br_get_link_af_size, +}; + struct rtnl_link_ops br_link_ops __read_mostly = { .kind = "bridge", .priv_size = sizeof(struct net_bridge), @@ -302,11 +474,18 @@ int __init br_netlink_init(void) int err; br_mdb_init(); - err = rtnl_link_register(&br_link_ops); + err = rtnl_af_register(&br_af_ops); if (err) goto out; + err = rtnl_link_register(&br_link_ops); + if (err) + goto out_af; + return 0; + +out_af: + 
rtnl_af_unregister(&br_af_ops); out: br_mdb_uninit(); return err; @@ -315,5 +494,6 @@ out: void __exit br_netlink_fini(void) { br_mdb_uninit(); + rtnl_af_unregister(&br_af_ops); rtnl_link_unregister(&br_link_ops); } diff --git a/net/bridge/br_notify.c b/net/bridge/br_notify.c index a76b62135558..1644b3e1f947 100644 --- a/net/bridge/br_notify.c +++ b/net/bridge/br_notify.c @@ -82,7 +82,7 @@ static int br_device_event(struct notifier_block *unused, unsigned long event, v break; case NETDEV_UP: - if (netif_carrier_ok(dev) && (br->dev->flags & IFF_UP)) { + if (netif_running(br->dev) && netif_oper_up(dev)) { spin_lock_bh(&br->lock); br_stp_enable_port(p); spin_unlock_bh(&br->lock); diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 711094aed41a..d2c043a857b6 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -18,6 +18,7 @@ #include <linux/netpoll.h> #include <linux/u64_stats_sync.h> #include <net/route.h> +#include <linux/if_vlan.h> #define BR_HASH_BITS 8 #define BR_HASH_SIZE (1 << BR_HASH_BITS) @@ -26,6 +27,7 @@ #define BR_PORT_BITS 10 #define BR_MAX_PORTS (1<<BR_PORT_BITS) +#define BR_VLAN_BITMAP_LEN BITS_TO_LONGS(VLAN_N_VID) #define BR_VERSION "2.3" @@ -61,6 +63,20 @@ struct br_ip #endif } u; __be16 proto; + __u16 vid; +}; + +struct net_port_vlans { + u16 port_idx; + u16 pvid; + union { + struct net_bridge_port *port; + struct net_bridge *br; + } parent; + struct rcu_head rcu; + unsigned long vlan_bitmap[BR_VLAN_BITMAP_LEN]; + unsigned long untagged_bitmap[BR_VLAN_BITMAP_LEN]; + u16 num_vlans; }; struct net_bridge_fdb_entry @@ -74,6 +90,7 @@ struct net_bridge_fdb_entry mac_addr addr; unsigned char is_local; unsigned char is_static; + __u16 vlan_id; }; struct net_bridge_port_group { @@ -139,6 +156,7 @@ struct net_bridge_port #define BR_BPDU_GUARD 0x00000002 #define BR_ROOT_BLOCK 0x00000004 #define BR_MULTICAST_FAST_LEAVE 0x00000008 +#define BR_ADMIN_COST 0x00000010 #ifdef CONFIG_BRIDGE_IGMP_SNOOPING u32 multicast_startup_queries_sent; @@ -156,6 +174,9 @@ struct net_bridge_port #ifdef CONFIG_NET_POLL_CONTROLLER struct netpoll *np; #endif +#ifdef CONFIG_BRIDGE_VLAN_FILTERING + struct net_port_vlans __rcu *vlan_info; +#endif }; #define br_port_exists(dev) (dev->priv_flags & IFF_BRIDGE_PORT) @@ -197,9 +218,6 @@ struct net_bridge bool nf_call_ip6tables; bool nf_call_arptables; #endif - unsigned long flags; -#define BR_SET_MAC_ADDR 0x00000001 - u16 group_fwd_mask; /* STP */ @@ -260,6 +278,10 @@ struct net_bridge struct timer_list topology_change_timer; struct timer_list gc_timer; struct kobject *ifobj; +#ifdef CONFIG_BRIDGE_VLAN_FILTERING + u8 vlan_enabled; + struct net_port_vlans __rcu *vlan_info; +#endif }; struct br_input_skb_cb { @@ -355,18 +377,22 @@ extern void br_fdb_cleanup(unsigned long arg); extern void br_fdb_delete_by_port(struct net_bridge *br, const struct net_bridge_port *p, int do_all); extern struct net_bridge_fdb_entry *__br_fdb_get(struct net_bridge *br, - const unsigned char *addr); + const unsigned char *addr, + __u16 vid); extern int br_fdb_test_addr(struct net_device *dev, unsigned char *addr); extern int br_fdb_fillbuf(struct net_bridge *br, void *buf, unsigned long count, unsigned long off); extern int br_fdb_insert(struct net_bridge *br, struct net_bridge_port *source, - const unsigned char *addr); + const unsigned char *addr, + u16 vid); extern void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source, - const unsigned char *addr); + const unsigned char *addr, + u16 vid); +extern int fdb_delete_by_addr(struct net_bridge 
*br, const u8 *addr, u16 vid); -extern int br_fdb_delete(struct ndmsg *ndm, +extern int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr); extern int br_fdb_add(struct ndmsg *nlh, struct nlattr *tb[], @@ -417,7 +443,7 @@ extern int br_multicast_rcv(struct net_bridge *br, struct net_bridge_port *port, struct sk_buff *skb); extern struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge *br, - struct sk_buff *skb); + struct sk_buff *skb, u16 vid); extern void br_multicast_add_port(struct net_bridge_port *port); extern void br_multicast_del_port(struct net_bridge_port *port); extern void br_multicast_enable_port(struct net_bridge_port *port); @@ -479,7 +505,7 @@ static inline int br_multicast_rcv(struct net_bridge *br, } static inline struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge *br, - struct sk_buff *skb) + struct sk_buff *skb, u16 vid) { return NULL; } @@ -534,6 +560,142 @@ static inline void br_mdb_uninit(void) } #endif +/* br_vlan.c */ +#ifdef CONFIG_BRIDGE_VLAN_FILTERING +extern bool br_allowed_ingress(struct net_bridge *br, struct net_port_vlans *v, + struct sk_buff *skb, u16 *vid); +extern bool br_allowed_egress(struct net_bridge *br, + const struct net_port_vlans *v, + const struct sk_buff *skb); +extern struct sk_buff *br_handle_vlan(struct net_bridge *br, + const struct net_port_vlans *v, + struct sk_buff *skb); +extern int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags); +extern int br_vlan_delete(struct net_bridge *br, u16 vid); +extern void br_vlan_flush(struct net_bridge *br); +extern int br_vlan_filter_toggle(struct net_bridge *br, unsigned long val); +extern int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags); +extern int nbp_vlan_delete(struct net_bridge_port *port, u16 vid); +extern void nbp_vlan_flush(struct net_bridge_port *port); +extern bool nbp_vlan_find(struct net_bridge_port *port, u16 vid); + +static inline struct net_port_vlans *br_get_vlan_info( + const struct net_bridge *br) +{ + return rcu_dereference_rtnl(br->vlan_info); +} + +static inline struct net_port_vlans *nbp_get_vlan_info( + const struct net_bridge_port *p) +{ + return rcu_dereference_rtnl(p->vlan_info); +} + +/* Since the bridge now depends on the 8021Q module, by the time the bridge + * sees the skb the vlan tag will always be present if the frame was tagged. + */ +static inline int br_vlan_get_tag(const struct sk_buff *skb, u16 *vid) +{ + int err = 0; + + if (vlan_tx_tag_present(skb)) + *vid = vlan_tx_tag_get(skb) & VLAN_VID_MASK; + else { + *vid = 0; + err = -EINVAL; + } + + return err; +} + +static inline u16 br_get_pvid(const struct net_port_vlans *v) +{ + /* Return just the VID if it is set, or VLAN_N_VID (invalid vid) if + * vid wasn't set + */ + smp_rmb(); + return (v->pvid & VLAN_TAG_PRESENT) ? 
+ (v->pvid & ~VLAN_TAG_PRESENT) : + VLAN_N_VID; +} + +#else +static inline bool br_allowed_ingress(struct net_bridge *br, + struct net_port_vlans *v, + struct sk_buff *skb, + u16 *vid) +{ + return true; +} + +static inline bool br_allowed_egress(struct net_bridge *br, + const struct net_port_vlans *v, + const struct sk_buff *skb) +{ + return true; +} + +static inline struct sk_buff *br_handle_vlan(struct net_bridge *br, + const struct net_port_vlans *v, + struct sk_buff *skb) +{ + return skb; +} + +static inline int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags) +{ + return -EOPNOTSUPP; +} + +static inline int br_vlan_delete(struct net_bridge *br, u16 vid) +{ + return -EOPNOTSUPP; +} + +static inline void br_vlan_flush(struct net_bridge *br) +{ +} + +static inline int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags) +{ + return -EOPNOTSUPP; +} + +static inline int nbp_vlan_delete(struct net_bridge_port *port, u16 vid) +{ + return -EOPNOTSUPP; +} + +static inline void nbp_vlan_flush(struct net_bridge_port *port) +{ +} + +static inline struct net_port_vlans *br_get_vlan_info( + const struct net_bridge *br) +{ + return NULL; +} +static inline struct net_port_vlans *nbp_get_vlan_info( + const struct net_bridge_port *p) +{ + return NULL; +} + +static inline bool nbp_vlan_find(struct net_bridge_port *port, u16 vid) +{ + return false; +} + +static inline u16 br_vlan_get_tag(const struct sk_buff *skb, u16 *tag) +{ + return 0; +} +static inline u16 br_get_pvid(const struct net_port_vlans *v) +{ + return VLAN_N_VID; /* Returns invalid vid */ +} +#endif + /* br_netfilter.c */ #ifdef CONFIG_BRIDGE_NETFILTER extern int br_netfilter_init(void); @@ -594,8 +756,9 @@ extern int br_netlink_init(void); extern void br_netlink_fini(void); extern void br_ifinfo_notify(int event, struct net_bridge_port *port); extern int br_setlink(struct net_device *dev, struct nlmsghdr *nlmsg); +extern int br_dellink(struct net_device *dev, struct nlmsghdr *nlmsg); extern int br_getlink(struct sk_buff *skb, u32 pid, u32 seq, - struct net_device *dev); + struct net_device *dev, u32 filter_mask); #ifdef CONFIG_SYSFS /* br_sysfs_if.c */ diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c index b01849a74310..1c0a50f13229 100644 --- a/net/bridge/br_stp.c +++ b/net/bridge/br_stp.c @@ -225,7 +225,14 @@ static void br_record_config_timeout_values(struct net_bridge *br, /* called under bridge lock */ void br_transmit_tcn(struct net_bridge *br) { - br_send_tcn_bpdu(br_get_port(br, br->root_port)); + struct net_bridge_port *p; + + p = br_get_port(br, br->root_port); + if (p) + br_send_tcn_bpdu(p); + else + br_notice(br, "root port %u not found for topology notice\n", + br->root_port); } /* called under bridge lock */ diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c index 9d5a414a3943..d45e760141bb 100644 --- a/net/bridge/br_stp_if.c +++ b/net/bridge/br_stp_if.c @@ -54,7 +54,7 @@ void br_stp_enable_bridge(struct net_bridge *br) br_config_bpdu_generation(br); list_for_each_entry(p, &br->port_list, list) { - if ((p->dev->flags & IFF_UP) && netif_carrier_ok(p->dev)) + if (netif_running(p->dev) && netif_oper_up(p->dev)) br_stp_enable_port(p); } @@ -216,7 +216,7 @@ bool br_stp_recalculate_bridge_id(struct net_bridge *br) struct net_bridge_port *p; /* user has chosen a value so keep it */ - if (br->flags & BR_SET_MAC_ADDR) + if (br->dev->addr_assign_type == NET_ADDR_SET) return false; list_for_each_entry(p, &br->port_list, list) { @@ -288,6 +288,7 @@ int br_stp_set_path_cost(struct net_bridge_port *p, 
unsigned long path_cost) path_cost > BR_MAX_PATH_COST) return -ERANGE; + p->flags |= BR_ADMIN_COST; + p->path_cost = path_cost; br_configuration_update(p->br); br_port_state_selection(p->br); diff --git a/net/bridge/br_stp_timer.c b/net/bridge/br_stp_timer.c index c3530a81a33b..950663d4d330 100644 --- a/net/bridge/br_stp_timer.c +++ b/net/bridge/br_stp_timer.c @@ -107,7 +107,7 @@ static void br_tcn_timer_expired(unsigned long arg) br_debug(br, "tcn timer expired\n"); spin_lock(&br->lock); - if (br->dev->flags & IFF_UP) { + if (!br_is_root_bridge(br) && (br->dev->flags & IFF_UP)) { br_transmit_tcn(br); mod_timer(&br->tcn_timer,jiffies + br->bridge_hello_time); diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c index 5913a3a0047b..8baa9c08e1a4 100644 --- a/net/bridge/br_sysfs_br.c +++ b/net/bridge/br_sysfs_br.c @@ -691,6 +691,24 @@ static ssize_t store_nf_call_arptables( static DEVICE_ATTR(nf_call_arptables, S_IRUGO | S_IWUSR, show_nf_call_arptables, store_nf_call_arptables); #endif +#ifdef CONFIG_BRIDGE_VLAN_FILTERING +static ssize_t show_vlan_filtering(struct device *d, + struct device_attribute *attr, + char *buf) +{ + struct net_bridge *br = to_bridge(d); + return sprintf(buf, "%d\n", br->vlan_enabled); +} + +static ssize_t store_vlan_filtering(struct device *d, + struct device_attribute *attr, + const char *buf, size_t len) +{ + return store_bridge_parm(d, buf, len, br_vlan_filter_toggle); +} +static DEVICE_ATTR(vlan_filtering, S_IRUGO | S_IWUSR, + show_vlan_filtering, store_vlan_filtering); +#endif static struct attribute *bridge_attrs[] = { &dev_attr_forward_delay.attr, @@ -732,6 +750,9 @@ static struct attribute *bridge_attrs[] = { &dev_attr_nf_call_ip6tables.attr, &dev_attr_nf_call_arptables.attr, #endif +#ifdef CONFIG_BRIDGE_VLAN_FILTERING + &dev_attr_vlan_filtering.attr, +#endif NULL }; diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c new file mode 100644 index 000000000000..bd58b45f5f90 --- /dev/null +++ b/net/bridge/br_vlan.c @@ -0,0 +1,419 @@ +#include <linux/kernel.h> +#include <linux/netdevice.h> +#include <linux/rtnetlink.h> +#include <linux/slab.h> + +#include "br_private.h" + +static void __vlan_add_pvid(struct net_port_vlans *v, u16 vid) +{ + if (v->pvid == vid) + return; + + smp_wmb(); + v->pvid = vid; +} + +static void __vlan_delete_pvid(struct net_port_vlans *v, u16 vid) +{ + if (v->pvid != vid) + return; + + smp_wmb(); + v->pvid = 0; +} + +static void __vlan_add_flags(struct net_port_vlans *v, u16 vid, u16 flags) +{ + if (flags & BRIDGE_VLAN_INFO_PVID) + __vlan_add_pvid(v, vid); + + if (flags & BRIDGE_VLAN_INFO_UNTAGGED) + set_bit(vid, v->untagged_bitmap); +} + +static int __vlan_add(struct net_port_vlans *v, u16 vid, u16 flags) +{ + const struct net_device_ops *ops; + struct net_bridge_port *p = NULL; + struct net_bridge *br; + struct net_device *dev; + int err; + + if (test_bit(vid, v->vlan_bitmap)) { + __vlan_add_flags(v, vid, flags); + return 0; + } + + if (vid) { + if (v->port_idx) { + p = v->parent.port; + br = p->br; + dev = p->dev; + } else { + br = v->parent.br; + dev = br->dev; + } + ops = dev->netdev_ops; + + if (p && (dev->features & NETIF_F_HW_VLAN_CTAG_FILTER)) { + /* Add VLAN to the device filter if it is supported. + * Strictly speaking, this is not necessary now, since + * devices are made promiscuous by the bridge, but if + * that ever changes this code will allow tagged + * traffic to enter the bridge. 
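+ * For example, adding vid 10 on a port whose device advertises + * NETIF_F_HW_VLAN_CTAG_FILTER triggers the ndo_vlan_rx_add_vid() call + * below, so the NIC's hardware filter also accepts 802.1Q frames + * tagged with vlan 10.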
+ */ + err = ops->ndo_vlan_rx_add_vid(dev, htons(ETH_P_8021Q), + vid); + if (err) + return err; + } + + err = br_fdb_insert(br, p, dev->dev_addr, vid); + if (err) { + br_err(br, "failed to insert local address into bridge " + "forwarding table\n"); + goto out_filt; + } + + } + + set_bit(vid, v->vlan_bitmap); + v->num_vlans++; + __vlan_add_flags(v, vid, flags); + + return 0; + +out_filt: + if (p && (dev->features & NETIF_F_HW_VLAN_CTAG_FILTER)) + ops->ndo_vlan_rx_kill_vid(dev, htons(ETH_P_8021Q), vid); + return err; +} + +static int __vlan_del(struct net_port_vlans *v, u16 vid) +{ + if (!test_bit(vid, v->vlan_bitmap)) + return -EINVAL; + + __vlan_delete_pvid(v, vid); + clear_bit(vid, v->untagged_bitmap); + + if (v->port_idx && vid) { + struct net_device *dev = v->parent.port->dev; + const struct net_device_ops *ops = dev->netdev_ops; + + if (dev->features & NETIF_F_HW_VLAN_CTAG_FILTER) + ops->ndo_vlan_rx_kill_vid(dev, htons(ETH_P_8021Q), vid); + } + + clear_bit(vid, v->vlan_bitmap); + v->num_vlans--; + if (bitmap_empty(v->vlan_bitmap, BR_VLAN_BITMAP_LEN)) { + if (v->port_idx) + rcu_assign_pointer(v->parent.port->vlan_info, NULL); + else + rcu_assign_pointer(v->parent.br->vlan_info, NULL); + kfree_rcu(v, rcu); + } + return 0; +} + +static void __vlan_flush(struct net_port_vlans *v) +{ + smp_wmb(); + v->pvid = 0; + bitmap_zero(v->vlan_bitmap, BR_VLAN_BITMAP_LEN); + if (v->port_idx) + rcu_assign_pointer(v->parent.port->vlan_info, NULL); + else + rcu_assign_pointer(v->parent.br->vlan_info, NULL); + kfree_rcu(v, rcu); +} + +/* Strip the tag from the packet. Will return skb with tci set to 0. */ +static struct sk_buff *br_vlan_untag(struct sk_buff *skb) +{ + if (skb->protocol != htons(ETH_P_8021Q)) { + skb->vlan_tci = 0; + return skb; + } + + skb->vlan_tci = 0; + skb = vlan_untag(skb); + if (skb) + skb->vlan_tci = 0; + + return skb; +} + +struct sk_buff *br_handle_vlan(struct net_bridge *br, + const struct net_port_vlans *pv, + struct sk_buff *skb) +{ + u16 vid; + + if (!br->vlan_enabled) + goto out; + + /* At this point, we know that the frame was filtered and contains + * a valid vlan id. If the vlan id is set in the untagged bitmap, + * send untagged; otherwise, send tagged. + */ + br_vlan_get_tag(skb, &vid); + if (test_bit(vid, pv->untagged_bitmap)) + skb = br_vlan_untag(skb); + else { + /* Egress policy says "send tagged". If output device + * is the bridge, we need to add the VLAN header + * ourselves since we'll be going through the RX path. + * Sending to ports puts the frame on the TX path and + * we let dev_hard_start_xmit() add the header. + */ + if (skb->protocol != htons(ETH_P_8021Q) && + pv->port_idx == 0) { + /* vlan_put_tag expects skb->data to point to + * mac header. + */ + skb_push(skb, ETH_HLEN); + skb = __vlan_put_tag(skb, skb->vlan_proto, skb->vlan_tci); + if (!skb) + goto out; + /* put skb->data back to where it was */ + skb_pull(skb, ETH_HLEN); + skb->vlan_tci = 0; + } + } + +out: + return skb; +} + +/* Called under RCU */ +bool br_allowed_ingress(struct net_bridge *br, struct net_port_vlans *v, + struct sk_buff *skb, u16 *vid) +{ + /* If VLAN filtering is disabled on the bridge, all packets are + * permitted. + */ + if (!br->vlan_enabled) + return true; + + /* If there are no vlans in the permitted list, all packets are + * rejected. + */ + if (!v) + return false; + + if (br_vlan_get_tag(skb, vid)) { + u16 pvid = br_get_pvid(v); + + /* Frame did not have a tag. See if pvid is set + * on this port. That tells us which vlan untagged + * traffic belongs to. 
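+ * For example, if this port's pvid is 100, an untagged frame is + * hwaccel-tagged with vid 100 below and accepted as vlan 100 traffic.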
+ */ + if (pvid == VLAN_N_VID) + return false; + + /* PVID is set on this port. Any untagged ingress + * frame is considered to belong to this vlan. + */ + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), pvid); + return true; + } + + /* Frame had a valid vlan tag. See if vlan is allowed */ + if (test_bit(*vid, v->vlan_bitmap)) + return true; + + return false; +} + +/* Called under RCU. */ +bool br_allowed_egress(struct net_bridge *br, + const struct net_port_vlans *v, + const struct sk_buff *skb) +{ + u16 vid; + + if (!br->vlan_enabled) + return true; + + if (!v) + return false; + + br_vlan_get_tag(skb, &vid); + if (test_bit(vid, v->vlan_bitmap)) + return true; + + return false; +} + +/* Must be protected by RTNL */ +int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags) +{ + struct net_port_vlans *pv = NULL; + int err; + + ASSERT_RTNL(); + + pv = rtnl_dereference(br->vlan_info); + if (pv) + return __vlan_add(pv, vid, flags); + + /* Create port vlan information + */ + pv = kzalloc(sizeof(*pv), GFP_KERNEL); + if (!pv) + return -ENOMEM; + + pv->parent.br = br; + err = __vlan_add(pv, vid, flags); + if (err) + goto out; + + rcu_assign_pointer(br->vlan_info, pv); + return 0; +out: + kfree(pv); + return err; +} + +/* Must be protected by RTNL */ +int br_vlan_delete(struct net_bridge *br, u16 vid) +{ + struct net_port_vlans *pv; + + ASSERT_RTNL(); + + pv = rtnl_dereference(br->vlan_info); + if (!pv) + return -EINVAL; + + if (vid) { + /* If the VID !=0 remove fdb for this vid. VID 0 is special + * in that it's the default and is always there in the fdb. + */ + spin_lock_bh(&br->hash_lock); + fdb_delete_by_addr(br, br->dev->dev_addr, vid); + spin_unlock_bh(&br->hash_lock); + } + + __vlan_del(pv, vid); + return 0; +} + +void br_vlan_flush(struct net_bridge *br) +{ + struct net_port_vlans *pv; + + ASSERT_RTNL(); + pv = rtnl_dereference(br->vlan_info); + if (!pv) + return; + + __vlan_flush(pv); +} + +int br_vlan_filter_toggle(struct net_bridge *br, unsigned long val) +{ + if (!rtnl_trylock()) + return restart_syscall(); + + if (br->vlan_enabled == val) + goto unlock; + + br->vlan_enabled = val; + +unlock: + rtnl_unlock(); + return 0; +} + +/* Must be protected by RTNL */ +int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags) +{ + struct net_port_vlans *pv = NULL; + int err; + + ASSERT_RTNL(); + + pv = rtnl_dereference(port->vlan_info); + if (pv) + return __vlan_add(pv, vid, flags); + + /* Create port vlan information + */ + pv = kzalloc(sizeof(*pv), GFP_KERNEL); + if (!pv) { + err = -ENOMEM; + goto clean_up; + } + + pv->port_idx = port->port_no; + pv->parent.port = port; + err = __vlan_add(pv, vid, flags); + if (err) + goto clean_up; + + rcu_assign_pointer(port->vlan_info, pv); + return 0; + +clean_up: + kfree(pv); + return err; +} + +/* Must be protected by RTNL */ +int nbp_vlan_delete(struct net_bridge_port *port, u16 vid) +{ + struct net_port_vlans *pv; + + ASSERT_RTNL(); + + pv = rtnl_dereference(port->vlan_info); + if (!pv) + return -EINVAL; + + if (vid) { + /* If the VID !=0 remove fdb for this vid. VID 0 is special + * in that it's the default and is always there in the fdb. 
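+ * (e.g. nbp_vlan_delete(port, 10) also removes the fdb entry that + * __vlan_add() inserted for the port's mac address with vid 10.)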
+ */ + spin_lock_bh(&port->br->hash_lock); + fdb_delete_by_addr(port->br, port->dev->dev_addr, vid); + spin_unlock_bh(&port->br->hash_lock); + } + + return __vlan_del(pv, vid); +} + +void nbp_vlan_flush(struct net_bridge_port *port) +{ + struct net_port_vlans *pv; + + ASSERT_RTNL(); + + pv = rtnl_dereference(port->vlan_info); + if (!pv) + return; + + __vlan_flush(pv); +} + +bool nbp_vlan_find(struct net_bridge_port *port, u16 vid) +{ + struct net_port_vlans *pv; + bool found = false; + + rcu_read_lock(); + pv = rcu_dereference(port->vlan_info); + + if (!pv) + goto out; + + if (test_bit(vid, pv->vlan_bitmap)) + found = true; + +out: + rcu_read_unlock(); + return found; +} diff --git a/net/bridge/netfilter/ebt_log.c b/net/bridge/netfilter/ebt_log.c index 92de5e5f9db2..9878eb8204c5 100644 --- a/net/bridge/netfilter/ebt_log.c +++ b/net/bridge/netfilter/ebt_log.c @@ -78,6 +78,11 @@ ebt_log_packet(u_int8_t pf, unsigned int hooknum, const char *prefix) { unsigned int bitmask; + struct net *net = dev_net(in ? in : out); + + /* FIXME: Disabled from containers until syslog ns is supported */ + if (!net_eq(net, &init_net)) + return; spin_lock_bh(&ebt_log_lock); printk(KERN_SOH "%c%s IN=%s OUT=%s MAC source = %pM MAC dest = %pM proto = 0x%04x", @@ -176,17 +181,18 @@ ebt_log_tg(struct sk_buff *skb, const struct xt_action_param *par) { const struct ebt_log_info *info = par->targinfo; struct nf_loginfo li; + struct net *net = dev_net(par->in ? par->in : par->out); li.type = NF_LOG_TYPE_LOG; li.u.log.level = info->loglevel; li.u.log.logflags = info->bitmask; if (info->bitmask & EBT_LOG_NFLOG) - nf_log_packet(NFPROTO_BRIDGE, par->hooknum, skb, par->in, - par->out, &li, "%s", info->prefix); + nf_log_packet(net, NFPROTO_BRIDGE, par->hooknum, skb, + par->in, par->out, &li, "%s", info->prefix); else ebt_log_packet(NFPROTO_BRIDGE, par->hooknum, skb, par->in, - par->out, &li, info->prefix); + par->out, &li, info->prefix); return EBT_CONTINUE; } @@ -206,19 +212,47 @@ static struct nf_logger ebt_log_logger __read_mostly = { .me = THIS_MODULE, }; +static int __net_init ebt_log_net_init(struct net *net) +{ + nf_log_set(net, NFPROTO_BRIDGE, &ebt_log_logger); + return 0; +} + +static void __net_exit ebt_log_net_fini(struct net *net) +{ + nf_log_unset(net, &ebt_log_logger); +} + +static struct pernet_operations ebt_log_net_ops = { + .init = ebt_log_net_init, + .exit = ebt_log_net_fini, +}; + static int __init ebt_log_init(void) { int ret; + ret = register_pernet_subsys(&ebt_log_net_ops); + if (ret < 0) + goto err_pernet; + ret = xt_register_target(&ebt_log_tg_reg); if (ret < 0) - return ret; + goto err_target; + nf_log_register(NFPROTO_BRIDGE, &ebt_log_logger); - return 0; + + return ret; + +err_target: + unregister_pernet_subsys(&ebt_log_net_ops); +err_pernet: + return ret; } static void __exit ebt_log_fini(void) { + unregister_pernet_subsys(&ebt_log_net_ops); nf_log_unregister(&ebt_log_logger); xt_unregister_target(&ebt_log_tg_reg); } diff --git a/net/bridge/netfilter/ebt_nflog.c b/net/bridge/netfilter/ebt_nflog.c index 5be68bbcc341..59ac7952010d 100644 --- a/net/bridge/netfilter/ebt_nflog.c +++ b/net/bridge/netfilter/ebt_nflog.c @@ -24,14 +24,15 @@ ebt_nflog_tg(struct sk_buff *skb, const struct xt_action_param *par) { const struct ebt_nflog_info *info = par->targinfo; struct nf_loginfo li; + struct net *net = dev_net(par->in ? 
par->in : par->out); li.type = NF_LOG_TYPE_ULOG; li.u.ulog.copy_len = info->len; li.u.ulog.group = info->group; li.u.ulog.qthreshold = info->threshold; - nf_log_packet(PF_BRIDGE, par->hooknum, skb, par->in, par->out, - &li, "%s", info->prefix); + nf_log_packet(net, PF_BRIDGE, par->hooknum, skb, par->in, + par->out, &li, "%s", info->prefix); return EBT_CONTINUE; } diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c index 3476ec469740..fc1905c51417 100644 --- a/net/bridge/netfilter/ebt_ulog.c +++ b/net/bridge/netfilter/ebt_ulog.c @@ -35,12 +35,13 @@ #include <linux/skbuff.h> #include <linux/kernel.h> #include <linux/timer.h> -#include <linux/netlink.h> +#include <net/netlink.h> #include <linux/netdevice.h> #include <linux/netfilter/x_tables.h> #include <linux/netfilter_bridge/ebtables.h> #include <linux/netfilter_bridge/ebt_ulog.h> #include <net/netfilter/nf_log.h> +#include <net/netns/generic.h> #include <net/sock.h> #include "../br_private.h" @@ -62,16 +63,24 @@ typedef struct { spinlock_t lock; /* the per-queue lock */ } ebt_ulog_buff_t; -static ebt_ulog_buff_t ulog_buffers[EBT_ULOG_MAXNLGROUPS]; -static struct sock *ebtulognl; +static int ebt_ulog_net_id __read_mostly; +struct ebt_ulog_net { + unsigned int nlgroup[EBT_ULOG_MAXNLGROUPS]; + ebt_ulog_buff_t ulog_buffers[EBT_ULOG_MAXNLGROUPS]; + struct sock *ebtulognl; +}; + +static struct ebt_ulog_net *ebt_ulog_pernet(struct net *net) +{ + return net_generic(net, ebt_ulog_net_id); +} /* send one ulog_buff_t to userspace */ -static void ulog_send(unsigned int nlgroup) +static void ulog_send(struct ebt_ulog_net *ebt, unsigned int nlgroup) { - ebt_ulog_buff_t *ub = &ulog_buffers[nlgroup]; + ebt_ulog_buff_t *ub = &ebt->ulog_buffers[nlgroup]; - if (timer_pending(&ub->timer)) - del_timer(&ub->timer); + del_timer(&ub->timer); if (!ub->skb) return; @@ -81,7 +90,7 @@ static void ulog_send(unsigned int nlgroup) ub->lastnlh->nlmsg_type = NLMSG_DONE; NETLINK_CB(ub->skb).dst_group = nlgroup + 1; - netlink_broadcast(ebtulognl, ub->skb, 0, nlgroup + 1, GFP_ATOMIC); + netlink_broadcast(ebt->ebtulognl, ub->skb, 0, nlgroup + 1, GFP_ATOMIC); ub->qlen = 0; ub->skb = NULL; @@ -90,10 +99,15 @@ static void ulog_send(unsigned int nlgroup) /* timer function to flush queue in flushtimeout time */ static void ulog_timer(unsigned long data) { - spin_lock_bh(&ulog_buffers[data].lock); - if (ulog_buffers[data].skb) - ulog_send(data); - spin_unlock_bh(&ulog_buffers[data].lock); + struct ebt_ulog_net *ebt = container_of((void *)data, + struct ebt_ulog_net, + nlgroup[*(unsigned int *)data]); + + ebt_ulog_buff_t *ub = &ebt->ulog_buffers[*(unsigned int *)data]; + spin_lock_bh(&ub->lock); + if (ub->skb) + ulog_send(ebt, *(unsigned int *)data); + spin_unlock_bh(&ub->lock); } static struct sk_buff *ulog_alloc_skb(unsigned int size) @@ -124,8 +138,10 @@ static void ebt_ulog_packet(unsigned int hooknr, const struct sk_buff *skb, ebt_ulog_packet_msg_t *pm; size_t size, copy_len; struct nlmsghdr *nlh; + struct net *net = dev_net(in ? 
in : out); + struct ebt_ulog_net *ebt = ebt_ulog_pernet(net); unsigned int group = uloginfo->nlgroup; - ebt_ulog_buff_t *ub = &ulog_buffers[group]; + ebt_ulog_buff_t *ub = &ebt->ulog_buffers[group]; spinlock_t *lock = &ub->lock; ktime_t kt; @@ -135,7 +151,7 @@ static void ebt_ulog_packet(unsigned int hooknr, const struct sk_buff *skb, else copy_len = uloginfo->cprange; - size = NLMSG_SPACE(sizeof(*pm) + copy_len); + size = nlmsg_total_size(sizeof(*pm) + copy_len); if (size > nlbufsiz) { pr_debug("Size %Zd needed, but nlbufsiz=%d\n", size, nlbufsiz); return; } @@ -147,7 +163,7 @@ static void ebt_ulog_packet(unsigned int hooknr, const struct sk_buff *skb, if (!(ub->skb = ulog_alloc_skb(size))) goto unlock; } else if (size > skb_tailroom(ub->skb)) { - ulog_send(group); + ulog_send(ebt, group); if (!(ub->skb = ulog_alloc_skb(size))) goto unlock; @@ -206,7 +222,7 @@ static void ebt_ulog_packet(unsigned int hooknr, const struct sk_buff *skb, ub->lastnlh = nlh; if (ub->qlen >= uloginfo->qthreshold) - ulog_send(group); + ulog_send(ebt, group); else if (!timer_pending(&ub->timer)) { ub->timer.expires = jiffies + flushtimeout * HZ / 100; add_timer(&ub->timer); @@ -278,57 +294,89 @@ static struct nf_logger ebt_ulog_logger __read_mostly = { .me = THIS_MODULE, }; -static int __init ebt_ulog_init(void) +static int __net_init ebt_ulog_net_init(struct net *net) { - int ret; int i; + struct ebt_ulog_net *ebt = ebt_ulog_pernet(net); + struct netlink_kernel_cfg cfg = { .groups = EBT_ULOG_MAXNLGROUPS, }; - if (nlbufsiz >= 128*1024) { - pr_warning("Netlink buffer has to be <= 128kB," - " please try a smaller nlbufsiz parameter.\n"); - return -EINVAL; - } - /* initialize ulog_buffers */ for (i = 0; i < EBT_ULOG_MAXNLGROUPS; i++) { - setup_timer(&ulog_buffers[i].timer, ulog_timer, i); - spin_lock_init(&ulog_buffers[i].lock); + ebt->nlgroup[i] = i; + setup_timer(&ebt->ulog_buffers[i].timer, ulog_timer, + (unsigned long)&ebt->nlgroup[i]); + spin_lock_init(&ebt->ulog_buffers[i].lock); } - ebtulognl = netlink_kernel_create(&init_net, NETLINK_NFLOG, &cfg); - if (!ebtulognl) - ret = -ENOMEM; - else if ((ret = xt_register_target(&ebt_ulog_tg_reg)) != 0) - netlink_kernel_release(ebtulognl); - - if (ret == 0) - nf_log_register(NFPROTO_BRIDGE, &ebt_ulog_logger); + ebt->ebtulognl = netlink_kernel_create(net, NETLINK_NFLOG, &cfg); + if (!ebt->ebtulognl) + return -ENOMEM; - return ret; + nf_log_set(net, NFPROTO_BRIDGE, &ebt_ulog_logger); + return 0; } -static void __exit ebt_ulog_fini(void) +static void __net_exit ebt_ulog_net_fini(struct net *net) { - ebt_ulog_buff_t *ub; int i; + struct ebt_ulog_net *ebt = ebt_ulog_pernet(net); - nf_log_unregister(&ebt_ulog_logger); - xt_unregister_target(&ebt_ulog_tg_reg); + nf_log_unset(net, &ebt_ulog_logger); for (i = 0; i < EBT_ULOG_MAXNLGROUPS; i++) { - ub = &ulog_buffers[i]; - if (timer_pending(&ub->timer)) - del_timer(&ub->timer); - spin_lock_bh(&ub->lock); + ebt_ulog_buff_t *ub = &ebt->ulog_buffers[i]; + del_timer(&ub->timer); + if (ub->skb) { kfree_skb(ub->skb); ub->skb = NULL; } - spin_unlock_bh(&ub->lock); } - netlink_kernel_release(ebtulognl); + netlink_kernel_release(ebt->ebtulognl); +} + +static struct pernet_operations ebt_ulog_net_ops = { + .init = ebt_ulog_net_init, + .exit = ebt_ulog_net_fini, + .id = &ebt_ulog_net_id, + .size = sizeof(struct ebt_ulog_net), +}; + +static int __init ebt_ulog_init(void) +{ + int ret; + + if (nlbufsiz >= 128*1024) { + pr_warn("Netlink buffer has to be <= 128kB, " + "please try a smaller nlbufsiz parameter.\n"); + return -EINVAL; + } + + ret = 
register_pernet_subsys(&ebt_ulog_net_ops); + if (ret) + goto out_pernet; + + ret = xt_register_target(&ebt_ulog_tg_reg); + if (ret) + goto out_target; + + nf_log_register(NFPROTO_BRIDGE, &ebt_ulog_logger); + + return 0; + +out_target: + unregister_pernet_subsys(&ebt_ulog_net_ops); +out_pernet: + return ret; +} + +static void __exit ebt_ulog_fini(void) +{ + nf_log_unregister(&ebt_ulog_logger); + xt_unregister_target(&ebt_ulog_tg_reg); + unregister_pernet_subsys(&ebt_ulog_net_ops); } module_init(ebt_ulog_init); diff --git a/net/bridge/netfilter/ebtable_broute.c b/net/bridge/netfilter/ebtable_broute.c index 40d8258bf74f..70f656ce0f4a 100644 --- a/net/bridge/netfilter/ebtable_broute.c +++ b/net/bridge/netfilter/ebtable_broute.c @@ -64,9 +64,7 @@ static int ebt_broute(struct sk_buff *skb) static int __net_init broute_net_init(struct net *net) { net->xt.broute_table = ebt_register_table(net, &broute_table); - if (IS_ERR(net->xt.broute_table)) - return PTR_ERR(net->xt.broute_table); - return 0; + return PTR_RET(net->xt.broute_table); } static void __net_exit broute_net_exit(struct net *net) diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 5fe2ff3b01ef..3d110c4fc787 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -138,7 +138,7 @@ ebt_basic_match(const struct ebt_entry *e, const struct sk_buff *skb, ethproto = h->h_proto; if (e->bitmask & EBT_802_3) { - if (FWINV2(ntohs(ethproto) >= 1536, EBT_IPROTO)) + if (FWINV2(ntohs(ethproto) >= ETH_P_802_3_MIN, EBT_IPROTO)) return 1; } else if (!(e->bitmask & EBT_NOPROTO) && FWINV2(e->ethproto != ethproto, EBT_IPROTO)) @@ -1472,16 +1472,17 @@ static int do_ebt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) { int ret; + struct net *net = sock_net(sk); - if (!capable(CAP_NET_ADMIN)) + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) return -EPERM; switch(cmd) { case EBT_SO_SET_ENTRIES: - ret = do_replace(sock_net(sk), user, len); + ret = do_replace(net, user, len); break; case EBT_SO_SET_COUNTERS: - ret = update_counters(sock_net(sk), user, len); + ret = update_counters(net, user, len); break; default: ret = -EINVAL; @@ -1494,14 +1495,15 @@ static int do_ebt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) int ret; struct ebt_replace tmp; struct ebt_table *t; + struct net *net = sock_net(sk); - if (!capable(CAP_NET_ADMIN)) + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) return -EPERM; if (copy_from_user(&tmp, user, sizeof(tmp))) return -EFAULT; - t = find_table_lock(sock_net(sk), tmp.name, &ret, &ebt_mutex); + t = find_table_lock(net, tmp.name, &ret, &ebt_mutex); if (!t) return ret; @@ -2279,16 +2281,17 @@ static int compat_do_ebt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) { int ret; + struct net *net = sock_net(sk); - if (!capable(CAP_NET_ADMIN)) + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) return -EPERM; switch (cmd) { case EBT_SO_SET_ENTRIES: - ret = compat_do_replace(sock_net(sk), user, len); + ret = compat_do_replace(net, user, len); break; case EBT_SO_SET_COUNTERS: - ret = compat_update_counters(sock_net(sk), user, len); + ret = compat_update_counters(net, user, len); break; default: ret = -EINVAL; @@ -2302,8 +2305,9 @@ static int compat_do_ebt_get_ctl(struct sock *sk, int cmd, int ret; struct compat_ebt_replace tmp; struct ebt_table *t; + struct net *net = sock_net(sk); - if (!capable(CAP_NET_ADMIN)) + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) return -EPERM; /* try real handler in case userland supplied needed 
padding */ @@ -2314,7 +2318,7 @@ static int compat_do_ebt_get_ctl(struct sock *sk, int cmd, if (copy_from_user(&tmp, user, sizeof(tmp))) return -EFAULT; - t = find_table_lock(sock_net(sk), tmp.name, &ret, &ebt_mutex); + t = find_table_lock(net, tmp.name, &ret, &ebt_mutex); if (!t) return ret; diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c index 1ae1d9cb278d..1f9ece1a9c34 100644 --- a/net/caif/caif_dev.c +++ b/net/caif/caif_dev.c @@ -1,7 +1,7 @@ /* * CAIF Interface registration. * Copyright (C) ST-Ericsson AB 2010 - * Author: Sjur Brendeland/sjur.brandeland@stericsson.com + * Author: Sjur Brendeland * License terms: GNU General Public License (GPL) version 2 * * Borrowed heavily from file: pn_dev.c. Thanks to Remi Denis-Courmont @@ -118,7 +118,7 @@ static struct caif_device_entry *caif_get(struct net_device *dev) return NULL; } -void caif_flow_cb(struct sk_buff *skb) +static void caif_flow_cb(struct sk_buff *skb) { struct caif_device_entry *caifd; void (*dtor)(struct sk_buff *skb) = NULL; @@ -301,10 +301,11 @@ static void dev_flowctrl(struct net_device *dev, int on) } void caif_enroll_dev(struct net_device *dev, struct caif_dev_common *caifdev, - struct cflayer *link_support, int head_room, - struct cflayer **layer, int (**rcv_func)( - struct sk_buff *, struct net_device *, - struct packet_type *, struct net_device *)) + struct cflayer *link_support, int head_room, + struct cflayer **layer, + int (**rcv_func)(struct sk_buff *, struct net_device *, + struct packet_type *, + struct net_device *)) { struct caif_device_entry *caifd; enum cfcnfg_phy_preference pref; diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c index 095259f83902..05a41c7ec304 100644 --- a/net/caif/caif_socket.c +++ b/net/caif/caif_socket.c @@ -1,6 +1,6 @@ /* * Copyright (C) ST-Ericsson AB 2010 - * Author: Sjur Brendeland sjur.brandeland@stericsson.com + * Author: Sjur Brendeland * License terms: GNU General Public License (GPL) version 2 */ @@ -197,8 +197,8 @@ static void cfsk_put(struct cflayer *layr) /* Packet Control Callback function called from CAIF */ static void caif_ctrl_cb(struct cflayer *layr, - enum caif_ctrlcmd flow, - int phyid) + enum caif_ctrlcmd flow, + int phyid) { struct caifsock *cf_sk = container_of(layr, struct caifsock, layer); switch (flow) { @@ -274,7 +274,7 @@ static void caif_check_flow_release(struct sock *sk) * changed locking, address handling and added MSG_TRUNC. */ static int caif_seqpkt_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *m, size_t len, int flags) + struct msghdr *m, size_t len, int flags) { struct sock *sk = sock->sk; @@ -286,6 +286,8 @@ static int caif_seqpkt_recvmsg(struct kiocb *iocb, struct socket *sock, if (m->msg_flags&MSG_OOB) goto read_error; + m->msg_namelen = 0; + skb = skb_recv_datagram(sk, flags, 0 , &ret); if (!skb) goto read_error; @@ -346,8 +348,8 @@ static long caif_stream_data_wait(struct sock *sk, long timeo) * changed locking calls, changed address handling. */ static int caif_stream_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t size, - int flags) + struct msghdr *msg, size_t size, + int flags) { struct sock *sk = sock->sk; int copied = 0; @@ -462,7 +464,7 @@ out: * CAIF flow-on and sock_writable. 
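* (The function uses the DEFINE_WAIT() below to sleep until these conditions hold, or until the timeout expires or a signal arrives.)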
*/ static long caif_wait_for_flow_on(struct caifsock *cf_sk, - int wait_writeable, long timeo, int *err) + int wait_writeable, long timeo, int *err) { struct sock *sk = &cf_sk->sk; DEFINE_WAIT(wait); @@ -516,7 +518,7 @@ static int transmit_skb(struct sk_buff *skb, struct caifsock *cf_sk, /* Copied from af_unix:unix_dgram_sendmsg, and adapted to CAIF */ static int caif_seqpkt_sendmsg(struct kiocb *kiocb, struct socket *sock, - struct msghdr *msg, size_t len) + struct msghdr *msg, size_t len) { struct sock *sk = sock->sk; struct caifsock *cf_sk = container_of(sk, struct caifsock, sk); @@ -591,7 +593,7 @@ err: * and other minor adaptations. */ static int caif_stream_sendmsg(struct kiocb *kiocb, struct socket *sock, - struct msghdr *msg, size_t len) + struct msghdr *msg, size_t len) { struct sock *sk = sock->sk; struct caifsock *cf_sk = container_of(sk, struct caifsock, sk); @@ -670,7 +672,7 @@ out_err: } static int setsockopt(struct socket *sock, - int lvl, int opt, char __user *ov, unsigned int ol) + int lvl, int opt, char __user *ov, unsigned int ol) { struct sock *sk = sock->sk; struct caifsock *cf_sk = container_of(sk, struct caifsock, sk); @@ -932,7 +934,7 @@ static int caif_release(struct socket *sock) /* Copied from af_unix.c:unix_poll(), added CAIF tx_flow handling */ static unsigned int caif_poll(struct file *file, - struct socket *sock, poll_table *wait) + struct socket *sock, poll_table *wait) { struct sock *sk = sock->sk; unsigned int mask; @@ -1022,7 +1024,7 @@ static void caif_sock_destructor(struct sock *sk) } static int caif_create(struct net *net, struct socket *sock, int protocol, - int kern) + int kern) { struct sock *sk = NULL; struct caifsock *cf_sk = NULL; diff --git a/net/caif/caif_usb.c b/net/caif/caif_usb.c index 3ebc8cbc91ff..942e00a425fd 100644 --- a/net/caif/caif_usb.c +++ b/net/caif/caif_usb.c @@ -1,7 +1,7 @@ /* * CAIF USB handler * Copyright (C) ST-Ericsson AB 2011 - * Author: Sjur Brendeland/sjur.brandeland@stericsson.com + * Author: Sjur Brendeland * License terms: GNU General Public License (GPL) version 2 * */ @@ -75,14 +75,14 @@ static int cfusbl_transmit(struct cflayer *layr, struct cfpkt *pkt) } static void cfusbl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl, - int phyid) + int phyid) { if (layr->up && layr->up->ctrlcmd) layr->up->ctrlcmd(layr->up, ctrl, layr->id); } -struct cflayer *cfusbl_create(int phyid, u8 ethaddr[ETH_ALEN], - u8 braddr[ETH_ALEN]) +static struct cflayer *cfusbl_create(int phyid, u8 ethaddr[ETH_ALEN], + u8 braddr[ETH_ALEN]) { struct cfusbl *this = kmalloc(sizeof(struct cfusbl), GFP_ATOMIC); @@ -121,7 +121,7 @@ static struct packet_type caif_usb_type __read_mostly = { }; static int cfusbl_device_notify(struct notifier_block *me, unsigned long what, - void *arg) + void *arg) { struct net_device *dev = arg; struct caif_dev_common common; diff --git a/net/caif/cfcnfg.c b/net/caif/cfcnfg.c index ba9cfd47778a..fa39fc298708 100644 --- a/net/caif/cfcnfg.c +++ b/net/caif/cfcnfg.c @@ -1,6 +1,6 @@ /* * Copyright (C) ST-Ericsson AB 2010 - * Author: Sjur Brendeland/sjur.brandeland@stericsson.com + * Author: Sjur Brendeland * License terms: GNU General Public License (GPL) version 2 */ @@ -61,11 +61,11 @@ struct cfcnfg { }; static void cfcnfg_linkup_rsp(struct cflayer *layer, u8 channel_id, - enum cfctrl_srv serv, u8 phyid, - struct cflayer *adapt_layer); + enum cfctrl_srv serv, u8 phyid, + struct cflayer *adapt_layer); static void cfcnfg_linkdestroy_rsp(struct cflayer *layer, u8 channel_id); static void cfcnfg_reject_rsp(struct cflayer 
*layer, u8 channel_id, - struct cflayer *adapt_layer); + struct cflayer *adapt_layer); static void cfctrl_resp_func(void); static void cfctrl_enum_resp(void); @@ -131,7 +131,7 @@ static void cfctrl_resp_func(void) } static struct cfcnfg_phyinfo *cfcnfg_get_phyinfo_rcu(struct cfcnfg *cnfg, - u8 phyid) + u8 phyid) { struct cfcnfg_phyinfo *phy; @@ -216,8 +216,8 @@ static const int protohead[CFCTRL_SRV_MASK] = { static int caif_connect_req_to_link_param(struct cfcnfg *cnfg, - struct caif_connect_request *s, - struct cfctrl_link_param *l) + struct caif_connect_request *s, + struct cfctrl_link_param *l) { struct dev_info *dev_info; enum cfcnfg_phy_preference pref; @@ -301,8 +301,7 @@ static int caif_connect_req_to_link_param(struct cfcnfg *cnfg, int caif_connect_client(struct net *net, struct caif_connect_request *conn_req, struct cflayer *adap_layer, int *ifindex, - int *proto_head, - int *proto_tail) + int *proto_head, int *proto_tail) { struct cflayer *frml; struct cfcnfg_phyinfo *phy; @@ -364,7 +363,7 @@ unlock: EXPORT_SYMBOL(caif_connect_client); static void cfcnfg_reject_rsp(struct cflayer *layer, u8 channel_id, - struct cflayer *adapt_layer) + struct cflayer *adapt_layer) { if (adapt_layer != NULL && adapt_layer->ctrlcmd != NULL) adapt_layer->ctrlcmd(adapt_layer, @@ -402,7 +401,7 @@ cfcnfg_linkup_rsp(struct cflayer *layer, u8 channel_id, enum cfctrl_srv serv, phyinfo = cfcnfg_get_phyinfo_rcu(cnfg, phyid); if (phyinfo == NULL) { - pr_err("ERROR: Link Layer Device dissapeared" + pr_err("ERROR: Link Layer Device disappeared" "while connecting\n"); goto unlock; } @@ -526,7 +525,7 @@ out_err: EXPORT_SYMBOL(cfcnfg_add_phy_layer); int cfcnfg_set_phy_state(struct cfcnfg *cnfg, struct cflayer *phy_layer, - bool up) + bool up) { struct cfcnfg_phyinfo *phyinfo; diff --git a/net/caif/cfctrl.c b/net/caif/cfctrl.c index a376ec1ac0a7..2bd4b58f4372 100644 --- a/net/caif/cfctrl.c +++ b/net/caif/cfctrl.c @@ -1,6 +1,6 @@ /* * Copyright (C) ST-Ericsson AB 2010 - * Author: Sjur Brendeland/sjur.brandeland@stericsson.com + * Author: Sjur Brendeland * License terms: GNU General Public License (GPL) version 2 */ @@ -20,12 +20,12 @@ #ifdef CAIF_NO_LOOP static int handle_loop(struct cfctrl *ctrl, - int cmd, struct cfpkt *pkt){ + int cmd, struct cfpkt *pkt){ return -1; } #else static int handle_loop(struct cfctrl *ctrl, - int cmd, struct cfpkt *pkt); + int cmd, struct cfpkt *pkt); #endif static int cfctrl_recv(struct cflayer *layr, struct cfpkt *pkt); static void cfctrl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl, @@ -72,7 +72,7 @@ void cfctrl_remove(struct cflayer *layer) } static bool param_eq(const struct cfctrl_link_param *p1, - const struct cfctrl_link_param *p2) + const struct cfctrl_link_param *p2) { bool eq = p1->linktype == p2->linktype && @@ -197,8 +197,8 @@ void cfctrl_enum_req(struct cflayer *layer, u8 physlinkid) } int cfctrl_linkup_request(struct cflayer *layer, - struct cfctrl_link_param *param, - struct cflayer *user_layer) + struct cfctrl_link_param *param, + struct cflayer *user_layer) { struct cfctrl *cfctrl = container_obj(layer); u32 tmp32; @@ -301,7 +301,7 @@ int cfctrl_linkup_request(struct cflayer *layer, } int cfctrl_linkdown_req(struct cflayer *layer, u8 channelid, - struct cflayer *client) + struct cflayer *client) { int ret; struct cfpkt *pkt; @@ -555,7 +555,7 @@ error: } static void cfctrl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl, - int phyid) + int phyid) { struct cfctrl *this = container_obj(layr); switch (ctrl) { diff --git a/net/caif/cfdbgl.c b/net/caif/cfdbgl.c 
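Nearly everything else in the caif hunks through this stretch is whitespace-only: continuation lines of multi-line prototypes are re-indented so they line up under the first argument, matching common kernel style. Illustrative only:

/* before: continuation indented by an arbitrary amount */
static void cfcnfg_reject_rsp(struct cflayer *layer, u8 channel_id,
		struct cflayer *adapt_layer);

/* after: continuation aligned with the opening parenthesis */
static void cfcnfg_reject_rsp(struct cflayer *layer, u8 channel_id,
			      struct cflayer *adapt_layer);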
index 2914659eb9b2..7aae0b56829e 100644 --- a/net/caif/cfdbgl.c +++ b/net/caif/cfdbgl.c @@ -1,6 +1,6 @@ /* * Copyright (C) ST-Ericsson AB 2010 - * Author: Sjur Brendeland/sjur.brandeland@stericsson.com + * Author: Sjur Brendeland * License terms: GNU General Public License (GPL) version 2 */ diff --git a/net/caif/cfdgml.c b/net/caif/cfdgml.c index a63f4a5f5aff..3bdddb32d55a 100644 --- a/net/caif/cfdgml.c +++ b/net/caif/cfdgml.c @@ -1,6 +1,6 @@ /* * Copyright (C) ST-Ericsson AB 2010 - * Author: Sjur Brendeland/sjur.brandeland@stericsson.com + * Author: Sjur Brendeland * License terms: GNU General Public License (GPL) version 2 */ diff --git a/net/caif/cffrml.c b/net/caif/cffrml.c index 0a7df7ef062d..8bc7caa28e64 100644 --- a/net/caif/cffrml.c +++ b/net/caif/cffrml.c @@ -2,7 +2,7 @@ * CAIF Framing Layer. * * Copyright (C) ST-Ericsson AB 2010 - * Author: Sjur Brendeland/sjur.brandeland@stericsson.com + * Author: Sjur Brendeland * License terms: GNU General Public License (GPL) version 2 */ @@ -28,7 +28,7 @@ struct cffrml { static int cffrml_receive(struct cflayer *layr, struct cfpkt *pkt); static int cffrml_transmit(struct cflayer *layr, struct cfpkt *pkt); static void cffrml_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl, - int phyid); + int phyid); static u32 cffrml_rcv_error; static u32 cffrml_rcv_checsum_error; @@ -167,7 +167,7 @@ static int cffrml_transmit(struct cflayer *layr, struct cfpkt *pkt) } static void cffrml_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl, - int phyid) + int phyid) { if (layr->up && layr->up->ctrlcmd) layr->up->ctrlcmd(layr->up, ctrl, layr->id); diff --git a/net/caif/cfmuxl.c b/net/caif/cfmuxl.c index 94b08612a4d8..8c5d6386319f 100644 --- a/net/caif/cfmuxl.c +++ b/net/caif/cfmuxl.c @@ -1,6 +1,6 @@ /* * Copyright (C) ST-Ericsson AB 2010 - * Author: Sjur Brendeland/sjur.brandeland@stericsson.com + * Author: Sjur Brendeland * License terms: GNU General Public License (GPL) version 2 */ @@ -42,7 +42,7 @@ struct cfmuxl { static int cfmuxl_receive(struct cflayer *layr, struct cfpkt *pkt); static int cfmuxl_transmit(struct cflayer *layr, struct cfpkt *pkt); static void cfmuxl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl, - int phyid); + int phyid); static struct cflayer *get_up(struct cfmuxl *muxl, u16 id); struct cflayer *cfmuxl_create(void) @@ -244,7 +244,7 @@ static int cfmuxl_transmit(struct cflayer *layr, struct cfpkt *pkt) } static void cfmuxl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl, - int phyid) + int phyid) { struct cfmuxl *muxl = container_obj(layr); struct cflayer *layer; diff --git a/net/caif/cfpkt_skbuff.c b/net/caif/cfpkt_skbuff.c index 863dedd91bb6..6493351f39c6 100644 --- a/net/caif/cfpkt_skbuff.c +++ b/net/caif/cfpkt_skbuff.c @@ -1,6 +1,6 @@ /* * Copyright (C) ST-Ericsson AB 2010 - * Author: Sjur Brendeland/sjur.brandeland@stericsson.com + * Author: Sjur Brendeland * License terms: GNU General Public License (GPL) version 2 */ @@ -266,8 +266,8 @@ inline u16 cfpkt_getlen(struct cfpkt *pkt) } inline u16 cfpkt_iterate(struct cfpkt *pkt, - u16 (*iter_func)(u16, void *, u16), - u16 data) + u16 (*iter_func)(u16, void *, u16), + u16 data) { /* * Don't care about the performance hit of linearizing, @@ -307,8 +307,8 @@ int cfpkt_setlen(struct cfpkt *pkt, u16 len) } struct cfpkt *cfpkt_append(struct cfpkt *dstpkt, - struct cfpkt *addpkt, - u16 expectlen) + struct cfpkt *addpkt, + u16 expectlen) { struct sk_buff *dst = pkt_to_skb(dstpkt); struct sk_buff *add = pkt_to_skb(addpkt); diff --git a/net/caif/cfrfml.c b/net/caif/cfrfml.c 
index 2b563ad04597..61d7617d9249 100644 --- a/net/caif/cfrfml.c +++ b/net/caif/cfrfml.c @@ -1,6 +1,6 @@ /* * Copyright (C) ST-Ericsson AB 2010 - * Author: Sjur Brendeland/sjur.brandeland@stericsson.com + * Author: Sjur Brendeland * License terms: GNU General Public License (GPL) version 2 */ @@ -43,7 +43,7 @@ static void cfrfml_release(struct cflayer *layer) } struct cflayer *cfrfml_create(u8 channel_id, struct dev_info *dev_info, - int mtu_size) + int mtu_size) { int tmp; struct cfrfml *this = kzalloc(sizeof(struct cfrfml), GFP_ATOMIC); @@ -69,7 +69,7 @@ struct cflayer *cfrfml_create(u8 channel_id, struct dev_info *dev_info, } static struct cfpkt *rfm_append(struct cfrfml *rfml, char *seghead, - struct cfpkt *pkt, int *err) + struct cfpkt *pkt, int *err) { struct cfpkt *tmppkt; *err = -EPROTO; diff --git a/net/caif/cfserl.c b/net/caif/cfserl.c index 8e68b97f13ee..ce60f06d76de 100644 --- a/net/caif/cfserl.c +++ b/net/caif/cfserl.c @@ -1,6 +1,6 @@ /* * Copyright (C) ST-Ericsson AB 2010 - * Author: Sjur Brendeland/sjur.brandeland@stericsson.com + * Author: Sjur Brendeland * License terms: GNU General Public License (GPL) version 2 */ @@ -29,7 +29,7 @@ struct cfserl { static int cfserl_receive(struct cflayer *layr, struct cfpkt *pkt); static int cfserl_transmit(struct cflayer *layr, struct cfpkt *pkt); static void cfserl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl, - int phyid); + int phyid); struct cflayer *cfserl_create(int instance, bool use_stx) { @@ -182,7 +182,7 @@ static int cfserl_transmit(struct cflayer *layer, struct cfpkt *newpkt) } static void cfserl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl, - int phyid) + int phyid) { layr->up->ctrlcmd(layr->up, ctrl, phyid); } diff --git a/net/caif/cfsrvl.c b/net/caif/cfsrvl.c index ba217e90765e..353f793d1b3b 100644 --- a/net/caif/cfsrvl.c +++ b/net/caif/cfsrvl.c @@ -1,6 +1,6 @@ /* * Copyright (C) ST-Ericsson AB 2010 - * Author: Sjur Brendeland/sjur.brandeland@stericsson.com + * Author: Sjur Brendeland * License terms: GNU General Public License (GPL) version 2 */ @@ -25,7 +25,7 @@ #define container_obj(layr) container_of(layr, struct cfsrvl, layer) static void cfservl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl, - int phyid) + int phyid) { struct cfsrvl *service = container_obj(layr); @@ -158,10 +158,9 @@ static void cfsrvl_release(struct cflayer *layer) } void cfsrvl_init(struct cfsrvl *service, - u8 channel_id, - struct dev_info *dev_info, - bool supports_flowctrl - ) + u8 channel_id, + struct dev_info *dev_info, + bool supports_flowctrl) { caif_assert(offsetof(struct cfsrvl, layer) == 0); service->open = false; @@ -207,8 +206,8 @@ void caif_free_client(struct cflayer *adap_layer) EXPORT_SYMBOL(caif_free_client); void caif_client_register_refcnt(struct cflayer *adapt_layer, - void (*hold)(struct cflayer *lyr), - void (*put)(struct cflayer *lyr)) + void (*hold)(struct cflayer *lyr), + void (*put)(struct cflayer *lyr)) { struct cfsrvl *service; diff --git a/net/caif/cfutill.c b/net/caif/cfutill.c index 86d2dadb4b73..1728fa4471cf 100644 --- a/net/caif/cfutill.c +++ b/net/caif/cfutill.c @@ -1,6 +1,6 @@ /* * Copyright (C) ST-Ericsson AB 2010 - * Author: Sjur Brendeland/sjur.brandeland@stericsson.com + * Author: Sjur Brendeland * License terms: GNU General Public License (GPL) version 2 */ diff --git a/net/caif/cfveil.c b/net/caif/cfveil.c index 910ab0661f66..262224581efa 100644 --- a/net/caif/cfveil.c +++ b/net/caif/cfveil.c @@ -1,6 +1,6 @@ /* * Copyright (C) ST-Ericsson AB 2010 - * Author: Sjur 
Brendeland/sjur.brandeland@stericsson.com + * Author: Sjur Brendeland * License terms: GNU General Public License (GPL) version 2 */ diff --git a/net/caif/cfvidl.c b/net/caif/cfvidl.c index a8e2a2d758a5..b3b110e8a350 100644 --- a/net/caif/cfvidl.c +++ b/net/caif/cfvidl.c @@ -1,6 +1,6 @@ /* * Copyright (C) ST-Ericsson AB 2010 - * Author: Sjur Brendeland/sjur.brandeland@stericsson.com + * Author: Sjur Brendeland * License terms: GNU General Public License (GPL) version 2 */ diff --git a/net/caif/chnl_net.c b/net/caif/chnl_net.c index e597733affb8..7344a8fa1bb0 100644 --- a/net/caif/chnl_net.c +++ b/net/caif/chnl_net.c @@ -1,7 +1,7 @@ /* * Copyright (C) ST-Ericsson AB 2010 - * Authors: Sjur Brendeland/sjur.brandeland@stericsson.com - * Daniel Martensson / Daniel.Martensson@stericsson.com + * Authors: Sjur Brendeland + * Daniel Martensson * License terms: GNU General Public License (GPL) version 2 */ @@ -167,7 +167,7 @@ static void chnl_put(struct cflayer *lyr) } static void chnl_flowctrl_cb(struct cflayer *layr, enum caif_ctrlcmd flow, - int phyid) + int phyid) { struct chnl_net *priv = container_of(layr, struct chnl_net, chnl); pr_debug("NET flowctrl func called flow: %s\n", @@ -443,7 +443,7 @@ nla_put_failure: } static void caif_netlink_parms(struct nlattr *data[], - struct caif_connect_request *conn_req) + struct caif_connect_request *conn_req) { if (!data) { pr_warn("no params data found\n"); @@ -488,7 +488,7 @@ static int ipcaif_newlink(struct net *src_net, struct net_device *dev, } static int ipcaif_changelink(struct net_device *dev, struct nlattr *tb[], - struct nlattr *data[]) + struct nlattr *data[]) { struct chnl_net *caifdev; ASSERT_RTNL(); diff --git a/net/can/Kconfig b/net/can/Kconfig index 03200699d274..a15c0e0d1fc7 100644 --- a/net/can/Kconfig +++ b/net/can/Kconfig @@ -16,10 +16,11 @@ menuconfig CAN If you want CAN support you should say Y here and also to the specific driver for your controller(s) below. +if CAN + config CAN_RAW tristate "Raw CAN Protocol (raw access with CAN-ID filtering)" - depends on CAN - default N + default y ---help--- The raw CAN protocol option offers access to the CAN bus via the BSD socket API. You probably want to use the raw socket in @@ -29,8 +30,7 @@ config CAN_RAW config CAN_BCM tristate "Broadcast Manager CAN Protocol (with content filtering)" - depends on CAN - default N + default y ---help--- The Broadcast Manager offers content filtering, timeout monitoring, sending of RTR frames, and cyclic CAN messages without permanent user @@ -42,8 +42,7 @@ config CAN_BCM config CAN_GW tristate "CAN Gateway/Router (with netlink configuration)" - depends on CAN - default N + default y ---help--- The CAN Gateway/Router is used to route (and modify) CAN frames. It is based on the PF_CAN core infrastructure for msg filtering and @@ -53,3 +52,5 @@ config CAN_GW by the netlink configuration interface known e.g. from iptables. 
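The net/can/Kconfig hunk above wraps all CAN protocol options in a single if CAN ... endif block, which is why every per-option "depends on CAN" line can be dropped, and switches the defaults to y. CAN_RAW is the option the help text describes as raw access via the BSD socket API; as a concrete illustration, a minimal user-space sender might look like this (assumes a configured interface named can0; error paths shortened):

#include <string.h>
#include <unistd.h>
#include <net/if.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <linux/can.h>
#include <linux/can/raw.h>

int main(void)
{
	struct sockaddr_can addr = { 0 };
	struct can_frame frame = { 0 };
	struct ifreq ifr;
	int s = socket(PF_CAN, SOCK_RAW, CAN_RAW);

	if (s < 0)
		return 1;

	memset(&ifr, 0, sizeof(ifr));
	strcpy(ifr.ifr_name, "can0");		/* assumed interface name */
	if (ioctl(s, SIOCGIFINDEX, &ifr) < 0)
		return 1;

	addr.can_family = AF_CAN;
	addr.can_ifindex = ifr.ifr_ifindex;
	if (bind(s, (struct sockaddr *)&addr, sizeof(addr)) < 0)
		return 1;

	frame.can_id  = 0x123;			/* standard 11-bit id */
	frame.can_dlc = 2;
	frame.data[0] = 0xde;
	frame.data[1] = 0xad;
	write(s, &frame, sizeof(frame));	/* one frame on the wire */

	close(s);
	return 0;
}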
source "drivers/net/can/Kconfig" + +endif diff --git a/net/can/af_can.c b/net/can/af_can.c index ddac1ee2ed20..c4e50852c9f4 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -516,7 +516,6 @@ void can_rx_unregister(struct net_device *dev, canid_t can_id, canid_t mask, { struct receiver *r = NULL; struct hlist_head *rl; - struct hlist_node *next; struct dev_rcv_lists *d; if (dev && dev->type != ARPHRD_CAN) @@ -526,7 +525,7 @@ void can_rx_unregister(struct net_device *dev, canid_t can_id, canid_t mask, d = find_dev_rcv_lists(dev); if (!d) { - printk(KERN_ERR "BUG: receive list not found for " + pr_err("BUG: receive list not found for " "dev %s, id %03X, mask %03X\n", DNAME(dev), can_id, mask); goto out; @@ -540,23 +539,20 @@ void can_rx_unregister(struct net_device *dev, canid_t can_id, canid_t mask, * been registered before. */ - hlist_for_each_entry_rcu(r, next, rl, list) { + hlist_for_each_entry_rcu(r, rl, list) { if (r->can_id == can_id && r->mask == mask && r->func == func && r->data == data) break; } /* - * Check for bugs in CAN protocol implementations: - * If no matching list item was found, the list cursor variable next - * will be NULL, while r will point to the last item of the list. + * Check for bugs in CAN protocol implementations using af_can.c: + * 'r' will be NULL if no matching list item was found for removal. */ - if (!next) { - printk(KERN_ERR "BUG: receive list entry not found for " - "dev %s, id %03X, mask %03X\n", - DNAME(dev), can_id, mask); - r = NULL; + if (!r) { + WARN(1, "BUG: receive list entry not found for dev %s, " + "id %03X, mask %03X\n", DNAME(dev), can_id, mask); goto out; } @@ -590,7 +586,6 @@ static inline void deliver(struct sk_buff *skb, struct receiver *r) static int can_rcv_filter(struct dev_rcv_lists *d, struct sk_buff *skb) { struct receiver *r; - struct hlist_node *n; int matches = 0; struct can_frame *cf = (struct can_frame *)skb->data; canid_t can_id = cf->can_id; @@ -600,7 +595,7 @@ static int can_rcv_filter(struct dev_rcv_lists *d, struct sk_buff *skb) if (can_id & CAN_ERR_FLAG) { /* check for error message frame entries only */ - hlist_for_each_entry_rcu(r, n, &d->rx[RX_ERR], list) { + hlist_for_each_entry_rcu(r, &d->rx[RX_ERR], list) { if (can_id & r->mask) { deliver(skb, r); matches++; @@ -610,13 +605,13 @@ static int can_rcv_filter(struct dev_rcv_lists *d, struct sk_buff *skb) } /* check for unfiltered entries */ - hlist_for_each_entry_rcu(r, n, &d->rx[RX_ALL], list) { + hlist_for_each_entry_rcu(r, &d->rx[RX_ALL], list) { deliver(skb, r); matches++; } /* check for can_id/mask entries */ - hlist_for_each_entry_rcu(r, n, &d->rx[RX_FIL], list) { + hlist_for_each_entry_rcu(r, &d->rx[RX_FIL], list) { if ((can_id & r->mask) == r->can_id) { deliver(skb, r); matches++; @@ -624,7 +619,7 @@ static int can_rcv_filter(struct dev_rcv_lists *d, struct sk_buff *skb) } /* check for inverted can_id/mask entries */ - hlist_for_each_entry_rcu(r, n, &d->rx[RX_INV], list) { + hlist_for_each_entry_rcu(r, &d->rx[RX_INV], list) { if ((can_id & r->mask) != r->can_id) { deliver(skb, r); matches++; @@ -636,7 +631,7 @@ static int can_rcv_filter(struct dev_rcv_lists *d, struct sk_buff *skb) return matches; if (can_id & CAN_EFF_FLAG) { - hlist_for_each_entry_rcu(r, n, &d->rx[RX_EFF], list) { + hlist_for_each_entry_rcu(r, &d->rx[RX_EFF], list) { if (r->can_id == can_id) { deliver(skb, r); matches++; @@ -644,7 +639,7 @@ static int can_rcv_filter(struct dev_rcv_lists *d, struct sk_buff *skb) } } else { can_id &= CAN_SFF_MASK; - hlist_for_each_entry_rcu(r, n, 
&d->rx_sff[can_id], list) { + hlist_for_each_entry_rcu(r, &d->rx_sff[can_id], list) { deliver(skb, r); matches++; } @@ -751,8 +746,7 @@ int can_proto_register(const struct can_proto *cp) int err = 0; if (proto < 0 || proto >= CAN_NPROTO) { - printk(KERN_ERR "can: protocol number %d out of range\n", - proto); + pr_err("can: protocol number %d out of range\n", proto); return -EINVAL; } @@ -763,8 +757,7 @@ int can_proto_register(const struct can_proto *cp) mutex_lock(&proto_tab_lock); if (proto_tab[proto]) { - printk(KERN_ERR "can: protocol %d already registered\n", - proto); + pr_err("can: protocol %d already registered\n", proto); err = -EBUSY; } else RCU_INIT_POINTER(proto_tab[proto], cp); @@ -818,11 +811,8 @@ static int can_notifier(struct notifier_block *nb, unsigned long msg, /* create new dev_rcv_lists for this device */ d = kzalloc(sizeof(*d), GFP_KERNEL); - if (!d) { - printk(KERN_ERR - "can: allocation of receive list failed\n"); + if (!d) return NOTIFY_DONE; - } BUG_ON(dev->ml_priv); dev->ml_priv = d; @@ -840,8 +830,8 @@ static int can_notifier(struct notifier_block *nb, unsigned long msg, dev->ml_priv = NULL; } } else - printk(KERN_ERR "can: notifier: receive list not " - "found for dev %s\n", dev->name); + pr_err("can: notifier: receive list not found for dev " + "%s\n", dev->name); spin_unlock(&can_rcvlists_lock); @@ -929,7 +919,7 @@ static __exit void can_exit(void) /* remove created dev_rcv_lists from still registered CAN devices */ rcu_read_lock(); for_each_netdev_rcu(&init_net, dev) { - if (dev->type == ARPHRD_CAN && dev->ml_priv){ + if (dev->type == ARPHRD_CAN && dev->ml_priv) { struct dev_rcv_lists *d = dev->ml_priv; diff --git a/net/can/bcm.c b/net/can/bcm.c index 969b7cdff59d..8f113e6ff327 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -54,6 +54,7 @@ #include <linux/skbuff.h> #include <linux/can.h> #include <linux/can/core.h> +#include <linux/can/skb.h> #include <linux/can/bcm.h> #include <linux/slab.h> #include <net/sock.h> @@ -225,7 +226,7 @@ static int bcm_proc_show(struct seq_file *m, void *v) static int bcm_proc_open(struct inode *inode, struct file *file) { - return single_open(file, bcm_proc_show, PDE(inode)->data); + return single_open(file, bcm_proc_show, PDE_DATA(inode)); } static const struct file_operations bcm_proc_fops = { @@ -256,10 +257,13 @@ static void bcm_can_tx(struct bcm_op *op) return; } - skb = alloc_skb(CFSIZ, gfp_any()); + skb = alloc_skb(CFSIZ + sizeof(struct can_skb_priv), gfp_any()); if (!skb) goto out; + can_skb_reserve(skb); + can_skb_prv(skb)->ifindex = dev->ifindex; + memcpy(skb_put(skb, CFSIZ), cf, CFSIZ); /* send with loopback */ @@ -1199,11 +1203,12 @@ static int bcm_tx_send(struct msghdr *msg, int ifindex, struct sock *sk) if (!ifindex) return -ENODEV; - skb = alloc_skb(CFSIZ, GFP_KERNEL); - + skb = alloc_skb(CFSIZ + sizeof(struct can_skb_priv), GFP_KERNEL); if (!skb) return -ENOMEM; + can_skb_reserve(skb); + err = memcpy_fromiovec(skb_put(skb, CFSIZ), msg->msg_iov, CFSIZ); if (err < 0) { kfree_skb(skb); @@ -1216,6 +1221,7 @@ static int bcm_tx_send(struct msghdr *msg, int ifindex, struct sock *sk) return -ENODEV; } + can_skb_prv(skb)->ifindex = dev->ifindex; skb->dev = dev; skb->sk = sk; err = can_send(skb, 1); /* send with loopback */ @@ -1627,7 +1633,7 @@ static void __exit bcm_module_exit(void) can_proto_unregister(&bcm_can_proto); if (proc_dir) - proc_net_remove(&init_net, "can-bcm"); + remove_proc_entry("can-bcm", init_net.proc_net); } module_init(bcm_module_init); diff --git a/net/can/gw.c b/net/can/gw.c index 
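Most of the af_can.c churn above is fallout from the tree-wide hlist iterator rework: the iterators now derive their cursor from the entry pointer itself, so the separate struct hlist_node parameter disappears. The shape of the conversion, using the receiver list as in can_rcv_filter():

struct receiver *r;

/* old style: a struct hlist_node cursor threaded through by hand
 *
 *	struct hlist_node *n;
 *	hlist_for_each_entry_rcu(r, n, rx_list, list)
 *		deliver(skb, r);
 */

/* new style: the entry pointer is the only loop variable */
hlist_for_each_entry_rcu(r, rx_list, list)
	deliver(skb, r);

A useful side effect, which can_rx_unregister() now relies on: when the new-style loop runs off the end without a break, the entry pointer is NULL instead of dangling at the last element, so a plain if (!r) detects the not-found case.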
574dda78eb0f..3ee690e8c7d3 100644 --- a/net/can/gw.c +++ b/net/can/gw.c @@ -42,6 +42,7 @@ #include <linux/module.h> #include <linux/init.h> #include <linux/types.h> +#include <linux/kernel.h> #include <linux/list.h> #include <linux/spinlock.h> #include <linux/rcupdate.h> @@ -52,19 +53,31 @@ #include <linux/skbuff.h> #include <linux/can.h> #include <linux/can/core.h> +#include <linux/can/skb.h> #include <linux/can/gw.h> #include <net/rtnetlink.h> #include <net/net_namespace.h> #include <net/sock.h> -#define CAN_GW_VERSION "20101209" -static __initconst const char banner[] = - KERN_INFO "can: netlink gateway (rev " CAN_GW_VERSION ")\n"; +#define CAN_GW_VERSION "20130117" +#define CAN_GW_NAME "can-gw" MODULE_DESCRIPTION("PF_CAN netlink gateway"); MODULE_LICENSE("Dual BSD/GPL"); MODULE_AUTHOR("Oliver Hartkopp <oliver.hartkopp@volkswagen.de>"); -MODULE_ALIAS("can-gw"); +MODULE_ALIAS(CAN_GW_NAME); + +#define CGW_MIN_HOPS 1 +#define CGW_MAX_HOPS 6 +#define CGW_DEFAULT_HOPS 1 + +static unsigned int max_hops __read_mostly = CGW_DEFAULT_HOPS; +module_param(max_hops, uint, S_IRUGO); +MODULE_PARM_DESC(max_hops, + "maximum " CAN_GW_NAME " routing hops for CAN frames " + "(valid values: " __stringify(CGW_MIN_HOPS) "-" + __stringify(CGW_MAX_HOPS) " hops, " + "default: " __stringify(CGW_DEFAULT_HOPS) ")"); static HLIST_HEAD(cgw_list); static struct notifier_block notifier; @@ -118,6 +131,7 @@ struct cgw_job { struct rcu_head rcu; u32 handled_frames; u32 dropped_frames; + u32 deleted_frames; struct cf_mod mod; union { /* CAN frame data source */ @@ -338,15 +352,38 @@ static void can_can_gw_rcv(struct sk_buff *skb, void *data) struct sk_buff *nskb; int modidx = 0; - /* do not handle already routed frames - see comment below */ - if (skb_mac_header_was_set(skb)) + /* + * Do not handle CAN frames routed more than 'max_hops' times. + * In general we should never catch this delimiter which is intended + * to cover a misconfiguration protection (e.g. circular CAN routes). + * + * The Controller Area Network controllers only accept CAN frames with + * correct CRCs - which are not visible in the controller registers. + * According to skbuff.h documentation the csum_start element for IP + * checksums is undefined/unsued when ip_summed == CHECKSUM_UNNECESSARY. + * Only CAN skbs can be processed here which already have this property. + */ + +#define cgw_hops(skb) ((skb)->csum_start) + + BUG_ON(skb->ip_summed != CHECKSUM_UNNECESSARY); + + if (cgw_hops(skb) >= max_hops) { + /* indicate deleted frames due to misconfiguration */ + gwj->deleted_frames++; return; + } if (!(gwj->dst.dev->flags & IFF_UP)) { gwj->dropped_frames++; return; } + /* is sending the skb back to the incoming interface not allowed? */ + if (!(gwj->flags & CGW_FLAGS_CAN_IIF_TX_OK) && + can_skb_prv(skb)->ifindex == gwj->dst.dev->ifindex) + return; + /* * clone the given skb, which has not been done in can_rcv() * @@ -363,15 +400,8 @@ static void can_can_gw_rcv(struct sk_buff *skb, void *data) return; } - /* - * Mark routed frames by setting some mac header length which is - * not relevant for the CAN frames located in the skb->data section. - * - * As dev->header_ops is not set in CAN netdevices no one is ever - * accessing the various header offsets in the CAN skbuffs anyway. - * E.g. using the packet socket to read CAN frames is still working. 
- */ - skb_set_mac_header(nskb, 8); + /* put the incremented hop counter in the cloned skb */ + cgw_hops(nskb) = cgw_hops(skb) + 1; nskb->dev = gwj->dst.dev; /* pointer to modifiable CAN frame */ @@ -427,16 +457,16 @@ static int cgw_notifier(struct notifier_block *nb, if (msg == NETDEV_UNREGISTER) { struct cgw_job *gwj = NULL; - struct hlist_node *n, *nx; + struct hlist_node *nx; ASSERT_RTNL(); - hlist_for_each_entry_safe(gwj, n, nx, &cgw_list, list) { + hlist_for_each_entry_safe(gwj, nx, &cgw_list, list) { if (gwj->src.dev == dev || gwj->dst.dev == dev) { hlist_del(&gwj->list); cgw_unregister_filter(gwj); - kfree(gwj); + kmem_cache_free(cgw_cache, gwj); } } } @@ -472,6 +502,11 @@ static int cgw_put_job(struct sk_buff *skb, struct cgw_job *gwj, int type, goto cancel; } + if (gwj->deleted_frames) { + if (nla_put_u32(skb, CGW_DELETED, gwj->deleted_frames) < 0) + goto cancel; + } + /* check non default settings of attributes */ if (gwj->mod.modtype.and) { @@ -540,12 +575,11 @@ cancel: static int cgw_dump_jobs(struct sk_buff *skb, struct netlink_callback *cb) { struct cgw_job *gwj = NULL; - struct hlist_node *n; int idx = 0; int s_idx = cb->args[0]; rcu_read_lock(); - hlist_for_each_entry_rcu(gwj, n, &cgw_list, list) { + hlist_for_each_entry_rcu(gwj, &cgw_list, list) { if (idx < s_idx) goto cont; @@ -744,8 +778,7 @@ static int cgw_parse_attr(struct nlmsghdr *nlh, struct cf_mod *mod, return 0; } -static int cgw_create_job(struct sk_buff *skb, struct nlmsghdr *nlh, - void *arg) +static int cgw_create_job(struct sk_buff *skb, struct nlmsghdr *nlh) { struct rtcanmsg *r; struct cgw_job *gwj; @@ -771,6 +804,7 @@ static int cgw_create_job(struct sk_buff *skb, struct nlmsghdr *nlh, gwj->handled_frames = 0; gwj->dropped_frames = 0; + gwj->deleted_frames = 0; gwj->flags = r->flags; gwj->gwtype = r->gwtype; @@ -822,21 +856,21 @@ out: static void cgw_remove_all_jobs(void) { struct cgw_job *gwj = NULL; - struct hlist_node *n, *nx; + struct hlist_node *nx; ASSERT_RTNL(); - hlist_for_each_entry_safe(gwj, n, nx, &cgw_list, list) { + hlist_for_each_entry_safe(gwj, nx, &cgw_list, list) { hlist_del(&gwj->list); cgw_unregister_filter(gwj); - kfree(gwj); + kmem_cache_free(cgw_cache, gwj); } } -static int cgw_remove_job(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +static int cgw_remove_job(struct sk_buff *skb, struct nlmsghdr *nlh) { struct cgw_job *gwj = NULL; - struct hlist_node *n, *nx; + struct hlist_node *nx; struct rtcanmsg *r; struct cf_mod mod; struct can_can_gw ccgw; @@ -871,7 +905,7 @@ static int cgw_remove_job(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) ASSERT_RTNL(); /* remove only the first matching entry */ - hlist_for_each_entry_safe(gwj, n, nx, &cgw_list, list) { + hlist_for_each_entry_safe(gwj, nx, &cgw_list, list) { if (gwj->flags != r->flags) continue; @@ -885,7 +919,7 @@ static int cgw_remove_job(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) hlist_del(&gwj->list); cgw_unregister_filter(gwj); - kfree(gwj); + kmem_cache_free(cgw_cache, gwj); err = 0; break; } @@ -895,7 +929,11 @@ static int cgw_remove_job(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) static __init int cgw_module_init(void) { - printk(banner); + /* sanitize given module parameter */ + max_hops = clamp_t(unsigned int, max_hops, CGW_MIN_HOPS, CGW_MAX_HOPS); + + pr_info("can: netlink gateway (rev " CAN_GW_VERSION ") max_hops=%d\n", + max_hops); cgw_cache = kmem_cache_create("can_gw", sizeof(struct cgw_job), 0, 0, NULL); diff --git a/net/can/proc.c b/net/can/proc.c index ae566902d2bf..b543470c8f8b 
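Worth noting in cgw_module_init() above: max_hops is exposed read-only (S_IRUGO, no write bit), so sanitizing it once at load time with clamp_t() is sufficient; out-of-range values are silently pulled into [CGW_MIN_HOPS, CGW_MAX_HOPS] rather than failing the module load. The idiom, reduced to its essentials:

#include <linux/kernel.h>	/* clamp_t() */
#include <linux/module.h>

static unsigned int max_hops = 1;
module_param(max_hops, uint, S_IRUGO);	/* readable, not writable, via sysfs */

static int __init example_init(void)	/* hypothetical module init */
{
	/* accept the load, but force the value into a sane range */
	max_hops = clamp_t(unsigned int, max_hops, 1, 6);
	return 0;
}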
100644 --- a/net/can/proc.c +++ b/net/can/proc.c @@ -195,9 +195,8 @@ static void can_print_rcvlist(struct seq_file *m, struct hlist_head *rx_list, struct net_device *dev) { struct receiver *r; - struct hlist_node *n; - hlist_for_each_entry_rcu(r, n, rx_list, list) { + hlist_for_each_entry_rcu(r, rx_list, list) { char *fmt = (r->can_id & CAN_EFF_FLAG)? " %-5s %08x %08x %pK %pK %8ld %s\n" : " %-5s %03x %08x %pK %pK %8ld %s\n"; @@ -379,7 +378,7 @@ static int can_rcvlist_proc_show(struct seq_file *m, void *v) static int can_rcvlist_proc_open(struct inode *inode, struct file *file) { - return single_open(file, can_rcvlist_proc_show, PDE(inode)->data); + return single_open(file, can_rcvlist_proc_show, PDE_DATA(inode)); } static const struct file_operations can_rcvlist_proc_fops = { @@ -531,5 +530,5 @@ void can_remove_proc(void) can_remove_proc_readentry(CAN_PROC_RCVLIST_SFF); if (can_dir) - proc_net_remove(&init_net, "can"); + remove_proc_entry("can", init_net.proc_net); } diff --git a/net/can/raw.c b/net/can/raw.c index 5b0e3e330d97..1085e65f848e 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -50,6 +50,7 @@ #include <linux/skbuff.h> #include <linux/can.h> #include <linux/can/core.h> +#include <linux/can/skb.h> #include <linux/can/raw.h> #include <net/sock.h> #include <net/net_namespace.h> @@ -699,17 +700,19 @@ static int raw_sendmsg(struct kiocb *iocb, struct socket *sock, if (!dev) return -ENXIO; - skb = sock_alloc_send_skb(sk, size, msg->msg_flags & MSG_DONTWAIT, - &err); + skb = sock_alloc_send_skb(sk, size + sizeof(struct can_skb_priv), + msg->msg_flags & MSG_DONTWAIT, &err); if (!skb) goto put_dev; + can_skb_reserve(skb); + can_skb_prv(skb)->ifindex = dev->ifindex; + err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size); if (err < 0) goto free_skb; - err = sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags); - if (err < 0) - goto free_skb; + + sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags); skb->dev = dev; skb->sk = sk; diff --git a/net/ceph/Kconfig b/net/ceph/Kconfig index cc04dd667a10..e50cc69ae8ca 100644 --- a/net/ceph/Kconfig +++ b/net/ceph/Kconfig @@ -1,6 +1,6 @@ config CEPH_LIB - tristate "Ceph core library (EXPERIMENTAL)" - depends on INET && EXPERIMENTAL + tristate "Ceph core library" + depends on INET select LIBCRC32C select CRYPTO_AES select CRYPTO diff --git a/net/ceph/Makefile b/net/ceph/Makefile index e87ef435e11b..958d9856912c 100644 --- a/net/ceph/Makefile +++ b/net/ceph/Makefile @@ -11,5 +11,5 @@ libceph-y := ceph_common.o messenger.o msgpool.o buffer.o pagelist.o \ crypto.o armor.o \ auth_x.o \ ceph_fs.o ceph_strings.o ceph_hash.o \ - pagevec.o + pagevec.o snapshot.o diff --git a/net/ceph/auth.c b/net/ceph/auth.c index b4bf4ac090f1..6b923bcaa2a4 100644 --- a/net/ceph/auth.c +++ b/net/ceph/auth.c @@ -47,6 +47,7 @@ struct ceph_auth_client *ceph_auth_init(const char *name, const struct ceph_cryp if (!ac) goto out; + mutex_init(&ac->mutex); ac->negotiating = true; if (name) ac->name = name; @@ -73,10 +74,12 @@ void ceph_auth_destroy(struct ceph_auth_client *ac) */ void ceph_auth_reset(struct ceph_auth_client *ac) { + mutex_lock(&ac->mutex); dout("auth_reset %p\n", ac); if (ac->ops && !ac->negotiating) ac->ops->reset(ac); ac->negotiating = true; + mutex_unlock(&ac->mutex); } int ceph_entity_name_encode(const char *name, void **p, void *end) @@ -102,6 +105,7 @@ int ceph_auth_build_hello(struct ceph_auth_client *ac, void *buf, size_t len) int i, num; int ret; + mutex_lock(&ac->mutex); dout("auth_build_hello\n"); monhdr->have_version = 0; monhdr->session_mon = 
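The bcm.c and raw.c hunks above, together with the gw.c incoming-interface check earlier, all build on the new struct can_skb_priv that rides in the skb headroom ahead of the CAN frame. Every producer of CAN skbs must now over-allocate, reserve the private area, and stamp the originating ifindex. A sketch of that pattern (can_frame_to_skb() is a hypothetical helper):

#include <linux/can.h>
#include <linux/can/skb.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>

static struct sk_buff *can_frame_to_skb(const struct can_frame *cf,
					struct net_device *dev)
{
	struct sk_buff *skb;

	/* room for the private area plus the frame itself */
	skb = alloc_skb(sizeof(*cf) + sizeof(struct can_skb_priv),
			GFP_KERNEL);
	if (!skb)
		return NULL;

	can_skb_reserve(skb);			  /* skb_reserve() the priv area */
	can_skb_prv(skb)->ifindex = dev->ifindex; /* record the injecting device */

	memcpy(skb_put(skb, sizeof(*cf)), cf, sizeof(*cf));
	return skb;
}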
cpu_to_le16(-1); @@ -122,15 +126,19 @@ int ceph_auth_build_hello(struct ceph_auth_client *ac, void *buf, size_t len) ret = ceph_entity_name_encode(ac->name, &p, end); if (ret < 0) - return ret; + goto out; ceph_decode_need(&p, end, sizeof(u64), bad); ceph_encode_64(&p, ac->global_id); ceph_encode_32(&lenp, p - lenp - sizeof(u32)); - return p - buf; + ret = p - buf; +out: + mutex_unlock(&ac->mutex); + return ret; bad: - return -ERANGE; + ret = -ERANGE; + goto out; } static int ceph_build_auth_request(struct ceph_auth_client *ac, @@ -151,11 +159,13 @@ static int ceph_build_auth_request(struct ceph_auth_client *ac, if (ret < 0) { pr_err("error %d building auth method %s request\n", ret, ac->ops->name); - return ret; + goto out; } dout(" built request %d bytes\n", ret); ceph_encode_32(&p, ret); - return p + ret - msg_buf; + ret = p + ret - msg_buf; +out: + return ret; } /* @@ -176,6 +186,7 @@ int ceph_handle_auth_reply(struct ceph_auth_client *ac, int result_msg_len; int ret = -EINVAL; + mutex_lock(&ac->mutex); dout("handle_auth_reply %p %p\n", p, end); ceph_decode_need(&p, end, sizeof(u32) * 3 + sizeof(u64), bad); protocol = ceph_decode_32(&p); @@ -227,33 +238,103 @@ int ceph_handle_auth_reply(struct ceph_auth_client *ac, ret = ac->ops->handle_reply(ac, result, payload, payload_end); if (ret == -EAGAIN) { - return ceph_build_auth_request(ac, reply_buf, reply_len); + ret = ceph_build_auth_request(ac, reply_buf, reply_len); } else if (ret) { pr_err("auth method '%s' error %d\n", ac->ops->name, ret); - return ret; } - return 0; -bad: - pr_err("failed to decode auth msg\n"); out: + mutex_unlock(&ac->mutex); return ret; + +bad: + pr_err("failed to decode auth msg\n"); + ret = -EINVAL; + goto out; } int ceph_build_auth(struct ceph_auth_client *ac, void *msg_buf, size_t msg_len) { + int ret = 0; + + mutex_lock(&ac->mutex); if (!ac->protocol) - return ceph_auth_build_hello(ac, msg_buf, msg_len); - BUG_ON(!ac->ops); - if (ac->ops->should_authenticate(ac)) - return ceph_build_auth_request(ac, msg_buf, msg_len); - return 0; + ret = ceph_auth_build_hello(ac, msg_buf, msg_len); + else if (ac->ops->should_authenticate(ac)) + ret = ceph_build_auth_request(ac, msg_buf, msg_len); + mutex_unlock(&ac->mutex); + return ret; } int ceph_auth_is_authenticated(struct ceph_auth_client *ac) { - if (!ac->ops) - return 0; - return ac->ops->is_authenticated(ac); + int ret = 0; + + mutex_lock(&ac->mutex); + if (ac->ops) + ret = ac->ops->is_authenticated(ac); + mutex_unlock(&ac->mutex); + return ret; +} +EXPORT_SYMBOL(ceph_auth_is_authenticated); + +int ceph_auth_create_authorizer(struct ceph_auth_client *ac, + int peer_type, + struct ceph_auth_handshake *auth) +{ + int ret = 0; + + mutex_lock(&ac->mutex); + if (ac->ops && ac->ops->create_authorizer) + ret = ac->ops->create_authorizer(ac, peer_type, auth); + mutex_unlock(&ac->mutex); + return ret; +} +EXPORT_SYMBOL(ceph_auth_create_authorizer); + +void ceph_auth_destroy_authorizer(struct ceph_auth_client *ac, + struct ceph_authorizer *a) +{ + mutex_lock(&ac->mutex); + if (ac->ops && ac->ops->destroy_authorizer) + ac->ops->destroy_authorizer(ac, a); + mutex_unlock(&ac->mutex); +} +EXPORT_SYMBOL(ceph_auth_destroy_authorizer); + +int ceph_auth_update_authorizer(struct ceph_auth_client *ac, + int peer_type, + struct ceph_auth_handshake *a) +{ + int ret = 0; + + mutex_lock(&ac->mutex); + if (ac->ops && ac->ops->update_authorizer) + ret = ac->ops->update_authorizer(ac, peer_type, a); + mutex_unlock(&ac->mutex); + return ret; +} +EXPORT_SYMBOL(ceph_auth_update_authorizer); + +int 
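The net effect of the auth.c rework above is that every entry point into the pluggable auth ops now runs under ac->mutex, with early returns rewritten as goto out so the unlock can never be skipped. The exported wrappers that follow all share one shape; stripped to its essentials (some_op is a hypothetical ops member):

static int auth_call_locked(struct ceph_auth_client *ac)
{
	int ret = 0;

	mutex_lock(&ac->mutex);
	if (ac->ops && ac->ops->some_op)	/* ops may not exist pre-negotiation */
		ret = ac->ops->some_op(ac);
	mutex_unlock(&ac->mutex);
	return ret;
}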
ceph_auth_verify_authorizer_reply(struct ceph_auth_client *ac, + struct ceph_authorizer *a, size_t len) +{ + int ret = 0; + + mutex_lock(&ac->mutex); + if (ac->ops && ac->ops->verify_authorizer_reply) + ret = ac->ops->verify_authorizer_reply(ac, a, len); + mutex_unlock(&ac->mutex); + return ret; +} +EXPORT_SYMBOL(ceph_auth_verify_authorizer_reply); + +void ceph_auth_invalidate_authorizer(struct ceph_auth_client *ac, int peer_type) +{ + mutex_lock(&ac->mutex); + if (ac->ops && ac->ops->invalidate_authorizer) + ac->ops->invalidate_authorizer(ac, peer_type); + mutex_unlock(&ac->mutex); } +EXPORT_SYMBOL(ceph_auth_invalidate_authorizer); diff --git a/net/ceph/auth_x.c b/net/ceph/auth_x.c index a16bf14eb027..96238ba95f2b 100644 --- a/net/ceph/auth_x.c +++ b/net/ceph/auth_x.c @@ -298,6 +298,7 @@ static int ceph_x_build_authorizer(struct ceph_auth_client *ac, return -ENOMEM; } au->service = th->service; + au->secret_id = th->secret_id; msg_a = au->buf->vec.iov_base; msg_a->struct_v = 1; @@ -555,6 +556,26 @@ static int ceph_x_create_authorizer( return 0; } +static int ceph_x_update_authorizer( + struct ceph_auth_client *ac, int peer_type, + struct ceph_auth_handshake *auth) +{ + struct ceph_x_authorizer *au; + struct ceph_x_ticket_handler *th; + + th = get_ticket_handler(ac, peer_type); + if (IS_ERR(th)) + return PTR_ERR(th); + + au = (struct ceph_x_authorizer *)auth->authorizer; + if (au->secret_id < th->secret_id) { + dout("ceph_x_update_authorizer service %u secret %llu < %llu\n", + au->service, au->secret_id, th->secret_id); + return ceph_x_build_authorizer(ac, th, au); + } + return 0; +} + static int ceph_x_verify_authorizer_reply(struct ceph_auth_client *ac, struct ceph_authorizer *a, size_t len) { @@ -630,7 +651,7 @@ static void ceph_x_invalidate_authorizer(struct ceph_auth_client *ac, th = get_ticket_handler(ac, peer_type); if (!IS_ERR(th)) - remove_ticket_handler(ac, th); + memset(&th->validity, 0, sizeof(th->validity)); } @@ -641,6 +662,7 @@ static const struct ceph_auth_client_ops ceph_x_ops = { .build_request = ceph_x_build_request, .handle_reply = ceph_x_handle_reply, .create_authorizer = ceph_x_create_authorizer, + .update_authorizer = ceph_x_update_authorizer, .verify_authorizer_reply = ceph_x_verify_authorizer_reply, .destroy_authorizer = ceph_x_destroy_authorizer, .invalidate_authorizer = ceph_x_invalidate_authorizer, diff --git a/net/ceph/auth_x.h b/net/ceph/auth_x.h index f459e93b774f..c5a058da7ac8 100644 --- a/net/ceph/auth_x.h +++ b/net/ceph/auth_x.h @@ -29,6 +29,7 @@ struct ceph_x_authorizer { struct ceph_buffer *buf; unsigned int service; u64 nonce; + u64 secret_id; char reply_buf[128]; /* big enough for encrypted blob */ }; diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c index ee71ea26777a..34b11ee8124e 100644 --- a/net/ceph/ceph_common.c +++ b/net/ceph/ceph_common.c @@ -15,6 +15,8 @@ #include <linux/slab.h> #include <linux/statfs.h> #include <linux/string.h> +#include <linux/nsproxy.h> +#include <net/net_namespace.h> #include <linux/ceph/ceph_features.h> @@ -26,6 +28,22 @@ #include "crypto.h" +/* + * Module compatibility interface. For now it doesn't do anything, + * but its existence signals a certain level of functionality. + * + * The data buffer is used to pass information both to and from + * libceph. The return value indicates whether libceph determines + * it is compatible with the caller (from another kernel module), + * given the provided data. + * + * The data pointer can be null. 
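The auth_x.c changes above give authorizers a lifecycle: each one records the secret_id of the ticket it was built from, ceph_x_update_authorizer() rebuilds it only when the ticket handler has rotated to a newer secret, and invalidation now just zeroes the ticket's validity window (forcing a refetch) instead of deleting the handler. A hedged sketch of how a consumer is expected to drive the newly exported entry points (error handling omitted):

struct ceph_auth_handshake auth;	/* caller-owned handshake state */
int ret;

/* first connection to, say, an OSD */
ret = ceph_auth_create_authorizer(ac, CEPH_ENTITY_TYPE_OSD, &auth);

/* on reconnect: rebuild only if the ticket rotated underneath us */
ret = ceph_auth_update_authorizer(ac, CEPH_ENTITY_TYPE_OSD, &auth);

/* peer rejected the authorizer: invalidate so the next try refetches */
ceph_auth_invalidate_authorizer(ac, CEPH_ENTITY_TYPE_OSD);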
+ */ +bool libceph_compatible(void *data) +{ + return true; +} +EXPORT_SYMBOL(libceph_compatible); /* * find filename portion of a path (/foo/bar/baz -> baz) @@ -292,6 +310,9 @@ ceph_parse_options(char *options, const char *dev_name, int err = -ENOMEM; substring_t argstr[MAX_OPT_ARGS]; + if (current->nsproxy->net_ns != &init_net) + return ERR_PTR(-EINVAL); + opt = kzalloc(sizeof(*opt), GFP_KERNEL); if (!opt) return ERR_PTR(-ENOMEM); @@ -585,13 +606,17 @@ static int __init init_ceph_lib(void) if (ret < 0) goto out_crypto; - pr_info("loaded (mon/osd proto %d/%d, osdmap %d/%d %d/%d)\n", - CEPH_MONC_PROTOCOL, CEPH_OSDC_PROTOCOL, - CEPH_OSDMAP_VERSION, CEPH_OSDMAP_VERSION_EXT, - CEPH_OSDMAP_INC_VERSION, CEPH_OSDMAP_INC_VERSION_EXT); + ret = ceph_osdc_setup(); + if (ret < 0) + goto out_msgr; + + pr_info("loaded (mon/osd proto %d/%d)\n", + CEPH_MONC_PROTOCOL, CEPH_OSDC_PROTOCOL); return 0; +out_msgr: + ceph_msgr_exit(); out_crypto: ceph_crypto_shutdown(); out_debugfs: @@ -603,6 +628,7 @@ out: static void __exit exit_ceph_lib(void) { dout("exit_ceph_lib\n"); + ceph_osdc_cleanup(); ceph_msgr_exit(); ceph_crypto_shutdown(); ceph_debugfs_cleanup(); diff --git a/net/ceph/ceph_strings.c b/net/ceph/ceph_strings.c index 3fbda04de29c..1348df96fe15 100644 --- a/net/ceph/ceph_strings.c +++ b/net/ceph/ceph_strings.c @@ -21,9 +21,15 @@ const char *ceph_osd_op_name(int op) switch (op) { case CEPH_OSD_OP_READ: return "read"; case CEPH_OSD_OP_STAT: return "stat"; + case CEPH_OSD_OP_MAPEXT: return "mapext"; + case CEPH_OSD_OP_SPARSE_READ: return "sparse-read"; + case CEPH_OSD_OP_NOTIFY: return "notify"; + case CEPH_OSD_OP_NOTIFY_ACK: return "notify-ack"; + case CEPH_OSD_OP_ASSERT_VER: return "assert-version"; case CEPH_OSD_OP_MASKTRUNC: return "masktrunc"; + case CEPH_OSD_OP_CREATE: return "create"; case CEPH_OSD_OP_WRITE: return "write"; case CEPH_OSD_OP_DELETE: return "delete"; case CEPH_OSD_OP_TRUNCATE: return "truncate"; @@ -39,6 +45,11 @@ const char *ceph_osd_op_name(int op) case CEPH_OSD_OP_TMAPUP: return "tmapup"; case CEPH_OSD_OP_TMAPGET: return "tmapget"; case CEPH_OSD_OP_TMAPPUT: return "tmapput"; + case CEPH_OSD_OP_WATCH: return "watch"; + + case CEPH_OSD_OP_CLONERANGE: return "clonerange"; + case CEPH_OSD_OP_ASSERT_SRC_VERSION: return "assert-src-version"; + case CEPH_OSD_OP_SRC_CMPXATTR: return "src-cmpxattr"; case CEPH_OSD_OP_GETXATTR: return "getxattr"; case CEPH_OSD_OP_GETXATTRS: return "getxattrs"; @@ -53,6 +64,10 @@ const char *ceph_osd_op_name(int op) case CEPH_OSD_OP_BALANCEREADS: return "balance-reads"; case CEPH_OSD_OP_UNBALANCEREADS: return "unbalance-reads"; case CEPH_OSD_OP_SCRUB: return "scrub"; + case CEPH_OSD_OP_SCRUB_RESERVE: return "scrub-reserve"; + case CEPH_OSD_OP_SCRUB_UNRESERVE: return "scrub-unreserve"; + case CEPH_OSD_OP_SCRUB_STOP: return "scrub-stop"; + case CEPH_OSD_OP_SCRUB_MAP: return "scrub-map"; case CEPH_OSD_OP_WRLOCK: return "wrlock"; case CEPH_OSD_OP_WRUNLOCK: return "wrunlock"; @@ -64,10 +79,34 @@ const char *ceph_osd_op_name(int op) case CEPH_OSD_OP_CALL: return "call"; case CEPH_OSD_OP_PGLS: return "pgls"; + case CEPH_OSD_OP_PGLS_FILTER: return "pgls-filter"; + case CEPH_OSD_OP_OMAPGETKEYS: return "omap-get-keys"; + case CEPH_OSD_OP_OMAPGETVALS: return "omap-get-vals"; + case CEPH_OSD_OP_OMAPGETHEADER: return "omap-get-header"; + case CEPH_OSD_OP_OMAPGETVALSBYKEYS: return "omap-get-vals-by-keys"; + case CEPH_OSD_OP_OMAPSETVALS: return "omap-set-vals"; + case CEPH_OSD_OP_OMAPSETHEADER: return "omap-set-header"; + case CEPH_OSD_OP_OMAPCLEAR: return "omap-clear"; + 
case CEPH_OSD_OP_OMAPRMKEYS: return "omap-rm-keys"; } return "???"; } +const char *ceph_osd_state_name(int s) +{ + switch (s) { + case CEPH_OSD_EXISTS: + return "exists"; + case CEPH_OSD_UP: + return "up"; + case CEPH_OSD_AUTOOUT: + return "autoout"; + case CEPH_OSD_NEW: + return "new"; + default: + return "???"; + } +} const char *ceph_pool_op_name(int op) { diff --git a/net/ceph/crush/mapper.c b/net/ceph/crush/mapper.c index 35fce755ce10..cbd06a91941c 100644 --- a/net/ceph/crush/mapper.c +++ b/net/ceph/crush/mapper.c @@ -287,6 +287,7 @@ static int is_out(const struct crush_map *map, const __u32 *weight, int item, in * @outpos: our position in that vector * @firstn: true if choosing "first n" items, false if choosing "indep" * @recurse_to_leaf: true if we want one device under each item of given type + * @descend_once: true if we should only try one descent before giving up * @out2: second output vector for leaf items (if @recurse_to_leaf) */ static int crush_choose(const struct crush_map *map, @@ -295,7 +296,7 @@ static int crush_choose(const struct crush_map *map, int x, int numrep, int type, int *out, int outpos, int firstn, int recurse_to_leaf, - int *out2) + int descend_once, int *out2) { int rep; unsigned int ftotal, flocal; @@ -391,7 +392,7 @@ static int crush_choose(const struct crush_map *map, } reject = 0; - if (recurse_to_leaf) { + if (!collide && recurse_to_leaf) { if (item < 0) { if (crush_choose(map, map->buckets[-1-item], @@ -399,6 +400,7 @@ static int crush_choose(const struct crush_map *map, x, outpos+1, 0, out2, outpos, firstn, 0, + map->chooseleaf_descend_once, NULL) <= outpos) /* didn't get leaf */ reject = 1; @@ -422,7 +424,10 @@ reject: ftotal++; flocal++; - if (collide && flocal <= map->choose_local_tries) + if (reject && descend_once) + /* let outer call try again */ + skip_rep = 1; + else if (collide && flocal <= map->choose_local_tries) /* retry locally a few times */ retry_bucket = 1; else if (map->choose_local_fallback_tries > 0 && @@ -485,6 +490,7 @@ int crush_do_rule(const struct crush_map *map, int i, j; int numrep; int firstn; + const int descend_once = 0; if ((__u32)ruleno >= map->max_rules) { dprintk(" bad ruleno %d\n", ruleno); @@ -544,7 +550,8 @@ int crush_do_rule(const struct crush_map *map, curstep->arg2, o+osize, j, firstn, - recurse_to_leaf, c+osize); + recurse_to_leaf, + descend_once, c+osize); } if (recurse_to_leaf) diff --git a/net/ceph/crypto.c b/net/ceph/crypto.c index af14cb425164..6e7a236525b6 100644 --- a/net/ceph/crypto.c +++ b/net/ceph/crypto.c @@ -423,7 +423,8 @@ int ceph_encrypt2(struct ceph_crypto_key *secret, void *dst, size_t *dst_len, } } -int ceph_key_instantiate(struct key *key, struct key_preparsed_payload *prep) +static int ceph_key_instantiate(struct key *key, + struct key_preparsed_payload *prep) { struct ceph_crypto_key *ckey; size_t datalen = prep->datalen; @@ -458,12 +459,12 @@ err: return ret; } -int ceph_key_match(const struct key *key, const void *description) +static int ceph_key_match(const struct key *key, const void *description) { return strcmp(key->description, description) == 0; } -void ceph_key_destroy(struct key *key) { +static void ceph_key_destroy(struct key *key) { struct ceph_crypto_key *ckey = key->payload.data; ceph_crypto_key_destroy(ckey); diff --git a/net/ceph/debugfs.c b/net/ceph/debugfs.c index 38b5dc1823d4..83661cdc0766 100644 --- a/net/ceph/debugfs.c +++ b/net/ceph/debugfs.c @@ -66,9 +66,9 @@ static int osdmap_show(struct seq_file *s, void *p) for (n = rb_first(&client->osdc.osdmap->pg_pools); n; n = 
rb_next(n)) { struct ceph_pg_pool_info *pool = rb_entry(n, struct ceph_pg_pool_info, node); - seq_printf(s, "pg_pool %d pg_num %d / %d, lpg_num %d / %d\n", - pool->id, pool->v.pg_num, pool->pg_num_mask, - pool->v.lpg_num, pool->lpg_num_mask); + seq_printf(s, "pg_pool %llu pg_num %d / %d\n", + (unsigned long long)pool->id, pool->pg_num, + pool->pg_num_mask); } for (i = 0; i < client->osdc.osdmap->max_osd; i++) { struct ceph_entity_addr *addr = @@ -123,26 +123,16 @@ static int osdc_show(struct seq_file *s, void *pp) mutex_lock(&osdc->request_mutex); for (p = rb_first(&osdc->requests); p; p = rb_next(p)) { struct ceph_osd_request *req; - struct ceph_osd_request_head *head; - struct ceph_osd_op *op; - int num_ops; - int opcode, olen; - int i; + unsigned int i; + int opcode; req = rb_entry(p, struct ceph_osd_request, r_node); - seq_printf(s, "%lld\tosd%d\t%d.%x\t", req->r_tid, + seq_printf(s, "%lld\tosd%d\t%lld.%x\t", req->r_tid, req->r_osd ? req->r_osd->o_osd : -1, - le32_to_cpu(req->r_pgid.pool), - le16_to_cpu(req->r_pgid.ps)); + req->r_pgid.pool, req->r_pgid.seed); - head = req->r_request->front.iov_base; - op = (void *)(head + 1); - - num_ops = le16_to_cpu(head->num_ops); - olen = le32_to_cpu(head->object_len); - seq_printf(s, "%.*s", olen, - (const char *)(head->ops + num_ops)); + seq_printf(s, "%.*s", req->r_oid_len, req->r_oid); if (req->r_reassert_version.epoch) seq_printf(s, "\t%u'%llu", @@ -151,10 +141,9 @@ static int osdc_show(struct seq_file *s, void *pp) else seq_printf(s, "\t"); - for (i = 0; i < num_ops; i++) { - opcode = le16_to_cpu(op->op); + for (i = 0; i < req->r_num_ops; i++) { + opcode = req->r_ops[i].op; seq_printf(s, "\t%s", ceph_osd_op_name(opcode)); - op++; } seq_printf(s, "\n"); diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 5ccf87ed8d68..eb0a46a49bd4 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -9,8 +9,9 @@ #include <linux/slab.h> #include <linux/socket.h> #include <linux/string.h> +#ifdef CONFIG_BLOCK #include <linux/bio.h> -#include <linux/blkdev.h> +#endif /* CONFIG_BLOCK */ #include <linux/dns_resolver.h> #include <net/tcp.h> @@ -20,6 +21,9 @@ #include <linux/ceph/pagelist.h> #include <linux/export.h> +#define list_entry_next(pos, member) \ + list_entry(pos->member.next, typeof(*pos), member) + /* * Ceph uses the messenger to exchange ceph_msg messages with other * hosts in the system. 
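messenger.c now defines a local list_entry_next() helper, used by the data-cursor code later in the file: given an entry embedded in a list, it names the containing struct of the following node. A minimal illustration with a throwaway type:

#include <linux/list.h>

struct item {
	int value;
	struct list_head link;
};

#define list_entry_next(pos, member) \
	list_entry(pos->member.next, typeof(*pos), member)

/* given a 'struct item *cur' that is not the last entry:
 *
 *	struct item *nxt = list_entry_next(cur, link);
 *
 * is equivalent to list_entry(cur->link.next, struct item, link).
 * Like the raw form, it does not check for the list head, so the
 * caller must know another entry exists. */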
The messenger provides ordered and reliable @@ -97,6 +101,62 @@ #define CON_FLAG_SOCK_CLOSED 3 /* socket state changed to closed */ #define CON_FLAG_BACKOFF 4 /* need to retry queuing delayed work */ +static bool con_flag_valid(unsigned long con_flag) +{ + switch (con_flag) { + case CON_FLAG_LOSSYTX: + case CON_FLAG_KEEPALIVE_PENDING: + case CON_FLAG_WRITE_PENDING: + case CON_FLAG_SOCK_CLOSED: + case CON_FLAG_BACKOFF: + return true; + default: + return false; + } +} + +static void con_flag_clear(struct ceph_connection *con, unsigned long con_flag) +{ + BUG_ON(!con_flag_valid(con_flag)); + + clear_bit(con_flag, &con->flags); +} + +static void con_flag_set(struct ceph_connection *con, unsigned long con_flag) +{ + BUG_ON(!con_flag_valid(con_flag)); + + set_bit(con_flag, &con->flags); +} + +static bool con_flag_test(struct ceph_connection *con, unsigned long con_flag) +{ + BUG_ON(!con_flag_valid(con_flag)); + + return test_bit(con_flag, &con->flags); +} + +static bool con_flag_test_and_clear(struct ceph_connection *con, + unsigned long con_flag) +{ + BUG_ON(!con_flag_valid(con_flag)); + + return test_and_clear_bit(con_flag, &con->flags); +} + +static bool con_flag_test_and_set(struct ceph_connection *con, + unsigned long con_flag) +{ + BUG_ON(!con_flag_valid(con_flag)); + + return test_and_set_bit(con_flag, &con->flags); +} + +/* Slab caches for frequently-allocated structures */ + +static struct kmem_cache *ceph_msg_cache; +static struct kmem_cache *ceph_msg_data_cache; + /* static tag bytes (protocol control messages) */ static char tag_msg = CEPH_MSGR_TAG_MSG; static char tag_ack = CEPH_MSGR_TAG_ACK; @@ -114,7 +174,7 @@ static struct lock_class_key socket_class; static void queue_con(struct ceph_connection *con); static void con_work(struct work_struct *); -static void ceph_fault(struct ceph_connection *con); +static void con_fault(struct ceph_connection *con); /* * Nicely render a sockaddr as a string. An array of formatted @@ -171,13 +231,50 @@ static void encode_my_addr(struct ceph_messenger *msgr) */ static struct workqueue_struct *ceph_msgr_wq; -void _ceph_msgr_exit(void) +static int ceph_msgr_slab_init(void) +{ + BUG_ON(ceph_msg_cache); + ceph_msg_cache = kmem_cache_create("ceph_msg", + sizeof (struct ceph_msg), + __alignof__(struct ceph_msg), 0, NULL); + + if (!ceph_msg_cache) + return -ENOMEM; + + BUG_ON(ceph_msg_data_cache); + ceph_msg_data_cache = kmem_cache_create("ceph_msg_data", + sizeof (struct ceph_msg_data), + __alignof__(struct ceph_msg_data), + 0, NULL); + if (ceph_msg_data_cache) + return 0; + + kmem_cache_destroy(ceph_msg_cache); + ceph_msg_cache = NULL; + + return -ENOMEM; +} + +static void ceph_msgr_slab_exit(void) +{ + BUG_ON(!ceph_msg_data_cache); + kmem_cache_destroy(ceph_msg_data_cache); + ceph_msg_data_cache = NULL; + + BUG_ON(!ceph_msg_cache); + kmem_cache_destroy(ceph_msg_cache); + ceph_msg_cache = NULL; +} + +static void _ceph_msgr_exit(void) { if (ceph_msgr_wq) { destroy_workqueue(ceph_msgr_wq); ceph_msgr_wq = NULL; } + ceph_msgr_slab_exit(); + BUG_ON(zero_page == NULL); kunmap(zero_page); page_cache_release(zero_page); @@ -190,6 +287,9 @@ int ceph_msgr_init(void) zero_page = ZERO_PAGE(0); page_cache_get(zero_page); + if (ceph_msgr_slab_init()) + return -ENOMEM; + ceph_msgr_wq = alloc_workqueue("ceph-msgr", WQ_NON_REENTRANT, 0); if (ceph_msgr_wq) return 0; @@ -308,7 +408,7 @@ static void ceph_sock_write_space(struct sock *sk) * buffer. See net/ipv4/tcp_input.c:tcp_check_space() * and net/core/stream.c:sk_stream_write_space(). 
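ceph_msgr_slab_init()/ceph_msgr_slab_exit() above add dedicated kmem caches for the messenger's two hottest allocations, created front to back and torn down back to front; on partial failure the already-created cache is destroyed before returning -ENOMEM. For a single cache the pattern reduces to this sketch (struct foo is hypothetical):

static struct kmem_cache *foo_cache;

static int foo_slab_init(void)
{
	foo_cache = kmem_cache_create("foo", sizeof(struct foo),
				      __alignof__(struct foo), 0, NULL);
	return foo_cache ? 0 : -ENOMEM;
}

static void foo_slab_exit(void)
{
	if (foo_cache) {
		kmem_cache_destroy(foo_cache);
		foo_cache = NULL;
	}
}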
*/ - if (test_bit(CON_FLAG_WRITE_PENDING, &con->flags)) { + if (con_flag_test(con, CON_FLAG_WRITE_PENDING)) { if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) { dout("%s %p queueing write work\n", __func__, con); clear_bit(SOCK_NOSPACE, &sk->sk_socket->flags); @@ -333,7 +433,7 @@ static void ceph_sock_state_change(struct sock *sk) case TCP_CLOSE_WAIT: dout("%s TCP_CLOSE_WAIT\n", __func__); con_sock_state_closing(con); - set_bit(CON_FLAG_SOCK_CLOSED, &con->flags); + con_flag_set(con, CON_FLAG_SOCK_CLOSED); queue_con(con); break; case TCP_ESTABLISHED: @@ -419,6 +519,22 @@ static int ceph_tcp_recvmsg(struct socket *sock, void *buf, size_t len) return r; } +static int ceph_tcp_recvpage(struct socket *sock, struct page *page, + int page_offset, size_t length) +{ + void *kaddr; + int ret; + + BUG_ON(page_offset + length > PAGE_SIZE); + + kaddr = kmap(page); + BUG_ON(!kaddr); + ret = ceph_tcp_recvmsg(sock, kaddr + page_offset, length); + kunmap(page); + + return ret; +} + /* * write something. @more is true if caller will be sending more data * shortly. @@ -441,7 +557,7 @@ static int ceph_tcp_sendmsg(struct socket *sock, struct kvec *iov, } static int ceph_tcp_sendpage(struct socket *sock, struct page *page, - int offset, size_t size, int more) + int offset, size_t size, bool more) { int flags = MSG_DONTWAIT | MSG_NOSIGNAL | (more ? MSG_MORE : MSG_EOR); int ret; @@ -474,7 +590,7 @@ static int con_close_socket(struct ceph_connection *con) * received a socket close event before we had the chance to * shut the socket down. */ - clear_bit(CON_FLAG_SOCK_CLOSED, &con->flags); + con_flag_clear(con, CON_FLAG_SOCK_CLOSED); con_sock_state_closed(con); return rc; @@ -538,11 +654,10 @@ void ceph_con_close(struct ceph_connection *con) ceph_pr_addr(&con->peer_addr.in_addr)); con->state = CON_STATE_CLOSED; - clear_bit(CON_FLAG_LOSSYTX, &con->flags); /* so we retry next connect */ - clear_bit(CON_FLAG_KEEPALIVE_PENDING, &con->flags); - clear_bit(CON_FLAG_WRITE_PENDING, &con->flags); - clear_bit(CON_FLAG_KEEPALIVE_PENDING, &con->flags); - clear_bit(CON_FLAG_BACKOFF, &con->flags); + con_flag_clear(con, CON_FLAG_LOSSYTX); /* so we retry next connect */ + con_flag_clear(con, CON_FLAG_KEEPALIVE_PENDING); + con_flag_clear(con, CON_FLAG_WRITE_PENDING); + con_flag_clear(con, CON_FLAG_BACKOFF); reset_connection(con); con->peer_global_seq = 0; @@ -646,50 +761,397 @@ static void con_out_kvec_add(struct ceph_connection *con, } #ifdef CONFIG_BLOCK -static void init_bio_iter(struct bio *bio, struct bio **iter, int *seg) + +/* + * For a bio data item, a piece is whatever remains of the next + * entry in the current bio iovec, or the first entry in the next + * bio in the list. 
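The bio cursor above is the first implementation of a small abstraction that recurs through the rest of the messenger rework: a data cursor with three operations. init binds the cursor to a data item and clamps it to the requested length; next reports the current contiguous piece (page, offset, length); advance consumes bytes and reports whether a new piece was started. A self-contained toy over a flat buffer with fixed-size pieces makes the contract concrete (illustrative only, not kernel code):

#include <stdbool.h>
#include <stddef.h>

struct cursor {
	const char *buf;
	size_t resid;	/* bytes not yet consumed */
	size_t pos;	/* current offset into buf */
	size_t piece;	/* fixed piece size for the toy */
};

static void cursor_init(struct cursor *c, const char *buf,
			size_t len, size_t piece)
{
	c->buf = buf;
	c->resid = len;
	c->pos = 0;
	c->piece = piece;
}

/* current piece: its start plus its length (short final piece allowed) */
static const char *cursor_next(const struct cursor *c, size_t *len)
{
	size_t in_piece = c->piece - (c->pos % c->piece);

	*len = c->resid < in_piece ? c->resid : in_piece;
	return c->buf + c->pos;
}

/* consume bytes already returned by cursor_next(); true when the
 * cursor has moved on to a fresh piece and data remains */
static bool cursor_advance(struct cursor *c, size_t bytes)
{
	c->resid -= bytes;
	c->pos += bytes;
	return c->resid && (c->pos % c->piece) == 0;
}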
+ */ +static void ceph_msg_data_bio_cursor_init(struct ceph_msg_data_cursor *cursor, + size_t length) { - if (!bio) { - *iter = NULL; - *seg = 0; - return; + struct ceph_msg_data *data = cursor->data; + struct bio *bio; + + BUG_ON(data->type != CEPH_MSG_DATA_BIO); + + bio = data->bio; + BUG_ON(!bio); + BUG_ON(!bio->bi_vcnt); + + cursor->resid = min(length, data->bio_length); + cursor->bio = bio; + cursor->vector_index = 0; + cursor->vector_offset = 0; + cursor->last_piece = length <= bio->bi_io_vec[0].bv_len; +} + +static struct page *ceph_msg_data_bio_next(struct ceph_msg_data_cursor *cursor, + size_t *page_offset, + size_t *length) +{ + struct ceph_msg_data *data = cursor->data; + struct bio *bio; + struct bio_vec *bio_vec; + unsigned int index; + + BUG_ON(data->type != CEPH_MSG_DATA_BIO); + + bio = cursor->bio; + BUG_ON(!bio); + + index = cursor->vector_index; + BUG_ON(index >= (unsigned int) bio->bi_vcnt); + + bio_vec = &bio->bi_io_vec[index]; + BUG_ON(cursor->vector_offset >= bio_vec->bv_len); + *page_offset = (size_t) (bio_vec->bv_offset + cursor->vector_offset); + BUG_ON(*page_offset >= PAGE_SIZE); + if (cursor->last_piece) /* pagelist offset is always 0 */ + *length = cursor->resid; + else + *length = (size_t) (bio_vec->bv_len - cursor->vector_offset); + BUG_ON(*length > cursor->resid); + BUG_ON(*page_offset + *length > PAGE_SIZE); + + return bio_vec->bv_page; +} + +static bool ceph_msg_data_bio_advance(struct ceph_msg_data_cursor *cursor, + size_t bytes) +{ + struct bio *bio; + struct bio_vec *bio_vec; + unsigned int index; + + BUG_ON(cursor->data->type != CEPH_MSG_DATA_BIO); + + bio = cursor->bio; + BUG_ON(!bio); + + index = cursor->vector_index; + BUG_ON(index >= (unsigned int) bio->bi_vcnt); + bio_vec = &bio->bi_io_vec[index]; + + /* Advance the cursor offset */ + + BUG_ON(cursor->resid < bytes); + cursor->resid -= bytes; + cursor->vector_offset += bytes; + if (cursor->vector_offset < bio_vec->bv_len) + return false; /* more bytes to process in this segment */ + BUG_ON(cursor->vector_offset != bio_vec->bv_len); + + /* Move on to the next segment, and possibly the next bio */ + + if (++index == (unsigned int) bio->bi_vcnt) { + bio = bio->bi_next; + index = 0; + } + cursor->bio = bio; + cursor->vector_index = index; + cursor->vector_offset = 0; + + if (!cursor->last_piece) { + BUG_ON(!cursor->resid); + BUG_ON(!bio); + /* A short read is OK, so use <= rather than == */ + if (cursor->resid <= bio->bi_io_vec[index].bv_len) + cursor->last_piece = true; } - *iter = bio; - *seg = bio->bi_idx; + + return true; } +#endif /* CONFIG_BLOCK */ -static void iter_bio_next(struct bio **bio_iter, int *seg) +/* + * For a page array, a piece comes from the first page in the array + * that has not already been fully consumed. 
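The bio cursor above carries three pieces of state: the current bio, the index into its bi_io_vec array, and an offset within that vector entry; ceph_msg_data_bio_advance() steps them in that order. Stripped of the kernel types, the walk looks like this (struct seg and struct seglist are simplified stand-ins for struct bio_vec and a chained struct bio, not kernel structures):

#include <stdbool.h>
#include <stddef.h>

struct seg { size_t len; };		/* stands in for struct bio_vec */

struct seglist {			/* stands in for struct bio */
	struct seg *segs;
	unsigned int nsegs;
	struct seglist *next;		/* bi_next */
};

struct bio_cursor {
	struct seglist *list;
	unsigned int index;		/* vector_index */
	size_t offset;			/* vector_offset */
};

/* Consume bytes; returns true when a new segment becomes current. */
static bool bio_cursor_advance(struct bio_cursor *cur, size_t bytes)
{
	cur->offset += bytes;
	if (cur->offset < cur->list->segs[cur->index].len)
		return false;		/* still inside the same segment */

	/* step to the next segment, and possibly the next list element */
	if (++cur->index == cur->list->nsegs) {
		cur->list = cur->list->next;
		cur->index = 0;
	}
	cur->offset = 0;
	return true;
}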
+ */ +static void ceph_msg_data_pages_cursor_init(struct ceph_msg_data_cursor *cursor, + size_t length) { - if (*bio_iter == NULL) - return; + struct ceph_msg_data *data = cursor->data; + int page_count; + + BUG_ON(data->type != CEPH_MSG_DATA_PAGES); - BUG_ON(*seg >= (*bio_iter)->bi_vcnt); + BUG_ON(!data->pages); + BUG_ON(!data->length); - (*seg)++; - if (*seg == (*bio_iter)->bi_vcnt) - init_bio_iter((*bio_iter)->bi_next, bio_iter, seg); + cursor->resid = min(length, data->length); + page_count = calc_pages_for(data->alignment, (u64)data->length); + cursor->page_offset = data->alignment & ~PAGE_MASK; + cursor->page_index = 0; + BUG_ON(page_count > (int)USHRT_MAX); + cursor->page_count = (unsigned short)page_count; + BUG_ON(length > SIZE_MAX - cursor->page_offset); + cursor->last_piece = (size_t)cursor->page_offset + length <= PAGE_SIZE; } -#endif -static void prepare_write_message_data(struct ceph_connection *con) +static struct page * +ceph_msg_data_pages_next(struct ceph_msg_data_cursor *cursor, + size_t *page_offset, size_t *length) { - struct ceph_msg *msg = con->out_msg; + struct ceph_msg_data *data = cursor->data; - BUG_ON(!msg); - BUG_ON(!msg->hdr.data_len); + BUG_ON(data->type != CEPH_MSG_DATA_PAGES); + + BUG_ON(cursor->page_index >= cursor->page_count); + BUG_ON(cursor->page_offset >= PAGE_SIZE); - /* initialize page iterator */ - con->out_msg_pos.page = 0; - if (msg->pages) - con->out_msg_pos.page_pos = msg->page_alignment; + *page_offset = cursor->page_offset; + if (cursor->last_piece) + *length = cursor->resid; else - con->out_msg_pos.page_pos = 0; + *length = PAGE_SIZE - *page_offset; + + return data->pages[cursor->page_index]; +} + +static bool ceph_msg_data_pages_advance(struct ceph_msg_data_cursor *cursor, + size_t bytes) +{ + BUG_ON(cursor->data->type != CEPH_MSG_DATA_PAGES); + + BUG_ON(cursor->page_offset + bytes > PAGE_SIZE); + + /* Advance the cursor page offset */ + + cursor->resid -= bytes; + cursor->page_offset = (cursor->page_offset + bytes) & ~PAGE_MASK; + if (!bytes || cursor->page_offset) + return false; /* more bytes to process in the current page */ + + /* Move on to the next page; offset is already at 0 */ + + BUG_ON(cursor->page_index >= cursor->page_count); + cursor->page_index++; + cursor->last_piece = cursor->resid <= PAGE_SIZE; + + return true; +} + +/* + * For a pagelist, a piece is whatever remains to be consumed in the + * first page in the list, or the front of the next page. 
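ceph_msg_data_pages_cursor_init() derives its bookkeeping from two small calculations: the offset into the first page is alignment & ~PAGE_MASK (alignment mod PAGE_SIZE), and the page count comes from calc_pages_for(). A standalone restatement of that arithmetic with a worked value; pages_for() here is spelled out for illustration, the real helper lives in the ceph headers:

#include <stdio.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1UL << PAGE_SHIFT)

/* Pages spanned by len bytes that start at in-page offset off. */
static unsigned long pages_for(unsigned long off, unsigned long len)
{
	return ((off + len + PAGE_SIZE - 1) >> PAGE_SHIFT) -
	       (off >> PAGE_SHIFT);
}

int main(void)
{
	/*
	 * 10 KB starting 512 bytes into a page covers three pages:
	 * [512,4096) + [4096,8192) + [8192,10752).
	 */
	printf("%lu\n", pages_for(512, 10 * 1024));	/* prints 3 */
	return 0;
}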
+ */ +static void +ceph_msg_data_pagelist_cursor_init(struct ceph_msg_data_cursor *cursor, + size_t length) +{ + struct ceph_msg_data *data = cursor->data; + struct ceph_pagelist *pagelist; + struct page *page; + + BUG_ON(data->type != CEPH_MSG_DATA_PAGELIST); + + pagelist = data->pagelist; + BUG_ON(!pagelist); + + if (!length) + return; /* pagelist can be assigned but empty */ + + BUG_ON(list_empty(&pagelist->head)); + page = list_first_entry(&pagelist->head, struct page, lru); + + cursor->resid = min(length, pagelist->length); + cursor->page = page; + cursor->offset = 0; + cursor->last_piece = cursor->resid <= PAGE_SIZE; +} + +static struct page * +ceph_msg_data_pagelist_next(struct ceph_msg_data_cursor *cursor, + size_t *page_offset, size_t *length) +{ + struct ceph_msg_data *data = cursor->data; + struct ceph_pagelist *pagelist; + + BUG_ON(data->type != CEPH_MSG_DATA_PAGELIST); + + pagelist = data->pagelist; + BUG_ON(!pagelist); + + BUG_ON(!cursor->page); + BUG_ON(cursor->offset + cursor->resid != pagelist->length); + + /* offset of first page in pagelist is always 0 */ + *page_offset = cursor->offset & ~PAGE_MASK; + if (cursor->last_piece) + *length = cursor->resid; + else + *length = PAGE_SIZE - *page_offset; + + return cursor->page; +} + +static bool ceph_msg_data_pagelist_advance(struct ceph_msg_data_cursor *cursor, + size_t bytes) +{ + struct ceph_msg_data *data = cursor->data; + struct ceph_pagelist *pagelist; + + BUG_ON(data->type != CEPH_MSG_DATA_PAGELIST); + + pagelist = data->pagelist; + BUG_ON(!pagelist); + + BUG_ON(cursor->offset + cursor->resid != pagelist->length); + BUG_ON((cursor->offset & ~PAGE_MASK) + bytes > PAGE_SIZE); + + /* Advance the cursor offset */ + + cursor->resid -= bytes; + cursor->offset += bytes; + /* offset of first page in pagelist is always 0 */ + if (!bytes || cursor->offset & ~PAGE_MASK) + return false; /* more bytes to process in the current page */ + + /* Move on to the next page */ + + BUG_ON(list_is_last(&cursor->page->lru, &pagelist->head)); + cursor->page = list_entry_next(cursor->page, lru); + cursor->last_piece = cursor->resid <= PAGE_SIZE; + + return true; +} + +/* + * Message data is handled (sent or received) in pieces, where each + * piece resides on a single page. The network layer might not + * consume an entire piece at once. A data item's cursor keeps + * track of which piece is next to process and how much remains to + * be processed in that piece. It also tracks whether the current + * piece is the last one in the data item. 
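The pagelist cursor keeps a single running byte offset and re-derives the in-page offset with offset & ~PAGE_MASK, which for a power-of-two PAGE_SIZE is exactly offset % PAGE_SIZE. A tiny self-checking demonstration of that identity (constants hard-coded for the common 4 KB page):

#include <assert.h>

#define PAGE_SIZE 4096UL
#define PAGE_MASK (~(PAGE_SIZE - 1))

int main(void)
{
	for (unsigned long off = 0; off < 3 * PAGE_SIZE; off += 977)
		assert((off & ~PAGE_MASK) == off % PAGE_SIZE);
	return 0;
}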
+ */ +static void __ceph_msg_data_cursor_init(struct ceph_msg_data_cursor *cursor) +{ + size_t length = cursor->total_resid; + + switch (cursor->data->type) { + case CEPH_MSG_DATA_PAGELIST: + ceph_msg_data_pagelist_cursor_init(cursor, length); + break; + case CEPH_MSG_DATA_PAGES: + ceph_msg_data_pages_cursor_init(cursor, length); + break; #ifdef CONFIG_BLOCK - if (msg->bio) - init_bio_iter(msg->bio, &msg->bio_iter, &msg->bio_seg); -#endif - con->out_msg_pos.data_pos = 0; - con->out_msg_pos.did_page_crc = false; - con->out_more = 1; /* data + footer will follow */ + case CEPH_MSG_DATA_BIO: + ceph_msg_data_bio_cursor_init(cursor, length); + break; +#endif /* CONFIG_BLOCK */ + case CEPH_MSG_DATA_NONE: + default: + /* BUG(); */ + break; + } + cursor->need_crc = true; +} + +static void ceph_msg_data_cursor_init(struct ceph_msg *msg, size_t length) +{ + struct ceph_msg_data_cursor *cursor = &msg->cursor; + struct ceph_msg_data *data; + + BUG_ON(!length); + BUG_ON(length > msg->data_length); + BUG_ON(list_empty(&msg->data)); + + cursor->data_head = &msg->data; + cursor->total_resid = length; + data = list_first_entry(&msg->data, struct ceph_msg_data, links); + cursor->data = data; + + __ceph_msg_data_cursor_init(cursor); +} + +/* + * Return the page containing the next piece to process for a given + * data item, and supply the page offset and length of that piece. + * Indicate whether this is the last piece in this data item. + */ +static struct page *ceph_msg_data_next(struct ceph_msg_data_cursor *cursor, + size_t *page_offset, size_t *length, + bool *last_piece) +{ + struct page *page; + + switch (cursor->data->type) { + case CEPH_MSG_DATA_PAGELIST: + page = ceph_msg_data_pagelist_next(cursor, page_offset, length); + break; + case CEPH_MSG_DATA_PAGES: + page = ceph_msg_data_pages_next(cursor, page_offset, length); + break; +#ifdef CONFIG_BLOCK + case CEPH_MSG_DATA_BIO: + page = ceph_msg_data_bio_next(cursor, page_offset, length); + break; +#endif /* CONFIG_BLOCK */ + case CEPH_MSG_DATA_NONE: + default: + page = NULL; + break; + } + BUG_ON(!page); + BUG_ON(*page_offset + *length > PAGE_SIZE); + BUG_ON(!*length); + if (last_piece) + *last_piece = cursor->last_piece; + + return page; +} + +/* + * Returns true if the result moves the cursor on to the next piece + * of the data item. 
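ceph_msg_data_next() above and ceph_msg_data_advance() just below form a peek/consume iterator: look at the current piece, hand it to the socket, then advance by however many bytes were actually taken. The shape of that protocol, reduced to a runnable toy over a byte buffer; a sink that takes at most three bytes per call models short writes:

#include <stdio.h>

struct cursor { const char *buf; size_t pos, resid; };

/* Peek: the next contiguous piece, at most 4 bytes in this toy. */
static const char *cursor_next(const struct cursor *c, size_t *len)
{
	*len = c->resid < 4 ? c->resid : 4;
	return c->buf + c->pos;
}

/* Consume: account only for what the sink actually accepted. */
static void cursor_advance(struct cursor *c, size_t bytes)
{
	c->pos += bytes;
	c->resid -= bytes;
}

int main(void)
{
	struct cursor c = { "hello, messenger", 0, 16 };

	while (c.resid) {
		size_t len, taken;
		const char *p = cursor_next(&c, &len);

		taken = len < 3 ? len : 3;	/* model a short write */
		fwrite(p, 1, taken, stdout);
		cursor_advance(&c, taken);
	}
	putchar('\n');
	return 0;
}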
+ */ +static bool ceph_msg_data_advance(struct ceph_msg_data_cursor *cursor, + size_t bytes) +{ + bool new_piece; + + BUG_ON(bytes > cursor->resid); + switch (cursor->data->type) { + case CEPH_MSG_DATA_PAGELIST: + new_piece = ceph_msg_data_pagelist_advance(cursor, bytes); + break; + case CEPH_MSG_DATA_PAGES: + new_piece = ceph_msg_data_pages_advance(cursor, bytes); + break; +#ifdef CONFIG_BLOCK + case CEPH_MSG_DATA_BIO: + new_piece = ceph_msg_data_bio_advance(cursor, bytes); + break; +#endif /* CONFIG_BLOCK */ + case CEPH_MSG_DATA_NONE: + default: + BUG(); + break; + } + cursor->total_resid -= bytes; + + if (!cursor->resid && cursor->total_resid) { + WARN_ON(!cursor->last_piece); + BUG_ON(list_is_last(&cursor->data->links, cursor->data_head)); + cursor->data = list_entry_next(cursor->data, links); + __ceph_msg_data_cursor_init(cursor); + new_piece = true; + } + cursor->need_crc = new_piece; + + return new_piece; +} + +static void prepare_message_data(struct ceph_msg *msg, u32 data_len) +{ + BUG_ON(!msg); + BUG_ON(!data_len); + + /* Initialize data cursor */ + + ceph_msg_data_cursor_init(msg, (size_t)data_len); } /* @@ -752,16 +1214,12 @@ static void prepare_write_message(struct ceph_connection *con) m->hdr.seq = cpu_to_le64(++con->out_seq); m->needs_out_seq = false; } -#ifdef CONFIG_BLOCK - else - m->bio_iter = NULL; -#endif + WARN_ON(m->data_length != le32_to_cpu(m->hdr.data_len)); - dout("prepare_write_message %p seq %lld type %d len %d+%d+%d %d pgs\n", + dout("prepare_write_message %p seq %lld type %d len %d+%d+%zd\n", m, con->out_seq, le16_to_cpu(m->hdr.type), le32_to_cpu(m->hdr.front_len), le32_to_cpu(m->hdr.middle_len), - le32_to_cpu(m->hdr.data_len), - m->nr_pages); + m->data_length); BUG_ON(le32_to_cpu(m->hdr.front_len) != m->front.iov_len); /* tag + hdr + front + middle */ @@ -792,13 +1250,15 @@ static void prepare_write_message(struct ceph_connection *con) /* is there a data payload? */ con->out_msg->footer.data_crc = 0; - if (m->hdr.data_len) - prepare_write_message_data(con); - else + if (m->data_length) { + prepare_message_data(con->out_msg, m->data_length); + con->out_more = 1; /* data + footer will follow */ + } else { /* no, queue up footer too and be done */ prepare_write_message_footer(con); + } - set_bit(CON_FLAG_WRITE_PENDING, &con->flags); + con_flag_set(con, CON_FLAG_WRITE_PENDING); } /* @@ -819,7 +1279,25 @@ static void prepare_write_ack(struct ceph_connection *con) &con->out_temp_ack); con->out_more = 1; /* more will follow.. eventually.. 
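ceph_msg_data_advance() adds one wrinkle beyond the per-item cursors: when the current data item runs dry (resid == 0) but the message as a whole still has bytes left (total_resid != 0), the cursor hops to the next item on the list and re-initializes itself. The hop in miniature, with toy types and the list reduced to a next pointer:

#include <stdbool.h>
#include <stddef.h>

struct item { size_t len; struct item *next; };

struct mcursor {
	struct item *item;
	size_t resid;		/* left in the current item */
	size_t total_resid;	/* left in the whole message */
};

/* Returns true when the consumer should restart on a fresh piece. */
static bool mcursor_advance(struct mcursor *c, size_t bytes)
{
	bool new_piece = false;

	c->resid -= bytes;
	c->total_resid -= bytes;

	if (!c->resid && c->total_resid) {
		c->item = c->item->next;	/* hop to the next data item */
		c->resid = c->item->len;	/* re-init for the new item */
		new_piece = true;
	}
	return new_piece;
}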
*/ - set_bit(CON_FLAG_WRITE_PENDING, &con->flags); + con_flag_set(con, CON_FLAG_WRITE_PENDING); +} + +/* + * Prepare to share the seq during handshake + */ +static void prepare_write_seq(struct ceph_connection *con) +{ + dout("prepare_write_seq %p %llu -> %llu\n", con, + con->in_seq_acked, con->in_seq); + con->in_seq_acked = con->in_seq; + + con_out_kvec_reset(con); + + con->out_temp_ack = cpu_to_le64(con->in_seq_acked); + con_out_kvec_add(con, sizeof (con->out_temp_ack), + &con->out_temp_ack); + + con_flag_set(con, CON_FLAG_WRITE_PENDING); } /* @@ -830,7 +1308,7 @@ static void prepare_write_keepalive(struct ceph_connection *con) dout("prepare_write_keepalive %p\n", con); con_out_kvec_reset(con); con_out_kvec_add(con, sizeof (tag_keepalive), &tag_keepalive); - set_bit(CON_FLAG_WRITE_PENDING, &con->flags); + con_flag_set(con, CON_FLAG_WRITE_PENDING); } /* @@ -873,7 +1351,7 @@ static void prepare_write_banner(struct ceph_connection *con) &con->msgr->my_enc_addr); con->out_more = 0; - set_bit(CON_FLAG_WRITE_PENDING, &con->flags); + con_flag_set(con, CON_FLAG_WRITE_PENDING); } static int prepare_write_connect(struct ceph_connection *con) @@ -923,7 +1401,7 @@ static int prepare_write_connect(struct ceph_connection *con) auth->authorizer_buf); con->out_more = 0; - set_bit(CON_FLAG_WRITE_PENDING, &con->flags); + con_flag_set(con, CON_FLAG_WRITE_PENDING); return 0; } @@ -971,35 +1449,19 @@ out: return ret; /* done! */ } -static void out_msg_pos_next(struct ceph_connection *con, struct page *page, - size_t len, size_t sent, bool in_trail) +static u32 ceph_crc32c_page(u32 crc, struct page *page, + unsigned int page_offset, + unsigned int length) { - struct ceph_msg *msg = con->out_msg; - - BUG_ON(!msg); - BUG_ON(!sent); + char *kaddr; - con->out_msg_pos.data_pos += sent; - con->out_msg_pos.page_pos += sent; - if (sent < len) - return; + kaddr = kmap(page); + BUG_ON(kaddr == NULL); + crc = crc32c(crc, kaddr + page_offset, length); + kunmap(page); - BUG_ON(sent != len); - con->out_msg_pos.page_pos = 0; - con->out_msg_pos.page++; - con->out_msg_pos.did_page_crc = false; - if (in_trail) - list_move_tail(&page->lru, - &msg->trail->head); - else if (msg->pagelist) - list_move_tail(&page->lru, - &msg->pagelist->head); -#ifdef CONFIG_BLOCK - else if (msg->bio) - iter_bio_next(&msg->bio_iter, &msg->bio_seg); -#endif + return crc; } - /* * Write as much message data payload as we can. If we finish, queue * up the footer. @@ -1007,21 +1469,17 @@ static void out_msg_pos_next(struct ceph_connection *con, struct page *page, * 0 -> socket full, but more to do * <0 -> error */ -static int write_partial_msg_pages(struct ceph_connection *con) +static int write_partial_message_data(struct ceph_connection *con) { struct ceph_msg *msg = con->out_msg; - unsigned int data_len = le32_to_cpu(msg->hdr.data_len); - size_t len; + struct ceph_msg_data_cursor *cursor = &msg->cursor; bool do_datacrc = !con->msgr->nocrc; - int ret; - int total_max_write; - bool in_trail = false; - const size_t trail_len = (msg->trail ? msg->trail->length : 0); - const size_t trail_off = data_len - trail_len; + u32 crc; + + dout("%s %p msg %p\n", __func__, con, msg); - dout("write_partial_msg_pages %p msg %p page %d/%d offset %d\n", - con, msg, con->out_msg_pos.page, msg->nr_pages, - con->out_msg_pos.page_pos); + if (list_empty(&msg->data)) + return -EINVAL; /* * Iterate through each page that contains data to be @@ -1031,72 +1489,41 @@ static int write_partial_msg_pages(struct ceph_connection *con) * need to map the page. 
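ceph_crc32c_page() lets the data checksum accumulate piece by piece: the CRC computed so far is passed back in as the seed for the next chunk, so the result over split input equals the CRC over the whole. A bit-at-a-time CRC32C (Castagnoli polynomial, reflected form 0x82f63b78) is enough to demonstrate the chaining property; production code would use a table-driven or hardware implementation:

#include <assert.h>
#include <stddef.h>
#include <stdint.h>

/* Bitwise CRC32C with the usual pre/post inversion. */
static uint32_t crc32c(uint32_t crc, const void *buf, size_t len)
{
	const uint8_t *p = buf;

	crc = ~crc;
	while (len--) {
		crc ^= *p++;
		for (int k = 0; k < 8; k++)
			crc = (crc >> 1) ^ (0x82f63b78U & -(crc & 1U));
	}
	return ~crc;
}

int main(void)
{
	const char msg[] = "0123456789";
	uint32_t whole = crc32c(0, msg, 10);
	/* the same bytes in two pieces, chaining the intermediate crc */
	uint32_t split = crc32c(crc32c(0, msg, 4), msg + 4, 6);

	assert(whole == split);
	return 0;
}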
If we have no pages, they have * been revoked, so use the zero page. */ - while (data_len > con->out_msg_pos.data_pos) { - struct page *page = NULL; - int max_write = PAGE_SIZE; - int bio_offset = 0; - - in_trail = in_trail || con->out_msg_pos.data_pos >= trail_off; - if (!in_trail) - total_max_write = trail_off - con->out_msg_pos.data_pos; - - if (in_trail) { - total_max_write = data_len - con->out_msg_pos.data_pos; - - page = list_first_entry(&msg->trail->head, - struct page, lru); - } else if (msg->pages) { - page = msg->pages[con->out_msg_pos.page]; - } else if (msg->pagelist) { - page = list_first_entry(&msg->pagelist->head, - struct page, lru); -#ifdef CONFIG_BLOCK - } else if (msg->bio) { - struct bio_vec *bv; + crc = do_datacrc ? le32_to_cpu(msg->footer.data_crc) : 0; + while (cursor->resid) { + struct page *page; + size_t page_offset; + size_t length; + bool last_piece; + bool need_crc; + int ret; + + page = ceph_msg_data_next(&msg->cursor, &page_offset, &length, + &last_piece); + ret = ceph_tcp_sendpage(con->sock, page, page_offset, + length, last_piece); + if (ret <= 0) { + if (do_datacrc) + msg->footer.data_crc = cpu_to_le32(crc); - bv = bio_iovec_idx(msg->bio_iter, msg->bio_seg); - page = bv->bv_page; - bio_offset = bv->bv_offset; - max_write = bv->bv_len; -#endif - } else { - page = zero_page; - } - len = min_t(int, max_write - con->out_msg_pos.page_pos, - total_max_write); - - if (do_datacrc && !con->out_msg_pos.did_page_crc) { - void *base; - u32 crc = le32_to_cpu(msg->footer.data_crc); - char *kaddr; - - kaddr = kmap(page); - BUG_ON(kaddr == NULL); - base = kaddr + con->out_msg_pos.page_pos + bio_offset; - crc = crc32c(crc, base, len); - kunmap(page); - msg->footer.data_crc = cpu_to_le32(crc); - con->out_msg_pos.did_page_crc = true; + return ret; } - ret = ceph_tcp_sendpage(con->sock, page, - con->out_msg_pos.page_pos + bio_offset, - len, 1); - if (ret <= 0) - goto out; - - out_msg_pos_next(con, page, len, (size_t) ret, in_trail); + if (do_datacrc && cursor->need_crc) + crc = ceph_crc32c_page(crc, page, page_offset, length); + need_crc = ceph_msg_data_advance(&msg->cursor, (size_t)ret); } - dout("write_partial_msg_pages %p msg %p done\n", con, msg); + dout("%s %p msg %p done\n", __func__, con, msg); /* prepare and queue up footer, too */ - if (!do_datacrc) + if (do_datacrc) + msg->footer.data_crc = cpu_to_le32(crc); + else msg->footer.flags |= CEPH_MSG_FOOTER_NOCRC; con_out_kvec_reset(con); prepare_write_message_footer(con); - ret = 1; -out: - return ret; + + return 1; /* must return > 0 to indicate success */ } /* @@ -1109,7 +1536,7 @@ static int write_partial_skip(struct ceph_connection *con) while (con->out_skip > 0) { size_t size = min(con->out_skip, (int) PAGE_CACHE_SIZE); - ret = ceph_tcp_sendpage(con->sock, zero_page, 0, size, 1); + ret = ceph_tcp_sendpage(con->sock, zero_page, 0, size, true); if (ret <= 0) goto out; con->out_skip -= ret; @@ -1140,6 +1567,13 @@ static void prepare_read_ack(struct ceph_connection *con) con->in_base_pos = 0; } +static void prepare_read_seq(struct ceph_connection *con) +{ + dout("prepare_read_seq %p\n", con); + con->in_base_pos = 0; + con->in_tag = CEPH_MSGR_TAG_SEQ; +} + static void prepare_read_tag(struct ceph_connection *con) { dout("prepare_read_tag %p\n", con); @@ -1546,7 +1980,6 @@ static int process_connect(struct ceph_connection *con) con->error_msg = "connect authorization failure"; return -1; } - con->auth_retry = 1; con_out_kvec_reset(con); ret = prepare_write_connect(con); if (ret < 0) @@ -1617,6 +2050,7 @@ static int 
process_connect(struct ceph_connection *con) prepare_read_connect(con); break; + case CEPH_MSGR_TAG_SEQ: case CEPH_MSGR_TAG_READY: if (req_feat & ~server_feat) { pr_err("%s%lld %s protocol feature mismatch," @@ -1631,7 +2065,7 @@ static int process_connect(struct ceph_connection *con) WARN_ON(con->state != CON_STATE_NEGOTIATING); con->state = CON_STATE_OPEN; - + con->auth_retry = 0; /* we authenticated; clear flag */ con->peer_global_seq = le32_to_cpu(con->in_reply.global_seq); con->connect_seq++; con->peer_features = server_feat; @@ -1643,11 +2077,16 @@ static int process_connect(struct ceph_connection *con) le32_to_cpu(con->in_reply.connect_seq)); if (con->in_reply.flags & CEPH_MSG_CONNECT_LOSSY) - set_bit(CON_FLAG_LOSSYTX, &con->flags); + con_flag_set(con, CON_FLAG_LOSSYTX); con->delay = 0; /* reset backoff memory */ - prepare_read_tag(con); + if (con->in_reply.tag == CEPH_MSGR_TAG_SEQ) { + prepare_write_seq(con); + prepare_read_seq(con); + } else { + prepare_read_tag(con); + } break; case CEPH_MSGR_TAG_WAIT: @@ -1681,7 +2120,6 @@ static int read_partial_ack(struct ceph_connection *con) return read_partial(con, end, size, &con->in_temp_ack); } - /* * We can finally discard anything that's been acked. */ @@ -1706,8 +2144,6 @@ static void process_ack(struct ceph_connection *con) } - - static int read_partial_message_section(struct ceph_connection *con, struct kvec *section, unsigned int sec_len, u32 *crc) @@ -1731,77 +2167,49 @@ static int read_partial_message_section(struct ceph_connection *con, return 1; } -static int ceph_con_in_msg_alloc(struct ceph_connection *con, int *skip); - -static int read_partial_message_pages(struct ceph_connection *con, - struct page **pages, - unsigned int data_len, bool do_datacrc) +static int read_partial_msg_data(struct ceph_connection *con) { - void *p; + struct ceph_msg *msg = con->in_msg; + struct ceph_msg_data_cursor *cursor = &msg->cursor; + const bool do_datacrc = !con->msgr->nocrc; + struct page *page; + size_t page_offset; + size_t length; + u32 crc = 0; int ret; - int left; - - left = min((int)(data_len - con->in_msg_pos.data_pos), - (int)(PAGE_SIZE - con->in_msg_pos.page_pos)); - /* (page) data */ - BUG_ON(pages == NULL); - p = kmap(pages[con->in_msg_pos.page]); - ret = ceph_tcp_recvmsg(con->sock, p + con->in_msg_pos.page_pos, - left); - if (ret > 0 && do_datacrc) - con->in_data_crc = - crc32c(con->in_data_crc, - p + con->in_msg_pos.page_pos, ret); - kunmap(pages[con->in_msg_pos.page]); - if (ret <= 0) - return ret; - con->in_msg_pos.data_pos += ret; - con->in_msg_pos.page_pos += ret; - if (con->in_msg_pos.page_pos == PAGE_SIZE) { - con->in_msg_pos.page_pos = 0; - con->in_msg_pos.page++; - } - - return ret; -} -#ifdef CONFIG_BLOCK -static int read_partial_message_bio(struct ceph_connection *con, - struct bio **bio_iter, int *bio_seg, - unsigned int data_len, bool do_datacrc) -{ - struct bio_vec *bv = bio_iovec_idx(*bio_iter, *bio_seg); - void *p; - int ret, left; + BUG_ON(!msg); + if (list_empty(&msg->data)) + return -EIO; - left = min((int)(data_len - con->in_msg_pos.data_pos), - (int)(bv->bv_len - con->in_msg_pos.page_pos)); + if (do_datacrc) + crc = con->in_data_crc; + while (cursor->resid) { + page = ceph_msg_data_next(&msg->cursor, &page_offset, &length, + NULL); + ret = ceph_tcp_recvpage(con->sock, page, page_offset, length); + if (ret <= 0) { + if (do_datacrc) + con->in_data_crc = crc; - p = kmap(bv->bv_page) + bv->bv_offset; + return ret; + } - ret = ceph_tcp_recvmsg(con->sock, p + con->in_msg_pos.page_pos, - left); - if (ret > 0 && 
do_datacrc) - con->in_data_crc = - crc32c(con->in_data_crc, - p + con->in_msg_pos.page_pos, ret); - kunmap(bv->bv_page); - if (ret <= 0) - return ret; - con->in_msg_pos.data_pos += ret; - con->in_msg_pos.page_pos += ret; - if (con->in_msg_pos.page_pos == bv->bv_len) { - con->in_msg_pos.page_pos = 0; - iter_bio_next(bio_iter, bio_seg); + if (do_datacrc) + crc = ceph_crc32c_page(crc, page, page_offset, ret); + (void) ceph_msg_data_advance(&msg->cursor, (size_t)ret); } + if (do_datacrc) + con->in_data_crc = crc; - return ret; + return 1; /* must return > 0 to indicate success */ } -#endif /* * read (part of) a message. */ +static int ceph_con_in_msg_alloc(struct ceph_connection *con, int *skip); + static int read_partial_message(struct ceph_connection *con) { struct ceph_msg *m = con->in_msg; @@ -1834,7 +2242,7 @@ static int read_partial_message(struct ceph_connection *con) if (front_len > CEPH_MSG_MAX_FRONT_LEN) return -EIO; middle_len = le32_to_cpu(con->in_hdr.middle_len); - if (middle_len > CEPH_MSG_MAX_DATA_LEN) + if (middle_len > CEPH_MSG_MAX_MIDDLE_LEN) return -EIO; data_len = le32_to_cpu(con->in_hdr.data_len); if (data_len > CEPH_MSG_MAX_DATA_LEN) @@ -1863,14 +2271,22 @@ static int read_partial_message(struct ceph_connection *con) int skip = 0; dout("got hdr type %d front %d data %d\n", con->in_hdr.type, - con->in_hdr.front_len, con->in_hdr.data_len); + front_len, data_len); ret = ceph_con_in_msg_alloc(con, &skip); if (ret < 0) return ret; + + BUG_ON(!con->in_msg ^ skip); + if (con->in_msg && data_len > con->in_msg->data_length) { + pr_warning("%s skipping long message (%u > %zd)\n", + __func__, data_len, con->in_msg->data_length); + ceph_msg_put(con->in_msg); + con->in_msg = NULL; + skip = 1; + } if (skip) { /* skip this message */ dout("alloc_msg said skip message\n"); - BUG_ON(con->in_msg); con->in_base_pos = -front_len - middle_len - data_len - sizeof(m->footer); con->in_tag = CEPH_MSGR_TAG_READY; @@ -1885,17 +2301,10 @@ static int read_partial_message(struct ceph_connection *con) if (m->middle) m->middle->vec.iov_len = 0; - con->in_msg_pos.page = 0; - if (m->pages) - con->in_msg_pos.page_pos = m->page_alignment; - else - con->in_msg_pos.page_pos = 0; - con->in_msg_pos.data_pos = 0; + /* prepare for data payload, if any */ -#ifdef CONFIG_BLOCK - if (m->bio) - init_bio_iter(m->bio, &m->bio_iter, &m->bio_seg); -#endif + if (data_len) + prepare_message_data(con->in_msg, data_len); } /* front */ @@ -1914,24 +2323,10 @@ static int read_partial_message(struct ceph_connection *con) } /* (page) data */ - while (con->in_msg_pos.data_pos < data_len) { - if (m->pages) { - ret = read_partial_message_pages(con, m->pages, - data_len, do_datacrc); - if (ret <= 0) - return ret; -#ifdef CONFIG_BLOCK - } else if (m->bio) { - BUG_ON(!m->bio_iter); - ret = read_partial_message_bio(con, - &m->bio_iter, &m->bio_seg, - data_len, do_datacrc); - if (ret <= 0) - return ret; -#endif - } else { - BUG_ON(1); - } + if (data_len) { + ret = read_partial_msg_data(con); + if (ret <= 0) + return ret; } /* footer */ @@ -2057,13 +2452,13 @@ more_kvec: goto do_next; } - ret = write_partial_msg_pages(con); + ret = write_partial_message_data(con); if (ret == 1) goto more_kvec; /* we need to send the footer, too! 
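One small but real fix above: middle_len was being range-checked against CEPH_MSG_MAX_DATA_LEN instead of CEPH_MSG_MAX_MIDDLE_LEN. The pattern worth noting is that all three header lengths are validated before anything is allocated from them. A compact sketch with invented limits (the real constants live in the ceph headers):

#include <errno.h>
#include <stdint.h>

/* Illustrative limits only. */
#define MAX_FRONT_LEN	(16U << 20)
#define MAX_MIDDLE_LEN	(16U << 20)
#define MAX_DATA_LEN	(16U << 20)

struct wire_hdr { uint32_t front_len, middle_len, data_len; };

/* Reject untrusted lengths before they are used to size buffers. */
static int validate_lengths(const struct wire_hdr *hdr)
{
	if (hdr->front_len > MAX_FRONT_LEN)
		return -EIO;
	if (hdr->middle_len > MAX_MIDDLE_LEN)	/* each field vs its own cap */
		return -EIO;
	if (hdr->data_len > MAX_DATA_LEN)
		return -EIO;
	return 0;
}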
*/ if (ret == 0) goto out; if (ret < 0) { - dout("try_write write_partial_msg_pages err %d\n", + dout("try_write write_partial_message_data err %d\n", ret); goto out; } @@ -2080,15 +2475,14 @@ do_next: prepare_write_ack(con); goto more; } - if (test_and_clear_bit(CON_FLAG_KEEPALIVE_PENDING, - &con->flags)) { + if (con_flag_test_and_clear(con, CON_FLAG_KEEPALIVE_PENDING)) { prepare_write_keepalive(con); goto more; } } /* Nothing to do! */ - clear_bit(CON_FLAG_WRITE_PENDING, &con->flags); + con_flag_clear(con, CON_FLAG_WRITE_PENDING); dout("try_write nothing else to write.\n"); ret = 0; out: @@ -2216,7 +2610,12 @@ more: prepare_read_tag(con); goto more; } - if (con->in_tag == CEPH_MSGR_TAG_ACK) { + if (con->in_tag == CEPH_MSGR_TAG_ACK || + con->in_tag == CEPH_MSGR_TAG_SEQ) { + /* + * the final handshake seq exchange is semantically + * equivalent to an ACK + */ ret = read_partial_ack(con); if (ret <= 0) goto out; @@ -2268,7 +2667,7 @@ static void queue_con(struct ceph_connection *con) static bool con_sock_closed(struct ceph_connection *con) { - if (!test_and_clear_bit(CON_FLAG_SOCK_CLOSED, &con->flags)) + if (!con_flag_test_and_clear(con, CON_FLAG_SOCK_CLOSED)) return false; #define CASE(x) \ @@ -2295,6 +2694,41 @@ static bool con_sock_closed(struct ceph_connection *con) return true; } +static bool con_backoff(struct ceph_connection *con) +{ + int ret; + + if (!con_flag_test_and_clear(con, CON_FLAG_BACKOFF)) + return false; + + ret = queue_con_delay(con, round_jiffies_relative(con->delay)); + if (ret) { + dout("%s: con %p FAILED to back off %lu\n", __func__, + con, con->delay); + BUG_ON(ret == -ENOENT); + con_flag_set(con, CON_FLAG_BACKOFF); + } + + return true; +} + +/* Finish fault handling; con->mutex must *not* be held here */ + +static void con_fault_finish(struct ceph_connection *con) +{ + /* + * in case we faulted due to authentication, invalidate our + * current tickets so that we can get new ones. + */ + if (con->auth_retry && con->ops->invalidate_authorizer) { + dout("calling invalidate_authorizer()\n"); + con->ops->invalidate_authorizer(con); + } + + if (con->ops->fault) + con->ops->fault(con); +} + /* * Do some work on a connection. Drop a connection ref when we're done. 
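con_backoff() above re-queues the connection's work item after con->delay jiffies, and con_fault() (below) grows that delay on each failure. The classic doubling schedule, shown standalone; BASE_DELAY and MAX_DELAY are arbitrary stand-ins for BASE_DELAY_INTERVAL and MAX_DELAY_INTERVAL:

#include <stdio.h>

#define BASE_DELAY	1UL	/* stand-in for BASE_DELAY_INTERVAL */
#define MAX_DELAY	64UL	/* stand-in for MAX_DELAY_INTERVAL */

/* Next retry delay: start at BASE, double per fault, stop at MAX. */
static unsigned long next_delay(unsigned long delay)
{
	if (delay == 0)
		return BASE_DELAY;
	if (delay < MAX_DELAY)
		delay *= 2;
	return delay;
}

int main(void)
{
	unsigned long d = 0;

	for (int fault = 0; fault < 9; fault++) {
		d = next_delay(d);
		printf("fault %d -> delay %lu\n", fault, d);
	}
	/* delays: 1 2 4 8 16 32 64 64 64 */
	return 0;
}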
*/ @@ -2302,73 +2736,68 @@ static void con_work(struct work_struct *work) { struct ceph_connection *con = container_of(work, struct ceph_connection, work.work); - int ret; + bool fault; mutex_lock(&con->mutex); -restart: - if (con_sock_closed(con)) - goto fault; + while (true) { + int ret; - if (test_and_clear_bit(CON_FLAG_BACKOFF, &con->flags)) { - dout("con_work %p backing off\n", con); - ret = queue_con_delay(con, round_jiffies_relative(con->delay)); - if (ret) { - dout("con_work %p FAILED to back off %lu\n", con, - con->delay); - BUG_ON(ret == -ENOENT); - set_bit(CON_FLAG_BACKOFF, &con->flags); + if ((fault = con_sock_closed(con))) { + dout("%s: con %p SOCK_CLOSED\n", __func__, con); + break; + } + if (con_backoff(con)) { + dout("%s: con %p BACKOFF\n", __func__, con); + break; + } + if (con->state == CON_STATE_STANDBY) { + dout("%s: con %p STANDBY\n", __func__, con); + break; + } + if (con->state == CON_STATE_CLOSED) { + dout("%s: con %p CLOSED\n", __func__, con); + BUG_ON(con->sock); + break; + } + if (con->state == CON_STATE_PREOPEN) { + dout("%s: con %p PREOPEN\n", __func__, con); + BUG_ON(con->sock); } - goto done; - } - if (con->state == CON_STATE_STANDBY) { - dout("con_work %p STANDBY\n", con); - goto done; - } - if (con->state == CON_STATE_CLOSED) { - dout("con_work %p CLOSED\n", con); - BUG_ON(con->sock); - goto done; - } - if (con->state == CON_STATE_PREOPEN) { - dout("con_work OPENING\n"); - BUG_ON(con->sock); - } + ret = try_read(con); + if (ret < 0) { + if (ret == -EAGAIN) + continue; + con->error_msg = "socket error on read"; + fault = true; + break; + } - ret = try_read(con); - if (ret == -EAGAIN) - goto restart; - if (ret < 0) { - con->error_msg = "socket error on read"; - goto fault; - } + ret = try_write(con); + if (ret < 0) { + if (ret == -EAGAIN) + continue; + con->error_msg = "socket error on write"; + fault = true; + } - ret = try_write(con); - if (ret == -EAGAIN) - goto restart; - if (ret < 0) { - con->error_msg = "socket error on write"; - goto fault; + break; /* If we make it to here, we're done */ } - -done: + if (fault) + con_fault(con); mutex_unlock(&con->mutex); -done_unlocked: - con->ops->put(con); - return; -fault: - ceph_fault(con); /* error/fault path */ - goto done_unlocked; -} + if (fault) + con_fault_finish(con); + con->ops->put(con); +} /* * Generic error/fault handler. A retry mechanism is used with * exponential backoff */ -static void ceph_fault(struct ceph_connection *con) - __releases(con->mutex) +static void con_fault(struct ceph_connection *con) { pr_warning("%s%lld %s %s\n", ENTITY_NAME(con->peer_name), ceph_pr_addr(&con->peer_addr.in_addr), con->error_msg); @@ -2381,10 +2810,10 @@ static void ceph_fault(struct ceph_connection *con) con_close_socket(con); - if (test_bit(CON_FLAG_LOSSYTX, &con->flags)) { + if (con_flag_test(con, CON_FLAG_LOSSYTX)) { dout("fault on LOSSYTX channel, marking CLOSED\n"); con->state = CON_STATE_CLOSED; - goto out_unlock; + return; } if (con->in_msg) { @@ -2401,9 +2830,9 @@ static void ceph_fault(struct ceph_connection *con) /* If there are no messages queued or keepalive pending, place * the connection in a STANDBY state */ if (list_empty(&con->out_queue) && - !test_bit(CON_FLAG_KEEPALIVE_PENDING, &con->flags)) { + !con_flag_test(con, CON_FLAG_KEEPALIVE_PENDING)) { dout("fault %p setting STANDBY clearing WRITE_PENDING\n", con); - clear_bit(CON_FLAG_WRITE_PENDING, &con->flags); + con_flag_clear(con, CON_FLAG_WRITE_PENDING); con->state = CON_STATE_STANDBY; } else { /* retry after a delay. 
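The con_work() rewrite above replaces the goto restart/goto fault/goto done tangle with one while (true) loop that breaks carrying a fault flag; the mutex is then dropped exactly once, and the part of fault handling that must run unlocked happens after. The control shape in miniature, runnable, with a pthread mutex standing in for con->mutex and a toy try_step() standing in for try_read()/try_write():

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

/* Toy stand-in for try_read()/try_write(): one -EAGAIN, then success. */
static int try_step(void)
{
	static int calls;

	return calls++ == 0 ? -11 /* EAGAIN */ : 0;
}

static void fault_locked(void)   { puts("fault handling (locked)"); }
static void fault_unlocked(void) { puts("fault handling (unlocked)"); }

static void work(void)
{
	bool fault = false;

	pthread_mutex_lock(&lock);
	while (true) {
		int ret = try_step();

		if (ret == -11)		/* old code: goto restart */
			continue;
		if (ret < 0)		/* old code: goto fault */
			fault = true;
		break;			/* done, one way or the other */
	}
	if (fault)
		fault_locked();		/* con_fault(): needs the lock */
	pthread_mutex_unlock(&lock);
	if (fault)
		fault_unlocked();	/* con_fault_finish(): must be unlocked */
}

int main(void)
{
	work();
	return 0;
}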
*/ @@ -2412,23 +2841,9 @@ static void ceph_fault(struct ceph_connection *con) con->delay = BASE_DELAY_INTERVAL; else if (con->delay < MAX_DELAY_INTERVAL) con->delay *= 2; - set_bit(CON_FLAG_BACKOFF, &con->flags); + con_flag_set(con, CON_FLAG_BACKOFF); queue_con(con); } - -out_unlock: - mutex_unlock(&con->mutex); - /* - * in case we faulted due to authentication, invalidate our - * current tickets so that we can get new ones. - */ - if (con->auth_retry && con->ops->invalidate_authorizer) { - dout("calling invalidate_authorizer()\n"); - con->ops->invalidate_authorizer(con); - } - - if (con->ops->fault) - con->ops->fault(con); } @@ -2469,8 +2884,8 @@ static void clear_standby(struct ceph_connection *con) dout("clear_standby %p and ++connect_seq\n", con); con->state = CON_STATE_PREOPEN; con->connect_seq++; - WARN_ON(test_bit(CON_FLAG_WRITE_PENDING, &con->flags)); - WARN_ON(test_bit(CON_FLAG_KEEPALIVE_PENDING, &con->flags)); + WARN_ON(con_flag_test(con, CON_FLAG_WRITE_PENDING)); + WARN_ON(con_flag_test(con, CON_FLAG_KEEPALIVE_PENDING)); } } @@ -2511,7 +2926,7 @@ void ceph_con_send(struct ceph_connection *con, struct ceph_msg *msg) /* if there wasn't anything waiting to send before, queue * new work */ - if (test_and_set_bit(CON_FLAG_WRITE_PENDING, &con->flags) == 0) + if (con_flag_test_and_set(con, CON_FLAG_WRITE_PENDING) == 0) queue_con(con); } EXPORT_SYMBOL(ceph_con_send); @@ -2600,12 +3015,94 @@ void ceph_con_keepalive(struct ceph_connection *con) mutex_lock(&con->mutex); clear_standby(con); mutex_unlock(&con->mutex); - if (test_and_set_bit(CON_FLAG_KEEPALIVE_PENDING, &con->flags) == 0 && - test_and_set_bit(CON_FLAG_WRITE_PENDING, &con->flags) == 0) + if (con_flag_test_and_set(con, CON_FLAG_KEEPALIVE_PENDING) == 0 && + con_flag_test_and_set(con, CON_FLAG_WRITE_PENDING) == 0) queue_con(con); } EXPORT_SYMBOL(ceph_con_keepalive); +static struct ceph_msg_data *ceph_msg_data_create(enum ceph_msg_data_type type) +{ + struct ceph_msg_data *data; + + if (WARN_ON(!ceph_msg_data_type_valid(type))) + return NULL; + + data = kmem_cache_zalloc(ceph_msg_data_cache, GFP_NOFS); + if (data) + data->type = type; + INIT_LIST_HEAD(&data->links); + + return data; +} + +static void ceph_msg_data_destroy(struct ceph_msg_data *data) +{ + if (!data) + return; + + WARN_ON(!list_empty(&data->links)); + if (data->type == CEPH_MSG_DATA_PAGELIST) { + ceph_pagelist_release(data->pagelist); + kfree(data->pagelist); + } + kmem_cache_free(ceph_msg_data_cache, data); +} + +void ceph_msg_data_add_pages(struct ceph_msg *msg, struct page **pages, + size_t length, size_t alignment) +{ + struct ceph_msg_data *data; + + BUG_ON(!pages); + BUG_ON(!length); + + data = ceph_msg_data_create(CEPH_MSG_DATA_PAGES); + BUG_ON(!data); + data->pages = pages; + data->length = length; + data->alignment = alignment & ~PAGE_MASK; + + list_add_tail(&data->links, &msg->data); + msg->data_length += length; +} +EXPORT_SYMBOL(ceph_msg_data_add_pages); + +void ceph_msg_data_add_pagelist(struct ceph_msg *msg, + struct ceph_pagelist *pagelist) +{ + struct ceph_msg_data *data; + + BUG_ON(!pagelist); + BUG_ON(!pagelist->length); + + data = ceph_msg_data_create(CEPH_MSG_DATA_PAGELIST); + BUG_ON(!data); + data->pagelist = pagelist; + + list_add_tail(&data->links, &msg->data); + msg->data_length += pagelist->length; +} +EXPORT_SYMBOL(ceph_msg_data_add_pagelist); + +#ifdef CONFIG_BLOCK +void ceph_msg_data_add_bio(struct ceph_msg *msg, struct bio *bio, + size_t length) +{ + struct ceph_msg_data *data; + + BUG_ON(!bio); + + data = 
ceph_msg_data_create(CEPH_MSG_DATA_BIO); + BUG_ON(!data); + data->bio = bio; + data->bio_length = length; + + list_add_tail(&data->links, &msg->data); + msg->data_length += length; +} +EXPORT_SYMBOL(ceph_msg_data_add_bio); +#endif /* CONFIG_BLOCK */ /* * construct a new message with given type, size @@ -2616,47 +3113,20 @@ struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags, { struct ceph_msg *m; - m = kmalloc(sizeof(*m), flags); + m = kmem_cache_zalloc(ceph_msg_cache, flags); if (m == NULL) goto out; - kref_init(&m->kref); - m->con = NULL; - INIT_LIST_HEAD(&m->list_head); - - m->hdr.tid = 0; m->hdr.type = cpu_to_le16(type); m->hdr.priority = cpu_to_le16(CEPH_MSG_PRIO_DEFAULT); - m->hdr.version = 0; m->hdr.front_len = cpu_to_le32(front_len); - m->hdr.middle_len = 0; - m->hdr.data_len = 0; - m->hdr.data_off = 0; - m->hdr.reserved = 0; - m->footer.front_crc = 0; - m->footer.middle_crc = 0; - m->footer.data_crc = 0; - m->footer.flags = 0; - m->front_max = front_len; - m->front_is_vmalloc = false; - m->more_to_follow = false; - m->ack_stamp = 0; - m->pool = NULL; - /* middle */ - m->middle = NULL; - - /* data */ - m->nr_pages = 0; - m->page_alignment = 0; - m->pages = NULL; - m->pagelist = NULL; - m->bio = NULL; - m->bio_iter = NULL; - m->bio_seg = 0; - m->trail = NULL; + INIT_LIST_HEAD(&m->list_head); + kref_init(&m->kref); + INIT_LIST_HEAD(&m->data); /* front */ + m->front_max = front_len; if (front_len) { if (front_len > PAGE_CACHE_SIZE) { m->front.iov_base = __vmalloc(front_len, flags, @@ -2734,49 +3204,37 @@ static int ceph_alloc_middle(struct ceph_connection *con, struct ceph_msg *msg) static int ceph_con_in_msg_alloc(struct ceph_connection *con, int *skip) { struct ceph_msg_header *hdr = &con->in_hdr; - int type = le16_to_cpu(hdr->type); - int front_len = le32_to_cpu(hdr->front_len); int middle_len = le32_to_cpu(hdr->middle_len); + struct ceph_msg *msg; int ret = 0; BUG_ON(con->in_msg != NULL); + BUG_ON(!con->ops->alloc_msg); - if (con->ops->alloc_msg) { - struct ceph_msg *msg; - - mutex_unlock(&con->mutex); - msg = con->ops->alloc_msg(con, hdr, skip); - mutex_lock(&con->mutex); - if (con->state != CON_STATE_OPEN) { - if (msg) - ceph_msg_put(msg); - return -EAGAIN; - } - con->in_msg = msg; - if (con->in_msg) { - con->in_msg->con = con->ops->get(con); - BUG_ON(con->in_msg->con == NULL); - } - if (*skip) { - con->in_msg = NULL; - return 0; - } - if (!con->in_msg) { - con->error_msg = - "error allocating memory for incoming message"; - return -ENOMEM; - } + mutex_unlock(&con->mutex); + msg = con->ops->alloc_msg(con, hdr, skip); + mutex_lock(&con->mutex); + if (con->state != CON_STATE_OPEN) { + if (msg) + ceph_msg_put(msg); + return -EAGAIN; } - if (!con->in_msg) { - con->in_msg = ceph_msg_new(type, front_len, GFP_NOFS, false); - if (!con->in_msg) { - pr_err("unable to allocate msg type %d len %d\n", - type, front_len); - return -ENOMEM; - } + if (msg) { + BUG_ON(*skip); + con->in_msg = msg; con->in_msg->con = con->ops->get(con); BUG_ON(con->in_msg->con == NULL); - con->in_msg->page_alignment = le16_to_cpu(hdr->data_off); + } else { + /* + * Null message pointer means either we should skip + * this message or we couldn't allocate memory. The + * former is not an error. 
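The ceph_msg_new() change just below swaps kmalloc() plus a dozen explicit zero-assignments for kmem_cache_zalloc(), so only fields with nonzero initial values need touching by hand. The same simplification in userspace terms, with calloc() playing the cache's zeroing role (struct msg is a toy, not struct ceph_msg):

#include <stdint.h>
#include <stdlib.h>

struct msg {
	uint16_t type;
	uint32_t front_len;
	/* ...many more fields, all of which must start out zero... */
	void *front;
};

static struct msg *msg_new(uint16_t type, uint32_t front_len)
{
	/* zeroing allocation: no per-field "= 0" boilerplate to forget */
	struct msg *m = calloc(1, sizeof(*m));

	if (!m)
		return NULL;
	m->type = type;			/* only nonzero state is set by hand */
	m->front_len = front_len;
	return m;
}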
+ */ + if (*skip) + return 0; + con->error_msg = "error allocating memory for incoming message"; + + return -ENOMEM; } memcpy(&con->in_msg->hdr, &con->in_hdr, sizeof(con->in_hdr)); @@ -2802,7 +3260,7 @@ void ceph_msg_kfree(struct ceph_msg *m) vfree(m->front.iov_base); else kfree(m->front.iov_base); - kfree(m); + kmem_cache_free(ceph_msg_cache, m); } /* @@ -2811,6 +3269,9 @@ void ceph_msg_kfree(struct ceph_msg *m) void ceph_msg_last_put(struct kref *kref) { struct ceph_msg *m = container_of(kref, struct ceph_msg, kref); + LIST_HEAD(data); + struct list_head *links; + struct list_head *next; dout("ceph_msg_put last one on %p\n", m); WARN_ON(!list_empty(&m->list_head)); @@ -2820,16 +3281,16 @@ void ceph_msg_last_put(struct kref *kref) ceph_buffer_put(m->middle); m->middle = NULL; } - m->nr_pages = 0; - m->pages = NULL; - if (m->pagelist) { - ceph_pagelist_release(m->pagelist); - kfree(m->pagelist); - m->pagelist = NULL; - } + list_splice_init(&m->data, &data); + list_for_each_safe(links, next, &data) { + struct ceph_msg_data *data; - m->trail = NULL; + data = list_entry(links, struct ceph_msg_data, links); + list_del_init(links); + ceph_msg_data_destroy(data); + } + m->data_length = 0; if (m->pool) ceph_msgpool_put(m->pool, m); @@ -2840,8 +3301,8 @@ EXPORT_SYMBOL(ceph_msg_last_put); void ceph_msg_dump(struct ceph_msg *msg) { - pr_debug("msg_dump %p (front_max %d nr_pages %d)\n", msg, - msg->front_max, msg->nr_pages); + pr_debug("msg_dump %p (front_max %d length %zd)\n", msg, + msg->front_max, msg->data_length); print_hex_dump(KERN_DEBUG, "header: ", DUMP_PREFIX_OFFSET, 16, 1, &msg->hdr, sizeof(msg->hdr), true); diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c index 812eb3b46c1f..1fe25cd29d0e 100644 --- a/net/ceph/mon_client.c +++ b/net/ceph/mon_client.c @@ -697,7 +697,7 @@ int ceph_monc_delete_snapid(struct ceph_mon_client *monc, u32 pool, u64 snapid) { return do_poolop(monc, POOL_OP_CREATE_UNMANAGED_SNAP, - pool, snapid, 0, 0); + pool, snapid, NULL, 0); } @@ -737,7 +737,7 @@ static void delayed_work(struct work_struct *work) __validate_auth(monc); - if (monc->auth->ops->is_authenticated(monc->auth)) + if (ceph_auth_is_authenticated(monc->auth)) __send_subscribe(monc); } __schedule_delayed(monc); @@ -892,8 +892,7 @@ static void handle_auth_reply(struct ceph_mon_client *monc, mutex_lock(&monc->mutex); had_debugfs_info = have_debugfs_info(monc); - if (monc->auth->ops) - was_auth = monc->auth->ops->is_authenticated(monc->auth); + was_auth = ceph_auth_is_authenticated(monc->auth); monc->pending_auth = 0; ret = ceph_handle_auth_reply(monc->auth, msg->front.iov_base, msg->front.iov_len, @@ -904,7 +903,7 @@ static void handle_auth_reply(struct ceph_mon_client *monc, wake_up_all(&monc->client->auth_wq); } else if (ret > 0) { __send_prepared_auth_request(monc, ret); - } else if (!was_auth && monc->auth->ops->is_authenticated(monc->auth)) { + } else if (!was_auth && ceph_auth_is_authenticated(monc->auth)) { dout("authenticated, starting session\n"); monc->client->msgr.inst.name.type = CEPH_ENTITY_TYPE_CLIENT; diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index eb9a44478764..a3395fdfbd4f 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -1,3 +1,4 @@ + #include <linux/ceph/ceph_debug.h> #include <linux/module.h> @@ -21,9 +22,11 @@ #define OSD_OP_FRONT_LEN 4096 #define OSD_OPREPLY_FRONT_LEN 512 +static struct kmem_cache *ceph_osd_request_cache; + static const struct ceph_connection_operations osd_con_ops; -static void send_queued(struct ceph_osd_client *osdc); 
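ceph_con_in_msg_alloc() above drops con->mutex around the ops->alloc_msg() callback and then re-checks con->state after retaking it, because the connection may have changed underneath while the lock was released. That revalidation is the essential part of the pattern; in miniature, pthread flavor, with invented names:

#include <pthread.h>
#include <stdlib.h>

enum state { ST_OPEN, ST_CLOSED };

struct conn {
	pthread_mutex_t mutex;
	enum state state;
};

/* Stand-in for ops->alloc_msg(): may block, so it must run unlocked. */
static void *alloc_callback(struct conn *c)
{
	(void)c;
	return malloc(64);
}

static void *alloc_in_msg(struct conn *c, int *again)
{
	void *msg;

	*again = 0;
	pthread_mutex_unlock(&c->mutex);	/* drop lock for the callback */
	msg = alloc_callback(c);
	pthread_mutex_lock(&c->mutex);

	if (c->state != ST_OPEN) {		/* revalidate: state may have
						   changed while unlocked */
		free(msg);
		*again = 1;			/* caller retries or gives up */
		return NULL;
	}
	return msg;
}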
+static void __send_queued(struct ceph_osd_client *osdc); static int __reset_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd); static void __register_request(struct ceph_osd_client *osdc, struct ceph_osd_request *req); @@ -32,64 +35,6 @@ static void __unregister_linger_request(struct ceph_osd_client *osdc, static void __send_request(struct ceph_osd_client *osdc, struct ceph_osd_request *req); -static int op_needs_trail(int op) -{ - switch (op) { - case CEPH_OSD_OP_GETXATTR: - case CEPH_OSD_OP_SETXATTR: - case CEPH_OSD_OP_CMPXATTR: - case CEPH_OSD_OP_CALL: - case CEPH_OSD_OP_NOTIFY: - return 1; - default: - return 0; - } -} - -static int op_has_extent(int op) -{ - return (op == CEPH_OSD_OP_READ || - op == CEPH_OSD_OP_WRITE); -} - -int ceph_calc_raw_layout(struct ceph_osd_client *osdc, - struct ceph_file_layout *layout, - u64 snapid, - u64 off, u64 *plen, u64 *bno, - struct ceph_osd_request *req, - struct ceph_osd_req_op *op) -{ - struct ceph_osd_request_head *reqhead = req->r_request->front.iov_base; - u64 orig_len = *plen; - u64 objoff, objlen; /* extent in object */ - int r; - - reqhead->snapid = cpu_to_le64(snapid); - - /* object extent? */ - r = ceph_calc_file_object_mapping(layout, off, plen, bno, - &objoff, &objlen); - if (r < 0) - return r; - if (*plen < orig_len) - dout(" skipping last %llu, final file extent %llu~%llu\n", - orig_len - *plen, off, *plen); - - if (op_has_extent(op->op)) { - op->extent.offset = objoff; - op->extent.length = objlen; - } - req->r_num_pages = calc_pages_for(off, *plen); - req->r_page_alignment = off & ~PAGE_MASK; - if (op->op == CEPH_OSD_OP_WRITE) - op->payload_len = *plen; - - dout("calc_layout bno=%llx %llu~%llu (%d pages)\n", - *bno, objoff, objlen, req->r_num_pages); - return 0; -} -EXPORT_SYMBOL(ceph_calc_raw_layout); - /* * Implement client access to distributed object storage cluster. * @@ -115,25 +60,238 @@ EXPORT_SYMBOL(ceph_calc_raw_layout); * * fill osd op in request message. */ -static int calc_layout(struct ceph_osd_client *osdc, - struct ceph_vino vino, - struct ceph_file_layout *layout, - u64 off, u64 *plen, - struct ceph_osd_request *req, - struct ceph_osd_req_op *op) -{ - u64 bno; +static int calc_layout(struct ceph_file_layout *layout, u64 off, u64 *plen, + u64 *objnum, u64 *objoff, u64 *objlen) +{ + u64 orig_len = *plen; int r; - r = ceph_calc_raw_layout(osdc, layout, vino.snap, off, - plen, &bno, req, op); + /* object extent? 
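The reworked calc_layout() below maps a file extent to (object number, offset within the object, length within the object) and clips the request at the object boundary. For the simple non-striped layout (stripe unit equal to object size, stripe count 1) the mapping reduces to division and remainder; a worked sketch with an invented object size:

#include <stdint.h>
#include <stdio.h>

/* Simple layout: one stripe per object, object_size bytes each. */
static void map_extent(uint64_t object_size, uint64_t off, uint64_t len,
		       uint64_t *objnum, uint64_t *objoff, uint64_t *objlen)
{
	uint64_t room;

	*objnum = off / object_size;
	*objoff = off % object_size;
	room = object_size - *objoff;		/* bytes left in this object */
	*objlen = len < room ? len : room;	/* clip at object boundary */
}

int main(void)
{
	uint64_t n, o, l;

	/* 4 MB objects: a 1 MB read at file offset 7 MB... */
	map_extent(4 << 20, 7 << 20, 1 << 20, &n, &o, &l);
	/* ...lands in object 1, offset 3 MB, and fits without clipping */
	printf("obj %llu off %llu len %llu\n",
	       (unsigned long long)n, (unsigned long long)o,
	       (unsigned long long)l);
	return 0;
}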
*/ + r = ceph_calc_file_object_mapping(layout, off, orig_len, objnum, + objoff, objlen); if (r < 0) return r; + if (*objlen < orig_len) { + *plen = *objlen; + dout(" skipping last %llu, final file extent %llu~%llu\n", + orig_len - *plen, off, *plen); + } - snprintf(req->r_oid, sizeof(req->r_oid), "%llx.%08llx", vino.ino, bno); - req->r_oid_len = strlen(req->r_oid); + dout("calc_layout objnum=%llx %llu~%llu\n", *objnum, *objoff, *objlen); - return r; + return 0; +} + +static void ceph_osd_data_init(struct ceph_osd_data *osd_data) +{ + memset(osd_data, 0, sizeof (*osd_data)); + osd_data->type = CEPH_OSD_DATA_TYPE_NONE; +} + +static void ceph_osd_data_pages_init(struct ceph_osd_data *osd_data, + struct page **pages, u64 length, u32 alignment, + bool pages_from_pool, bool own_pages) +{ + osd_data->type = CEPH_OSD_DATA_TYPE_PAGES; + osd_data->pages = pages; + osd_data->length = length; + osd_data->alignment = alignment; + osd_data->pages_from_pool = pages_from_pool; + osd_data->own_pages = own_pages; +} + +static void ceph_osd_data_pagelist_init(struct ceph_osd_data *osd_data, + struct ceph_pagelist *pagelist) +{ + osd_data->type = CEPH_OSD_DATA_TYPE_PAGELIST; + osd_data->pagelist = pagelist; +} + +#ifdef CONFIG_BLOCK +static void ceph_osd_data_bio_init(struct ceph_osd_data *osd_data, + struct bio *bio, size_t bio_length) +{ + osd_data->type = CEPH_OSD_DATA_TYPE_BIO; + osd_data->bio = bio; + osd_data->bio_length = bio_length; +} +#endif /* CONFIG_BLOCK */ + +#define osd_req_op_data(oreq, whch, typ, fld) \ + ({ \ + BUG_ON(whch >= (oreq)->r_num_ops); \ + &(oreq)->r_ops[whch].typ.fld; \ + }) + +static struct ceph_osd_data * +osd_req_op_raw_data_in(struct ceph_osd_request *osd_req, unsigned int which) +{ + BUG_ON(which >= osd_req->r_num_ops); + + return &osd_req->r_ops[which].raw_data_in; +} + +struct ceph_osd_data * +osd_req_op_extent_osd_data(struct ceph_osd_request *osd_req, + unsigned int which) +{ + return osd_req_op_data(osd_req, which, extent, osd_data); +} +EXPORT_SYMBOL(osd_req_op_extent_osd_data); + +struct ceph_osd_data * +osd_req_op_cls_response_data(struct ceph_osd_request *osd_req, + unsigned int which) +{ + return osd_req_op_data(osd_req, which, cls, response_data); +} +EXPORT_SYMBOL(osd_req_op_cls_response_data); /* ??? 
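struct ceph_osd_data acts as a tagged union: a type field selects which of the pages/pagelist/bio members is meaningful, the *_init() helpers above set tag and payload together, and readers such as ceph_osd_data_length() (below) dispatch on the tag before touching anything. The skeleton of that idiom, with toy member types rather than the kernel's:

#include <stddef.h>
#include <stdint.h>

enum data_type { DATA_NONE, DATA_PAGES, DATA_PAGELIST };

struct data_item {
	enum data_type type;
	union {
		struct { void **pages; uint64_t length; } pages;
		struct { size_t length; } pagelist;
	};
};

/* Tag and payload are always set together... */
static void data_pages_init(struct data_item *d, void **pages, uint64_t len)
{
	d->type = DATA_PAGES;
	d->pages.pages = pages;
	d->pages.length = len;
}

/* ...and every reader dispatches on the tag first. */
static uint64_t data_length(const struct data_item *d)
{
	switch (d->type) {
	case DATA_NONE:		return 0;
	case DATA_PAGES:	return d->pages.length;
	case DATA_PAGELIST:	return (uint64_t)d->pagelist.length;
	default:		return 0;	/* unrecognized tag */
	}
}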
*/ + +void osd_req_op_raw_data_in_pages(struct ceph_osd_request *osd_req, + unsigned int which, struct page **pages, + u64 length, u32 alignment, + bool pages_from_pool, bool own_pages) +{ + struct ceph_osd_data *osd_data; + + osd_data = osd_req_op_raw_data_in(osd_req, which); + ceph_osd_data_pages_init(osd_data, pages, length, alignment, + pages_from_pool, own_pages); +} +EXPORT_SYMBOL(osd_req_op_raw_data_in_pages); + +void osd_req_op_extent_osd_data_pages(struct ceph_osd_request *osd_req, + unsigned int which, struct page **pages, + u64 length, u32 alignment, + bool pages_from_pool, bool own_pages) +{ + struct ceph_osd_data *osd_data; + + osd_data = osd_req_op_data(osd_req, which, extent, osd_data); + ceph_osd_data_pages_init(osd_data, pages, length, alignment, + pages_from_pool, own_pages); +} +EXPORT_SYMBOL(osd_req_op_extent_osd_data_pages); + +void osd_req_op_extent_osd_data_pagelist(struct ceph_osd_request *osd_req, + unsigned int which, struct ceph_pagelist *pagelist) +{ + struct ceph_osd_data *osd_data; + + osd_data = osd_req_op_data(osd_req, which, extent, osd_data); + ceph_osd_data_pagelist_init(osd_data, pagelist); +} +EXPORT_SYMBOL(osd_req_op_extent_osd_data_pagelist); + +#ifdef CONFIG_BLOCK +void osd_req_op_extent_osd_data_bio(struct ceph_osd_request *osd_req, + unsigned int which, struct bio *bio, size_t bio_length) +{ + struct ceph_osd_data *osd_data; + + osd_data = osd_req_op_data(osd_req, which, extent, osd_data); + ceph_osd_data_bio_init(osd_data, bio, bio_length); +} +EXPORT_SYMBOL(osd_req_op_extent_osd_data_bio); +#endif /* CONFIG_BLOCK */ + +static void osd_req_op_cls_request_info_pagelist( + struct ceph_osd_request *osd_req, + unsigned int which, struct ceph_pagelist *pagelist) +{ + struct ceph_osd_data *osd_data; + + osd_data = osd_req_op_data(osd_req, which, cls, request_info); + ceph_osd_data_pagelist_init(osd_data, pagelist); +} + +void osd_req_op_cls_request_data_pagelist( + struct ceph_osd_request *osd_req, + unsigned int which, struct ceph_pagelist *pagelist) +{ + struct ceph_osd_data *osd_data; + + osd_data = osd_req_op_data(osd_req, which, cls, request_data); + ceph_osd_data_pagelist_init(osd_data, pagelist); +} +EXPORT_SYMBOL(osd_req_op_cls_request_data_pagelist); + +void osd_req_op_cls_request_data_pages(struct ceph_osd_request *osd_req, + unsigned int which, struct page **pages, u64 length, + u32 alignment, bool pages_from_pool, bool own_pages) +{ + struct ceph_osd_data *osd_data; + + osd_data = osd_req_op_data(osd_req, which, cls, request_data); + ceph_osd_data_pages_init(osd_data, pages, length, alignment, + pages_from_pool, own_pages); +} +EXPORT_SYMBOL(osd_req_op_cls_request_data_pages); + +void osd_req_op_cls_response_data_pages(struct ceph_osd_request *osd_req, + unsigned int which, struct page **pages, u64 length, + u32 alignment, bool pages_from_pool, bool own_pages) +{ + struct ceph_osd_data *osd_data; + + osd_data = osd_req_op_data(osd_req, which, cls, response_data); + ceph_osd_data_pages_init(osd_data, pages, length, alignment, + pages_from_pool, own_pages); +} +EXPORT_SYMBOL(osd_req_op_cls_response_data_pages); + +static u64 ceph_osd_data_length(struct ceph_osd_data *osd_data) +{ + switch (osd_data->type) { + case CEPH_OSD_DATA_TYPE_NONE: + return 0; + case CEPH_OSD_DATA_TYPE_PAGES: + return osd_data->length; + case CEPH_OSD_DATA_TYPE_PAGELIST: + return (u64)osd_data->pagelist->length; +#ifdef CONFIG_BLOCK + case CEPH_OSD_DATA_TYPE_BIO: + return (u64)osd_data->bio_length; +#endif /* CONFIG_BLOCK */ + default: + WARN(true, "unrecognized data 
type %d\n", (int)osd_data->type); + return 0; + } +} + +static void ceph_osd_data_release(struct ceph_osd_data *osd_data) +{ + if (osd_data->type == CEPH_OSD_DATA_TYPE_PAGES && osd_data->own_pages) { + int num_pages; + + num_pages = calc_pages_for((u64)osd_data->alignment, + (u64)osd_data->length); + ceph_release_page_vector(osd_data->pages, num_pages); + } + ceph_osd_data_init(osd_data); +} + +static void osd_req_op_data_release(struct ceph_osd_request *osd_req, + unsigned int which) +{ + struct ceph_osd_req_op *op; + + BUG_ON(which >= osd_req->r_num_ops); + op = &osd_req->r_ops[which]; + + switch (op->op) { + case CEPH_OSD_OP_READ: + case CEPH_OSD_OP_WRITE: + ceph_osd_data_release(&op->extent.osd_data); + break; + case CEPH_OSD_OP_CALL: + ceph_osd_data_release(&op->cls.request_info); + ceph_osd_data_release(&op->cls.request_data); + ceph_osd_data_release(&op->cls.response_data); + break; + default: + break; + } } /* @@ -141,82 +299,64 @@ static int calc_layout(struct ceph_osd_client *osdc, */ void ceph_osdc_release_request(struct kref *kref) { - struct ceph_osd_request *req = container_of(kref, - struct ceph_osd_request, - r_kref); + struct ceph_osd_request *req; + unsigned int which; + req = container_of(kref, struct ceph_osd_request, r_kref); if (req->r_request) ceph_msg_put(req->r_request); - if (req->r_con_filling_msg) { - dout("%s revoking pages %p from con %p\n", __func__, - req->r_pages, req->r_con_filling_msg); + if (req->r_reply) { ceph_msg_revoke_incoming(req->r_reply); - req->r_con_filling_msg->ops->put(req->r_con_filling_msg); - } - if (req->r_reply) ceph_msg_put(req->r_reply); - if (req->r_own_pages) - ceph_release_page_vector(req->r_pages, - req->r_num_pages); -#ifdef CONFIG_BLOCK - if (req->r_bio) - bio_put(req->r_bio); -#endif - ceph_put_snap_context(req->r_snapc); - if (req->r_trail) { - ceph_pagelist_release(req->r_trail); - kfree(req->r_trail); } + + for (which = 0; which < req->r_num_ops; which++) + osd_req_op_data_release(req, which); + + ceph_put_snap_context(req->r_snapc); if (req->r_mempool) mempool_free(req, req->r_osdc->req_mempool); else - kfree(req); -} -EXPORT_SYMBOL(ceph_osdc_release_request); + kmem_cache_free(ceph_osd_request_cache, req); -static int get_num_ops(struct ceph_osd_req_op *ops, int *needs_trail) -{ - int i = 0; - - if (needs_trail) - *needs_trail = 0; - while (ops[i].op) { - if (needs_trail && op_needs_trail(ops[i].op)) - *needs_trail = 1; - i++; - } - - return i; } +EXPORT_SYMBOL(ceph_osdc_release_request); struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, - int flags, struct ceph_snap_context *snapc, - struct ceph_osd_req_op *ops, + unsigned int num_ops, bool use_mempool, - gfp_t gfp_flags, - struct page **pages, - struct bio *bio) + gfp_t gfp_flags) { struct ceph_osd_request *req; struct ceph_msg *msg; - int needs_trail; - int num_op = get_num_ops(ops, &needs_trail); - size_t msg_size = sizeof(struct ceph_osd_request_head); + size_t msg_size; + + BUILD_BUG_ON(CEPH_OSD_MAX_OP > U16_MAX); + BUG_ON(num_ops > CEPH_OSD_MAX_OP); - msg_size += num_op*sizeof(struct ceph_osd_op); + msg_size = 4 + 4 + 8 + 8 + 4+8; + msg_size += 2 + 4 + 8 + 4 + 4; /* oloc */ + msg_size += 1 + 8 + 4 + 4; /* pg_t */ + msg_size += 4 + MAX_OBJ_NAME_SIZE; + msg_size += 2 + num_ops*sizeof(struct ceph_osd_op); + msg_size += 8; /* snapid */ + msg_size += 8; /* snap_seq */ + msg_size += 8 * (snapc ? 
snapc->num_snaps : 0); /* snaps */ + msg_size += 4; if (use_mempool) { req = mempool_alloc(osdc->req_mempool, gfp_flags); memset(req, 0, sizeof(*req)); } else { - req = kzalloc(sizeof(*req), gfp_flags); + req = kmem_cache_zalloc(ceph_osd_request_cache, gfp_flags); } if (req == NULL) return NULL; req->r_osdc = osdc; req->r_mempool = use_mempool; + req->r_num_ops = num_ops; kref_init(&req->r_kref); init_completion(&req->r_completion); @@ -228,10 +368,6 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, INIT_LIST_HEAD(&req->r_req_lru_item); INIT_LIST_HEAD(&req->r_osd_item); - req->r_flags = flags; - - WARN_ON((flags & (CEPH_OSD_FLAG_READ|CEPH_OSD_FLAG_WRITE)) == 0); - /* create reply message */ if (use_mempool) msg = ceph_msgpool_get(&osdc->msgpool_op_reply, 0); @@ -244,20 +380,7 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, } req->r_reply = msg; - /* allocate space for the trailing data */ - if (needs_trail) { - req->r_trail = kmalloc(sizeof(struct ceph_pagelist), gfp_flags); - if (!req->r_trail) { - ceph_osdc_put_request(req); - return NULL; - } - ceph_pagelist_init(req->r_trail); - } - /* create request message; allow space for oid */ - msg_size += MAX_OBJ_NAME_SIZE; - if (snapc) - msg_size += sizeof(u64) * snapc->num_snaps; if (use_mempool) msg = ceph_msgpool_get(&osdc->msgpool_op, 0); else @@ -270,82 +393,280 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, memset(msg->front.iov_base, 0, msg->front.iov_len); req->r_request = msg; - req->r_pages = pages; -#ifdef CONFIG_BLOCK - if (bio) { - req->r_bio = bio; - bio_get(req->r_bio); - } -#endif return req; } EXPORT_SYMBOL(ceph_osdc_alloc_request); -static void osd_req_encode_op(struct ceph_osd_request *req, - struct ceph_osd_op *dst, - struct ceph_osd_req_op *src) +static bool osd_req_opcode_valid(u16 opcode) { - dst->op = cpu_to_le16(src->op); + switch (opcode) { + case CEPH_OSD_OP_READ: + case CEPH_OSD_OP_STAT: + case CEPH_OSD_OP_MAPEXT: + case CEPH_OSD_OP_MASKTRUNC: + case CEPH_OSD_OP_SPARSE_READ: + case CEPH_OSD_OP_NOTIFY: + case CEPH_OSD_OP_NOTIFY_ACK: + case CEPH_OSD_OP_ASSERT_VER: + case CEPH_OSD_OP_WRITE: + case CEPH_OSD_OP_WRITEFULL: + case CEPH_OSD_OP_TRUNCATE: + case CEPH_OSD_OP_ZERO: + case CEPH_OSD_OP_DELETE: + case CEPH_OSD_OP_APPEND: + case CEPH_OSD_OP_STARTSYNC: + case CEPH_OSD_OP_SETTRUNC: + case CEPH_OSD_OP_TRIMTRUNC: + case CEPH_OSD_OP_TMAPUP: + case CEPH_OSD_OP_TMAPPUT: + case CEPH_OSD_OP_TMAPGET: + case CEPH_OSD_OP_CREATE: + case CEPH_OSD_OP_ROLLBACK: + case CEPH_OSD_OP_WATCH: + case CEPH_OSD_OP_OMAPGETKEYS: + case CEPH_OSD_OP_OMAPGETVALS: + case CEPH_OSD_OP_OMAPGETHEADER: + case CEPH_OSD_OP_OMAPGETVALSBYKEYS: + case CEPH_OSD_OP_OMAPSETVALS: + case CEPH_OSD_OP_OMAPSETHEADER: + case CEPH_OSD_OP_OMAPCLEAR: + case CEPH_OSD_OP_OMAPRMKEYS: + case CEPH_OSD_OP_OMAP_CMP: + case CEPH_OSD_OP_CLONERANGE: + case CEPH_OSD_OP_ASSERT_SRC_VERSION: + case CEPH_OSD_OP_SRC_CMPXATTR: + case CEPH_OSD_OP_GETXATTR: + case CEPH_OSD_OP_GETXATTRS: + case CEPH_OSD_OP_CMPXATTR: + case CEPH_OSD_OP_SETXATTR: + case CEPH_OSD_OP_SETXATTRS: + case CEPH_OSD_OP_RESETXATTRS: + case CEPH_OSD_OP_RMXATTR: + case CEPH_OSD_OP_PULL: + case CEPH_OSD_OP_PUSH: + case CEPH_OSD_OP_BALANCEREADS: + case CEPH_OSD_OP_UNBALANCEREADS: + case CEPH_OSD_OP_SCRUB: + case CEPH_OSD_OP_SCRUB_RESERVE: + case CEPH_OSD_OP_SCRUB_UNRESERVE: + case CEPH_OSD_OP_SCRUB_STOP: + case CEPH_OSD_OP_SCRUB_MAP: + case CEPH_OSD_OP_WRLOCK: + case CEPH_OSD_OP_WRUNLOCK: + case CEPH_OSD_OP_RDLOCK: + 
case CEPH_OSD_OP_RDUNLOCK: + case CEPH_OSD_OP_UPLOCK: + case CEPH_OSD_OP_DNLOCK: + case CEPH_OSD_OP_CALL: + case CEPH_OSD_OP_PGLS: + case CEPH_OSD_OP_PGLS_FILTER: + return true; + default: + return false; + } +} + +/* + * This is an osd op init function for opcodes that have no data or + * other information associated with them. It also serves as a + * common init routine for all the other init functions, below. + */ +static struct ceph_osd_req_op * +_osd_req_op_init(struct ceph_osd_request *osd_req, unsigned int which, + u16 opcode) +{ + struct ceph_osd_req_op *op; + + BUG_ON(which >= osd_req->r_num_ops); + BUG_ON(!osd_req_opcode_valid(opcode)); + + op = &osd_req->r_ops[which]; + memset(op, 0, sizeof (*op)); + op->op = opcode; + + return op; +} + +void osd_req_op_init(struct ceph_osd_request *osd_req, + unsigned int which, u16 opcode) +{ + (void)_osd_req_op_init(osd_req, which, opcode); +} +EXPORT_SYMBOL(osd_req_op_init); + +void osd_req_op_extent_init(struct ceph_osd_request *osd_req, + unsigned int which, u16 opcode, + u64 offset, u64 length, + u64 truncate_size, u32 truncate_seq) +{ + struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which, opcode); + size_t payload_len = 0; + + BUG_ON(opcode != CEPH_OSD_OP_READ && opcode != CEPH_OSD_OP_WRITE); + + op->extent.offset = offset; + op->extent.length = length; + op->extent.truncate_size = truncate_size; + op->extent.truncate_seq = truncate_seq; + if (opcode == CEPH_OSD_OP_WRITE) + payload_len += length; + + op->payload_len = payload_len; +} +EXPORT_SYMBOL(osd_req_op_extent_init); + +void osd_req_op_extent_update(struct ceph_osd_request *osd_req, + unsigned int which, u64 length) +{ + struct ceph_osd_req_op *op; + u64 previous; + + BUG_ON(which >= osd_req->r_num_ops); + op = &osd_req->r_ops[which]; + previous = op->extent.length; + + if (length == previous) + return; /* Nothing to do */ + BUG_ON(length > previous); + + op->extent.length = length; + op->payload_len -= previous - length; +} +EXPORT_SYMBOL(osd_req_op_extent_update); + +void osd_req_op_cls_init(struct ceph_osd_request *osd_req, unsigned int which, + u16 opcode, const char *class, const char *method) +{ + struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which, opcode); + struct ceph_pagelist *pagelist; + size_t payload_len = 0; + size_t size; + + BUG_ON(opcode != CEPH_OSD_OP_CALL); + + pagelist = kmalloc(sizeof (*pagelist), GFP_NOFS); + BUG_ON(!pagelist); + ceph_pagelist_init(pagelist); + + op->cls.class_name = class; + size = strlen(class); + BUG_ON(size > (size_t) U8_MAX); + op->cls.class_len = size; + ceph_pagelist_append(pagelist, class, size); + payload_len += size; + + op->cls.method_name = method; + size = strlen(method); + BUG_ON(size > (size_t) U8_MAX); + op->cls.method_len = size; + ceph_pagelist_append(pagelist, method, size); + payload_len += size; + + osd_req_op_cls_request_info_pagelist(osd_req, which, pagelist); + + op->cls.argc = 0; /* currently unused */ + + op->payload_len = payload_len; +} +EXPORT_SYMBOL(osd_req_op_cls_init); + +void osd_req_op_watch_init(struct ceph_osd_request *osd_req, + unsigned int which, u16 opcode, + u64 cookie, u64 version, int flag) +{ + struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which, opcode); + + BUG_ON(opcode != CEPH_OSD_OP_NOTIFY_ACK && opcode != CEPH_OSD_OP_WATCH); + + op->watch.cookie = cookie; + op->watch.ver = version; + if (opcode == CEPH_OSD_OP_WATCH && flag) + op->watch.flag = (u8)1; +} +EXPORT_SYMBOL(osd_req_op_watch_init); + +static void ceph_osdc_msg_data_add(struct ceph_msg *msg, + struct 
ceph_osd_data *osd_data) +{ + u64 length = ceph_osd_data_length(osd_data); + + if (osd_data->type == CEPH_OSD_DATA_TYPE_PAGES) { + BUG_ON(length > (u64) SIZE_MAX); + if (length) + ceph_msg_data_add_pages(msg, osd_data->pages, + length, osd_data->alignment); + } else if (osd_data->type == CEPH_OSD_DATA_TYPE_PAGELIST) { + BUG_ON(!length); + ceph_msg_data_add_pagelist(msg, osd_data->pagelist); +#ifdef CONFIG_BLOCK + } else if (osd_data->type == CEPH_OSD_DATA_TYPE_BIO) { + ceph_msg_data_add_bio(msg, osd_data->bio, length); +#endif + } else { + BUG_ON(osd_data->type != CEPH_OSD_DATA_TYPE_NONE); + } +} + +static u64 osd_req_encode_op(struct ceph_osd_request *req, + struct ceph_osd_op *dst, unsigned int which) +{ + struct ceph_osd_req_op *src; + struct ceph_osd_data *osd_data; + u64 request_data_len = 0; + u64 data_length; + + BUG_ON(which >= req->r_num_ops); + src = &req->r_ops[which]; + if (WARN_ON(!osd_req_opcode_valid(src->op))) { + pr_err("unrecognized osd opcode %d\n", src->op); + + return 0; + } switch (src->op) { + case CEPH_OSD_OP_STAT: + osd_data = &src->raw_data_in; + ceph_osdc_msg_data_add(req->r_reply, osd_data); + break; case CEPH_OSD_OP_READ: case CEPH_OSD_OP_WRITE: - dst->extent.offset = - cpu_to_le64(src->extent.offset); - dst->extent.length = - cpu_to_le64(src->extent.length); + if (src->op == CEPH_OSD_OP_WRITE) + request_data_len = src->extent.length; + dst->extent.offset = cpu_to_le64(src->extent.offset); + dst->extent.length = cpu_to_le64(src->extent.length); dst->extent.truncate_size = cpu_to_le64(src->extent.truncate_size); dst->extent.truncate_seq = cpu_to_le32(src->extent.truncate_seq); - break; - - case CEPH_OSD_OP_GETXATTR: - case CEPH_OSD_OP_SETXATTR: - case CEPH_OSD_OP_CMPXATTR: - BUG_ON(!req->r_trail); - - dst->xattr.name_len = cpu_to_le32(src->xattr.name_len); - dst->xattr.value_len = cpu_to_le32(src->xattr.value_len); - dst->xattr.cmp_op = src->xattr.cmp_op; - dst->xattr.cmp_mode = src->xattr.cmp_mode; - ceph_pagelist_append(req->r_trail, src->xattr.name, - src->xattr.name_len); - ceph_pagelist_append(req->r_trail, src->xattr.val, - src->xattr.value_len); + osd_data = &src->extent.osd_data; + if (src->op == CEPH_OSD_OP_WRITE) + ceph_osdc_msg_data_add(req->r_request, osd_data); + else + ceph_osdc_msg_data_add(req->r_reply, osd_data); break; case CEPH_OSD_OP_CALL: - BUG_ON(!req->r_trail); - dst->cls.class_len = src->cls.class_len; dst->cls.method_len = src->cls.method_len; - dst->cls.indata_len = cpu_to_le32(src->cls.indata_len); - - ceph_pagelist_append(req->r_trail, src->cls.class_name, - src->cls.class_len); - ceph_pagelist_append(req->r_trail, src->cls.method_name, - src->cls.method_len); - ceph_pagelist_append(req->r_trail, src->cls.indata, - src->cls.indata_len); - break; - case CEPH_OSD_OP_ROLLBACK: - dst->snap.snapid = cpu_to_le64(src->snap.snapid); + osd_data = &src->cls.request_info; + ceph_osdc_msg_data_add(req->r_request, osd_data); + BUG_ON(osd_data->type != CEPH_OSD_DATA_TYPE_PAGELIST); + request_data_len = osd_data->pagelist->length; + + osd_data = &src->cls.request_data; + data_length = ceph_osd_data_length(osd_data); + if (data_length) { + BUG_ON(osd_data->type == CEPH_OSD_DATA_TYPE_NONE); + dst->cls.indata_len = cpu_to_le32(data_length); + ceph_osdc_msg_data_add(req->r_request, osd_data); + src->payload_len += data_length; + request_data_len += data_length; + } + osd_data = &src->cls.response_data; + ceph_osdc_msg_data_add(req->r_reply, osd_data); break; case CEPH_OSD_OP_STARTSYNC: break; - case CEPH_OSD_OP_NOTIFY: - { - __le32 prot_ver = 
cpu_to_le32(src->watch.prot_ver); - __le32 timeout = cpu_to_le32(src->watch.timeout); - - BUG_ON(!req->r_trail); - - ceph_pagelist_append(req->r_trail, - &prot_ver, sizeof(prot_ver)); - ceph_pagelist_append(req->r_trail, - &timeout, sizeof(timeout)); - } case CEPH_OSD_OP_NOTIFY_ACK: case CEPH_OSD_OP_WATCH: dst->watch.cookie = cpu_to_le64(src->watch.cookie); @@ -353,90 +674,17 @@ static void osd_req_encode_op(struct ceph_osd_request *req, dst->watch.flag = src->watch.flag; break; default: - pr_err("unrecognized osd opcode %d\n", dst->op); + pr_err("unsupported osd opcode %s\n", + ceph_osd_op_name(src->op)); WARN_ON(1); - break; - } - dst->payload_len = cpu_to_le32(src->payload_len); -} - -/* - * build new request AND message - * - */ -void ceph_osdc_build_request(struct ceph_osd_request *req, - u64 off, u64 *plen, - struct ceph_osd_req_op *src_ops, - struct ceph_snap_context *snapc, - struct timespec *mtime, - const char *oid, - int oid_len) -{ - struct ceph_msg *msg = req->r_request; - struct ceph_osd_request_head *head; - struct ceph_osd_req_op *src_op; - struct ceph_osd_op *op; - void *p; - int num_op = get_num_ops(src_ops, NULL); - size_t msg_size = sizeof(*head) + num_op*sizeof(*op); - int flags = req->r_flags; - u64 data_len = 0; - int i; - - head = msg->front.iov_base; - op = (void *)(head + 1); - p = (void *)(op + num_op); - req->r_snapc = ceph_get_snap_context(snapc); - - head->client_inc = cpu_to_le32(1); /* always, for now. */ - head->flags = cpu_to_le32(flags); - if (flags & CEPH_OSD_FLAG_WRITE) - ceph_encode_timespec(&head->mtime, mtime); - head->num_ops = cpu_to_le16(num_op); - - - /* fill in oid */ - head->object_len = cpu_to_le32(oid_len); - memcpy(p, oid, oid_len); - p += oid_len; - - src_op = src_ops; - while (src_op->op) { - osd_req_encode_op(req, op, src_op); - src_op++; - op++; - } - - if (req->r_trail) - data_len += req->r_trail->length; - - if (snapc) { - head->snap_seq = cpu_to_le64(snapc->seq); - head->num_snaps = cpu_to_le32(snapc->num_snaps); - for (i = 0; i < snapc->num_snaps; i++) { - put_unaligned_le64(snapc->snaps[i], p); - p += sizeof(u64); - } - } - - if (flags & CEPH_OSD_FLAG_WRITE) { - req->r_request->hdr.data_off = cpu_to_le16(off); - req->r_request->hdr.data_len = cpu_to_le32(*plen + data_len); - } else if (data_len) { - req->r_request->hdr.data_off = 0; - req->r_request->hdr.data_len = cpu_to_le32(data_len); + return 0; } + dst->op = cpu_to_le16(src->op); + dst->payload_len = cpu_to_le32(src->payload_len); - req->r_request->page_alignment = req->r_page_alignment; - - BUG_ON(p > msg->front.iov_base + msg->front.iov_len); - msg_size = p - msg->front.iov_base; - msg->front.iov_len = msg_size; - msg->hdr.front_len = cpu_to_le32(msg_size); - return; + return request_data_len; } -EXPORT_SYMBOL(ceph_osdc_build_request); /* * build new request AND message, calculate layout, and adjust file @@ -452,54 +700,63 @@ EXPORT_SYMBOL(ceph_osdc_build_request); struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, struct ceph_file_layout *layout, struct ceph_vino vino, - u64 off, u64 *plen, + u64 off, u64 *plen, int num_ops, int opcode, int flags, struct ceph_snap_context *snapc, - int do_sync, u32 truncate_seq, u64 truncate_size, - struct timespec *mtime, - bool use_mempool, int num_reply, - int page_align) + bool use_mempool) { - struct ceph_osd_req_op ops[3]; struct ceph_osd_request *req; + u64 objnum = 0; + u64 objoff = 0; + u64 objlen = 0; + u32 object_size; + u64 object_base; int r; - ops[0].op = opcode; - ops[0].extent.truncate_seq = 
truncate_seq; - ops[0].extent.truncate_size = truncate_size; - ops[0].payload_len = 0; - - if (do_sync) { - ops[1].op = CEPH_OSD_OP_STARTSYNC; - ops[1].payload_len = 0; - ops[2].op = 0; - } else - ops[1].op = 0; - - req = ceph_osdc_alloc_request(osdc, flags, - snapc, ops, - use_mempool, - GFP_NOFS, NULL, NULL); + BUG_ON(opcode != CEPH_OSD_OP_READ && opcode != CEPH_OSD_OP_WRITE); + + req = ceph_osdc_alloc_request(osdc, snapc, num_ops, use_mempool, + GFP_NOFS); if (!req) return ERR_PTR(-ENOMEM); + req->r_flags = flags; + /* calculate max write size */ - r = calc_layout(osdc, vino, layout, off, plen, req, ops); - if (r < 0) + r = calc_layout(layout, off, plen, &objnum, &objoff, &objlen); + if (r < 0) { + ceph_osdc_put_request(req); return ERR_PTR(r); - req->r_file_layout = *layout; /* keep a copy */ + } + + object_size = le32_to_cpu(layout->fl_object_size); + object_base = off - objoff; + if (truncate_size <= object_base) { + truncate_size = 0; + } else { + truncate_size -= object_base; + if (truncate_size > object_size) + truncate_size = object_size; + } - /* in case it differs from natural (file) alignment that - calc_layout filled in for us */ - req->r_num_pages = calc_pages_for(page_align, *plen); - req->r_page_alignment = page_align; + osd_req_op_extent_init(req, 0, opcode, objoff, objlen, + truncate_size, truncate_seq); - ceph_osdc_build_request(req, off, plen, ops, - snapc, - mtime, - req->r_oid, req->r_oid_len); + /* + * A second op in the ops array means the caller wants to + * also include a 'startsync' command so that the + * osd will flush data quickly. + */ + if (num_ops > 1) + osd_req_op_init(req, 1, CEPH_OSD_OP_STARTSYNC); + + req->r_file_layout = *layout; /* keep a copy */ + + snprintf(req->r_oid, sizeof(req->r_oid), "%llx.%08llx", + vino.ino, objnum); + req->r_oid_len = strlen(req->r_oid); return req; } @@ -577,21 +834,46 @@ static void __kick_osd_requests(struct ceph_osd_client *osdc, struct ceph_osd *osd) { struct ceph_osd_request *req, *nreq; + LIST_HEAD(resend); int err; dout("__kick_osd_requests osd%d\n", osd->o_osd); err = __reset_osd(osdc, osd); if (err) return; - + /* + * Build up a list of requests to resend by traversing the + * osd's list of requests. Requests for a given object are + * sent in tid order, and that is also the order they're + * kept on this list. Therefore all requests that are in + * flight will be found first, followed by all requests that + * have not yet been sent. And to resend requests while + * preserving this order we will want to put any sent + * requests back on the front of the osd client's unsent + * list. + * + * So we build a separate ordered list of already-sent + * requests for the affected osd and splice it onto the + * front of the osd client's unsent list. Once we've seen a + * request that has not yet been sent we're done. Those + * requests are already sitting right where they belong. + */ list_for_each_entry(req, &osd->o_requests, r_osd_item) { - list_move(&req->r_req_lru_item, &osdc->req_unsent); - dout("requeued %p tid %llu osd%d\n", req, req->r_tid, + if (!req->r_sent) + break; + list_move_tail(&req->r_req_lru_item, &resend); + dout("requeueing %p tid %llu osd%d\n", req, req->r_tid, osd->o_osd); if (!req->r_linger) req->r_flags |= CEPH_OSD_FLAG_RETRY; } + list_splice(&resend, &osdc->req_unsent); + /* + * Linger requests are re-registered before sending, which + * sets up a new tid for each. We add them to the unsent + * list at the end to keep things in tid order.
+ */ list_for_each_entry_safe(req, nreq, &osd->o_linger_requests, r_linger_osd) { /* @@ -600,8 +882,8 @@ static void __kick_osd_requests(struct ceph_osd_client *osdc, */ BUG_ON(!list_empty(&req->r_req_lru_item)); __register_request(osdc, req); - list_add(&req->r_req_lru_item, &osdc->req_unsent); - list_add(&req->r_osd_item, &req->r_osd->o_requests); + list_add_tail(&req->r_req_lru_item, &osdc->req_unsent); + list_add_tail(&req->r_osd_item, &req->r_osd->o_requests); __unregister_linger_request(osdc, req); dout("requeued lingering %p tid %llu osd%d\n", req, req->r_tid, osd->o_osd); @@ -623,8 +905,8 @@ static void osd_reset(struct ceph_connection *con) down_read(&osdc->map_sem); mutex_lock(&osdc->request_mutex); __kick_osd_requests(osdc, osd); + __send_queued(osdc); mutex_unlock(&osdc->request_mutex); - send_queued(osdc); up_read(&osdc->map_sem); } @@ -673,8 +955,7 @@ static void put_osd(struct ceph_osd *osd) if (atomic_dec_and_test(&osd->o_ref) && osd->o_auth.authorizer) { struct ceph_auth_client *ac = osd->o_osdc->client->monc.auth; - if (ac->ops && ac->ops->destroy_authorizer) - ac->ops->destroy_authorizer(ac, osd->o_auth.authorizer); + ceph_auth_destroy_authorizer(ac, osd->o_auth.authorizer); kfree(osd); } } @@ -739,31 +1020,35 @@ static void remove_old_osds(struct ceph_osd_client *osdc) */ static int __reset_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd) { - struct ceph_osd_request *req; - int ret = 0; + struct ceph_entity_addr *peer_addr; dout("__reset_osd %p osd%d\n", osd, osd->o_osd); if (list_empty(&osd->o_requests) && list_empty(&osd->o_linger_requests)) { __remove_osd(osdc, osd); - ret = -ENODEV; - } else if (memcmp(&osdc->osdmap->osd_addr[osd->o_osd], - &osd->o_con.peer_addr, - sizeof(osd->o_con.peer_addr)) == 0 && - !ceph_con_opened(&osd->o_con)) { + + return -ENODEV; + } + + peer_addr = &osdc->osdmap->osd_addr[osd->o_osd]; + if (!memcmp(peer_addr, &osd->o_con.peer_addr, sizeof (*peer_addr)) && + !ceph_con_opened(&osd->o_con)) { + struct ceph_osd_request *req; + dout(" osd addr hasn't changed and connection never opened," " letting msgr retry"); /* touch each r_stamp for handle_timeout()'s benefit */ list_for_each_entry(req, &osd->o_requests, r_osd_item) req->r_stamp = jiffies; - ret = -EAGAIN; - } else { - ceph_con_close(&osd->o_con); - ceph_con_open(&osd->o_con, CEPH_ENTITY_TYPE_OSD, osd->o_osd, - &osdc->osdmap->osd_addr[osd->o_osd]); - osd->o_incarnation++; + + return -EAGAIN; } - return ret; + + ceph_con_close(&osd->o_con); + ceph_con_open(&osd->o_con, CEPH_ENTITY_TYPE_OSD, osd->o_osd, peer_addr); + osd->o_incarnation++; + + return 0; } static void __insert_osd(struct ceph_osd_client *osdc, struct ceph_osd *new) @@ -835,14 +1120,6 @@ static void __register_request(struct ceph_osd_client *osdc, } } -static void register_request(struct ceph_osd_client *osdc, - struct ceph_osd_request *req) -{ - mutex_lock(&osdc->request_mutex); - __register_request(osdc, req); - mutex_unlock(&osdc->request_mutex); -} - /* * called under osdc->request_mutex */ @@ -961,20 +1238,18 @@ EXPORT_SYMBOL(ceph_osdc_set_request_linger); static int __map_request(struct ceph_osd_client *osdc, struct ceph_osd_request *req, int force_resend) { - struct ceph_osd_request_head *reqhead = req->r_request->front.iov_base; struct ceph_pg pgid; int acting[CEPH_PG_MAX_SIZE]; int o = -1, num = 0; int err; dout("map_request %p tid %lld\n", req, req->r_tid); - err = ceph_calc_object_layout(&reqhead->layout, req->r_oid, - &req->r_file_layout, osdc->osdmap); + err = ceph_calc_ceph_pg(&pgid, req->r_oid,
osdc->osdmap, + ceph_file_layout_pg_pool(req->r_file_layout)); if (err) { list_move(&req->r_req_lru_item, &osdc->req_notarget); return err; } - pgid = reqhead->layout.ol_pgid; req->r_pgid = pgid; err = ceph_calc_pg_acting(osdc->osdmap, pgid, acting); @@ -991,8 +1266,8 @@ static int __map_request(struct ceph_osd_client *osdc, (req->r_osd == NULL && o == -1)) return 0; /* no change */ - dout("map_request tid %llu pgid %d.%x osd%d (was osd%d)\n", - req->r_tid, le32_to_cpu(pgid.pool), le16_to_cpu(pgid.ps), o, + dout("map_request tid %llu pgid %lld.%x osd%d (was osd%d)\n", + req->r_tid, pgid.pool, pgid.seed, o, req->r_osd ? req->r_osd->o_osd : -1); /* record full pg acting set */ @@ -1024,10 +1299,10 @@ static int __map_request(struct ceph_osd_client *osdc, if (req->r_osd) { __remove_osd_from_lru(req->r_osd); - list_add(&req->r_osd_item, &req->r_osd->o_requests); - list_move(&req->r_req_lru_item, &osdc->req_unsent); + list_add_tail(&req->r_osd_item, &req->r_osd->o_requests); + list_move_tail(&req->r_req_lru_item, &osdc->req_unsent); } else { - list_move(&req->r_req_lru_item, &osdc->req_notarget); + list_move_tail(&req->r_req_lru_item, &osdc->req_notarget); } err = 1; /* osd or pg changed */ @@ -1041,37 +1316,47 @@ out: static void __send_request(struct ceph_osd_client *osdc, struct ceph_osd_request *req) { - struct ceph_osd_request_head *reqhead; - - dout("send_request %p tid %llu to osd%d flags %d\n", - req, req->r_tid, req->r_osd->o_osd, req->r_flags); + void *p; - reqhead = req->r_request->front.iov_base; - reqhead->osdmap_epoch = cpu_to_le32(osdc->osdmap->epoch); - reqhead->flags |= cpu_to_le32(req->r_flags); /* e.g., RETRY */ - reqhead->reassert_version = req->r_reassert_version; + dout("send_request %p tid %llu to osd%d flags %d pg %lld.%x\n", + req, req->r_tid, req->r_osd->o_osd, req->r_flags, + (unsigned long long)req->r_pgid.pool, req->r_pgid.seed); + + /* fill in message content that changes each time we send it */ + put_unaligned_le32(osdc->osdmap->epoch, req->r_request_osdmap_epoch); + put_unaligned_le32(req->r_flags, req->r_request_flags); + put_unaligned_le64(req->r_pgid.pool, req->r_request_pool); + p = req->r_request_pgid; + ceph_encode_64(&p, req->r_pgid.pool); + ceph_encode_32(&p, req->r_pgid.seed); + put_unaligned_le64(1, req->r_request_attempts); /* FIXME */ + memcpy(req->r_request_reassert_version, &req->r_reassert_version, + sizeof(req->r_reassert_version)); req->r_stamp = jiffies; list_move_tail(&req->r_req_lru_item, &osdc->req_lru); ceph_msg_get(req->r_request); /* send consumes a ref */ - ceph_con_send(&req->r_osd->o_con, req->r_request); + + /* Mark the request unsafe if this is the first time it's being sent. */ + + if (!req->r_sent && req->r_unsafe_callback) + req->r_unsafe_callback(req, true); req->r_sent = req->r_osd->o_incarnation; + + ceph_con_send(&req->r_osd->o_con, req->r_request); } /* * Send any requests in the queue (req_unsent).
*/ -static void send_queued(struct ceph_osd_client *osdc) +static void __send_queued(struct ceph_osd_client *osdc) { struct ceph_osd_request *req, *tmp; - dout("send_queued\n"); - mutex_lock(&osdc->request_mutex); - list_for_each_entry_safe(req, tmp, &osdc->req_unsent, r_req_lru_item) { + dout("__send_queued\n"); + list_for_each_entry_safe(req, tmp, &osdc->req_unsent, r_req_lru_item) __send_request(osdc, req); - } - mutex_unlock(&osdc->request_mutex); } /* @@ -1123,8 +1408,8 @@ static void handle_timeout(struct work_struct *work) } __schedule_osd_timeout(osdc); + __send_queued(osdc); mutex_unlock(&osdc->request_mutex); - send_queued(osdc); up_read(&osdc->map_sem); } @@ -1147,8 +1432,8 @@ static void handle_osds_timeout(struct work_struct *work) static void complete_request(struct ceph_osd_request *req) { - if (req->r_safe_callback) - req->r_safe_callback(req, NULL); + if (req->r_unsafe_callback) + req->r_unsafe_callback(req, false); complete_all(&req->r_safe_completion); /* fsync waiter */ } @@ -1159,55 +1444,98 @@ static void complete_request(struct ceph_osd_request *req) static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg, struct ceph_connection *con) { - struct ceph_osd_reply_head *rhead = msg->front.iov_base; + void *p, *end; struct ceph_osd_request *req; u64 tid; - int numops, object_len, flags; + int object_len; + unsigned int numops; + int payload_len, flags; s32 result; + s32 retry_attempt; + struct ceph_pg pg; + int err; + u32 reassert_epoch; + u64 reassert_version; + u32 osdmap_epoch; + int already_completed; + u32 bytes; + unsigned int i; tid = le64_to_cpu(msg->hdr.tid); - if (msg->front.iov_len < sizeof(*rhead)) - goto bad; - numops = le32_to_cpu(rhead->num_ops); - object_len = le32_to_cpu(rhead->object_len); - result = le32_to_cpu(rhead->result); - if (msg->front.iov_len != sizeof(*rhead) + object_len + - numops * sizeof(struct ceph_osd_op)) + dout("handle_reply %p tid %llu\n", msg, tid); + + p = msg->front.iov_base; + end = p + msg->front.iov_len; + + ceph_decode_need(&p, end, 4, bad); + object_len = ceph_decode_32(&p); + ceph_decode_need(&p, end, object_len, bad); + p += object_len; + + err = ceph_decode_pgid(&p, end, &pg); + if (err) goto bad; - dout("handle_reply %p tid %llu result %d\n", msg, tid, (int)result); + + ceph_decode_need(&p, end, 8 + 4 + 4 + 8 + 4, bad); + flags = ceph_decode_64(&p); + result = ceph_decode_32(&p); + reassert_epoch = ceph_decode_32(&p); + reassert_version = ceph_decode_64(&p); + osdmap_epoch = ceph_decode_32(&p); + /* lookup */ mutex_lock(&osdc->request_mutex); req = __lookup_request(osdc, tid); if (req == NULL) { dout("handle_reply tid %llu dne\n", tid); - mutex_unlock(&osdc->request_mutex); - return; + goto bad_mutex; } ceph_osdc_get_request(req); - flags = le32_to_cpu(rhead->flags); - /* - * if this connection filled our message, drop our reference now, to - * avoid a (safe but slower) revoke later. 
- */ - if (req->r_con_filling_msg == con && req->r_reply == msg) { - dout(" dropping con_filling_msg ref %p\n", con); - req->r_con_filling_msg = NULL; - con->ops->put(con); - } + dout("handle_reply %p tid %llu req %p result %d\n", msg, tid, + req, result); + + ceph_decode_need(&p, end, 4, bad); + numops = ceph_decode_32(&p); + if (numops > CEPH_OSD_MAX_OP) + goto bad_put; + if (numops != req->r_num_ops) + goto bad_put; + payload_len = 0; + ceph_decode_need(&p, end, numops * sizeof(struct ceph_osd_op), bad); + for (i = 0; i < numops; i++) { + struct ceph_osd_op *op = p; + int len; + + len = le32_to_cpu(op->payload_len); + req->r_reply_op_len[i] = len; + dout(" op %d has %d bytes\n", i, len); + payload_len += len; + p += sizeof(*op); + } + bytes = le32_to_cpu(msg->hdr.data_len); + if (payload_len != bytes) { + pr_warning("sum of op payload lens %d != data_len %d", + payload_len, bytes); + goto bad_put; + } + + ceph_decode_need(&p, end, 4 + numops * 4, bad); + retry_attempt = ceph_decode_32(&p); + for (i = 0; i < numops; i++) + req->r_reply_op_result[i] = ceph_decode_32(&p); if (!req->r_got_reply) { - unsigned int bytes; - req->r_result = le32_to_cpu(rhead->result); - bytes = le32_to_cpu(msg->hdr.data_len); + req->r_result = result; dout("handle_reply result %d bytes %d\n", req->r_result, bytes); if (req->r_result == 0) req->r_result = bytes; /* in case this is a write and we need to replay, */ - req->r_reassert_version = rhead->reassert_version; + req->r_reassert_version.epoch = cpu_to_le32(reassert_epoch); + req->r_reassert_version.version = cpu_to_le64(reassert_version); req->r_got_reply = 1; } else if ((flags & CEPH_OSD_FLAG_ONDISK) == 0) { @@ -1227,7 +1555,11 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg, ((flags & CEPH_OSD_FLAG_WRITE) == 0)) __unregister_request(osdc, req); + already_completed = req->r_completed; + req->r_completed = 1; mutex_unlock(&osdc->request_mutex); + if (already_completed) + goto done; if (req->r_callback) req->r_callback(req, msg); @@ -1242,10 +1574,13 @@ done: ceph_osdc_put_request(req); return; +bad_put: + ceph_osdc_put_request(req); +bad_mutex: + mutex_unlock(&osdc->request_mutex); bad: - pr_err("corrupt osd_op_reply got %d %d expected %d\n", - (int)msg->front.iov_len, le32_to_cpu(msg->hdr.front_len), - (int)sizeof(*rhead)); + pr_err("corrupt osd_op_reply got %d %d\n", + (int)msg->front.iov_len, le32_to_cpu(msg->hdr.front_len)); ceph_msg_dump(msg); } @@ -1462,7 +1797,9 @@ done: if (ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_FULL)) ceph_monc_request_next_osdmap(&osdc->client->monc); - send_queued(osdc); + mutex_lock(&osdc->request_mutex); + __send_queued(osdc); + mutex_unlock(&osdc->request_mutex); up_read(&osdc->map_sem); wake_up_all(&osdc->client->auth_wq); return; @@ -1556,8 +1893,7 @@ static void __remove_event(struct ceph_osd_event *event) int ceph_osdc_create_event(struct ceph_osd_client *osdc, void (*event_cb)(u64, u64, u8, void *), - int one_shot, void *data, - struct ceph_osd_event **pevent) + void *data, struct ceph_osd_event **pevent) { struct ceph_osd_event *event; @@ -1567,14 +1903,13 @@ int ceph_osdc_create_event(struct ceph_osd_client *osdc, dout("create_event %p\n", event); event->cb = event_cb; - event->one_shot = one_shot; + event->one_shot = 0; event->data = data; event->osdc = osdc; INIT_LIST_HEAD(&event->osd_node); RB_CLEAR_NODE(&event->node); kref_init(&event->kref); /* one ref for us */ kref_get(&event->kref); /* one ref for the caller */ - init_completion(&event->completion); 
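/*
 * A minimal sketch of assumed caller usage, not part of the patch: with
 * the one_shot flag and the completion gone from ceph_osdc_create_event(),
 * a persistent watch event would be registered roughly like this, where
 * my_watch_cb and the error handling are illustrative names only:
 *
 *	static void my_watch_cb(u64 ver, u64 notify_id, u8 opcode, void *data)
 *	{
 *		... runs from do_event_work() for each notification ...
 *	}
 *
 *	struct ceph_osd_event *event;
 *	int ret = ceph_osdc_create_event(osdc, my_watch_cb, NULL, &event);
 *
 *	if (ret < 0)
 *		return ret;
 *	osd_req_op_watch_init(req, 0, CEPH_OSD_OP_WATCH,
 *			      event->cookie, 0, 1);
 *
 * after which the watch request is built and started as usual.
 */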
spin_lock(&osdc->event_lock); event->cookie = ++osdc->event_count; @@ -1610,7 +1945,6 @@ static void do_event_work(struct work_struct *work) dout("do_event_work completing %p\n", event); event->cb(ver, notify_id, opcode, event->data); - complete(&event->completion); dout("do_event_work completed %p\n", event); ceph_osdc_put_event(event); kfree(event_work); @@ -1620,7 +1954,8 @@ static void do_event_work(struct work_struct *work) /* * Process osd watch notifications */ -void handle_watch_notify(struct ceph_osd_client *osdc, struct ceph_msg *msg) +static void handle_watch_notify(struct ceph_osd_client *osdc, + struct ceph_msg *msg) { void *p, *end; u8 proto_ver; @@ -1641,9 +1976,8 @@ void handle_watch_notify(struct ceph_osd_client *osdc, struct ceph_msg *msg) spin_lock(&osdc->event_lock); event = __find_event(osdc, cookie); if (event) { + BUG_ON(event->one_shot); get_event(event); - if (event->one_shot) - __remove_event(event); } spin_unlock(&osdc->event_lock); dout("handle_watch_notify cookie %lld ver %lld event %p\n", @@ -1668,7 +2002,6 @@ void handle_watch_notify(struct ceph_osd_client *osdc, struct ceph_msg *msg) return; done_err: - complete(&event->completion); ceph_osdc_put_event(event); return; @@ -1677,20 +2010,103 @@ bad: return; } -int ceph_osdc_wait_event(struct ceph_osd_event *event, unsigned long timeout) +/* + * build new request AND message + * + */ +void ceph_osdc_build_request(struct ceph_osd_request *req, u64 off, + struct ceph_snap_context *snapc, u64 snap_id, + struct timespec *mtime) { - int err; + struct ceph_msg *msg = req->r_request; + void *p; + size_t msg_size; + int flags = req->r_flags; + u64 data_len; + unsigned int i; - dout("wait_event %p\n", event); - err = wait_for_completion_interruptible_timeout(&event->completion, - timeout * HZ); - ceph_osdc_put_event(event); - if (err > 0) - err = 0; - dout("wait_event %p returns %d\n", event, err); - return err; + req->r_snapid = snap_id; + req->r_snapc = ceph_get_snap_context(snapc); + + /* encode request */ + msg->hdr.version = cpu_to_le16(4); + + p = msg->front.iov_base; + ceph_encode_32(&p, 1); /* client_inc is always 1 */ + req->r_request_osdmap_epoch = p; + p += 4; + req->r_request_flags = p; + p += 4; + if (req->r_flags & CEPH_OSD_FLAG_WRITE) + ceph_encode_timespec(p, mtime); + p += sizeof(struct ceph_timespec); + req->r_request_reassert_version = p; + p += sizeof(struct ceph_eversion); /* will get filled in */ + + /* oloc */ + ceph_encode_8(&p, 4); + ceph_encode_8(&p, 4); + ceph_encode_32(&p, 8 + 4 + 4); + req->r_request_pool = p; + p += 8; + ceph_encode_32(&p, -1); /* preferred */ + ceph_encode_32(&p, 0); /* key len */ + + ceph_encode_8(&p, 1); + req->r_request_pgid = p; + p += 8 + 4; + ceph_encode_32(&p, -1); /* preferred */ + + /* oid */ + ceph_encode_32(&p, req->r_oid_len); + memcpy(p, req->r_oid, req->r_oid_len); + dout("oid '%.*s' len %d\n", req->r_oid_len, req->r_oid, req->r_oid_len); + p += req->r_oid_len; + + /* ops--can imply data */ + ceph_encode_16(&p, (u16)req->r_num_ops); + data_len = 0; + for (i = 0; i < req->r_num_ops; i++) { + data_len += osd_req_encode_op(req, p, i); + p += sizeof(struct ceph_osd_op); + } + + /* snaps */ + ceph_encode_64(&p, req->r_snapid); + ceph_encode_64(&p, req->r_snapc ? req->r_snapc->seq : 0); + ceph_encode_32(&p, req->r_snapc ? 
req->r_snapc->num_snaps : 0); + if (req->r_snapc) { + for (i = 0; i < snapc->num_snaps; i++) { + ceph_encode_64(&p, req->r_snapc->snaps[i]); + } + } + + req->r_request_attempts = p; + p += 4; + + /* data */ + if (flags & CEPH_OSD_FLAG_WRITE) { + u16 data_off; + + /* + * The header "data_off" is a hint to the receiver + * allowing it to align received data into its + * buffers such that there's no need to re-copy + * it before writing it to disk (direct I/O). + */ + data_off = (u16) (off & 0xffff); + req->r_request->hdr.data_off = cpu_to_le16(data_off); + } + req->r_request->hdr.data_len = cpu_to_le32(data_len); + + BUG_ON(p > msg->front.iov_base + msg->front.iov_len); + msg_size = p - msg->front.iov_base; + msg->front.iov_len = msg_size; + msg->hdr.front_len = cpu_to_le32(msg_size); + + dout("build_request msg_size was %d\n", (int)msg_size); } -EXPORT_SYMBOL(ceph_osdc_wait_event); +EXPORT_SYMBOL(ceph_osdc_build_request); /* * Register request, send initial attempt. @@ -1701,41 +2117,26 @@ int ceph_osdc_start_request(struct ceph_osd_client *osdc, { int rc = 0; - req->r_request->pages = req->r_pages; - req->r_request->nr_pages = req->r_num_pages; -#ifdef CONFIG_BLOCK - req->r_request->bio = req->r_bio; -#endif - req->r_request->trail = req->r_trail; - - register_request(osdc, req); - down_read(&osdc->map_sem); mutex_lock(&osdc->request_mutex); - /* - * a racing kick_requests() may have sent the message for us - * while we dropped request_mutex above, so only send now if - * the request still han't been touched yet. - */ - if (req->r_sent == 0) { - rc = __map_request(osdc, req, 0); - if (rc < 0) { - if (nofail) { - dout("osdc_start_request failed map, " - " will retry %lld\n", req->r_tid); - rc = 0; - } - goto out_unlock; - } - if (req->r_osd == NULL) { - dout("send_request %p no up osds in pg\n", req); - ceph_monc_request_next_osdmap(&osdc->client->monc); - } else { - __send_request(osdc, req); + __register_request(osdc, req); + WARN_ON(req->r_sent); + rc = __map_request(osdc, req, 0); + if (rc < 0) { + if (nofail) { + dout("osdc_start_request failed map, " + " will retry %lld\n", req->r_tid); + rc = 0; } - rc = 0; + goto out_unlock; } - + if (req->r_osd == NULL) { + dout("send_request %p no up osds in pg\n", req); + ceph_monc_request_next_osdmap(&osdc->client->monc); + } else { + __send_queued(osdc); + } + rc = 0; out_unlock: mutex_unlock(&osdc->request_mutex); up_read(&osdc->map_sem); @@ -1865,7 +2266,6 @@ out_mempool: out: return err; } -EXPORT_SYMBOL(ceph_osdc_init); void ceph_osdc_stop(struct ceph_osd_client *osdc) { @@ -1882,7 +2282,6 @@ void ceph_osdc_stop(struct ceph_osd_client *osdc) ceph_msgpool_destroy(&osdc->msgpool_op); ceph_msgpool_destroy(&osdc->msgpool_op_reply); } -EXPORT_SYMBOL(ceph_osdc_stop); /* * Read some contiguous pages. 
If we cross a stripe boundary, shorten @@ -1899,18 +2298,22 @@ int ceph_osdc_readpages(struct ceph_osd_client *osdc, dout("readpages on ino %llx.%llx on %llu~%llu\n", vino.ino, vino.snap, off, *plen); - req = ceph_osdc_new_request(osdc, layout, vino, off, plen, + req = ceph_osdc_new_request(osdc, layout, vino, off, plen, 1, CEPH_OSD_OP_READ, CEPH_OSD_FLAG_READ, - NULL, 0, truncate_seq, truncate_size, NULL, - false, 1, page_align); + NULL, truncate_seq, truncate_size, + false); if (IS_ERR(req)) return PTR_ERR(req); /* it may be a short read due to an object boundary */ - req->r_pages = pages; - dout("readpages final extent is %llu~%llu (%d pages align %d)\n", - off, *plen, req->r_num_pages, page_align); + osd_req_op_extent_osd_data_pages(req, 0, + pages, *plen, page_align, false, false); + + dout("readpages final extent is %llu~%llu (%llu bytes align %d)\n", + off, *plen, *plen, page_align); + + ceph_osdc_build_request(req, off, NULL, vino.snap, NULL); rc = ceph_osdc_start_request(osdc, req, false); if (!rc) @@ -1931,30 +2334,29 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino, u64 off, u64 len, u32 truncate_seq, u64 truncate_size, struct timespec *mtime, - struct page **pages, int num_pages, - int flags, int do_sync, bool nofail) + struct page **pages, int num_pages) { struct ceph_osd_request *req; int rc = 0; int page_align = off & ~PAGE_MASK; - BUG_ON(vino.snap != CEPH_NOSNAP); - req = ceph_osdc_new_request(osdc, layout, vino, off, &len, + BUG_ON(vino.snap != CEPH_NOSNAP); /* snapshots aren't writeable */ + req = ceph_osdc_new_request(osdc, layout, vino, off, &len, 1, CEPH_OSD_OP_WRITE, - flags | CEPH_OSD_FLAG_ONDISK | - CEPH_OSD_FLAG_WRITE, - snapc, do_sync, - truncate_seq, truncate_size, mtime, - nofail, 1, page_align); + CEPH_OSD_FLAG_ONDISK | CEPH_OSD_FLAG_WRITE, + snapc, truncate_seq, truncate_size, + true); if (IS_ERR(req)) return PTR_ERR(req); /* it may be a short write due to an object boundary */ - req->r_pages = pages; - dout("writepages %llu~%llu (%d pages)\n", off, len, - req->r_num_pages); + osd_req_op_extent_osd_data_pages(req, 0, pages, len, page_align, + false, false); + dout("writepages %llu~%llu (%llu bytes)\n", off, len, len); - rc = ceph_osdc_start_request(osdc, req, nofail); + ceph_osdc_build_request(req, off, snapc, CEPH_NOSNAP, mtime); + + rc = ceph_osdc_start_request(osdc, req, true); if (!rc) rc = ceph_osdc_wait_request(osdc, req); @@ -1966,6 +2368,26 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino, } EXPORT_SYMBOL(ceph_osdc_writepages); +int ceph_osdc_setup(void) +{ + BUG_ON(ceph_osd_request_cache); + ceph_osd_request_cache = kmem_cache_create("ceph_osd_request", + sizeof (struct ceph_osd_request), + __alignof__(struct ceph_osd_request), + 0, NULL); + + return ceph_osd_request_cache ? 
0 : -ENOMEM; +} +EXPORT_SYMBOL(ceph_osdc_setup); + +void ceph_osdc_cleanup(void) +{ + BUG_ON(!ceph_osd_request_cache); + kmem_cache_destroy(ceph_osd_request_cache); + ceph_osd_request_cache = NULL; +} +EXPORT_SYMBOL(ceph_osdc_cleanup); + /* * handle incoming message */ @@ -2025,13 +2447,10 @@ static struct ceph_msg *get_reply(struct ceph_connection *con, goto out; } - if (req->r_con_filling_msg) { + if (req->r_reply->con) dout("%s revoking msg %p from old con %p\n", __func__, - req->r_reply, req->r_con_filling_msg); - ceph_msg_revoke_incoming(req->r_reply); - req->r_con_filling_msg->ops->put(req->r_con_filling_msg); - req->r_con_filling_msg = NULL; - } + req->r_reply, req->r_reply->con); + ceph_msg_revoke_incoming(req->r_reply); if (front > req->r_reply->front.iov_len) { pr_warning("get_reply front %d > preallocated %d\n", @@ -2045,26 +2464,29 @@ static struct ceph_msg *get_reply(struct ceph_connection *con, m = ceph_msg_get(req->r_reply); if (data_len > 0) { - int want = calc_pages_for(req->r_page_alignment, data_len); - - if (unlikely(req->r_num_pages < want)) { - pr_warning("tid %lld reply has %d bytes %d pages, we" - " had only %d pages ready\n", tid, data_len, - want, req->r_num_pages); - *skip = 1; - ceph_msg_put(m); - m = NULL; - goto out; + struct ceph_osd_data *osd_data; + + /* + * XXX This is assuming there is only one op containing + * XXX page data. Probably OK for reads, but this + * XXX ought to be done more generally. + */ + osd_data = osd_req_op_extent_osd_data(req, 0); + if (osd_data->type == CEPH_OSD_DATA_TYPE_PAGES) { + if (osd_data->pages && + unlikely(osd_data->length < data_len)) { + + pr_warning("tid %lld reply has %d bytes " + "we had only %llu bytes ready\n", + tid, data_len, osd_data->length); + *skip = 1; + ceph_msg_put(m); + m = NULL; + goto out; + } } - m->pages = req->r_pages; - m->nr_pages = req->r_num_pages; - m->page_alignment = req->r_page_alignment; -#ifdef CONFIG_BLOCK - m->bio = req->r_bio; -#endif } *skip = 0; - req->r_con_filling_msg = con->ops->get(con); dout("get_reply tid %lld %p\n", tid, m); out: @@ -2129,13 +2551,17 @@ static struct ceph_auth_handshake *get_authorizer(struct ceph_connection *con, struct ceph_auth_handshake *auth = &o->o_auth; if (force_new && auth->authorizer) { - if (ac->ops && ac->ops->destroy_authorizer) - ac->ops->destroy_authorizer(ac, auth->authorizer); + ceph_auth_destroy_authorizer(ac, auth->authorizer); auth->authorizer = NULL; } - if (!auth->authorizer && ac->ops && ac->ops->create_authorizer) { - int ret = ac->ops->create_authorizer(ac, CEPH_ENTITY_TYPE_OSD, - auth); + if (!auth->authorizer) { + int ret = ceph_auth_create_authorizer(ac, CEPH_ENTITY_TYPE_OSD, + auth); + if (ret) + return ERR_PTR(ret); + } else { + int ret = ceph_auth_update_authorizer(ac, CEPH_ENTITY_TYPE_OSD, + auth); if (ret) return ERR_PTR(ret); } @@ -2151,11 +2577,7 @@ static int verify_authorizer_reply(struct ceph_connection *con, int len) struct ceph_osd_client *osdc = o->o_osdc; struct ceph_auth_client *ac = osdc->client->monc.auth; - /* - * XXX If ac->ops or ac->ops->verify_authorizer_reply is null, - * XXX which do we do: succeed or fail? 
- */ - return ac->ops->verify_authorizer_reply(ac, o->o_auth.authorizer, len); + return ceph_auth_verify_authorizer_reply(ac, o->o_auth.authorizer, len); } static int invalidate_authorizer(struct ceph_connection *con) @@ -2164,9 +2586,7 @@ static int invalidate_authorizer(struct ceph_connection *con) struct ceph_osd_client *osdc = o->o_osdc; struct ceph_auth_client *ac = osdc->client->monc.auth; - if (ac->ops && ac->ops->invalidate_authorizer) - ac->ops->invalidate_authorizer(ac, CEPH_ENTITY_TYPE_OSD); - + ceph_auth_invalidate_authorizer(ac, CEPH_ENTITY_TYPE_OSD); return ceph_monc_validate_auth(&osdc->client->monc); } diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c index de73214b5d26..603ddd92db19 100644 --- a/net/ceph/osdmap.c +++ b/net/ceph/osdmap.c @@ -13,26 +13,18 @@ char *ceph_osdmap_state_str(char *str, int len, int state) { - int flag = 0; - if (!len) - goto done; - - *str = '\0'; - if (state) { - if (state & CEPH_OSD_EXISTS) { - snprintf(str, len, "exists"); - flag = 1; - } - if (state & CEPH_OSD_UP) { - snprintf(str, len, "%s%s%s", str, (flag ? ", " : ""), - "up"); - flag = 1; - } - } else { + return str; + + if ((state & CEPH_OSD_EXISTS) && (state & CEPH_OSD_UP)) + snprintf(str, len, "exists, up"); + else if (state & CEPH_OSD_EXISTS) + snprintf(str, len, "exists"); + else if (state & CEPH_OSD_UP) + snprintf(str, len, "up"); + else snprintf(str, len, "doesn't exist"); - } -done: + return str; } @@ -53,13 +45,8 @@ static int calc_bits_of(unsigned int t) */ static void calc_pg_masks(struct ceph_pg_pool_info *pi) { - pi->pg_num_mask = (1 << calc_bits_of(le32_to_cpu(pi->v.pg_num)-1)) - 1; - pi->pgp_num_mask = - (1 << calc_bits_of(le32_to_cpu(pi->v.pgp_num)-1)) - 1; - pi->lpg_num_mask = - (1 << calc_bits_of(le32_to_cpu(pi->v.lpg_num)-1)) - 1; - pi->lpgp_num_mask = - (1 << calc_bits_of(le32_to_cpu(pi->v.lpgp_num)-1)) - 1; + pi->pg_num_mask = (1 << calc_bits_of(pi->pg_num-1)) - 1; + pi->pgp_num_mask = (1 << calc_bits_of(pi->pgp_num-1)) - 1; } /* @@ -170,6 +157,7 @@ static struct crush_map *crush_decode(void *pbyval, void *end) c->choose_local_tries = 2; c->choose_local_fallback_tries = 5; c->choose_total_tries = 19; + c->chooseleaf_descend_once = 0; ceph_decode_need(p, end, 4*sizeof(u32), bad); magic = ceph_decode_32(p); @@ -336,6 +324,11 @@ static struct crush_map *crush_decode(void *pbyval, void *end) dout("crush decode tunable choose_total_tries = %d", c->choose_total_tries); + ceph_decode_need(p, end, sizeof(u32), done); + c->chooseleaf_descend_once = ceph_decode_32(p); + dout("crush decode tunable chooseleaf_descend_once = %d", + c->chooseleaf_descend_once); + done: dout("crush_decode success\n"); return c; @@ -354,12 +347,13 @@ bad: */ static int pgid_cmp(struct ceph_pg l, struct ceph_pg r) { - u64 a = *(u64 *)&l; - u64 b = *(u64 *)&r; - - if (a < b) + if (l.pool < r.pool) return -1; - if (a > b) + if (l.pool > r.pool) + return 1; + if (l.seed < r.seed) + return -1; + if (l.seed > r.seed) return 1; return 0; } @@ -405,8 +399,8 @@ static struct ceph_pg_mapping *__lookup_pg_mapping(struct rb_root *root, } else if (c > 0) { n = n->rb_right; } else { - dout("__lookup_pg_mapping %llx got %p\n", - *(u64 *)&pgid, pg); + dout("__lookup_pg_mapping %lld.%x got %p\n", + pgid.pool, pgid.seed, pg); return pg; } } @@ -418,12 +412,13 @@ static int __remove_pg_mapping(struct rb_root *root, struct ceph_pg pgid) struct ceph_pg_mapping *pg = __lookup_pg_mapping(root, pgid); if (pg) { - dout("__remove_pg_mapping %llx %p\n", *(u64 *)&pgid, pg); + dout("__remove_pg_mapping %lld.%x %p\n", pgid.pool, 
pgid.seed, + pg); rb_erase(&pg->node, root); kfree(pg); return 0; } - dout("__remove_pg_mapping %llx dne\n", *(u64 *)&pgid); + dout("__remove_pg_mapping %lld.%x dne\n", pgid.pool, pgid.seed); return -ENOENT; } @@ -452,7 +447,7 @@ static int __insert_pg_pool(struct rb_root *root, struct ceph_pg_pool_info *new) return 0; } -static struct ceph_pg_pool_info *__lookup_pg_pool(struct rb_root *root, int id) +static struct ceph_pg_pool_info *__lookup_pg_pool(struct rb_root *root, u64 id) { struct ceph_pg_pool_info *pi; struct rb_node *n = root->rb_node; @@ -508,24 +503,57 @@ static void __remove_pg_pool(struct rb_root *root, struct ceph_pg_pool_info *pi) static int __decode_pool(void **p, void *end, struct ceph_pg_pool_info *pi) { - unsigned int n, m; + u8 ev, cv; + unsigned len, num; + void *pool_end; + + ceph_decode_need(p, end, 2 + 4, bad); + ev = ceph_decode_8(p); /* encoding version */ + cv = ceph_decode_8(p); /* compat version */ + if (ev < 5) { + pr_warning("got v %d < 5 cv %d of ceph_pg_pool\n", ev, cv); + return -EINVAL; + } + if (cv > 7) { + pr_warning("got v %d cv %d > 7 of ceph_pg_pool\n", ev, cv); + return -EINVAL; + } + len = ceph_decode_32(p); + ceph_decode_need(p, end, len, bad); + pool_end = *p + len; - ceph_decode_copy(p, &pi->v, sizeof(pi->v)); - calc_pg_masks(pi); + pi->type = ceph_decode_8(p); + pi->size = ceph_decode_8(p); + pi->crush_ruleset = ceph_decode_8(p); + pi->object_hash = ceph_decode_8(p); + + pi->pg_num = ceph_decode_32(p); + pi->pgp_num = ceph_decode_32(p); + + *p += 4 + 4; /* skip lpg* */ + *p += 4; /* skip last_change */ + *p += 8 + 4; /* skip snap_seq, snap_epoch */ - /* num_snaps * snap_info_t */ - n = le32_to_cpu(pi->v.num_snaps); - while (n--) { - ceph_decode_need(p, end, sizeof(u64) + 1 + sizeof(u64) + - sizeof(struct ceph_timespec), bad); - *p += sizeof(u64) + /* key */ - 1 + sizeof(u64) + /* u8, snapid */ - sizeof(struct ceph_timespec); - m = ceph_decode_32(p); /* snap name */ - *p += m; + /* skip snaps */ + num = ceph_decode_32(p); + while (num--) { + *p += 8; /* snapid key */ + *p += 1 + 1; /* versions */ + len = ceph_decode_32(p); + *p += len; } - *p += le32_to_cpu(pi->v.num_removed_snap_intervals) * sizeof(u64) * 2; + /* skip removed snaps */ + num = ceph_decode_32(p); + *p += num * (8 + 8); + + *p += 8; /* skip auid */ + pi->flags = ceph_decode_64(p); + + /* ignore the rest */ + + *p = pool_end; + calc_pg_masks(pi); return 0; bad: @@ -535,14 +563,15 @@ bad: static int __decode_pool_names(void **p, void *end, struct ceph_osdmap *map) { struct ceph_pg_pool_info *pi; - u32 num, len, pool; + u32 num, len; + u64 pool; ceph_decode_32_safe(p, end, num, bad); dout(" %d pool names\n", num); while (num--) { - ceph_decode_32_safe(p, end, pool, bad); + ceph_decode_64_safe(p, end, pool, bad); ceph_decode_32_safe(p, end, len, bad); - dout(" pool %d len %d\n", pool, len); + dout(" pool %llu len %d\n", pool, len); ceph_decode_need(p, end, len, bad); pi = __lookup_pg_pool(&map->pg_pools, pool); if (pi) { @@ -633,7 +662,6 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end) struct ceph_osdmap *map; u16 version; u32 len, max, i; - u8 ev; int err = -EINVAL; void *start = *p; struct ceph_pg_pool_info *pi; @@ -646,9 +674,12 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end) map->pg_temp = RB_ROOT; ceph_decode_16_safe(p, end, version, bad); - if (version > CEPH_OSDMAP_VERSION) { - pr_warning("got unknown v %d > %d of osdmap\n", version, - CEPH_OSDMAP_VERSION); + if (version > 6) { + pr_warning("got unknown v %d > 6 of osdmap\n", version); + goto bad; + } + if 
(version < 6) { + pr_warning("got old v %d < 6 of osdmap\n", version); goto bad; } @@ -660,20 +691,12 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end) ceph_decode_32_safe(p, end, max, bad); while (max--) { - ceph_decode_need(p, end, 4 + 1 + sizeof(pi->v), bad); + ceph_decode_need(p, end, 8 + 2, bad); err = -ENOMEM; pi = kzalloc(sizeof(*pi), GFP_NOFS); if (!pi) goto bad; - pi->id = ceph_decode_32(p); - err = -EINVAL; - ev = ceph_decode_8(p); /* encoding version */ - if (ev > CEPH_PG_POOL_VERSION) { - pr_warning("got unknown v %d > %d of ceph_pg_pool\n", - ev, CEPH_PG_POOL_VERSION); - kfree(pi); - goto bad; - } + pi->id = ceph_decode_64(p); err = __decode_pool(p, end, pi); if (err < 0) { kfree(pi); @@ -682,12 +705,10 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end) __insert_pg_pool(&map->pg_pools, pi); } - if (version >= 5) { - err = __decode_pool_names(p, end, map); - if (err < 0) { - dout("fail to decode pool names"); - goto bad; - } + err = __decode_pool_names(p, end, map); + if (err < 0) { + dout("fail to decode pool names"); + goto bad; } ceph_decode_32_safe(p, end, map->pool_max, bad); @@ -726,8 +747,10 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end) struct ceph_pg pgid; struct ceph_pg_mapping *pg; - ceph_decode_need(p, end, sizeof(u32) + sizeof(u64), bad); - ceph_decode_copy(p, &pgid, sizeof(pgid)); + err = ceph_decode_pgid(p, end, &pgid); + if (err) + goto bad; + ceph_decode_need(p, end, sizeof(u32), bad); n = ceph_decode_32(p); err = -EINVAL; if (n > (UINT_MAX - sizeof(*pg)) / sizeof(u32)) @@ -745,7 +768,8 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end) err = __insert_pg_mapping(pg, &map->pg_temp); if (err) goto bad; - dout(" added pg_temp %llx len %d\n", *(u64 *)&pgid, len); + dout(" added pg_temp %lld.%x len %d\n", pgid.pool, pgid.seed, + len); } /* crush */ @@ -784,16 +808,17 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, struct ceph_fsid fsid; u32 epoch = 0; struct ceph_timespec modified; - u32 len, pool; - __s32 new_pool_max, new_flags, max; + s32 len; + u64 pool; + __s64 new_pool_max; + __s32 new_flags, max; void *start = *p; int err = -EINVAL; u16 version; ceph_decode_16_safe(p, end, version, bad); - if (version > CEPH_OSDMAP_INC_VERSION) { - pr_warning("got unknown v %d > %d of inc osdmap\n", version, - CEPH_OSDMAP_INC_VERSION); + if (version != 6) { + pr_warning("got unknown v %d != 6 of inc osdmap\n", version); goto bad; } @@ -803,7 +828,7 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, epoch = ceph_decode_32(p); BUG_ON(epoch != map->epoch+1); ceph_decode_copy(p, &modified, sizeof(modified)); - new_pool_max = ceph_decode_32(p); + new_pool_max = ceph_decode_64(p); new_flags = ceph_decode_32(p); /* full map? 
*/ @@ -853,18 +878,9 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, /* new_pool */ ceph_decode_32_safe(p, end, len, bad); while (len--) { - __u8 ev; struct ceph_pg_pool_info *pi; - ceph_decode_32_safe(p, end, pool, bad); - ceph_decode_need(p, end, 1 + sizeof(pi->v), bad); - ev = ceph_decode_8(p); /* encoding version */ - if (ev > CEPH_PG_POOL_VERSION) { - pr_warning("got unknown v %d > %d of ceph_pg_pool\n", - ev, CEPH_PG_POOL_VERSION); - err = -EINVAL; - goto bad; - } + ceph_decode_64_safe(p, end, pool, bad); pi = __lookup_pg_pool(&map->pg_pools, pool); if (!pi) { pi = kzalloc(sizeof(*pi), GFP_NOFS); @@ -890,7 +906,7 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, while (len--) { struct ceph_pg_pool_info *pi; - ceph_decode_32_safe(p, end, pool, bad); + ceph_decode_64_safe(p, end, pool, bad); pi = __lookup_pg_pool(&map->pg_pools, pool); if (pi) __remove_pg_pool(&map->pg_pools, pi); @@ -948,10 +964,12 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, int j; struct ceph_pg pgid; u32 pglen; - ceph_decode_need(p, end, sizeof(u64) + sizeof(u32), bad); - ceph_decode_copy(p, &pgid, sizeof(pgid)); - pglen = ceph_decode_32(p); + err = ceph_decode_pgid(p, end, &pgid); + if (err) + goto bad; + ceph_decode_need(p, end, sizeof(u32), bad); + pglen = ceph_decode_32(p); if (pglen) { ceph_decode_need(p, end, pglen*sizeof(u32), bad); @@ -975,8 +993,8 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, kfree(pg); goto bad; } - dout(" added pg_temp %llx len %d\n", *(u64 *)&pgid, - pglen); + dout(" added pg_temp %lld.%x len %d\n", pgid.pool, + pgid.seed, pglen); } else { /* remove */ __remove_pg_mapping(&map->pg_temp, pgid); @@ -1010,7 +1028,7 @@ bad: * pass a stride back to the caller. */ int ceph_calc_file_object_mapping(struct ceph_file_layout *layout, - u64 off, u64 *plen, + u64 off, u64 len, u64 *ono, u64 *oxoff, u64 *oxlen) { @@ -1021,7 +1039,7 @@ int ceph_calc_file_object_mapping(struct ceph_file_layout *layout, u32 su_per_object; u64 t, su_offset; - dout("mapping %llu~%llu osize %u fl_su %u\n", off, *plen, + dout("mapping %llu~%llu osize %u fl_su %u\n", off, len, osize, su); if (su == 0 || sc == 0) goto invalid; @@ -1054,11 +1072,10 @@ int ceph_calc_file_object_mapping(struct ceph_file_layout *layout, /* * Calculate the length of the extent being written to the selected - * object. This is the minimum of the full length requested (plen) or + * object. This is the minimum of the full length requested (len) or * the remainder of the current stripe being written to. */ - *oxlen = min_t(u64, *plen, su - su_offset); - *plen = *oxlen; + *oxlen = min_t(u64, len, su - su_offset); dout(" obj extent %llu~%llu\n", *oxoff, *oxlen); return 0; @@ -1076,36 +1093,22 @@ EXPORT_SYMBOL(ceph_calc_file_object_mapping); * calculate an object layout (i.e. 
pgid) from an oid, * file_layout, and osdmap */ -int ceph_calc_object_layout(struct ceph_object_layout *ol, - const char *oid, - struct ceph_file_layout *fl, - struct ceph_osdmap *osdmap) +int ceph_calc_ceph_pg(struct ceph_pg *pg, const char *oid, + struct ceph_osdmap *osdmap, uint64_t pool) { - unsigned int num, num_mask; - struct ceph_pg pgid; - int poolid = le32_to_cpu(fl->fl_pg_pool); - struct ceph_pg_pool_info *pool; - unsigned int ps; + struct ceph_pg_pool_info *pool_info; BUG_ON(!osdmap); - - pool = __lookup_pg_pool(&osdmap->pg_pools, poolid); - if (!pool) + pool_info = __lookup_pg_pool(&osdmap->pg_pools, pool); + if (!pool_info) return -EIO; - ps = ceph_str_hash(pool->v.object_hash, oid, strlen(oid)); - num = le32_to_cpu(pool->v.pg_num); - num_mask = pool->pg_num_mask; - - pgid.ps = cpu_to_le16(ps); - pgid.preferred = cpu_to_le16(-1); - pgid.pool = fl->fl_pg_pool; - dout("calc_object_layout '%s' pgid %d.%x\n", oid, poolid, ps); + pg->pool = pool; + pg->seed = ceph_str_hash(pool_info->object_hash, oid, strlen(oid)); - ol->ol_pgid = pgid; - ol->ol_stripe_unit = fl->fl_object_stripe_unit; + dout("%s '%s' pgid %lld.%x\n", __func__, oid, pg->pool, pg->seed); return 0; } -EXPORT_SYMBOL(ceph_calc_object_layout); +EXPORT_SYMBOL(ceph_calc_ceph_pg); /* * Calculate raw osd vector for the given pgid. Return pointer to osd @@ -1117,19 +1120,16 @@ static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg pgid, struct ceph_pg_mapping *pg; struct ceph_pg_pool_info *pool; int ruleno; - unsigned int poolid, ps, pps, t, r; + int r; + u32 pps; - poolid = le32_to_cpu(pgid.pool); - ps = le16_to_cpu(pgid.ps); - - pool = __lookup_pg_pool(&osdmap->pg_pools, poolid); + pool = __lookup_pg_pool(&osdmap->pg_pools, pgid.pool); if (!pool) return NULL; /* pg_temp? */ - t = ceph_stable_mod(ps, le32_to_cpu(pool->v.pg_num), - pool->pgp_num_mask); - pgid.ps = cpu_to_le16(t); + pgid.seed = ceph_stable_mod(pgid.seed, pool->pg_num, + pool->pgp_num_mask); pg = __lookup_pg_mapping(&osdmap->pg_temp, pgid); if (pg) { *num = pg->len; @@ -1137,26 +1137,39 @@ static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg pgid, } /* crush */ - ruleno = crush_find_rule(osdmap->crush, pool->v.crush_ruleset, - pool->v.type, pool->v.size); + ruleno = crush_find_rule(osdmap->crush, pool->crush_ruleset, + pool->type, pool->size); if (ruleno < 0) { - pr_err("no crush rule pool %d ruleset %d type %d size %d\n", - poolid, pool->v.crush_ruleset, pool->v.type, - pool->v.size); + pr_err("no crush rule pool %lld ruleset %d type %d size %d\n", + pgid.pool, pool->crush_ruleset, pool->type, + pool->size); return NULL; } - pps = ceph_stable_mod(ps, - le32_to_cpu(pool->v.pgp_num), - pool->pgp_num_mask); - pps += poolid; + if (pool->flags & CEPH_POOL_FLAG_HASHPSPOOL) { + /* hash pool id and seed so that pool PGs do not overlap */ + pps = crush_hash32_2(CRUSH_HASH_RJENKINS1, + ceph_stable_mod(pgid.seed, pool->pgp_num, + pool->pgp_num_mask), + pgid.pool); + } else { + /* + * legacy behavior: add ps and pool together. this is + * not a great approach because the PGs from each pool + * will overlap on top of each other: 0.5 == 1.4 == + * 2.3 == ...
+ */ + pps = ceph_stable_mod(pgid.seed, pool->pgp_num, + pool->pgp_num_mask) + + (unsigned)pgid.pool; + } r = crush_do_rule(osdmap->crush, ruleno, pps, osds, - min_t(int, pool->v.size, *num), + min_t(int, pool->size, *num), osdmap->osd_weight); if (r < 0) { - pr_err("error %d from crush rule: pool %d ruleset %d type %d" - " size %d\n", r, poolid, pool->v.crush_ruleset, - pool->v.type, pool->v.size); + pr_err("error %d from crush rule: pool %lld ruleset %d type %d" + " size %d\n", r, pgid.pool, pool->crush_ruleset, + pool->type, pool->size); return NULL; } *num = r; diff --git a/net/ceph/pagevec.c b/net/ceph/pagevec.c index cd9c21df87d1..815a2249cfa9 100644 --- a/net/ceph/pagevec.c +++ b/net/ceph/pagevec.c @@ -12,7 +12,7 @@ /* * build a vector of user pages */ -struct page **ceph_get_direct_page_vector(const char __user *data, +struct page **ceph_get_direct_page_vector(const void __user *data, int num_pages, bool write_page) { struct page **pages; @@ -93,7 +93,7 @@ EXPORT_SYMBOL(ceph_alloc_page_vector); * copy user data into a page vector */ int ceph_copy_user_to_page_vector(struct page **pages, - const char __user *data, + const void __user *data, loff_t off, size_t len) { int i = 0; @@ -118,17 +118,17 @@ int ceph_copy_user_to_page_vector(struct page **pages, } EXPORT_SYMBOL(ceph_copy_user_to_page_vector); -int ceph_copy_to_page_vector(struct page **pages, - const char *data, +void ceph_copy_to_page_vector(struct page **pages, + const void *data, loff_t off, size_t len) { int i = 0; size_t po = off & ~PAGE_CACHE_MASK; size_t left = len; - size_t l; while (left > 0) { - l = min_t(size_t, PAGE_CACHE_SIZE-po, left); + size_t l = min_t(size_t, PAGE_CACHE_SIZE-po, left); + memcpy(page_address(pages[i]) + po, data, l); data += l; left -= l; @@ -138,21 +138,20 @@ int ceph_copy_to_page_vector(struct page **pages, i++; } } - return len; } EXPORT_SYMBOL(ceph_copy_to_page_vector); -int ceph_copy_from_page_vector(struct page **pages, - char *data, +void ceph_copy_from_page_vector(struct page **pages, + void *data, loff_t off, size_t len) { int i = 0; size_t po = off & ~PAGE_CACHE_MASK; size_t left = len; - size_t l; while (left > 0) { - l = min_t(size_t, PAGE_CACHE_SIZE-po, left); + size_t l = min_t(size_t, PAGE_CACHE_SIZE-po, left); + memcpy(data, page_address(pages[i]) + po, l); data += l; left -= l; @@ -162,7 +161,6 @@ int ceph_copy_from_page_vector(struct page **pages, i++; } } - return len; } EXPORT_SYMBOL(ceph_copy_from_page_vector); @@ -170,7 +168,7 @@ EXPORT_SYMBOL(ceph_copy_from_page_vector); * copy user data from a page vector into a user pointer */ int ceph_copy_page_vector_to_user(struct page **pages, - char __user *data, + void __user *data, loff_t off, size_t len) { int i = 0; diff --git a/net/ceph/snapshot.c b/net/ceph/snapshot.c new file mode 100644 index 000000000000..154683f5f14c --- /dev/null +++ b/net/ceph/snapshot.c @@ -0,0 +1,78 @@ +/* + * snapshot.c Ceph snapshot context utility routines (part of libceph) + * + * Copyright (C) 2013 Inktank Storage, Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +#include <stddef.h> + +#include <linux/types.h> +#include <linux/export.h> +#include <linux/ceph/libceph.h> + +/* + * Ceph snapshot contexts are reference counted objects, and the + * returned structure holds a single reference. Acquire additional + * references with ceph_get_snap_context(), and release them with + * ceph_put_snap_context(). When the reference count reaches zero + * the entire structure is freed. + */ + +/* + * Create a new ceph snapshot context large enough to hold the + * indicated number of snapshot ids (which can be 0). Caller has + * to fill in snapc->seq and snapc->snaps[0..snap_count-1]. + * + * Returns a null pointer if an error occurs. + */ +struct ceph_snap_context *ceph_create_snap_context(u32 snap_count, + gfp_t gfp_flags) +{ + struct ceph_snap_context *snapc; + size_t size; + + size = sizeof (struct ceph_snap_context); + size += snap_count * sizeof (snapc->snaps[0]); + snapc = kzalloc(size, gfp_flags); + if (!snapc) + return NULL; + + atomic_set(&snapc->nref, 1); + snapc->num_snaps = snap_count; + + return snapc; +} +EXPORT_SYMBOL(ceph_create_snap_context); + +struct ceph_snap_context *ceph_get_snap_context(struct ceph_snap_context *sc) +{ + if (sc) + atomic_inc(&sc->nref); + return sc; +} +EXPORT_SYMBOL(ceph_get_snap_context); + +void ceph_put_snap_context(struct ceph_snap_context *sc) +{ + if (!sc) + return; + if (atomic_dec_and_test(&sc->nref)) { + /*printk(" deleting snap_context %p\n", sc);*/ + kfree(sc); + } +} +EXPORT_SYMBOL(ceph_put_snap_context); diff --git a/net/core/Makefile b/net/core/Makefile index 674641b13aea..b33b996f5dd6 100644 --- a/net/core/Makefile +++ b/net/core/Makefile @@ -9,10 +9,11 @@ obj-$(CONFIG_SYSCTL) += sysctl_net_core.o obj-y += dev.o ethtool.o dev_addr_lists.o dst.o netevent.o \ neighbour.o rtnetlink.o utils.o link_watch.o filter.o \ - sock_diag.o + sock_diag.o dev_ioctl.o obj-$(CONFIG_XFRM) += flow.o obj-y += net-sysfs.o +obj-$(CONFIG_PROC_FS) += net-procfs.o obj-$(CONFIG_NET_PKTGEN) += pktgen.o obj-$(CONFIG_NETPOLL) += netpoll.o obj-$(CONFIG_NET_DMA) += user_dma.o diff --git a/net/core/datagram.c b/net/core/datagram.c index 368f9c3f9dc6..b71423db7785 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -78,9 +78,10 @@ static int receiver_wake_function(wait_queue_t *wait, unsigned int mode, int syn return autoremove_wake_function(wait, mode, sync, key); } /* - * Wait for a packet.. + * Wait for the last received packet to be different from skb */ -static int wait_for_packet(struct sock *sk, int *err, long *timeo_p) +static int wait_for_more_packets(struct sock *sk, int *err, long *timeo_p, + const struct sk_buff *skb) { int error; DEFINE_WAIT_FUNC(wait, receiver_wake_function); @@ -92,7 +93,7 @@ static int wait_for_packet(struct sock *sk, int *err, long *timeo_p) if (error) goto out_err; - if (!skb_queue_empty(&sk->sk_receive_queue)) + if (sk->sk_receive_queue.prev != skb) goto out; /* Socket shut down? */ @@ -131,9 +132,9 @@ out_noerr: * __skb_recv_datagram - Receive a datagram skbuff * @sk: socket * @flags: MSG_ flags + * @peeked: returns non-zero if this packet has been seen before * @off: an offset in bytes to peek skb from. 
Returns an offset * within an skb where data actually starts - * @peeked: returns non-zero if this packet has been seen before * @err: error code returned * * Get a datagram skbuff, understands the peeking, nonblocking wakeups @@ -161,7 +162,7 @@ out_noerr: struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags, int *peeked, int *off, int *err) { - struct sk_buff *skb; + struct sk_buff *skb, *last; long timeo; /* * Caller is allowed not to check sk->sk_err before skb_recv_datagram() @@ -182,13 +183,17 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags, */ unsigned long cpu_flags; struct sk_buff_head *queue = &sk->sk_receive_queue; + int _off = *off; + last = (struct sk_buff *)queue; spin_lock_irqsave(&queue->lock, cpu_flags); skb_queue_walk(queue, skb) { + last = skb; *peeked = skb->peeked; if (flags & MSG_PEEK) { - if (*off >= skb->len && skb->len) { - *off -= skb->len; + if (_off >= skb->len && (skb->len || _off || + skb->peeked)) { + _off -= skb->len; continue; } skb->peeked = 1; @@ -197,6 +202,7 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags, __skb_unlink(skb, queue); spin_unlock_irqrestore(&queue->lock, cpu_flags); + *off = _off; return skb; } spin_unlock_irqrestore(&queue->lock, cpu_flags); @@ -206,7 +212,7 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags, if (!timeo) goto no_packet; - } while (!wait_for_packet(sk, err, &timeo)); + } while (!wait_for_more_packets(sk, err, &timeo, last)); return NULL; @@ -749,7 +755,9 @@ unsigned int datagram_poll(struct file *file, struct socket *sock, /* exceptional events? */ if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) - mask |= POLLERR; + mask |= POLLERR | + (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0); + if (sk->sk_shutdown & RCV_SHUTDOWN) mask |= POLLRDHUP | POLLIN | POLLRDNORM; if (sk->sk_shutdown == SHUTDOWN_MASK) diff --git a/net/core/dev.c b/net/core/dev.c index f64e439b4a00..fc1e289397f5 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -97,8 +97,6 @@ #include <net/net_namespace.h> #include <net/sock.h> #include <linux/rtnetlink.h> -#include <linux/proc_fs.h> -#include <linux/seq_file.h> #include <linux/stat.h> #include <net/dst.h> #include <net/pkt_sched.h> @@ -106,12 +104,10 @@ #include <net/xfrm.h> #include <linux/highmem.h> #include <linux/init.h> -#include <linux/kmod.h> #include <linux/module.h> #include <linux/netpoll.h> #include <linux/rcupdate.h> #include <linux/delay.h> -#include <net/wext.h> #include <net/iw_handler.h> #include <asm/current.h> #include <linux/audit.h> @@ -132,9 +128,7 @@ #include <linux/pci.h> #include <linux/inetdevice.h> #include <linux/cpu_rmap.h> -#include <linux/net_tstamp.h> #include <linux/static_key.h> -#include <net/flow_keys.h> #include "net-sysfs.h" @@ -144,41 +138,10 @@ /* This should be increased if a protocol with a bigger head is added. */ #define GRO_MAX_HEAD (MAX_HEADER + 128) -/* - * The list of packet types we will receive (as opposed to discard) - * and the routines to invoke. - * - * Why 16. Because with 16 the only overlap we get on a hash of the - * low nibble of the protocol value is RARP/SNAP/X.25. - * - * NOTE: That is no longer true with the addition of VLAN tags. Not - * sure which should go first, but I bet it won't make much - * difference if we are running VLANs. The good news is that - * this protocol won't be in the list unless compiled in, so - * the average user (w/out VLANs) will not be adversely affected. 
- * --BLG - * - * 0800 IP - * 8100 802.1Q VLAN - * 0001 802.3 - * 0002 AX.25 - * 0004 802.2 - * 8035 RARP - * 0005 SNAP - * 0805 X.25 - * 0806 ARP - * 8137 IPX - * 0009 Localtalk - * 86DD IPv6 - */ - -#define PTYPE_HASH_SIZE (16) -#define PTYPE_HASH_MASK (PTYPE_HASH_SIZE - 1) - static DEFINE_SPINLOCK(ptype_lock); static DEFINE_SPINLOCK(offload_lock); -static struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly; -static struct list_head ptype_all __read_mostly; /* Taps */ +struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly; +struct list_head ptype_all __read_mostly; /* Taps */ static struct list_head offload_base __read_mostly; /* @@ -237,7 +200,7 @@ static inline void rps_unlock(struct softnet_data *sd) } /* Device list insertion */ -static int list_netdevice(struct net_device *dev) +static void list_netdevice(struct net_device *dev) { struct net *net = dev_net(dev); @@ -251,8 +214,6 @@ static int list_netdevice(struct net_device *dev) write_unlock_bh(&dev_base_lock); dev_base_seq_inc(net); - - return 0; } /* Device list removal @@ -695,11 +656,10 @@ __setup("netdev=", netdev_boot_setup); struct net_device *__dev_get_by_name(struct net *net, const char *name) { - struct hlist_node *p; struct net_device *dev; struct hlist_head *head = dev_name_hash(net, name); - hlist_for_each_entry(dev, p, head, name_hlist) + hlist_for_each_entry(dev, head, name_hlist) if (!strncmp(dev->name, name, IFNAMSIZ)) return dev; @@ -721,11 +681,10 @@ EXPORT_SYMBOL(__dev_get_by_name); struct net_device *dev_get_by_name_rcu(struct net *net, const char *name) { - struct hlist_node *p; struct net_device *dev; struct hlist_head *head = dev_name_hash(net, name); - hlist_for_each_entry_rcu(dev, p, head, name_hlist) + hlist_for_each_entry_rcu(dev, head, name_hlist) if (!strncmp(dev->name, name, IFNAMSIZ)) return dev; @@ -772,11 +731,10 @@ EXPORT_SYMBOL(dev_get_by_name); struct net_device *__dev_get_by_index(struct net *net, int ifindex) { - struct hlist_node *p; struct net_device *dev; struct hlist_head *head = dev_index_hash(net, ifindex); - hlist_for_each_entry(dev, p, head, index_hlist) + hlist_for_each_entry(dev, head, index_hlist) if (dev->ifindex == ifindex) return dev; @@ -797,11 +755,10 @@ EXPORT_SYMBOL(__dev_get_by_index); struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex) { - struct hlist_node *p; struct net_device *dev; struct hlist_head *head = dev_index_hash(net, ifindex); - hlist_for_each_entry_rcu(dev, p, head, index_hlist) + hlist_for_each_entry_rcu(dev, head, index_hlist) if (dev->ifindex == ifindex) return dev; @@ -1227,36 +1184,6 @@ void netdev_notify_peers(struct net_device *dev) } EXPORT_SYMBOL(netdev_notify_peers); -/** - * dev_load - load a network module - * @net: the applicable net namespace - * @name: name of interface - * - * If a network interface is not present and the process has suitable - * privileges this function loads the module. If module loading is not - * available in this kernel then it becomes a nop. - */ - -void dev_load(struct net *net, const char *name) -{ - struct net_device *dev; - int no_module; - - rcu_read_lock(); - dev = dev_get_by_name_rcu(net, name); - rcu_read_unlock(); - - no_module = !dev; - if (no_module && capable(CAP_NET_ADMIN)) - no_module = request_module("netdev-%s", name); - if (no_module && capable(CAP_SYS_MODULE)) { - if (!request_module("%s", name)) - pr_warn("Loading kernel module for a network device with CAP_SYS_MODULE (deprecated). 
Use CAP_NET_ADMIN and alias netdev-%s instead.\n", - name); - } -} -EXPORT_SYMBOL(dev_load); - static int __dev_open(struct net_device *dev) { const struct net_device_ops *ops = dev->netdev_ops; @@ -1267,6 +1194,14 @@ static int __dev_open(struct net_device *dev) if (!netif_device_present(dev)) return -ENODEV; + /* Block netpoll from trying to do any rx path servicing. + * If we don't do this there is a chance ndo_poll_controller + * or ndo_poll may be running while we open the device + */ + ret = netpoll_rx_disable(dev); + if (ret) + return ret; + ret = call_netdevice_notifiers(NETDEV_PRE_UP, dev); ret = notifier_to_errno(ret); if (ret) @@ -1280,6 +1215,8 @@ static int __dev_open(struct net_device *dev) if (!ret && ops->ndo_open) ret = ops->ndo_open(dev); + netpoll_rx_enable(dev); + if (ret) clear_bit(__LINK_STATE_START, &dev->state); else { @@ -1371,9 +1308,16 @@ static int __dev_close(struct net_device *dev) int retval; LIST_HEAD(single); + /* Temporarily disable netpoll until the interface is down */ + retval = netpoll_rx_disable(dev); + if (retval) + return retval; + list_add(&dev->unreg_list, &single); retval = __dev_close_many(&single); list_del(&single); + + netpoll_rx_enable(dev); return retval; } @@ -1409,14 +1353,22 @@ static int dev_close_many(struct list_head *head) */ int dev_close(struct net_device *dev) { + int ret = 0; if (dev->flags & IFF_UP) { LIST_HEAD(single); + /* Block netpoll rx while the interface is going down */ + ret = netpoll_rx_disable(dev); + if (ret) + return ret; + list_add(&dev->unreg_list, &single); dev_close_many(&single); list_del(&single); + + netpoll_rx_enable(dev); } - return 0; + return ret; } EXPORT_SYMBOL(dev_close); @@ -1591,7 +1543,6 @@ void net_enable_timestamp(void) return; } #endif - WARN_ON(in_interrupt()); static_key_slow_inc(&netstamp_needed); } EXPORT_SYMBOL(net_enable_timestamp); @@ -1621,57 +1572,6 @@ static inline void net_timestamp_set(struct sk_buff *skb) __net_timestamp(SKB); \ } \ -static int net_hwtstamp_validate(struct ifreq *ifr) -{ - struct hwtstamp_config cfg; - enum hwtstamp_tx_types tx_type; - enum hwtstamp_rx_filters rx_filter; - int tx_type_valid = 0; - int rx_filter_valid = 0; - - if (copy_from_user(&cfg, ifr->ifr_data, sizeof(cfg))) - return -EFAULT; - - if (cfg.flags) /* reserved for future extensions */ - return -EINVAL; - - tx_type = cfg.tx_type; - rx_filter = cfg.rx_filter; - - switch (tx_type) { - case HWTSTAMP_TX_OFF: - case HWTSTAMP_TX_ON: - case HWTSTAMP_TX_ONESTEP_SYNC: - tx_type_valid = 1; - break; - } - - switch (rx_filter) { - case HWTSTAMP_FILTER_NONE: - case HWTSTAMP_FILTER_ALL: - case HWTSTAMP_FILTER_SOME: - case HWTSTAMP_FILTER_PTP_V1_L4_EVENT: - case HWTSTAMP_FILTER_PTP_V1_L4_SYNC: - case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ: - case HWTSTAMP_FILTER_PTP_V2_L4_EVENT: - case HWTSTAMP_FILTER_PTP_V2_L4_SYNC: - case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ: - case HWTSTAMP_FILTER_PTP_V2_L2_EVENT: - case HWTSTAMP_FILTER_PTP_V2_L2_SYNC: - case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ: - case HWTSTAMP_FILTER_PTP_V2_EVENT: - case HWTSTAMP_FILTER_PTP_V2_SYNC: - case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ: - rx_filter_valid = 1; - break; - } - - if (!tx_type_valid || !rx_filter_valid) - return -ERANGE; - - return 0; -} - static inline bool is_skb_forwardable(struct net_device *dev, struct sk_buff *skb) { @@ -1722,7 +1622,6 @@ int dev_forward_skb(struct net_device *dev, struct sk_buff *skb) } skb_orphan(skb); - nf_reset(skb); if (unlikely(!is_skb_forwardable(dev, skb))) { atomic_long_inc(&dev->rx_dropped); @@ -1738,6 +1637,7 @@ int 
dev_forward_skb(struct net_device *dev, struct sk_buff *skb) skb->mark = 0; secpath_reset(skb); nf_reset(skb); + nf_reset_trace(skb); return netif_rx(skb); } EXPORT_SYMBOL_GPL(dev_forward_skb); @@ -1857,6 +1757,230 @@ static void netif_setup_tc(struct net_device *dev, unsigned int txq) } } +#ifdef CONFIG_XPS +static DEFINE_MUTEX(xps_map_mutex); +#define xmap_dereference(P) \ + rcu_dereference_protected((P), lockdep_is_held(&xps_map_mutex)) + +static struct xps_map *remove_xps_queue(struct xps_dev_maps *dev_maps, + int cpu, u16 index) +{ + struct xps_map *map = NULL; + int pos; + + if (dev_maps) + map = xmap_dereference(dev_maps->cpu_map[cpu]); + + for (pos = 0; map && pos < map->len; pos++) { + if (map->queues[pos] == index) { + if (map->len > 1) { + map->queues[pos] = map->queues[--map->len]; + } else { + RCU_INIT_POINTER(dev_maps->cpu_map[cpu], NULL); + kfree_rcu(map, rcu); + map = NULL; + } + break; + } + } + + return map; +} + +static void netif_reset_xps_queues_gt(struct net_device *dev, u16 index) +{ + struct xps_dev_maps *dev_maps; + int cpu, i; + bool active = false; + + mutex_lock(&xps_map_mutex); + dev_maps = xmap_dereference(dev->xps_maps); + + if (!dev_maps) + goto out_no_maps; + + for_each_possible_cpu(cpu) { + for (i = index; i < dev->num_tx_queues; i++) { + if (!remove_xps_queue(dev_maps, cpu, i)) + break; + } + if (i == dev->num_tx_queues) + active = true; + } + + if (!active) { + RCU_INIT_POINTER(dev->xps_maps, NULL); + kfree_rcu(dev_maps, rcu); + } + + for (i = index; i < dev->num_tx_queues; i++) + netdev_queue_numa_node_write(netdev_get_tx_queue(dev, i), + NUMA_NO_NODE); + +out_no_maps: + mutex_unlock(&xps_map_mutex); +} + +static struct xps_map *expand_xps_map(struct xps_map *map, + int cpu, u16 index) +{ + struct xps_map *new_map; + int alloc_len = XPS_MIN_MAP_ALLOC; + int i, pos; + + for (pos = 0; map && pos < map->len; pos++) { + if (map->queues[pos] != index) + continue; + return map; + } + + /* Need to add queue to this CPU's existing map */ + if (map) { + if (pos < map->alloc_len) + return map; + + alloc_len = map->alloc_len * 2; + } + + /* Need to allocate new map to store queue on this CPU's map */ + new_map = kzalloc_node(XPS_MAP_SIZE(alloc_len), GFP_KERNEL, + cpu_to_node(cpu)); + if (!new_map) + return NULL; + + for (i = 0; i < pos; i++) + new_map->queues[i] = map->queues[i]; + new_map->alloc_len = alloc_len; + new_map->len = pos; + + return new_map; +} + +int netif_set_xps_queue(struct net_device *dev, struct cpumask *mask, u16 index) +{ + struct xps_dev_maps *dev_maps, *new_dev_maps = NULL; + struct xps_map *map, *new_map; + int maps_sz = max_t(unsigned int, XPS_DEV_MAPS_SIZE, L1_CACHE_BYTES); + int cpu, numa_node_id = -2; + bool active = false; + + mutex_lock(&xps_map_mutex); + + dev_maps = xmap_dereference(dev->xps_maps); + + /* allocate memory for queue storage */ + for_each_online_cpu(cpu) { + if (!cpumask_test_cpu(cpu, mask)) + continue; + + if (!new_dev_maps) + new_dev_maps = kzalloc(maps_sz, GFP_KERNEL); + if (!new_dev_maps) { + mutex_unlock(&xps_map_mutex); + return -ENOMEM; + } + + map = dev_maps ? 
xmap_dereference(dev_maps->cpu_map[cpu]) : + NULL; + + map = expand_xps_map(map, cpu, index); + if (!map) + goto error; + + RCU_INIT_POINTER(new_dev_maps->cpu_map[cpu], map); + } + + if (!new_dev_maps) + goto out_no_new_maps; + + for_each_possible_cpu(cpu) { + if (cpumask_test_cpu(cpu, mask) && cpu_online(cpu)) { + /* add queue to CPU maps */ + int pos = 0; + + map = xmap_dereference(new_dev_maps->cpu_map[cpu]); + while ((pos < map->len) && (map->queues[pos] != index)) + pos++; + + if (pos == map->len) + map->queues[map->len++] = index; +#ifdef CONFIG_NUMA + if (numa_node_id == -2) + numa_node_id = cpu_to_node(cpu); + else if (numa_node_id != cpu_to_node(cpu)) + numa_node_id = -1; +#endif + } else if (dev_maps) { + /* fill in the new device map from the old device map */ + map = xmap_dereference(dev_maps->cpu_map[cpu]); + RCU_INIT_POINTER(new_dev_maps->cpu_map[cpu], map); + } + + } + + rcu_assign_pointer(dev->xps_maps, new_dev_maps); + + /* Cleanup old maps */ + if (dev_maps) { + for_each_possible_cpu(cpu) { + new_map = xmap_dereference(new_dev_maps->cpu_map[cpu]); + map = xmap_dereference(dev_maps->cpu_map[cpu]); + if (map && map != new_map) + kfree_rcu(map, rcu); + } + + kfree_rcu(dev_maps, rcu); + } + + dev_maps = new_dev_maps; + active = true; + +out_no_new_maps: + /* update Tx queue numa node */ + netdev_queue_numa_node_write(netdev_get_tx_queue(dev, index), + (numa_node_id >= 0) ? numa_node_id : + NUMA_NO_NODE); + + if (!dev_maps) + goto out_no_maps; + + /* removes queue from unused CPUs */ + for_each_possible_cpu(cpu) { + if (cpumask_test_cpu(cpu, mask) && cpu_online(cpu)) + continue; + + if (remove_xps_queue(dev_maps, cpu, index)) + active = true; + } + + /* free map if not active */ + if (!active) { + RCU_INIT_POINTER(dev->xps_maps, NULL); + kfree_rcu(dev_maps, rcu); + } + +out_no_maps: + mutex_unlock(&xps_map_mutex); + + return 0; +error: + /* remove any maps that we added */ + for_each_possible_cpu(cpu) { + new_map = xmap_dereference(new_dev_maps->cpu_map[cpu]); + map = dev_maps ? xmap_dereference(dev_maps->cpu_map[cpu]) : + NULL; + if (new_map && new_map != map) + kfree(new_map); + } + + mutex_unlock(&xps_map_mutex); + + kfree(new_dev_maps); + return -ENOMEM; +} +EXPORT_SYMBOL(netif_set_xps_queue); + +#endif /* * Routine to help set real_num_tx_queues. To avoid skbs mapped to queues * greater then real_num_tx_queues stale skbs on the qdisc must be flushed. @@ -1880,8 +2004,12 @@ int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq) if (dev->num_tc) netif_setup_tc(dev, txq); - if (txq < dev->real_num_tx_queues) + if (txq < dev->real_num_tx_queues) { qdisc_reset_all_tx_gt(dev, txq); +#ifdef CONFIG_XPS + netif_reset_xps_queues_gt(dev, txq); +#endif + } } dev->real_num_tx_queues = txq; @@ -2018,6 +2146,9 @@ static void skb_warn_bad_offload(const struct sk_buff *skb) struct net_device *dev = skb->dev; const char *driver = ""; + if (!net_ratelimit()) + return; + if (dev && dev->dev.parent) driver = dev_driver_string(dev->dev.parent); @@ -2046,6 +2177,15 @@ int skb_checksum_help(struct sk_buff *skb) return -EINVAL; } + /* Before computing a checksum, we should make sure no frag could + * be modified by an external entity : checksum could be wrong. 
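+ * (for instance a zero-copy page queued via sendpage() that user
+ * space can still write to; the __skb_linearize() call below copies
+ * the data into private memory before the checksum is computed)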
+ */ + if (skb_has_shared_frag(skb)) { + ret = __skb_linearize(skb); + if (ret) + goto out; + } + offset = skb_checksum_start_offset(skb); BUG_ON(offset >= skb_headlen(skb)); csum = skb_checksum(skb, offset, skb->len - offset, 0); @@ -2068,52 +2208,59 @@ out: } EXPORT_SYMBOL(skb_checksum_help); -/** - * skb_gso_segment - Perform segmentation on skb. - * @skb: buffer to segment - * @features: features for the output path (see dev->features) - * - * This function segments the given skb and returns a list of segments. - * - * It may return NULL if the skb requires no segmentation. This is - * only possible when GSO is used for verifying header integrity. - */ -struct sk_buff *skb_gso_segment(struct sk_buff *skb, - netdev_features_t features) +__be16 skb_network_protocol(struct sk_buff *skb) { - struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT); - struct packet_offload *ptype; __be16 type = skb->protocol; int vlan_depth = ETH_HLEN; - int err; - while (type == htons(ETH_P_8021Q)) { + /* Tunnel gso handlers can set protocol to ethernet. */ + if (type == htons(ETH_P_TEB)) { + struct ethhdr *eth; + + if (unlikely(!pskb_may_pull(skb, sizeof(struct ethhdr)))) + return 0; + + eth = (struct ethhdr *)skb_mac_header(skb); + type = eth->h_proto; + } + + while (type == htons(ETH_P_8021Q) || type == htons(ETH_P_8021AD)) { struct vlan_hdr *vh; if (unlikely(!pskb_may_pull(skb, vlan_depth + VLAN_HLEN))) - return ERR_PTR(-EINVAL); + return 0; vh = (struct vlan_hdr *)(skb->data + vlan_depth); type = vh->h_vlan_encapsulated_proto; vlan_depth += VLAN_HLEN; } - skb_reset_mac_header(skb); - skb->mac_len = skb->network_header - skb->mac_header; - __skb_pull(skb, skb->mac_len); + return type; +} - if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) { - skb_warn_bad_offload(skb); +/** + * skb_mac_gso_segment - mac layer segmentation handler. + * @skb: buffer to segment + * @features: features for the output path (see dev->features) + */ +struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb, + netdev_features_t features) +{ + struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT); + struct packet_offload *ptype; + __be16 type = skb_network_protocol(skb); - if (skb_header_cloned(skb) && - (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC))) - return ERR_PTR(err); - } + if (unlikely(!type)) + return ERR_PTR(-EINVAL); + + __skb_pull(skb, skb->mac_len); rcu_read_lock(); list_for_each_entry_rcu(ptype, &offload_base, list) { if (ptype->type == type && ptype->callbacks.gso_segment) { if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) { + int err; + err = ptype->callbacks.gso_send_check(skb); segs = ERR_PTR(err); if (err || skb_gso_ok(skb, features)) @@ -2131,7 +2278,50 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, return segs; } -EXPORT_SYMBOL(skb_gso_segment); +EXPORT_SYMBOL(skb_mac_gso_segment); + + +/* openvswitch calls this on rx path, so we need a different check. + */ +static inline bool skb_needs_check(struct sk_buff *skb, bool tx_path) +{ + if (tx_path) + return skb->ip_summed != CHECKSUM_PARTIAL; + else + return skb->ip_summed == CHECKSUM_NONE; +} + +/** + * __skb_gso_segment - Perform segmentation on skb. + * @skb: buffer to segment + * @features: features for the output path (see dev->features) + * @tx_path: whether it is called in TX path + * + * This function segments the given skb and returns a list of segments. + * + * It may return NULL if the skb requires no segmentation. This is + * only possible when GSO is used for verifying header integrity. 
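+ *
+ * A minimal calling sketch (xmit_one() is a stand-in for the driver
+ * transmit step, not a real helper; a NULL return means the skb can
+ * be sent out unsegmented):
+ *
+ *	segs = __skb_gso_segment(skb, netif_skb_features(skb), true);
+ *	if (IS_ERR(segs))
+ *		goto drop;
+ *	while (segs) {
+ *		struct sk_buff *next = segs->next;
+ *
+ *		segs->next = NULL;
+ *		xmit_one(segs);
+ *		segs = next;
+ *	}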
+ */ +struct sk_buff *__skb_gso_segment(struct sk_buff *skb, + netdev_features_t features, bool tx_path) +{ + if (unlikely(skb_needs_check(skb, tx_path))) { + int err; + + skb_warn_bad_offload(skb); + + if (skb_header_cloned(skb) && + (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC))) + return ERR_PTR(err); + } + + SKB_GSO_CB(skb)->mac_offset = skb_headroom(skb); + skb_reset_mac_header(skb); + skb_reset_mac_len(skb); + + return skb_mac_gso_segment(skb, features); +} +EXPORT_SYMBOL(__skb_gso_segment); /* Take action when hardware reception checksum errors are detected. */ #ifdef CONFIG_BUG @@ -2229,24 +2419,12 @@ static int dev_gso_segment(struct sk_buff *skb, netdev_features_t features) return 0; } -static bool can_checksum_protocol(netdev_features_t features, __be16 protocol) -{ - return ((features & NETIF_F_GEN_CSUM) || - ((features & NETIF_F_V4_CSUM) && - protocol == htons(ETH_P_IP)) || - ((features & NETIF_F_V6_CSUM) && - protocol == htons(ETH_P_IPV6)) || - ((features & NETIF_F_FCOE_CRC) && - protocol == htons(ETH_P_FCOE))); -} - static netdev_features_t harmonize_features(struct sk_buff *skb, __be16 protocol, netdev_features_t features) { if (skb->ip_summed != CHECKSUM_NONE && !can_checksum_protocol(features, protocol)) { features &= ~NETIF_F_ALL_CSUM; - features &= ~NETIF_F_SG; } else if (illegal_highdma(skb->dev, skb)) { features &= ~NETIF_F_SG; } @@ -2262,20 +2440,22 @@ netdev_features_t netif_skb_features(struct sk_buff *skb) if (skb_shinfo(skb)->gso_segs > skb->dev->gso_max_segs) features &= ~NETIF_F_GSO_MASK; - if (protocol == htons(ETH_P_8021Q)) { + if (protocol == htons(ETH_P_8021Q) || protocol == htons(ETH_P_8021AD)) { struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data; protocol = veh->h_vlan_encapsulated_proto; } else if (!vlan_tx_tag_present(skb)) { return harmonize_features(skb, protocol, features); } - features &= (skb->dev->vlan_features | NETIF_F_HW_VLAN_TX); + features &= (skb->dev->vlan_features | NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_HW_VLAN_STAG_TX); - if (protocol != htons(ETH_P_8021Q)) { + if (protocol != htons(ETH_P_8021Q) && protocol != htons(ETH_P_8021AD)) { return harmonize_features(skb, protocol, features); } else { features &= NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST | - NETIF_F_GEN_CSUM | NETIF_F_HW_VLAN_TX; + NETIF_F_GEN_CSUM | NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_HW_VLAN_STAG_TX; return harmonize_features(skb, protocol, features); } } @@ -2287,7 +2467,7 @@ EXPORT_SYMBOL(netif_skb_features); * 2. skb is fragmented and the device does not support SG. 
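 * (in both cases dev_hard_start_xmit() falls back to
 * __skb_linearize() before handing the skb to the driver)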
*/ static inline int skb_needs_linearize(struct sk_buff *skb, - int features) + netdev_features_t features) { return skb_is_nonlinear(skb) && ((skb_has_frag_list(skb) && @@ -2316,8 +2496,9 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, features = netif_skb_features(skb); if (vlan_tx_tag_present(skb) && - !(features & NETIF_F_HW_VLAN_TX)) { - skb = __vlan_put_tag(skb, vlan_tx_tag_get(skb)); + !vlan_hw_offload_capable(features, skb->vlan_proto)) { + skb = __vlan_put_tag(skb, skb->vlan_proto, + vlan_tx_tag_get(skb)); if (unlikely(!skb)) goto out; @@ -2376,13 +2557,6 @@ gso: skb->next = nskb->next; nskb->next = NULL; - /* - * If device doesn't need nskb->dst, release it right now while - * its hot in this cpu cache - */ - if (dev->priv_flags & IFF_XMIT_DST_RELEASE) - skb_dst_drop(nskb); - if (!list_empty(&ptype_all)) dev_queue_xmit_nit(nskb, dev); @@ -2402,134 +2576,45 @@ gso: } while (skb->next); out_kfree_gso_skb: - if (likely(skb->next == NULL)) + if (likely(skb->next == NULL)) { skb->destructor = DEV_GSO_CB(skb)->destructor; + consume_skb(skb); + return rc; + } out_kfree_skb: kfree_skb(skb); out: return rc; } -static u32 hashrnd __read_mostly; - -/* - * Returns a Tx hash based on the given packet descriptor a Tx queues' number - * to be used as a distribution range. - */ -u16 __skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb, - unsigned int num_tx_queues) -{ - u32 hash; - u16 qoffset = 0; - u16 qcount = num_tx_queues; - - if (skb_rx_queue_recorded(skb)) { - hash = skb_get_rx_queue(skb); - while (unlikely(hash >= num_tx_queues)) - hash -= num_tx_queues; - return hash; - } - - if (dev->num_tc) { - u8 tc = netdev_get_prio_tc_map(dev, skb->priority); - qoffset = dev->tc_to_txq[tc].offset; - qcount = dev->tc_to_txq[tc].count; - } - - if (skb->sk && skb->sk->sk_hash) - hash = skb->sk->sk_hash; - else - hash = (__force u16) skb->protocol; - hash = jhash_1word(hash, hashrnd); - - return (u16) (((u64) hash * qcount) >> 32) + qoffset; -} -EXPORT_SYMBOL(__skb_tx_hash); - -static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index) -{ - if (unlikely(queue_index >= dev->real_num_tx_queues)) { - net_warn_ratelimited("%s selects TX queue %d, but real number of TX queues is %d\n", - dev->name, queue_index, - dev->real_num_tx_queues); - return 0; - } - return queue_index; -} - -static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb) +static void qdisc_pkt_len_init(struct sk_buff *skb) { -#ifdef CONFIG_XPS - struct xps_dev_maps *dev_maps; - struct xps_map *map; - int queue_index = -1; + const struct skb_shared_info *shinfo = skb_shinfo(skb); - rcu_read_lock(); - dev_maps = rcu_dereference(dev->xps_maps); - if (dev_maps) { - map = rcu_dereference( - dev_maps->cpu_map[raw_smp_processor_id()]); - if (map) { - if (map->len == 1) - queue_index = map->queues[0]; - else { - u32 hash; - if (skb->sk && skb->sk->sk_hash) - hash = skb->sk->sk_hash; - else - hash = (__force u16) skb->protocol ^ - skb->rxhash; - hash = jhash_1word(hash, hashrnd); - queue_index = map->queues[ - ((u64)hash * map->len) >> 32]; - } - if (unlikely(queue_index >= dev->real_num_tx_queues)) - queue_index = -1; - } - } - rcu_read_unlock(); - - return queue_index; -#else - return -1; -#endif -} - -struct netdev_queue *netdev_pick_tx(struct net_device *dev, - struct sk_buff *skb) -{ - int queue_index; - const struct net_device_ops *ops = dev->netdev_ops; + qdisc_skb_cb(skb)->pkt_len = skb->len; - if (dev->real_num_tx_queues == 1) - queue_index = 0; - else if 
(ops->ndo_select_queue) { - queue_index = ops->ndo_select_queue(dev, skb); - queue_index = dev_cap_txqueue(dev, queue_index); - } else { - struct sock *sk = skb->sk; - queue_index = sk_tx_queue_get(sk); + /* To get more precise estimation of bytes sent on wire, + * we add to pkt_len the headers size of all segments + */ + if (shinfo->gso_size) { + unsigned int hdr_len; + u16 gso_segs = shinfo->gso_segs; - if (queue_index < 0 || skb->ooo_okay || - queue_index >= dev->real_num_tx_queues) { - int old_index = queue_index; + /* mac layer + network layer */ + hdr_len = skb_transport_header(skb) - skb_mac_header(skb); - queue_index = get_xps_queue(dev, skb); - if (queue_index < 0) - queue_index = skb_tx_hash(dev, skb); + /* + transport layer */ + if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) + hdr_len += tcp_hdrlen(skb); + else + hdr_len += sizeof(struct udphdr); - if (queue_index != old_index && sk) { - struct dst_entry *dst = - rcu_dereference_check(sk->sk_dst_cache, 1); + if (shinfo->gso_type & SKB_GSO_DODGY) + gso_segs = DIV_ROUND_UP(skb->len - hdr_len, + shinfo->gso_size); - if (dst && skb_dst(skb) == dst) - sk_tx_queue_set(sk, queue_index); - } - } + qdisc_skb_cb(skb)->pkt_len += (gso_segs - 1) * hdr_len; } - - skb_set_queue_mapping(skb, queue_index); - return netdev_get_tx_queue(dev, queue_index); } static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, @@ -2540,7 +2625,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, bool contended; int rc; - qdisc_skb_cb(skb)->pkt_len = skb->len; + qdisc_pkt_len_init(skb); qdisc_calculate_pkt_len(skb, q); /* * Heuristic to force contended enqueues to serialize on a @@ -2663,6 +2748,8 @@ int dev_queue_xmit(struct sk_buff *skb) struct Qdisc *q; int rc = -ENOMEM; + skb_reset_mac_header(skb); + /* Disable soft irqs for various locks below. Also * stops preemption for RCU. */ @@ -2757,41 +2844,6 @@ static inline void ____napi_schedule(struct softnet_data *sd, __raise_softirq_irqoff(NET_RX_SOFTIRQ); } -/* - * __skb_get_rxhash: calculate a flow hash based on src/dst addresses - * and src/dst port numbers. Sets rxhash in skb to non-zero hash value - * on success, zero indicates no valid hash. Also, sets l4_rxhash in skb - * if hash is a canonical 4-tuple hash over transport ports. - */ -void __skb_get_rxhash(struct sk_buff *skb) -{ - struct flow_keys keys; - u32 hash; - - if (!skb_flow_dissect(skb, &keys)) - return; - - if (keys.ports) - skb->l4_rxhash = 1; - - /* get a consistent hash (same value on both flow directions) */ - if (((__force u32)keys.dst < (__force u32)keys.src) || - (((__force u32)keys.dst == (__force u32)keys.src) && - ((__force u16)keys.port16[1] < (__force u16)keys.port16[0]))) { - swap(keys.dst, keys.src); - swap(keys.port16[0], keys.port16[1]); - } - - hash = jhash_3words((__force u32)keys.dst, - (__force u32)keys.src, - (__force u32)keys.ports, hashrnd); - if (!hash) - hash = 1; - - skb->rxhash = hash; -} -EXPORT_SYMBOL(__skb_get_rxhash); - #ifdef CONFIG_RPS /* One global table that all flow-based protocols share. 
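 * (this is rps_sock_flow_table: recvmsg-time code records the CPU of
 * the consuming task in it via sock_rps_record_flow(), so RFS can
 * steer later packets of the flow toward that CPU)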
*/ @@ -3277,6 +3329,7 @@ int netdev_rx_handler_register(struct net_device *dev, if (dev->rx_handler) return -EBUSY; + /* Note: rx_handler_data must be set before rx_handler */ rcu_assign_pointer(dev->rx_handler_data, rx_handler_data); rcu_assign_pointer(dev->rx_handler, rx_handler); @@ -3288,7 +3341,7 @@ EXPORT_SYMBOL_GPL(netdev_rx_handler_register); * netdev_rx_handler_unregister - unregister receive handler * @dev: device to unregister a handler from * - * Unregister a receive hander from a device. + * Unregister a receive handler from a device. * * The caller must hold the rtnl_mutex. */ @@ -3297,6 +3350,11 @@ void netdev_rx_handler_unregister(struct net_device *dev) ASSERT_RTNL(); RCU_INIT_POINTER(dev->rx_handler, NULL); + /* a reader seeing a non NULL rx_handler in a rcu_read_lock() + * section has a guarantee to see a non NULL rx_handler_data + * as well. + */ + synchronize_net(); RCU_INIT_POINTER(dev->rx_handler_data, NULL); } EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister); @@ -3312,13 +3370,14 @@ static bool skb_pfmemalloc_protocol(struct sk_buff *skb) case __constant_htons(ETH_P_IP): case __constant_htons(ETH_P_IPV6): case __constant_htons(ETH_P_8021Q): + case __constant_htons(ETH_P_8021AD): return true; default: return false; } } -static int __netif_receive_skb(struct sk_buff *skb) +static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc) { struct packet_type *ptype, *pt_prev; rx_handler_func_t *rx_handler; @@ -3327,24 +3386,11 @@ static int __netif_receive_skb(struct sk_buff *skb) bool deliver_exact = false; int ret = NET_RX_DROP; __be16 type; - unsigned long pflags = current->flags; net_timestamp_check(!netdev_tstamp_prequeue, skb); trace_netif_receive_skb(skb); - /* - * PFMEMALLOC skbs are special, they should - * - be delivered to SOCK_MEMALLOC sockets only - * - stay away from userspace - * - have bounded memory usage - * - * Use PF_MEMALLOC as this saves us from propagating the allocation - * context down to all allocation sites. 
- */ - if (sk_memalloc_socks() && skb_pfmemalloc(skb)) - current->flags |= PF_MEMALLOC; - /* if we've gotten here through NAPI, check netpoll */ if (netpoll_receive_skb(skb)) goto out; @@ -3352,7 +3398,8 @@ static int __netif_receive_skb(struct sk_buff *skb) orig_dev = skb->dev; skb_reset_network_header(skb); - skb_reset_transport_header(skb); + if (!skb_transport_header_was_set(skb)) + skb_reset_transport_header(skb); skb_reset_mac_len(skb); pt_prev = NULL; @@ -3364,7 +3411,8 @@ another_round: __this_cpu_inc(softnet_data.processed); - if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) { + if (skb->protocol == cpu_to_be16(ETH_P_8021Q) || + skb->protocol == cpu_to_be16(ETH_P_8021AD)) { skb = vlan_untag(skb); if (unlikely(!skb)) goto unlock; @@ -3377,7 +3425,7 @@ another_round: } #endif - if (sk_memalloc_socks() && skb_pfmemalloc(skb)) + if (pfmemalloc) goto skip_taps; list_for_each_entry_rcu(ptype, &ptype_all, list) { @@ -3396,8 +3444,7 @@ skip_taps: ncls: #endif - if (sk_memalloc_socks() && skb_pfmemalloc(skb) - && !skb_pfmemalloc_protocol(skb)) + if (pfmemalloc && !skb_pfmemalloc_protocol(skb)) goto drop; if (vlan_tx_tag_present(skb)) { @@ -3419,6 +3466,7 @@ ncls: } switch (rx_handler(&skb)) { case RX_HANDLER_CONSUMED: + ret = NET_RX_SUCCESS; goto unlock; case RX_HANDLER_ANOTHER: goto another_round; @@ -3467,7 +3515,31 @@ drop: unlock: rcu_read_unlock(); out: - tsk_restore_flags(current, pflags, PF_MEMALLOC); + return ret; +} + +static int __netif_receive_skb(struct sk_buff *skb) +{ + int ret; + + if (sk_memalloc_socks() && skb_pfmemalloc(skb)) { + unsigned long pflags = current->flags; + + /* + * PFMEMALLOC skbs are special, they should + * - be delivered to SOCK_MEMALLOC sockets only + * - stay away from userspace + * - have bounded memory usage + * + * Use PF_MEMALLOC as this saves us from propagating the allocation + * context down to all allocation sites. + */ + current->flags |= PF_MEMALLOC; + ret = __netif_receive_skb_core(skb, true); + tsk_restore_flags(current, pflags, PF_MEMALLOC); + } else + ret = __netif_receive_skb_core(skb, false); + return ret; } @@ -3634,7 +3706,6 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff __be16 type = skb->protocol; struct list_head *head = &offload_base; int same_flow; - int mac_len; enum gro_result ret; if (!(skb->dev->features & NETIF_F_GRO) || netpoll_rx_on(skb)) @@ -3651,8 +3722,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff continue; skb_set_network_header(skb, skb_gro_offset(skb)); - mac_len = skb->network_header - skb->mac_header; - skb->mac_len = mac_len; + skb_reset_mac_len(skb); NAPI_GRO_CB(skb)->same_flow = 0; NAPI_GRO_CB(skb)->flush = 0; NAPI_GRO_CB(skb)->free = 0; @@ -4010,6 +4080,9 @@ void netif_napi_add(struct net_device *dev, struct napi_struct *napi, napi->gro_list = NULL; napi->skb = NULL; napi->poll = poll; + if (weight > NAPI_POLL_WEIGHT) + pr_err_once("netif_napi_add() called with weight %d on device %s\n", + weight, dev->name); napi->weight = weight; list_add(&napi->dev_list, &dev->napi_list); napi->dev = dev; @@ -4056,7 +4129,7 @@ static void net_rx_action(struct softirq_action *h) * Allow this to run for 2 jiffies since which will allow * an average latency of 1.5/HZ. 
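 * (note the switch from time_after() to time_after_eq() below: the
 * two-jiffy cap is now inclusive, so the softirq yields as soon as
 * jiffies reaches time_limit instead of one tick later)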
*/ - if (unlikely(budget <= 0 || time_after(jiffies, time_limit))) + if (unlikely(budget <= 0 || time_after_eq(jiffies, time_limit))) goto softnet_break; local_irq_enable(); @@ -4134,530 +4207,231 @@ softnet_break: goto out; } -static gifconf_func_t *gifconf_list[NPROTO]; - -/** - * register_gifconf - register a SIOCGIF handler - * @family: Address family - * @gifconf: Function handler - * - * Register protocol dependent address dumping routines. The handler - * that is passed must not be freed or reused until it has been replaced - * by another handler. - */ -int register_gifconf(unsigned int family, gifconf_func_t *gifconf) -{ - if (family >= NPROTO) - return -EINVAL; - gifconf_list[family] = gifconf; - return 0; -} -EXPORT_SYMBOL(register_gifconf); - - -/* - * Map an interface index to its name (SIOCGIFNAME) - */ - -/* - * We need this ioctl for efficient implementation of the - * if_indextoname() function required by the IPv6 API. Without - * it, we would have to search all the interfaces to find a - * match. --pb - */ - -static int dev_ifname(struct net *net, struct ifreq __user *arg) -{ +struct netdev_upper { struct net_device *dev; - struct ifreq ifr; - unsigned seq; - - /* - * Fetch the caller's info block. - */ + bool master; + struct list_head list; + struct rcu_head rcu; + struct list_head search_list; +}; - if (copy_from_user(&ifr, arg, sizeof(struct ifreq))) - return -EFAULT; +static void __append_search_uppers(struct list_head *search_list, + struct net_device *dev) +{ + struct netdev_upper *upper; -retry: - seq = read_seqcount_begin(&devnet_rename_seq); - rcu_read_lock(); - dev = dev_get_by_index_rcu(net, ifr.ifr_ifindex); - if (!dev) { - rcu_read_unlock(); - return -ENODEV; + list_for_each_entry(upper, &dev->upper_dev_list, list) { + /* check if this upper is not already in search list */ + if (list_empty(&upper->search_list)) + list_add_tail(&upper->search_list, search_list); } - - strcpy(ifr.ifr_name, dev->name); - rcu_read_unlock(); - if (read_seqcount_retry(&devnet_rename_seq, seq)) - goto retry; - - if (copy_to_user(arg, &ifr, sizeof(struct ifreq))) - return -EFAULT; - return 0; } -/* - * Perform a SIOCGIFCONF call. This structure will change - * size eventually, and there is nothing I can do about it. - * Thus we will need a 'compatibility mode'. - */ - -static int dev_ifconf(struct net *net, char __user *arg) +static bool __netdev_search_upper_dev(struct net_device *dev, + struct net_device *upper_dev) { - struct ifconf ifc; - struct net_device *dev; - char __user *pos; - int len; - int total; - int i; + LIST_HEAD(search_list); + struct netdev_upper *upper; + struct netdev_upper *tmp; + bool ret = false; - /* - * Fetch the caller's info block. - */ - - if (copy_from_user(&ifc, arg, sizeof(struct ifconf))) - return -EFAULT; - - pos = ifc.ifc_buf; - len = ifc.ifc_len; - - /* - * Loop over the interfaces, and write an info block for each. - */ - - total = 0; - for_each_netdev(net, dev) { - for (i = 0; i < NPROTO; i++) { - if (gifconf_list[i]) { - int done; - if (!pos) - done = gifconf_list[i](dev, NULL, 0); - else - done = gifconf_list[i](dev, pos + total, - len - total); - if (done < 0) - return -EFAULT; - total += done; - } + __append_search_uppers(&search_list, dev); + list_for_each_entry(upper, &search_list, search_list) { + if (upper->dev == upper_dev) { + ret = true; + break; } + __append_search_uppers(&search_list, upper->dev); } - - /* - * All done. Write the updated control block back to the caller. 
- */ - ifc.ifc_len = total; - - /* - * Both BSD and Solaris return 0 here, so we do too. - */ - return copy_to_user(arg, &ifc, sizeof(struct ifconf)) ? -EFAULT : 0; + list_for_each_entry_safe(upper, tmp, &search_list, search_list) + INIT_LIST_HEAD(&upper->search_list); + return ret; } -#ifdef CONFIG_PROC_FS - -#define BUCKET_SPACE (32 - NETDEV_HASHBITS - 1) - -#define get_bucket(x) ((x) >> BUCKET_SPACE) -#define get_offset(x) ((x) & ((1 << BUCKET_SPACE) - 1)) -#define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o)) - -static inline struct net_device *dev_from_same_bucket(struct seq_file *seq, loff_t *pos) +static struct netdev_upper *__netdev_find_upper(struct net_device *dev, + struct net_device *upper_dev) { - struct net *net = seq_file_net(seq); - struct net_device *dev; - struct hlist_node *p; - struct hlist_head *h; - unsigned int count = 0, offset = get_offset(*pos); + struct netdev_upper *upper; - h = &net->dev_name_head[get_bucket(*pos)]; - hlist_for_each_entry_rcu(dev, p, h, name_hlist) { - if (++count == offset) - return dev; + list_for_each_entry(upper, &dev->upper_dev_list, list) { + if (upper->dev == upper_dev) + return upper; } - return NULL; } -static inline struct net_device *dev_from_bucket(struct seq_file *seq, loff_t *pos) -{ - struct net_device *dev; - unsigned int bucket; - - do { - dev = dev_from_same_bucket(seq, pos); - if (dev) - return dev; - - bucket = get_bucket(*pos) + 1; - *pos = set_bucket_offset(bucket, 1); - } while (bucket < NETDEV_HASHENTRIES); - - return NULL; -} - -/* - * This is invoked by the /proc filesystem handler to display a device - * in detail. +/** + * netdev_has_upper_dev - Check if device is linked to an upper device + * @dev: device + * @upper_dev: upper device to check + * + * Find out if a device is linked to specified upper device and return true + * in case it is. Note that this checks only immediate upper device, + * not through a complete stack of devices. The caller must hold the RTNL lock. */ -void *dev_seq_start(struct seq_file *seq, loff_t *pos) - __acquires(RCU) -{ - rcu_read_lock(); - if (!*pos) - return SEQ_START_TOKEN; - - if (get_bucket(*pos) >= NETDEV_HASHENTRIES) - return NULL; - - return dev_from_bucket(seq, pos); -} - -void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos) +bool netdev_has_upper_dev(struct net_device *dev, + struct net_device *upper_dev) { - ++*pos; - return dev_from_bucket(seq, pos); -} - -void dev_seq_stop(struct seq_file *seq, void *v) - __releases(RCU) -{ - rcu_read_unlock(); -} - -static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev) -{ - struct rtnl_link_stats64 temp; - const struct rtnl_link_stats64 *stats = dev_get_stats(dev, &temp); + ASSERT_RTNL(); - seq_printf(seq, "%6s: %7llu %7llu %4llu %4llu %4llu %5llu %10llu %9llu " - "%8llu %7llu %4llu %4llu %4llu %5llu %7llu %10llu\n", - dev->name, stats->rx_bytes, stats->rx_packets, - stats->rx_errors, - stats->rx_dropped + stats->rx_missed_errors, - stats->rx_fifo_errors, - stats->rx_length_errors + stats->rx_over_errors + - stats->rx_crc_errors + stats->rx_frame_errors, - stats->rx_compressed, stats->multicast, - stats->tx_bytes, stats->tx_packets, - stats->tx_errors, stats->tx_dropped, - stats->tx_fifo_errors, stats->collisions, - stats->tx_carrier_errors + - stats->tx_aborted_errors + - stats->tx_window_errors + - stats->tx_heartbeat_errors, - stats->tx_compressed); + return __netdev_find_upper(dev, upper_dev); } +EXPORT_SYMBOL(netdev_has_upper_dev); -/* - * Called from the PROCfs module. 
This now uses the new arbitrary sized - * /proc/net interface to create /proc/net/dev +/** + * netdev_has_any_upper_dev - Check if device is linked to some device + * @dev: device + * + * Find out if a device is linked to an upper device and return true in case + * it is. The caller must hold the RTNL lock. */ -static int dev_seq_show(struct seq_file *seq, void *v) -{ - if (v == SEQ_START_TOKEN) - seq_puts(seq, "Inter-| Receive " - " | Transmit\n" - " face |bytes packets errs drop fifo frame " - "compressed multicast|bytes packets errs " - "drop fifo colls carrier compressed\n"); - else - dev_seq_printf_stats(seq, v); - return 0; -} - -static struct softnet_data *softnet_get_online(loff_t *pos) -{ - struct softnet_data *sd = NULL; - - while (*pos < nr_cpu_ids) - if (cpu_online(*pos)) { - sd = &per_cpu(softnet_data, *pos); - break; - } else - ++*pos; - return sd; -} - -static void *softnet_seq_start(struct seq_file *seq, loff_t *pos) -{ - return softnet_get_online(pos); -} - -static void *softnet_seq_next(struct seq_file *seq, void *v, loff_t *pos) -{ - ++*pos; - return softnet_get_online(pos); -} - -static void softnet_seq_stop(struct seq_file *seq, void *v) -{ -} - -static int softnet_seq_show(struct seq_file *seq, void *v) +bool netdev_has_any_upper_dev(struct net_device *dev) { - struct softnet_data *sd = v; + ASSERT_RTNL(); - seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x %08x\n", - sd->processed, sd->dropped, sd->time_squeeze, 0, - 0, 0, 0, 0, /* was fastroute */ - sd->cpu_collision, sd->received_rps); - return 0; + return !list_empty(&dev->upper_dev_list); } +EXPORT_SYMBOL(netdev_has_any_upper_dev); -static const struct seq_operations dev_seq_ops = { - .start = dev_seq_start, - .next = dev_seq_next, - .stop = dev_seq_stop, - .show = dev_seq_show, -}; - -static int dev_seq_open(struct inode *inode, struct file *file) +/** + * netdev_master_upper_dev_get - Get master upper device + * @dev: device + * + * Find a master upper device and return pointer to it or NULL in case + * it's not there. The caller must hold the RTNL lock. + */ +struct net_device *netdev_master_upper_dev_get(struct net_device *dev) { - return seq_open_net(inode, file, &dev_seq_ops, - sizeof(struct seq_net_private)); -} + struct netdev_upper *upper; -static const struct file_operations dev_seq_fops = { - .owner = THIS_MODULE, - .open = dev_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_net, -}; + ASSERT_RTNL(); -static const struct seq_operations softnet_seq_ops = { - .start = softnet_seq_start, - .next = softnet_seq_next, - .stop = softnet_seq_stop, - .show = softnet_seq_show, -}; + if (list_empty(&dev->upper_dev_list)) + return NULL; -static int softnet_seq_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &softnet_seq_ops); + upper = list_first_entry(&dev->upper_dev_list, + struct netdev_upper, list); + if (likely(upper->master)) + return upper->dev; + return NULL; } +EXPORT_SYMBOL(netdev_master_upper_dev_get); -static const struct file_operations softnet_seq_fops = { - .owner = THIS_MODULE, - .open = softnet_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - -static void *ptype_get_idx(loff_t pos) +/** + * netdev_master_upper_dev_get_rcu - Get master upper device + * @dev: device + * + * Find a master upper device and return pointer to it or NULL in case + * it's not there. The caller must hold the RCU read lock. 
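+ *
+ * Typical use (sketch):
+ *
+ *	rcu_read_lock();
+ *	master = netdev_master_upper_dev_get_rcu(dev);
+ *	if (master)
+ *		do_something(master);
+ *	rcu_read_unlock();
+ *
+ * where do_something() is whatever per-master work the caller needs.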
+ */ +struct net_device *netdev_master_upper_dev_get_rcu(struct net_device *dev) { - struct packet_type *pt = NULL; - loff_t i = 0; - int t; - - list_for_each_entry_rcu(pt, &ptype_all, list) { - if (i == pos) - return pt; - ++i; - } + struct netdev_upper *upper; - for (t = 0; t < PTYPE_HASH_SIZE; t++) { - list_for_each_entry_rcu(pt, &ptype_base[t], list) { - if (i == pos) - return pt; - ++i; - } - } + upper = list_first_or_null_rcu(&dev->upper_dev_list, + struct netdev_upper, list); + if (upper && likely(upper->master)) + return upper->dev; return NULL; } +EXPORT_SYMBOL(netdev_master_upper_dev_get_rcu); -static void *ptype_seq_start(struct seq_file *seq, loff_t *pos) - __acquires(RCU) +static int __netdev_upper_dev_link(struct net_device *dev, + struct net_device *upper_dev, bool master) { - rcu_read_lock(); - return *pos ? ptype_get_idx(*pos - 1) : SEQ_START_TOKEN; -} + struct netdev_upper *upper; -static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos) -{ - struct packet_type *pt; - struct list_head *nxt; - int hash; + ASSERT_RTNL(); - ++*pos; - if (v == SEQ_START_TOKEN) - return ptype_get_idx(0); + if (dev == upper_dev) + return -EBUSY; - pt = v; - nxt = pt->list.next; - if (pt->type == htons(ETH_P_ALL)) { - if (nxt != &ptype_all) - goto found; - hash = 0; - nxt = ptype_base[0].next; - } else - hash = ntohs(pt->type) & PTYPE_HASH_MASK; + /* To prevent loops, check if dev is not upper device to upper_dev. */ + if (__netdev_search_upper_dev(upper_dev, dev)) + return -EBUSY; - while (nxt == &ptype_base[hash]) { - if (++hash >= PTYPE_HASH_SIZE) - return NULL; - nxt = ptype_base[hash].next; - } -found: - return list_entry(nxt, struct packet_type, list); -} + if (__netdev_find_upper(dev, upper_dev)) + return -EEXIST; -static void ptype_seq_stop(struct seq_file *seq, void *v) - __releases(RCU) -{ - rcu_read_unlock(); -} + if (master && netdev_master_upper_dev_get(dev)) + return -EBUSY; -static int ptype_seq_show(struct seq_file *seq, void *v) -{ - struct packet_type *pt = v; + upper = kmalloc(sizeof(*upper), GFP_KERNEL); + if (!upper) + return -ENOMEM; - if (v == SEQ_START_TOKEN) - seq_puts(seq, "Type Device Function\n"); - else if (pt->dev == NULL || dev_net(pt->dev) == seq_file_net(seq)) { - if (pt->type == htons(ETH_P_ALL)) - seq_puts(seq, "ALL "); - else - seq_printf(seq, "%04x", ntohs(pt->type)); + upper->dev = upper_dev; + upper->master = master; + INIT_LIST_HEAD(&upper->search_list); - seq_printf(seq, " %-8s %pF\n", - pt->dev ? pt->dev->name : "", pt->func); - } + /* Ensure that master upper link is always the first item in list. 
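+ * (netdev_master_upper_dev_get() and its _rcu variant rely on this
+ * ordering: they only ever inspect the first entry of the list)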
*/ + if (master) + list_add_rcu(&upper->list, &dev->upper_dev_list); + else + list_add_tail_rcu(&upper->list, &dev->upper_dev_list); + dev_hold(upper_dev); return 0; } -static const struct seq_operations ptype_seq_ops = { - .start = ptype_seq_start, - .next = ptype_seq_next, - .stop = ptype_seq_stop, - .show = ptype_seq_show, -}; - -static int ptype_seq_open(struct inode *inode, struct file *file) -{ - return seq_open_net(inode, file, &ptype_seq_ops, - sizeof(struct seq_net_private)); -} - -static const struct file_operations ptype_seq_fops = { - .owner = THIS_MODULE, - .open = ptype_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_net, -}; - - -static int __net_init dev_proc_net_init(struct net *net) -{ - int rc = -ENOMEM; - - if (!proc_net_fops_create(net, "dev", S_IRUGO, &dev_seq_fops)) - goto out; - if (!proc_net_fops_create(net, "softnet_stat", S_IRUGO, &softnet_seq_fops)) - goto out_dev; - if (!proc_net_fops_create(net, "ptype", S_IRUGO, &ptype_seq_fops)) - goto out_softnet; - - if (wext_proc_init(net)) - goto out_ptype; - rc = 0; -out: - return rc; -out_ptype: - proc_net_remove(net, "ptype"); -out_softnet: - proc_net_remove(net, "softnet_stat"); -out_dev: - proc_net_remove(net, "dev"); - goto out; -} - -static void __net_exit dev_proc_net_exit(struct net *net) -{ - wext_proc_exit(net); - - proc_net_remove(net, "ptype"); - proc_net_remove(net, "softnet_stat"); - proc_net_remove(net, "dev"); -} - -static struct pernet_operations __net_initdata dev_proc_ops = { - .init = dev_proc_net_init, - .exit = dev_proc_net_exit, -}; - -static int __init dev_proc_init(void) +/** + * netdev_upper_dev_link - Add a link to the upper device + * @dev: device + * @upper_dev: new upper device + * + * Adds a link to device which is upper to this one. The caller must hold + * the RTNL lock. On a failure a negative errno code is returned. + * On success the reference counts are adjusted and the function + * returns zero. + */ +int netdev_upper_dev_link(struct net_device *dev, + struct net_device *upper_dev) { - return register_pernet_subsys(&dev_proc_ops); + return __netdev_upper_dev_link(dev, upper_dev, false); } -#else -#define dev_proc_init() 0 -#endif /* CONFIG_PROC_FS */ - +EXPORT_SYMBOL(netdev_upper_dev_link); /** - * netdev_set_master - set up master pointer - * @slave: slave device - * @master: new master device + * netdev_master_upper_dev_link - Add a master link to the upper device + * @dev: device + * @upper_dev: new upper device * - * Changes the master device of the slave. Pass %NULL to break the - * bonding. The caller must hold the RTNL semaphore. On a failure - * a negative errno code is returned. On success the reference counts - * are adjusted and the function returns zero. + * Adds a link to device which is upper to this one. In this case, only + * one master upper device can be linked, although other non-master devices + * might be linked as well. The caller must hold the RTNL lock. + * On a failure a negative errno code is returned. On success the reference + * counts are adjusted and the function returns zero. 
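+ *
+ * A bonding-style caller would use the pair roughly like this
+ * (bond_dev and slave_dev are illustrative names, not kernel
+ * symbols):
+ *
+ *	err = netdev_master_upper_dev_link(slave_dev, bond_dev);
+ *	if (err)
+ *		return err;
+ *	...
+ *	netdev_upper_dev_unlink(slave_dev, bond_dev);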
*/ -int netdev_set_master(struct net_device *slave, struct net_device *master) +int netdev_master_upper_dev_link(struct net_device *dev, + struct net_device *upper_dev) { - struct net_device *old = slave->master; - - ASSERT_RTNL(); - - if (master) { - if (old) - return -EBUSY; - dev_hold(master); - } - - slave->master = master; - - if (old) - dev_put(old); - return 0; + return __netdev_upper_dev_link(dev, upper_dev, true); } -EXPORT_SYMBOL(netdev_set_master); +EXPORT_SYMBOL(netdev_master_upper_dev_link); /** - * netdev_set_bond_master - set up bonding master/slave pair - * @slave: slave device - * @master: new master device + * netdev_upper_dev_unlink - Removes a link to upper device + * @dev: device + * @upper_dev: new upper device * - * Changes the master device of the slave. Pass %NULL to break the - * bonding. The caller must hold the RTNL semaphore. On a failure - * a negative errno code is returned. On success %RTM_NEWLINK is sent - * to the routing socket and the function returns zero. + * Removes a link to device which is upper to this one. The caller must hold + * the RTNL lock. */ -int netdev_set_bond_master(struct net_device *slave, struct net_device *master) +void netdev_upper_dev_unlink(struct net_device *dev, + struct net_device *upper_dev) { - int err; + struct netdev_upper *upper; ASSERT_RTNL(); - err = netdev_set_master(slave, master); - if (err) - return err; - if (master) - slave->flags |= IFF_SLAVE; - else - slave->flags &= ~IFF_SLAVE; - - rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE); - return 0; + upper = __netdev_find_upper(dev, upper_dev); + if (!upper) + return; + list_del_rcu(&upper->list); + dev_put(upper_dev); + kfree_rcu(upper, rcu); } -EXPORT_SYMBOL(netdev_set_bond_master); +EXPORT_SYMBOL(netdev_upper_dev_unlink); static void dev_change_rx_flags(struct net_device *dev, int flags) { @@ -5020,381 +4794,33 @@ int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa) if (!netif_device_present(dev)) return -ENODEV; err = ops->ndo_set_mac_address(dev, sa); - if (!err) - call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); + if (err) + return err; + dev->addr_assign_type = NET_ADDR_SET; + call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); add_device_randomness(dev->dev_addr, dev->addr_len); - return err; + return 0; } EXPORT_SYMBOL(dev_set_mac_address); -/* - * Perform the SIOCxIFxxx calls, inside rcu_read_lock() - */ -static int dev_ifsioc_locked(struct net *net, struct ifreq *ifr, unsigned int cmd) -{ - int err; - struct net_device *dev = dev_get_by_name_rcu(net, ifr->ifr_name); - - if (!dev) - return -ENODEV; - - switch (cmd) { - case SIOCGIFFLAGS: /* Get interface flags */ - ifr->ifr_flags = (short) dev_get_flags(dev); - return 0; - - case SIOCGIFMETRIC: /* Get the metric on the interface - (currently unused) */ - ifr->ifr_metric = 0; - return 0; - - case SIOCGIFMTU: /* Get the MTU of a device */ - ifr->ifr_mtu = dev->mtu; - return 0; - - case SIOCGIFHWADDR: - if (!dev->addr_len) - memset(ifr->ifr_hwaddr.sa_data, 0, sizeof ifr->ifr_hwaddr.sa_data); - else - memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr, - min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len)); - ifr->ifr_hwaddr.sa_family = dev->type; - return 0; - - case SIOCGIFSLAVE: - err = -EINVAL; - break; - - case SIOCGIFMAP: - ifr->ifr_map.mem_start = dev->mem_start; - ifr->ifr_map.mem_end = dev->mem_end; - ifr->ifr_map.base_addr = dev->base_addr; - ifr->ifr_map.irq = dev->irq; - ifr->ifr_map.dma = dev->dma; - ifr->ifr_map.port = dev->if_port; - return 0; - - case SIOCGIFINDEX: - ifr->ifr_ifindex 
= dev->ifindex; - return 0; - - case SIOCGIFTXQLEN: - ifr->ifr_qlen = dev->tx_queue_len; - return 0; - - default: - /* dev_ioctl() should ensure this case - * is never reached - */ - WARN_ON(1); - err = -ENOTTY; - break; - - } - return err; -} - -/* - * Perform the SIOCxIFxxx calls, inside rtnl_lock() - */ -static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd) -{ - int err; - struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name); - const struct net_device_ops *ops; - - if (!dev) - return -ENODEV; - - ops = dev->netdev_ops; - - switch (cmd) { - case SIOCSIFFLAGS: /* Set interface flags */ - return dev_change_flags(dev, ifr->ifr_flags); - - case SIOCSIFMETRIC: /* Set the metric on the interface - (currently unused) */ - return -EOPNOTSUPP; - - case SIOCSIFMTU: /* Set the MTU of a device */ - return dev_set_mtu(dev, ifr->ifr_mtu); - - case SIOCSIFHWADDR: - return dev_set_mac_address(dev, &ifr->ifr_hwaddr); - - case SIOCSIFHWBROADCAST: - if (ifr->ifr_hwaddr.sa_family != dev->type) - return -EINVAL; - memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data, - min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len)); - call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); - return 0; - - case SIOCSIFMAP: - if (ops->ndo_set_config) { - if (!netif_device_present(dev)) - return -ENODEV; - return ops->ndo_set_config(dev, &ifr->ifr_map); - } - return -EOPNOTSUPP; - - case SIOCADDMULTI: - if (!ops->ndo_set_rx_mode || - ifr->ifr_hwaddr.sa_family != AF_UNSPEC) - return -EINVAL; - if (!netif_device_present(dev)) - return -ENODEV; - return dev_mc_add_global(dev, ifr->ifr_hwaddr.sa_data); - - case SIOCDELMULTI: - if (!ops->ndo_set_rx_mode || - ifr->ifr_hwaddr.sa_family != AF_UNSPEC) - return -EINVAL; - if (!netif_device_present(dev)) - return -ENODEV; - return dev_mc_del_global(dev, ifr->ifr_hwaddr.sa_data); - - case SIOCSIFTXQLEN: - if (ifr->ifr_qlen < 0) - return -EINVAL; - dev->tx_queue_len = ifr->ifr_qlen; - return 0; - - case SIOCSIFNAME: - ifr->ifr_newname[IFNAMSIZ-1] = '\0'; - return dev_change_name(dev, ifr->ifr_newname); - - case SIOCSHWTSTAMP: - err = net_hwtstamp_validate(ifr); - if (err) - return err; - /* fall through */ - - /* - * Unknown or private ioctl - */ - default: - if ((cmd >= SIOCDEVPRIVATE && - cmd <= SIOCDEVPRIVATE + 15) || - cmd == SIOCBONDENSLAVE || - cmd == SIOCBONDRELEASE || - cmd == SIOCBONDSETHWADDR || - cmd == SIOCBONDSLAVEINFOQUERY || - cmd == SIOCBONDINFOQUERY || - cmd == SIOCBONDCHANGEACTIVE || - cmd == SIOCGMIIPHY || - cmd == SIOCGMIIREG || - cmd == SIOCSMIIREG || - cmd == SIOCBRADDIF || - cmd == SIOCBRDELIF || - cmd == SIOCSHWTSTAMP || - cmd == SIOCWANDEV) { - err = -EOPNOTSUPP; - if (ops->ndo_do_ioctl) { - if (netif_device_present(dev)) - err = ops->ndo_do_ioctl(dev, ifr, cmd); - else - err = -ENODEV; - } - } else - err = -EINVAL; - - } - return err; -} - -/* - * This function handles all "interface"-type I/O control requests. The actual - * 'doing' part of this is dev_ifsioc above. - */ - /** - * dev_ioctl - network device ioctl - * @net: the applicable net namespace - * @cmd: command to issue - * @arg: pointer to a struct ifreq in user space + * dev_change_carrier - Change device carrier + * @dev: device + * @new_carrier: new value * - * Issue ioctl functions to devices. This is normally called by the - * user space syscall interfaces but can sometimes be useful for - * other purposes. The return value is the return from the syscall if - * positive or a negative errno code on error. 
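
With netdev_set_master() and netdev_set_bond_master() gone, the IFF_SLAVE bookkeeping and the RTM_NEWLINK notification move into the individual drivers. A minimal sketch of the new idiom for a bonding-style driver (hypothetical code; error handling abbreviated, and the rtmsg_ifinfo() signature assumed is this tree's three-argument form):

#include <linux/netdevice.h>
#include <linux/rtnetlink.h>

/* Hypothetical enslave path: the link is refcounted and tracked on
 * the slave's upper_dev_list instead of a single ->master pointer. */
static int example_enslave(struct net_device *master,
			   struct net_device *slave)
{
	int err;

	ASSERT_RTNL();

	err = netdev_master_upper_dev_link(slave, master);
	if (err)
		return err;

	slave->flags |= IFF_SLAVE;
	rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE);
	return 0;
}

/* Matching release path. */
static void example_release(struct net_device *master,
			    struct net_device *slave)
{
	ASSERT_RTNL();

	netdev_upper_dev_unlink(slave, master);
	slave->flags &= ~IFF_SLAVE;
	rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE);
}
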
+ * Change device carrier */ - -int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg) +int dev_change_carrier(struct net_device *dev, bool new_carrier) { - struct ifreq ifr; - int ret; - char *colon; - - /* One special case: SIOCGIFCONF takes ifconf argument - and requires shared lock, because it sleeps writing - to user space. - */ - - if (cmd == SIOCGIFCONF) { - rtnl_lock(); - ret = dev_ifconf(net, (char __user *) arg); - rtnl_unlock(); - return ret; - } - if (cmd == SIOCGIFNAME) - return dev_ifname(net, (struct ifreq __user *)arg); - - if (copy_from_user(&ifr, arg, sizeof(struct ifreq))) - return -EFAULT; - - ifr.ifr_name[IFNAMSIZ-1] = 0; - - colon = strchr(ifr.ifr_name, ':'); - if (colon) - *colon = 0; - - /* - * See which interface the caller is talking about. - */ - - switch (cmd) { - /* - * These ioctl calls: - * - can be done by all. - * - atomic and do not require locking. - * - return a value - */ - case SIOCGIFFLAGS: - case SIOCGIFMETRIC: - case SIOCGIFMTU: - case SIOCGIFHWADDR: - case SIOCGIFSLAVE: - case SIOCGIFMAP: - case SIOCGIFINDEX: - case SIOCGIFTXQLEN: - dev_load(net, ifr.ifr_name); - rcu_read_lock(); - ret = dev_ifsioc_locked(net, &ifr, cmd); - rcu_read_unlock(); - if (!ret) { - if (colon) - *colon = ':'; - if (copy_to_user(arg, &ifr, - sizeof(struct ifreq))) - ret = -EFAULT; - } - return ret; - - case SIOCETHTOOL: - dev_load(net, ifr.ifr_name); - rtnl_lock(); - ret = dev_ethtool(net, &ifr); - rtnl_unlock(); - if (!ret) { - if (colon) - *colon = ':'; - if (copy_to_user(arg, &ifr, - sizeof(struct ifreq))) - ret = -EFAULT; - } - return ret; - - /* - * These ioctl calls: - * - require superuser power. - * - require strict serialization. - * - return a value - */ - case SIOCGMIIPHY: - case SIOCGMIIREG: - case SIOCSIFNAME: - if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) - return -EPERM; - dev_load(net, ifr.ifr_name); - rtnl_lock(); - ret = dev_ifsioc(net, &ifr, cmd); - rtnl_unlock(); - if (!ret) { - if (colon) - *colon = ':'; - if (copy_to_user(arg, &ifr, - sizeof(struct ifreq))) - ret = -EFAULT; - } - return ret; - - /* - * These ioctl calls: - * - require superuser power. - * - require strict serialization. - * - do not return a value - */ - case SIOCSIFMAP: - case SIOCSIFTXQLEN: - if (!capable(CAP_NET_ADMIN)) - return -EPERM; - /* fall through */ - /* - * These ioctl calls: - * - require local superuser power. - * - require strict serialization. - * - do not return a value - */ - case SIOCSIFFLAGS: - case SIOCSIFMETRIC: - case SIOCSIFMTU: - case SIOCSIFHWADDR: - case SIOCSIFSLAVE: - case SIOCADDMULTI: - case SIOCDELMULTI: - case SIOCSIFHWBROADCAST: - case SIOCSMIIREG: - case SIOCBONDENSLAVE: - case SIOCBONDRELEASE: - case SIOCBONDSETHWADDR: - case SIOCBONDCHANGEACTIVE: - case SIOCBRADDIF: - case SIOCBRDELIF: - case SIOCSHWTSTAMP: - if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) - return -EPERM; - /* fall through */ - case SIOCBONDSLAVEINFOQUERY: - case SIOCBONDINFOQUERY: - dev_load(net, ifr.ifr_name); - rtnl_lock(); - ret = dev_ifsioc(net, &ifr, cmd); - rtnl_unlock(); - return ret; - - case SIOCGIFMEM: - /* Get the per device memory space. We can add this but - * currently do not support it */ - case SIOCSIFMEM: - /* Set the per device memory buffer space. - * Not applicable in our case */ - case SIOCSIFLINK: - return -ENOTTY; + const struct net_device_ops *ops = dev->netdev_ops; - /* - * Unknown or private ioctl. 
- */ - default: - if (cmd == SIOCWANDEV || - (cmd >= SIOCDEVPRIVATE && - cmd <= SIOCDEVPRIVATE + 15)) { - dev_load(net, ifr.ifr_name); - rtnl_lock(); - ret = dev_ifsioc(net, &ifr, cmd); - rtnl_unlock(); - if (!ret && copy_to_user(arg, &ifr, - sizeof(struct ifreq))) - ret = -EFAULT; - return ret; - } - /* Take care of Wireless Extensions */ - if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) - return wext_handle_ioctl(net, &ifr, cmd, arg); - return -ENOTTY; - } - + if (!ops->ndo_change_carrier) + return -EOPNOTSUPP; + if (!netif_device_present(dev)) + return -ENODEV; + return ops->ndo_change_carrier(dev, new_carrier); } - +EXPORT_SYMBOL(dev_change_carrier); /** * dev_new_index - allocate an ifindex @@ -5482,11 +4908,15 @@ static void rollback_registered_many(struct list_head *head) if (dev->netdev_ops->ndo_uninit) dev->netdev_ops->ndo_uninit(dev); - /* Notifier chain MUST detach us from master device. */ - WARN_ON(dev->master); + /* Notifier chain MUST detach us from all upper devices. */ + WARN_ON(netdev_has_any_upper_dev(dev)); /* Remove entries from kobject tree */ netdev_unregister_kobject(dev); +#ifdef CONFIG_XPS + /* Remove XPS queueing entries */ + netif_reset_xps_queues_gt(dev, 0); +#endif } synchronize_net(); @@ -5514,20 +4944,25 @@ static netdev_features_t netdev_fix_features(struct net_device *dev, features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM); } - /* Fix illegal SG+CSUM combinations. */ - if ((features & NETIF_F_SG) && - !(features & NETIF_F_ALL_CSUM)) { - netdev_dbg(dev, - "Dropping NETIF_F_SG since no checksum feature.\n"); - features &= ~NETIF_F_SG; - } - /* TSO requires that SG is present as well. */ if ((features & NETIF_F_ALL_TSO) && !(features & NETIF_F_SG)) { netdev_dbg(dev, "Dropping TSO features since no SG feature.\n"); features &= ~NETIF_F_ALL_TSO; } + if ((features & NETIF_F_TSO) && !(features & NETIF_F_HW_CSUM) && + !(features & NETIF_F_IP_CSUM)) { + netdev_dbg(dev, "Dropping TSO features since no CSUM feature.\n"); + features &= ~NETIF_F_TSO; + features &= ~NETIF_F_TSO_ECN; + } + + if ((features & NETIF_F_TSO6) && !(features & NETIF_F_HW_CSUM) && + !(features & NETIF_F_IPV6_CSUM)) { + netdev_dbg(dev, "Dropping TSO6 features since no CSUM feature.\n"); + features &= ~NETIF_F_TSO6; + } + /* TSO ECN requires that TSO is present as well. 
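
dev_change_carrier() above is deliberately thin: it validates the device and forwards to the driver. A software device that wants its carrier settable this way needs little more than the following hook (hypothetical driver; the dummy driver in this cycle takes the same shape):

#include <linux/netdevice.h>

static int example_ndo_change_carrier(struct net_device *dev,
				      bool new_carrier)
{
	/* Reflect the requested state through the usual carrier helpers;
	 * link watch and operstate handling then follow automatically. */
	if (new_carrier)
		netif_carrier_on(dev);
	else
		netif_carrier_off(dev);
	return 0;
}

static const struct net_device_ops example_netdev_ops = {
	.ndo_change_carrier	= example_ndo_change_carrier,
};
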
*/ if ((features & NETIF_F_ALL_TSO) == NETIF_F_TSO_ECN) features &= ~NETIF_F_TSO_ECN; @@ -5664,10 +5099,9 @@ static int netif_alloc_rx_queues(struct net_device *dev) BUG_ON(count < 1); rx = kcalloc(count, sizeof(struct netdev_rx_queue), GFP_KERNEL); - if (!rx) { - pr_err("netdev: Unable to allocate %u rx queues\n", count); + if (!rx) return -ENOMEM; - } + dev->_rx = rx; for (i = 0; i < count; i++) @@ -5698,10 +5132,9 @@ static int netif_alloc_netdev_queues(struct net_device *dev) BUG_ON(count < 1); tx = kcalloc(count, sizeof(struct netdev_queue), GFP_KERNEL); - if (!tx) { - pr_err("netdev: Unable to allocate %u tx queues\n", count); + if (!tx) return -ENOMEM; - } + dev->_tx = tx; netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL); @@ -5760,6 +5193,15 @@ int register_netdevice(struct net_device *dev) } } + if (((dev->hw_features | dev->features) & + NETIF_F_HW_VLAN_CTAG_FILTER) && + (!dev->netdev_ops->ndo_vlan_rx_add_vid || + !dev->netdev_ops->ndo_vlan_rx_kill_vid)) { + netdev_WARN(dev, "Buggy VLAN acceleration in driver!\n"); + ret = -EINVAL; + goto err_uninit; + } + ret = -EBUSY; if (!dev->ifindex) dev->ifindex = dev_new_index(net); @@ -5789,6 +5231,10 @@ int register_netdevice(struct net_device *dev) */ dev->vlan_features |= NETIF_F_HIGHDMA; + /* Make NETIF_F_SG inheritable to tunnel devices. + */ + dev->hw_enc_features |= NETIF_F_SG; + ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev); ret = notifier_to_errno(ret); if (ret) @@ -5815,6 +5261,13 @@ int register_netdevice(struct net_device *dev) list_netdevice(dev); add_device_randomness(dev->dev_addr, dev->addr_len); + /* If the device has permanent device address, driver should + * set dev_addr and also addr_assign_type should be set to + * NET_ADDR_PERM (default value). + */ + if (dev->addr_assign_type == NET_ADDR_PERM) + memcpy(dev->perm_addr, dev->dev_addr, dev->addr_len); + /* Notify protocols, that a new device appeared. 
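
The register_netdevice() check added above turns an implicit convention into a hard rule: advertising NETIF_F_HW_VLAN_CTAG_FILTER without both VLAN filter callbacks now fails registration with -EINVAL. A sketch of the driver-side contract, assuming the protocol-aware ndo signatures from this same series (hypothetical driver):

#include <linux/netdevice.h>

static int example_vlan_rx_add_vid(struct net_device *dev,
				   __be16 proto, u16 vid)
{
	/* program @vid into the hardware VLAN filter table here */
	return 0;
}

static int example_vlan_rx_kill_vid(struct net_device *dev,
				    __be16 proto, u16 vid)
{
	/* remove @vid from the hardware VLAN filter table here */
	return 0;
}

static const struct net_device_ops example_vlan_netdev_ops = {
	.ndo_vlan_rx_add_vid	= example_vlan_rx_add_vid,
	.ndo_vlan_rx_kill_vid	= example_vlan_rx_kill_vid,
};

/* Only with both callbacks in place is this safe in the probe path:
 *	dev->hw_features |= NETIF_F_HW_VLAN_CTAG_FILTER;
 */
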
*/ ret = call_netdevice_notifiers(NETDEV_REGISTER, dev); ret = notifier_to_errno(ret); @@ -6173,10 +5626,8 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, alloc_size += NETDEV_ALIGN - 1; p = kzalloc(alloc_size, GFP_KERNEL); - if (!p) { - pr_err("alloc_netdev: Unable to allocate device\n"); + if (!p) return NULL; - } dev = PTR_ALIGN(p, NETDEV_ALIGN); dev->padded = (char *)dev - (char *)p; @@ -6199,6 +5650,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, INIT_LIST_HEAD(&dev->napi_list); INIT_LIST_HEAD(&dev->unreg_list); INIT_LIST_HEAD(&dev->link_watch_list); + INIT_LIST_HEAD(&dev->upper_dev_list); dev->priv_flags = IFF_XMIT_DST_RELEASE; setup(dev); @@ -6842,19 +6294,9 @@ static int __init net_dev_init(void) hotcpu_notifier(dev_cpu_callback, 0); dst_init(); - dev_mcast_init(); rc = 0; out: return rc; } subsys_initcall(net_dev_init); - -static int __init initialize_hashrnd(void) -{ - get_random_bytes(&hashrnd, sizeof(hashrnd)); - return 0; -} - -late_initcall_sync(initialize_hashrnd); - diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c index b079c7bbc157..c013f38482a1 100644 --- a/net/core/dev_addr_lists.c +++ b/net/core/dev_addr_lists.c @@ -15,7 +15,6 @@ #include <linux/rtnetlink.h> #include <linux/export.h> #include <linux/list.h> -#include <linux/proc_fs.h> /* * General list handling functions @@ -23,7 +22,8 @@ static int __hw_addr_create_ex(struct netdev_hw_addr_list *list, const unsigned char *addr, int addr_len, - unsigned char addr_type, bool global) + unsigned char addr_type, bool global, + bool sync) { struct netdev_hw_addr *ha; int alloc_size; @@ -38,7 +38,7 @@ static int __hw_addr_create_ex(struct netdev_hw_addr_list *list, ha->type = addr_type; ha->refcount = 1; ha->global_use = global; - ha->synced = false; + ha->synced = sync; list_add_tail_rcu(&ha->list, &list->list); list->count++; @@ -47,7 +47,7 @@ static int __hw_addr_create_ex(struct netdev_hw_addr_list *list, static int __hw_addr_add_ex(struct netdev_hw_addr_list *list, const unsigned char *addr, int addr_len, - unsigned char addr_type, bool global) + unsigned char addr_type, bool global, bool sync) { struct netdev_hw_addr *ha; @@ -64,43 +64,62 @@ static int __hw_addr_add_ex(struct netdev_hw_addr_list *list, else ha->global_use = true; } + if (sync) { + if (ha->synced) + return 0; + else + ha->synced = true; + } ha->refcount++; return 0; } } - return __hw_addr_create_ex(list, addr, addr_len, addr_type, global); + return __hw_addr_create_ex(list, addr, addr_len, addr_type, global, + sync); } static int __hw_addr_add(struct netdev_hw_addr_list *list, const unsigned char *addr, int addr_len, unsigned char addr_type) { - return __hw_addr_add_ex(list, addr, addr_len, addr_type, false); + return __hw_addr_add_ex(list, addr, addr_len, addr_type, false, false); +} + +static int __hw_addr_del_entry(struct netdev_hw_addr_list *list, + struct netdev_hw_addr *ha, bool global, + bool sync) +{ + if (global && !ha->global_use) + return -ENOENT; + + if (sync && !ha->synced) + return -ENOENT; + + if (global) + ha->global_use = false; + + if (sync) + ha->synced = false; + + if (--ha->refcount) + return 0; + list_del_rcu(&ha->list); + kfree_rcu(ha, rcu_head); + list->count--; + return 0; } static int __hw_addr_del_ex(struct netdev_hw_addr_list *list, const unsigned char *addr, int addr_len, - unsigned char addr_type, bool global) + unsigned char addr_type, bool global, bool sync) { struct netdev_hw_addr *ha; list_for_each_entry(ha, &list->list, list) { if (!memcmp(ha->addr, 
 addr, addr_len) && - (ha->type == addr_type || !addr_type)) { - if (global) { - if (!ha->global_use) - break; - else - ha->global_use = false; - } - if (--ha->refcount) - return 0; - list_del_rcu(&ha->list); - kfree_rcu(ha, rcu_head); - list->count--; - return 0; - } + (ha->type == addr_type || !addr_type)) + return __hw_addr_del_entry(list, ha, global, sync); } return -ENOENT; } @@ -109,7 +128,57 @@ static int __hw_addr_del(struct netdev_hw_addr_list *list, const unsigned char *addr, int addr_len, unsigned char addr_type) { - return __hw_addr_del_ex(list, addr, addr_len, addr_type, false); + return __hw_addr_del_ex(list, addr, addr_len, addr_type, false, false); +} + +static int __hw_addr_sync_one(struct netdev_hw_addr_list *to_list, + struct netdev_hw_addr *ha, + int addr_len) +{ + int err; + + err = __hw_addr_add_ex(to_list, ha->addr, addr_len, ha->type, + false, true); + if (err) + return err; + ha->sync_cnt++; + ha->refcount++; + + return 0; +} + +static void __hw_addr_unsync_one(struct netdev_hw_addr_list *to_list, + struct netdev_hw_addr_list *from_list, + struct netdev_hw_addr *ha, + int addr_len) +{ + int err; + + err = __hw_addr_del_ex(to_list, ha->addr, addr_len, ha->type, + false, true); + if (err) + return; + ha->sync_cnt--; + __hw_addr_del_entry(from_list, ha, false, true); +} + +static int __hw_addr_sync_multiple(struct netdev_hw_addr_list *to_list, + struct netdev_hw_addr_list *from_list, + int addr_len) +{ + int err = 0; + struct netdev_hw_addr *ha, *tmp; + + list_for_each_entry_safe(ha, tmp, &from_list->list, list) { + if (ha->sync_cnt == ha->refcount) { + __hw_addr_unsync_one(to_list, from_list, ha, addr_len); + } else { + err = __hw_addr_sync_one(to_list, ha, addr_len); + if (err) + break; + } + } + return err; } int __hw_addr_add_multiple(struct netdev_hw_addr_list *to_list, @@ -153,6 +222,11 @@ void __hw_addr_del_multiple(struct netdev_hw_addr_list *to_list, } EXPORT_SYMBOL(__hw_addr_del_multiple); +/* This function only works where there is a strict 1-1 relationship + * between the source and destination of the sync. If you ever need to + * sync addresses to more than one destination, you need to use + * __hw_addr_sync_multiple(). 
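
The strict 1-1 case that the comment above describes is the classic VLAN-style pairing: one upper device syncing its lists into exactly one lower device from ndo_set_rx_mode. A hypothetical sketch of that caller:

#include <linux/netdevice.h>

struct example_priv {
	struct net_device *real_dev;	/* the single lower device */
};

static void example_set_rx_mode(struct net_device *dev)
{
	struct example_priv *priv = netdev_priv(dev);

	/* 1-1: @dev is only ever synced into priv->real_dev. */
	dev_uc_sync(priv->real_dev, dev);
	dev_mc_sync(priv->real_dev, dev);
}
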
+ */ int __hw_addr_sync(struct netdev_hw_addr_list *to_list, struct netdev_hw_addr_list *from_list, int addr_len) @@ -161,17 +235,12 @@ int __hw_addr_sync(struct netdev_hw_addr_list *to_list, struct netdev_hw_addr *ha, *tmp; list_for_each_entry_safe(ha, tmp, &from_list->list, list) { - if (!ha->synced) { - err = __hw_addr_add(to_list, ha->addr, - addr_len, ha->type); + if (!ha->sync_cnt) { + err = __hw_addr_sync_one(to_list, ha, addr_len); if (err) break; - ha->synced = true; - ha->refcount++; - } else if (ha->refcount == 1) { - __hw_addr_del(to_list, ha->addr, addr_len, ha->type); - __hw_addr_del(from_list, ha->addr, addr_len, ha->type); - } + } else if (ha->refcount == 1) + __hw_addr_unsync_one(to_list, from_list, ha, addr_len); } return err; } @@ -184,13 +253,8 @@ void __hw_addr_unsync(struct netdev_hw_addr_list *to_list, struct netdev_hw_addr *ha, *tmp; list_for_each_entry_safe(ha, tmp, &from_list->list, list) { - if (ha->synced) { - __hw_addr_del(to_list, ha->addr, - addr_len, ha->type); - ha->synced = false; - __hw_addr_del(from_list, ha->addr, - addr_len, ha->type); - } + if (ha->sync_cnt) + __hw_addr_unsync_one(to_list, from_list, ha, addr_len); } } EXPORT_SYMBOL(__hw_addr_unsync); @@ -407,7 +471,7 @@ int dev_uc_add_excl(struct net_device *dev, const unsigned char *addr) } } err = __hw_addr_create_ex(&dev->uc, addr, dev->addr_len, - NETDEV_HW_ADDR_T_UNICAST, true); + NETDEV_HW_ADDR_T_UNICAST, true, false); if (!err) __dev_set_rx_mode(dev); out: @@ -470,7 +534,8 @@ EXPORT_SYMBOL(dev_uc_del); * locked by netif_addr_lock_bh. * * This function is intended to be called from the dev->set_rx_mode - * function of layered software devices. + * function of layered software devices. This function assumes that + * addresses will only ever be synced to the @to devices and no other. */ int dev_uc_sync(struct net_device *to, struct net_device *from) { @@ -489,6 +554,36 @@ int dev_uc_sync(struct net_device *to, struct net_device *from) EXPORT_SYMBOL(dev_uc_sync); /** + * dev_uc_sync_multiple - Synchronize device's unicast list to another + * device, but allow for multiple calls to sync to multiple devices. + * @to: destination device + * @from: source device + * + * Add newly added addresses to the destination device and release + * addresses that have been deleted from the source. The source device + * must be locked by netif_addr_lock_bh. + * + * This function is intended to be called from the dev->set_rx_mode + * function of layered software devices. It allows for a single source + * device to be synced to multiple destination devices. 
+ */ +int dev_uc_sync_multiple(struct net_device *to, struct net_device *from) +{ + int err = 0; + + if (to->addr_len != from->addr_len) + return -EINVAL; + + netif_addr_lock_nested(to); + err = __hw_addr_sync_multiple(&to->uc, &from->uc, to->addr_len); + if (!err) + __dev_set_rx_mode(to); + netif_addr_unlock(to); + return err; +} +EXPORT_SYMBOL(dev_uc_sync_multiple); + +/** * dev_uc_unsync - Remove synchronized addresses from the destination device * @to: destination device * @from: source device @@ -560,7 +655,7 @@ int dev_mc_add_excl(struct net_device *dev, const unsigned char *addr) } } err = __hw_addr_create_ex(&dev->mc, addr, dev->addr_len, - NETDEV_HW_ADDR_T_MULTICAST, true); + NETDEV_HW_ADDR_T_MULTICAST, true, false); if (!err) __dev_set_rx_mode(dev); out: @@ -576,7 +671,7 @@ static int __dev_mc_add(struct net_device *dev, const unsigned char *addr, netif_addr_lock_bh(dev); err = __hw_addr_add_ex(&dev->mc, addr, dev->addr_len, - NETDEV_HW_ADDR_T_MULTICAST, global); + NETDEV_HW_ADDR_T_MULTICAST, global, false); if (!err) __dev_set_rx_mode(dev); netif_addr_unlock_bh(dev); @@ -616,7 +711,7 @@ static int __dev_mc_del(struct net_device *dev, const unsigned char *addr, netif_addr_lock_bh(dev); err = __hw_addr_del_ex(&dev->mc, addr, dev->addr_len, - NETDEV_HW_ADDR_T_MULTICAST, global); + NETDEV_HW_ADDR_T_MULTICAST, global, false); if (!err) __dev_set_rx_mode(dev); netif_addr_unlock_bh(dev); @@ -680,6 +775,36 @@ int dev_mc_sync(struct net_device *to, struct net_device *from) EXPORT_SYMBOL(dev_mc_sync); /** + * dev_mc_sync_multiple - Synchronize device's multicast list to another + * device, but allow for multiple calls to sync to multiple devices. + * @to: destination device + * @from: source device + * + * Add newly added addresses to the destination device and release + * addresses that have no users left. The source device must be + * locked by netif_addr_lock_bh. + * + * This function is intended to be called from the ndo_set_rx_mode + * function of layered software devices. It allows for a single + * source device to be synced to multiple destination devices. 
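
Both *_sync_multiple() variants (the unicast one above, the multicast one just below) exist for the opposite pattern: a single master pushing its lists to several lower ports, as a team- or bridge-style driver does from its ndo_set_rx_mode. A hypothetical sketch:

#include <linux/netdevice.h>
#include <linux/rculist.h>

struct example_port {			/* invented per-port bookkeeping */
	struct net_device	*dev;
	struct list_head	list;
};

struct example_master {
	struct list_head	port_list;
};

static void example_master_set_rx_mode(struct net_device *master)
{
	struct example_master *em = netdev_priv(master);
	struct example_port *port;

	/* One source list, many destinations: the case plain
	 * dev_uc_sync()/dev_mc_sync() cannot track. */
	rcu_read_lock();
	list_for_each_entry_rcu(port, &em->port_list, list) {
		dev_uc_sync_multiple(port->dev, master);
		dev_mc_sync_multiple(port->dev, master);
	}
	rcu_read_unlock();
}
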
+ */ +int dev_mc_sync_multiple(struct net_device *to, struct net_device *from) +{ + int err = 0; + + if (to->addr_len != from->addr_len) + return -EINVAL; + + netif_addr_lock_nested(to); + err = __hw_addr_sync(&to->mc, &from->mc, to->addr_len); + if (!err) + __dev_set_rx_mode(to); + netif_addr_unlock(to); + return err; +} +EXPORT_SYMBOL(dev_mc_sync_multiple); + +/** * dev_mc_unsync - Remove synchronized addresses from the destination device * @to: destination device * @from: source device @@ -727,76 +852,3 @@ void dev_mc_init(struct net_device *dev) __hw_addr_init(&dev->mc); } EXPORT_SYMBOL(dev_mc_init); - -#ifdef CONFIG_PROC_FS -#include <linux/seq_file.h> - -static int dev_mc_seq_show(struct seq_file *seq, void *v) -{ - struct netdev_hw_addr *ha; - struct net_device *dev = v; - - if (v == SEQ_START_TOKEN) - return 0; - - netif_addr_lock_bh(dev); - netdev_for_each_mc_addr(ha, dev) { - int i; - - seq_printf(seq, "%-4d %-15s %-5d %-5d ", dev->ifindex, - dev->name, ha->refcount, ha->global_use); - - for (i = 0; i < dev->addr_len; i++) - seq_printf(seq, "%02x", ha->addr[i]); - - seq_putc(seq, '\n'); - } - netif_addr_unlock_bh(dev); - return 0; -} - -static const struct seq_operations dev_mc_seq_ops = { - .start = dev_seq_start, - .next = dev_seq_next, - .stop = dev_seq_stop, - .show = dev_mc_seq_show, -}; - -static int dev_mc_seq_open(struct inode *inode, struct file *file) -{ - return seq_open_net(inode, file, &dev_mc_seq_ops, - sizeof(struct seq_net_private)); -} - -static const struct file_operations dev_mc_seq_fops = { - .owner = THIS_MODULE, - .open = dev_mc_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_net, -}; - -#endif - -static int __net_init dev_mc_net_init(struct net *net) -{ - if (!proc_net_fops_create(net, "dev_mcast", 0, &dev_mc_seq_fops)) - return -ENOMEM; - return 0; -} - -static void __net_exit dev_mc_net_exit(struct net *net) -{ - proc_net_remove(net, "dev_mcast"); -} - -static struct pernet_operations __net_initdata dev_mc_net_ops = { - .init = dev_mc_net_init, - .exit = dev_mc_net_exit, -}; - -void __init dev_mcast_init(void) -{ - register_pernet_subsys(&dev_mc_net_ops); -} - diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c new file mode 100644 index 000000000000..6cc0481faade --- /dev/null +++ b/net/core/dev_ioctl.c @@ -0,0 +1,576 @@ +#include <linux/kmod.h> +#include <linux/netdevice.h> +#include <linux/etherdevice.h> +#include <linux/rtnetlink.h> +#include <linux/net_tstamp.h> +#include <linux/wireless.h> +#include <net/wext.h> + +/* + * Map an interface index to its name (SIOCGIFNAME) + */ + +/* + * We need this ioctl for efficient implementation of the + * if_indextoname() function required by the IPv6 API. Without + * it, we would have to search all the interfaces to find a + * match. --pb + */ + +static int dev_ifname(struct net *net, struct ifreq __user *arg) +{ + struct net_device *dev; + struct ifreq ifr; + unsigned seq; + + /* + * Fetch the caller's info block. 
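
For reference, the user-space side of the SIOCGIFNAME path being implemented here is essentially what if_indextoname(3) boils down to. A stand-alone example:

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>

int main(void)
{
	struct ifreq ifr;
	int fd = socket(AF_INET, SOCK_DGRAM, 0);

	if (fd < 0)
		return 1;

	memset(&ifr, 0, sizeof(ifr));
	ifr.ifr_ifindex = 1;		/* loopback is index 1 on most systems */
	if (ioctl(fd, SIOCGIFNAME, &ifr) == 0)
		printf("ifindex 1 is %s\n", ifr.ifr_name);

	close(fd);
	return 0;
}
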
+ */ + + if (copy_from_user(&ifr, arg, sizeof(struct ifreq))) + return -EFAULT; + +retry: + seq = read_seqcount_begin(&devnet_rename_seq); + rcu_read_lock(); + dev = dev_get_by_index_rcu(net, ifr.ifr_ifindex); + if (!dev) { + rcu_read_unlock(); + return -ENODEV; + } + + strcpy(ifr.ifr_name, dev->name); + rcu_read_unlock(); + if (read_seqcount_retry(&devnet_rename_seq, seq)) + goto retry; + + if (copy_to_user(arg, &ifr, sizeof(struct ifreq))) + return -EFAULT; + return 0; +} + +static gifconf_func_t *gifconf_list[NPROTO]; + +/** + * register_gifconf - register a SIOCGIF handler + * @family: Address family + * @gifconf: Function handler + * + * Register protocol dependent address dumping routines. The handler + * that is passed must not be freed or reused until it has been replaced + * by another handler. + */ +int register_gifconf(unsigned int family, gifconf_func_t *gifconf) +{ + if (family >= NPROTO) + return -EINVAL; + gifconf_list[family] = gifconf; + return 0; +} +EXPORT_SYMBOL(register_gifconf); + +/* + * Perform a SIOCGIFCONF call. This structure will change + * size eventually, and there is nothing I can do about it. + * Thus we will need a 'compatibility mode'. + */ + +static int dev_ifconf(struct net *net, char __user *arg) +{ + struct ifconf ifc; + struct net_device *dev; + char __user *pos; + int len; + int total; + int i; + + /* + * Fetch the caller's info block. + */ + + if (copy_from_user(&ifc, arg, sizeof(struct ifconf))) + return -EFAULT; + + pos = ifc.ifc_buf; + len = ifc.ifc_len; + + /* + * Loop over the interfaces, and write an info block for each. + */ + + total = 0; + for_each_netdev(net, dev) { + for (i = 0; i < NPROTO; i++) { + if (gifconf_list[i]) { + int done; + if (!pos) + done = gifconf_list[i](dev, NULL, 0); + else + done = gifconf_list[i](dev, pos + total, + len - total); + if (done < 0) + return -EFAULT; + total += done; + } + } + } + + /* + * All done. Write the updated control block back to the caller. + */ + ifc.ifc_len = total; + + /* + * Both BSD and Solaris return 0 here, so we do too. + */ + return copy_to_user(arg, &ifc, sizeof(struct ifconf)) ? 
-EFAULT : 0; +} + +/* + * Perform the SIOCxIFxxx calls, inside rcu_read_lock() + */ +static int dev_ifsioc_locked(struct net *net, struct ifreq *ifr, unsigned int cmd) +{ + int err; + struct net_device *dev = dev_get_by_name_rcu(net, ifr->ifr_name); + + if (!dev) + return -ENODEV; + + switch (cmd) { + case SIOCGIFFLAGS: /* Get interface flags */ + ifr->ifr_flags = (short) dev_get_flags(dev); + return 0; + + case SIOCGIFMETRIC: /* Get the metric on the interface + (currently unused) */ + ifr->ifr_metric = 0; + return 0; + + case SIOCGIFMTU: /* Get the MTU of a device */ + ifr->ifr_mtu = dev->mtu; + return 0; + + case SIOCGIFHWADDR: + if (!dev->addr_len) + memset(ifr->ifr_hwaddr.sa_data, 0, sizeof ifr->ifr_hwaddr.sa_data); + else + memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr, + min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len)); + ifr->ifr_hwaddr.sa_family = dev->type; + return 0; + + case SIOCGIFSLAVE: + err = -EINVAL; + break; + + case SIOCGIFMAP: + ifr->ifr_map.mem_start = dev->mem_start; + ifr->ifr_map.mem_end = dev->mem_end; + ifr->ifr_map.base_addr = dev->base_addr; + ifr->ifr_map.irq = dev->irq; + ifr->ifr_map.dma = dev->dma; + ifr->ifr_map.port = dev->if_port; + return 0; + + case SIOCGIFINDEX: + ifr->ifr_ifindex = dev->ifindex; + return 0; + + case SIOCGIFTXQLEN: + ifr->ifr_qlen = dev->tx_queue_len; + return 0; + + default: + /* dev_ioctl() should ensure this case + * is never reached + */ + WARN_ON(1); + err = -ENOTTY; + break; + + } + return err; +} + +static int net_hwtstamp_validate(struct ifreq *ifr) +{ + struct hwtstamp_config cfg; + enum hwtstamp_tx_types tx_type; + enum hwtstamp_rx_filters rx_filter; + int tx_type_valid = 0; + int rx_filter_valid = 0; + + if (copy_from_user(&cfg, ifr->ifr_data, sizeof(cfg))) + return -EFAULT; + + if (cfg.flags) /* reserved for future extensions */ + return -EINVAL; + + tx_type = cfg.tx_type; + rx_filter = cfg.rx_filter; + + switch (tx_type) { + case HWTSTAMP_TX_OFF: + case HWTSTAMP_TX_ON: + case HWTSTAMP_TX_ONESTEP_SYNC: + tx_type_valid = 1; + break; + } + + switch (rx_filter) { + case HWTSTAMP_FILTER_NONE: + case HWTSTAMP_FILTER_ALL: + case HWTSTAMP_FILTER_SOME: + case HWTSTAMP_FILTER_PTP_V1_L4_EVENT: + case HWTSTAMP_FILTER_PTP_V1_L4_SYNC: + case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ: + case HWTSTAMP_FILTER_PTP_V2_L4_EVENT: + case HWTSTAMP_FILTER_PTP_V2_L4_SYNC: + case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ: + case HWTSTAMP_FILTER_PTP_V2_L2_EVENT: + case HWTSTAMP_FILTER_PTP_V2_L2_SYNC: + case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ: + case HWTSTAMP_FILTER_PTP_V2_EVENT: + case HWTSTAMP_FILTER_PTP_V2_SYNC: + case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ: + rx_filter_valid = 1; + break; + } + + if (!tx_type_valid || !rx_filter_valid) + return -ERANGE; + + return 0; +} + +/* + * Perform the SIOCxIFxxx calls, inside rtnl_lock() + */ +static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd) +{ + int err; + struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name); + const struct net_device_ops *ops; + + if (!dev) + return -ENODEV; + + ops = dev->netdev_ops; + + switch (cmd) { + case SIOCSIFFLAGS: /* Set interface flags */ + return dev_change_flags(dev, ifr->ifr_flags); + + case SIOCSIFMETRIC: /* Set the metric on the interface + (currently unused) */ + return -EOPNOTSUPP; + + case SIOCSIFMTU: /* Set the MTU of a device */ + return dev_set_mtu(dev, ifr->ifr_mtu); + + case SIOCSIFHWADDR: + return dev_set_mac_address(dev, &ifr->ifr_hwaddr); + + case SIOCSIFHWBROADCAST: + if (ifr->ifr_hwaddr.sa_family != dev->type) + return 
-EINVAL; + memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data, + min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len)); + call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); + return 0; + + case SIOCSIFMAP: + if (ops->ndo_set_config) { + if (!netif_device_present(dev)) + return -ENODEV; + return ops->ndo_set_config(dev, &ifr->ifr_map); + } + return -EOPNOTSUPP; + + case SIOCADDMULTI: + if (!ops->ndo_set_rx_mode || + ifr->ifr_hwaddr.sa_family != AF_UNSPEC) + return -EINVAL; + if (!netif_device_present(dev)) + return -ENODEV; + return dev_mc_add_global(dev, ifr->ifr_hwaddr.sa_data); + + case SIOCDELMULTI: + if (!ops->ndo_set_rx_mode || + ifr->ifr_hwaddr.sa_family != AF_UNSPEC) + return -EINVAL; + if (!netif_device_present(dev)) + return -ENODEV; + return dev_mc_del_global(dev, ifr->ifr_hwaddr.sa_data); + + case SIOCSIFTXQLEN: + if (ifr->ifr_qlen < 0) + return -EINVAL; + dev->tx_queue_len = ifr->ifr_qlen; + return 0; + + case SIOCSIFNAME: + ifr->ifr_newname[IFNAMSIZ-1] = '\0'; + return dev_change_name(dev, ifr->ifr_newname); + + case SIOCSHWTSTAMP: + err = net_hwtstamp_validate(ifr); + if (err) + return err; + /* fall through */ + + /* + * Unknown or private ioctl + */ + default: + if ((cmd >= SIOCDEVPRIVATE && + cmd <= SIOCDEVPRIVATE + 15) || + cmd == SIOCBONDENSLAVE || + cmd == SIOCBONDRELEASE || + cmd == SIOCBONDSETHWADDR || + cmd == SIOCBONDSLAVEINFOQUERY || + cmd == SIOCBONDINFOQUERY || + cmd == SIOCBONDCHANGEACTIVE || + cmd == SIOCGMIIPHY || + cmd == SIOCGMIIREG || + cmd == SIOCSMIIREG || + cmd == SIOCBRADDIF || + cmd == SIOCBRDELIF || + cmd == SIOCSHWTSTAMP || + cmd == SIOCWANDEV) { + err = -EOPNOTSUPP; + if (ops->ndo_do_ioctl) { + if (netif_device_present(dev)) + err = ops->ndo_do_ioctl(dev, ifr, cmd); + else + err = -ENODEV; + } + } else + err = -EINVAL; + + } + return err; +} + +/** + * dev_load - load a network module + * @net: the applicable net namespace + * @name: name of interface + * + * If a network interface is not present and the process has suitable + * privileges this function loads the module. If module loading is not + * available in this kernel then it becomes a nop. + */ + +void dev_load(struct net *net, const char *name) +{ + struct net_device *dev; + int no_module; + + rcu_read_lock(); + dev = dev_get_by_name_rcu(net, name); + rcu_read_unlock(); + + no_module = !dev; + if (no_module && capable(CAP_NET_ADMIN)) + no_module = request_module("netdev-%s", name); + if (no_module && capable(CAP_SYS_MODULE)) { + if (!request_module("%s", name)) + pr_warn("Loading kernel module for a network device with CAP_SYS_MODULE (deprecated). Use CAP_NET_ADMIN and alias netdev-%s instead.\n", + name); + } +} +EXPORT_SYMBOL(dev_load); + +/* + * This function handles all "interface"-type I/O control requests. The actual + * 'doing' part of this is dev_ifsioc above. + */ + +/** + * dev_ioctl - network device ioctl + * @net: the applicable net namespace + * @cmd: command to issue + * @arg: pointer to a struct ifreq in user space + * + * Issue ioctl functions to devices. This is normally called by the + * user space syscall interfaces but can sometimes be useful for + * other purposes. The return value is the return from the syscall if + * positive or a negative errno code on error. + */ + +int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg) +{ + struct ifreq ifr; + int ret; + char *colon; + + /* One special case: SIOCGIFCONF takes ifconf argument + and requires shared lock, because it sleeps writing + to user space. 
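
From user space, the SIOCGIFCONF special case handled first in this function is the traditional way to enumerate configured interfaces (note it only reports interfaces with an IPv4 address). A stand-alone example with a fixed-size buffer:

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>

int main(void)
{
	struct ifreq reqs[16];	/* fixed buffer; real code sizes this dynamically */
	struct ifconf ifc;
	int fd = socket(AF_INET, SOCK_DGRAM, 0);
	int i, n;

	if (fd < 0)
		return 1;

	memset(&ifc, 0, sizeof(ifc));
	ifc.ifc_len = sizeof(reqs);
	ifc.ifc_req = reqs;
	if (ioctl(fd, SIOCGIFCONF, &ifc) == 0) {
		n = ifc.ifc_len / sizeof(struct ifreq);
		for (i = 0; i < n; i++)
			printf("%s\n", reqs[i].ifr_name);
	}

	close(fd);
	return 0;
}
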
+ */ + + if (cmd == SIOCGIFCONF) { + rtnl_lock(); + ret = dev_ifconf(net, (char __user *) arg); + rtnl_unlock(); + return ret; + } + if (cmd == SIOCGIFNAME) + return dev_ifname(net, (struct ifreq __user *)arg); + + if (copy_from_user(&ifr, arg, sizeof(struct ifreq))) + return -EFAULT; + + ifr.ifr_name[IFNAMSIZ-1] = 0; + + colon = strchr(ifr.ifr_name, ':'); + if (colon) + *colon = 0; + + /* + * See which interface the caller is talking about. + */ + + switch (cmd) { + /* + * These ioctl calls: + * - can be done by all. + * - atomic and do not require locking. + * - return a value + */ + case SIOCGIFFLAGS: + case SIOCGIFMETRIC: + case SIOCGIFMTU: + case SIOCGIFHWADDR: + case SIOCGIFSLAVE: + case SIOCGIFMAP: + case SIOCGIFINDEX: + case SIOCGIFTXQLEN: + dev_load(net, ifr.ifr_name); + rcu_read_lock(); + ret = dev_ifsioc_locked(net, &ifr, cmd); + rcu_read_unlock(); + if (!ret) { + if (colon) + *colon = ':'; + if (copy_to_user(arg, &ifr, + sizeof(struct ifreq))) + ret = -EFAULT; + } + return ret; + + case SIOCETHTOOL: + dev_load(net, ifr.ifr_name); + rtnl_lock(); + ret = dev_ethtool(net, &ifr); + rtnl_unlock(); + if (!ret) { + if (colon) + *colon = ':'; + if (copy_to_user(arg, &ifr, + sizeof(struct ifreq))) + ret = -EFAULT; + } + return ret; + + /* + * These ioctl calls: + * - require superuser power. + * - require strict serialization. + * - return a value + */ + case SIOCGMIIPHY: + case SIOCGMIIREG: + case SIOCSIFNAME: + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) + return -EPERM; + dev_load(net, ifr.ifr_name); + rtnl_lock(); + ret = dev_ifsioc(net, &ifr, cmd); + rtnl_unlock(); + if (!ret) { + if (colon) + *colon = ':'; + if (copy_to_user(arg, &ifr, + sizeof(struct ifreq))) + ret = -EFAULT; + } + return ret; + + /* + * These ioctl calls: + * - require superuser power. + * - require strict serialization. + * - do not return a value + */ + case SIOCSIFMAP: + case SIOCSIFTXQLEN: + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + /* fall through */ + /* + * These ioctl calls: + * - require local superuser power. + * - require strict serialization. + * - do not return a value + */ + case SIOCSIFFLAGS: + case SIOCSIFMETRIC: + case SIOCSIFMTU: + case SIOCSIFHWADDR: + case SIOCSIFSLAVE: + case SIOCADDMULTI: + case SIOCDELMULTI: + case SIOCSIFHWBROADCAST: + case SIOCSMIIREG: + case SIOCBONDENSLAVE: + case SIOCBONDRELEASE: + case SIOCBONDSETHWADDR: + case SIOCBONDCHANGEACTIVE: + case SIOCBRADDIF: + case SIOCBRDELIF: + case SIOCSHWTSTAMP: + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) + return -EPERM; + /* fall through */ + case SIOCBONDSLAVEINFOQUERY: + case SIOCBONDINFOQUERY: + dev_load(net, ifr.ifr_name); + rtnl_lock(); + ret = dev_ifsioc(net, &ifr, cmd); + rtnl_unlock(); + return ret; + + case SIOCGIFMEM: + /* Get the per device memory space. We can add this but + * currently do not support it */ + case SIOCSIFMEM: + /* Set the per device memory buffer space. + * Not applicable in our case */ + case SIOCSIFLINK: + return -ENOTTY; + + /* + * Unknown or private ioctl. 
+ */ + default: + if (cmd == SIOCWANDEV || + (cmd >= SIOCDEVPRIVATE && + cmd <= SIOCDEVPRIVATE + 15)) { + dev_load(net, ifr.ifr_name); + rtnl_lock(); + ret = dev_ifsioc(net, &ifr, cmd); + rtnl_unlock(); + if (!ret && copy_to_user(arg, &ifr, + sizeof(struct ifreq))) + ret = -EFAULT; + return ret; + } + /* Take care of Wireless Extensions */ + if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) + return wext_handle_ioctl(net, &ifr, cmd, arg); + return -ENOTTY; + } +} diff --git a/net/core/dst.c b/net/core/dst.c index ee6153e2cf43..df9cc810ec8e 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -179,6 +179,7 @@ void *dst_alloc(struct dst_ops *ops, struct net_device *dev, dst_init_metrics(dst, dst_default_metrics, true); dst->expires = 0UL; dst->path = dst; + dst->from = NULL; #ifdef CONFIG_XFRM dst->xfrm = NULL; #endif @@ -319,27 +320,28 @@ void __dst_destroy_metrics_generic(struct dst_entry *dst, unsigned long old) EXPORT_SYMBOL(__dst_destroy_metrics_generic); /** - * skb_dst_set_noref - sets skb dst, without a reference + * __skb_dst_set_noref - sets skb dst, without a reference * @skb: buffer * @dst: dst entry + * @force: if force is set, use noref version even for DST_NOCACHE entries * * Sets skb dst, assuming a reference was not taken on dst * skb_dst_drop() should not dst_release() this dst */ -void skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst) +void __skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst, bool force) { WARN_ON(!rcu_read_lock_held() && !rcu_read_lock_bh_held()); /* If dst not in cache, we must take a reference, because * dst_release() will destroy dst as soon as its refcount becomes zero */ - if (unlikely(dst->flags & DST_NOCACHE)) { + if (unlikely((dst->flags & DST_NOCACHE) && !force)) { dst_hold(dst); skb_dst_set(skb, dst); } else { skb->_skb_refdst = (unsigned long)dst | SKB_DST_NOREF; } } -EXPORT_SYMBOL(skb_dst_set_noref); +EXPORT_SYMBOL(__skb_dst_set_noref); /* Dirty hack. 
We did it in 2.2 (in __dst_free), * we have _very_ good reasons not to repeat diff --git a/net/core/ethtool.c b/net/core/ethtool.c index a8705432e4b1..22efdaa76ebf 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -60,10 +60,13 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN] [NETIF_F_IPV6_CSUM_BIT] = "tx-checksum-ipv6", [NETIF_F_HIGHDMA_BIT] = "highdma", [NETIF_F_FRAGLIST_BIT] = "tx-scatter-gather-fraglist", - [NETIF_F_HW_VLAN_TX_BIT] = "tx-vlan-hw-insert", + [NETIF_F_HW_VLAN_CTAG_TX_BIT] = "tx-vlan-ctag-hw-insert", - [NETIF_F_HW_VLAN_RX_BIT] = "rx-vlan-hw-parse", - [NETIF_F_HW_VLAN_FILTER_BIT] = "rx-vlan-filter", + [NETIF_F_HW_VLAN_CTAG_RX_BIT] = "rx-vlan-ctag-hw-parse", + [NETIF_F_HW_VLAN_CTAG_FILTER_BIT] = "rx-vlan-ctag-filter", + [NETIF_F_HW_VLAN_STAG_TX_BIT] = "tx-vlan-stag-hw-insert", + [NETIF_F_HW_VLAN_STAG_RX_BIT] = "rx-vlan-stag-hw-parse", + [NETIF_F_HW_VLAN_STAG_FILTER_BIT] = "rx-vlan-stag-filter", [NETIF_F_VLAN_CHALLENGED_BIT] = "vlan-challenged", [NETIF_F_GSO_BIT] = "tx-generic-segmentation", [NETIF_F_LLTX_BIT] = "tx-lockless", @@ -77,6 +80,8 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN] [NETIF_F_TSO_ECN_BIT] = "tx-tcp-ecn-segmentation", [NETIF_F_TSO6_BIT] = "tx-tcp6-segmentation", [NETIF_F_FSO_BIT] = "tx-fcoe-segmentation", + [NETIF_F_GSO_GRE_BIT] = "tx-gre-segmentation", + [NETIF_F_GSO_UDP_TUNNEL_BIT] = "tx-udp_tnl-segmentation", [NETIF_F_FCOE_CRC_BIT] = "tx-checksum-fcoe-crc", [NETIF_F_SCTP_CSUM_BIT] = "tx-checksum-sctp", @@ -175,7 +180,7 @@ static int __ethtool_get_sset_count(struct net_device *dev, int sset) if (sset == ETH_SS_FEATURES) return ARRAY_SIZE(netdev_features_strings); - if (ops && ops->get_sset_count && ops->get_strings) + if (ops->get_sset_count && ops->get_strings) return ops->get_sset_count(dev, sset); else return -EOPNOTSUPP; @@ -265,18 +270,19 @@ static int ethtool_set_one_feature(struct net_device *dev, #define ETH_ALL_FLAGS (ETH_FLAG_LRO | ETH_FLAG_RXVLAN | ETH_FLAG_TXVLAN | \ ETH_FLAG_NTUPLE | ETH_FLAG_RXHASH) -#define ETH_ALL_FEATURES (NETIF_F_LRO | NETIF_F_HW_VLAN_RX | \ - NETIF_F_HW_VLAN_TX | NETIF_F_NTUPLE | NETIF_F_RXHASH) +#define ETH_ALL_FEATURES (NETIF_F_LRO | NETIF_F_HW_VLAN_CTAG_RX | \ + NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_NTUPLE | \ + NETIF_F_RXHASH) static u32 __ethtool_get_flags(struct net_device *dev) { u32 flags = 0; - if (dev->features & NETIF_F_LRO) flags |= ETH_FLAG_LRO; - if (dev->features & NETIF_F_HW_VLAN_RX) flags |= ETH_FLAG_RXVLAN; - if (dev->features & NETIF_F_HW_VLAN_TX) flags |= ETH_FLAG_TXVLAN; - if (dev->features & NETIF_F_NTUPLE) flags |= ETH_FLAG_NTUPLE; - if (dev->features & NETIF_F_RXHASH) flags |= ETH_FLAG_RXHASH; + if (dev->features & NETIF_F_LRO) flags |= ETH_FLAG_LRO; + if (dev->features & NETIF_F_HW_VLAN_CTAG_RX) flags |= ETH_FLAG_RXVLAN; + if (dev->features & NETIF_F_HW_VLAN_CTAG_TX) flags |= ETH_FLAG_TXVLAN; + if (dev->features & NETIF_F_NTUPLE) flags |= ETH_FLAG_NTUPLE; + if (dev->features & NETIF_F_RXHASH) flags |= ETH_FLAG_RXHASH; return flags; } @@ -289,8 +295,8 @@ static int __ethtool_set_flags(struct net_device *dev, u32 data) return -EINVAL; if (data & ETH_FLAG_LRO) features |= NETIF_F_LRO; - if (data & ETH_FLAG_RXVLAN) features |= NETIF_F_HW_VLAN_RX; - if (data & ETH_FLAG_TXVLAN) features |= NETIF_F_HW_VLAN_TX; + if (data & ETH_FLAG_RXVLAN) features |= NETIF_F_HW_VLAN_CTAG_RX; + if (data & ETH_FLAG_TXVLAN) features |= NETIF_F_HW_VLAN_CTAG_TX; if (data & ETH_FLAG_NTUPLE) features |= NETIF_F_NTUPLE; if (data & ETH_FLAG_RXHASH) 
features |= NETIF_F_RXHASH; @@ -311,7 +317,7 @@ int __ethtool_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) { ASSERT_RTNL(); - if (!dev->ethtool_ops || !dev->ethtool_ops->get_settings) + if (!dev->ethtool_ops->get_settings) return -EOPNOTSUPP; memset(cmd, 0, sizeof(struct ethtool_cmd)); @@ -355,7 +361,7 @@ static noinline_for_stack int ethtool_get_drvinfo(struct net_device *dev, memset(&info, 0, sizeof(info)); info.cmd = ETHTOOL_GDRVINFO; - if (ops && ops->get_drvinfo) { + if (ops->get_drvinfo) { ops->get_drvinfo(dev, &info); } else if (dev->dev.parent && dev->dev.parent->driver) { strlcpy(info.bus_info, dev_name(dev->dev.parent), @@ -370,7 +376,7 @@ static noinline_for_stack int ethtool_get_drvinfo(struct net_device *dev, * this method of obtaining string set info is deprecated; * Use ETHTOOL_GSSET_INFO instead. */ - if (ops && ops->get_sset_count) { + if (ops->get_sset_count) { int rc; rc = ops->get_sset_count(dev, ETH_SS_TEST); @@ -383,9 +389,9 @@ static noinline_for_stack int ethtool_get_drvinfo(struct net_device *dev, if (rc >= 0) info.n_priv_flags = rc; } - if (ops && ops->get_regs_len) + if (ops->get_regs_len) info.regdump_len = ops->get_regs_len(dev); - if (ops && ops->get_eeprom_len) + if (ops->get_eeprom_len) info.eedump_len = ops->get_eeprom_len(dev); if (copy_to_user(useraddr, &info, sizeof(info))) @@ -590,13 +596,14 @@ static noinline_for_stack int ethtool_set_rxfh_indir(struct net_device *dev, struct ethtool_rxnfc rx_rings; u32 user_size, dev_size, i; u32 *indir; + const struct ethtool_ops *ops = dev->ethtool_ops; int ret; - if (!dev->ethtool_ops->get_rxfh_indir_size || - !dev->ethtool_ops->set_rxfh_indir || - !dev->ethtool_ops->get_rxnfc) + if (!ops->get_rxfh_indir_size || !ops->set_rxfh_indir || + !ops->get_rxnfc) return -EOPNOTSUPP; - dev_size = dev->ethtool_ops->get_rxfh_indir_size(dev); + + dev_size = ops->get_rxfh_indir_size(dev); if (dev_size == 0) return -EOPNOTSUPP; @@ -613,7 +620,7 @@ static noinline_for_stack int ethtool_set_rxfh_indir(struct net_device *dev, return -ENOMEM; rx_rings.cmd = ETHTOOL_GRXRINGS; - ret = dev->ethtool_ops->get_rxnfc(dev, &rx_rings, NULL); + ret = ops->get_rxnfc(dev, &rx_rings, NULL); if (ret) goto out; @@ -639,7 +646,7 @@ static noinline_for_stack int ethtool_set_rxfh_indir(struct net_device *dev, } } - ret = dev->ethtool_ops->set_rxfh_indir(dev, indir); + ret = ops->set_rxfh_indir(dev, indir); out: kfree(indir); @@ -1082,9 +1089,10 @@ static int ethtool_phys_id(struct net_device *dev, void __user *useraddr) { struct ethtool_value id; static bool busy; + const struct ethtool_ops *ops = dev->ethtool_ops; int rc; - if (!dev->ethtool_ops->set_phys_id) + if (!ops->set_phys_id) return -EOPNOTSUPP; if (busy) @@ -1093,7 +1101,7 @@ static int ethtool_phys_id(struct net_device *dev, void __user *useraddr) if (copy_from_user(&id, useraddr, sizeof(id))) return -EFAULT; - rc = dev->ethtool_ops->set_phys_id(dev, ETHTOOL_ID_ACTIVE); + rc = ops->set_phys_id(dev, ETHTOOL_ID_ACTIVE); if (rc < 0) return rc; @@ -1118,7 +1126,7 @@ static int ethtool_phys_id(struct net_device *dev, void __user *useraddr) i = n; do { rtnl_lock(); - rc = dev->ethtool_ops->set_phys_id(dev, + rc = ops->set_phys_id(dev, (i & 1) ? 
ETHTOOL_ID_OFF : ETHTOOL_ID_ON); rtnl_unlock(); if (rc) @@ -1133,7 +1141,7 @@ static int ethtool_phys_id(struct net_device *dev, void __user *useraddr) dev_put(dev); busy = false; - (void)dev->ethtool_ops->set_phys_id(dev, ETHTOOL_ID_INACTIVE); + (void) ops->set_phys_id(dev, ETHTOOL_ID_INACTIVE); return rc; } @@ -1275,7 +1283,7 @@ static int ethtool_get_dump_flag(struct net_device *dev, struct ethtool_dump dump; const struct ethtool_ops *ops = dev->ethtool_ops; - if (!dev->ethtool_ops->get_dump_flag) + if (!ops->get_dump_flag) return -EOPNOTSUPP; if (copy_from_user(&dump, useraddr, sizeof(dump))) @@ -1299,8 +1307,7 @@ static int ethtool_get_dump_data(struct net_device *dev, const struct ethtool_ops *ops = dev->ethtool_ops; void *data = NULL; - if (!dev->ethtool_ops->get_dump_data || - !dev->ethtool_ops->get_dump_flag) + if (!ops->get_dump_data || !ops->get_dump_flag) return -EOPNOTSUPP; if (copy_from_user(&dump, useraddr, sizeof(dump))) @@ -1346,13 +1353,9 @@ static int ethtool_get_ts_info(struct net_device *dev, void __user *useraddr) info.cmd = ETHTOOL_GET_TS_INFO; if (phydev && phydev->drv && phydev->drv->ts_info) { - err = phydev->drv->ts_info(phydev, &info); - - } else if (dev->ethtool_ops && dev->ethtool_ops->get_ts_info) { - + } else if (ops->get_ts_info) { err = ops->get_ts_info(dev, &info); - } else { info.so_timestamping = SOF_TIMESTAMPING_RX_SOFTWARE | @@ -1418,7 +1421,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) void __user *useraddr = ifr->ifr_data; u32 ethcmd; int rc; - u32 old_features; + netdev_features_t old_features; if (!dev || !netif_device_present(dev)) return -ENODEV; diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 58a4ba27dfe3..d5a9f8ead0d8 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -266,7 +266,7 @@ errout: return err; } -static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) +static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh) { struct net *net = sock_net(skb->sk); struct fib_rule_hdr *frh = nlmsg_data(nlh); @@ -415,7 +415,7 @@ errout: return err; } -static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) +static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh) { struct net *net = sock_net(skb->sk); struct fib_rule_hdr *frh = nlmsg_data(nlh); diff --git a/net/core/filter.c b/net/core/filter.c index c23543cba132..dad2a178f9f8 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -348,6 +348,9 @@ load_b: case BPF_S_ANC_VLAN_TAG_PRESENT: A = !!vlan_tx_tag_present(skb); continue; + case BPF_S_ANC_PAY_OFFSET: + A = __skb_get_poff(skb); + continue; case BPF_S_ANC_NLATTR: { struct nlattr *nla; @@ -532,6 +535,7 @@ int sk_chk_filter(struct sock_filter *filter, unsigned int flen) [BPF_JMP|BPF_JSET|BPF_X] = BPF_S_JMP_JSET_X, }; int pc; + bool anc_found; if (flen == 0 || flen > BPF_MAXINSNS) return -EINVAL; @@ -592,8 +596,10 @@ int sk_chk_filter(struct sock_filter *filter, unsigned int flen) case BPF_S_LD_W_ABS: case BPF_S_LD_H_ABS: case BPF_S_LD_B_ABS: + anc_found = false; #define ANCILLARY(CODE) case SKF_AD_OFF + SKF_AD_##CODE: \ code = BPF_S_ANC_##CODE; \ + anc_found = true; \ break switch (ftest->k) { ANCILLARY(PROTOCOL); @@ -609,7 +615,12 @@ int sk_chk_filter(struct sock_filter *filter, unsigned int flen) ANCILLARY(ALU_XOR_X); ANCILLARY(VLAN_TAG); ANCILLARY(VLAN_TAG_PRESENT); + ANCILLARY(PAY_OFFSET); } + + /* ancillary operation unknown or unsupported */ + if (anc_found == false && ftest->k >= SKF_AD_OFF) + return -EINVAL; } ftest->code = code; 
} @@ -714,6 +725,9 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) unsigned int fsize = sizeof(struct sock_filter) * fprog->len; int err; + if (sock_flag(sk, SOCK_FILTER_LOCKED)) + return -EPERM; + /* Make sure new filter is there and in the right amounts. */ if (fprog->filter == NULL) return -EINVAL; @@ -750,6 +764,9 @@ int sk_detach_filter(struct sock *sk) int ret = -ENOENT; struct sk_filter *filter; + if (sock_flag(sk, SOCK_FILTER_LOCKED)) + return -EPERM; + filter = rcu_dereference_protected(sk->sk_filter, sock_owned_by_user(sk)); if (filter) { @@ -801,6 +818,7 @@ static void sk_decode_filter(struct sock_filter *filt, struct sock_filter *to) [BPF_S_ANC_SECCOMP_LD_W] = BPF_LD|BPF_B|BPF_ABS, [BPF_S_ANC_VLAN_TAG] = BPF_LD|BPF_B|BPF_ABS, [BPF_S_ANC_VLAN_TAG_PRESENT] = BPF_LD|BPF_B|BPF_ABS, + [BPF_S_ANC_PAY_OFFSET] = BPF_LD|BPF_B|BPF_ABS, [BPF_S_LD_W_LEN] = BPF_LD|BPF_W|BPF_LEN, [BPF_S_LD_W_IND] = BPF_LD|BPF_W|BPF_IND, [BPF_S_LD_H_IND] = BPF_LD|BPF_H|BPF_IND, diff --git a/net/core/flow.c b/net/core/flow.c index b0901ee5a002..7102f166482d 100644 --- a/net/core/flow.c +++ b/net/core/flow.c @@ -132,14 +132,14 @@ static void __flow_cache_shrink(struct flow_cache *fc, int shrink_to) { struct flow_cache_entry *fle; - struct hlist_node *entry, *tmp; + struct hlist_node *tmp; LIST_HEAD(gc_list); int i, deleted = 0; for (i = 0; i < flow_cache_hash_size(fc); i++) { int saved = 0; - hlist_for_each_entry_safe(fle, entry, tmp, + hlist_for_each_entry_safe(fle, tmp, &fcp->hash_table[i], u.hlist) { if (saved < shrink_to && flow_entry_valid(fle)) { @@ -211,7 +211,6 @@ flow_cache_lookup(struct net *net, const struct flowi *key, u16 family, u8 dir, struct flow_cache *fc = &flow_cache_global; struct flow_cache_percpu *fcp; struct flow_cache_entry *fle, *tfle; - struct hlist_node *entry; struct flow_cache_object *flo; size_t keysize; unsigned int hash; @@ -235,7 +234,7 @@ flow_cache_lookup(struct net *net, const struct flowi *key, u16 family, u8 dir, flow_new_hash_rnd(fc, fcp); hash = flow_hash_code(fc, fcp, key, keysize); - hlist_for_each_entry(tfle, entry, &fcp->hash_table[hash], u.hlist) { + hlist_for_each_entry(tfle, &fcp->hash_table[hash], u.hlist) { if (tfle->net == net && tfle->family == family && tfle->dir == dir && @@ -286,7 +285,7 @@ nocache: else fle->genid--; } else { - if (flo && !IS_ERR(flo)) + if (!IS_ERR_OR_NULL(flo)) flo->ops->delete(flo); } ret_object: @@ -301,13 +300,13 @@ static void flow_cache_flush_tasklet(unsigned long data) struct flow_cache *fc = info->cache; struct flow_cache_percpu *fcp; struct flow_cache_entry *fle; - struct hlist_node *entry, *tmp; + struct hlist_node *tmp; LIST_HEAD(gc_list); int i, deleted = 0; fcp = this_cpu_ptr(fc->percpu); for (i = 0; i < flow_cache_hash_size(fc); i++) { - hlist_for_each_entry_safe(fle, entry, tmp, + hlist_for_each_entry_safe(fle, tmp, &fcp->hash_table[i], u.hlist) { if (flow_entry_valid(fle)) continue; @@ -324,12 +323,30 @@ static void flow_cache_flush_tasklet(unsigned long data) complete(&info->completion); } +/* + * Return whether a cpu needs flushing. Conservatively, we assume + * the presence of any entries means the core may require flushing, + * since the flow_cache_ops.check() function may assume it's running + * on the same core as the per-cpu cache component. 
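
Looping back to the filter.c changes above: with the new PAY_OFFSET ancillary, a classic BPF program can ask the flow dissector for the payload offset and truncate a capture to headers only. A minimal user-space sketch (requires CAP_NET_RAW; error handling abbreviated):

#include <stdio.h>
#include <arpa/inet.h>
#include <sys/socket.h>
#include <linux/filter.h>
#include <linux/if_ether.h>

int main(void)
{
	struct sock_filter insns[] = {
		/* A = payload offset, via the SKF_AD_PAY_OFFSET ancillary */
		BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
			 SKF_AD_OFF + SKF_AD_PAY_OFFSET),
		/* accept exactly that many bytes: headers only */
		BPF_STMT(BPF_RET | BPF_A, 0),
	};
	struct sock_fprog prog = {
		.len	= sizeof(insns) / sizeof(insns[0]),
		.filter	= insns,
	};
	int fd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));

	if (fd < 0 || setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER,
				 &prog, sizeof(prog)))
		perror("attach");
	return 0;
}
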
+ */ +static int flow_cache_percpu_empty(struct flow_cache *fc, int cpu) +{ + struct flow_cache_percpu *fcp; + int i; + + fcp = per_cpu_ptr(fc->percpu, cpu); + for (i = 0; i < flow_cache_hash_size(fc); i++) + if (!hlist_empty(&fcp->hash_table[i])) + return 0; + return 1; +} + static void flow_cache_flush_per_cpu(void *data) { struct flow_flush_info *info = data; struct tasklet_struct *tasklet; - tasklet = this_cpu_ptr(&info->cache->percpu->flush_tasklet); + tasklet = &this_cpu_ptr(info->cache->percpu)->flush_tasklet; tasklet->data = (unsigned long)info; tasklet_schedule(tasklet); } @@ -338,22 +355,40 @@ void flow_cache_flush(void) { struct flow_flush_info info; static DEFINE_MUTEX(flow_flush_sem); + cpumask_var_t mask; + int i, self; + + /* Track which cpus need flushing to avoid disturbing all cores. */ + if (!alloc_cpumask_var(&mask, GFP_KERNEL)) + return; + cpumask_clear(mask); /* Don't want cpus going down or up during this. */ get_online_cpus(); mutex_lock(&flow_flush_sem); info.cache = &flow_cache_global; - atomic_set(&info.cpuleft, num_online_cpus()); + for_each_online_cpu(i) + if (!flow_cache_percpu_empty(info.cache, i)) + cpumask_set_cpu(i, mask); + atomic_set(&info.cpuleft, cpumask_weight(mask)); + if (atomic_read(&info.cpuleft) == 0) + goto done; + init_completion(&info.completion); local_bh_disable(); - smp_call_function(flow_cache_flush_per_cpu, &info, 0); - flow_cache_flush_tasklet((unsigned long)&info); + self = cpumask_test_and_clear_cpu(smp_processor_id(), mask); + on_each_cpu_mask(mask, flow_cache_flush_per_cpu, &info, 0); + if (self) + flow_cache_flush_tasklet((unsigned long)&info); local_bh_enable(); wait_for_completion(&info.completion); + +done: mutex_unlock(&flow_flush_sem); put_online_cpus(); + free_cpumask_var(mask); } static void flow_cache_flush_task(struct work_struct *work) diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 466820b6e344..00ee068efc1c 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -5,6 +5,10 @@ #include <linux/if_vlan.h> #include <net/ip.h> #include <net/ipv6.h> +#include <linux/igmp.h> +#include <linux/icmp.h> +#include <linux/sctp.h> +#include <linux/dccp.h> #include <linux/if_tunnel.h> #include <linux/if_pppox.h> #include <linux/ppp_defs.h> @@ -119,6 +123,17 @@ ipv6: nhoff += 4; if (hdr->flags & GRE_SEQ) nhoff += 4; + if (proto == htons(ETH_P_TEB)) { + const struct ethhdr *eth; + struct ethhdr _eth; + + eth = skb_header_pointer(skb, nhoff, + sizeof(_eth), &_eth); + if (!eth) + return false; + proto = eth->h_proto; + nhoff += sizeof(*eth); + } goto again; } break; @@ -140,6 +155,234 @@ ipv6: flow->ports = *ports; } + flow->thoff = (u16) nhoff; + return true; } EXPORT_SYMBOL(skb_flow_dissect); + +static u32 hashrnd __read_mostly; + +/* + * __skb_get_rxhash: calculate a flow hash based on src/dst addresses + * and src/dst port numbers. Sets rxhash in skb to non-zero hash value + * on success, zero indicates no valid hash. Also, sets l4_rxhash in skb + * if hash is a canonical 4-tuple hash over transport ports. 
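
Two pieces of arithmetic in the hunk below are worth isolating: the source/destination normalization that makes the rxhash direction-insensitive, and the multiply-shift that __skb_tx_hash() uses to map a hash onto a queue range without a modulo. A stand-alone illustration (mix32() is only a placeholder for the kernel's keyed jhash):

#include <stdint.h>
#include <stdio.h>

static uint32_t mix32(uint32_t a, uint32_t b, uint32_t c)
{
	uint32_t h = a * 0x9e3779b1u ^ b * 0x85ebca6bu ^ c * 0xc2b2ae35u;

	h ^= h >> 16;
	return h ? h : 1;	/* 0 is reserved for "no valid hash" */
}

/* Normalize flow direction before hashing, as __skb_get_rxhash() does. */
static uint32_t flow_hash(uint32_t src, uint32_t dst,
			  uint16_t sport, uint16_t dport)
{
	uint32_t t32;
	uint16_t t16;

	if (dst < src || (dst == src && dport < sport)) {
		t32 = src; src = dst; dst = t32;
		t16 = sport; sport = dport; dport = t16;
	}
	return mix32(dst, src, ((uint32_t)sport << 16) | dport);
}

/* The __skb_tx_hash() queue mapping: uniform on [0, range), no modulo. */
static uint16_t scale(uint32_t hash, uint16_t range)
{
	return (uint16_t)(((uint64_t)hash * range) >> 32);
}

int main(void)
{
	printf("fwd %u rev %u (equal)\n",
	       flow_hash(0x0a000001, 0x0a000002, 1234, 80),
	       flow_hash(0x0a000002, 0x0a000001, 80, 1234));
	printf("queues: %u %u %u\n", scale(0x0u, 8),
	       scale(0x80000000u, 8), scale(0xffffffffu, 8)); /* 0 4 7 */
	return 0;
}
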
+ */ +void __skb_get_rxhash(struct sk_buff *skb) +{ + struct flow_keys keys; + u32 hash; + + if (!skb_flow_dissect(skb, &keys)) + return; + + if (keys.ports) + skb->l4_rxhash = 1; + + /* get a consistent hash (same value on both flow directions) */ + if (((__force u32)keys.dst < (__force u32)keys.src) || + (((__force u32)keys.dst == (__force u32)keys.src) && + ((__force u16)keys.port16[1] < (__force u16)keys.port16[0]))) { + swap(keys.dst, keys.src); + swap(keys.port16[0], keys.port16[1]); + } + + hash = jhash_3words((__force u32)keys.dst, + (__force u32)keys.src, + (__force u32)keys.ports, hashrnd); + if (!hash) + hash = 1; + + skb->rxhash = hash; +} +EXPORT_SYMBOL(__skb_get_rxhash); + +/* + * Returns a Tx hash based on the given packet descriptor and the number + * of Tx queues to be used as a distribution range. + */ +u16 __skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb, + unsigned int num_tx_queues) +{ + u32 hash; + u16 qoffset = 0; + u16 qcount = num_tx_queues; + + if (skb_rx_queue_recorded(skb)) { + hash = skb_get_rx_queue(skb); + while (unlikely(hash >= num_tx_queues)) + hash -= num_tx_queues; + return hash; + } + + if (dev->num_tc) { + u8 tc = netdev_get_prio_tc_map(dev, skb->priority); + qoffset = dev->tc_to_txq[tc].offset; + qcount = dev->tc_to_txq[tc].count; + } + + if (skb->sk && skb->sk->sk_hash) + hash = skb->sk->sk_hash; + else + hash = (__force u16) skb->protocol; + hash = jhash_1word(hash, hashrnd); + + return (u16) (((u64) hash * qcount) >> 32) + qoffset; +} +EXPORT_SYMBOL(__skb_tx_hash); + +/* __skb_get_poff() returns the offset to the payload as far as it could + * be dissected. The main user is currently BPF, so that we can dynamically + * truncate packets without needing to push actual payload to the user + * space and can analyze headers only, instead. + */ +u32 __skb_get_poff(const struct sk_buff *skb) +{ + struct flow_keys keys; + u32 poff = 0; + + if (!skb_flow_dissect(skb, &keys)) + return 0; + + poff += keys.thoff; + switch (keys.ip_proto) { + case IPPROTO_TCP: { + const struct tcphdr *tcph; + struct tcphdr _tcph; + + tcph = skb_header_pointer(skb, poff, sizeof(_tcph), &_tcph); + if (!tcph) + return poff; + + poff += max_t(u32, sizeof(struct tcphdr), tcph->doff * 4); + break; + } + case IPPROTO_UDP: + case IPPROTO_UDPLITE: + poff += sizeof(struct udphdr); + break; + /* For the rest, we do not really care about header + * extensions at this point for now. 
+ */ + case IPPROTO_ICMP: + poff += sizeof(struct icmphdr); + break; + case IPPROTO_ICMPV6: + poff += sizeof(struct icmp6hdr); + break; + case IPPROTO_IGMP: + poff += sizeof(struct igmphdr); + break; + case IPPROTO_DCCP: + poff += sizeof(struct dccp_hdr); + break; + case IPPROTO_SCTP: + poff += sizeof(struct sctphdr); + break; + } + + return poff; +} + +static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index) +{ + if (unlikely(queue_index >= dev->real_num_tx_queues)) { + net_warn_ratelimited("%s selects TX queue %d, but real number of TX queues is %d\n", + dev->name, queue_index, + dev->real_num_tx_queues); + return 0; + } + return queue_index; +} + +static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb) +{ +#ifdef CONFIG_XPS + struct xps_dev_maps *dev_maps; + struct xps_map *map; + int queue_index = -1; + + rcu_read_lock(); + dev_maps = rcu_dereference(dev->xps_maps); + if (dev_maps) { + map = rcu_dereference( + dev_maps->cpu_map[raw_smp_processor_id()]); + if (map) { + if (map->len == 1) + queue_index = map->queues[0]; + else { + u32 hash; + if (skb->sk && skb->sk->sk_hash) + hash = skb->sk->sk_hash; + else + hash = (__force u16) skb->protocol ^ + skb->rxhash; + hash = jhash_1word(hash, hashrnd); + queue_index = map->queues[ + ((u64)hash * map->len) >> 32]; + } + if (unlikely(queue_index >= dev->real_num_tx_queues)) + queue_index = -1; + } + } + rcu_read_unlock(); + + return queue_index; +#else + return -1; +#endif +} + +u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb) +{ + struct sock *sk = skb->sk; + int queue_index = sk_tx_queue_get(sk); + + if (queue_index < 0 || skb->ooo_okay || + queue_index >= dev->real_num_tx_queues) { + int new_index = get_xps_queue(dev, skb); + if (new_index < 0) + new_index = skb_tx_hash(dev, skb); + + if (queue_index != new_index && sk) { + struct dst_entry *dst = + rcu_dereference_check(sk->sk_dst_cache, 1); + + if (dst && skb_dst(skb) == dst) + sk_tx_queue_set(sk, queue_index); + + } + + queue_index = new_index; + } + + return queue_index; +} +EXPORT_SYMBOL(__netdev_pick_tx); + +struct netdev_queue *netdev_pick_tx(struct net_device *dev, + struct sk_buff *skb) +{ + int queue_index = 0; + + if (dev->real_num_tx_queues != 1) { + const struct net_device_ops *ops = dev->netdev_ops; + if (ops->ndo_select_queue) + queue_index = ops->ndo_select_queue(dev, skb); + else + queue_index = __netdev_pick_tx(dev, skb); + queue_index = dev_cap_txqueue(dev, queue_index); + } + + skb_set_queue_mapping(skb, queue_index); + return netdev_get_tx_queue(dev, queue_index); +} + +static int __init initialize_hashrnd(void) +{ + get_random_bytes(&hashrnd, sizeof(hashrnd)); + return 0; +} + +late_initcall_sync(initialize_hashrnd); diff --git a/net/core/neighbour.c b/net/core/neighbour.c index c815f285e5ab..5c56b217b999 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -39,21 +39,13 @@ #include <linux/string.h> #include <linux/log2.h> +#define DEBUG #define NEIGH_DEBUG 1 - -#define NEIGH_PRINTK(x...) printk(x) -#define NEIGH_NOPRINTK(x...) do { ; } while(0) -#define NEIGH_PRINTK1 NEIGH_NOPRINTK -#define NEIGH_PRINTK2 NEIGH_NOPRINTK - -#if NEIGH_DEBUG >= 1 -#undef NEIGH_PRINTK1 -#define NEIGH_PRINTK1 NEIGH_PRINTK -#endif -#if NEIGH_DEBUG >= 2 -#undef NEIGH_PRINTK2 -#define NEIGH_PRINTK2 NEIGH_PRINTK -#endif +#define neigh_dbg(level, fmt, ...) 
\ +do { \ + if (level <= NEIGH_DEBUG) \ + pr_debug(fmt, ##__VA_ARGS__); \ +} while (0) #define PNEIGH_HASHMASK 0xF @@ -246,7 +238,7 @@ static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev) n->nud_state = NUD_NOARP; else n->nud_state = NUD_NONE; - NEIGH_PRINTK2("neigh %p is stray.\n", n); + neigh_dbg(2, "neigh %p is stray\n", n); } write_unlock(&n->lock); neigh_cleanup_and_release(n); @@ -290,15 +282,7 @@ static struct neighbour *neigh_alloc(struct neigh_table *tbl, struct net_device goto out_entries; } - if (tbl->entry_size) - n = kzalloc(tbl->entry_size, GFP_ATOMIC); - else { - int sz = sizeof(*n) + tbl->key_len; - - sz = ALIGN(sz, NEIGH_PRIV_ALIGN); - sz += dev->neigh_priv_len; - n = kzalloc(sz, GFP_ATOMIC); - } + n = kzalloc(tbl->entry_size + dev->neigh_priv_len, GFP_ATOMIC); if (!n) goto out_entries; @@ -550,7 +534,7 @@ struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey, lockdep_is_held(&tbl->lock))); rcu_assign_pointer(nht->hash_buckets[hash_val], n); write_unlock_bh(&tbl->lock); - NEIGH_PRINTK2("neigh %p is created.\n", n); + neigh_dbg(2, "neigh %p is created\n", n); rc = n; out: return rc; @@ -733,7 +717,7 @@ void neigh_destroy(struct neighbour *neigh) dev_put(dev); neigh_parms_put(neigh->parms); - NEIGH_PRINTK2("neigh %p is destroyed.\n", neigh); + neigh_dbg(2, "neigh %p is destroyed\n", neigh); atomic_dec(&neigh->tbl->entries); kfree_rcu(neigh, rcu); @@ -747,7 +731,7 @@ EXPORT_SYMBOL(neigh_destroy); */ static void neigh_suspect(struct neighbour *neigh) { - NEIGH_PRINTK2("neigh %p is suspected.\n", neigh); + neigh_dbg(2, "neigh %p is suspected\n", neigh); neigh->output = neigh->ops->output; } @@ -759,7 +743,7 @@ static void neigh_suspect(struct neighbour *neigh) */ static void neigh_connect(struct neighbour *neigh) { - NEIGH_PRINTK2("neigh %p is connected.\n", neigh); + neigh_dbg(2, "neigh %p is connected\n", neigh); neigh->output = neigh->ops->connected_output; } @@ -778,6 +762,9 @@ static void neigh_periodic_work(struct work_struct *work) nht = rcu_dereference_protected(tbl->nht, lockdep_is_held(&tbl->lock)); + if (atomic_read(&tbl->entries) < tbl->gc_thresh1) + goto out; + /* * periodically recompute ReachableTime from random function */ @@ -832,6 +819,7 @@ next_elt: nht = rcu_dereference_protected(tbl->nht, lockdep_is_held(&tbl->lock)); } +out: /* Cycle through all hash buckets every base_reachable_time/2 ticks. * ARP entry timeouts range from 1/2 base_reachable_time to 3/2 * base_reachable_time. @@ -856,7 +844,7 @@ static void neigh_invalidate(struct neighbour *neigh) struct sk_buff *skb; NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed); - NEIGH_PRINTK2("neigh %p is failed.\n", neigh); + neigh_dbg(2, "neigh %p is failed\n", neigh); neigh->updated = jiffies; /* It is very thin place. 
report_unreachable is very complicated @@ -908,17 +896,17 @@ static void neigh_timer_handler(unsigned long arg) if (state & NUD_REACHABLE) { if (time_before_eq(now, neigh->confirmed + neigh->parms->reachable_time)) { - NEIGH_PRINTK2("neigh %p is still alive.\n", neigh); + neigh_dbg(2, "neigh %p is still alive\n", neigh); next = neigh->confirmed + neigh->parms->reachable_time; } else if (time_before_eq(now, neigh->used + neigh->parms->delay_probe_time)) { - NEIGH_PRINTK2("neigh %p is delayed.\n", neigh); + neigh_dbg(2, "neigh %p is delayed\n", neigh); neigh->nud_state = NUD_DELAY; neigh->updated = jiffies; neigh_suspect(neigh); next = now + neigh->parms->delay_probe_time; } else { - NEIGH_PRINTK2("neigh %p is suspected.\n", neigh); + neigh_dbg(2, "neigh %p is suspected\n", neigh); neigh->nud_state = NUD_STALE; neigh->updated = jiffies; neigh_suspect(neigh); @@ -927,14 +915,14 @@ static void neigh_timer_handler(unsigned long arg) } else if (state & NUD_DELAY) { if (time_before_eq(now, neigh->confirmed + neigh->parms->delay_probe_time)) { - NEIGH_PRINTK2("neigh %p is now reachable.\n", neigh); + neigh_dbg(2, "neigh %p is now reachable\n", neigh); neigh->nud_state = NUD_REACHABLE; neigh->updated = jiffies; neigh_connect(neigh); notify = 1; next = neigh->confirmed + neigh->parms->reachable_time; } else { - NEIGH_PRINTK2("neigh %p is probed.\n", neigh); + neigh_dbg(2, "neigh %p is probed\n", neigh); neigh->nud_state = NUD_PROBE; neigh->updated = jiffies; atomic_set(&neigh->probes, 0); @@ -1001,7 +989,7 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb) return 1; } } else if (neigh->nud_state & NUD_STALE) { - NEIGH_PRINTK2("neigh %p is delayed.\n", neigh); + neigh_dbg(2, "neigh %p is delayed\n", neigh); neigh->nud_state = NUD_DELAY; neigh->updated = jiffies; neigh_add_timer(neigh, @@ -1324,8 +1312,7 @@ int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb) out: return rc; discard: - NEIGH_PRINTK1("neigh_resolve_output: dst=%p neigh=%p\n", - dst, neigh); + neigh_dbg(1, "%s: dst=%p neigh=%p\n", __func__, dst, neigh); out_kfree_skb: rc = -EINVAL; kfree_skb(skb); @@ -1502,7 +1489,7 @@ void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms) } } write_unlock_bh(&tbl->lock); - NEIGH_PRINTK1("neigh_parms_release: not found\n"); + neigh_dbg(1, "%s: not found\n", __func__); } EXPORT_SYMBOL(neigh_parms_release); @@ -1542,6 +1529,12 @@ static void neigh_table_init_no_netlink(struct neigh_table *tbl) if (!tbl->nht || !tbl->phash_buckets) panic("cannot allocate neighbour cache hashes"); + if (!tbl->entry_size) + tbl->entry_size = ALIGN(offsetof(struct neighbour, primary_key) + + tbl->key_len, NEIGH_PRIV_ALIGN); + else + WARN_ON(tbl->entry_size % NEIGH_PRIV_ALIGN); + rwlock_init(&tbl->lock); INIT_DEFERRABLE_WORK(&tbl->gc_work, neigh_periodic_work); schedule_delayed_work(&tbl->gc_work, tbl->parms.reachable_time); @@ -1611,7 +1604,7 @@ int neigh_table_clear(struct neigh_table *tbl) } EXPORT_SYMBOL(neigh_table_clear); -static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); struct ndmsg *ndm; @@ -1675,7 +1668,7 @@ out: return err; } -static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); struct ndmsg *ndm; @@ -1953,7 +1946,7 @@ static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = { 
[NDTPA_LOCKTIME] = { .type = NLA_U64 }, }; -static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); struct neigh_table *tbl; @@ -2712,7 +2705,7 @@ static int neigh_stat_seq_open(struct inode *inode, struct file *file) if (!ret) { struct seq_file *sf = file->private_data; - sf->private = PDE(inode)->data; + sf->private = PDE_DATA(inode); } return ret; }; diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c new file mode 100644 index 000000000000..569d355fec3e --- /dev/null +++ b/net/core/net-procfs.c @@ -0,0 +1,411 @@ +#include <linux/netdevice.h> +#include <linux/proc_fs.h> +#include <linux/seq_file.h> +#include <net/wext.h> + +#define BUCKET_SPACE (32 - NETDEV_HASHBITS - 1) + +#define get_bucket(x) ((x) >> BUCKET_SPACE) +#define get_offset(x) ((x) & ((1 << BUCKET_SPACE) - 1)) +#define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o)) + +extern struct list_head ptype_all __read_mostly; +extern struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly; + +static inline struct net_device *dev_from_same_bucket(struct seq_file *seq, loff_t *pos) +{ + struct net *net = seq_file_net(seq); + struct net_device *dev; + struct hlist_head *h; + unsigned int count = 0, offset = get_offset(*pos); + + h = &net->dev_name_head[get_bucket(*pos)]; + hlist_for_each_entry_rcu(dev, h, name_hlist) { + if (++count == offset) + return dev; + } + + return NULL; +} + +static inline struct net_device *dev_from_bucket(struct seq_file *seq, loff_t *pos) +{ + struct net_device *dev; + unsigned int bucket; + + do { + dev = dev_from_same_bucket(seq, pos); + if (dev) + return dev; + + bucket = get_bucket(*pos) + 1; + *pos = set_bucket_offset(bucket, 1); + } while (bucket < NETDEV_HASHENTRIES); + + return NULL; +} + +/* + * This is invoked by the /proc filesystem handler to display a device + * in detail. + */ +static void *dev_seq_start(struct seq_file *seq, loff_t *pos) + __acquires(RCU) +{ + rcu_read_lock(); + if (!*pos) + return SEQ_START_TOKEN; + + if (get_bucket(*pos) >= NETDEV_HASHENTRIES) + return NULL; + + return dev_from_bucket(seq, pos); +} + +static void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos) +{ + ++*pos; + return dev_from_bucket(seq, pos); +} + +static void dev_seq_stop(struct seq_file *seq, void *v) + __releases(RCU) +{ + rcu_read_unlock(); +} + +static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev) +{ + struct rtnl_link_stats64 temp; + const struct rtnl_link_stats64 *stats = dev_get_stats(dev, &temp); + + seq_printf(seq, "%6s: %7llu %7llu %4llu %4llu %4llu %5llu %10llu %9llu " + "%8llu %7llu %4llu %4llu %4llu %5llu %7llu %10llu\n", + dev->name, stats->rx_bytes, stats->rx_packets, + stats->rx_errors, + stats->rx_dropped + stats->rx_missed_errors, + stats->rx_fifo_errors, + stats->rx_length_errors + stats->rx_over_errors + + stats->rx_crc_errors + stats->rx_frame_errors, + stats->rx_compressed, stats->multicast, + stats->tx_bytes, stats->tx_packets, + stats->tx_errors, stats->tx_dropped, + stats->tx_fifo_errors, stats->collisions, + stats->tx_carrier_errors + + stats->tx_aborted_errors + + stats->tx_window_errors + + stats->tx_heartbeat_errors, + stats->tx_compressed); +} + +/* + * Called from the PROCfs module. 
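+ * It prints the fixed two-line column header for the SEQ_START_TOKEN + * entry and one dev_seq_printf_stats() row per network device.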
This now uses the new arbitrary sized + * /proc/net interface to create /proc/net/dev + */ +static int dev_seq_show(struct seq_file *seq, void *v) +{ + if (v == SEQ_START_TOKEN) + seq_puts(seq, "Inter-| Receive " + " | Transmit\n" + " face |bytes packets errs drop fifo frame " + "compressed multicast|bytes packets errs " + "drop fifo colls carrier compressed\n"); + else + dev_seq_printf_stats(seq, v); + return 0; +} + +static struct softnet_data *softnet_get_online(loff_t *pos) +{ + struct softnet_data *sd = NULL; + + while (*pos < nr_cpu_ids) + if (cpu_online(*pos)) { + sd = &per_cpu(softnet_data, *pos); + break; + } else + ++*pos; + return sd; +} + +static void *softnet_seq_start(struct seq_file *seq, loff_t *pos) +{ + return softnet_get_online(pos); +} + +static void *softnet_seq_next(struct seq_file *seq, void *v, loff_t *pos) +{ + ++*pos; + return softnet_get_online(pos); +} + +static void softnet_seq_stop(struct seq_file *seq, void *v) +{ +} + +static int softnet_seq_show(struct seq_file *seq, void *v) +{ + struct softnet_data *sd = v; + + seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x %08x\n", + sd->processed, sd->dropped, sd->time_squeeze, 0, + 0, 0, 0, 0, /* was fastroute */ + sd->cpu_collision, sd->received_rps); + return 0; +} + +static const struct seq_operations dev_seq_ops = { + .start = dev_seq_start, + .next = dev_seq_next, + .stop = dev_seq_stop, + .show = dev_seq_show, +}; + +static int dev_seq_open(struct inode *inode, struct file *file) +{ + return seq_open_net(inode, file, &dev_seq_ops, + sizeof(struct seq_net_private)); +} + +static const struct file_operations dev_seq_fops = { + .owner = THIS_MODULE, + .open = dev_seq_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release_net, +}; + +static const struct seq_operations softnet_seq_ops = { + .start = softnet_seq_start, + .next = softnet_seq_next, + .stop = softnet_seq_stop, + .show = softnet_seq_show, +}; + +static int softnet_seq_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &softnet_seq_ops); +} + +static const struct file_operations softnet_seq_fops = { + .owner = THIS_MODULE, + .open = softnet_seq_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + +static void *ptype_get_idx(loff_t pos) +{ + struct packet_type *pt = NULL; + loff_t i = 0; + int t; + + list_for_each_entry_rcu(pt, &ptype_all, list) { + if (i == pos) + return pt; + ++i; + } + + for (t = 0; t < PTYPE_HASH_SIZE; t++) { + list_for_each_entry_rcu(pt, &ptype_base[t], list) { + if (i == pos) + return pt; + ++i; + } + } + return NULL; +} + +static void *ptype_seq_start(struct seq_file *seq, loff_t *pos) + __acquires(RCU) +{ + rcu_read_lock(); + return *pos ? 
ptype_get_idx(*pos - 1) : SEQ_START_TOKEN; +} + +static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos) +{ + struct packet_type *pt; + struct list_head *nxt; + int hash; + + ++*pos; + if (v == SEQ_START_TOKEN) + return ptype_get_idx(0); + + pt = v; + nxt = pt->list.next; + if (pt->type == htons(ETH_P_ALL)) { + if (nxt != &ptype_all) + goto found; + hash = 0; + nxt = ptype_base[0].next; + } else + hash = ntohs(pt->type) & PTYPE_HASH_MASK; + + while (nxt == &ptype_base[hash]) { + if (++hash >= PTYPE_HASH_SIZE) + return NULL; + nxt = ptype_base[hash].next; + } +found: + return list_entry(nxt, struct packet_type, list); +} + +static void ptype_seq_stop(struct seq_file *seq, void *v) + __releases(RCU) +{ + rcu_read_unlock(); +} + +static int ptype_seq_show(struct seq_file *seq, void *v) +{ + struct packet_type *pt = v; + + if (v == SEQ_START_TOKEN) + seq_puts(seq, "Type Device Function\n"); + else if (pt->dev == NULL || dev_net(pt->dev) == seq_file_net(seq)) { + if (pt->type == htons(ETH_P_ALL)) + seq_puts(seq, "ALL "); + else + seq_printf(seq, "%04x", ntohs(pt->type)); + + seq_printf(seq, " %-8s %pf\n", + pt->dev ? pt->dev->name : "", pt->func); + } + + return 0; +} + +static const struct seq_operations ptype_seq_ops = { + .start = ptype_seq_start, + .next = ptype_seq_next, + .stop = ptype_seq_stop, + .show = ptype_seq_show, +}; + +static int ptype_seq_open(struct inode *inode, struct file *file) +{ + return seq_open_net(inode, file, &ptype_seq_ops, + sizeof(struct seq_net_private)); +} + +static const struct file_operations ptype_seq_fops = { + .owner = THIS_MODULE, + .open = ptype_seq_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release_net, +}; + + +static int __net_init dev_proc_net_init(struct net *net) +{ + int rc = -ENOMEM; + + if (!proc_create("dev", S_IRUGO, net->proc_net, &dev_seq_fops)) + goto out; + if (!proc_create("softnet_stat", S_IRUGO, net->proc_net, + &softnet_seq_fops)) + goto out_dev; + if (!proc_create("ptype", S_IRUGO, net->proc_net, &ptype_seq_fops)) + goto out_softnet; + + if (wext_proc_init(net)) + goto out_ptype; + rc = 0; +out: + return rc; +out_ptype: + remove_proc_entry("ptype", net->proc_net); +out_softnet: + remove_proc_entry("softnet_stat", net->proc_net); +out_dev: + remove_proc_entry("dev", net->proc_net); + goto out; +} + +static void __net_exit dev_proc_net_exit(struct net *net) +{ + wext_proc_exit(net); + + remove_proc_entry("ptype", net->proc_net); + remove_proc_entry("softnet_stat", net->proc_net); + remove_proc_entry("dev", net->proc_net); +} + +static struct pernet_operations __net_initdata dev_proc_ops = { + .init = dev_proc_net_init, + .exit = dev_proc_net_exit, +}; + +static int dev_mc_seq_show(struct seq_file *seq, void *v) +{ + struct netdev_hw_addr *ha; + struct net_device *dev = v; + + if (v == SEQ_START_TOKEN) + return 0; + + netif_addr_lock_bh(dev); + netdev_for_each_mc_addr(ha, dev) { + int i; + + seq_printf(seq, "%-4d %-15s %-5d %-5d ", dev->ifindex, + dev->name, ha->refcount, ha->global_use); + + for (i = 0; i < dev->addr_len; i++) + seq_printf(seq, "%02x", ha->addr[i]); + + seq_putc(seq, '\n'); + } + netif_addr_unlock_bh(dev); + return 0; +} + +static const struct seq_operations dev_mc_seq_ops = { + .start = dev_seq_start, + .next = dev_seq_next, + .stop = dev_seq_stop, + .show = dev_mc_seq_show, +}; + +static int dev_mc_seq_open(struct inode *inode, struct file *file) +{ + return seq_open_net(inode, file, &dev_mc_seq_ops, + sizeof(struct seq_net_private)); +} + +static const struct file_operations 
dev_mc_seq_fops = { + .owner = THIS_MODULE, + .open = dev_mc_seq_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release_net, +}; + +static int __net_init dev_mc_net_init(struct net *net) +{ + if (!proc_create("dev_mcast", 0, net->proc_net, &dev_mc_seq_fops)) + return -ENOMEM; + return 0; +} + +static void __net_exit dev_mc_net_exit(struct net *net) +{ + remove_proc_entry("dev_mcast", net->proc_net); +} + +static struct pernet_operations __net_initdata dev_mc_net_ops = { + .init = dev_mc_net_init, + .exit = dev_mc_net_exit, +}; + +int __init dev_proc_init(void) +{ + int ret = register_pernet_subsys(&dev_proc_ops); + if (!ret) + return register_pernet_subsys(&dev_mc_net_ops); + return ret; +} diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 28c5f5aa7ca7..981fed397d1d 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -21,6 +21,7 @@ #include <linux/vmalloc.h> #include <linux/export.h> #include <linux/jiffies.h> +#include <linux/pm_runtime.h> #include "net-sysfs.h" @@ -126,6 +127,19 @@ static ssize_t show_broadcast(struct device *dev, return -EINVAL; } +static int change_carrier(struct net_device *net, unsigned long new_carrier) +{ + if (!netif_running(net)) + return -EINVAL; + return dev_change_carrier(net, (bool) new_carrier); +} + +static ssize_t store_carrier(struct device *dev, struct device_attribute *attr, + const char *buf, size_t len) +{ + return netdev_store(dev, attr, buf, len, change_carrier); +} + static ssize_t show_carrier(struct device *dev, struct device_attribute *attr, char *buf) { @@ -331,7 +345,7 @@ static struct device_attribute net_class_attributes[] = { __ATTR(link_mode, S_IRUGO, show_link_mode, NULL), __ATTR(address, S_IRUGO, show_address, NULL), __ATTR(broadcast, S_IRUGO, show_broadcast, NULL), - __ATTR(carrier, S_IRUGO, show_carrier, NULL), + __ATTR(carrier, S_IRUGO | S_IWUSR, show_carrier, store_carrier), __ATTR(speed, S_IRUGO, show_speed, NULL), __ATTR(duplex, S_IRUGO, show_duplex, NULL), __ATTR(dormant, S_IRUGO, show_dormant, NULL), @@ -592,21 +606,11 @@ static ssize_t show_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue, return sprintf(buf, "%lu\n", val); } -static void rps_dev_flow_table_release_work(struct work_struct *work) -{ - struct rps_dev_flow_table *table = container_of(work, - struct rps_dev_flow_table, free_work); - - vfree(table); -} - static void rps_dev_flow_table_release(struct rcu_head *rcu) { struct rps_dev_flow_table *table = container_of(rcu, struct rps_dev_flow_table, rcu); - - INIT_WORK(&table->free_work, rps_dev_flow_table_release_work); - schedule_work(&table->free_work); + vfree(table); } static ssize_t store_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue, @@ -989,68 +993,14 @@ static ssize_t show_xps_map(struct netdev_queue *queue, return len; } -static DEFINE_MUTEX(xps_map_mutex); -#define xmap_dereference(P) \ - rcu_dereference_protected((P), lockdep_is_held(&xps_map_mutex)) - -static void xps_queue_release(struct netdev_queue *queue) -{ - struct net_device *dev = queue->dev; - struct xps_dev_maps *dev_maps; - struct xps_map *map; - unsigned long index; - int i, pos, nonempty = 0; - - index = get_netdev_queue_index(queue); - - mutex_lock(&xps_map_mutex); - dev_maps = xmap_dereference(dev->xps_maps); - - if (dev_maps) { - for_each_possible_cpu(i) { - map = xmap_dereference(dev_maps->cpu_map[i]); - if (!map) - continue; - - for (pos = 0; pos < map->len; pos++) - if (map->queues[pos] == index) - break; - - if (pos < map->len) { - if (map->len > 1) - map->queues[pos] = - 
map->queues[--map->len]; - else { - RCU_INIT_POINTER(dev_maps->cpu_map[i], - NULL); - kfree_rcu(map, rcu); - map = NULL; - } - } - if (map) - nonempty = 1; - } - - if (!nonempty) { - RCU_INIT_POINTER(dev->xps_maps, NULL); - kfree_rcu(dev_maps, rcu); - } - } - mutex_unlock(&xps_map_mutex); -} - static ssize_t store_xps_map(struct netdev_queue *queue, struct netdev_queue_attribute *attribute, const char *buf, size_t len) { struct net_device *dev = queue->dev; - cpumask_var_t mask; - int err, i, cpu, pos, map_len, alloc_len, need_set; unsigned long index; - struct xps_map *map, *new_map; - struct xps_dev_maps *dev_maps, *new_dev_maps; - int nonempty = 0; - int numa_node_id = -2; + cpumask_var_t mask; + int err; if (!capable(CAP_NET_ADMIN)) return -EPERM; @@ -1066,105 +1016,11 @@ static ssize_t store_xps_map(struct netdev_queue *queue, return err; } - new_dev_maps = kzalloc(max_t(unsigned int, - XPS_DEV_MAPS_SIZE, L1_CACHE_BYTES), GFP_KERNEL); - if (!new_dev_maps) { - free_cpumask_var(mask); - return -ENOMEM; - } - - mutex_lock(&xps_map_mutex); - - dev_maps = xmap_dereference(dev->xps_maps); - - for_each_possible_cpu(cpu) { - map = dev_maps ? - xmap_dereference(dev_maps->cpu_map[cpu]) : NULL; - new_map = map; - if (map) { - for (pos = 0; pos < map->len; pos++) - if (map->queues[pos] == index) - break; - map_len = map->len; - alloc_len = map->alloc_len; - } else - pos = map_len = alloc_len = 0; - - need_set = cpumask_test_cpu(cpu, mask) && cpu_online(cpu); -#ifdef CONFIG_NUMA - if (need_set) { - if (numa_node_id == -2) - numa_node_id = cpu_to_node(cpu); - else if (numa_node_id != cpu_to_node(cpu)) - numa_node_id = -1; - } -#endif - if (need_set && pos >= map_len) { - /* Need to add queue to this CPU's map */ - if (map_len >= alloc_len) { - alloc_len = alloc_len ? - 2 * alloc_len : XPS_MIN_MAP_ALLOC; - new_map = kzalloc_node(XPS_MAP_SIZE(alloc_len), - GFP_KERNEL, - cpu_to_node(cpu)); - if (!new_map) - goto error; - new_map->alloc_len = alloc_len; - for (i = 0; i < map_len; i++) - new_map->queues[i] = map->queues[i]; - new_map->len = map_len; - } - new_map->queues[new_map->len++] = index; - } else if (!need_set && pos < map_len) { - /* Need to remove queue from this CPU's map */ - if (map_len > 1) - new_map->queues[pos] = - new_map->queues[--new_map->len]; - else - new_map = NULL; - } - RCU_INIT_POINTER(new_dev_maps->cpu_map[cpu], new_map); - } - - /* Cleanup old maps */ - for_each_possible_cpu(cpu) { - map = dev_maps ? - xmap_dereference(dev_maps->cpu_map[cpu]) : NULL; - if (map && xmap_dereference(new_dev_maps->cpu_map[cpu]) != map) - kfree_rcu(map, rcu); - if (new_dev_maps->cpu_map[cpu]) - nonempty = 1; - } - - if (nonempty) { - rcu_assign_pointer(dev->xps_maps, new_dev_maps); - } else { - kfree(new_dev_maps); - RCU_INIT_POINTER(dev->xps_maps, NULL); - } - - if (dev_maps) - kfree_rcu(dev_maps, rcu); - - netdev_queue_numa_node_write(queue, (numa_node_id >= 0) ? numa_node_id : - NUMA_NO_NODE); - - mutex_unlock(&xps_map_mutex); + err = netif_set_xps_queue(dev, mask, index); free_cpumask_var(mask); - return len; -error: - mutex_unlock(&xps_map_mutex); - - if (new_dev_maps) - for_each_possible_cpu(i) - kfree(rcu_dereference_protected( - new_dev_maps->cpu_map[i], - 1)); - kfree(new_dev_maps); - free_cpumask_var(mask); - return -ENOMEM; + return err ? 
: len; } static struct netdev_queue_attribute xps_cpus_attribute = @@ -1183,10 +1039,6 @@ static void netdev_queue_release(struct kobject *kobj) { struct netdev_queue *queue = to_netdev_queue(kobj); -#ifdef CONFIG_XPS - xps_queue_release(queue); -#endif - memset(kobj, 0, sizeof(*kobj)); dev_put(queue->dev); } @@ -1396,6 +1248,8 @@ void netdev_unregister_kobject(struct net_device * net) remove_queue_kobjects(net); + pm_runtime_set_memalloc_noio(dev, false); + device_del(dev); } @@ -1440,6 +1294,8 @@ int netdev_register_kobject(struct net_device *net) return error; } + pm_runtime_set_memalloc_noio(dev, true); + return error; } diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 8acce01b6dab..f97652036754 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -10,7 +10,8 @@ #include <linux/idr.h> #include <linux/rculist.h> #include <linux/nsproxy.h> -#include <linux/proc_fs.h> +#include <linux/fs.h> +#include <linux/proc_ns.h> #include <linux/file.h> #include <linux/export.h> #include <linux/user_namespace.h> @@ -336,7 +337,7 @@ EXPORT_SYMBOL_GPL(__put_net); struct net *get_net_ns_by_fd(int fd) { - struct proc_inode *ei; + struct proc_ns *ei; struct file *file; struct net *net; @@ -344,7 +345,7 @@ struct net *get_net_ns_by_fd(int fd) if (IS_ERR(file)) return ERR_CAST(file); - ei = PROC_I(file->f_dentry->d_inode); + ei = get_proc_ns(file_inode(file)); if (ei->ns_ops == &netns_operations) net = get_net(ei->ns); else diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 3151acf5ec13..cec074be8c43 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -29,6 +29,9 @@ #include <linux/if_vlan.h> #include <net/tcp.h> #include <net/udp.h> +#include <net/addrconf.h> +#include <net/ndisc.h> +#include <net/ip6_checksum.h> #include <asm/unaligned.h> #include <trace/events/napi.h> @@ -44,6 +47,8 @@ static struct sk_buff_head skb_pool; static atomic_t trapped; +DEFINE_STATIC_SRCU(netpoll_srcu); + #define USEC_PER_POLL 50 #define NETPOLL_RX_ENABLED 1 #define NETPOLL_RX_DROP 2 @@ -55,7 +60,8 @@ static atomic_t trapped; MAX_UDP_CHUNK) static void zap_completion_queue(void); -static void netpoll_arp_reply(struct sk_buff *skb, struct netpoll_info *npinfo); +static void netpoll_neigh_reply(struct sk_buff *skb, struct netpoll_info *npinfo); +static void netpoll_async_cleanup(struct work_struct *work); static unsigned int carrier_timeout = 4; module_param(carrier_timeout, uint, 0644); @@ -181,13 +187,13 @@ static void poll_napi(struct net_device *dev) } } -static void service_arp_queue(struct netpoll_info *npi) +static void service_neigh_queue(struct netpoll_info *npi) { if (npi) { struct sk_buff *skb; - while ((skb = skb_dequeue(&npi->arp_tx))) - netpoll_arp_reply(skb, npi); + while ((skb = skb_dequeue(&npi->neigh_tx))) + netpoll_neigh_reply(skb, npi); } } @@ -196,35 +202,76 @@ static void netpoll_poll_dev(struct net_device *dev) const struct net_device_ops *ops; struct netpoll_info *ni = rcu_dereference_bh(dev->npinfo); - if (!dev || !netif_running(dev)) + /* Don't do any rx activity if the dev_lock mutex is held + * the dev_open/close paths use this to block netpoll activity + * while changing device state + */ + if (down_trylock(&ni->dev_lock)) return; + if (!netif_running(dev)) { + up(&ni->dev_lock); + return; + } + ops = dev->netdev_ops; - if (!ops->ndo_poll_controller) + if (!ops->ndo_poll_controller) { + up(&ni->dev_lock); return; + } /* Process pending work on NIC */ ops->ndo_poll_controller(dev); poll_napi(dev); + up(&ni->dev_lock); + if (dev->flags & 
IFF_SLAVE) { if (ni) { - struct net_device *bond_dev = dev->master; + struct net_device *bond_dev; struct sk_buff *skb; - struct netpoll_info *bond_ni = rcu_dereference_bh(bond_dev->npinfo); - while ((skb = skb_dequeue(&ni->arp_tx))) { + struct netpoll_info *bond_ni; + + bond_dev = netdev_master_upper_dev_get_rcu(dev); + bond_ni = rcu_dereference_bh(bond_dev->npinfo); + while ((skb = skb_dequeue(&ni->neigh_tx))) { skb->dev = bond_dev; - skb_queue_tail(&bond_ni->arp_tx, skb); + skb_queue_tail(&bond_ni->neigh_tx, skb); } } } - service_arp_queue(ni); + service_neigh_queue(ni); zap_completion_queue(); } +int netpoll_rx_disable(struct net_device *dev) +{ + struct netpoll_info *ni; + int idx; + might_sleep(); + idx = srcu_read_lock(&netpoll_srcu); + ni = srcu_dereference(dev->npinfo, &netpoll_srcu); + if (ni) + down(&ni->dev_lock); + srcu_read_unlock(&netpoll_srcu, idx); + return 0; +} +EXPORT_SYMBOL(netpoll_rx_disable); + +void netpoll_rx_enable(struct net_device *dev) +{ + struct netpoll_info *ni; + rcu_read_lock(); + ni = rcu_dereference(dev->npinfo); + if (ni) + up(&ni->dev_lock); + rcu_read_unlock(); +} +EXPORT_SYMBOL(netpoll_rx_enable); + static void refill_skbs(void) { struct sk_buff *skb; @@ -336,8 +383,9 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb, if (__netif_tx_trylock(txq)) { if (!netif_xmit_stopped(txq)) { if (vlan_tx_tag_present(skb) && - !(netif_skb_features(skb) & NETIF_F_HW_VLAN_TX)) { - skb = __vlan_put_tag(skb, vlan_tx_tag_get(skb)); + !vlan_hw_offload_capable(netif_skb_features(skb), + skb->vlan_proto)) { + skb = __vlan_put_tag(skb, skb->vlan_proto, vlan_tx_tag_get(skb)); if (unlikely(!skb)) break; skb->vlan_tci = 0; @@ -381,9 +429,14 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len) struct iphdr *iph; struct ethhdr *eth; static atomic_t ip_ident; + struct ipv6hdr *ip6h; udp_len = len + sizeof(*udph); - ip_len = udp_len + sizeof(*iph); + if (np->ipv6) + ip_len = udp_len + sizeof(*ip6h); + else + ip_len = udp_len + sizeof(*iph); + total_len = ip_len + LL_RESERVED_SPACE(np->dev); skb = find_skb(np, total_len + np->dev->needed_tailroom, @@ -400,34 +453,66 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len) udph->source = htons(np->local_port); udph->dest = htons(np->remote_port); udph->len = htons(udp_len); - udph->check = 0; - udph->check = csum_tcpudp_magic(np->local_ip, - np->remote_ip, - udp_len, IPPROTO_UDP, - csum_partial(udph, udp_len, 0)); - if (udph->check == 0) - udph->check = CSUM_MANGLED_0; - - skb_push(skb, sizeof(*iph)); - skb_reset_network_header(skb); - iph = ip_hdr(skb); - - /* iph->version = 4; iph->ihl = 5; */ - put_unaligned(0x45, (unsigned char *)iph); - iph->tos = 0; - put_unaligned(htons(ip_len), &(iph->tot_len)); - iph->id = htons(atomic_inc_return(&ip_ident)); - iph->frag_off = 0; - iph->ttl = 64; - iph->protocol = IPPROTO_UDP; - iph->check = 0; - put_unaligned(np->local_ip, &(iph->saddr)); - put_unaligned(np->remote_ip, &(iph->daddr)); - iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl); - - eth = (struct ethhdr *) skb_push(skb, ETH_HLEN); - skb_reset_mac_header(skb); - skb->protocol = eth->h_proto = htons(ETH_P_IP); + + if (np->ipv6) { + udph->check = 0; + udph->check = csum_ipv6_magic(&np->local_ip.in6, + &np->remote_ip.in6, + udp_len, IPPROTO_UDP, + csum_partial(udph, udp_len, 0)); + if (udph->check == 0) + udph->check = CSUM_MANGLED_0; + + skb_push(skb, sizeof(*ip6h)); + skb_reset_network_header(skb); + ip6h = ipv6_hdr(skb); + + /* ip6h->version = 6; ip6h->priority = 0; */ + 
put_unaligned(0x60, (unsigned char *)ip6h); + ip6h->flow_lbl[0] = 0; + ip6h->flow_lbl[1] = 0; + ip6h->flow_lbl[2] = 0; + + ip6h->payload_len = htons(sizeof(struct udphdr) + len); + ip6h->nexthdr = IPPROTO_UDP; + ip6h->hop_limit = 32; + ip6h->saddr = np->local_ip.in6; + ip6h->daddr = np->remote_ip.in6; + + eth = (struct ethhdr *) skb_push(skb, ETH_HLEN); + skb_reset_mac_header(skb); + skb->protocol = eth->h_proto = htons(ETH_P_IPV6); + } else { + udph->check = 0; + udph->check = csum_tcpudp_magic(np->local_ip.ip, + np->remote_ip.ip, + udp_len, IPPROTO_UDP, + csum_partial(udph, udp_len, 0)); + if (udph->check == 0) + udph->check = CSUM_MANGLED_0; + + skb_push(skb, sizeof(*iph)); + skb_reset_network_header(skb); + iph = ip_hdr(skb); + + /* iph->version = 4; iph->ihl = 5; */ + put_unaligned(0x45, (unsigned char *)iph); + iph->tos = 0; + put_unaligned(htons(ip_len), &(iph->tot_len)); + iph->id = htons(atomic_inc_return(&ip_ident)); + iph->frag_off = 0; + iph->ttl = 64; + iph->protocol = IPPROTO_UDP; + iph->check = 0; + put_unaligned(np->local_ip.ip, &(iph->saddr)); + put_unaligned(np->remote_ip.ip, &(iph->daddr)); + iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl); + + eth = (struct ethhdr *) skb_push(skb, ETH_HLEN); + skb_reset_mac_header(skb); + skb->protocol = eth->h_proto = htons(ETH_P_IP); + } + memcpy(eth->h_source, np->dev->dev_addr, ETH_ALEN); memcpy(eth->h_dest, np->remote_mac, ETH_ALEN); @@ -437,18 +522,16 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len) } EXPORT_SYMBOL(netpoll_send_udp); -static void netpoll_arp_reply(struct sk_buff *skb, struct netpoll_info *npinfo) +static void netpoll_neigh_reply(struct sk_buff *skb, struct netpoll_info *npinfo) { - struct arphdr *arp; - unsigned char *arp_ptr; - int size, type = ARPOP_REPLY, ptype = ETH_P_ARP; + int size, type = ARPOP_REPLY; __be32 sip, tip; unsigned char *sha; struct sk_buff *send_skb; struct netpoll *np, *tmp; unsigned long flags; int hlen, tlen; - int hits = 0; + int hits = 0, proto; if (list_empty(&npinfo->rx_np)) return; @@ -466,94 +549,214 @@ static void netpoll_arp_reply(struct sk_buff *skb, struct netpoll_info *npinfo) if (!hits) return; - /* No arp on this interface */ - if (skb->dev->flags & IFF_NOARP) - return; - - if (!pskb_may_pull(skb, arp_hdr_len(skb->dev))) - return; + proto = ntohs(eth_hdr(skb)->h_proto); + if (proto == ETH_P_IP) { + struct arphdr *arp; + unsigned char *arp_ptr; + /* No arp on this interface */ + if (skb->dev->flags & IFF_NOARP) + return; - skb_reset_network_header(skb); - skb_reset_transport_header(skb); - arp = arp_hdr(skb); + if (!pskb_may_pull(skb, arp_hdr_len(skb->dev))) + return; - if ((arp->ar_hrd != htons(ARPHRD_ETHER) && - arp->ar_hrd != htons(ARPHRD_IEEE802)) || - arp->ar_pro != htons(ETH_P_IP) || - arp->ar_op != htons(ARPOP_REQUEST)) - return; + skb_reset_network_header(skb); + skb_reset_transport_header(skb); + arp = arp_hdr(skb); - arp_ptr = (unsigned char *)(arp+1); - /* save the location of the src hw addr */ - sha = arp_ptr; - arp_ptr += skb->dev->addr_len; - memcpy(&sip, arp_ptr, 4); - arp_ptr += 4; - /* If we actually cared about dst hw addr, - it would get copied here */ - arp_ptr += skb->dev->addr_len; - memcpy(&tip, arp_ptr, 4); - - /* Should we ignore arp? 
*/ - if (ipv4_is_loopback(tip) || ipv4_is_multicast(tip)) - return; + if ((arp->ar_hrd != htons(ARPHRD_ETHER) && + arp->ar_hrd != htons(ARPHRD_IEEE802)) || + arp->ar_pro != htons(ETH_P_IP) || + arp->ar_op != htons(ARPOP_REQUEST)) + return; - size = arp_hdr_len(skb->dev); + arp_ptr = (unsigned char *)(arp+1); + /* save the location of the src hw addr */ + sha = arp_ptr; + arp_ptr += skb->dev->addr_len; + memcpy(&sip, arp_ptr, 4); + arp_ptr += 4; + /* If we actually cared about dst hw addr, + it would get copied here */ + arp_ptr += skb->dev->addr_len; + memcpy(&tip, arp_ptr, 4); - spin_lock_irqsave(&npinfo->rx_lock, flags); - list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) { - if (tip != np->local_ip) - continue; + /* Should we ignore arp? */ + if (ipv4_is_loopback(tip) || ipv4_is_multicast(tip)) + return; - hlen = LL_RESERVED_SPACE(np->dev); - tlen = np->dev->needed_tailroom; - send_skb = find_skb(np, size + hlen + tlen, hlen); - if (!send_skb) - continue; + size = arp_hdr_len(skb->dev); - skb_reset_network_header(send_skb); - arp = (struct arphdr *) skb_put(send_skb, size); - send_skb->dev = skb->dev; - send_skb->protocol = htons(ETH_P_ARP); + spin_lock_irqsave(&npinfo->rx_lock, flags); + list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) { + if (tip != np->local_ip.ip) + continue; + + hlen = LL_RESERVED_SPACE(np->dev); + tlen = np->dev->needed_tailroom; + send_skb = find_skb(np, size + hlen + tlen, hlen); + if (!send_skb) + continue; + + skb_reset_network_header(send_skb); + arp = (struct arphdr *) skb_put(send_skb, size); + send_skb->dev = skb->dev; + send_skb->protocol = htons(ETH_P_ARP); + + /* Fill the device header for the ARP frame */ + if (dev_hard_header(send_skb, skb->dev, ETH_P_ARP, + sha, np->dev->dev_addr, + send_skb->len) < 0) { + kfree_skb(send_skb); + continue; + } - /* Fill the device header for the ARP frame */ - if (dev_hard_header(send_skb, skb->dev, ptype, - sha, np->dev->dev_addr, - send_skb->len) < 0) { - kfree_skb(send_skb); - continue; + /* + * Fill out the arp protocol part. + * + * we only support ethernet device type, + * which (according to RFC 1390) should + * always equal 1 (Ethernet). + */ + + arp->ar_hrd = htons(np->dev->type); + arp->ar_pro = htons(ETH_P_IP); + arp->ar_hln = np->dev->addr_len; + arp->ar_pln = 4; + arp->ar_op = htons(type); + + arp_ptr = (unsigned char *)(arp + 1); + memcpy(arp_ptr, np->dev->dev_addr, np->dev->addr_len); + arp_ptr += np->dev->addr_len; + memcpy(arp_ptr, &tip, 4); + arp_ptr += 4; + memcpy(arp_ptr, sha, np->dev->addr_len); + arp_ptr += np->dev->addr_len; + memcpy(arp_ptr, &sip, 4); + + netpoll_send_skb(np, send_skb); + + /* If there are several rx_hooks for the same address, + we're fine by sending a single reply */ + break; } + spin_unlock_irqrestore(&npinfo->rx_lock, flags); + } else if( proto == ETH_P_IPV6) { +#if IS_ENABLED(CONFIG_IPV6) + struct nd_msg *msg; + u8 *lladdr = NULL; + struct ipv6hdr *hdr; + struct icmp6hdr *icmp6h; + const struct in6_addr *saddr; + const struct in6_addr *daddr; + struct inet6_dev *in6_dev = NULL; + struct in6_addr *target; + + in6_dev = in6_dev_get(skb->dev); + if (!in6_dev || !in6_dev->cnf.accept_ra) + return; - /* - * Fill out the arp protocol part. - * - * we only support ethernet device type, - * which (according to RFC 1390) should - * always equal 1 (Ethernet). 
- */ + if (!pskb_may_pull(skb, skb->len)) + return; - arp->ar_hrd = htons(np->dev->type); - arp->ar_pro = htons(ETH_P_IP); - arp->ar_hln = np->dev->addr_len; - arp->ar_pln = 4; - arp->ar_op = htons(type); + msg = (struct nd_msg *)skb_transport_header(skb); - arp_ptr = (unsigned char *)(arp + 1); - memcpy(arp_ptr, np->dev->dev_addr, np->dev->addr_len); - arp_ptr += np->dev->addr_len; - memcpy(arp_ptr, &tip, 4); - arp_ptr += 4; - memcpy(arp_ptr, sha, np->dev->addr_len); - arp_ptr += np->dev->addr_len; - memcpy(arp_ptr, &sip, 4); + __skb_push(skb, skb->data - skb_transport_header(skb)); - netpoll_send_skb(np, send_skb); + if (ipv6_hdr(skb)->hop_limit != 255) + return; + if (msg->icmph.icmp6_code != 0) + return; + if (msg->icmph.icmp6_type != NDISC_NEIGHBOUR_SOLICITATION) + return; + + saddr = &ipv6_hdr(skb)->saddr; + daddr = &ipv6_hdr(skb)->daddr; - /* If there are several rx_hooks for the same address, - we're fine by sending a single reply */ - break; + size = sizeof(struct icmp6hdr) + sizeof(struct in6_addr); + + spin_lock_irqsave(&npinfo->rx_lock, flags); + list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) { + if (!ipv6_addr_equal(daddr, &np->local_ip.in6)) + continue; + + hlen = LL_RESERVED_SPACE(np->dev); + tlen = np->dev->needed_tailroom; + send_skb = find_skb(np, size + hlen + tlen, hlen); + if (!send_skb) + continue; + + send_skb->protocol = htons(ETH_P_IPV6); + send_skb->dev = skb->dev; + + skb_reset_network_header(send_skb); + skb_put(send_skb, sizeof(struct ipv6hdr)); + hdr = ipv6_hdr(send_skb); + + *(__be32*)hdr = htonl(0x60000000); + + hdr->payload_len = htons(size); + hdr->nexthdr = IPPROTO_ICMPV6; + hdr->hop_limit = 255; + hdr->saddr = *saddr; + hdr->daddr = *daddr; + + send_skb->transport_header = send_skb->tail; + skb_put(send_skb, size); + + icmp6h = (struct icmp6hdr *)skb_transport_header(skb); + icmp6h->icmp6_type = NDISC_NEIGHBOUR_ADVERTISEMENT; + icmp6h->icmp6_router = 0; + icmp6h->icmp6_solicited = 1; + target = (struct in6_addr *)(skb_transport_header(send_skb) + sizeof(struct icmp6hdr)); + *target = msg->target; + icmp6h->icmp6_cksum = csum_ipv6_magic(saddr, daddr, size, + IPPROTO_ICMPV6, + csum_partial(icmp6h, + size, 0)); + + if (dev_hard_header(send_skb, skb->dev, ETH_P_IPV6, + lladdr, np->dev->dev_addr, + send_skb->len) < 0) { + kfree_skb(send_skb); + continue; + } + + netpoll_send_skb(np, send_skb); + + /* If there are several rx_hooks for the same address, + we're fine by sending a single reply */ + break; + } + spin_unlock_irqrestore(&npinfo->rx_lock, flags); +#endif } - spin_unlock_irqrestore(&npinfo->rx_lock, flags); +} + +static bool pkt_is_ns(struct sk_buff *skb) +{ + struct nd_msg *msg; + struct ipv6hdr *hdr; + + if (skb->protocol != htons(ETH_P_ARP)) + return false; + if (!pskb_may_pull(skb, sizeof(struct ipv6hdr) + sizeof(struct nd_msg))) + return false; + + msg = (struct nd_msg *)skb_transport_header(skb); + __skb_push(skb, skb->data - skb_transport_header(skb)); + hdr = ipv6_hdr(skb); + + if (hdr->nexthdr != IPPROTO_ICMPV6) + return false; + if (hdr->hop_limit != 255) + return false; + if (msg->icmph.icmp6_code != 0) + return false; + if (msg->icmph.icmp6_type != NDISC_NEIGHBOUR_SOLICITATION) + return false; + + return true; } int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo) @@ -571,9 +774,11 @@ int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo) goto out; /* check if netpoll clients need ARP */ - if (skb->protocol == htons(ETH_P_ARP) && - atomic_read(&trapped)) { - skb_queue_tail(&npinfo->arp_tx, skb); + if 
(skb->protocol == htons(ETH_P_ARP) && atomic_read(&trapped)) { + skb_queue_tail(&npinfo->neigh_tx, skb); + return 1; + } else if (pkt_is_ns(skb) && atomic_read(&trapped)) { + skb_queue_tail(&npinfo->neigh_tx, skb); return 1; } @@ -584,60 +789,100 @@ int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo) } proto = ntohs(eth_hdr(skb)->h_proto); - if (proto != ETH_P_IP) + if (proto != ETH_P_IP && proto != ETH_P_IPV6) goto out; if (skb->pkt_type == PACKET_OTHERHOST) goto out; if (skb_shared(skb)) goto out; - if (!pskb_may_pull(skb, sizeof(struct iphdr))) - goto out; - iph = (struct iphdr *)skb->data; - if (iph->ihl < 5 || iph->version != 4) - goto out; - if (!pskb_may_pull(skb, iph->ihl*4)) - goto out; - iph = (struct iphdr *)skb->data; - if (ip_fast_csum((u8 *)iph, iph->ihl) != 0) - goto out; - - len = ntohs(iph->tot_len); - if (skb->len < len || len < iph->ihl*4) - goto out; + if (proto == ETH_P_IP) { + if (!pskb_may_pull(skb, sizeof(struct iphdr))) + goto out; + iph = (struct iphdr *)skb->data; + if (iph->ihl < 5 || iph->version != 4) + goto out; + if (!pskb_may_pull(skb, iph->ihl*4)) + goto out; + iph = (struct iphdr *)skb->data; + if (ip_fast_csum((u8 *)iph, iph->ihl) != 0) + goto out; - /* - * Our transport medium may have padded the buffer out. - * Now We trim to the true length of the frame. - */ - if (pskb_trim_rcsum(skb, len)) - goto out; + len = ntohs(iph->tot_len); + if (skb->len < len || len < iph->ihl*4) + goto out; - iph = (struct iphdr *)skb->data; - if (iph->protocol != IPPROTO_UDP) - goto out; + /* + * Our transport medium may have padded the buffer out. + * Now We trim to the true length of the frame. + */ + if (pskb_trim_rcsum(skb, len)) + goto out; - len -= iph->ihl*4; - uh = (struct udphdr *)(((char *)iph) + iph->ihl*4); - ulen = ntohs(uh->len); + iph = (struct iphdr *)skb->data; + if (iph->protocol != IPPROTO_UDP) + goto out; - if (ulen != len) - goto out; - if (checksum_udp(skb, uh, ulen, iph->saddr, iph->daddr)) - goto out; + len -= iph->ihl*4; + uh = (struct udphdr *)(((char *)iph) + iph->ihl*4); + ulen = ntohs(uh->len); - list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) { - if (np->local_ip && np->local_ip != iph->daddr) - continue; - if (np->remote_ip && np->remote_ip != iph->saddr) - continue; - if (np->local_port && np->local_port != ntohs(uh->dest)) - continue; + if (ulen != len) + goto out; + if (checksum_udp(skb, uh, ulen, iph->saddr, iph->daddr)) + goto out; + list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) { + if (np->local_ip.ip && np->local_ip.ip != iph->daddr) + continue; + if (np->remote_ip.ip && np->remote_ip.ip != iph->saddr) + continue; + if (np->local_port && np->local_port != ntohs(uh->dest)) + continue; + + np->rx_hook(np, ntohs(uh->source), + (char *)(uh+1), + ulen - sizeof(struct udphdr)); + hits++; + } + } else { +#if IS_ENABLED(CONFIG_IPV6) + const struct ipv6hdr *ip6h; - np->rx_hook(np, ntohs(uh->source), - (char *)(uh+1), - ulen - sizeof(struct udphdr)); - hits++; + if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) + goto out; + ip6h = (struct ipv6hdr *)skb->data; + if (ip6h->version != 6) + goto out; + len = ntohs(ip6h->payload_len); + if (!len) + goto out; + if (len + sizeof(struct ipv6hdr) > skb->len) + goto out; + if (pskb_trim_rcsum(skb, len + sizeof(struct ipv6hdr))) + goto out; + ip6h = ipv6_hdr(skb); + if (!pskb_may_pull(skb, sizeof(struct udphdr))) + goto out; + uh = udp_hdr(skb); + ulen = ntohs(uh->len); + if (ulen != skb->len) + goto out; + if (udp6_csum_init(skb, uh, IPPROTO_UDP)) + goto out; + 
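+ /* Deliver the payload to every netpoll instance bound to this + * IPv6 address/port pair. + */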
list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) { + if (!ipv6_addr_equal(&np->local_ip.in6, &ip6h->daddr)) + continue; + if (!ipv6_addr_equal(&np->remote_ip.in6, &ip6h->saddr)) + continue; + if (np->local_port && np->local_port != ntohs(uh->dest)) + continue; + + np->rx_hook(np, ntohs(uh->source), + (char *)(uh+1), + ulen - sizeof(struct udphdr)); + hits++; + } +#endif } if (!hits) @@ -658,17 +903,44 @@ out: void netpoll_print_options(struct netpoll *np) { np_info(np, "local port %d\n", np->local_port); - np_info(np, "local IP %pI4\n", &np->local_ip); + if (np->ipv6) + np_info(np, "local IPv6 address %pI6c\n", &np->local_ip.in6); + else + np_info(np, "local IPv4 address %pI4\n", &np->local_ip.ip); np_info(np, "interface '%s'\n", np->dev_name); np_info(np, "remote port %d\n", np->remote_port); - np_info(np, "remote IP %pI4\n", &np->remote_ip); + if (np->ipv6) + np_info(np, "remote IPv6 address %pI6c\n", &np->remote_ip.in6); + else + np_info(np, "remote IPv4 address %pI4\n", &np->remote_ip.ip); np_info(np, "remote ethernet address %pM\n", np->remote_mac); } EXPORT_SYMBOL(netpoll_print_options); +static int netpoll_parse_ip_addr(const char *str, union inet_addr *addr) +{ + const char *end; + + if (!strchr(str, ':') && + in4_pton(str, -1, (void *)addr, -1, &end) > 0) { + if (!*end) + return 0; + } + if (in6_pton(str, -1, addr->in6.s6_addr, -1, &end) > 0) { +#if IS_ENABLED(CONFIG_IPV6) + if (!*end) + return 1; +#else + return -1; +#endif + } + return -1; +} + int netpoll_parse_options(struct netpoll *np, char *opt) { char *cur=opt, *delim; + int ipv6; if (*cur != '@') { if ((delim = strchr(cur, '@')) == NULL) @@ -684,7 +956,11 @@ int netpoll_parse_options(struct netpoll *np, char *opt) if ((delim = strchr(cur, '/')) == NULL) goto parse_failed; *delim = 0; - np->local_ip = in_aton(cur); + ipv6 = netpoll_parse_ip_addr(cur, &np->local_ip); + if (ipv6 < 0) + goto parse_failed; + else + np->ipv6 = (bool)ipv6; cur = delim; } cur++; @@ -716,7 +992,13 @@ int netpoll_parse_options(struct netpoll *np, char *opt) if ((delim = strchr(cur, '/')) == NULL) goto parse_failed; *delim = 0; - np->remote_ip = in_aton(cur); + ipv6 = netpoll_parse_ip_addr(cur, &np->remote_ip); + if (ipv6 < 0) + goto parse_failed; + else if (np->ipv6 != (bool)ipv6) + goto parse_failed; + else + np->ipv6 = (bool)ipv6; cur = delim + 1; if (*cur != 0) { @@ -744,6 +1026,7 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev, gfp_t gfp) np->dev = ndev; strlcpy(np->dev_name, ndev->name, IFNAMSIZ); + INIT_WORK(&np->cleanup_work, netpoll_async_cleanup); if ((ndev->priv_flags & IFF_DISABLE_NETPOLL) || !ndev->netdev_ops->ndo_poll_controller) { @@ -764,7 +1047,8 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev, gfp_t gfp) INIT_LIST_HEAD(&npinfo->rx_np); spin_lock_init(&npinfo->rx_lock); - skb_queue_head_init(&npinfo->arp_tx); + sema_init(&npinfo->dev_lock, 1); + skb_queue_head_init(&npinfo->neigh_tx); skb_queue_head_init(&npinfo->txq); INIT_DELAYED_WORK(&npinfo->tx_work, queue_process); @@ -777,7 +1061,7 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev, gfp_t gfp) goto free_npinfo; } } else { - npinfo = ndev->npinfo; + npinfo = rtnl_dereference(ndev->npinfo); atomic_inc(&npinfo->refcnt); } @@ -808,14 +1092,19 @@ int netpoll_setup(struct netpoll *np) struct in_device *in_dev; int err; - if (np->dev_name) - ndev = dev_get_by_name(&init_net, np->dev_name); + rtnl_lock(); + if (np->dev_name) { + struct net *net = current->nsproxy->net_ns; + ndev = __dev_get_by_name(net, np->dev_name); + } if 
(!ndev) { np_err(np, "%s doesn't exist, aborting\n", np->dev_name); - return -ENODEV; + err = -ENODEV; + goto unlock; } + dev_hold(ndev); - if (ndev->master) { + if (netdev_master_upper_dev_get(ndev)) { np_err(np, "%s is a slave device, aborting\n", np->dev_name); err = -EBUSY; goto put; @@ -826,15 +1115,14 @@ int netpoll_setup(struct netpoll *np) np_info(np, "device %s not up yet, forcing it\n", np->dev_name); - rtnl_lock(); err = dev_open(ndev); - rtnl_unlock(); if (err) { np_err(np, "failed to open %s\n", ndev->name); goto put; } + rtnl_unlock(); atleast = jiffies + HZ/10; atmost = jiffies + carrier_timeout * HZ; while (!netif_carrier_ok(ndev)) { @@ -854,39 +1142,70 @@ int netpoll_setup(struct netpoll *np) np_notice(np, "carrier detect appears untrustworthy, waiting 4 seconds\n"); msleep(4000); } + rtnl_lock(); } - if (!np->local_ip) { - rcu_read_lock(); - in_dev = __in_dev_get_rcu(ndev); + if (!np->local_ip.ip) { + if (!np->ipv6) { + in_dev = __in_dev_get_rtnl(ndev); + + if (!in_dev || !in_dev->ifa_list) { + np_err(np, "no IP address for %s, aborting\n", + np->dev_name); + err = -EDESTADDRREQ; + goto put; + } + + np->local_ip.ip = in_dev->ifa_list->ifa_local; + np_info(np, "local IP %pI4\n", &np->local_ip.ip); + } else { +#if IS_ENABLED(CONFIG_IPV6) + struct inet6_dev *idev; - if (!in_dev || !in_dev->ifa_list) { - rcu_read_unlock(); - np_err(np, "no IP address for %s, aborting\n", - np->dev_name); err = -EDESTADDRREQ; + idev = __in6_dev_get(ndev); + if (idev) { + struct inet6_ifaddr *ifp; + + read_lock_bh(&idev->lock); + list_for_each_entry(ifp, &idev->addr_list, if_list) { + if (ipv6_addr_type(&ifp->addr) & IPV6_ADDR_LINKLOCAL) + continue; + np->local_ip.in6 = ifp->addr; + err = 0; + break; + } + read_unlock_bh(&idev->lock); + } + if (err) { + np_err(np, "no IPv6 address for %s, aborting\n", + np->dev_name); + goto put; + } else + np_info(np, "local IPv6 %pI6c\n", &np->local_ip.in6); +#else + np_err(np, "IPv6 is not supported %s, aborting\n", + np->dev_name); + err = -EINVAL; goto put; +#endif } - - np->local_ip = in_dev->ifa_list->ifa_local; - rcu_read_unlock(); - np_info(np, "local IP %pI4\n", &np->local_ip); } /* fill up the skb queue */ refill_skbs(); - rtnl_lock(); err = __netpoll_setup(np, ndev, GFP_KERNEL); - rtnl_unlock(); - if (err) goto put; + rtnl_unlock(); return 0; put: dev_put(ndev); +unlock: + rtnl_unlock(); return err; } EXPORT_SYMBOL(netpoll_setup); @@ -903,7 +1222,7 @@ static void rcu_cleanup_netpoll_info(struct rcu_head *rcu_head) struct netpoll_info *npinfo = container_of(rcu_head, struct netpoll_info, rcu); - skb_queue_purge(&npinfo->arp_tx); + skb_queue_purge(&npinfo->neigh_tx); skb_queue_purge(&npinfo->txq); /* we can't call cancel_delayed_work_sync here, as we are in softirq */ @@ -921,7 +1240,11 @@ void __netpoll_cleanup(struct netpoll *np) struct netpoll_info *npinfo; unsigned long flags; - npinfo = np->dev->npinfo; + /* rtnl_dereference would be preferable here but + * rcu_cleanup_netpoll path can put us in here safely without + * holding the rtnl, so plain rcu_dereference it is + */ + npinfo = rtnl_dereference(np->dev->npinfo); if (!npinfo) return; @@ -933,6 +1256,8 @@ void __netpoll_cleanup(struct netpoll *np) spin_unlock_irqrestore(&npinfo->rx_lock, flags); } + synchronize_srcu(&netpoll_srcu); + if (atomic_dec_and_test(&npinfo->refcnt)) { const struct net_device_ops *ops; @@ -940,25 +1265,27 @@ void __netpoll_cleanup(struct netpoll *np) if (ops->ndo_netpoll_cleanup) ops->ndo_netpoll_cleanup(np->dev); - RCU_INIT_POINTER(np->dev->npinfo, NULL); + 
rcu_assign_pointer(np->dev->npinfo, NULL); call_rcu_bh(&npinfo->rcu, rcu_cleanup_netpoll_info); } } EXPORT_SYMBOL_GPL(__netpoll_cleanup); -static void rcu_cleanup_netpoll(struct rcu_head *rcu_head) +static void netpoll_async_cleanup(struct work_struct *work) { - struct netpoll *np = container_of(rcu_head, struct netpoll, rcu); + struct netpoll *np = container_of(work, struct netpoll, cleanup_work); + rtnl_lock(); __netpoll_cleanup(np); + rtnl_unlock(); kfree(np); } -void __netpoll_free_rcu(struct netpoll *np) +void __netpoll_free_async(struct netpoll *np) { - call_rcu_bh(&np->rcu, rcu_cleanup_netpoll); + schedule_work(&np->cleanup_work); } -EXPORT_SYMBOL_GPL(__netpoll_free_rcu); +EXPORT_SYMBOL_GPL(__netpoll_free_async); void netpoll_cleanup(struct netpoll *np) { diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c index 5e67defe2cb0..0777d0aa18c3 100644 --- a/net/core/netprio_cgroup.c +++ b/net/core/netprio_cgroup.c @@ -69,10 +69,8 @@ static int extend_netdev_table(struct net_device *dev, u32 target_idx) /* allocate & copy */ new = kzalloc(new_sz, GFP_KERNEL); - if (!new) { - pr_warn("Unable to alloc new priomap!\n"); + if (!new) return -ENOMEM; - } if (old) memcpy(new->priomap, old->priomap, diff --git a/net/core/pktgen.c b/net/core/pktgen.c index e6e1cbe863f5..11f2704c3810 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -164,6 +164,7 @@ #ifdef CONFIG_XFRM #include <net/xfrm.h> #endif +#include <net/netns/generic.h> #include <asm/byteorder.h> #include <linux/rcupdate.h> #include <linux/bitops.h> @@ -212,7 +213,6 @@ #define PKTGEN_MAGIC 0xbe9be955 #define PG_PROC_DIR "pktgen" #define PGCTRL "pgctrl" -static struct proc_dir_entry *pg_proc_dir; #define MAX_CFLOWS 65536 @@ -397,7 +397,15 @@ struct pktgen_hdr { __be32 tv_usec; }; -static bool pktgen_exiting __read_mostly; + +static int pg_net_id __read_mostly; + +struct pktgen_net { + struct net *net; + struct proc_dir_entry *proc_dir; + struct list_head pktgen_threads; + bool pktgen_exiting; +}; struct pktgen_thread { spinlock_t if_lock; /* for list of devices */ @@ -414,6 +422,7 @@ struct pktgen_thread { wait_queue_head_t queue; struct completion start_done; + struct pktgen_net *net; }; #define REMOVE 1 @@ -428,9 +437,9 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname); static struct pktgen_dev *pktgen_find_dev(struct pktgen_thread *t, const char *ifname, bool exact); static int pktgen_device_event(struct notifier_block *, unsigned long, void *); -static void pktgen_run_all_threads(void); -static void pktgen_reset_all_threads(void); -static void pktgen_stop_all_threads_ifs(void); +static void pktgen_run_all_threads(struct pktgen_net *pn); +static void pktgen_reset_all_threads(struct pktgen_net *pn); +static void pktgen_stop_all_threads_ifs(struct pktgen_net *pn); static void pktgen_stop(struct pktgen_thread *t); static void pktgen_clear_counters(struct pktgen_dev *pkt_dev); @@ -442,7 +451,6 @@ static int pg_clone_skb_d __read_mostly; static int debug __read_mostly; static DEFINE_MUTEX(pktgen_thread_lock); -static LIST_HEAD(pktgen_threads); static struct notifier_block pktgen_notifier_block = { .notifier_call = pktgen_device_event, @@ -464,6 +472,7 @@ static ssize_t pgctrl_write(struct file *file, const char __user *buf, { int err = 0; char data[128]; + struct pktgen_net *pn = net_generic(current->nsproxy->net_ns, pg_net_id); if (!capable(CAP_NET_ADMIN)) { err = -EPERM; @@ -480,13 +489,13 @@ static ssize_t pgctrl_write(struct file *file, const char __user *buf, data[count - 1] = 0; /* Make 
string */ if (!strcmp(data, "stop")) - pktgen_stop_all_threads_ifs(); + pktgen_stop_all_threads_ifs(pn); else if (!strcmp(data, "start")) - pktgen_run_all_threads(); + pktgen_run_all_threads(pn); else if (!strcmp(data, "reset")) - pktgen_reset_all_threads(); + pktgen_reset_all_threads(pn); else pr_warning("Unknown command: %s\n", data); @@ -499,7 +508,7 @@ out: static int pgctrl_open(struct inode *inode, struct file *file) { - return single_open(file, pgctrl_show, PDE(inode)->data); + return single_open(file, pgctrl_show, PDE_DATA(inode)); } static const struct file_operations pktgen_fops = { @@ -1676,7 +1685,7 @@ static ssize_t pktgen_if_write(struct file *file, static int pktgen_if_open(struct inode *inode, struct file *file) { - return single_open(file, pktgen_if_show, PDE(inode)->data); + return single_open(file, pktgen_if_show, PDE_DATA(inode)); } static const struct file_operations pktgen_if_fops = { @@ -1814,7 +1823,7 @@ out: static int pktgen_thread_open(struct inode *inode, struct file *file) { - return single_open(file, pktgen_thread_show, PDE(inode)->data); + return single_open(file, pktgen_thread_show, PDE_DATA(inode)); } static const struct file_operations pktgen_thread_fops = { @@ -1827,13 +1836,14 @@ static const struct file_operations pktgen_thread_fops = { }; /* Think find or remove for NN */ -static struct pktgen_dev *__pktgen_NN_threads(const char *ifname, int remove) +static struct pktgen_dev *__pktgen_NN_threads(const struct pktgen_net *pn, + const char *ifname, int remove) { struct pktgen_thread *t; struct pktgen_dev *pkt_dev = NULL; bool exact = (remove == FIND); - list_for_each_entry(t, &pktgen_threads, th_list) { + list_for_each_entry(t, &pn->pktgen_threads, th_list) { pkt_dev = pktgen_find_dev(t, ifname, exact); if (pkt_dev) { if (remove) { @@ -1851,7 +1861,7 @@ static struct pktgen_dev *__pktgen_NN_threads(const char *ifname, int remove) /* * mark a device for removal */ -static void pktgen_mark_device(const char *ifname) +static void pktgen_mark_device(const struct pktgen_net *pn, const char *ifname) { struct pktgen_dev *pkt_dev = NULL; const int max_tries = 10, msec_per_try = 125; @@ -1862,7 +1872,7 @@ static void pktgen_mark_device(const char *ifname) while (1) { - pkt_dev = __pktgen_NN_threads(ifname, REMOVE); + pkt_dev = __pktgen_NN_threads(pn, ifname, REMOVE); if (pkt_dev == NULL) break; /* success */ @@ -1883,21 +1893,21 @@ static void pktgen_mark_device(const char *ifname) mutex_unlock(&pktgen_thread_lock); } -static void pktgen_change_name(struct net_device *dev) +static void pktgen_change_name(const struct pktgen_net *pn, struct net_device *dev) { struct pktgen_thread *t; - list_for_each_entry(t, &pktgen_threads, th_list) { + list_for_each_entry(t, &pn->pktgen_threads, th_list) { struct pktgen_dev *pkt_dev; list_for_each_entry(pkt_dev, &t->if_list, list) { if (pkt_dev->odev != dev) continue; - remove_proc_entry(pkt_dev->entry->name, pg_proc_dir); + proc_remove(pkt_dev->entry); pkt_dev->entry = proc_create_data(dev->name, 0600, - pg_proc_dir, + pn->proc_dir, &pktgen_if_fops, pkt_dev); if (!pkt_dev->entry) @@ -1912,8 +1922,9 @@ static int pktgen_device_event(struct notifier_block *unused, unsigned long event, void *ptr) { struct net_device *dev = ptr; + struct pktgen_net *pn = net_generic(dev_net(dev), pg_net_id); - if (!net_eq(dev_net(dev), &init_net) || pktgen_exiting) + if (pn->pktgen_exiting) return NOTIFY_DONE; /* It is OK that we do not hold the group lock right now, @@ -1922,18 +1933,19 @@ static int pktgen_device_event(struct notifier_block 
*unused, switch (event) { case NETDEV_CHANGENAME: - pktgen_change_name(dev); + pktgen_change_name(pn, dev); break; case NETDEV_UNREGISTER: - pktgen_mark_device(dev->name); + pktgen_mark_device(pn, dev->name); break; } return NOTIFY_DONE; } -static struct net_device *pktgen_dev_get_by_name(struct pktgen_dev *pkt_dev, +static struct net_device *pktgen_dev_get_by_name(const struct pktgen_net *pn, + struct pktgen_dev *pkt_dev, const char *ifname) { char b[IFNAMSIZ+5]; @@ -1947,13 +1959,14 @@ static struct net_device *pktgen_dev_get_by_name(struct pktgen_dev *pkt_dev, } b[i] = 0; - return dev_get_by_name(&init_net, b); + return dev_get_by_name(pn->net, b); } /* Associate pktgen_dev with a device. */ -static int pktgen_setup_dev(struct pktgen_dev *pkt_dev, const char *ifname) +static int pktgen_setup_dev(const struct pktgen_net *pn, + struct pktgen_dev *pkt_dev, const char *ifname) { struct net_device *odev; int err; @@ -1964,7 +1977,7 @@ static int pktgen_setup_dev(struct pktgen_dev *pkt_dev, const char *ifname) pkt_dev->odev = NULL; } - odev = pktgen_dev_get_by_name(pkt_dev, ifname); + odev = pktgen_dev_get_by_name(pn, pkt_dev, ifname); if (!odev) { pr_err("no such netdevice: \"%s\"\n", ifname); return -ENODEV; @@ -2185,7 +2198,7 @@ static inline int f_pick(struct pktgen_dev *pkt_dev) pkt_dev->curfl = 0; /*reset */ } } else { - flow = random32() % pkt_dev->cflows; + flow = prandom_u32() % pkt_dev->cflows; pkt_dev->curfl = flow; if (pkt_dev->flows[flow].count > pkt_dev->lflow) { @@ -2206,9 +2219,10 @@ static inline int f_pick(struct pktgen_dev *pkt_dev) static void get_ipsec_sa(struct pktgen_dev *pkt_dev, int flow) { struct xfrm_state *x = pkt_dev->flows[flow].x; + struct pktgen_net *pn = net_generic(dev_net(pkt_dev->odev), pg_net_id); if (!x) { /*slow path: we dont already have xfrm_state*/ - x = xfrm_stateonly_find(&init_net, DUMMY_MARK, + x = xfrm_stateonly_find(pn->net, DUMMY_MARK, (xfrm_address_t *)&pkt_dev->cur_daddr, (xfrm_address_t *)&pkt_dev->cur_saddr, AF_INET, @@ -2232,7 +2246,7 @@ static void set_cur_queue_map(struct pktgen_dev *pkt_dev) else if (pkt_dev->queue_map_min <= pkt_dev->queue_map_max) { __u16 t; if (pkt_dev->flags & F_QUEUE_MAP_RND) { - t = random32() % + t = prandom_u32() % (pkt_dev->queue_map_max - pkt_dev->queue_map_min + 1) + pkt_dev->queue_map_min; @@ -2264,7 +2278,7 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev) __u32 tmp; if (pkt_dev->flags & F_MACSRC_RND) - mc = random32() % pkt_dev->src_mac_count; + mc = prandom_u32() % pkt_dev->src_mac_count; else { mc = pkt_dev->cur_src_mac_offset++; if (pkt_dev->cur_src_mac_offset >= @@ -2290,7 +2304,7 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev) __u32 tmp; if (pkt_dev->flags & F_MACDST_RND) - mc = random32() % pkt_dev->dst_mac_count; + mc = prandom_u32() % pkt_dev->dst_mac_count; else { mc = pkt_dev->cur_dst_mac_offset++; @@ -2317,21 +2331,21 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev) for (i = 0; i < pkt_dev->nr_labels; i++) if (pkt_dev->labels[i] & MPLS_STACK_BOTTOM) pkt_dev->labels[i] = MPLS_STACK_BOTTOM | - ((__force __be32)random32() & + ((__force __be32)prandom_u32() & htonl(0x000fffff)); } if ((pkt_dev->flags & F_VID_RND) && (pkt_dev->vlan_id != 0xffff)) { - pkt_dev->vlan_id = random32() & (4096-1); + pkt_dev->vlan_id = prandom_u32() & (4096 - 1); } if ((pkt_dev->flags & F_SVID_RND) && (pkt_dev->svlan_id != 0xffff)) { - pkt_dev->svlan_id = random32() & (4096 - 1); + pkt_dev->svlan_id = prandom_u32() & (4096 - 1); } if (pkt_dev->udp_src_min < pkt_dev->udp_src_max) { if 
(pkt_dev->flags & F_UDPSRC_RND) - pkt_dev->cur_udp_src = random32() % + pkt_dev->cur_udp_src = prandom_u32() % (pkt_dev->udp_src_max - pkt_dev->udp_src_min) + pkt_dev->udp_src_min; @@ -2344,7 +2358,7 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev) if (pkt_dev->udp_dst_min < pkt_dev->udp_dst_max) { if (pkt_dev->flags & F_UDPDST_RND) { - pkt_dev->cur_udp_dst = random32() % + pkt_dev->cur_udp_dst = prandom_u32() % (pkt_dev->udp_dst_max - pkt_dev->udp_dst_min) + pkt_dev->udp_dst_min; } else { @@ -2361,7 +2375,7 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev) if (imn < imx) { __u32 t; if (pkt_dev->flags & F_IPSRC_RND) - t = random32() % (imx - imn) + imn; + t = prandom_u32() % (imx - imn) + imn; else { t = ntohl(pkt_dev->cur_saddr); t++; @@ -2382,17 +2396,15 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev) __be32 s; if (pkt_dev->flags & F_IPDST_RND) { - t = random32() % (imx - imn) + imn; - s = htonl(t); - - while (ipv4_is_loopback(s) || - ipv4_is_multicast(s) || - ipv4_is_lbcast(s) || - ipv4_is_zeronet(s) || - ipv4_is_local_multicast(s)) { - t = random32() % (imx - imn) + imn; + do { + t = prandom_u32() % + (imx - imn) + imn; s = htonl(t); - } + } while (ipv4_is_loopback(s) || + ipv4_is_multicast(s) || + ipv4_is_lbcast(s) || + ipv4_is_zeronet(s) || + ipv4_is_local_multicast(s)); pkt_dev->cur_daddr = s; } else { t = ntohl(pkt_dev->cur_daddr); @@ -2423,7 +2435,7 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev) for (i = 0; i < 4; i++) { pkt_dev->cur_in6_daddr.s6_addr32[i] = - (((__force __be32)random32() | + (((__force __be32)prandom_u32() | pkt_dev->min_in6_daddr.s6_addr32[i]) & pkt_dev->max_in6_daddr.s6_addr32[i]); } @@ -2433,7 +2445,7 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev) if (pkt_dev->min_pkt_size < pkt_dev->max_pkt_size) { __u32 t; if (pkt_dev->flags & F_TXSIZE_RND) { - t = random32() % + t = prandom_u32() % (pkt_dev->max_pkt_size - pkt_dev->min_pkt_size) + pkt_dev->min_pkt_size; } else { @@ -2915,7 +2927,7 @@ static void pktgen_run(struct pktgen_thread *t) t->control &= ~(T_STOP); } -static void pktgen_stop_all_threads_ifs(void) +static void pktgen_stop_all_threads_ifs(struct pktgen_net *pn) { struct pktgen_thread *t; @@ -2923,7 +2935,7 @@ static void pktgen_stop_all_threads_ifs(void) mutex_lock(&pktgen_thread_lock); - list_for_each_entry(t, &pktgen_threads, th_list) + list_for_each_entry(t, &pn->pktgen_threads, th_list) t->control |= T_STOP; mutex_unlock(&pktgen_thread_lock); @@ -2959,28 +2971,28 @@ signal: return 0; } -static int pktgen_wait_all_threads_run(void) +static int pktgen_wait_all_threads_run(struct pktgen_net *pn) { struct pktgen_thread *t; int sig = 1; mutex_lock(&pktgen_thread_lock); - list_for_each_entry(t, &pktgen_threads, th_list) { + list_for_each_entry(t, &pn->pktgen_threads, th_list) { sig = pktgen_wait_thread_run(t); if (sig == 0) break; } if (sig == 0) - list_for_each_entry(t, &pktgen_threads, th_list) + list_for_each_entry(t, &pn->pktgen_threads, th_list) t->control |= (T_STOP); mutex_unlock(&pktgen_thread_lock); return sig; } -static void pktgen_run_all_threads(void) +static void pktgen_run_all_threads(struct pktgen_net *pn) { struct pktgen_thread *t; @@ -2988,7 +3000,7 @@ static void pktgen_run_all_threads(void) mutex_lock(&pktgen_thread_lock); - list_for_each_entry(t, &pktgen_threads, th_list) + list_for_each_entry(t, &pn->pktgen_threads, th_list) t->control |= (T_RUN); mutex_unlock(&pktgen_thread_lock); @@ -2996,10 +3008,10 @@ static void pktgen_run_all_threads(void) /* Propagate thread->control */ 
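The per-netns rework above changes who owns the thread list, but not the signalling protocol itself: the controller sets request bits in each worker's control word under pktgen_thread_lock, then sleeps briefly so the kthreads can observe and act on them. A condensed sketch of that handshake (illustrative only; pktgen_signal_all is a hypothetical helper, not code from this patch):

	static void pktgen_signal_all(struct pktgen_net *pn, u32 bit)
	{
		struct pktgen_thread *t;

		mutex_lock(&pktgen_thread_lock);
		list_for_each_entry(t, &pn->pktgen_threads, th_list)
			t->control |= bit;	/* e.g. T_RUN, T_STOP, T_REMDEVALL */
		mutex_unlock(&pktgen_thread_lock);

		/* give the workers a window to pick up the new bits */
		schedule_timeout_interruptible(msecs_to_jiffies(125));
	}

Each worker wakes in pktgen_thread_worker(), acts on whatever bits it finds, and clears them; the 125 ms sleep only has to be long enough for the workers to start, since pktgen_wait_all_threads_run() does the actual synchronization.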
schedule_timeout_interruptible(msecs_to_jiffies(125)); - pktgen_wait_all_threads_run(); + pktgen_wait_all_threads_run(pn); } -static void pktgen_reset_all_threads(void) +static void pktgen_reset_all_threads(struct pktgen_net *pn) { struct pktgen_thread *t; @@ -3007,7 +3019,7 @@ static void pktgen_reset_all_threads(void) mutex_lock(&pktgen_thread_lock); - list_for_each_entry(t, &pktgen_threads, th_list) + list_for_each_entry(t, &pn->pktgen_threads, th_list) t->control |= (T_REMDEVALL); mutex_unlock(&pktgen_thread_lock); @@ -3015,7 +3027,7 @@ static void pktgen_reset_all_threads(void) /* Propagate thread->control */ schedule_timeout_interruptible(msecs_to_jiffies(125)); - pktgen_wait_all_threads_run(); + pktgen_wait_all_threads_run(pn); } static void show_results(struct pktgen_dev *pkt_dev, int nr_frags) @@ -3157,9 +3169,7 @@ static void pktgen_rem_all_ifs(struct pktgen_thread *t) static void pktgen_rem_thread(struct pktgen_thread *t) { /* Remove from the thread list */ - - remove_proc_entry(t->tsk->comm, pg_proc_dir); - + remove_proc_entry(t->tsk->comm, t->net->proc_dir); } static void pktgen_resched(struct pktgen_dev *pkt_dev) @@ -3305,7 +3315,7 @@ static int pktgen_thread_worker(void *arg) pkt_dev = next_to_run(t); if (unlikely(!pkt_dev && t->control == 0)) { - if (pktgen_exiting) + if (t->net->pktgen_exiting) break; wait_event_interruptible_timeout(t->queue, t->control != 0, @@ -3427,7 +3437,7 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname) /* We don't allow a device to be on several threads */ - pkt_dev = __pktgen_NN_threads(ifname, FIND); + pkt_dev = __pktgen_NN_threads(t->net, ifname, FIND); if (pkt_dev) { pr_err("ERROR: interface already used\n"); return -EBUSY; @@ -3462,13 +3472,13 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname) pkt_dev->svlan_id = 0xffff; pkt_dev->node = -1; - err = pktgen_setup_dev(pkt_dev, ifname); + err = pktgen_setup_dev(t->net, pkt_dev, ifname); if (err) goto out1; if (pkt_dev->odev->priv_flags & IFF_TX_SKB_SHARING) pkt_dev->clone_skb = pg_clone_skb_d; - pkt_dev->entry = proc_create_data(ifname, 0600, pg_proc_dir, + pkt_dev->entry = proc_create_data(ifname, 0600, t->net->proc_dir, &pktgen_if_fops, pkt_dev); if (!pkt_dev->entry) { pr_err("cannot create %s/%s procfs entry\n", @@ -3493,7 +3503,7 @@ out1: return err; } -static int __init pktgen_create_thread(int cpu) +static int __net_init pktgen_create_thread(int cpu, struct pktgen_net *pn) { struct pktgen_thread *t; struct proc_dir_entry *pe; @@ -3511,7 +3521,7 @@ static int __init pktgen_create_thread(int cpu) INIT_LIST_HEAD(&t->if_list); - list_add_tail(&t->th_list, &pktgen_threads); + list_add_tail(&t->th_list, &pn->pktgen_threads); init_completion(&t->start_done); p = kthread_create_on_node(pktgen_thread_worker, @@ -3527,7 +3537,7 @@ static int __init pktgen_create_thread(int cpu) kthread_bind(p, cpu); t->tsk = p; - pe = proc_create_data(t->tsk->comm, 0600, pg_proc_dir, + pe = proc_create_data(t->tsk->comm, 0600, pn->proc_dir, &pktgen_thread_fops, t); if (!pe) { pr_err("cannot create %s/%s procfs entry\n", @@ -3538,6 +3548,7 @@ static int __init pktgen_create_thread(int cpu) return -EINVAL; } + t->net = pn; wake_up_process(p); wait_for_completion(&t->start_done); @@ -3563,7 +3574,6 @@ static void _rem_dev_from_if_list(struct pktgen_thread *t, static int pktgen_remove_device(struct pktgen_thread *t, struct pktgen_dev *pkt_dev) { - pr_debug("remove_device pkt_dev=%p\n", pkt_dev); if (pkt_dev->running) { @@ -3583,7 +3593,7 @@ static int 
pktgen_remove_device(struct pktgen_thread *t, _rem_dev_from_if_list(t, pkt_dev); if (pkt_dev->entry) - remove_proc_entry(pkt_dev->entry->name, pg_proc_dir); + proc_remove(pkt_dev->entry); #ifdef CONFIG_XFRM free_SAs(pkt_dev); @@ -3595,63 +3605,63 @@ static int pktgen_remove_device(struct pktgen_thread *t, return 0; } -static int __init pg_init(void) +static int __net_init pg_net_init(struct net *net) { - int cpu; + struct pktgen_net *pn = net_generic(net, pg_net_id); struct proc_dir_entry *pe; - int ret = 0; - - pr_info("%s", version); - - pg_proc_dir = proc_mkdir(PG_PROC_DIR, init_net.proc_net); - if (!pg_proc_dir) + int cpu, ret = 0; + + pn->net = net; + INIT_LIST_HEAD(&pn->pktgen_threads); + pn->pktgen_exiting = false; + pn->proc_dir = proc_mkdir(PG_PROC_DIR, pn->net->proc_net); + if (!pn->proc_dir) { + pr_warn("cannot create /proc/net/%s\n", PG_PROC_DIR); return -ENODEV; - - pe = proc_create(PGCTRL, 0600, pg_proc_dir, &pktgen_fops); + } + pe = proc_create(PGCTRL, 0600, pn->proc_dir, &pktgen_fops); if (pe == NULL) { - pr_err("ERROR: cannot create %s procfs entry\n", PGCTRL); + pr_err("cannot create %s procfs entry\n", PGCTRL); ret = -EINVAL; - goto remove_dir; + goto remove; } - register_netdevice_notifier(&pktgen_notifier_block); - for_each_online_cpu(cpu) { int err; - err = pktgen_create_thread(cpu); + err = pktgen_create_thread(cpu, pn); if (err) - pr_warning("WARNING: Cannot create thread for cpu %d (%d)\n", + pr_warn("Cannot create thread for cpu %d (%d)\n", cpu, err); } - if (list_empty(&pktgen_threads)) { - pr_err("ERROR: Initialization failed for all threads\n"); + if (list_empty(&pn->pktgen_threads)) { + pr_err("Initialization failed for all threads\n"); ret = -ENODEV; - goto unregister; + goto remove_entry; } return 0; - unregister: - unregister_netdevice_notifier(&pktgen_notifier_block); - remove_proc_entry(PGCTRL, pg_proc_dir); - remove_dir: - proc_net_remove(&init_net, PG_PROC_DIR); +remove_entry: + remove_proc_entry(PGCTRL, pn->proc_dir); +remove: + remove_proc_entry(PG_PROC_DIR, pn->net->proc_net); return ret; } -static void __exit pg_cleanup(void) +static void __net_exit pg_net_exit(struct net *net) { + struct pktgen_net *pn = net_generic(net, pg_net_id); struct pktgen_thread *t; struct list_head *q, *n; LIST_HEAD(list); /* Stop all interfaces & threads */ - pktgen_exiting = true; + pn->pktgen_exiting = true; mutex_lock(&pktgen_thread_lock); - list_splice_init(&pktgen_threads, &list); + list_splice_init(&pn->pktgen_threads, &list); mutex_unlock(&pktgen_thread_lock); list_for_each_safe(q, n, &list) { @@ -3661,12 +3671,36 @@ static void __exit pg_cleanup(void) kfree(t); } - /* Un-register us from receiving netdevice events */ - unregister_netdevice_notifier(&pktgen_notifier_block); + remove_proc_entry(PGCTRL, pn->proc_dir); + remove_proc_entry(PG_PROC_DIR, pn->net->proc_net); +} + +static struct pernet_operations pg_net_ops = { + .init = pg_net_init, + .exit = pg_net_exit, + .id = &pg_net_id, + .size = sizeof(struct pktgen_net), +}; + +static int __init pg_init(void) +{ + int ret = 0; - /* Clean up proc file system */ - remove_proc_entry(PGCTRL, pg_proc_dir); - proc_net_remove(&init_net, PG_PROC_DIR); + pr_info("%s", version); + ret = register_pernet_subsys(&pg_net_ops); + if (ret) + return ret; + ret = register_netdevice_notifier(&pktgen_notifier_block); + if (ret) + unregister_pernet_subsys(&pg_net_ops); + + return ret; +} + +static void __exit pg_cleanup(void) +{ + unregister_netdevice_notifier(&pktgen_notifier_block); + unregister_pernet_subsys(&pg_net_ops); } 
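The tail of this conversion is the canonical pernet pattern: because pg_net_ops sets both .id and .size, the core allocates and zeroes one struct pktgen_net per namespace before calling the .init hook, and module load reduces to registering the pernet ops plus the netdevice notifier (unregistering the pernet ops again if the notifier hookup fails). A minimal sketch of the same pattern for a hypothetical subsystem (the foo_* names are placeholders, not from this patch):

	#include <net/net_namespace.h>
	#include <net/netns/generic.h>

	static int foo_net_id __read_mostly;

	struct foo_net {
		struct list_head objects;	/* per-namespace state */
	};

	static int __net_init foo_net_init(struct net *net)
	{
		/* core has already kzalloc'ed sizeof(struct foo_net) */
		struct foo_net *fn = net_generic(net, foo_net_id);

		INIT_LIST_HEAD(&fn->objects);
		return 0;
	}

	static void __net_exit foo_net_exit(struct net *net)
	{
		/* undo foo_net_init() and anything added since */
	}

	static struct pernet_operations foo_net_ops = {
		.init = foo_net_init,
		.exit = foo_net_exit,
		.id   = &foo_net_id,
		.size = sizeof(struct foo_net),
	};

After register_pernet_subsys(&foo_net_ops), every existing and future namespace gets its own instance, and any code holding a struct net pointer reaches it with net_generic(net, foo_net_id), exactly as the pktgen paths above do with pg_net_id.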
module_init(pg_init); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 1868625af25e..a08bd2b7fe3f 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -496,8 +496,10 @@ static int rtnl_link_fill(struct sk_buff *skb, const struct net_device *dev) } if (ops->fill_info) { data = nla_nest_start(skb, IFLA_INFO_DATA); - if (data == NULL) + if (data == NULL) { + err = -EMSGSIZE; goto err_cancel_link; + } err = ops->fill_info(skb, dev); if (err < 0) goto err_cancel_data; @@ -515,32 +517,6 @@ out: return err; } -static const int rtm_min[RTM_NR_FAMILIES] = -{ - [RTM_FAM(RTM_NEWLINK)] = NLMSG_LENGTH(sizeof(struct ifinfomsg)), - [RTM_FAM(RTM_NEWADDR)] = NLMSG_LENGTH(sizeof(struct ifaddrmsg)), - [RTM_FAM(RTM_NEWROUTE)] = NLMSG_LENGTH(sizeof(struct rtmsg)), - [RTM_FAM(RTM_NEWRULE)] = NLMSG_LENGTH(sizeof(struct fib_rule_hdr)), - [RTM_FAM(RTM_NEWQDISC)] = NLMSG_LENGTH(sizeof(struct tcmsg)), - [RTM_FAM(RTM_NEWTCLASS)] = NLMSG_LENGTH(sizeof(struct tcmsg)), - [RTM_FAM(RTM_NEWTFILTER)] = NLMSG_LENGTH(sizeof(struct tcmsg)), - [RTM_FAM(RTM_NEWACTION)] = NLMSG_LENGTH(sizeof(struct tcamsg)), - [RTM_FAM(RTM_GETMULTICAST)] = NLMSG_LENGTH(sizeof(struct rtgenmsg)), - [RTM_FAM(RTM_GETANYCAST)] = NLMSG_LENGTH(sizeof(struct rtgenmsg)), -}; - -static const int rta_max[RTM_NR_FAMILIES] = -{ - [RTM_FAM(RTM_NEWLINK)] = IFLA_MAX, - [RTM_FAM(RTM_NEWADDR)] = IFA_MAX, - [RTM_FAM(RTM_NEWROUTE)] = RTA_MAX, - [RTM_FAM(RTM_NEWRULE)] = FRA_MAX, - [RTM_FAM(RTM_NEWQDISC)] = TCA_MAX, - [RTM_FAM(RTM_NEWTCLASS)] = TCA_MAX, - [RTM_FAM(RTM_NEWTFILTER)] = TCA_MAX, - [RTM_FAM(RTM_NEWACTION)] = TCAA_MAX, -}; - int rtnetlink_send(struct sk_buff *skb, struct net *net, u32 pid, unsigned int group, int echo) { struct sock *rtnl = net->rtnl; @@ -780,6 +756,7 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev, + nla_total_size(4) /* IFLA_MTU */ + nla_total_size(4) /* IFLA_LINK */ + nla_total_size(4) /* IFLA_MASTER */ + + nla_total_size(1) /* IFLA_CARRIER */ + nla_total_size(4) /* IFLA_PROMISCUITY */ + nla_total_size(4) /* IFLA_NUM_TX_QUEUES */ + nla_total_size(4) /* IFLA_NUM_RX_QUEUES */ @@ -879,6 +856,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, const struct rtnl_link_stats64 *stats; struct nlattr *attr, *af_spec; struct rtnl_af_ops *af_ops; + struct net_device *upper_dev = netdev_master_upper_dev_get(dev); ASSERT_RTNL(); nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ifm), flags); @@ -907,8 +885,9 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, #endif (dev->ifindex != dev->iflink && nla_put_u32(skb, IFLA_LINK, dev->iflink)) || - (dev->master && - nla_put_u32(skb, IFLA_MASTER, dev->master->ifindex)) || + (upper_dev && + nla_put_u32(skb, IFLA_MASTER, upper_dev->ifindex)) || + nla_put_u8(skb, IFLA_CARRIER, netif_carrier_ok(dev)) || (dev->qdisc && nla_put_string(skb, IFLA_QDISC, dev->qdisc->ops->id)) || (dev->ifalias && @@ -976,6 +955,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, * report anything. 
*/ ivi.spoofchk = -1; + memset(ivi.mac, 0, sizeof(ivi.mac)); if (dev->netdev_ops->ndo_get_vf_config(dev, i, &ivi)) break; vf_mac.vf = @@ -1057,7 +1037,6 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) int idx = 0, s_idx; struct net_device *dev; struct hlist_head *head; - struct hlist_node *node; struct nlattr *tb[IFLA_MAX+1]; u32 ext_filter_mask = 0; @@ -1067,7 +1046,7 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) rcu_read_lock(); cb->seq = net->dev_base_seq; - if (nlmsg_parse(cb->nlh, sizeof(struct rtgenmsg), tb, IFLA_MAX, + if (nlmsg_parse(cb->nlh, sizeof(struct ifinfomsg), tb, IFLA_MAX, ifla_policy) >= 0) { if (tb[IFLA_EXT_MASK]) @@ -1077,7 +1056,7 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { idx = 0; head = &net->dev_index_head[h]; - hlist_for_each_entry_rcu(dev, node, head, index_hlist) { + hlist_for_each_entry_rcu(dev, head, index_hlist) { if (idx < s_idx) goto cont; if (rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK, @@ -1108,6 +1087,7 @@ const struct nla_policy ifla_policy[IFLA_MAX+1] = { [IFLA_MTU] = { .type = NLA_U32 }, [IFLA_LINK] = { .type = NLA_U32 }, [IFLA_MASTER] = { .type = NLA_U32 }, + [IFLA_CARRIER] = { .type = NLA_U8 }, [IFLA_TXQLEN] = { .type = NLA_U32 }, [IFLA_WEIGHT] = { .type = NLA_U32 }, [IFLA_OPERSTATE] = { .type = NLA_U8 }, @@ -1270,16 +1250,16 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr *attr) static int do_set_master(struct net_device *dev, int ifindex) { - struct net_device *master_dev; + struct net_device *upper_dev = netdev_master_upper_dev_get(dev); const struct net_device_ops *ops; int err; - if (dev->master) { - if (dev->master->ifindex == ifindex) + if (upper_dev) { + if (upper_dev->ifindex == ifindex) return 0; - ops = dev->master->netdev_ops; + ops = upper_dev->netdev_ops; if (ops->ndo_del_slave) { - err = ops->ndo_del_slave(dev->master, dev); + err = ops->ndo_del_slave(upper_dev, dev); if (err) return err; } else { @@ -1288,12 +1268,12 @@ static int do_set_master(struct net_device *dev, int ifindex) } if (ifindex) { - master_dev = __dev_get_by_index(dev_net(dev), ifindex); - if (!master_dev) + upper_dev = __dev_get_by_index(dev_net(dev), ifindex); + if (!upper_dev) return -EINVAL; - ops = master_dev->netdev_ops; + ops = upper_dev->netdev_ops; if (ops->ndo_add_slave) { - err = ops->ndo_add_slave(master_dev, dev); + err = ops->ndo_add_slave(upper_dev, dev); if (err) return err; } else { @@ -1307,7 +1287,6 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, struct nlattr **tb, char *ifname, int modified) { const struct net_device_ops *ops = dev->netdev_ops; - int send_addr_notify = 0; int err; if (tb[IFLA_NET_NS_PID] || tb[IFLA_NET_NS_FD]) { @@ -1360,16 +1339,6 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, struct sockaddr *sa; int len; - if (!ops->ndo_set_mac_address) { - err = -EOPNOTSUPP; - goto errout; - } - - if (!netif_device_present(dev)) { - err = -ENODEV; - goto errout; - } - len = sizeof(sa_family_t) + dev->addr_len; sa = kmalloc(len, GFP_KERNEL); if (!sa) { @@ -1379,13 +1348,11 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, sa->sa_family = dev->type; memcpy(sa->sa_data, nla_data(tb[IFLA_ADDRESS]), dev->addr_len); - err = ops->ndo_set_mac_address(dev, sa); + err = dev_set_mac_address(dev, sa); kfree(sa); if (err) goto errout; - send_addr_notify = 1; modified = 1; - add_device_randomness(dev->dev_addr, 
dev->addr_len); } if (tb[IFLA_MTU]) { @@ -1422,7 +1389,7 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, if (tb[IFLA_BROADCAST]) { nla_memcpy(dev->broadcast, tb[IFLA_BROADCAST], dev->addr_len); - send_addr_notify = 1; + call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); } if (ifm->ifi_flags || ifm->ifi_change) { @@ -1438,6 +1405,13 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, modified = 1; } + if (tb[IFLA_CARRIER]) { + err = dev_change_carrier(dev, nla_get_u8(tb[IFLA_CARRIER])); + if (err) + goto errout; + modified = 1; + } + if (tb[IFLA_TXQLEN]) dev->tx_queue_len = nla_get_u32(tb[IFLA_TXQLEN]); @@ -1536,13 +1510,10 @@ errout: net_warn_ratelimited("A link change request failed with some changes committed already. Interface %s may have been left with an inconsistent configuration, please check.\n", dev->name); - if (send_addr_notify) - call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); - return err; } -static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); struct ifinfomsg *ifm; @@ -1583,7 +1554,7 @@ errout: return err; } -static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); const struct rtnl_link_ops *ops; @@ -1672,9 +1643,11 @@ struct net_device *rtnl_create_link(struct net *net, if (tb[IFLA_MTU]) dev->mtu = nla_get_u32(tb[IFLA_MTU]); - if (tb[IFLA_ADDRESS]) + if (tb[IFLA_ADDRESS]) { memcpy(dev->dev_addr, nla_data(tb[IFLA_ADDRESS]), nla_len(tb[IFLA_ADDRESS])); + dev->addr_assign_type = NET_ADDR_SET; + } if (tb[IFLA_BROADCAST]) memcpy(dev->broadcast, nla_data(tb[IFLA_BROADCAST]), nla_len(tb[IFLA_BROADCAST])); @@ -1712,7 +1685,7 @@ static int rtnl_group_changelink(struct net *net, int group, return 0; } -static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); const struct rtnl_link_ops *ops; @@ -1867,7 +1840,7 @@ out: } } -static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) +static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh) { struct net *net = sock_net(skb->sk); struct ifinfomsg *ifm; @@ -1923,7 +1896,7 @@ static u16 rtnl_calcit(struct sk_buff *skb, struct nlmsghdr *nlh) u32 ext_filter_mask = 0; u16 min_ifinfo_dump_size = 0; - if (nlmsg_parse(nlh, sizeof(struct rtgenmsg), tb, IFLA_MAX, + if (nlmsg_parse(nlh, sizeof(struct ifinfomsg), tb, IFLA_MAX, ifla_policy) >= 0) { if (tb[IFLA_EXT_MASK]) ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]); @@ -1958,8 +1931,11 @@ static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb) if (rtnl_msg_handlers[idx] == NULL || rtnl_msg_handlers[idx][type].dumpit == NULL) continue; - if (idx > s_idx) + if (idx > s_idx) { memset(&cb->args[0], 0, sizeof(cb->args)); + cb->prev_seq = 0; + cb->seq = 0; + } if (rtnl_msg_handlers[idx][type].dumpit(skb, cb)) break; } @@ -1992,6 +1968,7 @@ errout: if (err < 0) rtnl_set_sk_err(net, RTNLGRP_LINK, err); } +EXPORT_SYMBOL(rtmsg_ifinfo); static int nlmsg_populate_fdb_fill(struct sk_buff *skb, struct net_device *dev, @@ -2051,19 +2028,47 @@ errout: rtnl_set_sk_err(net, RTNLGRP_NEIGH, err); } -static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +/** + * ndo_dflt_fdb_add - default netdevice operation to add an FDB 
entry + */ +int ndo_dflt_fdb_add(struct ndmsg *ndm, + struct nlattr *tb[], + struct net_device *dev, + const unsigned char *addr, + u16 flags) +{ + int err = -EINVAL; + + /* If aging addresses are supported device will need to + * implement its own handler for this. + */ + if (ndm->ndm_state && !(ndm->ndm_state & NUD_PERMANENT)) { + pr_info("%s: FDB only supports static addresses\n", dev->name); + return err; + } + + if (is_unicast_ether_addr(addr) || is_link_local_ether_addr(addr)) + err = dev_uc_add_excl(dev, addr); + else if (is_multicast_ether_addr(addr)) + err = dev_mc_add_excl(dev, addr); + + /* Only return duplicate errors if NLM_F_EXCL is set */ + if (err == -EEXIST && !(flags & NLM_F_EXCL)) + err = 0; + + return err; +} +EXPORT_SYMBOL(ndo_dflt_fdb_add); + +static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); - struct net_device *master = NULL; struct ndmsg *ndm; struct nlattr *tb[NDA_MAX+1]; struct net_device *dev; u8 *addr; int err; - if (!capable(CAP_NET_ADMIN)) - return -EPERM; - err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL); if (err < 0) return err; @@ -2086,7 +2091,7 @@ static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) } addr = nla_data(tb[NDA_LLADDR]); - if (!is_valid_ether_addr(addr)) { + if (is_zero_ether_addr(addr)) { pr_info("PF_BRIDGE: RTM_NEWNEIGH with invalid ether address\n"); return -EINVAL; } @@ -2096,10 +2101,10 @@ static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) /* Support fdb on master device the net/bridge default case */ if ((!ndm->ndm_flags || ndm->ndm_flags & NTF_MASTER) && (dev->priv_flags & IFF_BRIDGE_PORT)) { - master = dev->master; - err = master->netdev_ops->ndo_fdb_add(ndm, tb, - dev, addr, - nlh->nlmsg_flags); + struct net_device *br_dev = netdev_master_upper_dev_get(dev); + const struct net_device_ops *ops = br_dev->netdev_ops; + + err = ops->ndo_fdb_add(ndm, tb, dev, addr, nlh->nlmsg_flags); if (err) goto out; else @@ -2107,10 +2112,13 @@ static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) } /* Embedded bridge, macvlan, and any other device support */ - if ((ndm->ndm_flags & NTF_SELF) && dev->netdev_ops->ndo_fdb_add) { - err = dev->netdev_ops->ndo_fdb_add(ndm, tb, - dev, addr, - nlh->nlmsg_flags); + if ((ndm->ndm_flags & NTF_SELF)) { + if (dev->netdev_ops->ndo_fdb_add) + err = dev->netdev_ops->ndo_fdb_add(ndm, tb, dev, addr, + nlh->nlmsg_flags); + else + err = ndo_dflt_fdb_add(ndm, tb, dev, addr, + nlh->nlmsg_flags); if (!err) { rtnl_fdb_notify(dev, addr, RTM_NEWNEIGH); @@ -2121,11 +2129,40 @@ out: return err; } -static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +/** + * ndo_dflt_fdb_del - default netdevice operation to delete an FDB entry + */ +int ndo_dflt_fdb_del(struct ndmsg *ndm, + struct nlattr *tb[], + struct net_device *dev, + const unsigned char *addr) +{ + int err = -EOPNOTSUPP; + + /* If aging addresses are supported device will need to + * implement its own handler for this. 
+ */ + if (ndm->ndm_state & NUD_PERMANENT) { + pr_info("%s: FDB only supports static addresses\n", dev->name); + return -EINVAL; + } + + if (is_unicast_ether_addr(addr) || is_link_local_ether_addr(addr)) + err = dev_uc_del(dev, addr); + else if (is_multicast_ether_addr(addr)) + err = dev_mc_del(dev, addr); + else + err = -EINVAL; + + return err; +} +EXPORT_SYMBOL(ndo_dflt_fdb_del); + +static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); struct ndmsg *ndm; - struct nlattr *llattr; + struct nlattr *tb[NDA_MAX+1]; struct net_device *dev; int err = -EINVAL; __u8 *addr; @@ -2133,8 +2170,9 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) if (!capable(CAP_NET_ADMIN)) return -EPERM; - if (nlmsg_len(nlh) < sizeof(*ndm)) - return -EINVAL; + err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL); + if (err < 0) + return err; ndm = nlmsg_data(nlh); if (ndm->ndm_ifindex == 0) { @@ -2148,22 +2186,27 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) return -ENODEV; } - llattr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_LLADDR); - if (llattr == NULL || nla_len(llattr) != ETH_ALEN) { - pr_info("PF_BRIGDE: RTM_DELNEIGH with invalid address\n"); + if (!tb[NDA_LLADDR] || nla_len(tb[NDA_LLADDR]) != ETH_ALEN) { + pr_info("PF_BRIDGE: RTM_DELNEIGH with invalid address\n"); + return -EINVAL; + } + + addr = nla_data(tb[NDA_LLADDR]); + if (is_zero_ether_addr(addr)) { + pr_info("PF_BRIDGE: RTM_DELNEIGH with invalid ether address\n"); return -EINVAL; } - addr = nla_data(llattr); err = -EOPNOTSUPP; /* Support fdb on master device the net/bridge default case */ if ((!ndm->ndm_flags || ndm->ndm_flags & NTF_MASTER) && (dev->priv_flags & IFF_BRIDGE_PORT)) { - struct net_device *master = dev->master; + struct net_device *br_dev = netdev_master_upper_dev_get(dev); + const struct net_device_ops *ops = br_dev->netdev_ops; - if (master->netdev_ops->ndo_fdb_del) - err = master->netdev_ops->ndo_fdb_del(ndm, dev, addr); + if (ops->ndo_fdb_del) + err = ops->ndo_fdb_del(ndm, tb, dev, addr); if (err) goto out; @@ -2172,8 +2215,11 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) } /* Embedded bridge, macvlan, and any other device support */ - if ((ndm->ndm_flags & NTF_SELF) && dev->netdev_ops->ndo_fdb_del) { - err = dev->netdev_ops->ndo_fdb_del(ndm, dev, addr); + if (ndm->ndm_flags & NTF_SELF) { + if (dev->netdev_ops->ndo_fdb_del) + err = dev->netdev_ops->ndo_fdb_del(ndm, tb, dev, addr); + else + err = ndo_dflt_fdb_del(ndm, tb, dev, addr); if (!err) { rtnl_fdb_notify(dev, addr, RTM_DELNEIGH); @@ -2218,7 +2264,7 @@ skip: * @dev: netdevice * * Default netdevice operation to dump the existing unicast address list. - * Returns zero on success. + * Returns number of addresses from list put in skb. 
*/ int ndo_dflt_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, @@ -2247,15 +2293,19 @@ static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb) rcu_read_lock(); for_each_netdev_rcu(net, dev) { if (dev->priv_flags & IFF_BRIDGE_PORT) { - struct net_device *master = dev->master; - const struct net_device_ops *ops = master->netdev_ops; + struct net_device *br_dev; + const struct net_device_ops *ops; + br_dev = netdev_master_upper_dev_get(dev); + ops = br_dev->netdev_ops; if (ops->ndo_fdb_dump) idx = ops->ndo_fdb_dump(skb, cb, dev, idx); } if (dev->netdev_ops->ndo_fdb_dump) idx = dev->netdev_ops->ndo_fdb_dump(skb, cb, dev, idx); + else + idx = ndo_dflt_fdb_dump(skb, cb, dev, idx); } rcu_read_unlock(); @@ -2270,6 +2320,7 @@ int ndo_dflt_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, struct ifinfomsg *ifm; struct nlattr *br_afspec; u8 operstate = netif_running(dev) ? dev->operstate : IF_OPER_DOWN; + struct net_device *br_dev = netdev_master_upper_dev_get(dev); nlh = nlmsg_put(skb, pid, seq, RTM_NEWLINK, sizeof(*ifm), NLM_F_MULTI); if (nlh == NULL) @@ -2287,8 +2338,8 @@ int ndo_dflt_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, if (nla_put_string(skb, IFLA_IFNAME, dev->name) || nla_put_u32(skb, IFLA_MTU, dev->mtu) || nla_put_u8(skb, IFLA_OPERSTATE, operstate) || - (dev->master && - nla_put_u32(skb, IFLA_MASTER, dev->master->ifindex)) || + (br_dev && + nla_put_u32(skb, IFLA_MASTER, br_dev->ifindex)) || (dev->addr_len && nla_put(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr)) || (dev->ifindex != dev->iflink && @@ -2320,23 +2371,31 @@ static int rtnl_bridge_getlink(struct sk_buff *skb, struct netlink_callback *cb) int idx = 0; u32 portid = NETLINK_CB(cb->skb).portid; u32 seq = cb->nlh->nlmsg_seq; + struct nlattr *extfilt; + u32 filter_mask = 0; + + extfilt = nlmsg_find_attr(cb->nlh, sizeof(struct rtgenmsg), + IFLA_EXT_MASK); + if (extfilt) + filter_mask = nla_get_u32(extfilt); rcu_read_lock(); for_each_netdev_rcu(net, dev) { const struct net_device_ops *ops = dev->netdev_ops; - struct net_device *master = dev->master; + struct net_device *br_dev = netdev_master_upper_dev_get(dev); - if (master && master->netdev_ops->ndo_bridge_getlink) { + if (br_dev && br_dev->netdev_ops->ndo_bridge_getlink) { if (idx >= cb->args[0] && - master->netdev_ops->ndo_bridge_getlink( - skb, portid, seq, dev) < 0) + br_dev->netdev_ops->ndo_bridge_getlink( + skb, portid, seq, dev, filter_mask) < 0) break; idx++; } if (ops->ndo_bridge_getlink) { if (idx >= cb->args[0] && - ops->ndo_bridge_getlink(skb, portid, seq, dev) < 0) + ops->ndo_bridge_getlink(skb, portid, seq, dev, + filter_mask) < 0) break; idx++; } @@ -2365,7 +2424,7 @@ static inline size_t bridge_nlmsg_size(void) static int rtnl_bridge_notify(struct net_device *dev, u16 flags) { struct net *net = dev_net(dev); - struct net_device *master = dev->master; + struct net_device *br_dev = netdev_master_upper_dev_get(dev); struct sk_buff *skb; int err = -EOPNOTSUPP; @@ -2376,15 +2435,15 @@ static int rtnl_bridge_notify(struct net_device *dev, u16 flags) } if ((!flags || (flags & BRIDGE_FLAGS_MASTER)) && - master && master->netdev_ops->ndo_bridge_getlink) { - err = master->netdev_ops->ndo_bridge_getlink(skb, 0, 0, dev); + br_dev && br_dev->netdev_ops->ndo_bridge_getlink) { + err = br_dev->netdev_ops->ndo_bridge_getlink(skb, 0, 0, dev, 0); if (err < 0) goto errout; } if ((flags & BRIDGE_FLAGS_SELF) && dev->netdev_ops->ndo_bridge_getlink) { - err = dev->netdev_ops->ndo_bridge_getlink(skb, 0, 0, dev); + err = 
dev->netdev_ops->ndo_bridge_getlink(skb, 0, 0, dev, 0); if (err < 0) goto errout; } @@ -2398,8 +2457,7 @@ errout: return err; } -static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, - void *arg) +static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); struct ifinfomsg *ifm; @@ -2436,13 +2494,14 @@ static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, oflags = flags; if (!flags || (flags & BRIDGE_FLAGS_MASTER)) { - if (!dev->master || - !dev->master->netdev_ops->ndo_bridge_setlink) { + struct net_device *br_dev = netdev_master_upper_dev_get(dev); + + if (!br_dev || !br_dev->netdev_ops->ndo_bridge_setlink) { err = -EOPNOTSUPP; goto out; } - err = dev->master->netdev_ops->ndo_bridge_setlink(dev, nlh); + err = br_dev->netdev_ops->ndo_bridge_setlink(dev, nlh); if (err) goto out; @@ -2468,9 +2527,75 @@ out: return err; } -/* Protected by RTNL sempahore. */ -static struct rtattr **rta_buf; -static int rtattr_max; +static int rtnl_bridge_dellink(struct sk_buff *skb, struct nlmsghdr *nlh) +{ + struct net *net = sock_net(skb->sk); + struct ifinfomsg *ifm; + struct net_device *dev; + struct nlattr *br_spec, *attr = NULL; + int rem, err = -EOPNOTSUPP; + u16 oflags, flags = 0; + bool have_flags = false; + + if (nlmsg_len(nlh) < sizeof(*ifm)) + return -EINVAL; + + ifm = nlmsg_data(nlh); + if (ifm->ifi_family != AF_BRIDGE) + return -EPFNOSUPPORT; + + dev = __dev_get_by_index(net, ifm->ifi_index); + if (!dev) { + pr_info("PF_BRIDGE: RTM_SETLINK with unknown ifindex\n"); + return -ENODEV; + } + + br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC); + if (br_spec) { + nla_for_each_nested(attr, br_spec, rem) { + if (nla_type(attr) == IFLA_BRIDGE_FLAGS) { + have_flags = true; + flags = nla_get_u16(attr); + break; + } + } + } + + oflags = flags; + + if (!flags || (flags & BRIDGE_FLAGS_MASTER)) { + struct net_device *br_dev = netdev_master_upper_dev_get(dev); + + if (!br_dev || !br_dev->netdev_ops->ndo_bridge_dellink) { + err = -EOPNOTSUPP; + goto out; + } + + err = br_dev->netdev_ops->ndo_bridge_dellink(dev, nlh); + if (err) + goto out; + + flags &= ~BRIDGE_FLAGS_MASTER; + } + + if ((flags & BRIDGE_FLAGS_SELF)) { + if (!dev->netdev_ops->ndo_bridge_dellink) + err = -EOPNOTSUPP; + else + err = dev->netdev_ops->ndo_bridge_dellink(dev, nlh); + + if (!err) + flags &= ~BRIDGE_FLAGS_SELF; + } + + if (have_flags) + memcpy(nla_data(attr), &flags, sizeof(flags)); + /* Generate event to notify upper layer of bridge change */ + if (!err) + err = rtnl_bridge_notify(dev, oflags); +out: + return err; +} /* Process one rtnetlink message. 
*/ @@ -2479,7 +2604,6 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) struct net *net = sock_net(skb->sk); rtnl_doit_func doit; int sz_idx, kind; - int min_len; int family; int type; int err; @@ -2491,10 +2615,10 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) type -= RTM_BASE; /* All the messages must have at least 1 byte length */ - if (nlh->nlmsg_len < NLMSG_LENGTH(sizeof(struct rtgenmsg))) + if (nlmsg_len(nlh) < sizeof(struct rtgenmsg)) return 0; - family = ((struct rtgenmsg *)NLMSG_DATA(nlh))->rtgen_family; + family = ((struct rtgenmsg *)nlmsg_data(nlh))->rtgen_family; sz_idx = type>>2; kind = type&3; @@ -2527,32 +2651,11 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) return err; } - memset(rta_buf, 0, (rtattr_max * sizeof(struct rtattr *))); - - min_len = rtm_min[sz_idx]; - if (nlh->nlmsg_len < min_len) - return -EINVAL; - - if (nlh->nlmsg_len > min_len) { - int attrlen = nlh->nlmsg_len - NLMSG_ALIGN(min_len); - struct rtattr *attr = (void *)nlh + NLMSG_ALIGN(min_len); - - while (RTA_OK(attr, attrlen)) { - unsigned int flavor = attr->rta_type; - if (flavor) { - if (flavor > rta_max[sz_idx]) - return -EINVAL; - rta_buf[flavor-1] = attr; - } - attr = RTA_NEXT(attr, attrlen); - } - } - doit = rtnl_get_doit(family, type); if (doit == NULL) return -EOPNOTSUPP; - return doit(skb, nlh, (void *)&rta_buf[0]); + return doit(skb, nlh); } static void rtnetlink_rcv(struct sk_buff *skb) @@ -2622,16 +2725,6 @@ static struct pernet_operations rtnetlink_net_ops = { void __init rtnetlink_init(void) { - int i; - - rtattr_max = 0; - for (i = 0; i < ARRAY_SIZE(rta_max); i++) - if (rta_max[i] > rtattr_max) - rtattr_max = rta_max[i]; - rta_buf = kmalloc(rtattr_max * sizeof(struct rtattr *), GFP_KERNEL); - if (!rta_buf) - panic("rtnetlink_init: cannot allocate rta_buf\n"); - if (register_pernet_subsys(&rtnetlink_net_ops)) panic("rtnetlink_init: cannot initialize rtnetlink\n"); @@ -2651,6 +2744,7 @@ void __init rtnetlink_init(void) rtnl_register(PF_BRIDGE, RTM_GETNEIGH, NULL, rtnl_fdb_dump, NULL); rtnl_register(PF_BRIDGE, RTM_GETLINK, NULL, rtnl_bridge_getlink, NULL); + rtnl_register(PF_BRIDGE, RTM_DELLINK, rtnl_bridge_dellink, NULL, NULL); rtnl_register(PF_BRIDGE, RTM_SETLINK, rtnl_bridge_setlink, NULL, NULL); } diff --git a/net/core/scm.c b/net/core/scm.c index 905dcc6ad1e3..03795d0147f2 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -24,6 +24,7 @@ #include <linux/interrupt.h> #include <linux/netdevice.h> #include <linux/security.h> +#include <linux/pid_namespace.h> #include <linux/pid.h> #include <linux/nsproxy.h> #include <linux/slab.h> @@ -52,7 +53,8 @@ static __inline__ int scm_check_creds(struct ucred *creds) if (!uid_valid(uid) || !gid_valid(gid)) return -EINVAL; - if ((creds->pid == task_tgid_vnr(current) || nsown_capable(CAP_SYS_ADMIN)) && + if ((creds->pid == task_tgid_vnr(current) || + ns_capable(current->nsproxy->pid_ns->user_ns, CAP_SYS_ADMIN)) && ((uid_eq(uid, cred->uid) || uid_eq(uid, cred->euid) || uid_eq(uid, cred->suid)) || nsown_capable(CAP_SETUID)) && ((gid_eq(gid, cred->gid) || gid_eq(gid, cred->egid) || @@ -185,22 +187,6 @@ int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *p) p->creds.uid = uid; p->creds.gid = gid; - - if (!p->cred || - !uid_eq(p->cred->euid, uid) || - !gid_eq(p->cred->egid, gid)) { - struct cred *cred; - err = -ENOMEM; - cred = prepare_creds(); - if (!cred) - goto error; - - cred->uid = cred->euid = uid; - cred->gid = cred->egid = gid; - if (p->cred) - 
put_cred(p->cred); - p->cred = cred; - } break; } default: @@ -304,8 +290,8 @@ void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm) /* Bump the usage count and install the file. */ sock = sock_from_file(fp[i], &err); if (sock) { - sock_update_netprioidx(sock->sk, current); - sock_update_classid(sock->sk, current); + sock_update_netprioidx(sock->sk); + sock_update_classid(sock->sk); } fd_install(new_fd, get_file(fp[i])); } diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c index e61a8bb7fce7..6a2f13cee86a 100644 --- a/net/core/secure_seq.c +++ b/net/core/secure_seq.c @@ -12,12 +12,10 @@ static u32 net_secret[MD5_MESSAGE_BYTES / 4] ____cacheline_aligned; -static int __init net_secret_init(void) +void net_secret_init(void) { get_random_bytes(net_secret, sizeof(net_secret)); - return 0; } -late_initcall(net_secret_init); #ifdef CONFIG_INET static u32 seq_scale(u32 seq) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 32443ebc3e89..af9185d0be6a 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -104,47 +104,37 @@ static const struct pipe_buf_operations sock_pipe_buf_ops = { .get = sock_pipe_buf_get, }; -/* - * Keep out-of-line to prevent kernel bloat. - * __builtin_return_address is not used because it is not always - * reliable. - */ - /** - * skb_over_panic - private function - * @skb: buffer - * @sz: size - * @here: address - * - * Out of line support code for skb_put(). Not user callable. + * skb_panic - private function for out-of-line support + * @skb: buffer + * @sz: size + * @addr: address + * @msg: skb_over_panic or skb_under_panic + * + * Out-of-line support for skb_put() and skb_push(). + * Called via the wrapper skb_over_panic() or skb_under_panic(). + * Keep out of line to prevent kernel bloat. + * __builtin_return_address is not used because it is not always reliable. */ -static void skb_over_panic(struct sk_buff *skb, int sz, void *here) +static void skb_panic(struct sk_buff *skb, unsigned int sz, void *addr, + const char msg[]) { pr_emerg("%s: text:%p len:%d put:%d head:%p data:%p tail:%#lx end:%#lx dev:%s\n", - __func__, here, skb->len, sz, skb->head, skb->data, + msg, addr, skb->len, sz, skb->head, skb->data, (unsigned long)skb->tail, (unsigned long)skb->end, skb->dev ? skb->dev->name : "<NULL>"); BUG(); } -/** - * skb_under_panic - private function - * @skb: buffer - * @sz: size - * @here: address - * - * Out of line support code for skb_push(). Not user callable. - */ - -static void skb_under_panic(struct sk_buff *skb, int sz, void *here) +static void skb_over_panic(struct sk_buff *skb, unsigned int sz, void *addr) { - pr_emerg("%s: text:%p len:%d put:%d head:%p data:%p tail:%#lx end:%#lx dev:%s\n", - __func__, here, skb->len, sz, skb->head, skb->data, - (unsigned long)skb->tail, (unsigned long)skb->end, - skb->dev ? 
skb->dev->name : "<NULL>"); - BUG(); + skb_panic(skb, sz, addr, __func__); } +static void skb_under_panic(struct sk_buff *skb, unsigned int sz, void *addr) +{ + skb_panic(skb, sz, addr, __func__); +} /* * kmalloc_reserve is a wrapper around kmalloc_node_track_caller that tells @@ -155,8 +145,9 @@ static void skb_under_panic(struct sk_buff *skb, int sz, void *here) */ #define kmalloc_reserve(size, gfp, node, pfmemalloc) \ __kmalloc_reserve(size, gfp, node, _RET_IP_, pfmemalloc) -void *__kmalloc_reserve(size_t size, gfp_t flags, int node, unsigned long ip, - bool *pfmemalloc) + +static void *__kmalloc_reserve(size_t size, gfp_t flags, int node, + unsigned long ip, bool *pfmemalloc) { void *obj; bool ret_pfmemalloc = false; @@ -188,6 +179,33 @@ out: * */ +struct sk_buff *__alloc_skb_head(gfp_t gfp_mask, int node) +{ + struct sk_buff *skb; + + /* Get the HEAD */ + skb = kmem_cache_alloc_node(skbuff_head_cache, + gfp_mask & ~__GFP_DMA, node); + if (!skb) + goto out; + + /* + * Only clear those fields we need to clear, not those that we will + * actually initialise below. Hence, don't put any more fields after + * the tail pointer in struct sk_buff! + */ + memset(skb, 0, offsetof(struct sk_buff, tail)); + skb->data = NULL; + skb->truesize = sizeof(struct sk_buff); + atomic_set(&skb->users, 1); + +#ifdef NET_SKBUFF_DATA_USES_OFFSET + skb->mac_header = ~0U; +#endif +out: + return skb; +} + /** * __alloc_skb - allocate a network buffer * @size: size to allocate @@ -259,6 +277,7 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, skb->end = skb->tail + size; #ifdef NET_SKBUFF_DATA_USES_OFFSET skb->mac_header = ~0U; + skb->transport_header = ~0U; #endif /* make sure we initialize shinfo sequentially */ @@ -327,6 +346,7 @@ struct sk_buff *build_skb(void *data, unsigned int frag_size) skb->end = skb->tail + size; #ifdef NET_SKBUFF_DATA_USES_OFFSET skb->mac_header = ~0U; + skb->transport_header = ~0U; #endif /* make sure we initialize shinfo sequentially */ @@ -348,10 +368,6 @@ struct netdev_alloc_cache { }; static DEFINE_PER_CPU(struct netdev_alloc_cache, netdev_alloc_cache); -#define NETDEV_FRAG_PAGE_MAX_ORDER get_order(32768) -#define NETDEV_FRAG_PAGE_MAX_SIZE (PAGE_SIZE << NETDEV_FRAG_PAGE_MAX_ORDER) -#define NETDEV_PAGECNT_MAX_BIAS NETDEV_FRAG_PAGE_MAX_SIZE - static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask) { struct netdev_alloc_cache *nc; @@ -595,7 +611,8 @@ static void skb_release_head_state(struct sk_buff *skb) static void skb_release_all(struct sk_buff *skb) { skb_release_head_state(skb); - skb_release_data(skb); + if (likely(skb->data)) + skb_release_data(skb); } /** @@ -684,6 +701,7 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old) new->mac_header = old->mac_header; new->inner_transport_header = old->inner_transport_header; new->inner_network_header = old->inner_network_header; + new->inner_mac_header = old->inner_mac_header; skb_dst_copy(new, old); new->rxhash = old->rxhash; new->ooo_okay = old->ooo_okay; @@ -717,6 +735,7 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old) new->tc_verd = old->tc_verd; #endif #endif + new->vlan_proto = old->vlan_proto; new->vlan_tci = old->vlan_tci; skb_copy_secmark(new, old); @@ -878,6 +897,18 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask) } EXPORT_SYMBOL(skb_clone); +static void skb_headers_offset_update(struct sk_buff *skb, int off) +{ + /* {transport,network,mac}_header and tail are relative to skb->head */ + skb->transport_header += off; + 
skb->network_header += off; + if (skb_mac_header_was_set(skb)) + skb->mac_header += off; + skb->inner_transport_header += off; + skb->inner_network_header += off; + skb->inner_mac_header += off; +} + static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old) { #ifndef NET_SKBUFF_DATA_USES_OFFSET @@ -890,13 +921,7 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old) __copy_skb_header(new, old); #ifndef NET_SKBUFF_DATA_USES_OFFSET - /* {transport,network,mac}_header are relative to skb->head */ - new->transport_header += offset; - new->network_header += offset; - if (skb_mac_header_was_set(new)) - new->mac_header += offset; - new->inner_transport_header += offset; - new->inner_network_header += offset; + skb_headers_offset_update(new, offset); #endif skb_shinfo(new)->gso_size = skb_shinfo(old)->gso_size; skb_shinfo(new)->gso_segs = skb_shinfo(old)->gso_segs; @@ -1088,14 +1113,8 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, #else skb->end = skb->head + size; #endif - /* {transport,network,mac}_header and tail are relative to skb->head */ skb->tail += off; - skb->transport_header += off; - skb->network_header += off; - if (skb_mac_header_was_set(skb)) - skb->mac_header += off; - skb->inner_transport_header += off; - skb->inner_network_header += off; + skb_headers_offset_update(skb, off); /* Only adjust this if it actually is csum_start rather than csum */ if (skb->ip_summed == CHECKSUM_PARTIAL) skb->csum_start += nhead; @@ -1191,12 +1210,7 @@ struct sk_buff *skb_copy_expand(const struct sk_buff *skb, if (n->ip_summed == CHECKSUM_PARTIAL) n->csum_start += off; #ifdef NET_SKBUFF_DATA_USES_OFFSET - n->transport_header += off; - n->network_header += off; - if (skb_mac_header_was_set(skb)) - n->mac_header += off; - n->inner_transport_header += off; - n->inner_network_header += off; + skb_headers_offset_update(n, off); #endif return n; @@ -2337,6 +2351,7 @@ void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len) { int pos = skb_headlen(skb); + skb_shinfo(skb1)->tx_flags = skb_shinfo(skb)->tx_flags & SKBTX_SHARED_FRAG; if (len < pos) /* Split line is inside header. */ skb_split_inside_header(skb, skb1, len, pos); else /* Second chunk has no header, nothing to copy. */ @@ -2668,48 +2683,37 @@ int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb, int len, int odd, struct sk_buff *skb), void *from, int length) { - int frg_cnt = 0; - skb_frag_t *frag = NULL; - struct page *page = NULL; - int copy, left; + int frg_cnt = skb_shinfo(skb)->nr_frags; + int copy; int offset = 0; int ret; + struct page_frag *pfrag = &current->task_frag; do { /* Return error if we don't have space for new frag */ - frg_cnt = skb_shinfo(skb)->nr_frags; if (frg_cnt >= MAX_SKB_FRAGS) - return -EFAULT; - - /* allocate a new page for next frag */ - page = alloc_pages(sk->sk_allocation, 0); + return -EMSGSIZE; - /* If alloc_page fails just return failure and caller will - * free previous allocated pages by doing kfree_skb() - */ - if (page == NULL) + if (!sk_page_frag_refill(sk, pfrag)) return -ENOMEM; - /* initialize the next frag */ - skb_fill_page_desc(skb, frg_cnt, page, 0, 0); - skb->truesize += PAGE_SIZE; - atomic_add(PAGE_SIZE, &sk->sk_wmem_alloc); - - /* get the new initialized frag */ - frg_cnt = skb_shinfo(skb)->nr_frags; - frag = &skb_shinfo(skb)->frags[frg_cnt - 1]; - /* copy the user data to page */ - left = PAGE_SIZE - frag->page_offset; - copy = (length > left)?
left : length; + copy = min_t(int, length, pfrag->size - pfrag->offset); - ret = getfrag(from, skb_frag_address(frag) + skb_frag_size(frag), - offset, copy, 0, skb); + ret = getfrag(from, page_address(pfrag->page) + pfrag->offset, + offset, copy, 0, skb); if (ret < 0) return -EFAULT; /* copy was successful so update the size parameters */ - skb_frag_size_add(frag, copy); + skb_fill_page_desc(skb, frg_cnt, pfrag->page, pfrag->offset, + copy); + frg_cnt++; + pfrag->offset += copy; + get_page(pfrag->page); + + skb->truesize += copy; + atomic_add(copy, &sk->sk_wmem_alloc); skb->len += copy; skb->data_len += copy; offset += copy; @@ -2759,14 +2763,22 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) unsigned int mss = skb_shinfo(skb)->gso_size; unsigned int doffset = skb->data - skb_mac_header(skb); unsigned int offset = doffset; + unsigned int tnl_hlen = skb_tnl_header_len(skb); unsigned int headroom; unsigned int len; + __be16 proto; + bool csum; int sg = !!(features & NETIF_F_SG); int nfrags = skb_shinfo(skb)->nr_frags; int err = -ENOMEM; int i = 0; int pos; + proto = skb_network_protocol(skb); + if (unlikely(!proto)) + return ERR_PTR(-EINVAL); + + csum = !!can_checksum_protocol(features, proto); __skb_push(skb, doffset); headroom = skb_headroom(skb); pos = skb_headlen(skb); @@ -2835,7 +2847,10 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) skb_set_network_header(nskb, skb->mac_len); nskb->transport_header = (nskb->network_header + skb_network_header_len(skb)); - skb_copy_from_linear_data(skb, nskb->data, doffset); + + skb_copy_from_linear_data_offset(skb, -tnl_hlen, + nskb->data - tnl_hlen, + doffset + tnl_hlen); if (fskb != skb_shinfo(skb)->frag_list) continue; @@ -2853,6 +2868,8 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) skb_copy_from_linear_data_offset(skb, offset, skb_put(nskb, hsize), hsize); + skb_shinfo(nskb)->tx_flags = skb_shinfo(skb)->tx_flags & SKBTX_SHARED_FRAG; + while (pos < offset + len && i < nfrags) { *frag = skb_shinfo(skb)->frags[i]; __skb_frag_ref(frag); @@ -2899,6 +2916,12 @@ skip_fraglist: nskb->data_len = len - hsize; nskb->len += nskb->data_len; nskb->truesize += nskb->data_len; + + if (!csum) { + nskb->csum = skb_checksum(nskb, doffset, + nskb->len - doffset, 0); + nskb->ip_summed = CHECKSUM_NONE; + } } while ((offset += len) < skb->len); return segs; @@ -3304,12 +3327,8 @@ void skb_tstamp_tx(struct sk_buff *orig_skb, if (!sk) return; - skb = skb_clone(orig_skb, GFP_ATOMIC); - if (!skb) - return; - if (hwtstamps) { - *skb_hwtstamps(skb) = + *skb_hwtstamps(orig_skb) = *hwtstamps; } else { /* @@ -3317,9 +3336,13 @@ void skb_tstamp_tx(struct sk_buff *orig_skb, * so keep the shared tx_flags and only * store software time stamp */ - skb->tstamp = ktime_get_real(); + orig_skb->tstamp = ktime_get_real(); } + skb = skb_clone(orig_skb, GFP_ATOMIC); + if (!skb) + return; + serr = SKB_EXT_ERR(skb); memset(serr, 0, sizeof(*serr)); serr->ee.ee_errno = ENOMSG; @@ -3376,6 +3399,7 @@ bool skb_partial_csum_set(struct sk_buff *skb, u16 start, u16 off) skb->ip_summed = CHECKSUM_PARTIAL; skb->csum_start = skb_headroom(skb) + start; skb->csum_offset = off; + skb_set_transport_header(skb, start); return true; } EXPORT_SYMBOL_GPL(skb_partial_csum_set); diff --git a/net/core/sock.c b/net/core/sock.c index bc131d419683..d4f4cea726e7 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -186,8 +186,10 @@ void mem_cgroup_sockets_destroy(struct mem_cgroup *memcg) static struct lock_class_key 
af_family_keys[AF_MAX]; static struct lock_class_key af_family_slock_keys[AF_MAX]; +#if defined(CONFIG_MEMCG_KMEM) struct static_key memcg_socket_limit_enabled; EXPORT_SYMBOL(memcg_socket_limit_enabled); +#endif /* * Make lock validator output more readable. (we pre-construct these @@ -665,6 +667,9 @@ int sock_setsockopt(struct socket *sock, int level, int optname, case SO_REUSEADDR: sk->sk_reuse = (valbool ? SK_CAN_REUSE : SK_NO_REUSE); break; + case SO_REUSEPORT: + sk->sk_reuseport = valbool; + break; case SO_TYPE: case SO_PROTOCOL: case SO_DOMAIN: @@ -861,6 +866,13 @@ set_rcvbuf: ret = sk_detach_filter(sk); break; + case SO_LOCK_FILTER: + if (sock_flag(sk, SOCK_FILTER_LOCKED) && !valbool) + ret = -EPERM; + else + sock_valbool_flag(sk, SOCK_FILTER_LOCKED, valbool); + break; + case SO_PASSSEC: if (valbool) set_bit(SOCK_PASSSEC, &sock->flags); @@ -895,6 +907,10 @@ set_rcvbuf: sock_valbool_flag(sk, SOCK_NOFCS, valbool); break; + case SO_SELECT_ERR_QUEUE: + sock_valbool_flag(sk, SOCK_SELECT_ERR_QUEUE, valbool); + break; + default: ret = -ENOPROTOOPT; break; @@ -965,6 +981,10 @@ int sock_getsockopt(struct socket *sock, int level, int optname, v.val = sk->sk_reuse; break; + case SO_REUSEPORT: + v.val = sk->sk_reuseport; + break; + case SO_KEEPALIVE: v.val = sock_flag(sk, SOCK_KEEPOPEN); break; @@ -1140,6 +1160,14 @@ int sock_getsockopt(struct socket *sock, int level, int optname, goto lenout; + case SO_LOCK_FILTER: + v.val = sock_flag(sk, SOCK_FILTER_LOCKED); + break; + + case SO_SELECT_ERR_QUEUE: + v.val = sock_flag(sk, SOCK_SELECT_ERR_QUEUE); + break; + default: return -ENOPROTOOPT; } @@ -1278,13 +1306,12 @@ static void sk_prot_free(struct proto *prot, struct sock *sk) module_put(owner); } -#ifdef CONFIG_CGROUPS #if IS_ENABLED(CONFIG_NET_CLS_CGROUP) -void sock_update_classid(struct sock *sk, struct task_struct *task) +void sock_update_classid(struct sock *sk) { u32 classid; - classid = task_cls_classid(task); + classid = task_cls_classid(current); if (classid != sk->sk_classid) sk->sk_classid = classid; } @@ -1292,16 +1319,15 @@ EXPORT_SYMBOL(sock_update_classid); #endif #if IS_ENABLED(CONFIG_NETPRIO_CGROUP) -void sock_update_netprioidx(struct sock *sk, struct task_struct *task) +void sock_update_netprioidx(struct sock *sk) { if (in_interrupt()) return; - sk->sk_cgrp_prioidx = task_netprioidx(task); + sk->sk_cgrp_prioidx = task_netprioidx(current); } EXPORT_SYMBOL_GPL(sock_update_netprioidx); #endif -#endif /** * sk_alloc - All socket objects are allocated here @@ -1327,8 +1353,8 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority, sock_net_set(sk, get_net(net)); atomic_set(&sk->sk_wmem_alloc, 1); - sock_update_classid(sk, current); - sock_update_netprioidx(sk, current); + sock_update_classid(sk); + sock_update_netprioidx(sk); } return sk; @@ -2212,7 +2238,7 @@ EXPORT_SYMBOL(sk_reset_timer); void sk_stop_timer(struct sock *sk, struct timer_list* timer) { - if (timer_pending(timer) && del_timer(timer)) + if (del_timer(timer)) __sock_put(sk); } EXPORT_SYMBOL(sk_stop_timer); @@ -2818,7 +2844,7 @@ static const struct file_operations proto_seq_fops = { static __net_init int proto_init_net(struct net *net) { - if (!proc_net_fops_create(net, "protocols", S_IRUGO, &proto_seq_fops)) + if (!proc_create("protocols", S_IRUGO, net->proc_net, &proto_seq_fops)) return -ENOMEM; return 0; @@ -2826,7 +2852,7 @@ static __net_init int proto_init_net(struct net *net) static __net_exit void proto_exit_net(struct net *net) { - proc_net_remove(net, "protocols"); + remove_proc_entry("protocols", 
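/*
 * This conversion repeats throughout the series: the old
 * proc_net_fops_create()/proc_net_remove() wrappers give way to the
 * generic proc API with an explicit net->proc_net parent, i.e.:
 *
 *	if (!proc_create("protocols", S_IRUGO, net->proc_net,
 *			 &proto_seq_fops))
 *		return -ENOMEM;
 *
 * paired on teardown with:
 *
 *	remove_proc_entry("protocols", net->proc_net);
 */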
net->proc_net); } diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c index 602cd637182e..d5bef0b0f639 100644 --- a/net/core/sock_diag.c +++ b/net/core/sock_diag.c @@ -49,6 +49,39 @@ int sock_diag_put_meminfo(struct sock *sk, struct sk_buff *skb, int attrtype) } EXPORT_SYMBOL_GPL(sock_diag_put_meminfo); +int sock_diag_put_filterinfo(struct user_namespace *user_ns, struct sock *sk, + struct sk_buff *skb, int attrtype) +{ + struct nlattr *attr; + struct sk_filter *filter; + unsigned int len; + int err = 0; + + if (!ns_capable(user_ns, CAP_NET_ADMIN)) { + nla_reserve(skb, attrtype, 0); + return 0; + } + + rcu_read_lock(); + + filter = rcu_dereference(sk->sk_filter); + len = filter ? filter->len * sizeof(struct sock_filter) : 0; + + attr = nla_reserve(skb, attrtype, len); + if (attr == NULL) { + err = -EMSGSIZE; + goto out; + } + + if (filter) + memcpy(nla_data(attr), filter->insns, len); + +out: + rcu_read_unlock(); + return err; +} +EXPORT_SYMBOL(sock_diag_put_filterinfo); + void sock_diag_register_inet_compat(int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh)) { mutex_lock(&sock_diag_table_mutex); @@ -97,21 +130,6 @@ void sock_diag_unregister(const struct sock_diag_handler *hnld) } EXPORT_SYMBOL_GPL(sock_diag_unregister); -static const inline struct sock_diag_handler *sock_diag_lock_handler(int family) -{ - if (sock_diag_handlers[family] == NULL) - request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK, - NETLINK_SOCK_DIAG, family); - - mutex_lock(&sock_diag_table_mutex); - return sock_diag_handlers[family]; -} - -static inline void sock_diag_unlock_handler(const struct sock_diag_handler *h) -{ - mutex_unlock(&sock_diag_table_mutex); -} - static int __sock_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) { int err; @@ -121,12 +139,20 @@ static int __sock_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) if (nlmsg_len(nlh) < sizeof(*req)) return -EINVAL; - hndl = sock_diag_lock_handler(req->sdiag_family); + if (req->sdiag_family >= AF_MAX) + return -EINVAL; + + if (sock_diag_handlers[req->sdiag_family] == NULL) + request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK, + NETLINK_SOCK_DIAG, req->sdiag_family); + + mutex_lock(&sock_diag_table_mutex); + hndl = sock_diag_handlers[req->sdiag_family]; if (hndl == NULL) err = -ENOENT; else err = hndl->dump(skb, nlh); - sock_diag_unlock_handler(hndl); + mutex_unlock(&sock_diag_table_mutex); return err; } diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index d1b08045a9df..cfdb46ab3a7f 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -20,6 +20,8 @@ #include <net/sock.h> #include <net/net_ratelimit.h> +static int one = 1; + #ifdef CONFIG_RPS static int rps_sock_flow_sysctl(ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) @@ -92,28 +94,32 @@ static struct ctl_table net_core_table[] = { .data = &sysctl_wmem_max, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dointvec_minmax, + .extra1 = &one, }, { .procname = "rmem_max", .data = &sysctl_rmem_max, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dointvec_minmax, + .extra1 = &one, }, { .procname = "wmem_default", .data = &sysctl_wmem_default, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dointvec_minmax, + .extra1 = &one, }, { .procname = "rmem_default", .data = &sysctl_rmem_default, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = 
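/*
 * All four {w,r}mem_{max,default} entries in this table move from
 * proc_dointvec to proc_dointvec_minmax with a lower bound of one, so
 * these limits can no longer be written as zero or negative. Only
 * .extra1 is set; there is still no upper bound:
 *
 *	static int one = 1;
 *
 *	{
 *		.procname	= "rmem_default",
 *		.data		= &sysctl_rmem_default,
 *		.maxlen		= sizeof(int),
 *		.mode		= 0644,
 *		.proc_handler	= proc_dointvec_minmax,
 *		.extra1		= &one,
 *	},
 */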
proc_dointvec_minmax, + .extra1 = &one, }, { .procname = "dev_weight", diff --git a/net/core/utils.c b/net/core/utils.c index e3487e461939..3c7f5b51b979 100644 --- a/net/core/utils.c +++ b/net/core/utils.c @@ -17,6 +17,7 @@ #include <linux/module.h> #include <linux/jiffies.h> #include <linux/kernel.h> +#include <linux/ctype.h> #include <linux/inet.h> #include <linux/mm.h> #include <linux/net.h> @@ -348,9 +349,7 @@ int mac_pton(const char *s, u8 *mac) /* Don't dirty result unless string is valid MAC. */ for (i = 0; i < ETH_ALEN; i++) { - if (!strchr("0123456789abcdefABCDEF", s[i * 3])) - return 0; - if (!strchr("0123456789abcdefABCDEF", s[i * 3 + 1])) + if (!isxdigit(s[i * 3]) || !isxdigit(s[i * 3 + 1])) return 0; if (i != ETH_ALEN - 1 && s[i * 3 + 2] != ':') return 0; diff --git a/net/dcb/dcbevent.c b/net/dcb/dcbevent.c index 1d9eb7c60a68..4f72fc40bf02 100644 --- a/net/dcb/dcbevent.c +++ b/net/dcb/dcbevent.c @@ -20,6 +20,7 @@ #include <linux/rtnetlink.h> #include <linux/notifier.h> #include <linux/export.h> +#include <net/dcbevent.h> static ATOMIC_NOTIFIER_HEAD(dcbevent_notif_chain); diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c index 1b588e23cf80..40d5829ed36a 100644 --- a/net/dcb/dcbnl.c +++ b/net/dcb/dcbnl.c @@ -284,6 +284,7 @@ static int dcbnl_getperm_hwaddr(struct net_device *netdev, struct nlmsghdr *nlh, if (!netdev->dcbnl_ops->getpermhwaddr) return -EOPNOTSUPP; + memset(perm_addr, 0, sizeof(perm_addr)); netdev->dcbnl_ops->getpermhwaddr(netdev, perm_addr); return nla_put(skb, DCB_ATTR_PERM_HWADDR, sizeof(perm_addr), perm_addr); @@ -1042,6 +1043,7 @@ static int dcbnl_ieee_fill(struct sk_buff *skb, struct net_device *netdev) if (ops->ieee_getets) { struct ieee_ets ets; + memset(&ets, 0, sizeof(ets)); err = ops->ieee_getets(netdev, &ets); if (!err && nla_put(skb, DCB_ATTR_IEEE_ETS, sizeof(ets), &ets)) @@ -1050,6 +1052,7 @@ static int dcbnl_ieee_fill(struct sk_buff *skb, struct net_device *netdev) if (ops->ieee_getmaxrate) { struct ieee_maxrate maxrate; + memset(&maxrate, 0, sizeof(maxrate)); err = ops->ieee_getmaxrate(netdev, &maxrate); if (!err) { err = nla_put(skb, DCB_ATTR_IEEE_MAXRATE, @@ -1061,6 +1064,7 @@ static int dcbnl_ieee_fill(struct sk_buff *skb, struct net_device *netdev) if (ops->ieee_getpfc) { struct ieee_pfc pfc; + memset(&pfc, 0, sizeof(pfc)); err = ops->ieee_getpfc(netdev, &pfc); if (!err && nla_put(skb, DCB_ATTR_IEEE_PFC, sizeof(pfc), &pfc)) @@ -1094,6 +1098,7 @@ static int dcbnl_ieee_fill(struct sk_buff *skb, struct net_device *netdev) /* get peer info if available */ if (ops->ieee_peer_getets) { struct ieee_ets ets; + memset(&ets, 0, sizeof(ets)); err = ops->ieee_peer_getets(netdev, &ets); if (!err && nla_put(skb, DCB_ATTR_IEEE_PEER_ETS, sizeof(ets), &ets)) @@ -1102,6 +1107,7 @@ static int dcbnl_ieee_fill(struct sk_buff *skb, struct net_device *netdev) if (ops->ieee_peer_getpfc) { struct ieee_pfc pfc; + memset(&pfc, 0, sizeof(pfc)); err = ops->ieee_peer_getpfc(netdev, &pfc); if (!err && nla_put(skb, DCB_ATTR_IEEE_PEER_PFC, sizeof(pfc), &pfc)) @@ -1280,6 +1286,7 @@ static int dcbnl_cee_fill(struct sk_buff *skb, struct net_device *netdev) /* peer info if available */ if (ops->cee_peer_getpg) { struct cee_pg pg; + memset(&pg, 0, sizeof(pg)); err = ops->cee_peer_getpg(netdev, &pg); if (!err && nla_put(skb, DCB_ATTR_CEE_PEER_PG, sizeof(pg), &pg)) @@ -1288,6 +1295,7 @@ static int dcbnl_cee_fill(struct sk_buff *skb, struct net_device *netdev) if (ops->cee_peer_getpfc) { struct cee_pfc pfc; + memset(&pfc, 0, sizeof(pfc)); err = ops->cee_peer_getpfc(netdev, &pfc); if (!err 
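/*
 * The memset() additions throughout dcbnl.c close one recurring hole:
 * these structs live on the kernel stack, the driver callback is not
 * obliged to fill every field, and nla_put() copies sizeof(struct) raw
 * bytes to userspace, padding included. Zeroing first prevents a stack
 * infoleak:
 *
 *	struct ieee_pfc pfc;
 *
 *	memset(&pfc, 0, sizeof(pfc));
 *	err = ops->ieee_getpfc(netdev, &pfc);
 *	if (!err && nla_put(skb, DCB_ATTR_IEEE_PFC, sizeof(pfc), &pfc))
 *		return -EMSGSIZE;
 */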
&& nla_put(skb, DCB_ATTR_CEE_PEER_PFC, sizeof(pfc), &pfc)) @@ -1650,7 +1658,7 @@ static const struct reply_func reply_funcs[DCB_CMD_MAX+1] = { [DCB_CMD_CEE_GET] = { RTM_GETDCB, dcbnl_cee_get }, }; -static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); struct net_device *netdev; diff --git a/net/dccp/Kconfig b/net/dccp/Kconfig index b75968a04017..8c0ef71bed2f 100644 --- a/net/dccp/Kconfig +++ b/net/dccp/Kconfig @@ -1,6 +1,6 @@ menuconfig IP_DCCP - tristate "The DCCP Protocol (EXPERIMENTAL)" - depends on INET && EXPERIMENTAL + tristate "The DCCP Protocol" + depends on INET ---help--- Datagram Congestion Control Protocol (RFC 4340) diff --git a/net/dccp/ccids/Kconfig b/net/dccp/ccids/Kconfig index 0581143cb800..8ba3fc9d6d16 100644 --- a/net/dccp/ccids/Kconfig +++ b/net/dccp/ccids/Kconfig @@ -1,5 +1,4 @@ -menu "DCCP CCIDs Configuration (EXPERIMENTAL)" - depends on EXPERIMENTAL +menu "DCCP CCIDs Configuration" config IP_DCCP_CCID2_DEBUG bool "CCID-2 debugging messages" @@ -12,7 +11,7 @@ config IP_DCCP_CCID2_DEBUG If in doubt, say N. config IP_DCCP_CCID3 - bool "CCID-3 (TCP-Friendly) (EXPERIMENTAL)" + bool "CCID-3 (TCP-Friendly)" def_bool y if (IP_DCCP = y || IP_DCCP = m) ---help--- CCID-3 denotes TCP-Friendly Rate Control (TFRC), an equation-based diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 4f9f5eb478f1..ebc54fef85a5 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -500,8 +500,7 @@ static struct dst_entry* dccp_v4_route_skb(struct net *net, struct sock *sk, return &rt->dst; } -static int dccp_v4_send_response(struct sock *sk, struct request_sock *req, - struct request_values *rv_unused) +static int dccp_v4_send_response(struct sock *sk, struct request_sock *req) { int err = -1; struct sk_buff *skb; @@ -658,7 +657,7 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) dreq->dreq_gss = dreq->dreq_iss; dreq->dreq_service = service; - if (dccp_v4_send_response(sk, req, NULL)) + if (dccp_v4_send_response(sk, req)) goto drop_and_free; inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT); diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 6e05981f271e..9c61f9c02fdb 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -213,8 +213,7 @@ out: } -static int dccp_v6_send_response(struct sock *sk, struct request_sock *req, - struct request_values *rv_unused) +static int dccp_v6_send_response(struct sock *sk, struct request_sock *req) { struct inet6_request_sock *ireq6 = inet6_rsk(req); struct ipv6_pinfo *np = inet6_sk(sk); @@ -428,7 +427,7 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb) dreq->dreq_gss = dreq->dreq_iss; dreq->dreq_service = service; - if (dccp_v6_send_response(sk, req, NULL)) + if (dccp_v6_send_response(sk, req)) goto drop_and_free; inet6_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT); diff --git a/net/dccp/probe.c b/net/dccp/probe.c index 0a8d6ebd9b45..4c6bdf97a657 100644 --- a/net/dccp/probe.c +++ b/net/dccp/probe.c @@ -171,7 +171,7 @@ static __init int dccpprobe_init(void) spin_lock_init(&dccpw.lock); if (kfifo_alloc(&dccpw.fifo, bufsize, GFP_KERNEL)) return ret; - if (!proc_net_fops_create(&init_net, procname, S_IRUSR, &dccpprobe_fops)) + if (!proc_create(procname, S_IRUSR, init_net.proc_net, &dccpprobe_fops)) goto err0; ret = setup_jprobe(); @@ -181,7 +181,7 @@ static __init int dccpprobe_init(void) pr_info("DCCP watch registered (port=%d)\n", port); return 0; err1: - proc_net_remove(&init_net, 
procname); + remove_proc_entry(procname, init_net.proc_net); err0: kfifo_free(&dccpw.fifo); return ret; @@ -191,7 +191,7 @@ module_init(dccpprobe_init); static __exit void dccpprobe_exit(void) { kfifo_free(&dccpw.fifo); - proc_net_remove(&init_net, procname); + remove_proc_entry(procname, init_net.proc_net); unregister_jprobe(&dccp_send_probe); } diff --git a/net/decnet/Kconfig b/net/decnet/Kconfig index 7914fd619c5c..f3393e154f0f 100644 --- a/net/decnet/Kconfig +++ b/net/decnet/Kconfig @@ -25,8 +25,8 @@ config DECNET The module is called decnet. config DECNET_ROUTER - bool "DECnet: router support (EXPERIMENTAL)" - depends on DECNET && EXPERIMENTAL + bool "DECnet: router support" + depends on DECNET select FIB_RULES ---help--- Add support for turning your DECnet Endnode into a level 1 or 2 diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index 307c322d53bb..c21f200eed93 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -175,12 +175,11 @@ static struct hlist_head *dn_find_list(struct sock *sk) static int check_port(__le16 port) { struct sock *sk; - struct hlist_node *node; if (port == 0) return -1; - sk_for_each(sk, node, &dn_sk_hash[le16_to_cpu(port) & DN_SK_HASH_MASK]) { + sk_for_each(sk, &dn_sk_hash[le16_to_cpu(port) & DN_SK_HASH_MASK]) { struct dn_scp *scp = DN_SK(sk); if (scp->addrloc == port) return -1; @@ -374,11 +373,10 @@ int dn_username2sockaddr(unsigned char *data, int len, struct sockaddr_dn *sdn, struct sock *dn_sklist_find_listener(struct sockaddr_dn *addr) { struct hlist_head *list = listen_hash(addr); - struct hlist_node *node; struct sock *sk; read_lock(&dn_hash_lock); - sk_for_each(sk, node, list) { + sk_for_each(sk, list) { struct dn_scp *scp = DN_SK(sk); if (sk->sk_state != TCP_LISTEN) continue; @@ -414,11 +412,10 @@ struct sock *dn_find_by_skb(struct sk_buff *skb) { struct dn_skb_cb *cb = DN_SKB_CB(skb); struct sock *sk; - struct hlist_node *node; struct dn_scp *scp; read_lock(&dn_hash_lock); - sk_for_each(sk, node, &dn_sk_hash[le16_to_cpu(cb->dst_port) & DN_SK_HASH_MASK]) { + sk_for_each(sk, &dn_sk_hash[le16_to_cpu(cb->dst_port) & DN_SK_HASH_MASK]) { scp = DN_SK(sk); if (cb->src != dn_saddr2dn(&scp->peer)) continue; @@ -909,6 +906,7 @@ static int __dn_connect(struct sock *sk, struct sockaddr_dn *addr, int addrlen, struct dn_scp *scp = DN_SK(sk); int err = -EISCONN; struct flowidn fld; + struct dst_entry *dst; if (sock->state == SS_CONNECTED) goto out; @@ -955,10 +953,11 @@ static int __dn_connect(struct sock *sk, struct sockaddr_dn *addr, int addrlen, fld.flowidn_proto = DNPROTO_NSP; if (dn_route_output_sock(&sk->sk_dst_cache, &fld, sk, flags) < 0) goto out; - sk->sk_route_caps = sk->sk_dst_cache->dev->features; + dst = __sk_dst_get(sk); + sk->sk_route_caps = dst->dev->features; sock->state = SS_CONNECTING; scp->state = DN_CI; - scp->segsize_loc = dst_metric_advmss(sk->sk_dst_cache); + scp->segsize_loc = dst_metric_advmss(dst); dn_nsp_send_conninit(sk, NSP_CI); err = -EINPROGRESS; @@ -2382,7 +2381,7 @@ static int __init decnet_init(void) dev_add_pack(&dn_dix_packet_type); register_netdevice_notifier(&dn_dev_notifier); - proc_net_fops_create(&init_net, "decnet", S_IRUGO, &dn_socket_seq_fops); + proc_create("decnet", S_IRUGO, init_net.proc_net, &dn_socket_seq_fops); dn_register_sysctl(); out: return rc; @@ -2411,7 +2410,7 @@ static void __exit decnet_exit(void) dn_neigh_cleanup(); dn_fib_cleanup(); - proc_net_remove(&init_net, "decnet"); + remove_proc_entry("decnet", init_net.proc_net); proto_unregister(&dn_proto); diff --git 
a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index e47ba9fc4a0e..7d9197063ebb 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c @@ -563,7 +563,7 @@ static const struct nla_policy dn_ifa_policy[IFA_MAX+1] = { .len = IFNAMSIZ - 1 }, }; -static int dn_nl_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +static int dn_nl_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); struct nlattr *tb[IFA_MAX+1]; @@ -607,7 +607,7 @@ errout: return err; } -static int dn_nl_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +static int dn_nl_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); struct nlattr *tb[IFA_MAX+1]; @@ -1412,7 +1412,7 @@ void __init dn_dev_init(void) rtnl_register(PF_DECnet, RTM_DELADDR, dn_nl_deladdr, NULL, NULL); rtnl_register(PF_DECnet, RTM_GETADDR, NULL, dn_nl_dump_ifaddr, NULL); - proc_net_fops_create(&init_net, "decnet_dev", S_IRUGO, &dn_dev_seq_fops); + proc_create("decnet_dev", S_IRUGO, init_net.proc_net, &dn_dev_seq_fops); #ifdef CONFIG_SYSCTL { @@ -1433,7 +1433,7 @@ void __exit dn_dev_cleanup(void) } #endif /* CONFIG_SYSCTL */ - proc_net_remove(&init_net, "decnet_dev"); + remove_proc_entry("decnet_dev", init_net.proc_net); dn_dev_devices_off(); } diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c index e36614eccc04..57dc159245ec 100644 --- a/net/decnet/dn_fib.c +++ b/net/decnet/dn_fib.c @@ -145,22 +145,10 @@ static inline struct dn_fib_info *dn_fib_find_info(const struct dn_fib_info *nfi return NULL; } -__le16 dn_fib_get_attr16(struct rtattr *attr, int attrlen, int type) +static int dn_fib_count_nhs(const struct nlattr *attr) { - while(RTA_OK(attr,attrlen)) { - if (attr->rta_type == type) - return *(__le16*)RTA_DATA(attr); - attr = RTA_NEXT(attr, attrlen); - } - - return 0; -} - -static int dn_fib_count_nhs(struct rtattr *rta) -{ - int nhs = 0; - struct rtnexthop *nhp = RTA_DATA(rta); - int nhlen = RTA_PAYLOAD(rta); + struct rtnexthop *nhp = nla_data(attr); + int nhs = 0, nhlen = nla_len(attr); while(nhlen >= (int)sizeof(struct rtnexthop)) { if ((nhlen -= nhp->rtnh_len) < 0) @@ -172,10 +160,11 @@ static int dn_fib_count_nhs(struct rtattr *rta) return nhs; } -static int dn_fib_get_nhs(struct dn_fib_info *fi, const struct rtattr *rta, const struct rtmsg *r) +static int dn_fib_get_nhs(struct dn_fib_info *fi, const struct nlattr *attr, + const struct rtmsg *r) { - struct rtnexthop *nhp = RTA_DATA(rta); - int nhlen = RTA_PAYLOAD(rta); + struct rtnexthop *nhp = nla_data(attr); + int nhlen = nla_len(attr); change_nexthops(fi) { int attrlen = nhlen - sizeof(struct rtnexthop); @@ -187,7 +176,10 @@ static int dn_fib_get_nhs(struct dn_fib_info *fi, const struct rtattr *rta, cons nh->nh_weight = nhp->rtnh_hops + 1; if (attrlen) { - nh->nh_gw = dn_fib_get_attr16(RTNH_DATA(nhp), attrlen, RTA_GATEWAY); + struct nlattr *gw_attr; + + gw_attr = nla_find((struct nlattr *) (nhp + 1), attrlen, RTA_GATEWAY); + nh->nh_gw = gw_attr ? 
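/*
 * The DECnet hunks from here on convert the last rtattr-based rtnetlink
 * handlers to the nlattr API: attributes nested behind each struct
 * rtnexthop are located with nla_find(), as in dn_fib_get_nhs() here,
 * and top-level attributes are validated once up front against
 * rtm_dn_policy (defined later in dn_route.c). The parse-then-access
 * pattern, sketched:
 *
 *	struct nlattr *attrs[RTA_MAX + 1];
 *	int err;
 *
 *	err = nlmsg_parse(nlh, sizeof(struct rtmsg), attrs, RTA_MAX,
 *			  rtm_dn_policy);
 *	if (err < 0)
 *		return err;
 *
 *	if (attrs[RTA_GATEWAY])
 *		gw = nla_get_le16(attrs[RTA_GATEWAY]);
 */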
nla_get_le16(gw_attr) : 0; } nhp = RTNH_NEXT(nhp); } endfor_nexthops(fi); @@ -268,7 +260,8 @@ out: } -struct dn_fib_info *dn_fib_create_info(const struct rtmsg *r, struct dn_kern_rta *rta, const struct nlmsghdr *nlh, int *errp) +struct dn_fib_info *dn_fib_create_info(const struct rtmsg *r, struct nlattr *attrs[], + const struct nlmsghdr *nlh, int *errp) { int err; struct dn_fib_info *fi = NULL; @@ -281,11 +274,9 @@ struct dn_fib_info *dn_fib_create_info(const struct rtmsg *r, struct dn_kern_rta if (dn_fib_props[r->rtm_type].scope > r->rtm_scope) goto err_inval; - if (rta->rta_mp) { - nhs = dn_fib_count_nhs(rta->rta_mp); - if (nhs == 0) - goto err_inval; - } + if (attrs[RTA_MULTIPATH] && + (nhs = dn_fib_count_nhs(attrs[RTA_MULTIPATH])) == 0) + goto err_inval; fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct dn_fib_nh), GFP_KERNEL); err = -ENOBUFS; @@ -295,53 +286,65 @@ struct dn_fib_info *dn_fib_create_info(const struct rtmsg *r, struct dn_kern_rta fi->fib_protocol = r->rtm_protocol; fi->fib_nhs = nhs; fi->fib_flags = r->rtm_flags; - if (rta->rta_priority) - fi->fib_priority = *rta->rta_priority; - if (rta->rta_mx) { - int attrlen = RTA_PAYLOAD(rta->rta_mx); - struct rtattr *attr = RTA_DATA(rta->rta_mx); - while(RTA_OK(attr, attrlen)) { - unsigned int flavour = attr->rta_type; + if (attrs[RTA_PRIORITY]) + fi->fib_priority = nla_get_u32(attrs[RTA_PRIORITY]); + + if (attrs[RTA_METRICS]) { + struct nlattr *attr; + int rem; - if (flavour) { - if (flavour > RTAX_MAX) + nla_for_each_nested(attr, attrs[RTA_METRICS], rem) { + int type = nla_type(attr); + + if (type) { + if (type > RTAX_MAX || nla_len(attr) < 4) goto err_inval; - fi->fib_metrics[flavour-1] = *(unsigned int *)RTA_DATA(attr); + + fi->fib_metrics[type-1] = nla_get_u32(attr); } - attr = RTA_NEXT(attr, attrlen); } } - if (rta->rta_prefsrc) - memcpy(&fi->fib_prefsrc, rta->rta_prefsrc, 2); - if (rta->rta_mp) { - if ((err = dn_fib_get_nhs(fi, rta->rta_mp, r)) != 0) + if (attrs[RTA_PREFSRC]) + fi->fib_prefsrc = nla_get_le16(attrs[RTA_PREFSRC]); + + if (attrs[RTA_MULTIPATH]) { + if ((err = dn_fib_get_nhs(fi, attrs[RTA_MULTIPATH], r)) != 0) goto failure; - if (rta->rta_oif && fi->fib_nh->nh_oif != *rta->rta_oif) + + if (attrs[RTA_OIF] && + fi->fib_nh->nh_oif != nla_get_u32(attrs[RTA_OIF])) goto err_inval; - if (rta->rta_gw && memcmp(&fi->fib_nh->nh_gw, rta->rta_gw, 2)) + + if (attrs[RTA_GATEWAY] && + fi->fib_nh->nh_gw != nla_get_le16(attrs[RTA_GATEWAY])) goto err_inval; } else { struct dn_fib_nh *nh = fi->fib_nh; - if (rta->rta_oif) - nh->nh_oif = *rta->rta_oif; - if (rta->rta_gw) - memcpy(&nh->nh_gw, rta->rta_gw, 2); + + if (attrs[RTA_OIF]) + nh->nh_oif = nla_get_u32(attrs[RTA_OIF]); + + if (attrs[RTA_GATEWAY]) + nh->nh_gw = nla_get_le16(attrs[RTA_GATEWAY]); + nh->nh_flags = r->rtm_flags; nh->nh_weight = 1; } if (r->rtm_type == RTN_NAT) { - if (rta->rta_gw == NULL || nhs != 1 || rta->rta_oif) + if (!attrs[RTA_GATEWAY] || nhs != 1 || attrs[RTA_OIF]) goto err_inval; - memcpy(&fi->fib_nh->nh_gw, rta->rta_gw, 2); + + fi->fib_nh->nh_gw = nla_get_le16(attrs[RTA_GATEWAY]); goto link_it; } if (dn_fib_props[r->rtm_type].error) { - if (rta->rta_gw || rta->rta_oif || rta->rta_mp) + if (attrs[RTA_GATEWAY] || attrs[RTA_OIF] || attrs[RTA_MULTIPATH]) goto err_inval; + goto link_it; } @@ -367,8 +370,8 @@ struct dn_fib_info *dn_fib_create_info(const struct rtmsg *r, struct dn_kern_rta } if (fi->fib_prefsrc) { - if (r->rtm_type != RTN_LOCAL || rta->rta_dst == NULL || - memcmp(&fi->fib_prefsrc, rta->rta_dst, 2)) + if (r->rtm_type != RTN_LOCAL || !attrs[RTA_DST] || + 
fi->fib_prefsrc != nla_get_le16(attrs[RTA_DST])) if (dnet_addr_type(fi->fib_prefsrc) != RTN_LOCAL) goto err_inval; } @@ -486,39 +489,21 @@ void dn_fib_select_multipath(const struct flowidn *fld, struct dn_fib_res *res) spin_unlock_bh(&dn_fib_multipath_lock); } - -static int dn_fib_check_attr(struct rtmsg *r, struct rtattr **rta) -{ - int i; - - for(i = 1; i <= RTA_MAX; i++) { - struct rtattr *attr = rta[i-1]; - if (attr) { - if (RTA_PAYLOAD(attr) < 4 && RTA_PAYLOAD(attr) != 2) - return -EINVAL; - if (i != RTA_MULTIPATH && i != RTA_METRICS && - i != RTA_TABLE) - rta[i-1] = (struct rtattr *)RTA_DATA(attr); - } - } - - return 0; -} - -static inline u32 rtm_get_table(struct rtattr **rta, u8 table) +static inline u32 rtm_get_table(struct nlattr *attrs[], u8 table) { - if (rta[RTA_TABLE - 1]) - table = nla_get_u32((struct nlattr *) rta[RTA_TABLE - 1]); + if (attrs[RTA_TABLE]) + table = nla_get_u32(attrs[RTA_TABLE]); return table; } -static int dn_fib_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +static int dn_fib_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); struct dn_fib_table *tb; - struct rtattr **rta = arg; - struct rtmsg *r = NLMSG_DATA(nlh); + struct rtmsg *r = nlmsg_data(nlh); + struct nlattr *attrs[RTA_MAX+1]; + int err; if (!capable(CAP_NET_ADMIN)) return -EPERM; @@ -526,22 +511,24 @@ static int dn_fib_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, void * if (!net_eq(net, &init_net)) return -EINVAL; - if (dn_fib_check_attr(r, rta)) - return -EINVAL; + err = nlmsg_parse(nlh, sizeof(*r), attrs, RTA_MAX, rtm_dn_policy); + if (err < 0) + return err; - tb = dn_fib_get_table(rtm_get_table(rta, r->rtm_table), 0); - if (tb) - return tb->delete(tb, r, (struct dn_kern_rta *)rta, nlh, &NETLINK_CB(skb)); + tb = dn_fib_get_table(rtm_get_table(attrs, r->rtm_table), 0); + if (!tb) + return -ESRCH; - return -ESRCH; + return tb->delete(tb, r, attrs, nlh, &NETLINK_CB(skb)); } -static int dn_fib_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +static int dn_fib_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); struct dn_fib_table *tb; - struct rtattr **rta = arg; - struct rtmsg *r = NLMSG_DATA(nlh); + struct rtmsg *r = nlmsg_data(nlh); + struct nlattr *attrs[RTA_MAX+1]; + int err; if (!capable(CAP_NET_ADMIN)) return -EPERM; @@ -549,14 +536,15 @@ static int dn_fib_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, void * if (!net_eq(net, &init_net)) return -EINVAL; - if (dn_fib_check_attr(r, rta)) - return -EINVAL; + err = nlmsg_parse(nlh, sizeof(*r), attrs, RTA_MAX, rtm_dn_policy); + if (err < 0) + return err; - tb = dn_fib_get_table(rtm_get_table(rta, r->rtm_table), 1); - if (tb) - return tb->insert(tb, r, (struct dn_kern_rta *)rta, nlh, &NETLINK_CB(skb)); + tb = dn_fib_get_table(rtm_get_table(attrs, r->rtm_table), 1); + if (!tb) + return -ENOBUFS; - return -ENOBUFS; + return tb->insert(tb, r, attrs, nlh, &NETLINK_CB(skb)); } static void fib_magic(int cmd, int type, __le16 dst, int dst_len, struct dn_ifaddr *ifa) @@ -566,10 +554,31 @@ static void fib_magic(int cmd, int type, __le16 dst, int dst_len, struct dn_ifad struct nlmsghdr nlh; struct rtmsg rtm; } req; - struct dn_kern_rta rta; + struct { + struct nlattr hdr; + __le16 dst; + } dst_attr = { + .dst = dst, + }; + struct { + struct nlattr hdr; + __le16 prefsrc; + } prefsrc_attr = { + .prefsrc = ifa->ifa_local, + }; + struct { + struct nlattr hdr; + u32 oif; + } oif_attr = { + .oif = 
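/*
 * fib_magic() has no netlink message to parse, so it fabricates the
 * attribute array by hand: each on-stack struct pairs an nlattr header
 * with its payload, and attrs[] then has the same shape as the output
 * of nlmsg_parse(). Trimmed to a single attribute:
 *
 *	struct {
 *		struct nlattr hdr;
 *		__le16 dst;
 *	} dst_attr = {
 *		.dst = dst,
 *	};
 *	struct nlattr *attrs[RTA_MAX + 1] = {
 *		[RTA_DST] = (struct nlattr *)&dst_attr,
 *	};
 */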
ifa->ifa_dev->dev->ifindex, + }; + struct nlattr *attrs[RTA_MAX+1] = { + [RTA_DST] = (struct nlattr *) &dst_attr, + [RTA_PREFSRC] = (struct nlattr * ) &prefsrc_attr, + [RTA_OIF] = (struct nlattr *) &oif_attr, + }; memset(&req.rtm, 0, sizeof(req.rtm)); - memset(&rta, 0, sizeof(rta)); if (type == RTN_UNICAST) tb = dn_fib_get_table(RT_MIN_TABLE, 1); @@ -591,14 +600,10 @@ static void fib_magic(int cmd, int type, __le16 dst, int dst_len, struct dn_ifad req.rtm.rtm_scope = (type != RTN_LOCAL ? RT_SCOPE_LINK : RT_SCOPE_HOST); req.rtm.rtm_type = type; - rta.rta_dst = &dst; - rta.rta_prefsrc = &ifa->ifa_local; - rta.rta_oif = &ifa->ifa_dev->dev->ifindex; - if (cmd == RTM_NEWROUTE) - tb->insert(tb, &req.rtm, &rta, &req.nlh, NULL); + tb->insert(tb, &req.rtm, attrs, &req.nlh, NULL); else - tb->delete(tb, &req.rtm, &rta, &req.nlh, NULL); + tb->delete(tb, &req.rtm, attrs, &req.nlh, NULL); } static void dn_fib_add_ifaddr(struct dn_ifaddr *ifa) diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c index 3aede1b459fd..f8637f93d318 100644 --- a/net/decnet/dn_neigh.c +++ b/net/decnet/dn_neigh.c @@ -95,7 +95,7 @@ static u32 dn_neigh_hash(const void *pkey, struct neigh_table dn_neigh_table = { .family = PF_DECnet, - .entry_size = sizeof(struct dn_neigh), + .entry_size = NEIGH_ENTRY_SIZE(sizeof(struct dn_neigh)), .key_len = sizeof(__le16), .hash = dn_neigh_hash, .constructor = dn_neigh_construct, @@ -590,11 +590,12 @@ static const struct file_operations dn_neigh_seq_fops = { void __init dn_neigh_init(void) { neigh_table_init(&dn_neigh_table); - proc_net_fops_create(&init_net, "decnet_neigh", S_IRUGO, &dn_neigh_seq_fops); + proc_create("decnet_neigh", S_IRUGO, init_net.proc_net, + &dn_neigh_seq_fops); } void __exit dn_neigh_cleanup(void) { - proc_net_remove(&init_net, "decnet_neigh"); + remove_proc_entry("decnet_neigh", init_net.proc_net); neigh_table_clear(&dn_neigh_table); } diff --git a/net/decnet/dn_nsp_out.c b/net/decnet/dn_nsp_out.c index 8a96047c7c94..1aaa51ebbda6 100644 --- a/net/decnet/dn_nsp_out.c +++ b/net/decnet/dn_nsp_out.c @@ -598,7 +598,7 @@ void dn_nsp_send_disc(struct sock *sk, unsigned char msgflg, if (reason == 0) reason = le16_to_cpu(scp->discdata_out.opt_status); - dn_nsp_do_disc(sk, msgflg, reason, gfp, sk->sk_dst_cache, ddl, + dn_nsp_do_disc(sk, msgflg, reason, gfp, __sk_dst_get(sk), ddl, scp->discdata_out.opt_data, scp->addrrem, scp->addrloc); } diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index b57419cc41a4..fe32388ea24f 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -1282,7 +1282,7 @@ static int dn_route_output_key(struct dst_entry **pprt, struct flowidn *flp, int return err; } -int dn_route_output_sock(struct dst_entry **pprt, struct flowidn *fl, struct sock *sk, int flags) +int dn_route_output_sock(struct dst_entry __rcu **pprt, struct flowidn *fl, struct sock *sk, int flags) { int err; @@ -1613,23 +1613,41 @@ errout: return -EMSGSIZE; } +const struct nla_policy rtm_dn_policy[RTA_MAX + 1] = { + [RTA_DST] = { .type = NLA_U16 }, + [RTA_SRC] = { .type = NLA_U16 }, + [RTA_IIF] = { .type = NLA_U32 }, + [RTA_OIF] = { .type = NLA_U32 }, + [RTA_GATEWAY] = { .type = NLA_U16 }, + [RTA_PRIORITY] = { .type = NLA_U32 }, + [RTA_PREFSRC] = { .type = NLA_U16 }, + [RTA_METRICS] = { .type = NLA_NESTED }, + [RTA_MULTIPATH] = { .type = NLA_NESTED }, + [RTA_TABLE] = { .type = NLA_U32 }, + [RTA_MARK] = { .type = NLA_U32 }, +}; + /* * This is called by both endnodes and routers now. 
*/ -static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void *arg) +static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh) { struct net *net = sock_net(in_skb->sk); - struct rtattr **rta = arg; struct rtmsg *rtm = nlmsg_data(nlh); struct dn_route *rt = NULL; struct dn_skb_cb *cb; int err; struct sk_buff *skb; struct flowidn fld; + struct nlattr *tb[RTA_MAX+1]; if (!net_eq(net, &init_net)) return -EINVAL; + err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_dn_policy); + if (err < 0) + return err; + memset(&fld, 0, sizeof(fld)); fld.flowidn_proto = DNPROTO_NSP; @@ -1639,12 +1657,14 @@ static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void skb_reset_mac_header(skb); cb = DN_SKB_CB(skb); - if (rta[RTA_SRC-1]) - memcpy(&fld.saddr, RTA_DATA(rta[RTA_SRC-1]), 2); - if (rta[RTA_DST-1]) - memcpy(&fld.daddr, RTA_DATA(rta[RTA_DST-1]), 2); - if (rta[RTA_IIF-1]) - memcpy(&fld.flowidn_iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int)); + if (tb[RTA_SRC]) + fld.saddr = nla_get_le16(tb[RTA_SRC]); + + if (tb[RTA_DST]) + fld.daddr = nla_get_le16(tb[RTA_DST]); + + if (tb[RTA_IIF]) + fld.flowidn_iif = nla_get_u32(tb[RTA_IIF]); if (fld.flowidn_iif) { struct net_device *dev; @@ -1669,10 +1689,9 @@ static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void if (!err && -rt->dst.error) err = rt->dst.error; } else { - int oif = 0; - if (rta[RTA_OIF - 1]) - memcpy(&oif, RTA_DATA(rta[RTA_OIF - 1]), sizeof(int)); - fld.flowidn_oif = oif; + if (tb[RTA_OIF]) + fld.flowidn_oif = nla_get_u32(tb[RTA_OIF]); + err = dn_route_output_key((struct dst_entry **)&rt, &fld, 0); } @@ -1901,7 +1920,8 @@ void __init dn_route_init(void) dn_dst_ops.gc_thresh = (dn_rt_hash_mask + 1); - proc_net_fops_create(&init_net, "decnet_cache", S_IRUGO, &dn_rt_cache_seq_fops); + proc_create("decnet_cache", S_IRUGO, init_net.proc_net, + &dn_rt_cache_seq_fops); #ifdef CONFIG_DECNET_ROUTER rtnl_register(PF_DECnet, RTM_GETROUTE, dn_cache_getroute, @@ -1917,7 +1937,7 @@ void __exit dn_route_cleanup(void) del_timer(&dn_route_timer); dn_run_flush(0); - proc_net_remove(&init_net, "decnet_cache"); + remove_proc_entry("decnet_cache", init_net.proc_net); dst_entries_destroy(&dn_dst_ops); } diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c index f968c1b58f47..86e3807052e9 100644 --- a/net/decnet/dn_table.c +++ b/net/decnet/dn_table.c @@ -19,7 +19,6 @@ #include <linux/sockios.h> #include <linux/init.h> #include <linux/skbuff.h> -#include <linux/netlink.h> #include <linux/rtnetlink.h> #include <linux/proc_fs.h> #include <linux/netdevice.h> @@ -224,26 +223,27 @@ static struct dn_zone *dn_new_zone(struct dn_hash *table, int z) } -static int dn_fib_nh_match(struct rtmsg *r, struct nlmsghdr *nlh, struct dn_kern_rta *rta, struct dn_fib_info *fi) +static int dn_fib_nh_match(struct rtmsg *r, struct nlmsghdr *nlh, struct nlattr *attrs[], struct dn_fib_info *fi) { struct rtnexthop *nhp; int nhlen; - if (rta->rta_priority && *rta->rta_priority != fi->fib_priority) + if (attrs[RTA_PRIORITY] && + nla_get_u32(attrs[RTA_PRIORITY]) != fi->fib_priority) return 1; - if (rta->rta_oif || rta->rta_gw) { - if ((!rta->rta_oif || *rta->rta_oif == fi->fib_nh->nh_oif) && - (!rta->rta_gw || memcmp(rta->rta_gw, &fi->fib_nh->nh_gw, 2) == 0)) + if (attrs[RTA_OIF] || attrs[RTA_GATEWAY]) { + if ((!attrs[RTA_OIF] || nla_get_u32(attrs[RTA_OIF]) == fi->fib_nh->nh_oif) && + (!attrs[RTA_GATEWAY] || nla_get_le16(attrs[RTA_GATEWAY]) != fi->fib_nh->nh_gw)) return 0; return 1; } - if (rta->rta_mp == NULL) + if 
(!attrs[RTA_MULTIPATH]) return 0; - nhp = RTA_DATA(rta->rta_mp); - nhlen = RTA_PAYLOAD(rta->rta_mp); + nhp = nla_data(attrs[RTA_MULTIPATH]); + nhlen = nla_len(attrs[RTA_MULTIPATH]); for_nexthops(fi) { int attrlen = nhlen - sizeof(struct rtnexthop); @@ -254,7 +254,10 @@ static int dn_fib_nh_match(struct rtmsg *r, struct nlmsghdr *nlh, struct dn_kern if (nhp->rtnh_ifindex && nhp->rtnh_ifindex != nh->nh_oif) return 1; if (attrlen) { - gw = dn_fib_get_attr16(RTNH_DATA(nhp), attrlen, RTA_GATEWAY); + struct nlattr *gw_attr; + + gw_attr = nla_find((struct nlattr *) (nhp + 1), attrlen, RTA_GATEWAY); + gw = gw_attr ? nla_get_le16(gw_attr) : 0; if (gw && gw != nh->nh_gw) return 1; @@ -483,13 +486,12 @@ int dn_fib_dump(struct sk_buff *skb, struct netlink_callback *cb) unsigned int h, s_h; unsigned int e = 0, s_e; struct dn_fib_table *tb; - struct hlist_node *node; int dumped = 0; if (!net_eq(net, &init_net)) return 0; - if (NLMSG_PAYLOAD(cb->nlh, 0) >= sizeof(struct rtmsg) && + if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) && ((struct rtmsg *)nlmsg_data(cb->nlh))->rtm_flags&RTM_F_CLONED) return dn_cache_dump(skb, cb); @@ -498,7 +500,7 @@ int dn_fib_dump(struct sk_buff *skb, struct netlink_callback *cb) for (h = s_h; h < DN_FIB_TABLE_HASHSZ; h++, s_h = 0) { e = 0; - hlist_for_each_entry(tb, node, &dn_fib_table_hash[h], hlist) { + hlist_for_each_entry(tb, &dn_fib_table_hash[h], hlist) { if (e < s_e) goto next; if (dumped) @@ -518,7 +520,8 @@ out: return skb->len; } -static int dn_fib_table_insert(struct dn_fib_table *tb, struct rtmsg *r, struct dn_kern_rta *rta, struct nlmsghdr *n, struct netlink_skb_parms *req) +static int dn_fib_table_insert(struct dn_fib_table *tb, struct rtmsg *r, struct nlattr *attrs[], + struct nlmsghdr *n, struct netlink_skb_parms *req) { struct dn_hash *table = (struct dn_hash *)tb->data; struct dn_fib_node *new_f, *f, **fp, **del_fp; @@ -537,15 +540,14 @@ static int dn_fib_table_insert(struct dn_fib_table *tb, struct rtmsg *r, struct return -ENOBUFS; dz_key_0(key); - if (rta->rta_dst) { - __le16 dst; - memcpy(&dst, rta->rta_dst, 2); + if (attrs[RTA_DST]) { + __le16 dst = nla_get_le16(attrs[RTA_DST]); if (dst & ~DZ_MASK(dz)) return -EINVAL; key = dz_key(dst, dz); } - if ((fi = dn_fib_create_info(r, rta, n, &err)) == NULL) + if ((fi = dn_fib_create_info(r, attrs, n, &err)) == NULL) return err; if (dz->dz_nent > (dz->dz_divisor << 2) && @@ -655,7 +657,8 @@ out: } -static int dn_fib_table_delete(struct dn_fib_table *tb, struct rtmsg *r, struct dn_kern_rta *rta, struct nlmsghdr *n, struct netlink_skb_parms *req) +static int dn_fib_table_delete(struct dn_fib_table *tb, struct rtmsg *r, struct nlattr *attrs[], + struct nlmsghdr *n, struct netlink_skb_parms *req) { struct dn_hash *table = (struct dn_hash*)tb->data; struct dn_fib_node **fp, **del_fp, *f; @@ -672,9 +675,8 @@ static int dn_fib_table_delete(struct dn_fib_table *tb, struct rtmsg *r, struct return -ESRCH; dz_key_0(key); - if (rta->rta_dst) { - __le16 dst; - memcpy(&dst, rta->rta_dst, 2); + if (attrs[RTA_DST]) { + __le16 dst = nla_get_le16(attrs[RTA_DST]); if (dst & ~DZ_MASK(dz)) return -EINVAL; key = dz_key(dst, dz); @@ -704,7 +706,7 @@ static int dn_fib_table_delete(struct dn_fib_table *tb, struct rtmsg *r, struct (r->rtm_scope == RT_SCOPE_NOWHERE || f->fn_scope == r->rtm_scope) && (!r->rtm_protocol || fi->fib_protocol == r->rtm_protocol) && - dn_fib_nh_match(r, n, rta, fi) == 0) + dn_fib_nh_match(r, n, attrs, fi) == 0) del_fp = fp; } @@ -828,7 +830,6 @@ out: struct dn_fib_table *dn_fib_get_table(u32 n, int create) { 
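/*
 * The iterator changes in this file track the tree-wide
 * hlist_for_each_entry() signature change: the macros now derive the
 * cursor from the entry itself, so the scratch struct hlist_node *
 * disappears from every caller. As used in dn_fib_get_table() below,
 *
 *	struct hlist_node *node;
 *	hlist_for_each_entry_rcu(t, node, &dn_fib_table_hash[h], hlist)
 *
 * becomes simply
 *
 *	hlist_for_each_entry_rcu(t, &dn_fib_table_hash[h], hlist)
 */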
struct dn_fib_table *t; - struct hlist_node *node; unsigned int h; if (n < RT_TABLE_MIN) @@ -839,7 +840,7 @@ struct dn_fib_table *dn_fib_get_table(u32 n, int create) h = n & (DN_FIB_TABLE_HASHSZ - 1); rcu_read_lock(); - hlist_for_each_entry_rcu(t, node, &dn_fib_table_hash[h], hlist) { + hlist_for_each_entry_rcu(t, &dn_fib_table_hash[h], hlist) { if (t->n == n) { rcu_read_unlock(); return t; @@ -885,11 +886,10 @@ void dn_fib_flush(void) { int flushed = 0; struct dn_fib_table *tb; - struct hlist_node *node; unsigned int h; for (h = 0; h < DN_FIB_TABLE_HASHSZ; h++) { - hlist_for_each_entry(tb, node, &dn_fib_table_hash[h], hlist) + hlist_for_each_entry(tb, &dn_fib_table_hash[h], hlist) flushed += tb->flush(tb); } @@ -908,12 +908,12 @@ void __init dn_fib_table_init(void) void __exit dn_fib_table_cleanup(void) { struct dn_fib_table *t; - struct hlist_node *node, *next; + struct hlist_node *next; unsigned int h; write_lock(&dn_fib_tables_lock); for (h = 0; h < DN_FIB_TABLE_HASHSZ; h++) { - hlist_for_each_entry_safe(t, node, next, &dn_fib_table_hash[h], + hlist_for_each_entry_safe(t, next, &dn_fib_table_hash[h], hlist) { hlist_del(&t->hlist); kfree(t); diff --git a/net/decnet/netfilter/Kconfig b/net/decnet/netfilter/Kconfig index 2f81de5e752f..8d7c109d5109 100644 --- a/net/decnet/netfilter/Kconfig +++ b/net/decnet/netfilter/Kconfig @@ -3,7 +3,7 @@ # menu "DECnet: Netfilter Configuration" - depends on DECNET && NETFILTER && EXPERIMENTAL + depends on DECNET && NETFILTER depends on NETFILTER_ADVANCED config DECNET_NF_GRABULATOR diff --git a/net/decnet/netfilter/dn_rtmsg.c b/net/decnet/netfilter/dn_rtmsg.c index dfe42012a044..2a7efe388344 100644 --- a/net/decnet/netfilter/dn_rtmsg.c +++ b/net/decnet/netfilter/dn_rtmsg.c @@ -19,7 +19,7 @@ #include <linux/netdevice.h> #include <linux/netfilter.h> #include <linux/spinlock.h> -#include <linux/netlink.h> +#include <net/netlink.h> #include <linux/netfilter_decnet.h> #include <net/sock.h> @@ -39,21 +39,21 @@ static struct sk_buff *dnrmg_build_message(struct sk_buff *rt_skb, int *errp) unsigned char *ptr; struct nf_dn_rtmsg *rtm; - size = NLMSG_SPACE(rt_skb->len); - size += NLMSG_ALIGN(sizeof(struct nf_dn_rtmsg)); - skb = alloc_skb(size, GFP_ATOMIC); + size = NLMSG_ALIGN(rt_skb->len) + + NLMSG_ALIGN(sizeof(struct nf_dn_rtmsg)); + skb = nlmsg_new(size, GFP_ATOMIC); if (!skb) { *errp = -ENOMEM; return NULL; } old_tail = skb->tail; - nlh = nlmsg_put(skb, 0, 0, 0, size - sizeof(*nlh), 0); + nlh = nlmsg_put(skb, 0, 0, 0, size, 0); if (!nlh) { kfree_skb(skb); *errp = -ENOMEM; return NULL; } - rtm = (struct nf_dn_rtmsg *)NLMSG_DATA(nlh); + rtm = (struct nf_dn_rtmsg *)nlmsg_data(nlh); rtm->nfdn_ifindex = rt_skb->dev->ifindex; ptr = NFDN_RTMSG(rtm); skb_copy_from_linear_data(rt_skb, ptr, rt_skb->len); diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 45295ca09571..0eb5d5e76dfb 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -1,6 +1,7 @@ /* * net/dsa/dsa.c - Hardware switch handling * Copyright (c) 2008-2009 Marvell Semiconductor + * Copyright (c) 2013 Florian Fainelli <florian@openwrt.org> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -14,6 +15,9 @@ #include <linux/slab.h> #include <linux/module.h> #include <net/dsa.h> +#include <linux/of.h> +#include <linux/of_mdio.h> +#include <linux/of_platform.h> #include "dsa_priv.h" char dsa_driver_version[] = "0.1"; @@ -80,6 +84,7 @@ dsa_switch_setup(struct dsa_switch_tree *dst, int index, int ret; char *name; int i; + bool 
valid_name_found = false; /* * Probe for switch model. */ @@ -131,8 +136,13 @@ dsa_switch_setup(struct dsa_switch_tree *dst, int index, } else { ds->phys_port_mask |= 1 << i; } + valid_name_found = true; } + if (!valid_name_found && i == DSA_MAX_PORTS) { + ret = -EINVAL; + goto out; + } /* * If the CPU connects to this switch, set the switch tree @@ -281,34 +291,239 @@ static struct net_device *dev_to_net_device(struct device *dev) return NULL; } +#ifdef CONFIG_OF +static int dsa_of_setup_routing_table(struct dsa_platform_data *pd, + struct dsa_chip_data *cd, + int chip_index, + struct device_node *link) +{ + int ret; + const __be32 *reg; + int link_port_addr; + int link_sw_addr; + struct device_node *parent_sw; + int len; + + parent_sw = of_get_parent(link); + if (!parent_sw) + return -EINVAL; + + reg = of_get_property(parent_sw, "reg", &len); + if (!reg || (len != sizeof(*reg) * 2)) + return -EINVAL; + + link_sw_addr = be32_to_cpup(reg + 1); + + if (link_sw_addr >= pd->nr_chips) + return -EINVAL; + + /* First time routing table allocation */ + if (!cd->rtable) { + cd->rtable = kmalloc(pd->nr_chips * sizeof(s8), GFP_KERNEL); + if (!cd->rtable) + return -ENOMEM; + + /* default to no valid uplink/downlink */ + memset(cd->rtable, -1, pd->nr_chips * sizeof(s8)); + } + + reg = of_get_property(link, "reg", NULL); + if (!reg) { + ret = -EINVAL; + goto out; + } + + link_port_addr = be32_to_cpup(reg); + + cd->rtable[link_sw_addr] = link_port_addr; + + return 0; +out: + kfree(cd->rtable); + return ret; +} + +static void dsa_of_free_platform_data(struct dsa_platform_data *pd) +{ + int i; + int port_index; + + for (i = 0; i < pd->nr_chips; i++) { + port_index = 0; + while (port_index < DSA_MAX_PORTS) { + if (pd->chip[i].port_names[port_index]) + kfree(pd->chip[i].port_names[port_index]); + port_index++; + } + kfree(pd->chip[i].rtable); + } + kfree(pd->chip); +} + +static int dsa_of_probe(struct platform_device *pdev) +{ + struct device_node *np = pdev->dev.of_node; + struct device_node *child, *mdio, *ethernet, *port, *link; + struct mii_bus *mdio_bus; + struct platform_device *ethernet_dev; + struct dsa_platform_data *pd; + struct dsa_chip_data *cd; + const char *port_name; + int chip_index, port_index; + const unsigned int *sw_addr, *port_reg; + int ret; + + mdio = of_parse_phandle(np, "dsa,mii-bus", 0); + if (!mdio) + return -EINVAL; + + mdio_bus = of_mdio_find_bus(mdio); + if (!mdio_bus) + return -EINVAL; + + ethernet = of_parse_phandle(np, "dsa,ethernet", 0); + if (!ethernet) + return -EINVAL; + + ethernet_dev = of_find_device_by_node(ethernet); + if (!ethernet_dev) + return -ENODEV; + + pd = kzalloc(sizeof(*pd), GFP_KERNEL); + if (!pd) + return -ENOMEM; + + pdev->dev.platform_data = pd; + pd->netdev = &ethernet_dev->dev; + pd->nr_chips = of_get_child_count(np); + if (pd->nr_chips > DSA_MAX_SWITCHES) + pd->nr_chips = DSA_MAX_SWITCHES; + + pd->chip = kzalloc(pd->nr_chips * sizeof(struct dsa_chip_data), + GFP_KERNEL); + if (!pd->chip) { + ret = -ENOMEM; + goto out_free; + } + + chip_index = 0; + for_each_available_child_of_node(np, child) { + cd = &pd->chip[chip_index]; + + cd->mii_bus = &mdio_bus->dev; + + sw_addr = of_get_property(child, "reg", NULL); + if (!sw_addr) + continue; + + cd->sw_addr = be32_to_cpup(sw_addr); + if (cd->sw_addr > PHY_MAX_ADDR) + continue; + + for_each_available_child_of_node(child, port) { + port_reg = of_get_property(port, "reg", NULL); + if (!port_reg) + continue; + + port_index = be32_to_cpup(port_reg); + + port_name = of_get_property(port, "label", NULL); + if 
(!port_name) + continue; + + cd->port_names[port_index] = kstrdup(port_name, + GFP_KERNEL); + if (!cd->port_names[port_index]) { + ret = -ENOMEM; + goto out_free_chip; + } + + link = of_parse_phandle(port, "link", 0); + + if (!strcmp(port_name, "dsa") && link && + pd->nr_chips > 1) { + ret = dsa_of_setup_routing_table(pd, cd, + chip_index, link); + if (ret) + goto out_free_chip; + } + + if (port_index == DSA_MAX_PORTS) + break; + } + } + + return 0; + +out_free_chip: + dsa_of_free_platform_data(pd); +out_free: + kfree(pd); + pdev->dev.platform_data = NULL; + return ret; +} + +static void dsa_of_remove(struct platform_device *pdev) +{ + struct dsa_platform_data *pd = pdev->dev.platform_data; + + if (!pdev->dev.of_node) + return; + + dsa_of_free_platform_data(pd); + kfree(pd); +} +#else +static inline int dsa_of_probe(struct platform_device *pdev) +{ + return 0; +} + +static inline void dsa_of_remove(struct platform_device *pdev) +{ +} +#endif + static int dsa_probe(struct platform_device *pdev) { static int dsa_version_printed; struct dsa_platform_data *pd = pdev->dev.platform_data; struct net_device *dev; struct dsa_switch_tree *dst; - int i; + int i, ret; if (!dsa_version_printed++) printk(KERN_NOTICE "Distributed Switch Architecture " "driver version %s\n", dsa_driver_version); + if (pdev->dev.of_node) { + ret = dsa_of_probe(pdev); + if (ret) + return ret; + + pd = pdev->dev.platform_data; + } + if (pd == NULL || pd->netdev == NULL) return -EINVAL; dev = dev_to_net_device(pd->netdev); - if (dev == NULL) - return -EINVAL; + if (dev == NULL) { + ret = -EINVAL; + goto out; + } if (dev->dsa_ptr != NULL) { dev_put(dev); - return -EEXIST; + ret = -EEXIST; + goto out; } dst = kzalloc(sizeof(*dst), GFP_KERNEL); if (dst == NULL) { dev_put(dev); - return -ENOMEM; + ret = -ENOMEM; + goto out; } platform_set_drvdata(pdev, dst); @@ -360,6 +575,11 @@ static int dsa_probe(struct platform_device *pdev) } return 0; + +out: + dsa_of_remove(pdev); + + return ret; } static int dsa_remove(struct platform_device *pdev) @@ -379,6 +599,8 @@ static int dsa_remove(struct platform_device *pdev) dsa_switch_destroy(ds); } + dsa_of_remove(pdev); + return 0; } @@ -386,6 +608,12 @@ static void dsa_shutdown(struct platform_device *pdev) { } +static const struct of_device_id dsa_of_match_table[] = { + { .compatible = "marvell,dsa", }, + {} +}; +MODULE_DEVICE_TABLE(of, dsa_of_match_table); + static struct platform_driver dsa_driver = { .probe = dsa_probe, .remove = dsa_remove, @@ -393,6 +621,7 @@ static struct platform_driver dsa_driver = { .driver = { .name = "dsa", .owner = THIS_MODULE, + .of_match_table = dsa_of_match_table, }, }; diff --git a/net/dsa/slave.c b/net/dsa/slave.c index e32083d5d8f8..6ebd8fbd9285 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -41,8 +41,8 @@ void dsa_slave_mii_bus_init(struct dsa_switch *ds) ds->slave_mii_bus->name = "dsa slave smi"; ds->slave_mii_bus->read = dsa_slave_phy_read; ds->slave_mii_bus->write = dsa_slave_phy_write; - snprintf(ds->slave_mii_bus->id, MII_BUS_ID_SIZE, "%s:%.2x", - ds->master_mii_bus->id, ds->pd->sw_addr); + snprintf(ds->slave_mii_bus->id, MII_BUS_ID_SIZE, "dsa-%d:%.2x", + ds->index, ds->pd->sw_addr); ds->slave_mii_bus->parent = &ds->master_mii_bus->dev; } @@ -203,10 +203,10 @@ dsa_slave_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) static void dsa_slave_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *drvinfo) { - strncpy(drvinfo->driver, "dsa", 32); - strncpy(drvinfo->version, dsa_driver_version, 32); - strncpy(drvinfo->fw_version, 
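/*
 * strncpy() leaves the destination unterminated whenever the source
 * fills the buffer; strlcpy() always NUL-terminates and is sized from
 * the destination itself, which also removes the magic "32" constants:
 *
 *	strlcpy(drvinfo->driver, "dsa", sizeof(drvinfo->driver));
 */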
"N/A", 32); - strncpy(drvinfo->bus_info, "platform", 32); + strlcpy(drvinfo->driver, "dsa", sizeof(drvinfo->driver)); + strlcpy(drvinfo->version, dsa_driver_version, sizeof(drvinfo->version)); + strlcpy(drvinfo->fw_version, "N/A", sizeof(drvinfo->fw_version)); + strlcpy(drvinfo->bus_info, "platform", sizeof(drvinfo->bus_info)); } static int dsa_slave_nway_reset(struct net_device *dev) @@ -391,7 +391,7 @@ dsa_slave_create(struct dsa_switch *ds, struct device *parent, if (p->phy != NULL) { phy_attach(slave_dev, dev_name(&p->phy->dev), - 0, PHY_INTERFACE_MODE_GMII); + PHY_INTERFACE_MODE_GMII); p->phy->autoneg = AUTONEG_ENABLE; p->phy->speed = 0; diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index 4efad533e5f6..5359560926bc 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -195,7 +195,7 @@ __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev) if (netdev_uses_trailer_tags(dev)) return htons(ETH_P_TRAILER); - if (ntohs(eth->h_proto) >= 1536) + if (ntohs(eth->h_proto) >= ETH_P_802_3_MIN) return eth->h_proto; /* @@ -272,6 +272,36 @@ void eth_header_cache_update(struct hh_cache *hh, EXPORT_SYMBOL(eth_header_cache_update); /** + * eth_prepare_mac_addr_change - prepare for mac change + * @dev: network device + * @p: socket address + */ +int eth_prepare_mac_addr_change(struct net_device *dev, void *p) +{ + struct sockaddr *addr = p; + + if (!(dev->priv_flags & IFF_LIVE_ADDR_CHANGE) && netif_running(dev)) + return -EBUSY; + if (!is_valid_ether_addr(addr->sa_data)) + return -EADDRNOTAVAIL; + return 0; +} +EXPORT_SYMBOL(eth_prepare_mac_addr_change); + +/** + * eth_commit_mac_addr_change - commit mac change + * @dev: network device + * @p: socket address + */ +void eth_commit_mac_addr_change(struct net_device *dev, void *p) +{ + struct sockaddr *addr = p; + + memcpy(dev->dev_addr, addr->sa_data, ETH_ALEN); +} +EXPORT_SYMBOL(eth_commit_mac_addr_change); + +/** * eth_mac_addr - set new Ethernet hardware address * @dev: network device * @p: socket address @@ -283,15 +313,12 @@ EXPORT_SYMBOL(eth_header_cache_update); */ int eth_mac_addr(struct net_device *dev, void *p) { - struct sockaddr *addr = p; + int ret; - if (!(dev->priv_flags & IFF_LIVE_ADDR_CHANGE) && netif_running(dev)) - return -EBUSY; - if (!is_valid_ether_addr(addr->sa_data)) - return -EADDRNOTAVAIL; - memcpy(dev->dev_addr, addr->sa_data, ETH_ALEN); - /* if device marked as NET_ADDR_RANDOM, reset it */ - dev->addr_assign_type &= ~NET_ADDR_RANDOM; + ret = eth_prepare_mac_addr_change(dev, p); + if (ret < 0) + return ret; + eth_commit_mac_addr_change(dev, p); return 0; } EXPORT_SYMBOL(eth_mac_addr); diff --git a/net/ieee802154/6lowpan.c b/net/ieee802154/6lowpan.c index f651da60f161..55e1fd5b3e56 100644 --- a/net/ieee802154/6lowpan.c +++ b/net/ieee802154/6lowpan.c @@ -104,6 +104,7 @@ static const u8 lowpan_llprefix[] = {0xfe, 0x80}; struct lowpan_dev_info { struct net_device *real_dev; /* real WPAN device ptr */ struct mutex dev_list_mtx; /* mutex for list ops */ + unsigned short fragment_tag; }; struct lowpan_dev_record { @@ -120,7 +121,6 @@ struct lowpan_fragment { struct list_head list; /* fragments list */ }; -static unsigned short fragment_tag; static LIST_HEAD(lowpan_fragments); static DEFINE_SPINLOCK(flist_lock); @@ -284,6 +284,9 @@ lowpan_compress_udp_header(u8 **hc06_ptr, struct sk_buff *skb) /* checksum is always inline */ memcpy(*hc06_ptr, &uh->check, 2); *hc06_ptr += 2; + + /* skip the UDP header */ + skb_pull(skb, sizeof(struct udphdr)); } static inline int lowpan_fetch_skb_u8(struct sk_buff *skb, u8 *val) 
@@ -309,9 +312,8 @@ static inline int lowpan_fetch_skb_u16(struct sk_buff *skb, u16 *val) } static int -lowpan_uncompress_udp_header(struct sk_buff *skb) +lowpan_uncompress_udp_header(struct sk_buff *skb, struct udphdr *uh) { - struct udphdr *uh = udp_hdr(skb); u8 tmp; if (!uh) @@ -358,6 +360,14 @@ lowpan_uncompress_udp_header(struct sk_buff *skb) /* copy checksum */ memcpy(&uh->check, &skb->data[0], 2); skb_pull(skb, 2); + + /* + * UDP length needs to be inferred from the lower layers + * here, we obtain the hint from the remaining size of the + * frame + */ + uh->len = htons(skb->len + sizeof(struct udphdr)); + pr_debug("uncompressed UDP length: src = %d", uh->len); } else { pr_debug("ERROR: unsupported NH format\n"); goto err; @@ -377,17 +387,14 @@ static int lowpan_header_create(struct sk_buff *skb, struct ipv6hdr *hdr; const u8 *saddr = _saddr; const u8 *daddr = _daddr; - u8 *head; + u8 head[100]; struct ieee802154_addr sa, da; + /* TODO: + * if this packet isn't an ipv6 one, where should it be routed? + */ if (type != ETH_P_IPV6) return 0; - /* TODO: - * if this package isn't ipv6 one, where should it be routed? - */ - head = kzalloc(100, GFP_KERNEL); - if (head == NULL) - return -ENOMEM; hdr = ipv6_hdr(skb); hc06_ptr = head + 2; @@ -561,8 +568,6 @@ static int lowpan_header_create(struct sk_buff *skb, skb_pull(skb, sizeof(struct ipv6hdr)); memcpy(skb_push(skb, hc06_ptr - head), head, hc06_ptr - head); - kfree(head); - lowpan_raw_dump_table(__func__, "raw skb data dump", skb->data, skb->len); @@ -577,27 +582,63 @@ static int lowpan_header_create(struct sk_buff *skb, * this isn't implemented in mainline yet, so currently we assign 0xff */ { + mac_cb(skb)->flags = IEEE802154_FC_TYPE_DATA; + mac_cb(skb)->seq = ieee802154_mlme_ops(dev)->get_dsn(dev); + /* prepare wpan address data */ sa.addr_type = IEEE802154_ADDR_LONG; - sa.pan_id = 0xff; + sa.pan_id = ieee802154_mlme_ops(dev)->get_pan_id(dev); - da.addr_type = IEEE802154_ADDR_LONG; - da.pan_id = 0xff; - - memcpy(&(da.hwaddr), daddr, 8); memcpy(&(sa.hwaddr), saddr, 8); + /* intra-PAN communications */ + da.pan_id = ieee802154_mlme_ops(dev)->get_pan_id(dev); - mac_cb(skb)->flags = IEEE802154_FC_TYPE_DATA; + /* + * if the destination address is the broadcast address, use the + * corresponding short address + */ + if (lowpan_is_addr_broadcast(daddr)) { + da.addr_type = IEEE802154_ADDR_SHORT; + da.short_addr = IEEE802154_ADDR_BROADCAST; + } else { + da.addr_type = IEEE802154_ADDR_LONG; + memcpy(&(da.hwaddr), daddr, IEEE802154_ADDR_LEN); + + /* request acknowledgment */ + mac_cb(skb)->flags |= MAC_CB_FLAG_ACKREQ; + } return dev_hard_header(skb, lowpan_dev_info(dev)->real_dev, type, (void *)&da, (void *)&sa, skb->len); } } +static int lowpan_give_skb_to_devices(struct sk_buff *skb) +{ + struct lowpan_dev_record *entry; + struct sk_buff *skb_cp; + int stat = NET_RX_SUCCESS; + + rcu_read_lock(); + list_for_each_entry_rcu(entry, &lowpan_devices, list) + if (lowpan_dev_info(entry->ldev)->real_dev == skb->dev) { + skb_cp = skb_copy(skb, GFP_ATOMIC); + if (!skb_cp) { + stat = -ENOMEM; + break; + } + + skb_cp->dev = entry->ldev; + stat = netif_rx(skb_cp); + } + rcu_read_unlock(); + + return stat; +} + static int lowpan_skb_deliver(struct sk_buff *skb, struct ipv6hdr *hdr) { struct sk_buff *new; - struct lowpan_dev_record *entry; int stat = NET_RX_SUCCESS; new = skb_copy_expand(skb, sizeof(struct ipv6hdr), skb_tailroom(skb), @@ -614,19 +655,7 @@ static int lowpan_skb_deliver(struct sk_buff *skb, struct ipv6hdr *hdr) new->protocol = htons(ETH_P_IPV6); 
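/*
 * lowpan_give_skb_to_devices() above factors the delivery loop out so
 * that the IPHC path here and the new raw-IPv6 path in lowpan_rcv()
 * share it. Every matching 6lowpan device gets its own copy, because
 * netif_rx() consumes the buffer it is handed:
 *
 *	skb_cp = skb_copy(skb, GFP_ATOMIC);
 *	if (!skb_cp) {
 *		stat = -ENOMEM;
 *		break;
 *	}
 *	skb_cp->dev = entry->ldev;
 *	stat = netif_rx(skb_cp);
 */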
new->pkt_type = PACKET_HOST; - rcu_read_lock(); - list_for_each_entry_rcu(entry, &lowpan_devices, list) - if (lowpan_dev_info(entry->ldev)->real_dev == new->dev) { - skb = skb_copy(new, GFP_ATOMIC); - if (!skb) { - stat = -ENOMEM; - break; - } - - skb->dev = entry->ldev; - stat = netif_rx(skb); - } - rcu_read_unlock(); + stat = lowpan_give_skb_to_devices(new); kfree_skb(new); @@ -645,7 +674,7 @@ static void lowpan_fragment_timer_expired(unsigned long entry_addr) } static struct lowpan_fragment * -lowpan_alloc_new_frame(struct sk_buff *skb, u8 len, u16 tag) +lowpan_alloc_new_frame(struct sk_buff *skb, u16 len, u16 tag) { struct lowpan_fragment *frame; @@ -715,7 +744,7 @@ lowpan_process_data(struct sk_buff *skb) { struct lowpan_fragment *frame; /* slen stores the rightmost 8 bits of the 11 bits length */ - u8 slen, offset; + u8 slen, offset = 0; u16 len, tag; bool found = false; @@ -726,6 +755,18 @@ lowpan_process_data(struct sk_buff *skb) /* adds the 3 MSB to the 8 LSB to retrieve the 11 bits length */ len = ((iphc0 & 7) << 8) | slen; + if ((iphc0 & LOWPAN_DISPATCH_MASK) == LOWPAN_DISPATCH_FRAG1) { + pr_debug("%s received a FRAG1 packet (tag: %d, " + "size of the entire IP packet: %d)", + __func__, tag, len); + } else { /* FRAGN */ + if (lowpan_fetch_skb_u8(skb, &offset)) + goto unlock_and_drop; + pr_debug("%s received a FRAGN packet (tag: %d, " + "size of the entire IP packet: %d, " + "offset: %d)", __func__, tag, len, offset * 8); + } + /* * check if frame assembling with the same tag is * already in progress @@ -740,17 +781,13 @@ lowpan_process_data(struct sk_buff *skb) /* alloc new frame structure */ if (!found) { + pr_debug("%s first fragment received for tag %d, " + "begin packet reassembly", __func__, tag); frame = lowpan_alloc_new_frame(skb, len, tag); if (!frame) goto unlock_and_drop; } - if ((iphc0 & LOWPAN_DISPATCH_MASK) == LOWPAN_DISPATCH_FRAG1) - goto unlock_and_drop; - - if (lowpan_fetch_skb_u8(skb, &offset)) /* fetch offset */ - goto unlock_and_drop; - /* if payload fits buffer, copy it */ if (likely((offset * 8 + skb->len) <= frame->length)) skb_copy_to_linear_data_offset(frame->skb, offset * 8, @@ -768,6 +805,9 @@ lowpan_process_data(struct sk_buff *skb) list_del(&frame->list); spin_unlock_bh(&flist_lock); + pr_debug("%s successfully reassembled fragment " + "(tag %d)", __func__, tag); + dev_kfree_skb(skb); skb = frame->skb; kfree(frame); @@ -913,10 +953,35 @@ lowpan_process_data(struct sk_buff *skb) } /* UDP data uncompression */ - if (iphc0 & LOWPAN_IPHC_NH_C) - if (lowpan_uncompress_udp_header(skb)) + if (iphc0 & LOWPAN_IPHC_NH_C) { + struct udphdr uh; + struct sk_buff *new; + if (lowpan_uncompress_udp_header(skb, &uh)) goto drop; + /* + * replace the compressed UDP head by the uncompressed UDP + * header + */ + new = skb_copy_expand(skb, sizeof(struct udphdr), + skb_tailroom(skb), GFP_ATOMIC); + kfree_skb(skb); + + if (!new) + return -ENOMEM; + + skb = new; + + skb_push(skb, sizeof(struct udphdr)); + skb_reset_transport_header(skb); + skb_copy_to_linear_data(skb, &uh, sizeof(struct udphdr)); + + lowpan_raw_dump_table(__func__, "raw UDP header dump", + (u8 *)&uh, sizeof(uh)); + + hdr.nexthdr = UIP_PROTO_UDP; + } + /* Not fragmented package */ hdr.payload_len = htons(skb->len); @@ -964,13 +1029,13 @@ static int lowpan_get_mac_header_length(struct sk_buff *skb) static int lowpan_fragment_xmit(struct sk_buff *skb, u8 *head, - int mlen, int plen, int offset) + int mlen, int plen, int offset, int type) { struct sk_buff *frag; int hlen, ret; - /* if payload length is zero, 
therefore it's a first fragment */ - hlen = (plen == 0 ? LOWPAN_FRAG1_HEAD_SIZE : LOWPAN_FRAGN_HEAD_SIZE); + hlen = (type == LOWPAN_DISPATCH_FRAG1) ? + LOWPAN_FRAG1_HEAD_SIZE : LOWPAN_FRAGN_HEAD_SIZE; lowpan_raw_dump_inline(__func__, "6lowpan fragment header", head, hlen); @@ -998,14 +1063,14 @@ lowpan_fragment_xmit(struct sk_buff *skb, u8 *head, } static int -lowpan_skb_fragmentation(struct sk_buff *skb) +lowpan_skb_fragmentation(struct sk_buff *skb, struct net_device *dev) { int err, header_length, payload_length, tag, offset = 0; u8 head[5]; header_length = lowpan_get_mac_header_length(skb); payload_length = skb->len - header_length; - tag = fragment_tag++; + tag = lowpan_dev_info(dev)->fragment_tag++; /* first fragment header */ head[0] = LOWPAN_DISPATCH_FRAG1 | ((payload_length >> 8) & 0x7); @@ -1013,7 +1078,16 @@ lowpan_skb_fragmentation(struct sk_buff *skb) head[2] = tag >> 8; head[3] = tag & 0xff; - err = lowpan_fragment_xmit(skb, head, header_length, 0, 0); + err = lowpan_fragment_xmit(skb, head, header_length, LOWPAN_FRAG_SIZE, + 0, LOWPAN_DISPATCH_FRAG1); + + if (err) { + pr_debug("%s unable to send FRAG1 packet (tag: %d)", + __func__, tag); + goto exit; + } + + offset = LOWPAN_FRAG_SIZE; /* next fragment header */ head[0] &= ~LOWPAN_DISPATCH_FRAG1; @@ -1028,10 +1102,17 @@ lowpan_skb_fragmentation(struct sk_buff *skb) len = payload_length - offset; err = lowpan_fragment_xmit(skb, head, header_length, - len, offset); + len, offset, LOWPAN_DISPATCH_FRAGN); + if (err) { + pr_debug("%s unable to send a subsequent FRAGN packet " + "(tag: %d, offset: %d)", __func__, tag, offset); + goto exit; + } + offset += len; } +exit: return err; } @@ -1054,14 +1135,14 @@ static netdev_tx_t lowpan_xmit(struct sk_buff *skb, struct net_device *dev) } pr_debug("frame is too big, fragmentation is needed\n"); - err = lowpan_skb_fragmentation(skb); + err = lowpan_skb_fragmentation(skb, dev); error: dev_kfree_skb(skb); out: - if (err < 0) + if (err) pr_debug("ERROR: xmit failed\n"); - return (err < 0 ? NETDEV_TX_BUSY : NETDEV_TX_OK); + return (err < 0) ? NET_XMIT_DROP : err; } static struct wpan_phy *lowpan_get_phy(const struct net_device *dev) @@ -1082,6 +1163,12 @@ static u16 lowpan_get_short_addr(const struct net_device *dev) return ieee802154_mlme_ops(real_dev)->get_short_addr(real_dev); } +static u8 lowpan_get_dsn(const struct net_device *dev) +{ + struct net_device *real_dev = lowpan_dev_info(dev)->real_dev; + return ieee802154_mlme_ops(real_dev)->get_dsn(real_dev); +} + static struct header_ops lowpan_header_ops = { .create = lowpan_header_create, }; @@ -1095,6 +1182,7 @@ static struct ieee802154_mlme_ops lowpan_mlme = { .get_pan_id = lowpan_get_pan_id, .get_phy = lowpan_get_phy, .get_short_addr = lowpan_get_short_addr, + .get_dsn = lowpan_get_dsn, }; static void lowpan_setup(struct net_device *dev) @@ -1137,19 +1225,42 @@ static int lowpan_rcv(struct sk_buff *skb, struct net_device *dev, goto drop; /* check that it's our buffer */ - switch (skb->data[0] & 0xe0) { - case LOWPAN_DISPATCH_IPHC: /* ipv6 datagram */ - case LOWPAN_DISPATCH_FRAG1: /* first fragment header */ - case LOWPAN_DISPATCH_FRAGN: /* next fragments headers */ - local_skb = skb_clone(skb, GFP_ATOMIC); + if (skb->data[0] == LOWPAN_DISPATCH_IPV6) { + /* Copy the packet so that the IPv6 header is + * properly aligned.
+ */ + local_skb = skb_copy_expand(skb, NET_SKB_PAD - 1, + skb_tailroom(skb), GFP_ATOMIC); if (!local_skb) goto drop; - lowpan_process_data(local_skb); + local_skb->protocol = htons(ETH_P_IPV6); + local_skb->pkt_type = PACKET_HOST; + + /* Pull off the 1-byte of 6lowpan header. */ + skb_pull(local_skb, 1); + skb_reset_network_header(local_skb); + skb_set_transport_header(local_skb, sizeof(struct ipv6hdr)); + + lowpan_give_skb_to_devices(local_skb); + + kfree_skb(local_skb); kfree_skb(skb); - break; - default: - break; + } else { + switch (skb->data[0] & 0xe0) { + case LOWPAN_DISPATCH_IPHC: /* ipv6 datagram */ + case LOWPAN_DISPATCH_FRAG1: /* first fragment header */ + case LOWPAN_DISPATCH_FRAGN: /* next fragments headers */ + local_skb = skb_clone(skb, GFP_ATOMIC); + if (!local_skb) + goto drop; + lowpan_process_data(local_skb); + + kfree_skb(skb); + break; + default: + break; + } } return NET_RX_SUCCESS; @@ -1175,6 +1286,7 @@ static int lowpan_newlink(struct net *src_net, struct net_device *dev, return -ENODEV; lowpan_dev_info(dev)->real_dev = real_dev; + lowpan_dev_info(dev)->fragment_tag = 0; mutex_init(&lowpan_dev_info(dev)->dev_list_mtx); entry = kzalloc(sizeof(struct lowpan_dev_record), GFP_KERNEL); @@ -1234,7 +1346,7 @@ static inline int __init lowpan_netlink_init(void) return rtnl_link_register(&lowpan_link_ops); } -static inline void __init lowpan_netlink_fini(void) +static inline void lowpan_netlink_fini(void) { rtnl_link_unregister(&lowpan_link_ops); } diff --git a/net/ieee802154/6lowpan.h b/net/ieee802154/6lowpan.h index 8c2251fb0a3f..4b8f917658b5 100644 --- a/net/ieee802154/6lowpan.h +++ b/net/ieee802154/6lowpan.h @@ -84,7 +84,7 @@ (memcmp(addr1, addr2, length >> 3) == 0) /* local link, i.e. FE80::/10 */ -#define is_addr_link_local(a) (((a)->s6_addr16[0]) == 0x80FE) +#define is_addr_link_local(a) (((a)->s6_addr16[0]) == htons(0xFE80)) /* * check whether we can compress the IID to 16 bits, @@ -92,9 +92,10 @@ */ #define lowpan_is_iid_16_bit_compressable(a) \ ((((a)->s6_addr16[4]) == 0) && \ - (((a)->s6_addr16[5]) == 0) && \ - (((a)->s6_addr16[6]) == 0) && \ - ((((a)->s6_addr[14]) & 0x80) == 0)) + (((a)->s6_addr[10]) == 0) && \ + (((a)->s6_addr[11]) == 0xff) && \ + (((a)->s6_addr[12]) == 0xfe) && \ + (((a)->s6_addr[13]) == 0)) /* multicast address */ #define is_addr_mcast(a) (((a)->s6_addr[0]) == 0xFF) diff --git a/net/ieee802154/Kconfig b/net/ieee802154/Kconfig index 7dee65052925..b2e06df0076c 100644 --- a/net/ieee802154/Kconfig +++ b/net/ieee802154/Kconfig @@ -1,6 +1,5 @@ config IEEE802154 - tristate "IEEE Std 802.15.4 Low-Rate Wireless Personal Area Networks support (EXPERIMENTAL)" - depends on EXPERIMENTAL + tristate "IEEE Std 802.15.4 Low-Rate Wireless Personal Area Networks support" ---help--- IEEE Std 802.15.4 defines a low data rate, low power and low complexity short range wireless personal area networks. 
It was diff --git a/net/ieee802154/dgram.c b/net/ieee802154/dgram.c index 16705611589a..581a59504bd5 100644 --- a/net/ieee802154/dgram.c +++ b/net/ieee802154/dgram.c @@ -291,6 +291,9 @@ static int dgram_recvmsg(struct kiocb *iocb, struct sock *sk, size_t copied = 0; int err = -EOPNOTSUPP; struct sk_buff *skb; + struct sockaddr_ieee802154 *saddr; + + saddr = (struct sockaddr_ieee802154 *)msg->msg_name; skb = skb_recv_datagram(sk, flags, noblock, &err); if (!skb) @@ -309,6 +312,13 @@ static int dgram_recvmsg(struct kiocb *iocb, struct sock *sk, sock_recv_ts_and_drops(msg, sk, skb); + if (saddr) { + saddr->family = AF_IEEE802154; + saddr->addr = mac_cb(skb)->sa; + } + if (addr_len) + *addr_len = sizeof(*saddr); + if (flags & MSG_TRUNC) copied = skb->len; done: @@ -350,7 +360,6 @@ static inline int ieee802154_match_sock(u8 *hw_addr, u16 pan_id, int ieee802154_dgram_deliver(struct net_device *dev, struct sk_buff *skb) { struct sock *sk, *prev = NULL; - struct hlist_node *node; int ret = NET_RX_SUCCESS; u16 pan_id, short_addr; @@ -361,7 +370,7 @@ int ieee802154_dgram_deliver(struct net_device *dev, struct sk_buff *skb) short_addr = ieee802154_mlme_ops(dev)->get_short_addr(dev); read_lock(&dgram_lock); - sk_for_each(sk, node, &dgram_head) { + sk_for_each(sk, &dgram_head) { if (ieee802154_match_sock(dev->dev_addr, pan_id, short_addr, dgram_sk(sk))) { if (prev) { diff --git a/net/ieee802154/netlink.c b/net/ieee802154/netlink.c index 97351e1d07a4..7e49bbcc6967 100644 --- a/net/ieee802154/netlink.c +++ b/net/ieee802154/netlink.c @@ -64,8 +64,8 @@ struct sk_buff *ieee802154_nl_create(int flags, u8 req) int ieee802154_nl_mcast(struct sk_buff *msg, unsigned int group) { - /* XXX: nlh is right at the start of msg */ - void *hdr = genlmsg_data(NLMSG_DATA(msg->data)); + struct nlmsghdr *nlh = nlmsg_hdr(msg); + void *hdr = genlmsg_data(nlmsg_data(nlh)); if (genlmsg_end(msg, hdr) < 0) goto out; @@ -97,8 +97,8 @@ struct sk_buff *ieee802154_nl_new_reply(struct genl_info *info, int ieee802154_nl_reply(struct sk_buff *msg, struct genl_info *info) { - /* XXX: nlh is right at the start of msg */ - void *hdr = genlmsg_data(NLMSG_DATA(msg->data)); + struct nlmsghdr *nlh = nlmsg_hdr(msg); + void *hdr = genlmsg_data(nlmsg_data(nlh)); if (genlmsg_end(msg, hdr) < 0) goto out; diff --git a/net/ieee802154/nl-mac.c b/net/ieee802154/nl-mac.c index 96bb08abece2..b0bdd8c51e9c 100644 --- a/net/ieee802154/nl-mac.c +++ b/net/ieee802154/nl-mac.c @@ -315,7 +315,7 @@ static int ieee802154_associate_req(struct sk_buff *skb, struct net_device *dev; struct ieee802154_addr addr; u8 page; - int ret = -EINVAL; + int ret = -EOPNOTSUPP; if (!info->attrs[IEEE802154_ATTR_CHANNEL] || !info->attrs[IEEE802154_ATTR_COORD_PAN_ID] || @@ -327,6 +327,8 @@ static int ieee802154_associate_req(struct sk_buff *skb, dev = ieee802154_nl_get_dev(info); if (!dev) return -ENODEV; + if (!ieee802154_mlme_ops(dev)->assoc_req) + goto out; if (info->attrs[IEEE802154_ATTR_COORD_HW_ADDR]) { addr.addr_type = IEEE802154_ADDR_LONG; @@ -350,6 +352,7 @@ static int ieee802154_associate_req(struct sk_buff *skb, page, nla_get_u8(info->attrs[IEEE802154_ATTR_CAPABILITY])); +out: dev_put(dev); return ret; } @@ -359,7 +362,7 @@ static int ieee802154_associate_resp(struct sk_buff *skb, { struct net_device *dev; struct ieee802154_addr addr; - int ret = -EINVAL; + int ret = -EOPNOTSUPP; if (!info->attrs[IEEE802154_ATTR_STATUS] || !info->attrs[IEEE802154_ATTR_DEST_HW_ADDR] || @@ -369,6 +372,8 @@ static int ieee802154_associate_resp(struct sk_buff *skb, dev = 
ieee802154_nl_get_dev(info); if (!dev) return -ENODEV; + if (!ieee802154_mlme_ops(dev)->assoc_resp) + goto out; addr.addr_type = IEEE802154_ADDR_LONG; nla_memcpy(addr.hwaddr, info->attrs[IEEE802154_ATTR_DEST_HW_ADDR], @@ -380,6 +385,7 @@ static int ieee802154_associate_resp(struct sk_buff *skb, nla_get_u16(info->attrs[IEEE802154_ATTR_DEST_SHORT_ADDR]), nla_get_u8(info->attrs[IEEE802154_ATTR_STATUS])); +out: dev_put(dev); return ret; } @@ -389,7 +395,7 @@ static int ieee802154_disassociate_req(struct sk_buff *skb, { struct net_device *dev; struct ieee802154_addr addr; - int ret = -EINVAL; + int ret = -EOPNOTSUPP; if ((!info->attrs[IEEE802154_ATTR_DEST_HW_ADDR] && !info->attrs[IEEE802154_ATTR_DEST_SHORT_ADDR]) || @@ -399,6 +405,8 @@ static int ieee802154_disassociate_req(struct sk_buff *skb, dev = ieee802154_nl_get_dev(info); if (!dev) return -ENODEV; + if (!ieee802154_mlme_ops(dev)->disassoc_req) + goto out; if (info->attrs[IEEE802154_ATTR_DEST_HW_ADDR]) { addr.addr_type = IEEE802154_ADDR_LONG; @@ -415,6 +423,7 @@ static int ieee802154_disassociate_req(struct sk_buff *skb, ret = ieee802154_mlme_ops(dev)->disassoc_req(dev, &addr, nla_get_u8(info->attrs[IEEE802154_ATTR_REASON])); +out: dev_put(dev); return ret; } @@ -432,7 +441,7 @@ static int ieee802154_start_req(struct sk_buff *skb, struct genl_info *info) u8 channel, bcn_ord, sf_ord; u8 page; int pan_coord, blx, coord_realign; - int ret; + int ret = -EOPNOTSUPP; if (!info->attrs[IEEE802154_ATTR_COORD_PAN_ID] || !info->attrs[IEEE802154_ATTR_COORD_SHORT_ADDR] || @@ -448,6 +457,8 @@ static int ieee802154_start_req(struct sk_buff *skb, struct genl_info *info) dev = ieee802154_nl_get_dev(info); if (!dev) return -ENODEV; + if (!ieee802154_mlme_ops(dev)->start_req) + goto out; addr.addr_type = IEEE802154_ADDR_SHORT; addr.short_addr = nla_get_u16( @@ -476,6 +487,7 @@ static int ieee802154_start_req(struct sk_buff *skb, struct genl_info *info) ret = ieee802154_mlme_ops(dev)->start_req(dev, &addr, channel, page, bcn_ord, sf_ord, pan_coord, blx, coord_realign); +out: dev_put(dev); return ret; } @@ -483,7 +495,7 @@ static int ieee802154_start_req(struct sk_buff *skb, struct genl_info *info) static int ieee802154_scan_req(struct sk_buff *skb, struct genl_info *info) { struct net_device *dev; - int ret; + int ret = -EOPNOTSUPP; u8 type; u32 channels; u8 duration; @@ -497,6 +509,8 @@ static int ieee802154_scan_req(struct sk_buff *skb, struct genl_info *info) dev = ieee802154_nl_get_dev(info); if (!dev) return -ENODEV; + if (!ieee802154_mlme_ops(dev)->scan_req) + goto out; type = nla_get_u8(info->attrs[IEEE802154_ATTR_SCAN_TYPE]); channels = nla_get_u32(info->attrs[IEEE802154_ATTR_CHANNELS]); @@ -511,6 +525,7 @@ static int ieee802154_scan_req(struct sk_buff *skb, struct genl_info *info) ret = ieee802154_mlme_ops(dev)->scan_req(dev, type, channels, page, duration); +out: dev_put(dev); return ret; } diff --git a/net/ieee802154/raw.c b/net/ieee802154/raw.c index 50e823927d49..41f538b8e59c 100644 --- a/net/ieee802154/raw.c +++ b/net/ieee802154/raw.c @@ -221,10 +221,9 @@ static int raw_rcv_skb(struct sock *sk, struct sk_buff *skb) void ieee802154_raw_deliver(struct net_device *dev, struct sk_buff *skb) { struct sock *sk; - struct hlist_node *node; read_lock(&raw_lock); - sk_for_each(sk, node, &raw_head) { + sk_for_each(sk, &raw_head) { bh_lock_sock(sk); if (!sk->sk_bound_dev_if || sk->sk_bound_dev_if == dev->ifindex) { diff --git a/net/ieee802154/wpan-class.c b/net/ieee802154/wpan-class.c index 1627ef2e8522..13571eae6bae 100644 --- 
a/net/ieee802154/wpan-class.c +++ b/net/ieee802154/wpan-class.c @@ -91,7 +91,7 @@ static struct class wpan_phy_class = { static DEFINE_MUTEX(wpan_phy_mutex); static int wpan_phy_idx; -static int wpan_phy_match(struct device *dev, void *data) +static int wpan_phy_match(struct device *dev, const void *data) { return !strcmp(dev_name(dev), (const char *)data); } @@ -103,8 +103,7 @@ struct wpan_phy *wpan_phy_find(const char *str) if (WARN_ON(!str)) return NULL; - dev = class_find_device(&wpan_phy_class, NULL, - (void *)str, wpan_phy_match); + dev = class_find_device(&wpan_phy_class, NULL, str, wpan_phy_match); if (!dev) return NULL; diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 5a19aeb86094..8603ca827104 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -166,6 +166,7 @@ config IP_PNP_RARP config NET_IPIP tristate "IP: tunneling" select INET_TUNNEL + select NET_IP_TUNNEL ---help--- Tunneling means encapsulating data of one protocol type within another protocol and sending it over a channel that understands the @@ -186,9 +187,14 @@ config NET_IPGRE_DEMUX This is helper module to demultiplex GRE packets on GRE version field criteria. Required by ip_gre and pptp modules. +config NET_IP_TUNNEL + tristate + default n + config NET_IPGRE tristate "IP: GRE tunnels over IP" depends on (IPV6 || IPV6=n) && NET_IPGRE_DEMUX + select NET_IP_TUNNEL help Tunneling means encapsulating data of one protocol type within another protocol and sending it over a channel that understands the @@ -313,6 +319,7 @@ config SYN_COOKIES config NET_IPVTI tristate "Virtual (secure) IP: tunneling" select INET_TUNNEL + select NET_IP_TUNNEL depends on INET_XFRM_MODE_TUNNEL ---help--- Tunneling means encapsulating data of one protocol type within @@ -488,7 +495,6 @@ config TCP_CONG_HTCP config TCP_CONG_HSTCP tristate "High Speed TCP" - depends on EXPERIMENTAL default n ---help--- Sally Floyd's High Speed TCP (RFC 3649) congestion control. 
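The net/ipv4/Kconfig hunks around this point drop the EXPERIMENTAL dependency from the pluggable TCP congestion-control algorithms (HSTCP, Hybla, Vegas, and so on). Each of these options builds a small module around struct tcp_congestion_ops. For context, here is a hypothetical minimal module of that shape; it assumes the 3.x-era callback signatures and simply reuses the exported Reno helpers rather than implementing any algorithm from this patch series:

/* example_cong.c - hypothetical minimal congestion-control module,
 * assuming the 3.x-era struct tcp_congestion_ops layout. It reuses
 * the stock Reno callbacks exported by net/ipv4/tcp_cong.c instead
 * of providing a new algorithm; "example" is a made-up name.
 */
#include <linux/module.h>
#include <net/tcp.h>

static struct tcp_congestion_ops tcp_example __read_mostly = {
	.ssthresh	= tcp_reno_ssthresh,	/* halve snd_cwnd on loss */
	.cong_avoid	= tcp_reno_cong_avoid,	/* slow start + AIMD growth */
	.name		= "example",
	.owner		= THIS_MODULE,
};

static int __init tcp_example_init(void)
{
	/* adds "example" to the set selectable by name */
	return tcp_register_congestion_control(&tcp_example);
}

static void __exit tcp_example_exit(void)
{
	tcp_unregister_congestion_control(&tcp_example);
}

module_init(tcp_example_init);
module_exit(tcp_example_exit);
MODULE_LICENSE("GPL");

Once loaded, such a module would be selectable system-wide via the net.ipv4.tcp_congestion_control sysctl or per socket via setsockopt(TCP_CONGESTION), which is why these Kconfig entries can stay tristate.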
@@ -499,7 +505,6 @@ config TCP_CONG_HSTCP config TCP_CONG_HYBLA tristate "TCP-Hybla congestion control algorithm" - depends on EXPERIMENTAL default n ---help--- TCP-Hybla is a sender-side only change that eliminates penalization of @@ -509,7 +514,6 @@ config TCP_CONG_HYBLA config TCP_CONG_VEGAS tristate "TCP Vegas" - depends on EXPERIMENTAL default n ---help--- TCP Vegas is a sender-side only change to TCP that anticipates @@ -520,7 +524,6 @@ config TCP_CONG_VEGAS config TCP_CONG_SCALABLE tristate "Scalable TCP" - depends on EXPERIMENTAL default n ---help--- Scalable TCP is a sender-side only change to TCP which uses a @@ -530,7 +533,6 @@ config TCP_CONG_SCALABLE config TCP_CONG_LP tristate "TCP Low Priority" - depends on EXPERIMENTAL default n ---help--- TCP Low Priority (TCP-LP), a distributed algorithm whose goal is @@ -540,7 +542,6 @@ config TCP_CONG_LP config TCP_CONG_VENO tristate "TCP Veno" - depends on EXPERIMENTAL default n ---help--- TCP Veno is a sender-side only enhancement of TCP to obtain better @@ -552,7 +553,6 @@ config TCP_CONG_VENO config TCP_CONG_YEAH tristate "YeAH TCP" - depends on EXPERIMENTAL select TCP_CONG_VEGAS default n ---help--- @@ -567,7 +567,6 @@ config TCP_CONG_YEAH config TCP_CONG_ILLINOIS tristate "TCP Illinois" - depends on EXPERIMENTAL default n ---help--- TCP-Illinois is a sender-side modification of TCP Reno for @@ -631,8 +630,7 @@ config DEFAULT_TCP_CONG default "cubic" config TCP_MD5SIG - bool "TCP: MD5 Signature Option support (RFC2385) (EXPERIMENTAL)" - depends on EXPERIMENTAL + bool "TCP: MD5 Signature Option support (RFC2385)" select CRYPTO select CRYPTO_MD5 ---help--- diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index 15ca63ec604e..089cb9f36387 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -13,6 +13,7 @@ obj-y := route.o inetpeer.o protocol.o \ fib_frontend.o fib_semantics.o fib_trie.o \ inet_fragment.o ping.o +obj-$(CONFIG_NET_IP_TUNNEL) += ip_tunnel.o obj-$(CONFIG_SYSCTL) += sysctl_net_ipv4.o obj-$(CONFIG_PROC_FS) += proc.o obj-$(CONFIG_IP_MULTIPLE_TABLES) += fib_rules.o diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 24b384b7903e..d01be2a3ae53 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -111,10 +111,10 @@ #include <net/sock.h> #include <net/raw.h> #include <net/icmp.h> -#include <net/ipip.h> #include <net/inet_common.h> #include <net/xfrm.h> #include <net/net_namespace.h> +#include <net/secure_seq.h> #ifdef CONFIG_IP_MROUTE #include <linux/mroute.h> #endif @@ -248,8 +248,12 @@ EXPORT_SYMBOL(inet_listen); u32 inet_ehash_secret __read_mostly; EXPORT_SYMBOL(inet_ehash_secret); +u32 ipv6_hash_secret __read_mostly; +EXPORT_SYMBOL(ipv6_hash_secret); + /* - * inet_ehash_secret must be set exactly once + * inet_ehash_secret must be set exactly once, and to a non-null value + * ipv6_hash_secret must be set exactly once.
*/ void build_ehash_secret(void) { @@ -259,24 +263,12 @@ void build_ehash_secret(void) get_random_bytes(&rnd, sizeof(rnd)); } while (rnd == 0); - cmpxchg(&inet_ehash_secret, 0, rnd); -} -EXPORT_SYMBOL(build_ehash_secret); - -static inline int inet_netns_ok(struct net *net, __u8 protocol) -{ - const struct net_protocol *ipprot; - - if (net_eq(net, &init_net)) - return 1; - - ipprot = rcu_dereference(inet_protos[protocol]); - if (ipprot == NULL) { - /* raw IP is OK */ - return 1; + if (cmpxchg(&inet_ehash_secret, 0, rnd) == 0) { + get_random_bytes(&ipv6_hash_secret, sizeof(ipv6_hash_secret)); + net_secret_init(); } - return ipprot->netns_ok; } +EXPORT_SYMBOL(build_ehash_secret); /* * Create an inet socket. @@ -350,10 +342,6 @@ lookup_protocol: !ns_capable(net->user_ns, CAP_NET_RAW)) goto out_rcu_unlock; - err = -EAFNOSUPPORT; - if (!inet_netns_ok(net, protocol)) - goto out_rcu_unlock; - sock->ops = answer->ops; answer_prot = answer->prot; answer_no_check = answer->no_check; @@ -1297,15 +1285,16 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, int ihl; int id; unsigned int offset = 0; - - if (!(features & NETIF_F_V4_CSUM)) - features &= ~NETIF_F_SG; + bool tunnel; if (unlikely(skb_shinfo(skb)->gso_type & ~(SKB_GSO_TCPV4 | SKB_GSO_UDP | SKB_GSO_DODGY | SKB_GSO_TCP_ECN | + SKB_GSO_GRE | + SKB_GSO_TCPV6 | + SKB_GSO_UDP_TUNNEL | 0))) goto out; @@ -1320,6 +1309,8 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, if (unlikely(!pskb_may_pull(skb, ihl))) goto out; + tunnel = !!skb->encapsulation; + __skb_pull(skb, ihl); skb_reset_transport_header(skb); iph = ip_hdr(skb); @@ -1333,20 +1324,21 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, segs = ops->callbacks.gso_segment(skb, features); rcu_read_unlock(); - if (!segs || IS_ERR(segs)) + if (IS_ERR_OR_NULL(segs)) goto out; skb = segs; do { iph = ip_hdr(skb); - if (proto == IPPROTO_UDP) { + if (!tunnel && proto == IPPROTO_UDP) { iph->id = htons(id); iph->frag_off = htons(offset >> 3); if (skb->next != NULL) iph->frag_off |= htons(IP_MF); offset += (skb->len - skb->mac_len - iph->ihl * 4); - } else + } else { iph->id = htons(id++); + } iph->tot_len = htons(skb->len - skb->mac_len); iph->check = 0; iph->check = ip_fast_csum(skb_network_header(skb), iph->ihl); @@ -1590,7 +1582,7 @@ static const struct net_offload udp_offload = { static const struct net_protocol icmp_protocol = { .handler = icmp_rcv, - .err_handler = ping_err, + .err_handler = icmp_err, .no_policy = 1, .netns_ok = 1, }; @@ -1705,12 +1697,11 @@ static struct packet_type ip_packet_type __read_mostly = { static int __init inet_init(void) { - struct sk_buff *dummy_skb; struct inet_protosw *q; struct list_head *r; int rc = -EINVAL; - BUILD_BUG_ON(sizeof(struct inet_skb_parm) > sizeof(dummy_skb->cb)); + BUILD_BUG_ON(sizeof(struct inet_skb_parm) > FIELD_SIZEOF(struct sk_buff, cb)); sysctl_local_reserved_ports = kzalloc(65536 / 8, GFP_KERNEL); if (!sysctl_local_reserved_ports) diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c index a69b4e4a02b5..2e7f1948216f 100644 --- a/net/ipv4/ah4.c +++ b/net/ipv4/ah4.c @@ -321,8 +321,7 @@ static int ah_input(struct xfrm_state *x, struct sk_buff *skb) /* We are going to _remove_ AH header to keep sockets happy, * so... Later this can change. 
*/ - if (skb_cloned(skb) && - pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) + if (skb_unclone(skb, GFP_ATOMIC)) goto out; skb->ip_summed = CHECKSUM_NONE; diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index ded146b217f1..247ec1951c35 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -654,11 +654,19 @@ struct sk_buff *arp_create(int type, int ptype, __be32 dest_ip, arp_ptr += dev->addr_len; memcpy(arp_ptr, &src_ip, 4); arp_ptr += 4; - if (target_hw != NULL) - memcpy(arp_ptr, target_hw, dev->addr_len); - else - memset(arp_ptr, 0, dev->addr_len); - arp_ptr += dev->addr_len; + + switch (dev->type) { +#if IS_ENABLED(CONFIG_FIREWIRE_NET) + case ARPHRD_IEEE1394: + break; +#endif + default: + if (target_hw != NULL) + memcpy(arp_ptr, target_hw, dev->addr_len); + else + memset(arp_ptr, 0, dev->addr_len); + arp_ptr += dev->addr_len; + } memcpy(arp_ptr, &dest_ip, 4); return skb; @@ -781,7 +789,14 @@ static int arp_process(struct sk_buff *skb) arp_ptr += dev->addr_len; memcpy(&sip, arp_ptr, 4); arp_ptr += 4; - arp_ptr += dev->addr_len; + switch (dev_type) { +#if IS_ENABLED(CONFIG_FIREWIRE_NET) + case ARPHRD_IEEE1394: + break; +#endif + default: + arp_ptr += dev->addr_len; + } memcpy(&tip, arp_ptr, 4); /* * Check for bad requests for 127.x.x.x and requests for multicast @@ -1405,14 +1420,14 @@ static const struct file_operations arp_seq_fops = { static int __net_init arp_net_init(struct net *net) { - if (!proc_net_fops_create(net, "arp", S_IRUGO, &arp_seq_fops)) + if (!proc_create("arp", S_IRUGO, net->proc_net, &arp_seq_fops)) return -ENOMEM; return 0; } static void __net_exit arp_net_exit(struct net *net) { - proc_net_remove(net, "arp"); + remove_proc_entry("arp", net->proc_net); } static struct pernet_operations arp_net_ops = { diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index a8e4f2665d5e..dfc39d4d48b7 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -63,6 +63,7 @@ #include <net/ip_fib.h> #include <net/rtnetlink.h> #include <net/net_namespace.h> +#include <net/addrconf.h> #include "fib_lookup.h" @@ -93,6 +94,7 @@ static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = { [IFA_ADDRESS] = { .type = NLA_U32 }, [IFA_BROADCAST] = { .type = NLA_U32 }, [IFA_LABEL] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 }, + [IFA_CACHEINFO] = { .len = sizeof(struct ifa_cacheinfo) }, }; #define IN4_ADDR_HSIZE_SHIFT 8 @@ -137,10 +139,9 @@ struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref) u32 hash = inet_addr_hash(net, addr); struct net_device *result = NULL; struct in_ifaddr *ifa; - struct hlist_node *node; rcu_read_lock(); - hlist_for_each_entry_rcu(ifa, node, &inet_addr_lst[hash], hash) { + hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash) { if (ifa->ifa_local == addr) { struct net_device *dev = ifa->ifa_dev->dev; @@ -417,6 +418,10 @@ static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, __inet_del_ifa(in_dev, ifap, destroy, NULL, 0); } +static void check_lifetime(struct work_struct *work); + +static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime); + static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh, u32 portid) { @@ -462,6 +467,9 @@ static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh, inet_hash_insert(dev_net(in_dev->dev), ifa); + cancel_delayed_work(&check_lifetime_work); + schedule_delayed_work(&check_lifetime_work, 0); + /* Send message first, then call notifier. 
Notifier will trigger FIB update, so that listeners of netlink will know about new ifaddr */ @@ -528,7 +536,7 @@ struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix, return NULL; } -static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); struct nlattr *tb[IFA_MAX+1]; @@ -573,7 +581,131 @@ errout: return err; } -static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh) +#define INFINITY_LIFE_TIME 0xFFFFFFFF + +static void check_lifetime(struct work_struct *work) +{ + unsigned long now, next, next_sec, next_sched; + struct in_ifaddr *ifa; + struct hlist_node *n; + int i; + + now = jiffies; + next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY); + + for (i = 0; i < IN4_ADDR_HSIZE; i++) { + bool change_needed = false; + + rcu_read_lock(); + hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) { + unsigned long age; + + if (ifa->ifa_flags & IFA_F_PERMANENT) + continue; + + /* We try to batch several events at once. */ + age = (now - ifa->ifa_tstamp + + ADDRCONF_TIMER_FUZZ_MINUS) / HZ; + + if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME && + age >= ifa->ifa_valid_lft) { + change_needed = true; + } else if (ifa->ifa_preferred_lft == + INFINITY_LIFE_TIME) { + continue; + } else if (age >= ifa->ifa_preferred_lft) { + if (time_before(ifa->ifa_tstamp + + ifa->ifa_valid_lft * HZ, next)) + next = ifa->ifa_tstamp + + ifa->ifa_valid_lft * HZ; + + if (!(ifa->ifa_flags & IFA_F_DEPRECATED)) + change_needed = true; + } else if (time_before(ifa->ifa_tstamp + + ifa->ifa_preferred_lft * HZ, + next)) { + next = ifa->ifa_tstamp + + ifa->ifa_preferred_lft * HZ; + } + } + rcu_read_unlock(); + if (!change_needed) + continue; + rtnl_lock(); + hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) { + unsigned long age; + + if (ifa->ifa_flags & IFA_F_PERMANENT) + continue; + + /* We try to batch several events at once. */ + age = (now - ifa->ifa_tstamp + + ADDRCONF_TIMER_FUZZ_MINUS) / HZ; + + if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME && + age >= ifa->ifa_valid_lft) { + struct in_ifaddr **ifap; + + for (ifap = &ifa->ifa_dev->ifa_list; + *ifap != NULL; ifap = &(*ifap)->ifa_next) { + if (*ifap == ifa) { + inet_del_ifa(ifa->ifa_dev, + ifap, 1); + break; + } + } + } else if (ifa->ifa_preferred_lft != + INFINITY_LIFE_TIME && + age >= ifa->ifa_preferred_lft && + !(ifa->ifa_flags & IFA_F_DEPRECATED)) { + ifa->ifa_flags |= IFA_F_DEPRECATED; + rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0); + } + } + rtnl_unlock(); + } + + next_sec = round_jiffies_up(next); + next_sched = next; + + /* If rounded timeout is accurate enough, accept it. */ + if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ)) + next_sched = next_sec; + + now = jiffies; + /* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. 
*/ + if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX)) + next_sched = now + ADDRCONF_TIMER_FUZZ_MAX; + + schedule_delayed_work(&check_lifetime_work, next_sched - now); +} + +static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft, + __u32 prefered_lft) +{ + unsigned long timeout; + + ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED); + + timeout = addrconf_timeout_fixup(valid_lft, HZ); + if (addrconf_finite_timeout(timeout)) + ifa->ifa_valid_lft = timeout; + else + ifa->ifa_flags |= IFA_F_PERMANENT; + + timeout = addrconf_timeout_fixup(prefered_lft, HZ); + if (addrconf_finite_timeout(timeout)) { + if (timeout == 0) + ifa->ifa_flags |= IFA_F_DEPRECATED; + ifa->ifa_preferred_lft = timeout; + } + ifa->ifa_tstamp = jiffies; + if (!ifa->ifa_cstamp) + ifa->ifa_cstamp = ifa->ifa_tstamp; +} + +static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh, + __u32 *pvalid_lft, __u32 *pprefered_lft) { struct nlattr *tb[IFA_MAX+1]; struct in_ifaddr *ifa; @@ -633,24 +765,77 @@ static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh) else memcpy(ifa->ifa_label, dev->name, IFNAMSIZ); + if (tb[IFA_CACHEINFO]) { + struct ifa_cacheinfo *ci; + + ci = nla_data(tb[IFA_CACHEINFO]); + if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) { + err = -EINVAL; + goto errout; + } + *pvalid_lft = ci->ifa_valid; + *pprefered_lft = ci->ifa_prefered; + } + return ifa; errout: return ERR_PTR(err); } -static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa) +{ + struct in_device *in_dev = ifa->ifa_dev; + struct in_ifaddr *ifa1, **ifap; + + if (!ifa->ifa_local) + return NULL; + + for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL; + ifap = &ifa1->ifa_next) { + if (ifa1->ifa_mask == ifa->ifa_mask && + inet_ifa_match(ifa1->ifa_address, ifa) && + ifa1->ifa_local == ifa->ifa_local) + return ifa1; + } + return NULL; +} + +static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); struct in_ifaddr *ifa; + struct in_ifaddr *ifa_existing; + __u32 valid_lft = INFINITY_LIFE_TIME; + __u32 prefered_lft = INFINITY_LIFE_TIME; ASSERT_RTNL(); - ifa = rtm_to_ifaddr(net, nlh); + ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft); if (IS_ERR(ifa)) return PTR_ERR(ifa); - return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid); + ifa_existing = find_matching_ifa(ifa); + if (!ifa_existing) { + /* It would be best to check for !NLM_F_CREATE here but + * userspace already relies on not having to provide this.
+ */ + set_ifa_lifetime(ifa, valid_lft, prefered_lft); + return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid); + } else { + inet_free_ifa(ifa); + + if (nlh->nlmsg_flags & NLM_F_EXCL || + !(nlh->nlmsg_flags & NLM_F_REPLACE)) + return -EEXIST; + ifa = ifa_existing; + set_ifa_lifetime(ifa, valid_lft, prefered_lft); + cancel_delayed_work(&check_lifetime_work); + schedule_delayed_work(&check_lifetime_work, 0); + rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid); + blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa); + } + return 0; } /* @@ -852,6 +1037,7 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg) ifa->ifa_prefixlen = 32; ifa->ifa_mask = inet_make_mask(32); } + set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME); ret = inet_set_ifa(dev, ifa); break; @@ -1190,6 +1376,8 @@ static int inetdev_event(struct notifier_block *this, unsigned long event, ifa->ifa_dev = in_dev; ifa->ifa_scope = RT_SCOPE_HOST; memcpy(ifa->ifa_label, dev->name, IFNAMSIZ); + set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, + INFINITY_LIFE_TIME); inet_insert_ifa(ifa); } } @@ -1246,11 +1434,30 @@ static size_t inet_nlmsg_size(void) + nla_total_size(IFNAMSIZ); /* IFA_LABEL */ } +static inline u32 cstamp_delta(unsigned long cstamp) +{ + return (cstamp - INITIAL_JIFFIES) * 100UL / HZ; +} + +static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp, + unsigned long tstamp, u32 preferred, u32 valid) +{ + struct ifa_cacheinfo ci; + + ci.cstamp = cstamp_delta(cstamp); + ci.tstamp = cstamp_delta(tstamp); + ci.ifa_prefered = preferred; + ci.ifa_valid = valid; + + return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci); +} + static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa, u32 portid, u32 seq, int event, unsigned int flags) { struct ifaddrmsg *ifm; struct nlmsghdr *nlh; + u32 preferred, valid; nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), flags); if (nlh == NULL) @@ -1259,10 +1466,31 @@ static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa, ifm = nlmsg_data(nlh); ifm->ifa_family = AF_INET; ifm->ifa_prefixlen = ifa->ifa_prefixlen; - ifm->ifa_flags = ifa->ifa_flags|IFA_F_PERMANENT; + ifm->ifa_flags = ifa->ifa_flags; ifm->ifa_scope = ifa->ifa_scope; ifm->ifa_index = ifa->ifa_dev->dev->ifindex; + if (!(ifm->ifa_flags & IFA_F_PERMANENT)) { + preferred = ifa->ifa_preferred_lft; + valid = ifa->ifa_valid_lft; + if (preferred != INFINITY_LIFE_TIME) { + long tval = (jiffies - ifa->ifa_tstamp) / HZ; + + if (preferred > tval) + preferred -= tval; + else + preferred = 0; + if (valid != INFINITY_LIFE_TIME) { + if (valid > tval) + valid -= tval; + else + valid = 0; + } + } + } else { + preferred = INFINITY_LIFE_TIME; + valid = INFINITY_LIFE_TIME; + } if ((ifa->ifa_address && nla_put_be32(skb, IFA_ADDRESS, ifa->ifa_address)) || (ifa->ifa_local && @@ -1270,7 +1498,9 @@ static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa, (ifa->ifa_broadcast && nla_put_be32(skb, IFA_BROADCAST, ifa->ifa_broadcast)) || (ifa->ifa_label[0] && - nla_put_string(skb, IFA_LABEL, ifa->ifa_label))) + nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) || + put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp, + preferred, valid)) goto nla_put_failure; return nlmsg_end(skb, nlh); @@ -1290,7 +1520,6 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) struct in_device *in_dev; struct in_ifaddr *ifa; struct hlist_head *head; - struct hlist_node *node; s_h = cb->args[0]; s_idx = idx = cb->args[1]; @@ -1300,7 +1529,9 @@ static int 
inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) idx = 0; head = &net->dev_index_head[h]; rcu_read_lock(); - hlist_for_each_entry_rcu(dev, node, head, index_hlist) { + cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^ + net->dev_base_seq; + hlist_for_each_entry_rcu(dev, head, index_hlist) { if (idx < s_idx) goto cont; if (h > s_h || idx > s_idx) @@ -1320,6 +1551,7 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) rcu_read_unlock(); goto done; } + nl_dump_check_consistent(cb, nlmsg_hdr(skb)); } cont: idx++; @@ -1531,8 +1763,7 @@ static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = { }; static int inet_netconf_get_devconf(struct sk_buff *in_skb, - struct nlmsghdr *nlh, - void *arg) + struct nlmsghdr *nlh) { struct net *net = sock_net(in_skb->sk); struct nlattr *tb[NETCONFA_MAX+1]; @@ -1592,6 +1823,77 @@ errout: return err; } +static int inet_netconf_dump_devconf(struct sk_buff *skb, + struct netlink_callback *cb) +{ + struct net *net = sock_net(skb->sk); + int h, s_h; + int idx, s_idx; + struct net_device *dev; + struct in_device *in_dev; + struct hlist_head *head; + + s_h = cb->args[0]; + s_idx = idx = cb->args[1]; + + for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { + idx = 0; + head = &net->dev_index_head[h]; + rcu_read_lock(); + cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^ + net->dev_base_seq; + hlist_for_each_entry_rcu(dev, head, index_hlist) { + if (idx < s_idx) + goto cont; + in_dev = __in_dev_get_rcu(dev); + if (!in_dev) + goto cont; + + if (inet_netconf_fill_devconf(skb, dev->ifindex, + &in_dev->cnf, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + RTM_NEWNETCONF, + NLM_F_MULTI, + -1) <= 0) { + rcu_read_unlock(); + goto done; + } + nl_dump_check_consistent(cb, nlmsg_hdr(skb)); +cont: + idx++; + } + rcu_read_unlock(); + } + if (h == NETDEV_HASHENTRIES) { + if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL, + net->ipv4.devconf_all, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + RTM_NEWNETCONF, NLM_F_MULTI, + -1) <= 0) + goto done; + else + h++; + } + if (h == NETDEV_HASHENTRIES + 1) { + if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT, + net->ipv4.devconf_dflt, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + RTM_NEWNETCONF, NLM_F_MULTI, + -1) <= 0) + goto done; + else + h++; + } +done: + cb->args[0] = h; + cb->args[1] = idx; + + return skb->len; +} + #ifdef CONFIG_SYSCTL static void devinet_copy_dflt_conf(struct net *net, int i) @@ -1988,12 +2290,14 @@ void __init devinet_init(void) register_gifconf(PF_INET, inet_gifconf); register_netdevice_notifier(&ip_netdev_notifier); + schedule_delayed_work(&check_lifetime_work, 0); + rtnl_af_register(&inet_af_ops); rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, NULL); rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, NULL); rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, NULL); rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf, - NULL, NULL); + inet_netconf_dump_devconf, NULL); } diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 3b4f0cd2e63e..4cfe34d4cc96 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -139,8 +139,6 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) /* skb is pure payload to encrypt */ - err = -ENOMEM; - esp = x->data; aead = esp->aead; alen = crypto_aead_authsize(aead); @@ -176,8 +174,10 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) } tmp = esp_alloc_tmp(aead, nfrags + sglists, seqhilen); - if (!tmp) + if (!tmp) { + err 
= -ENOMEM; goto error; + } seqhi = esp_tmp_seqhi(tmp); iv = esp_tmp_iv(aead, tmp, seqhilen); diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 5cd75e2dab2c..c7629a209f9d 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -112,7 +112,6 @@ struct fib_table *fib_new_table(struct net *net, u32 id) struct fib_table *fib_get_table(struct net *net, u32 id) { struct fib_table *tb; - struct hlist_node *node; struct hlist_head *head; unsigned int h; @@ -122,7 +121,7 @@ struct fib_table *fib_get_table(struct net *net, u32 id) rcu_read_lock(); head = &net->ipv4.fib_table_hash[h]; - hlist_for_each_entry_rcu(tb, node, head, tb_hlist) { + hlist_for_each_entry_rcu(tb, head, tb_hlist) { if (tb->tb_id == id) { rcu_read_unlock(); return tb; @@ -137,13 +136,12 @@ static void fib_flush(struct net *net) { int flushed = 0; struct fib_table *tb; - struct hlist_node *node; struct hlist_head *head; unsigned int h; for (h = 0; h < FIB_TABLE_HASHSZ; h++) { head = &net->ipv4.fib_table_hash[h]; - hlist_for_each_entry(tb, node, head, tb_hlist) + hlist_for_each_entry(tb, head, tb_hlist) flushed += fib_table_flush(tb); } @@ -606,7 +604,7 @@ errout: return err; } -static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); struct fib_config cfg; @@ -628,7 +626,7 @@ errout: return err; } -static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); struct fib_config cfg; @@ -656,7 +654,6 @@ static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) unsigned int h, s_h; unsigned int e = 0, s_e; struct fib_table *tb; - struct hlist_node *node; struct hlist_head *head; int dumped = 0; @@ -670,7 +667,7 @@ static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) { e = 0; head = &net->ipv4.fib_table_hash[h]; - hlist_for_each_entry(tb, node, head, tb_hlist) { + hlist_for_each_entry(tb, head, tb_hlist) { if (e < s_e) goto next; if (dumped) @@ -960,8 +957,8 @@ static void nl_fib_input(struct sk_buff *skb) net = sock_net(skb->sk); nlh = nlmsg_hdr(skb); - if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len || - nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*frn))) + if (skb->len < NLMSG_HDRLEN || skb->len < nlh->nlmsg_len || + nlmsg_len(nlh) < sizeof(*frn)) return; skb = skb_clone(skb, GFP_KERNEL); @@ -969,12 +966,12 @@ static void nl_fib_input(struct sk_buff *skb) return; nlh = nlmsg_hdr(skb); - frn = (struct fib_result_nl *) NLMSG_DATA(nlh); + frn = (struct fib_result_nl *) nlmsg_data(nlh); tb = fib_get_table(net, frn->tb_id_in); nl_fib_lookup(frn, tb); - portid = NETLINK_CB(skb).portid; /* pid of sending process */ + portid = NETLINK_CB(skb).portid; /* netlink portid */ NETLINK_CB(skb).portid = 0; /* from kernel */ NETLINK_CB(skb).dst_group = 0; /* unicast */ netlink_unicast(net->ipv4.fibnl, skb, portid, MSG_DONTWAIT); @@ -1117,11 +1114,11 @@ static void ip_fib_net_exit(struct net *net) for (i = 0; i < FIB_TABLE_HASHSZ; i++) { struct fib_table *tb; struct hlist_head *head; - struct hlist_node *node, *tmp; + struct hlist_node *tmp; head = &net->ipv4.fib_table_hash[i]; - hlist_for_each_entry_safe(tb, node, tmp, head, tb_hlist) { - hlist_del(node); + hlist_for_each_entry_safe(tb, tmp, head, tb_hlist) { + hlist_del(&tb->tb_hlist); fib_table_flush(tb); 
fib_free_table(tb); } diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 4797a800faf8..8f6cb7a87cd6 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -298,14 +298,13 @@ static inline unsigned int fib_info_hashfn(const struct fib_info *fi) static struct fib_info *fib_find_info(const struct fib_info *nfi) { struct hlist_head *head; - struct hlist_node *node; struct fib_info *fi; unsigned int hash; hash = fib_info_hashfn(nfi); head = &fib_info_hash[hash]; - hlist_for_each_entry(fi, node, head, fib_hash) { + hlist_for_each_entry(fi, head, fib_hash) { if (!net_eq(fi->fib_net, nfi->fib_net)) continue; if (fi->fib_nhs != nfi->fib_nhs) @@ -331,7 +330,6 @@ static struct fib_info *fib_find_info(const struct fib_info *nfi) int ip_fib_check_default(__be32 gw, struct net_device *dev) { struct hlist_head *head; - struct hlist_node *node; struct fib_nh *nh; unsigned int hash; @@ -339,7 +337,7 @@ int ip_fib_check_default(__be32 gw, struct net_device *dev) hash = fib_devindex_hashfn(dev->ifindex); head = &fib_info_devhash[hash]; - hlist_for_each_entry(nh, node, head, nh_hash) { + hlist_for_each_entry(nh, head, nh_hash) { if (nh->nh_dev == dev && nh->nh_gw == gw && !(nh->nh_flags & RTNH_F_DEAD)) { @@ -721,10 +719,10 @@ static void fib_info_hash_move(struct hlist_head *new_info_hash, for (i = 0; i < old_size; i++) { struct hlist_head *head = &fib_info_hash[i]; - struct hlist_node *node, *n; + struct hlist_node *n; struct fib_info *fi; - hlist_for_each_entry_safe(fi, node, n, head, fib_hash) { + hlist_for_each_entry_safe(fi, n, head, fib_hash) { struct hlist_head *dest; unsigned int new_hash; @@ -739,10 +737,10 @@ static void fib_info_hash_move(struct hlist_head *new_info_hash, for (i = 0; i < old_size; i++) { struct hlist_head *lhead = &fib_info_laddrhash[i]; - struct hlist_node *node, *n; + struct hlist_node *n; struct fib_info *fi; - hlist_for_each_entry_safe(fi, node, n, lhead, fib_lhash) { + hlist_for_each_entry_safe(fi, n, lhead, fib_lhash) { struct hlist_head *ldest; unsigned int new_hash; @@ -1096,13 +1094,12 @@ int fib_sync_down_addr(struct net *net, __be32 local) int ret = 0; unsigned int hash = fib_laddr_hashfn(local); struct hlist_head *head = &fib_info_laddrhash[hash]; - struct hlist_node *node; struct fib_info *fi; if (fib_info_laddrhash == NULL || local == 0) return 0; - hlist_for_each_entry(fi, node, head, fib_lhash) { + hlist_for_each_entry(fi, head, fib_lhash) { if (!net_eq(fi->fib_net, net)) continue; if (fi->fib_prefsrc == local) { @@ -1120,13 +1117,12 @@ int fib_sync_down_dev(struct net_device *dev, int force) struct fib_info *prev_fi = NULL; unsigned int hash = fib_devindex_hashfn(dev->ifindex); struct hlist_head *head = &fib_info_devhash[hash]; - struct hlist_node *node; struct fib_nh *nh; if (force) scope = -1; - hlist_for_each_entry(nh, node, head, nh_hash) { + hlist_for_each_entry(nh, head, nh_hash) { struct fib_info *fi = nh->nh_parent; int dead; @@ -1232,7 +1228,6 @@ int fib_sync_up(struct net_device *dev) struct fib_info *prev_fi; unsigned int hash; struct hlist_head *head; - struct hlist_node *node; struct fib_nh *nh; int ret; @@ -1244,7 +1239,7 @@ int fib_sync_up(struct net_device *dev) head = &fib_info_devhash[hash]; ret = 0; - hlist_for_each_entry(nh, node, head, nh_hash) { + hlist_for_each_entry(nh, head, nh_hash) { struct fib_info *fi = nh->nh_parent; int alive; diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 31d771ca9a70..49616fed9340 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -125,7 +125,6 @@ struct 
tnode { unsigned int empty_children; /* KEYLENGTH bits needed */ union { struct rcu_head rcu; - struct work_struct work; struct tnode *tnode_free; }; struct rt_trie_node __rcu *child[0]; @@ -383,12 +382,6 @@ static struct tnode *tnode_alloc(size_t size) return vzalloc(size); } -static void __tnode_vfree(struct work_struct *arg) -{ - struct tnode *tn = container_of(arg, struct tnode, work); - vfree(tn); -} - static void __tnode_free_rcu(struct rcu_head *head) { struct tnode *tn = container_of(head, struct tnode, rcu); @@ -397,10 +390,8 @@ static void __tnode_free_rcu(struct rcu_head *head) if (size <= PAGE_SIZE) kfree(tn); - else { - INIT_WORK(&tn->work, __tnode_vfree); - schedule_work(&tn->work); - } + else + vfree(tn); } static inline void tnode_free(struct tnode *tn) @@ -920,10 +911,9 @@ nomem: static struct leaf_info *find_leaf_info(struct leaf *l, int plen) { struct hlist_head *head = &l->list; - struct hlist_node *node; struct leaf_info *li; - hlist_for_each_entry_rcu(li, node, head, hlist) + hlist_for_each_entry_rcu(li, head, hlist) if (li->plen == plen) return li; @@ -943,12 +933,11 @@ static inline struct list_head *get_fa_head(struct leaf *l, int plen) static void insert_leaf_info(struct hlist_head *head, struct leaf_info *new) { struct leaf_info *li = NULL, *last = NULL; - struct hlist_node *node; if (hlist_empty(head)) { hlist_add_head_rcu(&new->hlist, head); } else { - hlist_for_each_entry(li, node, head, hlist) { + hlist_for_each_entry(li, head, hlist) { if (new->plen > li->plen) break; @@ -1354,9 +1343,8 @@ static int check_leaf(struct fib_table *tb, struct trie *t, struct leaf *l, { struct leaf_info *li; struct hlist_head *hhead = &l->list; - struct hlist_node *node; - hlist_for_each_entry_rcu(li, node, hhead, hlist) { + hlist_for_each_entry_rcu(li, hhead, hlist) { struct fib_alias *fa; if (l->key != (key & li->mask_plen)) @@ -1740,10 +1728,10 @@ static int trie_flush_leaf(struct leaf *l) { int found = 0; struct hlist_head *lih = &l->list; - struct hlist_node *node, *tmp; + struct hlist_node *tmp; struct leaf_info *li = NULL; - hlist_for_each_entry_safe(li, node, tmp, lih, hlist) { + hlist_for_each_entry_safe(li, tmp, lih, hlist) { found += trie_flush_list(&li->falh); if (list_empty(&li->falh)) { @@ -1895,14 +1883,13 @@ static int fn_trie_dump_leaf(struct leaf *l, struct fib_table *tb, struct sk_buff *skb, struct netlink_callback *cb) { struct leaf_info *li; - struct hlist_node *node; int i, s_i; s_i = cb->args[4]; i = 0; /* rcu_read_lock is hold by caller */ - hlist_for_each_entry_rcu(li, node, &l->list, hlist) { + hlist_for_each_entry_rcu(li, &l->list, hlist) { if (i < s_i) { i++; continue; @@ -2092,14 +2079,13 @@ static void trie_collect_stats(struct trie *t, struct trie_stat *s) if (IS_LEAF(n)) { struct leaf *l = (struct leaf *)n; struct leaf_info *li; - struct hlist_node *tmp; s->leaves++; s->totdepth += iter.depth; if (iter.depth > s->maxdepth) s->maxdepth = iter.depth; - hlist_for_each_entry_rcu(li, tmp, &l->list, hlist) + hlist_for_each_entry_rcu(li, &l->list, hlist) ++s->prefixes; } else { const struct tnode *tn = (const struct tnode *) n; @@ -2200,10 +2186,9 @@ static int fib_triestat_seq_show(struct seq_file *seq, void *v) for (h = 0; h < FIB_TABLE_HASHSZ; h++) { struct hlist_head *head = &net->ipv4.fib_table_hash[h]; - struct hlist_node *node; struct fib_table *tb; - hlist_for_each_entry_rcu(tb, node, head, tb_hlist) { + hlist_for_each_entry_rcu(tb, head, tb_hlist) { struct trie *t = (struct trie *) tb->tb_data; struct trie_stat stat; @@ -2245,10 +2230,9 @@ 
static struct rt_trie_node *fib_trie_get_idx(struct seq_file *seq, loff_t pos) for (h = 0; h < FIB_TABLE_HASHSZ; h++) { struct hlist_head *head = &net->ipv4.fib_table_hash[h]; - struct hlist_node *node; struct fib_table *tb; - hlist_for_each_entry_rcu(tb, node, head, tb_hlist) { + hlist_for_each_entry_rcu(tb, head, tb_hlist) { struct rt_trie_node *n; for (n = fib_trie_get_first(iter, @@ -2298,7 +2282,7 @@ static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos) /* new hash chain */ while (++h < FIB_TABLE_HASHSZ) { struct hlist_head *head = &net->ipv4.fib_table_hash[h]; - hlist_for_each_entry_rcu(tb, tb_node, head, tb_hlist) { + hlist_for_each_entry_rcu(tb, head, tb_hlist) { n = fib_trie_get_first(iter, (struct trie *) tb->tb_data); if (n) goto found; @@ -2381,13 +2365,12 @@ static int fib_trie_seq_show(struct seq_file *seq, void *v) } else { struct leaf *l = (struct leaf *) n; struct leaf_info *li; - struct hlist_node *node; __be32 val = htonl(l->key); seq_indent(seq, iter->depth); seq_printf(seq, " |-- %pI4\n", &val); - hlist_for_each_entry_rcu(li, node, &l->list, hlist) { + hlist_for_each_entry_rcu(li, &l->list, hlist) { struct fib_alias *fa; list_for_each_entry_rcu(fa, &li->falh, fa_list) { @@ -2532,7 +2515,6 @@ static int fib_route_seq_show(struct seq_file *seq, void *v) { struct leaf *l = v; struct leaf_info *li; - struct hlist_node *node; if (v == SEQ_START_TOKEN) { seq_printf(seq, "%-127s\n", "Iface\tDestination\tGateway " @@ -2541,7 +2523,7 @@ static int fib_route_seq_show(struct seq_file *seq, void *v) return 0; } - hlist_for_each_entry_rcu(li, node, &l->list, hlist) { + hlist_for_each_entry_rcu(li, &l->list, hlist) { struct fib_alias *fa; __be32 mask, prefix; @@ -2607,31 +2589,31 @@ static const struct file_operations fib_route_fops = { int __net_init fib_proc_init(struct net *net) { - if (!proc_net_fops_create(net, "fib_trie", S_IRUGO, &fib_trie_fops)) + if (!proc_create("fib_trie", S_IRUGO, net->proc_net, &fib_trie_fops)) goto out1; - if (!proc_net_fops_create(net, "fib_triestat", S_IRUGO, - &fib_triestat_fops)) + if (!proc_create("fib_triestat", S_IRUGO, net->proc_net, + &fib_triestat_fops)) goto out2; - if (!proc_net_fops_create(net, "route", S_IRUGO, &fib_route_fops)) + if (!proc_create("route", S_IRUGO, net->proc_net, &fib_route_fops)) goto out3; return 0; out3: - proc_net_remove(net, "fib_triestat"); + remove_proc_entry("fib_triestat", net->proc_net); out2: - proc_net_remove(net, "fib_trie"); + remove_proc_entry("fib_trie", net->proc_net); out1: return -ENOMEM; } void __net_exit fib_proc_exit(struct net *net) { - proc_net_remove(net, "fib_trie"); - proc_net_remove(net, "fib_triestat"); - proc_net_remove(net, "route"); + remove_proc_entry("fib_trie", net->proc_net); + remove_proc_entry("fib_triestat", net->proc_net); + remove_proc_entry("route", net->proc_net); } #endif /* CONFIG_PROC_FS */ diff --git a/net/ipv4/gre.c b/net/ipv4/gre.c index 42a491055c76..b2e805af9b87 100644 --- a/net/ipv4/gre.c +++ b/net/ipv4/gre.c @@ -19,6 +19,7 @@ #include <linux/in.h> #include <linux/ip.h> #include <linux/netdevice.h> +#include <linux/if_tunnel.h> #include <linux/spinlock.h> #include <net/protocol.h> #include <net/gre.h> @@ -112,12 +113,113 @@ static void gre_err(struct sk_buff *skb, u32 info) rcu_read_unlock(); } +static struct sk_buff *gre_gso_segment(struct sk_buff *skb, + netdev_features_t features) +{ + struct sk_buff *segs = ERR_PTR(-EINVAL); + netdev_features_t enc_features; + int ghl = GRE_HEADER_SECTION; + struct gre_base_hdr *greh; + int mac_len = 
skb->mac_len; + __be16 protocol = skb->protocol; + int tnl_hlen; + bool csum; + + if (unlikely(skb_shinfo(skb)->gso_type & + ~(SKB_GSO_TCPV4 | + SKB_GSO_TCPV6 | + SKB_GSO_UDP | + SKB_GSO_DODGY | + SKB_GSO_TCP_ECN | + SKB_GSO_GRE))) + goto out; + + if (unlikely(!pskb_may_pull(skb, sizeof(*greh)))) + goto out; + + greh = (struct gre_base_hdr *)skb_transport_header(skb); + + if (greh->flags & GRE_KEY) + ghl += GRE_HEADER_SECTION; + if (greh->flags & GRE_SEQ) + ghl += GRE_HEADER_SECTION; + if (greh->flags & GRE_CSUM) { + ghl += GRE_HEADER_SECTION; + csum = true; + } else + csum = false; + + /* setup inner skb. */ + skb->protocol = greh->protocol; + skb->encapsulation = 0; + + if (unlikely(!pskb_may_pull(skb, ghl))) + goto out; + __skb_pull(skb, ghl); + skb_reset_mac_header(skb); + skb_set_network_header(skb, skb_inner_network_offset(skb)); + skb->mac_len = skb_inner_network_offset(skb); + + /* segment inner packet. */ + enc_features = skb->dev->hw_enc_features & netif_skb_features(skb); + segs = skb_mac_gso_segment(skb, enc_features); + if (!segs || IS_ERR(segs)) + goto out; + + skb = segs; + tnl_hlen = skb_tnl_header_len(skb); + do { + __skb_push(skb, ghl); + if (csum) { + __be32 *pcsum; + + if (skb_has_shared_frag(skb)) { + int err; + + err = __skb_linearize(skb); + if (err) { + kfree_skb(segs); + segs = ERR_PTR(err); + goto out; + } + } + + greh = (struct gre_base_hdr *)(skb->data); + pcsum = (__be32 *)(greh + 1); + *pcsum = 0; + *(__sum16 *)pcsum = csum_fold(skb_checksum(skb, 0, skb->len, 0)); + } + __skb_push(skb, tnl_hlen - ghl); + + skb_reset_mac_header(skb); + skb_set_network_header(skb, mac_len); + skb->mac_len = mac_len; + skb->protocol = protocol; + } while ((skb = skb->next)); +out: + return segs; +} + +static int gre_gso_send_check(struct sk_buff *skb) +{ + if (!skb->encapsulation) + return -EINVAL; + return 0; +} + static const struct net_protocol net_gre_protocol = { .handler = gre_rcv, .err_handler = gre_err, .netns_ok = 1, }; +static const struct net_offload gre_offload = { + .callbacks = { + .gso_send_check = gre_gso_send_check, + .gso_segment = gre_gso_segment, + }, +}; + static int __init gre_init(void) { pr_info("GRE over IPv4 demultiplexor driver\n"); @@ -127,11 +229,18 @@ static int __init gre_init(void) return -EAGAIN; } + if (inet_add_offload(&gre_offload, IPPROTO_GRE)) { + pr_err("can't add protocol offload\n"); + inet_del_protocol(&net_gre_protocol, IPPROTO_GRE); + return -EAGAIN; + } + return 0; } static void __exit gre_exit(void) { + inet_del_offload(&gre_offload, IPPROTO_GRE); inet_del_protocol(&net_gre_protocol, IPPROTO_GRE); } diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 17ff9fd7cdda..76e10b47e053 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -881,7 +881,7 @@ int icmp_rcv(struct sk_buff *skb) case CHECKSUM_NONE: skb->csum = 0; if (__skb_checksum_complete(skb)) - goto error; + goto csum_error; } if (!pskb_pull(skb, sizeof(*icmph))) @@ -929,11 +929,36 @@ int icmp_rcv(struct sk_buff *skb) drop: kfree_skb(skb); return 0; +csum_error: + ICMP_INC_STATS_BH(net, ICMP_MIB_CSUMERRORS); error: ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS); goto drop; } +void icmp_err(struct sk_buff *skb, u32 info) +{ + struct iphdr *iph = (struct iphdr *)skb->data; + struct icmphdr *icmph = (struct icmphdr *)(skb->data+(iph->ihl<<2)); + int type = icmp_hdr(skb)->type; + int code = icmp_hdr(skb)->code; + struct net *net = dev_net(skb->dev); + + /* + * Use ping_err to handle all icmp errors except those + * triggered by ICMP_ECHOREPLY, which is sent by the kernel.
+ */ + if (icmph->type != ICMP_ECHOREPLY) { + ping_err(skb, info); + return; + } + + if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) + ipv4_update_pmtu(skb, net, info, 0, 0, IPPROTO_ICMP, 0); + else if (type == ICMP_REDIRECT) + ipv4_redirect(skb, net, 0, 0, IPPROTO_ICMP, 0); +} + /* * This table is the definition of how we handle ICMP. */ diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 736ab70fd179..d8c232794bcb 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -2646,24 +2646,25 @@ static int __net_init igmp_net_init(struct net *net) { struct proc_dir_entry *pde; - pde = proc_net_fops_create(net, "igmp", S_IRUGO, &igmp_mc_seq_fops); + pde = proc_create("igmp", S_IRUGO, net->proc_net, &igmp_mc_seq_fops); if (!pde) goto out_igmp; - pde = proc_net_fops_create(net, "mcfilter", S_IRUGO, &igmp_mcf_seq_fops); + pde = proc_create("mcfilter", S_IRUGO, net->proc_net, + &igmp_mcf_seq_fops); if (!pde) goto out_mcfilter; return 0; out_mcfilter: - proc_net_remove(net, "igmp"); + remove_proc_entry("igmp", net->proc_net); out_igmp: return -ENOMEM; } static void __net_exit igmp_net_exit(struct net *net) { - proc_net_remove(net, "mcfilter"); - proc_net_remove(net, "igmp"); + remove_proc_entry("mcfilter", net->proc_net); + remove_proc_entry("igmp", net->proc_net); } static struct pernet_operations igmp_net_ops = { diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index d0670f00d524..6acb541c9091 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -57,8 +57,9 @@ int inet_csk_bind_conflict(const struct sock *sk, const struct inet_bind_bucket *tb, bool relax) { struct sock *sk2; - struct hlist_node *node; int reuse = sk->sk_reuse; + int reuseport = sk->sk_reuseport; + kuid_t uid = sock_i_uid((struct sock *)sk); /* * Unlike other sk lookup places we do not check @@ -67,14 +68,17 @@ int inet_csk_bind_conflict(const struct sock *sk, * one this bucket belongs to. 
*/ - sk_for_each_bound(sk2, node, &tb->owners) { + sk_for_each_bound(sk2, &tb->owners) { if (sk != sk2 && !inet_v6_ipv6only(sk2) && (!sk->sk_bound_dev_if || !sk2->sk_bound_dev_if || sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) { - if (!reuse || !sk2->sk_reuse || - sk2->sk_state == TCP_LISTEN) { + if ((!reuse || !sk2->sk_reuse || + sk2->sk_state == TCP_LISTEN) && + (!reuseport || !sk2->sk_reuseport || + (sk2->sk_state != TCP_TIME_WAIT && + !uid_eq(uid, sock_i_uid(sk2))))) { const __be32 sk2_rcv_saddr = sk_rcv_saddr(sk2); if (!sk2_rcv_saddr || !sk_rcv_saddr(sk) || sk2_rcv_saddr == sk_rcv_saddr(sk)) @@ -90,7 +94,7 @@ int inet_csk_bind_conflict(const struct sock *sk, } } } - return node != NULL; + return sk2 != NULL; } EXPORT_SYMBOL_GPL(inet_csk_bind_conflict); @@ -101,11 +105,11 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum) { struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; struct inet_bind_hashbucket *head; - struct hlist_node *node; struct inet_bind_bucket *tb; int ret, attempts = 5; struct net *net = sock_net(sk); int smallest_size = -1, smallest_rover; + kuid_t uid = sock_i_uid(sk); local_bh_disable(); if (!snum) { @@ -123,11 +127,14 @@ again: head = &hashinfo->bhash[inet_bhashfn(net, rover, hashinfo->bhash_size)]; spin_lock(&head->lock); - inet_bind_bucket_for_each(tb, node, &head->chain) + inet_bind_bucket_for_each(tb, &head->chain) if (net_eq(ib_net(tb), net) && tb->port == rover) { - if (tb->fastreuse > 0 && - sk->sk_reuse && - sk->sk_state != TCP_LISTEN && + if (((tb->fastreuse > 0 && + sk->sk_reuse && + sk->sk_state != TCP_LISTEN) || + (tb->fastreuseport > 0 && + sk->sk_reuseport && + uid_eq(tb->fastuid, uid))) && (tb->num_owners < smallest_size || smallest_size == -1)) { smallest_size = tb->num_owners; smallest_rover = rover; @@ -174,7 +181,7 @@ have_snum: head = &hashinfo->bhash[inet_bhashfn(net, snum, hashinfo->bhash_size)]; spin_lock(&head->lock); - inet_bind_bucket_for_each(tb, node, &head->chain) + inet_bind_bucket_for_each(tb, &head->chain) if (net_eq(ib_net(tb), net) && tb->port == snum) goto tb_found; } @@ -185,14 +192,18 @@ tb_found: if (sk->sk_reuse == SK_FORCE_REUSE) goto success; - if (tb->fastreuse > 0 && - sk->sk_reuse && sk->sk_state != TCP_LISTEN && + if (((tb->fastreuse > 0 && + sk->sk_reuse && sk->sk_state != TCP_LISTEN) || + (tb->fastreuseport > 0 && + sk->sk_reuseport && uid_eq(tb->fastuid, uid))) && smallest_size == -1) { goto success; } else { ret = 1; if (inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, true)) { - if (sk->sk_reuse && sk->sk_state != TCP_LISTEN && + if (((sk->sk_reuse && sk->sk_state != TCP_LISTEN) || + (tb->fastreuseport > 0 && + sk->sk_reuseport && uid_eq(tb->fastuid, uid))) && smallest_size != -1 && --attempts >= 0) { spin_unlock(&head->lock); goto again; @@ -212,9 +223,19 @@ tb_not_found: tb->fastreuse = 1; else tb->fastreuse = 0; - } else if (tb->fastreuse && - (!sk->sk_reuse || sk->sk_state == TCP_LISTEN)) - tb->fastreuse = 0; + if (sk->sk_reuseport) { + tb->fastreuseport = 1; + tb->fastuid = uid; + } else + tb->fastreuseport = 0; + } else { + if (tb->fastreuse && + (!sk->sk_reuse || sk->sk_state == TCP_LISTEN)) + tb->fastreuse = 0; + if (tb->fastreuseport && + (!sk->sk_reuseport || !uid_eq(tb->fastuid, uid))) + tb->fastreuseport = 0; + } success: if (!inet_csk(sk)->icsk_bind_hash) inet_bind_hash(sk, tb, snum); @@ -538,7 +559,7 @@ static inline void syn_ack_recalc(struct request_sock *req, const int thresh, int inet_rtx_syn_ack(struct sock *parent, struct request_sock *req) { - int err = 
req->rsk_ops->rtx_syn_ack(parent, req, NULL); + int err = req->rsk_ops->rtx_syn_ack(parent, req); if (!err) req->num_retrans++; @@ -714,6 +735,7 @@ EXPORT_SYMBOL(inet_csk_destroy_sock); * tcp/dccp_create_openreq_child(). */ void inet_csk_prepare_forced_close(struct sock *sk) + __releases(&sk->sk_lock.slock) { /* sk_clone_lock locked the socket and set refcnt to 2 */ bh_unlock_sock(sk); diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 7afa2c3c788f..5f648751fce2 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -158,7 +158,9 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, #define EXPIRES_IN_MS(tmo) DIV_ROUND_UP((tmo - jiffies) * 1000, HZ) - if (icsk->icsk_pending == ICSK_TIME_RETRANS) { + if (icsk->icsk_pending == ICSK_TIME_RETRANS || + icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS || + icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) { r->idiag_timer = 1; r->idiag_retrans = icsk->icsk_retransmits; r->idiag_expires = EXPIRES_IN_MS(icsk->icsk_timeout); @@ -322,7 +324,7 @@ int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *in_s } err = sk_diag_fill(sk, rep, req, - sk_user_ns(NETLINK_CB(in_skb).ssk), + sk_user_ns(NETLINK_CB(in_skb).sk), NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, 0, nlh); if (err < 0) { @@ -628,7 +630,7 @@ static int inet_csk_diag_dump(struct sock *sk, return 0; return inet_csk_diag_fill(sk, skb, r, - sk_user_ns(NETLINK_CB(cb->skb).ssk), + sk_user_ns(NETLINK_CB(cb->skb).sk), NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh); } @@ -803,7 +805,7 @@ static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk, } err = inet_diag_fill_req(skb, sk, req, - sk_user_ns(NETLINK_CB(cb->skb).ssk), + sk_user_ns(NETLINK_CB(cb->skb).sk), NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, cb->nlh); if (err < 0) { diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index 4750d2b74d79..7e06641e36ae 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -21,7 +21,30 @@ #include <linux/rtnetlink.h> #include <linux/slab.h> +#include <net/sock.h> #include <net/inet_frag.h> +#include <net/inet_ecn.h> + +/* Given the OR values of all fragments, apply RFC 3168 5.3 requirements + * Value : 0xff if frame should be dropped. 
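How the table defined below gets fed: every fragment contributes a single bit derived from the ECN field of its TOS byte, the bits are OR-ed together in the queue, and reassembly does one table lookup. A standalone sketch; the constants restate the IPFRAG_* values that ip_fragment.c stops defining privately further down:

#include <stdint.h>

#define ECN_MASK	0x03	/* INET_ECN_MASK: Not-ECT=0 ECT(1)=1 ECT(0)=2 CE=3 */
#define FRAG_NOT_ECT	0x01	/* 1 << 0 */
#define FRAG_ECT_1	0x02	/* 1 << 1 */
#define FRAG_ECT_0	0x04	/* 1 << 2 */
#define FRAG_CE		0x08	/* 1 << 3 */

/* One bit per fragment, i.e. ip4_frag_ecn(tos) accumulated into
 * qp->ecn; the result indexes ip_frag_ecn_table[] exactly once. */
static uint8_t frag_ecn_bits(const uint8_t *tos, int nfrags)
{
	uint8_t acc = 0;
	int i;

	for (i = 0; i < nfrags; i++)
		acc |= 1u << (tos[i] & ECN_MASK);
	return acc;
}

A CE fragment mixed with ECT ones yields INET_ECN_CE for the reassembled header; any mix of Not-ECT with marked fragments indexes to 0xff and the frame is dropped.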
+ * 0 or INET_ECN_CE value, to be ORed in to final iph->tos field + */ +const u8 ip_frag_ecn_table[16] = { + /* at least one fragment had CE, and others ECT_0 or ECT_1 */ + [IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0] = INET_ECN_CE, + [IPFRAG_ECN_CE | IPFRAG_ECN_ECT_1] = INET_ECN_CE, + [IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1] = INET_ECN_CE, + + /* invalid combinations : drop frame */ + [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE] = 0xff, + [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_0] = 0xff, + [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_1] = 0xff, + [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1] = 0xff, + [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0] = 0xff, + [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_1] = 0xff, + [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1] = 0xff, +}; +EXPORT_SYMBOL(ip_frag_ecn_table); static void inet_frag_secret_rebuild(unsigned long dummy) { @@ -29,20 +52,27 @@ static void inet_frag_secret_rebuild(unsigned long dummy) unsigned long now = jiffies; int i; + /* Per bucket lock NOT needed here, due to write lock protection */ write_lock(&f->lock); + get_random_bytes(&f->rnd, sizeof(u32)); for (i = 0; i < INETFRAGS_HASHSZ; i++) { + struct inet_frag_bucket *hb; struct inet_frag_queue *q; - struct hlist_node *p, *n; + struct hlist_node *n; - hlist_for_each_entry_safe(q, p, n, &f->hash[i], list) { + hb = &f->hash[i]; + hlist_for_each_entry_safe(q, n, &hb->chain, list) { unsigned int hval = f->hashfn(q); if (hval != i) { + struct inet_frag_bucket *hb_dest; + hlist_del(&q->list); /* Relink to new hash chain. */ - hlist_add_head(&q->list, &f->hash[hval]); + hb_dest = &f->hash[hval]; + hlist_add_head(&q->list, &hb_dest->chain); } } } @@ -55,9 +85,12 @@ void inet_frags_init(struct inet_frags *f) { int i; - for (i = 0; i < INETFRAGS_HASHSZ; i++) - INIT_HLIST_HEAD(&f->hash[i]); + for (i = 0; i < INETFRAGS_HASHSZ; i++) { + struct inet_frag_bucket *hb = &f->hash[i]; + spin_lock_init(&hb->chain_lock); + INIT_HLIST_HEAD(&hb->chain); + } rwlock_init(&f->lock); f->rnd = (u32) ((num_physpages ^ (num_physpages>>7)) ^ @@ -73,8 +106,9 @@ EXPORT_SYMBOL(inet_frags_init); void inet_frags_init_net(struct netns_frags *nf) { nf->nqueues = 0; - atomic_set(&nf->mem, 0); + init_frag_mem_limit(nf); INIT_LIST_HEAD(&nf->lru_list); + spin_lock_init(&nf->lru_lock); } EXPORT_SYMBOL(inet_frags_init_net); @@ -91,16 +125,26 @@ void inet_frags_exit_net(struct netns_frags *nf, struct inet_frags *f) local_bh_disable(); inet_frag_evictor(nf, f, true); local_bh_enable(); + + percpu_counter_destroy(&nf->mem); } EXPORT_SYMBOL(inet_frags_exit_net); static inline void fq_unlink(struct inet_frag_queue *fq, struct inet_frags *f) { - write_lock(&f->lock); + struct inet_frag_bucket *hb; + unsigned int hash; + + read_lock(&f->lock); + hash = f->hashfn(fq); + hb = &f->hash[hash]; + + spin_lock(&hb->chain_lock); hlist_del(&fq->list); - list_del(&fq->lru_list); - fq->net->nqueues--; - write_unlock(&f->lock); + spin_unlock(&hb->chain_lock); + + read_unlock(&f->lock); + inet_frag_lru_del(fq); } void inet_frag_kill(struct inet_frag_queue *fq, struct inet_frags *f) @@ -117,12 +161,8 @@ void inet_frag_kill(struct inet_frag_queue *fq, struct inet_frags *f) EXPORT_SYMBOL(inet_frag_kill); static inline void frag_kfree_skb(struct netns_frags *nf, struct inet_frags *f, - struct sk_buff *skb, int *work) + struct sk_buff *skb) { - if (work) - *work -= skb->truesize; - - atomic_sub(skb->truesize, &nf->mem); if (f->skb_free) f->skb_free(skb); kfree_skb(skb); @@ -133,6 +173,7 @@ void 
inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f, { struct sk_buff *fp; struct netns_frags *nf; + unsigned int sum, sum_truesize = 0; WARN_ON(!(q->last_in & INET_FRAG_COMPLETE)); WARN_ON(del_timer(&q->timer) != 0); @@ -143,13 +184,14 @@ void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f, while (fp) { struct sk_buff *xp = fp->next; - frag_kfree_skb(nf, f, fp, work); + sum_truesize += fp->truesize; + frag_kfree_skb(nf, f, fp); fp = xp; } - + sum = sum_truesize + f->qsize; if (work) - *work -= f->qsize; - atomic_sub(f->qsize, &nf->mem); + *work -= sum; + sub_frag_mem_limit(q, sum); if (f->destructor) f->destructor(q); @@ -164,22 +206,26 @@ int inet_frag_evictor(struct netns_frags *nf, struct inet_frags *f, bool force) int work, evicted = 0; if (!force) { - if (atomic_read(&nf->mem) <= nf->high_thresh) + if (frag_mem_limit(nf) <= nf->high_thresh) return 0; } - work = atomic_read(&nf->mem) - nf->low_thresh; + work = frag_mem_limit(nf) - nf->low_thresh; while (work > 0) { - read_lock(&f->lock); + spin_lock(&nf->lru_lock); + if (list_empty(&nf->lru_list)) { - read_unlock(&f->lock); + spin_unlock(&nf->lru_lock); break; } q = list_first_entry(&nf->lru_list, struct inet_frag_queue, lru_list); atomic_inc(&q->refcnt); - read_unlock(&f->lock); + /* Remove q from list to avoid several CPUs grabbing it */ + list_del_init(&q->lru_list); + + spin_unlock(&nf->lru_lock); spin_lock(&q->lock); if (!(q->last_in & INET_FRAG_COMPLETE)) @@ -199,28 +245,32 @@ static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf, struct inet_frag_queue *qp_in, struct inet_frags *f, void *arg) { + struct inet_frag_bucket *hb; struct inet_frag_queue *qp; #ifdef CONFIG_SMP - struct hlist_node *n; #endif unsigned int hash; - write_lock(&f->lock); + read_lock(&f->lock); /* Protects against hash rebuild */ /* * While we stayed w/o the lock other CPU could update * the rnd seed, so we need to re-calculate the hash * chain. Fortunatelly the qp_in can be used to get one. */ hash = f->hashfn(qp_in); + hb = &f->hash[hash]; + spin_lock(&hb->chain_lock); + #ifdef CONFIG_SMP /* With SMP race we have to recheck hash table, because * such entry could be created on other cpu, while we - * promoted read lock to write lock. + * released the hash bucket lock. 
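The recheck that follows is the classic lookup, unlock, allocate, relock, search-again shape. A compressed pthread rendition with all names invented; the kernel version differs in using a read lock plus per-bucket spinlocks and refcounts:

#include <pthread.h>
#include <stdlib.h>

struct entry { int key; struct entry *next; };
struct bucket { pthread_mutex_t lock; struct entry *head; };

static struct entry *find(struct bucket *b, int key)
{
	struct entry *e;

	for (e = b->head; e; e = e->next)
		if (e->key == key)
			return e;
	return NULL;
}

static struct entry *get_or_create(struct bucket *b, int key)
{
	struct entry *e, *other;

	pthread_mutex_lock(&b->lock);
	e = find(b, key);
	pthread_mutex_unlock(&b->lock);
	if (e)
		return e;

	e = malloc(sizeof(*e));		/* may block: done unlocked */
	if (!e)
		return NULL;
	e->key = key;

	pthread_mutex_lock(&b->lock);
	other = find(b, key);		/* lost the race to another thread? */
	if (other) {
		pthread_mutex_unlock(&b->lock);
		free(e);		/* discard our duplicate */
		return other;
	}
	e->next = b->head;
	b->head = e;
	pthread_mutex_unlock(&b->lock);
	return e;
}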
*/ - hlist_for_each_entry(qp, n, &f->hash[hash], list) { + hlist_for_each_entry(qp, &hb->chain, list) { if (qp->net == nf && f->match(qp, arg)) { atomic_inc(&qp->refcnt); - write_unlock(&f->lock); + spin_unlock(&hb->chain_lock); + read_unlock(&f->lock); qp_in->last_in |= INET_FRAG_COMPLETE; inet_frag_put(qp_in, f); return qp; @@ -232,10 +282,10 @@ static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf, atomic_inc(&qp->refcnt); atomic_inc(&qp->refcnt); - hlist_add_head(&qp->list, &f->hash[hash]); - list_add_tail(&qp->lru_list, &nf->lru_list); - nf->nqueues++; - write_unlock(&f->lock); + hlist_add_head(&qp->list, &hb->chain); + spin_unlock(&hb->chain_lock); + read_unlock(&f->lock); + inet_frag_lru_add(nf, qp); return qp; } @@ -250,10 +300,12 @@ static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf, q->net = nf; f->constructor(q, arg); - atomic_add(f->qsize, &nf->mem); + add_frag_mem_limit(q, f->qsize); + setup_timer(&q->timer, f->frag_expire, (unsigned long)q); spin_lock_init(&q->lock); atomic_set(&q->refcnt, 1); + INIT_LIST_HEAD(&q->lru_list); return q; } @@ -274,18 +326,40 @@ struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, struct inet_frags *f, void *key, unsigned int hash) __releases(&f->lock) { + struct inet_frag_bucket *hb; struct inet_frag_queue *q; - struct hlist_node *n; + int depth = 0; - hlist_for_each_entry(q, n, &f->hash[hash], list) { + hb = &f->hash[hash]; + + spin_lock(&hb->chain_lock); + hlist_for_each_entry(q, &hb->chain, list) { if (q->net == nf && f->match(q, key)) { atomic_inc(&q->refcnt); + spin_unlock(&hb->chain_lock); read_unlock(&f->lock); return q; } + depth++; } + spin_unlock(&hb->chain_lock); read_unlock(&f->lock); - return inet_frag_create(nf, f, key); + if (depth <= INETFRAGS_MAXDEPTH) + return inet_frag_create(nf, f, key); + else + return ERR_PTR(-ENOBUFS); } EXPORT_SYMBOL(inet_frag_find); + +void inet_frag_maybe_warn_overflow(struct inet_frag_queue *q, + const char *prefix) +{ + static const char msg[] = "inet_frag_find: Fragment hash bucket" + " list length grew over limit " __stringify(INETFRAGS_MAXDEPTH) + ". Dropping fragment.\n"; + + if (PTR_ERR(q) == -ENOBUFS) + LIMIT_NETDEBUG(KERN_WARNING "%s%s", prefix, msg); +} +EXPORT_SYMBOL(inet_frag_maybe_warn_overflow); diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index fa3ae8148710..6af375afeeef 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -39,6 +39,7 @@ struct inet_bind_bucket *inet_bind_bucket_create(struct kmem_cache *cachep, write_pnet(&tb->ib_net, hold_net(net)); tb->port = snum; tb->fastreuse = 0; + tb->fastreuseport = 0; tb->num_owners = 0; INIT_HLIST_HEAD(&tb->owners); hlist_add_head(&tb->node, &head->chain); @@ -119,13 +120,12 @@ int __inet_inherit_port(struct sock *sk, struct sock *child) * that the listener socket's icsk_bind_hash is the same * as that of the child socket. We have to look up or * create a new bind bucket for the child here. */ - struct hlist_node *node; - inet_bind_bucket_for_each(tb, node, &head->chain) { + inet_bind_bucket_for_each(tb, &head->chain) { if (net_eq(ib_net(tb), sock_net(sk)) && tb->port == port) break; } - if (!node) { + if (!tb) { tb = inet_bind_bucket_create(table->bind_bucket_cachep, sock_net(sk), head, port); if (!tb) { @@ -151,16 +151,16 @@ static inline int compute_score(struct sock *sk, struct net *net, if (net_eq(sock_net(sk), net) && inet->inet_num == hnum && !ipv6_only_sock(sk)) { __be32 rcv_saddr = inet->inet_rcv_saddr; - score = sk->sk_family == PF_INET ? 
1 : 0; + score = sk->sk_family == PF_INET ? 2 : 1; if (rcv_saddr) { if (rcv_saddr != daddr) return -1; - score += 2; + score += 4; } if (sk->sk_bound_dev_if) { if (sk->sk_bound_dev_if != dif) return -1; - score += 2; + score += 4; } } return score; @@ -176,6 +176,7 @@ static inline int compute_score(struct sock *sk, struct net *net, struct sock *__inet_lookup_listener(struct net *net, struct inet_hashinfo *hashinfo, + const __be32 saddr, __be16 sport, const __be32 daddr, const unsigned short hnum, const int dif) { @@ -183,17 +184,29 @@ struct sock *__inet_lookup_listener(struct net *net, struct hlist_nulls_node *node; unsigned int hash = inet_lhashfn(net, hnum); struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash]; - int score, hiscore; + int score, hiscore, matches = 0, reuseport = 0; + u32 phash = 0; rcu_read_lock(); begin: result = NULL; - hiscore = -1; + hiscore = 0; sk_nulls_for_each_rcu(sk, node, &ilb->head) { score = compute_score(sk, net, hnum, daddr, dif); if (score > hiscore) { result = sk; hiscore = score; + reuseport = sk->sk_reuseport; + if (reuseport) { + phash = inet_ehashfn(net, daddr, hnum, + saddr, sport); + matches = 1; + } + } else if (score == hiscore && reuseport) { + matches++; + if (((u64)phash * matches) >> 32 == 0) + result = sk; + phash = next_pseudo_random32(phash); } } /* @@ -479,7 +492,6 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row, int i, remaining, low, high, port; static u32 hint; u32 offset = hint + port_offset; - struct hlist_node *node; struct inet_timewait_sock *tw = NULL; inet_get_local_port_range(&low, &high); @@ -498,10 +510,11 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row, * because the established check is already * unique enough. */ - inet_bind_bucket_for_each(tb, node, &head->chain) { + inet_bind_bucket_for_each(tb, &head->chain) { if (net_eq(ib_net(tb), net) && tb->port == port) { - if (tb->fastreuse >= 0) + if (tb->fastreuse >= 0 || + tb->fastreuseport >= 0) goto next_port; WARN_ON(hlist_empty(&tb->owners)); if (!check_established(death_row, sk, @@ -518,6 +531,7 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row, break; } tb->fastreuse = -1; + tb->fastreuseport = -1; goto ok; next_port: diff --git a/net/ipv4/inet_lro.c b/net/ipv4/inet_lro.c index cc280a3f4f96..1975f52933c5 100644 --- a/net/ipv4/inet_lro.c +++ b/net/ipv4/inet_lro.c @@ -29,6 +29,7 @@ #include <linux/module.h> #include <linux/if_vlan.h> #include <linux/inet_lro.h> +#include <net/checksum.h> MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jan-Bernd Themann <themann@de.ibm.com>"); @@ -114,11 +115,9 @@ static void lro_update_tcp_ip_header(struct net_lro_desc *lro_desc) *(p+2) = lro_desc->tcp_rcv_tsecr; } + csum_replace2(&iph->check, iph->tot_len, htons(lro_desc->ip_tot_len)); iph->tot_len = htons(lro_desc->ip_tot_len); - iph->check = 0; - iph->check = ip_fast_csum((u8 *)lro_desc->iph, iph->ihl); - tcph->check = 0; tcp_hdr_csum = csum_partial(tcph, TCP_HDR_LEN(tcph), 0); lro_desc->data_csum = csum_add(lro_desc->data_csum, tcp_hdr_csum); diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index 2784db3155fb..1f27c9f4afd0 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -216,7 +216,6 @@ static int inet_twdr_do_twkill_work(struct inet_timewait_death_row *twdr, const int slot) { struct inet_timewait_sock *tw; - struct hlist_node *node; unsigned int killed; int ret; @@ -229,7 +228,7 @@ static int inet_twdr_do_twkill_work(struct inet_timewait_death_row *twdr, killed 
= 0; ret = 0; rescan: - inet_twsk_for_each_inmate(tw, node, &twdr->cells[slot]) { + inet_twsk_for_each_inmate(tw, &twdr->cells[slot]) { __inet_twsk_del_dead_node(tw); spin_unlock(&twdr->death_lock); __inet_twsk_kill(tw, twdr->hashinfo); @@ -438,10 +437,10 @@ void inet_twdr_twcal_tick(unsigned long data) for (n = 0; n < INET_TWDR_RECYCLE_SLOTS; n++) { if (time_before_eq(j, now)) { - struct hlist_node *node, *safe; + struct hlist_node *safe; struct inet_timewait_sock *tw; - inet_twsk_for_each_inmate_safe(tw, node, safe, + inet_twsk_for_each_inmate_safe(tw, safe, &twdr->twcal_row[slot]) { __inet_twsk_del_dead_node(tw); __inet_twsk_kill(tw, twdr->hashinfo); diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index eb9d63a570cd..b66910aaef4d 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -79,40 +79,11 @@ struct ipq { struct inet_peer *peer; }; -/* RFC 3168 support : - * We want to check ECN values of all fragments, do detect invalid combinations. - * In ipq->ecn, we store the OR value of each ip4_frag_ecn() fragment value. - */ -#define IPFRAG_ECN_NOT_ECT 0x01 /* one frag had ECN_NOT_ECT */ -#define IPFRAG_ECN_ECT_1 0x02 /* one frag had ECN_ECT_1 */ -#define IPFRAG_ECN_ECT_0 0x04 /* one frag had ECN_ECT_0 */ -#define IPFRAG_ECN_CE 0x08 /* one frag had ECN_CE */ - static inline u8 ip4_frag_ecn(u8 tos) { return 1 << (tos & INET_ECN_MASK); } -/* Given the OR values of all fragments, apply RFC 3168 5.3 requirements - * Value : 0xff if frame should be dropped. - * 0 or INET_ECN_CE value, to be ORed in to final iph->tos field - */ -static const u8 ip4_frag_ecn_table[16] = { - /* at least one fragment had CE, and others ECT_0 or ECT_1 */ - [IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0] = INET_ECN_CE, - [IPFRAG_ECN_CE | IPFRAG_ECN_ECT_1] = INET_ECN_CE, - [IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1] = INET_ECN_CE, - - /* invalid combinations : drop frame */ - [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE] = 0xff, - [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_0] = 0xff, - [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_1] = 0xff, - [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1] = 0xff, - [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0] = 0xff, - [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_1] = 0xff, - [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1] = 0xff, -}; - static struct inet_frags ip4_frags; int ip_frag_nqueues(struct net *net) @@ -122,7 +93,7 @@ int ip_frag_nqueues(struct net *net) int ip_frag_mem(struct net *net) { - return atomic_read(&net->ipv4.frags.mem); + return sum_frag_mem_limit(&net->ipv4.frags); } static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, @@ -161,13 +132,6 @@ static bool ip4_frag_match(struct inet_frag_queue *q, void *a) qp->user == arg->user; } -/* Memory Tracking Functions. 
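The accounting change running through this file: instead of one atomic subtraction per freed fragment (frag_kfree_skb() below goes away), truesize is summed on the walk and charged once. A toy version with a plain C11 atomic standing in for the percpu counter:

#include <stdatomic.h>
#include <stdlib.h>

struct frag { struct frag *next; long truesize; };

static _Atomic long frag_mem;		/* stands in for percpu nf->mem */

static long drop_frags(struct frag *head)
{
	long sum = 0;

	while (head) {
		struct frag *next = head->next;

		sum += head->truesize;	/* tally while freeing */
		free(head);
		head = next;
	}
	atomic_fetch_sub(&frag_mem, sum);	/* one sub_frag_mem_limit() */
	return sum;
}

Batching matters because the counter is now a percpu_counter; touching it once per skb would defeat the point of making it cheap.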
*/ -static void frag_kfree_skb(struct netns_frags *nf, struct sk_buff *skb) -{ - atomic_sub(skb->truesize, &nf->mem); - kfree_skb(skb); -} - static void ip4_frag_init(struct inet_frag_queue *q, void *a) { struct ipq *qp = container_of(q, struct ipq, q); @@ -255,8 +219,7 @@ static void ip_expire(unsigned long arg) if (!head->dev) goto out_rcu_unlock; - /* skb dst is stale, drop it, and perform route lookup again */ - skb_dst_drop(head); + /* skb has no dst, perform route lookup again */ iph = ip_hdr(head); err = ip_route_input_noref(head, iph->daddr, iph->saddr, iph->tos, head->dev); @@ -299,14 +262,11 @@ static inline struct ipq *ip_find(struct net *net, struct iphdr *iph, u32 user) hash = ipqhashfn(iph->id, iph->saddr, iph->daddr, iph->protocol); q = inet_frag_find(&net->ipv4.frags, &ip4_frags, &arg, hash); - if (q == NULL) - goto out_nomem; - + if (IS_ERR_OR_NULL(q)) { + inet_frag_maybe_warn_overflow(q, pr_fmt()); + return NULL; + } return container_of(q, struct ipq, q); - -out_nomem: - LIMIT_NETDEBUG(KERN_ERR pr_fmt("ip_frag_create: no memory left !\n")); - return NULL; } /* Is the fragment too far ahead to be part of ipq? */ @@ -340,6 +300,7 @@ static inline int ip_frag_too_far(struct ipq *qp) static int ip_frag_reinit(struct ipq *qp) { struct sk_buff *fp; + unsigned int sum_truesize = 0; if (!mod_timer(&qp->q.timer, jiffies + qp->q.net->timeout)) { atomic_inc(&qp->q.refcnt); @@ -349,9 +310,12 @@ static int ip_frag_reinit(struct ipq *qp) fp = qp->q.fragments; do { struct sk_buff *xp = fp->next; - frag_kfree_skb(qp->q.net, fp); + + sum_truesize += fp->truesize; + kfree_skb(fp); fp = xp; } while (fp); + sub_frag_mem_limit(&qp->q, sum_truesize); qp->q.last_in = 0; qp->q.len = 0; @@ -496,7 +460,8 @@ found: qp->q.fragments = next; qp->q.meat -= free_it->len; - frag_kfree_skb(qp->q.net, free_it); + sub_frag_mem_limit(&qp->q, free_it->truesize); + kfree_skb(free_it); } } @@ -519,7 +484,7 @@ found: qp->q.stamp = skb->tstamp; qp->q.meat += skb->len; qp->ecn |= ecn; - atomic_add(skb->truesize, &qp->q.net->mem); + add_frag_mem_limit(&qp->q, skb->truesize); if (offset == 0) qp->q.last_in |= INET_FRAG_FIRST_IN; @@ -528,12 +493,17 @@ found: qp->q.max_size = skb->len + ihl; if (qp->q.last_in == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) && - qp->q.meat == qp->q.len) - return ip_frag_reasm(qp, prev, dev); + qp->q.meat == qp->q.len) { + unsigned long orefdst = skb->_skb_refdst; + + skb->_skb_refdst = 0UL; + err = ip_frag_reasm(qp, prev, dev); + skb->_skb_refdst = orefdst; + return err; + } - write_lock(&ip4_frags.lock); - list_move_tail(&qp->q.lru_list, &qp->q.net->lru_list); - write_unlock(&ip4_frags.lock); + skb_dst_drop(skb); + inet_frag_lru_move(&qp->q); return -EINPROGRESS; err: @@ -558,7 +528,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, ipq_kill(qp); - ecn = ip4_frag_ecn_table[qp->ecn]; + ecn = ip_frag_ecn_table[qp->ecn]; if (unlikely(ecn == 0xff)) { err = -EINVAL; goto out_fail; @@ -594,7 +564,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, goto out_oversize; /* Head of list must not be cloned. 
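For orientation, the gate that decides when ip_frag_reasm() above runs, restated on its own: both terminal fragments must have arrived and the byte count must add up. Constants are illustrative, not the kernel's INET_FRAG_* values:

#include <stdbool.h>
#include <stdint.h>

#define FRAG_FIRST_IN	0x01	/* offset-0 fragment seen */
#define FRAG_LAST_IN	0x02	/* fragment with MF clear seen */

static bool reasm_ready(unsigned last_in, uint32_t meat, uint32_t len)
{
	/* every byte between first and last accounted for? */
	return last_in == (FRAG_FIRST_IN | FRAG_LAST_IN) && meat == len;
}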
*/ - if (skb_cloned(head) && pskb_expand_head(head, 0, 0, GFP_ATOMIC)) + if (skb_unclone(head, GFP_ATOMIC)) goto out_nomem; /* If the first fragment is fragmented itself, we split @@ -617,7 +587,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, head->len -= clone->len; clone->csum = 0; clone->ip_summed = head->ip_summed; - atomic_add(clone->truesize, &qp->q.net->mem); + add_frag_mem_limit(&qp->q, clone->truesize); } skb_push(head, head->data - skb_network_header(head)); @@ -645,7 +615,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, } fp = next; } - atomic_sub(sum_truesize, &qp->q.net->mem); + sub_frag_mem_limit(&qp->q, sum_truesize); head->next = NULL; head->dev = dev; @@ -851,14 +821,22 @@ static inline void ip4_frags_ctl_register(void) static int __net_init ipv4_frags_init_net(struct net *net) { - /* - * Fragment cache limits. We will commit 256K at one time. Should we - * cross that limit we will prune down to 192K. This should cope with - * even the most extreme cases without allowing an attacker to - * measurably harm machine performance. + /* Fragment cache limits. + * + * The fragment memory accounting code, (tries to) account for + * the real memory usage, by measuring both the size of frag + * queue struct (inet_frag_queue (ipv4:ipq/ipv6:frag_queue)) + * and the SKB's truesize. + * + * A 64K fragment consumes 129736 bytes (44*2944)+200 + * (1500 truesize == 2944, sizeof(struct ipq) == 200) + * + * We will commit 4MB at one time. Should we cross that limit + * we will prune down to 3MB, making room for approx 8 big 64K + * fragments 8x128k. */ - net->ipv4.frags.high_thresh = 256 * 1024; - net->ipv4.frags.low_thresh = 192 * 1024; + net->ipv4.frags.high_thresh = 4 * 1024 * 1024; + net->ipv4.frags.low_thresh = 3 * 1024 * 1024; /* * Important NOTE! Fragment queue must be destroyed before MSL expires. * RFC791 is wrong proposing to prolongate timer each fragment arrival diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index e81b1caf2ea2..c625e4dad4b0 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -37,7 +37,7 @@ #include <net/ip.h> #include <net/icmp.h> #include <net/protocol.h> -#include <net/ipip.h> +#include <net/ip_tunnels.h> #include <net/arp.h> #include <net/checksum.h> #include <net/dsfield.h> @@ -108,15 +108,6 @@ fatal route to network, even if it were you who configured fatal static route: you are innocent. :-) - - - 3. Really, ipv4/ipip.c, ipv4/ip_gre.c and ipv6/sit.c contain - practically identical code. It would be good to glue them - together, but it is not very evident, how to make them modular. - sit is integral part of IPv6, ipip and gre are naturally modular. - We could extract common parts (hash table, ioctl etc) - to a separate module (ip_tunnel.c). - Alexey Kuznetsov. 
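The arithmetic in the new limits comment, made executable; every figure comes straight from that comment (1500-byte fragment skb truesize 2944, sizeof(struct ipq) 200):

#include <stdio.h>

int main(void)
{
	int frags = 44;			/* ~64K split at 1500-byte MTU */
	int truesize = 2944;		/* one fragment skb */
	int qstruct = 200;		/* sizeof(struct ipq) */
	int datagram = frags * truesize + qstruct;

	printf("one 64K datagram: %d bytes\n", datagram);	/* 129736 */
	printf("4M high_thresh holds: %d\n",
	       4 * 1024 * 1024 / datagram);			/* 32 */
	printf("prune band (4M - 3M) frees room for: %d\n",
	       1024 * 1024 / datagram);				/* 8 */
	return 0;
}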
*/ @@ -126,400 +117,137 @@ MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN"); static struct rtnl_link_ops ipgre_link_ops __read_mostly; static int ipgre_tunnel_init(struct net_device *dev); -static void ipgre_tunnel_setup(struct net_device *dev); -static int ipgre_tunnel_bind_dev(struct net_device *dev); - -/* Fallback tunnel: no source, no destination, no key, no options */ - -#define HASH_SIZE 16 static int ipgre_net_id __read_mostly; -struct ipgre_net { - struct ip_tunnel __rcu *tunnels[4][HASH_SIZE]; - - struct net_device *fb_tunnel_dev; -}; - -/* Tunnel hash table */ - -/* - 4 hash tables: - - 3: (remote,local) - 2: (remote,*) - 1: (*,local) - 0: (*,*) +static int gre_tap_net_id __read_mostly; - We require exact key match i.e. if a key is present in packet - it will match only tunnel with the same key; if it is not present, - it will match only keyless tunnel. - - All keysless packets, if not matched configured keyless tunnels - will match fallback tunnel. - */ +static __sum16 check_checksum(struct sk_buff *skb) +{ + __sum16 csum = 0; -#define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF) + switch (skb->ip_summed) { + case CHECKSUM_COMPLETE: + csum = csum_fold(skb->csum); -#define tunnels_r_l tunnels[3] -#define tunnels_r tunnels[2] -#define tunnels_l tunnels[1] -#define tunnels_wc tunnels[0] + if (!csum) + break; + /* Fall through. */ -static struct rtnl_link_stats64 *ipgre_get_stats64(struct net_device *dev, - struct rtnl_link_stats64 *tot) -{ - int i; - - for_each_possible_cpu(i) { - const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i); - u64 rx_packets, rx_bytes, tx_packets, tx_bytes; - unsigned int start; - - do { - start = u64_stats_fetch_begin_bh(&tstats->syncp); - rx_packets = tstats->rx_packets; - tx_packets = tstats->tx_packets; - rx_bytes = tstats->rx_bytes; - tx_bytes = tstats->tx_bytes; - } while (u64_stats_fetch_retry_bh(&tstats->syncp, start)); - - tot->rx_packets += rx_packets; - tot->tx_packets += tx_packets; - tot->rx_bytes += rx_bytes; - tot->tx_bytes += tx_bytes; + case CHECKSUM_NONE: + skb->csum = 0; + csum = __skb_checksum_complete(skb); + skb->ip_summed = CHECKSUM_COMPLETE; + break; } - tot->multicast = dev->stats.multicast; - tot->rx_crc_errors = dev->stats.rx_crc_errors; - tot->rx_fifo_errors = dev->stats.rx_fifo_errors; - tot->rx_length_errors = dev->stats.rx_length_errors; - tot->rx_frame_errors = dev->stats.rx_frame_errors; - tot->rx_errors = dev->stats.rx_errors; - - tot->tx_fifo_errors = dev->stats.tx_fifo_errors; - tot->tx_carrier_errors = dev->stats.tx_carrier_errors; - tot->tx_dropped = dev->stats.tx_dropped; - tot->tx_aborted_errors = dev->stats.tx_aborted_errors; - tot->tx_errors = dev->stats.tx_errors; - - return tot; + return csum; } -/* Does key in tunnel parameters match packet */ -static bool ipgre_key_match(const struct ip_tunnel_parm *p, - __be16 flags, __be32 key) +static int ip_gre_calc_hlen(__be16 o_flags) { - if (p->i_flags & GRE_KEY) { - if (flags & GRE_KEY) - return key == p->i_key; - else - return false; /* key expected, none present */ - } else - return !(flags & GRE_KEY); -} + int addend = 4; -/* Given src, dst and key, find appropriate for input tunnel. 
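ip_gre_calc_hlen() in this hunk is pure flag counting. The same computation with illustrative flag values (the real TUNNEL_* constants live in ip_tunnels.h):

#include <stdio.h>

#define F_CSUM	0x1	/* illustrative stand-ins for TUNNEL_CSUM/KEY/SEQ */
#define F_KEY	0x2
#define F_SEQ	0x4

static int gre_hlen(unsigned flags)
{
	int hlen = 4;			/* base: flags + protocol */

	if (flags & F_CSUM)
		hlen += 4;		/* checksum + reserved1 */
	if (flags & F_KEY)
		hlen += 4;
	if (flags & F_SEQ)
		hlen += 4;
	return hlen;
}

int main(void)
{
	/* keyed, checksummed tunnel: 12-byte GRE header */
	printf("%d\n", gre_hlen(F_CSUM | F_KEY));
	return 0;
}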
*/ + if (o_flags&TUNNEL_CSUM) + addend += 4; + if (o_flags&TUNNEL_KEY) + addend += 4; + if (o_flags&TUNNEL_SEQ) + addend += 4; + return addend; +} -static struct ip_tunnel *ipgre_tunnel_lookup(struct net_device *dev, - __be32 remote, __be32 local, - __be16 flags, __be32 key, - __be16 gre_proto) +static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi, + bool *csum_err, int *hdr_len) { - struct net *net = dev_net(dev); - int link = dev->ifindex; - unsigned int h0 = HASH(remote); - unsigned int h1 = HASH(key); - struct ip_tunnel *t, *cand = NULL; - struct ipgre_net *ign = net_generic(net, ipgre_net_id); - int dev_type = (gre_proto == htons(ETH_P_TEB)) ? - ARPHRD_ETHER : ARPHRD_IPGRE; - int score, cand_score = 4; - - for_each_ip_tunnel_rcu(t, ign->tunnels_r_l[h0 ^ h1]) { - if (local != t->parms.iph.saddr || - remote != t->parms.iph.daddr || - !(t->dev->flags & IFF_UP)) - continue; - - if (!ipgre_key_match(&t->parms, flags, key)) - continue; - - if (t->dev->type != ARPHRD_IPGRE && - t->dev->type != dev_type) - continue; - - score = 0; - if (t->parms.link != link) - score |= 1; - if (t->dev->type != dev_type) - score |= 2; - if (score == 0) - return t; - - if (score < cand_score) { - cand = t; - cand_score = score; - } - } + unsigned int ip_hlen = ip_hdrlen(skb); + const struct gre_base_hdr *greh; + __be32 *options; - for_each_ip_tunnel_rcu(t, ign->tunnels_r[h0 ^ h1]) { - if (remote != t->parms.iph.daddr || - !(t->dev->flags & IFF_UP)) - continue; - - if (!ipgre_key_match(&t->parms, flags, key)) - continue; - - if (t->dev->type != ARPHRD_IPGRE && - t->dev->type != dev_type) - continue; - - score = 0; - if (t->parms.link != link) - score |= 1; - if (t->dev->type != dev_type) - score |= 2; - if (score == 0) - return t; - - if (score < cand_score) { - cand = t; - cand_score = score; - } - } + if (unlikely(!pskb_may_pull(skb, sizeof(struct gre_base_hdr)))) + return -EINVAL; - for_each_ip_tunnel_rcu(t, ign->tunnels_l[h1]) { - if ((local != t->parms.iph.saddr && - (local != t->parms.iph.daddr || - !ipv4_is_multicast(local))) || - !(t->dev->flags & IFF_UP)) - continue; - - if (!ipgre_key_match(&t->parms, flags, key)) - continue; - - if (t->dev->type != ARPHRD_IPGRE && - t->dev->type != dev_type) - continue; - - score = 0; - if (t->parms.link != link) - score |= 1; - if (t->dev->type != dev_type) - score |= 2; - if (score == 0) - return t; - - if (score < cand_score) { - cand = t; - cand_score = score; - } - } + greh = (struct gre_base_hdr *)(skb_network_header(skb) + ip_hlen); + if (unlikely(greh->flags & (GRE_VERSION | GRE_ROUTING))) + return -EINVAL; - for_each_ip_tunnel_rcu(t, ign->tunnels_wc[h1]) { - if (t->parms.i_key != key || - !(t->dev->flags & IFF_UP)) - continue; - - if (t->dev->type != ARPHRD_IPGRE && - t->dev->type != dev_type) - continue; - - score = 0; - if (t->parms.link != link) - score |= 1; - if (t->dev->type != dev_type) - score |= 2; - if (score == 0) - return t; - - if (score < cand_score) { - cand = t; - cand_score = score; - } - } + tpi->flags = gre_flags_to_tnl_flags(greh->flags); + *hdr_len = ip_gre_calc_hlen(tpi->flags); - if (cand != NULL) - return cand; + if (!pskb_may_pull(skb, *hdr_len)) + return -EINVAL; - dev = ign->fb_tunnel_dev; - if (dev->flags & IFF_UP) - return netdev_priv(dev); + greh = (struct gre_base_hdr *)(skb_network_header(skb) + ip_hlen); - return NULL; -} + tpi->proto = greh->protocol; -static struct ip_tunnel __rcu **__ipgre_bucket(struct ipgre_net *ign, - struct ip_tunnel_parm *parms) -{ - __be32 remote = parms->iph.daddr; - __be32 
local = parms->iph.saddr; - __be32 key = parms->i_key; - unsigned int h = HASH(key); - int prio = 0; - - if (local) - prio |= 1; - if (remote && !ipv4_is_multicast(remote)) { - prio |= 2; - h ^= HASH(remote); + options = (__be32 *)(greh + 1); + if (greh->flags & GRE_CSUM) { + if (check_checksum(skb)) { + *csum_err = true; + return -EINVAL; + } + options++; } - return &ign->tunnels[prio][h]; -} - -static inline struct ip_tunnel __rcu **ipgre_bucket(struct ipgre_net *ign, - struct ip_tunnel *t) -{ - return __ipgre_bucket(ign, &t->parms); -} - -static void ipgre_tunnel_link(struct ipgre_net *ign, struct ip_tunnel *t) -{ - struct ip_tunnel __rcu **tp = ipgre_bucket(ign, t); + if (greh->flags & GRE_KEY) { + tpi->key = *options; + options++; + } else + tpi->key = 0; - rcu_assign_pointer(t->next, rtnl_dereference(*tp)); - rcu_assign_pointer(*tp, t); -} + if (unlikely(greh->flags & GRE_SEQ)) { + tpi->seq = *options; + options++; + } else + tpi->seq = 0; -static void ipgre_tunnel_unlink(struct ipgre_net *ign, struct ip_tunnel *t) -{ - struct ip_tunnel __rcu **tp; - struct ip_tunnel *iter; - - for (tp = ipgre_bucket(ign, t); - (iter = rtnl_dereference(*tp)) != NULL; - tp = &iter->next) { - if (t == iter) { - rcu_assign_pointer(*tp, t->next); - break; + /* WCCP version 1 and 2 protocol decoding. + * - Change protocol to IP + * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header + */ + if (greh->flags == 0 && tpi->proto == htons(ETH_P_WCCP)) { + tpi->proto = htons(ETH_P_IP); + if ((*(u8 *)options & 0xF0) != 0x40) { + *hdr_len += 4; + if (!pskb_may_pull(skb, *hdr_len)) + return -EINVAL; } } -} - -static struct ip_tunnel *ipgre_tunnel_find(struct net *net, - struct ip_tunnel_parm *parms, - int type) -{ - __be32 remote = parms->iph.daddr; - __be32 local = parms->iph.saddr; - __be32 key = parms->i_key; - int link = parms->link; - struct ip_tunnel *t; - struct ip_tunnel __rcu **tp; - struct ipgre_net *ign = net_generic(net, ipgre_net_id); - - for (tp = __ipgre_bucket(ign, parms); - (t = rtnl_dereference(*tp)) != NULL; - tp = &t->next) - if (local == t->parms.iph.saddr && - remote == t->parms.iph.daddr && - key == t->parms.i_key && - link == t->parms.link && - type == t->dev->type) - break; - - return t; -} - -static struct ip_tunnel *ipgre_tunnel_locate(struct net *net, - struct ip_tunnel_parm *parms, int create) -{ - struct ip_tunnel *t, *nt; - struct net_device *dev; - char name[IFNAMSIZ]; - struct ipgre_net *ign = net_generic(net, ipgre_net_id); - - t = ipgre_tunnel_find(net, parms, ARPHRD_IPGRE); - if (t || !create) - return t; - - if (parms->name[0]) - strlcpy(name, parms->name, IFNAMSIZ); - else - strcpy(name, "gre%d"); - - dev = alloc_netdev(sizeof(*t), name, ipgre_tunnel_setup); - if (!dev) - return NULL; - dev_net_set(dev, net); - - nt = netdev_priv(dev); - nt->parms = *parms; - dev->rtnl_link_ops = &ipgre_link_ops; - - dev->mtu = ipgre_tunnel_bind_dev(dev); - - if (register_netdevice(dev) < 0) - goto failed_free; - - /* Can use a lockless transmit, unless we generate output sequences */ - if (!(nt->parms.o_flags & GRE_SEQ)) - dev->features |= NETIF_F_LLTX; - - dev_hold(dev); - ipgre_tunnel_link(ign, nt); - return nt; - -failed_free: - free_netdev(dev); - return NULL; -} - -static void ipgre_tunnel_uninit(struct net_device *dev) -{ - struct net *net = dev_net(dev); - struct ipgre_net *ign = net_generic(net, ipgre_net_id); - - ipgre_tunnel_unlink(ign, netdev_priv(dev)); - dev_put(dev); + return 0; } - static void ipgre_err(struct sk_buff *skb, u32 info) { -/* All the routers (except for 
Linux) return only - 8 bytes of packet payload. It means, that precise relaying of - ICMP in the real Internet is absolutely infeasible. + /* All the routers (except for Linux) return only + 8 bytes of packet payload. It means, that precise relaying of + ICMP in the real Internet is absolutely infeasible. - Moreover, Cisco "wise men" put GRE key to the third word - in GRE header. It makes impossible maintaining even soft state for keyed - GRE tunnels with enabled checksum. Tell them "thank you". - - Well, I wonder, rfc1812 was written by Cisco employee, - what the hell these idiots break standards established - by themselves??? - */ + Moreover, Cisco "wise men" put GRE key to the third word + in GRE header. It makes impossible maintaining even soft + state for keyed GRE tunnels with enabled checksum. Tell + them "thank you". + Well, I wonder, rfc1812 was written by Cisco employee, + what the hell these idiots break standards established + by themselves??? + */ + struct net *net = dev_net(skb->dev); + struct ip_tunnel_net *itn; const struct iphdr *iph = (const struct iphdr *)skb->data; - __be16 *p = (__be16 *)(skb->data+(iph->ihl<<2)); - int grehlen = (iph->ihl<<2) + 4; const int type = icmp_hdr(skb)->type; const int code = icmp_hdr(skb)->code; struct ip_tunnel *t; - __be16 flags; - __be32 key = 0; + struct tnl_ptk_info tpi; + int hdr_len; + bool csum_err = false; - flags = p[0]; - if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) { - if (flags&(GRE_VERSION|GRE_ROUTING)) + if (parse_gre_header(skb, &tpi, &csum_err, &hdr_len)) { + if (!csum_err) /* ignore csum errors. */ return; - if (flags&GRE_KEY) { - grehlen += 4; - if (flags&GRE_CSUM) - grehlen += 4; - } } - /* If only 8 bytes returned, keyed message will be dropped here */ - if (skb_headlen(skb) < grehlen) - return; - - if (flags & GRE_KEY) - key = *(((__be32 *)p) + (grehlen / 4) - 1); - switch (type) { default: case ICMP_PARAMETERPROB: @@ -548,8 +276,13 @@ static void ipgre_err(struct sk_buff *skb, u32 info) break; } - t = ipgre_tunnel_lookup(skb->dev, iph->daddr, iph->saddr, - flags, key, p[1]); + if (tpi.proto == htons(ETH_P_TEB)) + itn = net_generic(net, gre_tap_net_id); + else + itn = net_generic(net, ipgre_net_id); + + t = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi.flags, + iph->daddr, iph->saddr, tpi.key); if (t == NULL) return; @@ -578,612 +311,214 @@ static void ipgre_err(struct sk_buff *skb, u32 info) t->err_time = jiffies; } -static inline u8 -ipgre_ecn_encapsulate(u8 tos, const struct iphdr *old_iph, struct sk_buff *skb) -{ - u8 inner = 0; - if (skb->protocol == htons(ETH_P_IP)) - inner = old_iph->tos; - else if (skb->protocol == htons(ETH_P_IPV6)) - inner = ipv6_get_dsfield((const struct ipv6hdr *)old_iph); - return INET_ECN_encapsulate(tos, inner); -} - static int ipgre_rcv(struct sk_buff *skb) { + struct net *net = dev_net(skb->dev); + struct ip_tunnel_net *itn; const struct iphdr *iph; - u8 *h; - __be16 flags; - __sum16 csum = 0; - __be32 key = 0; - u32 seqno = 0; struct ip_tunnel *tunnel; - int offset = 4; - __be16 gre_proto; - int err; + struct tnl_ptk_info tpi; + int hdr_len; + bool csum_err = false; - if (!pskb_may_pull(skb, 16)) + if (parse_gre_header(skb, &tpi, &csum_err, &hdr_len) < 0) goto drop; - iph = ip_hdr(skb); - h = skb->data; - flags = *(__be16 *)h; - - if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) { - /* - Version must be 0. - - We do not support routing headers. 
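The version/routing test that both the old receive path here and the new parse_gre_header() apply, shown against the wire layout. The masks are the GRE_* values rendered in host order for a standalone sketch:

#include <stdbool.h>
#include <stdint.h>

struct gre_base {		/* first 4 bytes of every GRE packet */
	uint16_t flags;		/* C R K S reserved ver, see RFC 2784/2890 */
	uint16_t protocol;	/* payload EtherType, 0x6558 for TEB */
};

#define G_CSUM		0x8000
#define G_ROUTING	0x4000
#define G_KEY		0x2000
#define G_SEQ		0x1000
#define G_VERSION	0x0007

static bool gre_acceptable(uint16_t flags)
{
	/* only version 0 without routing is handled */
	return !(flags & (G_VERSION | G_ROUTING));
}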
- */ - if (flags&(GRE_VERSION|GRE_ROUTING)) - goto drop; - - if (flags&GRE_CSUM) { - switch (skb->ip_summed) { - case CHECKSUM_COMPLETE: - csum = csum_fold(skb->csum); - if (!csum) - break; - /* fall through */ - case CHECKSUM_NONE: - skb->csum = 0; - csum = __skb_checksum_complete(skb); - skb->ip_summed = CHECKSUM_COMPLETE; - } - offset += 4; - } - if (flags&GRE_KEY) { - key = *(__be32 *)(h + offset); - offset += 4; - } - if (flags&GRE_SEQ) { - seqno = ntohl(*(__be32 *)(h + offset)); - offset += 4; - } - } + if (tpi.proto == htons(ETH_P_TEB)) + itn = net_generic(net, gre_tap_net_id); + else + itn = net_generic(net, ipgre_net_id); - gre_proto = *(__be16 *)(h + 2); + iph = ip_hdr(skb); + tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi.flags, + iph->saddr, iph->daddr, tpi.key); - tunnel = ipgre_tunnel_lookup(skb->dev, - iph->saddr, iph->daddr, flags, key, - gre_proto); if (tunnel) { - struct pcpu_tstats *tstats; - - secpath_reset(skb); - - skb->protocol = gre_proto; - /* WCCP version 1 and 2 protocol decoding. - * - Change protocol to IP - * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header - */ - if (flags == 0 && gre_proto == htons(ETH_P_WCCP)) { - skb->protocol = htons(ETH_P_IP); - if ((*(h + offset) & 0xF0) != 0x40) - offset += 4; - } - - skb->mac_header = skb->network_header; - __pskb_pull(skb, offset); - skb_postpull_rcsum(skb, skb_transport_header(skb), offset); - skb->pkt_type = PACKET_HOST; -#ifdef CONFIG_NET_IPGRE_BROADCAST - if (ipv4_is_multicast(iph->daddr)) { - /* Looped back packet, drop it! */ - if (rt_is_output_route(skb_rtable(skb))) - goto drop; - tunnel->dev->stats.multicast++; - skb->pkt_type = PACKET_BROADCAST; - } -#endif - - if (((flags&GRE_CSUM) && csum) || - (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) { - tunnel->dev->stats.rx_crc_errors++; - tunnel->dev->stats.rx_errors++; - goto drop; - } - if (tunnel->parms.i_flags&GRE_SEQ) { - if (!(flags&GRE_SEQ) || - (tunnel->i_seqno && (s32)(seqno - tunnel->i_seqno) < 0)) { - tunnel->dev->stats.rx_fifo_errors++; - tunnel->dev->stats.rx_errors++; - goto drop; - } - tunnel->i_seqno = seqno + 1; - } - - /* Warning: All skb pointers will be invalidated! 
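The sequence test being removed above, (s32)(seqno - tunnel->i_seqno) < 0, is standard serial-number arithmetic: subtract modulo 2^32 and read the sign, so ordering survives the wrap at 0xffffffff. Isolated:

#include <assert.h>
#include <stdint.h>

static int seq_stale(uint32_t seqno, uint32_t expected)
{
	return (int32_t)(seqno - expected) < 0;
}

int main(void)
{
	assert(!seq_stale(0x00000002, 0xfffffffe));	/* wrapped forward: ok */
	assert(seq_stale(0xfffffffe, 0x00000002));	/* genuinely old: drop */
	return 0;
}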
*/ - if (tunnel->dev->type == ARPHRD_ETHER) { - if (!pskb_may_pull(skb, ETH_HLEN)) { - tunnel->dev->stats.rx_length_errors++; - tunnel->dev->stats.rx_errors++; - goto drop; - } - - iph = ip_hdr(skb); - skb->protocol = eth_type_trans(skb, tunnel->dev); - skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); - } - - __skb_tunnel_rx(skb, tunnel->dev); - - skb_reset_network_header(skb); - err = IP_ECN_decapsulate(iph, skb); - if (unlikely(err)) { - if (log_ecn_error) - net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n", - &iph->saddr, iph->tos); - if (err > 1) { - ++tunnel->dev->stats.rx_frame_errors; - ++tunnel->dev->stats.rx_errors; - goto drop; - } - } - - tstats = this_cpu_ptr(tunnel->dev->tstats); - u64_stats_update_begin(&tstats->syncp); - tstats->rx_packets++; - tstats->rx_bytes += skb->len; - u64_stats_update_end(&tstats->syncp); - - gro_cells_receive(&tunnel->gro_cells, skb); + ip_tunnel_rcv(tunnel, skb, &tpi, log_ecn_error); return 0; } icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); - drop: kfree_skb(skb); return 0; } -static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) +static struct sk_buff *handle_offloads(struct ip_tunnel *tunnel, struct sk_buff *skb) { - struct ip_tunnel *tunnel = netdev_priv(dev); - const struct iphdr *old_iph = ip_hdr(skb); - const struct iphdr *tiph; - struct flowi4 fl4; - u8 tos; - __be16 df; - struct rtable *rt; /* Route to the other host */ - struct net_device *tdev; /* Device to other host */ - struct iphdr *iph; /* Our new IP header */ - unsigned int max_headroom; /* The extra header space needed */ - int gre_hlen; - __be32 dst; - int mtu; - u8 ttl; - - if (skb->ip_summed == CHECKSUM_PARTIAL && - skb_checksum_help(skb)) - goto tx_error; - - if (dev->type == ARPHRD_ETHER) - IPCB(skb)->flags = 0; - - if (dev->header_ops && dev->type == ARPHRD_IPGRE) { - gre_hlen = 0; - if (skb->protocol == htons(ETH_P_IP)) - tiph = (const struct iphdr *)skb->data; - else - tiph = &tunnel->parms.iph; - } else { - gre_hlen = tunnel->hlen; - tiph = &tunnel->parms.iph; - } - - if ((dst = tiph->daddr) == 0) { - /* NBMA tunnel */ - - if (skb_dst(skb) == NULL) { - dev->stats.tx_fifo_errors++; - goto tx_error; - } - - if (skb->protocol == htons(ETH_P_IP)) { - rt = skb_rtable(skb); - dst = rt_nexthop(rt, old_iph->daddr); - } -#if IS_ENABLED(CONFIG_IPV6) - else if (skb->protocol == htons(ETH_P_IPV6)) { - const struct in6_addr *addr6; - struct neighbour *neigh; - bool do_tx_error_icmp; - int addr_type; - - neigh = dst_neigh_lookup(skb_dst(skb), &ipv6_hdr(skb)->daddr); - if (neigh == NULL) - goto tx_error; - - addr6 = (const struct in6_addr *)&neigh->primary_key; - addr_type = ipv6_addr_type(addr6); - - if (addr_type == IPV6_ADDR_ANY) { - addr6 = &ipv6_hdr(skb)->daddr; - addr_type = ipv6_addr_type(addr6); - } - - if ((addr_type & IPV6_ADDR_COMPATv4) == 0) - do_tx_error_icmp = true; - else { - do_tx_error_icmp = false; - dst = addr6->s6_addr32[3]; - } - neigh_release(neigh); - if (do_tx_error_icmp) - goto tx_error_icmp; - } -#endif - else - goto tx_error; - } + int err; - ttl = tiph->ttl; - tos = tiph->tos; - if (tos == 1) { - tos = 0; - if (skb->protocol == htons(ETH_P_IP)) - tos = old_iph->tos; - else if (skb->protocol == htons(ETH_P_IPV6)) - tos = ipv6_get_dsfield((const struct ipv6hdr *)old_iph); - } + if (skb_is_gso(skb)) { + err = skb_unclone(skb, GFP_ATOMIC); + if (unlikely(err)) + goto error; + skb_shinfo(skb)->gso_type |= SKB_GSO_GRE; + return skb; + } else if (skb->ip_summed == CHECKSUM_PARTIAL && + tunnel->parms.o_flags&TUNNEL_CSUM) { 
+ err = skb_checksum_help(skb); + if (unlikely(err)) + goto error; + } else if (skb->ip_summed != CHECKSUM_PARTIAL) + skb->ip_summed = CHECKSUM_NONE; + + return skb; + +error: + kfree_skb(skb); + return ERR_PTR(err); +} - rt = ip_route_output_gre(dev_net(dev), &fl4, dst, tiph->saddr, - tunnel->parms.o_key, RT_TOS(tos), - tunnel->parms.link); - if (IS_ERR(rt)) { - dev->stats.tx_carrier_errors++; - goto tx_error; - } - tdev = rt->dst.dev; +static struct sk_buff *gre_build_header(struct sk_buff *skb, + const struct tnl_ptk_info *tpi, + int hdr_len) +{ + struct gre_base_hdr *greh; - if (tdev == dev) { - ip_rt_put(rt); - dev->stats.collisions++; - goto tx_error; - } + skb_push(skb, hdr_len); - df = tiph->frag_off; - if (df) - mtu = dst_mtu(&rt->dst) - dev->hard_header_len - tunnel->hlen; - else - mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu; + greh = (struct gre_base_hdr *)skb->data; + greh->flags = tnl_flags_to_gre_flags(tpi->flags); + greh->protocol = tpi->proto; - if (skb_dst(skb)) - skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); + if (tpi->flags&(TUNNEL_KEY|TUNNEL_CSUM|TUNNEL_SEQ)) { + __be32 *ptr = (__be32 *)(((u8 *)greh) + hdr_len - 4); - if (skb->protocol == htons(ETH_P_IP)) { - df |= (old_iph->frag_off&htons(IP_DF)); - - if ((old_iph->frag_off&htons(IP_DF)) && - mtu < ntohs(old_iph->tot_len)) { - icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); - ip_rt_put(rt); - goto tx_error; + if (tpi->flags&TUNNEL_SEQ) { + *ptr = tpi->seq; + ptr--; } - } -#if IS_ENABLED(CONFIG_IPV6) - else if (skb->protocol == htons(ETH_P_IPV6)) { - struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb); - - if (rt6 && mtu < dst_mtu(skb_dst(skb)) && mtu >= IPV6_MIN_MTU) { - if ((tunnel->parms.iph.daddr && - !ipv4_is_multicast(tunnel->parms.iph.daddr)) || - rt6->rt6i_dst.plen == 128) { - rt6->rt6i_flags |= RTF_MODIFIED; - dst_metric_set(skb_dst(skb), RTAX_MTU, mtu); - } + if (tpi->flags&TUNNEL_KEY) { + *ptr = tpi->key; + ptr--; } - - if (mtu >= IPV6_MIN_MTU && mtu < skb->len - tunnel->hlen + gre_hlen) { - icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); - ip_rt_put(rt); - goto tx_error; + if (tpi->flags&TUNNEL_CSUM && + !(skb_shinfo(skb)->gso_type & SKB_GSO_GRE)) { + *(__sum16 *)ptr = 0; + *(__sum16 *)ptr = csum_fold(skb_checksum(skb, 0, + skb->len, 0)); } } -#endif - if (tunnel->err_count > 0) { - if (time_before(jiffies, - tunnel->err_time + IPTUNNEL_ERR_TIMEO)) { - tunnel->err_count--; + return skb; +} - dst_link_failure(skb); - } else - tunnel->err_count = 0; - } +static void __gre_xmit(struct sk_buff *skb, struct net_device *dev, + const struct iphdr *tnl_params, + __be16 proto) +{ + struct ip_tunnel *tunnel = netdev_priv(dev); + struct tnl_ptk_info tpi; - max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen + rt->dst.header_len; - - if (skb_headroom(skb) < max_headroom || skb_shared(skb)|| - (skb_cloned(skb) && !skb_clone_writable(skb, 0))) { - struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom); - if (max_headroom > dev->needed_headroom) - dev->needed_headroom = max_headroom; - if (!new_skb) { - ip_rt_put(rt); - dev->stats.tx_dropped++; - dev_kfree_skb(skb); - return NETDEV_TX_OK; - } - if (skb->sk) - skb_set_owner_w(new_skb, skb->sk); - dev_kfree_skb(skb); - skb = new_skb; - old_iph = ip_hdr(skb); - /* Warning : tiph value might point to freed memory */ + if (likely(!skb->encapsulation)) { + skb_reset_inner_headers(skb); + skb->encapsulation = 1; } - skb_push(skb, gre_hlen); - skb_reset_network_header(skb); - skb_set_transport_header(skb, sizeof(*iph)); - 
memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); - IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED | - IPSKB_REROUTED); - skb_dst_drop(skb); - skb_dst_set(skb, &rt->dst); - - /* - * Push down and install the IPIP header. - */ + tpi.flags = tunnel->parms.o_flags; + tpi.proto = proto; + tpi.key = tunnel->parms.o_key; + if (tunnel->parms.o_flags & TUNNEL_SEQ) + tunnel->o_seqno++; + tpi.seq = htonl(tunnel->o_seqno); - iph = ip_hdr(skb); - iph->version = 4; - iph->ihl = sizeof(struct iphdr) >> 2; - iph->frag_off = df; - iph->protocol = IPPROTO_GRE; - iph->tos = ipgre_ecn_encapsulate(tos, old_iph, skb); - iph->daddr = fl4.daddr; - iph->saddr = fl4.saddr; - iph->ttl = ttl; - - if (ttl == 0) { - if (skb->protocol == htons(ETH_P_IP)) - iph->ttl = old_iph->ttl; -#if IS_ENABLED(CONFIG_IPV6) - else if (skb->protocol == htons(ETH_P_IPV6)) - iph->ttl = ((const struct ipv6hdr *)old_iph)->hop_limit; -#endif - else - iph->ttl = ip4_dst_hoplimit(&rt->dst); + /* Push GRE header. */ + skb = gre_build_header(skb, &tpi, tunnel->hlen); + if (unlikely(!skb)) { + dev->stats.tx_dropped++; + return; } - ((__be16 *)(iph + 1))[0] = tunnel->parms.o_flags; - ((__be16 *)(iph + 1))[1] = (dev->type == ARPHRD_ETHER) ? - htons(ETH_P_TEB) : skb->protocol; + ip_tunnel_xmit(skb, dev, tnl_params); +} - if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) { - __be32 *ptr = (__be32 *)(((u8 *)iph) + tunnel->hlen - 4); +static netdev_tx_t ipgre_xmit(struct sk_buff *skb, + struct net_device *dev) +{ + struct ip_tunnel *tunnel = netdev_priv(dev); + const struct iphdr *tnl_params; - if (tunnel->parms.o_flags&GRE_SEQ) { - ++tunnel->o_seqno; - *ptr = htonl(tunnel->o_seqno); - ptr--; - } - if (tunnel->parms.o_flags&GRE_KEY) { - *ptr = tunnel->parms.o_key; - ptr--; - } - if (tunnel->parms.o_flags&GRE_CSUM) { - int offset = skb_transport_offset(skb); + skb = handle_offloads(tunnel, skb); + if (IS_ERR(skb)) + goto out; - *ptr = 0; - *(__sum16 *)ptr = csum_fold(skb_checksum(skb, offset, - skb->len - offset, - 0)); - } + if (dev->header_ops) { + /* Need space for new headers */ + if (skb_cow_head(skb, dev->needed_headroom - + (tunnel->hlen + sizeof(struct iphdr)))) + goto free_skb; + + tnl_params = (const struct iphdr *)skb->data; + + /* Pull skb since ip_tunnel_xmit() needs skb->data pointing + * to gre header. 
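gre_build_header() above fills the optional words from the tail of the header backwards; because the wire order is checksum, key, sequence, a pointer walking down from the last word lands each field in place. A userspace rendition over a byte buffer, flag values illustrative and byte-order conversion omitted:

#include <stdint.h>
#include <string.h>

#define T_CSUM	0x1
#define T_KEY	0x2
#define T_SEQ	0x4

static int build_gre(uint8_t *buf, unsigned flags,
		     uint32_t key, uint32_t seq, uint16_t proto)
{
	int hlen = 4 + 4 * !!(flags & T_CSUM)
		     + 4 * !!(flags & T_KEY)
		     + 4 * !!(flags & T_SEQ);
	uint8_t *ptr = buf + hlen - 4;		/* last 4-byte word */

	memset(buf, 0, hlen);			/* flag bits elided here */
	memcpy(buf + 2, &proto, sizeof(proto));
	if (flags & T_SEQ) {
		memcpy(ptr, &seq, 4);
		ptr -= 4;
	}
	if (flags & T_KEY) {
		memcpy(ptr, &key, 4);
		ptr -= 4;
	}
	/* with T_CSUM, ptr now names the checksum word: it stays zero
	 * until the sum over header + payload is folded in */
	return hlen;
}

The kernel additionally skips the checksum when the skb is GSO, deferring it to gre_gso_segment().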
+ */ + skb_pull(skb, tunnel->hlen + sizeof(struct iphdr)); + } else { + if (skb_cow_head(skb, dev->needed_headroom)) + goto free_skb; + + tnl_params = &tunnel->parms.iph; } - iptunnel_xmit(skb, dev); + __gre_xmit(skb, dev, tnl_params, skb->protocol); + return NETDEV_TX_OK; -#if IS_ENABLED(CONFIG_IPV6) -tx_error_icmp: - dst_link_failure(skb); -#endif -tx_error: - dev->stats.tx_errors++; +free_skb: dev_kfree_skb(skb); +out: + dev->stats.tx_dropped++; return NETDEV_TX_OK; } -static int ipgre_tunnel_bind_dev(struct net_device *dev) +static netdev_tx_t gre_tap_xmit(struct sk_buff *skb, + struct net_device *dev) { - struct net_device *tdev = NULL; - struct ip_tunnel *tunnel; - const struct iphdr *iph; - int hlen = LL_MAX_HEADER; - int mtu = ETH_DATA_LEN; - int addend = sizeof(struct iphdr) + 4; - - tunnel = netdev_priv(dev); - iph = &tunnel->parms.iph; - - /* Guess output device to choose reasonable mtu and needed_headroom */ - - if (iph->daddr) { - struct flowi4 fl4; - struct rtable *rt; - - rt = ip_route_output_gre(dev_net(dev), &fl4, - iph->daddr, iph->saddr, - tunnel->parms.o_key, - RT_TOS(iph->tos), - tunnel->parms.link); - if (!IS_ERR(rt)) { - tdev = rt->dst.dev; - ip_rt_put(rt); - } - - if (dev->type != ARPHRD_ETHER) - dev->flags |= IFF_POINTOPOINT; - } + struct ip_tunnel *tunnel = netdev_priv(dev); - if (!tdev && tunnel->parms.link) - tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link); + skb = handle_offloads(tunnel, skb); + if (IS_ERR(skb)) + goto out; - if (tdev) { - hlen = tdev->hard_header_len + tdev->needed_headroom; - mtu = tdev->mtu; - } - dev->iflink = tunnel->parms.link; - - /* Precalculate GRE options length */ - if (tunnel->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) { - if (tunnel->parms.o_flags&GRE_CSUM) - addend += 4; - if (tunnel->parms.o_flags&GRE_KEY) - addend += 4; - if (tunnel->parms.o_flags&GRE_SEQ) - addend += 4; - } - dev->needed_headroom = addend + hlen; - mtu -= dev->hard_header_len + addend; + if (skb_cow_head(skb, dev->needed_headroom)) + goto free_skb; - if (mtu < 68) - mtu = 68; + __gre_xmit(skb, dev, &tunnel->parms.iph, htons(ETH_P_TEB)); - tunnel->hlen = addend; + return NETDEV_TX_OK; - return mtu; +free_skb: + dev_kfree_skb(skb); +out: + dev->stats.tx_dropped++; + return NETDEV_TX_OK; } -static int -ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) +static int ipgre_tunnel_ioctl(struct net_device *dev, + struct ifreq *ifr, int cmd) { int err = 0; struct ip_tunnel_parm p; - struct ip_tunnel *t; - struct net *net = dev_net(dev); - struct ipgre_net *ign = net_generic(net, ipgre_net_id); - - switch (cmd) { - case SIOCGETTUNNEL: - t = NULL; - if (dev == ign->fb_tunnel_dev) { - if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) { - err = -EFAULT; - break; - } - t = ipgre_tunnel_locate(net, &p, 0); - } - if (t == NULL) - t = netdev_priv(dev); - memcpy(&p, &t->parms, sizeof(p)); - if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) - err = -EFAULT; - break; - - case SIOCADDTUNNEL: - case SIOCCHGTUNNEL: - err = -EPERM; - if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) - goto done; - - err = -EFAULT; - if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) - goto done; - - err = -EINVAL; - if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE || - p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) || - ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING))) - goto done; - if (p.iph.ttl) - p.iph.frag_off |= htons(IP_DF); - - if (!(p.i_flags&GRE_KEY)) - p.i_key = 0; - if (!(p.o_flags&GRE_KEY)) - p.o_key = 0; - - t = 
ipgre_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL); - - if (dev != ign->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) { - if (t != NULL) { - if (t->dev != dev) { - err = -EEXIST; - break; - } - } else { - unsigned int nflags = 0; - - t = netdev_priv(dev); - - if (ipv4_is_multicast(p.iph.daddr)) - nflags = IFF_BROADCAST; - else if (p.iph.daddr) - nflags = IFF_POINTOPOINT; - - if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) { - err = -EINVAL; - break; - } - ipgre_tunnel_unlink(ign, t); - synchronize_net(); - t->parms.iph.saddr = p.iph.saddr; - t->parms.iph.daddr = p.iph.daddr; - t->parms.i_key = p.i_key; - t->parms.o_key = p.o_key; - memcpy(dev->dev_addr, &p.iph.saddr, 4); - memcpy(dev->broadcast, &p.iph.daddr, 4); - ipgre_tunnel_link(ign, t); - netdev_state_change(dev); - } - } - - if (t) { - err = 0; - if (cmd == SIOCCHGTUNNEL) { - t->parms.iph.ttl = p.iph.ttl; - t->parms.iph.tos = p.iph.tos; - t->parms.iph.frag_off = p.iph.frag_off; - if (t->parms.link != p.link) { - t->parms.link = p.link; - dev->mtu = ipgre_tunnel_bind_dev(dev); - netdev_state_change(dev); - } - } - if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p))) - err = -EFAULT; - } else - err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT); - break; - case SIOCDELTUNNEL: - err = -EPERM; - if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) - goto done; - - if (dev == ign->fb_tunnel_dev) { - err = -EFAULT; - if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) - goto done; - err = -ENOENT; - if ((t = ipgre_tunnel_locate(net, &p, 0)) == NULL) - goto done; - err = -EPERM; - if (t == netdev_priv(ign->fb_tunnel_dev)) - goto done; - dev = t->dev; - } - unregister_netdevice(dev); - err = 0; - break; - - default: - err = -EINVAL; + if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) + return -EFAULT; + if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE || + p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) || + ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING))) { + return -EINVAL; } + p.i_flags = gre_flags_to_tnl_flags(p.i_flags); + p.o_flags = gre_flags_to_tnl_flags(p.o_flags); -done: - return err; -} + err = ip_tunnel_ioctl(dev, &p, cmd); + if (err) + return err; -static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu) -{ - struct ip_tunnel *tunnel = netdev_priv(dev); - if (new_mtu < 68 || - new_mtu > 0xFFF8 - dev->hard_header_len - tunnel->hlen) - return -EINVAL; - dev->mtu = new_mtu; + p.i_flags = tnl_flags_to_gre_flags(p.i_flags); + p.o_flags = tnl_flags_to_gre_flags(p.o_flags); + + if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) + return -EFAULT; return 0; } @@ -1213,25 +548,23 @@ static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu) ... ftp fec0:6666:6666::193.233.7.65 ... - */ - static int ipgre_header(struct sk_buff *skb, struct net_device *dev, unsigned short type, const void *daddr, const void *saddr, unsigned int len) { struct ip_tunnel *t = netdev_priv(dev); - struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen); - __be16 *p = (__be16 *)(iph+1); + struct iphdr *iph; + struct gre_base_hdr *greh; - memcpy(iph, &t->parms.iph, sizeof(struct iphdr)); - p[0] = t->parms.o_flags; - p[1] = htons(type); + iph = (struct iphdr *)skb_push(skb, t->hlen + sizeof(*iph)); + greh = (struct gre_base_hdr *)(iph+1); + greh->flags = tnl_flags_to_gre_flags(t->parms.o_flags); + greh->protocol = htons(type); - /* - * Set the source hardware address. - */ + memcpy(iph, &t->parms.iph, sizeof(struct iphdr)); + /* Set the source hardware address. 
*/ if (saddr) memcpy(&iph->saddr, saddr, 4); if (daddr) @@ -1239,7 +572,7 @@ static int ipgre_header(struct sk_buff *skb, struct net_device *dev, if (iph->daddr) return t->hlen; - return -t->hlen; + return -(t->hlen + sizeof(*iph)); } static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr) @@ -1293,31 +626,21 @@ static int ipgre_close(struct net_device *dev) } return 0; } - #endif static const struct net_device_ops ipgre_netdev_ops = { .ndo_init = ipgre_tunnel_init, - .ndo_uninit = ipgre_tunnel_uninit, + .ndo_uninit = ip_tunnel_uninit, #ifdef CONFIG_NET_IPGRE_BROADCAST .ndo_open = ipgre_open, .ndo_stop = ipgre_close, #endif - .ndo_start_xmit = ipgre_tunnel_xmit, + .ndo_start_xmit = ipgre_xmit, .ndo_do_ioctl = ipgre_tunnel_ioctl, - .ndo_change_mtu = ipgre_tunnel_change_mtu, - .ndo_get_stats64 = ipgre_get_stats64, + .ndo_change_mtu = ip_tunnel_change_mtu, + .ndo_get_stats64 = ip_tunnel_get_stats64, }; -static void ipgre_dev_free(struct net_device *dev) -{ - struct ip_tunnel *tunnel = netdev_priv(dev); - - gro_cells_destroy(&tunnel->gro_cells); - free_percpu(dev->tstats); - free_netdev(dev); -} - #define GRE_FEATURES (NETIF_F_SG | \ NETIF_F_FRAGLIST | \ NETIF_F_HIGHDMA | \ @@ -1326,35 +649,48 @@ static void ipgre_dev_free(struct net_device *dev) static void ipgre_tunnel_setup(struct net_device *dev) { dev->netdev_ops = &ipgre_netdev_ops; - dev->destructor = ipgre_dev_free; + ip_tunnel_setup(dev, ipgre_net_id); +} - dev->type = ARPHRD_IPGRE; - dev->needed_headroom = LL_MAX_HEADER + sizeof(struct iphdr) + 4; +static void __gre_tunnel_init(struct net_device *dev) +{ + struct ip_tunnel *tunnel; + + tunnel = netdev_priv(dev); + tunnel->hlen = ip_gre_calc_hlen(tunnel->parms.o_flags); + tunnel->parms.iph.protocol = IPPROTO_GRE; + + dev->needed_headroom = LL_MAX_HEADER + sizeof(struct iphdr) + 4; dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 4; - dev->flags = IFF_NOARP; - dev->iflink = 0; - dev->addr_len = 4; - dev->features |= NETIF_F_NETNS_LOCAL; - dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; - dev->features |= GRE_FEATURES; + dev->features |= NETIF_F_NETNS_LOCAL | GRE_FEATURES; dev->hw_features |= GRE_FEATURES; + + if (!(tunnel->parms.o_flags & TUNNEL_SEQ)) { + /* TCP offload with GRE SEQ is not supported. 
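Each GSO segment would need its own outgoing sequence number, which segmentation cannot generate.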
*/ + dev->features |= NETIF_F_GSO_SOFTWARE; + dev->hw_features |= NETIF_F_GSO_SOFTWARE; + /* Can use a lockless transmit, unless we generate + * output sequences + */ + dev->features |= NETIF_F_LLTX; + } } static int ipgre_tunnel_init(struct net_device *dev) { - struct ip_tunnel *tunnel; - struct iphdr *iph; - int err; + struct ip_tunnel *tunnel = netdev_priv(dev); + struct iphdr *iph = &tunnel->parms.iph; - tunnel = netdev_priv(dev); - iph = &tunnel->parms.iph; + __gre_tunnel_init(dev); - tunnel->dev = dev; - strcpy(tunnel->parms.name, dev->name); + memcpy(dev->dev_addr, &iph->saddr, 4); + memcpy(dev->broadcast, &iph->daddr, 4); - memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4); - memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4); + dev->type = ARPHRD_IPGRE; + dev->flags = IFF_NOARP; + dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; + dev->addr_len = 4; if (iph->daddr) { #ifdef CONFIG_NET_IPGRE_BROADCAST @@ -1368,106 +704,30 @@ static int ipgre_tunnel_init(struct net_device *dev) } else dev->header_ops = &ipgre_header_ops; - dev->tstats = alloc_percpu(struct pcpu_tstats); - if (!dev->tstats) - return -ENOMEM; - - err = gro_cells_init(&tunnel->gro_cells, dev); - if (err) { - free_percpu(dev->tstats); - return err; - } - - return 0; + return ip_tunnel_init(dev); } -static void ipgre_fb_tunnel_init(struct net_device *dev) -{ - struct ip_tunnel *tunnel = netdev_priv(dev); - struct iphdr *iph = &tunnel->parms.iph; - - tunnel->dev = dev; - strcpy(tunnel->parms.name, dev->name); - - iph->version = 4; - iph->protocol = IPPROTO_GRE; - iph->ihl = 5; - tunnel->hlen = sizeof(struct iphdr) + 4; - - dev_hold(dev); -} - - static const struct gre_protocol ipgre_protocol = { .handler = ipgre_rcv, .err_handler = ipgre_err, }; -static void ipgre_destroy_tunnels(struct ipgre_net *ign, struct list_head *head) -{ - int prio; - - for (prio = 0; prio < 4; prio++) { - int h; - for (h = 0; h < HASH_SIZE; h++) { - struct ip_tunnel *t; - - t = rtnl_dereference(ign->tunnels[prio][h]); - - while (t != NULL) { - unregister_netdevice_queue(t->dev, head); - t = rtnl_dereference(t->next); - } - } - } -} - static int __net_init ipgre_init_net(struct net *net) { - struct ipgre_net *ign = net_generic(net, ipgre_net_id); - int err; - - ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "gre0", - ipgre_tunnel_setup); - if (!ign->fb_tunnel_dev) { - err = -ENOMEM; - goto err_alloc_dev; - } - dev_net_set(ign->fb_tunnel_dev, net); - - ipgre_fb_tunnel_init(ign->fb_tunnel_dev); - ign->fb_tunnel_dev->rtnl_link_ops = &ipgre_link_ops; - - if ((err = register_netdev(ign->fb_tunnel_dev))) - goto err_reg_dev; - - rcu_assign_pointer(ign->tunnels_wc[0], - netdev_priv(ign->fb_tunnel_dev)); - return 0; - -err_reg_dev: - ipgre_dev_free(ign->fb_tunnel_dev); -err_alloc_dev: - return err; + return ip_tunnel_init_net(net, ipgre_net_id, &ipgre_link_ops, NULL); } static void __net_exit ipgre_exit_net(struct net *net) { - struct ipgre_net *ign; - LIST_HEAD(list); - - ign = net_generic(net, ipgre_net_id); - rtnl_lock(); - ipgre_destroy_tunnels(ign, &list); - unregister_netdevice_many(&list); - rtnl_unlock(); + struct ip_tunnel_net *itn = net_generic(net, ipgre_net_id); + ip_tunnel_delete_net(itn); } static struct pernet_operations ipgre_net_ops = { .init = ipgre_init_net, .exit = ipgre_exit_net, .id = &ipgre_net_id, - .size = sizeof(struct ipgre_net), + .size = sizeof(struct ip_tunnel_net), }; static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[]) @@ -1512,8 +772,8 @@ out: return ipgre_tunnel_validate(tb, data); } -static 
void ipgre_netlink_parms(struct nlattr *data[], - struct ip_tunnel_parm *parms) +static void ipgre_netlink_parms(struct nlattr *data[], struct nlattr *tb[], + struct ip_tunnel_parm *parms) { memset(parms, 0, sizeof(*parms)); @@ -1526,10 +786,10 @@ static void ipgre_netlink_parms(struct nlattr *data[], parms->link = nla_get_u32(data[IFLA_GRE_LINK]); if (data[IFLA_GRE_IFLAGS]) - parms->i_flags = nla_get_be16(data[IFLA_GRE_IFLAGS]); + parms->i_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_IFLAGS])); if (data[IFLA_GRE_OFLAGS]) - parms->o_flags = nla_get_be16(data[IFLA_GRE_OFLAGS]); + parms->o_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_OFLAGS])); if (data[IFLA_GRE_IKEY]) parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]); @@ -1553,145 +813,46 @@ static void ipgre_netlink_parms(struct nlattr *data[], parms->iph.frag_off = htons(IP_DF); } -static int ipgre_tap_init(struct net_device *dev) +static int gre_tap_init(struct net_device *dev) { - struct ip_tunnel *tunnel; - - tunnel = netdev_priv(dev); - - tunnel->dev = dev; - strcpy(tunnel->parms.name, dev->name); + __gre_tunnel_init(dev); - ipgre_tunnel_bind_dev(dev); - - dev->tstats = alloc_percpu(struct pcpu_tstats); - if (!dev->tstats) - return -ENOMEM; - - return 0; + return ip_tunnel_init(dev); } -static const struct net_device_ops ipgre_tap_netdev_ops = { - .ndo_init = ipgre_tap_init, - .ndo_uninit = ipgre_tunnel_uninit, - .ndo_start_xmit = ipgre_tunnel_xmit, +static const struct net_device_ops gre_tap_netdev_ops = { + .ndo_init = gre_tap_init, + .ndo_uninit = ip_tunnel_uninit, + .ndo_start_xmit = gre_tap_xmit, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, - .ndo_change_mtu = ipgre_tunnel_change_mtu, - .ndo_get_stats64 = ipgre_get_stats64, + .ndo_change_mtu = ip_tunnel_change_mtu, + .ndo_get_stats64 = ip_tunnel_get_stats64, }; static void ipgre_tap_setup(struct net_device *dev) { - ether_setup(dev); - - dev->netdev_ops = &ipgre_tap_netdev_ops; - dev->destructor = ipgre_dev_free; - - dev->iflink = 0; - dev->features |= NETIF_F_NETNS_LOCAL; + dev->netdev_ops = &gre_tap_netdev_ops; + ip_tunnel_setup(dev, gre_tap_net_id); } -static int ipgre_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], - struct nlattr *data[]) +static int ipgre_newlink(struct net *src_net, struct net_device *dev, + struct nlattr *tb[], struct nlattr *data[]) { - struct ip_tunnel *nt; - struct net *net = dev_net(dev); - struct ipgre_net *ign = net_generic(net, ipgre_net_id); - int mtu; - int err; - - nt = netdev_priv(dev); - ipgre_netlink_parms(data, &nt->parms); - - if (ipgre_tunnel_find(net, &nt->parms, dev->type)) - return -EEXIST; - - if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS]) - eth_hw_addr_random(dev); - - mtu = ipgre_tunnel_bind_dev(dev); - if (!tb[IFLA_MTU]) - dev->mtu = mtu; - - /* Can use a lockless transmit, unless we generate output sequences */ - if (!(nt->parms.o_flags & GRE_SEQ)) - dev->features |= NETIF_F_LLTX; - - err = register_netdevice(dev); - if (err) - goto out; - - dev_hold(dev); - ipgre_tunnel_link(ign, nt); + struct ip_tunnel_parm p; -out: - return err; + ipgre_netlink_parms(data, tb, &p); + return ip_tunnel_newlink(dev, tb, &p); } static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[], struct nlattr *data[]) { - struct ip_tunnel *t, *nt; - struct net *net = dev_net(dev); - struct ipgre_net *ign = net_generic(net, ipgre_net_id); struct ip_tunnel_parm p; - int mtu; - - if (dev == ign->fb_tunnel_dev) - return -EINVAL; - - nt = netdev_priv(dev); - 
ipgre_netlink_parms(data, &p); - - t = ipgre_tunnel_locate(net, &p, 0); - - if (t) { - if (t->dev != dev) - return -EEXIST; - } else { - t = nt; - - if (dev->type != ARPHRD_ETHER) { - unsigned int nflags = 0; - - if (ipv4_is_multicast(p.iph.daddr)) - nflags = IFF_BROADCAST; - else if (p.iph.daddr) - nflags = IFF_POINTOPOINT; - - if ((dev->flags ^ nflags) & - (IFF_POINTOPOINT | IFF_BROADCAST)) - return -EINVAL; - } - ipgre_tunnel_unlink(ign, t); - t->parms.iph.saddr = p.iph.saddr; - t->parms.iph.daddr = p.iph.daddr; - t->parms.i_key = p.i_key; - if (dev->type != ARPHRD_ETHER) { - memcpy(dev->dev_addr, &p.iph.saddr, 4); - memcpy(dev->broadcast, &p.iph.daddr, 4); - } - ipgre_tunnel_link(ign, t); - netdev_state_change(dev); - } - - t->parms.o_key = p.o_key; - t->parms.iph.ttl = p.iph.ttl; - t->parms.iph.tos = p.iph.tos; - t->parms.iph.frag_off = p.iph.frag_off; - - if (t->parms.link != p.link) { - t->parms.link = p.link; - mtu = ipgre_tunnel_bind_dev(dev); - if (!tb[IFLA_MTU]) - dev->mtu = mtu; - netdev_state_change(dev); - } - - return 0; + ipgre_netlink_parms(data, tb, &p); + return ip_tunnel_changelink(dev, tb, &p); } static size_t ipgre_get_size(const struct net_device *dev) @@ -1726,8 +887,8 @@ static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev) struct ip_tunnel_parm *p = &t->parms; if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) || - nla_put_be16(skb, IFLA_GRE_IFLAGS, p->i_flags) || - nla_put_be16(skb, IFLA_GRE_OFLAGS, p->o_flags) || + nla_put_be16(skb, IFLA_GRE_IFLAGS, tnl_flags_to_gre_flags(p->i_flags)) || + nla_put_be16(skb, IFLA_GRE_OFLAGS, tnl_flags_to_gre_flags(p->o_flags)) || nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) || nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) || nla_put_be32(skb, IFLA_GRE_LOCAL, p->iph.saddr) || @@ -1765,6 +926,7 @@ static struct rtnl_link_ops ipgre_link_ops __read_mostly = { .validate = ipgre_tunnel_validate, .newlink = ipgre_newlink, .changelink = ipgre_changelink, + .dellink = ip_tunnel_dellink, .get_size = ipgre_get_size, .fill_info = ipgre_fill_info, }; @@ -1778,13 +940,28 @@ static struct rtnl_link_ops ipgre_tap_ops __read_mostly = { .validate = ipgre_tap_validate, .newlink = ipgre_newlink, .changelink = ipgre_changelink, + .dellink = ip_tunnel_dellink, .get_size = ipgre_get_size, .fill_info = ipgre_fill_info, }; -/* - * And now the modules code and kernel interface. 
- */ +static int __net_init ipgre_tap_init_net(struct net *net) +{ + return ip_tunnel_init_net(net, gre_tap_net_id, &ipgre_tap_ops, NULL); +} + +static void __net_exit ipgre_tap_exit_net(struct net *net) +{ + struct ip_tunnel_net *itn = net_generic(net, gre_tap_net_id); + ip_tunnel_delete_net(itn); +} + +static struct pernet_operations ipgre_tap_net_ops = { + .init = ipgre_tap_init_net, + .exit = ipgre_tap_exit_net, + .id = &gre_tap_net_id, + .size = sizeof(struct ip_tunnel_net), +}; static int __init ipgre_init(void) { @@ -1796,6 +973,10 @@ static int __init ipgre_init(void) if (err < 0) return err; + err = register_pernet_device(&ipgre_tap_net_ops); + if (err < 0) + goto pnet_tap_failed; + err = gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO); if (err < 0) { pr_info("%s: can't add protocol\n", __func__); @@ -1810,16 +991,17 @@ static int __init ipgre_init(void) if (err < 0) goto tap_ops_failed; -out: - return err; + return 0; tap_ops_failed: rtnl_link_unregister(&ipgre_link_ops); rtnl_link_failed: gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO); add_proto_failed: + unregister_pernet_device(&ipgre_tap_net_ops); +pnet_tap_failed: + unregister_pernet_device(&ipgre_net_ops); - goto out; + return err; } static void __exit ipgre_fini(void) @@ -1828,6 +1010,7 @@ static void __exit ipgre_fini(void) rtnl_link_unregister(&ipgre_link_ops); if (gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO) < 0) pr_info("%s: can't remove protocol\n", __func__); + unregister_pernet_device(&ipgre_tap_net_ops); unregister_pernet_device(&ipgre_net_ops); } @@ -1837,3 +1020,4 @@ MODULE_LICENSE("GPL"); MODULE_ALIAS_RTNL_LINK("gre"); MODULE_ALIAS_RTNL_LINK("gretap"); MODULE_ALIAS_NETDEV("gre0"); +MODULE_ALIAS_NETDEV("gretap0"); diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index f1395a6fb35f..3da817b89e9b 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -208,13 +208,6 @@ static int ip_local_deliver_finish(struct sk_buff *skb) if (ipprot != NULL) { int ret; - if (!net_eq(net, &init_net) && !ipprot->netns_ok) { - net_info_ratelimited("%s: proto %d isn't netns-ready\n", - __func__, protocol); - kfree_skb(skb); - goto out; - } - if (!ipprot->no_policy) { if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) { kfree_skb(skb); @@ -235,9 +228,11 @@ static int ip_local_deliver_finish(struct sk_buff *skb) icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PROT_UNREACH, 0); } - } else + kfree_skb(skb); + } else { IP_INC_STATS_BH(net, IPSTATS_MIB_INDELIVERS); - kfree_skb(skb); + consume_skb(skb); + } } } out: @@ -424,7 +419,7 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, iph = ip_hdr(skb); if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl))) - goto inhdr_error; + goto csum_error; len = ntohs(iph->tot_len); if (skb->len < len) { @@ -451,6 +446,8 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, return NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, skb, dev, NULL, ip_rcv_finish); +csum_error: + IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_CSUMERRORS); inhdr_error: IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INHDRERRORS); drop: diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c index f6289bf6f332..ec7264514a82 100644 --- a/net/ipv4/ip_options.c +++ b/net/ipv4/ip_options.c @@ -370,7 +370,6 @@ int ip_options_compile(struct net *net, } switch (optptr[3]&0xF) { case IPOPT_TS_TSONLY: - opt->ts = optptr - iph; if (skb) timeptr = &optptr[optptr[2]-1]; opt->ts_needtime = 1; @@ -381,7 +380,6 @@ int ip_options_compile(struct net *net, pp_ptr = optptr + 2; goto error; } -
opt->ts = optptr - iph; if (rt) { spec_dst_fill(&spec_dst, skb); memcpy(&optptr[optptr[2]-1], &spec_dst, 4); @@ -396,7 +394,6 @@ int ip_options_compile(struct net *net, pp_ptr = optptr + 2; goto error; } - opt->ts = optptr - iph; { __be32 addr; memcpy(&addr, &optptr[optptr[2]-1], 4); @@ -423,18 +420,18 @@ int ip_options_compile(struct net *net, put_unaligned_be32(midtime, timeptr); opt->is_changed = 1; } - } else { + } else if ((optptr[3]&0xF) != IPOPT_TS_PRESPEC) { unsigned int overflow = optptr[3]>>4; if (overflow == 15) { pp_ptr = optptr + 3; goto error; } - opt->ts = optptr - iph; if (skb) { optptr[3] = (optptr[3]&0xF)|((overflow+1)<<4); opt->is_changed = 1; } } + opt->ts = optptr - iph; break; case IPOPT_RA: if (optlen < 4) { diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 3e98ed2bff55..147abf5275aa 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -430,8 +430,7 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from) to->tc_index = from->tc_index; #endif nf_copy(to, from); -#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \ - defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE) +#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE) to->nf_trace = from->nf_trace; #endif #if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE) @@ -598,6 +597,7 @@ slow_path: /* for offloaded checksums cleanup checksum before fragmentation */ if ((skb->ip_summed == CHECKSUM_PARTIAL) && skb_checksum_help(skb)) goto fail; + iph = ip_hdr(skb); left = skb->len - hlen; /* Space per frame */ ptr = hlen; /* Where to start from */ diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c new file mode 100644 index 000000000000..e4147ec1665a --- /dev/null +++ b/net/ipv4/ip_tunnel.c @@ -0,0 +1,1035 @@ +/* + * Copyright (c) 2013 Nicira, Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/capability.h> +#include <linux/module.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/slab.h> +#include <linux/uaccess.h> +#include <linux/skbuff.h> +#include <linux/netdevice.h> +#include <linux/in.h> +#include <linux/tcp.h> +#include <linux/udp.h> +#include <linux/if_arp.h> +#include <linux/mroute.h> +#include <linux/init.h> +#include <linux/in6.h> +#include <linux/inetdevice.h> +#include <linux/igmp.h> +#include <linux/netfilter_ipv4.h> +#include <linux/etherdevice.h> +#include <linux/if_ether.h> +#include <linux/if_vlan.h> +#include <linux/rculist.h> + +#include <net/sock.h> +#include <net/ip.h> +#include <net/icmp.h> +#include <net/protocol.h> +#include <net/ip_tunnels.h> +#include <net/arp.h> +#include <net/checksum.h> +#include <net/dsfield.h> +#include <net/inet_ecn.h> +#include <net/xfrm.h> +#include <net/net_namespace.h> +#include <net/netns/generic.h> +#include <net/rtnetlink.h> + +#if IS_ENABLED(CONFIG_IPV6) +#include <net/ipv6.h> +#include <net/ip6_fib.h> +#include <net/ip6_route.h> +#endif + +static unsigned int ip_tunnel_hash(struct ip_tunnel_net *itn, + __be32 key, __be32 remote) +{ + return hash_32((__force u32)key ^ (__force u32)remote, + IP_TNL_HASH_BITS); +} + +/* Often modified stats are per cpu, others are shared (netdev->stats) */ +struct rtnl_link_stats64 *ip_tunnel_get_stats64(struct net_device *dev, + struct rtnl_link_stats64 *tot) +{ + int i; + + for_each_possible_cpu(i) { + const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i); + u64 rx_packets, rx_bytes, tx_packets, tx_bytes; + unsigned int start; + + do { + start = u64_stats_fetch_begin_bh(&tstats->syncp); + rx_packets = tstats->rx_packets; + tx_packets = tstats->tx_packets; + rx_bytes = tstats->rx_bytes; + tx_bytes = tstats->tx_bytes; + } while (u64_stats_fetch_retry_bh(&tstats->syncp, start)); + + tot->rx_packets += rx_packets; + tot->tx_packets += tx_packets; + tot->rx_bytes += rx_bytes; + tot->tx_bytes += tx_bytes; + } + + tot->multicast = dev->stats.multicast; + + tot->rx_crc_errors = dev->stats.rx_crc_errors; + tot->rx_fifo_errors = dev->stats.rx_fifo_errors; + tot->rx_length_errors = dev->stats.rx_length_errors; + tot->rx_frame_errors = dev->stats.rx_frame_errors; + tot->rx_errors = dev->stats.rx_errors; + + tot->tx_fifo_errors = dev->stats.tx_fifo_errors; + tot->tx_carrier_errors = dev->stats.tx_carrier_errors; + tot->tx_dropped = dev->stats.tx_dropped; + tot->tx_aborted_errors = dev->stats.tx_aborted_errors; + tot->tx_errors = dev->stats.tx_errors; + + tot->collisions = dev->stats.collisions; + + return tot; +} +EXPORT_SYMBOL_GPL(ip_tunnel_get_stats64); + +static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p, + __be16 flags, __be32 key) +{ + if (p->i_flags & TUNNEL_KEY) { + if (flags & TUNNEL_KEY) + return key == p->i_key; + else + /* key expected, none present */ + return false; + } else + return !(flags & TUNNEL_KEY); +} + +/* Fallback tunnel: no source, no destination, no key, no options + + Tunnel hash table: + We require an exact key match, i.e. if a key is present in the packet + it will match only a tunnel with the same key; if it is not present, + it will match only a keyless tunnel.
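+ (for example, a packet carrying key 42 matches only a tunnel whose i_key is 42, never a keyless one).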
+ + All keyless packets, if not matched against a configured keyless tunnel, + will match the fallback tunnel. + Given src, dst and key, find the appropriate tunnel for input. +*/ +struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn, + int link, __be16 flags, + __be32 remote, __be32 local, + __be32 key) +{ + unsigned int hash; + struct ip_tunnel *t, *cand = NULL; + struct hlist_head *head; + + hash = ip_tunnel_hash(itn, key, remote); + head = &itn->tunnels[hash]; + + hlist_for_each_entry_rcu(t, head, hash_node) { + if (local != t->parms.iph.saddr || + remote != t->parms.iph.daddr || + !(t->dev->flags & IFF_UP)) + continue; + + if (!ip_tunnel_key_match(&t->parms, flags, key)) + continue; + + if (t->parms.link == link) + return t; + else + cand = t; + } + + hlist_for_each_entry_rcu(t, head, hash_node) { + if (remote != t->parms.iph.daddr || + !(t->dev->flags & IFF_UP)) + continue; + + if (!ip_tunnel_key_match(&t->parms, flags, key)) + continue; + + if (t->parms.link == link) + return t; + else if (!cand) + cand = t; + } + + hash = ip_tunnel_hash(itn, key, 0); + head = &itn->tunnels[hash]; + + hlist_for_each_entry_rcu(t, head, hash_node) { + if ((local != t->parms.iph.saddr && + (local != t->parms.iph.daddr || + !ipv4_is_multicast(local))) || + !(t->dev->flags & IFF_UP)) + continue; + + if (!ip_tunnel_key_match(&t->parms, flags, key)) + continue; + + if (t->parms.link == link) + return t; + else if (!cand) + cand = t; + } + + if (flags & TUNNEL_NO_KEY) + goto skip_key_lookup; + + hlist_for_each_entry_rcu(t, head, hash_node) { + if (t->parms.i_key != key || + !(t->dev->flags & IFF_UP)) + continue; + + if (t->parms.link == link) + return t; + else if (!cand) + cand = t; + } + +skip_key_lookup: + if (cand) + return cand; + + if (itn->fb_tunnel_dev && itn->fb_tunnel_dev->flags & IFF_UP) + return netdev_priv(itn->fb_tunnel_dev); + + return NULL; +} +EXPORT_SYMBOL_GPL(ip_tunnel_lookup); + +static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn, + struct ip_tunnel_parm *parms) +{ + unsigned int h; + __be32 remote; + + if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr)) + remote = parms->iph.daddr; + else + remote = 0; + + h = ip_tunnel_hash(itn, parms->i_key, remote); + return &itn->tunnels[h]; +} + +static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t) +{ + struct hlist_head *head = ip_bucket(itn, &t->parms); + + hlist_add_head_rcu(&t->hash_node, head); +} + +static void ip_tunnel_del(struct ip_tunnel *t) +{ + hlist_del_init_rcu(&t->hash_node); +} + +static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn, + struct ip_tunnel_parm *parms, + int type) +{ + __be32 remote = parms->iph.daddr; + __be32 local = parms->iph.saddr; + __be32 key = parms->i_key; + int link = parms->link; + struct ip_tunnel *t = NULL; + struct hlist_head *head = ip_bucket(itn, parms); + + hlist_for_each_entry_rcu(t, head, hash_node) { + if (local == t->parms.iph.saddr && + remote == t->parms.iph.daddr && + key == t->parms.i_key && + link == t->parms.link && + type == t->dev->type) + break; + } + return t; +} + +static struct net_device *__ip_tunnel_create(struct net *net, + const struct rtnl_link_ops *ops, + struct ip_tunnel_parm *parms) +{ + int err; + struct ip_tunnel *tunnel; + struct net_device *dev; + char name[IFNAMSIZ]; + + if (parms->name[0]) + strlcpy(name, parms->name, IFNAMSIZ); + else { + if (strlen(ops->kind) > (IFNAMSIZ - 3)) { + err = -E2BIG; + goto failed; + } + strlcpy(name, ops->kind, IFNAMSIZ); + strncat(name, "%d", 2); + } + + ASSERT_RTNL(); + dev =
alloc_netdev(ops->priv_size, name, ops->setup); + if (!dev) { + err = -ENOMEM; + goto failed; + } + dev_net_set(dev, net); + + dev->rtnl_link_ops = ops; + + tunnel = netdev_priv(dev); + tunnel->parms = *parms; + + err = register_netdevice(dev); + if (err) + goto failed_free; + + return dev; + +failed_free: + free_netdev(dev); +failed: + return ERR_PTR(err); +} + +static inline struct rtable *ip_route_output_tunnel(struct net *net, + struct flowi4 *fl4, + int proto, + __be32 daddr, __be32 saddr, + __be32 key, __u8 tos, int oif) +{ + memset(fl4, 0, sizeof(*fl4)); + fl4->flowi4_oif = oif; + fl4->daddr = daddr; + fl4->saddr = saddr; + fl4->flowi4_tos = tos; + fl4->flowi4_proto = proto; + fl4->fl4_gre_key = key; + return ip_route_output_key(net, fl4); +} + +static int ip_tunnel_bind_dev(struct net_device *dev) +{ + struct net_device *tdev = NULL; + struct ip_tunnel *tunnel = netdev_priv(dev); + const struct iphdr *iph; + int hlen = LL_MAX_HEADER; + int mtu = ETH_DATA_LEN; + int t_hlen = tunnel->hlen + sizeof(struct iphdr); + + iph = &tunnel->parms.iph; + + /* Guess output device to choose reasonable mtu and needed_headroom */ + if (iph->daddr) { + struct flowi4 fl4; + struct rtable *rt; + + rt = ip_route_output_tunnel(dev_net(dev), &fl4, + tunnel->parms.iph.protocol, + iph->daddr, iph->saddr, + tunnel->parms.o_key, + RT_TOS(iph->tos), + tunnel->parms.link); + if (!IS_ERR(rt)) { + tdev = rt->dst.dev; + ip_rt_put(rt); + } + if (dev->type != ARPHRD_ETHER) + dev->flags |= IFF_POINTOPOINT; + } + + if (!tdev && tunnel->parms.link) + tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link); + + if (tdev) { + hlen = tdev->hard_header_len + tdev->needed_headroom; + mtu = tdev->mtu; + } + dev->iflink = tunnel->parms.link; + + dev->needed_headroom = t_hlen + hlen; + mtu -= (dev->hard_header_len + t_hlen); + + if (mtu < 68) + mtu = 68; + + return mtu; +} + +static struct ip_tunnel *ip_tunnel_create(struct net *net, + struct ip_tunnel_net *itn, + struct ip_tunnel_parm *parms) +{ + struct ip_tunnel *nt, *fbt; + struct net_device *dev; + + BUG_ON(!itn->fb_tunnel_dev); + fbt = netdev_priv(itn->fb_tunnel_dev); + dev = __ip_tunnel_create(net, itn->fb_tunnel_dev->rtnl_link_ops, parms); + if (IS_ERR(dev)) + return NULL; + + dev->mtu = ip_tunnel_bind_dev(dev); + + nt = netdev_priv(dev); + ip_tunnel_add(itn, nt); + return nt; +} + +int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb, + const struct tnl_ptk_info *tpi, bool log_ecn_error) +{ + struct pcpu_tstats *tstats; + const struct iphdr *iph = ip_hdr(skb); + int err; + + secpath_reset(skb); + + skb->protocol = tpi->proto; + + skb->mac_header = skb->network_header; + __pskb_pull(skb, tunnel->hlen); + skb_postpull_rcsum(skb, skb_transport_header(skb), tunnel->hlen); +#ifdef CONFIG_NET_IPGRE_BROADCAST + if (ipv4_is_multicast(iph->daddr)) { + /* Looped back packet, drop it! 
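A multicast skb that still carries an output route is our own transmission echoed back by multicast loopback; accepting it would loop traffic.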
*/ + if (rt_is_output_route(skb_rtable(skb))) + goto drop; + tunnel->dev->stats.multicast++; + skb->pkt_type = PACKET_BROADCAST; + } +#endif + + if ((!(tpi->flags&TUNNEL_CSUM) && (tunnel->parms.i_flags&TUNNEL_CSUM)) || + ((tpi->flags&TUNNEL_CSUM) && !(tunnel->parms.i_flags&TUNNEL_CSUM))) { + tunnel->dev->stats.rx_crc_errors++; + tunnel->dev->stats.rx_errors++; + goto drop; + } + + if (tunnel->parms.i_flags&TUNNEL_SEQ) { + if (!(tpi->flags&TUNNEL_SEQ) || + (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) { + tunnel->dev->stats.rx_fifo_errors++; + tunnel->dev->stats.rx_errors++; + goto drop; + } + tunnel->i_seqno = ntohl(tpi->seq) + 1; + } + + /* Warning: All skb pointers will be invalidated! */ + if (tunnel->dev->type == ARPHRD_ETHER) { + if (!pskb_may_pull(skb, ETH_HLEN)) { + tunnel->dev->stats.rx_length_errors++; + tunnel->dev->stats.rx_errors++; + goto drop; + } + + iph = ip_hdr(skb); + skb->protocol = eth_type_trans(skb, tunnel->dev); + skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); + } + + skb->pkt_type = PACKET_HOST; + __skb_tunnel_rx(skb, tunnel->dev); + + skb_reset_network_header(skb); + err = IP_ECN_decapsulate(iph, skb); + if (unlikely(err)) { + if (log_ecn_error) + net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n", + &iph->saddr, iph->tos); + if (err > 1) { + ++tunnel->dev->stats.rx_frame_errors; + ++tunnel->dev->stats.rx_errors; + goto drop; + } + } + + tstats = this_cpu_ptr(tunnel->dev->tstats); + u64_stats_update_begin(&tstats->syncp); + tstats->rx_packets++; + tstats->rx_bytes += skb->len; + u64_stats_update_end(&tstats->syncp); + + gro_cells_receive(&tunnel->gro_cells, skb); + return 0; + +drop: + kfree_skb(skb); + return 0; +} +EXPORT_SYMBOL_GPL(ip_tunnel_rcv); + +void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, + const struct iphdr *tnl_params) +{ + struct ip_tunnel *tunnel = netdev_priv(dev); + const struct iphdr *inner_iph; + struct iphdr *iph; + struct flowi4 fl4; + u8 tos, ttl; + __be16 df; + struct rtable *rt; /* Route to the other host */ + struct net_device *tdev; /* Device to other host */ + unsigned int max_headroom; /* The extra header space needed */ + __be32 dst; + int mtu; + + inner_iph = (const struct iphdr *)skb_inner_network_header(skb); + + dst = tnl_params->daddr; + if (dst == 0) { + /* NBMA tunnel */ + + if (skb_dst(skb) == NULL) { + dev->stats.tx_fifo_errors++; + goto tx_error; + } + + if (skb->protocol == htons(ETH_P_IP)) { + rt = skb_rtable(skb); + dst = rt_nexthop(rt, inner_iph->daddr); + } +#if IS_ENABLED(CONFIG_IPV6) + else if (skb->protocol == htons(ETH_P_IPV6)) { + const struct in6_addr *addr6; + struct neighbour *neigh; + bool do_tx_error_icmp; + int addr_type; + + neigh = dst_neigh_lookup(skb_dst(skb), + &ipv6_hdr(skb)->daddr); + if (neigh == NULL) + goto tx_error; + + addr6 = (const struct in6_addr *)&neigh->primary_key; + addr_type = ipv6_addr_type(addr6); + + if (addr_type == IPV6_ADDR_ANY) { + addr6 = &ipv6_hdr(skb)->daddr; + addr_type = ipv6_addr_type(addr6); + } + + if ((addr_type & IPV6_ADDR_COMPATv4) == 0) + do_tx_error_icmp = true; + else { + do_tx_error_icmp = false; + dst = addr6->s6_addr32[3]; + } + neigh_release(neigh); + if (do_tx_error_icmp) + goto tx_error_icmp; + } +#endif + else + goto tx_error; + } + + tos = tnl_params->tos; + if (tos & 0x1) { + tos &= ~0x1; + if (skb->protocol == htons(ETH_P_IP)) + tos = inner_iph->tos; + else if (skb->protocol == htons(ETH_P_IPV6)) + tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph); + } + + rt = ip_route_output_tunnel(dev_net(dev), &fl4, + 
tunnel->parms.iph.protocol, + dst, tnl_params->saddr, + tunnel->parms.o_key, + RT_TOS(tos), + tunnel->parms.link); + if (IS_ERR(rt)) { + dev->stats.tx_carrier_errors++; + goto tx_error; + } + tdev = rt->dst.dev; + + if (tdev == dev) { + ip_rt_put(rt); + dev->stats.collisions++; + goto tx_error; + } + + df = tnl_params->frag_off; + + if (df) + mtu = dst_mtu(&rt->dst) - dev->hard_header_len + - sizeof(struct iphdr); + else + mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu; + + if (skb_dst(skb)) + skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); + + if (skb->protocol == htons(ETH_P_IP)) { + df |= (inner_iph->frag_off&htons(IP_DF)); + + if (!skb_is_gso(skb) && + (inner_iph->frag_off&htons(IP_DF)) && + mtu < ntohs(inner_iph->tot_len)) { + icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); + ip_rt_put(rt); + goto tx_error; + } + } +#if IS_ENABLED(CONFIG_IPV6) + else if (skb->protocol == htons(ETH_P_IPV6)) { + struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb); + + if (rt6 && mtu < dst_mtu(skb_dst(skb)) && + mtu >= IPV6_MIN_MTU) { + if ((tunnel->parms.iph.daddr && + !ipv4_is_multicast(tunnel->parms.iph.daddr)) || + rt6->rt6i_dst.plen == 128) { + rt6->rt6i_flags |= RTF_MODIFIED; + dst_metric_set(skb_dst(skb), RTAX_MTU, mtu); + } + } + + if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU && + mtu < skb->len) { + icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); + ip_rt_put(rt); + goto tx_error; + } + } +#endif + + if (tunnel->err_count > 0) { + if (time_before(jiffies, + tunnel->err_time + IPTUNNEL_ERR_TIMEO)) { + tunnel->err_count--; + + dst_link_failure(skb); + } else + tunnel->err_count = 0; + } + + ttl = tnl_params->ttl; + if (ttl == 0) { + if (skb->protocol == htons(ETH_P_IP)) + ttl = inner_iph->ttl; +#if IS_ENABLED(CONFIG_IPV6) + else if (skb->protocol == htons(ETH_P_IPV6)) + ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit; +#endif + else + ttl = ip4_dst_hoplimit(&rt->dst); + } + + max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct iphdr) + + rt->dst.header_len; + if (max_headroom > dev->needed_headroom) { + dev->needed_headroom = max_headroom; + if (skb_cow_head(skb, dev->needed_headroom)) { + dev->stats.tx_dropped++; + dev_kfree_skb(skb); + return; + } + } + + skb_dst_drop(skb); + skb_dst_set(skb, &rt->dst); + memset(IPCB(skb), 0, sizeof(*IPCB(skb))); + + /* Push down and install the IP header. 
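The outer header fields come from the tunnel parameters; TOS, TTL and DF may have been derived from the inner packet above.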
*/ + skb_push(skb, sizeof(struct iphdr)); + skb_reset_network_header(skb); + + iph = ip_hdr(skb); + inner_iph = (const struct iphdr *)skb_inner_network_header(skb); + + iph->version = 4; + iph->ihl = sizeof(struct iphdr) >> 2; + iph->frag_off = df; + iph->protocol = tnl_params->protocol; + iph->tos = ip_tunnel_ecn_encap(tos, inner_iph, skb); + iph->daddr = fl4.daddr; + iph->saddr = fl4.saddr; + iph->ttl = ttl; + tunnel_ip_select_ident(skb, inner_iph, &rt->dst); + + iptunnel_xmit(skb, dev); + return; + +#if IS_ENABLED(CONFIG_IPV6) +tx_error_icmp: + dst_link_failure(skb); +#endif +tx_error: + dev->stats.tx_errors++; + dev_kfree_skb(skb); +} +EXPORT_SYMBOL_GPL(ip_tunnel_xmit); + +static void ip_tunnel_update(struct ip_tunnel_net *itn, + struct ip_tunnel *t, + struct net_device *dev, + struct ip_tunnel_parm *p, + bool set_mtu) +{ + ip_tunnel_del(t); + t->parms.iph.saddr = p->iph.saddr; + t->parms.iph.daddr = p->iph.daddr; + t->parms.i_key = p->i_key; + t->parms.o_key = p->o_key; + if (dev->type != ARPHRD_ETHER) { + memcpy(dev->dev_addr, &p->iph.saddr, 4); + memcpy(dev->broadcast, &p->iph.daddr, 4); + } + ip_tunnel_add(itn, t); + + t->parms.iph.ttl = p->iph.ttl; + t->parms.iph.tos = p->iph.tos; + t->parms.iph.frag_off = p->iph.frag_off; + + if (t->parms.link != p->link) { + int mtu; + + t->parms.link = p->link; + mtu = ip_tunnel_bind_dev(dev); + if (set_mtu) + dev->mtu = mtu; + } + netdev_state_change(dev); +} + +int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd) +{ + int err = 0; + struct ip_tunnel *t; + struct net *net = dev_net(dev); + struct ip_tunnel *tunnel = netdev_priv(dev); + struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id); + + BUG_ON(!itn->fb_tunnel_dev); + switch (cmd) { + case SIOCGETTUNNEL: + t = NULL; + if (dev == itn->fb_tunnel_dev) + t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type); + if (t == NULL) + t = netdev_priv(dev); + memcpy(p, &t->parms, sizeof(*p)); + break; + + case SIOCADDTUNNEL: + case SIOCCHGTUNNEL: + err = -EPERM; + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) + goto done; + if (p->iph.ttl) + p->iph.frag_off |= htons(IP_DF); + if (!(p->i_flags&TUNNEL_KEY)) + p->i_key = 0; + if (!(p->o_flags&TUNNEL_KEY)) + p->o_key = 0; + + t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type); + + if (!t && (cmd == SIOCADDTUNNEL)) + t = ip_tunnel_create(net, itn, p); + + if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) { + if (t != NULL) { + if (t->dev != dev) { + err = -EEXIST; + break; + } + } else { + unsigned int nflags = 0; + + if (ipv4_is_multicast(p->iph.daddr)) + nflags = IFF_BROADCAST; + else if (p->iph.daddr) + nflags = IFF_POINTOPOINT; + + if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) { + err = -EINVAL; + break; + } + + t = netdev_priv(dev); + } + } + + if (t) { + err = 0; + ip_tunnel_update(itn, t, dev, p, true); + } else + err = (cmd == SIOCADDTUNNEL ? 
-ENOBUFS : -ENOENT); + break; + + case SIOCDELTUNNEL: + err = -EPERM; + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) + goto done; + + if (dev == itn->fb_tunnel_dev) { + err = -ENOENT; + t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type); + if (t == NULL) + goto done; + err = -EPERM; + if (t == netdev_priv(itn->fb_tunnel_dev)) + goto done; + dev = t->dev; + } + unregister_netdevice(dev); + err = 0; + break; + + default: + err = -EINVAL; + } + +done: + return err; +} +EXPORT_SYMBOL_GPL(ip_tunnel_ioctl); + +int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu) +{ + struct ip_tunnel *tunnel = netdev_priv(dev); + int t_hlen = tunnel->hlen + sizeof(struct iphdr); + + if (new_mtu < 68 || + new_mtu > 0xFFF8 - dev->hard_header_len - t_hlen) + return -EINVAL; + dev->mtu = new_mtu; + return 0; +} +EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu); + +static void ip_tunnel_dev_free(struct net_device *dev) +{ + struct ip_tunnel *tunnel = netdev_priv(dev); + + gro_cells_destroy(&tunnel->gro_cells); + free_percpu(dev->tstats); + free_netdev(dev); +} + +void ip_tunnel_dellink(struct net_device *dev, struct list_head *head) +{ + struct net *net = dev_net(dev); + struct ip_tunnel *tunnel = netdev_priv(dev); + struct ip_tunnel_net *itn; + + itn = net_generic(net, tunnel->ip_tnl_net_id); + + if (itn->fb_tunnel_dev != dev) { + ip_tunnel_del(netdev_priv(dev)); + unregister_netdevice_queue(dev, head); + } +} +EXPORT_SYMBOL_GPL(ip_tunnel_dellink); + +int __net_init ip_tunnel_init_net(struct net *net, int ip_tnl_net_id, + struct rtnl_link_ops *ops, char *devname) +{ + struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id); + struct ip_tunnel_parm parms; + + itn->tunnels = kzalloc(IP_TNL_HASH_SIZE * sizeof(struct hlist_head), GFP_KERNEL); + if (!itn->tunnels) + return -ENOMEM; + + if (!ops) { + itn->fb_tunnel_dev = NULL; + return 0; + } + memset(&parms, 0, sizeof(parms)); + if (devname) + strlcpy(parms.name, devname, IFNAMSIZ); + + rtnl_lock(); + itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms); + rtnl_unlock(); + if (IS_ERR(itn->fb_tunnel_dev)) { + kfree(itn->tunnels); + return PTR_ERR(itn->fb_tunnel_dev); + } + + return 0; +} +EXPORT_SYMBOL_GPL(ip_tunnel_init_net); + +static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head) +{ + int h; + + for (h = 0; h < IP_TNL_HASH_SIZE; h++) { + struct ip_tunnel *t; + struct hlist_node *n; + struct hlist_head *thead = &itn->tunnels[h]; + + hlist_for_each_entry_safe(t, n, thead, hash_node) + unregister_netdevice_queue(t->dev, head); + } + if (itn->fb_tunnel_dev) + unregister_netdevice_queue(itn->fb_tunnel_dev, head); +} + +void __net_exit ip_tunnel_delete_net(struct ip_tunnel_net *itn) +{ + LIST_HEAD(list); + + rtnl_lock(); + ip_tunnel_destroy(itn, &list); + unregister_netdevice_many(&list); + rtnl_unlock(); + kfree(itn->tunnels); +} +EXPORT_SYMBOL_GPL(ip_tunnel_delete_net); + +int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[], + struct ip_tunnel_parm *p) +{ + struct ip_tunnel *nt; + struct net *net = dev_net(dev); + struct ip_tunnel_net *itn; + int mtu; + int err; + + nt = netdev_priv(dev); + itn = net_generic(net, nt->ip_tnl_net_id); + + if (ip_tunnel_find(itn, p, dev->type)) + return -EEXIST; + + nt->parms = *p; + err = register_netdevice(dev); + if (err) + goto out; + + if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS]) + eth_hw_addr_random(dev); + + mtu = ip_tunnel_bind_dev(dev); + if (!tb[IFLA_MTU]) + dev->mtu = mtu; + + ip_tunnel_add(itn, nt); + +out: + return err; +} +EXPORT_SYMBOL_GPL(ip_tunnel_newlink); + +int 
ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[], + struct ip_tunnel_parm *p) +{ + struct ip_tunnel *t, *nt; + struct net *net = dev_net(dev); + struct ip_tunnel *tunnel = netdev_priv(dev); + struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id); + + if (dev == itn->fb_tunnel_dev) + return -EINVAL; + + nt = netdev_priv(dev); + + t = ip_tunnel_find(itn, p, dev->type); + + if (t) { + if (t->dev != dev) + return -EEXIST; + } else { + t = nt; + + if (dev->type != ARPHRD_ETHER) { + unsigned int nflags = 0; + + if (ipv4_is_multicast(p->iph.daddr)) + nflags = IFF_BROADCAST; + else if (p->iph.daddr) + nflags = IFF_POINTOPOINT; + + if ((dev->flags ^ nflags) & + (IFF_POINTOPOINT | IFF_BROADCAST)) + return -EINVAL; + } + } + + ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU]); + return 0; +} +EXPORT_SYMBOL_GPL(ip_tunnel_changelink); + +int ip_tunnel_init(struct net_device *dev) +{ + struct ip_tunnel *tunnel = netdev_priv(dev); + struct iphdr *iph = &tunnel->parms.iph; + int err; + + dev->destructor = ip_tunnel_dev_free; + dev->tstats = alloc_percpu(struct pcpu_tstats); + if (!dev->tstats) + return -ENOMEM; + + err = gro_cells_init(&tunnel->gro_cells, dev); + if (err) { + free_percpu(dev->tstats); + return err; + } + + tunnel->dev = dev; + strcpy(tunnel->parms.name, dev->name); + iph->version = 4; + iph->ihl = 5; + + return 0; +} +EXPORT_SYMBOL_GPL(ip_tunnel_init); + +void ip_tunnel_uninit(struct net_device *dev) +{ + struct net *net = dev_net(dev); + struct ip_tunnel *tunnel = netdev_priv(dev); + struct ip_tunnel_net *itn; + + itn = net_generic(net, tunnel->ip_tnl_net_id); + /* fb_tunnel_dev will be unregistered in the net-exit call. */ + if (itn->fb_tunnel_dev != dev) + ip_tunnel_del(netdev_priv(dev)); +} +EXPORT_SYMBOL_GPL(ip_tunnel_uninit); + +/* Do the least required initialization; the rest is done in the tunnel_init call. */ +void ip_tunnel_setup(struct net_device *dev, int net_id) +{ + struct ip_tunnel *tunnel = netdev_priv(dev); + tunnel->ip_tnl_net_id = net_id; +} +EXPORT_SYMBOL_GPL(ip_tunnel_setup); + +MODULE_LICENSE("GPL"); diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index c3a4233c0ac2..9d2bdb2c1d3f 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -38,7 +38,7 @@ #include <net/sock.h> #include <net/ip.h> #include <net/icmp.h> -#include <net/ipip.h> +#include <net/ip_tunnels.h> #include <net/inet_ecn.h> #include <net/xfrm.h> #include <net/net_namespace.h> @@ -82,44 +82,6 @@ static int vti_tunnel_bind_dev(struct net_device *dev); } while (0) -static struct rtnl_link_stats64 *vti_get_stats64(struct net_device *dev, - struct rtnl_link_stats64 *tot) -{ - int i; - - for_each_possible_cpu(i) { - const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i); - u64 rx_packets, rx_bytes, tx_packets, tx_bytes; - unsigned int start; - - do { - start = u64_stats_fetch_begin_bh(&tstats->syncp); - rx_packets = tstats->rx_packets; - tx_packets = tstats->tx_packets; - rx_bytes = tstats->rx_bytes; - tx_bytes = tstats->tx_bytes; - } while (u64_stats_fetch_retry_bh(&tstats->syncp, start)); - - tot->rx_packets += rx_packets; - tot->tx_packets += tx_packets; - tot->rx_bytes += rx_bytes; - tot->tx_bytes += tx_bytes; - } - - tot->multicast = dev->stats.multicast; - tot->rx_crc_errors = dev->stats.rx_crc_errors; - tot->rx_fifo_errors = dev->stats.rx_fifo_errors; - tot->rx_length_errors = dev->stats.rx_length_errors; - tot->rx_errors = dev->stats.rx_errors; - tot->tx_fifo_errors = dev->stats.tx_fifo_errors; - tot->tx_carrier_errors = dev->stats.tx_carrier_errors; - tot->tx_dropped =
dev->stats.tx_dropped; - tot->tx_aborted_errors = dev->stats.tx_aborted_errors; - tot->tx_errors = dev->stats.tx_errors; - - return tot; -} - static struct ip_tunnel *vti_tunnel_lookup(struct net *net, __be32 remote, __be32 local) { @@ -597,7 +559,7 @@ static const struct net_device_ops vti_netdev_ops = { .ndo_start_xmit = vti_tunnel_xmit, .ndo_do_ioctl = vti_tunnel_ioctl, .ndo_change_mtu = vti_tunnel_change_mtu, - .ndo_get_stats64 = vti_get_stats64, + .ndo_get_stats64 = ip_tunnel_get_stats64, }; static void vti_dev_free(struct net_device *dev) diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c index 9a46daed2f3c..59cb8c769056 100644 --- a/net/ipv4/ipcomp.c +++ b/net/ipv4/ipcomp.c @@ -75,6 +75,7 @@ static struct xfrm_state *ipcomp_tunnel_create(struct xfrm_state *x) t->props.mode = x->props.mode; t->props.saddr.a4 = x->props.saddr.a4; t->props.flags = x->props.flags; + t->props.extra_flags = x->props.extra_flags; memcpy(&t->mark, &x->mark, sizeof(t->mark)); if (xfrm_init_state(t)) @@ -163,6 +164,7 @@ static const struct net_protocol ipcomp4_protocol = { .handler = xfrm4_rcv, .err_handler = ipcomp4_err, .no_policy = 1, + .netns_ok = 1, }; static int __init ipcomp4_init(void) diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index a2e50ae80b53..efa1138fa523 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -206,7 +206,7 @@ static int __init ic_open_devs(void) struct ic_device *d, **last; struct net_device *dev; unsigned short oflags; - unsigned long start; + unsigned long start, next_msg; last = &ic_first_dev; rtnl_lock(); @@ -263,12 +263,23 @@ static int __init ic_open_devs(void) /* wait for a carrier on at least one device */ start = jiffies; + next_msg = start + msecs_to_jiffies(CONF_CARRIER_TIMEOUT/12); while (jiffies - start < msecs_to_jiffies(CONF_CARRIER_TIMEOUT)) { + int wait, elapsed; + for_each_netdev(&init_net, dev) if (ic_is_init_dev(dev) && netif_carrier_ok(dev)) goto have_carrier; msleep(1); + + if (time_before(jiffies, next_msg)) + continue; + + elapsed = jiffies_to_msecs(jiffies - start); + wait = (CONF_CARRIER_TIMEOUT - elapsed + 500)/1000; + pr_info("Waiting up to %d more seconds for network.\n", wait); + next_msg = jiffies + msecs_to_jiffies(CONF_CARRIER_TIMEOUT/12); } have_carrier: rtnl_unlock(); @@ -1394,7 +1405,7 @@ static int __init ip_auto_config(void) unsigned int i; #ifdef CONFIG_PROC_FS - proc_net_fops_create(&init_net, "pnp", S_IRUGO, &pnp_seq_fops); + proc_create("pnp", S_IRUGO, init_net.proc_net, &pnp_seq_fops); #endif /* CONFIG_PROC_FS */ if (!ic_enable) @@ -1522,7 +1533,8 @@ static int __init ip_auto_config(void) } for (i++; i < CONF_NAMESERVERS_MAX; i++) if (ic_nameservers[i] != NONE) - pr_cont(", nameserver%u=%pI4\n", i, &ic_nameservers[i]); + pr_cont(", nameserver%u=%pI4", i, &ic_nameservers[i]); + pr_cont("\n"); #endif /* !SILENT */ return 0; diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 191fc24a745a..77bfcce64fe5 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -111,227 +111,21 @@ #include <net/sock.h> #include <net/ip.h> #include <net/icmp.h> -#include <net/ipip.h> +#include <net/ip_tunnels.h> #include <net/inet_ecn.h> #include <net/xfrm.h> #include <net/net_namespace.h> #include <net/netns/generic.h> -#define HASH_SIZE 16 -#define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF) - static bool log_ecn_error = true; module_param(log_ecn_error, bool, 0644); MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN"); static int ipip_net_id __read_mostly; -struct ipip_net { - struct ip_tunnel __rcu
*tunnels_r_l[HASH_SIZE]; - struct ip_tunnel __rcu *tunnels_r[HASH_SIZE]; - struct ip_tunnel __rcu *tunnels_l[HASH_SIZE]; - struct ip_tunnel __rcu *tunnels_wc[1]; - struct ip_tunnel __rcu **tunnels[4]; - - struct net_device *fb_tunnel_dev; -}; static int ipip_tunnel_init(struct net_device *dev); -static void ipip_tunnel_setup(struct net_device *dev); -static void ipip_dev_free(struct net_device *dev); static struct rtnl_link_ops ipip_link_ops __read_mostly; -static struct rtnl_link_stats64 *ipip_get_stats64(struct net_device *dev, - struct rtnl_link_stats64 *tot) -{ - int i; - - for_each_possible_cpu(i) { - const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i); - u64 rx_packets, rx_bytes, tx_packets, tx_bytes; - unsigned int start; - - do { - start = u64_stats_fetch_begin_bh(&tstats->syncp); - rx_packets = tstats->rx_packets; - tx_packets = tstats->tx_packets; - rx_bytes = tstats->rx_bytes; - tx_bytes = tstats->tx_bytes; - } while (u64_stats_fetch_retry_bh(&tstats->syncp, start)); - - tot->rx_packets += rx_packets; - tot->tx_packets += tx_packets; - tot->rx_bytes += rx_bytes; - tot->tx_bytes += tx_bytes; - } - - tot->tx_fifo_errors = dev->stats.tx_fifo_errors; - tot->tx_carrier_errors = dev->stats.tx_carrier_errors; - tot->tx_dropped = dev->stats.tx_dropped; - tot->tx_aborted_errors = dev->stats.tx_aborted_errors; - tot->tx_errors = dev->stats.tx_errors; - tot->collisions = dev->stats.collisions; - - return tot; -} - -static struct ip_tunnel *ipip_tunnel_lookup(struct net *net, - __be32 remote, __be32 local) -{ - unsigned int h0 = HASH(remote); - unsigned int h1 = HASH(local); - struct ip_tunnel *t; - struct ipip_net *ipn = net_generic(net, ipip_net_id); - - for_each_ip_tunnel_rcu(t, ipn->tunnels_r_l[h0 ^ h1]) - if (local == t->parms.iph.saddr && - remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP)) - return t; - - for_each_ip_tunnel_rcu(t, ipn->tunnels_r[h0]) - if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP)) - return t; - - for_each_ip_tunnel_rcu(t, ipn->tunnels_l[h1]) - if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP)) - return t; - - t = rcu_dereference(ipn->tunnels_wc[0]); - if (t && (t->dev->flags&IFF_UP)) - return t; - return NULL; -} - -static struct ip_tunnel __rcu **__ipip_bucket(struct ipip_net *ipn, - struct ip_tunnel_parm *parms) -{ - __be32 remote = parms->iph.daddr; - __be32 local = parms->iph.saddr; - unsigned int h = 0; - int prio = 0; - - if (remote) { - prio |= 2; - h ^= HASH(remote); - } - if (local) { - prio |= 1; - h ^= HASH(local); - } - return &ipn->tunnels[prio][h]; -} - -static inline struct ip_tunnel __rcu **ipip_bucket(struct ipip_net *ipn, - struct ip_tunnel *t) -{ - return __ipip_bucket(ipn, &t->parms); -} - -static void ipip_tunnel_unlink(struct ipip_net *ipn, struct ip_tunnel *t) -{ - struct ip_tunnel __rcu **tp; - struct ip_tunnel *iter; - - for (tp = ipip_bucket(ipn, t); - (iter = rtnl_dereference(*tp)) != NULL; - tp = &iter->next) { - if (t == iter) { - rcu_assign_pointer(*tp, t->next); - break; - } - } -} - -static void ipip_tunnel_link(struct ipip_net *ipn, struct ip_tunnel *t) -{ - struct ip_tunnel __rcu **tp = ipip_bucket(ipn, t); - - rcu_assign_pointer(t->next, rtnl_dereference(*tp)); - rcu_assign_pointer(*tp, t); -} - -static int ipip_tunnel_create(struct net_device *dev) -{ - struct ip_tunnel *t = netdev_priv(dev); - struct net *net = dev_net(dev); - struct ipip_net *ipn = net_generic(net, ipip_net_id); - int err; - - err = ipip_tunnel_init(dev); - if (err < 0) - goto out; - - err = register_netdevice(dev); - if (err 
< 0) - goto out; - - strcpy(t->parms.name, dev->name); - dev->rtnl_link_ops = &ipip_link_ops; - - dev_hold(dev); - ipip_tunnel_link(ipn, t); - return 0; - -out: - return err; -} - -static struct ip_tunnel *ipip_tunnel_locate(struct net *net, - struct ip_tunnel_parm *parms, int create) -{ - __be32 remote = parms->iph.daddr; - __be32 local = parms->iph.saddr; - struct ip_tunnel *t, *nt; - struct ip_tunnel __rcu **tp; - struct net_device *dev; - char name[IFNAMSIZ]; - struct ipip_net *ipn = net_generic(net, ipip_net_id); - - for (tp = __ipip_bucket(ipn, parms); - (t = rtnl_dereference(*tp)) != NULL; - tp = &t->next) { - if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) - return t; - } - if (!create) - return NULL; - - if (parms->name[0]) - strlcpy(name, parms->name, IFNAMSIZ); - else - strcpy(name, "tunl%d"); - - dev = alloc_netdev(sizeof(*t), name, ipip_tunnel_setup); - if (dev == NULL) - return NULL; - - dev_net_set(dev, net); - - nt = netdev_priv(dev); - nt->parms = *parms; - - if (ipip_tunnel_create(dev) < 0) - goto failed_free; - - return nt; - -failed_free: - ipip_dev_free(dev); - return NULL; -} - -/* called with RTNL */ -static void ipip_tunnel_uninit(struct net_device *dev) -{ - struct net *net = dev_net(dev); - struct ipip_net *ipn = net_generic(net, ipip_net_id); - - if (dev == ipn->fb_tunnel_dev) - RCU_INIT_POINTER(ipn->tunnels_wc[0], NULL); - else - ipip_tunnel_unlink(ipn, netdev_priv(dev)); - dev_put(dev); -} - static int ipip_err(struct sk_buff *skb, u32 info) { @@ -339,41 +133,17 @@ static int ipip_err(struct sk_buff *skb, u32 info) 8 bytes of packet payload. It means, that precise relaying of ICMP in the real Internet is absolutely infeasible. */ + struct net *net = dev_net(skb->dev); + struct ip_tunnel_net *itn = net_generic(net, ipip_net_id); const struct iphdr *iph = (const struct iphdr *)skb->data; - const int type = icmp_hdr(skb)->type; - const int code = icmp_hdr(skb)->code; struct ip_tunnel *t; int err; - - switch (type) { - default: - case ICMP_PARAMETERPROB: - return 0; - - case ICMP_DEST_UNREACH: - switch (code) { - case ICMP_SR_FAILED: - case ICMP_PORT_UNREACH: - /* Impossible event. */ - return 0; - default: - /* All others are translated to HOST_UNREACH. - rfc2003 contains "deep thoughts" about NET_UNREACH, - I believe they are just ether pollution. --ANK - */ - break; - } - break; - case ICMP_TIME_EXCEEDED: - if (code != ICMP_EXC_TTL) - return 0; - break; - case ICMP_REDIRECT: - break; - } + const int type = icmp_hdr(skb)->type; + const int code = icmp_hdr(skb)->code; err = -ENOENT; - t = ipip_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr); + t = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY, + iph->daddr, iph->saddr, 0); if (t == NULL) goto out; @@ -403,53 +173,29 @@ static int ipip_err(struct sk_buff *skb, u32 info) else t->err_count = 1; t->err_time = jiffies; -out: +out: return err; } +static const struct tnl_ptk_info tpi = { + /* no tunnel info required for ipip. 
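IPIP carries no flags, key or sequence number, so only the inner protocol is set.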
*/ + .proto = htons(ETH_P_IP), +}; + static int ipip_rcv(struct sk_buff *skb) { + struct net *net = dev_net(skb->dev); + struct ip_tunnel_net *itn = net_generic(net, ipip_net_id); struct ip_tunnel *tunnel; const struct iphdr *iph = ip_hdr(skb); - int err; - - tunnel = ipip_tunnel_lookup(dev_net(skb->dev), iph->saddr, iph->daddr); - if (tunnel != NULL) { - struct pcpu_tstats *tstats; + tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY, + iph->saddr, iph->daddr, 0); + if (tunnel) { if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) goto drop; - - secpath_reset(skb); - - skb->mac_header = skb->network_header; - skb_reset_network_header(skb); - skb->protocol = htons(ETH_P_IP); - skb->pkt_type = PACKET_HOST; - - __skb_tunnel_rx(skb, tunnel->dev); - - err = IP_ECN_decapsulate(iph, skb); - if (unlikely(err)) { - if (log_ecn_error) - net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n", - &iph->saddr, iph->tos); - if (err > 1) { - ++tunnel->dev->stats.rx_frame_errors; - ++tunnel->dev->stats.rx_errors; - goto drop; - } - } - - tstats = this_cpu_ptr(tunnel->dev->tstats); - u64_stats_update_begin(&tstats->syncp); - tstats->rx_packets++; - tstats->rx_bytes += skb->len; - u64_stats_update_end(&tstats->syncp); - - netif_rx(skb); - return 0; + return ip_tunnel_rcv(tunnel, skb, &tpi, log_ecn_error); } return -1; @@ -463,327 +209,64 @@ drop: * This function assumes it is being called from dev_queue_xmit() * and that skb is filled properly by that function. */ - static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) { struct ip_tunnel *tunnel = netdev_priv(dev); const struct iphdr *tiph = &tunnel->parms.iph; - u8 tos = tunnel->parms.iph.tos; - __be16 df = tiph->frag_off; - struct rtable *rt; /* Route to the other host */ - struct net_device *tdev; /* Device to other host */ - const struct iphdr *old_iph = ip_hdr(skb); - struct iphdr *iph; /* Our new IP header */ - unsigned int max_headroom; /* The extra header space needed */ - __be32 dst = tiph->daddr; - struct flowi4 fl4; - int mtu; - - if (skb->protocol != htons(ETH_P_IP)) - goto tx_error; - - if (skb->ip_summed == CHECKSUM_PARTIAL && - skb_checksum_help(skb)) - goto tx_error; - if (tos & 1) - tos = old_iph->tos; - - if (!dst) { - /* NBMA tunnel */ - if ((rt = skb_rtable(skb)) == NULL) { - dev->stats.tx_fifo_errors++; - goto tx_error; - } - dst = rt_nexthop(rt, old_iph->daddr); - } - - rt = ip_route_output_ports(dev_net(dev), &fl4, NULL, - dst, tiph->saddr, - 0, 0, - IPPROTO_IPIP, RT_TOS(tos), - tunnel->parms.link); - if (IS_ERR(rt)) { - dev->stats.tx_carrier_errors++; - goto tx_error_icmp; - } - tdev = rt->dst.dev; - - if (tdev == dev) { - ip_rt_put(rt); - dev->stats.collisions++; + if (unlikely(skb->protocol != htons(ETH_P_IP))) goto tx_error; - } - df |= old_iph->frag_off & htons(IP_DF); - - if (df) { - mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr); - - if (mtu < 68) { - dev->stats.collisions++; - ip_rt_put(rt); - goto tx_error; - } - - if (skb_dst(skb)) - skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); - - if ((old_iph->frag_off & htons(IP_DF)) && - mtu < ntohs(old_iph->tot_len)) { - icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, - htonl(mtu)); - ip_rt_put(rt); - goto tx_error; - } + if (likely(!skb->encapsulation)) { + skb_reset_inner_headers(skb); + skb->encapsulation = 1; } - if (tunnel->err_count > 0) { - if (time_before(jiffies, - tunnel->err_time + IPTUNNEL_ERR_TIMEO)) { - tunnel->err_count--; - dst_link_failure(skb); - } else - tunnel->err_count = 0; - } - - /* - * Okay, now see 
if we can stuff it in the buffer as-is. - */ - max_headroom = (LL_RESERVED_SPACE(tdev)+sizeof(struct iphdr)); - - if (skb_headroom(skb) < max_headroom || skb_shared(skb) || - (skb_cloned(skb) && !skb_clone_writable(skb, 0))) { - struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom); - if (!new_skb) { - ip_rt_put(rt); - dev->stats.tx_dropped++; - dev_kfree_skb(skb); - return NETDEV_TX_OK; - } - if (skb->sk) - skb_set_owner_w(new_skb, skb->sk); - dev_kfree_skb(skb); - skb = new_skb; - old_iph = ip_hdr(skb); - } - - skb->transport_header = skb->network_header; - skb_push(skb, sizeof(struct iphdr)); - skb_reset_network_header(skb); - memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); - IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED | - IPSKB_REROUTED); - skb_dst_drop(skb); - skb_dst_set(skb, &rt->dst); - - /* - * Push down and install the IPIP header. - */ - - iph = ip_hdr(skb); - iph->version = 4; - iph->ihl = sizeof(struct iphdr)>>2; - iph->frag_off = df; - iph->protocol = IPPROTO_IPIP; - iph->tos = INET_ECN_encapsulate(tos, old_iph->tos); - iph->daddr = fl4.daddr; - iph->saddr = fl4.saddr; - - if ((iph->ttl = tiph->ttl) == 0) - iph->ttl = old_iph->ttl; - - iptunnel_xmit(skb, dev); + ip_tunnel_xmit(skb, dev, tiph); return NETDEV_TX_OK; -tx_error_icmp: - dst_link_failure(skb); tx_error: dev->stats.tx_errors++; dev_kfree_skb(skb); return NETDEV_TX_OK; } -static void ipip_tunnel_bind_dev(struct net_device *dev) -{ - struct net_device *tdev = NULL; - struct ip_tunnel *tunnel; - const struct iphdr *iph; - - tunnel = netdev_priv(dev); - iph = &tunnel->parms.iph; - - if (iph->daddr) { - struct rtable *rt; - struct flowi4 fl4; - - rt = ip_route_output_ports(dev_net(dev), &fl4, NULL, - iph->daddr, iph->saddr, - 0, 0, - IPPROTO_IPIP, - RT_TOS(iph->tos), - tunnel->parms.link); - if (!IS_ERR(rt)) { - tdev = rt->dst.dev; - ip_rt_put(rt); - } - dev->flags |= IFF_POINTOPOINT; - } - - if (!tdev && tunnel->parms.link) - tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link); - - if (tdev) { - dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr); - dev->mtu = tdev->mtu - sizeof(struct iphdr); - } - dev->iflink = tunnel->parms.link; -} - -static void ipip_tunnel_update(struct ip_tunnel *t, struct ip_tunnel_parm *p) -{ - struct net *net = dev_net(t->dev); - struct ipip_net *ipn = net_generic(net, ipip_net_id); - - ipip_tunnel_unlink(ipn, t); - synchronize_net(); - t->parms.iph.saddr = p->iph.saddr; - t->parms.iph.daddr = p->iph.daddr; - memcpy(t->dev->dev_addr, &p->iph.saddr, 4); - memcpy(t->dev->broadcast, &p->iph.daddr, 4); - ipip_tunnel_link(ipn, t); - t->parms.iph.ttl = p->iph.ttl; - t->parms.iph.tos = p->iph.tos; - t->parms.iph.frag_off = p->iph.frag_off; - if (t->parms.link != p->link) { - t->parms.link = p->link; - ipip_tunnel_bind_dev(t->dev); - } - netdev_state_change(t->dev); -} - static int -ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) +ipip_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) { int err = 0; struct ip_tunnel_parm p; - struct ip_tunnel *t; - struct net *net = dev_net(dev); - struct ipip_net *ipn = net_generic(net, ipip_net_id); - - switch (cmd) { - case SIOCGETTUNNEL: - t = NULL; - if (dev == ipn->fb_tunnel_dev) { - if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) { - err = -EFAULT; - break; - } - t = ipip_tunnel_locate(net, &p, 0); - } - if (t == NULL) - t = netdev_priv(dev); - memcpy(&p, &t->parms, sizeof(p)); - if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) - err = 
-EFAULT; - break; - - case SIOCADDTUNNEL: - case SIOCCHGTUNNEL: - err = -EPERM; - if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) - goto done; - - err = -EFAULT; - if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) - goto done; - - err = -EINVAL; - if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP || - p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF))) - goto done; - if (p.iph.ttl) - p.iph.frag_off |= htons(IP_DF); - - t = ipip_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL); - - if (dev != ipn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) { - if (t != NULL) { - if (t->dev != dev) { - err = -EEXIST; - break; - } - } else { - if (((dev->flags&IFF_POINTOPOINT) && !p.iph.daddr) || - (!(dev->flags&IFF_POINTOPOINT) && p.iph.daddr)) { - err = -EINVAL; - break; - } - t = netdev_priv(dev); - } - - ipip_tunnel_update(t, &p); - } - - if (t) { - err = 0; - if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p))) - err = -EFAULT; - } else - err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT); - break; - - case SIOCDELTUNNEL: - err = -EPERM; - if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) - goto done; - - if (dev == ipn->fb_tunnel_dev) { - err = -EFAULT; - if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) - goto done; - err = -ENOENT; - if ((t = ipip_tunnel_locate(net, &p, 0)) == NULL) - goto done; - err = -EPERM; - if (t->dev == ipn->fb_tunnel_dev) - goto done; - dev = t->dev; - } - unregister_netdevice(dev); - err = 0; - break; - default: - err = -EINVAL; - } - -done: - return err; -} + if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) + return -EFAULT; -static int ipip_tunnel_change_mtu(struct net_device *dev, int new_mtu) -{ - if (new_mtu < 68 || new_mtu > 0xFFF8 - sizeof(struct iphdr)) + if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP || + p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF))) + return -EINVAL; + if (p.i_key || p.o_key || p.i_flags || p.o_flags) return -EINVAL; - dev->mtu = new_mtu; + if (p.iph.ttl) + p.iph.frag_off |= htons(IP_DF); + + err = ip_tunnel_ioctl(dev, &p, cmd); + if (err) + return err; + + if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) + return -EFAULT; + return 0; } static const struct net_device_ops ipip_netdev_ops = { - .ndo_uninit = ipip_tunnel_uninit, + .ndo_init = ipip_tunnel_init, + .ndo_uninit = ip_tunnel_uninit, .ndo_start_xmit = ipip_tunnel_xmit, .ndo_do_ioctl = ipip_tunnel_ioctl, - .ndo_change_mtu = ipip_tunnel_change_mtu, - .ndo_get_stats64 = ipip_get_stats64, + .ndo_change_mtu = ip_tunnel_change_mtu, + .ndo_get_stats64 = ip_tunnel_get_stats64, }; -static void ipip_dev_free(struct net_device *dev) -{ - free_percpu(dev->tstats); - free_netdev(dev); -} - #define IPIP_FEATURES (NETIF_F_SG | \ NETIF_F_FRAGLIST | \ NETIF_F_HIGHDMA | \ @@ -792,11 +275,8 @@ static void ipip_dev_free(struct net_device *dev) static void ipip_tunnel_setup(struct net_device *dev) { dev->netdev_ops = &ipip_netdev_ops; - dev->destructor = ipip_dev_free; dev->type = ARPHRD_TUNNEL; - dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr); - dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr); dev->flags = IFF_NOARP; dev->iflink = 0; dev->addr_len = 4; @@ -806,46 +286,19 @@ static void ipip_tunnel_setup(struct net_device *dev) dev->features |= IPIP_FEATURES; dev->hw_features |= IPIP_FEATURES; + ip_tunnel_setup(dev, ipip_net_id); } static int ipip_tunnel_init(struct net_device *dev) { struct ip_tunnel *tunnel = netdev_priv(dev); - tunnel->dev = dev; - memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4); memcpy(dev->broadcast, 
&tunnel->parms.iph.daddr, 4); - ipip_tunnel_bind_dev(dev); - - dev->tstats = alloc_percpu(struct pcpu_tstats); - if (!dev->tstats) - return -ENOMEM; - - return 0; -} - -static int __net_init ipip_fb_tunnel_init(struct net_device *dev) -{ - struct ip_tunnel *tunnel = netdev_priv(dev); - struct iphdr *iph = &tunnel->parms.iph; - struct ipip_net *ipn = net_generic(dev_net(dev), ipip_net_id); - - tunnel->dev = dev; - strcpy(tunnel->parms.name, dev->name); - - iph->version = 4; - iph->protocol = IPPROTO_IPIP; - iph->ihl = 5; - - dev->tstats = alloc_percpu(struct pcpu_tstats); - if (!dev->tstats) - return -ENOMEM; - - dev_hold(dev); - rcu_assign_pointer(ipn->tunnels_wc[0], tunnel); - return 0; + tunnel->hlen = 0; + tunnel->parms.iph.protocol = IPPROTO_IPIP; + return ip_tunnel_init(dev); } static void ipip_netlink_parms(struct nlattr *data[], @@ -885,28 +338,16 @@ static void ipip_netlink_parms(struct nlattr *data[], static int ipip_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[]) { - struct net *net = dev_net(dev); - struct ip_tunnel *nt; - - nt = netdev_priv(dev); - ipip_netlink_parms(data, &nt->parms); - - if (ipip_tunnel_locate(net, &nt->parms, 0)) - return -EEXIST; + struct ip_tunnel_parm p; - return ipip_tunnel_create(dev); + ipip_netlink_parms(data, &p); + return ip_tunnel_newlink(dev, tb, &p); } static int ipip_changelink(struct net_device *dev, struct nlattr *tb[], struct nlattr *data[]) { - struct ip_tunnel *t; struct ip_tunnel_parm p; - struct net *net = dev_net(dev); - struct ipip_net *ipn = net_generic(net, ipip_net_id); - - if (dev == ipn->fb_tunnel_dev) - return -EINVAL; ipip_netlink_parms(data, &p); @@ -914,16 +355,7 @@ static int ipip_changelink(struct net_device *dev, struct nlattr *tb[], (!(dev->flags & IFF_POINTOPOINT) && p.iph.daddr)) return -EINVAL; - t = ipip_tunnel_locate(net, &p, 0); - - if (t) { - if (t->dev != dev) - return -EEXIST; - } else - t = netdev_priv(dev); - - ipip_tunnel_update(t, &p); - return 0; + return ip_tunnel_changelink(dev, tb, &p); } static size_t ipip_get_size(const struct net_device *dev) @@ -980,6 +412,7 @@ static struct rtnl_link_ops ipip_link_ops __read_mostly = { .setup = ipip_tunnel_setup, .newlink = ipip_newlink, .changelink = ipip_changelink, + .dellink = ip_tunnel_dellink, .get_size = ipip_get_size, .fill_info = ipip_fill_info, }; @@ -990,90 +423,29 @@ static struct xfrm_tunnel ipip_handler __read_mostly = { .priority = 1, }; -static const char banner[] __initconst = - KERN_INFO "IPv4 over IPv4 tunneling driver\n"; - -static void ipip_destroy_tunnels(struct ipip_net *ipn, struct list_head *head) -{ - int prio; - - for (prio = 1; prio < 4; prio++) { - int h; - for (h = 0; h < HASH_SIZE; h++) { - struct ip_tunnel *t; - - t = rtnl_dereference(ipn->tunnels[prio][h]); - while (t != NULL) { - unregister_netdevice_queue(t->dev, head); - t = rtnl_dereference(t->next); - } - } - } -} - static int __net_init ipip_init_net(struct net *net) { - struct ipip_net *ipn = net_generic(net, ipip_net_id); - struct ip_tunnel *t; - int err; - - ipn->tunnels[0] = ipn->tunnels_wc; - ipn->tunnels[1] = ipn->tunnels_l; - ipn->tunnels[2] = ipn->tunnels_r; - ipn->tunnels[3] = ipn->tunnels_r_l; - - ipn->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), - "tunl0", - ipip_tunnel_setup); - if (!ipn->fb_tunnel_dev) { - err = -ENOMEM; - goto err_alloc_dev; - } - dev_net_set(ipn->fb_tunnel_dev, net); - - err = ipip_fb_tunnel_init(ipn->fb_tunnel_dev); - if (err) - goto err_reg_dev; - - if ((err = 
register_netdev(ipn->fb_tunnel_dev))) - goto err_reg_dev; - - t = netdev_priv(ipn->fb_tunnel_dev); - - strcpy(t->parms.name, ipn->fb_tunnel_dev->name); - return 0; - -err_reg_dev: - ipip_dev_free(ipn->fb_tunnel_dev); -err_alloc_dev: - /* nothing */ - return err; + return ip_tunnel_init_net(net, ipip_net_id, &ipip_link_ops, "tunl0"); } static void __net_exit ipip_exit_net(struct net *net) { - struct ipip_net *ipn = net_generic(net, ipip_net_id); - LIST_HEAD(list); - - rtnl_lock(); - ipip_destroy_tunnels(ipn, &list); - unregister_netdevice_queue(ipn->fb_tunnel_dev, &list); - unregister_netdevice_many(&list); - rtnl_unlock(); + struct ip_tunnel_net *itn = net_generic(net, ipip_net_id); + ip_tunnel_delete_net(itn); } static struct pernet_operations ipip_net_ops = { .init = ipip_init_net, .exit = ipip_exit_net, .id = &ipip_net_id, - .size = sizeof(struct ipip_net), + .size = sizeof(struct ip_tunnel_net), }; static int __init ipip_init(void) { int err; - printk(banner); + pr_info("ipip: IPv4 over IPv4 tunneling driver\n"); err = register_pernet_device(&ipip_net_ops); if (err < 0) diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index a9454cbd953c..9d9610ae7855 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -61,7 +61,7 @@ #include <linux/netfilter_ipv4.h> #include <linux/compat.h> #include <linux/export.h> -#include <net/ipip.h> +#include <net/ip_tunnels.h> #include <net/checksum.h> #include <net/netlink.h> #include <net/fib_rules.h> @@ -626,9 +626,9 @@ static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c) if (ip_hdr(skb)->version == 0) { struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr)); nlh->nlmsg_type = NLMSG_ERROR; - nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr)); + nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr)); skb_trim(skb, nlh->nlmsg_len); - e = NLMSG_DATA(nlh); + e = nlmsg_data(nlh); e->error = -ETIMEDOUT; memset(&e->msg, 0, sizeof(e->msg)); @@ -828,6 +828,49 @@ static struct mfc_cache *ipmr_cache_find(struct mr_table *mrt, return NULL; } +/* Look for a (*,*,oif) entry */ +static struct mfc_cache *ipmr_cache_find_any_parent(struct mr_table *mrt, + int vifi) +{ + int line = MFC_HASH(htonl(INADDR_ANY), htonl(INADDR_ANY)); + struct mfc_cache *c; + + list_for_each_entry_rcu(c, &mrt->mfc_cache_array[line], list) + if (c->mfc_origin == htonl(INADDR_ANY) && + c->mfc_mcastgrp == htonl(INADDR_ANY) && + c->mfc_un.res.ttls[vifi] < 255) + return c; + + return NULL; +} + +/* Look for a (*,G) entry */ +static struct mfc_cache *ipmr_cache_find_any(struct mr_table *mrt, + __be32 mcastgrp, int vifi) +{ + int line = MFC_HASH(mcastgrp, htonl(INADDR_ANY)); + struct mfc_cache *c, *proxy; + + if (mcastgrp == htonl(INADDR_ANY)) + goto skip; + + list_for_each_entry_rcu(c, &mrt->mfc_cache_array[line], list) + if (c->mfc_origin == htonl(INADDR_ANY) && + c->mfc_mcastgrp == mcastgrp) { + if (c->mfc_un.res.ttls[vifi] < 255) + return c; + + /* It's ok if the vifi is part of the static tree */ + proxy = ipmr_cache_find_any_parent(mrt, + c->mfc_parent); + if (proxy && proxy->mfc_un.res.ttls[vifi] < 255) + return c; + } + +skip: + return ipmr_cache_find_any_parent(mrt, vifi); +} + /* * Allocate a multicast cache entry */ @@ -867,14 +910,14 @@ static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt, if (ip_hdr(skb)->version == 0) { struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr)); - if (__ipmr_fill_mroute(mrt, skb, c, NLMSG_DATA(nlh)) > 0) { + if (__ipmr_fill_mroute(mrt, skb, c, nlmsg_data(nlh)) > 0) { 
nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh; } else { nlh->nlmsg_type = NLMSG_ERROR; - nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr)); + nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr)); skb_trim(skb, nlh->nlmsg_len); - e = NLMSG_DATA(nlh); + e = nlmsg_data(nlh); e->error = -EMSGSIZE; memset(&e->msg, 0, sizeof(e->msg)); } @@ -1053,7 +1096,7 @@ ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, struct sk_buff *skb) * MFC cache manipulation by user space mroute daemon */ -static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc) +static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc, int parent) { int line; struct mfc_cache *c, *next; @@ -1062,7 +1105,8 @@ static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc) list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[line], list) { if (c->mfc_origin == mfc->mfcc_origin.s_addr && - c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) { + c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr && + (parent == -1 || parent == c->mfc_parent)) { list_del_rcu(&c->list); mroute_netlink_event(mrt, c, RTM_DELROUTE); ipmr_cache_free(c); @@ -1073,7 +1117,7 @@ } static int ipmr_mfc_add(struct net *net, struct mr_table *mrt, - struct mfcctl *mfc, int mrtsock) + struct mfcctl *mfc, int mrtsock, int parent) { bool found = false; int line; @@ -1086,7 +1130,8 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt, list_for_each_entry(c, &mrt->mfc_cache_array[line], list) { if (c->mfc_origin == mfc->mfcc_origin.s_addr && - c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) { + c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr && + (parent == -1 || parent == c->mfc_parent)) { found = true; break; } @@ -1103,7 +1148,8 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt, return 0; } - if (!ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr)) + if (mfc->mfcc_mcastgrp.s_addr != htonl(INADDR_ANY) && + !ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr)) return -EINVAL; c = ipmr_cache_alloc(); @@ -1218,7 +1264,7 @@ static void mrtsock_destruct(struct sock *sk) int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen) { - int ret; + int ret, parent = 0; struct vifctl vif; struct mfcctl mfc; struct net *net = sock_net(sk); @@ -1287,16 +1333,22 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen) */ case MRT_ADD_MFC: case MRT_DEL_MFC: + parent = -1; + case MRT_ADD_MFC_PROXY: + case MRT_DEL_MFC_PROXY: if (optlen != sizeof(mfc)) return -EINVAL; if (copy_from_user(&mfc, optval, sizeof(mfc))) return -EFAULT; + if (parent == 0) + parent = mfc.mfcc_parent; rtnl_lock(); - if (optname == MRT_DEL_MFC) - ret = ipmr_mfc_delete(mrt, &mfc); + if (optname == MRT_DEL_MFC || optname == MRT_DEL_MFC_PROXY) + ret = ipmr_mfc_delete(mrt, &mfc, parent); else ret = ipmr_mfc_add(net, mrt, &mfc, - sk == rtnl_dereference(mrt->mroute_sk)); + sk == rtnl_dereference(mrt->mroute_sk), + parent); rtnl_unlock(); return ret; /* @@ -1749,17 +1801,28 @@ static int ip_mr_forward(struct net *net, struct mr_table *mrt, { int psend = -1; int vif, ct; + int true_vifi = ipmr_find_vif(mrt, skb->dev); vif = cache->mfc_parent; cache->mfc_un.res.pkt++; cache->mfc_un.res.bytes += skb->len; + if (cache->mfc_origin == htonl(INADDR_ANY) && true_vifi >= 0) { + struct mfc_cache *cache_proxy; + + /* For an (*,G) entry, we only check that the incoming + * interface is part of the static tree.
+ */ + cache_proxy = ipmr_cache_find_any_parent(mrt, vif); + if (cache_proxy && + cache_proxy->mfc_un.res.ttls[true_vifi] < 255) + goto forward; + } + /* * Wrong interface: drop packet and (maybe) send PIM assert. */ if (mrt->vif_table[vif].dev != skb->dev) { - int true_vifi; - if (rt_is_output_route(skb_rtable(skb))) { /* It is our own packet, looped back. * Very complicated situation... @@ -1776,7 +1839,6 @@ static int ip_mr_forward(struct net *net, struct mr_table *mrt, } cache->mfc_un.res.wrong_if++; - true_vifi = ipmr_find_vif(mrt, skb->dev); if (true_vifi >= 0 && mrt->mroute_do_assert && /* pimsm uses asserts, when switching from RPT to SPT, @@ -1794,15 +1856,34 @@ static int ip_mr_forward(struct net *net, struct mr_table *mrt, goto dont_forward; } +forward: mrt->vif_table[vif].pkt_in++; mrt->vif_table[vif].bytes_in += skb->len; /* * Forward the frame */ + if (cache->mfc_origin == htonl(INADDR_ANY) && + cache->mfc_mcastgrp == htonl(INADDR_ANY)) { + if (true_vifi >= 0 && + true_vifi != cache->mfc_parent && + ip_hdr(skb)->ttl > + cache->mfc_un.res.ttls[cache->mfc_parent]) { + /* It's an (*,*) entry and the packet is not coming from + * the upstream: forward the packet to the upstream + * only. + */ + psend = cache->mfc_parent; + goto last_forward; + } + goto dont_forward; + } for (ct = cache->mfc_un.res.maxvif - 1; ct >= cache->mfc_un.res.minvif; ct--) { - if (ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) { + /* For (*,G) entry, don't forward to the incoming interface */ + if ((cache->mfc_origin != htonl(INADDR_ANY) || + ct != true_vifi) && + ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) { if (psend != -1) { struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); @@ -1813,6 +1894,7 @@ static int ip_mr_forward(struct net *net, struct mr_table *mrt, psend = ct; } } +last_forward: if (psend != -1) { if (local) { struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); @@ -1902,6 +1984,13 @@ int ip_mr_input(struct sk_buff *skb) /* already under rcu_read_lock() */ cache = ipmr_cache_find(mrt, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr); + if (cache == NULL) { + int vif = ipmr_find_vif(mrt, skb->dev); + + if (vif >= 0) + cache = ipmr_cache_find_any(mrt, ip_hdr(skb)->daddr, + vif); + } /* * No usable cache entry @@ -2107,7 +2196,12 @@ int ipmr_get_route(struct net *net, struct sk_buff *skb, rcu_read_lock(); cache = ipmr_cache_find(mrt, saddr, daddr); + if (cache == NULL && skb->dev) { + int vif = ipmr_find_vif(mrt, skb->dev); + if (vif >= 0) + cache = ipmr_cache_find_any(mrt, daddr, vif); + } if (cache == NULL) { struct sk_buff *skb2; struct iphdr *iph; @@ -2609,16 +2703,16 @@ static int __net_init ipmr_net_init(struct net *net) #ifdef CONFIG_PROC_FS err = -ENOMEM; - if (!proc_net_fops_create(net, "ip_mr_vif", 0, &ipmr_vif_fops)) + if (!proc_create("ip_mr_vif", 0, net->proc_net, &ipmr_vif_fops)) goto proc_vif_fail; - if (!proc_net_fops_create(net, "ip_mr_cache", 0, &ipmr_mfc_fops)) + if (!proc_create("ip_mr_cache", 0, net->proc_net, &ipmr_mfc_fops)) goto proc_cache_fail; #endif return 0; #ifdef CONFIG_PROC_FS proc_cache_fail: - proc_net_remove(net, "ip_mr_vif"); + remove_proc_entry("ip_mr_vif", net->proc_net); proc_vif_fail: ipmr_rules_exit(net); #endif @@ -2629,8 +2723,8 @@ fail: static void __net_exit ipmr_net_exit(struct net *net) { #ifdef CONFIG_PROC_FS - proc_net_remove(net, "ip_mr_cache"); - proc_net_remove(net, "ip_mr_vif"); + remove_proc_entry("ip_mr_cache", net->proc_net); + remove_proc_entry("ip_mr_vif", net->proc_net); #endif ipmr_rules_exit(net); } diff --git a/net/ipv4/netfilter.c 
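The ipmr changes above add (*,G) and (*,*) proxy entries to the multicast forwarding cache, keyed on (origin, group, parent) and driven from userspace through the new MRT_ADD_MFC_PROXY and MRT_DEL_MFC_PROXY socket options. A hypothetical userspace sketch of installing a (*,G) entry follows; it assumes a multicast-routing socket that has already done MRT_INIT and MRT_ADD_VIF for the VIFs it names, and the VIF numbers are purely illustrative.

#include <string.h>
#include <arpa/inet.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <linux/mroute.h>

static int add_star_g_proxy(int mrt_sock, const char *group, int parent_vif)
{
	struct mfcctl mc;

	memset(&mc, 0, sizeof(mc));
	mc.mfcc_origin.s_addr = htonl(INADDR_ANY);	/* (*,G): any source */
	inet_pton(AF_INET, group, &mc.mfcc_mcastgrp);
	mc.mfcc_parent = parent_vif;			/* upstream VIF */
	mc.mfcc_ttls[1] = 1;				/* forward out VIF 1 */

	/* Proxy entries are keyed on (origin, group, parent), so one
	 * group may carry several entries with different parent VIFs. */
	return setsockopt(mrt_sock, IPPROTO_IP, MRT_ADD_MFC_PROXY,
			  &mc, sizeof(mc));
}

Deletion mirrors this with MRT_DEL_MFC_PROXY and the same (origin, group, parent) triple, which is why ipmr_mfc_delete() above grows a parent argument.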
b/net/ipv4/netfilter.c index 4c0cf63dd92e..c3e0adea9c27 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c @@ -1,4 +1,9 @@ -/* IPv4 specific functions of netfilter core */ +/* + * IPv4 specific functions of netfilter core + * + * Rusty Russell (C) 2000 -- This code is GPL. + * Patrick McHardy (C) 2006-2012 + */ #include <linux/kernel.h> #include <linux/netfilter.h> #include <linux/netfilter_ipv4.h> @@ -40,14 +45,14 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned int addr_type) fl4.flowi4_flags = flags; rt = ip_route_output_key(net, &fl4); if (IS_ERR(rt)) - return -1; + return PTR_ERR(rt); /* Drop old route. */ skb_dst_drop(skb); skb_dst_set(skb, &rt->dst); if (skb_dst(skb)->error) - return -1; + return skb_dst(skb)->error; #ifdef CONFIG_XFRM if (!(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) && @@ -56,7 +61,7 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned int addr_type) skb_dst_set(skb, NULL); dst = xfrm_lookup(net, dst, flowi4_to_flowi(&fl4), skb->sk, 0); if (IS_ERR(dst)) - return -1; + return PTR_ERR(dst); skb_dst_set(skb, dst); } #endif @@ -66,7 +71,7 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned int addr_type) if (skb_headroom(skb) < hh_len && pskb_expand_head(skb, HH_DATA_ALIGN(hh_len - skb_headroom(skb)), 0, GFP_ATOMIC)) - return -1; + return -ENOMEM; return 0; } diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index d8d6f2a5bf12..e7916c193932 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -36,19 +36,6 @@ config NF_CONNTRACK_PROC_COMPAT If unsure, say Y. -config IP_NF_QUEUE - tristate "IP Userspace queueing via NETLINK (OBSOLETE)" - depends on NETFILTER_ADVANCED - help - Netfilter has the ability to queue packets to user space: the - netlink device can be used to access them using this driver. - - This option enables the old IPv4-only "ip_queue" implementation - which has been obsoleted by the new "nfnetlink_queue" code (see - CONFIG_NETFILTER_NETLINK_QUEUE). - - To compile it as a module, choose M here. If unsure, say N. - config IP_NF_IPTABLES tristate "IP tables support (required for filtering/masq/NAT)" default m if NETFILTER_ADVANCED=n @@ -84,7 +71,7 @@ config IP_NF_MATCH_ECN config IP_NF_MATCH_RPFILTER tristate '"rpfilter" reverse path filter match support' - depends on NETFILTER_ADVANCED + depends on NETFILTER_ADVANCED && (IP_NF_MANGLE || IP_NF_RAW) ---help--- This option allows you to match packets whose replies would go out via the interface the packet came in. @@ -241,8 +228,8 @@ config IP_NF_MANGLE To compile it as a module, choose M here. If unsure, say N. config IP_NF_TARGET_CLUSTERIP - tristate "CLUSTERIP target support (EXPERIMENTAL)" - depends on IP_NF_MANGLE && EXPERIMENTAL + tristate "CLUSTERIP target support" + depends on IP_NF_MANGLE depends on NF_CONNTRACK_IPV4 depends on NETFILTER_ADVANCED select NF_CONNTRACK_MARK diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 3ea4127404d6..85a4f21aac1a 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -6,6 +6,7 @@ * Some ARP specific bits are: * * Copyright (C) 2002 David S.
Miller (davem@redhat.com) + * Copyright (C) 2006-2009 Patrick McHardy <kaber@trash.net> * */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt @@ -901,7 +902,7 @@ static int get_info(struct net *net, void __user *user, #endif t = try_then_request_module(xt_find_table_lock(net, NFPROTO_ARP, name), "arptable_%s", name); - if (t && !IS_ERR(t)) { + if (!IS_ERR_OR_NULL(t)) { struct arpt_getinfo info; const struct xt_table_info *private = t->private; #ifdef CONFIG_COMPAT @@ -958,7 +959,7 @@ static int get_entries(struct net *net, struct arpt_get_entries __user *uptr, } t = xt_find_table_lock(net, NFPROTO_ARP, get.name); - if (t && !IS_ERR(t)) { + if (!IS_ERR_OR_NULL(t)) { const struct xt_table_info *private = t->private; duprintf("t->private->number = %u\n", @@ -1001,7 +1002,7 @@ static int __do_replace(struct net *net, const char *name, t = try_then_request_module(xt_find_table_lock(net, NFPROTO_ARP, name), "arptable_%s", name); - if (!t || IS_ERR(t)) { + if (IS_ERR_OR_NULL(t)) { ret = t ? PTR_ERR(t) : -ENOENT; goto free_newinfo_counters_untrans; } @@ -1158,7 +1159,7 @@ static int do_add_counters(struct net *net, const void __user *user, } t = xt_find_table_lock(net, NFPROTO_ARP, name); - if (!t || IS_ERR(t)) { + if (IS_ERR_OR_NULL(t)) { ret = t ? PTR_ERR(t) : -ENOENT; goto free; } @@ -1646,7 +1647,7 @@ static int compat_get_entries(struct net *net, xt_compat_lock(NFPROTO_ARP); t = xt_find_table_lock(net, NFPROTO_ARP, get.name); - if (t && !IS_ERR(t)) { + if (!IS_ERR_OR_NULL(t)) { const struct xt_table_info *private = t->private; struct xt_table_info info; diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c index 79ca5e70d497..eadab1ed6500 100644 --- a/net/ipv4/netfilter/arptable_filter.c +++ b/net/ipv4/netfilter/arptable_filter.c @@ -48,9 +48,7 @@ static int __net_init arptable_filter_net_init(struct net *net) net->ipv4.arptable_filter = arpt_register_table(net, &packet_filter, repl); kfree(repl); - if (IS_ERR(net->ipv4.arptable_filter)) - return PTR_ERR(net->ipv4.arptable_filter); - return 0; + return PTR_RET(net->ipv4.arptable_filter); } static void __net_exit arptable_filter_net_exit(struct net *net) diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 17c5e06da662..d23118d95ff9 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -3,6 +3,7 @@ * * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling * Copyright (C) 2000-2005 Netfilter Core Team <coreteam@netfilter.org> + * Copyright (C) 2006-2010 Patrick McHardy <kaber@trash.net> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -182,8 +183,7 @@ ipt_get_target_c(const struct ipt_entry *e) return ipt_get_target((struct ipt_entry *)e); } -#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \ - defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE) +#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE) static const char *const hooknames[] = { [NF_INET_PRE_ROUTING] = "PREROUTING", [NF_INET_LOCAL_IN] = "INPUT", @@ -259,6 +259,7 @@ static void trace_packet(const struct sk_buff *skb, const char *hookname, *chainname, *comment; const struct ipt_entry *iter; unsigned int rulenum = 0; + struct net *net = dev_net(in ? 
in : out); table_base = private->entries[smp_processor_id()]; root = get_entry(table_base, private->hook_entry[hook]); @@ -271,7 +272,7 @@ static void trace_packet(const struct sk_buff *skb, &chainname, &comment, &rulenum) != 0) break; - nf_log_packet(AF_INET, hook, skb, in, out, &trace_loginfo, + nf_log_packet(net, AF_INET, hook, skb, in, out, &trace_loginfo, "TRACE: %s:%s:%s:%u ", tablename, chainname, comment, rulenum); } @@ -361,8 +362,7 @@ ipt_do_table(struct sk_buff *skb, t = ipt_get_target(e); IP_NF_ASSERT(t->u.kernel.target); -#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \ - defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE) +#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE) /* The packet is traced: log it */ if (unlikely(skb->nf_trace)) trace_packet(skb, hook, in, out, @@ -1090,7 +1090,7 @@ static int get_info(struct net *net, void __user *user, #endif t = try_then_request_module(xt_find_table_lock(net, AF_INET, name), "iptable_%s", name); - if (t && !IS_ERR(t)) { + if (!IS_ERR_OR_NULL(t)) { struct ipt_getinfo info; const struct xt_table_info *private = t->private; #ifdef CONFIG_COMPAT @@ -1149,7 +1149,7 @@ get_entries(struct net *net, struct ipt_get_entries __user *uptr, } t = xt_find_table_lock(net, AF_INET, get.name); - if (t && !IS_ERR(t)) { + if (!IS_ERR_OR_NULL(t)) { const struct xt_table_info *private = t->private; duprintf("t->private->number = %u\n", private->number); if (get.size == private->size) @@ -1189,7 +1189,7 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks, t = try_then_request_module(xt_find_table_lock(net, AF_INET, name), "iptable_%s", name); - if (!t || IS_ERR(t)) { + if (IS_ERR_OR_NULL(t)) { ret = t ? PTR_ERR(t) : -ENOENT; goto free_newinfo_counters_untrans; } @@ -1347,7 +1347,7 @@ do_add_counters(struct net *net, const void __user *user, } t = xt_find_table_lock(net, AF_INET, name); - if (!t || IS_ERR(t)) { + if (IS_ERR_OR_NULL(t)) { ret = t ? PTR_ERR(t) : -ENOENT; goto free; } @@ -1931,7 +1931,7 @@ compat_get_entries(struct net *net, struct compat_ipt_get_entries __user *uptr, xt_compat_lock(AF_INET); t = xt_find_table_lock(net, AF_INET, get.name); - if (t && !IS_ERR(t)) { + if (!IS_ERR_OR_NULL(t)) { const struct xt_table_info *private = t->private; struct xt_table_info info; duprintf("t->private->number = %u\n", private->number); diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index 75e33a7048f8..0b732efd32e2 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -105,7 +105,7 @@ clusterip_config_entry_put(struct clusterip_config *c) * functions are also incrementing the refcount on their own, * so it's safe to remove the entry even if it's in use. 
*/ #ifdef CONFIG_PROC_FS - remove_proc_entry(c->pde->name, c->pde->parent); + proc_remove(c->pde); #endif return; } @@ -631,7 +631,7 @@ static int clusterip_proc_open(struct inode *inode, struct file *file) if (!ret) { struct seq_file *sf = file->private_data; - struct clusterip_config *c = PDE(inode)->data; + struct clusterip_config *c = PDE_DATA(inode); sf->private = c; @@ -643,7 +643,7 @@ static int clusterip_proc_open(struct inode *inode, struct file *file) static int clusterip_proc_release(struct inode *inode, struct file *file) { - struct clusterip_config *c = PDE(inode)->data; + struct clusterip_config *c = PDE_DATA(inode); int ret; ret = seq_release(inode, file); @@ -657,7 +657,7 @@ static int clusterip_proc_release(struct inode *inode, struct file *file) static ssize_t clusterip_proc_write(struct file *file, const char __user *input, size_t size, loff_t *ofs) { - struct clusterip_config *c = PDE(file->f_path.dentry->d_inode)->data; + struct clusterip_config *c = PDE_DATA(file_inode(file)); #define PROC_WRITELEN 10 char buffer[PROC_WRITELEN+1]; unsigned long nodenum; @@ -736,7 +736,7 @@ static void __exit clusterip_tg_exit(void) { pr_info("ClusterIP Version %s unloading\n", CLUSTERIP_VERSION); #ifdef CONFIG_PROC_FS - remove_proc_entry(clusterip_procdir->name, clusterip_procdir->parent); + proc_remove(clusterip_procdir); #endif nf_unregister_hook(&cip_arp_ops); xt_unregister_target(&clusterip_tg_reg); diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c index b5ef3cba2250..f8a222cb6448 100644 --- a/net/ipv4/netfilter/ipt_ULOG.c +++ b/net/ipv4/netfilter/ipt_ULOG.c @@ -4,6 +4,7 @@ * (C) 2000-2004 by Harald Welte <laforge@netfilter.org> * (C) 1999-2001 Paul `Rusty' Russell * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> + * (C) 2005-2007 Patrick McHardy <kaber@trash.net> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -37,7 +38,7 @@ #include <linux/skbuff.h> #include <linux/kernel.h> #include <linux/timer.h> -#include <linux/netlink.h> +#include <net/netlink.h> #include <linux/netdevice.h> #include <linux/mm.h> #include <linux/moduleparam.h> @@ -45,6 +46,7 @@ #include <linux/netfilter/x_tables.h> #include <linux/netfilter_ipv4/ipt_ULOG.h> #include <net/netfilter/nf_log.h> +#include <net/netns/generic.h> #include <net/sock.h> #include <linux/bitops.h> #include <asm/unaligned.h> @@ -78,20 +80,26 @@ typedef struct { struct timer_list timer; /* the timer function */ } ulog_buff_t; -static ulog_buff_t ulog_buffers[ULOG_MAXNLGROUPS]; /* array of buffers */ +static int ulog_net_id __read_mostly; +struct ulog_net { + unsigned int nlgroup[ULOG_MAXNLGROUPS]; + ulog_buff_t ulog_buffers[ULOG_MAXNLGROUPS]; + struct sock *nflognl; + spinlock_t lock; +}; -static struct sock *nflognl; /* our socket */ -static DEFINE_SPINLOCK(ulog_lock); /* spinlock */ +static struct ulog_net *ulog_pernet(struct net *net) +{ + return net_generic(net, ulog_net_id); +} /* send one ulog_buff_t to userspace */ -static void ulog_send(unsigned int nlgroupnum) +static void ulog_send(struct ulog_net *ulog, unsigned int nlgroupnum) { - ulog_buff_t *ub = &ulog_buffers[nlgroupnum]; + ulog_buff_t *ub = &ulog->ulog_buffers[nlgroupnum]; - if (timer_pending(&ub->timer)) { - pr_debug("ulog_send: timer was pending, deleting\n"); - del_timer(&ub->timer); - } + pr_debug("ulog_send: timer is deleting\n"); + del_timer(&ub->timer); if (!ub->skb) { pr_debug("ulog_send: nothing to send\n"); @@ -105,7 +113,8 @@ 
static void ulog_send(unsigned int nlgroupnum) NETLINK_CB(ub->skb).dst_group = nlgroupnum + 1; pr_debug("throwing %d packets to netlink group %u\n", ub->qlen, nlgroupnum + 1); - netlink_broadcast(nflognl, ub->skb, 0, nlgroupnum + 1, GFP_ATOMIC); + netlink_broadcast(ulog->nflognl, ub->skb, 0, nlgroupnum + 1, + GFP_ATOMIC); ub->qlen = 0; ub->skb = NULL; @@ -116,13 +125,16 @@ static void ulog_send(unsigned int nlgroupnum) /* timer function to flush queue in flushtimeout time */ static void ulog_timer(unsigned long data) { + struct ulog_net *ulog = container_of((void *)data, + struct ulog_net, + nlgroup[*(unsigned int *)data]); pr_debug("timer function called, calling ulog_send\n"); /* lock to protect against somebody modifying our structure * from ipt_ulog_target at the same time */ - spin_lock_bh(&ulog_lock); - ulog_send(data); - spin_unlock_bh(&ulog_lock); + spin_lock_bh(&ulog->lock); + ulog_send(ulog, data); + spin_unlock_bh(&ulog->lock); } static struct sk_buff *ulog_alloc_skb(unsigned int size) @@ -162,6 +174,8 @@ static void ipt_ulog_packet(unsigned int hooknum, size_t size, copy_len; struct nlmsghdr *nlh; struct timeval tv; + struct net *net = dev_net(in ? in : out); + struct ulog_net *ulog = ulog_pernet(net); /* ffs == find first bit set, necessary because userspace * is already shifting groupnumber, but we need unshifted. @@ -174,11 +188,11 @@ static void ipt_ulog_packet(unsigned int hooknum, else copy_len = loginfo->copy_range; - size = NLMSG_SPACE(sizeof(*pm) + copy_len); + size = nlmsg_total_size(sizeof(*pm) + copy_len); - ub = &ulog_buffers[groupnum]; + ub = &ulog->ulog_buffers[groupnum]; - spin_lock_bh(&ulog_lock); + spin_lock_bh(&ulog->lock); if (!ub->skb) { if (!(ub->skb = ulog_alloc_skb(size))) @@ -188,7 +202,7 @@ static void ipt_ulog_packet(unsigned int hooknum, /* either the queue len is too high or we don't have * enough room in nlskb left. send it to userspace. 
*/ - ulog_send(groupnum); + ulog_send(ulog, groupnum); if (!(ub->skb = ulog_alloc_skb(size))) goto alloc_failure; @@ -262,16 +276,16 @@ static void ipt_ulog_packet(unsigned int hooknum, if (ub->qlen >= loginfo->qthreshold) { if (loginfo->qthreshold > 1) nlh->nlmsg_type = NLMSG_DONE; - ulog_send(groupnum); + ulog_send(ulog, groupnum); } out_unlock: - spin_unlock_bh(&ulog_lock); + spin_unlock_bh(&ulog->lock); return; alloc_failure: pr_debug("Error building netlink message\n"); - spin_unlock_bh(&ulog_lock); + spin_unlock_bh(&ulog->lock); } static unsigned int @@ -378,58 +392,45 @@ static struct nf_logger ipt_ulog_logger __read_mostly = { .me = THIS_MODULE, }; -static int __init ulog_tg_init(void) +static int __net_init ulog_tg_net_init(struct net *net) { - int ret, i; + int i; + struct ulog_net *ulog = ulog_pernet(net); struct netlink_kernel_cfg cfg = { .groups = ULOG_MAXNLGROUPS, }; - pr_debug("init module\n"); - - if (nlbufsiz > 128*1024) { - pr_warning("Netlink buffer has to be <= 128kB\n"); - return -EINVAL; - } - + spin_lock_init(&ulog->lock); /* initialize ulog_buffers */ for (i = 0; i < ULOG_MAXNLGROUPS; i++) - setup_timer(&ulog_buffers[i].timer, ulog_timer, i); + setup_timer(&ulog->ulog_buffers[i].timer, ulog_timer, i); - nflognl = netlink_kernel_create(&init_net, NETLINK_NFLOG, &cfg); - if (!nflognl) + ulog->nflognl = netlink_kernel_create(net, NETLINK_NFLOG, &cfg); + if (!ulog->nflognl) return -ENOMEM; - ret = xt_register_target(&ulog_tg_reg); - if (ret < 0) { - netlink_kernel_release(nflognl); - return ret; - } if (nflog) - nf_log_register(NFPROTO_IPV4, &ipt_ulog_logger); + nf_log_set(net, NFPROTO_IPV4, &ipt_ulog_logger); return 0; } -static void __exit ulog_tg_exit(void) +static void __net_exit ulog_tg_net_exit(struct net *net) { ulog_buff_t *ub; int i; - - pr_debug("cleanup_module\n"); + struct ulog_net *ulog = ulog_pernet(net); if (nflog) - nf_log_unregister(&ipt_ulog_logger); - xt_unregister_target(&ulog_tg_reg); - netlink_kernel_release(nflognl); + nf_log_unset(net, &ipt_ulog_logger); + + netlink_kernel_release(ulog->nflognl); /* remove pending timers and free allocated skb's */ for (i = 0; i < ULOG_MAXNLGROUPS; i++) { - ub = &ulog_buffers[i]; - if (timer_pending(&ub->timer)) { - pr_debug("timer was pending, deleting\n"); - del_timer(&ub->timer); - } + ub = &ulog->ulog_buffers[i]; + pr_debug("timer is deleting\n"); + del_timer(&ub->timer); if (ub->skb) { kfree_skb(ub->skb); @@ -438,5 +439,50 @@ static void __exit ulog_tg_exit(void) } } +static struct pernet_operations ulog_tg_net_ops = { + .init = ulog_tg_net_init, + .exit = ulog_tg_net_exit, + .id = &ulog_net_id, + .size = sizeof(struct ulog_net), +}; + +static int __init ulog_tg_init(void) +{ + int ret; + pr_debug("init module\n"); + + if (nlbufsiz > 128*1024) { + pr_warn("Netlink buffer has to be <= 128kB\n"); + return -EINVAL; + } + + ret = register_pernet_subsys(&ulog_tg_net_ops); + if (ret) + goto out_pernet; + + ret = xt_register_target(&ulog_tg_reg); + if (ret < 0) + goto out_target; + + if (nflog) + nf_log_register(NFPROTO_IPV4, &ipt_ulog_logger); + + return 0; + +out_target: + unregister_pernet_subsys(&ulog_tg_net_ops); +out_pernet: + return ret; +} + +static void __exit ulog_tg_exit(void) +{ + pr_debug("cleanup_module\n"); + if (nflog) + nf_log_unregister(&ipt_ulog_logger); + xt_unregister_target(&ulog_tg_reg); + unregister_pernet_subsys(&ulog_tg_net_ops); +} + module_init(ulog_tg_init); module_exit(ulog_tg_exit); diff --git a/net/ipv4/netfilter/ipt_rpfilter.c b/net/ipv4/netfilter/ipt_rpfilter.c index 
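The ipt_ULOG rework above is an instance of the standard pernet conversion: global state (the buffers, the netlink socket, the spinlock) moves into a structure that the pernet core allocates once per network namespace and that is reached through net_generic(). A stripped-down sketch of that pattern follows, with illustrative names rather than the ULOG ones.

#include <linux/module.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>

static int example_net_id __read_mostly;

struct example_net {
	spinlock_t lock;	/* protects this namespace's private state */
};

static int __net_init example_net_init(struct net *net)
{
	/* the core has already allocated .size bytes for this net */
	struct example_net *en = net_generic(net, example_net_id);

	spin_lock_init(&en->lock);
	return 0;
}

static void __net_exit example_net_exit(struct net *net)
{
	/* release anything hanging off net_generic(net, example_net_id) */
}

static struct pernet_operations example_net_ops = {
	.init = example_net_init,
	.exit = example_net_exit,
	.id   = &example_net_id,
	.size = sizeof(struct example_net),
};

static int __init example_init(void)
{
	return register_pernet_subsys(&example_net_ops);
}

static void __exit example_exit(void)
{
	unregister_pernet_subsys(&example_net_ops);
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");

Because .init runs for every namespace as it is created (and .exit as it dies), the module init path shrinks to registration only, which is exactly the shape ulog_tg_init() takes after the conversion.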
c30130062cd6..c49dcd0284a0 100644 --- a/net/ipv4/netfilter/ipt_rpfilter.c +++ b/net/ipv4/netfilter/ipt_rpfilter.c @@ -66,6 +66,12 @@ static bool rpfilter_lookup_reverse(struct flowi4 *fl4, return dev_match; } +static bool rpfilter_is_local(const struct sk_buff *skb) +{ + const struct rtable *rt = skb_rtable(skb); + return rt && (rt->rt_flags & RTCF_LOCAL); +} + static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_rpfilter_info *info; @@ -76,7 +82,7 @@ static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par) info = par->matchinfo; invert = info->flags & XT_RPFILTER_INVERT; - if (par->in->flags & IFF_LOOPBACK) + if (rpfilter_is_local(skb)) return true ^ invert; iph = ip_hdr(skb); diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c index 85d88f206447..cba5658ec82c 100644 --- a/net/ipv4/netfilter/iptable_mangle.c +++ b/net/ipv4/netfilter/iptable_mangle.c @@ -44,6 +44,7 @@ ipt_mangle_out(struct sk_buff *skb, const struct net_device *out) u_int8_t tos; __be32 saddr, daddr; u_int32_t mark; + int err; /* root is playing with raw sockets. */ if (skb->len < sizeof(struct iphdr) || @@ -66,9 +67,11 @@ ipt_mangle_out(struct sk_buff *skb, const struct net_device *out) if (iph->saddr != saddr || iph->daddr != daddr || skb->mark != mark || - iph->tos != tos) - if (ip_route_me_harder(skb, RTN_UNSPEC)) - ret = NF_DROP; + iph->tos != tos) { + err = ip_route_me_harder(skb, RTN_UNSPEC); + if (err < 0) + ret = NF_DROP_ERR(err); + } } return ret; diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c index eeaff7e4acb5..6383273d54e1 100644 --- a/net/ipv4/netfilter/iptable_nat.c +++ b/net/ipv4/netfilter/iptable_nat.c @@ -176,6 +176,7 @@ nf_nat_ipv4_out(unsigned int hooknum, #ifdef CONFIG_XFRM const struct nf_conn *ct; enum ip_conntrack_info ctinfo; + int err; #endif unsigned int ret; @@ -195,9 +196,11 @@ nf_nat_ipv4_out(unsigned int hooknum, ct->tuplehash[!dir].tuple.dst.u3.ip) || (ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMP && ct->tuplehash[dir].tuple.src.u.all != - ct->tuplehash[!dir].tuple.dst.u.all)) - if (nf_xfrm_me_harder(skb, AF_INET) < 0) - ret = NF_DROP; + ct->tuplehash[!dir].tuple.dst.u.all)) { + err = nf_xfrm_me_harder(skb, AF_INET); + if (err < 0) + ret = NF_DROP_ERR(err); + } } #endif return ret; @@ -213,6 +216,7 @@ nf_nat_ipv4_local_fn(unsigned int hooknum, const struct nf_conn *ct; enum ip_conntrack_info ctinfo; unsigned int ret; + int err; /* root is playing with raw sockets. 
*/ if (skb->len < sizeof(struct iphdr) || @@ -226,16 +230,19 @@ nf_nat_ipv4_local_fn(unsigned int hooknum, if (ct->tuplehash[dir].tuple.dst.u3.ip != ct->tuplehash[!dir].tuple.src.u3.ip) { - if (ip_route_me_harder(skb, RTN_UNSPEC)) - ret = NF_DROP; + err = ip_route_me_harder(skb, RTN_UNSPEC); + if (err < 0) + ret = NF_DROP_ERR(err); } #ifdef CONFIG_XFRM else if (!(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) && ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMP && ct->tuplehash[dir].tuple.dst.u.all != - ct->tuplehash[!dir].tuple.src.u.all) - if (nf_xfrm_me_harder(skb, AF_INET) < 0) - ret = NF_DROP; + ct->tuplehash[!dir].tuple.src.u.all) { + err = nf_xfrm_me_harder(skb, AF_INET); + if (err < 0) + ret = NF_DROP_ERR(err); + } #endif } return ret; diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index fcdd0c2406e6..567d84168bd2 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -1,6 +1,7 @@ /* (C) 1999-2001 Paul `Rusty' Russell * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> + * (C) 2006-2012 Patrick McHardy <kaber@trash.net> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -100,7 +101,6 @@ static unsigned int ipv4_helper(unsigned int hooknum, enum ip_conntrack_info ctinfo; const struct nf_conn_help *help; const struct nf_conntrack_helper *helper; - unsigned int ret; /* This is where we call the helper: as the packet goes out. */ ct = nf_ct_get(skb, &ctinfo); @@ -116,13 +116,8 @@ static unsigned int ipv4_helper(unsigned int hooknum, if (!helper) return NF_ACCEPT; - ret = helper->help(skb, skb_network_offset(skb) + ip_hdrlen(skb), - ct, ctinfo); - if (ret != NF_ACCEPT && (ret & NF_VERDICT_MASK) != NF_QUEUE) { - nf_log_packet(NFPROTO_IPV4, hooknum, skb, in, out, NULL, - "nf_ct_%s: dropping packet", helper->name); - } - return ret; + return helper->help(skb, skb_network_offset(skb) + ip_hdrlen(skb), + ct, ctinfo); } static unsigned int ipv4_confirm(unsigned int hooknum, @@ -420,54 +415,43 @@ static int ipv4_net_init(struct net *net) { int ret = 0; - ret = nf_conntrack_l4proto_register(net, - &nf_conntrack_l4proto_tcp4); + ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_tcp4); if (ret < 0) { - pr_err("nf_conntrack_l4proto_tcp4 :protocol register failed\n"); + pr_err("nf_conntrack_tcp4: pernet registration failed\n"); goto out_tcp; } - ret = nf_conntrack_l4proto_register(net, - &nf_conntrack_l4proto_udp4); + ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_udp4); if (ret < 0) { - pr_err("nf_conntrack_l4proto_udp4 :protocol register failed\n"); + pr_err("nf_conntrack_udp4: pernet registration failed\n"); goto out_udp; } - ret = nf_conntrack_l4proto_register(net, - &nf_conntrack_l4proto_icmp); + ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_icmp); if (ret < 0) { - pr_err("nf_conntrack_l4proto_icmp4 :protocol register failed\n"); + pr_err("nf_conntrack_icmp4: pernet registration failed\n"); goto out_icmp; } - ret = nf_conntrack_l3proto_register(net, - &nf_conntrack_l3proto_ipv4); + ret = nf_ct_l3proto_pernet_register(net, &nf_conntrack_l3proto_ipv4); if (ret < 0) { - pr_err("nf_conntrack_l3proto_ipv4 :protocol register failed\n"); + pr_err("nf_conntrack_ipv4: pernet registration failed\n"); goto out_ipv4; } return 0; out_ipv4: - nf_conntrack_l4proto_unregister(net, - &nf_conntrack_l4proto_icmp); + 
nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_icmp); out_icmp: - nf_conntrack_l4proto_unregister(net, - &nf_conntrack_l4proto_udp4); + nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_udp4); out_udp: - nf_conntrack_l4proto_unregister(net, - &nf_conntrack_l4proto_tcp4); + nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_tcp4); out_tcp: return ret; } static void ipv4_net_exit(struct net *net) { - nf_conntrack_l3proto_unregister(net, - &nf_conntrack_l3proto_ipv4); - nf_conntrack_l4proto_unregister(net, - &nf_conntrack_l4proto_icmp); - nf_conntrack_l4proto_unregister(net, - &nf_conntrack_l4proto_udp4); - nf_conntrack_l4proto_unregister(net, - &nf_conntrack_l4proto_tcp4); + nf_ct_l3proto_pernet_unregister(net, &nf_conntrack_l3proto_ipv4); + nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_icmp); + nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_udp4); + nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_tcp4); } static struct pernet_operations ipv4_net_ops = { @@ -500,16 +484,49 @@ static int __init nf_conntrack_l3proto_ipv4_init(void) pr_err("nf_conntrack_ipv4: can't register hooks.\n"); goto cleanup_pernet; } + + ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_tcp4); + if (ret < 0) { + pr_err("nf_conntrack_ipv4: can't register tcp4 proto.\n"); + goto cleanup_hooks; + } + + ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_udp4); + if (ret < 0) { + pr_err("nf_conntrack_ipv4: can't register udp4 proto.\n"); + goto cleanup_tcp4; + } + + ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_icmp); + if (ret < 0) { + pr_err("nf_conntrack_ipv4: can't register icmpv4 proto.\n"); + goto cleanup_udp4; + } + + ret = nf_ct_l3proto_register(&nf_conntrack_l3proto_ipv4); + if (ret < 0) { + pr_err("nf_conntrack_ipv4: can't register ipv4 proto.\n"); + goto cleanup_icmpv4; + } + #if defined(CONFIG_PROC_FS) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT) ret = nf_conntrack_ipv4_compat_init(); if (ret < 0) - goto cleanup_hooks; + goto cleanup_proto; #endif return ret; #if defined(CONFIG_PROC_FS) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT) + cleanup_proto: + nf_ct_l3proto_unregister(&nf_conntrack_l3proto_ipv4); +#endif + cleanup_icmpv4: + nf_ct_l4proto_unregister(&nf_conntrack_l4proto_icmp); + cleanup_udp4: + nf_ct_l4proto_unregister(&nf_conntrack_l4proto_udp4); + cleanup_tcp4: + nf_ct_l4proto_unregister(&nf_conntrack_l4proto_tcp4); cleanup_hooks: nf_unregister_hooks(ipv4_conntrack_ops, ARRAY_SIZE(ipv4_conntrack_ops)); -#endif cleanup_pernet: unregister_pernet_subsys(&ipv4_net_ops); cleanup_sockopt: @@ -523,6 +540,10 @@ static void __exit nf_conntrack_l3proto_ipv4_fini(void) #if defined(CONFIG_PROC_FS) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT) nf_conntrack_ipv4_compat_fini(); #endif + nf_ct_l3proto_unregister(&nf_conntrack_l3proto_ipv4); + nf_ct_l4proto_unregister(&nf_conntrack_l4proto_icmp); + nf_ct_l4proto_unregister(&nf_conntrack_l4proto_udp4); + nf_ct_l4proto_unregister(&nf_conntrack_l4proto_tcp4); nf_unregister_hooks(ipv4_conntrack_ops, ARRAY_SIZE(ipv4_conntrack_ops)); unregister_pernet_subsys(&ipv4_net_ops); nf_unregister_sockopt(&so_getorigdst); diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c index 9682b36df38c..4c48e434bb1f 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c @@ -2,6 +2,7 @@ * * (C) 1999-2001 Paul `Rusty' Russell * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org> + * (C) 
2006-2010 Patrick McHardy <kaber@trash.net> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -417,12 +418,12 @@ static int __net_init ip_conntrack_net_init(struct net *net) { struct proc_dir_entry *proc, *proc_exp, *proc_stat; - proc = proc_net_fops_create(net, "ip_conntrack", 0440, &ct_file_ops); + proc = proc_create("ip_conntrack", 0440, net->proc_net, &ct_file_ops); if (!proc) goto err1; - proc_exp = proc_net_fops_create(net, "ip_conntrack_expect", 0440, - &ip_exp_file_ops); + proc_exp = proc_create("ip_conntrack_expect", 0440, net->proc_net, + &ip_exp_file_ops); if (!proc_exp) goto err2; @@ -433,9 +434,9 @@ static int __net_init ip_conntrack_net_init(struct net *net) return 0; err3: - proc_net_remove(net, "ip_conntrack_expect"); + remove_proc_entry("ip_conntrack_expect", net->proc_net); err2: - proc_net_remove(net, "ip_conntrack"); + remove_proc_entry("ip_conntrack", net->proc_net); err1: return -ENOMEM; } @@ -443,8 +444,8 @@ err1: static void __net_exit ip_conntrack_net_exit(struct net *net) { remove_proc_entry("ip_conntrack", net->proc_net_stat); - proc_net_remove(net, "ip_conntrack_expect"); - proc_net_remove(net, "ip_conntrack"); + remove_proc_entry("ip_conntrack_expect", net->proc_net); + remove_proc_entry("ip_conntrack", net->proc_net); } static struct pernet_operations ip_conntrack_net_ops = { diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c index 5241d997ab75..a338dad41b7d 100644 --- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c @@ -1,5 +1,6 @@ /* (C) 1999-2001 Paul `Rusty' Russell * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> + * (C) 2006-2010 Patrick McHardy <kaber@trash.net> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -187,8 +188,8 @@ icmp_error(struct net *net, struct nf_conn *tmpl, icmph = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_ih), &_ih); if (icmph == NULL) { if (LOG_INVALID(net, IPPROTO_ICMP)) - nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, - "nf_ct_icmp: short packet "); + nf_log_packet(net, PF_INET, 0, skb, NULL, NULL, + NULL, "nf_ct_icmp: short packet "); return -NF_ACCEPT; } @@ -196,7 +197,7 @@ icmp_error(struct net *net, struct nf_conn *tmpl, if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING && nf_ip_checksum(skb, hooknum, dataoff, 0)) { if (LOG_INVALID(net, IPPROTO_ICMP)) - nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, + nf_log_packet(net, PF_INET, 0, skb, NULL, NULL, NULL, "nf_ct_icmp: bad HW ICMP checksum "); return -NF_ACCEPT; } @@ -209,7 +210,7 @@ icmp_error(struct net *net, struct nf_conn *tmpl, */ if (icmph->type > NR_ICMP_TYPES) { if (LOG_INVALID(net, IPPROTO_ICMP)) - nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, + nf_log_packet(net, PF_INET, 0, skb, NULL, NULL, NULL, "nf_ct_icmp: invalid ICMP type "); return -NF_ACCEPT; } diff --git a/net/ipv4/netfilter/nf_nat_h323.c b/net/ipv4/netfilter/nf_nat_h323.c index 9c3db10b22d3..9eea059dd621 100644 --- a/net/ipv4/netfilter/nf_nat_h323.c +++ b/net/ipv4/netfilter/nf_nat_h323.c @@ -2,6 +2,7 @@ * H.323 extension for NAT alteration. * * Copyright (c) 2006 Jing Min Zhao <zhaojingmin@users.sourceforge.net> + * Copyright (c) 2006-2012 Patrick McHardy <kaber@trash.net> * * This source code is licensed under General Public License version 2. 
* diff --git a/net/ipv4/netfilter/nf_nat_pptp.c b/net/ipv4/netfilter/nf_nat_pptp.c index a06d7d74817d..657d2307f031 100644 --- a/net/ipv4/netfilter/nf_nat_pptp.c +++ b/net/ipv4/netfilter/nf_nat_pptp.c @@ -13,6 +13,8 @@ * * Development of this code funded by Astaro AG (http://www.astaro.com/) * + * (C) 2006-2012 Patrick McHardy <kaber@trash.net> + * * TODO: - NAT to a unique tuple, not to TCP source port * (needs netfilter tuple reservation) */ diff --git a/net/ipv4/netfilter/nf_nat_proto_gre.c b/net/ipv4/netfilter/nf_nat_proto_gre.c index ea44f02563b5..690d890111bb 100644 --- a/net/ipv4/netfilter/nf_nat_proto_gre.c +++ b/net/ipv4/netfilter/nf_nat_proto_gre.c @@ -21,6 +21,8 @@ * * Development of this code funded by Astaro AG (http://www.astaro.com/) * + * (C) 2006-2012 Patrick McHardy <kaber@trash.net> + * */ #include <linux/module.h> diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c index bac712293fd6..5f011cc89cd9 100644 --- a/net/ipv4/netfilter/nf_nat_snmp_basic.c +++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c @@ -38,6 +38,8 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * * Author: James Morris <jmorris@intercode.com.au> + * + * Copyright (c) 2006-2010 Patrick McHardy <kaber@trash.net> */ #include <linux/module.h> #include <linux/moduleparam.h> diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 6f9c07268cf6..7d93d62cd5fd 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -322,8 +322,8 @@ void ping_err(struct sk_buff *skb, u32 info) struct iphdr *iph = (struct iphdr *)skb->data; struct icmphdr *icmph = (struct icmphdr *)(skb->data+(iph->ihl<<2)); struct inet_sock *inet_sock; - int type = icmph->type; - int code = icmph->code; + int type = icmp_hdr(skb)->type; + int code = icmp_hdr(skb)->code; struct net *net = dev_net(skb->dev); struct sock *sk; int harderr; @@ -514,9 +514,8 @@ static int ping_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, ipc.opt = NULL; ipc.oif = sk->sk_bound_dev_if; ipc.tx_flags = 0; - err = sock_tx_timestamp(sk, &ipc.tx_flags); - if (err) - return err; + + sock_tx_timestamp(sk, &ipc.tx_flags); if (msg->msg_controllen) { err = ip_cmsg_send(sock_net(sk), msg, &ipc); @@ -889,7 +888,7 @@ static int ping_proc_register(struct net *net) struct proc_dir_entry *p; int rc = 0; - p = proc_net_fops_create(net, "icmp", S_IRUGO, &ping_seq_fops); + p = proc_create("icmp", S_IRUGO, net->proc_net, &ping_seq_fops); if (!p) rc = -ENOMEM; return rc; @@ -897,7 +896,7 @@ static int ping_proc_register(struct net *net) static void ping_proc_unregister(struct net *net) { - proc_net_remove(net, "icmp"); + remove_proc_entry("icmp", net->proc_net); } diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 8de53e1ddd54..2a5bf86d2415 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -125,6 +125,7 @@ static const struct snmp_mib snmp4_ipextstats_list[] = { SNMP_MIB_ITEM("OutMcastOctets", IPSTATS_MIB_OUTMCASTOCTETS), SNMP_MIB_ITEM("InBcastOctets", IPSTATS_MIB_INBCASTOCTETS), SNMP_MIB_ITEM("OutBcastOctets", IPSTATS_MIB_OUTBCASTOCTETS), + SNMP_MIB_ITEM("InCsumErrors", IPSTATS_MIB_CSUMERRORS), SNMP_MIB_SENTINEL }; @@ -162,6 +163,7 @@ static const struct snmp_mib snmp4_tcp_list[] = { SNMP_MIB_ITEM("RetransSegs", TCP_MIB_RETRANSSEGS), SNMP_MIB_ITEM("InErrs", TCP_MIB_INERRS), SNMP_MIB_ITEM("OutRsts", TCP_MIB_OUTRSTS), + SNMP_MIB_ITEM("InCsumErrors", TCP_MIB_CSUMERRORS), SNMP_MIB_SENTINEL }; @@ -172,6 +174,7 @@ static const struct snmp_mib snmp4_udp_list[] = { SNMP_MIB_ITEM("OutDatagrams", UDP_MIB_OUTDATAGRAMS), 
SNMP_MIB_ITEM("RcvbufErrors", UDP_MIB_RCVBUFERRORS), SNMP_MIB_ITEM("SndbufErrors", UDP_MIB_SNDBUFERRORS), + SNMP_MIB_ITEM("InCsumErrors", UDP_MIB_CSUMERRORS), SNMP_MIB_SENTINEL }; @@ -224,6 +227,8 @@ static const struct snmp_mib snmp4_net_list[] = { SNMP_MIB_ITEM("TCPForwardRetrans", LINUX_MIB_TCPFORWARDRETRANS), SNMP_MIB_ITEM("TCPSlowStartRetrans", LINUX_MIB_TCPSLOWSTARTRETRANS), SNMP_MIB_ITEM("TCPTimeouts", LINUX_MIB_TCPTIMEOUTS), + SNMP_MIB_ITEM("TCPLossProbes", LINUX_MIB_TCPLOSSPROBES), + SNMP_MIB_ITEM("TCPLossProbeRecovery", LINUX_MIB_TCPLOSSPROBERECOVERY), SNMP_MIB_ITEM("TCPRenoRecoveryFail", LINUX_MIB_TCPRENORECOVERYFAIL), SNMP_MIB_ITEM("TCPSackRecoveryFail", LINUX_MIB_TCPSACKRECOVERYFAIL), SNMP_MIB_ITEM("TCPSchedulerFailed", LINUX_MIB_TCPSCHEDULERFAILED), @@ -267,6 +272,7 @@ static const struct snmp_mib snmp4_net_list[] = { SNMP_MIB_ITEM("TCPFastOpenPassiveFail", LINUX_MIB_TCPFASTOPENPASSIVEFAIL), SNMP_MIB_ITEM("TCPFastOpenListenOverflow", LINUX_MIB_TCPFASTOPENLISTENOVERFLOW), SNMP_MIB_ITEM("TCPFastOpenCookieReqd", LINUX_MIB_TCPFASTOPENCOOKIEREQD), + SNMP_MIB_ITEM("TCPSpuriousRtxHostQueues", LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES), SNMP_MIB_SENTINEL }; @@ -319,15 +325,16 @@ static void icmp_put(struct seq_file *seq) struct net *net = seq->private; atomic_long_t *ptr = net->mib.icmpmsg_statistics->mibs; - seq_puts(seq, "\nIcmp: InMsgs InErrors"); + seq_puts(seq, "\nIcmp: InMsgs InErrors InCsumErrors"); for (i=0; icmpmibmap[i].name != NULL; i++) seq_printf(seq, " In%s", icmpmibmap[i].name); seq_printf(seq, " OutMsgs OutErrors"); for (i=0; icmpmibmap[i].name != NULL; i++) seq_printf(seq, " Out%s", icmpmibmap[i].name); - seq_printf(seq, "\nIcmp: %lu %lu", + seq_printf(seq, "\nIcmp: %lu %lu %lu", snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_INMSGS), - snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_INERRORS)); + snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_INERRORS), + snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_CSUMERRORS)); for (i=0; icmpmibmap[i].name != NULL; i++) seq_printf(seq, " %lu", atomic_long_read(ptr + icmpmibmap[i].index)); @@ -471,28 +478,29 @@ static const struct file_operations netstat_seq_fops = { static __net_init int ip_proc_init_net(struct net *net) { - if (!proc_net_fops_create(net, "sockstat", S_IRUGO, &sockstat_seq_fops)) + if (!proc_create("sockstat", S_IRUGO, net->proc_net, + &sockstat_seq_fops)) goto out_sockstat; - if (!proc_net_fops_create(net, "netstat", S_IRUGO, &netstat_seq_fops)) + if (!proc_create("netstat", S_IRUGO, net->proc_net, &netstat_seq_fops)) goto out_netstat; - if (!proc_net_fops_create(net, "snmp", S_IRUGO, &snmp_seq_fops)) + if (!proc_create("snmp", S_IRUGO, net->proc_net, &snmp_seq_fops)) goto out_snmp; return 0; out_snmp: - proc_net_remove(net, "netstat"); + remove_proc_entry("netstat", net->proc_net); out_netstat: - proc_net_remove(net, "sockstat"); + remove_proc_entry("sockstat", net->proc_net); out_sockstat: return -ENOMEM; } static __net_exit void ip_proc_exit_net(struct net *net) { - proc_net_remove(net, "snmp"); - proc_net_remove(net, "netstat"); - proc_net_remove(net, "sockstat"); + remove_proc_entry("snmp", net->proc_net); + remove_proc_entry("netstat", net->proc_net); + remove_proc_entry("sockstat", net->proc_net); } static __net_initdata struct pernet_operations ip_proc_ops = { diff --git a/net/ipv4/protocol.c b/net/ipv4/protocol.c index 0f9d09f54bd9..ce848461acbb 100644 --- a/net/ipv4/protocol.c +++ b/net/ipv4/protocol.c @@ -37,6 
+37,12 @@ const struct net_offload __rcu *inet_offloads[MAX_INET_PROTOS] __read_mostly; int inet_add_protocol(const struct net_protocol *prot, unsigned char protocol) { + if (!prot->netns_ok) { + pr_err("Protocol %u is not namespace aware, cannot register.\n", + protocol); + return -EINVAL; + } + return !cmpxchg((const struct net_protocol **)&inet_protos[protocol], NULL, prot) ? 0 : -1; } diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 6f08991409c3..dd44e0ab600c 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -111,9 +111,7 @@ EXPORT_SYMBOL_GPL(raw_unhash_sk); static struct sock *__raw_v4_lookup(struct net *net, struct sock *sk, unsigned short num, __be32 raddr, __be32 laddr, int dif) { - struct hlist_node *node; - - sk_for_each_from(sk, node) { + sk_for_each_from(sk) { struct inet_sock *inet = inet_sk(sk); if (net_eq(sock_net(sk), net) && inet->inet_num == num && @@ -914,9 +912,7 @@ static struct sock *raw_get_first(struct seq_file *seq) for (state->bucket = 0; state->bucket < RAW_HTABLE_SIZE; ++state->bucket) { - struct hlist_node *node; - - sk_for_each(sk, node, &state->h->ht[state->bucket]) + sk_for_each(sk, &state->h->ht[state->bucket]) if (sock_net(sk) == seq_file_net(seq)) goto found; } @@ -1050,7 +1046,7 @@ static const struct file_operations raw_seq_fops = { static __net_init int raw_init_net(struct net *net) { - if (!proc_net_fops_create(net, "raw", S_IRUGO, &raw_seq_fops)) + if (!proc_create("raw", S_IRUGO, net->proc_net, &raw_seq_fops)) return -ENOMEM; return 0; @@ -1058,7 +1054,7 @@ static __net_init int raw_init_net(struct net *net) static __net_exit void raw_exit_net(struct net *net) { - proc_net_remove(net, "raw"); + remove_proc_entry("raw", net->proc_net); } static __net_initdata struct pernet_operations raw_net_ops = { diff --git a/net/ipv4/route.c b/net/ipv4/route.c index a0fcc47fee73..550781a17b34 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -117,15 +117,11 @@ #define RT_GC_TIMEOUT (300*HZ) static int ip_rt_max_size; -static int ip_rt_gc_timeout __read_mostly = RT_GC_TIMEOUT; -static int ip_rt_gc_interval __read_mostly = 60 * HZ; -static int ip_rt_gc_min_interval __read_mostly = HZ / 2; static int ip_rt_redirect_number __read_mostly = 9; static int ip_rt_redirect_load __read_mostly = HZ / 50; static int ip_rt_redirect_silence __read_mostly = ((HZ / 50) << (9 + 1)); static int ip_rt_error_cost __read_mostly = HZ; static int ip_rt_error_burst __read_mostly = 5 * HZ; -static int ip_rt_gc_elasticity __read_mostly = 8; static int ip_rt_mtu_expires __read_mostly = 10 * 60 * HZ; static int ip_rt_min_pmtu __read_mostly = 512 + 20 + 20; static int ip_rt_min_advmss __read_mostly = 256; @@ -384,8 +380,8 @@ static int __net_init ip_rt_do_proc_init(struct net *net) { struct proc_dir_entry *pde; - pde = proc_net_fops_create(net, "rt_cache", S_IRUGO, - &rt_cache_seq_fops); + pde = proc_create("rt_cache", S_IRUGO, net->proc_net, + &rt_cache_seq_fops); if (!pde) goto err1; @@ -2315,7 +2311,7 @@ nla_put_failure: return -EMSGSIZE; } -static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void *arg) +static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh) { struct net *net = sock_net(in_skb->sk); struct rtmsg *rtm; @@ -2423,6 +2419,11 @@ void ip_rt_multicast_event(struct in_device *in_dev) } #ifdef CONFIG_SYSCTL +static int ip_rt_gc_timeout __read_mostly = RT_GC_TIMEOUT; +static int ip_rt_gc_interval __read_mostly = 60 * HZ; +static int ip_rt_gc_min_interval __read_mostly = HZ / 2; +static int ip_rt_gc_elasticity __read_mostly = 8; + 
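The inet_add_protocol() hunk above turns namespace awareness from a convention into a hard requirement. A minimal sketch of what a conforming registration now has to look like; foo_rcv, foo_err and IPPROTO_FOO are illustrative names, not existing kernel symbols:

static int foo_rcv(struct sk_buff *skb);
static void foo_err(struct sk_buff *skb, u32 info);

static const struct net_protocol foo_protocol = {
	.handler	= foo_rcv,
	.err_handler	= foo_err,
	.no_policy	= 1,
	.netns_ok	= 1,	/* mandatory now: registration fails with -EINVAL otherwise */
};

static int __init foo_init(void)
{
	return inet_add_protocol(&foo_protocol, IPPROTO_FOO);
}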
static int ipv4_sysctl_rtcache_flush(ctl_table *__ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index b236ef04914f..b05c96e7af8b 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -232,7 +232,8 @@ static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb, * * return false if we decode an option that should not be. */ -bool cookie_check_timestamp(struct tcp_options_received *tcp_opt, bool *ecn_ok) +bool cookie_check_timestamp(struct tcp_options_received *tcp_opt, + struct net *net, bool *ecn_ok) { /* echoed timestamp, lowest bits contain options */ u32 options = tcp_opt->rcv_tsecr & TSMASK; @@ -247,7 +248,7 @@ bool cookie_check_timestamp(struct tcp_options_received *tcp_opt, bool *ecn_ok) tcp_opt->sack_ok = (options & (1 << 4)) ? TCP_SACK_SEEN : 0; *ecn_ok = (options >> 5) & 1; - if (*ecn_ok && !sysctl_tcp_ecn) + if (*ecn_ok && !net->ipv4.sysctl_tcp_ecn) return false; if (tcp_opt->sack_ok && !sysctl_tcp_sack) @@ -266,7 +267,6 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, struct ip_options *opt) { struct tcp_options_received tcp_opt; - const u8 *hash_location; struct inet_request_sock *ireq; struct tcp_request_sock *treq; struct tcp_sock *tp = tcp_sk(sk); @@ -293,9 +293,9 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, /* check for timestamp cookie support */ memset(&tcp_opt, 0, sizeof(tcp_opt)); - tcp_parse_options(skb, &tcp_opt, &hash_location, 0, NULL); + tcp_parse_options(skb, &tcp_opt, 0, NULL); - if (!cookie_check_timestamp(&tcp_opt, &ecn_ok)) + if (!cookie_check_timestamp(&tcp_opt, sock_net(sk), &ecn_ok)) goto out; ret = NULL; @@ -348,8 +348,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, * hasn't changed since we received the original syn, but I see * no easy way to do this. */ - flowi4_init_output(&fl4, 0, sk->sk_mark, RT_CONN_FLAGS(sk), - RT_SCOPE_UNIVERSE, IPPROTO_TCP, + flowi4_init_output(&fl4, sk->sk_bound_dev_if, sk->sk_mark, + RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, IPPROTO_TCP, inet_sk_flowi_flags(sk), (opt && opt->srr) ? 
opt->faddr : ireq->rmt_addr, ireq->loc_addr, th->source, th->dest); diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index d84400b65049..fa2f63fc453b 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -27,7 +27,8 @@ #include <net/tcp_memcontrol.h> static int zero; -static int two = 2; +static int one = 1; +static int four = 4; static int tcp_retr1_max = 255; static int ip_local_port_range_min[] = { 1, 1 }; static int ip_local_port_range_max[] = { 65535, 65535 }; @@ -232,8 +233,8 @@ static int ipv4_tcp_mem(ctl_table *ctl, int write, return 0; } -int proc_tcp_fastopen_key(ctl_table *ctl, int write, void __user *buffer, - size_t *lenp, loff_t *ppos) +static int proc_tcp_fastopen_key(ctl_table *ctl, int write, void __user *buffer, + size_t *lenp, loff_t *ppos) { ctl_table tbl = { .maxlen = (TCP_FASTOPEN_KEY_LENGTH * 2 + 10) }; struct tcp_fastopen_context *ctxt; @@ -538,13 +539,6 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec }, { - .procname = "tcp_ecn", - .data = &sysctl_tcp_ecn, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, - { .procname = "tcp_dsack", .data = &sysctl_tcp_dsack, .maxlen = sizeof(int), @@ -556,14 +550,16 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_wmem, .maxlen = sizeof(sysctl_tcp_wmem), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dointvec_minmax, + .extra1 = &one, }, { .procname = "tcp_rmem", .data = &sysctl_tcp_rmem, .maxlen = sizeof(sysctl_tcp_rmem), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dointvec_minmax, + .extra1 = &one, }, { .procname = "tcp_app_win", @@ -596,13 +592,6 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec }, { - .procname = "tcp_frto_response", - .data = &sysctl_tcp_frto_response, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, - { .procname = "tcp_low_latency", .data = &sysctl_tcp_low_latency, .maxlen = sizeof(int), @@ -637,13 +626,6 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_tcp_congestion_control, }, { - .procname = "tcp_abc", - .data = &sysctl_tcp_abc, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { .procname = "tcp_mtu_probing", .data = &sysctl_tcp_mtu_probing, .maxlen = sizeof(int), @@ -744,13 +726,6 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec, }, { - .procname = "tcp_cookie_size", - .data = &sysctl_tcp_cookie_size, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, - { .procname = "tcp_thin_linear_timeouts", .data = &sysctl_tcp_thin_linear_timeouts, .maxlen = sizeof(int), @@ -771,7 +746,7 @@ static struct ctl_table ipv4_table[] = { .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = &zero, - .extra2 = &two, + .extra2 = &four, }, { .procname = "udp_mem", @@ -786,7 +761,7 @@ static struct ctl_table ipv4_table[] = { .maxlen = sizeof(sysctl_udp_rmem_min), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &zero + .extra1 = &one }, { .procname = "udp_wmem_min", @@ -794,7 +769,7 @@ static struct ctl_table ipv4_table[] = { .maxlen = sizeof(sysctl_udp_wmem_min), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &zero + .extra1 = &one }, { } }; @@ -850,6 +825,13 @@ static struct ctl_table ipv4_net_table[] = { .proc_handler = ipv4_ping_group_range, }, { + .procname = "tcp_ecn", + .data = &init_net.ipv4.sysctl_tcp_ecn, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = 
proc_dointvec + }, + { .procname = "tcp_mem", .maxlen = sizeof(init_net.ipv4.sysctl_tcp_mem), .mode = 0644, @@ -882,6 +864,8 @@ static __net_init int ipv4_sysctl_init_net(struct net *net) &net->ipv4.sysctl_icmp_ratemask; table[6].data = &net->ipv4.sysctl_ping_group_range; + table[7].data = + &net->ipv4.sysctl_tcp_ecn; /* Don't export sysctls to unprivileged users */ if (net->user_ns != &init_user_ns) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 2aa69c8ae60c..dcb116dde216 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -400,6 +400,8 @@ void tcp_init_sock(struct sock *sk) tcp_enable_early_retrans(tp); icsk->icsk_ca_ops = &tcp_init_congestion_ops; + tp->tsoffset = 0; + sk->sk_state = TCP_CLOSE; sk->sk_write_space = sk_stream_write_space; @@ -407,15 +409,6 @@ void tcp_init_sock(struct sock *sk) icsk->icsk_sync_mss = tcp_sync_mss; - /* TCP Cookie Transactions */ - if (sysctl_tcp_cookie_size > 0) { - /* Default, cookies without s_data_payload. */ - tp->cookie_values = - kzalloc(sizeof(*tp->cookie_values), - sk->sk_allocation); - if (tp->cookie_values != NULL) - kref_init(&tp->cookie_values->kref); - } /* Presumed zeroed, in order of appearance: * cookie_in_always, cookie_out_never, * s_data_constant, s_data_in, s_data_out @@ -773,7 +766,7 @@ struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp) * Make sure that we have exactly size bytes * available to the caller, no more, no less. */ - skb->avail_size = size; + skb->reserved_tailroom = skb->end - skb->tail - size; return skb; } __kfree_skb(skb); @@ -895,6 +888,7 @@ new_segment: get_page(page); skb_fill_page_desc(skb, i, page, offset, copy); } + skb_shinfo(skb)->tx_flags |= SKBTX_SHARED_FRAG; skb->len += copy; skb->data_len += copy; @@ -1406,10 +1400,10 @@ static void tcp_service_net_dma(struct sock *sk, bool wait) return; last_issued = tp->ucopy.dma_cookie; - dma_async_memcpy_issue_pending(tp->ucopy.dma_chan); + dma_async_issue_pending(tp->ucopy.dma_chan); do { - if (dma_async_memcpy_complete(tp->ucopy.dma_chan, + if (dma_async_is_tx_complete(tp->ucopy.dma_chan, last_issued, &done, &used) == DMA_SUCCESS) { /* Safe to free early-copied skbs now */ @@ -1751,7 +1745,7 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, tcp_service_net_dma(sk, true); tcp_cleanup_rbuf(sk, copied); } else - dma_async_memcpy_issue_pending(tp->ucopy.dma_chan); + dma_async_issue_pending(tp->ucopy.dma_chan); } #endif if (copied >= target) { @@ -1844,7 +1838,7 @@ do_prequeue: break; } - dma_async_memcpy_issue_pending(tp->ucopy.dma_chan); + dma_async_issue_pending(tp->ucopy.dma_chan); if ((offset + used) == skb->len) copied_early = true; @@ -2287,7 +2281,6 @@ int tcp_disconnect(struct sock *sk, int flags) tp->packets_out = 0; tp->snd_ssthresh = TCP_INFINITE_SSTHRESH; tp->snd_cwnd_cnt = 0; - tp->bytes_acked = 0; tp->window_clamp = 0; tcp_set_ca_state(sk, TCP_CA_Open); tcp_clear_retrans(tp); @@ -2395,92 +2388,6 @@ static int do_tcp_setsockopt(struct sock *sk, int level, release_sock(sk); return err; } - case TCP_COOKIE_TRANSACTIONS: { - struct tcp_cookie_transactions ctd; - struct tcp_cookie_values *cvp = NULL; - - if (sizeof(ctd) > optlen) - return -EINVAL; - if (copy_from_user(&ctd, optval, sizeof(ctd))) - return -EFAULT; - - if (ctd.tcpct_used > sizeof(ctd.tcpct_value) || - ctd.tcpct_s_data_desired > TCP_MSS_DESIRED) - return -EINVAL; - - if (ctd.tcpct_cookie_desired == 0) { - /* default to global value */ - } else if ((0x1 & ctd.tcpct_cookie_desired) || - ctd.tcpct_cookie_desired > TCP_COOKIE_MAX || - 
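With tcp_ecn registered in ipv4_net_table just above and its storage moved into struct netns_ipv4, readers of the knob must go through the socket's namespace rather than the old global sysctl_tcp_ecn. A minimal consumer-side sketch, assuming the sysctl_tcp_ecn member the table wiring above implies; tcp_ecn_enabled is an illustrative helper, not an existing function:

static bool tcp_ecn_enabled(const struct sock *sk)
{
	/* per-netns knob replaces the former global sysctl_tcp_ecn */
	return sock_net(sk)->ipv4.sysctl_tcp_ecn != 0;
}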
ctd.tcpct_cookie_desired < TCP_COOKIE_MIN) { - return -EINVAL; - } - - if (TCP_COOKIE_OUT_NEVER & ctd.tcpct_flags) { - /* Supercedes all other values */ - lock_sock(sk); - if (tp->cookie_values != NULL) { - kref_put(&tp->cookie_values->kref, - tcp_cookie_values_release); - tp->cookie_values = NULL; - } - tp->rx_opt.cookie_in_always = 0; /* false */ - tp->rx_opt.cookie_out_never = 1; /* true */ - release_sock(sk); - return err; - } - - /* Allocate ancillary memory before locking. - */ - if (ctd.tcpct_used > 0 || - (tp->cookie_values == NULL && - (sysctl_tcp_cookie_size > 0 || - ctd.tcpct_cookie_desired > 0 || - ctd.tcpct_s_data_desired > 0))) { - cvp = kzalloc(sizeof(*cvp) + ctd.tcpct_used, - GFP_KERNEL); - if (cvp == NULL) - return -ENOMEM; - - kref_init(&cvp->kref); - } - lock_sock(sk); - tp->rx_opt.cookie_in_always = - (TCP_COOKIE_IN_ALWAYS & ctd.tcpct_flags); - tp->rx_opt.cookie_out_never = 0; /* false */ - - if (tp->cookie_values != NULL) { - if (cvp != NULL) { - /* Changed values are recorded by a changed - * pointer, ensuring the cookie will differ, - * without separately hashing each value later. - */ - kref_put(&tp->cookie_values->kref, - tcp_cookie_values_release); - } else { - cvp = tp->cookie_values; - } - } - - if (cvp != NULL) { - cvp->cookie_desired = ctd.tcpct_cookie_desired; - - if (ctd.tcpct_used > 0) { - memcpy(cvp->s_data_payload, ctd.tcpct_value, - ctd.tcpct_used); - cvp->s_data_desired = ctd.tcpct_used; - cvp->s_data_constant = 1; /* true */ - } else { - /* No constant payload data. */ - cvp->s_data_desired = ctd.tcpct_s_data_desired; - cvp->s_data_constant = 0; /* false */ - } - - tp->cookie_values = cvp; - } - release_sock(sk); - return err; - } default: /* fallthru */ break; @@ -2711,6 +2618,12 @@ static int do_tcp_setsockopt(struct sock *sk, int level, else err = -EINVAL; break; + case TCP_TIMESTAMP: + if (!tp->repair) + err = -EPERM; + else + tp->tsoffset = val - tcp_time_stamp; + break; default: err = -ENOPROTOOPT; break; @@ -2894,41 +2807,6 @@ static int do_tcp_getsockopt(struct sock *sk, int level, return -EFAULT; return 0; - case TCP_COOKIE_TRANSACTIONS: { - struct tcp_cookie_transactions ctd; - struct tcp_cookie_values *cvp = tp->cookie_values; - - if (get_user(len, optlen)) - return -EFAULT; - if (len < sizeof(ctd)) - return -EINVAL; - - memset(&ctd, 0, sizeof(ctd)); - ctd.tcpct_flags = (tp->rx_opt.cookie_in_always ? - TCP_COOKIE_IN_ALWAYS : 0) - | (tp->rx_opt.cookie_out_never ? - TCP_COOKIE_OUT_NEVER : 0); - - if (cvp != NULL) { - ctd.tcpct_flags |= (cvp->s_data_in ? - TCP_S_DATA_IN : 0) - | (cvp->s_data_out ? 
- TCP_S_DATA_OUT : 0); - - ctd.tcpct_cookie_desired = cvp->cookie_desired; - ctd.tcpct_s_data_desired = cvp->s_data_desired; - - memcpy(&ctd.tcpct_value[0], &cvp->cookie_pair[0], - cvp->cookie_pair_size); - ctd.tcpct_used = cvp->cookie_pair_size; - } - - if (put_user(sizeof(ctd), optlen)) - return -EFAULT; - if (copy_to_user(optval, &ctd, sizeof(ctd))) - return -EFAULT; - return 0; - } case TCP_THIN_LINEAR_TIMEOUTS: val = tp->thin_lto; break; @@ -2959,6 +2837,9 @@ static int do_tcp_getsockopt(struct sock *sk, int level, case TCP_USER_TIMEOUT: val = jiffies_to_msecs(icsk->icsk_user_timeout); break; + case TCP_TIMESTAMP: + val = tcp_time_stamp + tp->tsoffset; + break; default: return -ENOPROTOOPT; } @@ -3004,6 +2885,8 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, __be32 delta; unsigned int oldlen; unsigned int mss; + struct sk_buff *gso_skb = skb; + __sum16 newcheck; if (!pskb_may_pull(skb, sizeof(*th))) goto out; @@ -3032,6 +2915,8 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, SKB_GSO_DODGY | SKB_GSO_TCP_ECN | SKB_GSO_TCPV6 | + SKB_GSO_GRE | + SKB_GSO_UDP_TUNNEL | 0) || !(type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)))) goto out; @@ -3052,11 +2937,13 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, th = tcp_hdr(skb); seq = ntohl(th->seq); + newcheck = ~csum_fold((__force __wsum)((__force u32)th->check + + (__force u32)delta)); + do { th->fin = th->psh = 0; + th->check = newcheck; - th->check = ~csum_fold((__force __wsum)((__force u32)th->check + - (__force u32)delta)); if (skb->ip_summed != CHECKSUM_PARTIAL) th->check = csum_fold(csum_partial(skb_transport_header(skb), @@ -3070,6 +2957,17 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, th->cwr = 0; } while (skb->next); + /* Following permits TCP Small Queues to work well with GSO : + * The callback to TCP stack will be called at the time last frag + * is freed at TX completion, and not right now when gso_skb + * is freed by GSO engine + */ + if (gso_skb->destructor == tcp_wfree) { + swap(gso_skb->sk, skb->sk); + swap(gso_skb->destructor, skb->destructor); + swap(gso_skb->truesize, skb->truesize); + } + delta = htonl(oldlen + (skb->tail - skb->transport_header) + skb->data_len); th->check = ~csum_fold((__force __wsum)((__force u32)th->check + @@ -3243,7 +3141,7 @@ __tcp_alloc_md5sig_pool(struct sock *sk) struct crypto_hash *hash; hash = crypto_alloc_hash("md5", 0, CRYPTO_ALG_ASYNC); - if (!hash || IS_ERR(hash)) + if (IS_ERR_OR_NULL(hash)) goto out_free; per_cpu_ptr(pool, cpu)->md5_desc.tfm = hash; @@ -3396,134 +3294,6 @@ EXPORT_SYMBOL(tcp_md5_hash_key); #endif -/* Each Responder maintains up to two secret values concurrently for - * efficient secret rollover. Each secret value has 4 states: - * - * Generating. (tcp_secret_generating != tcp_secret_primary) - * Generates new Responder-Cookies, but not yet used for primary - * verification. This is a short-term state, typically lasting only - * one round trip time (RTT). - * - * Primary. (tcp_secret_generating == tcp_secret_primary) - * Used both for generation and primary verification. - * - * Retiring. (tcp_secret_retiring != tcp_secret_secondary) - * Used for verification, until the first failure that can be - * verified by the newer Generating secret. At that time, this - * cookie's state is changed to Secondary, and the Generating - * cookie's state is changed to Primary. This is a short-term state, - * typically lasting only one round trip time (RTT). - * - * Secondary. 
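The TCP_TIMESTAMP get/set paths added in the hunks above are readable on any TCP socket but writable only under TCP_REPAIR, which is what lets checkpoint/restore tools carry the timestamp clock across migration. A hedged userspace sketch; the fallback #define uses the option value from the kernel uapi header at this point:

#include <stdio.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/tcp.h>

#ifndef TCP_TIMESTAMP
#define TCP_TIMESTAMP	24	/* include/uapi/linux/tcp.h */
#endif

static int dump_tcp_timestamp(int fd)
{
	int val;
	socklen_t len = sizeof(val);

	/* kernel returns tcp_time_stamp + tp->tsoffset for this socket */
	if (getsockopt(fd, IPPROTO_TCP, TCP_TIMESTAMP, &val, &len) < 0)
		return -1;
	printf("timestamp clock: %d\n", val);
	return 0;
}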
(tcp_secret_retiring == tcp_secret_secondary) - * Used for secondary verification, after primary verification - * failures. This state lasts no more than twice the Maximum Segment - * Lifetime (2MSL). Then, the secret is discarded. - */ -struct tcp_cookie_secret { - /* The secret is divided into two parts. The digest part is the - * equivalent of previously hashing a secret and saving the state, - * and serves as an initialization vector (IV). The message part - * serves as the trailing secret. - */ - u32 secrets[COOKIE_WORKSPACE_WORDS]; - unsigned long expires; -}; - -#define TCP_SECRET_1MSL (HZ * TCP_PAWS_MSL) -#define TCP_SECRET_2MSL (HZ * TCP_PAWS_MSL * 2) -#define TCP_SECRET_LIFE (HZ * 600) - -static struct tcp_cookie_secret tcp_secret_one; -static struct tcp_cookie_secret tcp_secret_two; - -/* Essentially a circular list, without dynamic allocation. */ -static struct tcp_cookie_secret *tcp_secret_generating; -static struct tcp_cookie_secret *tcp_secret_primary; -static struct tcp_cookie_secret *tcp_secret_retiring; -static struct tcp_cookie_secret *tcp_secret_secondary; - -static DEFINE_SPINLOCK(tcp_secret_locker); - -/* Select a pseudo-random word in the cookie workspace. - */ -static inline u32 tcp_cookie_work(const u32 *ws, const int n) -{ - return ws[COOKIE_DIGEST_WORDS + ((COOKIE_MESSAGE_WORDS-1) & ws[n])]; -} - -/* Fill bakery[COOKIE_WORKSPACE_WORDS] with generator, updating as needed. - * Called in softirq context. - * Returns: 0 for success. - */ -int tcp_cookie_generator(u32 *bakery) -{ - unsigned long jiffy = jiffies; - - if (unlikely(time_after_eq(jiffy, tcp_secret_generating->expires))) { - spin_lock_bh(&tcp_secret_locker); - if (!time_after_eq(jiffy, tcp_secret_generating->expires)) { - /* refreshed by another */ - memcpy(bakery, - &tcp_secret_generating->secrets[0], - COOKIE_WORKSPACE_WORDS); - } else { - /* still needs refreshing */ - get_random_bytes(bakery, COOKIE_WORKSPACE_WORDS); - - /* The first time, paranoia assumes that the - * randomization function isn't as strong. But, - * this secret initialization is delayed until - * the last possible moment (packet arrival). - * Although that time is observable, it is - * unpredictably variable. Mash in the most - * volatile clock bits available, and expire the - * secret extra quickly. - */ - if (unlikely(tcp_secret_primary->expires == - tcp_secret_secondary->expires)) { - struct timespec tv; - - getnstimeofday(&tv); - bakery[COOKIE_DIGEST_WORDS+0] ^= - (u32)tv.tv_nsec; - - tcp_secret_secondary->expires = jiffy - + TCP_SECRET_1MSL - + (0x0f & tcp_cookie_work(bakery, 0)); - } else { - tcp_secret_secondary->expires = jiffy - + TCP_SECRET_LIFE - + (0xff & tcp_cookie_work(bakery, 1)); - tcp_secret_primary->expires = jiffy - + TCP_SECRET_2MSL - + (0x1f & tcp_cookie_work(bakery, 2)); - } - memcpy(&tcp_secret_secondary->secrets[0], - bakery, COOKIE_WORKSPACE_WORDS); - - rcu_assign_pointer(tcp_secret_generating, - tcp_secret_secondary); - rcu_assign_pointer(tcp_secret_retiring, - tcp_secret_primary); - /* - * Neither call_rcu() nor synchronize_rcu() needed. - * Retiring data is not freed. It is replaced after - * further (locked) pointer updates, and a quiet time - * (minimum 1MSL, maximum LIFE - 2MSL). 
- */ - } - spin_unlock_bh(&tcp_secret_locker); - } else { - rcu_read_lock_bh(); - memcpy(bakery, - &rcu_dereference(tcp_secret_generating)->secrets[0], - COOKIE_WORKSPACE_WORDS); - rcu_read_unlock_bh(); - } - return 0; -} -EXPORT_SYMBOL(tcp_cookie_generator); - void tcp_done(struct sock *sk) { struct request_sock *req = tcp_sk(sk)->fastopen_rsk; @@ -3578,7 +3348,6 @@ void __init tcp_init(void) unsigned long limit; int max_rshare, max_wshare, cnt; unsigned int i; - unsigned long jiffy = jiffies; BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > sizeof(skb->cb)); @@ -3654,13 +3423,5 @@ void __init tcp_init(void) tcp_register_congestion_control(&tcp_reno); - memset(&tcp_secret_one.secrets[0], 0, sizeof(tcp_secret_one.secrets)); - memset(&tcp_secret_two.secrets[0], 0, sizeof(tcp_secret_two.secrets)); - tcp_secret_one.expires = jiffy; /* past due */ - tcp_secret_two.expires = jiffy; /* past due */ - tcp_secret_generating = &tcp_secret_one; - tcp_secret_primary = &tcp_secret_one; - tcp_secret_retiring = &tcp_secret_two; - tcp_secret_secondary = &tcp_secret_two; tcp_tasklet_init(); } diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index cdf2e707bb10..019c2389a341 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -317,28 +317,11 @@ void tcp_slow_start(struct tcp_sock *tp) snd_cwnd = 1U; } - /* RFC3465: ABC Slow start - * Increase only after a full MSS of bytes is acked - * - * TCP sender SHOULD increase cwnd by the number of - * previously unacknowledged bytes ACKed by each incoming - * acknowledgment, provided the increase is not more than L - */ - if (sysctl_tcp_abc && tp->bytes_acked < tp->mss_cache) - return; - if (sysctl_tcp_max_ssthresh > 0 && tp->snd_cwnd > sysctl_tcp_max_ssthresh) cnt = sysctl_tcp_max_ssthresh >> 1; /* limited slow start */ else cnt = snd_cwnd; /* exponential increase */ - /* RFC3465: ABC - * We MAY increase by 2 if discovered delayed ack - */ - if (sysctl_tcp_abc > 1 && tp->bytes_acked >= 2*tp->mss_cache) - cnt <<= 1; - tp->bytes_acked = 0; - tp->snd_cwnd_cnt += cnt; while (tp->snd_cwnd_cnt >= snd_cwnd) { tp->snd_cwnd_cnt -= snd_cwnd; @@ -378,20 +361,9 @@ void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) /* In "safe" area, increase. */ if (tp->snd_cwnd <= tp->snd_ssthresh) tcp_slow_start(tp); - /* In dangerous area, increase slowly. 
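With the RFC3465 Appropriate Byte Counting branches coming out of tcp_slow_start() and tcp_reno_cong_avoid() in these tcp_cong.c hunks (the sysctl_tcp_abc knob was already dropped from the sysctl table above), Reno congestion avoidance always takes the tcp_cong_avoid_ai() path. A simplified sketch of what that amounts to per incoming ACK, assuming the usual snd_cwnd_clamp bound; the helper name is illustrative:

static void cong_avoid_ai_sketch(struct tcp_sock *tp)
{
	/* one cwnd increment per full window of ACKed segments */
	if (tp->snd_cwnd_cnt >= tp->snd_cwnd) {
		tp->snd_cwnd_cnt = 0;
		if (tp->snd_cwnd < tp->snd_cwnd_clamp)
			tp->snd_cwnd++;
	} else {
		tp->snd_cwnd_cnt++;
	}
}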
*/ - else if (sysctl_tcp_abc) { - /* RFC3465: Appropriate Byte Count - * increase once for each full cwnd acked - */ - if (tp->bytes_acked >= tp->snd_cwnd*tp->mss_cache) { - tp->bytes_acked -= tp->snd_cwnd*tp->mss_cache; - if (tp->snd_cwnd < tp->snd_cwnd_clamp) - tp->snd_cwnd++; - } - } else { + else tcp_cong_avoid_ai(tp, tp->snd_cwnd); - } } EXPORT_SYMBOL_GPL(tcp_reno_cong_avoid); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index ad70a962c20e..08bbe6096528 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -81,8 +81,6 @@ int sysctl_tcp_sack __read_mostly = 1; int sysctl_tcp_fack __read_mostly = 1; int sysctl_tcp_reordering __read_mostly = TCP_FASTRETRANS_THRESH; EXPORT_SYMBOL(sysctl_tcp_reordering); -int sysctl_tcp_ecn __read_mostly = 2; -EXPORT_SYMBOL(sysctl_tcp_ecn); int sysctl_tcp_dsack __read_mostly = 1; int sysctl_tcp_app_win __read_mostly = 31; int sysctl_tcp_adv_win_scale __read_mostly = 1; @@ -95,13 +93,11 @@ int sysctl_tcp_stdurg __read_mostly; int sysctl_tcp_rfc1337 __read_mostly; int sysctl_tcp_max_orphans __read_mostly = NR_FILE; int sysctl_tcp_frto __read_mostly = 2; -int sysctl_tcp_frto_response __read_mostly; int sysctl_tcp_thin_dupack __read_mostly; int sysctl_tcp_moderate_rcvbuf __read_mostly = 1; -int sysctl_tcp_abc __read_mostly; -int sysctl_tcp_early_retrans __read_mostly = 2; +int sysctl_tcp_early_retrans __read_mostly = 3; #define FLAG_DATA 0x01 /* Incoming frame contained data. */ #define FLAG_WIN_UPDATE 0x02 /* Incoming ACK was a window update. */ @@ -111,17 +107,16 @@ int sysctl_tcp_early_retrans __read_mostly = 2; #define FLAG_DATA_SACKED 0x20 /* New SACK. */ #define FLAG_ECE 0x40 /* ECE in this ACK */ #define FLAG_SLOWPATH 0x100 /* Do not skip RFC checks for window update.*/ -#define FLAG_ONLY_ORIG_SACKED 0x200 /* SACKs only non-rexmit sent before RTO */ +#define FLAG_ORIG_SACK_ACKED 0x200 /* Never retransmitted data are (s)acked */ #define FLAG_SND_UNA_ADVANCED 0x400 /* Snd_una was changed (!= FLAG_DATA_ACKED) */ #define FLAG_DSACKING_ACK 0x800 /* SACK blocks contained D-SACK info */ -#define FLAG_NONHEAD_RETRANS_ACKED 0x1000 /* Non-head rexmitted data was ACKed */ #define FLAG_SACK_RENEGING 0x2000 /* snd_una advanced to a sacked seq */ +#define FLAG_UPDATE_TS_RECENT 0x4000 /* tcp_replace_ts_recent() */ #define FLAG_ACKED (FLAG_DATA_ACKED|FLAG_SYN_ACKED) #define FLAG_NOT_DUP (FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED) #define FLAG_CA_ALERT (FLAG_DATA_SACKED|FLAG_ECE) #define FLAG_FORWARD_PROGRESS (FLAG_ACKED|FLAG_DATA_SACKED) -#define FLAG_ANY_PROGRESS (FLAG_FORWARD_PROGRESS|FLAG_SND_UNA_ADVANCED) #define TCP_REMNANT (TCP_FLAG_FIN|TCP_FLAG_URG|TCP_FLAG_SYN|TCP_FLAG_PSH) #define TCP_HP_BITS (~(TCP_RESERVED_BITS|TCP_FLAG_PSH)) @@ -1162,10 +1157,8 @@ static u8 tcp_sacktag_one(struct sock *sk, tcp_highest_sack_seq(tp))) state->reord = min(fack_count, state->reord); - - /* SACK enhanced F-RTO (RFC4138; Appendix B) */ - if (!after(end_seq, tp->frto_highmark)) - state->flag |= FLAG_ONLY_ORIG_SACKED; + if (!after(end_seq, tp->high_seq)) + state->flag |= FLAG_ORIG_SACK_ACKED; } if (sacked & TCPCB_LOST) { @@ -1558,7 +1551,6 @@ static int tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb, u32 prior_snd_una) { - const struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); const unsigned char *ptr = (skb_transport_header(ack_skb) + TCP_SKB_CB(ack_skb)->sacked); @@ -1731,12 +1723,6 @@ walk: start_seq, end_seq, dup_sack); advance_sp: - /* SACK enhanced FRTO (RFC4138, Appendix B): Clearing correct - * due 
to in-order walk - */ - if (after(end_seq, tp->frto_highmark)) - state.flag &= ~FLAG_ONLY_ORIG_SACKED; - i++; } @@ -1753,8 +1739,7 @@ advance_sp: tcp_verify_left_out(tp); if ((state.reord < tp->fackets_out) && - ((icsk->icsk_ca_state != TCP_CA_Loss) || tp->undo_marker) && - (!tp->frto_highmark || after(tp->snd_una, tp->frto_highmark))) + ((inet_csk(sk)->icsk_ca_state != TCP_CA_Loss) || tp->undo_marker)) tcp_update_reordering(sk, tp->fackets_out - state.reord, 0); out: @@ -1828,198 +1813,6 @@ static inline void tcp_reset_reno_sack(struct tcp_sock *tp) tp->sacked_out = 0; } -static int tcp_is_sackfrto(const struct tcp_sock *tp) -{ - return (sysctl_tcp_frto == 0x2) && !tcp_is_reno(tp); -} - -/* F-RTO can only be used if TCP has never retransmitted anything other than - * head (SACK enhanced variant from Appendix B of RFC4138 is more robust here) - */ -bool tcp_use_frto(struct sock *sk) -{ - const struct tcp_sock *tp = tcp_sk(sk); - const struct inet_connection_sock *icsk = inet_csk(sk); - struct sk_buff *skb; - - if (!sysctl_tcp_frto) - return false; - - /* MTU probe and F-RTO won't really play nicely along currently */ - if (icsk->icsk_mtup.probe_size) - return false; - - if (tcp_is_sackfrto(tp)) - return true; - - /* Avoid expensive walking of rexmit queue if possible */ - if (tp->retrans_out > 1) - return false; - - skb = tcp_write_queue_head(sk); - if (tcp_skb_is_last(sk, skb)) - return true; - skb = tcp_write_queue_next(sk, skb); /* Skips head */ - tcp_for_write_queue_from(skb, sk) { - if (skb == tcp_send_head(sk)) - break; - if (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS) - return false; - /* Short-circuit when first non-SACKed skb has been checked */ - if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) - break; - } - return true; -} - -/* RTO occurred, but do not yet enter Loss state. Instead, defer RTO - * recovery a bit and use heuristics in tcp_process_frto() to detect if - * the RTO was spurious. Only clear SACKED_RETRANS of the head here to - * keep retrans_out counting accurate (with SACK F-RTO, other than head - * may still have that bit set); TCPCB_LOST and remaining SACKED_RETRANS - * bits are handled if the Loss state is really to be entered (in - * tcp_enter_frto_loss). - * - * Do like tcp_enter_loss() would; when RTO expires the second time it - * does: - * "Reduce ssthresh if it has not yet been made inside this window." - */ -void tcp_enter_frto(struct sock *sk) -{ - const struct inet_connection_sock *icsk = inet_csk(sk); - struct tcp_sock *tp = tcp_sk(sk); - struct sk_buff *skb; - - if ((!tp->frto_counter && icsk->icsk_ca_state <= TCP_CA_Disorder) || - tp->snd_una == tp->high_seq || - ((icsk->icsk_ca_state == TCP_CA_Loss || tp->frto_counter) && - !icsk->icsk_retransmits)) { - tp->prior_ssthresh = tcp_current_ssthresh(sk); - /* Our state is too optimistic in ssthresh() call because cwnd - * is not reduced until tcp_enter_frto_loss() when previous F-RTO - * recovery has not yet completed. Pattern would be this: RTO, - * Cumulative ACK, RTO (2xRTO for the same segment does not end - * up here twice). - * RFC4138 should be more specific on what to do, even though - * RTO is quite unlikely to occur after the first Cumulative ACK - * due to back-off and complexity of triggering events ... - */ - if (tp->frto_counter) { - u32 stored_cwnd; - stored_cwnd = tp->snd_cwnd; - tp->snd_cwnd = 2; - tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk); - tp->snd_cwnd = stored_cwnd; - } else { - tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk); - } - /* ... 
in theory, cong.control module could do "any tricks" in - * ssthresh(), which means that ca_state, lost bits and lost_out - * counter would have to be faked before the call occurs. We - * consider that too expensive, unlikely and hacky, so modules - * using these in ssthresh() must deal these incompatibility - * issues if they receives CA_EVENT_FRTO and frto_counter != 0 - */ - tcp_ca_event(sk, CA_EVENT_FRTO); - } - - tp->undo_marker = tp->snd_una; - tp->undo_retrans = 0; - - skb = tcp_write_queue_head(sk); - if (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS) - tp->undo_marker = 0; - if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) { - TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; - tp->retrans_out -= tcp_skb_pcount(skb); - } - tcp_verify_left_out(tp); - - /* Too bad if TCP was application limited */ - tp->snd_cwnd = min(tp->snd_cwnd, tcp_packets_in_flight(tp) + 1); - - /* Earlier loss recovery underway (see RFC4138; Appendix B). - * The last condition is necessary at least in tp->frto_counter case. - */ - if (tcp_is_sackfrto(tp) && (tp->frto_counter || - ((1 << icsk->icsk_ca_state) & (TCPF_CA_Recovery|TCPF_CA_Loss))) && - after(tp->high_seq, tp->snd_una)) { - tp->frto_highmark = tp->high_seq; - } else { - tp->frto_highmark = tp->snd_nxt; - } - tcp_set_ca_state(sk, TCP_CA_Disorder); - tp->high_seq = tp->snd_nxt; - tp->frto_counter = 1; -} - -/* Enter Loss state after F-RTO was applied. Dupack arrived after RTO, - * which indicates that we should follow the traditional RTO recovery, - * i.e. mark everything lost and do go-back-N retransmission. - */ -static void tcp_enter_frto_loss(struct sock *sk, int allowed_segments, int flag) -{ - struct tcp_sock *tp = tcp_sk(sk); - struct sk_buff *skb; - - tp->lost_out = 0; - tp->retrans_out = 0; - if (tcp_is_reno(tp)) - tcp_reset_reno_sack(tp); - - tcp_for_write_queue(skb, sk) { - if (skb == tcp_send_head(sk)) - break; - - TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST; - /* - * Count the retransmission made on RTO correctly (only when - * waiting for the first ACK and did not get it)... - */ - if ((tp->frto_counter == 1) && !(flag & FLAG_DATA_ACKED)) { - /* For some reason this R-bit might get cleared? */ - if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) - tp->retrans_out += tcp_skb_pcount(skb); - /* ...enter this if branch just for the first segment */ - flag |= FLAG_DATA_ACKED; - } else { - if (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS) - tp->undo_marker = 0; - TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; - } - - /* Marking forward transmissions that were made after RTO lost - * can cause unnecessary retransmissions in some scenarios, - * SACK blocks will mitigate that in some but not in all cases. - * We used to not mark them but it was causing break-ups with - * receivers that do only in-order receival. - * - * TODO: we could detect presence of such receiver and select - * different behavior per flow. 
- */ - if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) { - TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; - tp->lost_out += tcp_skb_pcount(skb); - tp->retransmit_high = TCP_SKB_CB(skb)->end_seq; - } - } - tcp_verify_left_out(tp); - - tp->snd_cwnd = tcp_packets_in_flight(tp) + allowed_segments; - tp->snd_cwnd_cnt = 0; - tp->snd_cwnd_stamp = tcp_time_stamp; - tp->frto_counter = 0; - tp->bytes_acked = 0; - - tp->reordering = min_t(unsigned int, tp->reordering, - sysctl_tcp_reordering); - tcp_set_ca_state(sk, TCP_CA_Loss); - tp->high_seq = tp->snd_nxt; - TCP_ECN_queue_cwr(tp); - - tcp_clear_all_retrans_hints(tp); -} - static void tcp_clear_retrans_partial(struct tcp_sock *tp) { tp->retrans_out = 0; @@ -2046,10 +1839,13 @@ void tcp_enter_loss(struct sock *sk, int how) const struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; + bool new_recovery = false; /* Reduce ssthresh if it has not yet been made inside this window. */ - if (icsk->icsk_ca_state <= TCP_CA_Disorder || tp->snd_una == tp->high_seq || + if (icsk->icsk_ca_state <= TCP_CA_Disorder || + !after(tp->high_seq, tp->snd_una) || (icsk->icsk_ca_state == TCP_CA_Loss && !icsk->icsk_retransmits)) { + new_recovery = true; tp->prior_ssthresh = tcp_current_ssthresh(sk); tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk); tcp_ca_event(sk, CA_EVENT_LOSS); @@ -2058,17 +1854,13 @@ void tcp_enter_loss(struct sock *sk, int how) tp->snd_cwnd_cnt = 0; tp->snd_cwnd_stamp = tcp_time_stamp; - tp->bytes_acked = 0; tcp_clear_retrans_partial(tp); if (tcp_is_reno(tp)) tcp_reset_reno_sack(tp); - if (!how) { - /* Push undo marker, if it was plain RTO and nothing - * was retransmitted. */ - tp->undo_marker = tp->snd_una; - } else { + tp->undo_marker = tp->snd_una; + if (how) { tp->sacked_out = 0; tp->fackets_out = 0; } @@ -2095,8 +1887,14 @@ void tcp_enter_loss(struct sock *sk, int how) tcp_set_ca_state(sk, TCP_CA_Loss); tp->high_seq = tp->snd_nxt; TCP_ECN_queue_cwr(tp); - /* Abort F-RTO algorithm if one is in progress */ - tp->frto_counter = 0; + + /* F-RTO RFC5682 sec 3.1 step 1: retransmit SND.UNA if no previous + * loss recovery is underway except recurring timeout(s) on + * the same SND.UNA (sec 3.2). Disable F-RTO on path MTU probing + */ + tp->frto = sysctl_tcp_frto && + (new_recovery || icsk->icsk_retransmits) && + !inet_csk(sk)->icsk_mtup.probe_size; } /* If ACK arrived pointing to a remembered SACK, it means that our @@ -2155,15 +1953,16 @@ static bool tcp_pause_early_retransmit(struct sock *sk, int flag) * max(RTT/4, 2msec) unless ack has ECE mark, no RTT samples * available, or RTO is scheduled to fire first. */ - if (sysctl_tcp_early_retrans < 2 || (flag & FLAG_ECE) || !tp->srtt) + if (sysctl_tcp_early_retrans < 2 || sysctl_tcp_early_retrans > 3 || + (flag & FLAG_ECE) || !tp->srtt) return false; delay = max_t(unsigned long, (tp->srtt >> 5), msecs_to_jiffies(2)); if (!time_after(inet_csk(sk)->icsk_timeout, (jiffies + delay))) return false; - inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, delay, TCP_RTO_MAX); - tp->early_retrans_delayed = 1; + inet_csk_reset_xmit_timer(sk, ICSK_TIME_EARLY_RETRANS, delay, + TCP_RTO_MAX); return true; } @@ -2279,10 +2078,6 @@ static bool tcp_time_to_recover(struct sock *sk, int flag) struct tcp_sock *tp = tcp_sk(sk); __u32 packets_out; - /* Do not perform any recovery during F-RTO algorithm */ - if (tp->frto_counter) - return false; - /* Trick#1: The loss is proven. 
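The tcp_enter_loss() hunk above replaces the old frto_counter state machine with a single tp->frto flag armed at RTO time. A hedged restatement of the arming rule as a standalone predicate, assuming kernel context; frto_armed_sketch is an illustrative name:

static bool frto_armed_sketch(const struct sock *sk, bool new_recovery)
{
	/* F-RTO only for a fresh RTO episode (or recurring timeouts on the
	 * same SND.UNA), and never while an MTU probe is in flight */
	return sysctl_tcp_frto &&
	       (new_recovery || inet_csk(sk)->icsk_retransmits) &&
	       !inet_csk(sk)->icsk_mtup.probe_size;
}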
*/ if (tp->lost_out) return true; @@ -2326,7 +2121,7 @@ static bool tcp_time_to_recover(struct sock *sk, int flag) * interval if appropriate. */ if (tp->do_early_retrans && !tp->retrans_out && tp->sacked_out && - (tp->packets_out == (tp->sacked_out + 1) && tp->packets_out < 4) && + (tp->packets_out >= (tp->sacked_out + 1) && tp->packets_out < 4) && !tcp_may_send_now(sk)) return !tcp_pause_early_retransmit(sk, flag); @@ -2643,12 +2438,12 @@ static int tcp_try_undo_partial(struct sock *sk, int acked) return failed; } -/* Undo during loss recovery after partial ACK. */ -static bool tcp_try_undo_loss(struct sock *sk) +/* Undo during loss recovery after partial ACK or using F-RTO. */ +static bool tcp_try_undo_loss(struct sock *sk, bool frto_undo) { struct tcp_sock *tp = tcp_sk(sk); - if (tcp_may_undo(tp)) { + if (frto_undo || tcp_may_undo(tp)) { struct sk_buff *skb; tcp_for_write_queue(skb, sk) { if (skb == tcp_send_head(sk)) @@ -2662,9 +2457,12 @@ static bool tcp_try_undo_loss(struct sock *sk) tp->lost_out = 0; tcp_undo_cwr(sk, true); NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSSUNDO); + if (frto_undo) + NET_INC_STATS_BH(sock_net(sk), + LINUX_MIB_TCPSPURIOUSRTOS); inet_csk(sk)->icsk_retransmits = 0; tp->undo_marker = 0; - if (tcp_is_sack(tp)) + if (frto_undo || tcp_is_sack(tp)) tcp_set_ca_state(sk, TCP_CA_Open); return true; } @@ -2686,7 +2484,7 @@ static void tcp_init_cwnd_reduction(struct sock *sk, const bool set_ssthresh) struct tcp_sock *tp = tcp_sk(sk); tp->high_seq = tp->snd_nxt; - tp->bytes_acked = 0; + tp->tlp_high_seq = 0; tp->snd_cwnd_cnt = 0; tp->prior_cwnd = tp->snd_cwnd; tp->prr_delivered = 0; @@ -2737,7 +2535,6 @@ void tcp_enter_cwr(struct sock *sk, const int set_ssthresh) struct tcp_sock *tp = tcp_sk(sk); tp->prior_ssthresh = 0; - tp->bytes_acked = 0; if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) { tp->undo_marker = 0; tcp_init_cwnd_reduction(sk, set_ssthresh); @@ -2765,7 +2562,7 @@ static void tcp_try_to_open(struct sock *sk, int flag, int newly_acked_sacked) tcp_verify_left_out(tp); - if (!tp->frto_counter && !tcp_any_retrans_done(sk)) + if (!tcp_any_retrans_done(sk)) tp->retrans_stamp = 0; if (flag & FLAG_ECE) @@ -2882,6 +2679,58 @@ static void tcp_enter_recovery(struct sock *sk, bool ece_ack) tcp_set_ca_state(sk, TCP_CA_Recovery); } +/* Process an ACK in CA_Loss state. Move to CA_Open if lost data are + * recovered or spurious. Otherwise retransmits more on partial ACKs. + */ +static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + struct tcp_sock *tp = tcp_sk(sk); + bool recovered = !before(tp->snd_una, tp->high_seq); + + if (tp->frto) { /* F-RTO RFC5682 sec 3.1 (sack enhanced version). */ + if (flag & FLAG_ORIG_SACK_ACKED) { + /* Step 3.b. A timeout is spurious if not all data are + * lost, i.e., never-retransmitted data are (s)acked. 
+ */ + tcp_try_undo_loss(sk, true); + return; + } + if (after(tp->snd_nxt, tp->high_seq) && + (flag & FLAG_DATA_SACKED || is_dupack)) { + tp->frto = 0; /* Loss was real: 2nd part of step 3.a */ + } else if (flag & FLAG_SND_UNA_ADVANCED && !recovered) { + tp->high_seq = tp->snd_nxt; + __tcp_push_pending_frames(sk, tcp_current_mss(sk), + TCP_NAGLE_OFF); + if (after(tp->snd_nxt, tp->high_seq)) + return; /* Step 2.b */ + tp->frto = 0; + } + } + + if (recovered) { + /* F-RTO RFC5682 sec 3.1 step 2.a and 1st part of step 3.a */ + icsk->icsk_retransmits = 0; + tcp_try_undo_recovery(sk); + return; + } + if (flag & FLAG_DATA_ACKED) + icsk->icsk_retransmits = 0; + if (tcp_is_reno(tp)) { + /* A Reno DUPACK means new data in F-RTO step 2.b above are + * delivered. Lower inflight to clock out (re)tranmissions. + */ + if (after(tp->snd_nxt, tp->high_seq) && is_dupack) + tcp_add_reno_sack(sk); + else if (flag & FLAG_SND_UNA_ADVANCED) + tcp_reset_reno_sack(tp); + } + if (tcp_try_undo_loss(sk, false)) + return; + tcp_xmit_retransmit_queue(sk); +} + /* Process an event, which can update packets-in-flight not trivially. * Main goal of this function is to calculate new estimate for left_out, * taking into account both packets sitting in receiver's buffer and @@ -2928,12 +2777,6 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, tp->retrans_stamp = 0; } else if (!before(tp->snd_una, tp->high_seq)) { switch (icsk->icsk_ca_state) { - case TCP_CA_Loss: - icsk->icsk_retransmits = 0; - if (tcp_try_undo_recovery(sk)) - return; - break; - case TCP_CA_CWR: /* CWR is to be held something *above* high_seq * is ACKed for CWR bit to reach receiver. */ @@ -2964,18 +2807,10 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, newly_acked_sacked = pkts_acked + tp->sacked_out - prior_sacked; break; case TCP_CA_Loss: - if (flag & FLAG_DATA_ACKED) - icsk->icsk_retransmits = 0; - if (tcp_is_reno(tp) && flag & FLAG_SND_UNA_ADVANCED) - tcp_reset_reno_sack(tp); - if (!tcp_try_undo_loss(sk)) { - tcp_moderate_cwnd(tp); - tcp_xmit_retransmit_queue(sk); - return; - } + tcp_process_loss(sk, flag, is_dupack); if (icsk->icsk_ca_state != TCP_CA_Open) return; - /* Loss is undone; fall through to processing in Open state. */ + /* Fall through to processing in Open state. */ default: if (tcp_is_reno(tp)) { if (flag & FLAG_SND_UNA_ADVANCED) @@ -3088,6 +2923,7 @@ static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) */ void tcp_rearm_rto(struct sock *sk) { + const struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); /* If the retrans timer is currently being used by Fast Open @@ -3101,12 +2937,13 @@ void tcp_rearm_rto(struct sock *sk) } else { u32 rto = inet_csk(sk)->icsk_rto; /* Offset the time elapsed after installing regular RTO */ - if (tp->early_retrans_delayed) { + if (icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS || + icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) { struct sk_buff *skb = tcp_write_queue_head(sk); const u32 rto_time_stamp = TCP_SKB_CB(skb)->when + rto; s32 delta = (s32)(rto_time_stamp - tcp_time_stamp); /* delta may not be positive if the socket is locked - * when the delayed ER timer fires and is rescheduled. + * when the retrans timer fires and is rescheduled. */ if (delta > 0) rto = delta; @@ -3114,7 +2951,6 @@ void tcp_rearm_rto(struct sock *sk) inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, rto, TCP_RTO_MAX); } - tp->early_retrans_delayed = 0; } /* This function is called when the delayed ER timer fires. 
TCP enters @@ -3202,8 +3038,6 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, flag |= FLAG_RETRANS_DATA_ACKED; ca_seq_rtt = -1; seq_rtt = -1; - if ((flag & FLAG_DATA_ACKED) || (acked_pcount > 1)) - flag |= FLAG_NONHEAD_RETRANS_ACKED; } else { ca_seq_rtt = now - scb->when; last_ackt = skb->tstamp; @@ -3212,6 +3046,8 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, } if (!(sacked & TCPCB_SACKED_ACKED)) reord = min(pkts_acked, reord); + if (!after(scb->end_seq, tp->high_seq)) + flag |= FLAG_ORIG_SACK_ACKED; } if (sacked & TCPCB_SACKED_ACKED) @@ -3412,166 +3248,74 @@ static int tcp_ack_update_window(struct sock *sk, const struct sk_buff *skb, u32 return flag; } -/* A very conservative spurious RTO response algorithm: reduce cwnd and - * continue in congestion avoidance. - */ -static void tcp_conservative_spur_to_response(struct tcp_sock *tp) +/* RFC 5961 7 [ACK Throttling] */ +static void tcp_send_challenge_ack(struct sock *sk) { - tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh); - tp->snd_cwnd_cnt = 0; - tp->bytes_acked = 0; - TCP_ECN_queue_cwr(tp); - tcp_moderate_cwnd(tp); -} + /* unprotected vars, we dont care of overwrites */ + static u32 challenge_timestamp; + static unsigned int challenge_count; + u32 now = jiffies / HZ; -/* A conservative spurious RTO response algorithm: reduce cwnd using - * PRR and continue in congestion avoidance. - */ -static void tcp_cwr_spur_to_response(struct sock *sk) -{ - tcp_enter_cwr(sk, 0); + if (now != challenge_timestamp) { + challenge_timestamp = now; + challenge_count = 0; + } + if (++challenge_count <= sysctl_tcp_challenge_ack_limit) { + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPCHALLENGEACK); + tcp_send_ack(sk); + } } -static void tcp_undo_spur_to_response(struct sock *sk, int flag) +static void tcp_store_ts_recent(struct tcp_sock *tp) { - if (flag & FLAG_ECE) - tcp_cwr_spur_to_response(sk); - else - tcp_undo_cwr(sk, true); + tp->rx_opt.ts_recent = tp->rx_opt.rcv_tsval; + tp->rx_opt.ts_recent_stamp = get_seconds(); } -/* F-RTO spurious RTO detection algorithm (RFC4138) - * - * F-RTO affects during two new ACKs following RTO (well, almost, see inline - * comments). State (ACK number) is kept in frto_counter. When ACK advances - * window (but not to or beyond highest sequence sent before RTO): - * On First ACK, send two new segments out. - * On Second ACK, RTO was likely spurious. Do spurious response (response - * algorithm is not part of the F-RTO detection algorithm - * given in RFC4138 but can be selected separately). - * Otherwise (basically on duplicate ACK), RTO was (likely) caused by a loss - * and TCP falls back to conventional RTO recovery. F-RTO allows overriding - * of Nagle, this is done using frto_counter states 2 and 3, when a new data - * segment of any size sent during F-RTO, state 2 is upgraded to 3. - * - * Rationale: if the RTO was spurious, new ACKs should arrive from the - * original window even after we transmit two new data segments. - * - * SACK version: - * on first step, wait until first cumulative ACK arrives, then move to - * the second step. In second step, the next ACK decides. 
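tcp_send_challenge_ack(), relocated above the spurious-RTO helpers it displaces, keeps RFC 5961's deliberately coarse per-second budget. The same pattern isolated as a standalone predicate, a sketch assuming kernel context (jiffies and the existing sysctl_tcp_challenge_ack_limit); the function name is illustrative:

static bool challenge_ack_allowed(void)
{
	/* intentionally unlocked: an approximate global cap is enough */
	static u32 stamp;
	static unsigned int count;
	u32 now = jiffies / HZ;

	if (now != stamp) {
		stamp = now;
		count = 0;
	}
	return ++count <= sysctl_tcp_challenge_ack_limit;
}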
- * - * F-RTO is implemented (mainly) in four functions: - * - tcp_use_frto() is used to determine if TCP is can use F-RTO - * - tcp_enter_frto() prepares TCP state on RTO if F-RTO is used, it is - * called when tcp_use_frto() showed green light - * - tcp_process_frto() handles incoming ACKs during F-RTO algorithm - * - tcp_enter_frto_loss() is called if there is not enough evidence - * to prove that the RTO is indeed spurious. It transfers the control - * from F-RTO to the conventional RTO recovery - */ -static bool tcp_process_frto(struct sock *sk, int flag) +static void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq) { - struct tcp_sock *tp = tcp_sk(sk); - - tcp_verify_left_out(tp); - - /* Duplicate the behavior from Loss state (fastretrans_alert) */ - if (flag & FLAG_DATA_ACKED) - inet_csk(sk)->icsk_retransmits = 0; - - if ((flag & FLAG_NONHEAD_RETRANS_ACKED) || - ((tp->frto_counter >= 2) && (flag & FLAG_RETRANS_DATA_ACKED))) - tp->undo_marker = 0; - - if (!before(tp->snd_una, tp->frto_highmark)) { - tcp_enter_frto_loss(sk, (tp->frto_counter == 1 ? 2 : 3), flag); - return true; - } - - if (!tcp_is_sackfrto(tp)) { - /* RFC4138 shortcoming in step 2; should also have case c): - * ACK isn't duplicate nor advances window, e.g., opposite dir - * data, winupdate + if (tp->rx_opt.saw_tstamp && !after(seq, tp->rcv_wup)) { + /* PAWS bug workaround wrt. ACK frames, the PAWS discard + * extra check below makes sure this can only happen + * for pure ACK frames. -DaveM + * + * Not only, also it occurs for expired timestamps. */ - if (!(flag & FLAG_ANY_PROGRESS) && (flag & FLAG_NOT_DUP)) - return true; - - if (!(flag & FLAG_DATA_ACKED)) { - tcp_enter_frto_loss(sk, (tp->frto_counter == 1 ? 0 : 3), - flag); - return true; - } - } else { - if (!(flag & FLAG_DATA_ACKED) && (tp->frto_counter == 1)) { - if (!tcp_packets_in_flight(tp)) { - tcp_enter_frto_loss(sk, 2, flag); - return true; - } - - /* Prevent sending of new data. */ - tp->snd_cwnd = min(tp->snd_cwnd, - tcp_packets_in_flight(tp)); - return true; - } - - if ((tp->frto_counter >= 2) && - (!(flag & FLAG_FORWARD_PROGRESS) || - ((flag & FLAG_DATA_SACKED) && - !(flag & FLAG_ONLY_ORIG_SACKED)))) { - /* RFC4138 shortcoming (see comment above) */ - if (!(flag & FLAG_FORWARD_PROGRESS) && - (flag & FLAG_NOT_DUP)) - return true; - - tcp_enter_frto_loss(sk, 3, flag); - return true; - } - } - if (tp->frto_counter == 1) { - /* tcp_may_send_now needs to see updated state */ - tp->snd_cwnd = tcp_packets_in_flight(tp) + 2; - tp->frto_counter = 2; - - if (!tcp_may_send_now(sk)) - tcp_enter_frto_loss(sk, 2, flag); - - return true; - } else { - switch (sysctl_tcp_frto_response) { - case 2: - tcp_undo_spur_to_response(sk, flag); - break; - case 1: - tcp_conservative_spur_to_response(tp); - break; - default: - tcp_cwr_spur_to_response(sk); - break; - } - tp->frto_counter = 0; - tp->undo_marker = 0; - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSPURIOUSRTOS); + if (tcp_paws_check(&tp->rx_opt, 0)) + tcp_store_ts_recent(tp); } - return false; } -/* RFC 5961 7 [ACK Throttling] */ -static void tcp_send_challenge_ack(struct sock *sk) +/* This routine deals with acks during a TLP episode. + * Ref: loss detection algorithm in draft-dukkipati-tcpm-tcp-loss-probe. 
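The TLP episode handling that follows pairs with the TCPLossProbes/TCPLossProbeRecovery counters added to snmp4_net_list earlier in this series. A small reader sketch: in /proc/net/netstat both the field-name line and the value line begin with "TcpExt:" and their columns pair up positionally:

#include <stdio.h>
#include <string.h>

int main(void)
{
	char line[4096];
	FILE *f = fopen("/proc/net/netstat", "r");

	if (!f)
		return 1;
	/* first TcpExt: line carries field names, the next one values */
	while (fgets(line, sizeof(line), f))
		if (!strncmp(line, "TcpExt:", 7))
			fputs(line, stdout);
	fclose(f);
	return 0;
}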
+ */ +static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag) { - /* unprotected vars, we dont care of overwrites */ - static u32 challenge_timestamp; - static unsigned int challenge_count; - u32 now = jiffies / HZ; + struct tcp_sock *tp = tcp_sk(sk); + bool is_tlp_dupack = (ack == tp->tlp_high_seq) && + !(flag & (FLAG_SND_UNA_ADVANCED | + FLAG_NOT_DUP | FLAG_DATA_SACKED)); - if (now != challenge_timestamp) { - challenge_timestamp = now; - challenge_count = 0; + /* Mark the end of TLP episode on receiving TLP dupack or when + * ack is after tlp_high_seq. + */ + if (is_tlp_dupack) { + tp->tlp_high_seq = 0; + return; } - if (++challenge_count <= sysctl_tcp_challenge_ack_limit) { - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPCHALLENGEACK); - tcp_send_ack(sk); + + if (after(ack, tp->tlp_high_seq)) { + tp->tlp_high_seq = 0; + /* Don't reduce cwnd if DSACK arrives for TLP retrans. */ + if (!(flag & FLAG_DSACKING_ACK)) { + tcp_init_cwnd_reduction(sk, true); + tcp_set_ca_state(sk, TCP_CA_CWR); + tcp_end_cwnd_reduction(sk); + tcp_set_ca_state(sk, TCP_CA_Open); + NET_INC_STATS_BH(sock_net(sk), + LINUX_MIB_TCPLOSSPROBERECOVERY); + } } } @@ -3589,7 +3333,6 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) int prior_packets; int prior_sacked = tp->sacked_out; int pkts_acked = 0; - bool frto_cwnd = false; /* If the ack is older than previous acks * then we can probably ignore it. @@ -3609,24 +3352,22 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) if (after(ack, tp->snd_nxt)) goto invalid_ack; - if (tp->early_retrans_delayed) + if (icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS || + icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) tcp_rearm_rto(sk); if (after(ack, prior_snd_una)) flag |= FLAG_SND_UNA_ADVANCED; - if (sysctl_tcp_abc) { - if (icsk->icsk_ca_state < TCP_CA_CWR) - tp->bytes_acked += ack - prior_snd_una; - else if (icsk->icsk_ca_state == TCP_CA_Loss) - /* we assume just one segment left network */ - tp->bytes_acked += min(ack - prior_snd_una, - tp->mss_cache); - } - prior_fackets = tp->fackets_out; prior_in_flight = tcp_packets_in_flight(tp); + /* ts_recent update must be made after we are sure that the packet + * is in window. + */ + if (flag & FLAG_UPDATE_TS_RECENT) + tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq); + if (!(flag & FLAG_SLOWPATH) && after(ack, prior_snd_una)) { /* Window is constant, pure forward advance. * No more checks are required. @@ -3671,30 +3412,29 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) pkts_acked = prior_packets - tp->packets_out; - if (tp->frto_counter) - frto_cwnd = tcp_process_frto(sk, flag); - /* Guarantee sacktag reordering detection against wrap-arounds */ - if (before(tp->frto_highmark, tp->snd_una)) - tp->frto_highmark = 0; - if (tcp_ack_is_dubious(sk, flag)) { /* Advance CWND, if state allows this. 
*/ - if ((flag & FLAG_DATA_ACKED) && !frto_cwnd && - tcp_may_raise_cwnd(sk, flag)) + if ((flag & FLAG_DATA_ACKED) && tcp_may_raise_cwnd(sk, flag)) tcp_cong_avoid(sk, ack, prior_in_flight); is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP)); tcp_fastretrans_alert(sk, pkts_acked, prior_sacked, is_dupack, flag); } else { - if ((flag & FLAG_DATA_ACKED) && !frto_cwnd) + if (flag & FLAG_DATA_ACKED) tcp_cong_avoid(sk, ack, prior_in_flight); } + if (tp->tlp_high_seq) + tcp_process_tlp_ack(sk, ack, flag); + if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP)) { struct dst_entry *dst = __sk_dst_get(sk); if (dst) dst_confirm(dst); } + + if (icsk->icsk_pending == ICSK_TIME_RETRANS) + tcp_schedule_loss_probe(sk); return 1; no_queue: @@ -3708,6 +3448,9 @@ no_queue: */ if (tcp_send_head(sk)) tcp_ack_probe(sk); + + if (tp->tlp_high_seq) + tcp_process_tlp_ack(sk, ack, flag); return 1; invalid_ack: @@ -3732,8 +3475,8 @@ old_ack: * But, this can also be called on packets in the established flow when * the fast version below fails. */ -void tcp_parse_options(const struct sk_buff *skb, struct tcp_options_received *opt_rx, - const u8 **hvpp, int estab, +void tcp_parse_options(const struct sk_buff *skb, + struct tcp_options_received *opt_rx, int estab, struct tcp_fastopen_cookie *foc) { const unsigned char *ptr; @@ -3817,31 +3560,6 @@ void tcp_parse_options(const struct sk_buff *skb, struct tcp_options_received *o */ break; #endif - case TCPOPT_COOKIE: - /* This option is variable length. - */ - switch (opsize) { - case TCPOLEN_COOKIE_BASE: - /* not yet implemented */ - break; - case TCPOLEN_COOKIE_PAIR: - /* not yet implemented */ - break; - case TCPOLEN_COOKIE_MIN+0: - case TCPOLEN_COOKIE_MIN+2: - case TCPOLEN_COOKIE_MIN+4: - case TCPOLEN_COOKIE_MIN+6: - case TCPOLEN_COOKIE_MAX: - /* 16-bit multiple */ - opt_rx->cookie_plus = opsize; - *hvpp = ptr; - break; - default: - /* ignore option */ - break; - } - break; - case TCPOPT_EXP: /* Fast Open option shares code 254 using a * 16 bits magic number. It's valid only in @@ -3877,7 +3595,7 @@ static bool tcp_parse_aligned_timestamp(struct tcp_sock *tp, const struct tcphdr ++ptr; tp->rx_opt.rcv_tsval = ntohl(*ptr); ++ptr; - tp->rx_opt.rcv_tsecr = ntohl(*ptr); + tp->rx_opt.rcv_tsecr = ntohl(*ptr) - tp->tsoffset; return true; } return false; @@ -3887,8 +3605,7 @@ static bool tcp_parse_aligned_timestamp(struct tcp_sock *tp, const struct tcphdr * If it is wrong it falls back on tcp_parse_options(). */ static bool tcp_fast_parse_options(const struct sk_buff *skb, - const struct tcphdr *th, - struct tcp_sock *tp, const u8 **hvpp) + const struct tcphdr *th, struct tcp_sock *tp) { /* In the spirit of fast parsing, compare doff directly to constant * values. Because equality is used, short doff can be ignored here. 
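The rcv_tsecr adjustment in tcp_parse_aligned_timestamp() above (and its slow-path twin just below) is the receive half of the tp->tsoffset scheme: the offset is added to every timestamp the socket emits and stripped from every echoed value before RTT and PAWS logic see it. A two-helper sketch of the invariant, with illustrative names:

static inline u32 tsval_on_wire(const struct tcp_sock *tp)
{
	return tcp_time_stamp + tp->tsoffset;	/* what the peer will echo back */
}

static inline u32 tsecr_local(const struct tcp_sock *tp, u32 echoed)
{
	return echoed - tp->tsoffset;		/* back into local clock units */
}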
@@ -3901,7 +3618,11 @@ static bool tcp_fast_parse_options(const struct sk_buff *skb, if (tcp_parse_aligned_timestamp(tp, th)) return true; } - tcp_parse_options(skb, &tp->rx_opt, hvpp, 1, NULL); + + tcp_parse_options(skb, &tp->rx_opt, 1, NULL); + if (tp->rx_opt.saw_tstamp) + tp->rx_opt.rcv_tsecr -= tp->tsoffset; + return true; } @@ -3943,27 +3664,6 @@ const u8 *tcp_parse_md5sig_option(const struct tcphdr *th) EXPORT_SYMBOL(tcp_parse_md5sig_option); #endif -static inline void tcp_store_ts_recent(struct tcp_sock *tp) -{ - tp->rx_opt.ts_recent = tp->rx_opt.rcv_tsval; - tp->rx_opt.ts_recent_stamp = get_seconds(); -} - -static inline void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq) -{ - if (tp->rx_opt.saw_tstamp && !after(seq, tp->rcv_wup)) { - /* PAWS bug workaround wrt. ACK frames, the PAWS discard - * extra check below makes sure this can only happen - * for pure ACK frames. -DaveM - * - * Not only, also it occurs for expired timestamps. - */ - - if (tcp_paws_check(&tp->rx_opt, 0)) - tcp_store_ts_recent(tp); - } -} - /* Sorry, PAWS as specified is broken wrt. pure-ACKs -DaveM * * It is not fatal. If this ACK does _not_ change critical state (seqs, window) @@ -5279,12 +4979,10 @@ out: static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb, const struct tcphdr *th, int syn_inerr) { - const u8 *hash_location; struct tcp_sock *tp = tcp_sk(sk); /* RFC1323: H1. Apply PAWS check first. */ - if (tcp_fast_parse_options(skb, th, tp, &hash_location) && - tp->rx_opt.saw_tstamp && + if (tcp_fast_parse_options(skb, th, tp) && tp->rx_opt.saw_tstamp && tcp_paws_discard(sk, skb)) { if (!th->rst) { NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED); @@ -5498,6 +5196,9 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, if (tcp_checksum_complete_user(sk, skb)) goto csum_error; + if ((int)skb->truesize > sk->sk_forward_alloc) + goto step5; + /* Predicted packet is in window by definition. * seq == rcv_nxt and rcv_wup <= rcv_nxt. * Hence, check seq<=rcv_wup reduces to: @@ -5509,9 +5210,6 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, tcp_rcv_rtt_measure_ts(sk, skb); - if ((int)skb->truesize > sk->sk_forward_alloc) - goto step5; - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPHPHITS); /* Bulk data transfer: receiver */ @@ -5559,14 +5257,9 @@ slow_path: return 0; step5: - if (tcp_ack(sk, skb, FLAG_SLOWPATH) < 0) + if (tcp_ack(sk, skb, FLAG_SLOWPATH | FLAG_UPDATE_TS_RECENT) < 0) goto discard; - /* ts_recent update must be made after we are sure that the packet - * is in window. - */ - tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq); - tcp_rcv_rtt_measure_ts(sk, skb); /* Process urgent data. 
*/ @@ -5580,6 +5273,7 @@ step5: return 0; csum_error: + TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_CSUMERRORS); TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS); discard: @@ -5638,12 +5332,11 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack, if (mss == tp->rx_opt.user_mss) { struct tcp_options_received opt; - const u8 *hash_location; /* Get original SYNACK MSS value if user MSS sets mss_clamp */ tcp_clear_options(&opt); opt.user_mss = opt.mss_clamp = 0; - tcp_parse_options(synack, &opt, &hash_location, 0, NULL); + tcp_parse_options(synack, &opt, 0, NULL); mss = opt.mss_clamp; } @@ -5674,14 +5367,14 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack, static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, const struct tcphdr *th, unsigned int len) { - const u8 *hash_location; struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); - struct tcp_cookie_values *cvp = tp->cookie_values; struct tcp_fastopen_cookie foc = { .len = -1 }; int saved_clamp = tp->rx_opt.mss_clamp; - tcp_parse_options(skb, &tp->rx_opt, &hash_location, 0, &foc); + tcp_parse_options(skb, &tp->rx_opt, 0, &foc); + if (tp->rx_opt.saw_tstamp) + tp->rx_opt.rcv_tsecr -= tp->tsoffset; if (th->ack) { /* rfc793: @@ -5776,30 +5469,6 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, * is initialized. */ tp->copied_seq = tp->rcv_nxt; - if (cvp != NULL && - cvp->cookie_pair_size > 0 && - tp->rx_opt.cookie_plus > 0) { - int cookie_size = tp->rx_opt.cookie_plus - - TCPOLEN_COOKIE_BASE; - int cookie_pair_size = cookie_size - + cvp->cookie_desired; - - /* A cookie extension option was sent and returned. - * Note that each incoming SYNACK replaces the - * Responder cookie. The initial exchange is most - * fragile, as protection against spoofing relies - * entirely upon the sequence and timestamp (above). - * This replacement strategy allows the correct pair to - * pass through, while any others will be filtered via - * Responder verification later. - */ - if (sizeof(cvp->cookie_pair) >= cookie_pair_size) { - memcpy(&cvp->cookie_pair[cvp->cookie_desired], - hash_location, cookie_size); - cvp->cookie_pair_size = cookie_pair_size; - } - } - smp_mb(); tcp_finish_connect(sk, skb); @@ -6000,7 +5669,8 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, /* step 5: check the ACK field */ if (true) { - int acceptable = tcp_ack(sk, skb, FLAG_SLOWPATH) > 0; + int acceptable = tcp_ack(sk, skb, FLAG_SLOWPATH | + FLAG_UPDATE_TS_RECENT) > 0; switch (sk->sk_state) { case TCP_SYN_RECV: @@ -6151,11 +5821,6 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, } } - /* ts_recent update must be made after we are sure that the packet - * is in window. - */ - tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq); - /* step 6: check the URG bit */ tcp_urg(sk, skb, th); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index eadb693eef55..719652305a29 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -274,13 +274,6 @@ static void tcp_v4_mtu_reduced(struct sock *sk) struct inet_sock *inet = inet_sk(sk); u32 mtu = tcp_sk(sk)->mtu_info; - /* We are not interested in TCP_LISTEN and open_requests (SYN-ACKs - * send out by Linux are always <576bytes so they should go through - * unfragmented). 
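The tp->tsoffset handling threaded through these hunks randomizes the timestamp clock per connection: TSval goes on the wire with the offset added, and the echoed TSecr has the offset subtracted before RTT sampling or PAWS ever sees it. A sketch of the arithmetic, with illustrative names:

#include <stdint.h>

struct conn {
	uint32_t tsoffset;	/* per-connection random offset */
};

/* TSval actually placed on the wire (cf. tsval + tp->tsoffset). */
static uint32_t tsval_on_wire(const struct conn *c, uint32_t tcp_time_stamp)
{
	return tcp_time_stamp + c->tsoffset;
}

/* Undo the offset on the echo, as rcv_tsecr -= tp->tsoffset does. */
static uint32_t rtt_sample(const struct conn *c, uint32_t now, uint32_t tsecr)
{
	return now - (tsecr - c->tsoffset);	/* unsigned math wraps correctly */
}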
- */ - if (sk->sk_state == TCP_LISTEN) - return; - dst = inet_csk_update_pmtu(sk, mtu); if (!dst) return; @@ -408,6 +401,13 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) goto out; if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */ + /* We are not interested in TCP_LISTEN and open_requests + * (SYN-ACKs send out by Linux are always <576bytes so + * they should go through unfragmented). + */ + if (sk->sk_state == TCP_LISTEN) + goto out; + tp->mtu_info = info; if (!sock_owned_by_user(sk)) { tcp_v4_mtu_reduced(sk); @@ -657,7 +657,8 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb) * no RST generated if md5 hash doesn't match. */ sk1 = __inet_lookup_listener(dev_net(skb_dst(skb)->dev), - &tcp_hashinfo, ip_hdr(skb)->daddr, + &tcp_hashinfo, ip_hdr(skb)->saddr, + th->source, ip_hdr(skb)->daddr, ntohs(th->source), inet_iif(skb)); /* don't send rst if it can't find key */ if (!sk1) @@ -725,7 +726,7 @@ release_sk1: */ static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack, - u32 win, u32 ts, int oif, + u32 win, u32 tsval, u32 tsecr, int oif, struct tcp_md5sig_key *key, int reply_flags, u8 tos) { @@ -746,12 +747,12 @@ static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack, arg.iov[0].iov_base = (unsigned char *)&rep; arg.iov[0].iov_len = sizeof(rep.th); - if (ts) { + if (tsecr) { rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP); - rep.opt[1] = htonl(tcp_time_stamp); - rep.opt[2] = htonl(ts); + rep.opt[1] = htonl(tsval); + rep.opt[2] = htonl(tsecr); arg.iov[0].iov_len += TCPOLEN_TSTAMP_ALIGNED; } @@ -766,7 +767,7 @@ static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack, #ifdef CONFIG_TCP_MD5SIG if (key) { - int offset = (ts) ? 3 : 0; + int offset = (tsecr) ? 3 : 0; rep.opt[offset++] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | @@ -801,6 +802,7 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb) tcp_v4_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt, tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, + tcp_time_stamp + tcptw->tw_ts_offset, tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw), @@ -820,6 +822,7 @@ static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, tcp_v4_send_ack(skb, (sk->sk_state == TCP_LISTEN) ? 
tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt, tcp_rsk(req)->rcv_nxt, req->rcv_wnd, + tcp_time_stamp, req->ts_recent, 0, tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->daddr, @@ -835,7 +838,6 @@ static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, */ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst, struct request_sock *req, - struct request_values *rvp, u16 queue_mapping, bool nocache) { @@ -848,7 +850,7 @@ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst, if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL) return -1; - skb = tcp_make_synack(sk, dst, req, rvp, NULL); + skb = tcp_make_synack(sk, dst, req, NULL); if (skb) { __tcp_v4_send_check(skb, ireq->loc_addr, ireq->rmt_addr); @@ -865,10 +867,9 @@ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst, return err; } -static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req, - struct request_values *rvp) +static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req) { - int res = tcp_v4_send_synack(sk, NULL, req, rvp, 0, false); + int res = tcp_v4_send_synack(sk, NULL, req, 0, false); if (!res) TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS); @@ -951,7 +952,6 @@ struct tcp_md5sig_key *tcp_md5_do_lookup(struct sock *sk, { struct tcp_sock *tp = tcp_sk(sk); struct tcp_md5sig_key *key; - struct hlist_node *pos; unsigned int size = sizeof(struct in_addr); struct tcp_md5sig_info *md5sig; @@ -965,7 +965,7 @@ struct tcp_md5sig_key *tcp_md5_do_lookup(struct sock *sk, if (family == AF_INET6) size = sizeof(struct in6_addr); #endif - hlist_for_each_entry_rcu(key, pos, &md5sig->head, node) { + hlist_for_each_entry_rcu(key, &md5sig->head, node) { if (key->family != family) continue; if (!memcmp(&key->addr, addr, size)) @@ -1066,14 +1066,14 @@ static void tcp_clear_md5_list(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); struct tcp_md5sig_key *key; - struct hlist_node *pos, *n; + struct hlist_node *n; struct tcp_md5sig_info *md5sig; md5sig = rcu_dereference_protected(tp->md5sig_info, 1); if (!hlist_empty(&md5sig->head)) tcp_free_md5sig_pool(); - hlist_for_each_entry_safe(key, pos, n, &md5sig->head, node) { + hlist_for_each_entry_safe(key, n, &md5sig->head, node) { hlist_del_rcu(&key->node); atomic_sub(sizeof(*key), &sk->sk_omem_alloc); kfree_rcu(key, rcu); @@ -1369,8 +1369,7 @@ static bool tcp_fastopen_check(struct sock *sk, struct sk_buff *skb, static int tcp_v4_conn_req_fastopen(struct sock *sk, struct sk_buff *skb, struct sk_buff *skb_synack, - struct request_sock *req, - struct request_values *rvp) + struct request_sock *req) { struct tcp_sock *tp = tcp_sk(sk); struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue; @@ -1465,9 +1464,7 @@ static int tcp_v4_conn_req_fastopen(struct sock *sk, int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) { - struct tcp_extend_values tmp_ext; struct tcp_options_received tmp_opt; - const u8 *hash_location; struct request_sock *req; struct inet_request_sock *ireq; struct tcp_sock *tp = tcp_sk(sk); @@ -1517,42 +1514,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) tcp_clear_options(&tmp_opt); tmp_opt.mss_clamp = TCP_MSS_DEFAULT; tmp_opt.user_mss = tp->rx_opt.user_mss; - tcp_parse_options(skb, &tmp_opt, &hash_location, 0, - want_cookie ? 
NULL : &foc); - - if (tmp_opt.cookie_plus > 0 && - tmp_opt.saw_tstamp && - !tp->rx_opt.cookie_out_never && - (sysctl_tcp_cookie_size > 0 || - (tp->cookie_values != NULL && - tp->cookie_values->cookie_desired > 0))) { - u8 *c; - u32 *mess = &tmp_ext.cookie_bakery[COOKIE_DIGEST_WORDS]; - int l = tmp_opt.cookie_plus - TCPOLEN_COOKIE_BASE; - - if (tcp_cookie_generator(&tmp_ext.cookie_bakery[0]) != 0) - goto drop_and_release; - - /* Secret recipe starts with IP addresses */ - *mess++ ^= (__force u32)daddr; - *mess++ ^= (__force u32)saddr; - - /* plus variable length Initiator Cookie */ - c = (u8 *)mess; - while (l-- > 0) - *c++ ^= *hash_location++; - - want_cookie = false; /* not our kind of cookie */ - tmp_ext.cookie_out_never = 0; /* false */ - tmp_ext.cookie_plus = tmp_opt.cookie_plus; - } else if (!tp->rx_opt.cookie_in_always) { - /* redundant indications, but ensure initialization. */ - tmp_ext.cookie_out_never = 1; /* true */ - tmp_ext.cookie_plus = 0; - } else { - goto drop_and_release; - } - tmp_ext.cookie_in_always = tp->rx_opt.cookie_in_always; + tcp_parse_options(skb, &tmp_opt, 0, want_cookie ? NULL : &foc); if (want_cookie && !tmp_opt.saw_tstamp) tcp_clear_options(&tmp_opt); @@ -1570,7 +1532,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) goto drop_and_free; if (!want_cookie || tmp_opt.tstamp_ok) - TCP_ECN_create_request(req, skb); + TCP_ECN_create_request(req, skb, sock_net(sk)); if (want_cookie) { isn = cookie_v4_init_sequence(sk, skb, &req->mss); @@ -1634,7 +1596,6 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) * of tcp_v4_send_synack()->tcp_select_initial_window(). */ skb_synack = tcp_make_synack(sk, dst, req, - (struct request_values *)&tmp_ext, fastopen_cookie_present(&valid_foc) ? &valid_foc : NULL); if (skb_synack) { @@ -1658,8 +1619,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) if (fastopen_cookie_present(&foc) && foc.len != 0) NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFASTOPENPASSIVEFAIL); - } else if (tcp_v4_conn_req_fastopen(sk, skb, skb_synack, req, - (struct request_values *)&tmp_ext)) + } else if (tcp_v4_conn_req_fastopen(sk, skb, skb_synack, req)) goto drop_and_free; return 0; @@ -1906,6 +1866,7 @@ discard: return 0; csum_err: + TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_CSUMERRORS); TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS); goto discard; } @@ -1948,6 +1909,51 @@ void tcp_v4_early_demux(struct sk_buff *skb) } } +/* Packet is added to VJ-style prequeue for processing in process + * context, if a reader task is waiting. Apparently, this exciting + * idea (VJ's mail "Re: query about TCP header on tcp-ip" of 07 Sep 93) + * failed somewhere. Latency? Burstiness? Well, at least now we will + * see, why it failed. 
8)8) --ANK + * + */ +bool tcp_prequeue(struct sock *sk, struct sk_buff *skb) +{ + struct tcp_sock *tp = tcp_sk(sk); + + if (sysctl_tcp_low_latency || !tp->ucopy.task) + return false; + + if (skb->len <= tcp_hdrlen(skb) && + skb_queue_len(&tp->ucopy.prequeue) == 0) + return false; + + skb_dst_force(skb); + __skb_queue_tail(&tp->ucopy.prequeue, skb); + tp->ucopy.memory += skb->truesize; + if (tp->ucopy.memory > sk->sk_rcvbuf) { + struct sk_buff *skb1; + + BUG_ON(sock_owned_by_user(sk)); + + while ((skb1 = __skb_dequeue(&tp->ucopy.prequeue)) != NULL) { + sk_backlog_rcv(sk, skb1); + NET_INC_STATS_BH(sock_net(sk), + LINUX_MIB_TCPPREQUEUEDROPPED); + } + + tp->ucopy.memory = 0; + } else if (skb_queue_len(&tp->ucopy.prequeue) == 1) { + wake_up_interruptible_sync_poll(sk_sleep(sk), + POLLIN | POLLRDNORM | POLLRDBAND); + if (!inet_csk_ack_scheduled(sk)) + inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, + (3 * tcp_rto_min(sk)) / 4, + TCP_RTO_MAX); + } + return true; +} +EXPORT_SYMBOL(tcp_prequeue); + /* * From tcp_input.c */ @@ -1981,7 +1987,7 @@ int tcp_v4_rcv(struct sk_buff *skb) * provided case of th->doff==0 is eliminated. * So, we defer the checks. */ if (!skb_csum_unnecessary(skb) && tcp_v4_checksum_init(skb)) - goto bad_packet; + goto csum_error; th = tcp_hdr(skb); iph = ip_hdr(skb); @@ -2047,6 +2053,8 @@ no_tcp_socket: goto discard_it; if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) { +csum_error: + TCP_INC_STATS_BH(net, TCP_MIB_CSUMERRORS); bad_packet: TCP_INC_STATS_BH(net, TCP_MIB_INERRS); } else { @@ -2068,15 +2076,19 @@ do_time_wait: goto discard_it; } - if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) { - TCP_INC_STATS_BH(net, TCP_MIB_INERRS); + if (skb->len < (th->doff << 2)) { inet_twsk_put(inet_twsk(sk)); - goto discard_it; + goto bad_packet; + } + if (tcp_checksum_complete(skb)) { + inet_twsk_put(inet_twsk(sk)); + goto csum_error; } switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) { case TCP_TW_SYN: { struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev), &tcp_hashinfo, + iph->saddr, th->source, iph->daddr, th->dest, inet_iif(skb)); if (sk2) { @@ -2194,12 +2206,6 @@ void tcp_v4_destroy_sock(struct sock *sk) if (inet_csk(sk)->icsk_bind_hash) inet_put_port(sk); - /* TCP Cookie Transactions */ - if (tp->cookie_values != NULL) { - kref_put(&tp->cookie_values->kref, - tcp_cookie_values_release); - tp->cookie_values = NULL; - } BUG_ON(tp->fastopen_rsk != NULL); /* If socket is aborted during connect operation */ @@ -2577,7 +2583,7 @@ static void tcp_seq_stop(struct seq_file *seq, void *v) int tcp_seq_open(struct inode *inode, struct file *file) { - struct tcp_seq_afinfo *afinfo = PDE(inode)->data; + struct tcp_seq_afinfo *afinfo = PDE_DATA(inode); struct tcp_iter_state *s; int err; @@ -2612,7 +2618,7 @@ EXPORT_SYMBOL(tcp_proc_register); void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo) { - proc_net_remove(net, afinfo->name); + remove_proc_entry(afinfo->name, net->proc_net); } EXPORT_SYMBOL(tcp_proc_unregister); @@ -2656,7 +2662,9 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len) __u16 srcp = ntohs(inet->inet_sport); int rx_queue; - if (icsk->icsk_pending == ICSK_TIME_RETRANS) { + if (icsk->icsk_pending == ICSK_TIME_RETRANS || + icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS || + icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) { timer_active = 1; timer_expires = icsk->icsk_timeout; } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) { @@ -2891,6 +2899,7 @@ EXPORT_SYMBOL(tcp_prot); static int __net_init 
tcp_sk_init(struct net *net) { + net->ipv4.sysctl_tcp_ecn = 2; return 0; } diff --git a/net/ipv4/tcp_memcontrol.c b/net/ipv4/tcp_memcontrol.c index b6f3583ddfe8..da14436c1735 100644 --- a/net/ipv4/tcp_memcontrol.c +++ b/net/ipv4/tcp_memcontrol.c @@ -64,7 +64,6 @@ void tcp_destroy_cgroup(struct mem_cgroup *memcg) { struct cg_proto *cg_proto; struct tcp_memcontrol *tcp; - u64 val; cg_proto = tcp_prot.proto_cgroup(memcg); if (!cg_proto) @@ -72,8 +71,6 @@ void tcp_destroy_cgroup(struct mem_cgroup *memcg) tcp = tcp_from_cgproto(cg_proto); percpu_counter_destroy(&tcp->tcp_sockets_allocated); - - val = res_counter_read_u64(&tcp->tcp_memory_allocated, RES_LIMIT); } EXPORT_SYMBOL(tcp_destroy_cgroup); diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index f696d7c2e9fa..f6a005c485a9 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -96,7 +96,8 @@ struct tcpm_hash_bucket { static DEFINE_SPINLOCK(tcp_metrics_lock); -static void tcpm_suck_dst(struct tcp_metrics_block *tm, struct dst_entry *dst) +static void tcpm_suck_dst(struct tcp_metrics_block *tm, struct dst_entry *dst, + bool fastopen_clear) { u32 val; @@ -122,9 +123,11 @@ static void tcpm_suck_dst(struct tcp_metrics_block *tm, struct dst_entry *dst) tm->tcpm_vals[TCP_METRIC_REORDERING] = dst_metric_raw(dst, RTAX_REORDERING); tm->tcpm_ts = 0; tm->tcpm_ts_stamp = 0; - tm->tcpm_fastopen.mss = 0; - tm->tcpm_fastopen.syn_loss = 0; - tm->tcpm_fastopen.cookie.len = 0; + if (fastopen_clear) { + tm->tcpm_fastopen.mss = 0; + tm->tcpm_fastopen.syn_loss = 0; + tm->tcpm_fastopen.cookie.len = 0; + } } static struct tcp_metrics_block *tcpm_new(struct dst_entry *dst, @@ -154,7 +157,7 @@ static struct tcp_metrics_block *tcpm_new(struct dst_entry *dst, } tm->tcpm_addr = *addr; - tcpm_suck_dst(tm, dst); + tcpm_suck_dst(tm, dst, true); if (likely(!reclaim)) { tm->tcpm_next = net->ipv4.tcp_metrics_hash[hash].chain; @@ -171,7 +174,7 @@ out_unlock: static void tcpm_check_stamp(struct tcp_metrics_block *tm, struct dst_entry *dst) { if (tm && unlikely(time_after(jiffies, tm->tcpm_stamp + TCP_METRICS_TIMEOUT))) - tcpm_suck_dst(tm, dst); + tcpm_suck_dst(tm, dst, false); } #define TCP_METRICS_RECLAIM_DEPTH 5 diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index f35f2dfb6401..0f0178827259 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -93,15 +93,15 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, const struct tcphdr *th) { struct tcp_options_received tmp_opt; - const u8 *hash_location; struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw); bool paws_reject = false; tmp_opt.saw_tstamp = 0; if (th->doff > (sizeof(*th) >> 2) && tcptw->tw_ts_recent_stamp) { - tcp_parse_options(skb, &tmp_opt, &hash_location, 0, NULL); + tcp_parse_options(skb, &tmp_opt, 0, NULL); if (tmp_opt.saw_tstamp) { + tmp_opt.rcv_tsecr -= tcptw->tw_ts_offset; tmp_opt.ts_recent = tcptw->tw_ts_recent; tmp_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp; paws_reject = tcp_paws_reject(&tmp_opt, th->rst); @@ -288,6 +288,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) tcptw->tw_rcv_wnd = tcp_receive_window(tp); tcptw->tw_ts_recent = tp->rx_opt.ts_recent; tcptw->tw_ts_recent_stamp = tp->rx_opt.ts_recent_stamp; + tcptw->tw_ts_offset = tp->tsoffset; #if IS_ENABLED(CONFIG_IPV6) if (tw->tw_family == PF_INET6) { @@ -386,32 +387,6 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, struct tcp_request_sock *treq = tcp_rsk(req); struct inet_connection_sock *newicsk = 
inet_csk(newsk); struct tcp_sock *newtp = tcp_sk(newsk); - struct tcp_sock *oldtp = tcp_sk(sk); - struct tcp_cookie_values *oldcvp = oldtp->cookie_values; - - /* TCP Cookie Transactions require space for the cookie pair, - * as it differs for each connection. There is no need to - * copy any s_data_payload stored at the original socket. - * Failure will prevent resuming the connection. - * - * Presumed copied, in order of appearance: - * cookie_in_always, cookie_out_never - */ - if (oldcvp != NULL) { - struct tcp_cookie_values *newcvp = - kzalloc(sizeof(*newtp->cookie_values), - GFP_ATOMIC); - - if (newcvp != NULL) { - kref_init(&newcvp->kref); - newcvp->cookie_desired = - oldcvp->cookie_desired; - newtp->cookie_values = newcvp; - } else { - /* Not Yet Implemented */ - newtp->cookie_values = NULL; - } - } /* Now setup tcp_sock */ newtp->pred_flags = 0; @@ -420,8 +395,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, newtp->rcv_nxt = treq->rcv_isn + 1; newtp->snd_sml = newtp->snd_una = - newtp->snd_nxt = newtp->snd_up = - treq->snt_isn + 1 + tcp_s_data_size(oldtp); + newtp->snd_nxt = newtp->snd_up = treq->snt_isn + 1; tcp_prequeue_init(newtp); INIT_LIST_HEAD(&newtp->tsq_node); @@ -438,6 +412,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, newtp->fackets_out = 0; newtp->snd_ssthresh = TCP_INFINITE_SSTHRESH; tcp_enable_early_retrans(newtp); + newtp->tlp_high_seq = 0; /* So many TCP implementations out there (incorrectly) count the * initial SYN frame in their delayed-ACK and congestion control @@ -446,10 +421,6 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, */ newtp->snd_cwnd = TCP_INIT_CWND; newtp->snd_cwnd_cnt = 0; - newtp->bytes_acked = 0; - - newtp->frto_counter = 0; - newtp->frto_highmark = 0; if (newicsk->icsk_ca_ops != &tcp_init_congestion_ops && !try_module_get(newicsk->icsk_ca_ops->owner)) @@ -458,8 +429,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, tcp_set_ca_state(newsk, TCP_CA_Open); tcp_init_xmit_timers(newsk); skb_queue_head_init(&newtp->out_of_order_queue); - newtp->write_seq = newtp->pushed_seq = - treq->snt_isn + 1 + tcp_s_data_size(oldtp); + newtp->write_seq = newtp->pushed_seq = treq->snt_isn + 1; newtp->rx_opt.saw_tstamp = 0; @@ -500,6 +470,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, newtp->rx_opt.ts_recent_stamp = 0; newtp->tcp_header_len = sizeof(struct tcphdr); } + newtp->tsoffset = 0; #ifdef CONFIG_TCP_MD5SIG newtp->md5sig_info = NULL; /*XXX*/ if (newtp->af_specific->md5_lookup(sk, newsk)) @@ -535,7 +506,6 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, bool fastopen) { struct tcp_options_received tmp_opt; - const u8 *hash_location; struct sock *child; const struct tcphdr *th = tcp_hdr(skb); __be32 flg = tcp_flag_word(th) & (TCP_FLAG_RST|TCP_FLAG_SYN|TCP_FLAG_ACK); @@ -545,7 +515,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, tmp_opt.saw_tstamp = 0; if (th->doff > (sizeof(struct tcphdr)>>2)) { - tcp_parse_options(skb, &tmp_opt, &hash_location, 0, NULL); + tcp_parse_options(skb, &tmp_opt, 0, NULL); if (tmp_opt.saw_tstamp) { tmp_opt.ts_recent = req->ts_recent; @@ -581,8 +551,13 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, * * Note that even if there is new data in the SYN packet * they will be thrown away too. + * + * Reset timer after retransmitting SYNACK, similar to + * the idea of fast retransmit in recovery. 
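The hunk just below rearms the request-sock timer after a successful SYNACK retransmit using a clamped exponential backoff, min(TCP_TIMEOUT_INIT << req->num_timeout, TCP_RTO_MAX). As standalone arithmetic (a sketch; the kernel works in jiffies and bounds num_timeout, so the shift cannot overflow):

static unsigned long synack_timeout(unsigned long init_timeout,
				    unsigned int num_timeout,
				    unsigned long max_timeout)
{
	unsigned long t = init_timeout << num_timeout;	/* 1x, 2x, 4x, ... */

	return t < max_timeout ? t : max_timeout;
}

With an initial timeout of 1s and a 120s ceiling, the fifth retransmit waits 32s and later ones saturate at the clamp.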
*/ - inet_rtx_syn_ack(sk, req); + if (!inet_rtx_syn_ack(sk, req)) + req->expires = min(TCP_TIMEOUT_INIT << req->num_timeout, + TCP_RTO_MAX) + jiffies; return NULL; } @@ -645,7 +620,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, */ if ((flg & TCP_FLAG_ACK) && !fastopen && (TCP_SKB_CB(skb)->ack_seq != - tcp_rsk(req)->snt_isn + 1 + tcp_s_data_size(tcp_sk(sk)))) + tcp_rsk(req)->snt_isn + 1)) return sk; /* Also, it would be not so bad idea to check rcv_tsecr, which diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 5d451593ef16..536d40929ba6 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -65,28 +65,24 @@ int sysctl_tcp_base_mss __read_mostly = TCP_BASE_MSS; /* By default, RFC2861 behavior. */ int sysctl_tcp_slow_start_after_idle __read_mostly = 1; -int sysctl_tcp_cookie_size __read_mostly = 0; /* TCP_COOKIE_MAX */ -EXPORT_SYMBOL_GPL(sysctl_tcp_cookie_size); - static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, int push_one, gfp_t gfp); /* Account for new data that has been sent to the network. */ static void tcp_event_new_data_sent(struct sock *sk, const struct sk_buff *skb) { + struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); unsigned int prior_packets = tp->packets_out; tcp_advance_send_head(sk, skb); tp->snd_nxt = TCP_SKB_CB(skb)->end_seq; - /* Don't override Nagle indefinitely with F-RTO */ - if (tp->frto_counter == 2) - tp->frto_counter = 3; - tp->packets_out += tcp_skb_pcount(skb); - if (!prior_packets || tp->early_retrans_delayed) + if (!prior_packets || icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS || + icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) { tcp_rearm_rto(sk); + } } /* SND.NXT, if window was not shrunk. @@ -314,7 +310,7 @@ static inline void TCP_ECN_send_syn(struct sock *sk, struct sk_buff *skb) struct tcp_sock *tp = tcp_sk(sk); tp->ecn_flags = 0; - if (sysctl_tcp_ecn == 1) { + if (sock_net(sk)->ipv4.sysctl_tcp_ecn == 1) { TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_ECE | TCPHDR_CWR; tp->ecn_flags = TCP_ECN_OK; } @@ -384,7 +380,6 @@ static inline bool tcp_urg_mode(const struct tcp_sock *tp) #define OPTION_TS (1 << 1) #define OPTION_MD5 (1 << 2) #define OPTION_WSCALE (1 << 3) -#define OPTION_COOKIE_EXTENSION (1 << 4) #define OPTION_FAST_OPEN_COOKIE (1 << 8) struct tcp_out_options { @@ -398,36 +393,6 @@ struct tcp_out_options { struct tcp_fastopen_cookie *fastopen_cookie; /* Fast open cookie */ }; -/* The sysctl int routines are generic, so check consistency here. - */ -static u8 tcp_cookie_size_check(u8 desired) -{ - int cookie_size; - - if (desired > 0) - /* previously specified */ - return desired; - - cookie_size = ACCESS_ONCE(sysctl_tcp_cookie_size); - if (cookie_size <= 0) - /* no default specified */ - return 0; - - if (cookie_size <= TCP_COOKIE_MIN) - /* value too small, specify minimum */ - return TCP_COOKIE_MIN; - - if (cookie_size >= TCP_COOKIE_MAX) - /* value too large, specify maximum */ - return TCP_COOKIE_MAX; - - if (cookie_size & 1) - /* 8-bit multiple, illegal, fix it */ - cookie_size++; - - return (u8)cookie_size; -} - /* Write previously computed TCP options to the packet. * * Beware: Something in the Internet is very sensitive to the ordering of @@ -446,27 +411,9 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp, { u16 options = opts->options; /* mungable copy */ - /* Having both authentication and cookies for security is redundant, - * and there's certainly not enough room. Instead, the cookie-less - * extension variant is proposed. 
- * - * Consider the pessimal case with authentication. The options - * could look like: - * COOKIE|MD5(20) + MSS(4) + SACK|TS(12) + WSCALE(4) == 40 - */ if (unlikely(OPTION_MD5 & options)) { - if (unlikely(OPTION_COOKIE_EXTENSION & options)) { - *ptr++ = htonl((TCPOPT_COOKIE << 24) | - (TCPOLEN_COOKIE_BASE << 16) | - (TCPOPT_MD5SIG << 8) | - TCPOLEN_MD5SIG); - } else { - *ptr++ = htonl((TCPOPT_NOP << 24) | - (TCPOPT_NOP << 16) | - (TCPOPT_MD5SIG << 8) | - TCPOLEN_MD5SIG); - } - options &= ~OPTION_COOKIE_EXTENSION; + *ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | + (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG); /* overload cookie hash location */ opts->hash_location = (__u8 *)ptr; ptr += 4; @@ -495,44 +442,6 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp, *ptr++ = htonl(opts->tsecr); } - /* Specification requires after timestamp, so do it now. - * - * Consider the pessimal case without authentication. The options - * could look like: - * MSS(4) + SACK|TS(12) + COOKIE(20) + WSCALE(4) == 40 - */ - if (unlikely(OPTION_COOKIE_EXTENSION & options)) { - __u8 *cookie_copy = opts->hash_location; - u8 cookie_size = opts->hash_size; - - /* 8-bit multiple handled in tcp_cookie_size_check() above, - * and elsewhere. - */ - if (0x2 & cookie_size) { - __u8 *p = (__u8 *)ptr; - - /* 16-bit multiple */ - *p++ = TCPOPT_COOKIE; - *p++ = TCPOLEN_COOKIE_BASE + cookie_size; - *p++ = *cookie_copy++; - *p++ = *cookie_copy++; - ptr++; - cookie_size -= 2; - } else { - /* 32-bit multiple */ - *ptr++ = htonl(((TCPOPT_NOP << 24) | - (TCPOPT_NOP << 16) | - (TCPOPT_COOKIE << 8) | - TCPOLEN_COOKIE_BASE) + - cookie_size); - } - - if (cookie_size > 0) { - memcpy(ptr, cookie_copy, cookie_size); - ptr += (cookie_size / 4); - } - } - if (unlikely(OPTION_SACK_ADVERTISE & options)) { *ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | @@ -591,11 +500,7 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb, struct tcp_md5sig_key **md5) { struct tcp_sock *tp = tcp_sk(sk); - struct tcp_cookie_values *cvp = tp->cookie_values; unsigned int remaining = MAX_TCP_OPTION_SPACE; - u8 cookie_size = (!tp->rx_opt.cookie_out_never && cvp != NULL) ? - tcp_cookie_size_check(cvp->cookie_desired) : - 0; struct tcp_fastopen_request *fastopen = tp->fastopen_req; #ifdef CONFIG_TCP_MD5SIG @@ -622,7 +527,7 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb, if (likely(sysctl_tcp_timestamps && *md5 == NULL)) { opts->options |= OPTION_TS; - opts->tsval = TCP_SKB_CB(skb)->when; + opts->tsval = TCP_SKB_CB(skb)->when + tp->tsoffset; opts->tsecr = tp->rx_opt.ts_recent; remaining -= TCPOLEN_TSTAMP_ALIGNED; } @@ -647,52 +552,7 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb, tp->syn_fastopen = 1; } } - /* Note that timestamps are required by the specification. - * - * Odd numbers of bytes are prohibited by the specification, ensuring - * that the cookie is 16-bit aligned, and the resulting cookie pair is - * 32-bit aligned. 
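With the cookie-pair writer deleted, the surviving output path keeps the long-standing aligned layouts; for example the timestamp option always rides behind two NOPs so its 32-bit TSval/TSecr words stay word aligned, the same pattern tcp_v4_send_ack() builds earlier in this patch. A sketch of that emitter:

#include <stdint.h>
#include <arpa/inet.h>

#define TCPOPT_NOP		1
#define TCPOPT_TIMESTAMP	8
#define TCPOLEN_TIMESTAMP	10

/* Emit NOP,NOP,TIMESTAMP,len then TSval/TSecr: 12 bytes, aligned. */
static uint32_t *write_ts_option(uint32_t *ptr, uint32_t tsval, uint32_t tsecr)
{
	*ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
		       (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
	*ptr++ = htonl(tsval);
	*ptr++ = htonl(tsecr);
	return ptr;
}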
- */ - if (*md5 == NULL && - (OPTION_TS & opts->options) && - cookie_size > 0) { - int need = TCPOLEN_COOKIE_BASE + cookie_size; - - if (0x2 & need) { - /* 32-bit multiple */ - need += 2; /* NOPs */ - - if (need > remaining) { - /* try shrinking cookie to fit */ - cookie_size -= 2; - need -= 4; - } - } - while (need > remaining && TCP_COOKIE_MIN <= cookie_size) { - cookie_size -= 4; - need -= 4; - } - if (TCP_COOKIE_MIN <= cookie_size) { - opts->options |= OPTION_COOKIE_EXTENSION; - opts->hash_location = (__u8 *)&cvp->cookie_pair[0]; - opts->hash_size = cookie_size; - - /* Remember for future incarnations. */ - cvp->cookie_desired = cookie_size; - - if (cvp->cookie_desired != cvp->cookie_pair_size) { - /* Currently use random bytes as a nonce, - * assuming these are completely unpredictable - * by hostile users of the same system. - */ - get_random_bytes(&cvp->cookie_pair[0], - cookie_size); - cvp->cookie_pair_size = cookie_size; - } - remaining -= need; - } - } return MAX_TCP_OPTION_SPACE - remaining; } @@ -702,14 +562,10 @@ static unsigned int tcp_synack_options(struct sock *sk, unsigned int mss, struct sk_buff *skb, struct tcp_out_options *opts, struct tcp_md5sig_key **md5, - struct tcp_extend_values *xvp, struct tcp_fastopen_cookie *foc) { struct inet_request_sock *ireq = inet_rsk(req); unsigned int remaining = MAX_TCP_OPTION_SPACE; - u8 cookie_plus = (xvp != NULL && !xvp->cookie_out_never) ? - xvp->cookie_plus : - 0; #ifdef CONFIG_TCP_MD5SIG *md5 = tcp_rsk(req)->af_specific->md5_lookup(sk, req); @@ -757,28 +613,7 @@ static unsigned int tcp_synack_options(struct sock *sk, remaining -= need; } } - /* Similar rationale to tcp_syn_options() applies here, too. - * If the <SYN> options fit, the same options should fit now! - */ - if (*md5 == NULL && - ireq->tstamp_ok && - cookie_plus > TCPOLEN_COOKIE_BASE) { - int need = cookie_plus; /* has TCPOLEN_COOKIE_BASE */ - - if (0x2 & need) { - /* 32-bit multiple */ - need += 2; /* NOPs */ - } - if (need <= remaining) { - opts->options |= OPTION_COOKIE_EXTENSION; - opts->hash_size = cookie_plus - TCPOLEN_COOKIE_BASE; - remaining -= need; - } else { - /* There's no error return, so flag it. */ - xvp->cookie_out_never = 1; /* true */ - opts->hash_size = 0; - } - } + return MAX_TCP_OPTION_SPACE - remaining; } @@ -806,7 +641,7 @@ static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb if (likely(tp->rx_opt.tstamp_ok)) { opts->options |= OPTION_TS; - opts->tsval = tcb ? tcb->when : 0; + opts->tsval = tcb ? tcb->when + tp->tsoffset : 0; opts->tsecr = tp->rx_opt.ts_recent; size += TCPOLEN_TSTAMP_ALIGNED; } @@ -953,7 +788,7 @@ void __init tcp_tasklet_init(void) * We cant xmit new skbs from this context, as we might already * hold qdisc lock. 
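Once the cookie branches are gone, tcp_syn_options() and tcp_synack_options() share the same budgeting shape: start from the 40-byte option space, subtract each aligned option that fits, and return the bytes consumed. A condensed sketch; the aligned lengths mirror the kernel's constants, and the option set shown is illustrative:

#include <stdbool.h>

#define MAX_TCP_OPTION_SPACE	 40
#define TCPOLEN_MSS		 4
#define TCPOLEN_TSTAMP_ALIGNED	 12
#define TCPOLEN_WSCALE_ALIGNED	 4
#define TCPOLEN_SACKPERM_ALIGNED 4

static unsigned int syn_option_bytes(bool ts, bool wscale, bool sack_ok)
{
	unsigned int remaining = MAX_TCP_OPTION_SPACE;

	remaining -= TCPOLEN_MSS;		/* MSS always sent on SYN */
	if (ts)
		remaining -= TCPOLEN_TSTAMP_ALIGNED;
	if (wscale)
		remaining -= TCPOLEN_WSCALE_ALIGNED;
	if (sack_ok && !ts)	/* SACK-permitted piggybacks on the TS block */
		remaining -= TCPOLEN_SACKPERM_ALIGNED;
	return MAX_TCP_OPTION_SPACE - remaining;
}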
*/ -static void tcp_wfree(struct sk_buff *skb) +void tcp_wfree(struct sk_buff *skb) { struct sock *sk = skb->sk; struct tcp_sock *tp = tcp_sk(sk); @@ -1012,6 +847,13 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, __net_timestamp(skb); if (likely(clone_it)) { + const struct sk_buff *fclone = skb + 1; + + if (unlikely(skb->fclone == SKB_FCLONE_ORIG && + fclone->fclone == SKB_FCLONE_CLONE)) + NET_INC_STATS_BH(sock_net(sk), + LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES); + if (unlikely(skb_cloned(skb))) skb = pskb_copy(skb, gfp_mask); else @@ -1298,7 +1140,6 @@ static void __pskb_trim_head(struct sk_buff *skb, int len) eat = min_t(int, len, skb_headlen(skb)); if (eat) { __skb_pull(skb, eat); - skb->avail_size -= eat; len -= eat; if (!len) return; @@ -1331,7 +1172,7 @@ static void __pskb_trim_head(struct sk_buff *skb, int len) /* Remove acked data from a packet in the transmit queue. */ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len) { - if (skb_cloned(skb) && pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) + if (skb_unclone(skb, GFP_ATOMIC)) return -ENOMEM; __pskb_trim_head(skb, len); @@ -1351,8 +1192,8 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len) return 0; } -/* Calculate MSS. Not accounting for SACKs here. */ -int tcp_mtu_to_mss(struct sock *sk, int pmtu) +/* Calculate MSS not accounting any TCP options. */ +static inline int __tcp_mtu_to_mss(struct sock *sk, int pmtu) { const struct tcp_sock *tp = tcp_sk(sk); const struct inet_connection_sock *icsk = inet_csk(sk); @@ -1381,13 +1222,17 @@ int tcp_mtu_to_mss(struct sock *sk, int pmtu) /* Then reserve room for full set of TCP options and 8 bytes of data */ if (mss_now < 48) mss_now = 48; - - /* Now subtract TCP options size, not including SACKs */ - mss_now -= tp->tcp_header_len - sizeof(struct tcphdr); - return mss_now; } +/* Calculate MSS. Not accounting for SACKs here. */ +int tcp_mtu_to_mss(struct sock *sk, int pmtu) +{ + /* Subtract TCP options size, not including SACKs */ + return __tcp_mtu_to_mss(sk, pmtu) - + (tcp_sk(sk)->tcp_header_len - sizeof(struct tcphdr)); +} + /* Inverse of above */ int tcp_mss_to_mtu(struct sock *sk, int mss) { @@ -1629,11 +1474,8 @@ static inline bool tcp_nagle_test(const struct tcp_sock *tp, const struct sk_buf if (nonagle & TCP_NAGLE_PUSH) return true; - /* Don't use the nagle rule for urgent data (or for the final FIN). - * Nagle can be ignored during F-RTO too (see RFC4138). - */ - if (tcp_urg_mode(tp) || (tp->frto_counter == 2) || - (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)) + /* Don't use the nagle rule for urgent data (or for the final FIN). */ + if (tcp_urg_mode(tp) || (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)) return true; if (!tcp_nagle_check(tp, skb, cur_mss, nonagle)) @@ -1806,8 +1648,11 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb) goto send_now; } - /* Ok, it looks like it is advisable to defer. */ - tp->tso_deferred = 1 | (jiffies << 1); + /* Ok, it looks like it is advisable to defer. + * Do not rearm the timer if already set to not break TCP ACK clocking. + */ + if (!tp->tso_deferred) + tp->tso_deferred = 1 | (jiffies << 1); return true; @@ -1955,6 +1800,9 @@ static int tcp_mtu_probe(struct sock *sk) * snd_up-64k-mss .. snd_up cannot be large. However, taking into * account rare use of URG, this is not a big flaw. * + * Send at most one packet when push_one > 0. Temporarily ignore + * cwnd limit to force at most one packet out when push_one == 2. 
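tcp_schedule_loss_probe(), added below, derives the probe timeout (PTO) from the smoothed RTT: twice the RTT, stretched to cover a delayed ACK when only one segment is in flight, floored at 10ms, and never scheduled past the pending RTO. The arithmetic extracted as a sketch, in milliseconds purely for readability (the kernel works in jiffies):

static unsigned int probe_timeout(unsigned int rtt, unsigned int packets_out,
				  unsigned int delack_max,
				  unsigned int time_until_rto)
{
	unsigned int timeout = 2 * rtt;

	if (packets_out == 1 && timeout < rtt + rtt / 2 + delack_max)
		timeout = rtt + rtt / 2 + delack_max;	/* 1.5*RTT + delack */
	if (timeout < 10)
		timeout = 10;
	if (timeout > time_until_rto)	/* RTO fires first: defer to it */
		timeout = time_until_rto;
	return timeout;
}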
+ * Returns true, if no segments are in flight and we have queued segments, * but cannot send anything now because of SWS or another problem. */ @@ -1990,8 +1838,13 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, goto repair; /* Skip network transmission */ cwnd_quota = tcp_cwnd_test(tp, skb); - if (!cwnd_quota) - break; + if (!cwnd_quota) { + if (push_one == 2) + /* Force out a loss probe pkt. */ + cwnd_quota = 1; + else + break; + } if (unlikely(!tcp_snd_wnd_test(tp, skb, mss_now))) break; @@ -2045,10 +1898,129 @@ repair: if (likely(sent_pkts)) { if (tcp_in_cwnd_reduction(sk)) tp->prr_out += sent_pkts; + + /* Send one loss probe per tail loss episode. */ + if (push_one != 2) + tcp_schedule_loss_probe(sk); tcp_cwnd_validate(sk); return false; } - return !tp->packets_out && tcp_send_head(sk); + return (push_one == 2) || (!tp->packets_out && tcp_send_head(sk)); +} + +bool tcp_schedule_loss_probe(struct sock *sk) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + struct tcp_sock *tp = tcp_sk(sk); + u32 timeout, tlp_time_stamp, rto_time_stamp; + u32 rtt = tp->srtt >> 3; + + if (WARN_ON(icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS)) + return false; + /* No consecutive loss probes. */ + if (WARN_ON(icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)) { + tcp_rearm_rto(sk); + return false; + } + /* Don't do any loss probe on a Fast Open connection before 3WHS + * finishes. + */ + if (sk->sk_state == TCP_SYN_RECV) + return false; + + /* TLP is only scheduled when next timer event is RTO. */ + if (icsk->icsk_pending != ICSK_TIME_RETRANS) + return false; + + /* Schedule a loss probe in 2*RTT for SACK capable connections + * in Open state, that are either limited by cwnd or application. + */ + if (sysctl_tcp_early_retrans < 3 || !rtt || !tp->packets_out || + !tcp_is_sack(tp) || inet_csk(sk)->icsk_ca_state != TCP_CA_Open) + return false; + + if ((tp->snd_cwnd > tcp_packets_in_flight(tp)) && + tcp_send_head(sk)) + return false; + + /* Probe timeout is at least 1.5*rtt + TCP_DELACK_MAX to account + * for delayed ack when there's one outstanding packet. + */ + timeout = rtt << 1; + if (tp->packets_out == 1) + timeout = max_t(u32, timeout, + (rtt + (rtt >> 1) + TCP_DELACK_MAX)); + timeout = max_t(u32, timeout, msecs_to_jiffies(10)); + + /* If RTO is shorter, just schedule TLP in its place. */ + tlp_time_stamp = tcp_time_stamp + timeout; + rto_time_stamp = (u32)inet_csk(sk)->icsk_timeout; + if ((s32)(tlp_time_stamp - rto_time_stamp) > 0) { + s32 delta = rto_time_stamp - tcp_time_stamp; + if (delta > 0) + timeout = delta; + } + + inet_csk_reset_xmit_timer(sk, ICSK_TIME_LOSS_PROBE, timeout, + TCP_RTO_MAX); + return true; +} + +/* When probe timeout (PTO) fires, send a new segment if one exists, else + * retransmit the last segment. + */ +void tcp_send_loss_probe(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct sk_buff *skb; + int pcount; + int mss = tcp_current_mss(sk); + int err = -1; + + if (tcp_send_head(sk) != NULL) { + err = tcp_write_xmit(sk, mss, TCP_NAGLE_OFF, 2, GFP_ATOMIC); + goto rearm_timer; + } + + /* At most one outstanding TLP retransmission. */ + if (tp->tlp_high_seq) + goto rearm_timer; + + /* Retransmit last segment. 
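tcp_send_loss_probe(), whose body continues below, is guarded so that only one probe per tail-loss episode can be outstanding: tlp_high_seq is recorded when the probe leaves and cleared when the retransmit timer takes over (tp->tlp_high_seq = 0 in tcp_retransmit_timer()) or the ACK path resolves the episode. A sketch of that bookkeeping, with illustrative names:

#include <stdint.h>
#include <stdbool.h>

struct tlp_state {
	uint32_t tlp_high_seq;	/* 0: no probe episode outstanding */
	uint32_t snd_nxt;
};

static bool tlp_allowed(const struct tlp_state *s)
{
	return s->tlp_high_seq == 0;	/* at most one outstanding TLP */
}

static void tlp_probe_sent(struct tlp_state *s)
{
	s->tlp_high_seq = s->snd_nxt;	/* remembered for loss detection */
}

static void tlp_rto_fired(struct tlp_state *s)
{
	s->tlp_high_seq = 0;		/* RTO supersedes the TLP episode */
}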
*/ + skb = tcp_write_queue_tail(sk); + if (WARN_ON(!skb)) + goto rearm_timer; + + pcount = tcp_skb_pcount(skb); + if (WARN_ON(!pcount)) + goto rearm_timer; + + if ((pcount > 1) && (skb->len > (pcount - 1) * mss)) { + if (unlikely(tcp_fragment(sk, skb, (pcount - 1) * mss, mss))) + goto rearm_timer; + skb = tcp_write_queue_tail(sk); + } + + if (WARN_ON(!skb || !tcp_skb_pcount(skb))) + goto rearm_timer; + + /* Probe with zero data doesn't trigger fast recovery. */ + if (skb->len > 0) + err = __tcp_retransmit_skb(sk, skb); + + /* Record snd_nxt for loss detection. */ + if (likely(!err)) + tp->tlp_high_seq = tp->snd_nxt; + +rearm_timer: + inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, + inet_csk(sk)->icsk_rto, + TCP_RTO_MAX); + + if (likely(!err)) + NET_INC_STATS_BH(sock_net(sk), + LINUX_MIB_TCPLOSSPROBES); + return; } /* Push out any pending frames which were held back due to @@ -2382,8 +2354,12 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) */ TCP_SKB_CB(skb)->when = tcp_time_stamp; - /* make sure skb->data is aligned on arches that require it */ - if (unlikely(NET_IP_ALIGN && ((unsigned long)skb->data & 3))) { + /* make sure skb->data is aligned on arches that require it + * and check if ack-trimming & collapsing extended the headroom + * beyond what csum_start can cover. + */ + if (unlikely((NET_IP_ALIGN && ((unsigned long)skb->data & 3)) || + skb_headroom(skb) >= 0xFFFF)) { struct sk_buff *nskb = __pskb_copy(skb, MAX_TCP_HEADER, GFP_ATOMIC); return nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) : @@ -2669,32 +2645,24 @@ int tcp_send_synack(struct sock *sk) * sk: listener socket * dst: dst entry attached to the SYNACK * req: request_sock pointer - * rvp: request_values pointer * * Allocate one skb and build a SYNACK packet. * @dst is consumed : Caller should not use it again. 
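The __tcp_retransmit_skb() change above now copies the skb not only when the data pointer is misaligned on strict-alignment machines, but also when ack-trimming and collapsing have grown the headroom beyond what the 16-bit skb->csum_start offset can reach. The combined predicate, as a sketch:

#include <stdbool.h>

/* Copy before retransmit if data is unaligned on a strict-alignment
 * arch, or if headroom exceeds what 16-bit csum_start can cover. */
static bool must_copy_for_retransmit(unsigned long data_addr,
				     unsigned int headroom,
				     bool net_ip_align)
{
	return (net_ip_align && (data_addr & 3)) || headroom >= 0xFFFF;
}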
*/ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, struct request_sock *req, - struct request_values *rvp, struct tcp_fastopen_cookie *foc) { struct tcp_out_options opts; - struct tcp_extend_values *xvp = tcp_xv(rvp); struct inet_request_sock *ireq = inet_rsk(req); struct tcp_sock *tp = tcp_sk(sk); - const struct tcp_cookie_values *cvp = tp->cookie_values; struct tcphdr *th; struct sk_buff *skb; struct tcp_md5sig_key *md5; int tcp_header_size; int mss; - int s_data_desired = 0; - if (cvp != NULL && cvp->s_data_constant && cvp->s_data_desired) - s_data_desired = cvp->s_data_desired; - skb = alloc_skb(MAX_TCP_HEADER + 15 + s_data_desired, - sk_gfp_atomic(sk, GFP_ATOMIC)); + skb = alloc_skb(MAX_TCP_HEADER + 15, sk_gfp_atomic(sk, GFP_ATOMIC)); if (unlikely(!skb)) { dst_release(dst); return NULL; @@ -2703,6 +2671,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, skb_reserve(skb, MAX_TCP_HEADER); skb_dst_set(skb, dst); + security_skb_owned_by(skb, sk); mss = dst_metric_advmss(dst); if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < mss) @@ -2736,9 +2705,8 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, else #endif TCP_SKB_CB(skb)->when = tcp_time_stamp; - tcp_header_size = tcp_synack_options(sk, req, mss, - skb, &opts, &md5, xvp, foc) - + sizeof(*th); + tcp_header_size = tcp_synack_options(sk, req, mss, skb, &opts, &md5, + foc) + sizeof(*th); skb_push(skb, tcp_header_size); skb_reset_transport_header(skb); @@ -2756,40 +2724,6 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, tcp_init_nondata_skb(skb, tcp_rsk(req)->snt_isn, TCPHDR_SYN | TCPHDR_ACK); - if (OPTION_COOKIE_EXTENSION & opts.options) { - if (s_data_desired) { - u8 *buf = skb_put(skb, s_data_desired); - - /* copy data directly from the listening socket. */ - memcpy(buf, cvp->s_data_payload, s_data_desired); - TCP_SKB_CB(skb)->end_seq += s_data_desired; - } - - if (opts.hash_size > 0) { - __u32 workspace[SHA_WORKSPACE_WORDS]; - u32 *mess = &xvp->cookie_bakery[COOKIE_DIGEST_WORDS]; - u32 *tail = &mess[COOKIE_MESSAGE_WORDS-1]; - - /* Secret recipe depends on the Timestamp, (future) - * Sequence and Acknowledgment Numbers, Initiator - * Cookie, and others handled by IP variant caller. - */ - *tail-- ^= opts.tsval; - *tail-- ^= tcp_rsk(req)->rcv_isn + 1; - *tail-- ^= TCP_SKB_CB(skb)->seq + 1; - - /* recommended */ - *tail-- ^= (((__force u32)th->dest << 16) | (__force u32)th->source); - *tail-- ^= (u32)(unsigned long)cvp; /* per sockopt */ - - sha_transform((__u32 *)&xvp->cookie_bakery[0], - (char *)mess, - &workspace[0]); - opts.hash_location = - (__u8 *)&xvp->cookie_bakery[0]; - } - } - th->seq = htonl(TCP_SKB_CB(skb)->seq); /* XXX data is queued and acked as is. 
No buffer/window check */ th->ack_seq = htonl(tcp_rsk(req)->rcv_nxt); @@ -2930,7 +2864,7 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn) */ if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < tp->rx_opt.mss_clamp) tp->rx_opt.mss_clamp = tp->rx_opt.user_mss; - space = tcp_mtu_to_mss(sk, inet_csk(sk)->icsk_pmtu_cookie) - + space = __tcp_mtu_to_mss(sk, inet_csk(sk)->icsk_pmtu_cookie) - MAX_TCP_OPTION_SPACE; syn_data = skb_copy_expand(syn, skb_headroom(syn), space, diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c index 4526fe68e60e..d4943f67aff2 100644 --- a/net/ipv4/tcp_probe.c +++ b/net/ipv4/tcp_probe.c @@ -234,7 +234,7 @@ static __init int tcpprobe_init(void) if (!tcp_probe.log) goto err0; - if (!proc_net_fops_create(&init_net, procname, S_IRUSR, &tcpprobe_fops)) + if (!proc_create(procname, S_IRUSR, init_net.proc_net, &tcpprobe_fops)) goto err0; ret = register_jprobe(&tcp_jprobe); @@ -244,7 +244,7 @@ static __init int tcpprobe_init(void) pr_info("probe registered (port=%d) bufsize=%u\n", port, bufsize); return 0; err1: - proc_net_remove(&init_net, procname); + remove_proc_entry(procname, init_net.proc_net); err0: kfree(tcp_probe.log); return ret; @@ -253,7 +253,7 @@ module_init(tcpprobe_init); static __exit void tcpprobe_exit(void) { - proc_net_remove(&init_net, procname); + remove_proc_entry(procname, init_net.proc_net); unregister_jprobe(&tcp_jprobe); kfree(tcp_probe.log); } diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index b78aac30c498..4b85e6f636c9 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -342,10 +342,6 @@ void tcp_retransmit_timer(struct sock *sk) struct tcp_sock *tp = tcp_sk(sk); struct inet_connection_sock *icsk = inet_csk(sk); - if (tp->early_retrans_delayed) { - tcp_resume_early_retransmit(sk); - return; - } if (tp->fastopen_rsk) { WARN_ON_ONCE(sk->sk_state != TCP_SYN_RECV && sk->sk_state != TCP_FIN_WAIT1); @@ -360,6 +356,8 @@ void tcp_retransmit_timer(struct sock *sk) WARN_ON(tcp_write_queue_empty(sk)); + tp->tlp_high_seq = 0; + if (!tp->snd_wnd && !sock_flag(sk, SOCK_DEAD) && !((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV))) { /* Receiver dastardly shrinks window. 
Our retransmits @@ -418,11 +416,7 @@ void tcp_retransmit_timer(struct sock *sk) NET_INC_STATS_BH(sock_net(sk), mib_idx); } - if (tcp_use_frto(sk)) { - tcp_enter_frto(sk); - } else { - tcp_enter_loss(sk, 0); - } + tcp_enter_loss(sk, 0); if (tcp_retransmit_skb(sk, tcp_write_queue_head(sk)) > 0) { /* Retransmission failed because of local congestion, @@ -495,13 +489,20 @@ void tcp_write_timer_handler(struct sock *sk) } event = icsk->icsk_pending; - icsk->icsk_pending = 0; switch (event) { + case ICSK_TIME_EARLY_RETRANS: + tcp_resume_early_retransmit(sk); + break; + case ICSK_TIME_LOSS_PROBE: + tcp_send_loss_probe(sk); + break; case ICSK_TIME_RETRANS: + icsk->icsk_pending = 0; tcp_retransmit_timer(sk); break; case ICSK_TIME_PROBE0: + icsk->icsk_pending = 0; tcp_probe_timer(sk); break; } diff --git a/net/ipv4/tcp_westwood.c b/net/ipv4/tcp_westwood.c index 1b91bf48e277..76a1e23259e1 100644 --- a/net/ipv4/tcp_westwood.c +++ b/net/ipv4/tcp_westwood.c @@ -236,7 +236,7 @@ static void tcp_westwood_event(struct sock *sk, enum tcp_ca_event event) tp->snd_cwnd = tp->snd_ssthresh = tcp_westwood_bw_rttmin(sk); break; - case CA_EVENT_FRTO: + case CA_EVENT_LOSS: tp->snd_ssthresh = tcp_westwood_bw_rttmin(sk); /* Update RTT_min when next ack arrives */ w->reset_rtt_min = 1; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 1f4d405eafba..0bf5d399a03c 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -139,6 +139,7 @@ static int udp_lib_lport_inuse(struct net *net, __u16 num, { struct sock *sk2; struct hlist_nulls_node *node; + kuid_t uid = sock_i_uid(sk); sk_nulls_for_each(sk2, node, &hslot->head) if (net_eq(sock_net(sk2), net) && @@ -147,6 +148,8 @@ static int udp_lib_lport_inuse(struct net *net, __u16 num, (!sk2->sk_reuse || !sk->sk_reuse) && (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) && + (!sk2->sk_reuseport || !sk->sk_reuseport || + !uid_eq(uid, sock_i_uid(sk2))) && (*saddr_comp)(sk, sk2)) { if (bitmap) __set_bit(udp_sk(sk2)->udp_port_hash >> log, @@ -169,6 +172,7 @@ static int udp_lib_lport_inuse2(struct net *net, __u16 num, { struct sock *sk2; struct hlist_nulls_node *node; + kuid_t uid = sock_i_uid(sk); int res = 0; spin_lock(&hslot2->lock); @@ -179,6 +183,8 @@ static int udp_lib_lport_inuse2(struct net *net, __u16 num, (!sk2->sk_reuse || !sk->sk_reuse) && (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) && + (!sk2->sk_reuseport || !sk->sk_reuseport || + !uid_eq(uid, sock_i_uid(sk2))) && (*saddr_comp)(sk, sk2)) { res = 1; break; @@ -337,26 +343,26 @@ static inline int compute_score(struct sock *sk, struct net *net, __be32 saddr, !ipv6_only_sock(sk)) { struct inet_sock *inet = inet_sk(sk); - score = (sk->sk_family == PF_INET ? 1 : 0); + score = (sk->sk_family == PF_INET ? 
2 : 1); if (inet->inet_rcv_saddr) { if (inet->inet_rcv_saddr != daddr) return -1; - score += 2; + score += 4; } if (inet->inet_daddr) { if (inet->inet_daddr != saddr) return -1; - score += 2; + score += 4; } if (inet->inet_dport) { if (inet->inet_dport != sport) return -1; - score += 2; + score += 4; } if (sk->sk_bound_dev_if) { if (sk->sk_bound_dev_if != dif) return -1; - score += 2; + score += 4; } } return score; @@ -365,7 +371,6 @@ static inline int compute_score(struct sock *sk, struct net *net, __be32 saddr, /* * In this second variant, we check (daddr, dport) matches (inet_rcv_sadd, inet_num) */ -#define SCORE2_MAX (1 + 2 + 2 + 2) static inline int compute_score2(struct sock *sk, struct net *net, __be32 saddr, __be16 sport, __be32 daddr, unsigned int hnum, int dif) @@ -380,21 +385,21 @@ static inline int compute_score2(struct sock *sk, struct net *net, if (inet->inet_num != hnum) return -1; - score = (sk->sk_family == PF_INET ? 1 : 0); + score = (sk->sk_family == PF_INET ? 2 : 1); if (inet->inet_daddr) { if (inet->inet_daddr != saddr) return -1; - score += 2; + score += 4; } if (inet->inet_dport) { if (inet->inet_dport != sport) return -1; - score += 2; + score += 4; } if (sk->sk_bound_dev_if) { if (sk->sk_bound_dev_if != dif) return -1; - score += 2; + score += 4; } } return score; @@ -409,19 +414,29 @@ static struct sock *udp4_lib_lookup2(struct net *net, { struct sock *sk, *result; struct hlist_nulls_node *node; - int score, badness; + int score, badness, matches = 0, reuseport = 0; + u32 hash = 0; begin: result = NULL; - badness = -1; + badness = 0; udp_portaddr_for_each_entry_rcu(sk, node, &hslot2->head) { score = compute_score2(sk, net, saddr, sport, daddr, hnum, dif); if (score > badness) { result = sk; badness = score; - if (score == SCORE2_MAX) - goto exact_match; + reuseport = sk->sk_reuseport; + if (reuseport) { + hash = inet_ehashfn(net, daddr, hnum, + saddr, htons(sport)); + matches = 1; + } + } else if (score == badness && reuseport) { + matches++; + if (((u64)hash * matches) >> 32 == 0) + result = sk; + hash = next_pseudo_random32(hash); } } /* @@ -431,9 +446,7 @@ begin: */ if (get_nulls_value(node) != slot2) goto begin; - if (result) { -exact_match: if (unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2))) result = NULL; else if (unlikely(compute_score2(result, net, saddr, sport, @@ -457,7 +470,8 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, unsigned short hnum = ntohs(dport); unsigned int hash2, slot2, slot = udp_hashfn(net, hnum, udptable->mask); struct udp_hslot *hslot2, *hslot = &udptable->hash[slot]; - int score, badness; + int score, badness, matches = 0, reuseport = 0; + u32 hash = 0; rcu_read_lock(); if (hslot->count > 10) { @@ -486,13 +500,24 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, } begin: result = NULL; - badness = -1; + badness = 0; sk_nulls_for_each_rcu(sk, node, &hslot->head) { score = compute_score(sk, net, saddr, hnum, sport, daddr, dport, dif); if (score > badness) { result = sk; badness = score; + reuseport = sk->sk_reuseport; + if (reuseport) { + hash = inet_ehashfn(net, daddr, hnum, + saddr, htons(sport)); + matches = 1; + } + } else if (score == badness && reuseport) { + matches++; + if (((u64)hash * matches) >> 32 == 0) + result = sk; + hash = next_pseudo_random32(hash); } } /* @@ -877,9 +902,9 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, ipc.addr = inet->inet_saddr; ipc.oif = sk->sk_bound_dev_if; - err = sock_tx_timestamp(sk, &ipc.tx_flags); - if (err) - return 
err; + + sock_tx_timestamp(sk, &ipc.tx_flags); + if (msg->msg_controllen) { err = ip_cmsg_send(sock_net(sk), msg, &ipc); if (err) @@ -971,7 +996,7 @@ back_from_confirm: sizeof(struct udphdr), &ipc, &rt, msg->msg_flags); err = PTR_ERR(skb); - if (skb && !IS_ERR(skb)) + if (!IS_ERR_OR_NULL(skb)) err = udp_send_skb(skb, fl4); goto out; } @@ -1106,6 +1131,8 @@ static unsigned int first_packet_length(struct sock *sk) spin_lock_bh(&rcvq->lock); while ((skb = skb_peek(rcvq)) != NULL && udp_lib_checksum_complete(skb)) { + UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_CSUMERRORS, + IS_UDPLITE(sk)); UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, IS_UDPLITE(sk)); atomic_inc(&sk->sk_drops); @@ -1261,8 +1288,10 @@ out: csum_copy_err: slow = lock_sock_fast(sk); - if (!skb_kill_datagram(sk, skb, flags)) + if (!skb_kill_datagram(sk, skb, flags)) { + UDP_INC_STATS_USER(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite); UDP_INC_STATS_USER(sock_net(sk), UDP_MIB_INERRORS, is_udplite); + } unlock_sock_fast(sk, slow); if (noblock) @@ -1488,7 +1517,7 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) if (rcu_access_pointer(sk->sk_filter) && udp_lib_checksum_complete(skb)) - goto drop; + goto csum_error; if (sk_rcvqueues_full(sk, skb, sk->sk_rcvbuf)) @@ -1508,6 +1537,8 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) return rc; +csum_error: + UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite); drop: UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite); atomic_inc(&sk->sk_drops); @@ -1724,6 +1755,7 @@ csum_error: proto == IPPROTO_UDPLITE ? "Lite" : "", &saddr, ntohs(uh->source), &daddr, ntohs(uh->dest), ulen); + UDP_INC_STATS_BH(net, UDP_MIB_CSUMERRORS, proto == IPPROTO_UDPLITE); drop: UDP_INC_STATS_BH(net, UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE); kfree_skb(skb); @@ -1737,9 +1769,16 @@ int udp_rcv(struct sk_buff *skb) void udp_destroy_sock(struct sock *sk) { + struct udp_sock *up = udp_sk(sk); bool slow = lock_sock_fast(sk); udp_flush_pending_frames(sk); unlock_sock_fast(sk, slow); + if (static_key_false(&udp_encap_needed) && up->encap_type) { + void (*encap_destroy)(struct sock *sk); + encap_destroy = ACCESS_ONCE(up->encap_destroy); + if (encap_destroy) + encap_destroy(sk); + } } /* @@ -2061,7 +2100,7 @@ static void udp_seq_stop(struct seq_file *seq, void *v) int udp_seq_open(struct inode *inode, struct file *file) { - struct udp_seq_afinfo *afinfo = PDE(inode)->data; + struct udp_seq_afinfo *afinfo = PDE_DATA(inode); struct udp_iter_state *s; int err; @@ -2097,7 +2136,7 @@ EXPORT_SYMBOL(udp_proc_register); void udp_proc_unregister(struct net *net, struct udp_seq_afinfo *afinfo) { - proc_net_remove(net, afinfo->name); + remove_proc_entry(afinfo->name, net->proc_net); } EXPORT_SYMBOL(udp_proc_unregister); @@ -2247,31 +2286,91 @@ void __init udp_init(void) int udp4_ufo_send_check(struct sk_buff *skb) { - const struct iphdr *iph; - struct udphdr *uh; - - if (!pskb_may_pull(skb, sizeof(*uh))) + if (!pskb_may_pull(skb, sizeof(struct udphdr))) return -EINVAL; - iph = ip_hdr(skb); - uh = udp_hdr(skb); + if (likely(!skb->encapsulation)) { + const struct iphdr *iph; + struct udphdr *uh; - uh->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, skb->len, - IPPROTO_UDP, 0); - skb->csum_start = skb_transport_header(skb) - skb->head; - skb->csum_offset = offsetof(struct udphdr, check); - skb->ip_summed = CHECKSUM_PARTIAL; + iph = ip_hdr(skb); + uh = udp_hdr(skb); + + uh->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, skb->len, + IPPROTO_UDP, 0); + skb->csum_start = 
skb_transport_header(skb) - skb->head; + skb->csum_offset = offsetof(struct udphdr, check); + skb->ip_summed = CHECKSUM_PARTIAL; + } return 0; } +static struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb, + netdev_features_t features) +{ + struct sk_buff *segs = ERR_PTR(-EINVAL); + int mac_len = skb->mac_len; + int tnl_hlen = skb_inner_mac_header(skb) - skb_transport_header(skb); + __be16 protocol = skb->protocol; + netdev_features_t enc_features; + int outer_hlen; + + if (unlikely(!pskb_may_pull(skb, tnl_hlen))) + goto out; + + skb->encapsulation = 0; + __skb_pull(skb, tnl_hlen); + skb_reset_mac_header(skb); + skb_set_network_header(skb, skb_inner_network_offset(skb)); + skb->mac_len = skb_inner_network_offset(skb); + skb->protocol = htons(ETH_P_TEB); + + /* segment inner packet. */ + enc_features = skb->dev->hw_enc_features & netif_skb_features(skb); + segs = skb_mac_gso_segment(skb, enc_features); + if (!segs || IS_ERR(segs)) + goto out; + + outer_hlen = skb_tnl_header_len(skb); + skb = segs; + do { + struct udphdr *uh; + int udp_offset = outer_hlen - tnl_hlen; + + skb->mac_len = mac_len; + + skb_push(skb, outer_hlen); + skb_reset_mac_header(skb); + skb_set_network_header(skb, mac_len); + skb_set_transport_header(skb, udp_offset); + uh = udp_hdr(skb); + uh->len = htons(skb->len - udp_offset); + + /* csum segment if tunnel sets skb with csum. */ + if (unlikely(uh->check)) { + struct iphdr *iph = ip_hdr(skb); + + uh->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, + skb->len - udp_offset, + IPPROTO_UDP, 0); + uh->check = csum_fold(skb_checksum(skb, udp_offset, + skb->len - udp_offset, 0)); + if (uh->check == 0) + uh->check = CSUM_MANGLED_0; + + } + skb->ip_summed = CHECKSUM_NONE; + skb->protocol = protocol; + } while ((skb = skb->next)); +out: + return segs; +} + struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, netdev_features_t features) { struct sk_buff *segs = ERR_PTR(-EINVAL); unsigned int mss; - int offset; - __wsum csum; - mss = skb_shinfo(skb)->gso_size; if (unlikely(skb->len <= mss)) goto out; @@ -2280,7 +2379,9 @@ struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, /* Packet is from an untrusted source, reset gso_segs. */ int type = skb_shinfo(skb)->gso_type; - if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY) || + if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY | + SKB_GSO_UDP_TUNNEL | + SKB_GSO_GRE) || !(type & (SKB_GSO_UDP)))) goto out; @@ -2290,20 +2391,27 @@ struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, goto out; } - /* Do software UFO. Complete and fill in the UDP checksum as HW cannot - * do checksum of UDP packets sent as multiple IP fragments. - */ - offset = skb_checksum_start_offset(skb); - csum = skb_checksum(skb, offset, skb->len - offset, 0); - offset += skb->csum_offset; - *(__sum16 *)(skb->data + offset) = csum_fold(csum); - skb->ip_summed = CHECKSUM_NONE; - /* Fragment the skb. IP headers of the fragments are updated in * inet_gso_segment() */ - segs = skb_segment(skb, features); + if (skb->encapsulation && skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL) + segs = skb_udp_tunnel_segment(skb, features); + else { + int offset; + __wsum csum; + + /* Do software UFO. Complete and fill in the UDP checksum as + * HW cannot do checksum of UDP packets sent as multiple + * IP fragments. 
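The software-UFO branch that comment introduces must produce the final UDP checksum itself, since hardware cannot checksum a datagram that will leave as multiple IP fragments. A plain-C sketch of the RFC 1071 sum-and-fold step over a byte buffer, standing in for the skb_checksum()/csum_fold() pair:

#include <stdint.h>
#include <stddef.h>

static uint16_t csum_fold32(uint32_t sum)
{
	while (sum >> 16)			/* fold carries back in */
		sum = (sum & 0xffff) + (sum >> 16);
	return (uint16_t)~sum;
}

static uint16_t udp_software_csum(const uint8_t *data, size_t len,
				  uint32_t pseudo_sum)
{
	uint32_t sum = pseudo_sum;	/* pseudo-header sum, precomputed */
	size_t i;

	for (i = 0; i + 1 < len; i += 2)
		sum += (uint32_t)((data[i] << 8) | data[i + 1]);
	if (i < len)
		sum += (uint32_t)data[i] << 8;	/* odd trailing byte */
	return csum_fold32(sum);
}

Since a transmitted UDP checksum of zero means "no checksum", a computed zero goes out as 0xFFFF, which is what the CSUM_MANGLED_0 fixup in the tunnel-segment hunk above does.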
+ */ + offset = skb_checksum_start_offset(skb); + csum = skb_checksum(skb, offset, skb->len - offset, 0); + offset += skb->csum_offset; + *(__sum16 *)(skb->data + offset) = csum_fold(csum); + skb->ip_summed = CHECKSUM_NONE; + + segs = skb_segment(skb, features); + } out: return segs; } - diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c index 505b30ad9182..7927db0a9279 100644 --- a/net/ipv4/udp_diag.c +++ b/net/ipv4/udp_diag.c @@ -25,7 +25,7 @@ static int sk_diag_dump(struct sock *sk, struct sk_buff *skb, return 0; return inet_sk_diag_fill(sk, NULL, skb, req, - sk_user_ns(NETLINK_CB(cb->skb).ssk), + sk_user_ns(NETLINK_CB(cb->skb).sk), NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh); } @@ -64,14 +64,14 @@ static int udp_dump_one(struct udp_table *tbl, struct sk_buff *in_skb, goto out; err = -ENOMEM; - rep = alloc_skb(NLMSG_SPACE((sizeof(struct inet_diag_msg) + - sizeof(struct inet_diag_meminfo) + - 64)), GFP_KERNEL); + rep = nlmsg_new(sizeof(struct inet_diag_msg) + + sizeof(struct inet_diag_meminfo) + 64, + GFP_KERNEL); if (!rep) goto out; err = inet_sk_diag_fill(sk, NULL, rep, req, - sk_user_ns(NETLINK_CB(in_skb).ssk), + sk_user_ns(NETLINK_CB(in_skb).sk), NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, 0, nlh); if (err < 0) { diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c index 06814b6216dc..1f12c8b45864 100644 --- a/net/ipv4/xfrm4_input.c +++ b/net/ipv4/xfrm4_input.c @@ -132,7 +132,7 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb) * header and optional ESP marker bytes) and then modify the * protocol to ESP, and then call into the transform receiver. */ - if (skb_cloned(skb) && pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) + if (skb_unclone(skb, GFP_ATOMIC)) goto drop; /* Now we can update and verify the packet length... 
*/ diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c index ddee0a099a2c..eb1dd4d643f2 100644 --- a/net/ipv4/xfrm4_mode_tunnel.c +++ b/net/ipv4/xfrm4_mode_tunnel.c @@ -103,8 +103,12 @@ static int xfrm4_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb) top_iph->protocol = xfrm_af2proto(skb_dst(skb)->ops->family); - /* DS disclosed */ - top_iph->tos = INET_ECN_encapsulate(XFRM_MODE_SKB_CB(skb)->tos, + /* DS disclosing depends on XFRM_SA_XFLAG_DONT_ENCAP_DSCP */ + if (x->props.extra_flags & XFRM_SA_XFLAG_DONT_ENCAP_DSCP) + top_iph->tos = 0; + else + top_iph->tos = XFRM_MODE_SKB_CB(skb)->tos; + top_iph->tos = INET_ECN_encapsulate(top_iph->tos, XFRM_MODE_SKB_CB(skb)->tos); flags = x->props.flags; @@ -142,8 +146,8 @@ static int xfrm4_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb) for_each_input_rcu(rcv_notify_handlers, handler) handler->handler(skb); - if (skb_cloned(skb) && - (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC))) + err = skb_unclone(skb, GFP_ATOMIC); + if (err) goto out; if (x->props.flags & XFRM_STATE_DECAP_DSCP) diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 3be0ac2c1920..9a459be24af7 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -262,21 +262,56 @@ static struct ctl_table xfrm4_policy_table[] = { { } }; -static struct ctl_table_header *sysctl_hdr; -#endif - -static void __init xfrm4_policy_init(void) +static int __net_init xfrm4_net_init(struct net *net) { - xfrm_policy_register_afinfo(&xfrm4_policy_afinfo); + struct ctl_table *table; + struct ctl_table_header *hdr; + + table = xfrm4_policy_table; + if (!net_eq(net, &init_net)) { + table = kmemdup(table, sizeof(xfrm4_policy_table), GFP_KERNEL); + if (!table) + goto err_alloc; + + table[0].data = &net->xfrm.xfrm4_dst_ops.gc_thresh; + } + + hdr = register_net_sysctl(net, "net/ipv4", table); + if (!hdr) + goto err_reg; + + net->ipv4.xfrm4_hdr = hdr; + return 0; + +err_reg: + if (!net_eq(net, &init_net)) + kfree(table); +err_alloc: + return -ENOMEM; } -static void __exit xfrm4_policy_fini(void) +static void __net_exit xfrm4_net_exit(struct net *net) { -#ifdef CONFIG_SYSCTL - if (sysctl_hdr) - unregister_net_sysctl_table(sysctl_hdr); + struct ctl_table *table; + + if (net->ipv4.xfrm4_hdr == NULL) + return; + + table = net->ipv4.xfrm4_hdr->ctl_table_arg; + unregister_net_sysctl_table(net->ipv4.xfrm4_hdr); + if (!net_eq(net, &init_net)) + kfree(table); +} + +static struct pernet_operations __net_initdata xfrm4_net_ops = { + .init = xfrm4_net_init, + .exit = xfrm4_net_exit, +}; #endif - xfrm_policy_unregister_afinfo(&xfrm4_policy_afinfo); + +static void __init xfrm4_policy_init(void) +{ + xfrm_policy_register_afinfo(&xfrm4_policy_afinfo); } void __init xfrm4_init(void) @@ -286,8 +321,7 @@ void __init xfrm4_init(void) xfrm4_state_init(); xfrm4_policy_init(); #ifdef CONFIG_SYSCTL - sysctl_hdr = register_net_sysctl(&init_net, "net/ipv4", - xfrm4_policy_table); + register_pernet_subsys(&xfrm4_net_ops); #endif } diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig index 4f7fe7270e37..11b13ea69db4 100644 --- a/net/ipv6/Kconfig +++ b/net/ipv6/Kconfig @@ -11,7 +11,7 @@ menuconfig IPV6 You will still be able to do traditional IPv4 networking as well. For general information about IPv6, see - <http://playground.sun.com/pub/ipng/html/ipng-main.html>. + <https://en.wikipedia.org/wiki/IPv6>. For Linux IPv6 development information, see <http://www.linux-ipv6.org>. 
For specific information about IPv6 under Linux, read the HOWTO at <http://www.bieringer.de/linux/IPv6/>. @@ -50,16 +50,15 @@ config IPV6_ROUTER_PREF If unsure, say N. config IPV6_ROUTE_INFO - bool "IPv6: Route Information (RFC 4191) support (EXPERIMENTAL)" - depends on IPV6_ROUTER_PREF && EXPERIMENTAL + bool "IPv6: Route Information (RFC 4191) support" + depends on IPV6_ROUTER_PREF ---help--- This is experimental support of Route Information. If unsure, say N. config IPV6_OPTIMISTIC_DAD - bool "IPv6: Enable RFC 4429 Optimistic DAD (EXPERIMENTAL)" - depends on EXPERIMENTAL + bool "IPv6: Enable RFC 4429 Optimistic DAD" ---help--- This is experimental support for optimistic Duplicate Address Detection. It allows for autoconfigured addresses @@ -105,8 +104,7 @@ config INET6_IPCOMP If unsure, say Y. config IPV6_MIP6 - tristate "IPv6: Mobility (EXPERIMENTAL)" - depends on EXPERIMENTAL + tristate "IPv6: Mobility" select XFRM ---help--- Support for IPv6 Mobility described in RFC 3775. @@ -150,8 +148,7 @@ config INET6_XFRM_MODE_BEET If unsure, say Y. config INET6_XFRM_MODE_ROUTEOPTIMIZATION - tristate "IPv6: MIPv6 route optimization mode (EXPERIMENTAL)" - depends on EXPERIMENTAL + tristate "IPv6: MIPv6 route optimization mode" select XFRM ---help--- Support for MIPv6 route optimization mode. @@ -159,6 +156,7 @@ config INET6_XFRM_MODE_ROUTEOPTIMIZATION config IPV6_SIT tristate "IPv6: IPv6-in-IPv4 tunnel (SIT driver)" select INET_TUNNEL + select NET_IP_TUNNEL select IPV6_NDISC_NODETYPE default y ---help--- @@ -171,8 +169,8 @@ config IPV6_SIT Saying M here will produce a module called sit. If unsure, say Y. config IPV6_SIT_6RD - bool "IPv6: IPv6 Rapid Deployment (6RD) (EXPERIMENTAL)" - depends on IPV6_SIT && EXPERIMENTAL + bool "IPv6: IPv6 Rapid Deployment (6RD)" + depends on IPV6_SIT default n ---help--- IPv6 Rapid Deployment (6rd; draft-ietf-softwire-ipv6-6rd) builds upon @@ -204,6 +202,7 @@ config IPV6_TUNNEL config IPV6_GRE tristate "IPv6: GRE tunnel" select IPV6_TUNNEL + select NET_IP_TUNNEL ---help--- Tunneling means encapsulating data of one protocol type within another protocol and sending it over a channel that understands the @@ -219,7 +218,6 @@ config IPV6_GRE config IPV6_MULTIPLE_TABLES bool "IPv6: Multiple Routing Tables" - depends on EXPERIMENTAL select FIB_RULES ---help--- Support multiple routing tables. @@ -239,8 +237,8 @@ config IPV6_SUBTREES If unsure, say N. config IPV6_MROUTE - bool "IPv6: multicast routing (EXPERIMENTAL)" - depends on IPV6 && EXPERIMENTAL + bool "IPv6: multicast routing" + depends on IPV6 ---help--- Experimental support for IPv6 multicast forwarding. If unsure, say N. @@ -260,7 +258,7 @@ config IPV6_MROUTE_MULTIPLE_TABLES If unsure, say N. config IPV6_PIMSM_V2 - bool "IPv6: PIM-SM version 2 support (EXPERIMENTAL)" + bool "IPv6: PIM-SM version 2 support" depends on IPV6_MROUTE ---help--- Support for IPv6 PIM multicast routing protocol PIM-SMv2. 
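Three hunks in this series — xfrm4_input.c and xfrm4_mode_tunnel.c above, and ah6.c further down — replace the open-coded clone check with the skb_unclone() helper. The helper folds the same two steps into one call; roughly (a sketch of the include/linux/skbuff.h definition, debug annotations omitted):

static inline int skb_unclone(struct sk_buff *skb, gfp_t pri)
{
        /* A cloned skb shares its data area with the clone; take a
         * private copy of the head before modifying headers in place.
         * pskb_expand_head() with zero extra head/tail room performs
         * exactly that copy. */
        if (skb_cloned(skb))
                return pskb_expand_head(skb, 0, 0, pri);
        return 0;
}

Besides saving a line at each call site, the helper keeps the return convention uniform: 0 on success, the negative errno from pskb_expand_head() on allocation failure.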
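The xfrm4_policy.c conversion above is an instance of the standard per-netns sysctl pattern: init_net keeps the static table, every other namespace gets a kmemdup()'d copy whose .data pointers are retargeted at per-net storage. A minimal sketch of the shape, with hypothetical names (foo_table, "net/foo", net->foo.hdr, and some_knob stand in for a subsystem's own):

static int __net_init foo_net_init(struct net *net)
{
        struct ctl_table *table = foo_table;
        struct ctl_table_header *hdr;

        if (!net_eq(net, &init_net)) {
                table = kmemdup(table, sizeof(foo_table), GFP_KERNEL);
                if (!table)
                        return -ENOMEM;
                table[0].data = &net->foo.some_knob;    /* per-net storage */
        }

        hdr = register_net_sysctl(net, "net/foo", table);
        if (!hdr) {
                if (!net_eq(net, &init_net))
                        kfree(table);
                return -ENOMEM;
        }
        net->foo.hdr = hdr;
        return 0;
}

static void __net_exit foo_net_exit(struct net *net)
{
        struct ctl_table *table = net->foo.hdr->ctl_table_arg;

        unregister_net_sysctl_table(net->foo.hdr);
        if (!net_eq(net, &init_net))
                kfree(table);           /* only the duplicated copies */
}

The exit path recovers the table pointer through ctl_table_arg so that only the duplicated copies are freed, exactly as the xfrm4 hunk does.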
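Most of the net/ipv6 hunks below (addrconf.c, addrlabel.c, ip6_fib.c, and others) are mechanical fallout from the tree-wide hlist iterator change merged this cycle: hlist_for_each_entry() and its _rcu/_safe variants lost the explicit struct hlist_node * cursor argument, which the macros now keep internal. Both forms, taken from the ipv6_chk_addr() hunk:

        struct inet6_ifaddr *ifp;
        struct hlist_node *node;        /* old API: caller-supplied cursor */

        hlist_for_each_entry_rcu(ifp, node, &inet6_addr_lst[hash], addr_lst)
                if (ipv6_addr_equal(&ifp->addr, addr))
                        break;

        /* new API: one argument and one local variable fewer */
        hlist_for_each_entry_rcu(ifp, &inet6_addr_lst[hash], addr_lst)
                if (ipv6_addr_equal(&ifp->addr, addr))
                        break;

The conversion is noise reduction only; iteration order and locking rules are untouched.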
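The addrconf.c hunks below add tokenized interface identifiers (IFLA_INET6_TOKEN): the administrator pins the low 64 bits of every autoconfigured address, while router advertisements keep supplying the /64 prefix for the high 64 bits. The address composition in addrconf_prefix_rcv() reduces to two memcpy()s (fragment simplified from the hunk; the idev locking is elided):

        struct in6_addr addr;

        memcpy(&addr, &pinfo->prefix, 8);       /* RA prefix: upper 64 bits */
        memcpy(addr.s6_addr + 8,
               in6_dev->token.s6_addr + 8, 8);  /* fixed token: lower 64 bits */

Addresses built this way are flagged ifp->tokenized so that a later token change can expire them: inet6_set_iftoken() zeroes their lifetimes and re-sends a router solicitation to pick up fresh prefixes.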
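The inet6_lookup_listener() rework below implements SO_REUSEPORT demultiplexing as streaming reservoir sampling: among listeners with the same best score, the k-th candidate replaces the current pick with probability 1/k, which makes the final choice uniform without a second pass over the chain. The draw is deterministic per flow — phash is seeded from the packet's 4-tuple via inet6_ehashfn() — so a given connection always lands on the same listener. Annotated fragment from the hunk:

        } else if (score == hiscore && reuseport) {
                matches++;
                /* phash is uniform in [0, 2^32); the product test is
                 * true iff phash < 2^32 / matches, i.e. with
                 * probability 1/matches. */
                if (((u64)phash * matches) >> 32 == 0)
                        result = sk;
                phash = next_pseudo_random32(phash);
        }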
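The new ip6_checksum.c below spells out csum_ipv6_magic() as a chain of 32-bit adds, each followed by an explicit end-around carry — the defining operation of the ones'-complement Internet checksum. Each unrolled step is an instance of this helper (a standalone sketch for illustration, not the kernel's code):

#include <stdint.h>

/* Ones'-complement add: any carry out of bit 31 is folded back
 * into bit 0, so the running sum never silently drops carries. */
static uint32_t add1c(uint32_t sum, uint32_t word)
{
        sum += word;
        return sum + (sum < word);      /* (sum < word) is the carry flag */
}

csum_fold() then compresses the 32-bit accumulator into the final 16-bit checksum by the same end-around trick.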
diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile index 4ea244891b58..9af088d2cdaa 100644 --- a/net/ipv6/Makefile +++ b/net/ipv6/Makefile @@ -40,7 +40,7 @@ obj-$(CONFIG_IPV6_SIT) += sit.o obj-$(CONFIG_IPV6_TUNNEL) += ip6_tunnel.o obj-$(CONFIG_IPV6_GRE) += ip6_gre.o -obj-y += addrconf_core.o exthdrs_core.o +obj-y += addrconf_core.o exthdrs_core.o ip6_checksum.o ip6_icmp.o obj-$(CONFIG_INET) += output_core.o protocol.o $(ipv6-offload) obj-$(subst m,y,$(CONFIG_IPV6)) += inet6_hashtables.o diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 1b5d8cb9b123..d1ab6ab29a55 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -70,6 +70,7 @@ #include <net/snmp.h> #include <net/af_ieee802154.h> +#include <net/firewire.h> #include <net/ipv6.h> #include <net/protocol.h> #include <net/ndisc.h> @@ -110,10 +111,6 @@ static inline u32 cstamp_delta(unsigned long cstamp) return (cstamp - INITIAL_JIFFIES) * 100UL / HZ; } -#define ADDRCONF_TIMER_FUZZ_MINUS (HZ > 50 ? HZ/50 : 1) -#define ADDRCONF_TIMER_FUZZ (HZ / 4) -#define ADDRCONF_TIMER_FUZZ_MAX (HZ) - #ifdef CONFIG_SYSCTL static void addrconf_sysctl_register(struct inet6_dev *idev); static void addrconf_sysctl_unregister(struct inet6_dev *idev); @@ -172,8 +169,6 @@ static void inet6_prefix_notify(int event, struct inet6_dev *idev, static bool ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr, struct net_device *dev); -static ATOMIC_NOTIFIER_HEAD(inet6addr_chain); - static struct ipv6_devconf ipv6_devconf __read_mostly = { .forwarding = 0, .hop_limit = IPV6_DEFAULT_HOPLIMIT, @@ -248,6 +243,9 @@ const struct in6_addr in6addr_any = IN6ADDR_ANY_INIT; const struct in6_addr in6addr_loopback = IN6ADDR_LOOPBACK_INIT; const struct in6_addr in6addr_linklocal_allnodes = IN6ADDR_LINKLOCAL_ALLNODES_INIT; const struct in6_addr in6addr_linklocal_allrouters = IN6ADDR_LINKLOCAL_ALLROUTERS_INIT; +const struct in6_addr in6addr_interfacelocal_allnodes = IN6ADDR_INTERFACELOCAL_ALLNODES_INIT; +const struct in6_addr in6addr_interfacelocal_allrouters = IN6ADDR_INTERFACELOCAL_ALLROUTERS_INIT; +const struct in6_addr in6addr_sitelocal_allrouters = IN6ADDR_SITELOCAL_ALLROUTERS_INIT; /* Check if a valid qdisc is available */ static inline bool addrconf_qdisc_ok(const struct net_device *dev) @@ -422,6 +420,7 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev) ipv6_regen_rndid((unsigned long) ndev); } #endif + ndev->token = in6addr_any; if (netif_running(dev) && addrconf_qdisc_ok(dev)) ndev->if_flags |= IF_READY; @@ -432,6 +431,9 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev) /* protected by rtnl_lock */ rcu_assign_pointer(dev->ip6_ptr, ndev); + /* Join interface-local all-node multicast group */ + ipv6_dev_mc_inc(dev, &in6addr_interfacelocal_allnodes); + /* Join all-node multicast group */ ipv6_dev_mc_inc(dev, &in6addr_linklocal_allnodes); @@ -542,8 +544,7 @@ static const struct nla_policy devconf_ipv6_policy[NETCONFA_MAX+1] = { }; static int inet6_netconf_get_devconf(struct sk_buff *in_skb, - struct nlmsghdr *nlh, - void *arg) + struct nlmsghdr *nlh) { struct net *net = sock_net(in_skb->sk); struct nlattr *tb[NETCONFA_MAX+1]; @@ -603,6 +604,77 @@ errout: return err; } +static int inet6_netconf_dump_devconf(struct sk_buff *skb, + struct netlink_callback *cb) +{ + struct net *net = sock_net(skb->sk); + int h, s_h; + int idx, s_idx; + struct net_device *dev; + struct inet6_dev *idev; + struct hlist_head *head; + + s_h = cb->args[0]; + s_idx = idx = cb->args[1]; + + for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { + idx = 
0; + head = &net->dev_index_head[h]; + rcu_read_lock(); + cb->seq = atomic_read(&net->ipv6.dev_addr_genid) ^ + net->dev_base_seq; + hlist_for_each_entry_rcu(dev, head, index_hlist) { + if (idx < s_idx) + goto cont; + idev = __in6_dev_get(dev); + if (!idev) + goto cont; + + if (inet6_netconf_fill_devconf(skb, dev->ifindex, + &idev->cnf, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + RTM_NEWNETCONF, + NLM_F_MULTI, + -1) <= 0) { + rcu_read_unlock(); + goto done; + } + nl_dump_check_consistent(cb, nlmsg_hdr(skb)); +cont: + idx++; + } + rcu_read_unlock(); + } + if (h == NETDEV_HASHENTRIES) { + if (inet6_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL, + net->ipv6.devconf_all, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + RTM_NEWNETCONF, NLM_F_MULTI, + -1) <= 0) + goto done; + else + h++; + } + if (h == NETDEV_HASHENTRIES + 1) { + if (inet6_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT, + net->ipv6.devconf_dflt, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + RTM_NEWNETCONF, NLM_F_MULTI, + -1) <= 0) + goto done; + else + h++; + } +done: + cb->args[0] = h; + cb->args[1] = idx; + + return skb->len; +} + #ifdef CONFIG_SYSCTL static void dev_forward_change(struct inet6_dev *idev) { @@ -615,10 +687,15 @@ static void dev_forward_change(struct inet6_dev *idev) if (idev->cnf.forwarding) dev_disable_lro(dev); if (dev->flags & IFF_MULTICAST) { - if (idev->cnf.forwarding) + if (idev->cnf.forwarding) { ipv6_dev_mc_inc(dev, &in6addr_linklocal_allrouters); - else + ipv6_dev_mc_inc(dev, &in6addr_interfacelocal_allrouters); + ipv6_dev_mc_inc(dev, &in6addr_sitelocal_allrouters); + } else { ipv6_dev_mc_dec(dev, &in6addr_linklocal_allrouters); + ipv6_dev_mc_dec(dev, &in6addr_interfacelocal_allrouters); + ipv6_dev_mc_dec(dev, &in6addr_sitelocal_allrouters); + } } list_for_each_entry(ifa, &idev->addr_list, if_list) { @@ -799,6 +876,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen, ifa->prefix_len = pfxlen; ifa->flags = flags | IFA_F_TENTATIVE; ifa->cstamp = ifa->tstamp = jiffies; + ifa->tokenized = false; ifa->rt = rt; @@ -830,7 +908,7 @@ out2: rcu_read_unlock_bh(); if (likely(err == 0)) - atomic_notifier_call_chain(&inet6addr_chain, NETDEV_UP, ifa); + inet6addr_notifier_call_chain(NETDEV_UP, ifa); else { kfree(ifa); ifa = ERR_PTR(err); @@ -920,7 +998,7 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp) ipv6_ifa_notify(RTM_DELADDR, ifp); - atomic_notifier_call_chain(&inet6addr_chain, NETDEV_DOWN, ifp); + inet6addr_notifier_call_chain(NETDEV_DOWN, ifp); /* * Purge or update corresponding prefix @@ -1051,7 +1129,7 @@ retry: ipv6_add_addr(idev, &addr, tmp_plen, ipv6_addr_type(&addr)&IPV6_ADDR_SCOPE_MASK, addr_flags) : NULL; - if (!ift || IS_ERR(ift)) { + if (IS_ERR_OR_NULL(ift)) { in6_ifa_put(ifp); in6_dev_put(idev); pr_info("%s: retry temporary address regeneration\n", __func__); @@ -1412,11 +1490,10 @@ int ipv6_chk_addr(struct net *net, const struct in6_addr *addr, struct net_device *dev, int strict) { struct inet6_ifaddr *ifp; - struct hlist_node *node; unsigned int hash = inet6_addr_hash(addr); rcu_read_lock_bh(); - hlist_for_each_entry_rcu(ifp, node, &inet6_addr_lst[hash], addr_lst) { + hlist_for_each_entry_rcu(ifp, &inet6_addr_lst[hash], addr_lst) { if (!net_eq(dev_net(ifp->idev->dev), net)) continue; if (ipv6_addr_equal(&ifp->addr, addr) && @@ -1438,9 +1515,8 @@ static bool ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr, { unsigned int hash = inet6_addr_hash(addr); struct inet6_ifaddr *ifp; - struct hlist_node *node; - 
hlist_for_each_entry(ifp, node, &inet6_addr_lst[hash], addr_lst) { + hlist_for_each_entry(ifp, &inet6_addr_lst[hash], addr_lst) { if (!net_eq(dev_net(ifp->idev->dev), net)) continue; if (ipv6_addr_equal(&ifp->addr, addr)) { @@ -1480,10 +1556,9 @@ struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net, const struct in6_addr *add { struct inet6_ifaddr *ifp, *result = NULL; unsigned int hash = inet6_addr_hash(addr); - struct hlist_node *node; rcu_read_lock_bh(); - hlist_for_each_entry_rcu_bh(ifp, node, &inet6_addr_lst[hash], addr_lst) { + hlist_for_each_entry_rcu_bh(ifp, &inet6_addr_lst[hash], addr_lst) { if (!net_eq(dev_net(ifp->idev->dev), net)) continue; if (ipv6_addr_equal(&ifp->addr, addr)) { @@ -1664,6 +1739,20 @@ static int addrconf_ifid_eui64(u8 *eui, struct net_device *dev) return 0; } +static int addrconf_ifid_ieee1394(u8 *eui, struct net_device *dev) +{ + union fwnet_hwaddr *ha; + + if (dev->addr_len != FWNET_ALEN) + return -1; + + ha = (union fwnet_hwaddr *)dev->dev_addr; + + memcpy(eui, &ha->uc.uniq_id, sizeof(ha->uc.uniq_id)); + eui[0] ^= 2; + return 0; +} + static int addrconf_ifid_arcnet(u8 *eui, struct net_device *dev) { /* XXX: inherit EUI-64 from other interface -- yoshfuji */ @@ -1728,6 +1817,8 @@ static int ipv6_generate_eui64(u8 *eui, struct net_device *dev) return addrconf_ifid_gre(eui, dev); case ARPHRD_IEEE802154: return addrconf_ifid_eui64(eui, dev); + case ARPHRD_IEEE1394: + return addrconf_ifid_ieee1394(eui, dev); } return -1; } @@ -2042,11 +2133,19 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao) struct inet6_ifaddr *ifp; struct in6_addr addr; int create = 0, update_lft = 0; + bool tokenized = false; if (pinfo->prefix_len == 64) { memcpy(&addr, &pinfo->prefix, 8); - if (ipv6_generate_eui64(addr.s6_addr + 8, dev) && - ipv6_inherit_eui64(addr.s6_addr + 8, in6_dev)) { + + if (!ipv6_addr_any(&in6_dev->token)) { + read_lock_bh(&in6_dev->lock); + memcpy(addr.s6_addr + 8, + in6_dev->token.s6_addr + 8, 8); + read_unlock_bh(&in6_dev->lock); + tokenized = true; + } else if (ipv6_generate_eui64(addr.s6_addr + 8, dev) && + ipv6_inherit_eui64(addr.s6_addr + 8, in6_dev)) { in6_dev_put(in6_dev); return; } @@ -2080,13 +2179,14 @@ ok: addr_type&IPV6_ADDR_SCOPE_MASK, addr_flags); - if (!ifp || IS_ERR(ifp)) { + if (IS_ERR_OR_NULL(ifp)) { in6_dev_put(in6_dev); return; } update_lft = create = 1; ifp->cstamp = jiffies; + ifp->tokenized = tokenized; addrconf_dad_start(ifp); } @@ -2525,6 +2625,9 @@ static void sit_add_v4_addrs(struct inet6_dev *idev) static void init_loopback(struct net_device *dev) { struct inet6_dev *idev; + struct net_device *sp_dev; + struct inet6_ifaddr *sp_ifa; + struct rt6_info *sp_rt; /* ::1 */ @@ -2536,6 +2639,30 @@ static void init_loopback(struct net_device *dev) } add_addr(idev, &in6addr_loopback, 128, IFA_HOST); + + /* Add routes to other interface's IPv6 addresses */ + for_each_netdev(dev_net(dev), sp_dev) { + if (!strcmp(sp_dev->name, dev->name)) + continue; + + idev = __in6_dev_get(sp_dev); + if (!idev) + continue; + + read_lock_bh(&idev->lock); + list_for_each_entry(sp_ifa, &idev->addr_list, if_list) { + + if (sp_ifa->flags & (IFA_F_DADFAILED | IFA_F_TENTATIVE)) + continue; + + sp_rt = addrconf_dst_alloc(idev, &sp_ifa->addr, 0); + + /* Failure cases are ignored */ + if (!IS_ERR(sp_rt)) + ip6_ins_rt(sp_rt); + } + read_unlock_bh(&idev->lock); + } } static void addrconf_add_linklocal(struct inet6_dev *idev, const struct in6_addr *addr) @@ -2569,7 +2696,8 @@ static void addrconf_dev_config(struct net_device *dev) (dev->type != 
ARPHRD_FDDI) && (dev->type != ARPHRD_ARCNET) && (dev->type != ARPHRD_INFINIBAND) && - (dev->type != ARPHRD_IEEE802154)) { + (dev->type != ARPHRD_IEEE802154) && + (dev->type != ARPHRD_IEEE1394)) { /* Alas, we support only Ethernet autoconfiguration. */ return; } @@ -2900,11 +3028,10 @@ static int addrconf_ifdown(struct net_device *dev, int how) /* Step 2: clear hash table */ for (i = 0; i < IN6_ADDR_HSIZE; i++) { struct hlist_head *h = &inet6_addr_lst[i]; - struct hlist_node *n; spin_lock_bh(&addrconf_hash_lock); restart: - hlist_for_each_entry_rcu(ifa, n, h, addr_lst) { + hlist_for_each_entry_rcu(ifa, h, addr_lst) { if (ifa->idev == idev) { hlist_del_init_rcu(&ifa->addr_lst); addrconf_del_timer(ifa); @@ -2958,7 +3085,7 @@ static int addrconf_ifdown(struct net_device *dev, int how) if (state != INET6_IFADDR_STATE_DEAD) { __ipv6_ifa_notify(RTM_DELADDR, ifa); - atomic_notifier_call_chain(&inet6addr_chain, NETDEV_DOWN, ifa); + inet6addr_notifier_call_chain(NETDEV_DOWN, ifa); } in6_ifa_put(ifa); @@ -3211,8 +3338,7 @@ static struct inet6_ifaddr *if6_get_first(struct seq_file *seq, loff_t pos) } for (; state->bucket < IN6_ADDR_HSIZE; ++state->bucket) { - struct hlist_node *n; - hlist_for_each_entry_rcu_bh(ifa, n, &inet6_addr_lst[state->bucket], + hlist_for_each_entry_rcu_bh(ifa, &inet6_addr_lst[state->bucket], addr_lst) { if (!net_eq(dev_net(ifa->idev->dev), net)) continue; @@ -3237,9 +3363,8 @@ static struct inet6_ifaddr *if6_get_next(struct seq_file *seq, { struct if6_iter_state *state = seq->private; struct net *net = seq_file_net(seq); - struct hlist_node *n = &ifa->addr_lst; - hlist_for_each_entry_continue_rcu_bh(ifa, n, addr_lst) { + hlist_for_each_entry_continue_rcu_bh(ifa, addr_lst) { if (!net_eq(dev_net(ifa->idev->dev), net)) continue; state->offset++; @@ -3248,7 +3373,7 @@ static struct inet6_ifaddr *if6_get_next(struct seq_file *seq, while (++state->bucket < IN6_ADDR_HSIZE) { state->offset = 0; - hlist_for_each_entry_rcu_bh(ifa, n, + hlist_for_each_entry_rcu_bh(ifa, &inet6_addr_lst[state->bucket], addr_lst) { if (!net_eq(dev_net(ifa->idev->dev), net)) continue; @@ -3318,14 +3443,14 @@ static const struct file_operations if6_fops = { static int __net_init if6_proc_net_init(struct net *net) { - if (!proc_net_fops_create(net, "if_inet6", S_IRUGO, &if6_fops)) + if (!proc_create("if_inet6", S_IRUGO, net->proc_net, &if6_fops)) return -ENOMEM; return 0; } static void __net_exit if6_proc_net_exit(struct net *net) { - proc_net_remove(net, "if_inet6"); + remove_proc_entry("if_inet6", net->proc_net); } static struct pernet_operations if6_proc_net_ops = { @@ -3350,11 +3475,10 @@ int ipv6_chk_home_addr(struct net *net, const struct in6_addr *addr) { int ret = 0; struct inet6_ifaddr *ifp = NULL; - struct hlist_node *n; unsigned int hash = inet6_addr_hash(addr); rcu_read_lock_bh(); - hlist_for_each_entry_rcu_bh(ifp, n, &inet6_addr_lst[hash], addr_lst) { + hlist_for_each_entry_rcu_bh(ifp, &inet6_addr_lst[hash], addr_lst) { if (!net_eq(dev_net(ifp->idev->dev), net)) continue; if (ipv6_addr_equal(&ifp->addr, addr) && @@ -3376,7 +3500,6 @@ static void addrconf_verify(unsigned long foo) { unsigned long now, next, next_sec, next_sched; struct inet6_ifaddr *ifp; - struct hlist_node *node; int i; rcu_read_lock_bh(); @@ -3388,7 +3511,7 @@ static void addrconf_verify(unsigned long foo) for (i = 0; i < IN6_ADDR_HSIZE; i++) { restart: - hlist_for_each_entry_rcu_bh(ifp, node, + hlist_for_each_entry_rcu_bh(ifp, &inet6_addr_lst[i], addr_lst) { unsigned long age; @@ -3511,7 +3634,7 @@ static const struct nla_policy 
ifa_ipv6_policy[IFA_MAX+1] = { }; static int -inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); struct ifaddrmsg *ifm; @@ -3577,7 +3700,7 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, u8 ifa_flags, } static int -inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); struct ifaddrmsg *ifm; @@ -3808,6 +3931,7 @@ static int in6_dump_addrs(struct inet6_dev *idev, struct sk_buff *skb, NLM_F_MULTI); if (err <= 0) break; + nl_dump_check_consistent(cb, nlmsg_hdr(skb)); } break; } @@ -3859,17 +3983,17 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb, struct net_device *dev; struct inet6_dev *idev; struct hlist_head *head; - struct hlist_node *node; s_h = cb->args[0]; s_idx = idx = cb->args[1]; s_ip_idx = ip_idx = cb->args[2]; rcu_read_lock(); + cb->seq = atomic_read(&net->ipv6.dev_addr_genid) ^ net->dev_base_seq; for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { idx = 0; head = &net->dev_index_head[h]; - hlist_for_each_entry_rcu(dev, node, head, index_hlist) { + hlist_for_each_entry_rcu(dev, head, index_hlist) { if (idx < s_idx) goto cont; if (h > s_h || idx > s_idx) @@ -3917,8 +4041,7 @@ static int inet6_dump_ifacaddr(struct sk_buff *skb, struct netlink_callback *cb) return inet6_dump_addr(skb, cb, type); } -static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh, - void *arg) +static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh) { struct net *net = sock_net(in_skb->sk); struct ifaddrmsg *ifm; @@ -4051,7 +4174,8 @@ static inline size_t inet6_ifla6_size(void) + nla_total_size(sizeof(struct ifla_cacheinfo)) + nla_total_size(DEVCONF_MAX * 4) /* IFLA_INET6_CONF */ + nla_total_size(IPSTATS_MIB_MAX * 8) /* IFLA_INET6_STATS */ - + nla_total_size(ICMP6_MIB_MAX * 8); /* IFLA_INET6_ICMP6STATS */ + + nla_total_size(ICMP6_MIB_MAX * 8) /* IFLA_INET6_ICMP6STATS */ + + nla_total_size(sizeof(struct in6_addr)); /* IFLA_INET6_TOKEN */ } static inline size_t inet6_if_nlmsg_size(void) @@ -4138,6 +4262,13 @@ static int inet6_fill_ifla6_attrs(struct sk_buff *skb, struct inet6_dev *idev) goto nla_put_failure; snmp6_fill_stats(nla_data(nla), idev, IFLA_INET6_ICMP6STATS, nla_len(nla)); + nla = nla_reserve(skb, IFLA_INET6_TOKEN, sizeof(struct in6_addr)); + if (nla == NULL) + goto nla_put_failure; + read_lock_bh(&idev->lock); + memcpy(nla_data(nla), idev->token.s6_addr, nla_len(nla)); + read_unlock_bh(&idev->lock); + return 0; nla_put_failure: @@ -4165,6 +4296,80 @@ static int inet6_fill_link_af(struct sk_buff *skb, const struct net_device *dev) return 0; } +static int inet6_set_iftoken(struct inet6_dev *idev, struct in6_addr *token) +{ + struct inet6_ifaddr *ifp; + struct net_device *dev = idev->dev; + bool update_rs = false; + + if (token == NULL) + return -EINVAL; + if (ipv6_addr_any(token)) + return -EINVAL; + if (dev->flags & (IFF_LOOPBACK | IFF_NOARP)) + return -EINVAL; + if (!ipv6_accept_ra(idev)) + return -EINVAL; + if (idev->cnf.rtr_solicits <= 0) + return -EINVAL; + + write_lock_bh(&idev->lock); + + BUILD_BUG_ON(sizeof(token->s6_addr) != 16); + memcpy(idev->token.s6_addr + 8, token->s6_addr + 8, 8); + + write_unlock_bh(&idev->lock); + + if (!idev->dead && (idev->if_flags & IF_READY)) { + struct in6_addr ll_addr; + + ipv6_get_lladdr(dev, &ll_addr, IFA_F_TENTATIVE | + IFA_F_OPTIMISTIC); + + /* 
If we're not ready, then normal ifup will take care + * of this. Otherwise, we need to request our rs here. + */ + ndisc_send_rs(dev, &ll_addr, &in6addr_linklocal_allrouters); + update_rs = true; + } + + write_lock_bh(&idev->lock); + + if (update_rs) + idev->if_flags |= IF_RS_SENT; + + /* Well, that's kinda nasty ... */ + list_for_each_entry(ifp, &idev->addr_list, if_list) { + spin_lock(&ifp->lock); + if (ifp->tokenized) { + ifp->valid_lft = 0; + ifp->prefered_lft = 0; + } + spin_unlock(&ifp->lock); + } + + write_unlock_bh(&idev->lock); + return 0; +} + +static int inet6_set_link_af(struct net_device *dev, const struct nlattr *nla) +{ + int err = -EINVAL; + struct inet6_dev *idev = __in6_dev_get(dev); + struct nlattr *tb[IFLA_INET6_MAX + 1]; + + if (!idev) + return -EAFNOSUPPORT; + + if (nla_parse_nested(tb, IFLA_INET6_MAX, nla, NULL) < 0) + BUG(); + + if (tb[IFLA_INET6_TOKEN]) + err = inet6_set_iftoken(idev, nla_data(tb[IFLA_INET6_TOKEN])); + + return err; +} + static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev, u32 portid, u32 seq, int event, unsigned int flags) { @@ -4215,7 +4420,6 @@ static int inet6_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) struct net_device *dev; struct inet6_dev *idev; struct hlist_head *head; - struct hlist_node *node; s_h = cb->args[0]; s_idx = cb->args[1]; @@ -4224,7 +4428,7 @@ static int inet6_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { idx = 0; head = &net->dev_index_head[h]; - hlist_for_each_entry_rcu(dev, node, head, index_hlist) { + hlist_for_each_entry_rcu(dev, head, index_hlist) { if (idx < s_idx) goto cont; idev = __in6_dev_get(dev); @@ -4344,6 +4548,8 @@ errout: static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) { + struct net *net = dev_net(ifp->idev->dev); + inet6_ifa_notify(event ? 
: RTM_NEWADDR, ifp); switch (event) { @@ -4369,6 +4575,7 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) dst_free(&ifp->rt->dst); break; } + atomic_inc(&net->ipv6.dev_addr_genid); } static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) @@ -4787,26 +4994,20 @@ static void addrconf_sysctl_unregister(struct inet6_dev *idev) static int __net_init addrconf_init_net(struct net *net) { - int err; + int err = -ENOMEM; struct ipv6_devconf *all, *dflt; - err = -ENOMEM; - all = &ipv6_devconf; - dflt = &ipv6_devconf_dflt; + all = kmemdup(&ipv6_devconf, sizeof(ipv6_devconf), GFP_KERNEL); + if (all == NULL) + goto err_alloc_all; - if (!net_eq(net, &init_net)) { - all = kmemdup(all, sizeof(ipv6_devconf), GFP_KERNEL); - if (all == NULL) - goto err_alloc_all; + dflt = kmemdup(&ipv6_devconf_dflt, sizeof(ipv6_devconf_dflt), GFP_KERNEL); + if (dflt == NULL) + goto err_alloc_dflt; - dflt = kmemdup(dflt, sizeof(ipv6_devconf_dflt), GFP_KERNEL); - if (dflt == NULL) - goto err_alloc_dflt; - } else { - /* these will be inherited by all namespaces */ - dflt->autoconf = ipv6_defaults.autoconf; - dflt->disable_ipv6 = ipv6_defaults.disable_ipv6; - } + /* these will be inherited by all namespaces */ + dflt->autoconf = ipv6_defaults.autoconf; + dflt->disable_ipv6 = ipv6_defaults.disable_ipv6; net->ipv6.devconf_all = all; net->ipv6.devconf_dflt = dflt; @@ -4851,26 +5052,11 @@ static struct pernet_operations addrconf_ops = { .exit = addrconf_exit_net, }; -/* - * Device notifier - */ - -int register_inet6addr_notifier(struct notifier_block *nb) -{ - return atomic_notifier_chain_register(&inet6addr_chain, nb); -} -EXPORT_SYMBOL(register_inet6addr_notifier); - -int unregister_inet6addr_notifier(struct notifier_block *nb) -{ - return atomic_notifier_chain_unregister(&inet6addr_chain, nb); -} -EXPORT_SYMBOL(unregister_inet6addr_notifier); - static struct rtnl_af_ops inet6_ops = { .family = AF_INET6, .fill_link_af = inet6_fill_link_af, .get_link_af_size = inet6_get_link_af_size, + .set_link_af = inet6_set_link_af, }; /* @@ -4943,7 +5129,7 @@ int __init addrconf_init(void) __rtnl_register(PF_INET6, RTM_GETANYCAST, NULL, inet6_dump_ifacaddr, NULL); __rtnl_register(PF_INET6, RTM_GETNETCONF, inet6_netconf_get_devconf, - NULL, NULL); + inet6_netconf_dump_devconf, NULL); ipv6_addr_label_rtnl_register(); diff --git a/net/ipv6/addrconf_core.c b/net/ipv6/addrconf_core.c index d051e5f4bf34..72104562c864 100644 --- a/net/ipv6/addrconf_core.c +++ b/net/ipv6/addrconf_core.c @@ -78,3 +78,22 @@ int __ipv6_addr_type(const struct in6_addr *addr) } EXPORT_SYMBOL(__ipv6_addr_type); +static ATOMIC_NOTIFIER_HEAD(inet6addr_chain); + +int register_inet6addr_notifier(struct notifier_block *nb) +{ + return atomic_notifier_chain_register(&inet6addr_chain, nb); +} +EXPORT_SYMBOL(register_inet6addr_notifier); + +int unregister_inet6addr_notifier(struct notifier_block *nb) +{ + return atomic_notifier_chain_unregister(&inet6addr_chain, nb); +} +EXPORT_SYMBOL(unregister_inet6addr_notifier); + +int inet6addr_notifier_call_chain(unsigned long val, void *v) +{ + return atomic_notifier_call_chain(&inet6addr_chain, val, v); +} +EXPORT_SYMBOL(inet6addr_notifier_call_chain); diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c index ff76eecfd622..f083a583a05c 100644 --- a/net/ipv6/addrlabel.c +++ b/net/ipv6/addrlabel.c @@ -173,9 +173,8 @@ static struct ip6addrlbl_entry *__ipv6_addr_label(struct net *net, const struct in6_addr *addr, int type, int ifindex) { - struct hlist_node *pos; struct ip6addrlbl_entry *p; - 
hlist_for_each_entry_rcu(p, pos, &ip6addrlbl_table.head, list) { + hlist_for_each_entry_rcu(p, &ip6addrlbl_table.head, list) { if (__ip6addrlbl_match(net, p, addr, type, ifindex)) return p; } @@ -261,9 +260,9 @@ static int __ip6addrlbl_add(struct ip6addrlbl_entry *newp, int replace) if (hlist_empty(&ip6addrlbl_table.head)) { hlist_add_head_rcu(&newp->list, &ip6addrlbl_table.head); } else { - struct hlist_node *pos, *n; + struct hlist_node *n; struct ip6addrlbl_entry *p = NULL; - hlist_for_each_entry_safe(p, pos, n, + hlist_for_each_entry_safe(p, n, &ip6addrlbl_table.head, list) { if (p->prefixlen == newp->prefixlen && net_eq(ip6addrlbl_net(p), ip6addrlbl_net(newp)) && @@ -319,13 +318,13 @@ static int __ip6addrlbl_del(struct net *net, int ifindex) { struct ip6addrlbl_entry *p = NULL; - struct hlist_node *pos, *n; + struct hlist_node *n; int ret = -ESRCH; ADDRLABEL(KERN_DEBUG "%s(prefix=%pI6, prefixlen=%d, ifindex=%d)\n", __func__, prefix, prefixlen, ifindex); - hlist_for_each_entry_safe(p, pos, n, &ip6addrlbl_table.head, list) { + hlist_for_each_entry_safe(p, n, &ip6addrlbl_table.head, list) { if (p->prefixlen == prefixlen && net_eq(ip6addrlbl_net(p), net) && p->ifindex == ifindex && @@ -380,11 +379,11 @@ static int __net_init ip6addrlbl_net_init(struct net *net) static void __net_exit ip6addrlbl_net_exit(struct net *net) { struct ip6addrlbl_entry *p = NULL; - struct hlist_node *pos, *n; + struct hlist_node *n; /* Remove all labels belonging to the exiting net */ spin_lock(&ip6addrlbl_table.lock); - hlist_for_each_entry_safe(p, pos, n, &ip6addrlbl_table.head, list) { + hlist_for_each_entry_safe(p, n, &ip6addrlbl_table.head, list) { if (net_eq(ip6addrlbl_net(p), net)) { hlist_del_rcu(&p->list); ip6addrlbl_put(p); @@ -415,8 +414,7 @@ static const struct nla_policy ifal_policy[IFAL_MAX+1] = { [IFAL_LABEL] = { .len = sizeof(u32), }, }; -static int ip6addrlbl_newdel(struct sk_buff *skb, struct nlmsghdr *nlh, - void *arg) +static int ip6addrlbl_newdel(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); struct ifaddrlblmsg *ifal; @@ -437,10 +435,7 @@ static int ip6addrlbl_newdel(struct sk_buff *skb, struct nlmsghdr *nlh, if (!tb[IFAL_ADDRESS]) return -EINVAL; - pfx = nla_data(tb[IFAL_ADDRESS]); - if (!pfx) - return -EINVAL; if (!tb[IFAL_LABEL]) return -EINVAL; @@ -505,12 +500,11 @@ static int ip6addrlbl_dump(struct sk_buff *skb, struct netlink_callback *cb) { struct net *net = sock_net(skb->sk); struct ip6addrlbl_entry *p; - struct hlist_node *pos; int idx = 0, s_idx = cb->args[0]; int err; rcu_read_lock(); - hlist_for_each_entry_rcu(p, pos, &ip6addrlbl_table.head, list) { + hlist_for_each_entry_rcu(p, &ip6addrlbl_table.head, list) { if (idx >= s_idx && net_eq(ip6addrlbl_net(p), net)) { if ((err = ip6addrlbl_fill(skb, p, @@ -535,8 +529,7 @@ static inline int ip6addrlbl_msgsize(void) + nla_total_size(4); /* IFAL_LABEL */ } -static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr* nlh, - void *arg) +static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr* nlh) { struct net *net = sock_net(in_skb->sk); struct ifaddrlblmsg *ifal; @@ -563,10 +556,7 @@ static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr* nlh, if (!tb[IFAL_ADDRESS]) return -EINVAL; - addr = nla_data(tb[IFAL_ADDRESS]); - if (!addr) - return -EINVAL; rcu_read_lock(); p = __ipv6_addr_label(net, addr, ipv6_addr_type(addr), ifal->ifal_index); diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index b043c60429bd..ab5c7ad482cd 100644 --- a/net/ipv6/af_inet6.c +++ 
b/net/ipv6/af_inet6.c @@ -49,7 +49,6 @@ #include <net/udp.h> #include <net/udplite.h> #include <net/tcp.h> -#include <net/ipip.h> #include <net/protocol.h> #include <net/inet_common.h> #include <net/route.h> @@ -323,7 +322,7 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) struct net_device *dev = NULL; rcu_read_lock(); - if (addr_type & IPV6_ADDR_LINKLOCAL) { + if (__ipv6_addr_needs_scope_id(addr_type)) { if (addr_len >= sizeof(struct sockaddr_in6) && addr->sin6_scope_id) { /* Override any existing binding, if another one @@ -471,8 +470,8 @@ int inet6_getname(struct socket *sock, struct sockaddr *uaddr, sin->sin6_port = inet->inet_sport; } - if (ipv6_addr_type(&sin->sin6_addr) & IPV6_ADDR_LINKLOCAL) - sin->sin6_scope_id = sk->sk_bound_dev_if; + sin->sin6_scope_id = ipv6_iface_scope_id(&sin->sin6_addr, + sk->sk_bound_dev_if); *uaddr_len = sizeof(*sin); return 0; } @@ -811,11 +810,10 @@ static struct pernet_operations inet6_net_ops = { static int __init inet6_init(void) { - struct sk_buff *dummy_skb; struct list_head *r; int err = 0; - BUILD_BUG_ON(sizeof(struct inet6_skb_parm) > sizeof(dummy_skb->cb)); + BUILD_BUG_ON(sizeof(struct inet6_skb_parm) > FIELD_SIZEOF(struct sk_buff, cb)); /* Register the socket-side information for inet6_create. */ for (r = &inetsw6[0]; r < &inetsw6[SOCK_MAX]; ++r) diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index 384233188ac1..bb02e176cb70 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -521,8 +521,7 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb) /* We are going to _remove_ AH header to keep sockets happy, * so... Later this can change. */ - if (skb_cloned(skb) && - pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) + if (skb_unclone(skb, GFP_ATOMIC)) goto out; skb->ip_summed = CHECKSUM_NONE; diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c index 757a810d8f15..5a80f15a9de2 100644 --- a/net/ipv6/anycast.c +++ b/net/ipv6/anycast.c @@ -47,7 +47,7 @@ static int ipv6_dev_ac_dec(struct net_device *dev, const struct in6_addr *addr); /* Big ac list lock for all the sockets */ -static DEFINE_RWLOCK(ipv6_sk_ac_lock); +static DEFINE_SPINLOCK(ipv6_sk_ac_lock); /* @@ -128,10 +128,10 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr) err = ipv6_dev_ac_inc(dev, addr); if (!err) { - write_lock_bh(&ipv6_sk_ac_lock); + spin_lock_bh(&ipv6_sk_ac_lock); pac->acl_next = np->ipv6_ac_list; np->ipv6_ac_list = pac; - write_unlock_bh(&ipv6_sk_ac_lock); + spin_unlock_bh(&ipv6_sk_ac_lock); pac = NULL; } @@ -152,7 +152,7 @@ int ipv6_sock_ac_drop(struct sock *sk, int ifindex, const struct in6_addr *addr) struct ipv6_ac_socklist *pac, *prev_pac; struct net *net = sock_net(sk); - write_lock_bh(&ipv6_sk_ac_lock); + spin_lock_bh(&ipv6_sk_ac_lock); prev_pac = NULL; for (pac = np->ipv6_ac_list; pac; pac = pac->acl_next) { if ((ifindex == 0 || pac->acl_ifindex == ifindex) && @@ -161,7 +161,7 @@ int ipv6_sock_ac_drop(struct sock *sk, int ifindex, const struct in6_addr *addr) prev_pac = pac; } if (!pac) { - write_unlock_bh(&ipv6_sk_ac_lock); + spin_unlock_bh(&ipv6_sk_ac_lock); return -ENOENT; } if (prev_pac) @@ -169,7 +169,7 @@ int ipv6_sock_ac_drop(struct sock *sk, int ifindex, const struct in6_addr *addr) else np->ipv6_ac_list = pac->acl_next; - write_unlock_bh(&ipv6_sk_ac_lock); + spin_unlock_bh(&ipv6_sk_ac_lock); rcu_read_lock(); dev = dev_get_by_index_rcu(net, pac->acl_ifindex); @@ -192,10 +192,10 @@ void ipv6_sock_ac_close(struct sock *sk) if (!np->ipv6_ac_list) return; - write_lock_bh(&ipv6_sk_ac_lock); + 
spin_lock_bh(&ipv6_sk_ac_lock); pac = np->ipv6_ac_list; np->ipv6_ac_list = NULL; - write_unlock_bh(&ipv6_sk_ac_lock); + spin_unlock_bh(&ipv6_sk_ac_lock); prev_index = 0; rcu_read_lock(); @@ -509,7 +509,7 @@ static const struct file_operations ac6_seq_fops = { int __net_init ac6_proc_init(struct net *net) { - if (!proc_net_fops_create(net, "anycast6", S_IRUGO, &ac6_seq_fops)) + if (!proc_create("anycast6", S_IRUGO, net->proc_net, &ac6_seq_fops)) return -ENOMEM; return 0; @@ -517,7 +517,7 @@ int __net_init ac6_proc_init(struct net *net) void ac6_proc_exit(struct net *net) { - proc_net_remove(net, "anycast6"); + remove_proc_entry("anycast6", net->proc_net); } #endif diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 7a778b9a7b85..4b56cbbc7890 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -30,6 +30,7 @@ #include <net/transp_v6.h> #include <net/ip6_route.h> #include <net/tcp_states.h> +#include <net/dsfield.h> #include <linux/errqueue.h> #include <asm/uaccess.h> @@ -123,7 +124,7 @@ ipv4_connected: goto out; } - if (addr_type&IPV6_ADDR_LINKLOCAL) { + if (__ipv6_addr_needs_scope_id(addr_type)) { if (addr_len >= sizeof(struct sockaddr_in6) && usin->sin6_scope_id) { if (sk->sk_bound_dev_if && @@ -354,19 +355,19 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len) sin->sin6_family = AF_INET6; sin->sin6_flowinfo = 0; sin->sin6_port = serr->port; - sin->sin6_scope_id = 0; if (skb->protocol == htons(ETH_P_IPV6)) { - sin->sin6_addr = - *(struct in6_addr *)(nh + serr->addr_offset); + const struct ipv6hdr *ip6h = container_of((struct in6_addr *)(nh + serr->addr_offset), + struct ipv6hdr, daddr); + sin->sin6_addr = ip6h->daddr; if (np->sndflow) - sin->sin6_flowinfo = - (*(__be32 *)(nh + serr->addr_offset - 24) & - IPV6_FLOWINFO_MASK); - if (ipv6_addr_type(&sin->sin6_addr) & IPV6_ADDR_LINKLOCAL) - sin->sin6_scope_id = IP6CB(skb)->iif; + sin->sin6_flowinfo = ip6_flowinfo(ip6h); + sin->sin6_scope_id = + ipv6_iface_scope_id(&sin->sin6_addr, + IP6CB(skb)->iif); } else { ipv6_addr_set_v4mapped(*(__be32 *)(nh + serr->addr_offset), &sin->sin6_addr); + sin->sin6_scope_id = 0; } } @@ -376,18 +377,19 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len) if (serr->ee.ee_origin != SO_EE_ORIGIN_LOCAL) { sin->sin6_family = AF_INET6; sin->sin6_flowinfo = 0; - sin->sin6_scope_id = 0; if (skb->protocol == htons(ETH_P_IPV6)) { sin->sin6_addr = ipv6_hdr(skb)->saddr; if (np->rxopt.all) ip6_datagram_recv_ctl(sk, msg, skb); - if (ipv6_addr_type(&sin->sin6_addr) & IPV6_ADDR_LINKLOCAL) - sin->sin6_scope_id = IP6CB(skb)->iif; + sin->sin6_scope_id = + ipv6_iface_scope_id(&sin->sin6_addr, + IP6CB(skb)->iif); } else { struct inet_sock *inet = inet_sk(sk); ipv6_addr_set_v4mapped(ip_hdr(skb)->saddr, &sin->sin6_addr); + sin->sin6_scope_id = 0; if (inet->cmsg_flags) ip_cmsg_recv(msg, skb); } @@ -489,13 +491,14 @@ int ip6_datagram_recv_ctl(struct sock *sk, struct msghdr *msg, } if (np->rxopt.bits.rxtclass) { - int tclass = ipv6_tclass(ipv6_hdr(skb)); + int tclass = ipv6_get_dsfield(ipv6_hdr(skb)); put_cmsg(msg, SOL_IPV6, IPV6_TCLASS, sizeof(tclass), &tclass); } - if (np->rxopt.bits.rxflow && (*(__be32 *)nh & IPV6_FLOWINFO_MASK)) { - __be32 flowinfo = *(__be32 *)nh & IPV6_FLOWINFO_MASK; - put_cmsg(msg, SOL_IPV6, IPV6_FLOWINFO, sizeof(flowinfo), &flowinfo); + if (np->rxopt.bits.rxflow) { + __be32 flowinfo = ip6_flowinfo((struct ipv6hdr *)nh); + if (flowinfo) + put_cmsg(msg, SOL_IPV6, IPV6_FLOWINFO, sizeof(flowinfo), &flowinfo); } /* HbH is allowed only once */ @@ -591,7 +594,9 @@ int 
ip6_datagram_recv_ctl(struct sock *sk, struct msghdr *msg, sin6.sin6_addr = ipv6_hdr(skb)->daddr; sin6.sin6_port = ports[1]; sin6.sin6_flowinfo = 0; - sin6.sin6_scope_id = 0; + sin6.sin6_scope_id = + ipv6_iface_scope_id(&ipv6_hdr(skb)->daddr, + opt->iif); put_cmsg(msg, SOL_IPV6, IPV6_ORIGDSTADDR, sizeof(sin6), &sin6); } diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 473f628f9f20..07a7d65a7cb6 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -553,7 +553,8 @@ static bool ipv6_hop_ra(struct sk_buff *skb, int optoff) const unsigned char *nh = skb_network_header(skb); if (nh[optoff + 1] == 2) { - IP6CB(skb)->ra = optoff; + IP6CB(skb)->flags |= IP6SKB_ROUTERALERT; + memcpy(&IP6CB(skb)->ra, nh + optoff + 2, sizeof(IP6CB(skb)->ra)); return true; } LIMIT_NETDEBUG(KERN_DEBUG "ipv6_hop_ra: wrong RA length %d\n", diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index fff5bdd8b680..b4ff0a42b8c7 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -124,15 +124,6 @@ static __inline__ void icmpv6_xmit_unlock(struct sock *sk) } /* - * Slightly more convenient version of icmpv6_send. - */ -void icmpv6_param_prob(struct sk_buff *skb, u8 code, int pos) -{ - icmpv6_send(skb, ICMPV6_PARAMPROB, code, pos); - kfree_skb(skb); -} - -/* * Figure out, may we reply to this packet with icmp error. * * We do not reply, if: @@ -332,7 +323,7 @@ static struct dst_entry *icmpv6_route_lookup(struct net *net, struct sk_buff *sk * anycast. */ if (((struct rt6_info *)dst)->rt6i_flags & RTF_ANYCAST) { - LIMIT_NETDEBUG(KERN_DEBUG "icmpv6_send: acast source\n"); + LIMIT_NETDEBUG(KERN_DEBUG "icmp6_send: acast source\n"); dst_release(dst); return ERR_PTR(-EINVAL); } @@ -381,7 +372,7 @@ relookup_failed: /* * Send an ICMP message in response to a packet in error */ -void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) +static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) { struct net *net = dev_net(skb->dev); struct inet6_dev *idev = NULL; @@ -406,7 +397,7 @@ void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) /* * Make sure we respect the rules * i.e. RFC 1885 2.4(e) - * Rule (e.1) is enforced by not using icmpv6_send + * Rule (e.1) is enforced by not using icmp6_send * in any code that processes icmp errors. */ addr_type = ipv6_addr_type(&hdr->daddr); @@ -434,7 +425,7 @@ void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) * Source addr check */ - if (addr_type & IPV6_ADDR_LINKLOCAL) + if (__ipv6_addr_needs_scope_id(addr_type)) iif = skb->dev->ifindex; /* @@ -444,7 +435,7 @@ void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) * and anycast addresses will be checked later. */ if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) { - LIMIT_NETDEBUG(KERN_DEBUG "icmpv6_send: addr_any/mcast source\n"); + LIMIT_NETDEBUG(KERN_DEBUG "icmp6_send: addr_any/mcast source\n"); return; } @@ -452,7 +443,7 @@ void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) * Never answer to a ICMP packet. */ if (is_ineligible(skb)) { - LIMIT_NETDEBUG(KERN_DEBUG "icmpv6_send: no reply to icmp error\n"); + LIMIT_NETDEBUG(KERN_DEBUG "icmp6_send: no reply to icmp error\n"); return; } @@ -529,7 +520,14 @@ out_dst_release: out: icmpv6_xmit_unlock(sk); } -EXPORT_SYMBOL(icmpv6_send); + +/* Slightly more convenient version of icmp6_send. 
+ */ +void icmpv6_param_prob(struct sk_buff *skb, u8 code, int pos) +{ + icmp6_send(skb, ICMPV6_PARAMPROB, code, pos); + kfree_skb(skb); +} static void icmpv6_echo_reply(struct sk_buff *skb) { @@ -701,7 +699,7 @@ static int icmpv6_rcv(struct sk_buff *skb) if (__skb_checksum_complete(skb)) { LIMIT_NETDEBUG(KERN_DEBUG "ICMPv6 checksum failed [%pI6 > %pI6]\n", saddr, daddr); - goto discard_it; + goto csum_error; } } @@ -787,6 +785,8 @@ static int icmpv6_rcv(struct sk_buff *skb) kfree_skb(skb); return 0; +csum_error: + ICMP6_INC_STATS_BH(dev_net(dev), idev, ICMP6_MIB_CSUMERRORS); discard_it: ICMP6_INC_STATS_BH(dev_net(dev), idev, ICMP6_MIB_INERRORS); drop_no_count: @@ -885,8 +885,14 @@ int __init icmpv6_init(void) err = -EAGAIN; if (inet6_add_protocol(&icmpv6_protocol, IPPROTO_ICMPV6) < 0) goto fail; + + err = inet6_register_icmp_sender(icmp6_send); + if (err) + goto sender_reg_err; return 0; +sender_reg_err: + inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6); fail: pr_err("Failed to register ICMP6 protocol\n"); unregister_pernet_subsys(&icmpv6_sk_ops); @@ -895,6 +901,7 @@ fail: void icmpv6_cleanup(void) { + inet6_unregister_icmp_sender(icmp6_send); unregister_pernet_subsys(&icmpv6_sk_ops); inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6); } diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index 30647857a375..e4311cbc8b4e 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -31,25 +31,37 @@ int inet6_csk_bind_conflict(const struct sock *sk, const struct inet_bind_bucket *tb, bool relax) { const struct sock *sk2; - const struct hlist_node *node; + int reuse = sk->sk_reuse; + int reuseport = sk->sk_reuseport; + kuid_t uid = sock_i_uid((struct sock *)sk); /* We must walk the whole port owner list in this case. -DaveM */ /* * See comment in inet_csk_bind_conflict about sock lookup * vs net namespaces issues. 
*/ - sk_for_each_bound(sk2, node, &tb->owners) { + sk_for_each_bound(sk2, &tb->owners) { if (sk != sk2 && (!sk->sk_bound_dev_if || !sk2->sk_bound_dev_if || - sk->sk_bound_dev_if == sk2->sk_bound_dev_if) && - (!sk->sk_reuse || !sk2->sk_reuse || - sk2->sk_state == TCP_LISTEN) && - ipv6_rcv_saddr_equal(sk, sk2)) - break; + sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) { + if ((!reuse || !sk2->sk_reuse || + sk2->sk_state == TCP_LISTEN) && + (!reuseport || !sk2->sk_reuseport || + (sk2->sk_state != TCP_TIME_WAIT && + !uid_eq(uid, + sock_i_uid((struct sock *)sk2))))) { + if (ipv6_rcv_saddr_equal(sk, sk2)) + break; + } + if (!relax && reuse && sk2->sk_reuse && + sk2->sk_state != TCP_LISTEN && + ipv6_rcv_saddr_equal(sk, sk2)) + break; + } } - return node != NULL; + return sk2 != NULL; } EXPORT_SYMBOL_GPL(inet6_csk_bind_conflict); @@ -161,10 +173,8 @@ void inet6_csk_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr) sin6->sin6_port = inet_sk(sk)->inet_dport; /* We do not store received flowlabel for TCP */ sin6->sin6_flowinfo = 0; - sin6->sin6_scope_id = 0; - if (sk->sk_bound_dev_if && - ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL) - sin6->sin6_scope_id = sk->sk_bound_dev_if; + sin6->sin6_scope_id = ipv6_iface_scope_id(&sin6->sin6_addr, + sk->sk_bound_dev_if); } EXPORT_SYMBOL_GPL(inet6_csk_addr2sockaddr); diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index dea17fd28e50..32b4a1675d82 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -158,25 +158,38 @@ static inline int compute_score(struct sock *sk, struct net *net, } struct sock *inet6_lookup_listener(struct net *net, - struct inet_hashinfo *hashinfo, const struct in6_addr *daddr, + struct inet_hashinfo *hashinfo, const struct in6_addr *saddr, + const __be16 sport, const struct in6_addr *daddr, const unsigned short hnum, const int dif) { struct sock *sk; const struct hlist_nulls_node *node; struct sock *result; - int score, hiscore; + int score, hiscore, matches = 0, reuseport = 0; + u32 phash = 0; unsigned int hash = inet_lhashfn(net, hnum); struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash]; rcu_read_lock(); begin: result = NULL; - hiscore = -1; + hiscore = 0; sk_nulls_for_each(sk, node, &ilb->head) { score = compute_score(sk, net, hnum, daddr, dif); if (score > hiscore) { hiscore = score; result = sk; + reuseport = sk->sk_reuseport; + if (reuseport) { + phash = inet6_ehashfn(net, daddr, hnum, + saddr, sport); + matches = 1; + } + } else if (score == hiscore && reuseport) { + matches++; + if (((u64)phash * matches) >> 32 == 0) + result = sk; + phash = next_pseudo_random32(phash); } } /* diff --git a/net/ipv6/ip6_checksum.c b/net/ipv6/ip6_checksum.c new file mode 100644 index 000000000000..72d198b8e4d2 --- /dev/null +++ b/net/ipv6/ip6_checksum.c @@ -0,0 +1,97 @@ +#include <net/ip.h> +#include <net/udp.h> +#include <net/udplite.h> +#include <asm/checksum.h> + +#ifndef _HAVE_ARCH_IPV6_CSUM +__sum16 csum_ipv6_magic(const struct in6_addr *saddr, + const struct in6_addr *daddr, + __u32 len, unsigned short proto, + __wsum csum) +{ + + int carry; + __u32 ulen; + __u32 uproto; + __u32 sum = (__force u32)csum; + + sum += (__force u32)saddr->s6_addr32[0]; + carry = (sum < (__force u32)saddr->s6_addr32[0]); + sum += carry; + + sum += (__force u32)saddr->s6_addr32[1]; + carry = (sum < (__force u32)saddr->s6_addr32[1]); + sum += carry; + + sum += (__force u32)saddr->s6_addr32[2]; + carry = (sum < (__force u32)saddr->s6_addr32[2]); + sum += carry; + + sum += (__force 
u32)saddr->s6_addr32[3]; + carry = (sum < (__force u32)saddr->s6_addr32[3]); + sum += carry; + + sum += (__force u32)daddr->s6_addr32[0]; + carry = (sum < (__force u32)daddr->s6_addr32[0]); + sum += carry; + + sum += (__force u32)daddr->s6_addr32[1]; + carry = (sum < (__force u32)daddr->s6_addr32[1]); + sum += carry; + + sum += (__force u32)daddr->s6_addr32[2]; + carry = (sum < (__force u32)daddr->s6_addr32[2]); + sum += carry; + + sum += (__force u32)daddr->s6_addr32[3]; + carry = (sum < (__force u32)daddr->s6_addr32[3]); + sum += carry; + + ulen = (__force u32)htonl((__u32) len); + sum += ulen; + carry = (sum < ulen); + sum += carry; + + uproto = (__force u32)htonl(proto); + sum += uproto; + carry = (sum < uproto); + sum += carry; + + return csum_fold((__force __wsum)sum); +} +EXPORT_SYMBOL(csum_ipv6_magic); +#endif + +int udp6_csum_init(struct sk_buff *skb, struct udphdr *uh, int proto) +{ + int err; + + UDP_SKB_CB(skb)->partial_cov = 0; + UDP_SKB_CB(skb)->cscov = skb->len; + + if (proto == IPPROTO_UDPLITE) { + err = udplite_checksum_init(skb, uh); + if (err) + return err; + } + + if (uh->check == 0) { + /* RFC 2460 section 8.1 says that we SHOULD log + this error. Well, it is reasonable. + */ + LIMIT_NETDEBUG(KERN_INFO "IPv6: udp checksum is 0\n"); + return 1; + } + if (skb->ip_summed == CHECKSUM_COMPLETE && + !csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, + skb->len, proto, skb->csum)) + skb->ip_summed = CHECKSUM_UNNECESSARY; + + if (!skb_csum_unnecessary(skb)) + skb->csum = ~csum_unfold(csum_ipv6_magic(&ipv6_hdr(skb)->saddr, + &ipv6_hdr(skb)->daddr, + skb->len, proto, 0)); + + return 0; +} +EXPORT_SYMBOL(udp6_csum_init); diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 710cafd2e1a9..192dd1a0e188 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -224,7 +224,6 @@ struct fib6_table *fib6_get_table(struct net *net, u32 id) { struct fib6_table *tb; struct hlist_head *head; - struct hlist_node *node; unsigned int h; if (id == 0) @@ -232,7 +231,7 @@ struct fib6_table *fib6_get_table(struct net *net, u32 id) h = id & (FIB6_TABLE_HASHSZ - 1); rcu_read_lock(); head = &net->ipv6.fib_table_hash[h]; - hlist_for_each_entry_rcu(tb, node, head, tb6_hlist) { + hlist_for_each_entry_rcu(tb, head, tb6_hlist) { if (tb->tb6_id == id) { rcu_read_unlock(); return tb; @@ -363,7 +362,6 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) struct rt6_rtnl_dump_arg arg; struct fib6_walker_t *w; struct fib6_table *tb; - struct hlist_node *node; struct hlist_head *head; int res = 0; @@ -398,7 +396,7 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) for (h = s_h; h < FIB6_TABLE_HASHSZ; h++, s_e = 0) { e = 0; head = &net->ipv6.fib_table_hash[h]; - hlist_for_each_entry_rcu(tb, node, head, tb6_hlist) { + hlist_for_each_entry_rcu(tb, head, tb6_hlist) { if (e < s_e) goto next; res = fib6_dump_table(tb, skb, cb); @@ -1520,14 +1518,13 @@ void fib6_clean_all_ro(struct net *net, int (*func)(struct rt6_info *, void *arg int prune, void *arg) { struct fib6_table *table; - struct hlist_node *node; struct hlist_head *head; unsigned int h; rcu_read_lock(); for (h = 0; h < FIB6_TABLE_HASHSZ; h++) { head = &net->ipv6.fib_table_hash[h]; - hlist_for_each_entry_rcu(table, node, head, tb6_hlist) { + hlist_for_each_entry_rcu(table, head, tb6_hlist) { read_lock_bh(&table->tb6_lock); fib6_clean_tree(net, &table->tb6_root, func, prune, arg); @@ -1540,14 +1537,13 @@ void fib6_clean_all(struct net *net, int (*func)(struct rt6_info *, void *arg), int 
prune, void *arg) { struct fib6_table *table; - struct hlist_node *node; struct hlist_head *head; unsigned int h; rcu_read_lock(); for (h = 0; h < FIB6_TABLE_HASHSZ; h++) { head = &net->ipv6.fib_table_hash[h]; - hlist_for_each_entry_rcu(table, node, head, tb6_hlist) { + hlist_for_each_entry_rcu(table, head, tb6_hlist) { write_lock_bh(&table->tb6_lock); fib6_clean_tree(net, &table->tb6_root, func, prune, arg); diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index d6de4b447250..46e88433ec7d 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -51,25 +51,38 @@ #define FL_HASH(l) (ntohl(l)&FL_HASH_MASK) static atomic_t fl_size = ATOMIC_INIT(0); -static struct ip6_flowlabel *fl_ht[FL_HASH_MASK+1]; +static struct ip6_flowlabel __rcu *fl_ht[FL_HASH_MASK+1]; static void ip6_fl_gc(unsigned long dummy); static DEFINE_TIMER(ip6_fl_gc_timer, ip6_fl_gc, 0, 0); /* FL hash table lock: it protects only of GC */ -static DEFINE_RWLOCK(ip6_fl_lock); +static DEFINE_SPINLOCK(ip6_fl_lock); /* Big socket sock */ -static DEFINE_RWLOCK(ip6_sk_fl_lock); +static DEFINE_SPINLOCK(ip6_sk_fl_lock); +#define for_each_fl_rcu(hash, fl) \ + for (fl = rcu_dereference_bh(fl_ht[(hash)]); \ + fl != NULL; \ + fl = rcu_dereference_bh(fl->next)) +#define for_each_fl_continue_rcu(fl) \ + for (fl = rcu_dereference_bh(fl->next); \ + fl != NULL; \ + fl = rcu_dereference_bh(fl->next)) + +#define for_each_sk_fl_rcu(np, sfl) \ + for (sfl = rcu_dereference_bh(np->ipv6_fl_list); \ + sfl != NULL; \ + sfl = rcu_dereference_bh(sfl->next)) static inline struct ip6_flowlabel *__fl_lookup(struct net *net, __be32 label) { struct ip6_flowlabel *fl; - for (fl=fl_ht[FL_HASH(label)]; fl; fl = fl->next) { + for_each_fl_rcu(FL_HASH(label), fl) { if (fl->label == label && net_eq(fl->fl_net, net)) return fl; } @@ -80,11 +93,11 @@ static struct ip6_flowlabel *fl_lookup(struct net *net, __be32 label) { struct ip6_flowlabel *fl; - read_lock_bh(&ip6_fl_lock); + rcu_read_lock_bh(); fl = __fl_lookup(net, label); - if (fl) - atomic_inc(&fl->users); - read_unlock_bh(&ip6_fl_lock); + if (fl && !atomic_inc_not_zero(&fl->users)) + fl = NULL; + rcu_read_unlock_bh(); return fl; } @@ -96,13 +109,13 @@ static void fl_free(struct ip6_flowlabel *fl) put_pid(fl->owner.pid); release_net(fl->fl_net); kfree(fl->opt); + kfree_rcu(fl, rcu); } - kfree(fl); } static void fl_release(struct ip6_flowlabel *fl) { - write_lock_bh(&ip6_fl_lock); + spin_lock_bh(&ip6_fl_lock); fl->lastuse = jiffies; if (atomic_dec_and_test(&fl->users)) { @@ -119,7 +132,7 @@ static void fl_release(struct ip6_flowlabel *fl) time_after(ip6_fl_gc_timer.expires, ttd)) mod_timer(&ip6_fl_gc_timer, ttd); } - write_unlock_bh(&ip6_fl_lock); + spin_unlock_bh(&ip6_fl_lock); } static void ip6_fl_gc(unsigned long dummy) @@ -128,12 +141,15 @@ static void ip6_fl_gc(unsigned long dummy) unsigned long now = jiffies; unsigned long sched = 0; - write_lock(&ip6_fl_lock); + spin_lock(&ip6_fl_lock); for (i=0; i<=FL_HASH_MASK; i++) { - struct ip6_flowlabel *fl, **flp; + struct ip6_flowlabel *fl; + struct ip6_flowlabel __rcu **flp; + flp = &fl_ht[i]; - while ((fl=*flp) != NULL) { + while ((fl = rcu_dereference_protected(*flp, + lockdep_is_held(&ip6_fl_lock))) != NULL) { if (atomic_read(&fl->users) == 0) { unsigned long ttd = fl->lastuse + fl->linger; if (time_after(ttd, fl->expires)) @@ -156,18 +172,21 @@ static void ip6_fl_gc(unsigned long dummy) if (sched) { mod_timer(&ip6_fl_gc_timer, sched); } - write_unlock(&ip6_fl_lock); + spin_unlock(&ip6_fl_lock); } static void __net_exit 
ip6_fl_purge(struct net *net) { int i; - write_lock(&ip6_fl_lock); + spin_lock(&ip6_fl_lock); for (i = 0; i <= FL_HASH_MASK; i++) { - struct ip6_flowlabel *fl, **flp; + struct ip6_flowlabel *fl; + struct ip6_flowlabel __rcu **flp; + flp = &fl_ht[i]; - while ((fl = *flp) != NULL) { + while ((fl = rcu_dereference_protected(*flp, + lockdep_is_held(&ip6_fl_lock))) != NULL) { if (net_eq(fl->fl_net, net) && atomic_read(&fl->users) == 0) { *flp = fl->next; @@ -178,7 +197,7 @@ static void __net_exit ip6_fl_purge(struct net *net) flp = &fl->next; } } - write_unlock(&ip6_fl_lock); + spin_unlock(&ip6_fl_lock); } static struct ip6_flowlabel *fl_intern(struct net *net, @@ -188,7 +207,7 @@ static struct ip6_flowlabel *fl_intern(struct net *net, fl->label = label & IPV6_FLOWLABEL_MASK; - write_lock_bh(&ip6_fl_lock); + spin_lock_bh(&ip6_fl_lock); if (label == 0) { for (;;) { fl->label = htonl(net_random())&IPV6_FLOWLABEL_MASK; @@ -210,16 +229,16 @@ static struct ip6_flowlabel *fl_intern(struct net *net, lfl = __fl_lookup(net, fl->label); if (lfl != NULL) { atomic_inc(&lfl->users); - write_unlock_bh(&ip6_fl_lock); + spin_unlock_bh(&ip6_fl_lock); return lfl; } } fl->lastuse = jiffies; fl->next = fl_ht[FL_HASH(fl->label)]; - fl_ht[FL_HASH(fl->label)] = fl; + rcu_assign_pointer(fl_ht[FL_HASH(fl->label)], fl); atomic_inc(&fl_size); - write_unlock_bh(&ip6_fl_lock); + spin_unlock_bh(&ip6_fl_lock); return NULL; } @@ -234,17 +253,17 @@ struct ip6_flowlabel * fl6_sock_lookup(struct sock *sk, __be32 label) label &= IPV6_FLOWLABEL_MASK; - read_lock_bh(&ip6_sk_fl_lock); - for (sfl=np->ipv6_fl_list; sfl; sfl = sfl->next) { + rcu_read_lock_bh(); + for_each_sk_fl_rcu(np, sfl) { struct ip6_flowlabel *fl = sfl->fl; if (fl->label == label) { fl->lastuse = jiffies; atomic_inc(&fl->users); - read_unlock_bh(&ip6_sk_fl_lock); + rcu_read_unlock_bh(); return fl; } } - read_unlock_bh(&ip6_sk_fl_lock); + rcu_read_unlock_bh(); return NULL; } @@ -255,11 +274,21 @@ void fl6_free_socklist(struct sock *sk) struct ipv6_pinfo *np = inet6_sk(sk); struct ipv6_fl_socklist *sfl; - while ((sfl = np->ipv6_fl_list) != NULL) { + if (!rcu_access_pointer(np->ipv6_fl_list)) + return; + + spin_lock_bh(&ip6_sk_fl_lock); + while ((sfl = rcu_dereference_protected(np->ipv6_fl_list, + lockdep_is_held(&ip6_sk_fl_lock))) != NULL) { np->ipv6_fl_list = sfl->next; + spin_unlock_bh(&ip6_sk_fl_lock); + fl_release(sfl->fl); - kfree(sfl); + kfree_rcu(sfl, rcu); + + spin_lock_bh(&ip6_sk_fl_lock); } + spin_unlock_bh(&ip6_sk_fl_lock); } /* Service routines */ @@ -424,7 +453,7 @@ static int mem_check(struct sock *sk) if (room > FL_MAX_SIZE - FL_MAX_PER_SOCK) return 0; - for (sfl = np->ipv6_fl_list; sfl; sfl = sfl->next) + for_each_sk_fl_rcu(np, sfl) count++; if (room <= 0 || @@ -467,11 +496,11 @@ static bool ipv6_opt_cmp(struct ipv6_txoptions *o1, struct ipv6_txoptions *o2) static inline void fl_link(struct ipv6_pinfo *np, struct ipv6_fl_socklist *sfl, struct ip6_flowlabel *fl) { - write_lock_bh(&ip6_sk_fl_lock); + spin_lock_bh(&ip6_sk_fl_lock); sfl->fl = fl; sfl->next = np->ipv6_fl_list; - np->ipv6_fl_list = sfl; - write_unlock_bh(&ip6_sk_fl_lock); + rcu_assign_pointer(np->ipv6_fl_list, sfl); + spin_unlock_bh(&ip6_sk_fl_lock); } int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen) @@ -481,7 +510,8 @@ int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen) struct ipv6_pinfo *np = inet6_sk(sk); struct in6_flowlabel_req freq; struct ipv6_fl_socklist *sfl1=NULL; - struct ipv6_fl_socklist *sfl, **sflp; + struct ipv6_fl_socklist *sfl; + 
struct ipv6_fl_socklist __rcu **sflp; struct ip6_flowlabel *fl, *fl1 = NULL; @@ -493,31 +523,33 @@ int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen) switch (freq.flr_action) { case IPV6_FL_A_PUT: - write_lock_bh(&ip6_sk_fl_lock); - for (sflp = &np->ipv6_fl_list; (sfl=*sflp)!=NULL; sflp = &sfl->next) { + spin_lock_bh(&ip6_sk_fl_lock); + for (sflp = &np->ipv6_fl_list; + (sfl = rcu_dereference(*sflp))!=NULL; + sflp = &sfl->next) { if (sfl->fl->label == freq.flr_label) { if (freq.flr_label == (np->flow_label&IPV6_FLOWLABEL_MASK)) np->flow_label &= ~IPV6_FLOWLABEL_MASK; - *sflp = sfl->next; - write_unlock_bh(&ip6_sk_fl_lock); + *sflp = rcu_dereference(sfl->next); + spin_unlock_bh(&ip6_sk_fl_lock); fl_release(sfl->fl); - kfree(sfl); + kfree_rcu(sfl, rcu); return 0; } } - write_unlock_bh(&ip6_sk_fl_lock); + spin_unlock_bh(&ip6_sk_fl_lock); return -ESRCH; case IPV6_FL_A_RENEW: - read_lock_bh(&ip6_sk_fl_lock); - for (sfl = np->ipv6_fl_list; sfl; sfl = sfl->next) { + rcu_read_lock_bh(); + for_each_sk_fl_rcu(np, sfl) { if (sfl->fl->label == freq.flr_label) { err = fl6_renew(sfl->fl, freq.flr_linger, freq.flr_expires); - read_unlock_bh(&ip6_sk_fl_lock); + rcu_read_unlock_bh(); return err; } } - read_unlock_bh(&ip6_sk_fl_lock); + rcu_read_unlock_bh(); if (freq.flr_share == IPV6_FL_S_NONE && ns_capable(net->user_ns, CAP_NET_ADMIN)) { @@ -541,11 +573,11 @@ int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen) if (freq.flr_label) { err = -EEXIST; - read_lock_bh(&ip6_sk_fl_lock); - for (sfl = np->ipv6_fl_list; sfl; sfl = sfl->next) { + rcu_read_lock_bh(); + for_each_sk_fl_rcu(np, sfl) { if (sfl->fl->label == freq.flr_label) { if (freq.flr_flags&IPV6_FL_F_EXCL) { - read_unlock_bh(&ip6_sk_fl_lock); + rcu_read_unlock_bh(); goto done; } fl1 = sfl->fl; @@ -553,7 +585,7 @@ int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen) break; } } - read_unlock_bh(&ip6_sk_fl_lock); + rcu_read_unlock_bh(); if (fl1 == NULL) fl1 = fl_lookup(net, freq.flr_label); @@ -641,13 +673,13 @@ static struct ip6_flowlabel *ip6fl_get_first(struct seq_file *seq) struct net *net = seq_file_net(seq); for (state->bucket = 0; state->bucket <= FL_HASH_MASK; ++state->bucket) { - fl = fl_ht[state->bucket]; - - while (fl && !net_eq(fl->fl_net, net)) - fl = fl->next; - if (fl) - break; + for_each_fl_rcu(state->bucket, fl) { + if (net_eq(fl->fl_net, net)) + goto out; + } } + fl = NULL; +out: return fl; } @@ -656,18 +688,22 @@ static struct ip6_flowlabel *ip6fl_get_next(struct seq_file *seq, struct ip6_flo struct ip6fl_iter_state *state = ip6fl_seq_private(seq); struct net *net = seq_file_net(seq); - fl = fl->next; + for_each_fl_continue_rcu(fl) { + if (net_eq(fl->fl_net, net)) + goto out; + } + try_again: - while (fl && !net_eq(fl->fl_net, net)) - fl = fl->next; - - while (!fl) { - if (++state->bucket <= FL_HASH_MASK) { - fl = fl_ht[state->bucket]; - goto try_again; - } else - break; + if (++state->bucket <= FL_HASH_MASK) { + for_each_fl_rcu(state->bucket, fl) { + if (net_eq(fl->fl_net, net)) + goto out; + } + goto try_again; } + fl = NULL; + +out: return fl; } @@ -681,9 +717,9 @@ static struct ip6_flowlabel *ip6fl_get_idx(struct seq_file *seq, loff_t pos) } static void *ip6fl_seq_start(struct seq_file *seq, loff_t *pos) - __acquires(ip6_fl_lock) + __acquires(RCU) { - read_lock_bh(&ip6_fl_lock); + rcu_read_lock_bh(); return *pos ? 
ip6fl_get_idx(seq, *pos - 1) : SEQ_START_TOKEN; } @@ -700,9 +736,9 @@ static void *ip6fl_seq_next(struct seq_file *seq, void *v, loff_t *pos) } static void ip6fl_seq_stop(struct seq_file *seq, void *v) - __releases(ip6_fl_lock) + __releases(RCU) { - read_unlock_bh(&ip6_fl_lock); + rcu_read_unlock_bh(); } static int ip6fl_seq_show(struct seq_file *seq, void *v) @@ -775,15 +811,15 @@ static const struct file_operations ip6fl_seq_fops = { static int __net_init ip6_flowlabel_proc_init(struct net *net) { - if (!proc_net_fops_create(net, "ip6_flowlabel", - S_IRUGO, &ip6fl_seq_fops)) + if (!proc_create("ip6_flowlabel", S_IRUGO, net->proc_net, + &ip6fl_seq_fops)) return -ENOMEM; return 0; } static void __net_exit ip6_flowlabel_proc_fini(struct net *net) { - proc_net_remove(net, "ip6_flowlabel"); + remove_proc_entry("ip6_flowlabel", net->proc_net); } #else static inline int ip6_flowlabel_proc_init(struct net *net) diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 131dd097736d..d3ddd8400354 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -38,6 +38,7 @@ #include <net/sock.h> #include <net/ip.h> +#include <net/ip_tunnels.h> #include <net/icmp.h> #include <net/protocol.h> #include <net/addrconf.h> @@ -110,46 +111,6 @@ static u32 HASH_ADDR(const struct in6_addr *addr) #define tunnels_l tunnels[1] #define tunnels_wc tunnels[0] -static struct rtnl_link_stats64 *ip6gre_get_stats64(struct net_device *dev, - struct rtnl_link_stats64 *tot) -{ - int i; - - for_each_possible_cpu(i) { - const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i); - u64 rx_packets, rx_bytes, tx_packets, tx_bytes; - unsigned int start; - - do { - start = u64_stats_fetch_begin_bh(&tstats->syncp); - rx_packets = tstats->rx_packets; - tx_packets = tstats->tx_packets; - rx_bytes = tstats->rx_bytes; - tx_bytes = tstats->tx_bytes; - } while (u64_stats_fetch_retry_bh(&tstats->syncp, start)); - - tot->rx_packets += rx_packets; - tot->tx_packets += tx_packets; - tot->rx_bytes += rx_bytes; - tot->tx_bytes += tx_bytes; - } - - tot->multicast = dev->stats.multicast; - tot->rx_crc_errors = dev->stats.rx_crc_errors; - tot->rx_fifo_errors = dev->stats.rx_fifo_errors; - tot->rx_length_errors = dev->stats.rx_length_errors; - tot->rx_frame_errors = dev->stats.rx_frame_errors; - tot->rx_errors = dev->stats.rx_errors; - - tot->tx_fifo_errors = dev->stats.tx_fifo_errors; - tot->tx_carrier_errors = dev->stats.tx_carrier_errors; - tot->tx_dropped = dev->stats.tx_dropped; - tot->tx_aborted_errors = dev->stats.tx_aborted_errors; - tot->tx_errors = dev->stats.tx_errors; - - return tot; -} - /* Given src, dst and key, find appropriate for input tunnel. */ static struct ip6_tnl *ip6gre_tunnel_lookup(struct net_device *dev, @@ -667,7 +628,6 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb, struct net_device_stats *stats = &tunnel->dev->stats; int err = -1; u8 proto; - int pkt_len; struct sk_buff *new_skb; if (dev->type == ARPHRD_ETHER) @@ -772,9 +732,7 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb, * Push down and install the IP header. 
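* (ip6_flow_hdr() below now fills in the version, traffic class and flow label words in a single step, replacing the removed open-coded htonl(0x60000000) sequence.)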
*/ ipv6h = ipv6_hdr(skb); - *(__be32 *)ipv6h = fl6->flowlabel | htonl(0x60000000); - dsfield = INET_ECN_encapsulate(0, dsfield); - ipv6_change_dsfield(ipv6h, ~INET_ECN_MASK, dsfield); + ip6_flow_hdr(ipv6h, INET_ECN_encapsulate(0, dsfield), fl6->flowlabel); ipv6h->hop_limit = tunnel->parms.hop_limit; ipv6h->nexthdr = proto; ipv6h->saddr = fl6->saddr; @@ -803,23 +761,9 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb, } } - nf_reset(skb); - pkt_len = skb->len; - err = ip6_local_out(skb); - - if (net_xmit_eval(err) == 0) { - struct pcpu_tstats *tstats = this_cpu_ptr(tunnel->dev->tstats); - - tstats->tx_bytes += pkt_len; - tstats->tx_packets++; - } else { - stats->tx_errors++; - stats->tx_aborted_errors++; - } - + ip6tunnel_xmit(skb, dev); if (ndst) ip6_tnl_dst_store(tunnel, ndst); - return 0; tx_err_link_failure: stats->tx_carrier_errors++; @@ -1240,7 +1184,7 @@ static int ip6gre_header(struct sk_buff *skb, struct net_device *dev, struct ipv6hdr *ipv6h = (struct ipv6hdr *)skb_push(skb, t->hlen); __be16 *p = (__be16 *)(ipv6h+1); - *(__be32 *)ipv6h = t->fl.u.ip6.flowlabel | htonl(0x60000000); + ip6_flow_hdr(ipv6h, 0, t->fl.u.ip6.flowlabel); ipv6h->hop_limit = t->parms.hop_limit; ipv6h->nexthdr = NEXTHDR_GRE; ipv6h->saddr = t->parms.laddr; @@ -1273,7 +1217,7 @@ static const struct net_device_ops ip6gre_netdev_ops = { .ndo_start_xmit = ip6gre_tunnel_xmit, .ndo_do_ioctl = ip6gre_tunnel_ioctl, .ndo_change_mtu = ip6gre_tunnel_change_mtu, - .ndo_get_stats64 = ip6gre_get_stats64, + .ndo_get_stats64 = ip_tunnel_get_stats64, }; static void ip6gre_dev_free(struct net_device *dev) @@ -1522,7 +1466,7 @@ static const struct net_device_ops ip6gre_tap_netdev_ops = { .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, .ndo_change_mtu = ip6gre_tunnel_change_mtu, - .ndo_get_stats64 = ip6gre_get_stats64, + .ndo_get_stats64 = ip_tunnel_get_stats64, }; static void ip6gre_tap_setup(struct net_device *dev) diff --git a/net/ipv6/ip6_icmp.c b/net/ipv6/ip6_icmp.c new file mode 100644 index 000000000000..4578e23834f7 --- /dev/null +++ b/net/ipv6/ip6_icmp.c @@ -0,0 +1,47 @@ +#include <linux/export.h> +#include <linux/icmpv6.h> +#include <linux/mutex.h> +#include <linux/netdevice.h> +#include <linux/spinlock.h> + +#include <net/ipv6.h> + +#if IS_ENABLED(CONFIG_IPV6) + +static ip6_icmp_send_t __rcu *ip6_icmp_send; + +int inet6_register_icmp_sender(ip6_icmp_send_t *fn) +{ + return (cmpxchg((ip6_icmp_send_t **)&ip6_icmp_send, NULL, fn) == NULL) ? + 0 : -EBUSY; +} +EXPORT_SYMBOL(inet6_register_icmp_sender); + +int inet6_unregister_icmp_sender(ip6_icmp_send_t *fn) +{ + int ret; + + ret = (cmpxchg((ip6_icmp_send_t **)&ip6_icmp_send, fn, NULL) == fn) ? + 0 : -EINVAL; + + synchronize_net(); + + return ret; +} +EXPORT_SYMBOL(inet6_unregister_icmp_sender); + +void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) +{ + ip6_icmp_send_t *send; + + rcu_read_lock(); + send = rcu_dereference(ip6_icmp_send); + + if (!send) + goto out; + send(skb, type, code, info); +out: + rcu_read_unlock(); +} +EXPORT_SYMBOL(icmpv6_send); +#endif diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index a52d864d562b..2bab2aa59745 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -118,6 +118,27 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt ipv6_addr_loopback(&hdr->daddr)) goto err; + /* RFC4291 Errata ID: 3480 + * Interface-Local scope spans only a single interface on a + * node and is useful only for loopback transmission of + * multicast. 
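+ * (The scope nibble of the destination address is 1 for interface-local; it is tested below with IPV6_ADDR_MC_SCOPE().)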
Packets with interface-local scope received + * from another node must be discarded. + */ + if (!(skb->pkt_type == PACKET_LOOPBACK || + dev->flags & IFF_LOOPBACK) && + ipv6_addr_is_multicast(&hdr->daddr) && + IPV6_ADDR_MC_SCOPE(&hdr->daddr) == 1) + goto err; + + /* RFC4291 2.7 + * Nodes must not originate a packet to a multicast address whose scope + * field contains the reserved value 0; if such a packet is received, it + * must be silently dropped. + */ + if (ipv6_addr_is_multicast(&hdr->daddr) && + IPV6_ADDR_MC_SCOPE(&hdr->daddr) == 0) + goto err; + /* * RFC4291 2.7 * Multicast addresses must not be used as source addresses in IPv6 @@ -212,7 +233,7 @@ resubmit: if (ipv6_addr_is_multicast(&hdr->daddr) && !ipv6_chk_mcast_addr(skb->dev, &hdr->daddr, &hdr->saddr) && - !ipv6_is_mld(skb, nexthdr)) + !ipv6_is_mld(skb, nexthdr, skb_network_header_len(skb))) goto discard; } if (!(ipprot->flags & INET6_PROTO_NOPOLICY) && @@ -232,9 +253,11 @@ resubmit: icmpv6_send(skb, ICMPV6_PARAMPROB, ICMPV6_UNK_NEXTHDR, nhoff); } - } else + kfree_skb(skb); + } else { IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INDELIVERS); - kfree_skb(skb); + consume_skb(skb); + } } rcu_read_unlock(); return 0; @@ -270,7 +293,8 @@ int ip6_mc_input(struct sk_buff *skb) * IPv6 multicast router mode is now supported ;) */ if (dev_net(skb->dev)->ipv6.devconf_all->mc_forwarding && - !(ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) && + !(ipv6_addr_type(&hdr->daddr) & + (IPV6_ADDR_LOOPBACK|IPV6_ADDR_LINKLOCAL)) && likely(!(IP6CB(skb)->flags & IP6SKB_FORWARDED))) { /* * Okay, we try to forward - split and duplicate @@ -280,10 +304,8 @@ int ip6_mc_input(struct sk_buff *skb) struct inet6_skb_parm *opt = IP6CB(skb); /* Check for MLD */ - if (unlikely(opt->ra)) { + if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) { /* Check if this is a mld message */ - u8 *ptr = skb_network_header(skb) + opt->ra; - struct icmp6hdr *icmp6; u8 nexthdr = hdr->nexthdr; __be16 frag_off; int offset; @@ -291,7 +313,7 @@ int ip6_mc_input(struct sk_buff *skb) /* Check if the value of Router Alert * is for MLD (0x0000). 
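* The hop-by-hop option parser now caches the Router Alert value in IP6CB(skb)->ra and sets IP6SKB_ROUTERALERT, so the raw option bytes no longer have to be re-read from the header here.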
*/ - if ((ptr[2] | ptr[3]) == 0) { + if (opt->ra == htons(IPV6_OPT_ROUTERALERT_MLD)) { deliver = false; if (!ipv6_ext_hdr(nexthdr)) { @@ -303,24 +325,10 @@ int ip6_mc_input(struct sk_buff *skb) if (offset < 0) goto out; - if (nexthdr != IPPROTO_ICMPV6) - goto out; - - if (!pskb_may_pull(skb, (skb_network_header(skb) + - offset + 1 - skb->data))) + if (!ipv6_is_mld(skb, nexthdr, offset)) goto out; - icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset); - - switch (icmp6->icmp6_type) { - case ICMPV6_MGM_QUERY: - case ICMPV6_MGM_REPORT: - case ICMPV6_MGM_REDUCTION: - case ICMPV6_MLD2_REPORT: - deliver = true; - break; - } - goto out; + deliver = true; } /* unknown RA - process it normally */ } diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index f26f0da7f095..71b766ee821d 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -92,13 +92,12 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, u8 *prevhdr; int offset = 0; - if (!(features & NETIF_F_V6_CSUM)) - features &= ~NETIF_F_SG; - if (unlikely(skb_shinfo(skb)->gso_type & ~(SKB_GSO_UDP | SKB_GSO_DODGY | SKB_GSO_TCP_ECN | + SKB_GSO_GRE | + SKB_GSO_UDP_TUNNEL | SKB_GSO_TCPV6 | 0))) goto out; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 0c7c03d50dc0..d2eedf192330 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -56,8 +56,6 @@ #include <net/checksum.h> #include <linux/mroute6.h> -int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)); - int __ip6_local_out(struct sk_buff *skb) { int len; @@ -88,7 +86,8 @@ static int ip6_finish_output2(struct sk_buff *skb) struct dst_entry *dst = skb_dst(skb); struct net_device *dev = dst->dev; struct neighbour *neigh; - struct rt6_info *rt; + struct in6_addr *nexthop; + int ret; skb->protocol = htons(ETH_P_IPV6); skb->dev = dev; @@ -121,12 +120,26 @@ static int ip6_finish_output2(struct sk_buff *skb) IP6_UPD_PO_STATS(dev_net(dev), idev, IPSTATS_MIB_OUTMCAST, skb->len); + + if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <= + IPV6_ADDR_SCOPE_NODELOCAL && + !(dev->flags & IFF_LOOPBACK)) { + kfree_skb(skb); + return 0; + } } - rt = (struct rt6_info *) dst; - neigh = rt->n; - if (neigh) - return dst_neigh_output(dst, neigh, skb); + rcu_read_lock_bh(); + nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr); + neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop); + if (unlikely(!neigh)) + neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false); + if (!IS_ERR(neigh)) { + ret = dst_neigh_output(dst, neigh, skb); + rcu_read_unlock_bh(); + return ret; + } + rcu_read_unlock_bh(); IP6_INC_STATS_BH(dev_net(dst->dev), ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); @@ -216,7 +229,7 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, if (hlimit < 0) hlimit = ip6_dst_hoplimit(dst); - *(__be32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl6->flowlabel; + ip6_flow_hdr(hdr, tclass, fl6->flowlabel); hdr->payload_len = htons(seg_len); hdr->nexthdr = proto; @@ -236,9 +249,8 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, dst->dev, dst_output); } - net_dbg_ratelimited("IPv6: sending pkt_too_big to self\n"); skb->dev = dst->dev; - icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); + ipv6_local_error(sk, EMSGSIZE, fl6, mtu); IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS); kfree_skb(skb); return -EMSGSIZE; @@ -246,39 +258,6 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, EXPORT_SYMBOL(ip6_xmit); -/* - * To avoid extra problems ND 
packets are send through this - * routine. It's code duplication but I really want to avoid - * extra checks since ipv6_build_header is used by TCP (which - * is for us performance critical) - */ - -int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev, - const struct in6_addr *saddr, const struct in6_addr *daddr, - int proto, int len) -{ - struct ipv6_pinfo *np = inet6_sk(sk); - struct ipv6hdr *hdr; - - skb->protocol = htons(ETH_P_IPV6); - skb->dev = dev; - - skb_reset_network_header(skb); - skb_put(skb, sizeof(struct ipv6hdr)); - hdr = ipv6_hdr(skb); - - *(__be32*)hdr = htonl(0x60000000); - - hdr->payload_len = htons(len); - hdr->nexthdr = proto; - hdr->hop_limit = np->hop_limit; - - hdr->saddr = *saddr; - hdr->daddr = *daddr; - - return 0; -} - static int ip6_call_ra_chain(struct sk_buff *skb, int sel) { struct ip6_ra_chain *ra; @@ -913,8 +892,12 @@ static int ip6_dst_lookup_tail(struct sock *sk, * dst entry of the nexthop router */ rt = (struct rt6_info *) *dst; - n = rt->n; - if (n && !(n->nud_state & NUD_VALID)) { + rcu_read_lock_bh(); + n = __ipv6_neigh_lookup_noref(rt->dst.dev, rt6_nexthop(rt, &fl6->daddr)); + err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0; + rcu_read_unlock_bh(); + + if (err) { struct inet6_ifaddr *ifp; struct flowi6 fl_gw6; int redirect; @@ -1241,11 +1224,8 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, } /* For UDP, check if TX timestamp is enabled */ - if (sk->sk_type == SOCK_DGRAM) { - err = sock_tx_timestamp(sk, &tx_flags); - if (err) - goto error; - } + if (sk->sk_type == SOCK_DGRAM) + sock_tx_timestamp(sk, &tx_flags); /* * Let's try using as much space as possible. @@ -1548,9 +1528,7 @@ int ip6_push_pending_frames(struct sock *sk) skb_reset_network_header(skb); hdr = ipv6_hdr(skb); - *(__be32*)hdr = fl6->flowlabel | - htonl(0x60000000 | ((int)np->cork.tclass << 20)); - + ip6_flow_hdr(hdr, np->cork.tclass, fl6->flowlabel); hdr->hop_limit = np->cork.hop_limit; hdr->nexthdr = proto; hdr->saddr = fl6->saddr; diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index a14f28b280f5..1e55866cead7 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -47,6 +47,7 @@ #include <net/icmp.h> #include <net/ip.h> +#include <net/ip_tunnels.h> #include <net/ipv6.h> #include <net/ip6_route.h> #include <net/addrconf.h> @@ -955,7 +956,6 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, unsigned int max_headroom = sizeof(struct ipv6hdr); u8 proto; int err = -1; - int pkt_len; if (!fl6->flowi6_mark) dst = ip6_tnl_dst_check(t); @@ -1030,26 +1030,12 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, skb_push(skb, sizeof(struct ipv6hdr)); skb_reset_network_header(skb); ipv6h = ipv6_hdr(skb); - *(__be32*)ipv6h = fl6->flowlabel | htonl(0x60000000); - dsfield = INET_ECN_encapsulate(0, dsfield); - ipv6_change_dsfield(ipv6h, ~INET_ECN_MASK, dsfield); + ip6_flow_hdr(ipv6h, INET_ECN_encapsulate(0, dsfield), fl6->flowlabel); ipv6h->hop_limit = t->parms.hop_limit; ipv6h->nexthdr = proto; ipv6h->saddr = fl6->saddr; ipv6h->daddr = fl6->daddr; - nf_reset(skb); - pkt_len = skb->len; - err = ip6_local_out(skb); - - if (net_xmit_eval(err) == 0) { - struct pcpu_tstats *tstats = this_cpu_ptr(t->dev->tstats); - - tstats->tx_bytes += pkt_len; - tstats->tx_packets++; - } else { - stats->tx_errors++; - stats->tx_aborted_errors++; - } + ip6tunnel_xmit(skb, dev); if (ndst) ip6_tnl_dst_store(t, ndst); return 0; diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 8fd154e5f079..241fb8ad9fcf 100644 --- a/net/ipv6/ip6mr.c +++ 
b/net/ipv6/ip6mr.c @@ -842,9 +842,9 @@ static void ip6mr_destroy_unres(struct mr6_table *mrt, struct mfc6_cache *c) if (ipv6_hdr(skb)->version == 0) { struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr)); nlh->nlmsg_type = NLMSG_ERROR; - nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr)); + nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr)); skb_trim(skb, nlh->nlmsg_len); - ((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -ETIMEDOUT; + ((struct nlmsgerr *)nlmsg_data(nlh))->error = -ETIMEDOUT; rtnl_unicast(skb, net, NETLINK_CB(skb).portid); } else kfree_skb(skb); @@ -1017,6 +1017,50 @@ static struct mfc6_cache *ip6mr_cache_find(struct mr6_table *mrt, return NULL; } +/* Look for a (*,*,oif) entry */ +static struct mfc6_cache *ip6mr_cache_find_any_parent(struct mr6_table *mrt, + mifi_t mifi) +{ + int line = MFC6_HASH(&in6addr_any, &in6addr_any); + struct mfc6_cache *c; + + list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) + if (ipv6_addr_any(&c->mf6c_origin) && + ipv6_addr_any(&c->mf6c_mcastgrp) && + (c->mfc_un.res.ttls[mifi] < 255)) + return c; + + return NULL; +} + +/* Look for a (*,G) entry */ +static struct mfc6_cache *ip6mr_cache_find_any(struct mr6_table *mrt, + struct in6_addr *mcastgrp, + mifi_t mifi) +{ + int line = MFC6_HASH(mcastgrp, &in6addr_any); + struct mfc6_cache *c, *proxy; + + if (ipv6_addr_any(mcastgrp)) + goto skip; + + list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) + if (ipv6_addr_any(&c->mf6c_origin) && + ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp)) { + if (c->mfc_un.res.ttls[mifi] < 255) + return c; + + /* It's ok if the mifi is part of the static tree */ + proxy = ip6mr_cache_find_any_parent(mrt, + c->mf6c_parent); + if (proxy && proxy->mfc_un.res.ttls[mifi] < 255) + return c; + } + +skip: + return ip6mr_cache_find_any_parent(mrt, mifi); +} + /* * Allocate a multicast cache entry */ @@ -1056,13 +1100,13 @@ static void ip6mr_cache_resolve(struct net *net, struct mr6_table *mrt, if (ipv6_hdr(skb)->version == 0) { struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr)); - if (__ip6mr_fill_mroute(mrt, skb, c, NLMSG_DATA(nlh)) > 0) { + if (__ip6mr_fill_mroute(mrt, skb, c, nlmsg_data(nlh)) > 0) { nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh; } else { nlh->nlmsg_type = NLMSG_ERROR; - nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr)); + nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr)); skb_trim(skb, nlh->nlmsg_len); - ((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -EMSGSIZE; + ((struct nlmsgerr *)nlmsg_data(nlh))->error = -EMSGSIZE; } rtnl_unicast(skb, net, NETLINK_CB(skb).portid); } else @@ -1247,7 +1291,8 @@ ip6mr_cache_unresolved(struct mr6_table *mrt, mifi_t mifi, struct sk_buff *skb) * MFC6 cache manipulation by user space */ -static int ip6mr_mfc_delete(struct mr6_table *mrt, struct mf6cctl *mfc) +static int ip6mr_mfc_delete(struct mr6_table *mrt, struct mf6cctl *mfc, + int parent) { int line; struct mfc6_cache *c, *next; @@ -1256,7 +1301,9 @@ static int ip6mr_mfc_delete(struct mr6_table *mrt, struct mf6cctl *mfc) list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[line], list) { if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) && - ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr)) { + ipv6_addr_equal(&c->mf6c_mcastgrp, + &mfc->mf6cc_mcastgrp.sin6_addr) && + (parent == -1 || parent == c->mf6c_parent)) { write_lock_bh(&mrt_lock); list_del(&c->list); write_unlock_bh(&mrt_lock); @@ -1312,9 +1359,9 @@ static int __net_init 
ip6mr_net_init(struct net *net) #ifdef CONFIG_PROC_FS err = -ENOMEM; - if (!proc_net_fops_create(net, "ip6_mr_vif", 0, &ip6mr_vif_fops)) + if (!proc_create("ip6_mr_vif", 0, net->proc_net, &ip6mr_vif_fops)) goto proc_vif_fail; - if (!proc_net_fops_create(net, "ip6_mr_cache", 0, &ip6mr_mfc_fops)) + if (!proc_create("ip6_mr_cache", 0, net->proc_net, &ip6mr_mfc_fops)) goto proc_cache_fail; #endif @@ -1322,7 +1369,7 @@ static int __net_init ip6mr_net_init(struct net *net) #ifdef CONFIG_PROC_FS proc_cache_fail: - proc_net_remove(net, "ip6_mr_vif"); + remove_proc_entry("ip6_mr_vif", net->proc_net); proc_vif_fail: ip6mr_rules_exit(net); #endif @@ -1333,8 +1380,8 @@ fail: static void __net_exit ip6mr_net_exit(struct net *net) { #ifdef CONFIG_PROC_FS - proc_net_remove(net, "ip6_mr_cache"); - proc_net_remove(net, "ip6_mr_vif"); + remove_proc_entry("ip6_mr_cache", net->proc_net); + remove_proc_entry("ip6_mr_vif", net->proc_net); #endif ip6mr_rules_exit(net); } @@ -1391,7 +1438,7 @@ void ip6_mr_cleanup(void) } static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt, - struct mf6cctl *mfc, int mrtsock) + struct mf6cctl *mfc, int mrtsock, int parent) { bool found = false; int line; @@ -1413,7 +1460,9 @@ static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt, list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) { if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) && - ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr)) { + ipv6_addr_equal(&c->mf6c_mcastgrp, + &mfc->mf6cc_mcastgrp.sin6_addr) && + (parent == -1 || parent == mfc->mf6cc_parent)) { found = true; break; } @@ -1430,7 +1479,8 @@ static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt, return 0; } - if (!ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr)) + if (!ipv6_addr_any(&mfc->mf6cc_mcastgrp.sin6_addr) && + !ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr)) return -EINVAL; c = ip6mr_cache_alloc(); @@ -1596,7 +1646,7 @@ struct sock *mroute6_socket(struct net *net, struct sk_buff *skb) int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen) { - int ret; + int ret, parent = 0; struct mif6ctl vif; struct mf6cctl mfc; mifi_t mifi; @@ -1653,15 +1703,21 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns */ case MRT6_ADD_MFC: case MRT6_DEL_MFC: + parent = -1; + case MRT6_ADD_MFC_PROXY: + case MRT6_DEL_MFC_PROXY: if (optlen < sizeof(mfc)) return -EINVAL; if (copy_from_user(&mfc, optval, sizeof(mfc))) return -EFAULT; + if (parent == 0) + parent = mfc.mf6cc_parent; rtnl_lock(); - if (optname == MRT6_DEL_MFC) - ret = ip6mr_mfc_delete(mrt, &mfc); + if (optname == MRT6_DEL_MFC || optname == MRT6_DEL_MFC_PROXY) + ret = ip6mr_mfc_delete(mrt, &mfc, parent); else - ret = ip6mr_mfc_add(net, mrt, &mfc, sk == mrt->mroute6_sk); + ret = ip6mr_mfc_add(net, mrt, &mfc, + sk == mrt->mroute6_sk, parent); rtnl_unlock(); return ret; @@ -2018,19 +2074,29 @@ static int ip6_mr_forward(struct net *net, struct mr6_table *mrt, { int psend = -1; int vif, ct; + int true_vifi = ip6mr_find_vif(mrt, skb->dev); vif = cache->mf6c_parent; cache->mfc_un.res.pkt++; cache->mfc_un.res.bytes += skb->len; + if (ipv6_addr_any(&cache->mf6c_origin) && true_vifi >= 0) { + struct mfc6_cache *cache_proxy; + + /* For an (*,G) entry, we only check that the incoming + * interface is part of the static tree. 
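+ * (It does so by looking the interface up in a (*,*) proxy entry, i.e. one whose TTL for that interface is below 255.)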
+ */ + cache_proxy = ip6mr_cache_find_any_parent(mrt, vif); + if (cache_proxy && + cache_proxy->mfc_un.res.ttls[true_vifi] < 255) + goto forward; + } + /* * Wrong interface: drop packet and (maybe) send PIM assert. */ if (mrt->vif6_table[vif].dev != skb->dev) { - int true_vifi; - cache->mfc_un.res.wrong_if++; - true_vifi = ip6mr_find_vif(mrt, skb->dev); if (true_vifi >= 0 && mrt->mroute_do_assert && /* pimsm uses asserts, when switching from RPT to SPT, @@ -2048,14 +2114,32 @@ static int ip6_mr_forward(struct net *net, struct mr6_table *mrt, goto dont_forward; } +forward: mrt->vif6_table[vif].pkt_in++; mrt->vif6_table[vif].bytes_in += skb->len; /* * Forward the frame */ + if (ipv6_addr_any(&cache->mf6c_origin) && + ipv6_addr_any(&cache->mf6c_mcastgrp)) { + if (true_vifi >= 0 && + true_vifi != cache->mf6c_parent && + ipv6_hdr(skb)->hop_limit > + cache->mfc_un.res.ttls[cache->mf6c_parent]) { + /* It's an (*,*) entry and the packet is not coming from + * the upstream: forward the packet to the upstream + * only. + */ + psend = cache->mf6c_parent; + goto last_forward; + } + goto dont_forward; + } for (ct = cache->mfc_un.res.maxvif - 1; ct >= cache->mfc_un.res.minvif; ct--) { - if (ipv6_hdr(skb)->hop_limit > cache->mfc_un.res.ttls[ct]) { + /* For (*,G) entry, don't forward to the incoming interface */ + if ((!ipv6_addr_any(&cache->mf6c_origin) || ct != true_vifi) && + ipv6_hdr(skb)->hop_limit > cache->mfc_un.res.ttls[ct]) { if (psend != -1) { struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); if (skb2) @@ -2064,6 +2148,7 @@ static int ip6_mr_forward(struct net *net, struct mr6_table *mrt, psend = ct; } } +last_forward: if (psend != -1) { ip6mr_forward2(net, mrt, skb, cache, psend); return 0; @@ -2099,6 +2184,14 @@ int ip6_mr_input(struct sk_buff *skb) read_lock(&mrt_lock); cache = ip6mr_cache_find(mrt, &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr); + if (cache == NULL) { + int vif = ip6mr_find_vif(mrt, skb->dev); + + if (vif >= 0) + cache = ip6mr_cache_find_any(mrt, + &ipv6_hdr(skb)->daddr, + vif); + } /* * No usable cache entry @@ -2186,6 +2279,13 @@ int ip6mr_get_route(struct net *net, read_lock(&mrt_lock); cache = ip6mr_cache_find(mrt, &rt->rt6i_src.addr, &rt->rt6i_dst.addr); + if (!cache && skb->dev) { + int vif = ip6mr_find_vif(mrt, skb->dev); + + if (vif >= 0) + cache = ip6mr_cache_find_any(mrt, &rt->rt6i_dst.addr, + vif); + } if (!cache) { struct sk_buff *skb2; diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 28dfa5f3801f..bfa6cc36ef2a 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -376,8 +376,7 @@ int ip6_mc_source(int add, int omode, struct sock *sk, goto done; /* err = -EADDRNOTAVAIL */ rv = !0; for (i=0; i<psl->sl_count; i++) { - rv = memcmp(&psl->sl_addr[i], source, - sizeof(struct in6_addr)); + rv = !ipv6_addr_equal(&psl->sl_addr[i], source); if (rv == 0) break; } @@ -427,12 +426,10 @@ int ip6_mc_source(int add, int omode, struct sock *sk, } rv = 1; /* > 0 for insert logic below if sl_count is 0 */ for (i=0; i<psl->sl_count; i++) { - rv = memcmp(&psl->sl_addr[i], source, sizeof(struct in6_addr)); - if (rv == 0) - break; + rv = !ipv6_addr_equal(&psl->sl_addr[i], source); + if (rv == 0) /* There is an error in the address. 
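+ * (The source address is already present in the filter list, so a duplicate insert is refused.)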
*/ + goto done; } - if (rv == 0) /* address already there is an error */ - goto done; for (j=psl->sl_count-1; j>=i; j--) psl->sl_addr[j+1] = psl->sl_addr[j]; psl->sl_addr[i] = *source; @@ -664,6 +661,10 @@ static void igmp6_group_added(struct ifmcaddr6 *mc) struct net_device *dev = mc->idev->dev; char buf[MAX_ADDR_LEN]; + if (IPV6_ADDR_MC_SCOPE(&mc->mca_addr) < + IPV6_ADDR_SCOPE_LINKLOCAL) + return; + spin_lock_bh(&mc->mca_lock); if (!(mc->mca_flags&MAF_LOADED)) { mc->mca_flags |= MAF_LOADED; @@ -690,6 +691,10 @@ static void igmp6_group_dropped(struct ifmcaddr6 *mc) struct net_device *dev = mc->idev->dev; char buf[MAX_ADDR_LEN]; + if (IPV6_ADDR_MC_SCOPE(&mc->mca_addr) < + IPV6_ADDR_SCOPE_LINKLOCAL) + return; + spin_lock_bh(&mc->mca_lock); if (mc->mca_flags&MAF_LOADED) { mc->mca_flags &= ~MAF_LOADED; @@ -935,33 +940,6 @@ int ipv6_dev_mc_dec(struct net_device *dev, const struct in6_addr *addr) } /* - * identify MLD packets for MLD filter exceptions - */ -bool ipv6_is_mld(struct sk_buff *skb, int nexthdr) -{ - struct icmp6hdr *pic; - - if (nexthdr != IPPROTO_ICMPV6) - return false; - - if (!pskb_may_pull(skb, sizeof(struct icmp6hdr))) - return false; - - pic = icmp6_hdr(skb); - - switch (pic->icmp6_type) { - case ICMPV6_MGM_QUERY: - case ICMPV6_MGM_REPORT: - case ICMPV6_MGM_REDUCTION: - case ICMPV6_MLD2_REPORT: - return true; - default: - break; - } - return false; -} - -/* * check if the interface/address pair is valid */ bool ipv6_chk_mcast_addr(struct net_device *dev, const struct in6_addr *group, @@ -1340,6 +1318,31 @@ mld_scount(struct ifmcaddr6 *pmc, int type, int gdeleted, int sdeleted) return scount; } +static void ip6_mc_hdr(struct sock *sk, struct sk_buff *skb, + struct net_device *dev, + const struct in6_addr *saddr, + const struct in6_addr *daddr, + int proto, int len) +{ + struct ipv6hdr *hdr; + + skb->protocol = htons(ETH_P_IPV6); + skb->dev = dev; + + skb_reset_network_header(skb); + skb_put(skb, sizeof(struct ipv6hdr)); + hdr = ipv6_hdr(skb); + + ip6_flow_hdr(hdr, 0, 0); + + hdr->payload_len = htons(len); + hdr->nexthdr = proto; + hdr->hop_limit = inet6_sk(sk)->hop_limit; + + hdr->saddr = *saddr; + hdr->daddr = *daddr; +} + static struct sk_buff *mld_newpack(struct net_device *dev, int size) { struct net *net = dev_net(dev); @@ -1375,7 +1378,7 @@ static struct sk_buff *mld_newpack(struct net_device *dev, int size) } else saddr = &addr_buf; - ip6_nd_hdr(sk, skb, dev, saddr, &mld2_all_mcr, NEXTHDR_HOP, 0); + ip6_mc_hdr(sk, skb, dev, saddr, &mld2_all_mcr, NEXTHDR_HOP, 0); memcpy(skb_put(skb, sizeof(ra)), ra, sizeof(ra)); @@ -1418,7 +1421,7 @@ static void mld_sendpack(struct sk_buff *skb) icmpv6_flow_init(net->ipv6.igmp_sk, &fl6, ICMPV6_MLD2_REPORT, &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, skb->dev->ifindex); - dst = icmp6_dst_alloc(skb->dev, NULL, &fl6); + dst = icmp6_dst_alloc(skb->dev, &fl6); err = 0; if (IS_ERR(dst)) { @@ -1767,7 +1770,7 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type) } else saddr = &addr_buf; - ip6_nd_hdr(sk, skb, dev, saddr, snd_addr, NEXTHDR_HOP, payload_len); + ip6_mc_hdr(sk, skb, dev, saddr, snd_addr, NEXTHDR_HOP, payload_len); memcpy(skb_put(skb, sizeof(ra)), ra, sizeof(ra)); @@ -1786,7 +1789,7 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type) icmpv6_flow_init(sk, &fl6, type, &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, skb->dev->ifindex); - dst = icmp6_dst_alloc(skb->dev, NULL, &fl6); + dst = icmp6_dst_alloc(skb->dev, &fl6); if (IS_ERR(dst)) { err = PTR_ERR(dst); goto err_out; @@ 
-2596,10 +2599,10 @@ static int __net_init igmp6_proc_init(struct net *net) int err; err = -ENOMEM; - if (!proc_net_fops_create(net, "igmp6", S_IRUGO, &igmp6_mc_seq_fops)) + if (!proc_create("igmp6", S_IRUGO, net->proc_net, &igmp6_mc_seq_fops)) goto out; - if (!proc_net_fops_create(net, "mcfilter6", S_IRUGO, - &igmp6_mcf_seq_fops)) + if (!proc_create("mcfilter6", S_IRUGO, net->proc_net, + &igmp6_mcf_seq_fops)) goto out_proc_net_igmp6; err = 0; @@ -2607,14 +2610,14 @@ out: return err; out_proc_net_igmp6: - proc_net_remove(net, "igmp6"); + remove_proc_entry("igmp6", net->proc_net); goto out; } static void __net_exit igmp6_proc_exit(struct net *net) { - proc_net_remove(net, "mcfilter6"); - proc_net_remove(net, "igmp6"); + remove_proc_entry("mcfilter6", net->proc_net); + remove_proc_entry("igmp6", net->proc_net); } #else static inline int igmp6_proc_init(struct net *net) diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 6574175795df..2712ab22a174 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -143,16 +143,12 @@ struct neigh_table nd_tbl = { .gc_thresh3 = 1024, }; -static inline int ndisc_opt_addr_space(struct net_device *dev) +static void ndisc_fill_addr_option(struct sk_buff *skb, int type, void *data) { - return NDISC_OPT_SPACE(dev->addr_len + ndisc_addr_option_pad(dev->type)); -} - -static u8 *ndisc_fill_addr_option(u8 *opt, int type, void *data, int data_len, - unsigned short addr_type) -{ - int pad = ndisc_addr_option_pad(addr_type); - int space = NDISC_OPT_SPACE(data_len + pad); + int pad = ndisc_addr_option_pad(skb->dev->type); + int data_len = skb->dev->addr_len; + int space = ndisc_opt_addr_space(skb->dev); + u8 *opt = skb_put(skb, space); opt[0] = type; opt[1] = space>>3; @@ -166,7 +162,6 @@ static u8 *ndisc_fill_addr_option(u8 *opt, int type, void *data, int data_len, opt += data_len; if ((space -= data_len) > 0) memset(opt, 0, space); - return opt + space; } static struct nd_opt_hdr *ndisc_next_option(struct nd_opt_hdr *cur, @@ -370,91 +365,88 @@ static void pndisc_destructor(struct pneigh_entry *n) ipv6_dev_mc_dec(dev, &maddr); } -static struct sk_buff *ndisc_build_skb(struct net_device *dev, - const struct in6_addr *daddr, - const struct in6_addr *saddr, - struct icmp6hdr *icmp6h, - const struct in6_addr *target, - int llinfo) +static struct sk_buff *ndisc_alloc_skb(struct net_device *dev, + int len) { - struct net *net = dev_net(dev); - struct sock *sk = net->ipv6.ndisc_sk; - struct sk_buff *skb; - struct icmp6hdr *hdr; int hlen = LL_RESERVED_SPACE(dev); int tlen = dev->needed_tailroom; - int len; + struct sock *sk = dev_net(dev)->ipv6.ndisc_sk; + struct sk_buff *skb; int err; - u8 *opt; - - if (!dev->addr_len) - llinfo = 0; - - len = sizeof(struct icmp6hdr) + (target ? 
sizeof(*target) : 0); - if (llinfo) - len += ndisc_opt_addr_space(dev); skb = sock_alloc_send_skb(sk, - (MAX_HEADER + sizeof(struct ipv6hdr) + - len + hlen + tlen), + hlen + sizeof(struct ipv6hdr) + len + tlen, 1, &err); if (!skb) { - ND_PRINTK(0, err, "ND: %s failed to allocate an skb, err=%d\n", + ND_PRINTK(0, err, "ndisc: %s failed to allocate an skb, err=%d\n", __func__, err); return NULL; } - skb_reserve(skb, hlen); - ip6_nd_hdr(sk, skb, dev, saddr, daddr, IPPROTO_ICMPV6, len); + skb->protocol = htons(ETH_P_IPV6); + skb->dev = dev; - skb->transport_header = skb->tail; - skb_put(skb, len); + skb_reserve(skb, hlen + sizeof(struct ipv6hdr)); + skb_reset_transport_header(skb); - hdr = (struct icmp6hdr *)skb_transport_header(skb); - memcpy(hdr, icmp6h, sizeof(*hdr)); + return skb; +} - opt = skb_transport_header(skb) + sizeof(struct icmp6hdr); - if (target) { - *(struct in6_addr *)opt = *target; - opt += sizeof(*target); - } +static void ip6_nd_hdr(struct sk_buff *skb, + const struct in6_addr *saddr, + const struct in6_addr *daddr, + int hop_limit, int len) +{ + struct ipv6hdr *hdr; - if (llinfo) - ndisc_fill_addr_option(opt, llinfo, dev->dev_addr, - dev->addr_len, dev->type); + skb_push(skb, sizeof(*hdr)); + skb_reset_network_header(skb); + hdr = ipv6_hdr(skb); - hdr->icmp6_cksum = csum_ipv6_magic(saddr, daddr, len, - IPPROTO_ICMPV6, - csum_partial(hdr, - len, 0)); + ip6_flow_hdr(hdr, 0, 0); - return skb; + hdr->payload_len = htons(len); + hdr->nexthdr = IPPROTO_ICMPV6; + hdr->hop_limit = hop_limit; + + hdr->saddr = *saddr; + hdr->daddr = *daddr; } -static void ndisc_send_skb(struct sk_buff *skb, struct net_device *dev, - struct neighbour *neigh, +static void ndisc_send_skb(struct sk_buff *skb, const struct in6_addr *daddr, - const struct in6_addr *saddr, - struct icmp6hdr *icmp6h) + const struct in6_addr *saddr) { - struct flowi6 fl6; - struct dst_entry *dst; - struct net *net = dev_net(dev); + struct dst_entry *dst = skb_dst(skb); + struct net *net = dev_net(skb->dev); struct sock *sk = net->ipv6.ndisc_sk; struct inet6_dev *idev; int err; + struct icmp6hdr *icmp6h = icmp6_hdr(skb); u8 type; type = icmp6h->icmp6_type; - icmpv6_flow_init(sk, &fl6, type, saddr, daddr, dev->ifindex); - dst = icmp6_dst_alloc(dev, neigh, &fl6); - if (IS_ERR(dst)) { - kfree_skb(skb); - return; + if (!dst) { + struct sock *sk = net->ipv6.ndisc_sk; + struct flowi6 fl6; + + icmpv6_flow_init(sk, &fl6, type, saddr, daddr, skb->dev->ifindex); + dst = icmp6_dst_alloc(skb->dev, &fl6); + if (IS_ERR(dst)) { + kfree_skb(skb); + return; + } + + skb_dst_set(skb, dst); } - skb_dst_set(skb, dst); + icmp6h->icmp6_cksum = csum_ipv6_magic(saddr, daddr, skb->len, + IPPROTO_ICMPV6, + csum_partial(icmp6h, + skb->len, 0)); + + ip6_nd_hdr(skb, saddr, daddr, inet6_sk(sk)->hop_limit, skb->len); rcu_read_lock(); idev = __in6_dev_get(dst->dev); @@ -470,36 +462,17 @@ static void ndisc_send_skb(struct sk_buff *skb, struct net_device *dev, rcu_read_unlock(); } -/* - * Send a Neighbour Discover packet - */ -static void __ndisc_send(struct net_device *dev, - struct neighbour *neigh, - const struct in6_addr *daddr, - const struct in6_addr *saddr, - struct icmp6hdr *icmp6h, const struct in6_addr *target, - int llinfo) -{ - struct sk_buff *skb; - - skb = ndisc_build_skb(dev, daddr, saddr, icmp6h, target, llinfo); - if (!skb) - return; - - ndisc_send_skb(skb, dev, neigh, daddr, saddr, icmp6h); -} - static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh, const struct in6_addr *daddr, const struct in6_addr *solicited_addr, - int 
router, int solicited, int override, int inc_opt) + bool router, bool solicited, bool override, bool inc_opt) { + struct sk_buff *skb; struct in6_addr tmpaddr; struct inet6_ifaddr *ifp; const struct in6_addr *src_addr; - struct icmp6hdr icmp6h = { - .icmp6_type = NDISC_NEIGHBOUR_ADVERTISEMENT, - }; + struct nd_msg *msg; + int optlen = 0; /* for anycast or proxy, solicited_addr != src_addr */ ifp = ipv6_get_ifaddr(dev_net(dev), solicited_addr, dev, 1); @@ -517,13 +490,32 @@ static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh, src_addr = &tmpaddr; } - icmp6h.icmp6_router = router; - icmp6h.icmp6_solicited = solicited; - icmp6h.icmp6_override = override; + if (!dev->addr_len) + inc_opt = 0; + if (inc_opt) + optlen += ndisc_opt_addr_space(dev); - __ndisc_send(dev, neigh, daddr, src_addr, - &icmp6h, solicited_addr, - inc_opt ? ND_OPT_TARGET_LL_ADDR : 0); + skb = ndisc_alloc_skb(dev, sizeof(*msg) + optlen); + if (!skb) + return; + + msg = (struct nd_msg *)skb_put(skb, sizeof(*msg)); + *msg = (struct nd_msg) { + .icmph = { + .icmp6_type = NDISC_NEIGHBOUR_ADVERTISEMENT, + .icmp6_router = router, + .icmp6_solicited = solicited, + .icmp6_override = override, + }, + .target = *solicited_addr, + }; + + if (inc_opt) + ndisc_fill_addr_option(skb, ND_OPT_TARGET_LL_ADDR, + dev->dev_addr); + + + ndisc_send_skb(skb, daddr, src_addr); } static void ndisc_send_unsol_na(struct net_device *dev) @@ -551,10 +543,11 @@ void ndisc_send_ns(struct net_device *dev, struct neighbour *neigh, const struct in6_addr *solicit, const struct in6_addr *daddr, const struct in6_addr *saddr) { + struct sk_buff *skb; struct in6_addr addr_buf; - struct icmp6hdr icmp6h = { - .icmp6_type = NDISC_NEIGHBOUR_SOLICITATION, - }; + int inc_opt = dev->addr_len; + int optlen = 0; + struct nd_msg *msg; if (saddr == NULL) { if (ipv6_get_lladdr(dev, &addr_buf, @@ -563,18 +556,37 @@ void ndisc_send_ns(struct net_device *dev, struct neighbour *neigh, saddr = &addr_buf; } - __ndisc_send(dev, neigh, daddr, saddr, - &icmp6h, solicit, - !ipv6_addr_any(saddr) ? ND_OPT_SOURCE_LL_ADDR : 0); + if (ipv6_addr_any(saddr)) + inc_opt = 0; + if (inc_opt) + optlen += ndisc_opt_addr_space(dev); + + skb = ndisc_alloc_skb(dev, sizeof(*msg) + optlen); + if (!skb) + return; + + msg = (struct nd_msg *)skb_put(skb, sizeof(*msg)); + *msg = (struct nd_msg) { + .icmph = { + .icmp6_type = NDISC_NEIGHBOUR_SOLICITATION, + }, + .target = *solicit, + }; + + if (inc_opt) + ndisc_fill_addr_option(skb, ND_OPT_SOURCE_LL_ADDR, + dev->dev_addr); + + ndisc_send_skb(skb, daddr, saddr); } void ndisc_send_rs(struct net_device *dev, const struct in6_addr *saddr, const struct in6_addr *daddr) { - struct icmp6hdr icmp6h = { - .icmp6_type = NDISC_ROUTER_SOLICITATION, - }; + struct sk_buff *skb; + struct rs_msg *msg; int send_sllao = dev->addr_len; + int optlen = 0; #ifdef CONFIG_IPV6_OPTIMISTIC_DAD /* @@ -598,9 +610,25 @@ void ndisc_send_rs(struct net_device *dev, const struct in6_addr *saddr, } } #endif - __ndisc_send(dev, NULL, daddr, saddr, - &icmp6h, NULL, - send_sllao ? 
ND_OPT_SOURCE_LL_ADDR : 0); + if (send_sllao) + optlen += ndisc_opt_addr_space(dev); + + skb = ndisc_alloc_skb(dev, sizeof(*msg) + optlen); + if (!skb) + return; + + msg = (struct rs_msg *)skb_put(skb, sizeof(*msg)); + *msg = (struct rs_msg) { + .icmph = { + .icmp6_type = NDISC_ROUTER_SOLICITATION, + }, + }; + + if (send_sllao) + ndisc_fill_addr_option(skb, ND_OPT_SOURCE_LL_ADDR, + dev->dev_addr); + + ndisc_send_skb(skb, daddr, saddr); } @@ -676,6 +704,11 @@ static void ndisc_recv_ns(struct sk_buff *skb) bool inc; int is_router = -1; + if (skb->len < sizeof(struct nd_msg)) { + ND_PRINTK(2, warn, "NS: packet too short\n"); + return; + } + if (ipv6_addr_is_multicast(&msg->target)) { ND_PRINTK(2, warn, "NS: multicast target address\n"); return; @@ -685,11 +718,7 @@ static void ndisc_recv_ns(struct sk_buff *skb) * RFC2461 7.1.1: * DAD has to be destined for solicited node multicast address. */ - if (dad && - !(daddr->s6_addr32[0] == htonl(0xff020000) && - daddr->s6_addr32[1] == htonl(0x00000000) && - daddr->s6_addr32[2] == htonl(0x00000001) && - daddr->s6_addr [12] == 0xff )) { + if (dad && !ipv6_addr_is_solict_mult(daddr)) { ND_PRINTK(2, warn, "NS: bad DAD packet (wrong destination)\n"); return; } @@ -780,11 +809,11 @@ static void ndisc_recv_ns(struct sk_buff *skb) } if (is_router < 0) - is_router = !!idev->cnf.forwarding; + is_router = idev->cnf.forwarding; if (dad) { ndisc_send_na(dev, NULL, &in6addr_linklocal_allnodes, &msg->target, - is_router, 0, (ifp != NULL), 1); + !!is_router, false, (ifp != NULL), true); goto out; } @@ -805,8 +834,8 @@ static void ndisc_recv_ns(struct sk_buff *skb) NEIGH_UPDATE_F_OVERRIDE); if (neigh || !dev->header_ops) { ndisc_send_na(dev, neigh, saddr, &msg->target, - is_router, - 1, (ifp != NULL && inc), inc); + !!is_router, + true, (ifp != NULL && inc), inc); if (neigh) neigh_release(neigh); } @@ -1350,25 +1379,34 @@ static void ndisc_redirect_rcv(struct sk_buff *skb) icmpv6_notify(skb, NDISC_REDIRECT, 0, 0); } +static void ndisc_fill_redirect_hdr_option(struct sk_buff *skb, + struct sk_buff *orig_skb, + int rd_len) +{ + u8 *opt = skb_put(skb, rd_len); + + memset(opt, 0, 8); + *(opt++) = ND_OPT_REDIRECT_HDR; + *(opt++) = (rd_len >> 3); + opt += 6; + + memcpy(opt, ipv6_hdr(orig_skb), rd_len - 8); +} + void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target) { struct net_device *dev = skb->dev; struct net *net = dev_net(dev); struct sock *sk = net->ipv6.ndisc_sk; - int len = sizeof(struct icmp6hdr) + 2 * sizeof(struct in6_addr); + int optlen = 0; struct inet_peer *peer; struct sk_buff *buff; - struct icmp6hdr *icmph; + struct rd_msg *msg; struct in6_addr saddr_buf; - struct in6_addr *addrp; struct rt6_info *rt; struct dst_entry *dst; - struct inet6_dev *idev; struct flowi6 fl6; - u8 *opt; - int hlen, tlen; int rd_len; - int err; u8 ha_buf[MAX_ADDR_LEN], *ha = NULL; bool ret; @@ -1424,7 +1462,7 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target) memcpy(ha_buf, neigh->ha, dev->addr_len); read_unlock_bh(&neigh->lock); ha = ha_buf; - len += ndisc_opt_addr_space(dev); + optlen += ndisc_opt_addr_space(dev); } else read_unlock_bh(&neigh->lock); @@ -1432,80 +1470,40 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target) } rd_len = min_t(unsigned int, - IPV6_MIN_MTU-sizeof(struct ipv6hdr)-len, skb->len + 8); + IPV6_MIN_MTU - sizeof(struct ipv6hdr) - sizeof(*msg) - optlen, + skb->len + 8); rd_len &= ~0x7; - len += rd_len; - - hlen = LL_RESERVED_SPACE(dev); - tlen = dev->needed_tailroom; - buff = 
sock_alloc_send_skb(sk, - (MAX_HEADER + sizeof(struct ipv6hdr) + - len + hlen + tlen), - 1, &err); - if (buff == NULL) { - ND_PRINTK(0, err, - "Redirect: %s failed to allocate an skb, err=%d\n", - __func__, err); - goto release; - } - - skb_reserve(buff, hlen); - ip6_nd_hdr(sk, buff, dev, &saddr_buf, &ipv6_hdr(skb)->saddr, - IPPROTO_ICMPV6, len); - - skb_set_transport_header(buff, skb_tail_pointer(buff) - buff->data); - skb_put(buff, len); - icmph = icmp6_hdr(buff); - - memset(icmph, 0, sizeof(struct icmp6hdr)); - icmph->icmp6_type = NDISC_REDIRECT; + optlen += rd_len; - /* - * copy target and destination addresses - */ - - addrp = (struct in6_addr *)(icmph + 1); - *addrp = *target; - addrp++; - *addrp = ipv6_hdr(skb)->daddr; + buff = ndisc_alloc_skb(dev, sizeof(*msg) + optlen); + if (!buff) + goto release; - opt = (u8*) (addrp + 1); + msg = (struct rd_msg *)skb_put(buff, sizeof(*msg)); + *msg = (struct rd_msg) { + .icmph = { + .icmp6_type = NDISC_REDIRECT, + }, + .target = *target, + .dest = ipv6_hdr(skb)->daddr, + }; /* * include target_address option */ if (ha) - opt = ndisc_fill_addr_option(opt, ND_OPT_TARGET_LL_ADDR, ha, - dev->addr_len, dev->type); + ndisc_fill_addr_option(skb, ND_OPT_TARGET_LL_ADDR, ha); /* * build redirect option and copy skb over to the new packet. */ - memset(opt, 0, 8); - *(opt++) = ND_OPT_REDIRECT_HDR; - *(opt++) = (rd_len >> 3); - opt += 6; - - memcpy(opt, ipv6_hdr(skb), rd_len - 8); - - icmph->icmp6_cksum = csum_ipv6_magic(&saddr_buf, &ipv6_hdr(skb)->saddr, - len, IPPROTO_ICMPV6, - csum_partial(icmph, len, 0)); + if (rd_len) + ndisc_fill_redirect_hdr_option(buff, skb, rd_len); skb_dst_set(buff, dst); - rcu_read_lock(); - idev = __in6_dev_get(dst->dev); - IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len); - err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, buff, NULL, dst->dev, - dst_output); - if (!err) { - ICMP6MSGOUT_INC_STATS(net, idev, NDISC_REDIRECT); - ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS); - } - - rcu_read_unlock(); + ndisc_send_skb(buff, &ipv6_hdr(skb)->saddr, &saddr_buf); return; release: @@ -1522,7 +1520,7 @@ int ndisc_rcv(struct sk_buff *skb) { struct nd_msg *msg; - if (!pskb_may_pull(skb, skb->len)) + if (skb_linearize(skb)) return 0; msg = (struct nd_msg *)skb_transport_header(skb); diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index 429089cb073d..72836f40b730 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -1,3 +1,9 @@ +/* + * IPv6 specific functions of netfilter core + * + * Rusty Russell (C) 2000 -- This code is GPL. + * Patrick McHardy (C) 2006-2012 + */ #include <linux/kernel.h> #include <linux/init.h> #include <linux/ipv6.h> @@ -29,7 +35,7 @@ int ip6_route_me_harder(struct sk_buff *skb) IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); LIMIT_NETDEBUG(KERN_DEBUG "ip6_route_me_harder: No more route.\n"); dst_release(dst); - return -EINVAL; + return dst->error; } /* Drop old route. 
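* (skb_dst_drop()/skb_dst_set() then install the fresh lookup result; note the function now reports real errnos, dst->error, PTR_ERR() or -ENOMEM, instead of a bare -EINVAL or -1.)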
*/ @@ -43,7 +49,7 @@ int ip6_route_me_harder(struct sk_buff *skb) skb_dst_set(skb, NULL); dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), skb->sk, 0); if (IS_ERR(dst)) - return -1; + return PTR_ERR(dst); skb_dst_set(skb, dst); } #endif @@ -53,7 +59,7 @@ int ip6_route_me_harder(struct sk_buff *skb) if (skb_headroom(skb) < hh_len && pskb_expand_head(skb, HH_DATA_ALIGN(hh_len - skb_headroom(skb)), 0, GFP_ATOMIC)) - return -1; + return -ENOMEM; return 0; } diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index c72532a60d88..4433ab40e7de 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -105,7 +105,7 @@ config IP6_NF_MATCH_MH config IP6_NF_MATCH_RPFILTER tristate '"rpfilter" reverse path filter match support' - depends on NETFILTER_ADVANCED + depends on NETFILTER_ADVANCED && (IP6_NF_MANGLE || IP6_NF_RAW) ---help--- This option allows you to match packets whose replies would go out via the interface the packet came in. diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 125a90d6a795..44400c216dc6 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -3,6 +3,7 @@ * * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling * Copyright (C) 2000-2005 Netfilter Core Team <coreteam@netfilter.org> + * Copyright (c) 2006-2010 Patrick McHardy <kaber@trash.net> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -284,6 +285,7 @@ static void trace_packet(const struct sk_buff *skb, const char *hookname, *chainname, *comment; const struct ip6t_entry *iter; unsigned int rulenum = 0; + struct net *net = dev_net(in ? in : out); table_base = private->entries[smp_processor_id()]; root = get_entry(table_base, private->hook_entry[hook]); @@ -296,7 +298,7 @@ static void trace_packet(const struct sk_buff *skb, &chainname, &comment, &rulenum) != 0) break; - nf_log_packet(AF_INET6, hook, skb, in, out, &trace_loginfo, + nf_log_packet(net, AF_INET6, hook, skb, in, out, &trace_loginfo, "TRACE: %s:%s:%s:%u ", tablename, chainname, comment, rulenum); } @@ -1098,7 +1100,7 @@ static int get_info(struct net *net, void __user *user, #endif t = try_then_request_module(xt_find_table_lock(net, AF_INET6, name), "ip6table_%s", name); - if (t && !IS_ERR(t)) { + if (!IS_ERR_OR_NULL(t)) { struct ip6t_getinfo info; const struct xt_table_info *private = t->private; #ifdef CONFIG_COMPAT @@ -1157,7 +1159,7 @@ get_entries(struct net *net, struct ip6t_get_entries __user *uptr, } t = xt_find_table_lock(net, AF_INET6, get.name); - if (t && !IS_ERR(t)) { + if (!IS_ERR_OR_NULL(t)) { struct xt_table_info *private = t->private; duprintf("t->private->number = %u\n", private->number); if (get.size == private->size) @@ -1197,7 +1199,7 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks, t = try_then_request_module(xt_find_table_lock(net, AF_INET6, name), "ip6table_%s", name); - if (!t || IS_ERR(t)) { + if (IS_ERR_OR_NULL(t)) { ret = t ? PTR_ERR(t) : -ENOENT; goto free_newinfo_counters_untrans; } @@ -1355,7 +1357,7 @@ do_add_counters(struct net *net, const void __user *user, unsigned int len, } t = xt_find_table_lock(net, AF_INET6, name); - if (!t || IS_ERR(t)) { + if (IS_ERR_OR_NULL(t)) { ret = t ? 
PTR_ERR(t) : -ENOENT; goto free; } @@ -1939,7 +1941,7 @@ compat_get_entries(struct net *net, struct compat_ip6t_get_entries __user *uptr, xt_compat_lock(AF_INET6); t = xt_find_table_lock(net, AF_INET6, get.name); - if (t && !IS_ERR(t)) { + if (!IS_ERR_OR_NULL(t)) { const struct xt_table_info *private = t->private; struct xt_table_info info; duprintf("t->private->number = %u\n", private->number); diff --git a/net/ipv6/netfilter/ip6t_NPT.c b/net/ipv6/netfilter/ip6t_NPT.c index 83acc1405a18..590f767db5d4 100644 --- a/net/ipv6/netfilter/ip6t_NPT.c +++ b/net/ipv6/netfilter/ip6t_NPT.c @@ -18,9 +18,8 @@ static int ip6t_npt_checkentry(const struct xt_tgchk_param *par) { struct ip6t_npt_tginfo *npt = par->targinfo; - __wsum src_sum = 0, dst_sum = 0; struct in6_addr pfx; - unsigned int i; + __wsum src_sum, dst_sum; if (npt->src_pfx_len > 64 || npt->dst_pfx_len > 64) return -EINVAL; @@ -33,12 +32,8 @@ static int ip6t_npt_checkentry(const struct xt_tgchk_param *par) if (!ipv6_addr_equal(&pfx, &npt->dst_pfx.in6)) return -EINVAL; - for (i = 0; i < ARRAY_SIZE(npt->src_pfx.in6.s6_addr16); i++) { - src_sum = csum_add(src_sum, - (__force __wsum)npt->src_pfx.in6.s6_addr16[i]); - dst_sum = csum_add(dst_sum, - (__force __wsum)npt->dst_pfx.in6.s6_addr16[i]); - } + src_sum = csum_partial(&npt->src_pfx.in6, sizeof(npt->src_pfx.in6), 0); + dst_sum = csum_partial(&npt->dst_pfx.in6, sizeof(npt->dst_pfx.in6), 0); npt->adjustment = ~csum_fold(csum_sub(src_sum, dst_sum)); return 0; @@ -57,7 +52,7 @@ static bool ip6t_npt_map_pfx(const struct ip6t_npt_tginfo *npt, if (pfx_len - i >= 32) mask = 0; else - mask = htonl(~((1 << (pfx_len - i)) - 1)); + mask = htonl((1 << (i - pfx_len + 32)) - 1); idx = i / 32; addr->s6_addr32[idx] &= mask; @@ -114,6 +109,7 @@ ip6t_dnpt_tg(struct sk_buff *skb, const struct xt_action_param *par) static struct xt_target ip6t_npt_target_reg[] __read_mostly = { { .name = "SNPT", + .table = "mangle", .target = ip6t_snpt_tg, .targetsize = sizeof(struct ip6t_npt_tginfo), .checkentry = ip6t_npt_checkentry, @@ -124,6 +120,7 @@ static struct xt_target ip6t_npt_target_reg[] __read_mostly = { }, { .name = "DNPT", + .table = "mangle", .target = ip6t_dnpt_tg, .targetsize = sizeof(struct ip6t_npt_tginfo), .checkentry = ip6t_npt_checkentry, diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c index 029623dbd411..70f9abc0efe9 100644 --- a/net/ipv6/netfilter/ip6t_REJECT.c +++ b/net/ipv6/netfilter/ip6t_REJECT.c @@ -7,6 +7,8 @@ * Authors: * Yasuyuki Kozakai <yasuyuki.kozakai@toshiba.co.jp> * + * Copyright (c) 2005-2007 Patrick McHardy <kaber@trash.net> + * * Based on net/ipv4/netfilter/ipt_REJECT.c * * This program is free software; you can redistribute it and/or @@ -126,7 +128,7 @@ static void send_reset(struct net *net, struct sk_buff *oldskb) skb_put(nskb, sizeof(struct ipv6hdr)); skb_reset_network_header(nskb); ip6h = ipv6_hdr(nskb); - *(__be32 *)ip6h = htonl(0x60000000 | (tclass << 20)); + ip6_flow_hdr(ip6h, tclass, 0); ip6h->hop_limit = ip6_dst_hoplimit(dst); ip6h->nexthdr = IPPROTO_TCP; ip6h->saddr = oip6h->daddr; diff --git a/net/ipv6/netfilter/ip6t_rpfilter.c b/net/ipv6/netfilter/ip6t_rpfilter.c index 5060d54199ab..e0983f3648a6 100644 --- a/net/ipv6/netfilter/ip6t_rpfilter.c +++ b/net/ipv6/netfilter/ip6t_rpfilter.c @@ -71,6 +71,12 @@ static bool rpfilter_lookup_reverse6(const struct sk_buff *skb, return ret; } +static bool rpfilter_is_local(const struct sk_buff *skb) +{ + const struct rt6_info *rt = (const void *) skb_dst(skb); + return rt && (rt->rt6i_flags & RTF_LOCAL); +} + 
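A minimal sketch of how the new helper is meant to be used, mirroring the change in rpfilter_mt() below; the wrapper name rpf_bypass is illustrative only and not part of the patch:

static bool rpf_bypass(const struct sk_buff *skb, bool invert)
{
	/* A packet whose input route carries RTF_LOCAL has no
	 * meaningful reverse path no matter which device it arrived
	 * on, so the match is forced to pass (XORed with --invert).
	 * This also covers the loopback traffic that the old
	 * par->in->flags & IFF_LOOPBACK test caught.
	 */
	if (rpfilter_is_local(skb))
		return true ^ invert;
	return false;	/* fall through to the reverse FIB lookup */
}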
static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_rpfilter_info *info = par->matchinfo; @@ -78,7 +84,7 @@ static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par) struct ipv6hdr *iph; bool invert = info->flags & XT_RPFILTER_INVERT; - if (par->in->flags & IFF_LOOPBACK) + if (rpfilter_is_local(skb)) return true ^ invert; iph = ipv6_hdr(skb); diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c index 7431121b87de..e075399d8b72 100644 --- a/net/ipv6/netfilter/ip6table_mangle.c +++ b/net/ipv6/netfilter/ip6table_mangle.c @@ -11,6 +11,7 @@ #include <linux/module.h> #include <linux/netfilter_ipv6/ip6_tables.h> #include <linux/slab.h> +#include <net/ipv6.h> MODULE_LICENSE("GPL"); MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); @@ -37,7 +38,7 @@ ip6t_mangle_out(struct sk_buff *skb, const struct net_device *out) struct in6_addr saddr, daddr; u_int8_t hop_limit; u_int32_t flowlabel, mark; - + int err; #if 0 /* root is playing with raw sockets. */ if (skb->len < sizeof(struct iphdr) || @@ -60,12 +61,15 @@ ip6t_mangle_out(struct sk_buff *skb, const struct net_device *out) dev_net(out)->ipv6.ip6table_mangle); if (ret != NF_DROP && ret != NF_STOLEN && - (memcmp(&ipv6_hdr(skb)->saddr, &saddr, sizeof(saddr)) || - memcmp(&ipv6_hdr(skb)->daddr, &daddr, sizeof(daddr)) || + (!ipv6_addr_equal(&ipv6_hdr(skb)->saddr, &saddr) || + !ipv6_addr_equal(&ipv6_hdr(skb)->daddr, &daddr) || skb->mark != mark || ipv6_hdr(skb)->hop_limit != hop_limit || - flowlabel != *((u_int32_t *)ipv6_hdr(skb)))) - return ip6_route_me_harder(skb) == 0 ? ret : NF_DROP; + flowlabel != *((u_int32_t *)ipv6_hdr(skb)))) { + err = ip6_route_me_harder(skb); + if (err < 0) + ret = NF_DROP_ERR(err); + } return ret; } diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c index e0e788d25b14..6383f90efda8 100644 --- a/net/ipv6/netfilter/ip6table_nat.c +++ b/net/ipv6/netfilter/ip6table_nat.c @@ -179,6 +179,7 @@ nf_nat_ipv6_out(unsigned int hooknum, #ifdef CONFIG_XFRM const struct nf_conn *ct; enum ip_conntrack_info ctinfo; + int err; #endif unsigned int ret; @@ -197,9 +198,11 @@ nf_nat_ipv6_out(unsigned int hooknum, &ct->tuplehash[!dir].tuple.dst.u3) || (ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMPV6 && ct->tuplehash[dir].tuple.src.u.all != - ct->tuplehash[!dir].tuple.dst.u.all)) - if (nf_xfrm_me_harder(skb, AF_INET6) < 0) - ret = NF_DROP; + ct->tuplehash[!dir].tuple.dst.u.all)) { + err = nf_xfrm_me_harder(skb, AF_INET6); + if (err < 0) + ret = NF_DROP_ERR(err); + } } #endif return ret; @@ -215,6 +218,7 @@ nf_nat_ipv6_local_fn(unsigned int hooknum, const struct nf_conn *ct; enum ip_conntrack_info ctinfo; unsigned int ret; + int err; /* root is playing with raw sockets. 
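The ip6table_mangle change above also tightens error handling: instead of collapsing any ip6_route_me_harder() failure into a bare NF_DROP, the negative errno is folded into the verdict via NF_DROP_ERR, and re-routing still happens only when a field that affects routing actually changed. A toy model of that snapshot-compare-reroute pattern, with stand-in types rather than the kernel's:

#include <stdio.h>
#include <string.h>

/* Stand-in for the fields ip6t_mangle_out() snapshots; not kernel types. */
struct toy_hdr {
        unsigned char saddr[16], daddr[16];
        unsigned int hop_limit, mark, flowlabel;
};

static int reroute(struct toy_hdr *h)   /* 0 on success, -errno on failure */
{
        (void)h;
        return 0;
}

static int mangle_out(struct toy_hdr *h, int (*hook)(struct toy_hdr *))
{
        struct toy_hdr before = *h;      /* snapshot before the hook runs */
        int verdict = hook(h);           /* 0 stands in for NF_ACCEPT */

        if (verdict == 0 && memcmp(&before, h, sizeof(before)) != 0) {
                int err = reroute(h);    /* cached route is now stale */
                if (err < 0)
                        verdict = err;   /* like NF_DROP_ERR: errno survives */
        }
        return verdict;
}

static int bump_hoplimit(struct toy_hdr *h)
{
        h->hop_limit = 64;
        return 0;
}

int main(void)
{
        struct toy_hdr h;

        memset(&h, 0, sizeof(h));
        printf("verdict %d\n", mangle_out(&h, bump_hoplimit));
        return 0;
}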
*/ if (skb->len < sizeof(struct ipv6hdr)) @@ -227,16 +231,19 @@ nf_nat_ipv6_local_fn(unsigned int hooknum, if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.dst.u3, &ct->tuplehash[!dir].tuple.src.u3)) { - if (ip6_route_me_harder(skb)) - ret = NF_DROP; + err = ip6_route_me_harder(skb); + if (err < 0) + ret = NF_DROP_ERR(err); } #ifdef CONFIG_XFRM else if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) && ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMPV6 && ct->tuplehash[dir].tuple.dst.u.all != - ct->tuplehash[!dir].tuple.src.u.all) - if (nf_xfrm_me_harder(skb, AF_INET6)) - ret = NF_DROP; + ct->tuplehash[!dir].tuple.src.u.all) { + err = nf_xfrm_me_harder(skb, AF_INET6); + if (err < 0) + ret = NF_DROP_ERR(err); + } #endif } return ret; diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index 137e245860ab..97bcf2bae857 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -104,7 +104,6 @@ static unsigned int ipv6_helper(unsigned int hooknum, const struct nf_conn_help *help; const struct nf_conntrack_helper *helper; enum ip_conntrack_info ctinfo; - unsigned int ret; __be16 frag_off; int protoff; u8 nexthdr; @@ -130,12 +129,7 @@ static unsigned int ipv6_helper(unsigned int hooknum, return NF_ACCEPT; } - ret = helper->help(skb, protoff, ct, ctinfo); - if (ret != NF_ACCEPT && (ret & NF_VERDICT_MASK) != NF_QUEUE) { - nf_log_packet(NFPROTO_IPV6, hooknum, skb, in, out, NULL, - "nf_ct_%s: dropping packet", helper->name); - } - return ret; + return helper->help(skb, protoff, ct, ctinfo); } static unsigned int ipv6_confirm(unsigned int hooknum, @@ -336,12 +330,8 @@ ipv6_getorigdst(struct sock *sk, int optval, void __user *user, int *len) sizeof(sin6.sin6_addr)); nf_ct_put(ct); - - if (ipv6_addr_type(&sin6.sin6_addr) & IPV6_ADDR_LINKLOCAL) - sin6.sin6_scope_id = sk->sk_bound_dev_if; - else - sin6.sin6_scope_id = 0; - + sin6.sin6_scope_id = ipv6_iface_scope_id(&sin6.sin6_addr, + sk->sk_bound_dev_if); return copy_to_user(user, &sin6, sizeof(sin6)) ? 
-EFAULT : 0; } @@ -421,54 +411,43 @@ static int ipv6_net_init(struct net *net) { int ret = 0; - ret = nf_conntrack_l4proto_register(net, - &nf_conntrack_l4proto_tcp6); + ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_tcp6); if (ret < 0) { - printk(KERN_ERR "nf_conntrack_l4proto_tcp6: protocol register failed\n"); + pr_err("nf_conntrack_tcp6: pernet registration failed\n"); goto out; } - ret = nf_conntrack_l4proto_register(net, - &nf_conntrack_l4proto_udp6); + ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_udp6); if (ret < 0) { - printk(KERN_ERR "nf_conntrack_l4proto_udp6: protocol register failed\n"); + pr_err("nf_conntrack_udp6: pernet registration failed\n"); goto cleanup_tcp6; } - ret = nf_conntrack_l4proto_register(net, - &nf_conntrack_l4proto_icmpv6); + ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_icmpv6); if (ret < 0) { - printk(KERN_ERR "nf_conntrack_l4proto_icmp6: protocol register failed\n"); + pr_err("nf_conntrack_icmp6: pernet registration failed\n"); goto cleanup_udp6; } - ret = nf_conntrack_l3proto_register(net, - &nf_conntrack_l3proto_ipv6); + ret = nf_ct_l3proto_pernet_register(net, &nf_conntrack_l3proto_ipv6); if (ret < 0) { - printk(KERN_ERR "nf_conntrack_l3proto_ipv6: protocol register failed\n"); + pr_err("nf_conntrack_ipv6: pernet registration failed.\n"); goto cleanup_icmpv6; } return 0; cleanup_icmpv6: - nf_conntrack_l4proto_unregister(net, - &nf_conntrack_l4proto_icmpv6); + nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_icmpv6); cleanup_udp6: - nf_conntrack_l4proto_unregister(net, - &nf_conntrack_l4proto_udp6); + nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_udp6); cleanup_tcp6: - nf_conntrack_l4proto_unregister(net, - &nf_conntrack_l4proto_tcp6); + nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_tcp6); out: return ret; } static void ipv6_net_exit(struct net *net) { - nf_conntrack_l3proto_unregister(net, - &nf_conntrack_l3proto_ipv6); - nf_conntrack_l4proto_unregister(net, - &nf_conntrack_l4proto_icmpv6); - nf_conntrack_l4proto_unregister(net, - &nf_conntrack_l4proto_udp6); - nf_conntrack_l4proto_unregister(net, - &nf_conntrack_l4proto_tcp6); + nf_ct_l3proto_pernet_unregister(net, &nf_conntrack_l3proto_ipv6); + nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_icmpv6); + nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_udp6); + nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_tcp6); } static struct pernet_operations ipv6_net_ops = { @@ -491,19 +470,52 @@ static int __init nf_conntrack_l3proto_ipv6_init(void) ret = register_pernet_subsys(&ipv6_net_ops); if (ret < 0) - goto cleanup_pernet; + goto cleanup_sockopt; + ret = nf_register_hooks(ipv6_conntrack_ops, ARRAY_SIZE(ipv6_conntrack_ops)); if (ret < 0) { pr_err("nf_conntrack_ipv6: can't register pre-routing defrag " "hook.\n"); - goto cleanup_ipv6; + goto cleanup_pernet; + } + + ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_tcp6); + if (ret < 0) { + pr_err("nf_conntrack_ipv6: can't register tcp6 proto.\n"); + goto cleanup_hooks; + } + + ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_udp6); + if (ret < 0) { + pr_err("nf_conntrack_ipv6: can't register udp6 proto.\n"); + goto cleanup_tcp6; + } + + ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_icmpv6); + if (ret < 0) { + pr_err("nf_conntrack_ipv6: can't register icmpv6 proto.\n"); + goto cleanup_udp6; + } + + ret = nf_ct_l3proto_register(&nf_conntrack_l3proto_ipv6); + if (ret < 0) { + pr_err("nf_conntrack_ipv6: can't register ipv6 proto.\n"); + 
goto cleanup_icmpv6; } return ret; - cleanup_ipv6: - unregister_pernet_subsys(&ipv6_net_ops); + cleanup_icmpv6: + nf_ct_l4proto_unregister(&nf_conntrack_l4proto_icmpv6); + cleanup_udp6: + nf_ct_l4proto_unregister(&nf_conntrack_l4proto_udp6); + cleanup_tcp6: + nf_ct_l4proto_unregister(&nf_conntrack_l4proto_tcp6); + cleanup_hooks: + nf_unregister_hooks(ipv6_conntrack_ops, ARRAY_SIZE(ipv6_conntrack_ops)); cleanup_pernet: + unregister_pernet_subsys(&ipv6_net_ops); + cleanup_sockopt: nf_unregister_sockopt(&so_getorigdst6); return ret; } @@ -511,6 +523,10 @@ static int __init nf_conntrack_l3proto_ipv6_init(void) static void __exit nf_conntrack_l3proto_ipv6_fini(void) { synchronize_net(); + nf_ct_l3proto_unregister(&nf_conntrack_l3proto_ipv6); + nf_ct_l4proto_unregister(&nf_conntrack_l4proto_tcp6); + nf_ct_l4proto_unregister(&nf_conntrack_l4proto_udp6); + nf_ct_l4proto_unregister(&nf_conntrack_l4proto_icmpv6); nf_unregister_hooks(ipv6_conntrack_ops, ARRAY_SIZE(ipv6_conntrack_ops)); unregister_pernet_subsys(&ipv6_net_ops); nf_unregister_sockopt(&so_getorigdst6); diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c index 24df3dde0076..b3807c5cb888 100644 --- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c @@ -131,7 +131,8 @@ static bool icmpv6_new(struct nf_conn *ct, const struct sk_buff *skb, type + 128); nf_ct_dump_tuple_ipv6(&ct->tuplehash[0].tuple); if (LOG_INVALID(nf_ct_net(ct), IPPROTO_ICMPV6)) - nf_log_packet(PF_INET6, 0, skb, NULL, NULL, NULL, + nf_log_packet(nf_ct_net(ct), PF_INET6, 0, skb, NULL, + NULL, NULL, "nf_ct_icmpv6: invalid new with type %d ", type + 128); return false; @@ -203,7 +204,7 @@ icmpv6_error(struct net *net, struct nf_conn *tmpl, icmp6h = skb_header_pointer(skb, dataoff, sizeof(_ih), &_ih); if (icmp6h == NULL) { if (LOG_INVALID(net, IPPROTO_ICMPV6)) - nf_log_packet(PF_INET6, 0, skb, NULL, NULL, NULL, + nf_log_packet(net, PF_INET6, 0, skb, NULL, NULL, NULL, "nf_ct_icmpv6: short packet "); return -NF_ACCEPT; } @@ -211,7 +212,7 @@ icmpv6_error(struct net *net, struct nf_conn *tmpl, if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING && nf_ip6_checksum(skb, hooknum, dataoff, IPPROTO_ICMPV6)) { if (LOG_INVALID(net, IPPROTO_ICMPV6)) - nf_log_packet(PF_INET6, 0, skb, NULL, NULL, NULL, + nf_log_packet(net, PF_INET6, 0, skb, NULL, NULL, NULL, "nf_ct_icmpv6: ICMPv6 checksum failed "); return -NF_ACCEPT; } diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 3dacecc99065..dffdc1a389c5 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -14,6 +14,8 @@ * 2 of the License, or (at your option) any later version. 
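The init path above now registers the conntrack protocols itself, and the unwind order is the exact reverse of the registration order, one goto label per acquired resource. A compact sketch of that ladder idiom, with illustrative resource names:

#include <stdio.h>

static int reg(const char *what)    { printf("register %s\n", what); return 0; }
static void unreg(const char *what) { printf("unregister %s\n", what); }

static int init(void)
{
        int ret;

        ret = reg("sockopt");
        if (ret < 0)
                goto out;
        ret = reg("pernet ops");
        if (ret < 0)
                goto cleanup_sockopt;
        ret = reg("hooks");
        if (ret < 0)
                goto cleanup_pernet;
        ret = reg("l4 protos");
        if (ret < 0)
                goto cleanup_hooks;
        return 0;

cleanup_hooks:
        unreg("hooks");
cleanup_pernet:
        unreg("pernet ops");
cleanup_sockopt:
        unreg("sockopt");
out:
        return ret;
}

int main(void)
{
        return init();
}

The module exit path then repeats the same reverse order unconditionally, which is why the fini hunk above mirrors the cleanup labels.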
*/ +#define pr_fmt(fmt) "IPv6-nf: " fmt + #include <linux/errno.h> #include <linux/types.h> #include <linux/string.h> @@ -39,6 +41,7 @@ #include <net/rawv6.h> #include <net/ndisc.h> #include <net/addrconf.h> +#include <net/inet_ecn.h> #include <net/netfilter/ipv6/nf_conntrack_ipv6.h> #include <linux/sysctl.h> #include <linux/netfilter.h> @@ -97,9 +100,9 @@ static int nf_ct_frag6_sysctl_register(struct net *net) if (table == NULL) goto err_alloc; - table[0].data = &net->ipv6.frags.high_thresh; - table[1].data = &net->ipv6.frags.low_thresh; - table[2].data = &net->ipv6.frags.timeout; + table[0].data = &net->nf_frag.frags.timeout; + table[1].data = &net->nf_frag.frags.low_thresh; + table[2].data = &net->nf_frag.frags.high_thresh; } hdr = register_net_sysctl(net, "net/netfilter", table); @@ -136,6 +139,11 @@ static void __net_exit nf_ct_frags6_sysctl_unregister(struct net *net) } #endif +static inline u8 ip6_frag_ecn(const struct ipv6hdr *ipv6h) +{ + return 1 << (ipv6_get_dsfield(ipv6h) & INET_ECN_MASK); +} + static unsigned int nf_hashfn(struct inet_frag_queue *q) { const struct frag_queue *nq; @@ -164,7 +172,7 @@ static void nf_ct_frag6_expire(unsigned long data) /* Creation primitives. */ static inline struct frag_queue *fq_find(struct net *net, __be32 id, u32 user, struct in6_addr *src, - struct in6_addr *dst) + struct in6_addr *dst, u8 ecn) { struct inet_frag_queue *q; struct ip6_create_arg arg; @@ -174,19 +182,18 @@ static inline struct frag_queue *fq_find(struct net *net, __be32 id, arg.user = user; arg.src = src; arg.dst = dst; + arg.ecn = ecn; read_lock_bh(&nf_frags.lock); hash = inet6_hash_frag(id, src, dst, nf_frags.rnd); q = inet_frag_find(&net->nf_frag.frags, &nf_frags, &arg, hash); local_bh_enable(); - if (q == NULL) - goto oom; - + if (IS_ERR_OR_NULL(q)) { + inet_frag_maybe_warn_overflow(q, pr_fmt()); + return NULL; + } return container_of(q, struct frag_queue, q); - -oom: - return NULL; } @@ -196,6 +203,7 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb, struct sk_buff *prev, *next; unsigned int payload_len; int offset, end; + u8 ecn; if (fq->q.last_in & INET_FRAG_COMPLETE) { pr_debug("Already completed\n"); @@ -213,6 +221,8 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb, return -1; } + ecn = ip6_frag_ecn(ipv6_hdr(skb)); + if (skb->ip_summed == CHECKSUM_COMPLETE) { const unsigned char *nh = skb_network_header(skb); skb->csum = csum_sub(skb->csum, @@ -317,9 +327,10 @@ found: } fq->q.stamp = skb->tstamp; fq->q.meat += skb->len; + fq->ecn |= ecn; if (payload_len > fq->q.max_size) fq->q.max_size = payload_len; - atomic_add(skb->truesize, &fq->q.net->mem); + add_frag_mem_limit(&fq->q, skb->truesize); /* The first fragment. * nhoffset is obtained from the first fragment, of course. @@ -328,9 +339,8 @@ found: fq->nhoffset = nhoff; fq->q.last_in |= INET_FRAG_FIRST_IN; } - write_lock(&nf_frags.lock); - list_move_tail(&fq->q.lru_list, &fq->q.net->lru_list); - write_unlock(&nf_frags.lock); + + inet_frag_lru_move(&fq->q); return 0; discard_fq: @@ -353,12 +363,17 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev) { struct sk_buff *fp, *op, *head = fq->q.fragments; int payload_len; + u8 ecn; inet_frag_kill(&fq->q, &nf_frags); WARN_ON(head == NULL); WARN_ON(NFCT_FRAG6_CB(head)->offset != 0); + ecn = ip_frag_ecn_table[fq->ecn]; + if (unlikely(ecn == 0xff)) + goto out_fail; + /* Unfragmented part is taken from the first segment. 
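The new ip6_frag_ecn() helper encodes a fragment's 2-bit ECN codepoint as a one-hot bit, so fq->ecn |= ecn accumulates the set of codepoints seen across all fragments; at reassembly the set is mapped to a final codepoint, or rejected (0xff) for invalid mixes such as Not-ECT together with CE. A small sketch of the encoding, assuming the standard RFC 3168 codepoint values; the kernel's ip_frag_ecn_table holds the real RFC 6040 mapping:

#include <stdio.h>
#include <stdint.h>

/* RFC 3168 codepoints held in the low two bits of the traffic class */
enum { ECN_NOT_ECT = 0, ECN_ECT_1 = 1, ECN_ECT_0 = 2, ECN_CE = 3 };

static uint8_t frag_ecn_bit(uint8_t dsfield)
{
        return 1 << (dsfield & 0x03);   /* 0x03 == INET_ECN_MASK */
}

int main(void)
{
        uint8_t acc = 0;

        acc |= frag_ecn_bit(ECN_ECT_0); /* first fragment */
        acc |= frag_ecn_bit(ECN_CE);    /* second fragment, marked en route */
        printf("codepoint set: 0x%02x\n", acc); /* 0x0c: { ECT(0), CE } */
        /* a set mixing Not-ECT with CE maps to 0xff; reassembly fails */
        return 0;
}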
*/ payload_len = ((head->data - skb_network_header(head)) - sizeof(struct ipv6hdr) + fq->q.len - @@ -369,7 +384,7 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev) } /* Head of list must not be cloned. */ - if (skb_cloned(head) && pskb_expand_head(head, 0, 0, GFP_ATOMIC)) { + if (skb_unclone(head, GFP_ATOMIC)) { pr_debug("skb is cloned but can't expand head"); goto out_oom; } @@ -398,7 +413,7 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev) clone->ip_summed = head->ip_summed; NFCT_FRAG6_CB(clone)->orig = NULL; - atomic_add(clone->truesize, &fq->q.net->mem); + add_frag_mem_limit(&fq->q, clone->truesize); } /* We have to remove fragment header from datagram and to relocate @@ -422,13 +437,14 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev) head->csum = csum_add(head->csum, fp->csum); head->truesize += fp->truesize; } - atomic_sub(head->truesize, &fq->q.net->mem); + sub_frag_mem_limit(&fq->q, head->truesize); head->local_df = 1; head->next = NULL; head->dev = dev; head->tstamp = fq->q.stamp; ipv6_hdr(head)->payload_len = htons(payload_len); + ipv6_change_dsfield(ipv6_hdr(head), 0xff, ecn); IP6CB(head)->frag_max_size = sizeof(struct ipv6hdr) + fq->q.max_size; /* Yes, and fold redundant checksum back. 8) */ @@ -573,7 +589,8 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb, u32 user) inet_frag_evictor(&net->nf_frag.frags, &nf_frags, false); local_bh_enable(); - fq = fq_find(net, fhdr->identification, user, &hdr->saddr, &hdr->daddr); + fq = fq_find(net, fhdr->identification, user, &hdr->saddr, &hdr->daddr, + ip6_frag_ecn(hdr)); if (fq == NULL) { pr_debug("Can't find and can't create new queue\n"); goto ret_orig; diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c index 745a32042950..f3c1ff4357ff 100644 --- a/net/ipv6/proc.c +++ b/net/ipv6/proc.c @@ -90,6 +90,7 @@ static const struct snmp_mib snmp6_ipstats_list[] = { SNMP_MIB_ITEM("Ip6OutMcastOctets", IPSTATS_MIB_OUTMCASTOCTETS), SNMP_MIB_ITEM("Ip6InBcastOctets", IPSTATS_MIB_INBCASTOCTETS), SNMP_MIB_ITEM("Ip6OutBcastOctets", IPSTATS_MIB_OUTBCASTOCTETS), + SNMP_MIB_ITEM("InCsumErrors", IPSTATS_MIB_CSUMERRORS), SNMP_MIB_SENTINEL }; @@ -99,6 +100,7 @@ static const struct snmp_mib snmp6_icmp6_list[] = { SNMP_MIB_ITEM("Icmp6InErrors", ICMP6_MIB_INERRORS), SNMP_MIB_ITEM("Icmp6OutMsgs", ICMP6_MIB_OUTMSGS), SNMP_MIB_ITEM("Icmp6OutErrors", ICMP6_MIB_OUTERRORS), + SNMP_MIB_ITEM("Icmp6InCsumErrors", ICMP6_MIB_CSUMERRORS), SNMP_MIB_SENTINEL }; @@ -129,6 +131,7 @@ static const struct snmp_mib snmp6_udp6_list[] = { SNMP_MIB_ITEM("Udp6OutDatagrams", UDP_MIB_OUTDATAGRAMS), SNMP_MIB_ITEM("Udp6RcvbufErrors", UDP_MIB_RCVBUFERRORS), SNMP_MIB_ITEM("Udp6SndbufErrors", UDP_MIB_SNDBUFERRORS), + SNMP_MIB_ITEM("Udp6InCsumErrors", UDP_MIB_CSUMERRORS), SNMP_MIB_SENTINEL }; @@ -139,6 +142,7 @@ static const struct snmp_mib snmp6_udplite6_list[] = { SNMP_MIB_ITEM("UdpLite6OutDatagrams", UDP_MIB_OUTDATAGRAMS), SNMP_MIB_ITEM("UdpLite6RcvbufErrors", UDP_MIB_RCVBUFERRORS), SNMP_MIB_ITEM("UdpLite6SndbufErrors", UDP_MIB_SNDBUFERRORS), + SNMP_MIB_ITEM("UdpLite6InCsumErrors", UDP_MIB_CSUMERRORS), SNMP_MIB_SENTINEL }; @@ -247,7 +251,7 @@ static int snmp6_dev_seq_show(struct seq_file *seq, void *v) static int snmp6_dev_seq_open(struct inode *inode, struct file *file) { - return single_open(file, snmp6_dev_seq_show, PDE(inode)->data); + return single_open(file, snmp6_dev_seq_show, PDE_DATA(inode)); } static const struct file_operations snmp6_dev_seq_fops = { @@ -287,19 +291,18 @@ int snmp6_unregister_dev(struct inet6_dev *idev) 
return -ENOENT; if (!idev->stats.proc_dir_entry) return -EINVAL; - remove_proc_entry(idev->stats.proc_dir_entry->name, - net->mib.proc_net_devsnmp6); + proc_remove(idev->stats.proc_dir_entry); idev->stats.proc_dir_entry = NULL; return 0; } static int __net_init ipv6_proc_init_net(struct net *net) { - if (!proc_net_fops_create(net, "sockstat6", S_IRUGO, - &sockstat6_seq_fops)) + if (!proc_create("sockstat6", S_IRUGO, net->proc_net, + &sockstat6_seq_fops)) return -ENOMEM; - if (!proc_net_fops_create(net, "snmp6", S_IRUGO, &snmp6_seq_fops)) + if (!proc_create("snmp6", S_IRUGO, net->proc_net, &snmp6_seq_fops)) goto proc_snmp6_fail; net->mib.proc_net_devsnmp6 = proc_mkdir("dev_snmp6", net->proc_net); @@ -308,17 +311,17 @@ static int __net_init ipv6_proc_init_net(struct net *net) return 0; proc_dev_snmp6_fail: - proc_net_remove(net, "snmp6"); + remove_proc_entry("snmp6", net->proc_net); proc_snmp6_fail: - proc_net_remove(net, "sockstat6"); + remove_proc_entry("sockstat6", net->proc_net); return -ENOMEM; } static void __net_exit ipv6_proc_exit_net(struct net *net) { - proc_net_remove(net, "sockstat6"); - proc_net_remove(net, "dev_snmp6"); - proc_net_remove(net, "snmp6"); + remove_proc_entry("sockstat6", net->proc_net); + remove_proc_entry("dev_snmp6", net->proc_net); + remove_proc_entry("snmp6", net->proc_net); } static struct pernet_operations ipv6_proc_ops = { diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 70fa81449997..eedff8ccded5 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -71,10 +71,9 @@ static struct sock *__raw_v6_lookup(struct net *net, struct sock *sk, unsigned short num, const struct in6_addr *loc_addr, const struct in6_addr *rmt_addr, int dif) { - struct hlist_node *node; bool is_multicast = ipv6_addr_is_multicast(loc_addr); - sk_for_each_from(sk, node) + sk_for_each_from(sk) if (inet_sk(sk)->inet_num == num) { struct ipv6_pinfo *np = inet6_sk(sk); @@ -264,7 +263,7 @@ static int rawv6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) if (addr_type != IPV6_ADDR_ANY) { struct net_device *dev = NULL; - if (addr_type & IPV6_ADDR_LINKLOCAL) { + if (__ipv6_addr_needs_scope_id(addr_type)) { if (addr_len >= sizeof(struct sockaddr_in6) && addr->sin6_scope_id) { /* Override any existing binding, if another @@ -499,9 +498,8 @@ static int rawv6_recvmsg(struct kiocb *iocb, struct sock *sk, sin6->sin6_port = 0; sin6->sin6_addr = ipv6_hdr(skb)->saddr; sin6->sin6_flowinfo = 0; - sin6->sin6_scope_id = 0; - if (ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL) - sin6->sin6_scope_id = IP6CB(skb)->iif; + sin6->sin6_scope_id = ipv6_iface_scope_id(&sin6->sin6_addr, + IP6CB(skb)->iif); } sock_recv_ts_and_drops(msg, sk, skb); @@ -803,7 +801,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, if (addr_len >= sizeof(struct sockaddr_in6) && sin6->sin6_scope_id && - ipv6_addr_type(daddr)&IPV6_ADDR_LINKLOCAL) + __ipv6_addr_needs_scope_id(__ipv6_addr_type(daddr))) fl6.flowi6_oif = sin6->sin6_scope_id; } else { if (sk->sk_state != TCP_ESTABLISHED) @@ -1292,7 +1290,7 @@ static const struct file_operations raw6_seq_fops = { static int __net_init raw6_init_net(struct net *net) { - if (!proc_net_fops_create(net, "raw6", S_IRUGO, &raw6_seq_fops)) + if (!proc_create("raw6", S_IRUGO, net->proc_net, &raw6_seq_fops)) return -ENOMEM; return 0; @@ -1300,7 +1298,7 @@ static int __net_init raw6_init_net(struct net *net) static void __net_exit raw6_exit_net(struct net *net) { - proc_net_remove(net, "raw6"); + remove_proc_entry("raw6", net->proc_net); } static struct pernet_operations 
raw6_net_ops = { diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index e5253ec9e0fc..790d9f4b8b0b 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -26,6 +26,9 @@ * YOSHIFUJI,H. @USAGI Always remove fragment header to * calculate ICV correctly. */ + +#define pr_fmt(fmt) "IPv6: " fmt + #include <linux/errno.h> #include <linux/types.h> #include <linux/string.h> @@ -55,6 +58,7 @@ #include <net/ndisc.h> #include <net/addrconf.h> #include <net/inet_frag.h> +#include <net/inet_ecn.h> struct ip6frag_skb_cb { @@ -64,6 +68,10 @@ struct ip6frag_skb_cb #define FRAG6_CB(skb) ((struct ip6frag_skb_cb*)((skb)->cb)) +static inline u8 ip6_frag_ecn(const struct ipv6hdr *ipv6h) +{ + return 1 << (ipv6_get_dsfield(ipv6h) & INET_ECN_MASK); +} static struct inet_frags ip6_frags; @@ -79,20 +87,8 @@ unsigned int inet6_hash_frag(__be32 id, const struct in6_addr *saddr, { u32 c; - c = jhash_3words((__force u32)saddr->s6_addr32[0], - (__force u32)saddr->s6_addr32[1], - (__force u32)saddr->s6_addr32[2], - rnd); - - c = jhash_3words((__force u32)saddr->s6_addr32[3], - (__force u32)daddr->s6_addr32[0], - (__force u32)daddr->s6_addr32[1], - c); - - c = jhash_3words((__force u32)daddr->s6_addr32[2], - (__force u32)daddr->s6_addr32[3], - (__force u32)id, - c); + c = jhash_3words(ipv6_addr_hash(saddr), ipv6_addr_hash(daddr), + (__force u32)id, rnd); return c & (INETFRAGS_HASHSZ - 1); } @@ -128,6 +124,7 @@ void ip6_frag_init(struct inet_frag_queue *q, void *a) fq->user = arg->user; fq->saddr = *arg->src; fq->daddr = *arg->dst; + fq->ecn = arg->ecn; } EXPORT_SYMBOL(ip6_frag_init); @@ -182,7 +179,8 @@ static void ip6_frag_expire(unsigned long data) } static __inline__ struct frag_queue * -fq_find(struct net *net, __be32 id, const struct in6_addr *src, const struct in6_addr *dst) +fq_find(struct net *net, __be32 id, const struct in6_addr *src, + const struct in6_addr *dst, u8 ecn) { struct inet_frag_queue *q; struct ip6_create_arg arg; @@ -192,14 +190,16 @@ fq_find(struct net *net, __be32 id, const struct in6_addr *src, const struct in6 arg.user = IP6_DEFRAG_LOCAL_DELIVER; arg.src = src; arg.dst = dst; + arg.ecn = ecn; read_lock(&ip6_frags.lock); hash = inet6_hash_frag(id, src, dst, ip6_frags.rnd); q = inet_frag_find(&net->ipv6.frags, &ip6_frags, &arg, hash); - if (q == NULL) + if (IS_ERR_OR_NULL(q)) { + inet_frag_maybe_warn_overflow(q, pr_fmt()); return NULL; - + } return container_of(q, struct frag_queue, q); } @@ -210,6 +210,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, struct net_device *dev; int offset, end; struct net *net = dev_net(skb_dst(skb)->dev); + u8 ecn; if (fq->q.last_in & INET_FRAG_COMPLETE) goto err; @@ -227,6 +228,8 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, return -1; } + ecn = ip6_frag_ecn(ipv6_hdr(skb)); + if (skb->ip_summed == CHECKSUM_COMPLETE) { const unsigned char *nh = skb_network_header(skb); skb->csum = csum_sub(skb->csum, @@ -327,7 +330,8 @@ found: } fq->q.stamp = skb->tstamp; fq->q.meat += skb->len; - atomic_add(skb->truesize, &fq->q.net->mem); + fq->ecn |= ecn; + add_frag_mem_limit(&fq->q, skb->truesize); /* The first fragment. * nhoffset is obtained from the first fragment, of course. 
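inet6_hash_frag() above shrinks from three chained jhash_3words() calls to one by first XOR-folding each 128-bit address to 32 bits, which is all ipv6_addr_hash() does. A userspace sketch of the folding; mix3() is only a placeholder for jhash_3words(), and the 64-slot mask mirrors the power-of-two table sizing:

#include <stdio.h>
#include <stdint.h>

struct addr128 { uint32_t w[4]; };

static uint32_t addr_fold(const struct addr128 *a)
{
        /* XOR-fold, as ipv6_addr_hash() does */
        return a->w[0] ^ a->w[1] ^ a->w[2] ^ a->w[3];
}

static uint32_t mix3(uint32_t a, uint32_t b, uint32_t c, uint32_t seed)
{
        /* placeholder mixer standing in for jhash_3words() */
        uint32_t h = seed;
        h = (h ^ a) * 0x9e3779b1u;
        h = (h ^ b) * 0x9e3779b1u;
        h = (h ^ c) * 0x9e3779b1u;
        return h ^ (h >> 16);
}

int main(void)
{
        struct addr128 src = { { 0x20010db8u, 0, 0, 1 } };
        struct addr128 dst = { { 0x20010db8u, 0, 0, 2 } };
        uint32_t id = 0xabcdu, rnd = 0x5eedu;
        uint32_t h = mix3(addr_fold(&src), addr_fold(&dst), id, rnd);

        printf("bucket %u\n", h & (64 - 1));    /* power-of-two table */
        return 0;
}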
@@ -338,12 +342,18 @@ found: } if (fq->q.last_in == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) && - fq->q.meat == fq->q.len) - return ip6_frag_reasm(fq, prev, dev); + fq->q.meat == fq->q.len) { + int res; + unsigned long orefdst = skb->_skb_refdst; + + skb->_skb_refdst = 0UL; + res = ip6_frag_reasm(fq, prev, dev); + skb->_skb_refdst = orefdst; + return res; + } - write_lock(&ip6_frags.lock); - list_move_tail(&fq->q.lru_list, &fq->q.net->lru_list); - write_unlock(&ip6_frags.lock); + skb_dst_drop(skb); + inet_frag_lru_move(&fq->q); return -1; discard_fq: @@ -372,9 +382,14 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, int payload_len; unsigned int nhoff; int sum_truesize; + u8 ecn; inet_frag_kill(&fq->q, &ip6_frags); + ecn = ip_frag_ecn_table[fq->ecn]; + if (unlikely(ecn == 0xff)) + goto out_fail; + /* Make the one we just received the head. */ if (prev) { head = prev->next; @@ -406,7 +421,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, goto out_oversize; /* Head of list must not be cloned. */ - if (skb_cloned(head) && pskb_expand_head(head, 0, 0, GFP_ATOMIC)) + if (skb_unclone(head, GFP_ATOMIC)) goto out_oom; /* If the first fragment is fragmented itself, we split @@ -429,7 +444,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, head->len -= clone->len; clone->csum = 0; clone->ip_summed = head->ip_summed; - atomic_add(clone->truesize, &fq->q.net->mem); + add_frag_mem_limit(&fq->q, clone->truesize); } /* We have to remove fragment header from datagram and to relocate @@ -467,12 +482,13 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, } fp = next; } - atomic_sub(sum_truesize, &fq->q.net->mem); + sub_frag_mem_limit(&fq->q, sum_truesize); head->next = NULL; head->dev = dev; head->tstamp = fq->q.stamp; ipv6_hdr(head)->payload_len = htons(payload_len); + ipv6_change_dsfield(ipv6_hdr(head), 0xff, ecn); IP6CB(head)->nhoff = nhoff; /* Yes, and fold redundant checksum back. 
8) */ @@ -536,7 +552,8 @@ static int ipv6_frag_rcv(struct sk_buff *skb) IP6_ADD_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMFAILS, evicted); - fq = fq_find(net, fhdr->identification, &hdr->saddr, &hdr->daddr); + fq = fq_find(net, fhdr->identification, &hdr->saddr, &hdr->daddr, + ip6_frag_ecn(hdr)); if (fq != NULL) { int ret; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 363d8b7772e8..ad0aa6b0b86a 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -145,25 +145,12 @@ static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst, struct neighbour *n; daddr = choose_neigh_daddr(rt, skb, daddr); - n = __ipv6_neigh_lookup(&nd_tbl, dst->dev, daddr); + n = __ipv6_neigh_lookup(dst->dev, daddr); if (n) return n; return neigh_create(&nd_tbl, daddr, dst->dev); } -static int rt6_bind_neighbour(struct rt6_info *rt, struct net_device *dev) -{ - struct neighbour *n = __ipv6_neigh_lookup(&nd_tbl, dev, &rt->rt6i_gateway); - if (!n) { - n = neigh_create(&nd_tbl, &rt->rt6i_gateway, dev); - if (IS_ERR(n)) - return PTR_ERR(n); - } - rt->n = n; - - return 0; -} - static struct dst_ops ip6_dst_ops_template = { .family = AF_INET6, .protocol = cpu_to_be16(ETH_P_IPV6), @@ -300,9 +287,7 @@ static void ip6_dst_destroy(struct dst_entry *dst) { struct rt6_info *rt = (struct rt6_info *)dst; struct inet6_dev *idev = rt->rt6i_idev; - - if (rt->n) - neigh_release(rt->n); + struct dst_entry *from = dst->from; if (!(rt->dst.flags & DST_HOST)) dst_destroy_metrics_generic(dst); @@ -312,8 +297,8 @@ static void ip6_dst_destroy(struct dst_entry *dst) in6_dev_put(idev); } - if (!(rt->rt6i_flags & RTF_EXPIRES) && dst->from) - dst_release(dst->from); + dst->from = NULL; + dst_release(from); if (rt6_has_peer(rt)) { struct inet_peer *peer = rt6_peer_ptr(rt); @@ -354,11 +339,6 @@ static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev, in6_dev_put(idev); } } - if (rt->n && rt->n->dev == dev) { - rt->n->dev = loopback_dev; - dev_hold(loopback_dev); - dev_put(dev); - } } } @@ -388,15 +368,8 @@ static int rt6_info_hash_nhsfn(unsigned int candidate_count, { unsigned int val = fl6->flowi6_proto; - val ^= (__force u32)fl6->daddr.s6_addr32[0]; - val ^= (__force u32)fl6->daddr.s6_addr32[1]; - val ^= (__force u32)fl6->daddr.s6_addr32[2]; - val ^= (__force u32)fl6->daddr.s6_addr32[3]; - - val ^= (__force u32)fl6->saddr.s6_addr32[0]; - val ^= (__force u32)fl6->saddr.s6_addr32[1]; - val ^= (__force u32)fl6->saddr.s6_addr32[2]; - val ^= (__force u32)fl6->saddr.s6_addr32[3]; + val ^= ipv6_addr_hash(&fl6->daddr); + val ^= ipv6_addr_hash(&fl6->saddr); /* Work only if this not encapsulated */ switch (fl6->flowi6_proto) { @@ -505,24 +478,34 @@ static void rt6_probe(struct rt6_info *rt) * Router Reachability Probe MUST be rate-limited * to no more than one per minute. */ - neigh = rt ? 
rt->n : NULL; - if (!neigh || (neigh->nud_state & NUD_VALID)) + if (!rt || !(rt->rt6i_flags & RTF_GATEWAY)) return; - read_lock_bh(&neigh->lock); - if (!(neigh->nud_state & NUD_VALID) && + rcu_read_lock_bh(); + neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway); + if (neigh) { + write_lock(&neigh->lock); + if (neigh->nud_state & NUD_VALID) + goto out; + } + + if (!neigh || time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) { struct in6_addr mcaddr; struct in6_addr *target; - neigh->updated = jiffies; - read_unlock_bh(&neigh->lock); + if (neigh) { + neigh->updated = jiffies; + write_unlock(&neigh->lock); + } - target = (struct in6_addr *)&neigh->primary_key; + target = (struct in6_addr *)&rt->rt6i_gateway; addrconf_addr_solict_mult(target, &mcaddr); ndisc_send_ns(rt->dst.dev, NULL, target, &mcaddr, NULL); } else { - read_unlock_bh(&neigh->lock); +out: + write_unlock(&neigh->lock); } + rcu_read_unlock_bh(); } #else static inline void rt6_probe(struct rt6_info *rt) @@ -549,20 +532,24 @@ static inline bool rt6_check_neigh(struct rt6_info *rt) struct neighbour *neigh; bool ret = false; - neigh = rt->n; if (rt->rt6i_flags & RTF_NONEXTHOP || !(rt->rt6i_flags & RTF_GATEWAY)) - ret = true; - else if (neigh) { - read_lock_bh(&neigh->lock); + return true; + + rcu_read_lock_bh(); + neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway); + if (neigh) { + read_lock(&neigh->lock); if (neigh->nud_state & NUD_VALID) ret = true; #ifdef CONFIG_IPV6_ROUTER_PREF else if (!(neigh->nud_state & NUD_FAILED)) ret = true; #endif - read_unlock_bh(&neigh->lock); + read_unlock(&neigh->lock); } + rcu_read_unlock_bh(); + return ret; } @@ -838,8 +825,6 @@ static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, rt = ip6_rt_copy(ort, daddr); if (rt) { - int attempts = !in_softirq(); - if (!(rt->rt6i_flags & RTF_GATEWAY)) { if (ort->rt6i_dst.plen != 128 && ipv6_addr_equal(&ort->rt6i_dst.addr, daddr)) @@ -855,32 +840,6 @@ static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, rt->rt6i_src.plen = 128; } #endif - - retry: - if (rt6_bind_neighbour(rt, rt->dst.dev)) { - struct net *net = dev_net(rt->dst.dev); - int saved_rt_min_interval = - net->ipv6.sysctl.ip6_rt_gc_min_interval; - int saved_rt_elasticity = - net->ipv6.sysctl.ip6_rt_gc_elasticity; - - if (attempts-- > 0) { - net->ipv6.sysctl.ip6_rt_gc_elasticity = 1; - net->ipv6.sysctl.ip6_rt_gc_min_interval = 0; - - ip6_dst_gc(&net->ipv6.ip6_dst_ops); - - net->ipv6.sysctl.ip6_rt_gc_elasticity = - saved_rt_elasticity; - net->ipv6.sysctl.ip6_rt_gc_min_interval = - saved_rt_min_interval; - goto retry; - } - - net_warn_ratelimited("Neighbour table overflow\n"); - dst_free(&rt->dst); - return NULL; - } } return rt; @@ -891,10 +850,8 @@ static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, { struct rt6_info *rt = ip6_rt_copy(ort, daddr); - if (rt) { + if (rt) rt->rt6i_flags |= RTF_CACHE; - rt->n = neigh_clone(ort->n); - } return rt; } @@ -928,7 +885,7 @@ restart: dst_hold(&rt->dst); read_unlock_bh(&table->tb6_lock); - if (!rt->n && !(rt->rt6i_flags & (RTF_NONEXTHOP | RTF_LOCAL))) + if (!(rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY))) nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr); else if (!(rt->dst.flags & DST_HOST)) nrt = rt6_alloc_clone(rt, &fl6->daddr); @@ -994,7 +951,7 @@ void ip6_route_input(struct sk_buff *skb) .flowi6_iif = skb->dev->ifindex, .daddr = iph->daddr, .saddr = iph->saddr, - .flowlabel = (* (__be32 *) iph) & IPV6_FLOWINFO_MASK, + .flowlabel = ip6_flowinfo(iph), .flowi6_mark = skb->mark, 
.flowi6_proto = iph->nexthdr, }; @@ -1054,7 +1011,6 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori rt->rt6i_gateway = ort->rt6i_gateway; rt->rt6i_flags = ort->rt6i_flags; - rt6_clean_expires(rt); rt->rt6i_metric = 0; memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key)); @@ -1159,7 +1115,7 @@ void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu, fl6.flowi6_flags = 0; fl6.daddr = iph->daddr; fl6.saddr = iph->saddr; - fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK; + fl6.flowlabel = ip6_flowinfo(iph); dst = ip6_route_output(net, NULL, &fl6); if (!dst->error) @@ -1187,7 +1143,7 @@ void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark) fl6.flowi6_flags = 0; fl6.daddr = iph->daddr; fl6.saddr = iph->saddr; - fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK; + fl6.flowlabel = ip6_flowinfo(iph); dst = ip6_route_output(net, NULL, &fl6); if (!dst->error) @@ -1247,7 +1203,6 @@ static struct dst_entry *icmp6_dst_gc_list; static DEFINE_SPINLOCK(icmp6_dst_lock); struct dst_entry *icmp6_dst_alloc(struct net_device *dev, - struct neighbour *neigh, struct flowi6 *fl6) { struct dst_entry *dst; @@ -1265,20 +1220,8 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev, goto out; } - if (neigh) - neigh_hold(neigh); - else { - neigh = ip6_neigh_lookup(&rt->dst, NULL, &fl6->daddr); - if (IS_ERR(neigh)) { - in6_dev_put(idev); - dst_free(&rt->dst); - return ERR_CAST(neigh); - } - } - rt->dst.flags |= DST_HOST; rt->dst.output = ip6_output; - rt->n = neigh; atomic_set(&rt->dst.__refcnt, 1); rt->rt6i_dst.addr = fl6->daddr; rt->rt6i_dst.plen = 128; @@ -1587,12 +1530,6 @@ int ip6_route_add(struct fib6_config *cfg) } else rt->rt6i_prefsrc.plen = 0; - if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) { - err = rt6_bind_neighbour(rt, dev); - if (err) - goto out; - } - rt->rt6i_flags = cfg->fc_flags; install_route: @@ -1705,37 +1642,32 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu struct net *net = dev_net(skb->dev); struct netevent_redirect netevent; struct rt6_info *rt, *nrt = NULL; - const struct in6_addr *target; struct ndisc_options ndopts; - const struct in6_addr *dest; - struct neighbour *old_neigh; struct inet6_dev *in6_dev; struct neighbour *neigh; - struct icmp6hdr *icmph; + struct rd_msg *msg; int optlen, on_link; u8 *lladdr; optlen = skb->tail - skb->transport_header; - optlen -= sizeof(struct icmp6hdr) + 2 * sizeof(struct in6_addr); + optlen -= sizeof(*msg); if (optlen < 0) { net_dbg_ratelimited("rt6_do_redirect: packet too short\n"); return; } - icmph = icmp6_hdr(skb); - target = (const struct in6_addr *) (icmph + 1); - dest = target + 1; + msg = (struct rd_msg *)icmp6_hdr(skb); - if (ipv6_addr_is_multicast(dest)) { + if (ipv6_addr_is_multicast(&msg->dest)) { net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n"); return; } on_link = 0; - if (ipv6_addr_equal(dest, target)) { + if (ipv6_addr_equal(&msg->dest, &msg->target)) { on_link = 1; - } else if (ipv6_addr_type(target) != + } else if (ipv6_addr_type(&msg->target) != (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) { net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n"); return; @@ -1752,7 +1684,7 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu * first-hop router for the specified ICMP Destination Address. 
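The redirect rework above drops the error-prone pointer arithmetic (icmph + 1 for the target, target + 1 for the destination) in favour of a struct rd_msg overlay on the ICMPv6 redirect message. A userspace analogue of that layout, approximating the kernel's definition in include/net/ndisc.h:

#include <netinet/icmp6.h>      /* struct icmp6_hdr */
#include <netinet/in.h>         /* struct in6_addr */
#include <stdint.h>
#include <stdio.h>

struct rd_msg_sketch {
        struct icmp6_hdr icmph;         /* type 137, code 0, cksum, reserved */
        struct in6_addr  target;        /* better first hop */
        struct in6_addr  dest;          /* destination being redirected */
        uint8_t          opt[];         /* ND options, e.g. target lladdr */
};

int main(void)
{
        /* optlen in the hunk above is the skb length past this fixed part */
        printf("fixed part: %zu bytes\n", sizeof(struct rd_msg_sketch));
        return 0;
}

With the overlay, the on-link test is simply ipv6_addr_equal(&msg->dest, &msg->target), and ndisc_parse_options() starts at msg->opt.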
*/ - if (!ndisc_parse_options((u8*)(dest + 1), optlen, &ndopts)) { + if (!ndisc_parse_options(msg->opt, optlen, &ndopts)) { net_dbg_ratelimited("rt6_redirect: invalid ND options\n"); return; } @@ -1779,15 +1711,10 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu */ dst_confirm(&rt->dst); - neigh = __neigh_lookup(&nd_tbl, target, skb->dev, 1); + neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1); if (!neigh) return; - /* Duplicate redirect: silently ignore. */ - old_neigh = rt->n; - if (neigh == old_neigh) - goto out; - /* * We have finally decided to accept it. */ @@ -1799,7 +1726,7 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu NEIGH_UPDATE_F_ISROUTER)) ); - nrt = ip6_rt_copy(rt, dest); + nrt = ip6_rt_copy(rt, &msg->dest); if (!nrt) goto out; @@ -1808,16 +1735,14 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu nrt->rt6i_flags &= ~RTF_GATEWAY; nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key; - nrt->n = neigh_clone(neigh); if (ip6_ins_rt(nrt)) goto out; netevent.old = &rt->dst; - netevent.old_neigh = old_neigh; netevent.new = &nrt->dst; - netevent.new_neigh = neigh; - netevent.daddr = dest; + netevent.daddr = &msg->dest; + netevent.neigh = neigh; call_netevent_notifiers(NETEVENT_REDIRECT, &netevent); if (rt->rt6i_flags & RTF_CACHE) { @@ -1859,8 +1784,6 @@ static struct rt6_info *ip6_rt_copy(struct rt6_info *ort, if ((ort->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) == (RTF_DEFAULT | RTF_ADDRCONF)) rt6_set_from(rt, ort); - else - rt6_clean_expires(rt); rt->rt6i_metric = 0; #ifdef CONFIG_IPV6_SUBTREES @@ -1992,7 +1915,8 @@ void rt6_purge_dflt_routers(struct net *net) restart: read_lock_bh(&table->tb6_lock); for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) { - if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) { + if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) && + (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) { dst_hold(&rt->dst); read_unlock_bh(&table->tb6_lock); ip6_del_rt(rt); @@ -2123,7 +2047,6 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, { struct net *net = dev_net(idev->dev); struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, 0, NULL); - int err; if (!rt) { net_warn_ratelimited("Maximum number of routes reached, consider increasing route/max_size\n"); @@ -2142,11 +2065,6 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, rt->rt6i_flags |= RTF_ANYCAST; else rt->rt6i_flags |= RTF_LOCAL; - err = rt6_bind_neighbour(rt, rt->dst.dev); - if (err) { - dst_free(&rt->dst); - return ERR_PTR(err); - } rt->rt6i_dst.addr = *addr; rt->rt6i_dst.plen = 128; @@ -2437,7 +2355,7 @@ beginning: return last_err; } -static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) +static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh) { struct fib6_config cfg; int err; @@ -2452,7 +2370,7 @@ static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *a return ip6_route_del(&cfg); } -static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) +static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh) { struct fib6_config cfg; int err; @@ -2492,7 +2410,6 @@ static int rt6_fill_node(struct net *net, struct nlmsghdr *nlh; long expires; u32 table; - struct neighbour *n; if (prefix) { /* user wants prefix routes only */ if (!(rt->rt6i_flags & RTF_PREFIX_RT)) { @@ -2605,9 +2522,8 @@ static int rt6_fill_node(struct net *net, if 
(rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0) goto nla_put_failure; - n = rt->n; - if (n) { - if (nla_put(skb, RTA_GATEWAY, 16, &n->primary_key) < 0) + if (rt->rt6i_flags & RTF_GATEWAY) { + if (nla_put(skb, RTA_GATEWAY, 16, &rt->rt6i_gateway) < 0) goto nla_put_failure; } @@ -2646,7 +2562,7 @@ int rt6_dump_route(struct rt6_info *rt, void *p_arg) prefix, 0, NLM_F_MULTI); } -static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg) +static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh) { struct net *net = sock_net(in_skb->sk); struct nlattr *tb[RTA_MAX+1]; @@ -2802,7 +2718,6 @@ struct rt6_proc_arg static int rt6_info_route(struct rt6_info *rt, void *p_arg) { struct seq_file *m = p_arg; - struct neighbour *n; seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen); @@ -2811,9 +2726,8 @@ static int rt6_info_route(struct rt6_info *rt, void *p_arg) #else seq_puts(m, "00000000000000000000000000000000 00 "); #endif - n = rt->n; - if (n) { - seq_printf(m, "%pi6", n->primary_key); + if (rt->rt6i_flags & RTF_GATEWAY) { + seq_printf(m, "%pi6", &rt->rt6i_gateway); } else { seq_puts(m, "00000000000000000000000000000000"); } @@ -3080,8 +2994,8 @@ static void __net_exit ip6_route_net_exit(struct net *net) static int __net_init ip6_route_net_init_late(struct net *net) { #ifdef CONFIG_PROC_FS - proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops); - proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops); + proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops); + proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops); #endif return 0; } @@ -3089,8 +3003,8 @@ static int __net_init ip6_route_net_init_late(struct net *net) static void __net_exit ip6_route_net_exit_late(struct net *net) { #ifdef CONFIG_PROC_FS - proc_net_remove(net, "ipv6_route"); - proc_net_remove(net, "rt6_stats"); + remove_proc_entry("ipv6_route", net->proc_net); + remove_proc_entry("rt6_stats", net->proc_net); #endif } diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index cfba99b2c2a4..335363478bbf 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -49,7 +49,7 @@ #include <net/ip.h> #include <net/udp.h> #include <net/icmp.h> -#include <net/ipip.h> +#include <net/ip_tunnels.h> #include <net/inet_ecn.h> #include <net/xfrm.h> #include <net/dsfield.h> @@ -72,6 +72,8 @@ MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN"); static int ipip6_tunnel_init(struct net_device *dev); static void ipip6_tunnel_setup(struct net_device *dev); static void ipip6_dev_free(struct net_device *dev); +static bool check_6rd(struct ip_tunnel *tunnel, const struct in6_addr *v6dst, + __be32 *v4dst); static struct rtnl_link_ops sit_link_ops __read_mostly; static int sit_net_id __read_mostly; @@ -85,41 +87,6 @@ struct sit_net { struct net_device *fb_tunnel_dev; }; -static struct rtnl_link_stats64 *ipip6_get_stats64(struct net_device *dev, - struct rtnl_link_stats64 *tot) -{ - int i; - - for_each_possible_cpu(i) { - const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i); - u64 rx_packets, rx_bytes, tx_packets, tx_bytes; - unsigned int start; - - do { - start = u64_stats_fetch_begin_bh(&tstats->syncp); - rx_packets = tstats->rx_packets; - tx_packets = tstats->tx_packets; - rx_bytes = tstats->rx_bytes; - tx_bytes = tstats->tx_bytes; - } while (u64_stats_fetch_retry_bh(&tstats->syncp, start)); - - tot->rx_packets += rx_packets; - tot->tx_packets += tx_packets; - tot->rx_bytes += rx_bytes; - tot->tx_bytes += tx_bytes; - 
} - - tot->rx_errors = dev->stats.rx_errors; - tot->rx_frame_errors = dev->stats.rx_frame_errors; - tot->tx_fifo_errors = dev->stats.tx_fifo_errors; - tot->tx_carrier_errors = dev->stats.tx_carrier_errors; - tot->tx_dropped = dev->stats.tx_dropped; - tot->tx_aborted_errors = dev->stats.tx_aborted_errors; - tot->tx_errors = dev->stats.tx_errors; - - return tot; -} - /* * Must be invoked with rcu_read_lock */ @@ -590,17 +557,21 @@ out: return err; } +static inline bool is_spoofed_6rd(struct ip_tunnel *tunnel, const __be32 v4addr, + const struct in6_addr *v6addr) +{ + __be32 v4embed = 0; + if (check_6rd(tunnel, v6addr, &v4embed) && v4addr != v4embed) + return true; + return false; +} + static int ipip6_rcv(struct sk_buff *skb) { - const struct iphdr *iph; + const struct iphdr *iph = ip_hdr(skb); struct ip_tunnel *tunnel; int err; - if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) - goto out; - - iph = ip_hdr(skb); - tunnel = ipip6_tunnel_lookup(dev_net(skb->dev), skb->dev, iph->saddr, iph->daddr); if (tunnel != NULL) { @@ -613,10 +584,19 @@ static int ipip6_rcv(struct sk_buff *skb) skb->protocol = htons(ETH_P_IPV6); skb->pkt_type = PACKET_HOST; - if ((tunnel->dev->priv_flags & IFF_ISATAP) && - !isatap_chksrc(skb, iph, tunnel)) { - tunnel->dev->stats.rx_errors++; - goto out; + if (tunnel->dev->priv_flags & IFF_ISATAP) { + if (!isatap_chksrc(skb, iph, tunnel)) { + tunnel->dev->stats.rx_errors++; + goto out; + } + } else { + if (is_spoofed_6rd(tunnel, iph->saddr, + &ipv6_hdr(skb)->saddr) || + is_spoofed_6rd(tunnel, iph->daddr, + &ipv6_hdr(skb)->daddr)) { + tunnel->dev->stats.rx_errors++; + goto out; + } } __skb_tunnel_rx(skb, tunnel->dev); @@ -650,14 +630,12 @@ out: } /* - * Returns the embedded IPv4 address if the IPv6 address - * comes from 6rd / 6to4 (RFC 3056) addr space. + * If the IPv6 address comes from 6rd / 6to4 (RFC 3056) addr space this function + * stores the embedded IPv4 address in v4dst and returns true. */ -static inline -__be32 try_6rd(const struct in6_addr *v6dst, struct ip_tunnel *tunnel) +static bool check_6rd(struct ip_tunnel *tunnel, const struct in6_addr *v6dst, + __be32 *v4dst) { - __be32 dst = 0; - #ifdef CONFIG_IPV6_SIT_6RD if (ipv6_prefix_equal(v6dst, &tunnel->ip6rd.prefix, tunnel->ip6rd.prefixlen)) { @@ -676,14 +654,24 @@ __be32 try_6rd(const struct in6_addr *v6dst, struct ip_tunnel *tunnel) d |= ntohl(v6dst->s6_addr32[pbw0 + 1]) >> (32 - pbi1); - dst = tunnel->ip6rd.relay_prefix | htonl(d); + *v4dst = tunnel->ip6rd.relay_prefix | htonl(d); + return true; } #else if (v6dst->s6_addr16[0] == htons(0x2002)) { /* 6to4 v6 addr has 16 bits prefix, 32 v4addr, 16 SLA, ... 
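check_6rd() here centralises the embedded-IPv4 extraction so the receive path can reject spoofed tunnel packets: if the inner IPv6 address embeds an IPv4 address (a configured 6rd prefix, or 2002::/16 for 6to4 as in the branch continued just below), it must match the outer IPv4 header. A userspace sketch of the 6to4 branch only; names here are illustrative:

#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <stdbool.h>
#include <arpa/inet.h>

/* 6to4: in 2002::/16 the IPv4 address occupies bytes 2..5. */
static bool check_6to4(const struct in6_addr *v6, uint32_t *v4 /* net order */)
{
        if (v6->s6_addr[0] != 0x20 || v6->s6_addr[1] != 0x02)
                return false;
        memcpy(v4, &v6->s6_addr[2], 4);
        return true;
}

static bool is_spoofed(const struct in6_addr *v6, uint32_t outer_v4)
{
        uint32_t embedded;

        return check_6to4(v6, &embedded) && embedded != outer_v4;
}

int main(void)
{
        struct in6_addr a;
        uint32_t outer;

        inet_pton(AF_INET6, "2002:c000:201::1", &a);    /* embeds 192.0.2.1 */
        inet_pton(AF_INET, "192.0.2.1", &outer);
        printf("spoofed: %d\n", is_spoofed(&a, outer)); /* 0 */
        return 0;
}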
*/ - memcpy(&dst, &v6dst->s6_addr16[1], 4); + memcpy(v4dst, &v6dst->s6_addr16[1], 4); + return true; } #endif + return false; +} + +static inline __be32 try_6rd(struct ip_tunnel *tunnel, + const struct in6_addr *v6dst) +{ + __be32 dst = 0; + check_6rd(tunnel, v6dst, &dst); return dst; } @@ -744,7 +732,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, } if (!dst) - dst = try_6rd(&iph6->daddr, tunnel); + dst = try_6rd(tunnel, &iph6->daddr); if (!dst) { struct neighbour *neigh = NULL; @@ -876,6 +864,8 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, if ((iph->ttl = tiph->ttl) == 0) iph->ttl = iph6->hop_limit; + skb->ip_summed = CHECKSUM_NONE; + ip_select_ident(iph, skb_dst(skb), NULL); iptunnel_xmit(skb, dev); return NETDEV_TX_OK; @@ -1177,7 +1167,7 @@ static const struct net_device_ops ipip6_netdev_ops = { .ndo_start_xmit = ipip6_tunnel_xmit, .ndo_do_ioctl = ipip6_tunnel_ioctl, .ndo_change_mtu = ipip6_tunnel_change_mtu, - .ndo_get_stats64= ipip6_get_stats64, + .ndo_get_stats64 = ip_tunnel_get_stats64, }; static void ipip6_dev_free(struct net_device *dev) diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 40161977f7cf..d5dda20bd717 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -149,7 +149,6 @@ static inline int cookie_check(const struct sk_buff *skb, __u32 cookie) struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) { struct tcp_options_received tcp_opt; - const u8 *hash_location; struct inet_request_sock *ireq; struct inet6_request_sock *ireq6; struct tcp_request_sock *treq; @@ -177,9 +176,9 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) /* check for timestamp cookie support */ memset(&tcp_opt, 0, sizeof(tcp_opt)); - tcp_parse_options(skb, &tcp_opt, &hash_location, 0, NULL); + tcp_parse_options(skb, &tcp_opt, 0, NULL); - if (!cookie_check_timestamp(&tcp_opt, &ecn_ok)) + if (!cookie_check_timestamp(&tcp_opt, sock_net(sk), &ecn_ok)) goto out; ret = NULL; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 4f43537197ef..71167069b394 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -386,9 +386,17 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (dst) dst->ops->redirect(dst, sk, skb); + goto out; } if (type == ICMPV6_PKT_TOOBIG) { + /* We are not interested in TCP_LISTEN and open_requests + * (SYN-ACKs send out by Linux are always <576bytes so + * they should go through unfragmented). 
+ */ + if (sk->sk_state == TCP_LISTEN) + goto out; + tp->mtu_info = ntohl(info); if (!sock_owned_by_user(sk)) tcp_v6_mtu_reduced(sk); @@ -454,7 +462,6 @@ out: static int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst, struct flowi6 *fl6, struct request_sock *req, - struct request_values *rvp, u16 queue_mapping) { struct inet6_request_sock *treq = inet6_rsk(req); @@ -466,7 +473,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst, if (!dst && (dst = inet6_csk_route_req(sk, fl6, req)) == NULL) goto done; - skb = tcp_make_synack(sk, dst, req, rvp, NULL); + skb = tcp_make_synack(sk, dst, req, NULL); if (skb) { __tcp_v6_send_check(skb, &treq->loc_addr, &treq->rmt_addr); @@ -481,13 +488,12 @@ done: return err; } -static int tcp_v6_rtx_synack(struct sock *sk, struct request_sock *req, - struct request_values *rvp) +static int tcp_v6_rtx_synack(struct sock *sk, struct request_sock *req) { struct flowi6 fl6; int res; - res = tcp_v6_send_synack(sk, NULL, &fl6, req, rvp, 0); + res = tcp_v6_send_synack(sk, NULL, &fl6, req, 0); if (!res) TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS); return res; @@ -713,7 +719,8 @@ static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = { #endif static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win, - u32 ts, struct tcp_md5sig_key *key, int rst, u8 tclass) + u32 tsval, u32 tsecr, + struct tcp_md5sig_key *key, int rst, u8 tclass) { const struct tcphdr *th = tcp_hdr(skb); struct tcphdr *t1; @@ -725,7 +732,7 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win, struct dst_entry *dst; __be32 *topt; - if (ts) + if (tsecr) tot_len += TCPOLEN_TSTAMP_ALIGNED; #ifdef CONFIG_TCP_MD5SIG if (key) @@ -755,11 +762,11 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win, topt = (__be32 *)(t1 + 1); - if (ts) { + if (tsecr) { *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP); - *topt++ = htonl(tcp_time_stamp); - *topt++ = htonl(ts); + *topt++ = htonl(tsval); + *topt++ = htonl(tsecr); } #ifdef CONFIG_TCP_MD5SIG @@ -835,7 +842,8 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb) * no RST generated if md5 hash doesn't match. 
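Splitting the old ts parameter into tsval/tsecr above lets ACKs carry a per-connection timestamp (tcp_time_stamp + tw_ts_offset for timewait ACKs) while still echoing the peer's value. The 12-byte aligned timestamp option itself is unchanged; a sketch of how that word-aligned encoding is built (RFC 1323 option kind 8, length 10):

#include <stdio.h>
#include <stdint.h>
#include <arpa/inet.h>

#define TCPOPT_NOP        1
#define TCPOPT_TIMESTAMP  8
#define TCPOLEN_TIMESTAMP 10

static void write_tsopt(uint32_t *topt, uint32_t tsval, uint32_t tsecr)
{
        *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
                        (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
        *topt++ = htonl(tsval);         /* sender's clock */
        *topt   = htonl(tsecr);         /* echo of the peer's TSval */
}

int main(void)
{
        uint32_t opt[3];

        write_tsopt(opt, 123456u, 654321u);
        printf("%08x %08x %08x\n",
               ntohl(opt[0]), ntohl(opt[1]), ntohl(opt[2]));
        return 0;       /* first word: 0101080a */
}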
*/ sk1 = inet6_lookup_listener(dev_net(skb_dst(skb)->dev), - &tcp_hashinfo, &ipv6h->daddr, + &tcp_hashinfo, &ipv6h->saddr, + th->source, &ipv6h->daddr, ntohs(th->source), inet6_iif(skb)); if (!sk1) return; @@ -859,7 +867,7 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb) ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len - (th->doff << 2); - tcp_v6_send_response(skb, seq, ack_seq, 0, 0, key, 1, 0); + tcp_v6_send_response(skb, seq, ack_seq, 0, 0, 0, key, 1, 0); #ifdef CONFIG_TCP_MD5SIG release_sk1: @@ -870,10 +878,11 @@ release_sk1: #endif } -static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts, +static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, + u32 win, u32 tsval, u32 tsecr, struct tcp_md5sig_key *key, u8 tclass) { - tcp_v6_send_response(skb, seq, ack, win, ts, key, 0, tclass); + tcp_v6_send_response(skb, seq, ack, win, tsval, tsecr, key, 0, tclass); } static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb) @@ -883,6 +892,7 @@ static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb) tcp_v6_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt, tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, + tcp_time_stamp + tcptw->tw_ts_offset, tcptw->tw_ts_recent, tcp_twsk_md5_key(tcptw), tw->tw_tclass); @@ -892,7 +902,8 @@ static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb) static void tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, struct request_sock *req) { - tcp_v6_send_ack(skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd, req->ts_recent, + tcp_v6_send_ack(skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1, + req->rcv_wnd, tcp_time_stamp, req->ts_recent, tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr), 0); } @@ -935,9 +946,7 @@ static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb) */ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) { - struct tcp_extend_values tmp_ext; struct tcp_options_received tmp_opt; - const u8 *hash_location; struct request_sock *req; struct inet6_request_sock *treq; struct ipv6_pinfo *np = inet6_sk(sk); @@ -975,50 +984,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) tcp_clear_options(&tmp_opt); tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr); tmp_opt.user_mss = tp->rx_opt.user_mss; - tcp_parse_options(skb, &tmp_opt, &hash_location, 0, NULL); - - if (tmp_opt.cookie_plus > 0 && - tmp_opt.saw_tstamp && - !tp->rx_opt.cookie_out_never && - (sysctl_tcp_cookie_size > 0 || - (tp->cookie_values != NULL && - tp->cookie_values->cookie_desired > 0))) { - u8 *c; - u32 *d; - u32 *mess = &tmp_ext.cookie_bakery[COOKIE_DIGEST_WORDS]; - int l = tmp_opt.cookie_plus - TCPOLEN_COOKIE_BASE; - - if (tcp_cookie_generator(&tmp_ext.cookie_bakery[0]) != 0) - goto drop_and_free; - - /* Secret recipe starts with IP addresses */ - d = (__force u32 *)&ipv6_hdr(skb)->daddr.s6_addr32[0]; - *mess++ ^= *d++; - *mess++ ^= *d++; - *mess++ ^= *d++; - *mess++ ^= *d++; - d = (__force u32 *)&ipv6_hdr(skb)->saddr.s6_addr32[0]; - *mess++ ^= *d++; - *mess++ ^= *d++; - *mess++ ^= *d++; - *mess++ ^= *d++; - - /* plus variable length Initiator Cookie */ - c = (u8 *)mess; - while (l-- > 0) - *c++ ^= *hash_location++; - - want_cookie = false; /* not our kind of cookie */ - tmp_ext.cookie_out_never = 0; /* false */ - tmp_ext.cookie_plus = tmp_opt.cookie_plus; - } else if (!tp->rx_opt.cookie_in_always) { - /* redundant indications, but ensure initialization. 
*/ - tmp_ext.cookie_out_never = 1; /* true */ - tmp_ext.cookie_plus = 0; - } else { - goto drop_and_free; - } - tmp_ext.cookie_in_always = tp->rx_opt.cookie_in_always; + tcp_parse_options(skb, &tmp_opt, 0, NULL); if (want_cookie && !tmp_opt.saw_tstamp) tcp_clear_options(&tmp_opt); @@ -1030,7 +996,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) treq->rmt_addr = ipv6_hdr(skb)->saddr; treq->loc_addr = ipv6_hdr(skb)->daddr; if (!want_cookie || tmp_opt.tstamp_ok) - TCP_ECN_create_request(req, skb); + TCP_ECN_create_request(req, skb, sock_net(sk)); treq->iif = sk->sk_bound_dev_if; @@ -1096,7 +1062,6 @@ have_isn: goto drop_and_release; if (tcp_v6_send_synack(sk, dst, &fl6, req, - (struct request_values *)&tmp_ext, skb_get_queue_mapping(skb)) || want_cookie) goto drop_and_free; @@ -1167,7 +1132,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, newnp->opt = NULL; newnp->mcast_oif = inet6_iif(skb); newnp->mcast_hops = ipv6_hdr(skb)->hop_limit; - newnp->rcv_tclass = ipv6_tclass(ipv6_hdr(skb)); + newnp->rcv_tclass = ipv6_get_dsfield(ipv6_hdr(skb)); /* * No need to charge this sock to the relevant IPv6 refcnt debug socks count @@ -1247,7 +1212,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, newnp->opt = NULL; newnp->mcast_oif = inet6_iif(skb); newnp->mcast_hops = ipv6_hdr(skb)->hop_limit; - newnp->rcv_tclass = ipv6_tclass(ipv6_hdr(skb)); + newnp->rcv_tclass = ipv6_get_dsfield(ipv6_hdr(skb)); /* Clone native IPv6 options from listening socket (if any) @@ -1440,6 +1405,7 @@ discard: kfree_skb(skb); return 0; csum_err: + TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_CSUMERRORS); TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS); goto discard; @@ -1460,7 +1426,7 @@ ipv6_pktoptions: if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit; if (np->rxopt.bits.rxtclass) - np->rcv_tclass = ipv6_tclass(ipv6_hdr(skb)); + np->rcv_tclass = ipv6_get_dsfield(ipv6_hdr(skb)); if (ipv6_opt_accepted(sk, opt_skb)) { skb_set_owner_r(opt_skb, sk); opt_skb = xchg(&np->pktoptions, opt_skb); @@ -1501,7 +1467,7 @@ static int tcp_v6_rcv(struct sk_buff *skb) goto discard_it; if (!skb_csum_unnecessary(skb) && tcp_v6_checksum_init(skb)) - goto bad_packet; + goto csum_error; th = tcp_hdr(skb); hdr = ipv6_hdr(skb); @@ -1565,6 +1531,8 @@ no_tcp_socket: goto discard_it; if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) { +csum_error: + TCP_INC_STATS_BH(net, TCP_MIB_CSUMERRORS); bad_packet: TCP_INC_STATS_BH(net, TCP_MIB_INERRS); } else { @@ -1572,11 +1540,6 @@ bad_packet: } discard_it: - - /* - * Discard frame - */ - kfree_skb(skb); return 0; @@ -1590,10 +1553,13 @@ do_time_wait: goto discard_it; } - if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) { - TCP_INC_STATS_BH(net, TCP_MIB_INERRS); + if (skb->len < (th->doff<<2)) { inet_twsk_put(inet_twsk(sk)); - goto discard_it; + goto bad_packet; + } + if (tcp_checksum_complete(skb)) { + inet_twsk_put(inet_twsk(sk)); + goto csum_error; } switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) { @@ -1602,6 +1568,7 @@ do_time_wait: struct sock *sk2; sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo, + &ipv6_hdr(skb)->saddr, th->source, &ipv6_hdr(skb)->daddr, ntohs(th->dest), inet6_iif(skb)); if (sk2 != NULL) { diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index fb083295ff0b..d4defdd44937 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -45,6 +45,7 @@ #include <net/tcp_states.h> #include <net/ip6_checksum.h> #include <net/xfrm.h> 
+#include <net/inet6_hashtables.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> @@ -203,7 +204,8 @@ static struct sock *udp6_lib_lookup2(struct net *net, { struct sock *sk, *result; struct hlist_nulls_node *node; - int score, badness; + int score, badness, matches = 0, reuseport = 0; + u32 hash = 0; begin: result = NULL; @@ -214,8 +216,18 @@ begin: if (score > badness) { result = sk; badness = score; - if (score == SCORE2_MAX) + reuseport = sk->sk_reuseport; + if (reuseport) { + hash = inet6_ehashfn(net, daddr, hnum, + saddr, sport); + matches = 1; + } else if (score == SCORE2_MAX) goto exact_match; + } else if (score == badness && reuseport) { + matches++; + if (((u64)hash * matches) >> 32 == 0) + result = sk; + hash = next_pseudo_random32(hash); } } /* @@ -249,7 +261,8 @@ struct sock *__udp6_lib_lookup(struct net *net, unsigned short hnum = ntohs(dport); unsigned int hash2, slot2, slot = udp_hashfn(net, hnum, udptable->mask); struct udp_hslot *hslot2, *hslot = &udptable->hash[slot]; - int score, badness; + int score, badness, matches = 0, reuseport = 0; + u32 hash = 0; rcu_read_lock(); if (hslot->count > 10) { @@ -284,6 +297,17 @@ begin: if (score > badness) { result = sk; badness = score; + reuseport = sk->sk_reuseport; + if (reuseport) { + hash = inet6_ehashfn(net, daddr, hnum, + saddr, sport); + matches = 1; + } + } else if (score == badness && reuseport) { + matches++; + if (((u64)hash * matches) >> 32 == 0) + result = sk; + hash = next_pseudo_random32(hash); } } /* @@ -426,15 +450,16 @@ try_again: sin6->sin6_family = AF_INET6; sin6->sin6_port = udp_hdr(skb)->source; sin6->sin6_flowinfo = 0; - sin6->sin6_scope_id = 0; - if (is_udp4) + if (is_udp4) { ipv6_addr_set_v4mapped(ip_hdr(skb)->saddr, &sin6->sin6_addr); - else { + sin6->sin6_scope_id = 0; + } else { sin6->sin6_addr = ipv6_hdr(skb)->saddr; - if (ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL) - sin6->sin6_scope_id = IP6CB(skb)->iif; + sin6->sin6_scope_id = + ipv6_iface_scope_id(&sin6->sin6_addr, + IP6CB(skb)->iif); } } @@ -458,12 +483,17 @@ out: csum_copy_err: slow = lock_sock_fast(sk); if (!skb_kill_datagram(sk, skb, flags)) { - if (is_udp4) + if (is_udp4) { + UDP_INC_STATS_USER(sock_net(sk), + UDP_MIB_CSUMERRORS, is_udplite); UDP_INC_STATS_USER(sock_net(sk), UDP_MIB_INERRORS, is_udplite); - else + } else { + UDP6_INC_STATS_USER(sock_net(sk), + UDP_MIB_CSUMERRORS, is_udplite); UDP6_INC_STATS_USER(sock_net(sk), UDP_MIB_INERRORS, is_udplite); + } } unlock_sock_fast(sk, slow); @@ -612,7 +642,7 @@ int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) if (rcu_access_pointer(sk->sk_filter)) { if (udp_lib_checksum_complete(skb)) - goto drop; + goto csum_error; } if (sk_rcvqueues_full(sk, skb, sk->sk_rcvbuf)) @@ -631,6 +661,8 @@ int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) bh_unlock_sock(sk); return rc; +csum_error: + UDP6_INC_STATS_BH(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite); drop: UDP6_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite); atomic_inc(&sk->sk_drops); @@ -752,40 +784,6 @@ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb, return 0; } -static inline int udp6_csum_init(struct sk_buff *skb, struct udphdr *uh, - int proto) -{ - int err; - - UDP_SKB_CB(skb)->partial_cov = 0; - UDP_SKB_CB(skb)->cscov = skb->len; - - if (proto == IPPROTO_UDPLITE) { - err = udplite_checksum_init(skb, uh); - if (err) - return err; - } - - if (uh->check == 0) { - /* RFC 2460 section 8.1 says that we SHOULD log - this error. Well, it is reasonable. 
- */ - LIMIT_NETDEBUG(KERN_INFO "IPv6: udp checksum is 0\n"); - return 1; - } - if (skb->ip_summed == CHECKSUM_COMPLETE && - !csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, - skb->len, proto, skb->csum)) - skb->ip_summed = CHECKSUM_UNNECESSARY; - - if (!skb_csum_unnecessary(skb)) - skb->csum = ~csum_unfold(csum_ipv6_magic(&ipv6_hdr(skb)->saddr, - &ipv6_hdr(skb)->daddr, - skb->len, proto, 0)); - - return 0; -} - int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, int proto) { @@ -826,7 +824,7 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, } if (udp6_csum_init(skb, uh, proto)) - goto discard; + goto csum_error; /* * Multicast receive code @@ -859,7 +857,7 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, goto discard; if (udp_lib_checksum_complete(skb)) - goto discard; + goto csum_error; UDP6_INC_STATS_BH(net, UDP_MIB_NOPORTS, proto == IPPROTO_UDPLITE); icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0); @@ -876,7 +874,9 @@ short_packet: skb->len, daddr, ntohs(uh->dest)); - + goto discard; +csum_error: + UDP6_INC_STATS_BH(net, UDP_MIB_CSUMERRORS, proto == IPPROTO_UDPLITE); discard: UDP6_INC_STATS_BH(net, UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE); kfree_skb(skb); @@ -1128,7 +1128,7 @@ do_udp_sendmsg: if (addr_len >= sizeof(struct sockaddr_in6) && sin6->sin6_scope_id && - ipv6_addr_type(daddr)&IPV6_ADDR_LINKLOCAL) + __ipv6_addr_needs_scope_id(__ipv6_addr_type(daddr))) fl6.flowi6_oif = sin6->sin6_scope_id; } else { if (sk->sk_state != TCP_ESTABLISHED) @@ -1295,10 +1295,18 @@ do_confirm: void udpv6_destroy_sock(struct sock *sk) { + struct udp_sock *up = udp_sk(sk); lock_sock(sk); udp_v6_flush_pending_frames(sk); release_sock(sk); + if (static_key_false(&udpv6_encap_needed) && up->encap_type) { + void (*encap_destroy)(struct sock *sk); + encap_destroy = ACCESS_ONCE(up->encap_destroy); + if (encap_destroy) + encap_destroy(sk); + } + inet6_destroy_sock(sk); } diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index 0c8934a317c2..3bb3a891a424 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -21,6 +21,10 @@ static int udp6_ufo_send_check(struct sk_buff *skb) const struct ipv6hdr *ipv6h; struct udphdr *uh; + /* UDP Tunnel offload on ipv6 is not yet supported. */ + if (skb->encapsulation) + return -EINVAL; + if (!pskb_may_pull(skb, sizeof(*uh))) return -EINVAL; @@ -56,7 +60,10 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, /* Packet is from an untrusted source, reset gso_segs. 
*/ int type = skb_shinfo(skb)->gso_type; - if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY) || + if (unlikely(type & ~(SKB_GSO_UDP | + SKB_GSO_DODGY | + SKB_GSO_UDP_TUNNEL | + SKB_GSO_GRE) || !(type & (SKB_GSO_UDP)))) goto out; diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c index 9f2095b19ad0..4770d515c2c8 100644 --- a/net/ipv6/xfrm6_mode_tunnel.c +++ b/net/ipv6/xfrm6_mode_tunnel.c @@ -49,8 +49,11 @@ static int xfrm6_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb) sizeof(top_iph->flow_lbl)); top_iph->nexthdr = xfrm_af2proto(skb_dst(skb)->ops->family); - dsfield = XFRM_MODE_SKB_CB(skb)->tos; - dsfield = INET_ECN_encapsulate(dsfield, dsfield); + if (x->props.extra_flags & XFRM_SA_XFLAG_DONT_ENCAP_DSCP) + dsfield = 0; + else + dsfield = XFRM_MODE_SKB_CB(skb)->tos; + dsfield = INET_ECN_encapsulate(dsfield, XFRM_MODE_SKB_CB(skb)->tos); if (x->props.flags & XFRM_STATE_NOECN) dsfield &= ~INET_ECN_MASK; ipv6_change_dsfield(top_iph, 0, dsfield); @@ -69,8 +72,8 @@ static int xfrm6_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb) if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) goto out; - if (skb_cloned(skb) && - (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC))) + err = skb_unclone(skb, GFP_ATOMIC); + if (err) goto out; if (x->props.flags & XFRM_STATE_DECAP_DSCP) diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index c9844135c9ca..4ef7bdb65440 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -110,7 +110,6 @@ static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, /* Sheit... I remember I did this right. Apparently, * it was magically lost, so this code needs audit */ - xdst->u.rt6.n = neigh_clone(rt->n); xdst->u.rt6.rt6i_flags = rt->rt6i_flags & (RTF_ANYCAST | RTF_LOCAL); xdst->u.rt6.rt6i_metric = rt->rt6i_metric; @@ -321,7 +320,51 @@ static struct ctl_table xfrm6_policy_table[] = { { } }; -static struct ctl_table_header *sysctl_hdr; +static int __net_init xfrm6_net_init(struct net *net) +{ + struct ctl_table *table; + struct ctl_table_header *hdr; + + table = xfrm6_policy_table; + if (!net_eq(net, &init_net)) { + table = kmemdup(table, sizeof(xfrm6_policy_table), GFP_KERNEL); + if (!table) + goto err_alloc; + + table[0].data = &net->xfrm.xfrm6_dst_ops.gc_thresh; + } + + hdr = register_net_sysctl(net, "net/ipv6", table); + if (!hdr) + goto err_reg; + + net->ipv6.sysctl.xfrm6_hdr = hdr; + return 0; + +err_reg: + if (!net_eq(net, &init_net)) + kfree(table); +err_alloc: + return -ENOMEM; +} + +static void __net_exit xfrm6_net_exit(struct net *net) +{ + struct ctl_table *table; + + if (net->ipv6.sysctl.xfrm6_hdr == NULL) + return; + + table = net->ipv6.sysctl.xfrm6_hdr->ctl_table_arg; + unregister_net_sysctl_table(net->ipv6.sysctl.xfrm6_hdr); + if (!net_eq(net, &init_net)) + kfree(table); +} + +static struct pernet_operations xfrm6_net_ops = { + .init = xfrm6_net_init, + .exit = xfrm6_net_exit, +}; #endif int __init xfrm6_init(void) @@ -340,8 +383,7 @@ int __init xfrm6_init(void) goto out_policy; #ifdef CONFIG_SYSCTL - sysctl_hdr = register_net_sysctl(&init_net, "net/ipv6", - xfrm6_policy_table); + register_pernet_subsys(&xfrm6_net_ops); #endif out: return ret; @@ -353,8 +395,7 @@ out_policy: void xfrm6_fini(void) { #ifdef CONFIG_SYSCTL - if (sysctl_hdr) - unregister_net_sysctl_table(sysctl_hdr); + unregister_pernet_subsys(&xfrm6_net_ops); #endif xfrm6_policy_fini(); xfrm6_state_fini(); diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c index ee5a7065aacc..de2bcfaaf759 100644 --- 
a/net/ipv6/xfrm6_tunnel.c +++ b/net/ipv6/xfrm6_tunnel.c @@ -72,7 +72,7 @@ static inline unsigned int xfrm6_tunnel_spi_hash_byaddr(const xfrm_address_t *ad { unsigned int h; - h = (__force u32)(addr->a6[0] ^ addr->a6[1] ^ addr->a6[2] ^ addr->a6[3]); + h = ipv6_addr_hash((const struct in6_addr *)addr); h ^= h >> 16; h ^= h >> 8; h &= XFRM6_TUNNEL_SPI_BYADDR_HSIZE - 1; @@ -89,12 +89,11 @@ static struct xfrm6_tunnel_spi *__xfrm6_tunnel_spi_lookup(struct net *net, const { struct xfrm6_tunnel_net *xfrm6_tn = xfrm6_tunnel_pernet(net); struct xfrm6_tunnel_spi *x6spi; - struct hlist_node *pos; - hlist_for_each_entry_rcu(x6spi, pos, + hlist_for_each_entry_rcu(x6spi, &xfrm6_tn->spi_byaddr[xfrm6_tunnel_spi_hash_byaddr(saddr)], list_byaddr) { - if (memcmp(&x6spi->addr, saddr, sizeof(x6spi->addr)) == 0) + if (xfrm6_addr_equal(&x6spi->addr, saddr)) return x6spi; } @@ -120,9 +119,8 @@ static int __xfrm6_tunnel_spi_check(struct net *net, u32 spi) struct xfrm6_tunnel_net *xfrm6_tn = xfrm6_tunnel_pernet(net); struct xfrm6_tunnel_spi *x6spi; int index = xfrm6_tunnel_spi_hash_byspi(spi); - struct hlist_node *pos; - hlist_for_each_entry(x6spi, pos, + hlist_for_each_entry(x6spi, &xfrm6_tn->spi_byspi[index], list_byspi) { if (x6spi->spi == spi) @@ -203,15 +201,15 @@ static void xfrm6_tunnel_free_spi(struct net *net, xfrm_address_t *saddr) { struct xfrm6_tunnel_net *xfrm6_tn = xfrm6_tunnel_pernet(net); struct xfrm6_tunnel_spi *x6spi; - struct hlist_node *pos, *n; + struct hlist_node *n; spin_lock_bh(&xfrm6_tunnel_spi_lock); - hlist_for_each_entry_safe(x6spi, pos, n, + hlist_for_each_entry_safe(x6spi, n, &xfrm6_tn->spi_byaddr[xfrm6_tunnel_spi_hash_byaddr(saddr)], list_byaddr) { - if (memcmp(&x6spi->addr, saddr, sizeof(x6spi->addr)) == 0) { + if (xfrm6_addr_equal(&x6spi->addr, saddr)) { if (atomic_dec_and_test(&x6spi->refcnt)) { hlist_del_rcu(&x6spi->list_byaddr); hlist_del_rcu(&x6spi->list_byspi); diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c index dfd6faaf0ea7..f547a47d381c 100644 --- a/net/ipx/af_ipx.c +++ b/net/ipx/af_ipx.c @@ -228,9 +228,8 @@ static struct sock *__ipxitf_find_socket(struct ipx_interface *intrfc, __be16 port) { struct sock *s; - struct hlist_node *node; - sk_for_each(s, node, &intrfc->if_sklist) + sk_for_each(s, &intrfc->if_sklist) if (ipx_sk(s)->port == port) goto found; s = NULL; @@ -259,12 +258,11 @@ static struct sock *ipxitf_find_internal_socket(struct ipx_interface *intrfc, __be16 port) { struct sock *s; - struct hlist_node *node; ipxitf_hold(intrfc); spin_lock_bh(&intrfc->if_sklist_lock); - sk_for_each(s, node, &intrfc->if_sklist) { + sk_for_each(s, &intrfc->if_sklist) { struct ipx_sock *ipxs = ipx_sk(s); if (ipxs->port == port && @@ -282,14 +280,14 @@ found: static void __ipxitf_down(struct ipx_interface *intrfc) { struct sock *s; - struct hlist_node *node, *t; + struct hlist_node *t; /* Delete all routes associated with this interface */ ipxrtr_del_routes(intrfc); spin_lock_bh(&intrfc->if_sklist_lock); /* error sockets */ - sk_for_each_safe(s, node, t, &intrfc->if_sklist) { + sk_for_each_safe(s, t, &intrfc->if_sklist) { struct ipx_sock *ipxs = ipx_sk(s); s->sk_err = ENOLINK; @@ -385,12 +383,11 @@ static int ipxitf_demux_socket(struct ipx_interface *intrfc, int is_broadcast = !memcmp(ipx->ipx_dest.node, ipx_broadcast_node, IPX_NODE_LEN); struct sock *s; - struct hlist_node *node; int rc; spin_lock_bh(&intrfc->if_sklist_lock); - sk_for_each(s, node, &intrfc->if_sklist) { + sk_for_each(s, &intrfc->if_sklist) { struct ipx_sock *ipxs = ipx_sk(s); if (ipxs->port == ipx->ipx_dest.sock && @@ 
-446,12 +443,11 @@ static struct sock *ncp_connection_hack(struct ipx_interface *intrfc, connection = (((int) *(ncphdr + 9)) << 8) | (int) *(ncphdr + 8); if (connection) { - struct hlist_node *node; /* Now we have to look for a special NCP connection handling * socket. Only these sockets have ipx_ncp_conn != 0, set by * SIOCIPXNCPCONN. */ spin_lock_bh(&intrfc->if_sklist_lock); - sk_for_each(sk, node, &intrfc->if_sklist) + sk_for_each(sk, &intrfc->if_sklist) if (ipx_sk(sk)->ipx_ncp_conn == connection) { sock_hold(sk); goto found; diff --git a/net/ipx/ipx_proc.c b/net/ipx/ipx_proc.c index 02ff7f2f60d4..65e8833a2510 100644 --- a/net/ipx/ipx_proc.c +++ b/net/ipx/ipx_proc.c @@ -103,19 +103,18 @@ out: static __inline__ struct sock *ipx_get_socket_idx(loff_t pos) { struct sock *s = NULL; - struct hlist_node *node; struct ipx_interface *i; list_for_each_entry(i, &ipx_interfaces, node) { spin_lock_bh(&i->if_sklist_lock); - sk_for_each(s, node, &i->if_sklist) { + sk_for_each(s, &i->if_sklist) { if (!pos) break; --pos; } spin_unlock_bh(&i->if_sklist_lock); if (!pos) { - if (node) + if (s) goto found; break; } diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c index b833677d83d6..0578d4fa00a9 100644 --- a/net/irda/af_irda.c +++ b/net/irda/af_irda.c @@ -305,8 +305,7 @@ static void irda_connect_response(struct irda_sock *self) IRDA_DEBUG(2, "%s()\n", __func__); - skb = alloc_skb(TTP_MAX_HEADER + TTP_SAR_HEADER, - GFP_ATOMIC); + skb = alloc_skb(TTP_MAX_HEADER + TTP_SAR_HEADER, GFP_KERNEL); if (skb == NULL) { IRDA_DEBUG(0, "%s() Unable to allocate sk_buff!\n", __func__); @@ -1120,7 +1119,7 @@ static int irda_create(struct net *net, struct socket *sock, int protocol, } /* Allocate networking socket */ - sk = sk_alloc(net, PF_IRDA, GFP_ATOMIC, &irda_proto); + sk = sk_alloc(net, PF_IRDA, GFP_KERNEL, &irda_proto); if (sk == NULL) return -ENOMEM; @@ -1386,6 +1385,8 @@ static int irda_recvmsg_dgram(struct kiocb *iocb, struct socket *sock, IRDA_DEBUG(4, "%s()\n", __func__); + msg->msg_namelen = 0; + skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT, flags & MSG_DONTWAIT, &err); if (!skb) @@ -2567,8 +2568,7 @@ bed: err); /* If watchdog is still activated, kill it! */ - if(timer_pending(&(self->watchdog))) - del_timer(&(self->watchdog)); + del_timer(&(self->watchdog)); IRDA_DEBUG(1, "%s(), ...waking up !\n", __func__); @@ -2584,8 +2584,10 @@ bed: NULL, NULL, NULL); /* Check if the we got some results */ - if (!self->cachedaddr) - return -EAGAIN; /* Didn't find any devices */ + if (!self->cachedaddr) { + err = -EAGAIN; /* Didn't find any devices */ + goto out; + } daddr = self->cachedaddr; /* Cleanup */ self->cachedaddr = 0; diff --git a/net/irda/ircomm/Kconfig b/net/irda/ircomm/Kconfig index 2d4c6b4a78d6..19492c1707b7 100644 --- a/net/irda/ircomm/Kconfig +++ b/net/irda/ircomm/Kconfig @@ -1,6 +1,6 @@ config IRCOMM tristate "IrCOMM protocol" - depends on IRDA + depends on IRDA && TTY help Say Y here if you want to build support for the IrCOMM protocol. 
To compile it as modules, choose M here: the modules will be diff --git a/net/irda/ircomm/ircomm_core.c b/net/irda/ircomm/ircomm_core.c index 52079f19bbbe..b797daac063c 100644 --- a/net/irda/ircomm/ircomm_core.c +++ b/net/irda/ircomm/ircomm_core.c @@ -117,7 +117,7 @@ struct ircomm_cb *ircomm_open(notify_t *notify, __u8 service_type, int line) IRDA_ASSERT(ircomm != NULL, return NULL;); - self = kzalloc(sizeof(struct ircomm_cb), GFP_ATOMIC); + self = kzalloc(sizeof(struct ircomm_cb), GFP_KERNEL); if (self == NULL) return NULL; diff --git a/net/irda/ircomm/ircomm_tty.c b/net/irda/ircomm/ircomm_tty.c index a68c88cdec6e..41ac7938268b 100644 --- a/net/irda/ircomm/ircomm_tty.c +++ b/net/irda/ircomm/ircomm_tty.c @@ -280,7 +280,7 @@ static int ircomm_tty_block_til_ready(struct ircomm_tty_cb *self, struct tty_port *port = &self->port; DECLARE_WAITQUEUE(wait, current); int retval; - int do_clocal = 0, extra_count = 0; + int do_clocal = 0; unsigned long flags; IRDA_DEBUG(2, "%s()\n", __func__ ); @@ -289,8 +289,15 @@ static int ircomm_tty_block_til_ready(struct ircomm_tty_cb *self, * If non-blocking mode is set, or the port is not enabled, * then make the check up front and then exit. */ - if (filp->f_flags & O_NONBLOCK || tty->flags & (1 << TTY_IO_ERROR)){ - /* nonblock mode is set or port is not enabled */ + if (test_bit(TTY_IO_ERROR, &tty->flags)) { + port->flags |= ASYNC_NORMAL_ACTIVE; + return 0; + } + + if (filp->f_flags & O_NONBLOCK) { + /* nonblock mode is set */ + if (tty->termios.c_cflag & CBAUD) + tty_port_raise_dtr_rts(port); port->flags |= ASYNC_NORMAL_ACTIVE; IRDA_DEBUG(1, "%s(), O_NONBLOCK requested!\n", __func__ ); return 0; @@ -315,18 +322,16 @@ static int ircomm_tty_block_til_ready(struct ircomm_tty_cb *self, __FILE__, __LINE__, tty->driver->name, port->count); spin_lock_irqsave(&port->lock, flags); - if (!tty_hung_up_p(filp)) { - extra_count = 1; + if (!tty_hung_up_p(filp)) port->count--; - } - spin_unlock_irqrestore(&port->lock, flags); port->blocked_open++; + spin_unlock_irqrestore(&port->lock, flags); while (1) { - if (tty->termios.c_cflag & CBAUD) + if (C_BAUD(tty) && test_bit(ASYNCB_INITIALIZED, &port->flags)) tty_port_raise_dtr_rts(port); - current->state = TASK_INTERRUPTIBLE; + set_current_state(TASK_INTERRUPTIBLE); if (tty_hung_up_p(filp) || !test_bit(ASYNCB_INITIALIZED, &port->flags)) { @@ -361,13 +366,11 @@ static int ircomm_tty_block_til_ready(struct ircomm_tty_cb *self, __set_current_state(TASK_RUNNING); remove_wait_queue(&port->open_wait, &wait); - if (extra_count) { - /* ++ is not atomic, so this should be protected - Jean II */ - spin_lock_irqsave(&port->lock, flags); + spin_lock_irqsave(&port->lock, flags); + if (!tty_hung_up_p(filp)) port->count++; - spin_unlock_irqrestore(&port->lock, flags); - } port->blocked_open--; + spin_unlock_irqrestore(&port->lock, flags); IRDA_DEBUG(1, "%s(%d):block_til_ready after blocking on %s open_count=%d\n", __FILE__, __LINE__, tty->driver->name, port->count); @@ -452,7 +455,7 @@ static int ircomm_tty_open(struct tty_struct *tty, struct file *filp) self->line, self->port.count); /* Not really used by us, but lets do it anyway */ - tty->low_latency = (self->port.flags & ASYNC_LOW_LATENCY) ? 1 : 0; + self->port.low_latency = (self->port.flags & ASYNC_LOW_LATENCY) ? 
1 : 0; /* * If the port is the middle of closing, bail out now @@ -1136,14 +1139,14 @@ static int ircomm_tty_data_indication(void *instance, void *sap, ircomm_tty_send_initial_parameters(self); ircomm_tty_link_established(self); } + tty_kref_put(tty); /* * Use flip buffer functions since the code may be called from interrupt * context */ - tty_insert_flip_string(tty, skb->data, skb->len); - tty_flip_buffer_push(tty); - tty_kref_put(tty); + tty_insert_flip_string(&self->port, skb->data, skb->len); + tty_flip_buffer_push(&self->port); /* No need to kfree_skb - see ircomm_ttp_data_indication() */ diff --git a/net/irda/ircomm/ircomm_tty_attach.c b/net/irda/ircomm/ircomm_tty_attach.c index edab393e0c82..a2a508f5f268 100644 --- a/net/irda/ircomm/ircomm_tty_attach.c +++ b/net/irda/ircomm/ircomm_tty_attach.c @@ -997,12 +997,8 @@ static int ircomm_tty_state_ready(struct ircomm_tty_cb *self, self->settings.dce = IRCOMM_DELTA_CD; ircomm_tty_check_modem_status(self); } else { - struct tty_struct *tty = tty_port_tty_get(&self->port); IRDA_DEBUG(0, "%s(), hanging up!\n", __func__ ); - if (tty) { - tty_hangup(tty); - tty_kref_put(tty); - } + tty_port_tty_hangup(&self->port, false); } break; default: diff --git a/net/irda/iriap.c b/net/irda/iriap.c index e71e85ba2bf1..e1b37f5a2691 100644 --- a/net/irda/iriap.c +++ b/net/irda/iriap.c @@ -303,7 +303,8 @@ static void iriap_disconnect_indication(void *instance, void *sap, { struct iriap_cb *self; - IRDA_DEBUG(4, "%s(), reason=%s\n", __func__, irlmp_reasons[reason]); + IRDA_DEBUG(4, "%s(), reason=%s [%d]\n", __func__, + irlmp_reason_str(reason), reason); self = instance; @@ -495,8 +496,11 @@ static void iriap_getvaluebyclass_confirm(struct iriap_cb *self, /* case CS_ISO_8859_9: */ /* case CS_UNICODE: */ default: - IRDA_DEBUG(0, "%s(), charset %s, not supported\n", - __func__, ias_charset_types[charset]); + IRDA_DEBUG(0, "%s(), charset [%d] %s, not supported\n", + __func__, charset, + charset < ARRAY_SIZE(ias_charset_types) ? + ias_charset_types[charset] : + "(unknown)"); /* Aborting, close connection! 
*/ iriap_disconnect_request(self); diff --git a/net/irda/irlmp.c b/net/irda/irlmp.c index 6115a44c0a24..98ad6ec4bd3c 100644 --- a/net/irda/irlmp.c +++ b/net/irda/irlmp.c @@ -58,7 +58,7 @@ int sysctl_discovery_slots = 6; /* 6 slots by default */ int sysctl_lap_keepalive_time = LM_IDLE_TIMEOUT * 1000 / HZ; char sysctl_devname[65]; -const char *irlmp_reasons[] = { +static const char *irlmp_reasons[] = { "ERROR, NOT USED", "LM_USER_REQUEST", "LM_LAP_DISCONNECT", @@ -66,8 +66,15 @@ const char *irlmp_reasons[] = { "LM_LAP_RESET", "LM_INIT_DISCONNECT", "ERROR, NOT USED", + "UNKNOWN", }; +const char *irlmp_reason_str(LM_REASON reason) +{ + reason = min_t(size_t, reason, ARRAY_SIZE(irlmp_reasons) - 1); + return irlmp_reasons[reason]; +} + /* * Function irlmp_init (void) * @@ -747,7 +754,8 @@ void irlmp_disconnect_indication(struct lsap_cb *self, LM_REASON reason, { struct lsap_cb *lsap; - IRDA_DEBUG(1, "%s(), reason=%s\n", __func__, irlmp_reasons[reason]); + IRDA_DEBUG(1, "%s(), reason=%s [%d]\n", __func__, + irlmp_reason_str(reason), reason); IRDA_ASSERT(self != NULL, return;); IRDA_ASSERT(self->magic == LMP_LSAP_MAGIC, return;); diff --git a/net/irda/irnet/irnet_ppp.c b/net/irda/irnet/irnet_ppp.c index 2bb2beb6a373..3c83a1e5ab03 100644 --- a/net/irda/irnet/irnet_ppp.c +++ b/net/irda/irnet/irnet_ppp.c @@ -214,8 +214,7 @@ irnet_get_discovery_log(irnet_socket * ap) * After reading : discoveries = NULL ; disco_index = Y ; disco_number = -1 */ static inline int -irnet_read_discovery_log(irnet_socket * ap, - char * event) +irnet_read_discovery_log(irnet_socket *ap, char *event, int buf_size) { int done_event = 0; @@ -237,12 +236,13 @@ irnet_read_discovery_log(irnet_socket * ap, if(ap->disco_index < ap->disco_number) { /* Write an event */ - sprintf(event, "Found %08x (%s) behind %08x {hints %02X-%02X}\n", - ap->discoveries[ap->disco_index].daddr, - ap->discoveries[ap->disco_index].info, - ap->discoveries[ap->disco_index].saddr, - ap->discoveries[ap->disco_index].hints[0], - ap->discoveries[ap->disco_index].hints[1]); + snprintf(event, buf_size, + "Found %08x (%s) behind %08x {hints %02X-%02X}\n", + ap->discoveries[ap->disco_index].daddr, + ap->discoveries[ap->disco_index].info, + ap->discoveries[ap->disco_index].saddr, + ap->discoveries[ap->disco_index].hints[0], + ap->discoveries[ap->disco_index].hints[1]); DEBUG(CTRL_INFO, "Writing discovery %d : %s\n", ap->disco_index, ap->discoveries[ap->disco_index].info); @@ -282,27 +282,24 @@ irnet_ctrl_read(irnet_socket * ap, size_t count) { DECLARE_WAITQUEUE(wait, current); - char event[64]; /* Max event is 61 char */ + char event[75]; ssize_t ret = 0; DENTER(CTRL_TRACE, "(ap=0x%p, count=%Zd)\n", ap, count); - /* Check if we can write an event out in one go */ - DABORT(count < sizeof(event), -EOVERFLOW, CTRL_ERROR, "Buffer to small.\n"); - #ifdef INITIAL_DISCOVERY /* Check if we have read the log */ - if(irnet_read_discovery_log(ap, event)) + if (irnet_read_discovery_log(ap, event, sizeof(event))) { - /* We have an event !!! 
Copy it to the user */ - if(copy_to_user(buf, event, strlen(event))) + count = min(strlen(event), count); + if (copy_to_user(buf, event, count)) { DERROR(CTRL_ERROR, "Invalid user space pointer.\n"); return -EFAULT; } DEXIT(CTRL_TRACE, "\n"); - return strlen(event); + return count; } #endif /* INITIAL_DISCOVERY */ @@ -339,79 +336,81 @@ irnet_ctrl_read(irnet_socket * ap, switch(irnet_events.log[ap->event_index].event) { case IRNET_DISCOVER: - sprintf(event, "Discovered %08x (%s) behind %08x {hints %02X-%02X}\n", - irnet_events.log[ap->event_index].daddr, - irnet_events.log[ap->event_index].name, - irnet_events.log[ap->event_index].saddr, - irnet_events.log[ap->event_index].hints.byte[0], - irnet_events.log[ap->event_index].hints.byte[1]); + snprintf(event, sizeof(event), + "Discovered %08x (%s) behind %08x {hints %02X-%02X}\n", + irnet_events.log[ap->event_index].daddr, + irnet_events.log[ap->event_index].name, + irnet_events.log[ap->event_index].saddr, + irnet_events.log[ap->event_index].hints.byte[0], + irnet_events.log[ap->event_index].hints.byte[1]); break; case IRNET_EXPIRE: - sprintf(event, "Expired %08x (%s) behind %08x {hints %02X-%02X}\n", - irnet_events.log[ap->event_index].daddr, - irnet_events.log[ap->event_index].name, - irnet_events.log[ap->event_index].saddr, - irnet_events.log[ap->event_index].hints.byte[0], - irnet_events.log[ap->event_index].hints.byte[1]); + snprintf(event, sizeof(event), + "Expired %08x (%s) behind %08x {hints %02X-%02X}\n", + irnet_events.log[ap->event_index].daddr, + irnet_events.log[ap->event_index].name, + irnet_events.log[ap->event_index].saddr, + irnet_events.log[ap->event_index].hints.byte[0], + irnet_events.log[ap->event_index].hints.byte[1]); break; case IRNET_CONNECT_TO: - sprintf(event, "Connected to %08x (%s) on ppp%d\n", - irnet_events.log[ap->event_index].daddr, - irnet_events.log[ap->event_index].name, - irnet_events.log[ap->event_index].unit); + snprintf(event, sizeof(event), "Connected to %08x (%s) on ppp%d\n", + irnet_events.log[ap->event_index].daddr, + irnet_events.log[ap->event_index].name, + irnet_events.log[ap->event_index].unit); break; case IRNET_CONNECT_FROM: - sprintf(event, "Connection from %08x (%s) on ppp%d\n", - irnet_events.log[ap->event_index].daddr, - irnet_events.log[ap->event_index].name, - irnet_events.log[ap->event_index].unit); + snprintf(event, sizeof(event), "Connection from %08x (%s) on ppp%d\n", + irnet_events.log[ap->event_index].daddr, + irnet_events.log[ap->event_index].name, + irnet_events.log[ap->event_index].unit); break; case IRNET_REQUEST_FROM: - sprintf(event, "Request from %08x (%s) behind %08x\n", - irnet_events.log[ap->event_index].daddr, - irnet_events.log[ap->event_index].name, - irnet_events.log[ap->event_index].saddr); + snprintf(event, sizeof(event), "Request from %08x (%s) behind %08x\n", + irnet_events.log[ap->event_index].daddr, + irnet_events.log[ap->event_index].name, + irnet_events.log[ap->event_index].saddr); break; case IRNET_NOANSWER_FROM: - sprintf(event, "No-answer from %08x (%s) on ppp%d\n", - irnet_events.log[ap->event_index].daddr, - irnet_events.log[ap->event_index].name, - irnet_events.log[ap->event_index].unit); + snprintf(event, sizeof(event), "No-answer from %08x (%s) on ppp%d\n", + irnet_events.log[ap->event_index].daddr, + irnet_events.log[ap->event_index].name, + irnet_events.log[ap->event_index].unit); break; case IRNET_BLOCKED_LINK: - sprintf(event, "Blocked link with %08x (%s) on ppp%d\n", - irnet_events.log[ap->event_index].daddr, - 
irnet_events.log[ap->event_index].name, - irnet_events.log[ap->event_index].unit); + snprintf(event, sizeof(event), "Blocked link with %08x (%s) on ppp%d\n", + irnet_events.log[ap->event_index].daddr, + irnet_events.log[ap->event_index].name, + irnet_events.log[ap->event_index].unit); break; case IRNET_DISCONNECT_FROM: - sprintf(event, "Disconnection from %08x (%s) on ppp%d\n", - irnet_events.log[ap->event_index].daddr, - irnet_events.log[ap->event_index].name, - irnet_events.log[ap->event_index].unit); + snprintf(event, sizeof(event), "Disconnection from %08x (%s) on ppp%d\n", + irnet_events.log[ap->event_index].daddr, + irnet_events.log[ap->event_index].name, + irnet_events.log[ap->event_index].unit); break; case IRNET_DISCONNECT_TO: - sprintf(event, "Disconnected to %08x (%s)\n", - irnet_events.log[ap->event_index].daddr, - irnet_events.log[ap->event_index].name); + snprintf(event, sizeof(event), "Disconnected to %08x (%s)\n", + irnet_events.log[ap->event_index].daddr, + irnet_events.log[ap->event_index].name); break; default: - sprintf(event, "Bug\n"); + snprintf(event, sizeof(event), "Bug\n"); } /* Increment our event index */ ap->event_index = (ap->event_index + 1) % IRNET_MAX_EVENTS; DEBUG(CTRL_INFO, "Event is :%s", event); - /* Copy it to the user */ - if(copy_to_user(buf, event, strlen(event))) + count = min(strlen(event), count); + if (copy_to_user(buf, event, count)) { DERROR(CTRL_ERROR, "Invalid user space pointer.\n"); return -EFAULT; } DEXIT(CTRL_TRACE, "\n"); - return strlen(event); + return count; } /*------------------------------------------------------------------*/ diff --git a/net/irda/timer.c b/net/irda/timer.c index 1d552b3946fc..0c4c115a5cab 100644 --- a/net/irda/timer.c +++ b/net/irda/timer.c @@ -57,7 +57,7 @@ void irlap_start_query_timer(struct irlap_cb *self, int S, int s) * Basically, we multiply the number of remaining slots by our * slot time, plus add some extra time to properly receive the last * discovery packet (which is longer due to extra discovery info), - * to avoid messing with for incomming connections requests and + * to avoid messing with for incoming connections requests and * to accommodate devices that perform discovery slower than us. 
* Jean II */ timeout = ((sysctl_slot_timeout * HZ / 1000) * (S - s) diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index cd6f7a991d80..ae691651b721 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -49,12 +49,6 @@ static const u8 iprm_shutdown[8] = #define TRGCLS_SIZE (sizeof(((struct iucv_message *)0)->class)) -/* macros to set/get socket control buffer at correct offset */ -#define CB_TAG(skb) ((skb)->cb) /* iucv message tag */ -#define CB_TAG_LEN (sizeof(((struct iucv_message *) 0)->tag)) -#define CB_TRGCLS(skb) ((skb)->cb + CB_TAG_LEN) /* iucv msg target class */ -#define CB_TRGCLS_LEN (TRGCLS_SIZE) - #define __iucv_sock_wait(sk, condition, timeo, ret) \ do { \ DEFINE_WAIT(__wait); \ @@ -156,14 +150,13 @@ static int afiucv_pm_freeze(struct device *dev) { struct iucv_sock *iucv; struct sock *sk; - struct hlist_node *node; int err = 0; #ifdef CONFIG_PM_DEBUG printk(KERN_WARNING "afiucv_pm_freeze\n"); #endif read_lock(&iucv_sk_list.lock); - sk_for_each(sk, node, &iucv_sk_list.head) { + sk_for_each(sk, &iucv_sk_list.head) { iucv = iucv_sk(sk); switch (sk->sk_state) { case IUCV_DISCONN: @@ -194,13 +187,12 @@ static int afiucv_pm_freeze(struct device *dev) static int afiucv_pm_restore_thaw(struct device *dev) { struct sock *sk; - struct hlist_node *node; #ifdef CONFIG_PM_DEBUG printk(KERN_WARNING "afiucv_pm_restore_thaw\n"); #endif read_lock(&iucv_sk_list.lock); - sk_for_each(sk, node, &iucv_sk_list.head) { + sk_for_each(sk, &iucv_sk_list.head) { switch (sk->sk_state) { case IUCV_CONNECTED: sk->sk_err = EPIPE; @@ -390,9 +382,8 @@ static int afiucv_hs_send(struct iucv_message *imsg, struct sock *sock, static struct sock *__iucv_get_sock_by_name(char *nm) { struct sock *sk; - struct hlist_node *node; - sk_for_each(sk, node, &iucv_sk_list.head) + sk_for_each(sk, &iucv_sk_list.head) if (!memcmp(&iucv_sk(sk)->src_name, nm, 8)) return sk; @@ -1144,7 +1135,7 @@ static int iucv_sock_sendmsg(struct kiocb *iocb, struct socket *sock, /* increment and save iucv message tag for msg_completion cbk */ txmsg.tag = iucv->send_tag++; - memcpy(CB_TAG(skb), &txmsg.tag, CB_TAG_LEN); + IUCV_SKB_CB(skb)->tag = txmsg.tag; if (iucv->transport == AF_IUCV_TRANS_HIPER) { atomic_inc(&iucv->msg_sent); @@ -1227,7 +1218,7 @@ static int iucv_fragment_skb(struct sock *sk, struct sk_buff *skb, int len) return -ENOMEM; /* copy target class to control buffer of new skb */ - memcpy(CB_TRGCLS(nskb), CB_TRGCLS(skb), CB_TRGCLS_LEN); + IUCV_SKB_CB(nskb)->class = IUCV_SKB_CB(skb)->class; /* copy data fragment */ memcpy(nskb->data, skb->data + copied, size); @@ -1259,7 +1250,7 @@ static void iucv_process_message(struct sock *sk, struct sk_buff *skb, /* store msg target class in the second 4 bytes of skb ctrl buffer */ /* Note: the first 4 bytes are reserved for msg tag */ - memcpy(CB_TRGCLS(skb), &msg->class, CB_TRGCLS_LEN); + IUCV_SKB_CB(skb)->class = msg->class; /* check for special IPRM messages (e.g. 
iucv_sock_shutdown) */ if ((msg->flags & IUCV_IPRMDATA) && len > 7) { @@ -1295,6 +1286,7 @@ static void iucv_process_message(struct sock *sk, struct sk_buff *skb, } } + IUCV_SKB_CB(skb)->offset = 0; if (sock_queue_rcv_skb(sk, skb)) skb_queue_head(&iucv_sk(sk)->backlog_skb_q, skb); } @@ -1330,6 +1322,9 @@ static int iucv_sock_recvmsg(struct kiocb *iocb, struct socket *sock, unsigned int copied, rlen; struct sk_buff *skb, *rskb, *cskb; int err = 0; + u32 offset; + + msg->msg_namelen = 0; if ((sk->sk_state == IUCV_DISCONN) && skb_queue_empty(&iucv->backlog_skb_q) && @@ -1349,13 +1344,14 @@ static int iucv_sock_recvmsg(struct kiocb *iocb, struct socket *sock, return err; } - rlen = skb->len; /* real length of skb */ + offset = IUCV_SKB_CB(skb)->offset; + rlen = skb->len - offset; /* real length of skb */ copied = min_t(unsigned int, rlen, len); if (!rlen) sk->sk_shutdown = sk->sk_shutdown | RCV_SHUTDOWN; cskb = skb; - if (skb_copy_datagram_iovec(cskb, 0, msg->msg_iov, copied)) { + if (skb_copy_datagram_iovec(cskb, offset, msg->msg_iov, copied)) { if (!(flags & MSG_PEEK)) skb_queue_head(&sk->sk_receive_queue, skb); return -EFAULT; @@ -1373,7 +1369,8 @@ static int iucv_sock_recvmsg(struct kiocb *iocb, struct socket *sock, * get the trgcls from the control buffer of the skb due to * fragmentation of original iucv message. */ err = put_cmsg(msg, SOL_IUCV, SCM_IUCV_TRGCLS, - CB_TRGCLS_LEN, CB_TRGCLS(skb)); + sizeof(IUCV_SKB_CB(skb)->class), + (void *)&IUCV_SKB_CB(skb)->class); if (err) { if (!(flags & MSG_PEEK)) skb_queue_head(&sk->sk_receive_queue, skb); @@ -1385,9 +1382,8 @@ static int iucv_sock_recvmsg(struct kiocb *iocb, struct socket *sock, /* SOCK_STREAM: re-queue skb if it contains unreceived data */ if (sk->sk_type == SOCK_STREAM) { - skb_pull(skb, copied); - if (skb->len) { - skb_queue_head(&sk->sk_receive_queue, skb); + if (copied < rlen) { + IUCV_SKB_CB(skb)->offset = offset + copied; goto done; } } @@ -1406,6 +1402,7 @@ static int iucv_sock_recvmsg(struct kiocb *iocb, struct socket *sock, spin_lock_bh(&iucv->message_q.lock); rskb = skb_dequeue(&iucv->backlog_skb_q); while (rskb) { + IUCV_SKB_CB(rskb)->offset = 0; if (sock_queue_rcv_skb(sk, rskb)) { skb_queue_head(&iucv->backlog_skb_q, rskb); @@ -1464,7 +1461,8 @@ unsigned int iucv_sock_poll(struct file *file, struct socket *sock, return iucv_accept_poll(sk); if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) - mask |= POLLERR; + mask |= POLLERR | + (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? 
POLLPRI : 0); if (sk->sk_shutdown & RCV_SHUTDOWN) mask |= POLLRDHUP; @@ -1678,7 +1676,6 @@ static int iucv_callback_connreq(struct iucv_path *path, unsigned char user_data[16]; unsigned char nuser_data[16]; unsigned char src_name[8]; - struct hlist_node *node; struct sock *sk, *nsk; struct iucv_sock *iucv, *niucv; int err; @@ -1689,7 +1686,7 @@ static int iucv_callback_connreq(struct iucv_path *path, read_lock(&iucv_sk_list.lock); iucv = NULL; sk = NULL; - sk_for_each(sk, node, &iucv_sk_list.head) + sk_for_each(sk, &iucv_sk_list.head) if (sk->sk_state == IUCV_LISTEN && !memcmp(&iucv_sk(sk)->src_name, src_name, 8)) { /* @@ -1834,7 +1831,7 @@ static void iucv_callback_txdone(struct iucv_path *path, spin_lock_irqsave(&list->lock, flags); while (list_skb != (struct sk_buff *)list) { - if (!memcmp(&msg->tag, CB_TAG(list_skb), CB_TAG_LEN)) { + if (msg->tag != IUCV_SKB_CB(list_skb)->tag) { this = list_skb; break; } @@ -2095,6 +2092,7 @@ static int afiucv_hs_callback_rx(struct sock *sk, struct sk_buff *skb) skb_pull(skb, sizeof(struct af_iucv_trans_hdr)); skb_reset_transport_header(skb); skb_reset_network_header(skb); + IUCV_SKB_CB(skb)->offset = 0; spin_lock(&iucv->message_q.lock); if (skb_queue_empty(&iucv->backlog_skb_q)) { if (sock_queue_rcv_skb(sk, skb)) { @@ -2115,7 +2113,6 @@ static int afiucv_hs_callback_rx(struct sock *sk, struct sk_buff *skb) static int afiucv_hs_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { - struct hlist_node *node; struct sock *sk; struct iucv_sock *iucv; struct af_iucv_trans_hdr *trans_hdr; @@ -2132,7 +2129,7 @@ static int afiucv_hs_rcv(struct sk_buff *skb, struct net_device *dev, iucv = NULL; sk = NULL; read_lock(&iucv_sk_list.lock); - sk_for_each(sk, node, &iucv_sk_list.head) { + sk_for_each(sk, &iucv_sk_list.head) { if (trans_hdr->flags == AF_IUCV_FLAG_SYN) { if ((!memcmp(&iucv_sk(sk)->src_name, trans_hdr->destAppName, 8)) && @@ -2200,8 +2197,7 @@ static int afiucv_hs_rcv(struct sk_buff *skb, struct net_device *dev, /* fall through and receive zero length data */ case 0: /* plain data frame */ - memcpy(CB_TRGCLS(skb), &trans_hdr->iucv_hdr.class, - CB_TRGCLS_LEN); + IUCV_SKB_CB(skb)->class = trans_hdr->iucv_hdr.class; err = afiucv_hs_callback_rx(sk, skb); break; default: @@ -2225,10 +2221,9 @@ static void afiucv_hs_callback_txnotify(struct sk_buff *skb, struct sk_buff *list_skb; struct sk_buff *nskb; unsigned long flags; - struct hlist_node *node; read_lock_irqsave(&iucv_sk_list.lock, flags); - sk_for_each(sk, node, &iucv_sk_list.head) + sk_for_each(sk, &iucv_sk_list.head) if (sk == isk) { iucv = iucv_sk(sk); break; @@ -2299,14 +2294,13 @@ static int afiucv_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *event_dev = (struct net_device *)ptr; - struct hlist_node *node; struct sock *sk; struct iucv_sock *iucv; switch (event) { case NETDEV_REBOOT: case NETDEV_GOING_DOWN: - sk_for_each(sk, node, &iucv_sk_list.head) { + sk_for_each(sk, &iucv_sk_list.head) { iucv = iucv_sk(sk); if ((iucv->hs_dev == event_dev) && (sk->sk_state == IUCV_CONNECTED)) { diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c index df082508362d..4fe76ff214c2 100644 --- a/net/iucv/iucv.c +++ b/net/iucv/iucv.c @@ -831,8 +831,11 @@ static int iucv_reboot_event(struct notifier_block *this, { int i; + if (cpumask_empty(&iucv_irq_cpumask)) + return NOTIFY_DONE; + get_online_cpus(); - on_each_cpu(iucv_block_cpu, NULL, 1); + on_each_cpu_mask(&iucv_irq_cpumask, iucv_block_cpu, NULL, 1); preempt_disable(); for 
(i = 0; i < iucv_max_pathid; i++) { if (iucv_path_table[i]) diff --git a/net/key/af_key.c b/net/key/af_key.c index 5b426a646544..5b1e5af25713 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -203,7 +203,6 @@ static int pfkey_broadcast_one(struct sk_buff *skb, struct sk_buff **skb2, } if (*skb2 != NULL) { if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf) { - skb_orphan(*skb2); skb_set_owner_r(*skb2, sk); skb_queue_tail(&sk->sk_receive_queue, *skb2); sk->sk_data_ready(sk, (*skb2)->len); @@ -226,7 +225,6 @@ static int pfkey_broadcast(struct sk_buff *skb, gfp_t allocation, { struct netns_pfkey *net_pfkey = net_generic(net, pfkey_net_id); struct sock *sk; - struct hlist_node *node; struct sk_buff *skb2 = NULL; int err = -ESRCH; @@ -237,7 +235,7 @@ static int pfkey_broadcast(struct sk_buff *skb, gfp_t allocation, return -ENOMEM; rcu_read_lock(); - sk_for_each_rcu(sk, node, &net_pfkey->table) { + sk_for_each_rcu(sk, &net_pfkey->table) { struct pfkey_sock *pfk = pfkey_sk(sk); int err2; @@ -762,7 +760,7 @@ static struct sk_buff *__pfkey_xfrm_state2msg(const struct xfrm_state *x, } /* identity & sensitivity */ - if (xfrm_addr_cmp(&x->sel.saddr, &x->props.saddr, x->props.family)) + if (!xfrm_addr_equal(&x->sel.saddr, &x->props.saddr, x->props.family)) size += sizeof(struct sadb_address) + sockaddr_size; if (add_keys) { @@ -816,18 +814,21 @@ static struct sk_buff *__pfkey_xfrm_state2msg(const struct xfrm_state *x, sa->sadb_sa_auth = 0; if (x->aalg) { struct xfrm_algo_desc *a = xfrm_aalg_get_byname(x->aalg->alg_name, 0); - sa->sadb_sa_auth = a ? a->desc.sadb_alg_id : 0; + sa->sadb_sa_auth = (a && a->pfkey_supported) ? + a->desc.sadb_alg_id : 0; } sa->sadb_sa_encrypt = 0; BUG_ON(x->ealg && x->calg); if (x->ealg) { struct xfrm_algo_desc *a = xfrm_ealg_get_byname(x->ealg->alg_name, 0); - sa->sadb_sa_encrypt = a ? a->desc.sadb_alg_id : 0; + sa->sadb_sa_encrypt = (a && a->pfkey_supported) ? + a->desc.sadb_alg_id : 0; } /* KAME compatible: sadb_sa_encrypt is overloaded with calg id */ if (x->calg) { struct xfrm_algo_desc *a = xfrm_calg_get_byname(x->calg->alg_name, 0); - sa->sadb_sa_encrypt = a ? a->desc.sadb_alg_id : 0; + sa->sadb_sa_encrypt = (a && a->pfkey_supported) ? 
+ a->desc.sadb_alg_id : 0; } sa->sadb_sa_flags = 0; @@ -909,8 +910,8 @@ static struct sk_buff *__pfkey_xfrm_state2msg(const struct xfrm_state *x, if (!addr->sadb_address_prefixlen) BUG(); - if (xfrm_addr_cmp(&x->sel.saddr, &x->props.saddr, - x->props.family)) { + if (!xfrm_addr_equal(&x->sel.saddr, &x->props.saddr, + x->props.family)) { addr = (struct sadb_address*) skb_put(skb, sizeof(struct sadb_address)+sockaddr_size); addr->sadb_address_len = @@ -1138,7 +1139,7 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net, if (sa->sadb_sa_auth) { int keysize = 0; struct xfrm_algo_desc *a = xfrm_aalg_get_byid(sa->sadb_sa_auth); - if (!a) { + if (!a || !a->pfkey_supported) { err = -ENOSYS; goto out; } @@ -1160,7 +1161,7 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net, if (sa->sadb_sa_encrypt) { if (hdr->sadb_msg_satype == SADB_X_SATYPE_IPCOMP) { struct xfrm_algo_desc *a = xfrm_calg_get_byid(sa->sadb_sa_encrypt); - if (!a) { + if (!a || !a->pfkey_supported) { err = -ENOSYS; goto out; } @@ -1172,7 +1173,7 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net, } else { int keysize = 0; struct xfrm_algo_desc *a = xfrm_ealg_get_byid(sa->sadb_sa_encrypt); - if (!a) { + if (!a || !a->pfkey_supported) { err = -ENOSYS; goto out; } @@ -1321,7 +1322,7 @@ static int pfkey_getspi(struct sock *sk, struct sk_buff *skb, const struct sadb_ if (hdr->sadb_msg_seq) { x = xfrm_find_acq_byseq(net, DUMMY_MARK, hdr->sadb_msg_seq); - if (x && xfrm_addr_cmp(&x->id.daddr, xdaddr, family)) { + if (x && !xfrm_addr_equal(&x->id.daddr, xdaddr, family)) { xfrm_state_put(x); x = NULL; } @@ -1578,13 +1579,13 @@ static struct sk_buff *compose_sadb_supported(const struct sadb_msg *orig, struct sadb_msg *hdr; int len, auth_len, enc_len, i; - auth_len = xfrm_count_auth_supported(); + auth_len = xfrm_count_pfkey_auth_supported(); if (auth_len) { auth_len *= sizeof(struct sadb_alg); auth_len += sizeof(struct sadb_supported); } - enc_len = xfrm_count_enc_supported(); + enc_len = xfrm_count_pfkey_enc_supported(); if (enc_len) { enc_len *= sizeof(struct sadb_alg); enc_len += sizeof(struct sadb_supported); @@ -1615,6 +1616,8 @@ static struct sk_buff *compose_sadb_supported(const struct sadb_msg *orig, struct xfrm_algo_desc *aalg = xfrm_aalg_get_byidx(i); if (!aalg) break; + if (!aalg->pfkey_supported) + continue; if (aalg->available) *ap++ = aalg->desc; } @@ -1634,6 +1637,8 @@ static struct sk_buff *compose_sadb_supported(const struct sadb_msg *orig, struct xfrm_algo_desc *ealg = xfrm_ealg_get_byidx(i); if (!ealg) break; + if (!ealg->pfkey_supported) + continue; if (ealg->available) *ap++ = ealg->desc; } @@ -2196,7 +2201,7 @@ static int pfkey_spdadd(struct sock *sk, struct sk_buff *skb, const struct sadb_ XFRM_POLICY_BLOCK : XFRM_POLICY_ALLOW); xp->priority = pol->sadb_x_policy_priority; - sa = ext_hdrs[SADB_EXT_ADDRESS_SRC-1], + sa = ext_hdrs[SADB_EXT_ADDRESS_SRC-1]; xp->family = pfkey_sadb_addr2xfrm_addr(sa, &xp->selector.saddr); if (!xp->family) { err = -EINVAL; @@ -2209,7 +2214,7 @@ static int pfkey_spdadd(struct sock *sk, struct sk_buff *skb, const struct sadb_ if (xp->selector.sport) xp->selector.sport_mask = htons(0xffff); - sa = ext_hdrs[SADB_EXT_ADDRESS_DST-1], + sa = ext_hdrs[SADB_EXT_ADDRESS_DST-1]; pfkey_sadb_addr2xfrm_addr(sa, &xp->selector.daddr); xp->selector.prefixlen_d = sa->sadb_address_prefixlen; @@ -2310,7 +2315,7 @@ static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, const struct sa memset(&sel, 0, sizeof(sel)); - sa = ext_hdrs[SADB_EXT_ADDRESS_SRC-1], + sa = 
ext_hdrs[SADB_EXT_ADDRESS_SRC-1]; sel.family = pfkey_sadb_addr2xfrm_addr(sa, &sel.saddr); sel.prefixlen_s = sa->sadb_address_prefixlen; sel.proto = pfkey_proto_to_xfrm(sa->sadb_address_proto); @@ -2318,7 +2323,7 @@ static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, const struct sa if (sel.sport) sel.sport_mask = htons(0xffff); - sa = ext_hdrs[SADB_EXT_ADDRESS_DST-1], + sa = ext_hdrs[SADB_EXT_ADDRESS_DST-1]; pfkey_sadb_addr2xfrm_addr(sa, &sel.daddr); sel.prefixlen_d = sa->sadb_address_prefixlen; sel.proto = pfkey_proto_to_xfrm(sa->sadb_address_proto); @@ -2688,6 +2693,7 @@ static int key_notify_policy_flush(const struct km_event *c) hdr->sadb_msg_pid = c->portid; hdr->sadb_msg_version = PF_KEY_V2; hdr->sadb_msg_errno = (uint8_t) 0; + hdr->sadb_msg_satype = SADB_SATYPE_UNSPEC; hdr->sadb_msg_len = (sizeof(struct sadb_msg) / sizeof(uint64_t)); pfkey_broadcast(skb_out, GFP_ATOMIC, BROADCAST_ALL, NULL, c->net); return 0; @@ -2825,6 +2831,8 @@ static int count_ah_combs(const struct xfrm_tmpl *t) const struct xfrm_algo_desc *aalg = xfrm_aalg_get_byidx(i); if (!aalg) break; + if (!aalg->pfkey_supported) + continue; if (aalg_tmpl_set(t, aalg) && aalg->available) sz += sizeof(struct sadb_comb); } @@ -2840,6 +2848,9 @@ static int count_esp_combs(const struct xfrm_tmpl *t) if (!ealg) break; + if (!ealg->pfkey_supported) + continue; + if (!(ealg_tmpl_set(t, ealg) && ealg->available)) continue; @@ -2848,6 +2859,9 @@ static int count_esp_combs(const struct xfrm_tmpl *t) if (!aalg) break; + if (!aalg->pfkey_supported) + continue; + if (aalg_tmpl_set(t, aalg) && aalg->available) sz += sizeof(struct sadb_comb); } @@ -2871,6 +2885,9 @@ static void dump_ah_combs(struct sk_buff *skb, const struct xfrm_tmpl *t) if (!aalg) break; + if (!aalg->pfkey_supported) + continue; + if (aalg_tmpl_set(t, aalg) && aalg->available) { struct sadb_comb *c; c = (struct sadb_comb*)skb_put(skb, sizeof(struct sadb_comb)); @@ -2903,6 +2920,9 @@ static void dump_esp_combs(struct sk_buff *skb, const struct xfrm_tmpl *t) if (!ealg) break; + if (!ealg->pfkey_supported) + continue; + if (!(ealg_tmpl_set(t, ealg) && ealg->available)) continue; @@ -2911,6 +2931,8 @@ static void dump_esp_combs(struct sk_buff *skb, const struct xfrm_tmpl *t) const struct xfrm_algo_desc *aalg = xfrm_aalg_get_byidx(k); if (!aalg) break; + if (!aalg->pfkey_supported) + continue; if (!(aalg_tmpl_set(t, aalg) && aalg->available)) continue; c = (struct sadb_comb*)skb_put(skb, sizeof(struct sadb_comb)); @@ -3718,7 +3740,7 @@ static int __net_init pfkey_init_proc(struct net *net) { struct proc_dir_entry *e; - e = proc_net_fops_create(net, "pfkey", 0, &pfkey_proc_ops); + e = proc_create("pfkey", 0, net->proc_net, &pfkey_proc_ops); if (e == NULL) return -ENOMEM; @@ -3727,7 +3749,7 @@ static int __net_init pfkey_init_proc(struct net *net) static void __net_exit pfkey_exit_proc(struct net *net) { - proc_net_remove(net, "pfkey"); + remove_proc_entry("pfkey", net->proc_net); } #else static inline int pfkey_init_proc(struct net *net) diff --git a/net/l2tp/Kconfig b/net/l2tp/Kconfig index 147a8fd47a17..adb9843dd7cf 100644 --- a/net/l2tp/Kconfig +++ b/net/l2tp/Kconfig @@ -46,8 +46,8 @@ config L2TP_DEBUGFS will be called l2tp_debugfs. 
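Stepping back to the af_key.c hunks above: they gate every walk of the xfrm algorithm tables on the new pfkey_supported flag, so algorithms that have no PF_KEY v2 representation are neither advertised in SADB supported-algorithm messages nor accepted from userspace. The following is a minimal illustrative sketch of that iterate-and-skip idiom, not code from the patch: xfrm_aalg_get_byidx() and the pfkey_supported flag are real (the flag is introduced by this series), while sadb_put_alg() is a hypothetical stand-in for the skb_put()-based message building done in compose_sadb_supported().

#include <linux/skbuff.h>
#include <linux/pfkeyv2.h>
#include <net/xfrm.h>

/* Hypothetical helper standing in for the real skb_put()-based append. */
static void sadb_put_alg(struct sk_buff *skb, struct sadb_alg *desc);

static void example_dump_auth_algs(struct sk_buff *skb)
{
	struct xfrm_algo_desc *aalg;
	unsigned int i;

	for (i = 0; ; i++) {
		aalg = xfrm_aalg_get_byidx(i);
		if (!aalg)
			break;			/* end of the xfrm table */
		if (!aalg->pfkey_supported)
			continue;		/* not expressible over PF_KEY v2 */
		if (aalg->available)
			sadb_put_alg(skb, &aalg->desc);
	}
}

The same skip appears in the registration, combs-counting and combs-dumping paths above, so PF_KEY and netlink can share one algorithm table without PF_KEY ever seeing entries it cannot name.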
config L2TP_V3 - bool "L2TPv3 support (EXPERIMENTAL)" - depends on EXPERIMENTAL && L2TP + bool "L2TPv3 support" + depends on L2TP help Layer Two Tunneling Protocol Version 3 diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 2ac884d0e89b..6984c3a353cd 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -101,6 +101,7 @@ struct l2tp_skb_cb { static atomic_t l2tp_tunnel_count; static atomic_t l2tp_session_count; +static struct workqueue_struct *l2tp_wq; /* per-net private data for this module */ static unsigned int l2tp_net_id; @@ -113,7 +114,6 @@ struct l2tp_net { static void l2tp_session_set_header_len(struct l2tp_session *session, int version); static void l2tp_tunnel_free(struct l2tp_tunnel *tunnel); -static void l2tp_tunnel_closeall(struct l2tp_tunnel *tunnel); static inline struct l2tp_net *l2tp_pernet(struct net *net) { @@ -122,7 +122,6 @@ static inline struct l2tp_net *l2tp_pernet(struct net *net) return net_generic(net, l2tp_net_id); } - /* Tunnel reference counts. Incremented per session that is added to * the tunnel. */ @@ -192,6 +191,7 @@ struct sock *l2tp_tunnel_sock_lookup(struct l2tp_tunnel *tunnel) } else { /* Socket is owned by kernelspace */ sk = tunnel->sock; + sock_hold(sk); } out: @@ -210,6 +210,7 @@ void l2tp_tunnel_sock_put(struct sock *sk) } sock_put(sk); } + sock_put(sk); } EXPORT_SYMBOL_GPL(l2tp_tunnel_sock_put); @@ -221,10 +222,9 @@ static struct l2tp_session *l2tp_session_find_2(struct net *net, u32 session_id) struct hlist_head *session_list = l2tp_session_id_hash_2(pn, session_id); struct l2tp_session *session; - struct hlist_node *walk; rcu_read_lock_bh(); - hlist_for_each_entry_rcu(session, walk, session_list, global_hlist) { + hlist_for_each_entry_rcu(session, session_list, global_hlist) { if (session->session_id == session_id) { rcu_read_unlock_bh(); return session; @@ -253,7 +253,6 @@ struct l2tp_session *l2tp_session_find(struct net *net, struct l2tp_tunnel *tunn { struct hlist_head *session_list; struct l2tp_session *session; - struct hlist_node *walk; /* In L2TPv3, session_ids are unique over all tunnels and we * sometimes need to look them up before we know the @@ -264,7 +263,7 @@ struct l2tp_session *l2tp_session_find(struct net *net, struct l2tp_tunnel *tunn session_list = l2tp_session_id_hash(tunnel, session_id); read_lock_bh(&tunnel->hlist_lock); - hlist_for_each_entry(session, walk, session_list, hlist) { + hlist_for_each_entry(session, session_list, hlist) { if (session->session_id == session_id) { read_unlock_bh(&tunnel->hlist_lock); return session; @@ -279,13 +278,12 @@ EXPORT_SYMBOL_GPL(l2tp_session_find); struct l2tp_session *l2tp_session_find_nth(struct l2tp_tunnel *tunnel, int nth) { int hash; - struct hlist_node *walk; struct l2tp_session *session; int count = 0; read_lock_bh(&tunnel->hlist_lock); for (hash = 0; hash < L2TP_HASH_SIZE; hash++) { - hlist_for_each_entry(session, walk, &tunnel->session_hlist[hash], hlist) { + hlist_for_each_entry(session, &tunnel->session_hlist[hash], hlist) { if (++count > nth) { read_unlock_bh(&tunnel->hlist_lock); return session; @@ -306,12 +304,11 @@ struct l2tp_session *l2tp_session_find_by_ifname(struct net *net, char *ifname) { struct l2tp_net *pn = l2tp_pernet(net); int hash; - struct hlist_node *walk; struct l2tp_session *session; rcu_read_lock_bh(); for (hash = 0; hash < L2TP_HASH_SIZE_2; hash++) { - hlist_for_each_entry_rcu(session, walk, &pn->l2tp_session_hlist[hash], global_hlist) { + hlist_for_each_entry_rcu(session, &pn->l2tp_session_hlist[hash], global_hlist) { if 
(!strcmp(session->ifname, ifname)) { rcu_read_unlock_bh(); return session; @@ -377,10 +374,8 @@ static void l2tp_recv_queue_skb(struct l2tp_session *session, struct sk_buff *sk struct sk_buff *skbp; struct sk_buff *tmp; u32 ns = L2TP_SKB_CB(skb)->ns; - struct l2tp_stats *sstats; spin_lock_bh(&session->reorder_q.lock); - sstats = &session->stats; skb_queue_walk_safe(&session->reorder_q, skbp, tmp) { if (L2TP_SKB_CB(skbp)->ns > ns) { __skb_queue_before(&session->reorder_q, skbp, skb); @@ -388,9 +383,7 @@ static void l2tp_recv_queue_skb(struct l2tp_session *session, struct sk_buff *sk "%s: pkt %hu, inserted before %hu, reorder_q len=%d\n", session->name, ns, L2TP_SKB_CB(skbp)->ns, skb_queue_len(&session->reorder_q)); - u64_stats_update_begin(&sstats->syncp); - sstats->rx_oos_packets++; - u64_stats_update_end(&sstats->syncp); + atomic_long_inc(&session->stats.rx_oos_packets); goto out; } } @@ -407,23 +400,16 @@ static void l2tp_recv_dequeue_skb(struct l2tp_session *session, struct sk_buff * { struct l2tp_tunnel *tunnel = session->tunnel; int length = L2TP_SKB_CB(skb)->length; - struct l2tp_stats *tstats, *sstats; /* We're about to requeue the skb, so return resources * to its current owner (a socket receive buffer). */ skb_orphan(skb); - tstats = &tunnel->stats; - u64_stats_update_begin(&tstats->syncp); - sstats = &session->stats; - u64_stats_update_begin(&sstats->syncp); - tstats->rx_packets++; - tstats->rx_bytes += length; - sstats->rx_packets++; - sstats->rx_bytes += length; - u64_stats_update_end(&tstats->syncp); - u64_stats_update_end(&sstats->syncp); + atomic_long_inc(&tunnel->stats.rx_packets); + atomic_long_add(length, &tunnel->stats.rx_bytes); + atomic_long_inc(&session->stats.rx_packets); + atomic_long_add(length, &session->stats.rx_bytes); if (L2TP_SKB_CB(skb)->has_seq) { /* Bump our Nr */ @@ -454,7 +440,6 @@ static void l2tp_recv_dequeue(struct l2tp_session *session) { struct sk_buff *skb; struct sk_buff *tmp; - struct l2tp_stats *sstats; /* If the pkt at the head of the queue has the nr that we * expect to send up next, dequeue it and any other @@ -462,13 +447,10 @@ static void l2tp_recv_dequeue(struct l2tp_session *session) */ start: spin_lock_bh(&session->reorder_q.lock); - sstats = &session->stats; skb_queue_walk_safe(&session->reorder_q, skb, tmp) { if (time_after(jiffies, L2TP_SKB_CB(skb)->expires)) { - u64_stats_update_begin(&sstats->syncp); - sstats->rx_seq_discards++; - sstats->rx_errors++; - u64_stats_update_end(&sstats->syncp); + atomic_long_inc(&session->stats.rx_seq_discards); + atomic_long_inc(&session->stats.rx_errors); l2tp_dbg(session, L2TP_MSG_SEQ, "%s: oos pkt %u len %d discarded (too old), waiting for %u, reorder_q_len=%d\n", session->name, L2TP_SKB_CB(skb)->ns, @@ -627,7 +609,6 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, struct l2tp_tunnel *tunnel = session->tunnel; int offset; u32 ns, nr; - struct l2tp_stats *sstats = &session->stats; /* The ref count is increased since we now hold a pointer to * the session. Take care to decrement the refcnt when exiting @@ -644,9 +625,7 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, "%s: cookie mismatch (%u/%u). 
Discarding.\n", tunnel->name, tunnel->tunnel_id, session->session_id); - u64_stats_update_begin(&sstats->syncp); - sstats->rx_cookie_discards++; - u64_stats_update_end(&sstats->syncp); + atomic_long_inc(&session->stats.rx_cookie_discards); goto discard; } ptr += session->peer_cookie_len; @@ -715,9 +694,7 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, l2tp_warn(session, L2TP_MSG_SEQ, "%s: recv data has no seq numbers when required. Discarding.\n", session->name); - u64_stats_update_begin(&sstats->syncp); - sstats->rx_seq_discards++; - u64_stats_update_end(&sstats->syncp); + atomic_long_inc(&session->stats.rx_seq_discards); goto discard; } @@ -736,9 +713,7 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, l2tp_warn(session, L2TP_MSG_SEQ, "%s: recv data has no seq numbers when required. Discarding.\n", session->name); - u64_stats_update_begin(&sstats->syncp); - sstats->rx_seq_discards++; - u64_stats_update_end(&sstats->syncp); + atomic_long_inc(&session->stats.rx_seq_discards); goto discard; } } @@ -792,9 +767,7 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, * packets */ if (L2TP_SKB_CB(skb)->ns != session->nr) { - u64_stats_update_begin(&sstats->syncp); - sstats->rx_seq_discards++; - u64_stats_update_end(&sstats->syncp); + atomic_long_inc(&session->stats.rx_seq_discards); l2tp_dbg(session, L2TP_MSG_SEQ, "%s: oos pkt %u len %d discarded, waiting for %u, reorder_q_len=%d\n", session->name, L2TP_SKB_CB(skb)->ns, @@ -820,9 +793,7 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, return; discard: - u64_stats_update_begin(&sstats->syncp); - sstats->rx_errors++; - u64_stats_update_end(&sstats->syncp); + atomic_long_inc(&session->stats.rx_errors); kfree_skb(skb); if (session->deref) @@ -832,6 +803,23 @@ discard: } EXPORT_SYMBOL(l2tp_recv_common); +/* Drop skbs from the session's reorder_q + */ +int l2tp_session_queue_purge(struct l2tp_session *session) +{ + struct sk_buff *skb = NULL; + BUG_ON(!session); + BUG_ON(session->magic != L2TP_SESSION_MAGIC); + while ((skb = skb_dequeue(&session->reorder_q))) { + atomic_long_inc(&session->stats.rx_errors); + kfree_skb(skb); + if (session->deref) + (*session->deref)(session); + } + return 0; +} +EXPORT_SYMBOL_GPL(l2tp_session_queue_purge); + /* Internal UDP receive frame. Do the real work of receiving an L2TP data frame * here. The skb is not on a list when we get here. * Returns 0 if the packet was a data packet and was successfully passed on. 
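The l2tp_core.c hunks above and below convert the tunnel and session counters from u64_stats (a u64 plus a syncp seqcount) to plain atomic_long_t operations; the old counters were bumped from more than one context, which the u64_stats per-writer serialization scheme does not tolerate. A minimal, self-contained sketch of the replacement idiom follows; struct example_stats and example_rx_account() are illustrative names, with fields mirroring l2tp_stats as used in this file.

#include <linux/atomic.h>
#include <linux/types.h>

struct example_stats {
	atomic_long_t rx_packets;
	atomic_long_t rx_bytes;
	atomic_long_t rx_errors;
};

static void example_rx_account(struct example_stats *stats,
			       unsigned int len, bool good)
{
	if (!good) {
		/* One atomic RMW; safe from any context, no syncp needed. */
		atomic_long_inc(&stats->rx_errors);
		return;
	}
	atomic_long_inc(&stats->rx_packets);
	atomic_long_add(len, &stats->rx_bytes);
}

The visible trade-off is counter width: atomic_long_t is 32 bits on 32-bit architectures, whereas the u64_stats scheme kept full 64-bit counters; in exchange, the counters can be incremented concurrently from process and softirq context without any per-writer serialization.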
@@ -847,7 +835,6 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb, u32 tunnel_id, session_id; u16 version; int length; - struct l2tp_stats *tstats; if (tunnel->sock && l2tp_verify_udp_checksum(tunnel->sock, skb)) goto discard_bad_csum; @@ -936,10 +923,7 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb, discard_bad_csum: LIMIT_NETDEBUG("%s: UDP: bad checksum\n", tunnel->name); UDP_INC_STATS_USER(tunnel->l2tp_net, UDP_MIB_INERRORS, 0); - tstats = &tunnel->stats; - u64_stats_update_begin(&tstats->syncp); - tstats->rx_errors++; - u64_stats_update_end(&tstats->syncp); + atomic_long_inc(&tunnel->stats.rx_errors); kfree_skb(skb); return 0; @@ -1066,7 +1050,6 @@ static int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb, struct l2tp_tunnel *tunnel = session->tunnel; unsigned int len = skb->len; int error; - struct l2tp_stats *tstats, *sstats; /* Debug */ if (session->send_seq) @@ -1095,21 +1078,15 @@ static int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb, error = ip_queue_xmit(skb, fl); /* Update stats */ - tstats = &tunnel->stats; - u64_stats_update_begin(&tstats->syncp); - sstats = &session->stats; - u64_stats_update_begin(&sstats->syncp); if (error >= 0) { - tstats->tx_packets++; - tstats->tx_bytes += len; - sstats->tx_packets++; - sstats->tx_bytes += len; + atomic_long_inc(&tunnel->stats.tx_packets); + atomic_long_add(len, &tunnel->stats.tx_bytes); + atomic_long_inc(&session->stats.tx_packets); + atomic_long_add(len, &session->stats.tx_bytes); } else { - tstats->tx_errors++; - sstats->tx_errors++; + atomic_long_inc(&tunnel->stats.tx_errors); + atomic_long_inc(&session->stats.tx_errors); } - u64_stats_update_end(&tstats->syncp); - u64_stats_update_end(&sstats->syncp); return 0; } @@ -1271,6 +1248,7 @@ EXPORT_SYMBOL_GPL(l2tp_xmit_skb); static void l2tp_tunnel_destruct(struct sock *sk) { struct l2tp_tunnel *tunnel; + struct l2tp_net *pn; tunnel = sk->sk_user_data; if (tunnel == NULL) @@ -1278,38 +1256,44 @@ static void l2tp_tunnel_destruct(struct sock *sk) l2tp_info(tunnel, L2TP_MSG_CONTROL, "%s: closing...\n", tunnel->name); - /* Close all sessions */ - l2tp_tunnel_closeall(tunnel); + /* Disable udp encapsulation */ switch (tunnel->encap) { case L2TP_ENCAPTYPE_UDP: /* No longer an encapsulation socket. See net/ipv4/udp.c */ (udp_sk(sk))->encap_type = 0; (udp_sk(sk))->encap_rcv = NULL; + (udp_sk(sk))->encap_destroy = NULL; break; case L2TP_ENCAPTYPE_IP: break; } /* Remove hooks into tunnel socket */ - tunnel->sock = NULL; sk->sk_destruct = tunnel->old_sk_destruct; sk->sk_user_data = NULL; + tunnel->sock = NULL; - /* Call the original destructor */ - if (sk->sk_destruct) - (*sk->sk_destruct)(sk); + /* Remove the tunnel struct from the tunnel list */ + pn = l2tp_pernet(tunnel->l2tp_net); + spin_lock_bh(&pn->l2tp_tunnel_list_lock); + list_del_rcu(&tunnel->list); + spin_unlock_bh(&pn->l2tp_tunnel_list_lock); + atomic_dec(&l2tp_tunnel_count); - /* We're finished with the socket */ + l2tp_tunnel_closeall(tunnel); l2tp_tunnel_dec_refcount(tunnel); + /* Call the original destructor */ + if (sk->sk_destruct) + (*sk->sk_destruct)(sk); end: return; } /* When the tunnel is closed, all the attached sessions need to go too. 
*/ -static void l2tp_tunnel_closeall(struct l2tp_tunnel *tunnel) +void l2tp_tunnel_closeall(struct l2tp_tunnel *tunnel) { int hash; struct hlist_node *walk; @@ -1332,25 +1316,13 @@ again: hlist_del_init(&session->hlist); - /* Since we should hold the sock lock while - * doing any unbinding, we need to release the - * lock we're holding before taking that lock. - * Hold a reference to the sock so it doesn't - * disappear as we're jumping between locks. - */ if (session->ref != NULL) (*session->ref)(session); write_unlock_bh(&tunnel->hlist_lock); - if (tunnel->version != L2TP_HDR_VER_2) { - struct l2tp_net *pn = l2tp_pernet(tunnel->l2tp_net); - - spin_lock_bh(&pn->l2tp_session_hlist_lock); - hlist_del_init_rcu(&session->global_hlist); - spin_unlock_bh(&pn->l2tp_session_hlist_lock); - synchronize_rcu(); - } + __l2tp_session_unhash(session); + l2tp_session_queue_purge(session); if (session->session_close != NULL) (*session->session_close)(session); @@ -1358,6 +1330,8 @@ again: if (session->deref != NULL) (*session->deref)(session); + l2tp_session_dec_refcount(session); + write_lock_bh(&tunnel->hlist_lock); /* Now restart from the beginning of this hash @@ -1370,54 +1344,96 @@ again: } write_unlock_bh(&tunnel->hlist_lock); } +EXPORT_SYMBOL_GPL(l2tp_tunnel_closeall); + +/* Tunnel socket destroy hook for UDP encapsulation */ +static void l2tp_udp_encap_destroy(struct sock *sk) +{ + struct l2tp_tunnel *tunnel = l2tp_sock_to_tunnel(sk); + if (tunnel) { + l2tp_tunnel_closeall(tunnel); + sock_put(sk); + } +} /* Really kill the tunnel. * Come here only when all sessions have been cleared from the tunnel. */ static void l2tp_tunnel_free(struct l2tp_tunnel *tunnel) { - struct l2tp_net *pn = l2tp_pernet(tunnel->l2tp_net); - BUG_ON(atomic_read(&tunnel->ref_count) != 0); BUG_ON(tunnel->sock != NULL); - l2tp_info(tunnel, L2TP_MSG_CONTROL, "%s: free...\n", tunnel->name); - - /* Remove from tunnel list */ - spin_lock_bh(&pn->l2tp_tunnel_list_lock); - list_del_rcu(&tunnel->list); kfree_rcu(tunnel, rcu); - spin_unlock_bh(&pn->l2tp_tunnel_list_lock); +} - atomic_dec(&l2tp_tunnel_count); +/* Workqueue tunnel deletion function */ +static void l2tp_tunnel_del_work(struct work_struct *work) +{ + struct l2tp_tunnel *tunnel = NULL; + struct socket *sock = NULL; + struct sock *sk = NULL; + + tunnel = container_of(work, struct l2tp_tunnel, del_work); + sk = l2tp_tunnel_sock_lookup(tunnel); + if (!sk) + return; + + sock = sk->sk_socket; + + /* If the tunnel socket was created by userspace, then go through the + * inet layer to shut the socket down, and let userspace close it. + * Otherwise, if we created the socket directly within the kernel, use + * the sk API to release it here. + * In either case the tunnel resources are freed in the socket + * destructor when the tunnel socket goes away. + */ + if (tunnel->fd >= 0) { + if (sock) + inet_shutdown(sock, 2); + } else { + if (sock) + kernel_sock_shutdown(sock, SHUT_RDWR); + sk_release_kernel(sk); + } + + l2tp_tunnel_sock_put(sk); } /* Create a socket for the tunnel, if one isn't set up by * userspace. This is used for static tunnels where there is no * managing L2TP daemon. + * + * Since we don't want these sockets to keep a namespace alive by + * themselves, we drop the socket's namespace refcount after creation. + * These sockets are freed when the namespace exits using the pernet + * exit hook. 
*/ -static int l2tp_tunnel_sock_create(u32 tunnel_id, u32 peer_tunnel_id, struct l2tp_tunnel_cfg *cfg, struct socket **sockp) +static int l2tp_tunnel_sock_create(struct net *net, + u32 tunnel_id, + u32 peer_tunnel_id, + struct l2tp_tunnel_cfg *cfg, + struct socket **sockp) { int err = -EINVAL; - struct sockaddr_in udp_addr; + struct socket *sock = NULL; + struct sockaddr_in udp_addr = {0}; + struct sockaddr_l2tpip ip_addr = {0}; #if IS_ENABLED(CONFIG_IPV6) - struct sockaddr_in6 udp6_addr; - struct sockaddr_l2tpip6 ip6_addr; + struct sockaddr_in6 udp6_addr = {0}; + struct sockaddr_l2tpip6 ip6_addr = {0}; #endif - struct sockaddr_l2tpip ip_addr; - struct socket *sock = NULL; switch (cfg->encap) { case L2TP_ENCAPTYPE_UDP: #if IS_ENABLED(CONFIG_IPV6) if (cfg->local_ip6 && cfg->peer_ip6) { - err = sock_create(AF_INET6, SOCK_DGRAM, 0, sockp); + err = sock_create_kern(AF_INET6, SOCK_DGRAM, 0, &sock); if (err < 0) goto out; - sock = *sockp; + sk_change_net(sock->sk, net); - memset(&udp6_addr, 0, sizeof(udp6_addr)); udp6_addr.sin6_family = AF_INET6; memcpy(&udp6_addr.sin6_addr, cfg->local_ip6, sizeof(udp6_addr.sin6_addr)); @@ -1439,13 +1455,12 @@ static int l2tp_tunnel_sock_create(u32 tunnel_id, u32 peer_tunnel_id, struct l2t } else #endif { - err = sock_create(AF_INET, SOCK_DGRAM, 0, sockp); + err = sock_create_kern(AF_INET, SOCK_DGRAM, 0, &sock); if (err < 0) goto out; - sock = *sockp; + sk_change_net(sock->sk, net); - memset(&udp_addr, 0, sizeof(udp_addr)); udp_addr.sin_family = AF_INET; udp_addr.sin_addr = cfg->local_ip; udp_addr.sin_port = htons(cfg->local_udp_port); @@ -1472,14 +1487,13 @@ static int l2tp_tunnel_sock_create(u32 tunnel_id, u32 peer_tunnel_id, struct l2t case L2TP_ENCAPTYPE_IP: #if IS_ENABLED(CONFIG_IPV6) if (cfg->local_ip6 && cfg->peer_ip6) { - err = sock_create(AF_INET6, SOCK_DGRAM, IPPROTO_L2TP, - sockp); + err = sock_create_kern(AF_INET6, SOCK_DGRAM, + IPPROTO_L2TP, &sock); if (err < 0) goto out; - sock = *sockp; + sk_change_net(sock->sk, net); - memset(&ip6_addr, 0, sizeof(ip6_addr)); ip6_addr.l2tp_family = AF_INET6; memcpy(&ip6_addr.l2tp_addr, cfg->local_ip6, sizeof(ip6_addr.l2tp_addr)); @@ -1501,14 +1515,13 @@ static int l2tp_tunnel_sock_create(u32 tunnel_id, u32 peer_tunnel_id, struct l2t } else #endif { - err = sock_create(AF_INET, SOCK_DGRAM, IPPROTO_L2TP, - sockp); + err = sock_create_kern(AF_INET, SOCK_DGRAM, + IPPROTO_L2TP, &sock); if (err < 0) goto out; - sock = *sockp; + sk_change_net(sock->sk, net); - memset(&ip_addr, 0, sizeof(ip_addr)); ip_addr.l2tp_family = AF_INET; ip_addr.l2tp_addr = cfg->local_ip; ip_addr.l2tp_conn_id = tunnel_id; @@ -1532,8 +1545,10 @@ static int l2tp_tunnel_sock_create(u32 tunnel_id, u32 peer_tunnel_id, struct l2t } out: + *sockp = sock; if ((err < 0) && sock) { - sock_release(sock); + kernel_sock_shutdown(sock, SHUT_RDWR); + sk_release_kernel(sock->sk); *sockp = NULL; } @@ -1556,15 +1571,23 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 * kernel socket. 
*/ if (fd < 0) { - err = l2tp_tunnel_sock_create(tunnel_id, peer_tunnel_id, cfg, &sock); + err = l2tp_tunnel_sock_create(net, tunnel_id, peer_tunnel_id, + cfg, &sock); if (err < 0) goto err; } else { - err = -EBADF; sock = sockfd_lookup(fd, &err); if (!sock) { - pr_err("tunl %hu: sockfd_lookup(fd=%d) returned %d\n", + pr_err("tunl %u: sockfd_lookup(fd=%d) returned %d\n", tunnel_id, fd, err); + err = -EBADF; + goto err; + } + + /* Reject namespace mismatches */ + if (!net_eq(sock_net(sock->sk), net)) { + pr_err("tunl %u: netns mismatch\n", tunnel_id); + err = -EINVAL; goto err; } } @@ -1630,6 +1653,7 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 /* Mark socket as an encapsulation socket. See net/ipv4/udp.c */ udp_sk(sk)->encap_type = UDP_ENCAP_L2TPINUDP; udp_sk(sk)->encap_rcv = l2tp_udp_encap_recv; + udp_sk(sk)->encap_destroy = l2tp_udp_encap_destroy; #if IS_ENABLED(CONFIG_IPV6) if (sk->sk_family == PF_INET6) udpv6_encap_enable(); @@ -1651,6 +1675,9 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 sk->sk_allocation = GFP_ATOMIC; + /* Init delete workqueue struct */ + INIT_WORK(&tunnel->del_work, l2tp_tunnel_del_work); + /* Add tunnel to our list */ INIT_LIST_HEAD(&tunnel->list); atomic_inc(&l2tp_tunnel_count); @@ -1682,33 +1709,8 @@ EXPORT_SYMBOL_GPL(l2tp_tunnel_create); */ int l2tp_tunnel_delete(struct l2tp_tunnel *tunnel) { - int err = -EBADF; - struct socket *sock = NULL; - struct sock *sk = NULL; - - sk = l2tp_tunnel_sock_lookup(tunnel); - if (!sk) - goto out; - - sock = sk->sk_socket; - BUG_ON(!sock); - - /* Force the tunnel socket to close. This will eventually - * cause the tunnel to be deleted via the normal socket close - * mechanisms when userspace closes the tunnel socket. - */ - err = inet_shutdown(sock, 2); - - /* If the tunnel's socket was created by the kernel, - * close the socket here since the socket was not - * created by userspace. - */ - if (sock->file == NULL) - err = inet_release(sock); - - l2tp_tunnel_sock_put(sk); -out: - return err; + l2tp_tunnel_closeall(tunnel); + return (false == queue_work(l2tp_wq, &tunnel->del_work)); } EXPORT_SYMBOL_GPL(l2tp_tunnel_delete); @@ -1716,62 +1718,71 @@ EXPORT_SYMBOL_GPL(l2tp_tunnel_delete); */ void l2tp_session_free(struct l2tp_session *session) { - struct l2tp_tunnel *tunnel; + struct l2tp_tunnel *tunnel = session->tunnel; BUG_ON(atomic_read(&session->ref_count) != 0); - tunnel = session->tunnel; - if (tunnel != NULL) { + if (tunnel) { BUG_ON(tunnel->magic != L2TP_TUNNEL_MAGIC); + if (session->session_id != 0) + atomic_dec(&l2tp_session_count); + sock_put(tunnel->sock); + session->tunnel = NULL; + l2tp_tunnel_dec_refcount(tunnel); + } + + kfree(session); - /* Delete the session from the hash */ + return; +} +EXPORT_SYMBOL_GPL(l2tp_session_free); + +/* Remove an l2tp session from l2tp_core's hash lists. + * Provides a tidyup interface for pseudowire code which can't just route all + * shutdown via. l2tp_session_delete and a pseudowire-specific session_close + * callback. 
+ */ +void __l2tp_session_unhash(struct l2tp_session *session) +{ + struct l2tp_tunnel *tunnel = session->tunnel; + + /* Remove the session from core hashes */ + if (tunnel) { + /* Remove from the per-tunnel hash */ write_lock_bh(&tunnel->hlist_lock); hlist_del_init(&session->hlist); write_unlock_bh(&tunnel->hlist_lock); - /* Unlink from the global hash if not L2TPv2 */ + /* For L2TPv3 we have a per-net hash: remove from there, too */ if (tunnel->version != L2TP_HDR_VER_2) { struct l2tp_net *pn = l2tp_pernet(tunnel->l2tp_net); - spin_lock_bh(&pn->l2tp_session_hlist_lock); hlist_del_init_rcu(&session->global_hlist); spin_unlock_bh(&pn->l2tp_session_hlist_lock); synchronize_rcu(); } - - if (session->session_id != 0) - atomic_dec(&l2tp_session_count); - - sock_put(tunnel->sock); - - /* This will delete the tunnel context if this - * is the last session on the tunnel. - */ - session->tunnel = NULL; - l2tp_tunnel_dec_refcount(tunnel); } - - kfree(session); - - return; } -EXPORT_SYMBOL_GPL(l2tp_session_free); +EXPORT_SYMBOL_GPL(__l2tp_session_unhash); /* This function is used by the netlink SESSION_DELETE command and by pseudowire modules. */ int l2tp_session_delete(struct l2tp_session *session) { + if (session->ref) + (*session->ref)(session); + __l2tp_session_unhash(session); + l2tp_session_queue_purge(session); if (session->session_close != NULL) (*session->session_close)(session); - + if (session->deref) + (*session->deref)(session); l2tp_session_dec_refcount(session); - return 0; } EXPORT_SYMBOL_GPL(l2tp_session_delete); - /* We come here whenever a session's send_seq, cookie_len or * l2specific_len parameters are set. */ @@ -1892,8 +1903,21 @@ static __net_init int l2tp_init_net(struct net *net) return 0; } +static __net_exit void l2tp_exit_net(struct net *net) +{ + struct l2tp_net *pn = l2tp_pernet(net); + struct l2tp_tunnel *tunnel = NULL; + + rcu_read_lock_bh(); + list_for_each_entry_rcu(tunnel, &pn->l2tp_tunnel_list, list) { + (void)l2tp_tunnel_delete(tunnel); + } + rcu_read_unlock_bh(); +} + static struct pernet_operations l2tp_net_ops = { .init = l2tp_init_net, + .exit = l2tp_exit_net, .id = &l2tp_net_id, .size = sizeof(struct l2tp_net), }; @@ -1906,6 +1930,13 @@ static int __init l2tp_init(void) if (rc) goto out; + l2tp_wq = alloc_workqueue("l2tp", WQ_NON_REENTRANT | WQ_UNBOUND, 0); + if (!l2tp_wq) { + pr_err("alloc_workqueue failed\n"); + rc = -ENOMEM; + goto out; + } + pr_info("L2TP core driver, %s\n", L2TP_DRV_VERSION); out: @@ -1915,6 +1946,10 @@ out: static void __exit l2tp_exit(void) { unregister_pernet_device(&l2tp_net_ops); + if (l2tp_wq) { + destroy_workqueue(l2tp_wq); + l2tp_wq = NULL; + } } module_init(l2tp_init); diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h index e62204cad4fe..485a490fd990 100644 --- a/net/l2tp/l2tp_core.h +++ b/net/l2tp/l2tp_core.h @@ -36,16 +36,15 @@ enum { struct sk_buff; struct l2tp_stats { - u64 tx_packets; - u64 tx_bytes; - u64 tx_errors; - u64 rx_packets; - u64 rx_bytes; - u64 rx_seq_discards; - u64 rx_oos_packets; - u64 rx_errors; - u64 rx_cookie_discards; - struct u64_stats_sync syncp; + atomic_long_t tx_packets; + atomic_long_t tx_bytes; + atomic_long_t tx_errors; + atomic_long_t rx_packets; + atomic_long_t rx_bytes; + atomic_long_t rx_seq_discards; + atomic_long_t rx_oos_packets; + atomic_long_t rx_errors; + atomic_long_t rx_cookie_discards; }; struct l2tp_tunnel; @@ -191,6 +190,8 @@ struct l2tp_tunnel { int fd; /* Parent fd, if tunnel socket * was created by userspace */ + struct work_struct del_work; + uint8_t priv[0]; /* private 
data */ }; @@ -238,11 +239,14 @@ extern struct l2tp_tunnel *l2tp_tunnel_find(struct net *net, u32 tunnel_id); extern struct l2tp_tunnel *l2tp_tunnel_find_nth(struct net *net, int nth); extern int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 peer_tunnel_id, struct l2tp_tunnel_cfg *cfg, struct l2tp_tunnel **tunnelp); +extern void l2tp_tunnel_closeall(struct l2tp_tunnel *tunnel); extern int l2tp_tunnel_delete(struct l2tp_tunnel *tunnel); extern struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunnel, u32 session_id, u32 peer_session_id, struct l2tp_session_cfg *cfg); +extern void __l2tp_session_unhash(struct l2tp_session *session); extern int l2tp_session_delete(struct l2tp_session *session); extern void l2tp_session_free(struct l2tp_session *session); extern void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, unsigned char *ptr, unsigned char *optr, u16 hdrflags, int length, int (*payload_hook)(struct sk_buff *skb)); +extern int l2tp_session_queue_purge(struct l2tp_session *session); extern int l2tp_udp_encap_recv(struct sock *sk, struct sk_buff *skb); extern int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len); diff --git a/net/l2tp/l2tp_debugfs.c b/net/l2tp/l2tp_debugfs.c index c3813bc84552..072d7202e182 100644 --- a/net/l2tp/l2tp_debugfs.c +++ b/net/l2tp/l2tp_debugfs.c @@ -146,14 +146,14 @@ static void l2tp_dfs_seq_tunnel_show(struct seq_file *m, void *v) tunnel->sock ? atomic_read(&tunnel->sock->sk_refcnt) : 0, atomic_read(&tunnel->ref_count)); - seq_printf(m, " %08x rx %llu/%llu/%llu rx %llu/%llu/%llu\n", + seq_printf(m, " %08x rx %ld/%ld/%ld rx %ld/%ld/%ld\n", tunnel->debug, - (unsigned long long)tunnel->stats.tx_packets, - (unsigned long long)tunnel->stats.tx_bytes, - (unsigned long long)tunnel->stats.tx_errors, - (unsigned long long)tunnel->stats.rx_packets, - (unsigned long long)tunnel->stats.rx_bytes, - (unsigned long long)tunnel->stats.rx_errors); + atomic_long_read(&tunnel->stats.tx_packets), + atomic_long_read(&tunnel->stats.tx_bytes), + atomic_long_read(&tunnel->stats.tx_errors), + atomic_long_read(&tunnel->stats.rx_packets), + atomic_long_read(&tunnel->stats.rx_bytes), + atomic_long_read(&tunnel->stats.rx_errors)); if (tunnel->show != NULL) tunnel->show(m, tunnel); @@ -203,14 +203,14 @@ static void l2tp_dfs_seq_session_show(struct seq_file *m, void *v) seq_printf(m, "\n"); } - seq_printf(m, " %hu/%hu tx %llu/%llu/%llu rx %llu/%llu/%llu\n", + seq_printf(m, " %hu/%hu tx %ld/%ld/%ld rx %ld/%ld/%ld\n", session->nr, session->ns, - (unsigned long long)session->stats.tx_packets, - (unsigned long long)session->stats.tx_bytes, - (unsigned long long)session->stats.tx_errors, - (unsigned long long)session->stats.rx_packets, - (unsigned long long)session->stats.rx_bytes, - (unsigned long long)session->stats.rx_errors); + atomic_long_read(&session->stats.tx_packets), + atomic_long_read(&session->stats.tx_bytes), + atomic_long_read(&session->stats.tx_errors), + atomic_long_read(&session->stats.rx_packets), + atomic_long_read(&session->stats.rx_bytes), + atomic_long_read(&session->stats.rx_errors)); if (session->show != NULL) session->show(m, session); diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index 61d8b75d2686..571db8dd2292 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -49,10 +49,9 @@ static inline struct l2tp_ip_sock *l2tp_ip_sk(const struct sock *sk) static struct sock *__l2tp_ip_bind_lookup(struct net *net, __be32 laddr, int dif, u32 tunnel_id) { - struct 
hlist_node *node; struct sock *sk; - sk_for_each_bound(sk, node, &l2tp_ip_bind_table) { + sk_for_each_bound(sk, &l2tp_ip_bind_table) { struct inet_sock *inet = inet_sk(sk); struct l2tp_ip_sock *l2tp = l2tp_ip_sk(sk); @@ -115,6 +114,7 @@ static inline struct sock *l2tp_ip_bind_lookup(struct net *net, __be32 laddr, in */ static int l2tp_ip_recv(struct sk_buff *skb) { + struct net *net = dev_net(skb->dev); struct sock *sk; u32 session_id; u32 tunnel_id; @@ -142,7 +142,7 @@ static int l2tp_ip_recv(struct sk_buff *skb) } /* Ok, this is a data packet. Lookup the session. */ - session = l2tp_session_find(&init_net, NULL, session_id); + session = l2tp_session_find(net, NULL, session_id); if (session == NULL) goto discard; @@ -173,14 +173,14 @@ pass_up: goto discard; tunnel_id = ntohl(*(__be32 *) &skb->data[4]); - tunnel = l2tp_tunnel_find(&init_net, tunnel_id); + tunnel = l2tp_tunnel_find(net, tunnel_id); if (tunnel != NULL) sk = tunnel->sock; else { struct iphdr *iph = (struct iphdr *) skb_network_header(skb); read_lock_bh(&l2tp_ip_lock); - sk = __l2tp_ip_bind_lookup(&init_net, iph->daddr, 0, tunnel_id); + sk = __l2tp_ip_bind_lookup(net, iph->daddr, 0, tunnel_id); read_unlock_bh(&l2tp_ip_lock); } @@ -228,10 +228,16 @@ static void l2tp_ip_close(struct sock *sk, long timeout) static void l2tp_ip_destroy_sock(struct sock *sk) { struct sk_buff *skb; + struct l2tp_tunnel *tunnel = l2tp_sock_to_tunnel(sk); while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) kfree_skb(skb); + if (tunnel) { + l2tp_tunnel_closeall(tunnel); + sock_put(sk); + } + sk_refcnt_debug_dec(sk); } @@ -239,6 +245,7 @@ static int l2tp_ip_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) { struct inet_sock *inet = inet_sk(sk); struct sockaddr_l2tpip *addr = (struct sockaddr_l2tpip *) uaddr; + struct net *net = sock_net(sk); int ret; int chk_addr_ret; @@ -251,7 +258,8 @@ static int l2tp_ip_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) ret = -EADDRINUSE; read_lock_bh(&l2tp_ip_lock); - if (__l2tp_ip_bind_lookup(&init_net, addr->l2tp_addr.s_addr, sk->sk_bound_dev_if, addr->l2tp_conn_id)) + if (__l2tp_ip_bind_lookup(net, addr->l2tp_addr.s_addr, + sk->sk_bound_dev_if, addr->l2tp_conn_id)) goto out_in_use; read_unlock_bh(&l2tp_ip_lock); @@ -260,7 +268,7 @@ static int l2tp_ip_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) if (sk->sk_state != TCP_CLOSE || addr_len < sizeof(struct sockaddr_l2tpip)) goto out; - chk_addr_ret = inet_addr_type(&init_net, addr->l2tp_addr.s_addr); + chk_addr_ret = inet_addr_type(net, addr->l2tp_addr.s_addr); ret = -EADDRNOTAVAIL; if (addr->l2tp_addr.s_addr && chk_addr_ret != RTN_LOCAL && chk_addr_ret != RTN_MULTICAST && chk_addr_ret != RTN_BROADCAST) @@ -369,7 +377,7 @@ static int l2tp_ip_backlog_recv(struct sock *sk, struct sk_buff *skb) return 0; drop: - IP_INC_STATS(&init_net, IPSTATS_MIB_INDISCARDS); + IP_INC_STATS(sock_net(sk), IPSTATS_MIB_INDISCARDS); kfree_skb(skb); return -1; } @@ -605,6 +613,7 @@ static struct inet_protosw l2tp_ip_protosw = { static struct net_protocol l2tp_ip_protocol __read_mostly = { .handler = l2tp_ip_recv, + .netns_ok = 1, }; static int __init l2tp_ip_init(void) diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index 8ee4a86ae996..b8a6039314e8 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -60,10 +60,9 @@ static struct sock *__l2tp_ip6_bind_lookup(struct net *net, struct in6_addr *laddr, int dif, u32 tunnel_id) { - struct hlist_node *node; struct sock *sk; - sk_for_each_bound(sk, node, &l2tp_ip6_bind_table) { + 
sk_for_each_bound(sk, &l2tp_ip6_bind_table) { struct in6_addr *addr = inet6_rcv_saddr(sk); struct l2tp_ip6_sock *l2tp = l2tp_ip6_sk(sk); @@ -242,10 +241,17 @@ static void l2tp_ip6_close(struct sock *sk, long timeout) static void l2tp_ip6_destroy_sock(struct sock *sk) { + struct l2tp_tunnel *tunnel = l2tp_sock_to_tunnel(sk); + lock_sock(sk); ip6_flush_pending_frames(sk); release_sock(sk); + if (tunnel) { + l2tp_tunnel_closeall(tunnel); + sock_put(sk); + } + inet6_destroy_sock(sk); } @@ -684,6 +690,7 @@ static int l2tp_ip6_recvmsg(struct kiocb *iocb, struct sock *sk, lsa->l2tp_addr = ipv6_hdr(skb)->saddr; lsa->l2tp_flowinfo = 0; lsa->l2tp_scope_id = 0; + lsa->l2tp_conn_id = 0; if (ipv6_addr_type(&lsa->l2tp_addr) & IPV6_ADDR_LINKLOCAL) lsa->l2tp_scope_id = IP6CB(skb)->iif; } diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c index bbba3a19e944..0825ff26e113 100644 --- a/net/l2tp/l2tp_netlink.c +++ b/net/l2tp/l2tp_netlink.c @@ -37,6 +37,7 @@ static struct genl_family l2tp_nl_family = { .version = L2TP_GENL_VERSION, .hdrsize = 0, .maxattr = L2TP_ATTR_MAX, + .netnsok = true, }; /* Accessed under genl lock */ @@ -245,8 +246,6 @@ static int l2tp_nl_tunnel_send(struct sk_buff *skb, u32 portid, u32 seq, int fla #if IS_ENABLED(CONFIG_IPV6) struct ipv6_pinfo *np = NULL; #endif - struct l2tp_stats stats; - unsigned int start; hdr = genlmsg_put(skb, portid, seq, &l2tp_nl_family, flags, L2TP_CMD_TUNNEL_GET); @@ -264,28 +263,22 @@ static int l2tp_nl_tunnel_send(struct sk_buff *skb, u32 portid, u32 seq, int fla if (nest == NULL) goto nla_put_failure; - do { - start = u64_stats_fetch_begin(&tunnel->stats.syncp); - stats.tx_packets = tunnel->stats.tx_packets; - stats.tx_bytes = tunnel->stats.tx_bytes; - stats.tx_errors = tunnel->stats.tx_errors; - stats.rx_packets = tunnel->stats.rx_packets; - stats.rx_bytes = tunnel->stats.rx_bytes; - stats.rx_errors = tunnel->stats.rx_errors; - stats.rx_seq_discards = tunnel->stats.rx_seq_discards; - stats.rx_oos_packets = tunnel->stats.rx_oos_packets; - } while (u64_stats_fetch_retry(&tunnel->stats.syncp, start)); - - if (nla_put_u64(skb, L2TP_ATTR_TX_PACKETS, stats.tx_packets) || - nla_put_u64(skb, L2TP_ATTR_TX_BYTES, stats.tx_bytes) || - nla_put_u64(skb, L2TP_ATTR_TX_ERRORS, stats.tx_errors) || - nla_put_u64(skb, L2TP_ATTR_RX_PACKETS, stats.rx_packets) || - nla_put_u64(skb, L2TP_ATTR_RX_BYTES, stats.rx_bytes) || + if (nla_put_u64(skb, L2TP_ATTR_TX_PACKETS, + atomic_long_read(&tunnel->stats.tx_packets)) || + nla_put_u64(skb, L2TP_ATTR_TX_BYTES, + atomic_long_read(&tunnel->stats.tx_bytes)) || + nla_put_u64(skb, L2TP_ATTR_TX_ERRORS, + atomic_long_read(&tunnel->stats.tx_errors)) || + nla_put_u64(skb, L2TP_ATTR_RX_PACKETS, + atomic_long_read(&tunnel->stats.rx_packets)) || + nla_put_u64(skb, L2TP_ATTR_RX_BYTES, + atomic_long_read(&tunnel->stats.rx_bytes)) || nla_put_u64(skb, L2TP_ATTR_RX_SEQ_DISCARDS, - stats.rx_seq_discards) || + atomic_long_read(&tunnel->stats.rx_seq_discards)) || nla_put_u64(skb, L2TP_ATTR_RX_OOS_PACKETS, - stats.rx_oos_packets) || - nla_put_u64(skb, L2TP_ATTR_RX_ERRORS, stats.rx_errors)) + atomic_long_read(&tunnel->stats.rx_oos_packets)) || + nla_put_u64(skb, L2TP_ATTR_RX_ERRORS, + atomic_long_read(&tunnel->stats.rx_errors))) goto nla_put_failure; nla_nest_end(skb, nest); @@ -611,8 +604,6 @@ static int l2tp_nl_session_send(struct sk_buff *skb, u32 portid, u32 seq, int fl struct nlattr *nest; struct l2tp_tunnel *tunnel = session->tunnel; struct sock *sk = NULL; - struct l2tp_stats stats; - unsigned int start; sk = tunnel->sock; @@ -655,28 
+646,22 @@ static int l2tp_nl_session_send(struct sk_buff *skb, u32 portid, u32 seq, int fl if (nest == NULL) goto nla_put_failure; - do { - start = u64_stats_fetch_begin(&session->stats.syncp); - stats.tx_packets = session->stats.tx_packets; - stats.tx_bytes = session->stats.tx_bytes; - stats.tx_errors = session->stats.tx_errors; - stats.rx_packets = session->stats.rx_packets; - stats.rx_bytes = session->stats.rx_bytes; - stats.rx_errors = session->stats.rx_errors; - stats.rx_seq_discards = session->stats.rx_seq_discards; - stats.rx_oos_packets = session->stats.rx_oos_packets; - } while (u64_stats_fetch_retry(&session->stats.syncp, start)); - - if (nla_put_u64(skb, L2TP_ATTR_TX_PACKETS, stats.tx_packets) || - nla_put_u64(skb, L2TP_ATTR_TX_BYTES, stats.tx_bytes) || - nla_put_u64(skb, L2TP_ATTR_TX_ERRORS, stats.tx_errors) || - nla_put_u64(skb, L2TP_ATTR_RX_PACKETS, stats.rx_packets) || - nla_put_u64(skb, L2TP_ATTR_RX_BYTES, stats.rx_bytes) || + if (nla_put_u64(skb, L2TP_ATTR_TX_PACKETS, + atomic_long_read(&session->stats.tx_packets)) || + nla_put_u64(skb, L2TP_ATTR_TX_BYTES, + atomic_long_read(&session->stats.tx_bytes)) || + nla_put_u64(skb, L2TP_ATTR_TX_ERRORS, + atomic_long_read(&session->stats.tx_errors)) || + nla_put_u64(skb, L2TP_ATTR_RX_PACKETS, + atomic_long_read(&session->stats.rx_packets)) || + nla_put_u64(skb, L2TP_ATTR_RX_BYTES, + atomic_long_read(&session->stats.rx_bytes)) || nla_put_u64(skb, L2TP_ATTR_RX_SEQ_DISCARDS, - stats.rx_seq_discards) || + atomic_long_read(&session->stats.rx_seq_discards)) || nla_put_u64(skb, L2TP_ATTR_RX_OOS_PACKETS, - stats.rx_oos_packets) || - nla_put_u64(skb, L2TP_ATTR_RX_ERRORS, stats.rx_errors)) + atomic_long_read(&session->stats.rx_oos_packets)) || + nla_put_u64(skb, L2TP_ATTR_RX_ERRORS, + atomic_long_read(&session->stats.rx_errors))) goto nla_put_failure; nla_nest_end(skb, nest); diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index 716605c241f4..637a341c1e2d 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -97,6 +97,7 @@ #include <net/ip.h> #include <net/udp.h> #include <net/xfrm.h> +#include <net/inet_common.h> #include <asm/byteorder.h> #include <linux/atomic.h> @@ -259,7 +260,7 @@ static void pppol2tp_recv(struct l2tp_session *session, struct sk_buff *skb, int session->name); /* Not bound. Nothing we can do, so discard. 
*/ - session->stats.rx_errors++; + atomic_long_inc(&session->stats.rx_errors); kfree_skb(skb); } @@ -355,6 +356,7 @@ static int pppol2tp_sendmsg(struct kiocb *iocb, struct socket *sock, struct msgh l2tp_xmit_skb(session, skb, session->hdr_len); sock_put(ps->tunnel_sock); + sock_put(sk); return error; @@ -446,34 +448,16 @@ static void pppol2tp_session_close(struct l2tp_session *session) { struct pppol2tp_session *ps = l2tp_session_priv(session); struct sock *sk = ps->sock; - struct sk_buff *skb; + struct socket *sock = sk->sk_socket; BUG_ON(session->magic != L2TP_SESSION_MAGIC); - if (session->session_id == 0) - goto out; - - if (sk != NULL) { - lock_sock(sk); - - if (sk->sk_state & (PPPOX_CONNECTED | PPPOX_BOUND)) { - pppox_unbind_sock(sk); - sk->sk_state = PPPOX_DEAD; - sk->sk_state_change(sk); - } - - /* Purge any queued data */ - skb_queue_purge(&sk->sk_receive_queue); - skb_queue_purge(&sk->sk_write_queue); - while ((skb = skb_dequeue(&session->reorder_q))) { - kfree_skb(skb); - sock_put(sk); - } - release_sock(sk); + if (sock) { + inet_shutdown(sock, 2); + /* Don't let the session go away before our socket does */ + l2tp_session_inc_refcount(session); } - -out: return; } @@ -482,19 +466,12 @@ out: */ static void pppol2tp_session_destruct(struct sock *sk) { - struct l2tp_session *session; - - if (sk->sk_user_data != NULL) { - session = sk->sk_user_data; - if (session == NULL) - goto out; - + struct l2tp_session *session = sk->sk_user_data; + if (session) { sk->sk_user_data = NULL; BUG_ON(session->magic != L2TP_SESSION_MAGIC); l2tp_session_dec_refcount(session); } - -out: return; } @@ -524,16 +501,13 @@ static int pppol2tp_release(struct socket *sock) session = pppol2tp_sock_to_session(sk); /* Purge any queued data */ - skb_queue_purge(&sk->sk_receive_queue); - skb_queue_purge(&sk->sk_write_queue); if (session != NULL) { - struct sk_buff *skb; - while ((skb = skb_dequeue(&session->reorder_q))) { - kfree_skb(skb); - sock_put(sk); - } + __l2tp_session_unhash(session); + l2tp_session_queue_purge(session); sock_put(sk); } + skb_queue_purge(&sk->sk_receive_queue); + skb_queue_purge(&sk->sk_write_queue); release_sock(sk); @@ -879,18 +853,6 @@ out: return error; } -/* Called when deleting sessions via the netlink interface. - */ -static int pppol2tp_session_delete(struct l2tp_session *session) -{ - struct pppol2tp_session *ps = l2tp_session_priv(session); - - if (ps->sock == NULL) - l2tp_session_dec_refcount(session); - - return 0; -} - #endif /* CONFIG_L2TP_V3 */ /* getname() support. @@ -1024,14 +986,14 @@ end: static void pppol2tp_copy_stats(struct pppol2tp_ioc_stats *dest, struct l2tp_stats *stats) { - dest->tx_packets = stats->tx_packets; - dest->tx_bytes = stats->tx_bytes; - dest->tx_errors = stats->tx_errors; - dest->rx_packets = stats->rx_packets; - dest->rx_bytes = stats->rx_bytes; - dest->rx_seq_discards = stats->rx_seq_discards; - dest->rx_oos_packets = stats->rx_oos_packets; - dest->rx_errors = stats->rx_errors; + dest->tx_packets = atomic_long_read(&stats->tx_packets); + dest->tx_bytes = atomic_long_read(&stats->tx_bytes); + dest->tx_errors = atomic_long_read(&stats->tx_errors); + dest->rx_packets = atomic_long_read(&stats->rx_packets); + dest->rx_bytes = atomic_long_read(&stats->rx_bytes); + dest->rx_seq_discards = atomic_long_read(&stats->rx_seq_discards); + dest->rx_oos_packets = atomic_long_read(&stats->rx_oos_packets); + dest->rx_errors = atomic_long_read(&stats->rx_errors); } /* Session ioctl helper. 
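The pppol2tp hunks above lean on the reworked core teardown: pppol2tp_release() now unhashes and purges the session before draining the socket queues, and the netlink .session_delete op points straight at the generic l2tp_session_delete(). Read as a sequence, the ordering that l2tp_session_delete() enforces looks like this; a commented restatement of code already shown in the core diff, assuming the ref/deref callbacks declared in l2tp_core.h, not additional kernel code:

#include "l2tp_core.h"	/* struct l2tp_session, as included by l2tp_ppp.c */

static void example_session_teardown(struct l2tp_session *session)
{
	/* Pin the session across teardown if the pseudowire asks for it */
	if (session->ref)
		(*session->ref)(session);

	/* 1. Unlink from the tunnel hash and, for L2TPv3, from the
	 *    per-net hash, so no new lookups can find the session.
	 */
	__l2tp_session_unhash(session);

	/* 2. Drop anything still sitting in reorder_q; the purge also
	 *    runs session->deref once per queued skb.
	 */
	l2tp_session_queue_purge(session);

	/* 3. Only now let the pseudowire shut down its socket state */
	if (session->session_close)
		(*session->session_close)(session);

	if (session->deref)
		(*session->deref)(session);	/* undo the pin above */

	/* 4. Drop the original lookup/creation reference; the session
	 *    may be freed here via l2tp_session_free().
	 */
	l2tp_session_dec_refcount(session);
}

Doing the unhash and purge in the core, ahead of the pseudowire's session_close callback, is what allows the private pppol2tp_session_delete() wrapper to be deleted above.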
@@ -1665,14 +1627,14 @@ static void pppol2tp_seq_tunnel_show(struct seq_file *m, void *v) tunnel->name, (tunnel == tunnel->sock->sk_user_data) ? 'Y' : 'N', atomic_read(&tunnel->ref_count) - 1); - seq_printf(m, " %08x %llu/%llu/%llu %llu/%llu/%llu\n", + seq_printf(m, " %08x %ld/%ld/%ld %ld/%ld/%ld\n", tunnel->debug, - (unsigned long long)tunnel->stats.tx_packets, - (unsigned long long)tunnel->stats.tx_bytes, - (unsigned long long)tunnel->stats.tx_errors, - (unsigned long long)tunnel->stats.rx_packets, - (unsigned long long)tunnel->stats.rx_bytes, - (unsigned long long)tunnel->stats.rx_errors); + atomic_long_read(&tunnel->stats.tx_packets), + atomic_long_read(&tunnel->stats.tx_bytes), + atomic_long_read(&tunnel->stats.tx_errors), + atomic_long_read(&tunnel->stats.rx_packets), + atomic_long_read(&tunnel->stats.rx_bytes), + atomic_long_read(&tunnel->stats.rx_errors)); } static void pppol2tp_seq_session_show(struct seq_file *m, void *v) @@ -1707,14 +1669,14 @@ static void pppol2tp_seq_session_show(struct seq_file *m, void *v) session->lns_mode ? "LNS" : "LAC", session->debug, jiffies_to_msecs(session->reorder_timeout)); - seq_printf(m, " %hu/%hu %llu/%llu/%llu %llu/%llu/%llu\n", + seq_printf(m, " %hu/%hu %ld/%ld/%ld %ld/%ld/%ld\n", session->nr, session->ns, - (unsigned long long)session->stats.tx_packets, - (unsigned long long)session->stats.tx_bytes, - (unsigned long long)session->stats.tx_errors, - (unsigned long long)session->stats.rx_packets, - (unsigned long long)session->stats.rx_bytes, - (unsigned long long)session->stats.rx_errors); + atomic_long_read(&session->stats.tx_packets), + atomic_long_read(&session->stats.tx_bytes), + atomic_long_read(&session->stats.tx_errors), + atomic_long_read(&session->stats.rx_packets), + atomic_long_read(&session->stats.rx_bytes), + atomic_long_read(&session->stats.rx_errors)); if (po) seq_printf(m, " interface %s\n", ppp_dev_name(&po->chan)); @@ -1783,7 +1745,8 @@ static __net_init int pppol2tp_init_net(struct net *net) struct proc_dir_entry *pde; int err = 0; - pde = proc_net_fops_create(net, "pppol2tp", S_IRUGO, &pppol2tp_proc_fops); + pde = proc_create("pppol2tp", S_IRUGO, net->proc_net, + &pppol2tp_proc_fops); if (!pde) { err = -ENOMEM; goto out; @@ -1795,7 +1758,7 @@ out: static __net_exit void pppol2tp_exit_net(struct net *net) { - proc_net_remove(net, "pppol2tp"); + remove_proc_entry("pppol2tp", net->proc_net); } static struct pernet_operations pppol2tp_net_ops = { @@ -1837,7 +1800,7 @@ static const struct pppox_proto pppol2tp_proto = { static const struct l2tp_nl_cmd_ops pppol2tp_nl_cmd_ops = { .session_create = pppol2tp_session_create, - .session_delete = pppol2tp_session_delete, + .session_delete = l2tp_session_delete, }; #endif /* CONFIG_L2TP_V3 */ diff --git a/net/lapb/Kconfig b/net/lapb/Kconfig index f0b5efb31a00..6481839b76c9 100644 --- a/net/lapb/Kconfig +++ b/net/lapb/Kconfig @@ -3,8 +3,7 @@ # config LAPB - tristate "LAPB Data Link Driver (EXPERIMENTAL)" - depends on EXPERIMENTAL + tristate "LAPB Data Link Driver" ---help--- Link Access Procedure, Balanced (LAPB) is the data link layer (i.e. the lower) part of the X.25 protocol. 
It offers a reliable diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 88709882c464..48aaa89253e0 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -720,6 +720,8 @@ static int llc_ui_recvmsg(struct kiocb *iocb, struct socket *sock, int target; /* Read at least this many bytes */ long timeo; + msg->msg_namelen = 0; + lock_sock(sk); copied = -ENOTCONN; if (unlikely(sk->sk_type == SOCK_STREAM && sk->sk_state == TCP_LISTEN)) diff --git a/net/llc/llc_sap.c b/net/llc/llc_sap.c index 7c5073badc73..78be45cda5c1 100644 --- a/net/llc/llc_sap.c +++ b/net/llc/llc_sap.c @@ -393,12 +393,11 @@ static void llc_sap_mcast(struct llc_sap *sap, { int i = 0, count = 256 / sizeof(struct sock *); struct sock *sk, *stack[count]; - struct hlist_node *node; struct llc_sock *llc; struct hlist_head *dev_hb = llc_sk_dev_hash(sap, skb->dev->ifindex); spin_lock_bh(&sap->sk_lock); - hlist_for_each_entry(llc, node, dev_hb, dev_hash_node) { + hlist_for_each_entry(llc, dev_hb, dev_hash_node) { sk = &llc->sk; diff --git a/net/mac80211/Kconfig b/net/mac80211/Kconfig index b4ecf267a34b..62535fe9f570 100644 --- a/net/mac80211/Kconfig +++ b/net/mac80211/Kconfig @@ -81,7 +81,7 @@ comment "Some wireless drivers require a rate control algorithm" config MAC80211_MESH bool "Enable mac80211 mesh networking (pre-802.11s) support" - depends on MAC80211 && EXPERIMENTAL + depends on MAC80211 ---help--- This options enables support of Draft 802.11s mesh networking. The implementation is based on Draft 2.08 of the Mesh Networking @@ -258,6 +258,17 @@ config MAC80211_MESH_SYNC_DEBUG Do not select this option. +config MAC80211_MESH_PS_DEBUG + bool "Verbose mesh powersave debugging" + depends on MAC80211_DEBUG_MENU + depends on MAC80211_MESH + ---help--- + Selecting this option causes mac80211 to print out very verbose mesh + powersave debugging messages (when mac80211 is taking part in a + mesh network). + + Do not select this option. 
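The one-argument-shorter sk_for_each_bound() and hlist_for_each_entry() calls in the l2tp_ip, l2tp_ip6 and llc_sap hunks all stem from the same tree-wide cleanup that dropped the node parameter from hlist iterators. A self-contained sketch with a hypothetical item struct; the macro arity shown is the post-cleanup one, so the old form appears only in the comment:

#include <linux/list.h>

struct item {
	int value;
	struct hlist_node link;	/* chains item into an hlist */
};

static int sum_items(struct hlist_head *head)
{
	struct item *it;
	int sum = 0;

	/* The older iterator needed an explicit cursor variable:
	 *
	 *	struct hlist_node *pos;
	 *	hlist_for_each_entry(it, pos, head, link)
	 *
	 * The reworked macro derives the entry pointer directly from
	 * the list linkage, so the cursor, and its declaration in every
	 * caller (the deleted "struct hlist_node *node" lines above),
	 * simply goes away.
	 */
	hlist_for_each_entry(it, head, link)
		sum += it->value;

	return sum;
}

sk_for_each_bound() is a thin wrapper around the same macro (iterating over sk_bind_node), which is why it loses its node argument in lockstep.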
+ config MAC80211_TDLS_DEBUG bool "Verbose TDLS debugging" depends on MAC80211_DEBUG_MENU diff --git a/net/mac80211/Makefile b/net/mac80211/Makefile index 4911202334d9..9d7d840aac6d 100644 --- a/net/mac80211/Makefile +++ b/net/mac80211/Makefile @@ -39,7 +39,8 @@ mac80211-$(CONFIG_MAC80211_MESH) += \ mesh_pathtbl.o \ mesh_plink.o \ mesh_hwmp.o \ - mesh_sync.o + mesh_sync.o \ + mesh_ps.o mac80211-$(CONFIG_PM) += pm.o diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c index 808338a1bce5..31bf2586fb84 100644 --- a/net/mac80211/agg-rx.c +++ b/net/mac80211/agg-rx.c @@ -83,8 +83,8 @@ void ___ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid, if (drv_ampdu_action(local, sta->sdata, IEEE80211_AMPDU_RX_STOP, &sta->sta, tid, NULL, 0)) sdata_info(sta->sdata, - "HW problem - can not stop rx aggregation for tid %d\n", - tid); + "HW problem - can not stop rx aggregation for %pM tid %d\n", + sta->sta.addr, tid); /* check if this is a self generated aggregation halt */ if (initiator == WLAN_BACK_RECIPIENT && tx) @@ -159,7 +159,8 @@ static void sta_rx_agg_session_timer_expired(unsigned long data) } rcu_read_unlock(); - ht_dbg(sta->sdata, "rx session timer expired on tid %d\n", (u16)*ptid); + ht_dbg(sta->sdata, "RX session timer expired on %pM tid %d\n", + sta->sta.addr, (u16)*ptid); set_bit(*ptid, sta->ampdu_mlme.tid_rx_timer_expired); ieee80211_queue_work(&sta->local->hw, &sta->ampdu_mlme.work); @@ -247,7 +248,9 @@ void ieee80211_process_addba_request(struct ieee80211_local *local, status = WLAN_STATUS_REQUEST_DECLINED; if (test_sta_flag(sta, WLAN_STA_BLOCK_BA)) { - ht_dbg(sta->sdata, "Suspend in progress - Denying ADDBA request\n"); + ht_dbg(sta->sdata, + "Suspend in progress - Denying ADDBA request (%pM tid %d)\n", + sta->sta.addr, tid); goto end_no_lock; } @@ -317,7 +320,8 @@ void ieee80211_process_addba_request(struct ieee80211_local *local, ret = drv_ampdu_action(local, sta->sdata, IEEE80211_AMPDU_RX_START, &sta->sta, tid, &start_seq_num, 0); - ht_dbg(sta->sdata, "Rx A-MPDU request on tid %d result %d\n", tid, ret); + ht_dbg(sta->sdata, "Rx A-MPDU request on %pM tid %d result %d\n", + sta->sta.addr, tid, ret); if (ret) { kfree(tid_agg_rx->reorder_buf); kfree(tid_agg_rx->reorder_time); diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index eb9df22418f0..13b7683de5a4 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -149,16 +149,133 @@ void ieee80211_assign_tid_tx(struct sta_info *sta, int tid, rcu_assign_pointer(sta->ampdu_mlme.tid_tx[tid], tid_tx); } +static inline int ieee80211_ac_from_tid(int tid) +{ + return ieee802_1d_to_ac[tid & 7]; +} + +/* + * When multiple aggregation sessions on multiple stations + * are being created/destroyed simultaneously, we need to + * refcount the global queue stop caused by that in order + * to not get into a situation where one of the aggregation + * setup or teardown re-enables queues before the other is + * ready to handle that. + * + * These two functions take care of this issue by keeping + * a global "agg_queue_stop" refcount. 
+ */ +static void __acquires(agg_queue) +ieee80211_stop_queue_agg(struct ieee80211_sub_if_data *sdata, int tid) +{ + int queue = sdata->vif.hw_queue[ieee80211_ac_from_tid(tid)]; + + if (atomic_inc_return(&sdata->local->agg_queue_stop[queue]) == 1) + ieee80211_stop_queue_by_reason( + &sdata->local->hw, queue, + IEEE80211_QUEUE_STOP_REASON_AGGREGATION); + __acquire(agg_queue); +} + +static void __releases(agg_queue) +ieee80211_wake_queue_agg(struct ieee80211_sub_if_data *sdata, int tid) +{ + int queue = sdata->vif.hw_queue[ieee80211_ac_from_tid(tid)]; + + if (atomic_dec_return(&sdata->local->agg_queue_stop[queue]) == 0) + ieee80211_wake_queue_by_reason( + &sdata->local->hw, queue, + IEEE80211_QUEUE_STOP_REASON_AGGREGATION); + __release(agg_queue); +} + +/* + * splice packets from the STA's pending to the local pending, + * requires a call to ieee80211_agg_splice_finish later + */ +static void __acquires(agg_queue) +ieee80211_agg_splice_packets(struct ieee80211_sub_if_data *sdata, + struct tid_ampdu_tx *tid_tx, u16 tid) +{ + struct ieee80211_local *local = sdata->local; + int queue = sdata->vif.hw_queue[ieee80211_ac_from_tid(tid)]; + unsigned long flags; + + ieee80211_stop_queue_agg(sdata, tid); + + if (WARN(!tid_tx, + "TID %d gone but expected when splicing aggregates from the pending queue\n", + tid)) + return; + + if (!skb_queue_empty(&tid_tx->pending)) { + spin_lock_irqsave(&local->queue_stop_reason_lock, flags); + /* copy over remaining packets */ + skb_queue_splice_tail_init(&tid_tx->pending, + &local->pending[queue]); + spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags); + } +} + +static void __releases(agg_queue) +ieee80211_agg_splice_finish(struct ieee80211_sub_if_data *sdata, u16 tid) +{ + ieee80211_wake_queue_agg(sdata, tid); +} + +static void ieee80211_remove_tid_tx(struct sta_info *sta, int tid) +{ + struct tid_ampdu_tx *tid_tx; + + lockdep_assert_held(&sta->ampdu_mlme.mtx); + lockdep_assert_held(&sta->lock); + + tid_tx = rcu_dereference_protected_tid_tx(sta, tid); + + /* + * When we get here, the TX path will not be lockless any more wrt. + * aggregation, since the OPERATIONAL bit has long been cleared. + * Thus it will block on getting the lock, if it occurs. So if we + * stop the queue now, we will not get any more packets, and any + * that might be being processed will wait for us here, thereby + * guaranteeing that no packets go to the tid_tx pending queue any + * more. 
+ */ + + ieee80211_agg_splice_packets(sta->sdata, tid_tx, tid); + + /* future packets must not find the tid_tx struct any more */ + ieee80211_assign_tid_tx(sta, tid, NULL); + + ieee80211_agg_splice_finish(sta->sdata, tid); + + kfree_rcu(tid_tx, rcu_head); +} + int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, - enum ieee80211_back_parties initiator, - bool tx) + enum ieee80211_agg_stop_reason reason) { struct ieee80211_local *local = sta->local; struct tid_ampdu_tx *tid_tx; + enum ieee80211_ampdu_mlme_action action; int ret; lockdep_assert_held(&sta->ampdu_mlme.mtx); + switch (reason) { + case AGG_STOP_DECLINED: + case AGG_STOP_LOCAL_REQUEST: + case AGG_STOP_PEER_REQUEST: + action = IEEE80211_AMPDU_TX_STOP_CONT; + break; + case AGG_STOP_DESTROY_STA: + action = IEEE80211_AMPDU_TX_STOP_FLUSH; + break; + default: + WARN_ON_ONCE(1); + return -EINVAL; + } + spin_lock_bh(&sta->lock); tid_tx = rcu_dereference_protected_tid_tx(sta, tid); @@ -167,10 +284,19 @@ int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, return -ENOENT; } - /* if we're already stopping ignore any new requests to stop */ + /* + * if we're already stopping ignore any new requests to stop + * unless we're destroying it in which case notify the driver + */ if (test_bit(HT_AGG_STATE_STOPPING, &tid_tx->state)) { spin_unlock_bh(&sta->lock); - return -EALREADY; + if (reason != AGG_STOP_DESTROY_STA) + return -EALREADY; + ret = drv_ampdu_action(local, sta->sdata, + IEEE80211_AMPDU_TX_STOP_FLUSH_CONT, + &sta->sta, tid, NULL, 0); + WARN_ON_ONCE(ret); + return 0; } if (test_bit(HT_AGG_STATE_WANT_START, &tid_tx->state)) { @@ -212,11 +338,12 @@ int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, */ synchronize_net(); - tid_tx->stop_initiator = initiator; - tid_tx->tx_stop = tx; + tid_tx->stop_initiator = reason == AGG_STOP_PEER_REQUEST ? + WLAN_BACK_RECIPIENT : + WLAN_BACK_INITIATOR; + tid_tx->tx_stop = reason == AGG_STOP_LOCAL_REQUEST; - ret = drv_ampdu_action(local, sta->sdata, - IEEE80211_AMPDU_TX_STOP, + ret = drv_ampdu_action(local, sta->sdata, action, &sta->sta, tid, NULL, 0); /* HW shall not deny going back to legacy */ @@ -227,7 +354,17 @@ int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, */ } - return ret; + /* + * In the case of AGG_STOP_DESTROY_STA, the driver won't + * necessarily call ieee80211_stop_tx_ba_cb(), so this may + * seem like we can leave the tid_tx data pending forever. + * This is true, in a way, but "forever" is only until the + * station struct is actually destroyed. In the meantime, + * leaving it around ensures that we don't transmit packets + * to the driver on this TID which might confuse it. 
+ */ + + return 0; } /* @@ -253,91 +390,18 @@ static void sta_addba_resp_timer_expired(unsigned long data) test_bit(HT_AGG_STATE_RESPONSE_RECEIVED, &tid_tx->state)) { rcu_read_unlock(); ht_dbg(sta->sdata, - "timer expired on tid %d but we are not (or no longer) expecting addBA response there\n", - tid); + "timer expired on %pM tid %d but we are not (or no longer) expecting addBA response there\n", + sta->sta.addr, tid); return; } - ht_dbg(sta->sdata, "addBA response timer expired on tid %d\n", tid); + ht_dbg(sta->sdata, "addBA response timer expired on %pM tid %d\n", + sta->sta.addr, tid); ieee80211_stop_tx_ba_session(&sta->sta, tid); rcu_read_unlock(); } -static inline int ieee80211_ac_from_tid(int tid) -{ - return ieee802_1d_to_ac[tid & 7]; -} - -/* - * When multiple aggregation sessions on multiple stations - * are being created/destroyed simultaneously, we need to - * refcount the global queue stop caused by that in order - * to not get into a situation where one of the aggregation - * setup or teardown re-enables queues before the other is - * ready to handle that. - * - * These two functions take care of this issue by keeping - * a global "agg_queue_stop" refcount. - */ -static void __acquires(agg_queue) -ieee80211_stop_queue_agg(struct ieee80211_sub_if_data *sdata, int tid) -{ - int queue = sdata->vif.hw_queue[ieee80211_ac_from_tid(tid)]; - - if (atomic_inc_return(&sdata->local->agg_queue_stop[queue]) == 1) - ieee80211_stop_queue_by_reason( - &sdata->local->hw, queue, - IEEE80211_QUEUE_STOP_REASON_AGGREGATION); - __acquire(agg_queue); -} - -static void __releases(agg_queue) -ieee80211_wake_queue_agg(struct ieee80211_sub_if_data *sdata, int tid) -{ - int queue = sdata->vif.hw_queue[ieee80211_ac_from_tid(tid)]; - - if (atomic_dec_return(&sdata->local->agg_queue_stop[queue]) == 0) - ieee80211_wake_queue_by_reason( - &sdata->local->hw, queue, - IEEE80211_QUEUE_STOP_REASON_AGGREGATION); - __release(agg_queue); -} - -/* - * splice packets from the STA's pending to the local pending, - * requires a call to ieee80211_agg_splice_finish later - */ -static void __acquires(agg_queue) -ieee80211_agg_splice_packets(struct ieee80211_sub_if_data *sdata, - struct tid_ampdu_tx *tid_tx, u16 tid) -{ - struct ieee80211_local *local = sdata->local; - int queue = sdata->vif.hw_queue[ieee80211_ac_from_tid(tid)]; - unsigned long flags; - - ieee80211_stop_queue_agg(sdata, tid); - - if (WARN(!tid_tx, - "TID %d gone but expected when splicing aggregates from the pending queue\n", - tid)) - return; - - if (!skb_queue_empty(&tid_tx->pending)) { - spin_lock_irqsave(&local->queue_stop_reason_lock, flags); - /* copy over remaining packets */ - skb_queue_splice_tail_init(&tid_tx->pending, - &local->pending[queue]); - spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags); - } -} - -static void __releases(agg_queue) -ieee80211_agg_splice_finish(struct ieee80211_sub_if_data *sdata, u16 tid) -{ - ieee80211_wake_queue_agg(sdata, tid); -} - void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid) { struct tid_ampdu_tx *tid_tx; @@ -369,7 +433,8 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid) &sta->sta, tid, &start_seq_num, 0); if (ret) { ht_dbg(sdata, - "BA request denied - HW unavailable for tid %d\n", tid); + "BA request denied - HW unavailable for %pM tid %d\n", + sta->sta.addr, tid); spin_lock_bh(&sta->lock); ieee80211_agg_splice_packets(sdata, tid_tx, tid); ieee80211_assign_tid_tx(sta, tid, NULL); @@ -382,7 +447,8 @@ void ieee80211_tx_ba_session_handle_start(struct 
sta_info *sta, int tid) /* activate the timer for the recipient's addBA response */ mod_timer(&tid_tx->addba_resp_timer, jiffies + ADDBA_RESP_INTERVAL); - ht_dbg(sdata, "activated addBA response timer on tid %d\n", tid); + ht_dbg(sdata, "activated addBA response timer on %pM tid %d\n", + sta->sta.addr, tid); spin_lock_bh(&sta->lock); sta->ampdu_mlme.last_addba_req_time[tid] = jiffies; @@ -429,7 +495,8 @@ static void sta_tx_agg_session_timer_expired(unsigned long data) rcu_read_unlock(); - ht_dbg(sta->sdata, "tx session timer expired on tid %d\n", (u16)*ptid); + ht_dbg(sta->sdata, "tx session timer expired on %pM tid %d\n", + sta->sta.addr, (u16)*ptid); ieee80211_stop_tx_ba_session(&sta->sta, *ptid); } @@ -465,7 +532,8 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid, if (test_sta_flag(sta, WLAN_STA_BLOCK_BA)) { ht_dbg(sdata, - "BA sessions blocked - Denying BA session request\n"); + "BA sessions blocked - Denying BA session request %pM tid %d\n", + sta->sta.addr, tid); return -EINVAL; } @@ -506,8 +574,8 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid, time_before(jiffies, sta->ampdu_mlme.last_addba_req_time[tid] + HT_AGG_RETRIES_PERIOD)) { ht_dbg(sdata, - "BA request denied - waiting a grace period after %d failed requests on tid %u\n", - sta->ampdu_mlme.addba_req_num[tid], tid); + "BA request denied - waiting a grace period after %d failed requests on %pM tid %u\n", + sta->ampdu_mlme.addba_req_num[tid], sta->sta.addr, tid); ret = -EBUSY; goto err_unlock_sta; } @@ -516,8 +584,8 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid, /* check if the TID is not in aggregation flow already */ if (tid_tx || sta->ampdu_mlme.tid_start_tx[tid]) { ht_dbg(sdata, - "BA request denied - session is not idle on tid %u\n", - tid); + "BA request denied - session is not idle on %pM tid %u\n", + sta->sta.addr, tid); ret = -EAGAIN; goto err_unlock_sta; } @@ -572,7 +640,8 @@ static void ieee80211_agg_tx_operational(struct ieee80211_local *local, tid_tx = rcu_dereference_protected_tid_tx(sta, tid); - ht_dbg(sta->sdata, "Aggregation is on for tid %d\n", tid); + ht_dbg(sta->sdata, "Aggregation is on for %pM tid %d\n", + sta->sta.addr, tid); drv_ampdu_action(local, sta->sdata, IEEE80211_AMPDU_TX_OPERATIONAL, @@ -660,14 +729,13 @@ void ieee80211_start_tx_ba_cb_irqsafe(struct ieee80211_vif *vif, EXPORT_SYMBOL(ieee80211_start_tx_ba_cb_irqsafe); int __ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, - enum ieee80211_back_parties initiator, - bool tx) + enum ieee80211_agg_stop_reason reason) { int ret; mutex_lock(&sta->ampdu_mlme.mtx); - ret = ___ieee80211_stop_tx_ba_session(sta, tid, initiator, tx); + ret = ___ieee80211_stop_tx_ba_session(sta, tid, reason); mutex_unlock(&sta->ampdu_mlme.mtx); @@ -743,7 +811,9 @@ void ieee80211_stop_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u8 tid) tid_tx = rcu_dereference_protected_tid_tx(sta, tid); if (!tid_tx || !test_bit(HT_AGG_STATE_STOPPING, &tid_tx->state)) { - ht_dbg(sdata, "unexpected callback to A-MPDU stop\n"); + ht_dbg(sdata, + "unexpected callback to A-MPDU stop for %pM tid %d\n", + sta->sta.addr, tid); goto unlock_sta; } @@ -751,24 +821,7 @@ void ieee80211_stop_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u8 tid) ieee80211_send_delba(sta->sdata, ra, tid, WLAN_BACK_INITIATOR, WLAN_REASON_QSTA_NOT_USE); - /* - * When we get here, the TX path will not be lockless any more wrt. - * aggregation, since the OPERATIONAL bit has long been cleared. 
- * Thus it will block on getting the lock, if it occurs. So if we - * stop the queue now, we will not get any more packets, and any - * that might be being processed will wait for us here, thereby - * guaranteeing that no packets go to the tid_tx pending queue any - * more. - */ - - ieee80211_agg_splice_packets(sta->sdata, tid_tx, tid); - - /* future packets must not find the tid_tx struct any more */ - ieee80211_assign_tid_tx(sta, tid, NULL); - - ieee80211_agg_splice_finish(sta->sdata, tid); - - kfree_rcu(tid_tx, rcu_head); + ieee80211_remove_tid_tx(sta, tid); unlock_sta: spin_unlock_bh(&sta->lock); @@ -819,13 +872,15 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local, goto out; if (mgmt->u.action.u.addba_resp.dialog_token != tid_tx->dialog_token) { - ht_dbg(sta->sdata, "wrong addBA response token, tid %d\n", tid); + ht_dbg(sta->sdata, "wrong addBA response token, %pM tid %d\n", + sta->sta.addr, tid); goto out; } del_timer_sync(&tid_tx->addba_resp_timer); - ht_dbg(sta->sdata, "switched off addBA timer for tid %d\n", tid); + ht_dbg(sta->sdata, "switched off addBA timer for %pM tid %d\n", + sta->sta.addr, tid); /* * addba_resp_timer may have fired before we got here, and @@ -835,8 +890,8 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local, if (test_bit(HT_AGG_STATE_WANT_STOP, &tid_tx->state) || test_bit(HT_AGG_STATE_STOPPING, &tid_tx->state)) { ht_dbg(sta->sdata, - "got addBA resp for tid %d but we already gave up\n", - tid); + "got addBA resp for %pM tid %d but we already gave up\n", + sta->sta.addr, tid); goto out; } @@ -868,8 +923,7 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local, } } else { - ___ieee80211_stop_tx_ba_session(sta, tid, WLAN_BACK_INITIATOR, - false); + ___ieee80211_stop_tx_ba_session(sta, tid, AGG_STOP_DECLINED); } out: diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 0479c64aa83c..1a89c80e6407 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -175,7 +175,7 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev, * add it to the device after the station. 
*/ if (!sta || !test_sta_flag(sta, WLAN_STA_ASSOC)) { - ieee80211_key_free(sdata->local, key); + ieee80211_key_free_unused(key); err = -ENOENT; goto out_unlock; } @@ -214,8 +214,6 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev, } err = ieee80211_key_link(key, sdata, sta); - if (err) - ieee80211_key_free(sdata->local, key); out_unlock: mutex_unlock(&sdata->local->sta_mtx); @@ -254,7 +252,7 @@ static int ieee80211_del_key(struct wiphy *wiphy, struct net_device *dev, goto out_unlock; } - __ieee80211_key_free(key); + ieee80211_key_free(key, true); ret = 0; out_unlock: @@ -445,12 +443,14 @@ static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) struct ieee80211_sub_if_data *sdata = sta->sdata; struct ieee80211_local *local = sdata->local; struct timespec uptime; + u64 packets = 0; + int ac; sinfo->generation = sdata->local->sta_generation; sinfo->filled = STATION_INFO_INACTIVE_TIME | - STATION_INFO_RX_BYTES | - STATION_INFO_TX_BYTES | + STATION_INFO_RX_BYTES64 | + STATION_INFO_TX_BYTES64 | STATION_INFO_RX_PACKETS | STATION_INFO_TX_PACKETS | STATION_INFO_TX_RETRIES | @@ -467,10 +467,14 @@ static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) sinfo->connected_time = uptime.tv_sec - sta->last_connected; sinfo->inactive_time = jiffies_to_msecs(jiffies - sta->last_rx); + sinfo->tx_bytes = 0; + for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) { + sinfo->tx_bytes += sta->tx_bytes[ac]; + packets += sta->tx_packets[ac]; + } + sinfo->tx_packets = packets; sinfo->rx_bytes = sta->rx_bytes; - sinfo->tx_bytes = sta->tx_bytes; sinfo->rx_packets = sta->rx_packets; - sinfo->tx_packets = sta->tx_packets; sinfo->tx_retries = sta->tx_retry_count; sinfo->tx_failed = sta->tx_retry_failed; sinfo->rx_dropped_misc = sta->rx_dropped; @@ -492,7 +496,10 @@ static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) #ifdef CONFIG_MAC80211_MESH sinfo->filled |= STATION_INFO_LLID | STATION_INFO_PLID | - STATION_INFO_PLINK_STATE; + STATION_INFO_PLINK_STATE | + STATION_INFO_LOCAL_PM | + STATION_INFO_PEER_PM | + STATION_INFO_NONPEER_PM; sinfo->llid = le16_to_cpu(sta->llid); sinfo->plid = le16_to_cpu(sta->plid); @@ -501,6 +508,9 @@ static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) sinfo->filled |= STATION_INFO_T_OFFSET; sinfo->t_offset = sta->t_offset; } + sinfo->local_pm = sta->local_pm; + sinfo->peer_pm = sta->peer_pm; + sinfo->nonpeer_pm = sta->nonpeer_pm; #endif } @@ -520,6 +530,7 @@ static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) BIT(NL80211_STA_FLAG_WME) | BIT(NL80211_STA_FLAG_MFP) | BIT(NL80211_STA_FLAG_AUTHENTICATED) | + BIT(NL80211_STA_FLAG_ASSOCIATED) | BIT(NL80211_STA_FLAG_TDLS_PEER); if (test_sta_flag(sta, WLAN_STA_AUTHORIZED)) sinfo->sta_flags.set |= BIT(NL80211_STA_FLAG_AUTHORIZED); @@ -531,6 +542,8 @@ static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) sinfo->sta_flags.set |= BIT(NL80211_STA_FLAG_MFP); if (test_sta_flag(sta, WLAN_STA_AUTH)) sinfo->sta_flags.set |= BIT(NL80211_STA_FLAG_AUTHENTICATED); + if (test_sta_flag(sta, WLAN_STA_ASSOC)) + sinfo->sta_flags.set |= BIT(NL80211_STA_FLAG_ASSOCIATED); if (test_sta_flag(sta, WLAN_STA_TDLS_PEER)) sinfo->sta_flags.set |= BIT(NL80211_STA_FLAG_TDLS_PEER); } @@ -589,8 +602,8 @@ static void ieee80211_get_et_stats(struct wiphy *wiphy, data[i++] += sta->rx_fragments; \ data[i++] += sta->rx_dropped; \ \ - data[i++] += sta->tx_packets; \ - data[i++] += sta->tx_bytes; \ + data[i++] += sinfo.tx_packets; \ + data[i++] += 
sinfo.tx_bytes; \ data[i++] += sta->tx_fragments; \ data[i++] += sta->tx_filtered_count; \ data[i++] += sta->tx_retry_failed; \ @@ -612,13 +625,14 @@ static void ieee80211_get_et_stats(struct wiphy *wiphy, if (!(sta && !WARN_ON(sta->sdata->dev != dev))) goto do_survey; + sinfo.filled = 0; + sta_set_sinfo(sta, &sinfo); + i = 0; ADD_STA_STATS(sta); data[i++] = sta->sta_state; - sinfo.filled = 0; - sta_set_sinfo(sta, &sinfo); if (sinfo.filled & STATION_INFO_TX_BITRATE) data[i] = 100000 * @@ -791,8 +805,7 @@ static int ieee80211_set_monitor_channel(struct wiphy *wiphy, IEEE80211_CHANCTX_EXCLUSIVE); } } else if (local->open_count == local->monitors) { - local->_oper_channel = chandef->chan; - local->_oper_channel_type = cfg80211_get_chandef_type(chandef); + local->_oper_chandef = *chandef; ieee80211_hw_config(local, 0); } @@ -919,11 +932,13 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev, /* TODO: make hostapd tell us what it wants */ sdata->smps_mode = IEEE80211_SMPS_OFF; sdata->needed_rx_chains = sdata->local->rx_chains; + sdata->radar_required = params->radar_required; err = ieee80211_vif_use_channel(sdata, &params->chandef, IEEE80211_CHANCTX_SHARED); if (err) return err; + ieee80211_vif_copy_chanctx_to_vlans(sdata, false); /* * Apply control port protocol, this allows us to @@ -940,6 +955,7 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev, sdata->vif.bss_conf.beacon_int = params->beacon_interval; sdata->vif.bss_conf.dtim_period = params->dtim_period; + sdata->vif.bss_conf.enable_beacon = true; sdata->vif.bss_conf.ssid_len = params->ssid_len; if (params->ssid_len) @@ -948,8 +964,13 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev, sdata->vif.bss_conf.hidden_ssid = (params->hidden_ssid != NL80211_HIDDEN_SSID_NOT_IN_USE); - sdata->vif.bss_conf.p2p_ctwindow = params->p2p_ctwindow; - sdata->vif.bss_conf.p2p_oppps = params->p2p_opp_ps; + memset(&sdata->vif.bss_conf.p2p_noa_attr, 0, + sizeof(sdata->vif.bss_conf.p2p_noa_attr)); + sdata->vif.bss_conf.p2p_noa_attr.oppps_ctwindow = + params->p2p_ctwindow & IEEE80211_P2P_OPPPS_CTWINDOW_MASK; + if (params->p2p_opp_ps) + sdata->vif.bss_conf.p2p_noa_attr.oppps_ctwindow |= + IEEE80211_P2P_OPPPS_ENABLE_BIT; err = ieee80211_assign_beacon(sdata, &params->beacon); if (err < 0) @@ -1020,8 +1041,20 @@ static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev) kfree_rcu(old_probe_resp, rcu_head); list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list) - sta_info_flush(local, vlan); - sta_info_flush(local, sdata); + sta_info_flush_defer(vlan); + sta_info_flush_defer(sdata); + synchronize_net(); + rcu_barrier(); + list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list) { + sta_info_flush_cleanup(vlan); + ieee80211_free_keys(vlan); + } + sta_info_flush_cleanup(sdata); + ieee80211_free_keys(sdata); + + sdata->vif.bss_conf.enable_beacon = false; + sdata->vif.bss_conf.ssid_len = 0; + clear_bit(SDATA_STATE_OFFCHANNEL_BEACON_STOPPED, &sdata->state); ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON_ENABLED); drv_stop_ap(sdata->local, sdata); @@ -1030,6 +1063,7 @@ static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev) local->total_ps_buffered -= skb_queue_len(&sdata->u.ap.ps.bc_buf); skb_queue_purge(&sdata->u.ap.ps.bc_buf); + ieee80211_vif_copy_chanctx_to_vlans(sdata, true); ieee80211_vif_release_channel(sdata); return 0; @@ -1079,6 +1113,58 @@ static void ieee80211_send_layer2_update(struct sta_info *sta) netif_rx_ni(skb); } +static int 
sta_apply_auth_flags(struct ieee80211_local *local, + struct sta_info *sta, + u32 mask, u32 set) +{ + int ret; + + if (mask & BIT(NL80211_STA_FLAG_AUTHENTICATED) && + set & BIT(NL80211_STA_FLAG_AUTHENTICATED) && + !test_sta_flag(sta, WLAN_STA_AUTH)) { + ret = sta_info_move_state(sta, IEEE80211_STA_AUTH); + if (ret) + return ret; + } + + if (mask & BIT(NL80211_STA_FLAG_ASSOCIATED) && + set & BIT(NL80211_STA_FLAG_ASSOCIATED) && + !test_sta_flag(sta, WLAN_STA_ASSOC)) { + ret = sta_info_move_state(sta, IEEE80211_STA_ASSOC); + if (ret) + return ret; + } + + if (mask & BIT(NL80211_STA_FLAG_AUTHORIZED)) { + if (set & BIT(NL80211_STA_FLAG_AUTHORIZED)) + ret = sta_info_move_state(sta, IEEE80211_STA_AUTHORIZED); + else if (test_sta_flag(sta, WLAN_STA_AUTHORIZED)) + ret = sta_info_move_state(sta, IEEE80211_STA_ASSOC); + else + ret = 0; + if (ret) + return ret; + } + + if (mask & BIT(NL80211_STA_FLAG_ASSOCIATED) && + !(set & BIT(NL80211_STA_FLAG_ASSOCIATED)) && + test_sta_flag(sta, WLAN_STA_ASSOC)) { + ret = sta_info_move_state(sta, IEEE80211_STA_AUTH); + if (ret) + return ret; + } + + if (mask & BIT(NL80211_STA_FLAG_AUTHENTICATED) && + !(set & BIT(NL80211_STA_FLAG_AUTHENTICATED)) && + test_sta_flag(sta, WLAN_STA_AUTH)) { + ret = sta_info_move_state(sta, IEEE80211_STA_NONE); + if (ret) + return ret; + } + + return 0; +} + static int sta_apply_parameters(struct ieee80211_local *local, struct sta_info *sta, struct station_parameters *params) @@ -1096,52 +1182,32 @@ static int sta_apply_parameters(struct ieee80211_local *local, mask = params->sta_flags_mask; set = params->sta_flags_set; - /* - * In mesh mode, we can clear AUTHENTICATED flag but must - * also make ASSOCIATED follow appropriately for the driver - * API. See also below, after AUTHORIZED changes. - */ - if (mask & BIT(NL80211_STA_FLAG_AUTHENTICATED)) { - /* cfg80211 should not allow this in non-mesh modes */ - if (WARN_ON(!ieee80211_vif_is_mesh(&sdata->vif))) - return -EINVAL; - - if (set & BIT(NL80211_STA_FLAG_AUTHENTICATED) && - !test_sta_flag(sta, WLAN_STA_AUTH)) { - ret = sta_info_move_state(sta, IEEE80211_STA_AUTH); - if (ret) - return ret; - ret = sta_info_move_state(sta, IEEE80211_STA_ASSOC); - if (ret) - return ret; - } - } - - if (mask & BIT(NL80211_STA_FLAG_AUTHORIZED)) { - if (set & BIT(NL80211_STA_FLAG_AUTHORIZED)) - ret = sta_info_move_state(sta, IEEE80211_STA_AUTHORIZED); - else if (test_sta_flag(sta, WLAN_STA_AUTHORIZED)) - ret = sta_info_move_state(sta, IEEE80211_STA_ASSOC); - if (ret) - return ret; - } - - if (mask & BIT(NL80211_STA_FLAG_AUTHENTICATED)) { - /* cfg80211 should not allow this in non-mesh modes */ - if (WARN_ON(!ieee80211_vif_is_mesh(&sdata->vif))) - return -EINVAL; - - if (!(set & BIT(NL80211_STA_FLAG_AUTHENTICATED)) && - test_sta_flag(sta, WLAN_STA_AUTH)) { - ret = sta_info_move_state(sta, IEEE80211_STA_AUTH); - if (ret) - return ret; - ret = sta_info_move_state(sta, IEEE80211_STA_NONE); - if (ret) - return ret; + if (ieee80211_vif_is_mesh(&sdata->vif)) { + /* + * In mesh mode, ASSOCIATED isn't part of the nl80211 + * API but must follow AUTHENTICATED for driver state. 
+ */ + if (mask & BIT(NL80211_STA_FLAG_AUTHENTICATED)) + mask |= BIT(NL80211_STA_FLAG_ASSOCIATED); + if (set & BIT(NL80211_STA_FLAG_AUTHENTICATED)) + set |= BIT(NL80211_STA_FLAG_ASSOCIATED); + } else if (test_sta_flag(sta, WLAN_STA_TDLS_PEER)) { + /* + * TDLS -- everything follows authorized, but + * only becoming authorized is possible, not + * going back + */ + if (set & BIT(NL80211_STA_FLAG_AUTHORIZED)) { + set |= BIT(NL80211_STA_FLAG_AUTHENTICATED) | + BIT(NL80211_STA_FLAG_ASSOCIATED); + mask |= BIT(NL80211_STA_FLAG_AUTHENTICATED) | + BIT(NL80211_STA_FLAG_ASSOCIATED); } } + ret = sta_apply_auth_flags(local, sta, mask, set); + if (ret) + return ret; if (mask & BIT(NL80211_STA_FLAG_SHORT_PREAMBLE)) { if (set & BIT(NL80211_STA_FLAG_SHORT_PREAMBLE)) @@ -1187,10 +1253,11 @@ static int sta_apply_parameters(struct ieee80211_local *local, sta->sta.aid = params->aid; /* - * FIXME: updating the following information is racy when this - * function is called from ieee80211_change_station(). - * However, all this information should be static so - * maybe we should just reject attemps to change it. + * Some of the following updates would be racy if called on an + * existing station, via ieee80211_change_station(). However, + * all such changes are rejected by cfg80211 except for updates + * changing the supported rates on an existing but not yet used + * TDLS peer. */ if (params->listen_interval >= 0) @@ -1211,36 +1278,66 @@ static int sta_apply_parameters(struct ieee80211_local *local, if (params->ht_capa) ieee80211_ht_cap_ie_to_sta_ht_cap(sdata, sband, - params->ht_capa, - &sta->sta.ht_cap); + params->ht_capa, sta); if (params->vht_capa) ieee80211_vht_cap_ie_to_sta_vht_cap(sdata, sband, - params->vht_capa, - &sta->sta.vht_cap); + params->vht_capa, sta); if (ieee80211_vif_is_mesh(&sdata->vif)) { #ifdef CONFIG_MAC80211_MESH - if (sdata->u.mesh.security & IEEE80211_MESH_SEC_SECURED) + u32 changed = 0; + + if (params->sta_modify_mask & STATION_PARAM_APPLY_PLINK_STATE) { switch (params->plink_state) { - case NL80211_PLINK_LISTEN: case NL80211_PLINK_ESTAB: + if (sta->plink_state != NL80211_PLINK_ESTAB) + changed = mesh_plink_inc_estab_count( + sdata); + sta->plink_state = params->plink_state; + + ieee80211_mps_sta_status_update(sta); + changed |= ieee80211_mps_set_sta_local_pm(sta, + sdata->u.mesh.mshcfg.power_mode); + break; + case NL80211_PLINK_LISTEN: case NL80211_PLINK_BLOCKED: + case NL80211_PLINK_OPN_SNT: + case NL80211_PLINK_OPN_RCVD: + case NL80211_PLINK_CNF_RCVD: + case NL80211_PLINK_HOLDING: + if (sta->plink_state == NL80211_PLINK_ESTAB) + changed = mesh_plink_dec_estab_count( + sdata); sta->plink_state = params->plink_state; + + ieee80211_mps_sta_status_update(sta); + changed |= + ieee80211_mps_local_status_update(sdata); break; default: /* nothing */ break; } - else - switch (params->plink_action) { - case PLINK_ACTION_OPEN: - mesh_plink_open(sta); - break; - case PLINK_ACTION_BLOCK: - mesh_plink_block(sta); - break; - } + } + + switch (params->plink_action) { + case NL80211_PLINK_ACTION_NO_ACTION: + /* nothing */ + break; + case NL80211_PLINK_ACTION_OPEN: + changed |= mesh_plink_open(sta); + break; + case NL80211_PLINK_ACTION_BLOCK: + changed |= mesh_plink_block(sta); + break; + } + + if (params->local_pm) + changed |= + ieee80211_mps_set_sta_local_pm(sta, + params->local_pm); + ieee80211_bss_info_change_notify(sdata, changed); #endif } @@ -1275,8 +1372,14 @@ static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev, if (!sta) return -ENOMEM; - 
sta_info_pre_move_state(sta, IEEE80211_STA_AUTH); - sta_info_pre_move_state(sta, IEEE80211_STA_ASSOC); + /* + * defaults -- if userspace wants something else we'll + * change it accordingly in sta_apply_parameters() + */ + if (!(params->sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER))) { + sta_info_pre_move_state(sta, IEEE80211_STA_AUTH); + sta_info_pre_move_state(sta, IEEE80211_STA_ASSOC); + } err = sta_apply_parameters(local, sta, params); if (err) { @@ -1285,8 +1388,8 @@ static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev, } /* - * for TDLS, rate control should be initialized only when supported - * rates are known. + * for TDLS, rate control should be initialized only when + * rates are known and station is marked authorized */ if (!test_sta_flag(sta, WLAN_STA_TDLS_PEER)) rate_control_rate_init(sta); @@ -1311,7 +1414,6 @@ static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev, static int ieee80211_del_station(struct wiphy *wiphy, struct net_device *dev, u8 *mac) { - struct ieee80211_local *local = wiphy_priv(wiphy); struct ieee80211_sub_if_data *sdata; sdata = IEEE80211_DEV_TO_SUB_IF(dev); @@ -1319,53 +1421,72 @@ static int ieee80211_del_station(struct wiphy *wiphy, struct net_device *dev, if (mac) return sta_info_destroy_addr_bss(sdata, mac); - sta_info_flush(local, sdata); + sta_info_flush(sdata); return 0; } static int ieee80211_change_station(struct wiphy *wiphy, - struct net_device *dev, - u8 *mac, + struct net_device *dev, u8 *mac, struct station_parameters *params) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = wiphy_priv(wiphy); struct sta_info *sta; struct ieee80211_sub_if_data *vlansdata; + enum cfg80211_station_type statype; int err; mutex_lock(&local->sta_mtx); sta = sta_info_get_bss(sdata, mac); if (!sta) { - mutex_unlock(&local->sta_mtx); - return -ENOENT; + err = -ENOENT; + goto out_err; } - /* in station mode, supported rates are only valid with TDLS */ - if (sdata->vif.type == NL80211_IFTYPE_STATION && - params->supported_rates && - !test_sta_flag(sta, WLAN_STA_TDLS_PEER)) { - mutex_unlock(&local->sta_mtx); - return -EINVAL; + switch (sdata->vif.type) { + case NL80211_IFTYPE_MESH_POINT: + if (sdata->u.mesh.user_mpm) + statype = CFG80211_STA_MESH_PEER_USER; + else + statype = CFG80211_STA_MESH_PEER_KERNEL; + break; + case NL80211_IFTYPE_ADHOC: + statype = CFG80211_STA_IBSS; + break; + case NL80211_IFTYPE_STATION: + if (!test_sta_flag(sta, WLAN_STA_TDLS_PEER)) { + statype = CFG80211_STA_AP_STA; + break; + } + if (test_sta_flag(sta, WLAN_STA_AUTHORIZED)) + statype = CFG80211_STA_TDLS_PEER_ACTIVE; + else + statype = CFG80211_STA_TDLS_PEER_SETUP; + break; + case NL80211_IFTYPE_AP: + case NL80211_IFTYPE_AP_VLAN: + statype = CFG80211_STA_AP_CLIENT; + break; + default: + err = -EOPNOTSUPP; + goto out_err; } + err = cfg80211_check_station_change(wiphy, params, statype); + if (err) + goto out_err; + if (params->vlan && params->vlan != sta->sdata->dev) { bool prev_4addr = false; bool new_4addr = false; vlansdata = IEEE80211_DEV_TO_SUB_IF(params->vlan); - if (vlansdata->vif.type != NL80211_IFTYPE_AP_VLAN && - vlansdata->vif.type != NL80211_IFTYPE_AP) { - mutex_unlock(&local->sta_mtx); - return -EINVAL; - } - if (params->vlan->ieee80211_ptr->use_4addr) { if (vlansdata->u.vlan.sta) { - mutex_unlock(&local->sta_mtx); - return -EBUSY; + err = -EBUSY; + goto out_err; } rcu_assign_pointer(vlansdata->u.vlan.sta, sta); @@ -1392,12 +1513,12 @@ static int ieee80211_change_station(struct 
wiphy *wiphy, } err = sta_apply_parameters(local, sta, params); - if (err) { - mutex_unlock(&local->sta_mtx); - return err; - } + if (err) + goto out_err; - if (test_sta_flag(sta, WLAN_STA_TDLS_PEER) && params->supported_rates) + /* When peer becomes authorized, init rate control as well */ + if (test_sta_flag(sta, WLAN_STA_TDLS_PEER) && + test_sta_flag(sta, WLAN_STA_AUTHORIZED)) rate_control_rate_init(sta); mutex_unlock(&local->sta_mtx); @@ -1407,7 +1528,11 @@ static int ieee80211_change_station(struct wiphy *wiphy, ieee80211_recalc_ps(local, -1); ieee80211_recalc_ps_vif(sdata); } + return 0; +out_err: + mutex_unlock(&local->sta_mtx); + return err; } #ifdef CONFIG_MAC80211_MESH @@ -1417,7 +1542,6 @@ static int ieee80211_add_mpath(struct wiphy *wiphy, struct net_device *dev, struct ieee80211_sub_if_data *sdata; struct mesh_path *mpath; struct sta_info *sta; - int err; sdata = IEEE80211_DEV_TO_SUB_IF(dev); @@ -1428,17 +1552,12 @@ static int ieee80211_add_mpath(struct wiphy *wiphy, struct net_device *dev, return -ENOENT; } - err = mesh_path_add(dst, sdata); - if (err) { + mpath = mesh_path_add(sdata, dst); + if (IS_ERR(mpath)) { rcu_read_unlock(); - return err; + return PTR_ERR(mpath); } - mpath = mesh_path_lookup(dst, sdata); - if (!mpath) { - rcu_read_unlock(); - return -ENXIO; - } mesh_path_fix_nexthop(mpath, sta); rcu_read_unlock(); @@ -1446,12 +1565,12 @@ static int ieee80211_add_mpath(struct wiphy *wiphy, struct net_device *dev, } static int ieee80211_del_mpath(struct wiphy *wiphy, struct net_device *dev, - u8 *dst) + u8 *dst) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); if (dst) - return mesh_path_del(dst, sdata); + return mesh_path_del(sdata, dst); mesh_path_flush_by_iface(sdata); return 0; @@ -1475,7 +1594,7 @@ static int ieee80211_change_mpath(struct wiphy *wiphy, return -ENOENT; } - mpath = mesh_path_lookup(dst, sdata); + mpath = mesh_path_lookup(sdata, dst); if (!mpath) { rcu_read_unlock(); return -ENOENT; @@ -1539,7 +1658,7 @@ static int ieee80211_get_mpath(struct wiphy *wiphy, struct net_device *dev, sdata = IEEE80211_DEV_TO_SUB_IF(dev); rcu_read_lock(); - mpath = mesh_path_lookup(dst, sdata); + mpath = mesh_path_lookup(sdata, dst); if (!mpath) { rcu_read_unlock(); return -ENOENT; @@ -1560,7 +1679,7 @@ static int ieee80211_dump_mpath(struct wiphy *wiphy, struct net_device *dev, sdata = IEEE80211_DEV_TO_SUB_IF(dev); rcu_read_lock(); - mpath = mesh_path_lookup_by_idx(idx, sdata); + mpath = mesh_path_lookup_by_idx(sdata, idx); if (!mpath) { rcu_read_unlock(); return -ENOENT; @@ -1615,6 +1734,7 @@ static int copy_mesh_setup(struct ieee80211_if_mesh *ifmsh, ifmsh->mesh_sp_id = setup->sync_method; ifmsh->mesh_pp_id = setup->path_sel_proto; ifmsh->mesh_pm_id = setup->path_metric; + ifmsh->user_mpm = setup->user_mpm; ifmsh->security = IEEE80211_MESH_SEC_NONE; if (setup->is_authenticated) ifmsh->security |= IEEE80211_MESH_SEC_AUTHED; @@ -1625,6 +1745,9 @@ static int copy_mesh_setup(struct ieee80211_if_mesh *ifmsh, memcpy(sdata->vif.bss_conf.mcast_rate, setup->mcast_rate, sizeof(setup->mcast_rate)); + sdata->vif.bss_conf.beacon_int = setup->beacon_interval; + sdata->vif.bss_conf.dtim_period = setup->dtim_period; + return 0; } @@ -1655,8 +1778,11 @@ static int ieee80211_update_mesh_config(struct wiphy *wiphy, conf->dot11MeshTTL = nconf->dot11MeshTTL; if (_chg_mesh_attr(NL80211_MESHCONF_ELEMENT_TTL, mask)) conf->element_ttl = nconf->element_ttl; - if (_chg_mesh_attr(NL80211_MESHCONF_AUTO_OPEN_PLINKS, mask)) + if (_chg_mesh_attr(NL80211_MESHCONF_AUTO_OPEN_PLINKS, 
mask)) { + if (ifmsh->user_mpm) + return -EBUSY; conf->auto_open_plinks = nconf->auto_open_plinks; + } if (_chg_mesh_attr(NL80211_MESHCONF_SYNC_OFFSET_MAX_NEIGHBOR, mask)) conf->dot11MeshNbrOffsetMaxNeighbor = nconf->dot11MeshNbrOffsetMaxNeighbor; @@ -1723,6 +1849,14 @@ static int ieee80211_update_mesh_config(struct wiphy *wiphy, if (_chg_mesh_attr(NL80211_MESHCONF_HWMP_CONFIRMATION_INTERVAL, mask)) conf->dot11MeshHWMPconfirmationInterval = nconf->dot11MeshHWMPconfirmationInterval; + if (_chg_mesh_attr(NL80211_MESHCONF_POWER_MODE, mask)) { + conf->power_mode = nconf->power_mode; + ieee80211_mps_local_status_update(sdata); + } + if (_chg_mesh_attr(NL80211_MESHCONF_AWAKE_WINDOW, mask)) + conf->dot11MeshAwakeWindowDuration = + nconf->dot11MeshAwakeWindowDuration; + ieee80211_mbss_info_change_notify(sdata, BSS_CHANGED_BEACON); return 0; } @@ -1748,9 +1882,7 @@ static int ieee80211_join_mesh(struct wiphy *wiphy, struct net_device *dev, if (err) return err; - ieee80211_start_mesh(sdata); - - return 0; + return ieee80211_start_mesh(sdata); } static int ieee80211_leave_mesh(struct wiphy *wiphy, struct net_device *dev) @@ -1829,12 +1961,20 @@ static int ieee80211_change_bss(struct wiphy *wiphy, } if (params->p2p_ctwindow >= 0) { - sdata->vif.bss_conf.p2p_ctwindow = params->p2p_ctwindow; + sdata->vif.bss_conf.p2p_noa_attr.oppps_ctwindow &= + ~IEEE80211_P2P_OPPPS_CTWINDOW_MASK; + sdata->vif.bss_conf.p2p_noa_attr.oppps_ctwindow |= + params->p2p_ctwindow & IEEE80211_P2P_OPPPS_CTWINDOW_MASK; changed |= BSS_CHANGED_P2P_PS; } - if (params->p2p_opp_ps >= 0) { - sdata->vif.bss_conf.p2p_oppps = params->p2p_opp_ps; + if (params->p2p_opp_ps > 0) { + sdata->vif.bss_conf.p2p_noa_attr.oppps_ctwindow |= + IEEE80211_P2P_OPPPS_ENABLE_BIT; + changed |= BSS_CHANGED_P2P_PS; + } else if (params->p2p_opp_ps == 0) { + sdata->vif.bss_conf.p2p_noa_attr.oppps_ctwindow &= + ~IEEE80211_P2P_OPPPS_ENABLE_BIT; changed |= BSS_CHANGED_P2P_PS; } @@ -2208,7 +2348,8 @@ static int ieee80211_set_power_mgmt(struct wiphy *wiphy, struct net_device *dev, struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - if (sdata->vif.type != NL80211_IFTYPE_STATION) + if (sdata->vif.type != NL80211_IFTYPE_STATION && + sdata->vif.type != NL80211_IFTYPE_MESH_POINT) return -EOPNOTSUPP; if (!(local->hw.flags & IEEE80211_HW_SUPPORTS_PS)) @@ -2277,9 +2418,22 @@ static int ieee80211_set_bitrate_mask(struct wiphy *wiphy, } for (i = 0; i < IEEE80211_NUM_BANDS; i++) { + struct ieee80211_supported_band *sband = wiphy->bands[i]; + int j; + sdata->rc_rateidx_mask[i] = mask->control[i].legacy; memcpy(sdata->rc_rateidx_mcs_mask[i], mask->control[i].mcs, sizeof(mask->control[i].mcs)); + + sdata->rc_has_mcs_mask[i] = false; + if (!sband) + continue; + + for (j = 0; j < IEEE80211_HT_MCS_MASK_LEN; j++) + if (~sdata->rc_rateidx_mcs_mask[i][j]) { + sdata->rc_has_mcs_mask[i] = true; + break; + } } return 0; @@ -2289,7 +2443,8 @@ static int ieee80211_start_roc_work(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, struct ieee80211_channel *channel, unsigned int duration, u64 *cookie, - struct sk_buff *txskb) + struct sk_buff *txskb, + enum ieee80211_roc_type type) { struct ieee80211_roc_work *roc, *tmp; bool queued = false; @@ -2308,13 +2463,15 @@ static int ieee80211_start_roc_work(struct ieee80211_local *local, roc->duration = duration; roc->req_duration = duration; roc->frame = txskb; + roc->type = type; roc->mgmt_tx_cookie = (unsigned long)txskb; roc->sdata = sdata; 
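The ROC (remain-on-channel) changes above add a type field to each request; as the next hunk shows, a new request that is coalesced into a pending one contributes the maximum of the two durations and the higher-priority type. A minimal user-space sketch of that merging rule; struct roc_work and the ROC_TYPE_* names here are stand-ins for the kernel's struct ieee80211_roc_work and IEEE80211_ROC_TYPE_*, and only their ordering matters:

#include <stdio.h>

/* Stand-in ROC types; a larger value means higher priority,
 * matching the tmp->type = max(tmp->type, roc->type) rule below. */
enum roc_type { ROC_TYPE_NORMAL = 0, ROC_TYPE_MGMT_TX = 1 };

struct roc_work {
	unsigned int duration;	/* ms */
	enum roc_type type;
};

/* Merge a new, not-yet-started request into a pending one: the
 * pending request absorbs it as a dependent, keeping the longer
 * duration and the higher-priority type. */
static void roc_coalesce(struct roc_work *pending, const struct roc_work *req)
{
	if (req->duration > pending->duration)
		pending->duration = req->duration;
	if (req->type > pending->type)
		pending->type = req->type;
}

int main(void)
{
	struct roc_work pending = { .duration = 100, .type = ROC_TYPE_NORMAL };
	struct roc_work mgmt_tx = { .duration = 30, .type = ROC_TYPE_MGMT_TX };

	roc_coalesce(&pending, &mgmt_tx);
	/* Prints "duration=100 type=1": long enough for both requests,
	 * scheduled with the more urgent type. */
	printf("duration=%u type=%d\n", pending.duration, pending.type);
	return 0;
}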
INIT_DELAYED_WORK(&roc->work, ieee80211_sw_roc_work); INIT_LIST_HEAD(&roc->dependents); /* if there's one pending or we're scanning, queue this one */ - if (!list_empty(&local->roc_list) || local->scanning) + if (!list_empty(&local->roc_list) || + local->scanning || local->radar_detect_enabled) goto out_check_combine; /* if not HW assist, just queue & schedule work */ @@ -2337,7 +2494,7 @@ static int ieee80211_start_roc_work(struct ieee80211_local *local, if (!duration) duration = 10; - ret = drv_remain_on_channel(local, sdata, channel, duration); + ret = drv_remain_on_channel(local, sdata, channel, duration, type); if (ret) { kfree(roc); return ret; @@ -2356,10 +2513,13 @@ static int ieee80211_start_roc_work(struct ieee80211_local *local, * * If it hasn't started yet, just increase the duration * and add the new one to the list of dependents. + * If the type of the new ROC has higher priority, modify the + * type of the previous one to match that of the new one. */ if (!tmp->started) { list_add_tail(&roc->list, &tmp->dependents); tmp->duration = max(tmp->duration, roc->duration); + tmp->type = max(tmp->type, roc->type); queued = true; break; } @@ -2371,16 +2531,18 @@ static int ieee80211_start_roc_work(struct ieee80211_local *local, /* * In the offloaded ROC case, if it hasn't begun, add * this new one to the dependent list to be handled - * when the the master one begins. If it has begun, + * when the master one begins. If it has begun, * check that there's still a minimum time left and * if so, start this one, transmitting the frame, but - * add it to the list directly after this one with a + * add it to the list directly after this one with * a reduced time so we'll ask the driver to execute * it right after finishing the previous one, in the * hope that it'll also be executed right afterwards, * effectively extending the old one. * If there's no minimum time left, just add it to the * normal list. + * TODO: the ROC type is ignored here, assuming that it + * is better to immediately use the current ROC. 
*/ if (!tmp->hw_begun) { list_add_tail(&roc->list, &tmp->dependents); @@ -2474,7 +2636,8 @@ static int ieee80211_remain_on_channel(struct wiphy *wiphy, mutex_lock(&local->mtx); ret = ieee80211_start_roc_work(local, sdata, chan, - duration, cookie, NULL); + duration, cookie, NULL, + IEEE80211_ROC_TYPE_NORMAL); mutex_unlock(&local->mtx); return ret; @@ -2499,7 +2662,7 @@ static int ieee80211_cancel_roc(struct ieee80211_local *local, list_del(&dep->list); mutex_unlock(&local->mtx); - ieee80211_roc_notify_destroy(dep); + ieee80211_roc_notify_destroy(dep, true); return 0; } @@ -2539,7 +2702,7 @@ static int ieee80211_cancel_roc(struct ieee80211_local *local, ieee80211_start_next_roc(local); mutex_unlock(&local->mtx); - ieee80211_roc_notify_destroy(found); + ieee80211_roc_notify_destroy(found, true); } else { /* work may be pending so use it all the time */ found->abort = true; @@ -2549,6 +2712,8 @@ static int ieee80211_cancel_roc(struct ieee80211_local *local, /* work will clean up etc */ flush_delayed_work(&found->work); + WARN_ON(!found->to_be_freed); + kfree(found); } return 0; @@ -2564,6 +2729,37 @@ static int ieee80211_cancel_remain_on_channel(struct wiphy *wiphy, return ieee80211_cancel_roc(local, cookie, false); } +static int ieee80211_start_radar_detection(struct wiphy *wiphy, + struct net_device *dev, + struct cfg80211_chan_def *chandef) +{ + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_local *local = sdata->local; + unsigned long timeout; + int err; + + if (!list_empty(&local->roc_list) || local->scanning) + return -EBUSY; + + /* whatever, but channel contexts should not complain about that one */ + sdata->smps_mode = IEEE80211_SMPS_OFF; + sdata->needed_rx_chains = local->rx_chains; + sdata->radar_required = true; + + mutex_lock(&local->iflist_mtx); + err = ieee80211_vif_use_channel(sdata, chandef, + IEEE80211_CHANCTX_SHARED); + mutex_unlock(&local->iflist_mtx); + if (err) + return err; + + timeout = msecs_to_jiffies(IEEE80211_DFS_MIN_CAC_TIME_MS); + ieee80211_queue_delayed_work(&sdata->local->hw, + &sdata->dfs_cac_timer_work, timeout); + + return 0; +} + static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev, struct ieee80211_channel *chan, bool offchan, unsigned int wait, const u8 *buf, size_t len, @@ -2668,14 +2864,16 @@ static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev, goto out_unlock; } - IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_CTL_TX_OFFCHAN; + IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_CTL_TX_OFFCHAN | + IEEE80211_TX_INTFL_OFFCHAN_TX_OK; if (local->hw.flags & IEEE80211_HW_QUEUE_CONTROL) IEEE80211_SKB_CB(skb)->hw_queue = local->hw.offchannel_tx_hw_queue; /* This will handle all kinds of coalescing and immediate TX */ ret = ieee80211_start_roc_work(local, sdata, chan, - wait, cookie, skb); + wait, cookie, skb, + IEEE80211_ROC_TYPE_MGMT_TX); if (ret) kfree_skb(skb); out_unlock: @@ -3170,6 +3368,7 @@ static int ieee80211_cfg_get_channel(struct wiphy *wiphy, struct cfg80211_chan_def *chandef) { struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); + struct ieee80211_local *local = wiphy_priv(wiphy); struct ieee80211_chanctx_conf *chanctx_conf; int ret = -ENODATA; @@ -3178,6 +3377,14 @@ static int ieee80211_cfg_get_channel(struct wiphy *wiphy, if (chanctx_conf) { *chandef = chanctx_conf->def; ret = 0; + } else if (local->open_count > 0 && + local->open_count == local->monitors && + sdata->vif.type == NL80211_IFTYPE_MONITOR) { + if (local->use_chanctx) + *chandef = 
local->monitor_chandef; + else + *chandef = local->_oper_chandef; + ret = 0; } rcu_read_unlock(); @@ -3268,4 +3475,5 @@ struct cfg80211_ops mac80211_config_ops = { .get_et_stats = ieee80211_get_et_stats, .get_et_strings = ieee80211_get_et_strings, .get_channel = ieee80211_cfg_get_channel, + .start_radar_detection = ieee80211_start_radar_detection, }; diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c index 80e55527504b..03e8d2e3270e 100644 --- a/net/mac80211/chan.c +++ b/net/mac80211/chan.c @@ -9,7 +9,7 @@ #include "ieee80211_i.h" #include "driver-ops.h" -static void ieee80211_change_chandef(struct ieee80211_local *local, +static void ieee80211_change_chanctx(struct ieee80211_local *local, struct ieee80211_chanctx *ctx, const struct cfg80211_chan_def *chandef) { @@ -22,7 +22,7 @@ static void ieee80211_change_chandef(struct ieee80211_local *local, drv_change_chanctx(local, ctx, IEEE80211_CHANCTX_CHANGE_WIDTH); if (!local->use_chanctx) { - local->_oper_channel_type = cfg80211_get_chandef_type(chandef); + local->_oper_chandef = *chandef; ieee80211_hw_config(local, 0); } } @@ -49,7 +49,7 @@ ieee80211_find_chanctx(struct ieee80211_local *local, if (!compat) continue; - ieee80211_change_chandef(local, ctx, compat); + ieee80211_change_chanctx(local, ctx, compat); return ctx; } @@ -57,12 +57,29 @@ ieee80211_find_chanctx(struct ieee80211_local *local, return NULL; } +static bool ieee80211_is_radar_required(struct ieee80211_local *local) +{ + struct ieee80211_sub_if_data *sdata; + + rcu_read_lock(); + list_for_each_entry_rcu(sdata, &local->interfaces, list) { + if (sdata->radar_required) { + rcu_read_unlock(); + return true; + } + } + rcu_read_unlock(); + + return false; +} + static struct ieee80211_chanctx * ieee80211_new_chanctx(struct ieee80211_local *local, const struct cfg80211_chan_def *chandef, enum ieee80211_chanctx_mode mode) { struct ieee80211_chanctx *ctx; + u32 changed; int err; lockdep_assert_held(&local->chanctx_mtx); @@ -75,34 +92,61 @@ ieee80211_new_chanctx(struct ieee80211_local *local, ctx->conf.rx_chains_static = 1; ctx->conf.rx_chains_dynamic = 1; ctx->mode = mode; + ctx->conf.radar_enabled = ieee80211_is_radar_required(local); + if (!local->use_chanctx) + local->hw.conf.radar_enabled = ctx->conf.radar_enabled; + + /* acquire mutex to prevent idle from changing */ + mutex_lock(&local->mtx); + /* turn idle off *before* setting channel -- some drivers need that */ + changed = ieee80211_idle_off(local); + if (changed) + ieee80211_hw_config(local, changed); if (!local->use_chanctx) { - local->_oper_channel_type = - cfg80211_get_chandef_type(chandef); - local->_oper_channel = chandef->chan; + local->_oper_chandef = *chandef; ieee80211_hw_config(local, 0); } else { err = drv_add_chanctx(local, ctx); if (err) { kfree(ctx); - return ERR_PTR(err); + ctx = ERR_PTR(err); + + ieee80211_recalc_idle(local); + goto out; } } + /* and keep the mutex held until the new chanctx is on the list */ list_add_rcu(&ctx->list, &local->chanctx_list); + out: + mutex_unlock(&local->mtx); + return ctx; } static void ieee80211_free_chanctx(struct ieee80211_local *local, struct ieee80211_chanctx *ctx) { + bool check_single_channel = false; lockdep_assert_held(&local->chanctx_mtx); WARN_ON_ONCE(ctx->refcount != 0); if (!local->use_chanctx) { - local->_oper_channel_type = NL80211_CHAN_NO_HT; + struct cfg80211_chan_def *chandef = &local->_oper_chandef; + chandef->width = NL80211_CHAN_WIDTH_20_NOHT; + chandef->center_freq1 = chandef->chan->center_freq; + chandef->center_freq2 = 0; + + /* NOTE: Disabling radar 
is only valid here for + * single channel context. To be sure, check it ... + */ + if (local->hw.conf.radar_enabled) + check_single_channel = true; + local->hw.conf.radar_enabled = false; + ieee80211_hw_config(local, 0); } else { drv_remove_chanctx(local, ctx); @@ -110,6 +154,13 @@ static void ieee80211_free_chanctx(struct ieee80211_local *local, list_del_rcu(&ctx->list); kfree_rcu(ctx, rcu_head); + + /* throw a warning if this wasn't the only channel context. */ + WARN_ON(check_single_channel && !list_empty(&local->chanctx_list)); + + mutex_lock(&local->mtx); + ieee80211_recalc_idle(local); + mutex_unlock(&local->mtx); } static int ieee80211_assign_vif_chanctx(struct ieee80211_sub_if_data *sdata, @@ -128,6 +179,11 @@ static int ieee80211_assign_vif_chanctx(struct ieee80211_sub_if_data *sdata, ctx->refcount++; ieee80211_recalc_txpower(sdata); + sdata->vif.bss_conf.idle = false; + + if (sdata->vif.type != NL80211_IFTYPE_P2P_DEVICE && + sdata->vif.type != NL80211_IFTYPE_MONITOR) + ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_IDLE); return 0; } @@ -162,7 +218,7 @@ static void ieee80211_recalc_chanctx_chantype(struct ieee80211_local *local, if (WARN_ON_ONCE(!compat)) return; - ieee80211_change_chandef(local, ctx, compat); + ieee80211_change_chanctx(local, ctx, compat); } static void ieee80211_unassign_vif_chanctx(struct ieee80211_sub_if_data *sdata, @@ -175,11 +231,18 @@ static void ieee80211_unassign_vif_chanctx(struct ieee80211_sub_if_data *sdata, ctx->refcount--; rcu_assign_pointer(sdata->vif.chanctx_conf, NULL); + sdata->vif.bss_conf.idle = true; + + if (sdata->vif.type != NL80211_IFTYPE_P2P_DEVICE && + sdata->vif.type != NL80211_IFTYPE_MONITOR) + ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_IDLE); + drv_unassign_vif_chanctx(local, sdata, ctx); if (ctx->refcount > 0) { ieee80211_recalc_chanctx_chantype(sdata->local, ctx); ieee80211_recalc_smps_chanctx(local, ctx); + ieee80211_recalc_radar_chanctx(local, ctx); } } @@ -198,20 +261,34 @@ static void __ieee80211_vif_release_channel(struct ieee80211_sub_if_data *sdata) ctx = container_of(conf, struct ieee80211_chanctx, conf); - if (sdata->vif.type == NL80211_IFTYPE_AP) { - struct ieee80211_sub_if_data *vlan; - - /* for the VLAN list */ - ASSERT_RTNL(); - list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list) - rcu_assign_pointer(vlan->vif.chanctx_conf, NULL); - } - ieee80211_unassign_vif_chanctx(sdata, ctx); if (ctx->refcount == 0) ieee80211_free_chanctx(local, ctx); } +void ieee80211_recalc_radar_chanctx(struct ieee80211_local *local, + struct ieee80211_chanctx *chanctx) +{ + bool radar_enabled; + + lockdep_assert_held(&local->chanctx_mtx); + + radar_enabled = ieee80211_is_radar_required(local); + + if (radar_enabled == chanctx->conf.radar_enabled) + return; + + chanctx->conf.radar_enabled = radar_enabled; + local->radar_detect_enabled = chanctx->conf.radar_enabled; + + if (!local->use_chanctx) { + local->hw.conf.radar_enabled = chanctx->conf.radar_enabled; + ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL); + } + + drv_change_chanctx(local, chanctx, IEEE80211_CHANCTX_CHANGE_RADAR); +} + void ieee80211_recalc_smps_chanctx(struct ieee80211_local *local, struct ieee80211_chanctx *chanctx) { @@ -326,16 +403,57 @@ int ieee80211_vif_use_channel(struct ieee80211_sub_if_data *sdata, goto out; } - if (sdata->vif.type == NL80211_IFTYPE_AP) { - struct ieee80211_sub_if_data *vlan; + ieee80211_recalc_smps_chanctx(local, ctx); + ieee80211_recalc_radar_chanctx(local, ctx); + out: + mutex_unlock(&local->chanctx_mtx); + return ret; 
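The new ieee80211_recalc_radar_chanctx() above derives the channel context's radar_enabled flag from whether any interface still requires radar detection, and only notifies the driver when the aggregate actually changes. A self-contained model of that aggregation, simplified to one context and a plain array instead of the RCU-protected interface list:

#include <stdbool.h>
#include <stdio.h>

struct vif { bool radar_required; };
struct chanctx { bool radar_enabled; };

/* Mirrors ieee80211_is_radar_required(): true if any interface on
 * the shared channel still needs radar detection. */
static bool any_radar_required(const struct vif *vifs, int n)
{
	int i;

	for (i = 0; i < n; i++)
		if (vifs[i].radar_required)
			return true;
	return false;
}

/* Mirrors the recalc: act only when the aggregated flag flips,
 * then push the new state to the driver (printf stands in here). */
static void recalc_radar(struct chanctx *ctx, const struct vif *vifs, int n)
{
	bool radar = any_radar_required(vifs, n);

	if (radar == ctx->radar_enabled)
		return;
	ctx->radar_enabled = radar;
	printf("chanctx radar_enabled -> %d\n", radar);
}

int main(void)
{
	struct vif vifs[2] = { { .radar_required = true },
			       { .radar_required = false } };
	struct chanctx ctx = { .radar_enabled = false };

	recalc_radar(&ctx, vifs, 2);	/* flips on: one vif needs it */
	vifs[0].radar_required = false;
	recalc_radar(&ctx, vifs, 2);	/* flips off: nobody needs it */
	return 0;
}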
+} - /* for the VLAN list */ - ASSERT_RTNL(); - list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list) - rcu_assign_pointer(vlan->vif.chanctx_conf, &ctx->conf); +int ieee80211_vif_change_bandwidth(struct ieee80211_sub_if_data *sdata, + const struct cfg80211_chan_def *chandef, + u32 *changed) +{ + struct ieee80211_local *local = sdata->local; + struct ieee80211_chanctx_conf *conf; + struct ieee80211_chanctx *ctx; + int ret; + + if (!cfg80211_chandef_usable(sdata->local->hw.wiphy, chandef, + IEEE80211_CHAN_DISABLED)) + return -EINVAL; + + mutex_lock(&local->chanctx_mtx); + if (cfg80211_chandef_identical(chandef, &sdata->vif.bss_conf.chandef)) { + ret = 0; + goto out; } - ieee80211_recalc_smps_chanctx(local, ctx); + if (chandef->width == NL80211_CHAN_WIDTH_20_NOHT || + sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_20_NOHT) { + ret = -EINVAL; + goto out; + } + + conf = rcu_dereference_protected(sdata->vif.chanctx_conf, + lockdep_is_held(&local->chanctx_mtx)); + if (!conf) { + ret = -EINVAL; + goto out; + } + + ctx = container_of(conf, struct ieee80211_chanctx, conf); + if (!cfg80211_chandef_compatible(&conf->def, chandef)) { + ret = -EINVAL; + goto out; + } + + sdata->vif.bss_conf.chandef = *chandef; + + ieee80211_recalc_chanctx_chantype(local, ctx); + + *changed |= BSS_CHANGED_BANDWIDTH; + ret = 0; out: mutex_unlock(&local->chanctx_mtx); return ret; @@ -369,6 +487,40 @@ void ieee80211_vif_vlan_copy_chanctx(struct ieee80211_sub_if_data *sdata) mutex_unlock(&local->chanctx_mtx); } +void ieee80211_vif_copy_chanctx_to_vlans(struct ieee80211_sub_if_data *sdata, + bool clear) +{ + struct ieee80211_local *local = sdata->local; + struct ieee80211_sub_if_data *vlan; + struct ieee80211_chanctx_conf *conf; + + ASSERT_RTNL(); + + if (WARN_ON(sdata->vif.type != NL80211_IFTYPE_AP)) + return; + + mutex_lock(&local->chanctx_mtx); + + /* + * Check that conf exists, even when clearing this function + * must be called with the AP's channel context still there + * as it would otherwise cause VLANs to have an invalid + * channel context pointer for a while, possibly pointing + * to a channel context that has already been freed. + */ + conf = rcu_dereference_protected(sdata->vif.chanctx_conf, + lockdep_is_held(&local->chanctx_mtx)); + WARN_ON(!conf); + + if (clear) + conf = NULL; + + list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list) + rcu_assign_pointer(vlan->vif.chanctx_conf, conf); + + mutex_unlock(&local->chanctx_mtx); +} + void ieee80211_iter_chan_contexts_atomic( struct ieee80211_hw *hw, void (*iter)(struct ieee80211_hw *hw, @@ -381,7 +533,8 @@ void ieee80211_iter_chan_contexts_atomic( rcu_read_lock(); list_for_each_entry_rcu(ctx, &local->chanctx_list, list) - iter(hw, &ctx->conf, iter_data); + if (ctx->driver_present) + iter(hw, &ctx->conf, iter_data); rcu_read_unlock(); } EXPORT_SYMBOL_GPL(ieee80211_iter_chan_contexts_atomic); diff --git a/net/mac80211/debug.h b/net/mac80211/debug.h index 8f383a576016..4ccc5ed6237d 100644 --- a/net/mac80211/debug.h +++ b/net/mac80211/debug.h @@ -44,6 +44,12 @@ #define MAC80211_MESH_SYNC_DEBUG 0 #endif +#ifdef CONFIG_MAC80211_MESH_PS_DEBUG +#define MAC80211_MESH_PS_DEBUG 1 +#else +#define MAC80211_MESH_PS_DEBUG 0 +#endif + #ifdef CONFIG_MAC80211_TDLS_DEBUG #define MAC80211_TDLS_DEBUG 1 #else @@ -151,6 +157,10 @@ do { \ _sdata_dbg(MAC80211_MESH_SYNC_DEBUG, \ sdata, fmt, ##__VA_ARGS__) +#define mps_dbg(sdata, fmt, ...) \ + _sdata_dbg(MAC80211_MESH_PS_DEBUG, \ + sdata, fmt, ##__VA_ARGS__) + #define tdls_dbg(sdata, fmt, ...) 
\ _sdata_dbg(MAC80211_TDLS_DEBUG, \ sdata, fmt, ##__VA_ARGS__) diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c index 466f4b45dd94..b0e32d628114 100644 --- a/net/mac80211/debugfs.c +++ b/net/mac80211/debugfs.c @@ -121,8 +121,8 @@ static ssize_t hwflags_read(struct file *file, char __user *user_buf, sf += snprintf(buf + sf, mxln - sf, "SIGNAL_UNSPEC\n"); if (local->hw.flags & IEEE80211_HW_SIGNAL_DBM) sf += snprintf(buf + sf, mxln - sf, "SIGNAL_DBM\n"); - if (local->hw.flags & IEEE80211_HW_NEED_DTIM_PERIOD) - sf += snprintf(buf + sf, mxln - sf, "NEED_DTIM_PERIOD\n"); + if (local->hw.flags & IEEE80211_HW_NEED_DTIM_BEFORE_ASSOC) + sf += snprintf(buf + sf, mxln - sf, "NEED_DTIM_BEFORE_ASSOC\n"); if (local->hw.flags & IEEE80211_HW_SPECTRUM_MGMT) sf += snprintf(buf + sf, mxln - sf, "SPECTRUM_MGMT\n"); if (local->hw.flags & IEEE80211_HW_AMPDU_AGGREGATION) @@ -151,8 +151,6 @@ static ssize_t hwflags_read(struct file *file, char __user *user_buf, sf += snprintf(buf + sf, mxln - sf, "AP_LINK_PS\n"); if (local->hw.flags & IEEE80211_HW_TX_AMPDU_SETUP_IN_HW) sf += snprintf(buf + sf, mxln - sf, "TX_AMPDU_SETUP_IN_HW\n"); - if (local->hw.flags & IEEE80211_HW_SCAN_WHILE_IDLE) - sf += snprintf(buf + sf, mxln - sf, "SCAN_WHILE_IDLE\n"); rv = simple_read_from_buffer(user_buf, count, ppos, buf, strlen(buf)); kfree(buf); diff --git a/net/mac80211/debugfs_key.c b/net/mac80211/debugfs_key.c index c3a3082b72e5..1521cabad3d6 100644 --- a/net/mac80211/debugfs_key.c +++ b/net/mac80211/debugfs_key.c @@ -295,7 +295,7 @@ void ieee80211_debugfs_key_update_default(struct ieee80211_sub_if_data *sdata) char buf[50]; struct ieee80211_key *key; - if (!sdata->debugfs.dir) + if (!sdata->vif.debugfs_dir) return; lockdep_assert_held(&sdata->local->key_mtx); @@ -311,7 +311,7 @@ void ieee80211_debugfs_key_update_default(struct ieee80211_sub_if_data *sdata) sprintf(buf, "../keys/%d", key->debugfs.cnt); sdata->debugfs.default_unicast_key = debugfs_create_symlink("default_unicast_key", - sdata->debugfs.dir, buf); + sdata->vif.debugfs_dir, buf); } if (sdata->debugfs.default_multicast_key) { @@ -325,7 +325,7 @@ void ieee80211_debugfs_key_update_default(struct ieee80211_sub_if_data *sdata) sprintf(buf, "../keys/%d", key->debugfs.cnt); sdata->debugfs.default_multicast_key = debugfs_create_symlink("default_multicast_key", - sdata->debugfs.dir, buf); + sdata->vif.debugfs_dir, buf); } } @@ -334,7 +334,7 @@ void ieee80211_debugfs_key_add_mgmt_default(struct ieee80211_sub_if_data *sdata) char buf[50]; struct ieee80211_key *key; - if (!sdata->debugfs.dir) + if (!sdata->vif.debugfs_dir) return; key = key_mtx_dereference(sdata->local, @@ -343,7 +343,7 @@ void ieee80211_debugfs_key_add_mgmt_default(struct ieee80211_sub_if_data *sdata) sprintf(buf, "../keys/%d", key->debugfs.cnt); sdata->debugfs.default_mgmt_key = debugfs_create_symlink("default_mgmt_key", - sdata->debugfs.dir, buf); + sdata->vif.debugfs_dir, buf); } else ieee80211_debugfs_key_remove_mgmt_default(sdata); } diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c index cbde5cc49a40..14abcf44f974 100644 --- a/net/mac80211/debugfs_netdev.c +++ b/net/mac80211/debugfs_netdev.c @@ -124,6 +124,15 @@ static ssize_t ieee80211_if_fmt_##name( \ return scnprintf(buf, buflen, "%d\n", sdata->field / 16); \ } +#define IEEE80211_IF_FMT_JIFFIES_TO_MS(name, field) \ +static ssize_t ieee80211_if_fmt_##name( \ + const struct ieee80211_sub_if_data *sdata, \ + char *buf, int buflen) \ +{ \ + return scnprintf(buf, buflen, "%d\n", \ + jiffies_to_msecs(sdata->field)); \ +} + 
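The IEEE80211_IF_FMT_JIFFIES_TO_MS helper just added generates a debugfs formatter that converts a jiffies-valued field to milliseconds before printing; it is used below for the new beacon_timeout file. A rough user-space expansion of what the macro produces for that field; the flattened struct, the HZ value, and the jiffies_to_msecs() approximation (j * 1000 / HZ) are assumptions for illustration only:

#include <stdio.h>

#define HZ 250	/* assumed tick rate for this demo */

static unsigned int jiffies_to_msecs(unsigned long j)
{
	return (unsigned int)(j * 1000 / HZ);	/* simplified conversion */
}

/* Roughly what IEEE80211_IF_FMT_JIFFIES_TO_MS(beacon_timeout,
 * u.mgd.beacon_timeout) expands to: a formatter that prints the
 * jiffies field in milliseconds (kernel uses scnprintf). */
struct sub_if_data { unsigned long beacon_timeout; };

static int ieee80211_if_fmt_beacon_timeout(const struct sub_if_data *sdata,
					   char *buf, int buflen)
{
	return snprintf(buf, buflen, "%d\n",
			jiffies_to_msecs(sdata->beacon_timeout));
}

int main(void)
{
	struct sub_if_data sdata = { .beacon_timeout = 500 };	/* ticks */
	char buf[32];

	ieee80211_if_fmt_beacon_timeout(&sdata, buf, sizeof(buf));
	printf("%s", buf);	/* 2000 ms at HZ=250 */
	return 0;
}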
#define __IEEE80211_IF_FILE(name, _write) \ static ssize_t ieee80211_if_read_##name(struct file *file, \ char __user *userbuf, \ @@ -197,6 +206,7 @@ IEEE80211_IF_FILE(bssid, u.mgd.bssid, MAC); IEEE80211_IF_FILE(aid, u.mgd.aid, DEC); IEEE80211_IF_FILE(last_beacon, u.mgd.last_beacon_signal, DEC); IEEE80211_IF_FILE(ave_beacon, u.mgd.ave_beacon_signal, DEC_DIV_16); +IEEE80211_IF_FILE(beacon_timeout, u.mgd.beacon_timeout, JIFFIES_TO_MS); static int ieee80211_set_smps(struct ieee80211_sub_if_data *sdata, enum ieee80211_smps_mode smps_mode) @@ -515,10 +525,13 @@ IEEE80211_IF_FILE(dot11MeshHWMProotInterval, u.mesh.mshcfg.dot11MeshHWMProotInterval, DEC); IEEE80211_IF_FILE(dot11MeshHWMPconfirmationInterval, u.mesh.mshcfg.dot11MeshHWMPconfirmationInterval, DEC); +IEEE80211_IF_FILE(power_mode, u.mesh.mshcfg.power_mode, DEC); +IEEE80211_IF_FILE(dot11MeshAwakeWindowDuration, + u.mesh.mshcfg.dot11MeshAwakeWindowDuration, DEC); #endif #define DEBUGFS_ADD_MODE(name, mode) \ - debugfs_create_file(#name, mode, sdata->debugfs.dir, \ + debugfs_create_file(#name, mode, sdata->vif.debugfs_dir, \ sdata, &name##_ops); #define DEBUGFS_ADD(name) DEBUGFS_ADD_MODE(name, 0400) @@ -539,6 +552,7 @@ static void add_sta_files(struct ieee80211_sub_if_data *sdata) DEBUGFS_ADD(aid); DEBUGFS_ADD(last_beacon); DEBUGFS_ADD(ave_beacon); + DEBUGFS_ADD(beacon_timeout); DEBUGFS_ADD_MODE(smps, 0600); DEBUGFS_ADD_MODE(tkip_mic_test, 0200); DEBUGFS_ADD_MODE(uapsd_queues, 0600); @@ -574,7 +588,7 @@ static void add_mesh_files(struct ieee80211_sub_if_data *sdata) static void add_mesh_stats(struct ieee80211_sub_if_data *sdata) { struct dentry *dir = debugfs_create_dir("mesh_stats", - sdata->debugfs.dir); + sdata->vif.debugfs_dir); #define MESHSTATS_ADD(name)\ debugfs_create_file(#name, 0400, dir, sdata, &name##_ops); @@ -591,7 +605,7 @@ static void add_mesh_stats(struct ieee80211_sub_if_data *sdata) static void add_mesh_config(struct ieee80211_sub_if_data *sdata) { struct dentry *dir = debugfs_create_dir("mesh_config", - sdata->debugfs.dir); + sdata->vif.debugfs_dir); #define MESHPARAMS_ADD(name) \ debugfs_create_file(#name, 0600, dir, sdata, &name##_ops); @@ -620,13 +634,15 @@ static void add_mesh_config(struct ieee80211_sub_if_data *sdata) MESHPARAMS_ADD(dot11MeshHWMPactivePathToRootTimeout); MESHPARAMS_ADD(dot11MeshHWMProotInterval); MESHPARAMS_ADD(dot11MeshHWMPconfirmationInterval); + MESHPARAMS_ADD(power_mode); + MESHPARAMS_ADD(dot11MeshAwakeWindowDuration); #undef MESHPARAMS_ADD } #endif static void add_files(struct ieee80211_sub_if_data *sdata) { - if (!sdata->debugfs.dir) + if (!sdata->vif.debugfs_dir) return; DEBUGFS_ADD(flags); @@ -668,21 +684,21 @@ void ieee80211_debugfs_add_netdev(struct ieee80211_sub_if_data *sdata) char buf[10+IFNAMSIZ]; sprintf(buf, "netdev:%s", sdata->name); - sdata->debugfs.dir = debugfs_create_dir(buf, + sdata->vif.debugfs_dir = debugfs_create_dir(buf, sdata->local->hw.wiphy->debugfsdir); - if (sdata->debugfs.dir) + if (sdata->vif.debugfs_dir) sdata->debugfs.subdir_stations = debugfs_create_dir("stations", - sdata->debugfs.dir); + sdata->vif.debugfs_dir); add_files(sdata); } void ieee80211_debugfs_remove_netdev(struct ieee80211_sub_if_data *sdata) { - if (!sdata->debugfs.dir) + if (!sdata->vif.debugfs_dir) return; - debugfs_remove_recursive(sdata->debugfs.dir); - sdata->debugfs.dir = NULL; + debugfs_remove_recursive(sdata->vif.debugfs_dir); + sdata->vif.debugfs_dir = NULL; } void ieee80211_debugfs_rename_netdev(struct ieee80211_sub_if_data *sdata) @@ -690,7 +706,7 @@ void 
ieee80211_debugfs_rename_netdev(struct ieee80211_sub_if_data *sdata) struct dentry *dir; char buf[10 + IFNAMSIZ]; - dir = sdata->debugfs.dir; + dir = sdata->vif.debugfs_dir; if (!dir) return; diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c index 6fb1168b9f16..44e201d60a13 100644 --- a/net/mac80211/debugfs_sta.c +++ b/net/mac80211/debugfs_sta.c @@ -54,6 +54,7 @@ STA_FILE(aid, sta.aid, D); STA_FILE(dev, sdata->name, S); STA_FILE(last_signal, last_signal, D); STA_FILE(last_ack_signal, last_ack_signal, D); +STA_FILE(beacon_loss_count, beacon_loss_count, D); static ssize_t sta_flags_read(struct file *file, char __user *userbuf, size_t count, loff_t *ppos) @@ -65,7 +66,7 @@ static ssize_t sta_flags_read(struct file *file, char __user *userbuf, test_sta_flag(sta, WLAN_STA_##flg) ? #flg "\n" : "" int res = scnprintf(buf, sizeof(buf), - "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", + "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", TEST(AUTH), TEST(ASSOC), TEST(PS_STA), TEST(PS_DRIVER), TEST(AUTHORIZED), TEST(SHORT_PREAMBLE), @@ -74,7 +75,8 @@ static ssize_t sta_flags_read(struct file *file, char __user *userbuf, TEST(UAPSD), TEST(SP), TEST(TDLS_PEER), TEST(TDLS_PEER_AUTH), TEST(4ADDR_EVENT), TEST(INSERTED), TEST(RATE_CONTROL), - TEST(TOFFSET_KNOWN)); + TEST(TOFFSET_KNOWN), TEST(MPSP_OWNER), + TEST(MPSP_RECIPIENT)); #undef TEST return simple_read_from_buffer(userbuf, count, ppos, buf, res); } @@ -324,6 +326,36 @@ static ssize_t sta_ht_capa_read(struct file *file, char __user *userbuf, } STA_OPS(ht_capa); +static ssize_t sta_vht_capa_read(struct file *file, char __user *userbuf, + size_t count, loff_t *ppos) +{ + char buf[128], *p = buf; + struct sta_info *sta = file->private_data; + struct ieee80211_sta_vht_cap *vhtc = &sta->sta.vht_cap; + + p += scnprintf(p, sizeof(buf) + buf - p, "VHT %ssupported\n", + vhtc->vht_supported ? 
"" : "not "); + if (vhtc->vht_supported) { + p += scnprintf(p, sizeof(buf)+buf-p, "cap: %#.8x\n", vhtc->cap); + + p += scnprintf(p, sizeof(buf)+buf-p, "RX MCS: %.4x\n", + le16_to_cpu(vhtc->vht_mcs.rx_mcs_map)); + if (vhtc->vht_mcs.rx_highest) + p += scnprintf(p, sizeof(buf)+buf-p, + "MCS RX highest: %d Mbps\n", + le16_to_cpu(vhtc->vht_mcs.rx_highest)); + p += scnprintf(p, sizeof(buf)+buf-p, "TX MCS: %.4x\n", + le16_to_cpu(vhtc->vht_mcs.tx_mcs_map)); + if (vhtc->vht_mcs.tx_highest) + p += scnprintf(p, sizeof(buf)+buf-p, + "MCS TX highest: %d Mbps\n", + le16_to_cpu(vhtc->vht_mcs.tx_highest)); + } + + return simple_read_from_buffer(userbuf, count, ppos, buf, p - buf); +} +STA_OPS(vht_capa); + static ssize_t sta_current_tx_rate_read(struct file *file, char __user *userbuf, size_t count, loff_t *ppos) { @@ -403,7 +435,9 @@ void ieee80211_sta_debugfs_add(struct sta_info *sta) DEBUGFS_ADD(agg_status); DEBUGFS_ADD(dev); DEBUGFS_ADD(last_signal); + DEBUGFS_ADD(beacon_loss_count); DEBUGFS_ADD(ht_capa); + DEBUGFS_ADD(vht_capa); DEBUGFS_ADD(last_ack_signal); DEBUGFS_ADD(current_tx_rate); DEBUGFS_ADD(last_rx_rate); diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index 698dc7e6f309..169664c122e2 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -207,6 +207,17 @@ static inline void drv_bss_info_changed(struct ieee80211_local *local, { might_sleep(); + if (WARN_ON_ONCE(changed & (BSS_CHANGED_BEACON | + BSS_CHANGED_BEACON_ENABLED) && + sdata->vif.type != NL80211_IFTYPE_AP && + sdata->vif.type != NL80211_IFTYPE_ADHOC && + sdata->vif.type != NL80211_IFTYPE_MESH_POINT)) + return; + + if (WARN_ON_ONCE(sdata->vif.type == NL80211_IFTYPE_P2P_DEVICE || + sdata->vif.type == NL80211_IFTYPE_MONITOR)) + return; + check_sdata_in_driver(sdata); trace_drv_bss_info_changed(local, sdata, info, changed); @@ -230,6 +241,22 @@ static inline u64 drv_prepare_multicast(struct ieee80211_local *local, return ret; } +static inline void drv_set_multicast_list(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + struct netdev_hw_addr_list *mc_list) +{ + bool allmulti = sdata->flags & IEEE80211_SDATA_ALLMULTI; + + trace_drv_set_multicast_list(local, sdata, mc_list->count); + + check_sdata_in_driver(sdata); + + if (local->ops->set_multicast_list) + local->ops->set_multicast_list(&local->hw, &sdata->vif, + allmulti, mc_list); + trace_drv_return_void(local); +} + static inline void drv_configure_filter(struct ieee80211_local *local, unsigned int changed_flags, unsigned int *total_flags, @@ -561,7 +588,8 @@ static inline void drv_sta_rc_update(struct ieee80211_local *local, check_sdata_in_driver(sdata); WARN_ON(changed & IEEE80211_RC_SUPP_RATES_CHANGED && - sdata->vif.type != NL80211_IFTYPE_ADHOC); + (sdata->vif.type != NL80211_IFTYPE_ADHOC && + sdata->vif.type != NL80211_IFTYPE_MESH_POINT)); trace_drv_sta_rc_update(local, sdata, sta, changed); if (local->ops->sta_rc_update) @@ -692,13 +720,14 @@ static inline void drv_rfkill_poll(struct ieee80211_local *local) local->ops->rfkill_poll(&local->hw); } -static inline void drv_flush(struct ieee80211_local *local, bool drop) +static inline void drv_flush(struct ieee80211_local *local, + u32 queues, bool drop) { might_sleep(); - trace_drv_flush(local, drop); + trace_drv_flush(local, queues, drop); if (local->ops->flush) - local->ops->flush(&local->hw, drop); + local->ops->flush(&local->hw, queues, drop); trace_drv_return_void(local); } @@ -738,15 +767,16 @@ static inline int drv_get_antenna(struct ieee80211_local *local, static 
inline int drv_remain_on_channel(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, struct ieee80211_channel *chan, - unsigned int duration) + unsigned int duration, + enum ieee80211_roc_type type) { int ret; might_sleep(); - trace_drv_remain_on_channel(local, sdata, chan, duration); + trace_drv_remain_on_channel(local, sdata, chan, duration, type); ret = local->ops->remain_on_channel(&local->hw, &sdata->vif, - chan, duration); + chan, duration, type); trace_drv_return_int(local, ret); return ret; @@ -837,11 +867,12 @@ static inline void drv_set_rekey_data(struct ieee80211_local *local, } static inline void drv_rssi_callback(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, const enum ieee80211_rssi_event event) { - trace_drv_rssi_callback(local, event); + trace_drv_rssi_callback(local, sdata, event); if (local->ops->rssi_callback) - local->ops->rssi_callback(&local->hw, event); + local->ops->rssi_callback(&local->hw, &sdata->vif, event); trace_drv_return_void(local); } @@ -913,6 +944,8 @@ static inline int drv_add_chanctx(struct ieee80211_local *local, if (local->ops->add_chanctx) ret = local->ops->add_chanctx(&local->hw, &ctx->conf); trace_drv_return_int(local, ret); + if (!ret) + ctx->driver_present = true; return ret; } @@ -924,6 +957,7 @@ static inline void drv_remove_chanctx(struct ieee80211_local *local, if (local->ops->remove_chanctx) local->ops->remove_chanctx(&local->hw, &ctx->conf); trace_drv_return_void(local); + ctx->driver_present = false; } static inline void drv_change_chanctx(struct ieee80211_local *local, @@ -931,8 +965,10 @@ static inline void drv_change_chanctx(struct ieee80211_local *local, u32 changed) { trace_drv_change_chanctx(local, ctx, changed); - if (local->ops->change_chanctx) + if (local->ops->change_chanctx) { + WARN_ON_ONCE(!ctx->driver_present); local->ops->change_chanctx(&local->hw, &ctx->conf, changed); + } trace_drv_return_void(local); } @@ -945,10 +981,12 @@ static inline int drv_assign_vif_chanctx(struct ieee80211_local *local, check_sdata_in_driver(sdata); trace_drv_assign_vif_chanctx(local, sdata, ctx); - if (local->ops->assign_vif_chanctx) + if (local->ops->assign_vif_chanctx) { + WARN_ON_ONCE(!ctx->driver_present); ret = local->ops->assign_vif_chanctx(&local->hw, &sdata->vif, &ctx->conf); + } trace_drv_return_int(local, ret); return ret; @@ -961,10 +999,12 @@ static inline void drv_unassign_vif_chanctx(struct ieee80211_local *local, check_sdata_in_driver(sdata); trace_drv_unassign_vif_chanctx(local, sdata, ctx); - if (local->ops->unassign_vif_chanctx) + if (local->ops->unassign_vif_chanctx) { + WARN_ON_ONCE(!ctx->driver_present); local->ops->unassign_vif_chanctx(&local->hw, &sdata->vif, &ctx->conf); + } trace_drv_return_void(local); } @@ -1003,4 +1043,32 @@ static inline void drv_restart_complete(struct ieee80211_local *local) trace_drv_return_void(local); } +static inline void +drv_set_default_unicast_key(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + int key_idx) +{ + check_sdata_in_driver(sdata); + + WARN_ON_ONCE(key_idx < -1 || key_idx > 3); + + trace_drv_set_default_unicast_key(local, sdata, key_idx); + if (local->ops->set_default_unicast_key) + local->ops->set_default_unicast_key(&local->hw, &sdata->vif, + key_idx); + trace_drv_return_void(local); +} + +#if IS_ENABLED(CONFIG_IPV6) +static inline void drv_ipv6_addr_change(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + struct inet6_dev *idev) +{ + trace_drv_ipv6_addr_change(local, sdata); + if 
(local->ops->ipv6_addr_change) + local->ops->ipv6_addr_change(&local->hw, &sdata->vif, idev); + trace_drv_return_void(local); +} +#endif + #endif /* __MAC80211_DRIVER_OPS */ diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c index a71d891794a4..af8cee06e4f3 100644 --- a/net/mac80211/ht.c +++ b/net/mac80211/ht.c @@ -37,12 +37,8 @@ void ieee80211_apply_htcap_overrides(struct ieee80211_sub_if_data *sdata, u8 *smask = (u8 *)(&sdata->u.mgd.ht_capa_mask.mcs.rx_mask); int i; - if (sdata->vif.type != NL80211_IFTYPE_STATION) { - /* AP interfaces call this code when adding new stations, - * so just silently ignore non station interfaces. - */ + if (!ht_cap->ht_supported) return; - } /* NOTE: If you add more over-rides here, update register_hw * ht_capa_mod_msk logic in main.c as well. @@ -62,6 +58,9 @@ void ieee80211_apply_htcap_overrides(struct ieee80211_sub_if_data *sdata, __check_htcap_disable(sdata, ht_cap, IEEE80211_HT_CAP_SUP_WIDTH_20_40); __check_htcap_disable(sdata, ht_cap, IEEE80211_HT_CAP_SGI_40); + /* Allow user to disable SGI-20 (SGI-40 is handled above) */ + __check_htcap_disable(sdata, ht_cap, IEEE80211_HT_CAP_SGI_20); + /* Allow user to disable the max-AMSDU bit. */ __check_htcap_disable(sdata, ht_cap, IEEE80211_HT_CAP_MAX_AMSDU); @@ -86,22 +85,36 @@ void ieee80211_apply_htcap_overrides(struct ieee80211_sub_if_data *sdata, } -void ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_sub_if_data *sdata, +bool ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_sub_if_data *sdata, struct ieee80211_supported_band *sband, - struct ieee80211_ht_cap *ht_cap_ie, - struct ieee80211_sta_ht_cap *ht_cap) + const struct ieee80211_ht_cap *ht_cap_ie, + struct sta_info *sta) { + struct ieee80211_sta_ht_cap ht_cap, own_cap; u8 ampdu_info, tx_mcs_set_cap; int i, max_tx_streams; + bool changed; + enum ieee80211_sta_rx_bandwidth bw; + enum ieee80211_smps_mode smps_mode; - BUG_ON(!ht_cap); - - memset(ht_cap, 0, sizeof(*ht_cap)); + memset(&ht_cap, 0, sizeof(ht_cap)); if (!ht_cap_ie || !sband->ht_cap.ht_supported) - return; + goto apply; - ht_cap->ht_supported = true; + ht_cap.ht_supported = true; + + own_cap = sband->ht_cap; + + /* + * If user has specified capability over-rides, take care + * of that if the station we're setting up is the AP that + * we advertised a restricted capability set to. Override + * our own capabilities and then use those below. + */ + if (sdata->vif.type == NL80211_IFTYPE_STATION && + !test_sta_flag(sta, WLAN_STA_TDLS_PEER)) + ieee80211_apply_htcap_overrides(sdata, &own_cap); /* * The bits listed in this expression should be @@ -109,38 +122,38 @@ void ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_sub_if_data *sdata, * advertises more then we can't use those thus * we mask them out. */ - ht_cap->cap = le16_to_cpu(ht_cap_ie->cap_info) & - (sband->ht_cap.cap | - ~(IEEE80211_HT_CAP_LDPC_CODING | - IEEE80211_HT_CAP_SUP_WIDTH_20_40 | - IEEE80211_HT_CAP_GRN_FLD | - IEEE80211_HT_CAP_SGI_20 | - IEEE80211_HT_CAP_SGI_40 | - IEEE80211_HT_CAP_DSSSCCK40)); + ht_cap.cap = le16_to_cpu(ht_cap_ie->cap_info) & + (own_cap.cap | ~(IEEE80211_HT_CAP_LDPC_CODING | + IEEE80211_HT_CAP_SUP_WIDTH_20_40 | + IEEE80211_HT_CAP_GRN_FLD | + IEEE80211_HT_CAP_SGI_20 | + IEEE80211_HT_CAP_SGI_40 | + IEEE80211_HT_CAP_DSSSCCK40)); + /* * The STBC bits are asymmetric -- if we don't have * TX then mask out the peer's RX and vice versa. 
*/ - if (!(sband->ht_cap.cap & IEEE80211_HT_CAP_TX_STBC)) - ht_cap->cap &= ~IEEE80211_HT_CAP_RX_STBC; - if (!(sband->ht_cap.cap & IEEE80211_HT_CAP_RX_STBC)) - ht_cap->cap &= ~IEEE80211_HT_CAP_TX_STBC; + if (!(own_cap.cap & IEEE80211_HT_CAP_TX_STBC)) + ht_cap.cap &= ~IEEE80211_HT_CAP_RX_STBC; + if (!(own_cap.cap & IEEE80211_HT_CAP_RX_STBC)) + ht_cap.cap &= ~IEEE80211_HT_CAP_TX_STBC; ampdu_info = ht_cap_ie->ampdu_params_info; - ht_cap->ampdu_factor = + ht_cap.ampdu_factor = ampdu_info & IEEE80211_HT_AMPDU_PARM_FACTOR; - ht_cap->ampdu_density = + ht_cap.ampdu_density = (ampdu_info & IEEE80211_HT_AMPDU_PARM_DENSITY) >> 2; /* own MCS TX capabilities */ - tx_mcs_set_cap = sband->ht_cap.mcs.tx_params; + tx_mcs_set_cap = own_cap.mcs.tx_params; /* Copy peer MCS TX capabilities, the driver might need them. */ - ht_cap->mcs.tx_params = ht_cap_ie->mcs.tx_params; + ht_cap.mcs.tx_params = ht_cap_ie->mcs.tx_params; /* can we TX with MCS rates? */ if (!(tx_mcs_set_cap & IEEE80211_HT_MCS_TX_DEFINED)) - return; + goto apply; /* Counting from 0, therefore +1 */ if (tx_mcs_set_cap & IEEE80211_HT_MCS_TX_RX_DIFF) @@ -158,37 +171,84 @@ void ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_sub_if_data *sdata, * - remainder are multiple spatial streams using unequal modulation */ for (i = 0; i < max_tx_streams; i++) - ht_cap->mcs.rx_mask[i] = - sband->ht_cap.mcs.rx_mask[i] & ht_cap_ie->mcs.rx_mask[i]; + ht_cap.mcs.rx_mask[i] = + own_cap.mcs.rx_mask[i] & ht_cap_ie->mcs.rx_mask[i]; if (tx_mcs_set_cap & IEEE80211_HT_MCS_TX_UNEQUAL_MODULATION) for (i = IEEE80211_HT_MCS_UNEQUAL_MODULATION_START_BYTE; i < IEEE80211_HT_MCS_MASK_LEN; i++) - ht_cap->mcs.rx_mask[i] = - sband->ht_cap.mcs.rx_mask[i] & + ht_cap.mcs.rx_mask[i] = + own_cap.mcs.rx_mask[i] & ht_cap_ie->mcs.rx_mask[i]; /* handle MCS rate 32 too */ - if (sband->ht_cap.mcs.rx_mask[32/8] & ht_cap_ie->mcs.rx_mask[32/8] & 1) - ht_cap->mcs.rx_mask[32/8] |= 1; + if (own_cap.mcs.rx_mask[32/8] & ht_cap_ie->mcs.rx_mask[32/8] & 1) + ht_cap.mcs.rx_mask[32/8] |= 1; - /* - * If user has specified capability over-rides, take care - * of that here. - */ - ieee80211_apply_htcap_overrides(sdata, ht_cap); + apply: + changed = memcmp(&sta->sta.ht_cap, &ht_cap, sizeof(ht_cap)); + + memcpy(&sta->sta.ht_cap, &ht_cap, sizeof(ht_cap)); + + switch (sdata->vif.bss_conf.chandef.width) { + default: + WARN_ON_ONCE(1); + /* fall through */ + case NL80211_CHAN_WIDTH_20_NOHT: + case NL80211_CHAN_WIDTH_20: + bw = IEEE80211_STA_RX_BW_20; + break; + case NL80211_CHAN_WIDTH_40: + case NL80211_CHAN_WIDTH_80: + case NL80211_CHAN_WIDTH_80P80: + case NL80211_CHAN_WIDTH_160: + bw = ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40 ? + IEEE80211_STA_RX_BW_40 : IEEE80211_STA_RX_BW_20; + break; + } + + if (bw != sta->sta.bandwidth) + changed = true; + sta->sta.bandwidth = bw; + + sta->cur_max_bandwidth = + ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40 ? 
+ IEEE80211_STA_RX_BW_40 : IEEE80211_STA_RX_BW_20; + + switch ((ht_cap.cap & IEEE80211_HT_CAP_SM_PS) + >> IEEE80211_HT_CAP_SM_PS_SHIFT) { + case WLAN_HT_CAP_SM_PS_INVALID: + case WLAN_HT_CAP_SM_PS_STATIC: + smps_mode = IEEE80211_SMPS_STATIC; + break; + case WLAN_HT_CAP_SM_PS_DYNAMIC: + smps_mode = IEEE80211_SMPS_DYNAMIC; + break; + case WLAN_HT_CAP_SM_PS_DISABLED: + smps_mode = IEEE80211_SMPS_OFF; + break; + } + + if (smps_mode != sta->sta.smps_mode) + changed = true; + sta->sta.smps_mode = smps_mode; + + return changed; } -void ieee80211_sta_tear_down_BA_sessions(struct sta_info *sta, bool tx) +void ieee80211_sta_tear_down_BA_sessions(struct sta_info *sta, + enum ieee80211_agg_stop_reason reason) { int i; cancel_work_sync(&sta->ampdu_mlme.work); for (i = 0; i < IEEE80211_NUM_TIDS; i++) { - __ieee80211_stop_tx_ba_session(sta, i, WLAN_BACK_INITIATOR, tx); + __ieee80211_stop_tx_ba_session(sta, i, reason); __ieee80211_stop_rx_ba_session(sta, i, WLAN_BACK_RECIPIENT, - WLAN_REASON_QSTA_LEAVE_QBSS, tx); + WLAN_REASON_QSTA_LEAVE_QBSS, + reason != AGG_STOP_DESTROY_STA && + reason != AGG_STOP_PEER_REQUEST); } } @@ -245,8 +305,7 @@ void ieee80211_ba_session_work(struct work_struct *work) if (tid_tx && test_and_clear_bit(HT_AGG_STATE_WANT_STOP, &tid_tx->state)) ___ieee80211_stop_tx_ba_session(sta, tid, - WLAN_BACK_INITIATOR, - true); + AGG_STOP_LOCAL_REQUEST); } mutex_unlock(&sta->ampdu_mlme.mtx); } @@ -314,8 +373,7 @@ void ieee80211_process_delba(struct ieee80211_sub_if_data *sdata, __ieee80211_stop_rx_ba_session(sta, tid, WLAN_BACK_INITIATOR, 0, true); else - __ieee80211_stop_tx_ba_session(sta, tid, WLAN_BACK_RECIPIENT, - true); + __ieee80211_stop_tx_ba_session(sta, tid, AGG_STOP_PEER_REQUEST); } int ieee80211_send_smps_action(struct ieee80211_sub_if_data *sdata, @@ -387,6 +445,9 @@ void ieee80211_request_smps(struct ieee80211_vif *vif, if (WARN_ON(smps_mode == IEEE80211_SMPS_OFF)) smps_mode = IEEE80211_SMPS_AUTOMATIC; + if (sdata->u.mgd.driver_smps_mode == smps_mode) + return; + sdata->u.mgd.driver_smps_mode = smps_mode; ieee80211_queue_work(&sdata->local->hw, diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index 6b7644e818d8..170f9a7fa319 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -44,7 +44,6 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; struct ieee80211_local *local = sdata->local; int rates, i; - struct sk_buff *skb; struct ieee80211_mgmt *mgmt; u8 *pos; struct ieee80211_supported_band *sband; @@ -52,31 +51,34 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, u32 bss_change; u8 supp_rates[IEEE80211_MAX_SUPP_RATES]; struct cfg80211_chan_def chandef; + struct beacon_data *presp; + int frame_len; lockdep_assert_held(&ifibss->mtx); /* Reset own TSF to allow time synchronization work. 
*/ drv_reset_tsf(local, sdata); - skb = ifibss->skb; - RCU_INIT_POINTER(ifibss->presp, NULL); - synchronize_rcu(); - skb->data = skb->head; - skb->len = 0; - skb_reset_tail_pointer(skb); - skb_reserve(skb, sdata->local->hw.extra_tx_headroom); - if (!ether_addr_equal(ifibss->bssid, bssid)) - sta_info_flush(sdata->local, sdata); + sta_info_flush(sdata); /* if merging, indicate to driver that we leave the old IBSS */ if (sdata->vif.bss_conf.ibss_joined) { sdata->vif.bss_conf.ibss_joined = false; sdata->vif.bss_conf.ibss_creator = false; + sdata->vif.bss_conf.enable_beacon = false; netif_carrier_off(sdata->dev); - ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_IBSS); + ieee80211_bss_info_change_notify(sdata, + BSS_CHANGED_IBSS | + BSS_CHANGED_BEACON_ENABLED); } + presp = rcu_dereference_protected(ifibss->presp, + lockdep_is_held(&ifibss->mtx)); + rcu_assign_pointer(ifibss->presp, NULL); + if (presp) + kfree_rcu(presp, rcu_head); + sdata->drop_unencrypted = capability & WLAN_CAPABILITY_PRIVACY ? 1 : 0; cfg80211_chandef_create(&chandef, chan, ifibss->channel_type); @@ -98,19 +100,24 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, sband = local->hw.wiphy->bands[chan->band]; - /* build supported rates array */ - pos = supp_rates; - for (i = 0; i < sband->n_bitrates; i++) { - int rate = sband->bitrates[i].bitrate; - u8 basic = 0; - if (basic_rates & BIT(i)) - basic = 0x80; - *pos++ = basic | (u8) (rate / 5); - } - /* Build IBSS probe response */ - mgmt = (void *) skb_put(skb, 24 + sizeof(mgmt->u.beacon)); - memset(mgmt, 0, 24 + sizeof(mgmt->u.beacon)); + frame_len = sizeof(struct ieee80211_hdr_3addr) + + 12 /* struct ieee80211_mgmt.u.beacon */ + + 2 + IEEE80211_MAX_SSID_LEN /* max SSID */ + + 2 + 8 /* max Supported Rates */ + + 3 /* max DS params */ + + 4 /* IBSS params */ + + 2 + (IEEE80211_MAX_SUPP_RATES - 8) + + 2 + sizeof(struct ieee80211_ht_cap) + + 2 + sizeof(struct ieee80211_ht_operation) + + ifibss->ie_len; + presp = kzalloc(sizeof(*presp) + frame_len, GFP_KERNEL); + if (!presp) + return; + + presp->head = (void *)(presp + 1); + + mgmt = (void *) presp->head; mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_PROBE_RESP); eth_broadcast_addr(mgmt->da); @@ -120,27 +127,30 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, mgmt->u.beacon.timestamp = cpu_to_le64(tsf); mgmt->u.beacon.capab_info = cpu_to_le16(capability); - pos = skb_put(skb, 2 + ifibss->ssid_len); + pos = (u8 *)mgmt + offsetof(struct ieee80211_mgmt, u.beacon.variable); + *pos++ = WLAN_EID_SSID; *pos++ = ifibss->ssid_len; memcpy(pos, ifibss->ssid, ifibss->ssid_len); + pos += ifibss->ssid_len; - rates = sband->n_bitrates; - if (rates > 8) - rates = 8; - pos = skb_put(skb, 2 + rates); + rates = min_t(int, 8, sband->n_bitrates); *pos++ = WLAN_EID_SUPP_RATES; *pos++ = rates; - memcpy(pos, supp_rates, rates); + for (i = 0; i < rates; i++) { + int rate = sband->bitrates[i].bitrate; + u8 basic = 0; + if (basic_rates & BIT(i)) + basic = 0x80; + *pos++ = basic | (u8) (rate / 5); + } if (sband->band == IEEE80211_BAND_2GHZ) { - pos = skb_put(skb, 2 + 1); *pos++ = WLAN_EID_DS_PARAMS; *pos++ = 1; *pos++ = ieee80211_frequency_to_channel(chan->center_freq); } - pos = skb_put(skb, 2 + 2); *pos++ = WLAN_EID_IBSS_PARAMS; *pos++ = 2; /* FIX: set ATIM window based on scan results */ @@ -148,23 +158,25 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, *pos++ = 0; if (sband->n_bitrates > 8) { - rates = sband->n_bitrates - 8; - pos = 
skb_put(skb, 2 + rates); *pos++ = WLAN_EID_EXT_SUPP_RATES; - *pos++ = rates; - memcpy(pos, &supp_rates[8], rates); + *pos++ = sband->n_bitrates - 8; + for (i = 8; i < sband->n_bitrates; i++) { + int rate = sband->bitrates[i].bitrate; + u8 basic = 0; + if (basic_rates & BIT(i)) + basic = 0x80; + *pos++ = basic | (u8) (rate / 5); + } } - if (ifibss->ie_len) - memcpy(skb_put(skb, ifibss->ie_len), - ifibss->ie, ifibss->ie_len); + if (ifibss->ie_len) { + memcpy(pos, ifibss->ie, ifibss->ie_len); + pos += ifibss->ie_len; + } /* add HT capability and information IEs */ if (chandef.width != NL80211_CHAN_WIDTH_20_NOHT && sband->ht_cap.ht_supported) { - pos = skb_put(skb, 4 + - sizeof(struct ieee80211_ht_cap) + - sizeof(struct ieee80211_ht_operation)); pos = ieee80211_ie_build_ht_cap(pos, &sband->ht_cap, sband->ht_cap.cap); /* @@ -177,7 +189,6 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, } if (local->hw.queues >= IEEE80211_NUM_ACS) { - pos = skb_put(skb, 9); *pos++ = WLAN_EID_VENDOR_SPECIFIC; *pos++ = 7; /* len */ *pos++ = 0x00; /* Microsoft OUI 00:50:F2 */ @@ -189,10 +200,17 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, *pos++ = 0; /* U-APSD no in use */ } - rcu_assign_pointer(ifibss->presp, skb); + presp->head_len = pos - presp->head; + if (WARN_ON(presp->head_len > frame_len)) + return; + + rcu_assign_pointer(ifibss->presp, presp); + sdata->vif.bss_conf.enable_beacon = true; sdata->vif.bss_conf.beacon_int = beacon_int; sdata->vif.bss_conf.basic_rates = basic_rates; + sdata->vif.bss_conf.ssid_len = ifibss->ssid_len; + memcpy(sdata->vif.bss_conf.ssid, ifibss->ssid, ifibss->ssid_len); bss_change = BSS_CHANGED_BEACON_INT; bss_change |= ieee80211_reset_erp_info(sdata); bss_change |= BSS_CHANGED_BSSID; @@ -201,6 +219,7 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, bss_change |= BSS_CHANGED_BASIC_RATES; bss_change |= BSS_CHANGED_HT; bss_change |= BSS_CHANGED_IBSS; + bss_change |= BSS_CHANGED_SSID; /* * In 5 GHz/802.11a, we can always use short slot time. 
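Note: the hunks above stop pre-building a supp_rates[] scratch array and instead write the Supported Rates / Extended Supported Rates elements straight into the new beacon_data buffer. The on-air encoding is unchanged: each octet is the rate in 500 kbps units (mac80211 bitrates are given in 100 kbps units, hence the divide by 5), with bit 7 set for rates in the basic rate set. A minimal stand-alone sketch of that encoding, with illustrative names (build_supp_rates_ie is not a mac80211 function):

#include <stdint.h>
#include <stddef.h>

#define WLAN_EID_SUPP_RATES 1

/* Encode up to eight rates as a Supported Rates element; bitrates[]
 * is in 100 kbps units, basic_rates is a bitmap over bitrates[]. */
static uint8_t *build_supp_rates_ie(uint8_t *pos, const int *bitrates,
				    size_t n_bitrates, uint32_t basic_rates)
{
	size_t i, rates = n_bitrates > 8 ? 8 : n_bitrates;

	*pos++ = WLAN_EID_SUPP_RATES;
	*pos++ = (uint8_t)rates;
	for (i = 0; i < rates; i++) {
		uint8_t basic = (basic_rates & (1U << i)) ? 0x80 : 0;
		*pos++ = basic | (uint8_t)(bitrates[i] / 5);
	}
	return pos;	/* caller continues writing the next element here */
}
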
@@ -226,8 +245,8 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, round_jiffies(jiffies + IEEE80211_IBSS_MERGE_INTERVAL)); bss = cfg80211_inform_bss_frame(local->hw.wiphy, chan, - mgmt, skb->len, 0, GFP_KERNEL); - cfg80211_put_bss(bss); + mgmt, presp->head_len, 0, GFP_KERNEL); + cfg80211_put_bss(local->hw.wiphy, bss); netif_carrier_on(sdata->dev); cfg80211_ibss_joined(sdata->dev, ifibss->bssid, GFP_KERNEL); } @@ -241,6 +260,8 @@ static void ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, u32 basic_rates; int i, j; u16 beacon_int = cbss->beacon_interval; + const struct cfg80211_bss_ies *ies; + u64 tsf; lockdep_assert_held(&sdata->u.ibss.mtx); @@ -264,13 +285,17 @@ static void ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, } } + rcu_read_lock(); + ies = rcu_dereference(cbss->ies); + tsf = ies->tsf; + rcu_read_unlock(); + __ieee80211_sta_join_ibss(sdata, cbss->bssid, beacon_int, cbss->channel, basic_rates, cbss->capability, - cbss->tsf, - false); + tsf, false); } static struct sta_info *ieee80211_ibss_finish_sta(struct sta_info *sta, @@ -301,7 +326,7 @@ static struct sta_info *ieee80211_ibss_finish_sta(struct sta_info *sta, "TX Auth SA=%pM DA=%pM BSSID=%pM (auth_transaction=1)\n", sdata->vif.addr, addr, sdata->u.ibss.bssid); ieee80211_send_auth(sdata, 1, WLAN_AUTH_OPEN, 0, NULL, 0, - addr, sdata->u.ibss.bssid, NULL, 0, 0); + addr, sdata->u.ibss.bssid, NULL, 0, 0, 0); } return sta; } @@ -421,15 +446,13 @@ static void ieee80211_rx_mgmt_auth_ibss(struct ieee80211_sub_if_data *sdata, * has actually implemented this. */ ieee80211_send_auth(sdata, 2, WLAN_AUTH_OPEN, 0, NULL, 0, - mgmt->sa, sdata->u.ibss.bssid, NULL, 0, 0); + mgmt->sa, sdata->u.ibss.bssid, NULL, 0, 0, 0); } static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, - struct ieee80211_mgmt *mgmt, - size_t len, + struct ieee80211_mgmt *mgmt, size_t len, struct ieee80211_rx_status *rx_status, - struct ieee802_11_elems *elems, - bool beacon) + struct ieee802_11_elems *elems) { struct ieee80211_local *local = sdata->local; int freq; @@ -443,7 +466,7 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, struct ieee80211_supported_band *sband = local->hw.wiphy->bands[band]; bool rates_updated = false; - if (elems->ds_params && elems->ds_params_len == 1) + if (elems->ds_params) freq = ieee80211_channel_to_frequency(elems->ds_params[0], band); else @@ -491,33 +514,26 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, if (sta && elems->ht_operation && elems->ht_cap_elem && sdata->u.ibss.channel_type != NL80211_CHAN_NO_HT) { /* we both use HT */ - struct ieee80211_sta_ht_cap sta_ht_cap_new; + struct ieee80211_ht_cap htcap_ie; struct cfg80211_chan_def chandef; ieee80211_ht_oper_to_chandef(channel, elems->ht_operation, &chandef); - ieee80211_ht_cap_ie_to_sta_ht_cap(sdata, sband, - elems->ht_cap_elem, - &sta_ht_cap_new); + memcpy(&htcap_ie, elems->ht_cap_elem, sizeof(htcap_ie)); /* * fall back to HT20 if we don't use or use * the other extension channel */ - if (chandef.width != NL80211_CHAN_WIDTH_40 || - cfg80211_get_chandef_type(&chandef) != + if (cfg80211_get_chandef_type(&chandef) != sdata->u.ibss.channel_type) - sta_ht_cap_new.cap &= - ~IEEE80211_HT_CAP_SUP_WIDTH_20_40; - - if (memcmp(&sta->sta.ht_cap, &sta_ht_cap_new, - sizeof(sta_ht_cap_new))) { - memcpy(&sta->sta.ht_cap, &sta_ht_cap_new, - sizeof(sta_ht_cap_new)); - rates_updated = true; - } + htcap_ie.cap_info &= + cpu_to_le16(~IEEE80211_HT_CAP_SUP_WIDTH_20_40); + + 
rates_updated |= ieee80211_ht_cap_ie_to_sta_ht_cap( + sdata, sband, &htcap_ie, sta); } if (sta && rates_updated) { @@ -530,14 +546,14 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, } bss = ieee80211_bss_info_update(local, rx_status, mgmt, len, elems, - channel, beacon); + channel); if (!bss) return; cbss = container_of((void *)bss, struct cfg80211_bss, priv); - /* was just updated in ieee80211_bss_info_update */ - beacon_timestamp = cbss->tsf; + /* same for beacon and probe response */ + beacon_timestamp = le64_to_cpu(mgmt->u.beacon.timestamp); /* check if we need to merge IBSS */ @@ -824,8 +840,7 @@ static void ieee80211_rx_mgmt_probe_req(struct ieee80211_sub_if_data *sdata, struct ieee80211_local *local = sdata->local; int tx_last_beacon, len = req->len; struct sk_buff *skb; - struct ieee80211_mgmt *resp; - struct sk_buff *presp; + struct beacon_data *presp; u8 *pos, *end; lockdep_assert_held(&ifibss->mtx); @@ -866,51 +881,42 @@ static void ieee80211_rx_mgmt_probe_req(struct ieee80211_sub_if_data *sdata, } /* Reply with ProbeResp */ - skb = skb_copy(presp, GFP_KERNEL); + skb = dev_alloc_skb(local->tx_headroom + presp->head_len); if (!skb) return; - resp = (struct ieee80211_mgmt *) skb->data; - memcpy(resp->da, mgmt->sa, ETH_ALEN); - ibss_dbg(sdata, "Sending ProbeResp to %pM\n", resp->da); + skb_reserve(skb, local->tx_headroom); + memcpy(skb_put(skb, presp->head_len), presp->head, presp->head_len); + + memcpy(((struct ieee80211_mgmt *) skb->data)->da, mgmt->sa, ETH_ALEN); + ibss_dbg(sdata, "Sending ProbeResp to %pM\n", mgmt->sa); IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT; ieee80211_tx_skb(sdata, skb); } -static void ieee80211_rx_mgmt_probe_resp(struct ieee80211_sub_if_data *sdata, - struct ieee80211_mgmt *mgmt, - size_t len, - struct ieee80211_rx_status *rx_status) +static +void ieee80211_rx_mgmt_probe_beacon(struct ieee80211_sub_if_data *sdata, + struct ieee80211_mgmt *mgmt, size_t len, + struct ieee80211_rx_status *rx_status) { size_t baselen; struct ieee802_11_elems elems; + BUILD_BUG_ON(offsetof(typeof(mgmt->u.probe_resp), variable) != + offsetof(typeof(mgmt->u.beacon), variable)); + + /* + * either beacon or probe_resp but the variable field is at the + * same offset + */ baselen = (u8 *) mgmt->u.probe_resp.variable - (u8 *) mgmt; if (baselen > len) return; ieee802_11_parse_elems(mgmt->u.probe_resp.variable, len - baselen, - &elems); + false, &elems); - ieee80211_rx_bss_info(sdata, mgmt, len, rx_status, &elems, false); -} - -static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, - struct ieee80211_mgmt *mgmt, - size_t len, - struct ieee80211_rx_status *rx_status) -{ - size_t baselen; - struct ieee802_11_elems elems; - - /* Process beacon from the current BSS */ - baselen = (u8 *) mgmt->u.beacon.variable - (u8 *) mgmt; - if (baselen > len) - return; - - ieee802_11_parse_elems(mgmt->u.beacon.variable, len - baselen, &elems); - - ieee80211_rx_bss_info(sdata, mgmt, len, rx_status, &elems, true); + ieee80211_rx_bss_info(sdata, mgmt, len, rx_status, &elems); } void ieee80211_ibss_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, @@ -934,12 +940,9 @@ void ieee80211_ibss_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, ieee80211_rx_mgmt_probe_req(sdata, skb); break; case IEEE80211_STYPE_PROBE_RESP: - ieee80211_rx_mgmt_probe_resp(sdata, mgmt, skb->len, - rx_status); - break; case IEEE80211_STYPE_BEACON: - ieee80211_rx_mgmt_beacon(sdata, mgmt, skb->len, - rx_status); + ieee80211_rx_mgmt_probe_beacon(sdata, mgmt, skb->len, 
+ rx_status); break; case IEEE80211_STYPE_AUTH: ieee80211_rx_mgmt_auth_ibss(sdata, mgmt, skb->len); @@ -1001,36 +1004,9 @@ static void ieee80211_ibss_timer(unsigned long data) { struct ieee80211_sub_if_data *sdata = (struct ieee80211_sub_if_data *) data; - struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; - struct ieee80211_local *local = sdata->local; - - if (local->quiescing) { - ifibss->timer_running = true; - return; - } - - ieee80211_queue_work(&local->hw, &sdata->work); -} - -#ifdef CONFIG_PM -void ieee80211_ibss_quiesce(struct ieee80211_sub_if_data *sdata) -{ - struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; - - if (del_timer_sync(&ifibss->timer)) - ifibss->timer_running = true; -} - -void ieee80211_ibss_restart(struct ieee80211_sub_if_data *sdata) -{ - struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; - if (ifibss->timer_running) { - add_timer(&ifibss->timer); - ifibss->timer_running = false; - } + ieee80211_queue_work(&sdata->local->hw, &sdata->work); } -#endif void ieee80211_ibss_setup_sdata(struct ieee80211_sub_if_data *sdata) { @@ -1063,23 +1039,8 @@ void ieee80211_ibss_notify_scan_completed(struct ieee80211_local *local) int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata, struct cfg80211_ibss_params *params) { - struct sk_buff *skb; u32 changed = 0; - skb = dev_alloc_skb(sdata->local->hw.extra_tx_headroom + - sizeof(struct ieee80211_hdr_3addr) + - 12 /* struct ieee80211_mgmt.u.beacon */ + - 2 + IEEE80211_MAX_SSID_LEN /* max SSID */ + - 2 + 8 /* max Supported Rates */ + - 3 /* max DS params */ + - 4 /* IBSS params */ + - 2 + (IEEE80211_MAX_SUPP_RATES - 8) + - 2 + sizeof(struct ieee80211_ht_cap) + - 2 + sizeof(struct ieee80211_ht_operation) + - params->ie_len); - if (!skb) - return -ENOMEM; - mutex_lock(&sdata->u.ibss.mtx); if (params->bssid) { @@ -1108,7 +1069,6 @@ int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata, sdata->u.ibss.ie_len = params->ie_len; } - sdata->u.ibss.skb = skb; sdata->u.ibss.state = IEEE80211_IBSS_MLME_SEARCH; sdata->u.ibss.ibss_join_req = jiffies; @@ -1117,10 +1077,6 @@ int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata, mutex_unlock(&sdata->u.ibss.mtx); - mutex_lock(&sdata->local->mtx); - ieee80211_recalc_idle(sdata->local); - mutex_unlock(&sdata->local->mtx); - /* * 802.11n-2009 9.13.3.1: In an IBSS, the HT Protection field is * reserved, but an HT STA shall protect HT transmissions as though @@ -1148,13 +1104,13 @@ int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata, int ieee80211_ibss_leave(struct ieee80211_sub_if_data *sdata) { - struct sk_buff *skb; struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; struct ieee80211_local *local = sdata->local; struct cfg80211_bss *cbss; u16 capability; int active_ibss; struct sta_info *sta; + struct beacon_data *presp; mutex_lock(&sdata->u.ibss.mtx); @@ -1174,7 +1130,7 @@ int ieee80211_ibss_leave(struct ieee80211_sub_if_data *sdata) if (cbss) { cfg80211_unlink_bss(local->hw.wiphy, cbss); - cfg80211_put_bss(cbss); + cfg80211_put_bss(local->hw.wiphy, cbss); } } @@ -1182,7 +1138,7 @@ int ieee80211_ibss_leave(struct ieee80211_sub_if_data *sdata) memset(ifibss->bssid, 0, ETH_ALEN); ifibss->ssid_len = 0; - sta_info_flush(sdata->local, sdata); + sta_info_flush(sdata); spin_lock_bh(&ifibss->incomplete_lock); while (!list_empty(&ifibss->incomplete_stations)) { @@ -1200,15 +1156,18 @@ int ieee80211_ibss_leave(struct ieee80211_sub_if_data *sdata) /* remove beacon */ kfree(sdata->u.ibss.ie); - skb = rcu_dereference_protected(sdata->u.ibss.presp, - lockdep_is_held(&sdata->u.ibss.mtx)); + 
presp = rcu_dereference_protected(ifibss->presp, + lockdep_is_held(&sdata->u.ibss.mtx)); RCU_INIT_POINTER(sdata->u.ibss.presp, NULL); sdata->vif.bss_conf.ibss_joined = false; sdata->vif.bss_conf.ibss_creator = false; + sdata->vif.bss_conf.enable_beacon = false; + sdata->vif.bss_conf.ssid_len = 0; + clear_bit(SDATA_STATE_OFFCHANNEL_BEACON_STOPPED, &sdata->state); ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON_ENABLED | BSS_CHANGED_IBSS); synchronize_rcu(); - kfree_skb(skb); + kfree(presp); skb_queue_purge(&sdata->skb_queue); @@ -1216,9 +1175,5 @@ int ieee80211_ibss_leave(struct ieee80211_sub_if_data *sdata) mutex_unlock(&sdata->u.ibss.mtx); - mutex_lock(&local->mtx); - ieee80211_recalc_idle(sdata->local); - mutex_unlock(&local->mtx); - return 0; } diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 2ed065c09562..158e6eb188d3 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -86,23 +86,11 @@ struct ieee80211_fragment_entry { struct ieee80211_bss { - /* don't want to look up all the time */ - size_t ssid_len; - u8 ssid[IEEE80211_MAX_SSID_LEN]; - - u32 device_ts; + u32 device_ts_beacon, device_ts_presp; bool wmm_used; bool uapsd_supported; - unsigned long last_probe_resp; - -#ifdef CONFIG_MAC80211_MESH - u8 *mesh_id; - size_t mesh_id_len; - u8 *mesh_cfg; -#endif - #define IEEE80211_MAX_SUPP_RATES 32 u8 supp_rates[IEEE80211_MAX_SUPP_RATES]; size_t supp_rates_len; @@ -153,31 +141,6 @@ enum ieee80211_bss_valid_data_flags { IEEE80211_BSS_VALID_ERP = BIT(3) }; -static inline u8 *bss_mesh_cfg(struct ieee80211_bss *bss) -{ -#ifdef CONFIG_MAC80211_MESH - return bss->mesh_cfg; -#endif - return NULL; -} - -static inline u8 *bss_mesh_id(struct ieee80211_bss *bss) -{ -#ifdef CONFIG_MAC80211_MESH - return bss->mesh_id; -#endif - return NULL; -} - -static inline u8 bss_mesh_id_len(struct ieee80211_bss *bss) -{ -#ifdef CONFIG_MAC80211_MESH - return bss->mesh_id_len; -#endif - return 0; -} - - typedef unsigned __bitwise__ ieee80211_tx_result; #define TX_CONTINUE ((__force ieee80211_tx_result) 0u) #define TX_DROP ((__force ieee80211_tx_result) 1u) @@ -193,6 +156,7 @@ struct ieee80211_tx_data { struct ieee80211_sub_if_data *sdata; struct sta_info *sta; struct ieee80211_key *key; + struct ieee80211_tx_rate rate; unsigned int flags; }; @@ -346,12 +310,14 @@ struct ieee80211_roc_work { struct ieee80211_channel *chan; bool started, abort, hw_begun, notified; + bool to_be_freed; unsigned long hw_start_time; u32 duration, req_duration; struct sk_buff *frame; u64 cookie, mgmt_tx_cookie; + enum ieee80211_roc_type type; }; /* flags used in struct ieee80211_if_managed.flags */ @@ -380,6 +346,7 @@ struct ieee80211_mgd_auth_data { u8 key[WLAN_KEY_LEN_WEP104]; u8 key_len, key_idx; bool done; + bool timeout_started; u16 sae_trans, sae_status; size_t data_len; @@ -399,12 +366,14 @@ struct ieee80211_mgd_assoc_data { u8 ssid_len; u8 supp_rates_len; bool wmm, uapsd; - bool have_beacon; - bool sent_assoc; + bool have_beacon, need_beacon; bool synced; + bool timeout_started; u8 ap_ht_param; + struct ieee80211_vht_cap ap_vht_cap; + size_t ie_len; u8 ie[]; }; @@ -423,6 +392,7 @@ struct ieee80211_if_managed { unsigned long probe_timeout; int probe_send_count; bool nullfunc_failed; + bool connection_loss; struct mutex mtx; struct cfg80211_bss *associated; @@ -433,7 +403,6 @@ struct ieee80211_if_managed { u16 aid; - unsigned long timers_running; /* used for quiesce/restart */ bool powersave; /* powersave requested for this iface */ bool broken_ap; /* AP is broken -- turn off 
powersave */ u8 dtim_period; @@ -447,6 +416,10 @@ struct ieee80211_if_managed { bool beacon_crc_valid; u32 beacon_crc; + bool status_acked; + bool status_received; + __le16 status_fc; + enum { IEEE80211_MFP_DISABLED, IEEE80211_MFP_OPTIONAL, @@ -471,7 +444,7 @@ struct ieee80211_if_managed { u8 use_4addr; - u8 p2p_noa_index; + s16 p2p_noa_index; /* Signal strength from the last Beacon frame in the current BSS. */ int last_beacon_signal; @@ -508,6 +481,8 @@ struct ieee80211_if_managed { struct ieee80211_ht_cap ht_capa; /* configured ht-cap over-rides */ struct ieee80211_ht_cap ht_capa_mask; /* Valid parts of ht_capa */ + struct ieee80211_vht_cap vht_capa; /* configured VHT overrides */ + struct ieee80211_vht_cap vht_capa_mask; /* Valid parts of vht_capa */ }; struct ieee80211_if_ibss { @@ -519,8 +494,6 @@ struct ieee80211_if_ibss { u32 basic_rates; - bool timer_running; - bool fixed_bssid; bool fixed_channel; bool privacy; @@ -537,8 +510,7 @@ struct ieee80211_if_ibss { unsigned long ibss_join_req; /* probe response/beacon for IBSS */ - struct sk_buff __rcu *presp; - struct sk_buff *skb; + struct beacon_data __rcu *presp; spinlock_t incomplete_lock; struct list_head incomplete_stations; @@ -572,8 +544,6 @@ struct ieee80211_if_mesh { struct timer_list mesh_path_timer; struct timer_list mesh_path_root_timer; - unsigned long timers_running; - unsigned long wrkq_flags; u8 mesh_id[IEEE80211_MAX_MESH_ID_LEN]; @@ -609,6 +579,9 @@ struct ieee80211_if_mesh { u32 mesh_seqnum; bool accepting_plinks; int num_gates; + struct beacon_data __rcu *beacon; + /* just protects beacon updates for now */ + struct mutex mtx; const u8 *ie; u8 ie_len; enum { @@ -616,11 +589,17 @@ struct ieee80211_if_mesh { IEEE80211_MESH_SEC_AUTHED = 0x1, IEEE80211_MESH_SEC_SECURED = 0x2, } security; + bool user_mpm; /* Extensible Synchronization Framework */ const struct ieee80211_mesh_sync_ops *sync_ops; s64 sync_offset_clockdrift_max; spinlock_t sync_offset_lock; bool adjusting_tbtt; + /* mesh power save */ + enum nl80211_mesh_power_mode nonpeer_pm; + int ps_peers_light_sleep; + int ps_peers_deep_sleep; + struct ps_data ps; }; #ifdef CONFIG_MAC80211_MESH @@ -659,10 +638,13 @@ enum ieee80211_sub_if_data_flags { * change handling while the interface is up * @SDATA_STATE_OFFCHANNEL: This interface is currently in offchannel * mode, so queues are stopped + * @SDATA_STATE_OFFCHANNEL_BEACON_STOPPED: Beaconing was stopped due + * to offchannel, reset when offchannel returns */ enum ieee80211_sdata_state_bits { SDATA_STATE_RUNNING, SDATA_STATE_OFFCHANNEL, + SDATA_STATE_OFFCHANNEL_BEACON_STOPPED, }; /** @@ -685,6 +667,7 @@ struct ieee80211_chanctx { enum ieee80211_chanctx_mode mode; int refcount; + bool driver_present; struct ieee80211_chanctx_conf conf; }; @@ -699,6 +682,8 @@ struct ieee80211_sub_if_data { /* count for keys needing tailroom space allocation */ int crypto_tx_tailroom_needed_cnt; + int crypto_tx_tailroom_pending_dec; + struct delayed_work dec_tailroom_needed_wk; struct net_device *dev; struct ieee80211_local *local; @@ -711,9 +696,6 @@ struct ieee80211_sub_if_data { char name[IFNAMSIZ]; - /* to detect idle changes */ - bool old_idle; - /* Fragment table for host-based reassembly */ struct ieee80211_fragment_entry fragments[IEEE80211_FRAGMENT_MAX]; unsigned int fragment_next; @@ -741,14 +723,15 @@ struct ieee80211_sub_if_data { struct work_struct work; struct sk_buff_head skb_queue; - bool arp_filter_state; - u8 needed_rx_chains; enum ieee80211_smps_mode smps_mode; int user_power_level; /* in dBm */ int ap_power_level; /* in 
dBm */ + bool radar_required; + struct delayed_work dfs_cac_timer_work; + /* * AP this belongs to: self in AP mode and * corresponding AP in VLAN mode, NULL for @@ -758,6 +741,8 @@ struct ieee80211_sub_if_data { /* bitmap of allowed (non-MCS) rate indexes for rate control */ u32 rc_rateidx_mask[IEEE80211_NUM_BANDS]; + + bool rc_has_mcs_mask[IEEE80211_NUM_BANDS]; u8 rc_rateidx_mcs_mask[IEEE80211_NUM_BANDS][IEEE80211_HT_MCS_MASK_LEN]; union { @@ -776,13 +761,13 @@ struct ieee80211_sub_if_data { #ifdef CONFIG_MAC80211_DEBUGFS struct { - struct dentry *dir; struct dentry *subdir_stations; struct dentry *default_unicast_key; struct dentry *default_multicast_key; struct dentry *default_mgmt_key; } debugfs; #endif + /* must be last, dynamically sized area in this! */ struct ieee80211_vif vif; }; @@ -817,11 +802,6 @@ enum sdata_queue_type { enum { IEEE80211_RX_MSG = 1, IEEE80211_TX_STATUS_MSG = 2, - IEEE80211_EOSP_MSG = 3, -}; - -struct skb_eosp_msg_data { - u8 sta[ETH_ALEN], iface[ETH_ALEN]; }; enum queue_stop_reason { @@ -831,6 +811,8 @@ enum queue_stop_reason { IEEE80211_QUEUE_STOP_REASON_AGGREGATION, IEEE80211_QUEUE_STOP_REASON_SUSPEND, IEEE80211_QUEUE_STOP_REASON_SKB_ADD, + IEEE80211_QUEUE_STOP_REASON_OFFCHANNEL, + IEEE80211_QUEUE_STOP_REASON_FLUSH, }; #ifdef CONFIG_MAC80211_LEDS @@ -963,6 +945,10 @@ struct ieee80211_local { /* wowlan is enabled -- don't reconfig on resume */ bool wowlan; + /* DFS/radar detection is enabled */ + bool radar_detect_enabled; + struct work_struct radar_detected_work; + /* number of RX chains the hardware has */ u8 rx_chains; @@ -977,14 +963,7 @@ struct ieee80211_local { struct sk_buff_head skb_queue; struct sk_buff_head skb_queue_unreliable; - /* - * Internal FIFO queue which is shared between multiple rx path - * stages. Its main task is to provide a serialization mechanism, - * so all rx handlers can enjoy having exclusive access to their - * private data structures. - */ - struct sk_buff_head rx_skb_queue; - bool running_rx_handler; /* protected by rx_skb_queue.lock */ + spinlock_t rx_path_lock; /* Station data */ /* @@ -1044,10 +1023,9 @@ struct ieee80211_local { enum mac80211_scan_state next_scan_state; struct delayed_work scan_work; struct ieee80211_sub_if_data __rcu *scan_sdata; - struct ieee80211_channel *csa_channel; + struct cfg80211_chan_def csa_chandef; /* For backward compatibility only -- do not use */ - struct ieee80211_channel *_oper_channel; - enum nl80211_channel_type _oper_channel_type; + struct cfg80211_chan_def _oper_chandef; /* Temporary remain-on-channel for off-channel operations */ struct ieee80211_channel *tmp_channel; @@ -1118,14 +1096,13 @@ struct ieee80211_local { struct timer_list dynamic_ps_timer; struct notifier_block network_latency_notifier; struct notifier_block ifa_notifier; + struct notifier_block ifa6_notifier; /* * The dynamic ps timeout configured from user space via WEXT - * this will override whatever chosen by mac80211 internally. 
*/ int dynamic_ps_forced_timeout; - int dynamic_ps_user_timeout; - bool disable_dynamic_ps; int user_power_level; /* in dBm, for all interfaces */ @@ -1153,11 +1130,6 @@ struct ieee80211_local { struct ieee80211_sub_if_data __rcu *p2p_sdata; - /* dummy netdev for use w/ NAPI */ - struct net_device napi_dev; - - struct napi_struct napi; - /* virtual monitor interface */ struct ieee80211_sub_if_data __rcu *monitor_sdata; struct cfg80211_chan_def monitor_chandef; @@ -1183,53 +1155,47 @@ struct ieee80211_ra_tid { /* Parsed Information Elements */ struct ieee802_11_elems { - u8 *ie_start; + const u8 *ie_start; size_t total_len; /* pointers to IEs */ - u8 *ssid; - u8 *supp_rates; - u8 *fh_params; - u8 *ds_params; - u8 *cf_params; - struct ieee80211_tim_ie *tim; - u8 *ibss_params; - u8 *challenge; - u8 *wpa; - u8 *rsn; - u8 *erp_info; - u8 *ext_supp_rates; - u8 *wmm_info; - u8 *wmm_param; - struct ieee80211_ht_cap *ht_cap_elem; - struct ieee80211_ht_operation *ht_operation; - struct ieee80211_vht_cap *vht_cap_elem; - struct ieee80211_vht_operation *vht_operation; - struct ieee80211_meshconf_ie *mesh_config; - u8 *mesh_id; - u8 *peering; - u8 *preq; - u8 *prep; - u8 *perr; - struct ieee80211_rann_ie *rann; - struct ieee80211_channel_sw_ie *ch_switch_ie; - u8 *country_elem; - u8 *pwr_constr_elem; - u8 *quiet_elem; /* first quite element */ - u8 *timeout_int; + const u8 *ssid; + const u8 *supp_rates; + const u8 *ds_params; + const struct ieee80211_tim_ie *tim; + const u8 *challenge; + const u8 *rsn; + const u8 *erp_info; + const u8 *ext_supp_rates; + const u8 *wmm_info; + const u8 *wmm_param; + const struct ieee80211_ht_cap *ht_cap_elem; + const struct ieee80211_ht_operation *ht_operation; + const struct ieee80211_vht_cap *vht_cap_elem; + const struct ieee80211_vht_operation *vht_operation; + const struct ieee80211_meshconf_ie *mesh_config; + const u8 *mesh_id; + const u8 *peering; + const __le16 *awake_window; + const u8 *preq; + const u8 *prep; + const u8 *perr; + const struct ieee80211_rann_ie *rann; + const struct ieee80211_channel_sw_ie *ch_switch_ie; + const struct ieee80211_ext_chansw_ie *ext_chansw_ie; + const struct ieee80211_wide_bw_chansw_ie *wide_bw_chansw_ie; + const u8 *country_elem; + const u8 *pwr_constr_elem; + const struct ieee80211_timeout_interval_ie *timeout_int; + const u8 *opmode_notif; + const struct ieee80211_sec_chan_offs_ie *sec_chan_offs; /* length of them, respectively */ u8 ssid_len; u8 supp_rates_len; - u8 fh_params_len; - u8 ds_params_len; - u8 cf_params_len; u8 tim_len; - u8 ibss_params_len; u8 challenge_len; - u8 wpa_len; u8 rsn_len; - u8 erp_info_len; u8 ext_supp_rates_len; u8 wmm_info_len; u8 wmm_param_len; @@ -1239,9 +1205,6 @@ struct ieee802_11_elems { u8 prep_len; u8 perr_len; u8 country_elem_len; - u8 quiet_elem_len; - u8 num_of_quiet_elem; /* can be more the one */ - u8 timeout_int_len; /* whether a parse error occurred while retrieving these elements */ bool parse_error; @@ -1296,18 +1259,14 @@ void ieee80211_recalc_ps_vif(struct ieee80211_sub_if_data *sdata); int ieee80211_max_network_latency(struct notifier_block *nb, unsigned long data, void *dummy); int ieee80211_set_arp_filter(struct ieee80211_sub_if_data *sdata); -void ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, - struct ieee80211_channel_sw_ie *sw_elem, - struct ieee80211_bss *bss, - u64 timestamp); -void ieee80211_sta_quiesce(struct ieee80211_sub_if_data *sdata); -void ieee80211_sta_restart(struct ieee80211_sub_if_data *sdata); void ieee80211_sta_work(struct 
ieee80211_sub_if_data *sdata); void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb); void ieee80211_sta_reset_beacon_monitor(struct ieee80211_sub_if_data *sdata); void ieee80211_sta_reset_conn_monitor(struct ieee80211_sub_if_data *sdata); void ieee80211_mgd_stop(struct ieee80211_sub_if_data *sdata); +void ieee80211_mgd_conn_tx_status(struct ieee80211_sub_if_data *sdata, + __le16 fc, bool acked); /* IBSS code */ void ieee80211_ibss_notify_scan_completed(struct ieee80211_local *local); @@ -1317,8 +1276,6 @@ void ieee80211_ibss_rx_no_sta(struct ieee80211_sub_if_data *sdata, int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata, struct cfg80211_ibss_params *params); int ieee80211_ibss_leave(struct ieee80211_sub_if_data *sdata); -void ieee80211_ibss_quiesce(struct ieee80211_sub_if_data *sdata); -void ieee80211_ibss_restart(struct ieee80211_sub_if_data *sdata); void ieee80211_ibss_work(struct ieee80211_sub_if_data *sdata); void ieee80211_ibss_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb); @@ -1346,8 +1303,7 @@ ieee80211_bss_info_update(struct ieee80211_local *local, struct ieee80211_mgmt *mgmt, size_t len, struct ieee802_11_elems *elems, - struct ieee80211_channel *channel, - bool beacon); + struct ieee80211_channel *channel); void ieee80211_rx_bss_put(struct ieee80211_local *local, struct ieee80211_bss *bss); @@ -1362,8 +1318,9 @@ void ieee80211_offchannel_stop_vifs(struct ieee80211_local *local); void ieee80211_offchannel_return(struct ieee80211_local *local); void ieee80211_roc_setup(struct ieee80211_local *local); void ieee80211_start_next_roc(struct ieee80211_local *local); -void ieee80211_roc_purge(struct ieee80211_sub_if_data *sdata); -void ieee80211_roc_notify_destroy(struct ieee80211_roc_work *roc); +void ieee80211_roc_purge(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata); +void ieee80211_roc_notify_destroy(struct ieee80211_roc_work *roc, bool free); void ieee80211_sw_roc_work(struct work_struct *work); void ieee80211_handle_roc_started(struct ieee80211_roc_work *roc); @@ -1377,11 +1334,14 @@ int ieee80211_if_change_type(struct ieee80211_sub_if_data *sdata, enum nl80211_iftype type); void ieee80211_if_remove(struct ieee80211_sub_if_data *sdata); void ieee80211_remove_interfaces(struct ieee80211_local *local); +u32 ieee80211_idle_off(struct ieee80211_local *local); void ieee80211_recalc_idle(struct ieee80211_local *local); void ieee80211_adjust_monitor_flags(struct ieee80211_sub_if_data *sdata, const int offset); int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up); void ieee80211_sdata_stop(struct ieee80211_sub_if_data *sdata); +int ieee80211_add_virtual_monitor(struct ieee80211_local *local); +void ieee80211_del_virtual_monitor(struct ieee80211_local *local); bool __ieee80211_recalc_txpower(struct ieee80211_sub_if_data *sdata); void ieee80211_recalc_txpower(struct ieee80211_sub_if_data *sdata); @@ -1404,10 +1364,10 @@ void ieee80211_purge_tx_queue(struct ieee80211_hw *hw, /* HT */ void ieee80211_apply_htcap_overrides(struct ieee80211_sub_if_data *sdata, struct ieee80211_sta_ht_cap *ht_cap); -void ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_sub_if_data *sdata, +bool ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_sub_if_data *sdata, struct ieee80211_supported_band *sband, - struct ieee80211_ht_cap *ht_cap_ie, - struct ieee80211_sta_ht_cap *ht_cap); + const struct ieee80211_ht_cap *ht_cap_ie, + struct sta_info *sta); void ieee80211_send_delba(struct ieee80211_sub_if_data 
*sdata, const u8 *da, u16 tid, u16 initiator, u16 reason_code); @@ -1420,7 +1380,8 @@ void ___ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid, u16 initiator, u16 reason, bool stop); void __ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid, u16 initiator, u16 reason, bool stop); -void ieee80211_sta_tear_down_BA_sessions(struct sta_info *sta, bool tx); +void ieee80211_sta_tear_down_BA_sessions(struct sta_info *sta, + enum ieee80211_agg_stop_reason reason); void ieee80211_process_delba(struct ieee80211_sub_if_data *sdata, struct sta_info *sta, struct ieee80211_mgmt *mgmt, size_t len); @@ -1434,11 +1395,9 @@ void ieee80211_process_addba_request(struct ieee80211_local *local, size_t len); int __ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, - enum ieee80211_back_parties initiator, - bool tx); + enum ieee80211_agg_stop_reason reason); int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, - enum ieee80211_back_parties initiator, - bool tx); + enum ieee80211_agg_stop_reason reason); void ieee80211_start_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u16 tid); void ieee80211_stop_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u8 tid); void ieee80211_ba_session_work(struct work_struct *work); @@ -1448,10 +1407,19 @@ void ieee80211_release_reorder_timeout(struct sta_info *sta, int tid); u8 ieee80211_mcs_to_chains(const struct ieee80211_mcs_info *mcs); /* VHT */ -void ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata, - struct ieee80211_supported_band *sband, - struct ieee80211_vht_cap *vht_cap_ie, - struct ieee80211_sta_vht_cap *vht_cap); +void +ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata, + struct ieee80211_supported_band *sband, + const struct ieee80211_vht_cap *vht_cap_ie, + struct sta_info *sta); +enum ieee80211_sta_rx_bandwidth ieee80211_sta_cur_vht_bw(struct sta_info *sta); +void ieee80211_sta_set_rx_nss(struct sta_info *sta); +void ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata, + struct sta_info *sta, u8 opmode, + enum ieee80211_band band, bool nss_only); +void ieee80211_apply_vhtcap_overrides(struct ieee80211_sub_if_data *sdata, + struct ieee80211_sta_vht_cap *vht_cap); + /* Spectrum management */ void ieee80211_process_measurement_req(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt, @@ -1528,11 +1496,15 @@ static inline void ieee80211_tx_skb(struct ieee80211_sub_if_data *sdata, ieee80211_tx_skb_tid(sdata, skb, 7); } -void ieee802_11_parse_elems(u8 *start, size_t len, - struct ieee802_11_elems *elems); -u32 ieee802_11_parse_elems_crc(u8 *start, size_t len, +u32 ieee802_11_parse_elems_crc(u8 *start, size_t len, bool action, struct ieee802_11_elems *elems, u64 filter, u32 crc); +static inline void ieee802_11_parse_elems(u8 *start, size_t len, bool action, + struct ieee802_11_elems *elems) +{ + ieee802_11_parse_elems_crc(start, len, action, elems, 0, 0); +} + u32 ieee80211_mandatory_rates(struct ieee80211_local *local, enum ieee80211_band band); @@ -1548,8 +1520,10 @@ void ieee80211_sta_tx_notify(struct ieee80211_sub_if_data *sdata, struct ieee80211_hdr *hdr, bool ack); void ieee80211_wake_queues_by_reason(struct ieee80211_hw *hw, + unsigned long queues, enum queue_stop_reason reason); void ieee80211_stop_queues_by_reason(struct ieee80211_hw *hw, + unsigned long queues, enum queue_stop_reason reason); void ieee80211_wake_queue_by_reason(struct ieee80211_hw *hw, int queue, enum queue_stop_reason reason); @@ -1566,11 +1540,14 @@ static inline void 
ieee80211_add_pending_skbs(struct ieee80211_local *local, { ieee80211_add_pending_skbs_fn(local, skbs, NULL, NULL); } +void ieee80211_flush_queues(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata); void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata, u16 transaction, u16 auth_alg, u16 status, - u8 *extra, size_t extra_len, const u8 *bssid, - const u8 *da, const u8 *key, u8 key_len, u8 key_idx); + const u8 *extra, size_t extra_len, const u8 *bssid, + const u8 *da, const u8 *key, u8 key_len, u8 key_idx, + u32 tx_flags); void ieee80211_send_deauth_disassoc(struct ieee80211_sub_if_data *sdata, const u8 *bssid, u16 stype, u16 reason, bool send_frame, u8 *frame_buf); @@ -1587,7 +1564,7 @@ struct sk_buff *ieee80211_build_probe_req(struct ieee80211_sub_if_data *sdata, void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst, const u8 *ssid, size_t ssid_len, const u8 *ie, size_t ie_len, - u32 ratemask, bool directed, bool no_cck, + u32 ratemask, bool directed, u32 tx_flags, struct ieee80211_channel *channel, bool scan); void ieee80211_sta_def_wmm_params(struct ieee80211_sub_if_data *sdata, @@ -1619,18 +1596,31 @@ int ieee80211_add_ext_srates_ie(struct ieee80211_sub_if_data *sdata, /* channel management */ void ieee80211_ht_oper_to_chandef(struct ieee80211_channel *control_chan, - struct ieee80211_ht_operation *ht_oper, + const struct ieee80211_ht_operation *ht_oper, struct cfg80211_chan_def *chandef); int __must_check ieee80211_vif_use_channel(struct ieee80211_sub_if_data *sdata, const struct cfg80211_chan_def *chandef, enum ieee80211_chanctx_mode mode); +int __must_check +ieee80211_vif_change_bandwidth(struct ieee80211_sub_if_data *sdata, + const struct cfg80211_chan_def *chandef, + u32 *changed); void ieee80211_vif_release_channel(struct ieee80211_sub_if_data *sdata); void ieee80211_vif_vlan_copy_chanctx(struct ieee80211_sub_if_data *sdata); +void ieee80211_vif_copy_chanctx_to_vlans(struct ieee80211_sub_if_data *sdata, + bool clear); void ieee80211_recalc_smps_chanctx(struct ieee80211_local *local, struct ieee80211_chanctx *chanctx); +void ieee80211_recalc_radar_chanctx(struct ieee80211_local *local, + struct ieee80211_chanctx *chanctx); + +void ieee80211_dfs_cac_timer(unsigned long data); +void ieee80211_dfs_cac_timer_work(struct work_struct *work); +void ieee80211_dfs_cac_cancel(struct ieee80211_local *local); +void ieee80211_dfs_radar_detected_work(struct work_struct *work); #ifdef CONFIG_MAC80211_NOINLINE #define debug_noinline noinline diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 8be854e86cd9..60f1ce5e5e52 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -1,5 +1,5 @@ /* - * Interface handling (except master interface) + * Interface handling * * Copyright 2002-2005, Instant802 Networks, Inc. * Copyright 2005-2006, Devicescape Software, Inc. 
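Note: ieee80211_stop_queues_by_reason() and ieee80211_wake_queues_by_reason() above gain an explicit queues bitmap, so callers can stop or wake only a subset of hardware queues for a given reason; a queue may transmit only while its per-queue reason mask is zero (the queue_stop_reasons[ac_queue] == 0 checks used elsewhere in this series). A toy model of that reason-tracked gating — a simplified sketch, not the mac80211 implementation:

#include <stdbool.h>
#include <stdint.h>

#define MAX_QUEUES 16

/* one bit per stop reason, per queue; a queue runs only when zero */
static uint32_t queue_stop_reasons[MAX_QUEUES];

static void stop_queues_by_reason(unsigned long queues, unsigned int reason)
{
	for (unsigned int q = 0; q < MAX_QUEUES; q++)
		if (queues & (1UL << q))
			queue_stop_reasons[q] |= 1U << reason;
}

static void wake_queues_by_reason(unsigned long queues, unsigned int reason)
{
	for (unsigned int q = 0; q < MAX_QUEUES; q++)
		if (queues & (1UL << q))
			queue_stop_reasons[q] &= ~(1U << reason);
}

static bool queue_can_run(unsigned int q)
{
	/* the queue wakes only once every reason that stopped it is gone */
	return queue_stop_reasons[q] == 0;
}
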
@@ -78,8 +78,7 @@ void ieee80211_recalc_txpower(struct ieee80211_sub_if_data *sdata) ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_TXPOWER); } -static u32 ieee80211_idle_off(struct ieee80211_local *local, - const char *reason) +static u32 __ieee80211_idle_off(struct ieee80211_local *local) { if (!(local->hw.conf.flags & IEEE80211_CONF_IDLE)) return 0; @@ -88,121 +87,67 @@ static u32 ieee80211_idle_off(struct ieee80211_local *local, return IEEE80211_CONF_CHANGE_IDLE; } -static u32 ieee80211_idle_on(struct ieee80211_local *local) +static u32 __ieee80211_idle_on(struct ieee80211_local *local) { if (local->hw.conf.flags & IEEE80211_CONF_IDLE) return 0; - drv_flush(local, false); + ieee80211_flush_queues(local, NULL); local->hw.conf.flags |= IEEE80211_CONF_IDLE; return IEEE80211_CONF_CHANGE_IDLE; } -static u32 __ieee80211_recalc_idle(struct ieee80211_local *local) +static u32 __ieee80211_recalc_idle(struct ieee80211_local *local, + bool force_active) { - struct ieee80211_sub_if_data *sdata; - int count = 0; - bool working = false, scanning = false; + bool working = false, scanning, active; unsigned int led_trig_start = 0, led_trig_stop = 0; struct ieee80211_roc_work *roc; -#ifdef CONFIG_PROVE_LOCKING - WARN_ON(debug_locks && !lockdep_rtnl_is_held() && - !lockdep_is_held(&local->iflist_mtx)); -#endif lockdep_assert_held(&local->mtx); - list_for_each_entry(sdata, &local->interfaces, list) { - if (!ieee80211_sdata_running(sdata)) { - sdata->vif.bss_conf.idle = true; - continue; - } - - sdata->old_idle = sdata->vif.bss_conf.idle; - - /* do not count disabled managed interfaces */ - if (sdata->vif.type == NL80211_IFTYPE_STATION && - !sdata->u.mgd.associated && - !sdata->u.mgd.auth_data && - !sdata->u.mgd.assoc_data) { - sdata->vif.bss_conf.idle = true; - continue; - } - /* do not count unused IBSS interfaces */ - if (sdata->vif.type == NL80211_IFTYPE_ADHOC && - !sdata->u.ibss.ssid_len) { - sdata->vif.bss_conf.idle = true; - continue; - } - - if (sdata->vif.type == NL80211_IFTYPE_P2P_DEVICE) - continue; - - /* count everything else */ - sdata->vif.bss_conf.idle = false; - count++; - } + active = force_active || + !list_empty(&local->chanctx_list) || + local->monitors; if (!local->ops->remain_on_channel) { list_for_each_entry(roc, &local->roc_list, list) { working = true; - roc->sdata->vif.bss_conf.idle = false; + break; } } - sdata = rcu_dereference_protected(local->scan_sdata, - lockdep_is_held(&local->mtx)); - if (sdata && !(local->hw.flags & IEEE80211_HW_SCAN_WHILE_IDLE)) { - scanning = true; - sdata->vif.bss_conf.idle = false; - } - - list_for_each_entry(sdata, &local->interfaces, list) { - if (sdata->vif.type == NL80211_IFTYPE_MONITOR || - sdata->vif.type == NL80211_IFTYPE_AP_VLAN || - sdata->vif.type == NL80211_IFTYPE_P2P_DEVICE) - continue; - if (sdata->old_idle == sdata->vif.bss_conf.idle) - continue; - if (!ieee80211_sdata_running(sdata)) - continue; - ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_IDLE); - } + scanning = test_bit(SCAN_SW_SCANNING, &local->scanning) || + test_bit(SCAN_ONCHANNEL_SCANNING, &local->scanning); if (working || scanning) led_trig_start |= IEEE80211_TPT_LEDTRIG_FL_WORK; else led_trig_stop |= IEEE80211_TPT_LEDTRIG_FL_WORK; - if (count) + if (active) led_trig_start |= IEEE80211_TPT_LEDTRIG_FL_CONNECTED; else led_trig_stop |= IEEE80211_TPT_LEDTRIG_FL_CONNECTED; ieee80211_mod_tpt_led_trig(local, led_trig_start, led_trig_stop); - if (working) - return ieee80211_idle_off(local, "working"); - if (scanning) - return ieee80211_idle_off(local, "scanning"); - if 
(!count) - return ieee80211_idle_on(local); - else - return ieee80211_idle_off(local, "in use"); + if (working || scanning || active) + return __ieee80211_idle_off(local); + return __ieee80211_idle_on(local); +} - return 0; +u32 ieee80211_idle_off(struct ieee80211_local *local) +{ + return __ieee80211_recalc_idle(local, true); } void ieee80211_recalc_idle(struct ieee80211_local *local) { - u32 chg; - - mutex_lock(&local->iflist_mtx); - chg = __ieee80211_recalc_idle(local); - mutex_unlock(&local->iflist_mtx); - if (chg) - ieee80211_hw_config(local, chg); + u32 change = __ieee80211_recalc_idle(local, false); + if (change) + ieee80211_hw_config(local, change); } static int ieee80211_change_mtu(struct net_device *dev, int new_mtu) @@ -360,7 +305,8 @@ static int ieee80211_check_queues(struct ieee80211_sub_if_data *sdata) } } - if ((sdata->vif.type != NL80211_IFTYPE_AP) || + if ((sdata->vif.type != NL80211_IFTYPE_AP && + sdata->vif.type != NL80211_IFTYPE_MESH_POINT) || !(sdata->local->hw.flags & IEEE80211_HW_QUEUE_CONTROL)) { sdata->vif.cab_queue = IEEE80211_INVAL_HW_QUEUE; return 0; @@ -411,24 +357,22 @@ static void ieee80211_set_default_queues(struct ieee80211_sub_if_data *sdata) sdata->vif.cab_queue = IEEE80211_INVAL_HW_QUEUE; } -static int ieee80211_add_virtual_monitor(struct ieee80211_local *local) +int ieee80211_add_virtual_monitor(struct ieee80211_local *local) { struct ieee80211_sub_if_data *sdata; - int ret = 0; + int ret; if (!(local->hw.flags & IEEE80211_HW_WANT_MONITOR_VIF)) return 0; - mutex_lock(&local->iflist_mtx); + ASSERT_RTNL(); if (local->monitor_sdata) - goto out_unlock; + return 0; sdata = kzalloc(sizeof(*sdata) + local->hw.vif_data_size, GFP_KERNEL); - if (!sdata) { - ret = -ENOMEM; - goto out_unlock; - } + if (!sdata) + return -ENOMEM; /* set up data */ sdata->local = local; @@ -442,13 +386,13 @@ static int ieee80211_add_virtual_monitor(struct ieee80211_local *local) if (WARN_ON(ret)) { /* ok .. stupid driver, it asked for this! 
*/ kfree(sdata); - goto out_unlock; + return ret; } ret = ieee80211_check_queues(sdata); if (ret) { kfree(sdata); - goto out_unlock; + return ret; } ret = ieee80211_vif_use_channel(sdata, &local->monitor_chandef, @@ -456,30 +400,37 @@ static int ieee80211_add_virtual_monitor(struct ieee80211_local *local) if (ret) { drv_remove_interface(local, sdata); kfree(sdata); - goto out_unlock; + return ret; } + mutex_lock(&local->iflist_mtx); rcu_assign_pointer(local->monitor_sdata, sdata); - out_unlock: mutex_unlock(&local->iflist_mtx); - return ret; + + return 0; } -static void ieee80211_del_virtual_monitor(struct ieee80211_local *local) +void ieee80211_del_virtual_monitor(struct ieee80211_local *local) { struct ieee80211_sub_if_data *sdata; if (!(local->hw.flags & IEEE80211_HW_WANT_MONITOR_VIF)) return; + ASSERT_RTNL(); + mutex_lock(&local->iflist_mtx); sdata = rcu_dereference_protected(local->monitor_sdata, lockdep_is_held(&local->iflist_mtx)); - if (!sdata) - goto out_unlock; + if (!sdata) { + mutex_unlock(&local->iflist_mtx); + return; + } rcu_assign_pointer(local->monitor_sdata, NULL); + mutex_unlock(&local->iflist_mtx); + synchronize_net(); ieee80211_vif_release_channel(sdata); @@ -487,8 +438,6 @@ static void ieee80211_del_virtual_monitor(struct ieee80211_local *local) drv_remove_interface(local, sdata); kfree(sdata); - out_unlock: - mutex_unlock(&local->iflist_mtx); } /* @@ -550,8 +499,6 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up) res = drv_start(local); if (res) goto err_del_bss; - if (local->ops->napi_poll) - napi_enable(&local->napi); /* we're brought up, everything changes */ hw_reconf_flags = ~0; ieee80211_led_radio(local, true); @@ -606,6 +553,9 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up) ieee80211_adjust_monitor_flags(sdata, 1); ieee80211_configure_filter(local); + mutex_lock(&local->mtx); + ieee80211_recalc_idle(local); + mutex_unlock(&local->mtx); netif_carrier_on(dev); break; @@ -645,7 +595,8 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up) case NL80211_IFTYPE_P2P_DEVICE: break; default: - netif_carrier_on(dev); + /* not reached */ + WARN_ON(1); } /* @@ -694,10 +645,6 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up) if (sdata->flags & IEEE80211_SDATA_PROMISC) atomic_inc(&local->iff_promiscs); - mutex_lock(&local->mtx); - hw_reconf_flags |= __ieee80211_recalc_idle(local); - mutex_unlock(&local->mtx); - if (coming_up) local->open_count++; @@ -706,8 +653,28 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up) ieee80211_recalc_ps(local, -1); - if (dev) - netif_tx_start_all_queues(dev); + if (dev) { + unsigned long flags; + int n_acs = IEEE80211_NUM_ACS; + int ac; + + if (local->hw.queues < IEEE80211_NUM_ACS) + n_acs = 1; + + spin_lock_irqsave(&local->queue_stop_reason_lock, flags); + if (sdata->vif.cab_queue == IEEE80211_INVAL_HW_QUEUE || + (local->queue_stop_reasons[sdata->vif.cab_queue] == 0 && + skb_queue_empty(&local->pending[sdata->vif.cab_queue]))) { + for (ac = 0; ac < n_acs; ac++) { + int ac_queue = sdata->vif.hw_queue[ac]; + + if (local->queue_stop_reasons[ac_queue] == 0 && + skb_queue_empty(&local->pending[ac_queue])) + netif_start_subqueue(dev, ac); + } + } + spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags); + } return 0; err_del_interface: @@ -747,7 +714,8 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, unsigned long flags; struct sk_buff *skb, *tmp; u32 hw_reconf_flags = 0; - int i; + int i, flushed; + struct ps_data *ps; 
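/*
 * Note (annotation, not part of the patch): the teardown below batches
 * RCU waits instead of paying one grace period per station. In outline,
 * the order used further down is:
 *
 *     flushed = sta_info_flush_defer(sdata);   unlink stations now
 *     ...
 *     synchronize_rcu();      RX path no longer uses the interface
 *     rcu_barrier();          all station call_rcu() callbacks finished
 *     sta_info_flush_cleanup(sdata);           free the deferred entries
 *
 * Only WDS interfaces are expected to still hold a station at this
 * point, hence the WARN_ON_ONCE() on the flushed count below.
 */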
clear_bit(SDATA_STATE_RUNNING, &sdata->state); @@ -760,7 +728,10 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, if (sdata->dev) netif_tx_stop_all_queues(sdata->dev); - ieee80211_roc_purge(sdata); + ieee80211_roc_purge(local, sdata); + + if (sdata->vif.type == NL80211_IFTYPE_STATION) + ieee80211_mgd_stop(sdata); /* * Remove all stations associated with this interface. @@ -772,18 +743,17 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, * (because if we remove a STA after ops->remove_interface() * the driver will have removed the vif info already!) * - * This is relevant only in AP, WDS and mesh modes, since in - * all other modes we've already removed all stations when - * disconnecting etc. + * This is relevant only in WDS mode, in all other modes we've + * already removed all stations when disconnecting or similar, + * so warn otherwise. + * + * We call sta_info_flush_cleanup() later, to combine RCU waits. */ - sta_info_flush(local, sdata); + flushed = sta_info_flush_defer(sdata); + WARN_ON_ONCE((sdata->vif.type != NL80211_IFTYPE_WDS && flushed > 0) || + (sdata->vif.type == NL80211_IFTYPE_WDS && flushed != 1)); - /* - * Don't count this interface for promisc/allmulti while it - * is down. dev_mc_unsync() will invoke set_multicast_list - * on the master interface which will sync these down to the - * hardware as filter flags. - */ + /* don't count this interface for promisc/allmulti while it is down */ if (sdata->flags & IEEE80211_SDATA_ALLMULTI) atomic_dec(&local->iff_allmultis); @@ -804,8 +774,6 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, sdata->dev->addr_len); spin_unlock_bh(&local->filter_lock); netif_addr_unlock_bh(sdata->dev); - - ieee80211_configure_filter(local); } del_timer_sync(&local->dynamic_ps_timer); @@ -813,6 +781,17 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, cancel_work_sync(&sdata->recalc_smps); + cancel_delayed_work_sync(&sdata->dfs_cac_timer_work); + + if (sdata->wdev.cac_started) { + WARN_ON(local->suspended); + mutex_lock(&local->iflist_mtx); + ieee80211_vif_release_channel(sdata); + mutex_unlock(&local->iflist_mtx); + cfg80211_cac_event(sdata->dev, NL80211_RADAR_CAC_ABORTED, + GFP_KERNEL); + } + /* APs need special treatment */ if (sdata->vif.type == NL80211_IFTYPE_AP) { struct ieee80211_sub_if_data *vlan, *tmpsdata; @@ -822,8 +801,19 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, u.vlan.list) dev_close(vlan->dev); WARN_ON(!list_empty(&sdata->u.ap.vlans)); - } else if (sdata->vif.type == NL80211_IFTYPE_STATION) { - ieee80211_mgd_stop(sdata); + } else if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) { + /* remove all packets in parent bc_buf pointing to this dev */ + ps = &sdata->bss->ps; + + spin_lock_irqsave(&ps->bc_buf.lock, flags); + skb_queue_walk_safe(&ps->bc_buf, skb, tmp) { + if (skb->dev == sdata->dev) { + __skb_unlink(skb, &ps->bc_buf); + local->total_ps_buffered--; + ieee80211_free_txskb(&local->hw, skb); + } + } + spin_unlock_irqrestore(&ps->bc_buf.lock, flags); } if (going_down) @@ -845,11 +835,9 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, if (local->monitors == 0) { local->hw.conf.flags &= ~IEEE80211_CONF_MONITOR; hw_reconf_flags |= IEEE80211_CONF_CHANGE_MONITOR; - ieee80211_del_virtual_monitor(local); } ieee80211_adjust_monitor_flags(sdata, -1); - ieee80211_configure_filter(local); break; case NL80211_IFTYPE_P2P_DEVICE: /* relies on synchronize_rcu() below */ @@ -859,45 +847,31 @@ static void 
ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, cancel_work_sync(&sdata->work); /* * When we get here, the interface is marked down. - * Call synchronize_rcu() to wait for the RX path - * should it be using the interface and enqueuing - * frames at this very time on another CPU. + * + * sta_info_flush_cleanup() requires rcu_barrier() + * first to wait for the station call_rcu() calls + * to complete, and we also need synchronize_rcu() + * to wait for the RX path in case it is using the + * interface and enqueuing frames at this very time on + * another CPU. */ synchronize_rcu(); - skb_queue_purge(&sdata->skb_queue); + rcu_barrier(); + sta_info_flush_cleanup(sdata); /* * Free all remaining keys, there shouldn't be any, - * except maybe group keys in AP more or WDS? + * except maybe in WDS mode? */ ieee80211_free_keys(sdata); - if (going_down) - drv_remove_interface(local, sdata); + /* fall through */ + case NL80211_IFTYPE_AP: + skb_queue_purge(&sdata->skb_queue); } sdata->bss = NULL; - mutex_lock(&local->mtx); - hw_reconf_flags |= __ieee80211_recalc_idle(local); - mutex_unlock(&local->mtx); - - ieee80211_recalc_ps(local, -1); - - if (local->open_count == 0) { - if (local->ops->napi_poll) - napi_disable(&local->napi); - ieee80211_clear_tx_pending(local); - ieee80211_stop_device(local); - - /* no reconfiguring after stop! */ - hw_reconf_flags = 0; - } - - /* do after stop to avoid reconfiguring when we stop anyway */ - if (hw_reconf_flags) - ieee80211_hw_config(local, hw_reconf_flags); - spin_lock_irqsave(&local->queue_stop_reason_lock, flags); for (i = 0; i < IEEE80211_MAX_QUEUES; i++) { skb_queue_walk_safe(&local->pending[i], skb, tmp) { @@ -910,7 +884,54 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, } spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags); - if (local->monitors == local->open_count && local->monitors > 0) + if (local->open_count == 0) + ieee80211_clear_tx_pending(local); + + /* + * If the interface goes down while suspended, presumably because + * the device was unplugged and that happens before our resume, + * then the driver is already unconfigured and the remainder of + * this function isn't needed. + * XXX: what about WoWLAN? If the device has software state, e.g. + * memory allocated, it might expect teardown commands from + * mac80211 here? + */ + if (local->suspended) { + WARN_ON(local->wowlan); + WARN_ON(rtnl_dereference(local->monitor_sdata)); + return; + } + + switch (sdata->vif.type) { + case NL80211_IFTYPE_AP_VLAN: + break; + case NL80211_IFTYPE_MONITOR: + if (local->monitors == 0) + ieee80211_del_virtual_monitor(local); + + mutex_lock(&local->mtx); + ieee80211_recalc_idle(local); + mutex_unlock(&local->mtx); + break; + default: + if (going_down) + drv_remove_interface(local, sdata); + } + + ieee80211_recalc_ps(local, -1); + + if (local->open_count == 0) { + ieee80211_stop_device(local); + + /* no reconfiguring after stop! */ + return; + } + + /* do after stop to avoid reconfiguring when we stop anyway */ + ieee80211_configure_filter(local); + ieee80211_hw_config(local, hw_reconf_flags); + + if (local->monitors == local->open_count) ieee80211_add_virtual_monitor(local); } @@ -949,6 +970,17 @@ static void ieee80211_set_multicast_list(struct net_device *dev) atomic_dec(&local->iff_promiscs); sdata->flags ^= IEEE80211_SDATA_PROMISC; } + + /* + * TODO: If somebody needs this on AP interfaces, + * it can be enabled easily but multicast + * addresses from VLANs need to be synced. 
+ */ + if (sdata->vif.type != NL80211_IFTYPE_MONITOR && + sdata->vif.type != NL80211_IFTYPE_AP_VLAN && + sdata->vif.type != NL80211_IFTYPE_AP) + drv_set_multicast_list(local, sdata, &dev->mc); + spin_lock_bh(&local->filter_lock); __hw_addr_sync(&local->mc_list, &dev->mc, dev->addr_len); spin_unlock_bh(&local->filter_lock); @@ -961,7 +993,6 @@ static void ieee80211_set_multicast_list(struct net_device *dev) */ static void ieee80211_teardown_sdata(struct ieee80211_sub_if_data *sdata) { - struct ieee80211_local *local = sdata->local; int flushed; int i; @@ -977,7 +1008,7 @@ static void ieee80211_teardown_sdata(struct ieee80211_sub_if_data *sdata) if (ieee80211_vif_is_mesh(&sdata->vif)) mesh_rmc_free(sdata); - flushed = sta_info_flush(local, sdata); + flushed = sta_info_flush(sdata); WARN_ON(flushed); } @@ -1218,6 +1249,7 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata, case NL80211_IFTYPE_AP: skb_queue_head_init(&sdata->u.ap.ps.bc_buf); INIT_LIST_HEAD(&sdata->u.ap.vlans); + sdata->vif.bss_conf.bssid = sdata->vif.addr; break; case NL80211_IFTYPE_P2P_CLIENT: type = NL80211_IFTYPE_STATION; @@ -1225,9 +1257,11 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata, sdata->vif.p2p = true; /* fall through */ case NL80211_IFTYPE_STATION: + sdata->vif.bss_conf.bssid = sdata->u.mgd.bssid; ieee80211_sta_setup_sdata(sdata); break; case NL80211_IFTYPE_ADHOC: + sdata->vif.bss_conf.bssid = sdata->u.ibss.bssid; ieee80211_ibss_setup_sdata(sdata); break; case NL80211_IFTYPE_MESH_POINT: @@ -1241,8 +1275,12 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata, MONITOR_FLAG_OTHER_BSS; break; case NL80211_IFTYPE_WDS: + sdata->vif.bss_conf.bssid = NULL; + break; case NL80211_IFTYPE_AP_VLAN: + break; case NL80211_IFTYPE_P2P_DEVICE: + sdata->vif.bss_conf.bssid = sdata->vif.addr; break; case NL80211_IFTYPE_UNSPECIFIED: case NUM_NL80211_IFTYPES: @@ -1558,9 +1596,6 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name, /* initialise type-independent data */ sdata->wdev.wiphy = local->hw.wiphy; sdata->local = local; -#ifdef CONFIG_INET - sdata->arp_filter_state = true; -#endif for (i = 0; i < IEEE80211_FRAGMENT_MAX; i++) skb_queue_head_init(&sdata->fragments[i].skb_list); @@ -1570,6 +1605,10 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name, spin_lock_init(&sdata->cleanup_stations_lock); INIT_LIST_HEAD(&sdata->cleanup_stations); INIT_WORK(&sdata->cleanup_stations_wk, ieee80211_cleanup_sdata_stas_wk); + INIT_DELAYED_WORK(&sdata->dfs_cac_timer_work, + ieee80211_dfs_cac_timer_work); + INIT_DELAYED_WORK(&sdata->dec_tailroom_needed_wk, + ieee80211_delayed_tailroom_dec); for (i = 0; i < IEEE80211_NUM_BANDS; i++) { struct ieee80211_supported_band *sband; diff --git a/net/mac80211/key.c b/net/mac80211/key.c index 619c5d697999..67059b88fea5 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -204,8 +204,11 @@ static void __ieee80211_set_default_key(struct ieee80211_sub_if_data *sdata, if (idx >= 0 && idx < NUM_DEFAULT_KEYS) key = key_mtx_dereference(sdata->local, sdata->keys[idx]); - if (uni) + if (uni) { rcu_assign_pointer(sdata->default_unicast_key, key); + drv_set_default_unicast_key(sdata->local, sdata, idx); + } + if (multi) rcu_assign_pointer(sdata->default_multicast_key, key); @@ -245,11 +248,11 @@ void ieee80211_set_default_mgmt_key(struct ieee80211_sub_if_data *sdata, } -static void __ieee80211_key_replace(struct ieee80211_sub_if_data *sdata, - struct sta_info *sta, - bool pairwise, - struct ieee80211_key *old, 
- struct ieee80211_key *new) +static void ieee80211_key_replace(struct ieee80211_sub_if_data *sdata, + struct sta_info *sta, + bool pairwise, + struct ieee80211_key *old, + struct ieee80211_key *new) { int idx; bool defunikey, defmultikey, defmgmtkey; @@ -394,7 +397,41 @@ struct ieee80211_key *ieee80211_key_alloc(u32 cipher, int idx, size_t key_len, return key; } -static void __ieee80211_key_destroy(struct ieee80211_key *key) +static void ieee80211_key_free_common(struct ieee80211_key *key) +{ + if (key->conf.cipher == WLAN_CIPHER_SUITE_CCMP) + ieee80211_aes_key_free(key->u.ccmp.tfm); + if (key->conf.cipher == WLAN_CIPHER_SUITE_AES_CMAC) + ieee80211_aes_cmac_key_free(key->u.aes_cmac.tfm); + kfree(key); +} + +static void __ieee80211_key_destroy(struct ieee80211_key *key, + bool delay_tailroom) +{ + if (key->local) + ieee80211_key_disable_hw_accel(key); + + if (key->local) { + struct ieee80211_sub_if_data *sdata = key->sdata; + + ieee80211_debugfs_key_remove(key); + + if (delay_tailroom) { + /* see ieee80211_delayed_tailroom_dec */ + sdata->crypto_tx_tailroom_pending_dec++; + schedule_delayed_work(&sdata->dec_tailroom_needed_wk, + HZ/2); + } else { + sdata->crypto_tx_tailroom_needed_cnt--; + } + } + + ieee80211_key_free_common(key); +} + +static void ieee80211_key_destroy(struct ieee80211_key *key, + bool delay_tailroom) { if (!key) return; @@ -405,19 +442,13 @@ static void __ieee80211_key_destroy(struct ieee80211_key *key) */ synchronize_net(); - if (key->local) - ieee80211_key_disable_hw_accel(key); - - if (key->conf.cipher == WLAN_CIPHER_SUITE_CCMP) - ieee80211_aes_key_free(key->u.ccmp.tfm); - if (key->conf.cipher == WLAN_CIPHER_SUITE_AES_CMAC) - ieee80211_aes_cmac_key_free(key->u.aes_cmac.tfm); - if (key->local) { - ieee80211_debugfs_key_remove(key); - key->sdata->crypto_tx_tailroom_needed_cnt--; - } + __ieee80211_key_destroy(key, delay_tailroom); +} - kfree(key); +void ieee80211_key_free_unused(struct ieee80211_key *key) +{ + WARN_ON(key->sdata || key->local); + ieee80211_key_free_common(key); } int ieee80211_key_link(struct ieee80211_key *key, @@ -437,32 +468,6 @@ int ieee80211_key_link(struct ieee80211_key *key, key->sdata = sdata; key->sta = sta; - if (sta) { - /* - * some hardware cannot handle TKIP with QoS, so - * we indicate whether QoS could be in use. - */ - if (test_sta_flag(sta, WLAN_STA_WME)) - key->conf.flags |= IEEE80211_KEY_FLAG_WMM_STA; - } else { - if (sdata->vif.type == NL80211_IFTYPE_STATION) { - struct sta_info *ap; - - /* - * We're getting a sta pointer in, so must be under - * appropriate locking for sta_info_get(). 
- */ - - /* same here, the AP could be using QoS */ - ap = sta_info_get(key->sdata, key->sdata->u.mgd.bssid); - if (ap) { - if (test_sta_flag(ap, WLAN_STA_WME)) - key->conf.flags |= - IEEE80211_KEY_FLAG_WMM_STA; - } - } - } - mutex_lock(&sdata->local->key_mtx); if (sta && pairwise) @@ -474,19 +479,22 @@ int ieee80211_key_link(struct ieee80211_key *key, increment_tailroom_need_count(sdata); - __ieee80211_key_replace(sdata, sta, pairwise, old_key, key); - __ieee80211_key_destroy(old_key); + ieee80211_key_replace(sdata, sta, pairwise, old_key, key); + ieee80211_key_destroy(old_key, true); ieee80211_debugfs_key_add(key); ret = ieee80211_key_enable_hw_accel(key); + if (ret) + ieee80211_key_free(key, true); + mutex_unlock(&sdata->local->key_mtx); return ret; } -void __ieee80211_key_free(struct ieee80211_key *key) +void ieee80211_key_free(struct ieee80211_key *key, bool delay_tailroom) { if (!key) return; @@ -495,18 +503,10 @@ void __ieee80211_key_free(struct ieee80211_key *key) * Replace key with nothingness if it was ever used. */ if (key->sdata) - __ieee80211_key_replace(key->sdata, key->sta, + ieee80211_key_replace(key->sdata, key->sta, key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE, key, NULL); - __ieee80211_key_destroy(key); -} - -void ieee80211_key_free(struct ieee80211_local *local, - struct ieee80211_key *key) -{ - mutex_lock(&local->key_mtx); - __ieee80211_key_free(key); - mutex_unlock(&local->key_mtx); + ieee80211_key_destroy(key, delay_tailroom); } void ieee80211_enable_keys(struct ieee80211_sub_if_data *sdata) @@ -563,36 +563,109 @@ void ieee80211_iter_keys(struct ieee80211_hw *hw, } EXPORT_SYMBOL(ieee80211_iter_keys); -void ieee80211_disable_keys(struct ieee80211_sub_if_data *sdata) +void ieee80211_free_keys(struct ieee80211_sub_if_data *sdata) { - struct ieee80211_key *key; + struct ieee80211_key *key, *tmp; + LIST_HEAD(keys); - ASSERT_RTNL(); + cancel_delayed_work_sync(&sdata->dec_tailroom_needed_wk); mutex_lock(&sdata->local->key_mtx); - list_for_each_entry(key, &sdata->key_list, list) - ieee80211_key_disable_hw_accel(key); + sdata->crypto_tx_tailroom_needed_cnt -= + sdata->crypto_tx_tailroom_pending_dec; + sdata->crypto_tx_tailroom_pending_dec = 0; + + ieee80211_debugfs_key_remove_mgmt_default(sdata); + + list_for_each_entry_safe(key, tmp, &sdata->key_list, list) { + ieee80211_key_replace(key->sdata, key->sta, + key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE, + key, NULL); + list_add_tail(&key->list, &keys); + } + + ieee80211_debugfs_key_update_default(sdata); + + if (!list_empty(&keys)) { + synchronize_net(); + list_for_each_entry_safe(key, tmp, &keys, list) + __ieee80211_key_destroy(key, false); + } + + WARN_ON_ONCE(sdata->crypto_tx_tailroom_needed_cnt || + sdata->crypto_tx_tailroom_pending_dec); mutex_unlock(&sdata->local->key_mtx); } -void ieee80211_free_keys(struct ieee80211_sub_if_data *sdata) +void ieee80211_free_sta_keys(struct ieee80211_local *local, + struct sta_info *sta) { struct ieee80211_key *key, *tmp; + LIST_HEAD(keys); + int i; - mutex_lock(&sdata->local->key_mtx); + mutex_lock(&local->key_mtx); + for (i = 0; i < NUM_DEFAULT_KEYS; i++) { + key = key_mtx_dereference(local, sta->gtk[i]); + if (!key) + continue; + ieee80211_key_replace(key->sdata, key->sta, + key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE, + key, NULL); + list_add(&key->list, &keys); + } - ieee80211_debugfs_key_remove_mgmt_default(sdata); + key = key_mtx_dereference(local, sta->ptk); + if (key) { + ieee80211_key_replace(key->sdata, key->sta, + key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE, + key, 
NULL); + list_add(&key->list, &keys); + } - list_for_each_entry_safe(key, tmp, &sdata->key_list, list) - __ieee80211_key_free(key); + /* + * NB: the station code relies on this being + * done even if there aren't any keys + */ + synchronize_net(); - ieee80211_debugfs_key_update_default(sdata); + list_for_each_entry_safe(key, tmp, &keys, list) + __ieee80211_key_destroy(key, true); - mutex_unlock(&sdata->local->key_mtx); + mutex_unlock(&local->key_mtx); } +void ieee80211_delayed_tailroom_dec(struct work_struct *wk) +{ + struct ieee80211_sub_if_data *sdata; + + sdata = container_of(wk, struct ieee80211_sub_if_data, + dec_tailroom_needed_wk.work); + + /* + * The reason for the delayed tailroom needed decrementing is to + * make roaming faster: during roaming, all keys are first deleted + * and then new keys are installed. The first new key causes the + * crypto_tx_tailroom_needed_cnt to go from 0 to 1, which invokes + * the cost of synchronize_net() (which can be slow). Avoid this + * by deferring the crypto_tx_tailroom_needed_cnt decrementing on + * key removal for a while, so if we roam the value is larger than + * zero and no 0->1 transition happens. + * + * The cost is that if the AP switching was from an AP with keys + * to one without, we still allocate tailroom while it would no + * longer be needed. However, in the typical (fast) roaming case + * within an ESS this usually won't happen. + */ + + mutex_lock(&sdata->local->key_mtx); + sdata->crypto_tx_tailroom_needed_cnt -= + sdata->crypto_tx_tailroom_pending_dec; + sdata->crypto_tx_tailroom_pending_dec = 0; + mutex_unlock(&sdata->local->key_mtx); +} void ieee80211_gtk_rekey_notify(struct ieee80211_vif *vif, const u8 *bssid, const u8 *replay_ctr, gfp_t gfp) diff --git a/net/mac80211/key.h b/net/mac80211/key.h index 382dc44ed330..e8de3e6d7804 100644 --- a/net/mac80211/key.h +++ b/net/mac80211/key.h @@ -129,23 +129,25 @@ struct ieee80211_key *ieee80211_key_alloc(u32 cipher, int idx, size_t key_len, size_t seq_len, const u8 *seq); /* * Insert a key into data structures (sdata, sta if necessary) - * to make it used, free old key. + * to make it used, free old key. On failure, also free the new key. 
*/ -int __must_check ieee80211_key_link(struct ieee80211_key *key, - struct ieee80211_sub_if_data *sdata, - struct sta_info *sta); -void __ieee80211_key_free(struct ieee80211_key *key); -void ieee80211_key_free(struct ieee80211_local *local, - struct ieee80211_key *key); +int ieee80211_key_link(struct ieee80211_key *key, + struct ieee80211_sub_if_data *sdata, + struct sta_info *sta); +void ieee80211_key_free(struct ieee80211_key *key, bool delay_tailroom); +void ieee80211_key_free_unused(struct ieee80211_key *key); void ieee80211_set_default_key(struct ieee80211_sub_if_data *sdata, int idx, bool uni, bool multi); void ieee80211_set_default_mgmt_key(struct ieee80211_sub_if_data *sdata, int idx); void ieee80211_free_keys(struct ieee80211_sub_if_data *sdata); +void ieee80211_free_sta_keys(struct ieee80211_local *local, + struct sta_info *sta); void ieee80211_enable_keys(struct ieee80211_sub_if_data *sdata); -void ieee80211_disable_keys(struct ieee80211_sub_if_data *sdata); #define key_mtx_dereference(local, ref) \ rcu_dereference_protected(ref, lockdep_is_held(&((local)->key_mtx))) +void ieee80211_delayed_tailroom_dec(struct work_struct *wk); + #endif /* IEEE80211_KEY_H */ diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 1b087fff93e7..8a7bfc47d577 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -23,6 +23,7 @@ #include <linux/inetdevice.h> #include <net/net_namespace.h> #include <net/cfg80211.h> +#include <net/addrconf.h> #include "ieee80211_i.h" #include "driver-ops.h" @@ -33,8 +34,6 @@ #include "cfg.h" #include "debugfs.h" -static struct lock_class_key ieee80211_rx_skb_queue_class; - void ieee80211_configure_filter(struct ieee80211_local *local) { u64 mc; @@ -96,43 +95,47 @@ static void ieee80211_reconfig_filter(struct work_struct *work) static u32 ieee80211_hw_conf_chan(struct ieee80211_local *local) { struct ieee80211_sub_if_data *sdata; - struct ieee80211_channel *chan; + struct cfg80211_chan_def chandef = {}; u32 changed = 0; int power; - enum nl80211_channel_type channel_type; u32 offchannel_flag; - bool scanning = false; offchannel_flag = local->hw.conf.flags & IEEE80211_CONF_OFFCHANNEL; + if (local->scan_channel) { - chan = local->scan_channel; + chandef.chan = local->scan_channel; /* If scanning on oper channel, use whatever channel-type * is currently in use. 
*/ - if (chan == local->_oper_channel) - channel_type = local->_oper_channel_type; - else - channel_type = NL80211_CHAN_NO_HT; + if (chandef.chan == local->_oper_chandef.chan) { + chandef = local->_oper_chandef; + } else { + chandef.width = NL80211_CHAN_WIDTH_20_NOHT; + chandef.center_freq1 = chandef.chan->center_freq; + } } else if (local->tmp_channel) { - chan = local->tmp_channel; - channel_type = NL80211_CHAN_NO_HT; - } else { - chan = local->_oper_channel; - channel_type = local->_oper_channel_type; - } - - if (chan != local->_oper_channel || - channel_type != local->_oper_channel_type) + chandef.chan = local->tmp_channel; + chandef.width = NL80211_CHAN_WIDTH_20_NOHT; + chandef.center_freq1 = chandef.chan->center_freq; + } else + chandef = local->_oper_chandef; + + WARN(!cfg80211_chandef_valid(&chandef), + "control:%d MHz width:%d center: %d/%d MHz", + chandef.chan->center_freq, chandef.width, + chandef.center_freq1, chandef.center_freq2); + + if (!cfg80211_chandef_identical(&chandef, &local->_oper_chandef)) local->hw.conf.flags |= IEEE80211_CONF_OFFCHANNEL; else local->hw.conf.flags &= ~IEEE80211_CONF_OFFCHANNEL; offchannel_flag ^= local->hw.conf.flags & IEEE80211_CONF_OFFCHANNEL; - if (offchannel_flag || chan != local->hw.conf.channel || - channel_type != local->hw.conf.channel_type) { - local->hw.conf.channel = chan; - local->hw.conf.channel_type = channel_type; + if (offchannel_flag || + !cfg80211_chandef_identical(&local->hw.conf.chandef, + &local->_oper_chandef)) { + local->hw.conf.chandef = chandef; changed |= IEEE80211_CONF_CHANGE_CHANNEL; } @@ -148,10 +151,7 @@ static u32 ieee80211_hw_conf_chan(struct ieee80211_local *local) changed |= IEEE80211_CONF_CHANGE_SMPS; } - scanning = test_bit(SCAN_SW_SCANNING, &local->scanning) || - test_bit(SCAN_ONCHANNEL_SCANNING, &local->scanning) || - test_bit(SCAN_HW_SCANNING, &local->scanning); - power = chan->max_power; + power = chandef.chan->max_power; rcu_read_lock(); list_for_each_entry_rcu(sdata, &local->interfaces, list) { @@ -207,76 +207,10 @@ void ieee80211_bss_info_change_notify(struct ieee80211_sub_if_data *sdata, u32 changed) { struct ieee80211_local *local = sdata->local; - static const u8 zero[ETH_ALEN] = { 0 }; if (!changed) return; - if (sdata->vif.type == NL80211_IFTYPE_STATION) { - sdata->vif.bss_conf.bssid = sdata->u.mgd.bssid; - } else if (sdata->vif.type == NL80211_IFTYPE_ADHOC) - sdata->vif.bss_conf.bssid = sdata->u.ibss.bssid; - else if (sdata->vif.type == NL80211_IFTYPE_AP) - sdata->vif.bss_conf.bssid = sdata->vif.addr; - else if (sdata->vif.type == NL80211_IFTYPE_WDS) - sdata->vif.bss_conf.bssid = NULL; - else if (ieee80211_vif_is_mesh(&sdata->vif)) { - sdata->vif.bss_conf.bssid = zero; - } else if (sdata->vif.type == NL80211_IFTYPE_P2P_DEVICE) { - sdata->vif.bss_conf.bssid = sdata->vif.addr; - WARN_ONCE(changed & ~(BSS_CHANGED_IDLE), - "P2P Device BSS changed %#x", changed); - } else { - WARN_ON(1); - return; - } - - switch (sdata->vif.type) { - case NL80211_IFTYPE_AP: - case NL80211_IFTYPE_ADHOC: - case NL80211_IFTYPE_WDS: - case NL80211_IFTYPE_MESH_POINT: - break; - default: - /* do not warn to simplify caller in scan.c */ - changed &= ~BSS_CHANGED_BEACON_ENABLED; - if (WARN_ON(changed & BSS_CHANGED_BEACON)) - return; - break; - } - - if (changed & BSS_CHANGED_BEACON_ENABLED) { - if (local->quiescing || !ieee80211_sdata_running(sdata) || - test_bit(SDATA_STATE_OFFCHANNEL, &sdata->state)) { - sdata->vif.bss_conf.enable_beacon = false; - } else { - /* - * Beacon should be enabled, but AP mode must - * check whether 
there is a beacon configured. - */ - switch (sdata->vif.type) { - case NL80211_IFTYPE_AP: - sdata->vif.bss_conf.enable_beacon = - !!sdata->u.ap.beacon; - break; - case NL80211_IFTYPE_ADHOC: - sdata->vif.bss_conf.enable_beacon = - !!sdata->u.ibss.presp; - break; -#ifdef CONFIG_MAC80211_MESH - case NL80211_IFTYPE_MESH_POINT: - sdata->vif.bss_conf.enable_beacon = - !!sdata->u.mesh.mesh_id_len; - break; -#endif - default: - /* not reached */ - WARN_ON(1); - break; - } - } - } - drv_bss_info_changed(local, sdata, &sdata->vif.bss_conf, changed); } @@ -293,8 +227,6 @@ u32 ieee80211_reset_erp_info(struct ieee80211_sub_if_data *sdata) static void ieee80211_tasklet_handler(unsigned long data) { struct ieee80211_local *local = (struct ieee80211_local *) data; - struct sta_info *sta, *tmp; - struct skb_eosp_msg_data *eosp_data; struct sk_buff *skb; while ((skb = skb_dequeue(&local->skb_queue)) || @@ -310,18 +242,6 @@ static void ieee80211_tasklet_handler(unsigned long data) skb->pkt_type = 0; ieee80211_tx_status(&local->hw, skb); break; - case IEEE80211_EOSP_MSG: - eosp_data = (void *)skb->cb; - for_each_sta_info(local, eosp_data->sta, sta, tmp) { - /* skip wrong virtual interface */ - if (memcmp(eosp_data->iface, - sta->sdata->vif.addr, ETH_ALEN)) - continue; - clear_sta_flag(sta, WLAN_STA_SP); - break; - } - dev_kfree_skb(skb); - break; default: WARN(1, "mac80211: Packet is of unknown type %d\n", skb->pkt_type); @@ -362,8 +282,8 @@ void ieee80211_restart_hw(struct ieee80211_hw *hw) "Hardware restart was requested\n"); /* use this reason, ieee80211_reconfig will unblock it */ - ieee80211_stop_queues_by_reason(hw, - IEEE80211_QUEUE_STOP_REASON_SUSPEND); + ieee80211_stop_queues_by_reason(hw, IEEE80211_MAX_QUEUE_MAP, + IEEE80211_QUEUE_STOP_REASON_SUSPEND); /* * Stop all Rx during the reconfig. 
We don't want state changes @@ -415,27 +335,19 @@ static int ieee80211_ifa_changed(struct notifier_block *nb, /* Copy the addresses to the bss_conf list */ ifa = idev->ifa_list; - while (c < IEEE80211_BSS_ARP_ADDR_LIST_LEN && ifa) { - bss_conf->arp_addr_list[c] = ifa->ifa_address; + while (ifa) { + if (c < IEEE80211_BSS_ARP_ADDR_LIST_LEN) + bss_conf->arp_addr_list[c] = ifa->ifa_address; ifa = ifa->ifa_next; c++; } - /* If not all addresses fit the list, disable filtering */ - if (ifa) { - sdata->arp_filter_state = false; - c = 0; - } else { - sdata->arp_filter_state = true; - } bss_conf->arp_addr_cnt = c; /* Configure driver only if associated (which also implies it is up) */ - if (ifmgd->associated) { - bss_conf->arp_filter_enabled = sdata->arp_filter_state; + if (ifmgd->associated) ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_ARP_FILTER); - } mutex_unlock(&ifmgd->mtx); @@ -443,29 +355,36 @@ static int ieee80211_ifa_changed(struct notifier_block *nb, } #endif -static int ieee80211_napi_poll(struct napi_struct *napi, int budget) +#if IS_ENABLED(CONFIG_IPV6) +static int ieee80211_ifa6_changed(struct notifier_block *nb, + unsigned long data, void *arg) { + struct inet6_ifaddr *ifa = (struct inet6_ifaddr *)arg; + struct inet6_dev *idev = ifa->idev; + struct net_device *ndev = ifa->idev->dev; struct ieee80211_local *local = - container_of(napi, struct ieee80211_local, napi); + container_of(nb, struct ieee80211_local, ifa6_notifier); + struct wireless_dev *wdev = ndev->ieee80211_ptr; + struct ieee80211_sub_if_data *sdata; - return local->ops->napi_poll(&local->hw, budget); -} + /* Make sure it's our interface that got changed */ + if (!wdev || wdev->wiphy != local->hw.wiphy) + return NOTIFY_DONE; -void ieee80211_napi_schedule(struct ieee80211_hw *hw) -{ - struct ieee80211_local *local = hw_to_local(hw); + sdata = IEEE80211_DEV_TO_SUB_IF(ndev); - napi_schedule(&local->napi); -} -EXPORT_SYMBOL(ieee80211_napi_schedule); + /* + * For now only support station mode. This is mostly because + * doing AP would have to handle AP_VLAN in some way ... 
+ */ + if (sdata->vif.type != NL80211_IFTYPE_STATION) + return NOTIFY_DONE; -void ieee80211_napi_complete(struct ieee80211_hw *hw) -{ - struct ieee80211_local *local = hw_to_local(hw); + drv_ipv6_addr_change(local, sdata, idev); - napi_complete(&local->napi); + return NOTIFY_DONE; } -EXPORT_SYMBOL(ieee80211_napi_complete); +#endif /* There isn't a lot of sense in it, but you can transmit anything you like */ static const struct ieee80211_txrx_stypes @@ -537,6 +456,7 @@ static const struct ieee80211_ht_cap mac80211_ht_capa_mod_mask = { .cap_info = cpu_to_le16(IEEE80211_HT_CAP_SUP_WIDTH_20_40 | IEEE80211_HT_CAP_MAX_AMSDU | + IEEE80211_HT_CAP_SGI_20 | IEEE80211_HT_CAP_SGI_40), .mcs = { .rx_mask = { 0xff, 0xff, 0xff, 0xff, 0xff, @@ -544,6 +464,32 @@ static const struct ieee80211_ht_cap mac80211_ht_capa_mod_mask = { }, }; +static const struct ieee80211_vht_cap mac80211_vht_capa_mod_mask = { + .vht_cap_info = + cpu_to_le32(IEEE80211_VHT_CAP_RXLDPC | + IEEE80211_VHT_CAP_SHORT_GI_80 | + IEEE80211_VHT_CAP_SHORT_GI_160 | + IEEE80211_VHT_CAP_RXSTBC_1 | + IEEE80211_VHT_CAP_RXSTBC_2 | + IEEE80211_VHT_CAP_RXSTBC_3 | + IEEE80211_VHT_CAP_RXSTBC_4 | + IEEE80211_VHT_CAP_TXSTBC | + IEEE80211_VHT_CAP_SU_BEAMFORMER_CAPABLE | + IEEE80211_VHT_CAP_SU_BEAMFORMEE_CAPABLE | + IEEE80211_VHT_CAP_TX_ANTENNA_PATTERN | + IEEE80211_VHT_CAP_RX_ANTENNA_PATTERN | + IEEE80211_VHT_CAP_MAX_A_MPDU_LENGTH_EXPONENT_MASK), + .supp_mcs = { + .rx_mcs_map = cpu_to_le16(~0), + .tx_mcs_map = cpu_to_le16(~0), + }, +}; + +static const u8 extended_capabilities[] = { + 0, 0, 0, 0, 0, 0, 0, + WLAN_EXT_CAPA8_OPMODE_NOTIF, +}; + struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, const struct ieee80211_ops *ops) { @@ -600,13 +546,18 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, WIPHY_FLAG_REPORTS_OBSS | WIPHY_FLAG_OFFCHAN_TX; + wiphy->extended_capabilities = extended_capabilities; + wiphy->extended_capabilities_mask = extended_capabilities; + wiphy->extended_capabilities_len = ARRAY_SIZE(extended_capabilities); + if (ops->remain_on_channel) wiphy->flags |= WIPHY_FLAG_HAS_REMAIN_ON_CHANNEL; wiphy->features |= NL80211_FEATURE_SK_TX_STATUS | NL80211_FEATURE_SAE | NL80211_FEATURE_HT_IBSS | - NL80211_FEATURE_VIF_TXPOWER; + NL80211_FEATURE_VIF_TXPOWER | + NL80211_FEATURE_USERSPACE_MPM; if (!ops->hw_scan) wiphy->features |= NL80211_FEATURE_LOW_PRIORITY_SCAN | @@ -641,8 +592,11 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, IEEE80211_RADIOTAP_MCS_HAVE_BW; local->hw.radiotap_vht_details = IEEE80211_RADIOTAP_VHT_KNOWN_GI | IEEE80211_RADIOTAP_VHT_KNOWN_BANDWIDTH; + local->hw.uapsd_queues = IEEE80211_DEFAULT_UAPSD_QUEUES; + local->hw.uapsd_max_sp_len = IEEE80211_DEFAULT_MAX_SP_LEN; local->user_power_level = IEEE80211_UNSET_POWER_LEVEL; wiphy->ht_capa_mod_mask = &mac80211_ht_capa_mod_mask; + wiphy->vht_capa_mod_mask = &mac80211_vht_capa_mod_mask; INIT_LIST_HEAD(&local->interfaces); @@ -653,25 +607,19 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, mutex_init(&local->key_mtx); spin_lock_init(&local->filter_lock); + spin_lock_init(&local->rx_path_lock); spin_lock_init(&local->queue_stop_reason_lock); INIT_LIST_HEAD(&local->chanctx_list); mutex_init(&local->chanctx_mtx); - /* - * The rx_skb_queue is only accessed from tasklets, - * but other SKB queues are used from within IRQ - * context. Therefore, this one needs a different - * locking class so our direct, non-irq-safe use of - * the queue's lock doesn't throw lockdep warnings. 
- */ - skb_queue_head_init_class(&local->rx_skb_queue, - &ieee80211_rx_skb_queue_class); - INIT_DELAYED_WORK(&local->scan_work, ieee80211_scan_work); INIT_WORK(&local->restart_work, ieee80211_restart_work); + INIT_WORK(&local->radar_detected_work, + ieee80211_dfs_radar_detected_work); + INIT_WORK(&local->reconfig_filter, ieee80211_reconfig_filter); local->smps_mode = IEEE80211_SMPS_OFF; @@ -687,8 +635,6 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, spin_lock_init(&local->ack_status_lock); idr_init(&local->ack_status_frames); - /* preallocate at least one entry */ - idr_pre_get(&local->ack_status_frames, GFP_KERNEL); sta_info_init(local); @@ -706,9 +652,6 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, skb_queue_head_init(&local->skb_queue); skb_queue_head_init(&local->skb_queue_unreliable); - /* init dummy netdev for use w/ NAPI */ - init_dummy_netdev(&local->napi_dev); - ieee80211_led_names(local); ieee80211_roc_setup(local); @@ -725,6 +668,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) int channels, max_bitrates; bool supp_ht, supp_vht; netdev_features_t feature_whitelist; + struct cfg80211_chan_def dflt_chandef = {}; static const u32 cipher_suites[] = { /* keep WEP first, it may be removed below */ WLAN_CIPHER_SUITE_WEP40, @@ -747,9 +691,6 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) return -EINVAL; #endif - if ((hw->flags & IEEE80211_HW_SCAN_WHILE_IDLE) && !local->ops->hw_scan) - return -EINVAL; - if (!local->use_chanctx) { for (i = 0; i < local->hw.wiphy->n_iface_combinations; i++) { const struct ieee80211_iface_combination *comb; @@ -767,6 +708,16 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) */ if (local->hw.wiphy->interface_modes & BIT(NL80211_IFTYPE_WDS)) return -EINVAL; + + /* DFS currently not supported with channel context drivers */ + for (i = 0; i < local->hw.wiphy->n_iface_combinations; i++) { + const struct ieee80211_iface_combination *comb; + + comb = &local->hw.wiphy->iface_combinations[i]; + + if (comb->radar_detect_widths) + return -EINVAL; + } } /* Only HW csum features are currently compatible with mac80211 */ @@ -795,15 +746,19 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) sband = local->hw.wiphy->bands[band]; if (!sband) continue; - if (!local->use_chanctx && !local->_oper_channel) { + + if (!dflt_chandef.chan) { + cfg80211_chandef_create(&dflt_chandef, + &sband->channels[0], + NL80211_CHAN_NO_HT); /* init channel we're on */ - local->hw.conf.channel = - local->_oper_channel = &sband->channels[0]; - local->hw.conf.channel_type = NL80211_CHAN_NO_HT; + if (!local->use_chanctx && !local->_oper_chandef.chan) { + local->hw.conf.chandef = dflt_chandef; + local->_oper_chandef = dflt_chandef; + } + local->monitor_chandef = dflt_chandef; } - cfg80211_chandef_create(&local->monitor_chandef, - &sband->channels[0], - NL80211_CHAN_NO_HT); + channels += sband->n_channels; if (max_bitrates < sband->n_bitrates) @@ -886,22 +841,10 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) if (supp_ht) local->scan_ies_len += 2 + sizeof(struct ieee80211_ht_cap); - if (supp_vht) { + if (supp_vht) local->scan_ies_len += 2 + sizeof(struct ieee80211_vht_cap); - /* - * (for now at least), drivers wanting to use VHT must - * support channel contexts, as they contain all the - * necessary VHT information and the global hw config - * doesn't (yet) - */ - if (WARN_ON(!local->use_chanctx)) { - result = -EINVAL; - goto fail_wiphy_register; - } - } - if (!local->ops->hw_scan) { /* For hw_scan, driver needs to set these up. 
*/ local->hw.wiphy->max_scan_ssids = 4; @@ -1049,12 +992,22 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) goto fail_ifa; #endif - netif_napi_add(&local->napi_dev, &local->napi, ieee80211_napi_poll, - local->hw.napi_weight); +#if IS_ENABLED(CONFIG_IPV6) + local->ifa6_notifier.notifier_call = ieee80211_ifa6_changed; + result = register_inet6addr_notifier(&local->ifa6_notifier); + if (result) + goto fail_ifa6; +#endif return 0; +#if IS_ENABLED(CONFIG_IPV6) + fail_ifa6: #ifdef CONFIG_INET + unregister_inetaddr_notifier(&local->ifa_notifier); +#endif +#endif +#if defined(CONFIG_INET) || defined(CONFIG_IPV6) fail_ifa: pm_qos_remove_notifier(PM_QOS_NETWORK_LATENCY, &local->network_latency_notifier); @@ -1090,6 +1043,9 @@ void ieee80211_unregister_hw(struct ieee80211_hw *hw) #ifdef CONFIG_INET unregister_inetaddr_notifier(&local->ifa_notifier); #endif +#if IS_ENABLED(CONFIG_IPV6) + unregister_inet6addr_notifier(&local->ifa6_notifier); +#endif rtnl_lock(); @@ -1113,7 +1069,6 @@ void ieee80211_unregister_hw(struct ieee80211_hw *hw) wiphy_warn(local->hw.wiphy, "skb_queue not empty\n"); skb_queue_purge(&local->skb_queue); skb_queue_purge(&local->skb_queue_unreliable); - skb_queue_purge(&local->rx_skb_queue); destroy_workqueue(local->workqueue); wiphy_unregister(local->hw.wiphy); @@ -1191,8 +1146,7 @@ static void __exit ieee80211_exit(void) rc80211_minstrel_ht_exit(); rc80211_minstrel_exit(); - if (mesh_allocated) - ieee80211s_stop(); + ieee80211s_stop(); ieee80211_iface_exit(); diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index 649ad513547f..6952760881c8 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -13,23 +13,14 @@ #include "ieee80211_i.h" #include "mesh.h" -#define TMR_RUNNING_HK 0 -#define TMR_RUNNING_MP 1 -#define TMR_RUNNING_MPR 2 - -int mesh_allocated; +static int mesh_allocated; static struct kmem_cache *rm_cache; -#ifdef CONFIG_MAC80211_MESH bool mesh_action_is_path_sel(struct ieee80211_mgmt *mgmt) { return (mgmt->u.action.u.mesh_action.action_code == WLAN_MESH_ACTION_HWMP_PATH_SELECTION); } -#else -bool mesh_action_is_path_sel(struct ieee80211_mgmt *mgmt) -{ return false; } -#endif void ieee80211s_init(void) { @@ -41,6 +32,8 @@ void ieee80211s_init(void) void ieee80211s_stop(void) { + if (!mesh_allocated) + return; mesh_pathtbl_unregister(); kmem_cache_destroy(rm_cache); } @@ -53,11 +46,6 @@ static void ieee80211_mesh_housekeeping_timer(unsigned long data) set_bit(MESH_WORK_HOUSEKEEPING, &ifmsh->wrkq_flags); - if (local->quiescing) { - set_bit(TMR_RUNNING_HK, &ifmsh->timers_running); - return; - } - ieee80211_queue_work(&local->hw, &sdata->work); } @@ -95,24 +83,22 @@ bool mesh_matches_local(struct ieee80211_sub_if_data *sdata, (ifmsh->mesh_cc_id == ie->mesh_config->meshconf_congest) && (ifmsh->mesh_sp_id == ie->mesh_config->meshconf_synch) && (ifmsh->mesh_auth_id == ie->mesh_config->meshconf_auth))) - goto mismatch; + return false; ieee80211_sta_get_rates(local, ie, ieee80211_get_sdata_band(sdata), &basic_rates); if (sdata->vif.bss_conf.basic_rates != basic_rates) - goto mismatch; + return false; ieee80211_ht_oper_to_chandef(sdata->vif.bss_conf.chandef.chan, ie->ht_operation, &sta_chan_def); if (!cfg80211_chandef_compatible(&sdata->vif.bss_conf.chandef, &sta_chan_def)) - goto mismatch; + return false; return true; -mismatch: - return false; } /** @@ -123,7 +109,7 @@ mismatch: bool mesh_peer_accepts_plinks(struct ieee802_11_elems *ie) { return (ie->mesh_config->meshconf_cap & - IEEE80211_MESHCONF_CAPAB_ACCEPT_PLINKS) != 0; + 
IEEE80211_MESHCONF_CAPAB_ACCEPT_PLINKS) != 0; } /** @@ -154,6 +140,31 @@ u32 mesh_accept_plinks_update(struct ieee80211_sub_if_data *sdata) return changed; } +/* + * mesh_sta_cleanup - clean up any mesh sta state + * + * @sta: mesh sta to clean up. + */ +void mesh_sta_cleanup(struct sta_info *sta) +{ + struct ieee80211_sub_if_data *sdata = sta->sdata; + u32 changed; + + /* + * maybe userspace handles peer allocation and peering, but in either + * case the beacon is still generated by the kernel and we might need + * an update. + */ + changed = mesh_accept_plinks_update(sdata); + if (!sdata->u.mesh.user_mpm) { + changed |= mesh_plink_deactivate(sta); + del_timer_sync(&sta->plink_timer); + } + + if (changed) + ieee80211_mbss_info_change_notify(sdata, changed); +} + int mesh_rmc_init(struct ieee80211_sub_if_data *sdata) { int i; @@ -176,11 +187,12 @@ void mesh_rmc_free(struct ieee80211_sub_if_data *sdata) if (!sdata->u.mesh.rmc) return; - for (i = 0; i < RMC_BUCKETS; i++) + for (i = 0; i < RMC_BUCKETS; i++) { list_for_each_entry_safe(p, n, &rmc->bucket[i], list) { list_del(&p->list); kmem_cache_free(rm_cache, p); } + } kfree(rmc); sdata->u.mesh.rmc = NULL; @@ -189,6 +201,7 @@ void mesh_rmc_free(struct ieee80211_sub_if_data *sdata) /** * mesh_rmc_check - Check frame in recent multicast cache and add if absent. * + * @sdata: interface * @sa: source address * @mesh_hdr: mesh_header * @@ -198,8 +211,8 @@ void mesh_rmc_free(struct ieee80211_sub_if_data *sdata) * received this frame lately. If the frame is not in the cache, it is added to * it. */ -int mesh_rmc_check(u8 *sa, struct ieee80211s_hdr *mesh_hdr, - struct ieee80211_sub_if_data *sdata) +int mesh_rmc_check(struct ieee80211_sub_if_data *sdata, + const u8 *sa, struct ieee80211s_hdr *mesh_hdr) { struct mesh_rmc *rmc = sdata->u.mesh.rmc; u32 seqnum = 0; @@ -213,12 +226,11 @@ int mesh_rmc_check(u8 *sa, struct ieee80211s_hdr *mesh_hdr, list_for_each_entry_safe(p, n, &rmc->bucket[idx], list) { ++entries; if (time_after(jiffies, p->exp_time) || - (entries == RMC_QUEUE_MAX_LEN)) { + entries == RMC_QUEUE_MAX_LEN) { list_del(&p->list); kmem_cache_free(rm_cache, p); --entries; - } else if ((seqnum == p->seqnum) && - (ether_addr_equal(sa, p->sa))) + } else if ((seqnum == p->seqnum) && ether_addr_equal(sa, p->sa)) return -1; } @@ -233,8 +245,8 @@ int mesh_rmc_check(u8 *sa, struct ieee80211s_hdr *mesh_hdr, return 0; } -int -mesh_add_meshconf_ie(struct sk_buff *skb, struct ieee80211_sub_if_data *sdata) +int mesh_add_meshconf_ie(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb) { struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; u8 *pos, neighbors; @@ -265,16 +277,18 @@ mesh_add_meshconf_ie(struct sk_buff *skb, struct ieee80211_sub_if_data *sdata) /* Mesh capability */ *pos = IEEE80211_MESHCONF_CAPAB_FORWARDING; *pos |= ifmsh->accepting_plinks ? - IEEE80211_MESHCONF_CAPAB_ACCEPT_PLINKS : 0x00; + IEEE80211_MESHCONF_CAPAB_ACCEPT_PLINKS : 0x00; + /* Mesh PS mode. See IEEE802.11-2012 8.4.2.100.8 */ + *pos |= ifmsh->ps_peers_deep_sleep ? + IEEE80211_MESHCONF_CAPAB_POWER_SAVE_LEVEL : 0x00; *pos++ |= ifmsh->adjusting_tbtt ? 
- IEEE80211_MESHCONF_CAPAB_TBTT_ADJUSTING : 0x00; + IEEE80211_MESHCONF_CAPAB_TBTT_ADJUSTING : 0x00; *pos++ = 0x00; return 0; } -int -mesh_add_meshid_ie(struct sk_buff *skb, struct ieee80211_sub_if_data *sdata) +int mesh_add_meshid_ie(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb) { struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; u8 *pos; @@ -291,8 +305,31 @@ mesh_add_meshid_ie(struct sk_buff *skb, struct ieee80211_sub_if_data *sdata) return 0; } -int -mesh_add_vendor_ies(struct sk_buff *skb, struct ieee80211_sub_if_data *sdata) +static int mesh_add_awake_window_ie(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb) +{ + struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; + u8 *pos; + + /* see IEEE802.11-2012 13.14.6 */ + if (ifmsh->ps_peers_light_sleep == 0 && + ifmsh->ps_peers_deep_sleep == 0 && + ifmsh->nonpeer_pm == NL80211_MESH_POWER_ACTIVE) + return 0; + + if (skb_tailroom(skb) < 4) + return -ENOMEM; + + pos = skb_put(skb, 2 + 2); + *pos++ = WLAN_EID_MESH_AWAKE_WINDOW; + *pos++ = 2; + put_unaligned_le16(ifmsh->mshcfg.dot11MeshAwakeWindowDuration, pos); + + return 0; +} + +int mesh_add_vendor_ies(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb) { struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; u8 offset, len; @@ -315,8 +352,7 @@ mesh_add_vendor_ies(struct sk_buff *skb, struct ieee80211_sub_if_data *sdata) return 0; } -int -mesh_add_rsn_ie(struct sk_buff *skb, struct ieee80211_sub_if_data *sdata) +int mesh_add_rsn_ie(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb) { struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; u8 len = 0; @@ -344,11 +380,9 @@ mesh_add_rsn_ie(struct sk_buff *skb, struct ieee80211_sub_if_data *sdata) return 0; } -int mesh_add_ds_params_ie(struct sk_buff *skb, - struct ieee80211_sub_if_data *sdata) +static int mesh_add_ds_params_ie(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb) { - struct ieee80211_local *local = sdata->local; - struct ieee80211_supported_band *sband; struct ieee80211_chanctx_conf *chanctx_conf; struct ieee80211_channel *chan; u8 *pos; @@ -365,19 +399,16 @@ int mesh_add_ds_params_ie(struct sk_buff *skb, chan = chanctx_conf->def.chan; rcu_read_unlock(); - sband = local->hw.wiphy->bands[chan->band]; - if (sband->band == IEEE80211_BAND_2GHZ) { - pos = skb_put(skb, 2 + 1); - *pos++ = WLAN_EID_DS_PARAMS; - *pos++ = 1; - *pos++ = ieee80211_frequency_to_channel(chan->center_freq); - } + pos = skb_put(skb, 2 + 1); + *pos++ = WLAN_EID_DS_PARAMS; + *pos++ = 1; + *pos++ = ieee80211_frequency_to_channel(chan->center_freq); return 0; } -int mesh_add_ht_cap_ie(struct sk_buff *skb, - struct ieee80211_sub_if_data *sdata) +int mesh_add_ht_cap_ie(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb) { struct ieee80211_local *local = sdata->local; enum ieee80211_band band = ieee80211_get_sdata_band(sdata); @@ -398,8 +429,8 @@ int mesh_add_ht_cap_ie(struct sk_buff *skb, return 0; } -int mesh_add_ht_oper_ie(struct sk_buff *skb, - struct ieee80211_sub_if_data *sdata) +int mesh_add_ht_oper_ie(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb) { struct ieee80211_local *local = sdata->local; struct ieee80211_chanctx_conf *chanctx_conf; @@ -434,19 +465,13 @@ int mesh_add_ht_oper_ie(struct sk_buff *skb, return 0; } + static void ieee80211_mesh_path_timer(unsigned long data) { struct ieee80211_sub_if_data *sdata = (struct ieee80211_sub_if_data *) data; - struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; - struct ieee80211_local *local = sdata->local; - if (local->quiescing) { - set_bit(TMR_RUNNING_MP, 
&ifmsh->timers_running); - return; - } - - ieee80211_queue_work(&local->hw, &sdata->work); + ieee80211_queue_work(&sdata->local->hw, &sdata->work); } static void ieee80211_mesh_path_root_timer(unsigned long data) @@ -454,16 +479,10 @@ static void ieee80211_mesh_path_root_timer(unsigned long data) struct ieee80211_sub_if_data *sdata = (struct ieee80211_sub_if_data *) data; struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; - struct ieee80211_local *local = sdata->local; set_bit(MESH_WORK_ROOT, &ifmsh->wrkq_flags); - if (local->quiescing) { - set_bit(TMR_RUNNING_MPR, &ifmsh->timers_running); - return; - } - - ieee80211_queue_work(&local->hw, &sdata->work); + ieee80211_queue_work(&sdata->local->hw, &sdata->work); } void ieee80211_mesh_root_setup(struct ieee80211_if_mesh *ifmsh) @@ -479,7 +498,7 @@ void ieee80211_mesh_root_setup(struct ieee80211_if_mesh *ifmsh) /** * ieee80211_fill_mesh_addresses - fill addresses of a locally originated mesh frame - * @hdr: 802.11 frame header + * @hdr: 802.11 frame header * @fc: frame control field * @meshda: destination address in the mesh * @meshsa: source address address in the mesh. Same as TA, as frame is @@ -510,8 +529,8 @@ int ieee80211_fill_mesh_addresses(struct ieee80211_hdr *hdr, __le16 *fc, /** * ieee80211_new_mesh_header - create a new mesh header - * @meshhdr: uninitialized mesh header * @sdata: mesh interface to be used + * @meshhdr: uninitialized mesh header * @addr4or5: 1st address in the ae header, which may correspond to address 4 * (if addr6 is NULL) or address 5 (if addr6 is present). It may * be NULL. @@ -520,42 +539,49 @@ int ieee80211_fill_mesh_addresses(struct ieee80211_hdr *hdr, __le16 *fc, * * Return the header length. */ -int ieee80211_new_mesh_header(struct ieee80211s_hdr *meshhdr, - struct ieee80211_sub_if_data *sdata, char *addr4or5, - char *addr6) +int ieee80211_new_mesh_header(struct ieee80211_sub_if_data *sdata, + struct ieee80211s_hdr *meshhdr, + const char *addr4or5, const char *addr6) { - int aelen = 0; - BUG_ON(!addr4or5 && addr6); + if (WARN_ON(!addr4or5 && addr6)) + return 0; + memset(meshhdr, 0, sizeof(*meshhdr)); + meshhdr->ttl = sdata->u.mesh.mshcfg.dot11MeshTTL; + + /* FIXME: racy -- TX on multiple queues can be concurrent */ put_unaligned(cpu_to_le32(sdata->u.mesh.mesh_seqnum), &meshhdr->seqnum); sdata->u.mesh.mesh_seqnum++; + if (addr4or5 && !addr6) { meshhdr->flags |= MESH_FLAGS_AE_A4; - aelen += ETH_ALEN; memcpy(meshhdr->eaddr1, addr4or5, ETH_ALEN); + return 2 * ETH_ALEN; } else if (addr4or5 && addr6) { meshhdr->flags |= MESH_FLAGS_AE_A5_A6; - aelen += 2 * ETH_ALEN; memcpy(meshhdr->eaddr1, addr4or5, ETH_ALEN); memcpy(meshhdr->eaddr2, addr6, ETH_ALEN); + return 3 * ETH_ALEN; } - return 6 + aelen; + + return ETH_ALEN; } -static void ieee80211_mesh_housekeeping(struct ieee80211_sub_if_data *sdata, - struct ieee80211_if_mesh *ifmsh) +static void ieee80211_mesh_housekeeping(struct ieee80211_sub_if_data *sdata) { + struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; u32 changed; ieee80211_sta_expire(sdata, IEEE80211_MESH_PEER_INACTIVITY_LIMIT); mesh_path_expire(sdata); changed = mesh_accept_plinks_update(sdata); - ieee80211_bss_info_change_notify(sdata, changed); + ieee80211_mbss_info_change_notify(sdata, changed); mod_timer(&ifmsh->housekeeping_timer, - round_jiffies(jiffies + IEEE80211_MESH_HOUSEKEEPING_INTERVAL)); + round_jiffies(jiffies + + IEEE80211_MESH_HOUSEKEEPING_INTERVAL)); } static void ieee80211_mesh_rootpath(struct ieee80211_sub_if_data *sdata) @@ -574,39 +600,147 @@ static void ieee80211_mesh_rootpath(struct 
ieee80211_sub_if_data *sdata) round_jiffies(TU_TO_EXP_TIME(interval))); } -#ifdef CONFIG_PM -void ieee80211_mesh_quiesce(struct ieee80211_sub_if_data *sdata) +static int +ieee80211_mesh_build_beacon(struct ieee80211_if_mesh *ifmsh) { - struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; + struct beacon_data *bcn; + int head_len, tail_len; + struct sk_buff *skb; + struct ieee80211_mgmt *mgmt; + struct ieee80211_chanctx_conf *chanctx_conf; + enum ieee80211_band band; + u8 *pos; + struct ieee80211_sub_if_data *sdata; + int hdr_len = offsetof(struct ieee80211_mgmt, u.beacon) + + sizeof(mgmt->u.beacon); + + sdata = container_of(ifmsh, struct ieee80211_sub_if_data, u.mesh); + rcu_read_lock(); + chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf); + band = chanctx_conf->def.chan->band; + rcu_read_unlock(); - /* use atomic bitops in case all timers fire at the same time */ + head_len = hdr_len + + 2 + /* NULL SSID */ + 2 + 8 + /* supported rates */ + 2 + 3; /* DS params */ + tail_len = 2 + (IEEE80211_MAX_SUPP_RATES - 8) + + 2 + sizeof(struct ieee80211_ht_cap) + + 2 + sizeof(struct ieee80211_ht_operation) + + 2 + ifmsh->mesh_id_len + + 2 + sizeof(struct ieee80211_meshconf_ie) + + 2 + sizeof(__le16) + /* awake window */ + ifmsh->ie_len; + + bcn = kzalloc(sizeof(*bcn) + head_len + tail_len, GFP_KERNEL); + /* need an skb for IE builders to operate on */ + skb = dev_alloc_skb(max(head_len, tail_len)); + + if (!bcn || !skb) + goto out_free; - if (del_timer_sync(&ifmsh->housekeeping_timer)) - set_bit(TMR_RUNNING_HK, &ifmsh->timers_running); - if (del_timer_sync(&ifmsh->mesh_path_timer)) - set_bit(TMR_RUNNING_MP, &ifmsh->timers_running); - if (del_timer_sync(&ifmsh->mesh_path_root_timer)) - set_bit(TMR_RUNNING_MPR, &ifmsh->timers_running); + /* + * pointers go into the block we allocated, + * memory is | beacon_data | head | tail | + */ + bcn->head = ((u8 *) bcn) + sizeof(*bcn); + + /* fill in the head */ + mgmt = (struct ieee80211_mgmt *) skb_put(skb, hdr_len); + memset(mgmt, 0, hdr_len); + mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | + IEEE80211_STYPE_BEACON); + eth_broadcast_addr(mgmt->da); + memcpy(mgmt->sa, sdata->vif.addr, ETH_ALEN); + memcpy(mgmt->bssid, sdata->vif.addr, ETH_ALEN); + ieee80211_mps_set_frame_flags(sdata, NULL, (void *) mgmt); + mgmt->u.beacon.beacon_int = + cpu_to_le16(sdata->vif.bss_conf.beacon_int); + mgmt->u.beacon.capab_info |= cpu_to_le16( + sdata->u.mesh.security ? 
WLAN_CAPABILITY_PRIVACY : 0); + + pos = skb_put(skb, 2); + *pos++ = WLAN_EID_SSID; + *pos++ = 0x0; + + if (ieee80211_add_srates_ie(sdata, skb, true, band) || + mesh_add_ds_params_ie(sdata, skb)) + goto out_free; + + bcn->head_len = skb->len; + memcpy(bcn->head, skb->data, bcn->head_len); + + /* now the tail */ + skb_trim(skb, 0); + bcn->tail = bcn->head + bcn->head_len; + + if (ieee80211_add_ext_srates_ie(sdata, skb, true, band) || + mesh_add_rsn_ie(sdata, skb) || + mesh_add_ht_cap_ie(sdata, skb) || + mesh_add_ht_oper_ie(sdata, skb) || + mesh_add_meshid_ie(sdata, skb) || + mesh_add_meshconf_ie(sdata, skb) || + mesh_add_awake_window_ie(sdata, skb) || + mesh_add_vendor_ies(sdata, skb)) + goto out_free; + + bcn->tail_len = skb->len; + memcpy(bcn->tail, skb->data, bcn->tail_len); + + dev_kfree_skb(skb); + rcu_assign_pointer(ifmsh->beacon, bcn); + return 0; +out_free: + kfree(bcn); + dev_kfree_skb(skb); + return -ENOMEM; } -void ieee80211_mesh_restart(struct ieee80211_sub_if_data *sdata) +static int +ieee80211_mesh_rebuild_beacon(struct ieee80211_if_mesh *ifmsh) { - struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; + struct beacon_data *old_bcn; + int ret; + + mutex_lock(&ifmsh->mtx); + + old_bcn = rcu_dereference_protected(ifmsh->beacon, + lockdep_is_held(&ifmsh->mtx)); + ret = ieee80211_mesh_build_beacon(ifmsh); + if (ret) + /* just reuse old beacon */ + goto out; + + if (old_bcn) + kfree_rcu(old_bcn, rcu_head); +out: + mutex_unlock(&ifmsh->mtx); + return ret; +} - if (test_and_clear_bit(TMR_RUNNING_HK, &ifmsh->timers_running)) - add_timer(&ifmsh->housekeeping_timer); - if (test_and_clear_bit(TMR_RUNNING_MP, &ifmsh->timers_running)) - add_timer(&ifmsh->mesh_path_timer); - if (test_and_clear_bit(TMR_RUNNING_MPR, &ifmsh->timers_running)) - add_timer(&ifmsh->mesh_path_root_timer); - ieee80211_mesh_root_setup(ifmsh); +void ieee80211_mbss_info_change_notify(struct ieee80211_sub_if_data *sdata, + u32 changed) +{ + if (sdata->vif.bss_conf.enable_beacon && + (changed & (BSS_CHANGED_BEACON | + BSS_CHANGED_HT | + BSS_CHANGED_BASIC_RATES | + BSS_CHANGED_BEACON_INT))) + if (ieee80211_mesh_rebuild_beacon(&sdata->u.mesh)) + return; + ieee80211_bss_info_change_notify(sdata, changed); } -#endif -void ieee80211_start_mesh(struct ieee80211_sub_if_data *sdata) +int ieee80211_start_mesh(struct ieee80211_sub_if_data *sdata) { struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; struct ieee80211_local *local = sdata->local; + u32 changed = BSS_CHANGED_BEACON | + BSS_CHANGED_BEACON_ENABLED | + BSS_CHANGED_HT | + BSS_CHANGED_BASIC_RATES | + BSS_CHANGED_BEACON_INT; + enum ieee80211_band band = ieee80211_get_sdata_band(sdata); local->fif_other_bss++; /* mesh ifaces must set allmulti to forward mcast traffic */ @@ -624,34 +758,51 @@ void ieee80211_start_mesh(struct ieee80211_sub_if_data *sdata) ieee80211_queue_work(&local->hw, &sdata->work); sdata->vif.bss_conf.ht_operation_mode = ifmsh->mshcfg.ht_opmode; - sdata->vif.bss_conf.beacon_int = MESH_DEFAULT_BEACON_INTERVAL; + sdata->vif.bss_conf.enable_beacon = true; sdata->vif.bss_conf.basic_rates = - ieee80211_mandatory_rates(sdata->local, - ieee80211_get_sdata_band(sdata)); - ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON | - BSS_CHANGED_BEACON_ENABLED | - BSS_CHANGED_HT | - BSS_CHANGED_BASIC_RATES | - BSS_CHANGED_BEACON_INT); + ieee80211_mandatory_rates(local, band); + + changed |= ieee80211_mps_local_status_update(sdata); + + if (ieee80211_mesh_build_beacon(ifmsh)) { + ieee80211_stop_mesh(sdata); + return -ENOMEM; + } + + 
ieee80211_bss_info_change_notify(sdata, changed); netif_carrier_on(sdata->dev); + return 0; } void ieee80211_stop_mesh(struct ieee80211_sub_if_data *sdata) { struct ieee80211_local *local = sdata->local; struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; + struct beacon_data *bcn; netif_carrier_off(sdata->dev); /* stop the beacon */ ifmsh->mesh_id_len = 0; + sdata->vif.bss_conf.enable_beacon = false; + clear_bit(SDATA_STATE_OFFCHANNEL_BEACON_STOPPED, &sdata->state); ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON_ENABLED); + mutex_lock(&ifmsh->mtx); + bcn = rcu_dereference_protected(ifmsh->beacon, + lockdep_is_held(&ifmsh->mtx)); + rcu_assign_pointer(ifmsh->beacon, NULL); + kfree_rcu(bcn, rcu_head); + mutex_unlock(&ifmsh->mtx); /* flush STAs and mpaths on this iface */ - sta_info_flush(sdata->local, sdata); + sta_info_flush(sdata); mesh_path_flush_by_iface(sdata); + /* free all potentially still buffered group-addressed frames */ + local->total_ps_buffered -= skb_queue_len(&ifmsh->ps.bc_buf); + skb_queue_purge(&ifmsh->ps.bc_buf); + del_timer_sync(&sdata->u.mesh.housekeeping_timer); del_timer_sync(&sdata->u.mesh.mesh_path_root_timer); del_timer_sync(&sdata->u.mesh.mesh_path_timer); @@ -667,8 +818,61 @@ void ieee80211_stop_mesh(struct ieee80211_sub_if_data *sdata) local->fif_other_bss--; atomic_dec(&local->iff_allmultis); ieee80211_configure_filter(local); +} + +static void +ieee80211_mesh_rx_probe_req(struct ieee80211_sub_if_data *sdata, + struct ieee80211_mgmt *mgmt, size_t len) +{ + struct ieee80211_local *local = sdata->local; + struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; + struct sk_buff *presp; + struct beacon_data *bcn; + struct ieee80211_mgmt *hdr; + struct ieee802_11_elems elems; + size_t baselen; + u8 *pos; + + pos = mgmt->u.probe_req.variable; + baselen = (u8 *) pos - (u8 *) mgmt; + if (baselen > len) + return; + + ieee802_11_parse_elems(pos, len - baselen, false, &elems); + + /* 802.11-2012 10.1.4.3.2 */ + if ((!ether_addr_equal(mgmt->da, sdata->vif.addr) && + !is_broadcast_ether_addr(mgmt->da)) || + elems.ssid_len != 0) + return; - sdata->u.mesh.timers_running = 0; + if (elems.mesh_id_len != 0 && + (elems.mesh_id_len != ifmsh->mesh_id_len || + memcmp(elems.mesh_id, ifmsh->mesh_id, ifmsh->mesh_id_len))) + return; + + rcu_read_lock(); + bcn = rcu_dereference(ifmsh->beacon); + + if (!bcn) + goto out; + + presp = dev_alloc_skb(local->tx_headroom + + bcn->head_len + bcn->tail_len); + if (!presp) + goto out; + + skb_reserve(presp, local->tx_headroom); + memcpy(skb_put(presp, bcn->head_len), bcn->head, bcn->head_len); + memcpy(skb_put(presp, bcn->tail_len), bcn->tail, bcn->tail_len); + hdr = (struct ieee80211_mgmt *) presp->data; + hdr->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | + IEEE80211_STYPE_PROBE_RESP); + memcpy(hdr->da, mgmt->sa, ETH_ALEN); + IEEE80211_SKB_CB(presp)->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT; + ieee80211_tx_skb(sdata, presp); +out: + rcu_read_unlock(); } static void ieee80211_mesh_rx_bcn_presp(struct ieee80211_sub_if_data *sdata, @@ -695,7 +899,7 @@ static void ieee80211_mesh_rx_bcn_presp(struct ieee80211_sub_if_data *sdata, return; ieee802_11_parse_elems(mgmt->u.probe_resp.variable, len - baselen, - &elems); + false, &elems); /* ignore non-mesh or secure / unsecure mismatch */ if ((!elems.mesh_id || !elems.mesh_config) || @@ -703,7 +907,7 @@ static void ieee80211_mesh_rx_bcn_presp(struct ieee80211_sub_if_data *sdata, (!elems.rsn && sdata->u.mesh.security != IEEE80211_MESH_SEC_NONE)) return; - if (elems.ds_params && elems.ds_params_len == 
1) + if (elems.ds_params) freq = ieee80211_channel_to_frequency(elems.ds_params[0], band); else freq = rx_status->freq; @@ -760,6 +964,9 @@ void ieee80211_mesh_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, ieee80211_mesh_rx_bcn_presp(sdata, stype, mgmt, skb->len, rx_status); break; + case IEEE80211_STYPE_PROBE_REQ: + ieee80211_mesh_rx_probe_req(sdata, mgmt, skb->len); + break; case IEEE80211_STYPE_ACTION: ieee80211_mesh_rx_mgmt_action(sdata, mgmt, skb->len, rx_status); break; @@ -782,7 +989,7 @@ void ieee80211_mesh_work(struct ieee80211_sub_if_data *sdata) mesh_mpp_table_grow(); if (test_and_clear_bit(MESH_WORK_HOUSEKEEPING, &ifmsh->wrkq_flags)) - ieee80211_mesh_housekeeping(sdata, ifmsh); + ieee80211_mesh_housekeeping(sdata); if (test_and_clear_bit(MESH_WORK_ROOT, &ifmsh->wrkq_flags)) ieee80211_mesh_rootpath(sdata); @@ -797,7 +1004,8 @@ void ieee80211_mesh_notify_scan_completed(struct ieee80211_local *local) rcu_read_lock(); list_for_each_entry_rcu(sdata, &local->interfaces, list) - if (ieee80211_vif_is_mesh(&sdata->vif)) + if (ieee80211_vif_is_mesh(&sdata->vif) && + ieee80211_sdata_running(sdata)) ieee80211_queue_work(&local->hw, &sdata->work); rcu_read_unlock(); } @@ -805,6 +1013,7 @@ void ieee80211_mesh_notify_scan_completed(struct ieee80211_local *local) void ieee80211_mesh_init_sdata(struct ieee80211_sub_if_data *sdata) { struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; + static u8 zero_addr[ETH_ALEN] = {}; setup_timer(&ifmsh->housekeeping_timer, ieee80211_mesh_housekeeping_timer, @@ -828,6 +1037,11 @@ void ieee80211_mesh_init_sdata(struct ieee80211_sub_if_data *sdata) ieee80211_mesh_path_root_timer, (unsigned long) sdata); INIT_LIST_HEAD(&ifmsh->preq_queue.list); + skb_queue_head_init(&ifmsh->ps.bc_buf); spin_lock_init(&ifmsh->mesh_preq_queue_lock); spin_lock_init(&ifmsh->sync_offset_lock); + RCU_INIT_POINTER(ifmsh->beacon, NULL); + mutex_init(&ifmsh->mtx); + + sdata->vif.bss_conf.bssid = zero_addr; } diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h index 84c28c6101cd..da158774eebb 100644 --- a/net/mac80211/mesh.h +++ b/net/mac80211/mesh.h @@ -26,12 +26,12 @@ * @MESH_PATH_ACTIVE: the mesh path can be used for forwarding * @MESH_PATH_RESOLVING: the discovery process is running for this mesh path * @MESH_PATH_SN_VALID: the mesh path contains a valid destination sequence - * number + * number * @MESH_PATH_FIXED: the mesh path has been manually set and should not be - * modified + * modified * @MESH_PATH_RESOLVED: the mesh path can has been resolved * @MESH_PATH_REQ_QUEUED: there is an unsent path request for this destination - * already queued up, waiting for the discovery process to start. + * already queued up, waiting for the discovery process to start. * * MESH_PATH_RESOLVED is used by the mesh path timer to * decide when to stop or cancel the mesh path discovery. 
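/*
 * Illustrative aside, not part of this patch: a minimal sketch of the
 * single-allocation beacon layout that ieee80211_mesh_build_beacon()
 * in the net/mac80211/mesh.c hunk above relies on. All names here
 * (bcn_data, bcn_alloc) are hypothetical. The descriptor and both frame
 * buffers share one kzalloc() block, laid out as
 * | descriptor | head | tail |, so a single kfree_rcu() on the
 * descriptor releases the frame data together with it once RCU readers
 * are done.
 */
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/rcupdate.h>

struct bcn_data {
	u8 *head, *tail;
	int head_len, tail_len;
	struct rcu_head rcu_head;
};

static struct bcn_data *bcn_alloc(int head_len, int tail_len)
{
	struct bcn_data *bcn;

	bcn = kzalloc(sizeof(*bcn) + head_len + tail_len, GFP_KERNEL);
	if (!bcn)
		return NULL;

	/* head storage starts right after the descriptor ... */
	bcn->head = (u8 *)bcn + sizeof(*bcn);
	/* ... and tail storage right after the head buffer */
	bcn->tail = bcn->head + head_len;
	bcn->head_len = head_len;
	bcn->tail_len = tail_len;
	return bcn;
}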
@@ -73,16 +73,16 @@ enum mesh_deferred_task_flags { * @dst: mesh path destination mac address * @sdata: mesh subif * @next_hop: mesh neighbor to which frames for this destination will be - * forwarded + * forwarded * @timer: mesh path discovery timer * @frame_queue: pending queue for frames sent to this destination while the - * path is unresolved + * path is unresolved * @sn: target sequence number * @metric: current metric to this destination * @hop_count: hops to destination * @exp_time: in jiffies, when the path will expire or when it expired * @discovery_timeout: timeout (lapse in jiffies) used for the last discovery - * retry + * retry * @discovery_retries: number of discovery retries * @flags: mesh path flags, as specified on &enum mesh_path_flags * @state_lock: mesh path state lock used to protect changes to the @@ -191,8 +191,6 @@ struct mesh_rmc { #define IEEE80211_MESH_PEER_INACTIVITY_LIMIT (1800 * HZ) #define IEEE80211_MESH_HOUSEKEEPING_INTERVAL (60 * HZ) -#define MESH_DEFAULT_BEACON_INTERVAL 1000 /* in 1024 us units */ - #define MESH_PATH_EXPIRE (600 * HZ) /* Default maximum number of plinks per interface */ @@ -208,104 +206,133 @@ struct mesh_rmc { /* Various */ int ieee80211_fill_mesh_addresses(struct ieee80211_hdr *hdr, __le16 *fc, const u8 *da, const u8 *sa); -int ieee80211_new_mesh_header(struct ieee80211s_hdr *meshhdr, - struct ieee80211_sub_if_data *sdata, char *addr4or5, - char *addr6); -int mesh_rmc_check(u8 *addr, struct ieee80211s_hdr *mesh_hdr, - struct ieee80211_sub_if_data *sdata); +int ieee80211_new_mesh_header(struct ieee80211_sub_if_data *sdata, + struct ieee80211s_hdr *meshhdr, + const char *addr4or5, const char *addr6); +int mesh_rmc_check(struct ieee80211_sub_if_data *sdata, + const u8 *addr, struct ieee80211s_hdr *mesh_hdr); bool mesh_matches_local(struct ieee80211_sub_if_data *sdata, struct ieee802_11_elems *ie); void mesh_ids_set_default(struct ieee80211_if_mesh *mesh); -void mesh_mgmt_ies_add(struct sk_buff *skb, - struct ieee80211_sub_if_data *sdata); -int mesh_add_meshconf_ie(struct sk_buff *skb, - struct ieee80211_sub_if_data *sdata); -int mesh_add_meshid_ie(struct sk_buff *skb, - struct ieee80211_sub_if_data *sdata); -int mesh_add_rsn_ie(struct sk_buff *skb, - struct ieee80211_sub_if_data *sdata); -int mesh_add_vendor_ies(struct sk_buff *skb, - struct ieee80211_sub_if_data *sdata); -int mesh_add_ds_params_ie(struct sk_buff *skb, - struct ieee80211_sub_if_data *sdata); -int mesh_add_ht_cap_ie(struct sk_buff *skb, - struct ieee80211_sub_if_data *sdata); -int mesh_add_ht_oper_ie(struct sk_buff *skb, - struct ieee80211_sub_if_data *sdata); +void mesh_mgmt_ies_add(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb); +int mesh_add_meshconf_ie(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb); +int mesh_add_meshid_ie(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb); +int mesh_add_rsn_ie(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb); +int mesh_add_vendor_ies(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb); +int mesh_add_ht_cap_ie(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb); +int mesh_add_ht_oper_ie(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb); void mesh_rmc_free(struct ieee80211_sub_if_data *sdata); int mesh_rmc_init(struct ieee80211_sub_if_data *sdata); void ieee80211s_init(void); void ieee80211s_update_metric(struct ieee80211_local *local, - struct sta_info *sta, struct sk_buff *skb); -void ieee80211s_stop(void); + struct sta_info *sta, struct sk_buff *skb); void 
ieee80211_mesh_init_sdata(struct ieee80211_sub_if_data *sdata); -void ieee80211_start_mesh(struct ieee80211_sub_if_data *sdata); +int ieee80211_start_mesh(struct ieee80211_sub_if_data *sdata); void ieee80211_stop_mesh(struct ieee80211_sub_if_data *sdata); void ieee80211_mesh_root_setup(struct ieee80211_if_mesh *ifmsh); const struct ieee80211_mesh_sync_ops *ieee80211_mesh_sync_ops_get(u8 method); +/* wrapper for ieee80211_bss_info_change_notify() */ +void ieee80211_mbss_info_change_notify(struct ieee80211_sub_if_data *sdata, + u32 changed); + +/* mesh power save */ +u32 ieee80211_mps_local_status_update(struct ieee80211_sub_if_data *sdata); +u32 ieee80211_mps_set_sta_local_pm(struct sta_info *sta, + enum nl80211_mesh_power_mode pm); +void ieee80211_mps_set_frame_flags(struct ieee80211_sub_if_data *sdata, + struct sta_info *sta, + struct ieee80211_hdr *hdr); +void ieee80211_mps_sta_status_update(struct sta_info *sta); +void ieee80211_mps_rx_h_sta_process(struct sta_info *sta, + struct ieee80211_hdr *hdr); +void ieee80211_mpsp_trigger_process(u8 *qc, struct sta_info *sta, + bool tx, bool acked); +void ieee80211_mps_frame_release(struct sta_info *sta, + struct ieee802_11_elems *elems); /* Mesh paths */ -int mesh_nexthop_lookup(struct sk_buff *skb, - struct ieee80211_sub_if_data *sdata); -int mesh_nexthop_resolve(struct sk_buff *skb, - struct ieee80211_sub_if_data *sdata); +int mesh_nexthop_lookup(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb); +int mesh_nexthop_resolve(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb); void mesh_path_start_discovery(struct ieee80211_sub_if_data *sdata); -struct mesh_path *mesh_path_lookup(u8 *dst, - struct ieee80211_sub_if_data *sdata); -struct mesh_path *mpp_path_lookup(u8 *dst, - struct ieee80211_sub_if_data *sdata); -int mpp_path_add(u8 *dst, u8 *mpp, struct ieee80211_sub_if_data *sdata); -struct mesh_path *mesh_path_lookup_by_idx(int idx, - struct ieee80211_sub_if_data *sdata); +struct mesh_path *mesh_path_lookup(struct ieee80211_sub_if_data *sdata, + const u8 *dst); +struct mesh_path *mpp_path_lookup(struct ieee80211_sub_if_data *sdata, + const u8 *dst); +int mpp_path_add(struct ieee80211_sub_if_data *sdata, + const u8 *dst, const u8 *mpp); +struct mesh_path * +mesh_path_lookup_by_idx(struct ieee80211_sub_if_data *sdata, int idx); void mesh_path_fix_nexthop(struct mesh_path *mpath, struct sta_info *next_hop); void mesh_path_expire(struct ieee80211_sub_if_data *sdata); void mesh_rx_path_sel_frame(struct ieee80211_sub_if_data *sdata, - struct ieee80211_mgmt *mgmt, size_t len); -int mesh_path_add(u8 *dst, struct ieee80211_sub_if_data *sdata); + struct ieee80211_mgmt *mgmt, size_t len); +struct mesh_path * +mesh_path_add(struct ieee80211_sub_if_data *sdata, const u8 *dst); int mesh_path_add_gate(struct mesh_path *mpath); int mesh_path_send_to_gates(struct mesh_path *mpath); int mesh_gate_num(struct ieee80211_sub_if_data *sdata); + /* Mesh plinks */ void mesh_neighbour_update(struct ieee80211_sub_if_data *sdata, - u8 *hw_addr, - struct ieee802_11_elems *ie); + u8 *hw_addr, struct ieee802_11_elems *ie); bool mesh_peer_accepts_plinks(struct ieee802_11_elems *ie); u32 mesh_accept_plinks_update(struct ieee80211_sub_if_data *sdata); void mesh_plink_broken(struct sta_info *sta); -void mesh_plink_deactivate(struct sta_info *sta); -int mesh_plink_open(struct sta_info *sta); -void mesh_plink_block(struct sta_info *sta); +u32 mesh_plink_deactivate(struct sta_info *sta); +u32 mesh_plink_open(struct sta_info *sta); +u32 mesh_plink_block(struct 
sta_info *sta); void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt, size_t len, struct ieee80211_rx_status *rx_status); +void mesh_sta_cleanup(struct sta_info *sta); /* Private interfaces */ /* Mesh tables */ void mesh_mpath_table_grow(void); void mesh_mpp_table_grow(void); /* Mesh paths */ -int mesh_path_error_tx(u8 ttl, u8 *target, __le32 target_sn, __le16 target_rcode, - const u8 *ra, struct ieee80211_sub_if_data *sdata); +int mesh_path_error_tx(struct ieee80211_sub_if_data *sdata, + u8 ttl, const u8 *target, __le32 target_sn, + __le16 target_rcode, const u8 *ra); void mesh_path_assign_nexthop(struct mesh_path *mpath, struct sta_info *sta); void mesh_path_flush_pending(struct mesh_path *mpath); void mesh_path_tx_pending(struct mesh_path *mpath); int mesh_pathtbl_init(void); void mesh_pathtbl_unregister(void); -int mesh_path_del(u8 *addr, struct ieee80211_sub_if_data *sdata); +int mesh_path_del(struct ieee80211_sub_if_data *sdata, const u8 *addr); void mesh_path_timer(unsigned long data); void mesh_path_flush_by_nexthop(struct sta_info *sta); -void mesh_path_discard_frame(struct sk_buff *skb, - struct ieee80211_sub_if_data *sdata); -void mesh_path_quiesce(struct ieee80211_sub_if_data *sdata); -void mesh_path_restart(struct ieee80211_sub_if_data *sdata); +void mesh_path_discard_frame(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb); void mesh_path_tx_root_frame(struct ieee80211_sub_if_data *sdata); bool mesh_action_is_path_sel(struct ieee80211_mgmt *mgmt); extern int mesh_paths_generation; #ifdef CONFIG_MAC80211_MESH -extern int mesh_allocated; +static inline +u32 mesh_plink_inc_estab_count(struct ieee80211_sub_if_data *sdata) +{ + atomic_inc(&sdata->u.mesh.estab_plinks); + return mesh_accept_plinks_update(sdata); +} + +static inline +u32 mesh_plink_dec_estab_count(struct ieee80211_sub_if_data *sdata) +{ + atomic_dec(&sdata->u.mesh.estab_plinks); + return mesh_accept_plinks_update(sdata); +} static inline int mesh_plink_free_count(struct ieee80211_sub_if_data *sdata) { @@ -331,26 +358,17 @@ static inline bool mesh_path_sel_is_hwmp(struct ieee80211_sub_if_data *sdata) void ieee80211_mesh_notify_scan_completed(struct ieee80211_local *local); -void ieee80211_mesh_quiesce(struct ieee80211_sub_if_data *sdata); -void ieee80211_mesh_restart(struct ieee80211_sub_if_data *sdata); -void mesh_plink_quiesce(struct sta_info *sta); -void mesh_plink_restart(struct sta_info *sta); void mesh_path_flush_by_iface(struct ieee80211_sub_if_data *sdata); void mesh_sync_adjust_tbtt(struct ieee80211_sub_if_data *sdata); +void ieee80211s_stop(void); #else -#define mesh_allocated 0 static inline void ieee80211_mesh_notify_scan_completed(struct ieee80211_local *local) {} -static inline void ieee80211_mesh_quiesce(struct ieee80211_sub_if_data *sdata) -{} -static inline void ieee80211_mesh_restart(struct ieee80211_sub_if_data *sdata) -{} -static inline void mesh_plink_quiesce(struct sta_info *sta) {} -static inline void mesh_plink_restart(struct sta_info *sta) {} static inline bool mesh_path_sel_is_hwmp(struct ieee80211_sub_if_data *sdata) { return false; } static inline void mesh_path_flush_by_iface(struct ieee80211_sub_if_data *sdata) {} +static inline void ieee80211s_stop(void) {} #endif #endif /* IEEE80211S_H */ diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c index 2659e428b80c..486819cd02cd 100644 --- a/net/mac80211/mesh_hwmp.c +++ b/net/mac80211/mesh_hwmp.c @@ -30,14 +30,14 @@ static void mesh_queue_preq(struct mesh_path *, u8); -static inline 
u32 u32_field_get(u8 *preq_elem, int offset, bool ae) +static inline u32 u32_field_get(const u8 *preq_elem, int offset, bool ae) { if (ae) offset += 6; return get_unaligned_le32(preq_elem + offset); } -static inline u32 u16_field_get(u8 *preq_elem, int offset, bool ae) +static inline u32 u16_field_get(const u8 *preq_elem, int offset, bool ae) { if (ae) offset += 6; @@ -102,10 +102,13 @@ enum mpath_frame_type { static const u8 broadcast_addr[ETH_ALEN] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; static int mesh_path_sel_frame_tx(enum mpath_frame_type action, u8 flags, - u8 *orig_addr, __le32 orig_sn, u8 target_flags, u8 *target, - __le32 target_sn, const u8 *da, u8 hop_count, u8 ttl, - __le32 lifetime, __le32 metric, __le32 preq_id, - struct ieee80211_sub_if_data *sdata) + const u8 *orig_addr, __le32 orig_sn, + u8 target_flags, const u8 *target, + __le32 target_sn, const u8 *da, + u8 hop_count, u8 ttl, + __le32 lifetime, __le32 metric, + __le32 preq_id, + struct ieee80211_sub_if_data *sdata) { struct ieee80211_local *local = sdata->local; struct sk_buff *skb; @@ -141,7 +144,7 @@ static int mesh_path_sel_frame_tx(enum mpath_frame_type action, u8 flags, *pos++ = WLAN_EID_PREQ; break; case MPATH_PREP: - mhwmp_dbg(sdata, "sending PREP to %pM\n", target); + mhwmp_dbg(sdata, "sending PREP to %pM\n", orig_addr); ie_len = 31; pos = skb_put(skb, 2 + ie_len); *pos++ = WLAN_EID_PREP; @@ -205,6 +208,7 @@ static void prepare_frame_for_deferred_tx(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb) { struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; skb_set_mac_header(skb, 0); skb_set_network_header(skb, 0); @@ -217,23 +221,26 @@ static void prepare_frame_for_deferred_tx(struct ieee80211_sub_if_data *sdata, info->control.vif = &sdata->vif; info->flags |= IEEE80211_TX_INTFL_NEED_TXPROCESSING; ieee80211_set_qos_hdr(sdata, skb); + ieee80211_mps_set_frame_flags(sdata, NULL, hdr); } /** - * mesh_send_path error - Sends a PERR mesh management frame + * mesh_path_error_tx - Sends a PERR mesh management frame * + * @ttl: allowed remaining hops * @target: broken destination * @target_sn: SN of the broken destination * @target_rcode: reason code for this PERR * @ra: node this frame is addressed to + * @sdata: local mesh subif * * Note: This function may be called with driver locks taken that the driver * also acquires in the TX path. To avoid a deadlock we don't transmit the * frame directly but add it to the pending queue instead. */ -int mesh_path_error_tx(u8 ttl, u8 *target, __le32 target_sn, - __le16 target_rcode, const u8 *ra, - struct ieee80211_sub_if_data *sdata) +int mesh_path_error_tx(struct ieee80211_sub_if_data *sdata, + u8 ttl, const u8 *target, __le32 target_sn, + __le16 target_rcode, const u8 *ra) { struct ieee80211_local *local = sdata->local; struct sk_buff *skb; @@ -353,6 +360,7 @@ static u32 airtime_link_metric_get(struct ieee80211_local *local, * @sdata: local mesh subif * @mgmt: mesh management frame * @hwmp_ie: hwmp information element (PREP or PREQ) + * @action: type of hwmp ie * * This function updates the path routing information to the originator and the * transmitter of a HWMP PREQ or PREP frame. @@ -364,14 +372,14 @@ static u32 airtime_link_metric_get(struct ieee80211_local *local, * path routing information is updated. 
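The renamed mesh_path_error_tx() kerneldoc above spells out why the PERR frame is queued rather than sent inline: the caller can hold driver locks that the TX path would take again. A minimal userspace sketch of that defer-to-queue pattern, using pthreads and illustrative names (queue_frame(), drain_pending()) rather than mac80211's pending-queue machinery:

/*
 * Plain-C/pthreads sketch, not kernel code: the producer only appends
 * to a queue (safe with arbitrary caller locks held); a worker drains
 * the queue later, outside the caller's locking context, and does the
 * actual "transmit" there.
 */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct frame {
        struct frame *next;
        int id;
};

static pthread_mutex_t queue_lock = PTHREAD_MUTEX_INITIALIZER;
static struct frame *pending_head, *pending_tail;

/* May be called with caller locks held: it only touches the queue. */
static int queue_frame(int id)
{
        struct frame *f = malloc(sizeof(*f));

        if (!f)
                return -1;
        f->id = id;
        f->next = NULL;

        pthread_mutex_lock(&queue_lock);
        if (pending_tail)
                pending_tail->next = f;
        else
                pending_head = f;
        pending_tail = f;
        pthread_mutex_unlock(&queue_lock);
        return 0;
}

/* Runs from a worker with no caller locks held, so it may transmit. */
static void drain_pending(void)
{
        for (;;) {
                struct frame *f;

                pthread_mutex_lock(&queue_lock);
                f = pending_head;
                if (f) {
                        pending_head = f->next;
                        if (!pending_head)
                                pending_tail = NULL;
                }
                pthread_mutex_unlock(&queue_lock);

                if (!f)
                        break;
                printf("tx frame %d\n", f->id);  /* the deferred transmit */
                free(f);
        }
}

int main(void)
{
        queue_frame(1);
        queue_frame(2);
        drain_pending();
        return 0;
}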
*/ static u32 hwmp_route_info_get(struct ieee80211_sub_if_data *sdata, - struct ieee80211_mgmt *mgmt, - u8 *hwmp_ie, enum mpath_frame_type action) + struct ieee80211_mgmt *mgmt, + const u8 *hwmp_ie, enum mpath_frame_type action) { struct ieee80211_local *local = sdata->local; struct mesh_path *mpath; struct sta_info *sta; bool fresh_info; - u8 *orig_addr, *ta; + const u8 *orig_addr, *ta; u32 orig_sn, orig_metric; unsigned long orig_lifetime, exp_time; u32 last_hop_metric, new_metric; @@ -422,7 +430,7 @@ static u32 hwmp_route_info_get(struct ieee80211_sub_if_data *sdata, process = false; fresh_info = false; } else { - mpath = mesh_path_lookup(orig_addr, sdata); + mpath = mesh_path_lookup(sdata, orig_addr); if (mpath) { spin_lock_bh(&mpath->state_lock); if (mpath->flags & MESH_PATH_FIXED) @@ -437,9 +445,8 @@ static u32 hwmp_route_info_get(struct ieee80211_sub_if_data *sdata, } } } else { - mesh_path_add(orig_addr, sdata); - mpath = mesh_path_lookup(orig_addr, sdata); - if (!mpath) { + mpath = mesh_path_add(sdata, orig_addr); + if (IS_ERR(mpath)) { rcu_read_unlock(); return 0; } @@ -470,7 +477,7 @@ static u32 hwmp_route_info_get(struct ieee80211_sub_if_data *sdata, else { fresh_info = true; - mpath = mesh_path_lookup(ta, sdata); + mpath = mesh_path_lookup(sdata, ta); if (mpath) { spin_lock_bh(&mpath->state_lock); if ((mpath->flags & MESH_PATH_FIXED) || @@ -478,9 +485,8 @@ static u32 hwmp_route_info_get(struct ieee80211_sub_if_data *sdata, (last_hop_metric > mpath->metric))) fresh_info = false; } else { - mesh_path_add(ta, sdata); - mpath = mesh_path_lookup(ta, sdata); - if (!mpath) { + mpath = mesh_path_add(sdata, ta); + if (IS_ERR(mpath)) { rcu_read_unlock(); return 0; } @@ -506,11 +512,11 @@ static u32 hwmp_route_info_get(struct ieee80211_sub_if_data *sdata, static void hwmp_preq_frame_process(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt, - u8 *preq_elem, u32 metric) + const u8 *preq_elem, u32 metric) { struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; struct mesh_path *mpath = NULL; - u8 *target_addr, *orig_addr; + const u8 *target_addr, *orig_addr; const u8 *da; u8 target_flags, ttl, flags; u32 orig_sn, target_sn, lifetime, orig_metric; @@ -545,7 +551,7 @@ static void hwmp_preq_frame_process(struct ieee80211_sub_if_data *sdata, } else if (is_broadcast_ether_addr(target_addr) && (target_flags & IEEE80211_PREQ_TO_FLAG)) { rcu_read_lock(); - mpath = mesh_path_lookup(orig_addr, sdata); + mpath = mesh_path_lookup(sdata, orig_addr); if (mpath) { if (flags & IEEE80211_PREQ_PROACTIVE_PREP_FLAG) { reply = true; @@ -560,7 +566,7 @@ static void hwmp_preq_frame_process(struct ieee80211_sub_if_data *sdata, rcu_read_unlock(); } else { rcu_read_lock(); - mpath = mesh_path_lookup(target_addr, sdata); + mpath = mesh_path_lookup(sdata, target_addr); if (mpath) { if ((!(mpath->flags & MESH_PATH_SN_VALID)) || SN_LT(mpath->sn, target_sn)) { @@ -643,17 +649,17 @@ next_hop_deref_protected(struct mesh_path *mpath) static void hwmp_prep_frame_process(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt, - u8 *prep_elem, u32 metric) + const u8 *prep_elem, u32 metric) { struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; struct mesh_path *mpath; - u8 *target_addr, *orig_addr; + const u8 *target_addr, *orig_addr; u8 ttl, hopcount, flags; u8 next_hop[ETH_ALEN]; u32 target_sn, orig_sn, lifetime; mhwmp_dbg(sdata, "received PREP from %pM\n", - PREP_IE_ORIG_ADDR(prep_elem)); + PREP_IE_TARGET_ADDR(prep_elem)); orig_addr = PREP_IE_ORIG_ADDR(prep_elem); if (ether_addr_equal(orig_addr, 
sdata->vif.addr)) @@ -670,7 +676,7 @@ static void hwmp_prep_frame_process(struct ieee80211_sub_if_data *sdata, } rcu_read_lock(); - mpath = mesh_path_lookup(orig_addr, sdata); + mpath = mesh_path_lookup(sdata, orig_addr); if (mpath) spin_lock_bh(&mpath->state_lock); else @@ -706,12 +712,13 @@ fail: } static void hwmp_perr_frame_process(struct ieee80211_sub_if_data *sdata, - struct ieee80211_mgmt *mgmt, u8 *perr_elem) + struct ieee80211_mgmt *mgmt, + const u8 *perr_elem) { struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; struct mesh_path *mpath; u8 ttl; - u8 *ta, *target_addr; + const u8 *ta, *target_addr; u32 target_sn; u16 target_rcode; @@ -727,7 +734,7 @@ static void hwmp_perr_frame_process(struct ieee80211_sub_if_data *sdata, target_rcode = PERR_IE_TARGET_RCODE(perr_elem); rcu_read_lock(); - mpath = mesh_path_lookup(target_addr, sdata); + mpath = mesh_path_lookup(sdata, target_addr); if (mpath) { struct sta_info *sta; @@ -742,9 +749,10 @@ static void hwmp_perr_frame_process(struct ieee80211_sub_if_data *sdata, spin_unlock_bh(&mpath->state_lock); if (!ifmsh->mshcfg.dot11MeshForwarding) goto endperr; - mesh_path_error_tx(ttl, target_addr, cpu_to_le32(target_sn), + mesh_path_error_tx(sdata, ttl, target_addr, + cpu_to_le32(target_sn), cpu_to_le16(target_rcode), - broadcast_addr, sdata); + broadcast_addr); } else spin_unlock_bh(&mpath->state_lock); } @@ -753,15 +761,15 @@ endperr: } static void hwmp_rann_frame_process(struct ieee80211_sub_if_data *sdata, - struct ieee80211_mgmt *mgmt, - struct ieee80211_rann_ie *rann) + struct ieee80211_mgmt *mgmt, + const struct ieee80211_rann_ie *rann) { struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; struct ieee80211_local *local = sdata->local; struct sta_info *sta; struct mesh_path *mpath; u8 ttl, flags, hopcount; - u8 *orig_addr; + const u8 *orig_addr; u32 orig_sn, metric, metric_txsta, interval; bool root_is_gate; @@ -792,11 +800,10 @@ static void hwmp_rann_frame_process(struct ieee80211_sub_if_data *sdata, metric_txsta = airtime_link_metric_get(local, sta); - mpath = mesh_path_lookup(orig_addr, sdata); + mpath = mesh_path_lookup(sdata, orig_addr); if (!mpath) { - mesh_path_add(orig_addr, sdata); - mpath = mesh_path_lookup(orig_addr, sdata); - if (!mpath) { + mpath = mesh_path_add(sdata, orig_addr); + if (IS_ERR(mpath)) { rcu_read_unlock(); sdata->u.mesh.mshstats.dropped_frames_no_route++; return; @@ -852,8 +859,7 @@ static void hwmp_rann_frame_process(struct ieee80211_sub_if_data *sdata, void mesh_rx_path_sel_frame(struct ieee80211_sub_if_data *sdata, - struct ieee80211_mgmt *mgmt, - size_t len) + struct ieee80211_mgmt *mgmt, size_t len) { struct ieee802_11_elems elems; size_t baselen; @@ -874,7 +880,7 @@ void mesh_rx_path_sel_frame(struct ieee80211_sub_if_data *sdata, baselen = (u8 *) mgmt->u.action.u.mesh_action.variable - (u8 *) mgmt; ieee802_11_parse_elems(mgmt->u.action.u.mesh_action.variable, - len - baselen, &elems); + len - baselen, false, &elems); if (elems.preq) { if (elems.preq_len != 37) @@ -997,7 +1003,7 @@ void mesh_path_start_discovery(struct ieee80211_sub_if_data *sdata) spin_unlock_bh(&ifmsh->mesh_preq_queue_lock); rcu_read_lock(); - mpath = mesh_path_lookup(preq_node->dst, sdata); + mpath = mesh_path_lookup(sdata, preq_node->dst); if (!mpath) goto enddiscovery; @@ -1067,8 +1073,8 @@ enddiscovery: * Returns: 0 if the next hop was found and -ENOENT if the frame was queued. * skb is freeed here if no mpath could be allocated. 
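hwmp_rann_frame_process() above, like the PREQ and nexthop paths elsewhere in this series, replaces the old "mesh_path_add() then mesh_path_lookup()" sequence with a mesh_path_add() that returns either the new entry or an errno encoded with ERR_PTR(). A self-contained userspace model of that idiom, with helpers mirroring include/linux/err.h and a hypothetical toy_path_add():

/*
 * Userspace model of the ERR_PTR convention: the top MAX_ERRNO
 * pointer values are reserved to carry negative error codes, so a
 * single return value can be either a valid entry or an error.
 */
#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define MAX_ERRNO 4095

static inline void *ERR_PTR(long error)   { return (void *)error; }
static inline long PTR_ERR(const void *p) { return (long)p; }
static inline int IS_ERR(const void *p)
{
        /* the top 4095 addresses are reserved for error codes */
        return (uintptr_t)p >= (uintptr_t)-MAX_ERRNO;
}

struct toy_path { int dst; };

static struct toy_path *toy_path_add(int dst, int table_full)
{
        struct toy_path *p;

        if (table_full)
                return ERR_PTR(-ENOSPC);
        p = malloc(sizeof(*p));
        if (!p)
                return ERR_PTR(-ENOMEM);
        p->dst = dst;
        return p;
}

int main(void)
{
        struct toy_path *p = toy_path_add(42, 1);

        if (IS_ERR(p)) {                /* one call, one check */
                printf("add failed: %ld\n", PTR_ERR(p));
                return 1;
        }
        free(p);
        return 0;
}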
*/ -int mesh_nexthop_resolve(struct sk_buff *skb, - struct ieee80211_sub_if_data *sdata) +int mesh_nexthop_resolve(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb) { struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); @@ -1077,19 +1083,22 @@ int mesh_nexthop_resolve(struct sk_buff *skb, u8 *target_addr = hdr->addr3; int err = 0; + /* Nulls are only sent to peers for PS and should be pre-addressed */ + if (ieee80211_is_qos_nullfunc(hdr->frame_control)) + return 0; + rcu_read_lock(); - err = mesh_nexthop_lookup(skb, sdata); + err = mesh_nexthop_lookup(sdata, skb); if (!err) goto endlookup; /* no nexthop found, start resolving */ - mpath = mesh_path_lookup(target_addr, sdata); + mpath = mesh_path_lookup(sdata, target_addr); if (!mpath) { - mesh_path_add(target_addr, sdata); - mpath = mesh_path_lookup(target_addr, sdata); - if (!mpath) { - mesh_path_discard_frame(skb, sdata); - err = -ENOSPC; + mpath = mesh_path_add(sdata, target_addr); + if (IS_ERR(mpath)) { + mesh_path_discard_frame(sdata, skb); + err = PTR_ERR(mpath); goto endlookup; } } @@ -1105,12 +1114,13 @@ int mesh_nexthop_resolve(struct sk_buff *skb, skb_queue_tail(&mpath->frame_queue, skb); err = -ENOENT; if (skb_to_free) - mesh_path_discard_frame(skb_to_free, sdata); + mesh_path_discard_frame(sdata, skb_to_free); endlookup: rcu_read_unlock(); return err; } + /** * mesh_nexthop_lookup - put the appropriate next hop on a mesh frame. Calling * this function is considered "using" the associated mpath, so preempt a path @@ -1121,8 +1131,8 @@ endlookup: * * Returns: 0 if the next hop was found. Nonzero otherwise. */ -int mesh_nexthop_lookup(struct sk_buff *skb, - struct ieee80211_sub_if_data *sdata) +int mesh_nexthop_lookup(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb) { struct mesh_path *mpath; struct sta_info *next_hop; @@ -1131,7 +1141,7 @@ int mesh_nexthop_lookup(struct sk_buff *skb, int err = -ENOENT; rcu_read_lock(); - mpath = mesh_path_lookup(target_addr, sdata); + mpath = mesh_path_lookup(sdata, target_addr); if (!mpath || !(mpath->flags & MESH_PATH_ACTIVE)) goto endlookup; @@ -1148,6 +1158,7 @@ int mesh_nexthop_lookup(struct sk_buff *skb, if (next_hop) { memcpy(hdr->addr1, next_hop->sta.addr, ETH_ALEN); memcpy(hdr->addr2, sdata->vif.addr, ETH_ALEN); + ieee80211_mps_set_frame_flags(sdata, next_hop, hdr); err = 0; } @@ -1189,8 +1200,7 @@ void mesh_path_timer(unsigned long data) } } -void -mesh_path_tx_root_frame(struct ieee80211_sub_if_data *sdata) +void mesh_path_tx_root_frame(struct ieee80211_sub_if_data *sdata) { struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; u32 interval = ifmsh->mshcfg.dot11MeshHWMPRannInterval; diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c index aa749818860e..89aacfd2756d 100644 --- a/net/mac80211/mesh_pathtbl.c +++ b/net/mac80211/mesh_pathtbl.c @@ -24,9 +24,12 @@ /* Keep the mean chain length below this constant */ #define MEAN_CHAIN_LEN 2 -#define MPATH_EXPIRED(mpath) ((mpath->flags & MESH_PATH_ACTIVE) && \ - time_after(jiffies, mpath->exp_time) && \ - !(mpath->flags & MESH_PATH_FIXED)) +static inline bool mpath_expired(struct mesh_path *mpath) +{ + return (mpath->flags & MESH_PATH_ACTIVE) && + time_after(jiffies, mpath->exp_time) && + !(mpath->flags & MESH_PATH_FIXED); +} struct mpath_node { struct hlist_node list; @@ -69,9 +72,9 @@ static inline struct mesh_table *resize_dereference_mpp_paths(void) * it's used twice. 
So it is illegal to do * for_each_mesh_entry(rcu_dereference(...), ...) */ -#define for_each_mesh_entry(tbl, p, node, i) \ +#define for_each_mesh_entry(tbl, node, i) \ for (i = 0; i <= tbl->hash_mask; i++) \ - hlist_for_each_entry_rcu(node, p, &tbl->hash_buckets[i], list) + hlist_for_each_entry_rcu(node, &tbl->hash_buckets[i], list) static struct mesh_table *mesh_table_alloc(int size_order) @@ -136,7 +139,7 @@ static void mesh_table_free(struct mesh_table *tbl, bool free_leafs) } if (free_leafs) { spin_lock_bh(&tbl->gates_lock); - hlist_for_each_entry_safe(gate, p, q, + hlist_for_each_entry_safe(gate, q, tbl->known_gates, list) { hlist_del(&gate->list); kfree(gate); @@ -181,12 +184,12 @@ errcopy: return -ENOMEM; } -static u32 mesh_table_hash(u8 *addr, struct ieee80211_sub_if_data *sdata, +static u32 mesh_table_hash(const u8 *addr, struct ieee80211_sub_if_data *sdata, struct mesh_table *tbl) { /* Use last four bytes of hw addr and interface index as hash index */ - return jhash_2words(*(u32 *)(addr+2), sdata->dev->ifindex, tbl->hash_rnd) - & tbl->hash_mask; + return jhash_2words(*(u32 *)(addr+2), sdata->dev->ifindex, + tbl->hash_rnd) & tbl->hash_mask; } @@ -212,6 +215,7 @@ void mesh_path_assign_nexthop(struct mesh_path *mpath, struct sta_info *sta) hdr = (struct ieee80211_hdr *) skb->data; memcpy(hdr->addr1, sta->sta.addr, ETH_ALEN); memcpy(hdr->addr2, mpath->sdata->vif.addr, ETH_ALEN); + ieee80211_mps_set_frame_flags(sta->sdata, sta, hdr); } spin_unlock_irqrestore(&mpath->frame_queue.lock, flags); @@ -325,20 +329,19 @@ static void mesh_path_move_to_queue(struct mesh_path *gate_mpath, } -static struct mesh_path *mpath_lookup(struct mesh_table *tbl, u8 *dst, - struct ieee80211_sub_if_data *sdata) +static struct mesh_path *mpath_lookup(struct mesh_table *tbl, const u8 *dst, + struct ieee80211_sub_if_data *sdata) { struct mesh_path *mpath; - struct hlist_node *n; struct hlist_head *bucket; struct mpath_node *node; bucket = &tbl->hash_buckets[mesh_table_hash(dst, sdata, tbl)]; - hlist_for_each_entry_rcu(node, n, bucket, list) { + hlist_for_each_entry_rcu(node, bucket, list) { mpath = node->mpath; if (mpath->sdata == sdata && ether_addr_equal(dst, mpath->dst)) { - if (MPATH_EXPIRED(mpath)) { + if (mpath_expired(mpath)) { spin_lock_bh(&mpath->state_lock); mpath->flags &= ~MESH_PATH_ACTIVE; spin_unlock_bh(&mpath->state_lock); @@ -351,19 +354,21 @@ static struct mesh_path *mpath_lookup(struct mesh_table *tbl, u8 *dst, /** * mesh_path_lookup - look up a path in the mesh path table - * @dst: hardware address (ETH_ALEN length) of destination * @sdata: local subif + * @dst: hardware address (ETH_ALEN length) of destination * * Returns: pointer to the mesh path structure, or NULL if not found * * Locking: must be called within a read rcu section. */ -struct mesh_path *mesh_path_lookup(u8 *dst, struct ieee80211_sub_if_data *sdata) +struct mesh_path * +mesh_path_lookup(struct ieee80211_sub_if_data *sdata, const u8 *dst) { return mpath_lookup(rcu_dereference(mesh_paths), dst, sdata); } -struct mesh_path *mpp_path_lookup(u8 *dst, struct ieee80211_sub_if_data *sdata) +struct mesh_path * +mpp_path_lookup(struct ieee80211_sub_if_data *sdata, const u8 *dst) { return mpath_lookup(rcu_dereference(mpp_paths), dst, sdata); } @@ -378,19 +383,19 @@ struct mesh_path *mpp_path_lookup(u8 *dst, struct ieee80211_sub_if_data *sdata) * * Locking: must be called within a read rcu section. 
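mesh_table_hash(), reformatted above, derives the bucket index from the last four bytes of the destination MAC plus the interface index, masked down to the power-of-two table size. A userspace sketch of the same shape; mix2() is an arbitrary stand-in mixer, where the kernel uses jhash_2words() with a random per-table seed:

/*
 * Toy bucket selection, not the kernel's jhash: hash (MAC tail,
 * ifindex) under a seed, then mask to the table size.
 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static uint32_t mix2(uint32_t a, uint32_t b, uint32_t seed)
{
        uint32_t h = seed ^ a;

        h = (h ^ (h >> 16)) * 0x45d9f3b;  /* arbitrary avalanche step */
        h ^= b;
        h = (h ^ (h >> 16)) * 0x45d9f3b;
        return h ^ (h >> 16);
}

static uint32_t toy_table_hash(const uint8_t addr[6], uint32_t ifindex,
                               uint32_t hash_rnd, uint32_t hash_mask)
{
        uint32_t tail;

        memcpy(&tail, addr + 2, 4);       /* last four bytes of the MAC */
        return mix2(tail, ifindex, hash_rnd) & hash_mask;
}

int main(void)
{
        const uint8_t addr[6] = { 0x02, 0x00, 0xde, 0xad, 0xbe, 0xef };

        /* 16 buckets -> hash_mask = 15 */
        printf("bucket %u\n", toy_table_hash(addr, 3, 0x1234abcd, 15));
        return 0;
}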
*/ -struct mesh_path *mesh_path_lookup_by_idx(int idx, struct ieee80211_sub_if_data *sdata) +struct mesh_path * +mesh_path_lookup_by_idx(struct ieee80211_sub_if_data *sdata, int idx) { struct mesh_table *tbl = rcu_dereference(mesh_paths); struct mpath_node *node; - struct hlist_node *p; int i; int j = 0; - for_each_mesh_entry(tbl, p, node, i) { + for_each_mesh_entry(tbl, node, i) { if (sdata && node->mpath->sdata != sdata) continue; if (j++ == idx) { - if (MPATH_EXPIRED(node->mpath)) { + if (mpath_expired(node->mpath)) { spin_lock_bh(&node->mpath->state_lock); node->mpath->flags &= ~MESH_PATH_ACTIVE; spin_unlock_bh(&node->mpath->state_lock); @@ -410,13 +415,12 @@ int mesh_path_add_gate(struct mesh_path *mpath) { struct mesh_table *tbl; struct mpath_node *gate, *new_gate; - struct hlist_node *n; int err; rcu_read_lock(); tbl = rcu_dereference(mesh_paths); - hlist_for_each_entry_rcu(gate, n, tbl->known_gates, list) + hlist_for_each_entry_rcu(gate, tbl->known_gates, list) if (gate->mpath == mpath) { err = -EEXIST; goto err_rcu; @@ -434,11 +438,10 @@ int mesh_path_add_gate(struct mesh_path *mpath) spin_lock_bh(&tbl->gates_lock); hlist_add_head_rcu(&new_gate->list, tbl->known_gates); spin_unlock_bh(&tbl->gates_lock); - rcu_read_unlock(); mpath_dbg(mpath->sdata, "Mesh path: Recorded new gate: %pM. %d known gates\n", mpath->dst, mpath->sdata->u.mesh.num_gates); - return 0; + err = 0; err_rcu: rcu_read_unlock(); return err; @@ -449,30 +452,27 @@ err_rcu: * @tbl: table which holds our list of known gates * @mpath: gate mpath * - * Returns: 0 on success - * * Locking: must be called inside rcu_read_lock() section */ -static int mesh_gate_del(struct mesh_table *tbl, struct mesh_path *mpath) +static void mesh_gate_del(struct mesh_table *tbl, struct mesh_path *mpath) { struct mpath_node *gate; - struct hlist_node *p, *q; + struct hlist_node *q; - hlist_for_each_entry_safe(gate, p, q, tbl->known_gates, list) - if (gate->mpath == mpath) { - spin_lock_bh(&tbl->gates_lock); - hlist_del_rcu(&gate->list); - kfree_rcu(gate, rcu); - spin_unlock_bh(&tbl->gates_lock); - mpath->sdata->u.mesh.num_gates--; - mpath->is_gate = false; - mpath_dbg(mpath->sdata, - "Mesh path: Deleted gate: %pM. %d known gates\n", - mpath->dst, mpath->sdata->u.mesh.num_gates); - break; - } - - return 0; + hlist_for_each_entry_safe(gate, q, tbl->known_gates, list) { + if (gate->mpath != mpath) + continue; + spin_lock_bh(&tbl->gates_lock); + hlist_del_rcu(&gate->list); + kfree_rcu(gate, rcu); + spin_unlock_bh(&tbl->gates_lock); + mpath->sdata->u.mesh.num_gates--; + mpath->is_gate = false; + mpath_dbg(mpath->sdata, + "Mesh path: Deleted gate: %pM. 
%d known gates\n", + mpath->dst, mpath->sdata->u.mesh.num_gates); + break; + } } /** @@ -486,14 +486,15 @@ int mesh_gate_num(struct ieee80211_sub_if_data *sdata) /** * mesh_path_add - allocate and add a new path to the mesh path table - * @addr: destination address of the path (ETH_ALEN length) + * @dst: destination address of the path (ETH_ALEN length) * @sdata: local subif * * Returns: 0 on success * * State: the initial state of the new path is set to 0 */ -int mesh_path_add(u8 *dst, struct ieee80211_sub_if_data *sdata) +struct mesh_path *mesh_path_add(struct ieee80211_sub_if_data *sdata, + const u8 *dst) { struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; struct ieee80211_local *local = sdata->local; @@ -501,20 +502,34 @@ int mesh_path_add(u8 *dst, struct ieee80211_sub_if_data *sdata) struct mesh_path *mpath, *new_mpath; struct mpath_node *node, *new_node; struct hlist_head *bucket; - struct hlist_node *n; int grow = 0; - int err = 0; + int err; u32 hash_idx; if (ether_addr_equal(dst, sdata->vif.addr)) /* never add ourselves as neighbours */ - return -ENOTSUPP; + return ERR_PTR(-ENOTSUPP); if (is_multicast_ether_addr(dst)) - return -ENOTSUPP; + return ERR_PTR(-ENOTSUPP); if (atomic_add_unless(&sdata->u.mesh.mpaths, 1, MESH_MAX_MPATHS) == 0) - return -ENOSPC; + return ERR_PTR(-ENOSPC); + + read_lock_bh(&pathtbl_resize_lock); + tbl = resize_dereference_mesh_paths(); + + hash_idx = mesh_table_hash(dst, sdata, tbl); + bucket = &tbl->hash_buckets[hash_idx]; + + spin_lock(&tbl->hashwlock[hash_idx]); + + hlist_for_each_entry(node, bucket, list) { + mpath = node->mpath; + if (mpath->sdata == sdata && + ether_addr_equal(dst, mpath->dst)) + goto found; + } err = -ENOMEM; new_mpath = kzalloc(sizeof(struct mesh_path), GFP_ATOMIC); @@ -525,7 +540,6 @@ int mesh_path_add(u8 *dst, struct ieee80211_sub_if_data *sdata) if (!new_node) goto err_node_alloc; - read_lock_bh(&pathtbl_resize_lock); memcpy(new_mpath->dst, dst, ETH_ALEN); eth_broadcast_addr(new_mpath->rann_snd_addr); new_mpath->is_root = false; @@ -539,21 +553,6 @@ int mesh_path_add(u8 *dst, struct ieee80211_sub_if_data *sdata) spin_lock_init(&new_mpath->state_lock); init_timer(&new_mpath->timer); - tbl = resize_dereference_mesh_paths(); - - hash_idx = mesh_table_hash(dst, sdata, tbl); - bucket = &tbl->hash_buckets[hash_idx]; - - spin_lock(&tbl->hashwlock[hash_idx]); - - err = -EEXIST; - hlist_for_each_entry(node, n, bucket, list) { - mpath = node->mpath; - if (mpath->sdata == sdata && - ether_addr_equal(dst, mpath->dst)) - goto err_exists; - } - hlist_add_head_rcu(&new_node->list, bucket); if (atomic_inc_return(&tbl->entries) >= tbl->mean_chain_len * (tbl->hash_mask + 1)) @@ -561,23 +560,23 @@ int mesh_path_add(u8 *dst, struct ieee80211_sub_if_data *sdata) mesh_paths_generation++; - spin_unlock(&tbl->hashwlock[hash_idx]); - read_unlock_bh(&pathtbl_resize_lock); if (grow) { set_bit(MESH_WORK_GROW_MPATH_TABLE, &ifmsh->wrkq_flags); ieee80211_queue_work(&local->hw, &sdata->work); } - return 0; - -err_exists: + mpath = new_mpath; +found: spin_unlock(&tbl->hashwlock[hash_idx]); read_unlock_bh(&pathtbl_resize_lock); - kfree(new_node); + return mpath; + err_node_alloc: kfree(new_mpath); err_path_alloc: atomic_dec(&sdata->u.mesh.mpaths); - return err; + spin_unlock(&tbl->hashwlock[hash_idx]); + read_unlock_bh(&pathtbl_resize_lock); + return ERR_PTR(err); } static void mesh_table_free_rcu(struct rcu_head *rcu) @@ -628,7 +627,8 @@ void mesh_mpp_table_grow(void) write_unlock_bh(&pathtbl_resize_lock); } -int mpp_path_add(u8 *dst, u8 *mpp, struct 
ieee80211_sub_if_data *sdata) +int mpp_path_add(struct ieee80211_sub_if_data *sdata, + const u8 *dst, const u8 *mpp) { struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; struct ieee80211_local *local = sdata->local; @@ -636,7 +636,6 @@ int mpp_path_add(u8 *dst, u8 *mpp, struct ieee80211_sub_if_data *sdata) struct mesh_path *mpath, *new_mpath; struct mpath_node *node, *new_node; struct hlist_head *bucket; - struct hlist_node *n; int grow = 0; int err = 0; u32 hash_idx; @@ -676,7 +675,7 @@ int mpp_path_add(u8 *dst, u8 *mpp, struct ieee80211_sub_if_data *sdata) spin_lock(&tbl->hashwlock[hash_idx]); err = -EEXIST; - hlist_for_each_entry(node, n, bucket, list) { + hlist_for_each_entry(node, bucket, list) { mpath = node->mpath; if (mpath->sdata == sdata && ether_addr_equal(dst, mpath->dst)) @@ -721,14 +720,13 @@ void mesh_plink_broken(struct sta_info *sta) static const u8 bcast[ETH_ALEN] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; struct mesh_path *mpath; struct mpath_node *node; - struct hlist_node *p; struct ieee80211_sub_if_data *sdata = sta->sdata; int i; __le16 reason = cpu_to_le16(WLAN_REASON_MESH_PATH_DEST_UNREACHABLE); rcu_read_lock(); tbl = rcu_dereference(mesh_paths); - for_each_mesh_entry(tbl, p, node, i) { + for_each_mesh_entry(tbl, node, i) { mpath = node->mpath; if (rcu_dereference(mpath->next_hop) == sta && mpath->flags & MESH_PATH_ACTIVE && @@ -737,9 +735,10 @@ void mesh_plink_broken(struct sta_info *sta) mpath->flags &= ~MESH_PATH_ACTIVE; ++mpath->sn; spin_unlock_bh(&mpath->state_lock); - mesh_path_error_tx(sdata->u.mesh.mshcfg.element_ttl, - mpath->dst, cpu_to_le32(mpath->sn), - reason, bcast, sdata); + mesh_path_error_tx(sdata, + sdata->u.mesh.mshcfg.element_ttl, + mpath->dst, cpu_to_le32(mpath->sn), + reason, bcast); } } rcu_read_unlock(); @@ -787,13 +786,12 @@ void mesh_path_flush_by_nexthop(struct sta_info *sta) struct mesh_table *tbl; struct mesh_path *mpath; struct mpath_node *node; - struct hlist_node *p; int i; rcu_read_lock(); read_lock_bh(&pathtbl_resize_lock); tbl = resize_dereference_mesh_paths(); - for_each_mesh_entry(tbl, p, node, i) { + for_each_mesh_entry(tbl, node, i) { mpath = node->mpath; if (rcu_dereference(mpath->next_hop) == sta) { spin_lock(&tbl->hashwlock[i]); @@ -810,11 +808,10 @@ static void table_flush_by_iface(struct mesh_table *tbl, { struct mesh_path *mpath; struct mpath_node *node; - struct hlist_node *p; int i; WARN_ON(!rcu_read_lock_held()); - for_each_mesh_entry(tbl, p, node, i) { + for_each_mesh_entry(tbl, node, i) { mpath = node->mpath; if (mpath->sdata != sdata) continue; @@ -854,13 +851,12 @@ void mesh_path_flush_by_iface(struct ieee80211_sub_if_data *sdata) * * Returns: 0 if successful */ -int mesh_path_del(u8 *addr, struct ieee80211_sub_if_data *sdata) +int mesh_path_del(struct ieee80211_sub_if_data *sdata, const u8 *addr) { struct mesh_table *tbl; struct mesh_path *mpath; struct mpath_node *node; struct hlist_head *bucket; - struct hlist_node *n; int hash_idx; int err = 0; @@ -870,7 +866,7 @@ int mesh_path_del(u8 *addr, struct ieee80211_sub_if_data *sdata) bucket = &tbl->hash_buckets[hash_idx]; spin_lock(&tbl->hashwlock[hash_idx]); - hlist_for_each_entry(node, n, bucket, list) { + hlist_for_each_entry(node, bucket, list) { mpath = node->mpath; if (mpath->sdata == sdata && ether_addr_equal(addr, mpath->dst)) { @@ -915,7 +911,6 @@ void mesh_path_tx_pending(struct mesh_path *mpath) int mesh_path_send_to_gates(struct mesh_path *mpath) { struct ieee80211_sub_if_data *sdata = mpath->sdata; - struct hlist_node *n; struct mesh_table *tbl; struct 
mesh_path *from_mpath = mpath; struct mpath_node *gate = NULL; @@ -930,7 +925,7 @@ int mesh_path_send_to_gates(struct mesh_path *mpath) if (!known_gates) return -EHOSTUNREACH; - hlist_for_each_entry_rcu(gate, n, known_gates, list) { + hlist_for_each_entry_rcu(gate, known_gates, list) { if (gate->mpath->sdata != sdata) continue; @@ -946,7 +941,7 @@ int mesh_path_send_to_gates(struct mesh_path *mpath) } } - hlist_for_each_entry_rcu(gate, n, known_gates, list) + hlist_for_each_entry_rcu(gate, known_gates, list) if (gate->mpath->sdata == sdata) { mpath_dbg(sdata, "Sending to %pM\n", gate->mpath->dst); mesh_path_tx_pending(gate->mpath); @@ -963,8 +958,8 @@ int mesh_path_send_to_gates(struct mesh_path *mpath) * * Locking: the function must me called within a rcu_read_lock region */ -void mesh_path_discard_frame(struct sk_buff *skb, - struct ieee80211_sub_if_data *sdata) +void mesh_path_discard_frame(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb) { kfree_skb(skb); sdata->u.mesh.mshstats.dropped_frames_no_route++; @@ -982,7 +977,7 @@ void mesh_path_flush_pending(struct mesh_path *mpath) struct sk_buff *skb; while ((skb = skb_dequeue(&mpath->frame_queue)) != NULL) - mesh_path_discard_frame(skb, mpath->sdata); + mesh_path_discard_frame(mpath->sdata, skb); } /** @@ -1091,19 +1086,18 @@ void mesh_path_expire(struct ieee80211_sub_if_data *sdata) struct mesh_table *tbl; struct mesh_path *mpath; struct mpath_node *node; - struct hlist_node *p; int i; rcu_read_lock(); tbl = rcu_dereference(mesh_paths); - for_each_mesh_entry(tbl, p, node, i) { + for_each_mesh_entry(tbl, node, i) { if (node->mpath->sdata != sdata) continue; mpath = node->mpath; if ((!(mpath->flags & MESH_PATH_RESOLVING)) && (!(mpath->flags & MESH_PATH_FIXED)) && time_after(jiffies, mpath->exp_time + MESH_PATH_EXPIRE)) - mesh_path_del(mpath->dst, mpath->sdata); + mesh_path_del(mpath->sdata, mpath->dst); } rcu_read_unlock(); } diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c index 4b274e9c91a5..09bebed99416 100644 --- a/net/mac80211/mesh_plink.c +++ b/net/mac80211/mesh_plink.c @@ -37,23 +37,31 @@ enum plink_event { CLS_IGNR }; -static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata, - enum ieee80211_self_protected_actioncode action, - u8 *da, __le16 llid, __le16 plid, __le16 reason); +static const char * const mplstates[] = { + [NL80211_PLINK_LISTEN] = "LISTEN", + [NL80211_PLINK_OPN_SNT] = "OPN-SNT", + [NL80211_PLINK_OPN_RCVD] = "OPN-RCVD", + [NL80211_PLINK_CNF_RCVD] = "CNF_RCVD", + [NL80211_PLINK_ESTAB] = "ESTAB", + [NL80211_PLINK_HOLDING] = "HOLDING", + [NL80211_PLINK_BLOCKED] = "BLOCKED" +}; -static inline -u32 mesh_plink_inc_estab_count(struct ieee80211_sub_if_data *sdata) -{ - atomic_inc(&sdata->u.mesh.estab_plinks); - return mesh_accept_plinks_update(sdata); -} +static const char * const mplevents[] = { + [PLINK_UNDEFINED] = "NONE", + [OPN_ACPT] = "OPN_ACPT", + [OPN_RJCT] = "OPN_RJCT", + [OPN_IGNR] = "OPN_IGNR", + [CNF_ACPT] = "CNF_ACPT", + [CNF_RJCT] = "CNF_RJCT", + [CNF_IGNR] = "CNF_IGNR", + [CLS_ACPT] = "CLS_ACPT", + [CLS_IGNR] = "CLS_IGNR" +}; -static inline -u32 mesh_plink_dec_estab_count(struct ieee80211_sub_if_data *sdata) -{ - atomic_dec(&sdata->u.mesh.estab_plinks); - return mesh_accept_plinks_update(sdata); -} +static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata, + enum ieee80211_self_protected_actioncode action, + u8 *da, __le16 llid, __le16 plid, __le16 reason); /** * mesh_plink_fsm_restart - restart a mesh peer link finite state machine @@ -70,27 +78,63 @@ static 
inline void mesh_plink_fsm_restart(struct sta_info *sta) } /* - * Allocate mesh sta entry and insert into station table + * mesh_set_short_slot_time - enable / disable ERP short slot time. + * + * The standard indirectly mandates mesh STAs to turn off short slot time by + * disallowing advertising this (802.11-2012 8.4.1.4), but that doesn't mean we + * can't be sneaky about it. Enable short slot time if all mesh STAs in the + * MBSS support ERP rates. + * + * Returns BSS_CHANGED_ERP_SLOT or 0 for no change. */ -static struct sta_info *mesh_plink_alloc(struct ieee80211_sub_if_data *sdata, - u8 *hw_addr) +static u32 mesh_set_short_slot_time(struct ieee80211_sub_if_data *sdata) { + struct ieee80211_local *local = sdata->local; + enum ieee80211_band band = ieee80211_get_sdata_band(sdata); + struct ieee80211_supported_band *sband = local->hw.wiphy->bands[band]; struct sta_info *sta; + u32 erp_rates = 0, changed = 0; + int i; + bool short_slot = false; - if (sdata->local->num_sta >= MESH_MAX_PLINKS) - return NULL; + if (band == IEEE80211_BAND_5GHZ) { + /* (IEEE 802.11-2012 19.4.5) */ + short_slot = true; + goto out; + } else if (band != IEEE80211_BAND_2GHZ || + (band == IEEE80211_BAND_2GHZ && + local->hw.flags & IEEE80211_HW_2GHZ_SHORT_SLOT_INCAPABLE)) + goto out; - sta = sta_info_alloc(sdata, hw_addr, GFP_KERNEL); - if (!sta) - return NULL; + for (i = 0; i < sband->n_bitrates; i++) + if (sband->bitrates[i].flags & IEEE80211_RATE_ERP_G) + erp_rates |= BIT(i); - sta_info_pre_move_state(sta, IEEE80211_STA_AUTH); - sta_info_pre_move_state(sta, IEEE80211_STA_ASSOC); - sta_info_pre_move_state(sta, IEEE80211_STA_AUTHORIZED); + if (!erp_rates) + goto out; - set_sta_flag(sta, WLAN_STA_WME); + rcu_read_lock(); + list_for_each_entry_rcu(sta, &local->sta_list, list) { + if (sdata != sta->sdata || + sta->plink_state != NL80211_PLINK_ESTAB) + continue; - return sta; + short_slot = false; + if (erp_rates & sta->sta.supp_rates[band]) + short_slot = true; + else + break; + } + rcu_read_unlock(); + +out: + if (sdata->vif.bss_conf.use_short_slot != short_slot) { + sdata->vif.bss_conf.use_short_slot = short_slot; + changed = BSS_CHANGED_ERP_SLOT; + mpl_dbg(sdata, "mesh_plink %pM: ERP short slot time %d\n", + sdata->vif.addr, short_slot); + } + return changed; } /** @@ -107,7 +151,6 @@ static u32 mesh_set_ht_prot_mode(struct ieee80211_sub_if_data *sdata) { struct ieee80211_local *local = sdata->local; struct sta_info *sta; - u32 changed = 0; u16 ht_opmode; bool non_ht_sta = false, ht20_sta = false; @@ -120,23 +163,19 @@ static u32 mesh_set_ht_prot_mode(struct ieee80211_sub_if_data *sdata) sta->plink_state != NL80211_PLINK_ESTAB) continue; - switch (sta->ch_width) { - case NL80211_CHAN_WIDTH_20_NOHT: - mpl_dbg(sdata, - "mesh_plink %pM: nonHT sta (%pM) is present\n", - sdata->vif.addr, sta->sta.addr); + if (sta->sta.bandwidth > IEEE80211_STA_RX_BW_20) + continue; + + if (!sta->sta.ht_cap.ht_supported) { + mpl_dbg(sdata, "nonHT sta (%pM) is present\n", + sta->sta.addr); non_ht_sta = true; - goto out; - case NL80211_CHAN_WIDTH_20: - mpl_dbg(sdata, - "mesh_plink %pM: HT20 sta (%pM) is present\n", - sdata->vif.addr, sta->sta.addr); - ht20_sta = true; - default: break; } + + mpl_dbg(sdata, "HT20 sta (%pM) is present\n", sta->sta.addr); + ht20_sta = true; } -out: rcu_read_unlock(); if (non_ht_sta) @@ -147,16 +186,13 @@ out: else ht_opmode = IEEE80211_HT_OP_MODE_PROTECTION_NONE; - if (sdata->vif.bss_conf.ht_operation_mode != ht_opmode) { - sdata->vif.bss_conf.ht_operation_mode = ht_opmode; - sdata->u.mesh.mshcfg.ht_opmode 
= ht_opmode; - changed = BSS_CHANGED_HT; - mpl_dbg(sdata, - "mesh_plink %pM: protection mode changed to %d\n", - sdata->vif.addr, ht_opmode); - } + if (sdata->vif.bss_conf.ht_operation_mode == ht_opmode) + return 0; - return changed; + sdata->vif.bss_conf.ht_operation_mode = ht_opmode; + sdata->u.mesh.mshcfg.ht_opmode = ht_opmode; + mpl_dbg(sdata, "selected new HT protection mode %d\n", ht_opmode); + return BSS_CHANGED_HT; } /** @@ -179,6 +215,9 @@ static u32 __mesh_plink_deactivate(struct sta_info *sta) sta->plink_state = NL80211_PLINK_BLOCKED; mesh_path_flush_by_nexthop(sta); + ieee80211_mps_sta_status_update(sta); + changed |= ieee80211_mps_local_status_update(sdata); + return changed; } @@ -189,7 +228,7 @@ static u32 __mesh_plink_deactivate(struct sta_info *sta) * * All mesh paths with this peer as next hop will be flushed */ -void mesh_plink_deactivate(struct sta_info *sta) +u32 mesh_plink_deactivate(struct sta_info *sta) { struct ieee80211_sub_if_data *sdata = sta->sdata; u32 changed; @@ -202,12 +241,13 @@ void mesh_plink_deactivate(struct sta_info *sta) sta->reason); spin_unlock_bh(&sta->lock); - ieee80211_bss_info_change_notify(sdata, changed); + return changed; } static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata, - enum ieee80211_self_protected_actioncode action, - u8 *da, __le16 llid, __le16 plid, __le16 reason) { + enum ieee80211_self_protected_actioncode action, + u8 *da, __le16 llid, __le16 plid, __le16 reason) +{ struct ieee80211_local *local = sdata->local; struct sk_buff *skb; struct ieee80211_tx_info *info; @@ -258,13 +298,13 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata, } if (ieee80211_add_srates_ie(sdata, skb, true, band) || ieee80211_add_ext_srates_ie(sdata, skb, true, band) || - mesh_add_rsn_ie(skb, sdata) || - mesh_add_meshid_ie(skb, sdata) || - mesh_add_meshconf_ie(skb, sdata)) + mesh_add_rsn_ie(sdata, skb) || + mesh_add_meshid_ie(sdata, skb) || + mesh_add_meshconf_ie(sdata, skb)) goto free; } else { /* WLAN_SP_MESH_PEERING_CLOSE */ info->flags |= IEEE80211_TX_CTL_NO_ACK; - if (mesh_add_meshid_ie(skb, sdata)) + if (mesh_add_meshid_ie(sdata, skb)) goto free; } @@ -308,12 +348,12 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata, } if (action != WLAN_SP_MESH_PEERING_CLOSE) { - if (mesh_add_ht_cap_ie(skb, sdata) || - mesh_add_ht_oper_ie(skb, sdata)) + if (mesh_add_ht_cap_ie(sdata, skb) || + mesh_add_ht_oper_ie(sdata, skb)) goto free; } - if (mesh_add_vendor_ies(skb, sdata)) + if (mesh_add_vendor_ies(sdata, skb)) goto free; ieee80211_tx_skb(sdata, skb); @@ -323,92 +363,147 @@ free: return err; } -/** - * mesh_peer_init - initialize new mesh peer and return resulting sta_info - * - * @sdata: local meshif - * @addr: peer's address - * @elems: IEs from beacon or mesh peering frame - * - * call under RCU - */ -static struct sta_info *mesh_peer_init(struct ieee80211_sub_if_data *sdata, - u8 *addr, - struct ieee802_11_elems *elems) +static void mesh_sta_info_init(struct ieee80211_sub_if_data *sdata, + struct sta_info *sta, + struct ieee802_11_elems *elems, bool insert) { struct ieee80211_local *local = sdata->local; enum ieee80211_band band = ieee80211_get_sdata_band(sdata); struct ieee80211_supported_band *sband; - u32 rates, basic_rates = 0; - struct sta_info *sta; - bool insert = false; + u32 rates, basic_rates = 0, changed = 0; sband = local->hw.wiphy->bands[band]; rates = ieee80211_sta_get_rates(local, elems, band, &basic_rates); - sta = sta_info_get(sdata, addr); - if (!sta) { - /* Userspace handles peer 
allocation when security is enabled */ - if (sdata->u.mesh.security & IEEE80211_MESH_SEC_AUTHED) { - cfg80211_notify_new_peer_candidate(sdata->dev, addr, - elems->ie_start, - elems->total_len, - GFP_ATOMIC); - return NULL; - } - - sta = mesh_plink_alloc(sdata, addr); - if (!sta) - return NULL; - insert = true; - } - spin_lock_bh(&sta->lock); sta->last_rx = jiffies; - if (sta->plink_state == NL80211_PLINK_ESTAB) { - spin_unlock_bh(&sta->lock); - return sta; - } + /* rates and capabilities don't change during peering */ + if (sta->plink_state == NL80211_PLINK_ESTAB) + goto out; + + if (sta->sta.supp_rates[band] != rates) + changed |= IEEE80211_RC_SUPP_RATES_CHANGED; sta->sta.supp_rates[band] = rates; - if (elems->ht_cap_elem && - sdata->vif.bss_conf.chandef.width != NL80211_CHAN_WIDTH_20_NOHT) - ieee80211_ht_cap_ie_to_sta_ht_cap(sdata, sband, - elems->ht_cap_elem, - &sta->sta.ht_cap); - else - memset(&sta->sta.ht_cap, 0, sizeof(sta->sta.ht_cap)); - - if (elems->ht_operation) { - struct cfg80211_chan_def chandef; - - if (!(elems->ht_operation->ht_param & - IEEE80211_HT_PARAM_CHAN_WIDTH_ANY)) - sta->sta.ht_cap.cap &= - ~IEEE80211_HT_CAP_SUP_WIDTH_20_40; - ieee80211_ht_oper_to_chandef(sdata->vif.bss_conf.chandef.chan, - elems->ht_operation, &chandef); - sta->ch_width = chandef.width; + + if (ieee80211_ht_cap_ie_to_sta_ht_cap(sdata, sband, + elems->ht_cap_elem, sta)) + changed |= IEEE80211_RC_BW_CHANGED; + + /* HT peer is operating 20MHz-only */ + if (elems->ht_operation && + !(elems->ht_operation->ht_param & + IEEE80211_HT_PARAM_CHAN_WIDTH_ANY)) { + if (sta->sta.bandwidth != IEEE80211_STA_RX_BW_20) + changed |= IEEE80211_RC_BW_CHANGED; + sta->sta.bandwidth = IEEE80211_STA_RX_BW_20; } if (insert) rate_control_rate_init(sta); + else + rate_control_rate_update(local, sband, sta, changed); +out: spin_unlock_bh(&sta->lock); +} - if (insert && sta_info_insert(sta)) +static struct sta_info * +__mesh_sta_info_alloc(struct ieee80211_sub_if_data *sdata, u8 *hw_addr) +{ + struct sta_info *sta; + + if (sdata->local->num_sta >= MESH_MAX_PLINKS) + return NULL; + + sta = sta_info_alloc(sdata, hw_addr, GFP_KERNEL); + if (!sta) return NULL; + sta->plink_state = NL80211_PLINK_LISTEN; + + sta_info_pre_move_state(sta, IEEE80211_STA_AUTH); + sta_info_pre_move_state(sta, IEEE80211_STA_ASSOC); + sta_info_pre_move_state(sta, IEEE80211_STA_AUTHORIZED); + + set_sta_flag(sta, WLAN_STA_WME); + + return sta; +} + +static struct sta_info * +mesh_sta_info_alloc(struct ieee80211_sub_if_data *sdata, u8 *addr, + struct ieee802_11_elems *elems) +{ + struct sta_info *sta = NULL; + + /* Userspace handles station allocation */ + if (sdata->u.mesh.user_mpm || + sdata->u.mesh.security & IEEE80211_MESH_SEC_AUTHED) + cfg80211_notify_new_peer_candidate(sdata->dev, addr, + elems->ie_start, + elems->total_len, + GFP_KERNEL); + else + sta = __mesh_sta_info_alloc(sdata, addr); + return sta; } +/* + * mesh_sta_info_get - return mesh sta info entry for @addr. + * + * @sdata: local meshif + * @addr: peer's address + * @elems: IEs from beacon or mesh peering frame. + * + * Return existing or newly allocated sta_info under RCU read lock. + * (re)initialize with given IEs. 
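The mesh_sta_info_get() kerneldoc above describes a lookup that cannot allocate atomically: on a miss it drops the RCU read lock, allocates the station entry in a sleepable context, and only then inserts it. A pthreads sketch of that lookup/drop-lock/allocate/insert shape under illustrative names; the re-check after reacquiring the lock stands in for the duplicate handling that sta_info_insert_rcu() performs in mac80211:

/*
 * Pthreads model, not kernel code: read-locked lookup, blocking
 * allocation outside any lock, write-locked insert with a re-check
 * in case another thread won the race.
 */
#include <pthread.h>
#include <stdlib.h>

struct entry {
        struct entry *next;
        int key;
};

static pthread_rwlock_t tbl_lock = PTHREAD_RWLOCK_INITIALIZER;
static struct entry *table;

static struct entry *find_locked(int key)
{
        struct entry *e;

        for (e = table; e; e = e->next)
                if (e->key == key)
                        return e;
        return NULL;
}

static struct entry *get_or_create(int key)
{
        struct entry *e, *new;

        pthread_rwlock_rdlock(&tbl_lock);
        e = find_locked(key);
        pthread_rwlock_unlock(&tbl_lock);
        if (e)
                return e;

        /* miss: allocate outside any lock, since it may block */
        new = malloc(sizeof(*new));
        if (!new)
                return NULL;
        new->key = key;

        pthread_rwlock_wrlock(&tbl_lock);
        e = find_locked(key);             /* lost a race? reuse the winner */
        if (!e) {
                new->next = table;
                table = new;
                e = new;
        } else {
                free(new);
        }
        pthread_rwlock_unlock(&tbl_lock);
        return e;
}

int main(void)
{
        return get_or_create(7) && get_or_create(7) ? 0 : 1;
}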
+ */ +static struct sta_info * +mesh_sta_info_get(struct ieee80211_sub_if_data *sdata, + u8 *addr, struct ieee802_11_elems *elems) __acquires(RCU) +{ + struct sta_info *sta = NULL; + + rcu_read_lock(); + sta = sta_info_get(sdata, addr); + if (sta) { + mesh_sta_info_init(sdata, sta, elems, false); + } else { + rcu_read_unlock(); + /* can't run atomic */ + sta = mesh_sta_info_alloc(sdata, addr, elems); + if (!sta) { + rcu_read_lock(); + return NULL; + } + + mesh_sta_info_init(sdata, sta, elems, true); + + if (sta_info_insert_rcu(sta)) + return NULL; + } + + return sta; +} + +/* + * mesh_neighbour_update - update or initialize new mesh neighbor. + * + * @sdata: local meshif + * @addr: peer's address + * @elems: IEs from beacon or mesh peering frame + * + * Initiates peering if appropriate. + */ void mesh_neighbour_update(struct ieee80211_sub_if_data *sdata, u8 *hw_addr, struct ieee802_11_elems *elems) { struct sta_info *sta; + u32 changed = 0; - rcu_read_lock(); - sta = mesh_peer_init(sdata, hw_addr, elems); + sta = mesh_sta_info_get(sdata, hw_addr, elems); if (!sta) goto out; @@ -417,10 +512,12 @@ void mesh_neighbour_update(struct ieee80211_sub_if_data *sdata, sdata->u.mesh.accepting_plinks && sdata->u.mesh.mshcfg.auto_open_plinks && rssi_threshold_check(sta, sdata)) - mesh_plink_open(sta); + changed = mesh_plink_open(sta); + ieee80211_mps_frame_release(sta, elems); out: rcu_read_unlock(); + ieee80211_mbss_info_change_notify(sdata, changed); } static void mesh_plink_timer(unsigned long data) @@ -437,10 +534,8 @@ static void mesh_plink_timer(unsigned long data) */ sta = (struct sta_info *) data; - if (sta->sdata->local->quiescing) { - sta->plink_timer_was_running = true; + if (sta->sdata->local->quiescing) return; - } spin_lock_bh(&sta->lock); if (sta->ignore_plink_timer) { @@ -449,8 +544,8 @@ static void mesh_plink_timer(unsigned long data) return; } mpl_dbg(sta->sdata, - "Mesh plink timer for %pM fired on state %d\n", - sta->sta.addr, sta->plink_state); + "Mesh plink timer for %pM fired on state %s\n", + sta->sta.addr, mplstates[sta->plink_state]); reason = 0; llid = sta->llid; plid = sta->plid; @@ -501,22 +596,6 @@ static void mesh_plink_timer(unsigned long data) } } -#ifdef CONFIG_PM -void mesh_plink_quiesce(struct sta_info *sta) -{ - if (del_timer_sync(&sta->plink_timer)) - sta->plink_timer_was_running = true; -} - -void mesh_plink_restart(struct sta_info *sta) -{ - if (sta->plink_timer_was_running) { - add_timer(&sta->plink_timer); - sta->plink_timer_was_running = false; - } -} -#endif - static inline void mesh_plink_timer_set(struct sta_info *sta, int timeout) { sta->plink_timer.expires = jiffies + (HZ * timeout / 1000); @@ -526,13 +605,14 @@ static inline void mesh_plink_timer_set(struct sta_info *sta, int timeout) add_timer(&sta->plink_timer); } -int mesh_plink_open(struct sta_info *sta) +u32 mesh_plink_open(struct sta_info *sta) { __le16 llid; struct ieee80211_sub_if_data *sdata = sta->sdata; + u32 changed; if (!test_sta_flag(sta, WLAN_STA_AUTH)) - return -EPERM; + return 0; spin_lock_bh(&sta->lock); get_random_bytes(&llid, 2); @@ -540,7 +620,7 @@ int mesh_plink_open(struct sta_info *sta) if (sta->plink_state != NL80211_PLINK_LISTEN && sta->plink_state != NL80211_PLINK_BLOCKED) { spin_unlock_bh(&sta->lock); - return -EBUSY; + return 0; } sta->plink_state = NL80211_PLINK_OPN_SNT; mesh_plink_timer_set(sta, sdata->u.mesh.mshcfg.dot11MeshRetryTimeout); @@ -549,13 +629,16 @@ int mesh_plink_open(struct sta_info *sta) "Mesh plink: starting establishment with %pM\n", sta->sta.addr); - 
return mesh_plink_frame_tx(sdata, WLAN_SP_MESH_PEERING_OPEN, - sta->sta.addr, llid, 0, 0); + /* set the non-peer mode to active during peering */ + changed = ieee80211_mps_local_status_update(sdata); + + mesh_plink_frame_tx(sdata, WLAN_SP_MESH_PEERING_OPEN, + sta->sta.addr, llid, 0, 0); + return changed; } -void mesh_plink_block(struct sta_info *sta) +u32 mesh_plink_block(struct sta_info *sta) { - struct ieee80211_sub_if_data *sdata = sta->sdata; u32 changed; spin_lock_bh(&sta->lock); @@ -563,12 +646,13 @@ void mesh_plink_block(struct sta_info *sta) sta->plink_state = NL80211_PLINK_BLOCKED; spin_unlock_bh(&sta->lock); - ieee80211_bss_info_change_notify(sdata, changed); + return changed; } -void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt, - size_t len, struct ieee80211_rx_status *rx_status) +void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, + struct ieee80211_mgmt *mgmt, size_t len, + struct ieee80211_rx_status *rx_status) { struct mesh_config *mshcfg = &sdata->u.mesh.mshcfg; struct ieee802_11_elems elems; @@ -581,20 +665,15 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m u8 *baseaddr; u32 changed = 0; __le16 plid, llid, reason; - static const char *mplstates[] = { - [NL80211_PLINK_LISTEN] = "LISTEN", - [NL80211_PLINK_OPN_SNT] = "OPN-SNT", - [NL80211_PLINK_OPN_RCVD] = "OPN-RCVD", - [NL80211_PLINK_CNF_RCVD] = "CNF_RCVD", - [NL80211_PLINK_ESTAB] = "ESTAB", - [NL80211_PLINK_HOLDING] = "HOLDING", - [NL80211_PLINK_BLOCKED] = "BLOCKED" - }; /* need action_code, aux */ if (len < IEEE80211_MIN_ACTION_SIZE + 3) return; + if (sdata->u.mesh.user_mpm) + /* userspace must register for these */ + return; + if (is_multicast_ether_addr(mgmt->da)) { mpl_dbg(sdata, "Mesh plink: ignore frame from multicast address\n"); @@ -608,14 +687,16 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m baseaddr += 4; baselen += 4; } - ieee802_11_parse_elems(baseaddr, len - baselen, &elems); + ieee802_11_parse_elems(baseaddr, len - baselen, true, &elems); + if (!elems.peering) { mpl_dbg(sdata, "Mesh plink: missing necessary peer link ie\n"); return; } + if (elems.rsn_len && - sdata->u.mesh.security == IEEE80211_MESH_SEC_NONE) { + sdata->u.mesh.security == IEEE80211_MESH_SEC_NONE) { mpl_dbg(sdata, "Mesh plink: can't establish link with secure peer\n"); return; @@ -634,7 +715,7 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m } if (ftype != WLAN_SP_MESH_PEERING_CLOSE && - (!elems.mesh_id || !elems.mesh_config)) { + (!elems.mesh_id || !elems.mesh_config)) { mpl_dbg(sdata, "Mesh plink: missing necessary ie\n"); return; } @@ -646,6 +727,7 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m (ftype == WLAN_SP_MESH_PEERING_CLOSE && ie_len == 8)) memcpy(&llid, PLINK_GET_PLID(elems.peering), 2); + /* WARNING: Only for sta pointer, is dropped & re-acquired */ rcu_read_lock(); sta = sta_info_get(sdata, mgmt->sa); @@ -749,8 +831,9 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m } if (event == OPN_ACPT) { + rcu_read_unlock(); /* allocate sta entry if necessary and update info */ - sta = mesh_peer_init(sdata, mgmt->sa, &elems); + sta = mesh_sta_info_get(sdata, mgmt->sa, &elems); if (!sta) { mpl_dbg(sdata, "Mesh plink: failed to init peer!\n"); rcu_read_unlock(); @@ -758,11 +841,8 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m } } - mpl_dbg(sdata, - "Mesh plink (peer, state, llid, 
plid, event): %pM %s %d %d %d\n", - mgmt->sa, mplstates[sta->plink_state], - le16_to_cpu(sta->llid), le16_to_cpu(sta->plid), - event); + mpl_dbg(sdata, "peer %pM in state %s got event %s\n", mgmt->sa, + mplstates[sta->plink_state], mplevents[event]); reason = 0; spin_lock_bh(&sta->lock); switch (sta->plink_state) { @@ -780,6 +860,10 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m sta->llid = llid; mesh_plink_timer_set(sta, mshcfg->dot11MeshRetryTimeout); + + /* set the non-peer mode to active during peering */ + changed |= ieee80211_mps_local_status_update(sdata); + spin_unlock_bh(&sta->lock); mesh_plink_frame_tx(sdata, WLAN_SP_MESH_PEERING_OPEN, @@ -870,8 +954,12 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m spin_unlock_bh(&sta->lock); changed |= mesh_plink_inc_estab_count(sdata); changed |= mesh_set_ht_prot_mode(sdata); + changed |= mesh_set_short_slot_time(sdata); mpl_dbg(sdata, "Mesh plink with %pM ESTABLISHED\n", sta->sta.addr); + ieee80211_mps_sta_status_update(sta); + changed |= ieee80211_mps_set_sta_local_pm(sta, + mshcfg->power_mode); break; default: spin_unlock_bh(&sta->lock); @@ -905,11 +993,15 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m spin_unlock_bh(&sta->lock); changed |= mesh_plink_inc_estab_count(sdata); changed |= mesh_set_ht_prot_mode(sdata); + changed |= mesh_set_short_slot_time(sdata); mpl_dbg(sdata, "Mesh plink with %pM ESTABLISHED\n", sta->sta.addr); mesh_plink_frame_tx(sdata, WLAN_SP_MESH_PEERING_CONFIRM, sta->sta.addr, llid, plid, 0); + ieee80211_mps_sta_status_update(sta); + changed |= ieee80211_mps_set_sta_local_pm(sta, + mshcfg->power_mode); break; default: spin_unlock_bh(&sta->lock); @@ -928,6 +1020,7 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m mod_plink_timer(sta, mshcfg->dot11MeshHoldingTimeout); spin_unlock_bh(&sta->lock); changed |= mesh_set_ht_prot_mode(sdata); + changed |= mesh_set_short_slot_time(sdata); mesh_plink_frame_tx(sdata, WLAN_SP_MESH_PEERING_CLOSE, sta->sta.addr, llid, plid, reason); break; @@ -976,5 +1069,5 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m rcu_read_unlock(); if (changed) - ieee80211_bss_info_change_notify(sdata, changed); + ieee80211_mbss_info_change_notify(sdata, changed); } diff --git a/net/mac80211/mesh_ps.c b/net/mac80211/mesh_ps.c new file mode 100644 index 000000000000..3b7bfc01ee36 --- /dev/null +++ b/net/mac80211/mesh_ps.c @@ -0,0 +1,598 @@ +/* + * Copyright 2012-2013, Marco Porsch <marco.porsch@s2005.tu-chemnitz.de> + * Copyright 2012-2013, cozybit Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include "mesh.h" +#include "wme.h" + + +/* mesh PS management */ + +/** + * mps_qos_null_get - create pre-addressed QoS Null frame for mesh powersave + */ +static struct sk_buff *mps_qos_null_get(struct sta_info *sta) +{ + struct ieee80211_sub_if_data *sdata = sta->sdata; + struct ieee80211_local *local = sdata->local; + struct ieee80211_hdr *nullfunc; /* use 4addr header */ + struct sk_buff *skb; + int size = sizeof(*nullfunc); + __le16 fc; + + skb = dev_alloc_skb(local->hw.extra_tx_headroom + size + 2); + if (!skb) + return NULL; + skb_reserve(skb, local->hw.extra_tx_headroom); + + nullfunc = (struct ieee80211_hdr *) skb_put(skb, size); + fc = cpu_to_le16(IEEE80211_FTYPE_DATA | IEEE80211_STYPE_QOS_NULLFUNC); + ieee80211_fill_mesh_addresses(nullfunc, &fc, sta->sta.addr, + sdata->vif.addr); + nullfunc->frame_control = fc; + nullfunc->duration_id = 0; + /* no address resolution for this frame -> set addr 1 immediately */ + memcpy(nullfunc->addr1, sta->sta.addr, ETH_ALEN); + memset(skb_put(skb, 2), 0, 2); /* append QoS control field */ + ieee80211_mps_set_frame_flags(sdata, sta, nullfunc); + + return skb; +} + +/** + * mps_qos_null_tx - send a QoS Null to indicate link-specific power mode + */ +static void mps_qos_null_tx(struct sta_info *sta) +{ + struct sk_buff *skb; + + skb = mps_qos_null_get(sta); + if (!skb) + return; + + mps_dbg(sta->sdata, "announcing peer-specific power mode to %pM\n", + sta->sta.addr); + + /* don't unintentionally start a MPSP */ + if (!test_sta_flag(sta, WLAN_STA_PS_STA)) { + u8 *qc = ieee80211_get_qos_ctl((void *) skb->data); + + qc[0] |= IEEE80211_QOS_CTL_EOSP; + } + + ieee80211_tx_skb(sta->sdata, skb); +} + +/** + * ieee80211_mps_local_status_update - track status of local link-specific PMs + * + * @sdata: local mesh subif + * + * sets the non-peer power mode and triggers the driver PS (re-)configuration + * Return BSS_CHANGED_BEACON if a beacon update is necessary. + */ +u32 ieee80211_mps_local_status_update(struct ieee80211_sub_if_data *sdata) +{ + struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; + struct sta_info *sta; + bool peering = false; + int light_sleep_cnt = 0; + int deep_sleep_cnt = 0; + u32 changed = 0; + enum nl80211_mesh_power_mode nonpeer_pm; + + rcu_read_lock(); + list_for_each_entry_rcu(sta, &sdata->local->sta_list, list) { + if (sdata != sta->sdata) + continue; + + switch (sta->plink_state) { + case NL80211_PLINK_OPN_SNT: + case NL80211_PLINK_OPN_RCVD: + case NL80211_PLINK_CNF_RCVD: + peering = true; + break; + case NL80211_PLINK_ESTAB: + if (sta->local_pm == NL80211_MESH_POWER_LIGHT_SLEEP) + light_sleep_cnt++; + else if (sta->local_pm == NL80211_MESH_POWER_DEEP_SLEEP) + deep_sleep_cnt++; + break; + default: + break; + } + } + rcu_read_unlock(); + + /* + * Set non-peer mode to active during peering/scanning/authentication + * (see IEEE802.11-2012 13.14.8.3). The non-peer mesh power mode is + * deep sleep if the local STA is in light or deep sleep towards at + * least one mesh peer (see 13.14.3.1). Otherwise, set it to the + * user-configured default value. 
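+ *
+ * Summarised as a decision table (illustrative sketch only, not
+ * normative text from the standard; "PS peer links" counts
+ * established peers towards which the local PM is light or deep
+ * sleep):
+ *
+ * | peering | PS peer links | resulting non-peer PM |
+ * +---------+---------------+-----------------------+
+ * |   yes   |      any      | ACTIVE                |
+ * |   no    |     >= 1      | DEEP SLEEP            |
+ * |   no    |     none      | mshcfg.power_mode     |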
+ */ + if (peering) { + mps_dbg(sdata, "setting non-peer PM to active for peering\n"); + nonpeer_pm = NL80211_MESH_POWER_ACTIVE; + } else if (light_sleep_cnt || deep_sleep_cnt) { + mps_dbg(sdata, "setting non-peer PM to deep sleep\n"); + nonpeer_pm = NL80211_MESH_POWER_DEEP_SLEEP; + } else { + mps_dbg(sdata, "setting non-peer PM to user value\n"); + nonpeer_pm = ifmsh->mshcfg.power_mode; + } + + /* need update if sleep counts move between 0 and non-zero */ + if (ifmsh->nonpeer_pm != nonpeer_pm || + !ifmsh->ps_peers_light_sleep != !light_sleep_cnt || + !ifmsh->ps_peers_deep_sleep != !deep_sleep_cnt) + changed = BSS_CHANGED_BEACON; + + ifmsh->nonpeer_pm = nonpeer_pm; + ifmsh->ps_peers_light_sleep = light_sleep_cnt; + ifmsh->ps_peers_deep_sleep = deep_sleep_cnt; + + return changed; +} + +/** + * ieee80211_mps_set_sta_local_pm - set local PM towards a mesh STA + * + * @sta: mesh STA + * @pm: the power mode to set + * Return BSS_CHANGED_BEACON if a beacon update is in order. + */ +u32 ieee80211_mps_set_sta_local_pm(struct sta_info *sta, + enum nl80211_mesh_power_mode pm) +{ + struct ieee80211_sub_if_data *sdata = sta->sdata; + + mps_dbg(sdata, "local STA operates in mode %d with %pM\n", + pm, sta->sta.addr); + + sta->local_pm = pm; + + /* + * announce peer-specific power mode transition + * (see IEEE802.11-2012 13.14.3.2 and 13.14.3.3) + */ + if (sta->plink_state == NL80211_PLINK_ESTAB) + mps_qos_null_tx(sta); + + return ieee80211_mps_local_status_update(sdata); +} + +/** + * ieee80211_mps_set_frame_flags - set mesh PS flags in FC (and QoS Control) + * + * @sdata: local mesh subif + * @sta: mesh STA + * @hdr: 802.11 frame header + * + * see IEEE802.11-2012 8.2.4.1.7 and 8.2.4.5.11 + * + * NOTE: sta must be given when an individually-addressed QoS frame header + * is handled, for group-addressed and management frames it is not used + */ +void ieee80211_mps_set_frame_flags(struct ieee80211_sub_if_data *sdata, + struct sta_info *sta, + struct ieee80211_hdr *hdr) +{ + enum nl80211_mesh_power_mode pm; + u8 *qc; + + if (WARN_ON(is_unicast_ether_addr(hdr->addr1) && + ieee80211_is_data_qos(hdr->frame_control) && + !sta)) + return; + + if (is_unicast_ether_addr(hdr->addr1) && + ieee80211_is_data_qos(hdr->frame_control) && + sta->plink_state == NL80211_PLINK_ESTAB) + pm = sta->local_pm; + else + pm = sdata->u.mesh.nonpeer_pm; + + if (pm == NL80211_MESH_POWER_ACTIVE) + hdr->frame_control &= cpu_to_le16(~IEEE80211_FCTL_PM); + else + hdr->frame_control |= cpu_to_le16(IEEE80211_FCTL_PM); + + if (!ieee80211_is_data_qos(hdr->frame_control)) + return; + + qc = ieee80211_get_qos_ctl(hdr); + + if ((is_unicast_ether_addr(hdr->addr1) && + pm == NL80211_MESH_POWER_DEEP_SLEEP) || + (is_multicast_ether_addr(hdr->addr1) && + sdata->u.mesh.ps_peers_deep_sleep > 0)) + qc[1] |= (IEEE80211_QOS_CTL_MESH_PS_LEVEL >> 8); + else + qc[1] &= ~(IEEE80211_QOS_CTL_MESH_PS_LEVEL >> 8); +} + +/** + * ieee80211_mps_sta_status_update - update buffering status of neighbor STA + * + * @sta: mesh STA + * + * called after change of peering status or non-peer/peer-specific power mode + */ +void ieee80211_mps_sta_status_update(struct sta_info *sta) +{ + enum nl80211_mesh_power_mode pm; + bool do_buffer; + + /* + * use peer-specific power mode if peering is established and the + * peer's power mode is known + */ + if (sta->plink_state == NL80211_PLINK_ESTAB && + sta->peer_pm != NL80211_MESH_POWER_UNKNOWN) + pm = sta->peer_pm; + else + pm = sta->nonpeer_pm; + + do_buffer = (pm != NL80211_MESH_POWER_ACTIVE); + + /* Don't let the same PS state 
be set twice */ + if (test_sta_flag(sta, WLAN_STA_PS_STA) == do_buffer) + return; + + if (do_buffer) { + set_sta_flag(sta, WLAN_STA_PS_STA); + atomic_inc(&sta->sdata->u.mesh.ps.num_sta_ps); + mps_dbg(sta->sdata, "start PS buffering frames towards %pM\n", + sta->sta.addr); + } else { + ieee80211_sta_ps_deliver_wakeup(sta); + } + + /* clear the MPSP flags for non-peers or active STA */ + if (sta->plink_state != NL80211_PLINK_ESTAB) { + clear_sta_flag(sta, WLAN_STA_MPSP_OWNER); + clear_sta_flag(sta, WLAN_STA_MPSP_RECIPIENT); + } else if (!do_buffer) { + clear_sta_flag(sta, WLAN_STA_MPSP_OWNER); + } +} + +static void mps_set_sta_peer_pm(struct sta_info *sta, + struct ieee80211_hdr *hdr) +{ + enum nl80211_mesh_power_mode pm; + u8 *qc = ieee80211_get_qos_ctl(hdr); + + /* + * Test Power Management field of frame control (PW) and + * mesh power save level subfield of QoS control field (PSL) + * + * | PM | PSL| Mesh PM | + * +----+----+---------+ + * | 0 |Rsrv| Active | + * | 1 | 0 | Light | + * | 1 | 1 | Deep | + */ + if (ieee80211_has_pm(hdr->frame_control)) { + if (qc[1] & (IEEE80211_QOS_CTL_MESH_PS_LEVEL >> 8)) + pm = NL80211_MESH_POWER_DEEP_SLEEP; + else + pm = NL80211_MESH_POWER_LIGHT_SLEEP; + } else { + pm = NL80211_MESH_POWER_ACTIVE; + } + + if (sta->peer_pm == pm) + return; + + mps_dbg(sta->sdata, "STA %pM enters mode %d\n", + sta->sta.addr, pm); + + sta->peer_pm = pm; + + ieee80211_mps_sta_status_update(sta); +} + +static void mps_set_sta_nonpeer_pm(struct sta_info *sta, + struct ieee80211_hdr *hdr) +{ + enum nl80211_mesh_power_mode pm; + + if (ieee80211_has_pm(hdr->frame_control)) + pm = NL80211_MESH_POWER_DEEP_SLEEP; + else + pm = NL80211_MESH_POWER_ACTIVE; + + if (sta->nonpeer_pm == pm) + return; + + mps_dbg(sta->sdata, "STA %pM sets non-peer mode to %d\n", + sta->sta.addr, pm); + + sta->nonpeer_pm = pm; + + ieee80211_mps_sta_status_update(sta); +} + +/** + * ieee80211_mps_rx_h_sta_process - frame receive handler for mesh powersave + * + * @sta: STA info that transmitted the frame + * @hdr: IEEE 802.11 (QoS) Header + */ +void ieee80211_mps_rx_h_sta_process(struct sta_info *sta, + struct ieee80211_hdr *hdr) +{ + if (is_unicast_ether_addr(hdr->addr1) && + ieee80211_is_data_qos(hdr->frame_control)) { + /* + * individually addressed QoS Data/Null frames contain + * peer link-specific PS mode towards the local STA + */ + mps_set_sta_peer_pm(sta, hdr); + + /* check for mesh Peer Service Period trigger frames */ + ieee80211_mpsp_trigger_process(ieee80211_get_qos_ctl(hdr), + sta, false, false); + } else { + /* + * can only determine non-peer PS mode + * (see IEEE802.11-2012 8.2.4.1.7) + */ + mps_set_sta_nonpeer_pm(sta, hdr); + } +} + + +/* mesh PS frame release */ + +static void mpsp_trigger_send(struct sta_info *sta, bool rspi, bool eosp) +{ + struct ieee80211_sub_if_data *sdata = sta->sdata; + struct sk_buff *skb; + struct ieee80211_hdr *nullfunc; + struct ieee80211_tx_info *info; + u8 *qc; + + skb = mps_qos_null_get(sta); + if (!skb) + return; + + nullfunc = (struct ieee80211_hdr *) skb->data; + if (!eosp) + nullfunc->frame_control |= + cpu_to_le16(IEEE80211_FCTL_MOREDATA); + /* + * | RSPI | EOSP | MPSP triggering | + * +------+------+--------------------+ + * | 0 | 0 | local STA is owner | + * | 0 | 1 | no MPSP (MPSP end) | + * | 1 | 0 | both STA are owner | + * | 1 | 1 | peer STA is owner | see IEEE802.11-2012 13.14.9.2 + */ + qc = ieee80211_get_qos_ctl(nullfunc); + if (rspi) + qc[1] |= (IEEE80211_QOS_CTL_RSPI >> 8); + if (eosp) + qc[0] |= IEEE80211_QOS_CTL_EOSP; + + info = 
IEEE80211_SKB_CB(skb); + + info->flags |= IEEE80211_TX_CTL_NO_PS_BUFFER | + IEEE80211_TX_CTL_REQ_TX_STATUS; + + mps_dbg(sdata, "sending MPSP trigger%s%s to %pM\n", + rspi ? " RSPI" : "", eosp ? " EOSP" : "", sta->sta.addr); + + ieee80211_tx_skb(sdata, skb); +} + +/** + * mpsp_qos_null_append - append QoS Null frame to MPSP skb queue if needed + * + * To properly end a mesh MPSP the last transmitted frame has to set the EOSP + * flag in the QoS Control field. In case the current tailing frame is not a + * QoS Data frame, append a QoS Null to carry the flag. + */ +static void mpsp_qos_null_append(struct sta_info *sta, + struct sk_buff_head *frames) +{ + struct ieee80211_sub_if_data *sdata = sta->sdata; + struct sk_buff *new_skb, *skb = skb_peek_tail(frames); + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; + struct ieee80211_tx_info *info; + + if (ieee80211_is_data_qos(hdr->frame_control)) + return; + + new_skb = mps_qos_null_get(sta); + if (!new_skb) + return; + + mps_dbg(sdata, "appending QoS Null in MPSP towards %pM\n", + sta->sta.addr); + /* + * This frame has to be transmitted last. Assign lowest priority to + * make sure it cannot pass other frames when releasing multiple ACs. + */ + new_skb->priority = 1; + skb_set_queue_mapping(new_skb, IEEE80211_AC_BK); + ieee80211_set_qos_hdr(sdata, new_skb); + + info = IEEE80211_SKB_CB(new_skb); + info->control.vif = &sdata->vif; + info->flags |= IEEE80211_TX_INTFL_NEED_TXPROCESSING; + + __skb_queue_tail(frames, new_skb); +} + +/** + * mps_frame_deliver - transmit frames during mesh powersave + * + * @sta: STA info to transmit to + * @n_frames: number of frames to transmit. -1 for all + */ +static void mps_frame_deliver(struct sta_info *sta, int n_frames) +{ + struct ieee80211_sub_if_data *sdata = sta->sdata; + struct ieee80211_local *local = sdata->local; + int ac; + struct sk_buff_head frames; + struct sk_buff *skb; + bool more_data = false; + + skb_queue_head_init(&frames); + + /* collect frame(s) from buffers */ + for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) { + while (n_frames != 0) { + skb = skb_dequeue(&sta->tx_filtered[ac]); + if (!skb) { + skb = skb_dequeue( + &sta->ps_tx_buf[ac]); + if (skb) + local->total_ps_buffered--; + } + if (!skb) + break; + n_frames--; + __skb_queue_tail(&frames, skb); + } + + if (!skb_queue_empty(&sta->tx_filtered[ac]) || + !skb_queue_empty(&sta->ps_tx_buf[ac])) + more_data = true; + } + + /* nothing to send? -> EOSP */ + if (skb_queue_empty(&frames)) { + mpsp_trigger_send(sta, false, true); + return; + } + + /* in a MPSP make sure the last skb is a QoS Data frame */ + if (test_sta_flag(sta, WLAN_STA_MPSP_OWNER)) + mpsp_qos_null_append(sta, &frames); + + mps_dbg(sta->sdata, "sending %d frames to PS STA %pM\n", + skb_queue_len(&frames), sta->sta.addr); + + /* prepare collected frames for transmission */ + skb_queue_walk(&frames, skb) { + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + struct ieee80211_hdr *hdr = (void *) skb->data; + + /* + * Tell TX path to send this frame even though the + * STA may still remain is PS mode after this frame + * exchange. 
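+ *
+ * (Illustrative note: without IEEE80211_TX_CTL_NO_PS_BUFFER the
+ * TX path would route unicast frames for a STA in PS mode back
+ * into the PS buffers, and the service period could never
+ * drain.)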
+ */ + info->flags |= IEEE80211_TX_CTL_NO_PS_BUFFER; + + if (more_data || !skb_queue_is_last(&frames, skb)) + hdr->frame_control |= + cpu_to_le16(IEEE80211_FCTL_MOREDATA); + else + hdr->frame_control &= + cpu_to_le16(~IEEE80211_FCTL_MOREDATA); + + if (skb_queue_is_last(&frames, skb) && + ieee80211_is_data_qos(hdr->frame_control)) { + u8 *qoshdr = ieee80211_get_qos_ctl(hdr); + + /* MPSP trigger frame ends service period */ + *qoshdr |= IEEE80211_QOS_CTL_EOSP; + info->flags |= IEEE80211_TX_CTL_REQ_TX_STATUS; + } + } + + ieee80211_add_pending_skbs(local, &frames); + sta_info_recalc_tim(sta); +} + +/** + * ieee80211_mpsp_trigger_process - track status of mesh Peer Service Periods + * + * @qc: QoS Control field + * @sta: peer to start a MPSP with + * @tx: frame was transmitted by the local STA + * @acked: frame has been transmitted successfully + * + * NOTE: active mode STA may only serve as MPSP owner + */ +void ieee80211_mpsp_trigger_process(u8 *qc, struct sta_info *sta, + bool tx, bool acked) +{ + u8 rspi = qc[1] & (IEEE80211_QOS_CTL_RSPI >> 8); + u8 eosp = qc[0] & IEEE80211_QOS_CTL_EOSP; + + if (tx) { + if (rspi && acked) + set_sta_flag(sta, WLAN_STA_MPSP_RECIPIENT); + + if (eosp) + clear_sta_flag(sta, WLAN_STA_MPSP_OWNER); + else if (acked && + test_sta_flag(sta, WLAN_STA_PS_STA) && + !test_and_set_sta_flag(sta, WLAN_STA_MPSP_OWNER)) + mps_frame_deliver(sta, -1); + } else { + if (eosp) + clear_sta_flag(sta, WLAN_STA_MPSP_RECIPIENT); + else if (sta->local_pm != NL80211_MESH_POWER_ACTIVE) + set_sta_flag(sta, WLAN_STA_MPSP_RECIPIENT); + + if (rspi && !test_and_set_sta_flag(sta, WLAN_STA_MPSP_OWNER)) + mps_frame_deliver(sta, -1); + } +} + +/** + * ieee80211_mps_frame_release - release buffered frames in response to beacon + * + * @sta: mesh STA + * @elems: beacon IEs + * + * For peers if we have individually-addressed frames buffered or the peer + * indicates buffered frames, send a corresponding MPSP trigger frame. Since + * we do not evaluate the awake window duration, QoS Nulls are used as MPSP + * trigger frames. If the neighbour STA is not a peer, only send single frames. 
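+ *
+ * Dispatch summary (illustrative sketch of the logic below, after
+ * the awake window check):
+ *
+ *   established peer, frames pending     -> send MPSP trigger
+ *   established peer, nothing pending    -> do nothing
+ *   non-peer neighbour, frames buffered  -> deliver a single frame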
+ */ +void ieee80211_mps_frame_release(struct sta_info *sta, + struct ieee802_11_elems *elems) +{ + int ac, buffer_local = 0; + bool has_buffered = false; + + /* TIM map only for LLID <= IEEE80211_MAX_AID */ + if (sta->plink_state == NL80211_PLINK_ESTAB) + has_buffered = ieee80211_check_tim(elems->tim, elems->tim_len, + le16_to_cpu(sta->llid) % IEEE80211_MAX_AID); + + if (has_buffered) + mps_dbg(sta->sdata, "%pM indicates buffered frames\n", + sta->sta.addr); + + /* only transmit to PS STA with announced, non-zero awake window */ + if (test_sta_flag(sta, WLAN_STA_PS_STA) && + (!elems->awake_window || !le16_to_cpu(*elems->awake_window))) + return; + + for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) + buffer_local += skb_queue_len(&sta->ps_tx_buf[ac]) + + skb_queue_len(&sta->tx_filtered[ac]); + + if (!has_buffered && !buffer_local) + return; + + if (sta->plink_state == NL80211_PLINK_ESTAB) + mpsp_trigger_send(sta, has_buffered, !buffer_local); + else + mps_frame_deliver(sta, 1); +} diff --git a/net/mac80211/mesh_sync.c b/net/mac80211/mesh_sync.c index aa8d1e437385..05a256b38e24 100644 --- a/net/mac80211/mesh_sync.c +++ b/net/mac80211/mesh_sync.c @@ -43,7 +43,7 @@ struct sync_method { static bool mesh_peer_tbtt_adjusting(struct ieee802_11_elems *ie) { return (ie->mesh_config->meshconf_cap & - IEEE80211_MESHCONF_CAPAB_TBTT_ADJUSTING) != 0; + IEEE80211_MESHCONF_CAPAB_TBTT_ADJUSTING) != 0; } void mesh_sync_adjust_tbtt(struct ieee80211_sub_if_data *sdata) @@ -112,7 +112,8 @@ static void mesh_sync_offset_rx_bcn_presp(struct ieee80211_sub_if_data *sdata, if (elems->mesh_config && mesh_peer_tbtt_adjusting(elems)) { clear_sta_flag(sta, WLAN_STA_TOFFSET_KNOWN); - msync_dbg(sdata, "STA %pM : is adjusting TBTT\n", sta->sta.addr); + msync_dbg(sdata, "STA %pM : is adjusting TBTT\n", + sta->sta.addr); goto no_sync; } @@ -129,18 +130,15 @@ static void mesh_sync_offset_rx_bcn_presp(struct ieee80211_sub_if_data *sdata, sta->t_offset = t_t - t_r; if (test_sta_flag(sta, WLAN_STA_TOFFSET_KNOWN)) { - s64 t_clockdrift = sta->t_offset_setpoint - - sta->t_offset; + s64 t_clockdrift = sta->t_offset_setpoint - sta->t_offset; msync_dbg(sdata, "STA %pM : sta->t_offset=%lld, sta->t_offset_setpoint=%lld, t_clockdrift=%lld\n", - sta->sta.addr, - (long long) sta->t_offset, - (long long) - sta->t_offset_setpoint, + sta->sta.addr, (long long) sta->t_offset, + (long long) sta->t_offset_setpoint, (long long) t_clockdrift); if (t_clockdrift > TOFFSET_MAXIMUM_ADJUSTMENT || - t_clockdrift < -TOFFSET_MAXIMUM_ADJUSTMENT) { + t_clockdrift < -TOFFSET_MAXIMUM_ADJUSTMENT) { msync_dbg(sdata, "STA %pM : t_clockdrift=%lld too large, setpoint reset\n", sta->sta.addr, @@ -149,15 +147,10 @@ static void mesh_sync_offset_rx_bcn_presp(struct ieee80211_sub_if_data *sdata, goto no_sync; } - rcu_read_unlock(); - spin_lock_bh(&ifmsh->sync_offset_lock); - if (t_clockdrift > - ifmsh->sync_offset_clockdrift_max) - ifmsh->sync_offset_clockdrift_max - = t_clockdrift; + if (t_clockdrift > ifmsh->sync_offset_clockdrift_max) + ifmsh->sync_offset_clockdrift_max = t_clockdrift; spin_unlock_bh(&ifmsh->sync_offset_lock); - } else { sta->t_offset_setpoint = sta->t_offset - TOFFSET_SET_MARGIN; set_sta_flag(sta, WLAN_STA_TOFFSET_KNOWN); @@ -165,9 +158,7 @@ static void mesh_sync_offset_rx_bcn_presp(struct ieee80211_sub_if_data *sdata, "STA %pM : offset was invalid, sta->t_offset=%lld\n", sta->sta.addr, (long long) sta->t_offset); - rcu_read_unlock(); } - return; no_sync: rcu_read_unlock(); @@ -177,14 +168,12 @@ static void mesh_sync_offset_adjust_tbtt(struct 
ieee80211_sub_if_data *sdata) { struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; - WARN_ON(ifmsh->mesh_sp_id - != IEEE80211_SYNC_METHOD_NEIGHBOR_OFFSET); + WARN_ON(ifmsh->mesh_sp_id != IEEE80211_SYNC_METHOD_NEIGHBOR_OFFSET); BUG_ON(!rcu_read_lock_held()); spin_lock_bh(&ifmsh->sync_offset_lock); - if (ifmsh->sync_offset_clockdrift_max > - TOFFSET_MINIMUM_ADJUSTMENT) { + if (ifmsh->sync_offset_clockdrift_max > TOFFSET_MINIMUM_ADJUSTMENT) { /* Since ajusting the tsf here would * require a possibly blocking call * to the driver tsf setter, we punt @@ -193,8 +182,7 @@ static void mesh_sync_offset_adjust_tbtt(struct ieee80211_sub_if_data *sdata) msync_dbg(sdata, "TBTT : kicking off TBTT adjustment with clockdrift_max=%lld\n", ifmsh->sync_offset_clockdrift_max); - set_bit(MESH_WORK_DRIFT_ADJUST, - &ifmsh->wrkq_flags); + set_bit(MESH_WORK_DRIFT_ADJUST, &ifmsh->wrkq_flags); ifmsh->adjusting_tbtt = true; } else { @@ -220,14 +208,11 @@ static const struct sync_method sync_methods[] = { const struct ieee80211_mesh_sync_ops *ieee80211_mesh_sync_ops_get(u8 method) { - const struct ieee80211_mesh_sync_ops *ops = NULL; - u8 i; + int i; for (i = 0 ; i < ARRAY_SIZE(sync_methods); ++i) { - if (sync_methods[i].method == method) { - ops = &sync_methods[i].ops; - break; - } + if (sync_methods[i].method == method) + return &sync_methods[i].ops; } - return ops; + return NULL; } diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 5107248af7fb..29620bfc7a69 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -30,11 +30,13 @@ #include "rate.h" #include "led.h" -#define IEEE80211_AUTH_TIMEOUT (HZ / 5) -#define IEEE80211_AUTH_MAX_TRIES 3 -#define IEEE80211_AUTH_WAIT_ASSOC (HZ * 5) -#define IEEE80211_ASSOC_TIMEOUT (HZ / 5) -#define IEEE80211_ASSOC_MAX_TRIES 3 +#define IEEE80211_AUTH_TIMEOUT (HZ / 5) +#define IEEE80211_AUTH_TIMEOUT_SHORT (HZ / 10) +#define IEEE80211_AUTH_MAX_TRIES 3 +#define IEEE80211_AUTH_WAIT_ASSOC (HZ * 5) +#define IEEE80211_ASSOC_TIMEOUT (HZ / 5) +#define IEEE80211_ASSOC_TIMEOUT_SHORT (HZ / 10) +#define IEEE80211_ASSOC_MAX_TRIES 3 static int max_nullfunc_tries = 2; module_param(max_nullfunc_tries, int, 0644); @@ -54,7 +56,10 @@ MODULE_PARM_DESC(max_probe_tries, * probe on beacon miss before declaring the connection lost * default to what we want. */ -#define IEEE80211_BEACON_LOSS_COUNT 7 +static int beacon_loss_count = 7; +module_param(beacon_loss_count, int, 0644); +MODULE_PARM_DESC(beacon_loss_count, + "Number of beacon intervals before we decide beacon was lost."); /* * Time the connection can be idle before we probe @@ -85,9 +90,6 @@ MODULE_PARM_DESC(probe_wait_ms, */ #define IEEE80211_SIGNAL_AVE_MIN_COUNT 4 -#define TMR_RUNNING_TIMER 0 -#define TMR_RUNNING_CHANSW 1 - /* * All cfg80211 functions have to be called outside a locked * section so that they can acquire a lock themselves... 
This @@ -112,6 +114,9 @@ enum rx_mgmt_action { /* caller must call cfg80211_send_assoc_timeout() */ RX_MGMT_CFG80211_ASSOC_TIMEOUT, + + /* used when a processed beacon causes a deauth */ + RX_MGMT_CFG80211_TX_DEAUTH, }; /* utils */ @@ -172,79 +177,331 @@ static int ecw2cw(int ecw) return (1 << ecw) - 1; } -static u32 ieee80211_config_ht_tx(struct ieee80211_sub_if_data *sdata, - struct ieee80211_ht_operation *ht_oper, - const u8 *bssid, bool reconfig) +static u32 chandef_downgrade(struct cfg80211_chan_def *c) +{ + u32 ret; + int tmp; + + switch (c->width) { + case NL80211_CHAN_WIDTH_20: + c->width = NL80211_CHAN_WIDTH_20_NOHT; + ret = IEEE80211_STA_DISABLE_HT | IEEE80211_STA_DISABLE_VHT; + break; + case NL80211_CHAN_WIDTH_40: + c->width = NL80211_CHAN_WIDTH_20; + c->center_freq1 = c->chan->center_freq; + ret = IEEE80211_STA_DISABLE_40MHZ | + IEEE80211_STA_DISABLE_VHT; + break; + case NL80211_CHAN_WIDTH_80: + tmp = (30 + c->chan->center_freq - c->center_freq1)/20; + /* n_P40 */ + tmp /= 2; + /* freq_P40 */ + c->center_freq1 = c->center_freq1 - 20 + 40 * tmp; + c->width = NL80211_CHAN_WIDTH_40; + ret = IEEE80211_STA_DISABLE_VHT; + break; + case NL80211_CHAN_WIDTH_80P80: + c->center_freq2 = 0; + c->width = NL80211_CHAN_WIDTH_80; + ret = IEEE80211_STA_DISABLE_80P80MHZ | + IEEE80211_STA_DISABLE_160MHZ; + break; + case NL80211_CHAN_WIDTH_160: + /* n_P20 */ + tmp = (70 + c->chan->center_freq - c->center_freq1)/20; + /* n_P80 */ + tmp /= 4; + c->center_freq1 = c->center_freq1 - 40 + 80 * tmp; + c->width = NL80211_CHAN_WIDTH_80; + ret = IEEE80211_STA_DISABLE_80P80MHZ | + IEEE80211_STA_DISABLE_160MHZ; + break; + default: + case NL80211_CHAN_WIDTH_20_NOHT: + WARN_ON_ONCE(1); + c->width = NL80211_CHAN_WIDTH_20_NOHT; + ret = IEEE80211_STA_DISABLE_HT | IEEE80211_STA_DISABLE_VHT; + break; + } + + WARN_ON_ONCE(!cfg80211_chandef_valid(c)); + + return ret; +} + +static u32 +ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata, + struct ieee80211_supported_band *sband, + struct ieee80211_channel *channel, + const struct ieee80211_ht_operation *ht_oper, + const struct ieee80211_vht_operation *vht_oper, + struct cfg80211_chan_def *chandef, bool verbose) +{ + struct cfg80211_chan_def vht_chandef; + u32 ht_cfreq, ret; + + chandef->chan = channel; + chandef->width = NL80211_CHAN_WIDTH_20_NOHT; + chandef->center_freq1 = channel->center_freq; + chandef->center_freq2 = 0; + + if (!ht_oper || !sband->ht_cap.ht_supported) { + ret = IEEE80211_STA_DISABLE_HT | IEEE80211_STA_DISABLE_VHT; + goto out; + } + + chandef->width = NL80211_CHAN_WIDTH_20; + + ht_cfreq = ieee80211_channel_to_frequency(ht_oper->primary_chan, + channel->band); + /* check that channel matches the right operating channel */ + if (channel->center_freq != ht_cfreq) { + /* + * It's possible that some APs are confused here; + * Netgear WNDR3700 sometimes reports 4 higher than + * the actual channel in association responses, but + * since we look at probe response/beacon data here + * it should be OK. 
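+ *
+ * Worked example (illustrative): an AP on 2.4 GHz channel 1 has
+ * channel->center_freq 2412 MHz, and primary_chan 1 also maps to
+ * ht_cfreq 2412 MHz, so the check passes; a buggy AP reporting
+ * primary_chan 5 yields ht_cfreq 2432 MHz, the compare fails and
+ * HT is disabled.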
+ */ + if (verbose) + sdata_info(sdata, + "Wrong control channel: center-freq: %d ht-cfreq: %d ht->primary_chan: %d band: %d - Disabling HT\n", + channel->center_freq, ht_cfreq, + ht_oper->primary_chan, channel->band); + ret = IEEE80211_STA_DISABLE_HT | IEEE80211_STA_DISABLE_VHT; + goto out; + } + + /* check 40 MHz support, if we have it */ + if (sband->ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40) { + switch (ht_oper->ht_param & IEEE80211_HT_PARAM_CHA_SEC_OFFSET) { + case IEEE80211_HT_PARAM_CHA_SEC_ABOVE: + chandef->width = NL80211_CHAN_WIDTH_40; + chandef->center_freq1 += 10; + break; + case IEEE80211_HT_PARAM_CHA_SEC_BELOW: + chandef->width = NL80211_CHAN_WIDTH_40; + chandef->center_freq1 -= 10; + break; + } + } else { + /* 40 MHz (and 80 MHz) must be supported for VHT */ + ret = IEEE80211_STA_DISABLE_VHT; + /* also mark 40 MHz disabled */ + ret |= IEEE80211_STA_DISABLE_40MHZ; + goto out; + } + + if (!vht_oper || !sband->vht_cap.vht_supported) { + ret = IEEE80211_STA_DISABLE_VHT; + goto out; + } + + vht_chandef.chan = channel; + vht_chandef.center_freq1 = + ieee80211_channel_to_frequency(vht_oper->center_freq_seg1_idx, + channel->band); + vht_chandef.center_freq2 = 0; + + switch (vht_oper->chan_width) { + case IEEE80211_VHT_CHANWIDTH_USE_HT: + vht_chandef.width = chandef->width; + break; + case IEEE80211_VHT_CHANWIDTH_80MHZ: + vht_chandef.width = NL80211_CHAN_WIDTH_80; + break; + case IEEE80211_VHT_CHANWIDTH_160MHZ: + vht_chandef.width = NL80211_CHAN_WIDTH_160; + break; + case IEEE80211_VHT_CHANWIDTH_80P80MHZ: + vht_chandef.width = NL80211_CHAN_WIDTH_80P80; + vht_chandef.center_freq2 = + ieee80211_channel_to_frequency( + vht_oper->center_freq_seg2_idx, + channel->band); + break; + default: + if (verbose) + sdata_info(sdata, + "AP VHT operation IE has invalid channel width (%d), disable VHT\n", + vht_oper->chan_width); + ret = IEEE80211_STA_DISABLE_VHT; + goto out; + } + + if (!cfg80211_chandef_valid(&vht_chandef)) { + if (verbose) + sdata_info(sdata, + "AP VHT information is invalid, disable VHT\n"); + ret = IEEE80211_STA_DISABLE_VHT; + goto out; + } + + if (cfg80211_chandef_identical(chandef, &vht_chandef)) { + ret = 0; + goto out; + } + + if (!cfg80211_chandef_compatible(chandef, &vht_chandef)) { + if (verbose) + sdata_info(sdata, + "AP VHT information doesn't match HT, disable VHT\n"); + ret = IEEE80211_STA_DISABLE_VHT; + goto out; + } + + *chandef = vht_chandef; + + ret = 0; + +out: + /* don't print the message below for VHT mismatch if VHT is disabled */ + if (ret & IEEE80211_STA_DISABLE_VHT) + vht_chandef = *chandef; + + while (!cfg80211_chandef_usable(sdata->local->hw.wiphy, chandef, + IEEE80211_CHAN_DISABLED)) { + if (WARN_ON(chandef->width == NL80211_CHAN_WIDTH_20_NOHT)) { + ret = IEEE80211_STA_DISABLE_HT | + IEEE80211_STA_DISABLE_VHT; + goto out; + } + + ret |= chandef_downgrade(chandef); + } + + if (chandef->width != vht_chandef.width && verbose) + sdata_info(sdata, + "capabilities/regulatory prevented using AP HT/VHT configuration, downgraded\n"); + + WARN_ON_ONCE(!cfg80211_chandef_valid(chandef)); + return ret; +} + +static int ieee80211_config_bw(struct ieee80211_sub_if_data *sdata, + struct sta_info *sta, + const struct ieee80211_ht_operation *ht_oper, + const struct ieee80211_vht_operation *vht_oper, + const u8 *bssid, u32 *changed) { struct ieee80211_local *local = sdata->local; + struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; struct ieee80211_supported_band *sband; - struct ieee80211_chanctx_conf *chanctx_conf; struct ieee80211_channel *chan; - struct sta_info 
*sta; - u32 changed = 0; + struct cfg80211_chan_def chandef; u16 ht_opmode; - bool disable_40 = false; + u32 flags; + enum ieee80211_sta_rx_bandwidth new_sta_bw; + int ret; - rcu_read_lock(); - chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf); - if (WARN_ON(!chanctx_conf)) { - rcu_read_unlock(); + /* if HT was/is disabled, don't track any bandwidth changes */ + if (ifmgd->flags & IEEE80211_STA_DISABLE_HT || !ht_oper) return 0; - } - chan = chanctx_conf->def.chan; - rcu_read_unlock(); + + /* don't check VHT if we associated as non-VHT station */ + if (ifmgd->flags & IEEE80211_STA_DISABLE_VHT) + vht_oper = NULL; + + if (WARN_ON_ONCE(!sta)) + return -EINVAL; + + chan = sdata->vif.bss_conf.chandef.chan; sband = local->hw.wiphy->bands[chan->band]; - switch (sdata->vif.bss_conf.chandef.width) { + /* calculate new channel (type) based on HT/VHT operation IEs */ + flags = ieee80211_determine_chantype(sdata, sband, chan, ht_oper, + vht_oper, &chandef, false); + + /* + * Downgrade the new channel if we associated with restricted + * capabilities. For example, if we associated as a 20 MHz STA + * to a 40 MHz AP (due to regulatory, capabilities or config + * reasons) then switching to a 40 MHz channel now won't do us + * any good -- we couldn't use it with the AP. + */ + if (ifmgd->flags & IEEE80211_STA_DISABLE_80P80MHZ && + chandef.width == NL80211_CHAN_WIDTH_80P80) + flags |= chandef_downgrade(&chandef); + if (ifmgd->flags & IEEE80211_STA_DISABLE_160MHZ && + chandef.width == NL80211_CHAN_WIDTH_160) + flags |= chandef_downgrade(&chandef); + if (ifmgd->flags & IEEE80211_STA_DISABLE_40MHZ && + chandef.width > NL80211_CHAN_WIDTH_20) + flags |= chandef_downgrade(&chandef); + + if (cfg80211_chandef_identical(&chandef, &sdata->vif.bss_conf.chandef)) + return 0; + + sdata_info(sdata, + "AP %pM changed bandwidth, new config is %d MHz, width %d (%d/%d MHz)\n", + ifmgd->bssid, chandef.chan->center_freq, chandef.width, + chandef.center_freq1, chandef.center_freq2); + + if (flags != (ifmgd->flags & (IEEE80211_STA_DISABLE_HT | + IEEE80211_STA_DISABLE_VHT | + IEEE80211_STA_DISABLE_40MHZ | + IEEE80211_STA_DISABLE_80P80MHZ | + IEEE80211_STA_DISABLE_160MHZ)) || + !cfg80211_chandef_valid(&chandef)) { + sdata_info(sdata, + "AP %pM changed bandwidth in a way we can't support - disconnect\n", + ifmgd->bssid); + return -EINVAL; + } + + switch (chandef.width) { + case NL80211_CHAN_WIDTH_20_NOHT: + case NL80211_CHAN_WIDTH_20: + new_sta_bw = IEEE80211_STA_RX_BW_20; + break; case NL80211_CHAN_WIDTH_40: - if (sdata->vif.bss_conf.chandef.chan->center_freq > - sdata->vif.bss_conf.chandef.center_freq1 && - chan->flags & IEEE80211_CHAN_NO_HT40PLUS) - disable_40 = true; - if (sdata->vif.bss_conf.chandef.chan->center_freq < - sdata->vif.bss_conf.chandef.center_freq1 && - chan->flags & IEEE80211_CHAN_NO_HT40MINUS) - disable_40 = true; + new_sta_bw = IEEE80211_STA_RX_BW_40; break; - default: + case NL80211_CHAN_WIDTH_80: + new_sta_bw = IEEE80211_STA_RX_BW_80; + break; + case NL80211_CHAN_WIDTH_80P80: + case NL80211_CHAN_WIDTH_160: + new_sta_bw = IEEE80211_STA_RX_BW_160; break; + default: + return -EINVAL; } - /* This can change during the lifetime of the BSS */ - if (!(ht_oper->ht_param & IEEE80211_HT_PARAM_CHAN_WIDTH_ANY)) - disable_40 = true; - - mutex_lock(&local->sta_mtx); - sta = sta_info_get(sdata, bssid); - - WARN_ON_ONCE(!sta); - - if (sta && !sta->supports_40mhz) - disable_40 = true; + if (new_sta_bw > sta->cur_max_bandwidth) + new_sta_bw = sta->cur_max_bandwidth; - if (sta && (!reconfig || - (disable_40 != 
!(sta->sta.ht_cap.cap & - IEEE80211_HT_CAP_SUP_WIDTH_20_40)))) { + if (new_sta_bw < sta->sta.bandwidth) { + sta->sta.bandwidth = new_sta_bw; + rate_control_rate_update(local, sband, sta, + IEEE80211_RC_BW_CHANGED); + } - if (disable_40) - sta->sta.ht_cap.cap &= ~IEEE80211_HT_CAP_SUP_WIDTH_20_40; - else - sta->sta.ht_cap.cap |= IEEE80211_HT_CAP_SUP_WIDTH_20_40; + ret = ieee80211_vif_change_bandwidth(sdata, &chandef, changed); + if (ret) { + sdata_info(sdata, + "AP %pM changed bandwidth to incompatible one - disconnect\n", + ifmgd->bssid); + return ret; + } + if (new_sta_bw > sta->sta.bandwidth) { + sta->sta.bandwidth = new_sta_bw; rate_control_rate_update(local, sband, sta, IEEE80211_RC_BW_CHANGED); } - mutex_unlock(&local->sta_mtx); ht_opmode = le16_to_cpu(ht_oper->operation_mode); /* if bss configuration changed store the new one */ - if (!reconfig || (sdata->vif.bss_conf.ht_operation_mode != ht_opmode)) { - changed |= BSS_CHANGED_HT; + if (sdata->vif.bss_conf.ht_operation_mode != ht_opmode) { + *changed |= BSS_CHANGED_HT; sdata->vif.bss_conf.ht_operation_mode = ht_opmode; } - return changed; + return 0; } /* frame sending functions */ @@ -341,7 +598,8 @@ static void ieee80211_add_ht_ie(struct ieee80211_sub_if_data *sdata, static void ieee80211_add_vht_ie(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, - struct ieee80211_supported_band *sband) + struct ieee80211_supported_band *sband, + struct ieee80211_vht_cap *ap_vht_cap) { u8 *pos; u32 cap; @@ -350,6 +608,7 @@ static void ieee80211_add_vht_ie(struct ieee80211_sub_if_data *sdata, BUILD_BUG_ON(sizeof(vht_cap) != sizeof(sband->vht_cap)); memcpy(&vht_cap, &sband->vht_cap, sizeof(vht_cap)); + ieee80211_apply_vhtcap_overrides(sdata, &vht_cap); /* determine capability flags */ cap = vht_cap.cap; @@ -364,6 +623,14 @@ static void ieee80211_add_vht_ie(struct ieee80211_sub_if_data *sdata, cap &= ~IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ; } + /* + * Some APs apparently get confused if our capabilities are better + * than theirs, so restrict what we advertise in the assoc request. 
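+ *
+ * Concretely, the check below keeps SU beamformee support in our
+ * advertised VHT capabilities only if the AP itself advertises
+ * SU beamformer capability.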
+ */ + if (!(ap_vht_cap->vht_cap_info & + cpu_to_le32(IEEE80211_VHT_CAP_SU_BEAMFORMER_CAPABLE))) + cap &= ~IEEE80211_VHT_CAP_SU_BEAMFORMEE_CAPABLE; + /* reserve and fill IE */ pos = skb_put(skb, sizeof(struct ieee80211_vht_cap) + 2); ieee80211_ie_build_vht_cap(pos, &vht_cap, cap); @@ -562,7 +829,8 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata) sband, chan, sdata->smps_mode); if (!(ifmgd->flags & IEEE80211_STA_DISABLE_VHT)) - ieee80211_add_vht_ie(sdata, skb, sband); + ieee80211_add_vht_ie(sdata, skb, sband, + &assoc_data->ap_vht_cap); /* if present, add any custom non-vendor IEs that go after HT */ if (assoc_data->ie_len && assoc_data->ie) { @@ -605,6 +873,9 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata) drv_mgd_prepare_tx(local, sdata); IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT; + if (local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS) + IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_CTL_REQ_TX_STATUS | + IEEE80211_TX_INTFL_MLME_CONN_TX; ieee80211_tx_skb(sdata, skb); } @@ -641,7 +912,8 @@ void ieee80211_send_nullfunc(struct ieee80211_local *local, if (powersave) nullfunc->frame_control |= cpu_to_le16(IEEE80211_FCTL_PM); - IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT; + IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT | + IEEE80211_TX_INTFL_OFFCHAN_TX_OK; if (ifmgd->flags & (IEEE80211_STA_BEACON_POLL | IEEE80211_STA_CONNECTION_POLL)) IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_CTL_USE_MINRATE; @@ -684,6 +956,7 @@ static void ieee80211_chswitch_work(struct work_struct *work) { struct ieee80211_sub_if_data *sdata = container_of(work, struct ieee80211_sub_if_data, u.mgd.chswitch_work); + struct ieee80211_local *local = sdata->local; struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; if (!ieee80211_sdata_running(sdata)) @@ -693,21 +966,22 @@ static void ieee80211_chswitch_work(struct work_struct *work) if (!ifmgd->associated) goto out; - sdata->local->_oper_channel = sdata->local->csa_channel; - if (!sdata->local->ops->channel_switch) { + local->_oper_chandef = local->csa_chandef; + + if (!local->ops->channel_switch) { /* call "hw_config" only if doing sw channel switch */ - ieee80211_hw_config(sdata->local, - IEEE80211_CONF_CHANGE_CHANNEL); + ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL); } else { /* update the device channel directly */ - sdata->local->hw.conf.channel = sdata->local->_oper_channel; + local->hw.conf.chandef = local->_oper_chandef; } /* XXX: shouldn't really modify cfg80211-owned data! */ - ifmgd->associated->channel = sdata->local->_oper_channel; + ifmgd->associated->channel = local->_oper_chandef.chan; /* XXX: wait for a beacon first? 
*/ - ieee80211_wake_queues_by_reason(&sdata->local->hw, + ieee80211_wake_queues_by_reason(&local->hw, + IEEE80211_MAX_QUEUE_MAP, IEEE80211_QUEUE_STOP_REASON_CSA); out: ifmgd->flags &= ~IEEE80211_STA_CSA_RECEIVED; @@ -735,66 +1009,197 @@ static void ieee80211_chswitch_timer(unsigned long data) { struct ieee80211_sub_if_data *sdata = (struct ieee80211_sub_if_data *) data; - struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; - - if (sdata->local->quiescing) { - set_bit(TMR_RUNNING_CHANSW, &ifmgd->timers_running); - return; - } - ieee80211_queue_work(&sdata->local->hw, &ifmgd->chswitch_work); + ieee80211_queue_work(&sdata->local->hw, &sdata->u.mgd.chswitch_work); } -void ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, - struct ieee80211_channel_sw_ie *sw_elem, - struct ieee80211_bss *bss, - u64 timestamp) +static void +ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, + u64 timestamp, struct ieee802_11_elems *elems) { - struct cfg80211_bss *cbss = - container_of((void *)bss, struct cfg80211_bss, priv); - struct ieee80211_channel *new_ch; + struct ieee80211_local *local = sdata->local; struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; - int new_freq = ieee80211_channel_to_frequency(sw_elem->new_ch_num, - cbss->channel->band); + struct cfg80211_bss *cbss = ifmgd->associated; + struct ieee80211_bss *bss; struct ieee80211_chanctx *chanctx; + enum ieee80211_band new_band; + int new_freq; + u8 new_chan_no; + u8 count; + u8 mode; + struct ieee80211_channel *new_chan; + struct cfg80211_chan_def new_chandef = {}; + struct cfg80211_chan_def new_vht_chandef = {}; + const struct ieee80211_sec_chan_offs_ie *sec_chan_offs; + const struct ieee80211_wide_bw_chansw_ie *wide_bw_chansw_ie; + int secondary_channel_offset = -1; ASSERT_MGD_MTX(ifmgd); - if (!ifmgd->associated) + if (!cbss) return; - if (sdata->local->scanning) + if (local->scanning) return; - /* Disregard subsequent beacons if we are already running a timer - processing a CSA */ - + /* disregard subsequent announcements if we are already processing */ if (ifmgd->flags & IEEE80211_STA_CSA_RECEIVED) return; - new_ch = ieee80211_get_channel(sdata->local->hw.wiphy, new_freq); - if (!new_ch || new_ch->flags & IEEE80211_CHAN_DISABLED) { + sec_chan_offs = elems->sec_chan_offs; + wide_bw_chansw_ie = elems->wide_bw_chansw_ie; + + if (ifmgd->flags & (IEEE80211_STA_DISABLE_HT | + IEEE80211_STA_DISABLE_40MHZ)) { + sec_chan_offs = NULL; + wide_bw_chansw_ie = NULL; + } + + if (ifmgd->flags & IEEE80211_STA_DISABLE_VHT) + wide_bw_chansw_ie = NULL; + + if (elems->ext_chansw_ie) { + if (!ieee80211_operating_class_to_band( + elems->ext_chansw_ie->new_operating_class, + &new_band)) { + sdata_info(sdata, + "cannot understand ECSA IE operating class %d, disconnecting\n", + elems->ext_chansw_ie->new_operating_class); + ieee80211_queue_work(&local->hw, + &ifmgd->csa_connection_drop_work); + } + new_chan_no = elems->ext_chansw_ie->new_ch_num; + count = elems->ext_chansw_ie->count; + mode = elems->ext_chansw_ie->mode; + } else if (elems->ch_switch_ie) { + new_band = cbss->channel->band; + new_chan_no = elems->ch_switch_ie->new_ch_num; + count = elems->ch_switch_ie->count; + mode = elems->ch_switch_ie->mode; + } else { + /* nothing here we understand */ + return; + } + + bss = (void *)cbss->priv; + + new_freq = ieee80211_channel_to_frequency(new_chan_no, new_band); + new_chan = ieee80211_get_channel(sdata->local->hw.wiphy, new_freq); + if (!new_chan || new_chan->flags & IEEE80211_CHAN_DISABLED) { sdata_info(sdata, "AP %pM switches 
to unsupported channel (%d MHz), disconnecting\n", ifmgd->associated->bssid, new_freq); - ieee80211_queue_work(&sdata->local->hw, + ieee80211_queue_work(&local->hw, + &ifmgd->csa_connection_drop_work); + return; + } + + if (sec_chan_offs) { + secondary_channel_offset = sec_chan_offs->sec_chan_offs; + } else if (!(ifmgd->flags & IEEE80211_STA_DISABLE_HT)) { + /* if HT is enabled and the IE not present, it's still HT */ + secondary_channel_offset = IEEE80211_HT_PARAM_CHA_SEC_NONE; + } + + switch (secondary_channel_offset) { + default: + /* secondary_channel_offset was present but is invalid */ + case IEEE80211_HT_PARAM_CHA_SEC_NONE: + cfg80211_chandef_create(&new_chandef, new_chan, + NL80211_CHAN_HT20); + break; + case IEEE80211_HT_PARAM_CHA_SEC_ABOVE: + cfg80211_chandef_create(&new_chandef, new_chan, + NL80211_CHAN_HT40PLUS); + break; + case IEEE80211_HT_PARAM_CHA_SEC_BELOW: + cfg80211_chandef_create(&new_chandef, new_chan, + NL80211_CHAN_HT40MINUS); + break; + case -1: + cfg80211_chandef_create(&new_chandef, new_chan, + NL80211_CHAN_NO_HT); + break; + } + + if (wide_bw_chansw_ie) { + new_vht_chandef.chan = new_chan; + new_vht_chandef.center_freq1 = + ieee80211_channel_to_frequency( + wide_bw_chansw_ie->new_center_freq_seg0, + new_band); + + switch (wide_bw_chansw_ie->new_channel_width) { + default: + /* hmmm, ignore VHT and use HT if present */ + case IEEE80211_VHT_CHANWIDTH_USE_HT: + new_vht_chandef.chan = NULL; + break; + case IEEE80211_VHT_CHANWIDTH_80MHZ: + new_vht_chandef.width = NL80211_CHAN_WIDTH_80; + break; + case IEEE80211_VHT_CHANWIDTH_160MHZ: + new_vht_chandef.width = NL80211_CHAN_WIDTH_160; + break; + case IEEE80211_VHT_CHANWIDTH_80P80MHZ: + /* field is otherwise reserved */ + new_vht_chandef.center_freq2 = + ieee80211_channel_to_frequency( + wide_bw_chansw_ie->new_center_freq_seg1, + new_band); + new_vht_chandef.width = NL80211_CHAN_WIDTH_80P80; + break; + } + if (ifmgd->flags & IEEE80211_STA_DISABLE_80P80MHZ && + new_vht_chandef.width == NL80211_CHAN_WIDTH_80P80) + chandef_downgrade(&new_vht_chandef); + if (ifmgd->flags & IEEE80211_STA_DISABLE_160MHZ && + new_vht_chandef.width == NL80211_CHAN_WIDTH_160) + chandef_downgrade(&new_vht_chandef); + if (ifmgd->flags & IEEE80211_STA_DISABLE_40MHZ && + new_vht_chandef.width > NL80211_CHAN_WIDTH_20) + chandef_downgrade(&new_vht_chandef); + } + + /* if VHT data is there validate & use it */ + if (new_vht_chandef.chan) { + if (!cfg80211_chandef_compatible(&new_vht_chandef, + &new_chandef)) { + sdata_info(sdata, + "AP %pM CSA has inconsistent channel data, disconnecting\n", + ifmgd->associated->bssid); + ieee80211_queue_work(&local->hw, + &ifmgd->csa_connection_drop_work); + return; + } + new_chandef = new_vht_chandef; + } + + if (!cfg80211_chandef_usable(local->hw.wiphy, &new_chandef, + IEEE80211_CHAN_DISABLED)) { + sdata_info(sdata, + "AP %pM switches to unsupported channel (%d MHz, width:%d, CF1/2: %d/%d MHz), disconnecting\n", + ifmgd->associated->bssid, new_freq, + new_chandef.width, new_chandef.center_freq1, + new_chandef.center_freq2); + ieee80211_queue_work(&local->hw, &ifmgd->csa_connection_drop_work); return; } ifmgd->flags |= IEEE80211_STA_CSA_RECEIVED; - if (sdata->local->use_chanctx) { + if (local->use_chanctx) { sdata_info(sdata, "not handling channel switch with channel contexts\n"); - ieee80211_queue_work(&sdata->local->hw, + ieee80211_queue_work(&local->hw, &ifmgd->csa_connection_drop_work); return; } - mutex_lock(&sdata->local->chanctx_mtx); + mutex_lock(&local->chanctx_mtx); if 
(WARN_ON(!rcu_access_pointer(sdata->vif.chanctx_conf))) { - mutex_unlock(&sdata->local->chanctx_mtx); + mutex_unlock(&local->chanctx_mtx); return; } chanctx = container_of(rcu_access_pointer(sdata->vif.chanctx_conf), @@ -802,39 +1207,39 @@ void ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, if (chanctx->refcount > 1) { sdata_info(sdata, "channel switch with multiple interfaces on the same channel, disconnecting\n"); - ieee80211_queue_work(&sdata->local->hw, + ieee80211_queue_work(&local->hw, &ifmgd->csa_connection_drop_work); - mutex_unlock(&sdata->local->chanctx_mtx); + mutex_unlock(&local->chanctx_mtx); return; } - mutex_unlock(&sdata->local->chanctx_mtx); + mutex_unlock(&local->chanctx_mtx); - sdata->local->csa_channel = new_ch; + local->csa_chandef = new_chandef; - if (sw_elem->mode) - ieee80211_stop_queues_by_reason(&sdata->local->hw, + if (mode) + ieee80211_stop_queues_by_reason(&local->hw, + IEEE80211_MAX_QUEUE_MAP, IEEE80211_QUEUE_STOP_REASON_CSA); - if (sdata->local->ops->channel_switch) { + if (local->ops->channel_switch) { /* use driver's channel switch callback */ struct ieee80211_channel_switch ch_switch = { .timestamp = timestamp, - .block_tx = sw_elem->mode, - .channel = new_ch, - .count = sw_elem->count, + .block_tx = mode, + .chandef = new_chandef, + .count = count, }; - drv_channel_switch(sdata->local, &ch_switch); + drv_channel_switch(local, &ch_switch); return; } /* channel switch handled in software */ - if (sw_elem->count <= 1) - ieee80211_queue_work(&sdata->local->hw, &ifmgd->chswitch_work); + if (count <= 1) + ieee80211_queue_work(&local->hw, &ifmgd->chswitch_work); else mod_timer(&ifmgd->chswitch_timer, - TU_TO_EXP_TIME(sw_elem->count * - cbss->beacon_interval)); + TU_TO_EXP_TIME(count * cbss->beacon_interval)); } static u32 ieee80211_handle_pwr_constr(struct ieee80211_sub_if_data *sdata, @@ -907,39 +1312,6 @@ static u32 ieee80211_handle_pwr_constr(struct ieee80211_sub_if_data *sdata, return 0; } -void ieee80211_enable_dyn_ps(struct ieee80211_vif *vif) -{ - struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); - struct ieee80211_local *local = sdata->local; - struct ieee80211_conf *conf = &local->hw.conf; - - WARN_ON(sdata->vif.type != NL80211_IFTYPE_STATION || - !(local->hw.flags & IEEE80211_HW_SUPPORTS_PS) || - (local->hw.flags & IEEE80211_HW_SUPPORTS_DYNAMIC_PS)); - - local->disable_dynamic_ps = false; - conf->dynamic_ps_timeout = local->dynamic_ps_user_timeout; -} -EXPORT_SYMBOL(ieee80211_enable_dyn_ps); - -void ieee80211_disable_dyn_ps(struct ieee80211_vif *vif) -{ - struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); - struct ieee80211_local *local = sdata->local; - struct ieee80211_conf *conf = &local->hw.conf; - - WARN_ON(sdata->vif.type != NL80211_IFTYPE_STATION || - !(local->hw.flags & IEEE80211_HW_SUPPORTS_PS) || - (local->hw.flags & IEEE80211_HW_SUPPORTS_DYNAMIC_PS)); - - local->disable_dynamic_ps = true; - conf->dynamic_ps_timeout = 0; - del_timer_sync(&local->dynamic_ps_timer); - ieee80211_queue_work(&local->hw, - &local->dynamic_ps_enable_work); -} -EXPORT_SYMBOL(ieee80211_disable_dyn_ps); - /* powersave */ static void ieee80211_enable_ps(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata) @@ -1042,7 +1414,6 @@ void ieee80211_recalc_ps(struct ieee80211_local *local, s32 latency) } if (count == 1 && ieee80211_powersave_allowed(found)) { - struct ieee80211_conf *conf = &local->hw.conf; s32 beaconint_us; if (latency < 0) @@ -1066,10 +1437,7 @@ void ieee80211_recalc_ps(struct ieee80211_local *local, s32 
latency) else timeout = 100; } - local->dynamic_ps_user_timeout = timeout; - if (!local->disable_dynamic_ps) - conf->dynamic_ps_timeout = - local->dynamic_ps_user_timeout; + local->hw.conf.dynamic_ps_timeout = timeout; if (beaconint_us > latency) { local->ps_sdata = NULL; @@ -1117,6 +1485,7 @@ void ieee80211_dynamic_ps_disable_work(struct work_struct *work) } ieee80211_wake_queues_by_reason(&local->hw, + IEEE80211_MAX_QUEUE_MAP, IEEE80211_QUEUE_STOP_REASON_PS); } @@ -1139,8 +1508,7 @@ void ieee80211_dynamic_ps_enable_work(struct work_struct *work) if (local->hw.conf.flags & IEEE80211_CONF_PS) return; - if (!local->disable_dynamic_ps && - local->hw.conf.dynamic_ps_timeout > 0) { + if (local->hw.conf.dynamic_ps_timeout > 0) { /* don't enter PS if TX frames are pending */ if (drv_tx_frames_pending(local)) { mod_timer(&local->dynamic_ps_timer, jiffies + @@ -1170,16 +1538,14 @@ void ieee80211_dynamic_ps_enable_work(struct work_struct *work) if ((local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK) && !(ifmgd->flags & IEEE80211_STA_NULLFUNC_ACKED)) { - netif_tx_stop_all_queues(sdata->dev); - - if (drv_tx_frames_pending(local)) + if (drv_tx_frames_pending(local)) { mod_timer(&local->dynamic_ps_timer, jiffies + msecs_to_jiffies( local->hw.conf.dynamic_ps_timeout)); - else { + } else { ieee80211_send_nullfunc(local, sdata, 1); /* Flush to get the tx status of nullfunc frame */ - drv_flush(local, false); + ieee80211_flush_queues(local, sdata); } } @@ -1190,9 +1556,6 @@ void ieee80211_dynamic_ps_enable_work(struct work_struct *work) local->hw.conf.flags |= IEEE80211_CONF_PS; ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS); } - - if (local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK) - netif_tx_wake_all_queues(sdata->dev); } void ieee80211_dynamic_ps_timer(unsigned long data) @@ -1205,16 +1568,30 @@ void ieee80211_dynamic_ps_timer(unsigned long data) ieee80211_queue_work(&local->hw, &local->dynamic_ps_enable_work); } +void ieee80211_dfs_cac_timer_work(struct work_struct *work) +{ + struct delayed_work *delayed_work = + container_of(work, struct delayed_work, work); + struct ieee80211_sub_if_data *sdata = + container_of(delayed_work, struct ieee80211_sub_if_data, + dfs_cac_timer_work); + + ieee80211_vif_release_channel(sdata); + + cfg80211_cac_event(sdata->dev, NL80211_RADAR_CAC_FINISHED, GFP_KERNEL); +} + /* MLME */ static bool ieee80211_sta_wmm_params(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, - u8 *wmm_param, size_t wmm_param_len) + const u8 *wmm_param, size_t wmm_param_len) { struct ieee80211_tx_queue_params params; struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; size_t left; int count; - u8 *pos, uapsd_queues = 0; + const u8 *pos; + u8 uapsd_queues = 0; if (!local->ops->conf_tx) return false; @@ -1284,6 +1661,7 @@ static bool ieee80211_sta_wmm_params(struct ieee80211_local *local, params.cw_max = ecw2cw((pos[1] & 0xf0) >> 4); params.cw_min = ecw2cw(pos[1] & 0x0f); params.txop = get_unaligned_le16(pos + 2); + params.acm = acm; params.uapsd = uapsd; mlme_dbg(sdata, @@ -1371,7 +1749,7 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata, bss_conf->assoc_capability, bss->has_erp_value, bss->erp_value); sdata->u.mgd.beacon_timeout = usecs_to_jiffies(ieee80211_tu_to_usec( - IEEE80211_BEACON_LOSS_COUNT * bss_conf->beacon_int)); + beacon_loss_count * bss_conf->beacon_int)); sdata->u.mgd.associated = cbss; memcpy(sdata->u.mgd.bssid, cbss->bssid, ETH_ALEN); @@ -1384,18 +1762,17 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata, 
rcu_read_lock(); ies = rcu_dereference(cbss->ies); if (ies) { - u8 noa[2]; int ret; ret = cfg80211_get_p2p_attr( ies->data, ies->len, IEEE80211_P2P_ATTR_ABSENCE_NOTICE, - noa, sizeof(noa)); + (u8 *) &bss_conf->p2p_noa_attr, + sizeof(bss_conf->p2p_noa_attr)); if (ret >= 2) { - bss_conf->p2p_oppps = noa[1] & 0x80; - bss_conf->p2p_ctwindow = noa[1] & 0x7f; + sdata->u.mgd.p2p_noa_index = + bss_conf->p2p_noa_attr.index; bss_info_changed |= BSS_CHANGED_P2P_PS; - sdata->u.mgd.p2p_noa_index = noa[0]; } } rcu_read_unlock(); @@ -1406,7 +1783,7 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata, ieee80211_led_assoc(local, 1); - if (local->hw.flags & IEEE80211_HW_NEED_DTIM_PERIOD) { + if (sdata->u.mgd.assoc_data->have_beacon) { /* * If the AP is buggy we may get here with no DTIM period * known, so assume it's 1 which is the only safe assumption @@ -1414,6 +1791,7 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata, * probably just won't work at all. */ bss_conf->dtim_period = sdata->u.mgd.dtim_period ?: 1; + bss_info_changed |= BSS_CHANGED_DTIM_PERIOD; } else { bss_conf->dtim_period = 0; } @@ -1426,10 +1804,8 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata, bss_info_changed |= BSS_CHANGED_CQM; /* Enable ARP filtering */ - if (bss_conf->arp_filter_enabled != sdata->arp_filter_state) { - bss_conf->arp_filter_enabled = sdata->arp_filter_state; + if (bss_conf->arp_addr_cnt) bss_info_changed |= BSS_CHANGED_ARP_FILTER; - } ieee80211_bss_info_change_notify(sdata, bss_info_changed); @@ -1440,7 +1816,6 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata, ieee80211_recalc_smps(sdata); ieee80211_recalc_ps_vif(sdata); - netif_tx_start_all_queues(sdata->dev); netif_carrier_on(sdata->dev); } @@ -1450,7 +1825,6 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, { struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; struct ieee80211_local *local = sdata->local; - struct sta_info *sta; u32 changed = 0; ASSERT_MGD_MTX(ifmgd); @@ -1464,32 +1838,8 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, ieee80211_stop_poll(sdata); ifmgd->associated = NULL; - - /* - * we need to commit the associated = NULL change because the - * scan code uses that to determine whether this iface should - * go to/wake up from powersave or not -- and could otherwise - * wake the queues erroneously. - */ - smp_mb(); - - /* - * Thus, we can only afterwards stop the queues -- to account - * for the case where another CPU is finishing a scan at this - * time -- we don't want the scan code to enable queues. - */ - - netif_tx_stop_all_queues(sdata->dev); netif_carrier_off(sdata->dev); - mutex_lock(&local->sta_mtx); - sta = sta_info_get(sdata, ifmgd->bssid); - if (sta) { - set_sta_flag(sta, WLAN_STA_BLOCK_BA); - ieee80211_sta_tear_down_BA_sessions(sta, false); - } - mutex_unlock(&local->sta_mtx); - /* * if we want to get out of ps before disassoc (why?) we have * to do it before sending disassoc, as otherwise the null-packet @@ -1506,7 +1856,7 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, /* flush out any pending frame (e.g. 
DELBA) before deauth/disassoc */ if (tx) - drv_flush(local, false); + ieee80211_flush_queues(local, sdata); /* deauthenticate/disassociate now */ if (tx || frame_buf) @@ -1515,13 +1865,13 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, /* flush out frame */ if (tx) - drv_flush(local, false); + ieee80211_flush_queues(local, sdata); /* clear bssid only after building the needed mgmt frames */ memset(ifmgd->bssid, 0, ETH_ALEN); /* remove AP and TDLS peers */ - sta_info_flush(local, sdata); + sta_info_flush_defer(sdata); /* finally reset all BSS / config parameters */ changed |= ieee80211_reset_erp_info(sdata); @@ -1530,12 +1880,15 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, changed |= BSS_CHANGED_ASSOC; sdata->vif.bss_conf.assoc = false; - sdata->vif.bss_conf.p2p_ctwindow = 0; - sdata->vif.bss_conf.p2p_oppps = false; + ifmgd->p2p_noa_index = -1; + memset(&sdata->vif.bss_conf.p2p_noa_attr, 0, + sizeof(sdata->vif.bss_conf.p2p_noa_attr)); - /* on the next assoc, re-program HT parameters */ + /* on the next assoc, re-program HT/VHT parameters */ memset(&ifmgd->ht_capa, 0, sizeof(ifmgd->ht_capa)); memset(&ifmgd->ht_capa_mask, 0, sizeof(ifmgd->ht_capa_mask)); + memset(&ifmgd->vht_capa, 0, sizeof(ifmgd->vht_capa)); + memset(&ifmgd->vht_capa_mask, 0, sizeof(ifmgd->vht_capa_mask)); sdata->ap_power_level = IEEE80211_UNSET_POWER_LEVEL; @@ -1543,10 +1896,8 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, cancel_work_sync(&local->dynamic_ps_enable_work); /* Disable ARP filtering */ - if (sdata->vif.bss_conf.arp_filter_enabled) { - sdata->vif.bss_conf.arp_filter_enabled = false; + if (sdata->vif.bss_conf.arp_addr_cnt) changed |= BSS_CHANGED_ARP_FILTER; - } sdata->vif.bss_conf.qos = false; changed |= BSS_CHANGED_QOS; @@ -1563,8 +1914,6 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, del_timer_sync(&sdata->u.mgd.timer); del_timer_sync(&sdata->u.mgd.chswitch_timer); - sdata->u.mgd.timers_running = 0; - sdata->vif.bss_conf.dtim_period = 0; ifmgd->flags = 0; @@ -1629,17 +1978,18 @@ void ieee80211_sta_tx_notify(struct ieee80211_sub_if_data *sdata, if (!ieee80211_is_data(hdr->frame_control)) return; - if (ack) - ieee80211_sta_reset_conn_monitor(sdata); - if (ieee80211_is_nullfunc(hdr->frame_control) && sdata->u.mgd.probe_send_count > 0) { if (ack) - sdata->u.mgd.probe_send_count = 0; + ieee80211_sta_reset_conn_monitor(sdata); else sdata->u.mgd.nullfunc_failed = true; ieee80211_queue_work(&sdata->local->hw, &sdata->work); + return; } + + if (ack) + ieee80211_sta_reset_conn_monitor(sdata); } static void ieee80211_mgd_probe_ap_send(struct ieee80211_sub_if_data *sdata) @@ -1680,7 +2030,7 @@ static void ieee80211_mgd_probe_ap_send(struct ieee80211_sub_if_data *sdata) ssid_len = ssid[1]; ieee80211_send_probe_req(sdata, dst, ssid + 2, ssid_len, NULL, - 0, (u32) -1, true, false, + 0, (u32) -1, true, 0, ifmgd->associated->channel, false); rcu_read_unlock(); } @@ -1688,7 +2038,7 @@ static void ieee80211_mgd_probe_ap_send(struct ieee80211_sub_if_data *sdata) ifmgd->probe_timeout = jiffies + msecs_to_jiffies(probe_wait_ms); run_again(ifmgd, ifmgd->probe_timeout); if (sdata->local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS) - drv_flush(sdata->local, false); + ieee80211_flush_queues(sdata->local, sdata); } static void ieee80211_mgd_probe_ap(struct ieee80211_sub_if_data *sdata, @@ -1712,12 +2062,15 @@ static void ieee80211_mgd_probe_ap(struct ieee80211_sub_if_data *sdata, goto out; } - if (beacon) + if (beacon) { 
mlme_dbg_ratelimited(sdata, - "detected beacon loss from AP - sending probe request\n"); + "detected beacon loss from AP (missed %d beacons) - probing\n", + beacon_loss_count); - ieee80211_cqm_rssi_notify(&sdata->vif, - NL80211_CQM_RSSI_BEACON_LOSS_EVENT, GFP_KERNEL); + ieee80211_cqm_rssi_notify(&sdata->vif, + NL80211_CQM_RSSI_BEACON_LOSS_EVENT, + GFP_KERNEL); + } /* * The driver/our work has already reported this event or the @@ -1795,11 +2148,9 @@ struct sk_buff *ieee80211_ap_probereq_get(struct ieee80211_hw *hw, } EXPORT_SYMBOL(ieee80211_ap_probereq_get); -static void __ieee80211_disconnect(struct ieee80211_sub_if_data *sdata, - bool transmit_frame) +static void __ieee80211_disconnect(struct ieee80211_sub_if_data *sdata) { struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; - struct ieee80211_local *local = sdata->local; u8 frame_buf[IEEE80211_DEAUTH_FRAME_LEN]; mutex_lock(&ifmgd->mtx); @@ -1810,8 +2161,11 @@ static void __ieee80211_disconnect(struct ieee80211_sub_if_data *sdata, ieee80211_set_disassoc(sdata, IEEE80211_STYPE_DEAUTH, WLAN_REASON_DISASSOC_DUE_TO_INACTIVITY, - transmit_frame, frame_buf); + true, frame_buf); ifmgd->flags &= ~IEEE80211_STA_CSA_RECEIVED; + ieee80211_wake_queues_by_reason(&sdata->local->hw, + IEEE80211_MAX_QUEUE_MAP, + IEEE80211_QUEUE_STOP_REASON_CSA); mutex_unlock(&ifmgd->mtx); /* @@ -1819,10 +2173,6 @@ static void __ieee80211_disconnect(struct ieee80211_sub_if_data *sdata, * but that's not a problem. */ cfg80211_send_deauth(sdata->dev, frame_buf, IEEE80211_DEAUTH_FRAME_LEN); - - mutex_lock(&local->mtx); - ieee80211_recalc_idle(local); - mutex_unlock(&local->mtx); } static void ieee80211_beacon_connection_loss_work(struct work_struct *work) @@ -1841,10 +2191,10 @@ static void ieee80211_beacon_connection_loss_work(struct work_struct *work) rcu_read_unlock(); } - if (sdata->local->hw.flags & IEEE80211_HW_CONNECTION_MONITOR) { + if (ifmgd->connection_loss) { sdata_info(sdata, "Connection to AP %pM lost\n", ifmgd->bssid); - __ieee80211_disconnect(sdata, false); + __ieee80211_disconnect(sdata); } else { ieee80211_mgd_probe_ap(sdata, true); } @@ -1856,9 +2206,7 @@ static void ieee80211_csa_connection_drop_work(struct work_struct *work) container_of(work, struct ieee80211_sub_if_data, u.mgd.csa_connection_drop_work); - ieee80211_wake_queues_by_reason(&sdata->local->hw, - IEEE80211_QUEUE_STOP_REASON_CSA); - __ieee80211_disconnect(sdata, true); + __ieee80211_disconnect(sdata); } void ieee80211_beacon_loss(struct ieee80211_vif *vif) @@ -1868,7 +2216,7 @@ void ieee80211_beacon_loss(struct ieee80211_vif *vif) trace_api_beacon_loss(sdata); - WARN_ON(hw->flags & IEEE80211_HW_CONNECTION_MONITOR); + sdata->u.mgd.connection_loss = false; ieee80211_queue_work(hw, &sdata->u.mgd.beacon_connection_loss_work); } EXPORT_SYMBOL(ieee80211_beacon_loss); @@ -1880,7 +2228,7 @@ void ieee80211_connection_loss(struct ieee80211_vif *vif) trace_api_connection_loss(sdata); - WARN_ON(!(hw->flags & IEEE80211_HW_CONNECTION_MONITOR)); + sdata->u.mgd.connection_loss = true; ieee80211_queue_work(hw, &sdata->u.mgd.beacon_connection_loss_work); } EXPORT_SYMBOL(ieee80211_connection_loss); @@ -1902,7 +2250,7 @@ static void ieee80211_destroy_auth_data(struct ieee80211_sub_if_data *sdata, ieee80211_vif_release_channel(sdata); } - cfg80211_put_bss(auth_data->bss); + cfg80211_put_bss(sdata->local->hw.wiphy, auth_data->bss); kfree(auth_data); sdata->u.mgd.auth_data = NULL; } @@ -1910,21 +2258,26 @@ static void ieee80211_destroy_auth_data(struct ieee80211_sub_if_data *sdata, static void 
ieee80211_auth_challenge(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt, size_t len) { + struct ieee80211_local *local = sdata->local; struct ieee80211_mgd_auth_data *auth_data = sdata->u.mgd.auth_data; u8 *pos; struct ieee802_11_elems elems; + u32 tx_flags = 0; pos = mgmt->u.auth.variable; - ieee802_11_parse_elems(pos, len - (pos - (u8 *) mgmt), &elems); + ieee802_11_parse_elems(pos, len - (pos - (u8 *) mgmt), false, &elems); if (!elems.challenge) return; auth_data->expected_transaction = 4; drv_mgd_prepare_tx(sdata->local, sdata); + if (local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS) + tx_flags = IEEE80211_TX_CTL_REQ_TX_STATUS | + IEEE80211_TX_INTFL_MLME_CONN_TX; ieee80211_send_auth(sdata, 3, auth_data->algorithm, 0, elems.challenge - 2, elems.challenge_len + 2, auth_data->bss->bssid, auth_data->bss->bssid, auth_data->key, auth_data->key_len, - auth_data->key_idx); + auth_data->key_idx, tx_flags); } static enum rx_mgmt_action __must_check @@ -1991,6 +2344,7 @@ ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata, sdata_info(sdata, "authenticated\n"); ifmgd->auth_data->done = true; ifmgd->auth_data->timeout = jiffies + IEEE80211_AUTH_WAIT_ASSOC; + ifmgd->auth_data->timeout_started = true; run_again(ifmgd, ifmgd->auth_data->timeout); if (ifmgd->auth_data->algorithm == WLAN_AUTH_SAE && @@ -2049,10 +2403,6 @@ ieee80211_rx_mgmt_deauth(struct ieee80211_sub_if_data *sdata, ieee80211_set_disassoc(sdata, 0, 0, false, NULL); - mutex_lock(&sdata->local->mtx); - ieee80211_recalc_idle(sdata->local); - mutex_unlock(&sdata->local->mtx); - return RX_MGMT_CFG80211_DEAUTH; } @@ -2080,10 +2430,6 @@ ieee80211_rx_mgmt_disassoc(struct ieee80211_sub_if_data *sdata, ieee80211_set_disassoc(sdata, 0, 0, false, NULL); - mutex_lock(&sdata->local->mtx); - ieee80211_recalc_idle(sdata->local); - mutex_unlock(&sdata->local->mtx); - return RX_MGMT_CFG80211_DISASSOC; } @@ -2184,7 +2530,7 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata, } pos = mgmt->u.assoc_resp.variable; - ieee802_11_parse_elems(pos, len - (pos - (u8 *) mgmt), &elems); + ieee802_11_parse_elems(pos, len - (pos - (u8 *) mgmt), false, &elems); if (!elems.supp_rates) { sdata_info(sdata, "no SuppRates element in AssocResp\n"); @@ -2193,6 +2539,24 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata, ifmgd->aid = aid; + /* + * We previously checked these in the beacon/probe response, so + * they should be present here. This is just a safety net. 
+ */ + if (!(ifmgd->flags & IEEE80211_STA_DISABLE_HT) && + (!elems.wmm_param || !elems.ht_cap_elem || !elems.ht_operation)) { + sdata_info(sdata, + "HT AP is missing WMM params or HT capability/operation in AssocResp\n"); + return false; + } + + if (!(ifmgd->flags & IEEE80211_STA_DISABLE_VHT) && + (!elems.vht_cap_elem || !elems.vht_operation)) { + sdata_info(sdata, + "VHT AP is missing VHT capability/operation in AssocResp\n"); + return false; + } + mutex_lock(&sdata->local->sta_mtx); /* * station info was already allocated and inserted before @@ -2206,17 +2570,36 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata, sband = local->hw.wiphy->bands[ieee80211_get_sdata_band(sdata)]; + /* Set up internal HT/VHT capabilities */ if (elems.ht_cap_elem && !(ifmgd->flags & IEEE80211_STA_DISABLE_HT)) ieee80211_ht_cap_ie_to_sta_ht_cap(sdata, sband, - elems.ht_cap_elem, &sta->sta.ht_cap); - - sta->supports_40mhz = - sta->sta.ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40; + elems.ht_cap_elem, sta); if (elems.vht_cap_elem && !(ifmgd->flags & IEEE80211_STA_DISABLE_VHT)) ieee80211_vht_cap_ie_to_sta_vht_cap(sdata, sband, - elems.vht_cap_elem, - &sta->sta.vht_cap); + elems.vht_cap_elem, sta); + + /* + * Some APs, e.g. Netgear WNDR3700, report invalid HT operation data + * in their association response, so ignore that data for our own + * configuration. If it changed since the last beacon, we'll get the + * next beacon and update then. + */ + + /* + * If an operating mode notification IE is present, override the + * NSS calculation (that would be done in rate_control_rate_init()) + * and use the # of streams from that element. + */ + if (elems.opmode_notif && + !(*elems.opmode_notif & IEEE80211_OPMODE_NOTIF_RX_NSS_TYPE_BF)) { + u8 nss; + + nss = *elems.opmode_notif & IEEE80211_OPMODE_NOTIF_RX_NSS_MASK; + nss >>= IEEE80211_OPMODE_NOTIF_RX_NSS_SHIFT; + nss += 1; + sta->sta.rx_nss = nss; + } rate_control_rate_init(sta); @@ -2226,9 +2609,7 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata, if (elems.wmm_param) set_sta_flag(sta, WLAN_STA_WME); - err = sta_info_move_state(sta, IEEE80211_STA_AUTH); - if (!err) - err = sta_info_move_state(sta, IEEE80211_STA_ASSOC); + err = sta_info_move_state(sta, IEEE80211_STA_ASSOC); if (!err && !(ifmgd->flags & IEEE80211_STA_CONTROL_PORT)) err = sta_info_move_state(sta, IEEE80211_STA_AUTHORIZED); if (err) { @@ -2257,11 +2638,6 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata, ieee80211_set_wmm_default(sdata, false); changed |= BSS_CHANGED_QOS; - if (elems.ht_operation && elems.wmm_param && - !(ifmgd->flags & IEEE80211_STA_DISABLE_HT)) - changed |= ieee80211_config_ht_tx(sdata, elems.ht_operation, - cbss->bssid, false); - /* set AID and assoc capability, * ieee80211_set_associated() will tell the driver */ bss_conf->aid = aid; @@ -2323,18 +2699,19 @@ ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, capab_info, status_code, (u16)(aid & ~(BIT(15) | BIT(14)))); pos = mgmt->u.assoc_resp.variable; - ieee802_11_parse_elems(pos, len - (pos - (u8 *) mgmt), &elems); + ieee802_11_parse_elems(pos, len - (pos - (u8 *) mgmt), false, &elems); if (status_code == WLAN_STATUS_ASSOC_REJECTED_TEMPORARILY && - elems.timeout_int && elems.timeout_int_len == 5 && - elems.timeout_int[0] == WLAN_TIMEOUT_ASSOC_COMEBACK) { + elems.timeout_int && + elems.timeout_int->type == WLAN_TIMEOUT_ASSOC_COMEBACK) { u32 tu, ms; - tu = get_unaligned_le32(elems.timeout_int + 1); + tu = le32_to_cpu(elems.timeout_int->value); ms = tu 
* 1024 / 1000; sdata_info(sdata, "%pM rejected association temporarily; comeback duration %u TU (%u ms)\n", mgmt->sa, tu, ms); assoc_data->timeout = jiffies + msecs_to_jiffies(ms); + assoc_data->timeout_started = true; if (ms > IEEE80211_ASSOC_TIMEOUT) run_again(ifmgd, assoc_data->timeout); return RX_MGMT_NONE; @@ -2350,7 +2727,7 @@ ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, if (!ieee80211_assoc_success(sdata, *bss, mgmt, len)) { /* oops -- internal error -- send timeout for now */ ieee80211_destroy_assoc_data(sdata, false); - cfg80211_put_bss(*bss); + cfg80211_put_bss(sdata->local->hw.wiphy, *bss); return RX_MGMT_CFG80211_ASSOC_TIMEOUT; } sdata_info(sdata, "associated\n"); @@ -2369,8 +2746,7 @@ ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt, size_t len, struct ieee80211_rx_status *rx_status, - struct ieee802_11_elems *elems, - bool beacon) + struct ieee802_11_elems *elems) { struct ieee80211_local *local = sdata->local; int freq; @@ -2378,6 +2754,8 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, struct ieee80211_channel *channel; bool need_ps = false; + lockdep_assert_held(&sdata->u.mgd.mtx); + if ((sdata->u.mgd.associated && ether_addr_equal(mgmt->bssid, sdata->u.mgd.associated->bssid)) || (sdata->u.mgd.assoc_data && @@ -2387,12 +2765,12 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, need_ps = sdata->u.mgd.associated && !sdata->u.mgd.dtim_period; if (elems->tim && !elems->parse_error) { - struct ieee80211_tim_ie *tim_ie = elems->tim; + const struct ieee80211_tim_ie *tim_ie = elems->tim; sdata->u.mgd.dtim_period = tim_ie->dtim_period; } } - if (elems->ds_params && elems->ds_params_len == 1) + if (elems->ds_params) freq = ieee80211_channel_to_frequency(elems->ds_params[0], rx_status->band); else @@ -2404,11 +2782,12 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, return; bss = ieee80211_bss_info_update(local, rx_status, mgmt, len, elems, - channel, beacon); + channel); if (bss) ieee80211_rx_bss_put(local, bss); - if (!sdata->u.mgd.associated) + if (!sdata->u.mgd.associated || + !ether_addr_equal(mgmt->bssid, sdata->u.mgd.associated->bssid)) return; if (need_ps) { @@ -2417,10 +2796,8 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, mutex_unlock(&local->iflist_mtx); } - if (elems->ch_switch_ie && - memcmp(mgmt->bssid, sdata->u.mgd.associated->bssid, ETH_ALEN) == 0) - ieee80211_sta_process_chanswitch(sdata, elems->ch_switch_ie, - bss, rx_status->mactime); + ieee80211_sta_process_chanswitch(sdata, rx_status->mactime, elems); + } @@ -2445,9 +2822,9 @@ static void ieee80211_rx_mgmt_probe_resp(struct ieee80211_sub_if_data *sdata, return; ieee802_11_parse_elems(mgmt->u.probe_resp.variable, len - baselen, - &elems); + false, &elems); - ieee80211_rx_bss_info(sdata, mgmt, len, rx_status, &elems, false); + ieee80211_rx_bss_info(sdata, mgmt, len, rx_status, &elems); if (ifmgd->associated && ether_addr_equal(mgmt->bssid, ifmgd->associated->bssid)) @@ -2459,6 +2836,7 @@ static void ieee80211_rx_mgmt_probe_resp(struct ieee80211_sub_if_data *sdata, sdata_info(sdata, "direct probe responded\n"); ifmgd->auth_data->tries = 0; ifmgd->auth_data->timeout = jiffies; + ifmgd->auth_data->timeout_started = true; run_again(ifmgd, ifmgd->auth_data->timeout); } } @@ -2484,10 +2862,10 @@ static const u64 care_about_ies = (1ULL << WLAN_EID_HT_CAPABILITY) | (1ULL << 
WLAN_EID_HT_OPERATION); -static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, - struct ieee80211_mgmt *mgmt, - size_t len, - struct ieee80211_rx_status *rx_status) +static enum rx_mgmt_action +ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, + struct ieee80211_mgmt *mgmt, size_t len, + u8 *deauth_buf, struct ieee80211_rx_status *rx_status) { struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; struct ieee80211_bss_conf *bss_conf = &sdata->vif.bss_conf; @@ -2496,6 +2874,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, struct ieee80211_local *local = sdata->local; struct ieee80211_chanctx_conf *chanctx_conf; struct ieee80211_channel *chan; + struct sta_info *sta; u32 changed = 0; bool erp_valid; u8 erp_value = 0; @@ -2507,40 +2886,51 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, /* Process beacon from the current BSS */ baselen = (u8 *) mgmt->u.beacon.variable - (u8 *) mgmt; if (baselen > len) - return; + return RX_MGMT_NONE; rcu_read_lock(); chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf); if (!chanctx_conf) { rcu_read_unlock(); - return; + return RX_MGMT_NONE; } if (rx_status->freq != chanctx_conf->def.chan->center_freq) { rcu_read_unlock(); - return; + return RX_MGMT_NONE; } chan = chanctx_conf->def.chan; rcu_read_unlock(); - if (ifmgd->assoc_data && !ifmgd->assoc_data->have_beacon && + if (ifmgd->assoc_data && ifmgd->assoc_data->need_beacon && ether_addr_equal(mgmt->bssid, ifmgd->assoc_data->bss->bssid)) { ieee802_11_parse_elems(mgmt->u.beacon.variable, - len - baselen, &elems); + len - baselen, false, &elems); - ieee80211_rx_bss_info(sdata, mgmt, len, rx_status, &elems, - false); + ieee80211_rx_bss_info(sdata, mgmt, len, rx_status, &elems); ifmgd->assoc_data->have_beacon = true; - ifmgd->assoc_data->sent_assoc = false; + ifmgd->assoc_data->need_beacon = false; + if (local->hw.flags & IEEE80211_HW_TIMING_BEACON_ONLY) { + sdata->vif.bss_conf.sync_tsf = + le64_to_cpu(mgmt->u.beacon.timestamp); + sdata->vif.bss_conf.sync_device_ts = + rx_status->device_timestamp; + if (elems.tim) + sdata->vif.bss_conf.sync_dtim_count = + elems.tim->dtim_count; + else + sdata->vif.bss_conf.sync_dtim_count = 0; + } /* continue assoc process */ ifmgd->assoc_data->timeout = jiffies; + ifmgd->assoc_data->timeout_started = true; run_again(ifmgd, ifmgd->assoc_data->timeout); - return; + return RX_MGMT_NONE; } if (!ifmgd->associated || !ether_addr_equal(mgmt->bssid, ifmgd->associated->bssid)) - return; + return RX_MGMT_NONE; bssid = ifmgd->associated->bssid; /* Track average RSSI from the Beacon frames of the current AP */ @@ -2571,12 +2961,12 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, if (sig > ifmgd->rssi_max_thold && (last_sig <= ifmgd->rssi_min_thold || last_sig == 0)) { ifmgd->last_ave_beacon_signal = sig; - drv_rssi_callback(local, RSSI_EVENT_HIGH); + drv_rssi_callback(local, sdata, RSSI_EVENT_HIGH); } else if (sig < ifmgd->rssi_min_thold && (last_sig >= ifmgd->rssi_max_thold || last_sig == 0)) { ifmgd->last_ave_beacon_signal = sig; - drv_rssi_callback(local, RSSI_EVENT_LOW); + drv_rssi_callback(local, sdata, RSSI_EVENT_LOW); } } @@ -2606,7 +2996,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, if (ifmgd->flags & IEEE80211_STA_BEACON_POLL) { mlme_dbg_ratelimited(sdata, - "cancelling probereq poll due to a received beacon\n"); + "cancelling AP probe due to a received beacon\n"); mutex_lock(&local->mtx); ifmgd->flags &= ~IEEE80211_STA_BEACON_POLL; 
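/*
 * Sketch of the "beacon CRC" trick applied just below: hash only the
 * information elements we care about and skip the expensive reprocessing
 * when the hash matches the cached one.  mac80211 itself uses crc32_be()
 * with the care_about_ies bitmask; this standalone version substitutes
 * FNV-1a (and a 0..63 element-ID range) purely so it compiles anywhere.
 */
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

static uint32_t fnv1a(uint32_t h, const uint8_t *d, size_t len)
{
	while (len--)
		h = (h ^ *d++) * 16777619u;
	return h;
}

struct beacon_cache {
	uint32_t crc;
	bool	 crc_valid;
};

/*
 * ies is a sequence of (id, len, data...) elements; care is a bitmask of
 * element IDs whose changes we must act on.  Returns true when something
 * tracked actually changed.
 */
static bool beacon_changed(struct beacon_cache *c, const uint8_t *ies,
			   size_t len, uint64_t care)
{
	uint32_t h = 2166136261u;
	size_t i = 0;

	while (i + 2 <= len && i + 2 + ies[i + 1] <= len) {
		uint8_t id = ies[i], elen = ies[i + 1];

		if (id < 64 && (care & (1ULL << id)))
			h = fnv1a(h, &ies[i], 2 + (size_t)elen);
		i += 2 + elen;
	}

	if (c->crc_valid && c->crc == h)
		return false;	/* nothing we track changed: skip work */

	c->crc = h;
	c->crc_valid = true;
	return true;
}

int main(void)
{
	struct beacon_cache c = { 0 };
	const uint8_t b1[] = { 0x00, 3, 'a', 'p', '1', 0x2a, 1, 0x04 };

	/* the first beacon always "changes", an identical one does not */
	return (beacon_changed(&c, b1, sizeof(b1), 1ULL << 0x2a) &&
		!beacon_changed(&c, b1, sizeof(b1), 1ULL << 0x2a)) ? 0 : 1;
}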
ieee80211_run_deferred_scan(local); @@ -2625,7 +3015,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, ncrc = crc32_be(0, (void *)&mgmt->u.beacon.beacon_int, 4); ncrc = ieee802_11_parse_elems_crc(mgmt->u.beacon.variable, - len - baselen, &elems, + len - baselen, false, &elems, care_about_ies, ncrc); if (local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK) { @@ -2657,39 +3047,72 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, } if (sdata->vif.p2p) { - u8 noa[2]; + struct ieee80211_p2p_noa_attr noa = {}; int ret; ret = cfg80211_get_p2p_attr(mgmt->u.beacon.variable, len - baselen, IEEE80211_P2P_ATTR_ABSENCE_NOTICE, - noa, sizeof(noa)); - if (ret >= 2 && sdata->u.mgd.p2p_noa_index != noa[0]) { - bss_conf->p2p_oppps = noa[1] & 0x80; - bss_conf->p2p_ctwindow = noa[1] & 0x7f; + (u8 *) &noa, sizeof(noa)); + if (ret >= 2) { + if (sdata->u.mgd.p2p_noa_index != noa.index) { + /* valid noa_attr and index changed */ + sdata->u.mgd.p2p_noa_index = noa.index; + memcpy(&bss_conf->p2p_noa_attr, &noa, sizeof(noa)); + changed |= BSS_CHANGED_P2P_PS; + /* + * make sure we update all information, the CRC + * mechanism doesn't look at P2P attributes. + */ + ifmgd->beacon_crc_valid = false; + } + } else if (sdata->u.mgd.p2p_noa_index != -1) { + /* noa_attr not found and we had valid noa_attr before */ + sdata->u.mgd.p2p_noa_index = -1; + memset(&bss_conf->p2p_noa_attr, 0, sizeof(bss_conf->p2p_noa_attr)); changed |= BSS_CHANGED_P2P_PS; - sdata->u.mgd.p2p_noa_index = noa[0]; - /* - * make sure we update all information, the CRC - * mechanism doesn't look at P2P attributes. - */ ifmgd->beacon_crc_valid = false; } } if (ncrc == ifmgd->beacon_crc && ifmgd->beacon_crc_valid) - return; + return RX_MGMT_NONE; ifmgd->beacon_crc = ncrc; ifmgd->beacon_crc_valid = true; - ieee80211_rx_bss_info(sdata, mgmt, len, rx_status, &elems, - true); + ieee80211_rx_bss_info(sdata, mgmt, len, rx_status, &elems); if (ieee80211_sta_wmm_params(local, sdata, elems.wmm_param, elems.wmm_param_len)) changed |= BSS_CHANGED_QOS; - if (elems.erp_info && elems.erp_info_len >= 1) { + /* + * If we haven't had a beacon before, tell the driver about the + * DTIM period (and beacon timing if desired) now. 
+ */ + if (!bss_conf->dtim_period) { + /* a few bogus AP send dtim_period = 0 or no TIM IE */ + if (elems.tim) + bss_conf->dtim_period = elems.tim->dtim_period ?: 1; + else + bss_conf->dtim_period = 1; + + if (local->hw.flags & IEEE80211_HW_TIMING_BEACON_ONLY) { + sdata->vif.bss_conf.sync_tsf = + le64_to_cpu(mgmt->u.beacon.timestamp); + sdata->vif.bss_conf.sync_device_ts = + rx_status->device_timestamp; + if (elems.tim) + sdata->vif.bss_conf.sync_dtim_count = + elems.tim->dtim_count; + else + sdata->vif.bss_conf.sync_dtim_count = 0; + } + + changed |= BSS_CHANGED_DTIM_PERIOD; + } + + if (elems.erp_info) { erp_valid = true; erp_value = elems.erp_info[0]; } else { @@ -2699,11 +3122,22 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, le16_to_cpu(mgmt->u.beacon.capab_info), erp_valid, erp_value); + mutex_lock(&local->sta_mtx); + sta = sta_info_get(sdata, bssid); + + if (ieee80211_config_bw(sdata, sta, elems.ht_operation, + elems.vht_operation, bssid, &changed)) { + mutex_unlock(&local->sta_mtx); + ieee80211_set_disassoc(sdata, IEEE80211_STYPE_DEAUTH, + WLAN_REASON_DEAUTH_LEAVING, + true, deauth_buf); + return RX_MGMT_CFG80211_TX_DEAUTH; + } - if (elems.ht_cap_elem && elems.ht_operation && elems.wmm_param && - !(ifmgd->flags & IEEE80211_STA_DISABLE_HT)) - changed |= ieee80211_config_ht_tx(sdata, elems.ht_operation, - bssid, true); + if (sta && elems.opmode_notif) + ieee80211_vht_handle_opmode(sdata, sta, *elems.opmode_notif, + rx_status->band, true); + mutex_unlock(&local->sta_mtx); if (elems.country_elem && elems.pwr_constr_elem && mgmt->u.probe_resp.capab_info & @@ -2714,6 +3148,8 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, elems.pwr_constr_elem); ieee80211_bss_info_change_notify(sdata, changed); + + return RX_MGMT_NONE; } void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, @@ -2724,7 +3160,10 @@ void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt; struct cfg80211_bss *bss = NULL; enum rx_mgmt_action rma = RX_MGMT_NONE; + u8 deauth_buf[IEEE80211_DEAUTH_FRAME_LEN]; u16 fc; + struct ieee802_11_elems elems; + int ies_len; rx_status = (struct ieee80211_rx_status *) skb->cb; mgmt = (struct ieee80211_mgmt *) skb->data; @@ -2734,7 +3173,8 @@ void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, switch (fc & IEEE80211_FCTL_STYPE) { case IEEE80211_STYPE_BEACON: - ieee80211_rx_mgmt_beacon(sdata, mgmt, skb->len, rx_status); + rma = ieee80211_rx_mgmt_beacon(sdata, mgmt, skb->len, + deauth_buf, rx_status); break; case IEEE80211_STYPE_PROBE_RESP: ieee80211_rx_mgmt_probe_resp(sdata, skb); @@ -2753,14 +3193,48 @@ void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, rma = ieee80211_rx_mgmt_assoc_resp(sdata, mgmt, skb->len, &bss); break; case IEEE80211_STYPE_ACTION: - switch (mgmt->u.action.category) { - case WLAN_CATEGORY_SPECTRUM_MGMT: + if (mgmt->u.action.category == WLAN_CATEGORY_SPECTRUM_MGMT) { + ies_len = skb->len - + offsetof(struct ieee80211_mgmt, + u.action.u.chan_switch.variable); + + if (ies_len < 0) + break; + + ieee802_11_parse_elems( + mgmt->u.action.u.chan_switch.variable, + ies_len, true, &elems); + + if (elems.parse_error) + break; + ieee80211_sta_process_chanswitch(sdata, - &mgmt->u.action.u.chan_switch.sw_elem, - (void *)ifmgd->associated->priv, - rx_status->mactime); - break; + rx_status->mactime, + &elems); + } else if (mgmt->u.action.category == WLAN_CATEGORY_PUBLIC) { + ies_len = skb->len - + offsetof(struct ieee80211_mgmt, + 
u.action.u.ext_chan_switch.variable); + + if (ies_len < 0) + break; + + ieee802_11_parse_elems( + mgmt->u.action.u.ext_chan_switch.variable, + ies_len, true, &elems); + + if (elems.parse_error) + break; + + /* for the handling code pretend this was also an IE */ + elems.ext_chansw_ie = + &mgmt->u.action.u.ext_chan_switch.data; + + ieee80211_sta_process_chanswitch(sdata, + rx_status->mactime, + &elems); } + break; } mutex_unlock(&ifmgd->mtx); @@ -2783,6 +3257,10 @@ void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, case RX_MGMT_CFG80211_ASSOC_TIMEOUT: cfg80211_send_assoc_timeout(sdata->dev, mgmt->bssid); break; + case RX_MGMT_CFG80211_TX_DEAUTH: + cfg80211_send_deauth(sdata->dev, deauth_buf, + sizeof(deauth_buf)); + break; default: WARN(1, "unexpected: %d", rma); } @@ -2792,26 +3270,18 @@ static void ieee80211_sta_timer(unsigned long data) { struct ieee80211_sub_if_data *sdata = (struct ieee80211_sub_if_data *) data; - struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; - struct ieee80211_local *local = sdata->local; - - if (local->quiescing) { - set_bit(TMR_RUNNING_TIMER, &ifmgd->timers_running); - return; - } - ieee80211_queue_work(&local->hw, &sdata->work); + ieee80211_queue_work(&sdata->local->hw, &sdata->work); } static void ieee80211_sta_connection_lost(struct ieee80211_sub_if_data *sdata, - u8 *bssid, u8 reason) + u8 *bssid, u8 reason, bool tx) { - struct ieee80211_local *local = sdata->local; struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; u8 frame_buf[IEEE80211_DEAUTH_FRAME_LEN]; ieee80211_set_disassoc(sdata, IEEE80211_STYPE_DEAUTH, reason, - false, frame_buf); + tx, frame_buf); mutex_unlock(&ifmgd->mtx); /* @@ -2820,10 +3290,6 @@ static void ieee80211_sta_connection_lost(struct ieee80211_sub_if_data *sdata, */ cfg80211_send_deauth(sdata->dev, frame_buf, IEEE80211_DEAUTH_FRAME_LEN); - mutex_lock(&local->mtx); - ieee80211_recalc_idle(local); - mutex_unlock(&local->mtx); - mutex_lock(&ifmgd->mtx); } @@ -2832,12 +3298,17 @@ static int ieee80211_probe_auth(struct ieee80211_sub_if_data *sdata) struct ieee80211_local *local = sdata->local; struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; struct ieee80211_mgd_auth_data *auth_data = ifmgd->auth_data; + u32 tx_flags = 0; lockdep_assert_held(&ifmgd->mtx); if (WARN_ON_ONCE(!auth_data)) return -EINVAL; + if (local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS) + tx_flags = IEEE80211_TX_CTL_REQ_TX_STATUS | + IEEE80211_TX_INTFL_MLME_CONN_TX; + auth_data->tries++; if (auth_data->tries > IEEE80211_AUTH_MAX_TRIES) { @@ -2874,7 +3345,8 @@ static int ieee80211_probe_auth(struct ieee80211_sub_if_data *sdata) ieee80211_send_auth(sdata, trans, auth_data->algorithm, status, auth_data->data, auth_data->data_len, auth_data->bss->bssid, - auth_data->bss->bssid, NULL, 0, 0); + auth_data->bss->bssid, NULL, 0, 0, + tx_flags); } else { const u8 *ssidie; @@ -2893,13 +3365,18 @@ static int ieee80211_probe_auth(struct ieee80211_sub_if_data *sdata) * will not answer to direct packet in unassociated state. 
*/ ieee80211_send_probe_req(sdata, NULL, ssidie + 2, ssidie[1], - NULL, 0, (u32) -1, true, false, + NULL, 0, (u32) -1, true, tx_flags, auth_data->bss->channel, false); rcu_read_unlock(); } - auth_data->timeout = jiffies + IEEE80211_AUTH_TIMEOUT; - run_again(ifmgd, auth_data->timeout); + if (!(local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS)) { + auth_data->timeout = jiffies + IEEE80211_AUTH_TIMEOUT; + ifmgd->auth_data->timeout_started = true; + run_again(ifmgd, auth_data->timeout); + } else { + auth_data->timeout_started = false; + } return 0; } @@ -2930,12 +3407,29 @@ static int ieee80211_do_assoc(struct ieee80211_sub_if_data *sdata) IEEE80211_ASSOC_MAX_TRIES); ieee80211_send_assoc(sdata); - assoc_data->timeout = jiffies + IEEE80211_ASSOC_TIMEOUT; - run_again(&sdata->u.mgd, assoc_data->timeout); + if (!(local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS)) { + assoc_data->timeout = jiffies + IEEE80211_ASSOC_TIMEOUT; + assoc_data->timeout_started = true; + run_again(&sdata->u.mgd, assoc_data->timeout); + } else { + assoc_data->timeout_started = false; + } return 0; } +void ieee80211_mgd_conn_tx_status(struct ieee80211_sub_if_data *sdata, + __le16 fc, bool acked) +{ + struct ieee80211_local *local = sdata->local; + + sdata->u.mgd.status_fc = fc; + sdata->u.mgd.status_acked = acked; + sdata->u.mgd.status_received = true; + + ieee80211_queue_work(&local->hw, &sdata->work); +} + void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata) { struct ieee80211_local *local = sdata->local; @@ -2943,7 +3437,36 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata) mutex_lock(&ifmgd->mtx); - if (ifmgd->auth_data && + if (ifmgd->status_received) { + __le16 fc = ifmgd->status_fc; + bool status_acked = ifmgd->status_acked; + + ifmgd->status_received = false; + if (ifmgd->auth_data && + (ieee80211_is_probe_req(fc) || ieee80211_is_auth(fc))) { + if (status_acked) { + ifmgd->auth_data->timeout = + jiffies + IEEE80211_AUTH_TIMEOUT_SHORT; + run_again(ifmgd, ifmgd->auth_data->timeout); + } else { + ifmgd->auth_data->timeout = jiffies - 1; + } + ifmgd->auth_data->timeout_started = true; + } else if (ifmgd->assoc_data && + (ieee80211_is_assoc_req(fc) || + ieee80211_is_reassoc_req(fc))) { + if (status_acked) { + ifmgd->assoc_data->timeout = + jiffies + IEEE80211_ASSOC_TIMEOUT_SHORT; + run_again(ifmgd, ifmgd->assoc_data->timeout); + } else { + ifmgd->assoc_data->timeout = jiffies - 1; + } + ifmgd->assoc_data->timeout_started = true; + } + } + + if (ifmgd->auth_data && ifmgd->auth_data->timeout_started && time_after(jiffies, ifmgd->auth_data->timeout)) { if (ifmgd->auth_data->done) { /* @@ -2962,12 +3485,13 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata) cfg80211_send_auth_timeout(sdata->dev, bssid); mutex_lock(&ifmgd->mtx); } - } else if (ifmgd->auth_data) + } else if (ifmgd->auth_data && ifmgd->auth_data->timeout_started) run_again(ifmgd, ifmgd->auth_data->timeout); - if (ifmgd->assoc_data && + if (ifmgd->assoc_data && ifmgd->assoc_data->timeout_started && time_after(jiffies, ifmgd->assoc_data->timeout)) { - if (!ifmgd->assoc_data->have_beacon || + if ((ifmgd->assoc_data->need_beacon && + !ifmgd->assoc_data->have_beacon) || ieee80211_do_assoc(sdata)) { u8 bssid[ETH_ALEN]; @@ -2979,7 +3503,7 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata) cfg80211_send_assoc_timeout(sdata->dev, bssid); mutex_lock(&ifmgd->mtx); } - } else if (ifmgd->assoc_data) + } else if (ifmgd->assoc_data && ifmgd->assoc_data->timeout_started) run_again(ifmgd, ifmgd->assoc_data->timeout); if 
(ifmgd->flags & (IEEE80211_STA_BEACON_POLL | @@ -3010,7 +3534,8 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata) "No ack for nullfunc frame to AP %pM, disconnecting.\n", bssid); ieee80211_sta_connection_lost(sdata, bssid, - WLAN_REASON_DISASSOC_DUE_TO_INACTIVITY); + WLAN_REASON_DISASSOC_DUE_TO_INACTIVITY, + false); } } else if (time_is_after_jiffies(ifmgd->probe_timeout)) run_again(ifmgd, ifmgd->probe_timeout); @@ -3019,7 +3544,7 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata) "Failed to send nullfunc to AP %pM after %dms, disconnecting\n", bssid, probe_wait_ms); ieee80211_sta_connection_lost(sdata, bssid, - WLAN_REASON_DISASSOC_DUE_TO_INACTIVITY); + WLAN_REASON_DISASSOC_DUE_TO_INACTIVITY, false); } else if (ifmgd->probe_send_count < max_tries) { mlme_dbg(sdata, "No probe response from AP %pM after %dms, try %d/%i\n", @@ -3038,15 +3563,11 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata) bssid, probe_wait_ms); ieee80211_sta_connection_lost(sdata, bssid, - WLAN_REASON_DISASSOC_DUE_TO_INACTIVITY); + WLAN_REASON_DISASSOC_DUE_TO_INACTIVITY, false); } } mutex_unlock(&ifmgd->mtx); - - mutex_lock(&local->mtx); - ieee80211_recalc_idle(local); - mutex_unlock(&local->mtx); } static void ieee80211_sta_bcn_mon_timer(unsigned long data) @@ -3058,6 +3579,7 @@ static void ieee80211_sta_bcn_mon_timer(unsigned long data) if (local->quiescing) return; + sdata->u.mgd.connection_loss = false; ieee80211_queue_work(&sdata->local->hw, &sdata->u.mgd.beacon_connection_loss_work); } @@ -3101,68 +3623,6 @@ static void ieee80211_restart_sta_timer(struct ieee80211_sub_if_data *sdata) } } -#ifdef CONFIG_PM -void ieee80211_sta_quiesce(struct ieee80211_sub_if_data *sdata) -{ - struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; - - /* - * we need to use atomic bitops for the running bits - * only because both timers might fire at the same - * time -- the code here is properly synchronised. 
- */ - - cancel_work_sync(&ifmgd->request_smps_work); - - cancel_work_sync(&ifmgd->monitor_work); - cancel_work_sync(&ifmgd->beacon_connection_loss_work); - cancel_work_sync(&ifmgd->csa_connection_drop_work); - if (del_timer_sync(&ifmgd->timer)) - set_bit(TMR_RUNNING_TIMER, &ifmgd->timers_running); - - cancel_work_sync(&ifmgd->chswitch_work); - if (del_timer_sync(&ifmgd->chswitch_timer)) - set_bit(TMR_RUNNING_CHANSW, &ifmgd->timers_running); - - /* these will just be re-established on connection */ - del_timer_sync(&ifmgd->conn_mon_timer); - del_timer_sync(&ifmgd->bcn_mon_timer); -} - -void ieee80211_sta_restart(struct ieee80211_sub_if_data *sdata) -{ - struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; - - if (!ifmgd->associated) - return; - - if (sdata->flags & IEEE80211_SDATA_DISCONNECT_RESUME) { - sdata->flags &= ~IEEE80211_SDATA_DISCONNECT_RESUME; - mutex_lock(&ifmgd->mtx); - if (ifmgd->associated) { - mlme_dbg(sdata, - "driver requested disconnect after resume\n"); - ieee80211_sta_connection_lost(sdata, - ifmgd->associated->bssid, - WLAN_REASON_UNSPECIFIED); - mutex_unlock(&ifmgd->mtx); - return; - } - mutex_unlock(&ifmgd->mtx); - } - - if (test_and_clear_bit(TMR_RUNNING_TIMER, &ifmgd->timers_running)) - add_timer(&ifmgd->timer); - if (test_and_clear_bit(TMR_RUNNING_CHANSW, &ifmgd->timers_running)) - add_timer(&ifmgd->chswitch_timer); - ieee80211_sta_reset_beacon_monitor(sdata); - - mutex_lock(&sdata->local->mtx); - ieee80211_restart_sta_timer(sdata); - mutex_unlock(&sdata->local->mtx); -} -#endif - /* interface setup */ void ieee80211_sta_setup_sdata(struct ieee80211_sub_if_data *sdata) { @@ -3187,8 +3647,9 @@ void ieee80211_sta_setup_sdata(struct ieee80211_sub_if_data *sdata) ifmgd->flags = 0; ifmgd->powersave = sdata->wdev.ps; - ifmgd->uapsd_queues = IEEE80211_DEFAULT_UAPSD_QUEUES; - ifmgd->uapsd_max_sp_len = IEEE80211_DEFAULT_MAX_SP_LEN; + ifmgd->uapsd_queues = sdata->local->hw.uapsd_queues; + ifmgd->uapsd_max_sp_len = sdata->local->hw.uapsd_max_sp_len; + ifmgd->p2p_noa_index = -1; mutex_init(&ifmgd->mtx); @@ -3205,8 +3666,10 @@ void ieee80211_mlme_notify_scan_completed(struct ieee80211_local *local) /* Restart STA timers */ rcu_read_lock(); - list_for_each_entry_rcu(sdata, &local->interfaces, list) - ieee80211_restart_sta_timer(sdata); + list_for_each_entry_rcu(sdata, &local->interfaces, list) { + if (ieee80211_sdata_running(sdata)) + ieee80211_restart_sta_timer(sdata); + } rcu_read_unlock(); } @@ -3225,201 +3688,6 @@ int ieee80211_max_network_latency(struct notifier_block *nb, return 0; } -static u32 chandef_downgrade(struct cfg80211_chan_def *c) -{ - u32 ret; - int tmp; - - switch (c->width) { - case NL80211_CHAN_WIDTH_20: - c->width = NL80211_CHAN_WIDTH_20_NOHT; - ret = IEEE80211_STA_DISABLE_HT | IEEE80211_STA_DISABLE_VHT; - break; - case NL80211_CHAN_WIDTH_40: - c->width = NL80211_CHAN_WIDTH_20; - c->center_freq1 = c->chan->center_freq; - ret = IEEE80211_STA_DISABLE_40MHZ | - IEEE80211_STA_DISABLE_VHT; - break; - case NL80211_CHAN_WIDTH_80: - tmp = (30 + c->chan->center_freq - c->center_freq1)/20; - /* n_P40 */ - tmp /= 2; - /* freq_P40 */ - c->center_freq1 = c->center_freq1 - 20 + 40 * tmp; - c->width = NL80211_CHAN_WIDTH_40; - ret = IEEE80211_STA_DISABLE_VHT; - break; - case NL80211_CHAN_WIDTH_80P80: - c->center_freq2 = 0; - c->width = NL80211_CHAN_WIDTH_80; - ret = IEEE80211_STA_DISABLE_80P80MHZ | - IEEE80211_STA_DISABLE_160MHZ; - break; - case NL80211_CHAN_WIDTH_160: - /* n_P20 */ - tmp = (70 + c->chan->center_freq - c->center_freq1)/20; - /* n_P80 */ - tmp /= 4; - 
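/*
 * The chandef_downgrade() code removed here relies on this index
 * arithmetic to re-center the channel definition when halving the width.
 * A standalone sketch with a worked check: primary channel 36 (5180 MHz)
 * inside an 80 MHz block centered at 5210 MHz downgrades to the 40 MHz
 * pair centered at 5190 MHz.  Only the frequency math is modeled here;
 * the function names are illustrative.
 */
#include <assert.h>

/* 80 MHz -> 40 MHz: pick the 40 MHz half containing the primary channel */
static int center_80_to_40(int primary, int cf1_80)
{
	int n_p20 = (30 + primary - cf1_80) / 20;	/* 20 MHz index, 0..3 */
	int n_p40 = n_p20 / 2;				/* 40 MHz index, 0..1 */

	return cf1_80 - 20 + 40 * n_p40;
}

/* 160 MHz -> 80 MHz: the same idea one level up */
static int center_160_to_80(int primary, int cf1_160)
{
	int n_p20 = (70 + primary - cf1_160) / 20;	/* 20 MHz index, 0..7 */
	int n_p80 = n_p20 / 4;				/* 80 MHz index, 0..1 */

	return cf1_160 - 40 + 80 * n_p80;
}

int main(void)
{
	assert(center_80_to_40(5180, 5210) == 5190);
	assert(center_160_to_80(5180, 5250) == 5210);
	assert(center_160_to_80(5300, 5250) == 5290);
	return 0;
}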
c->center_freq1 = c->center_freq1 - 40 + 80 * tmp; - c->width = NL80211_CHAN_WIDTH_80; - ret = IEEE80211_STA_DISABLE_80P80MHZ | - IEEE80211_STA_DISABLE_160MHZ; - break; - default: - case NL80211_CHAN_WIDTH_20_NOHT: - WARN_ON_ONCE(1); - c->width = NL80211_CHAN_WIDTH_20_NOHT; - ret = IEEE80211_STA_DISABLE_HT | IEEE80211_STA_DISABLE_VHT; - break; - } - - WARN_ON_ONCE(!cfg80211_chandef_valid(c)); - - return ret; -} - -static u32 -ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata, - struct ieee80211_supported_band *sband, - struct ieee80211_channel *channel, - const struct ieee80211_ht_operation *ht_oper, - const struct ieee80211_vht_operation *vht_oper, - struct cfg80211_chan_def *chandef) -{ - struct cfg80211_chan_def vht_chandef; - u32 ht_cfreq, ret; - - chandef->chan = channel; - chandef->width = NL80211_CHAN_WIDTH_20_NOHT; - chandef->center_freq1 = channel->center_freq; - chandef->center_freq2 = 0; - - if (!ht_oper || !sband->ht_cap.ht_supported) { - ret = IEEE80211_STA_DISABLE_HT | IEEE80211_STA_DISABLE_VHT; - goto out; - } - - chandef->width = NL80211_CHAN_WIDTH_20; - - ht_cfreq = ieee80211_channel_to_frequency(ht_oper->primary_chan, - channel->band); - /* check that channel matches the right operating channel */ - if (channel->center_freq != ht_cfreq) { - /* - * It's possible that some APs are confused here; - * Netgear WNDR3700 sometimes reports 4 higher than - * the actual channel in association responses, but - * since we look at probe response/beacon data here - * it should be OK. - */ - sdata_info(sdata, - "Wrong control channel: center-freq: %d ht-cfreq: %d ht->primary_chan: %d band: %d - Disabling HT\n", - channel->center_freq, ht_cfreq, - ht_oper->primary_chan, channel->band); - ret = IEEE80211_STA_DISABLE_HT | IEEE80211_STA_DISABLE_VHT; - goto out; - } - - /* check 40 MHz support, if we have it */ - if (sband->ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40) { - switch (ht_oper->ht_param & IEEE80211_HT_PARAM_CHA_SEC_OFFSET) { - case IEEE80211_HT_PARAM_CHA_SEC_ABOVE: - chandef->width = NL80211_CHAN_WIDTH_40; - chandef->center_freq1 += 10; - break; - case IEEE80211_HT_PARAM_CHA_SEC_BELOW: - chandef->width = NL80211_CHAN_WIDTH_40; - chandef->center_freq1 -= 10; - break; - } - } else { - /* 40 MHz (and 80 MHz) must be supported for VHT */ - ret = IEEE80211_STA_DISABLE_VHT; - goto out; - } - - if (!vht_oper || !sband->vht_cap.vht_supported) { - ret = IEEE80211_STA_DISABLE_VHT; - goto out; - } - - vht_chandef.chan = channel; - vht_chandef.center_freq1 = - ieee80211_channel_to_frequency(vht_oper->center_freq_seg1_idx, - channel->band); - vht_chandef.center_freq2 = 0; - - if (vht_oper->center_freq_seg2_idx) - vht_chandef.center_freq2 = - ieee80211_channel_to_frequency( - vht_oper->center_freq_seg2_idx, - channel->band); - - switch (vht_oper->chan_width) { - case IEEE80211_VHT_CHANWIDTH_USE_HT: - vht_chandef.width = chandef->width; - break; - case IEEE80211_VHT_CHANWIDTH_80MHZ: - vht_chandef.width = NL80211_CHAN_WIDTH_80; - break; - case IEEE80211_VHT_CHANWIDTH_160MHZ: - vht_chandef.width = NL80211_CHAN_WIDTH_160; - break; - case IEEE80211_VHT_CHANWIDTH_80P80MHZ: - vht_chandef.width = NL80211_CHAN_WIDTH_80P80; - break; - default: - sdata_info(sdata, - "AP VHT operation IE has invalid channel width (%d), disable VHT\n", - vht_oper->chan_width); - ret = IEEE80211_STA_DISABLE_VHT; - goto out; - } - - if (!cfg80211_chandef_valid(&vht_chandef)) { - sdata_info(sdata, - "AP VHT information is invalid, disable VHT\n"); - ret = IEEE80211_STA_DISABLE_VHT; - goto out; - } - - if 
(cfg80211_chandef_identical(chandef, &vht_chandef)) { - ret = 0; - goto out; - } - - if (!cfg80211_chandef_compatible(chandef, &vht_chandef)) { - sdata_info(sdata, - "AP VHT information doesn't match HT, disable VHT\n"); - ret = IEEE80211_STA_DISABLE_VHT; - goto out; - } - - *chandef = vht_chandef; - - ret = 0; - -out: - while (!cfg80211_chandef_usable(sdata->local->hw.wiphy, chandef, - IEEE80211_CHAN_DISABLED)) { - if (WARN_ON(chandef->width == NL80211_CHAN_WIDTH_20_NOHT)) { - ret = IEEE80211_STA_DISABLE_HT | - IEEE80211_STA_DISABLE_VHT; - goto out; - } - - ret |= chandef_downgrade(chandef); - } - - if (chandef->width != vht_chandef.width) - sdata_info(sdata, - "capabilities/regulatory prevented using AP HT/VHT configuration, downgraded\n"); - - WARN_ON_ONCE(!cfg80211_chandef_valid(chandef)); - return ret; -} - static u8 ieee80211_ht_vht_rx_chains(struct ieee80211_sub_if_data *sdata, struct cfg80211_bss *cbss) { @@ -3485,16 +3753,22 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata, if (!(ifmgd->flags & IEEE80211_STA_DISABLE_HT) && sband->ht_cap.ht_supported) { - const u8 *ht_oper_ie; + const u8 *ht_oper_ie, *ht_cap; ht_oper_ie = ieee80211_bss_get_ie(cbss, WLAN_EID_HT_OPERATION); if (ht_oper_ie && ht_oper_ie[1] >= sizeof(*ht_oper)) ht_oper = (void *)(ht_oper_ie + 2); + + ht_cap = ieee80211_bss_get_ie(cbss, WLAN_EID_HT_CAPABILITY); + if (!ht_cap || ht_cap[1] < sizeof(struct ieee80211_ht_cap)) { + ifmgd->flags |= IEEE80211_STA_DISABLE_HT; + ht_oper = NULL; + } } if (!(ifmgd->flags & IEEE80211_STA_DISABLE_VHT) && sband->vht_cap.vht_supported) { - const u8 *vht_oper_ie; + const u8 *vht_oper_ie, *vht_cap; vht_oper_ie = ieee80211_bss_get_ie(cbss, WLAN_EID_VHT_OPERATION); @@ -3504,15 +3778,21 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata, vht_oper = NULL; sdata_info(sdata, "AP advertised VHT without HT, disabling both\n"); - sdata->flags |= IEEE80211_STA_DISABLE_HT; - sdata->flags |= IEEE80211_STA_DISABLE_VHT; + ifmgd->flags |= IEEE80211_STA_DISABLE_HT; + ifmgd->flags |= IEEE80211_STA_DISABLE_VHT; + } + + vht_cap = ieee80211_bss_get_ie(cbss, WLAN_EID_VHT_CAPABILITY); + if (!vht_cap || vht_cap[1] < sizeof(struct ieee80211_vht_cap)) { + ifmgd->flags |= IEEE80211_STA_DISABLE_VHT; + vht_oper = NULL; } } ifmgd->flags |= ieee80211_determine_chantype(sdata, sband, cbss->channel, ht_oper, vht_oper, - &chandef); + &chandef, true); sdata->needed_rx_chains = min(ieee80211_ht_vht_rx_chains(sdata, cbss), local->rx_chains); @@ -3562,15 +3842,12 @@ static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata, return -ENOMEM; } - mutex_lock(&local->mtx); - ieee80211_recalc_idle(sdata->local); - mutex_unlock(&local->mtx); - if (new_sta) { u32 rates = 0, basic_rates = 0; bool have_higher_than_11mbit; int min_rate = INT_MAX, min_rate_index = -1; struct ieee80211_supported_band *sband; + const struct cfg80211_bss_ies *ies; sband = local->hw.wiphy->bands[cbss->channel->band]; @@ -3614,8 +3891,34 @@ static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata, /* set timing information */ sdata->vif.bss_conf.beacon_int = cbss->beacon_interval; - sdata->vif.bss_conf.sync_tsf = cbss->tsf; - sdata->vif.bss_conf.sync_device_ts = bss->device_ts; + rcu_read_lock(); + ies = rcu_dereference(cbss->beacon_ies); + if (ies) { + const u8 *tim_ie; + + sdata->vif.bss_conf.sync_tsf = ies->tsf; + sdata->vif.bss_conf.sync_device_ts = + bss->device_ts_beacon; + tim_ie = cfg80211_find_ie(WLAN_EID_TIM, + ies->data, ies->len); + if (tim_ie && tim_ie[1] >= 2) + 
sdata->vif.bss_conf.sync_dtim_count = tim_ie[2]; + else + sdata->vif.bss_conf.sync_dtim_count = 0; + } else if (!(local->hw.flags & + IEEE80211_HW_TIMING_BEACON_ONLY)) { + ies = rcu_dereference(cbss->proberesp_ies); + /* must be non-NULL since beacon IEs were NULL */ + sdata->vif.bss_conf.sync_tsf = ies->tsf; + sdata->vif.bss_conf.sync_device_ts = + bss->device_ts_presp; + sdata->vif.bss_conf.sync_dtim_count = 0; + } else { + sdata->vif.bss_conf.sync_tsf = 0; + sdata->vif.bss_conf.sync_device_ts = 0; + sdata->vif.bss_conf.sync_dtim_count = 0; + } + rcu_read_unlock(); /* tell driver about BSSID, basic rates and timing */ ieee80211_bss_info_change_notify(sdata, @@ -3719,8 +4022,16 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata, /* prep auth_data so we don't go into idle on disassoc */ ifmgd->auth_data = auth_data; - if (ifmgd->associated) - ieee80211_set_disassoc(sdata, 0, 0, false, NULL); + if (ifmgd->associated) { + u8 frame_buf[IEEE80211_DEAUTH_FRAME_LEN]; + + ieee80211_set_disassoc(sdata, IEEE80211_STYPE_DEAUTH, + WLAN_REASON_UNSPECIFIED, + false, frame_buf); + + __cfg80211_send_deauth(sdata->dev, frame_buf, + sizeof(frame_buf)); + } sdata_info(sdata, "authenticate with %pM\n", req->bss->bssid); @@ -3735,7 +4046,7 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata, } /* hold our own reference */ - cfg80211_ref_bss(auth_data->bss); + cfg80211_ref_bss(local->hw.wiphy, auth_data->bss); err = 0; goto out_unlock; @@ -3758,8 +4069,9 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; struct ieee80211_bss *bss = (void *)req->bss->priv; struct ieee80211_mgd_assoc_data *assoc_data; + const struct cfg80211_bss_ies *beacon_ies; struct ieee80211_supported_band *sband; - const u8 *ssidie, *ht_ie; + const u8 *ssidie, *ht_ie, *vht_ie; int i, err; assoc_data = kzalloc(sizeof(*assoc_data) + req->ie_len, GFP_KERNEL); @@ -3779,8 +4091,16 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, mutex_lock(&ifmgd->mtx); - if (ifmgd->associated) - ieee80211_set_disassoc(sdata, 0, 0, false, NULL); + if (ifmgd->associated) { + u8 frame_buf[IEEE80211_DEAUTH_FRAME_LEN]; + + ieee80211_set_disassoc(sdata, IEEE80211_STYPE_DEAUTH, + WLAN_REASON_UNSPECIFIED, + false, frame_buf); + + __cfg80211_send_deauth(sdata->dev, frame_buf, + sizeof(frame_buf)); + } if (ifmgd->auth_data && !ifmgd->auth_data->done) { err = -EBUSY; @@ -3827,6 +4147,9 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, ifmgd->flags |= IEEE80211_STA_DISABLE_VHT; } + if (req->flags & ASSOC_REQ_DISABLE_VHT) + ifmgd->flags |= IEEE80211_STA_DISABLE_VHT; + /* Also disable HT if we don't support it or the AP doesn't use WMM */ sband = local->hw.wiphy->bands[req->bss->channel->band]; if (!sband->ht_cap.ht_supported || @@ -3850,6 +4173,10 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, memcpy(&ifmgd->ht_capa_mask, &req->ht_capa_mask, sizeof(ifmgd->ht_capa_mask)); + memcpy(&ifmgd->vht_capa, &req->vht_capa, sizeof(ifmgd->vht_capa)); + memcpy(&ifmgd->vht_capa_mask, &req->vht_capa_mask, + sizeof(ifmgd->vht_capa_mask)); + if (req->ie && req->ie_len) { memcpy(assoc_data->ie, req->ie, req->ie_len); assoc_data->ie_len = req->ie_len; @@ -3878,10 +4205,17 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, ((struct ieee80211_ht_operation *)(ht_ie + 2))->ht_param; else ifmgd->flags |= IEEE80211_STA_DISABLE_HT; + vht_ie = ieee80211_bss_get_ie(req->bss, WLAN_EID_VHT_CAPABILITY); + if (vht_ie && vht_ie[1] >= sizeof(struct 
ieee80211_vht_cap)) + memcpy(&assoc_data->ap_vht_cap, vht_ie + 2, + sizeof(struct ieee80211_vht_cap)); + else + ifmgd->flags |= IEEE80211_STA_DISABLE_VHT; rcu_read_unlock(); if (bss->wmm_used && bss->uapsd_supported && - (sdata->local->hw.flags & IEEE80211_HW_SUPPORTS_UAPSD)) { + (sdata->local->hw.flags & IEEE80211_HW_SUPPORTS_UAPSD) && + sdata->wmm_acm != 0xff) { assoc_data->uapsd = true; ifmgd->flags |= IEEE80211_STA_UAPSD_ENABLED; } else { @@ -3917,40 +4251,48 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, if (err) goto err_clear; - if (sdata->local->hw.flags & IEEE80211_HW_NEED_DTIM_PERIOD) { - const struct cfg80211_bss_ies *beacon_ies; + rcu_read_lock(); + beacon_ies = rcu_dereference(req->bss->beacon_ies); - rcu_read_lock(); - beacon_ies = rcu_dereference(req->bss->beacon_ies); - if (!beacon_ies) { - /* - * Wait up to one beacon interval ... - * should this be more if we miss one? - */ - sdata_info(sdata, "waiting for beacon from %pM\n", - ifmgd->bssid); - assoc_data->timeout = - TU_TO_EXP_TIME(req->bss->beacon_interval); - } else { - const u8 *tim_ie = cfg80211_find_ie(WLAN_EID_TIM, - beacon_ies->data, - beacon_ies->len); - if (tim_ie && tim_ie[1] >= - sizeof(struct ieee80211_tim_ie)) { - const struct ieee80211_tim_ie *tim; - tim = (void *)(tim_ie + 2); - ifmgd->dtim_period = tim->dtim_period; - } - assoc_data->have_beacon = true; - assoc_data->sent_assoc = false; - assoc_data->timeout = jiffies; + if (sdata->local->hw.flags & IEEE80211_HW_NEED_DTIM_BEFORE_ASSOC && + !beacon_ies) { + /* + * Wait up to one beacon interval ... + * should this be more if we miss one? + */ + sdata_info(sdata, "waiting for beacon from %pM\n", + ifmgd->bssid); + assoc_data->timeout = TU_TO_EXP_TIME(req->bss->beacon_interval); + assoc_data->timeout_started = true; + assoc_data->need_beacon = true; + } else if (beacon_ies) { + const u8 *tim_ie = cfg80211_find_ie(WLAN_EID_TIM, + beacon_ies->data, + beacon_ies->len); + u8 dtim_count = 0; + + if (tim_ie && tim_ie[1] >= sizeof(struct ieee80211_tim_ie)) { + const struct ieee80211_tim_ie *tim; + tim = (void *)(tim_ie + 2); + ifmgd->dtim_period = tim->dtim_period; + dtim_count = tim->dtim_count; } - rcu_read_unlock(); - } else { assoc_data->have_beacon = true; - assoc_data->sent_assoc = false; assoc_data->timeout = jiffies; + assoc_data->timeout_started = true; + + if (local->hw.flags & IEEE80211_HW_TIMING_BEACON_ONLY) { + sdata->vif.bss_conf.sync_tsf = beacon_ies->tsf; + sdata->vif.bss_conf.sync_device_ts = + bss->device_ts_beacon; + sdata->vif.bss_conf.sync_dtim_count = dtim_count; + } + } else { + assoc_data->timeout = jiffies; + assoc_data->timeout_started = true; } + rcu_read_unlock(); + run_again(ifmgd, assoc_data->timeout); if (bss->corrupt_data) { @@ -4017,10 +4359,6 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata, mutex_unlock(&ifmgd->mtx); out: - mutex_lock(&sdata->local->mtx); - ieee80211_recalc_idle(sdata->local); - mutex_unlock(&sdata->local->mtx); - if (sent_frame) __cfg80211_send_deauth(sdata->dev, frame_buf, IEEE80211_DEAUTH_FRAME_LEN); @@ -4061,10 +4399,6 @@ int ieee80211_mgd_disassoc(struct ieee80211_sub_if_data *sdata, __cfg80211_send_disassoc(sdata->dev, frame_buf, IEEE80211_DEAUTH_FRAME_LEN); - mutex_lock(&sdata->local->mtx); - ieee80211_recalc_idle(sdata->local); - mutex_unlock(&sdata->local->mtx); - return 0; } @@ -4072,6 +4406,17 @@ void ieee80211_mgd_stop(struct ieee80211_sub_if_data *sdata) { struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; + /* + * Make sure some work items will not run after this, + * 
they will not do anything but might not have been + * cancelled when disconnecting. + */ + cancel_work_sync(&ifmgd->monitor_work); + cancel_work_sync(&ifmgd->beacon_connection_loss_work); + cancel_work_sync(&ifmgd->request_smps_work); + cancel_work_sync(&ifmgd->csa_connection_drop_work); + cancel_work_sync(&ifmgd->chswitch_work); + mutex_lock(&ifmgd->mtx); if (ifmgd->assoc_data) ieee80211_destroy_assoc_data(sdata, false); diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c index a3ad4c3c80a3..acd1f71adc03 100644 --- a/net/mac80211/offchannel.c +++ b/net/mac80211/offchannel.c @@ -113,6 +113,15 @@ void ieee80211_offchannel_stop_vifs(struct ieee80211_local *local) * notify the AP about us leaving the channel and stop all * STA interfaces. */ + + /* + * Stop queues and transmit all frames queued by the driver + * before sending nullfunc to enable powersave at the AP. + */ + ieee80211_stop_queues_by_reason(&local->hw, IEEE80211_MAX_QUEUE_MAP, + IEEE80211_QUEUE_STOP_REASON_OFFCHANNEL); + ieee80211_flush_queues(local, NULL); + mutex_lock(&local->iflist_mtx); list_for_each_entry(sdata, &local->interfaces, list) { if (!ieee80211_sdata_running(sdata)) @@ -125,18 +134,17 @@ void ieee80211_offchannel_stop_vifs(struct ieee80211_local *local) set_bit(SDATA_STATE_OFFCHANNEL, &sdata->state); /* Check to see if we should disable beaconing. */ - if (sdata->vif.type == NL80211_IFTYPE_AP || - sdata->vif.type == NL80211_IFTYPE_ADHOC || - sdata->vif.type == NL80211_IFTYPE_MESH_POINT) + if (sdata->vif.bss_conf.enable_beacon) { + set_bit(SDATA_STATE_OFFCHANNEL_BEACON_STOPPED, + &sdata->state); + sdata->vif.bss_conf.enable_beacon = false; ieee80211_bss_info_change_notify( sdata, BSS_CHANGED_BEACON_ENABLED); - - if (sdata->vif.type != NL80211_IFTYPE_MONITOR) { - netif_tx_stop_all_queues(sdata->dev); - if (sdata->vif.type == NL80211_IFTYPE_STATION && - sdata->u.mgd.associated) - ieee80211_offchannel_ps_enable(sdata); } + + if (sdata->vif.type == NL80211_IFTYPE_STATION && + sdata->u.mgd.associated) + ieee80211_offchannel_ps_enable(sdata); } mutex_unlock(&local->iflist_mtx); } @@ -164,27 +172,17 @@ void ieee80211_offchannel_return(struct ieee80211_local *local) sdata->u.mgd.associated) ieee80211_offchannel_ps_disable(sdata); - if (sdata->vif.type != NL80211_IFTYPE_MONITOR) { - /* - * This may wake up queues even though the driver - * currently has them stopped. This is not very - * likely, since the driver won't have gotten any - * (or hardly any) new packets while we weren't - * on the right channel, and even if it happens - * it will at most lead to queueing up one more - * packet per queue in mac80211 rather than on - * the interface qdisc. 
- */ - netif_tx_wake_all_queues(sdata->dev); - } - - if (sdata->vif.type == NL80211_IFTYPE_AP || - sdata->vif.type == NL80211_IFTYPE_ADHOC || - sdata->vif.type == NL80211_IFTYPE_MESH_POINT) + if (test_and_clear_bit(SDATA_STATE_OFFCHANNEL_BEACON_STOPPED, + &sdata->state)) { + sdata->vif.bss_conf.enable_beacon = true; ieee80211_bss_info_change_notify( sdata, BSS_CHANGED_BEACON_ENABLED); + } } mutex_unlock(&local->iflist_mtx); + + ieee80211_wake_queues_by_reason(&local->hw, IEEE80211_MAX_QUEUE_MAP, + IEEE80211_QUEUE_STOP_REASON_OFFCHANNEL); } void ieee80211_handle_roc_started(struct ieee80211_roc_work *roc) @@ -279,7 +277,7 @@ void ieee80211_start_next_roc(struct ieee80211_local *local) duration = 10; ret = drv_remain_on_channel(local, roc->sdata, roc->chan, - duration); + duration, roc->type); roc->started = true; @@ -299,10 +297,13 @@ void ieee80211_start_next_roc(struct ieee80211_local *local) } } -void ieee80211_roc_notify_destroy(struct ieee80211_roc_work *roc) +void ieee80211_roc_notify_destroy(struct ieee80211_roc_work *roc, bool free) { struct ieee80211_roc_work *dep, *tmp; + if (WARN_ON(roc->to_be_freed)) + return; + /* was never transmitted */ if (roc->frame) { cfg80211_mgmt_tx_status(&roc->sdata->wdev, @@ -318,9 +319,12 @@ void ieee80211_roc_notify_destroy(struct ieee80211_roc_work *roc) GFP_KERNEL); list_for_each_entry_safe(dep, tmp, &roc->dependents, list) - ieee80211_roc_notify_destroy(dep); + ieee80211_roc_notify_destroy(dep, true); - kfree(roc); + if (free) + kfree(roc); + else + roc->to_be_freed = true; } void ieee80211_sw_roc_work(struct work_struct *work) @@ -333,6 +337,9 @@ void ieee80211_sw_roc_work(struct work_struct *work) mutex_lock(&local->mtx); + if (roc->to_be_freed) + goto out_unlock; + if (roc->abort) goto finish; @@ -372,10 +379,10 @@ void ieee80211_sw_roc_work(struct work_struct *work) finish: list_del(&roc->list); started = roc->started; - ieee80211_roc_notify_destroy(roc); + ieee80211_roc_notify_destroy(roc, !roc->abort); if (started) { - drv_flush(local, false); + ieee80211_flush_queues(local, NULL); local->tmp_channel = NULL; ieee80211_hw_config(local, 0); @@ -412,7 +419,7 @@ static void ieee80211_hw_roc_done(struct work_struct *work) list_del(&roc->list); - ieee80211_roc_notify_destroy(roc); + ieee80211_roc_notify_destroy(roc, true); /* if there's another roc, start it now */ ieee80211_start_next_roc(local); @@ -438,15 +445,15 @@ void ieee80211_roc_setup(struct ieee80211_local *local) INIT_LIST_HEAD(&local->roc_list); } -void ieee80211_roc_purge(struct ieee80211_sub_if_data *sdata) +void ieee80211_roc_purge(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata) { - struct ieee80211_local *local = sdata->local; struct ieee80211_roc_work *roc, *tmp; LIST_HEAD(tmp_list); mutex_lock(&local->mtx); list_for_each_entry_safe(roc, tmp, &local->roc_list, list) { - if (roc->sdata != sdata) + if (sdata && roc->sdata != sdata) continue; if (roc->started && local->ops->remain_on_channel) { @@ -462,12 +469,14 @@ void ieee80211_roc_purge(struct ieee80211_sub_if_data *sdata) list_for_each_entry_safe(roc, tmp, &tmp_list, list) { if (local->ops->remain_on_channel) { list_del(&roc->list); - ieee80211_roc_notify_destroy(roc); + ieee80211_roc_notify_destroy(roc, true); } else { ieee80211_queue_delayed_work(&local->hw, &roc->work, 0); /* work will clean up etc */ flush_delayed_work(&roc->work); + WARN_ON(!roc->to_be_freed); + kfree(roc); } } diff --git a/net/mac80211/pm.c b/net/mac80211/pm.c index 79a48f37d409..7fc5d0d8149a 100644 --- a/net/mac80211/pm.c +++ 
b/net/mac80211/pm.c @@ -6,56 +6,42 @@ #include "driver-ops.h" #include "led.h" -/* return value indicates whether the driver should be further notified */ -static bool ieee80211_quiesce(struct ieee80211_sub_if_data *sdata) -{ - switch (sdata->vif.type) { - case NL80211_IFTYPE_STATION: - ieee80211_sta_quiesce(sdata); - return true; - case NL80211_IFTYPE_ADHOC: - ieee80211_ibss_quiesce(sdata); - return true; - case NL80211_IFTYPE_MESH_POINT: - ieee80211_mesh_quiesce(sdata); - return true; - case NL80211_IFTYPE_AP_VLAN: - case NL80211_IFTYPE_MONITOR: - /* don't tell driver about this */ - return false; - default: - return true; - } -} - int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan) { struct ieee80211_local *local = hw_to_local(hw); struct ieee80211_sub_if_data *sdata; struct sta_info *sta; - struct ieee80211_chanctx *ctx; if (!local->open_count) goto suspend; ieee80211_scan_cancel(local); + ieee80211_dfs_cac_cancel(local); + + ieee80211_roc_purge(local, NULL); + + ieee80211_del_virtual_monitor(local); + if (hw->flags & IEEE80211_HW_AMPDU_AGGREGATION) { mutex_lock(&local->sta_mtx); list_for_each_entry(sta, &local->sta_list, list) { set_sta_flag(sta, WLAN_STA_BLOCK_BA); - ieee80211_sta_tear_down_BA_sessions(sta, true); + ieee80211_sta_tear_down_BA_sessions( + sta, AGG_STOP_LOCAL_REQUEST); } mutex_unlock(&local->sta_mtx); } ieee80211_stop_queues_by_reason(hw, - IEEE80211_QUEUE_STOP_REASON_SUSPEND); + IEEE80211_MAX_QUEUE_MAP, + IEEE80211_QUEUE_STOP_REASON_SUSPEND); - /* flush out all packets */ + /* flush out all packets and station cleanup call_rcu()s */ synchronize_net(); + rcu_barrier(); - drv_flush(local, false); + ieee80211_flush_queues(local, NULL); local->quiescing = true; /* make quiescing visible to timers everywhere */ @@ -88,24 +74,17 @@ int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan) mutex_unlock(&local->sta_mtx); } ieee80211_wake_queues_by_reason(hw, + IEEE80211_MAX_QUEUE_MAP, IEEE80211_QUEUE_STOP_REASON_SUSPEND); return err; } else if (err > 0) { WARN_ON(err != 1); - local->wowlan = false; + return err; } else { - list_for_each_entry(sdata, &local->interfaces, list) { - cancel_work_sync(&sdata->work); - ieee80211_quiesce(sdata); - } goto suspend; } } - /* disable keys */ - list_for_each_entry(sdata, &local->interfaces, list) - ieee80211_disable_keys(sdata); - /* tear down aggregation sessions and remove STAs */ mutex_lock(&local->sta_mtx); list_for_each_entry(sta, &local->sta_list, list) { @@ -117,74 +96,21 @@ int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan) WARN_ON(drv_sta_state(local, sta->sdata, sta, state, state - 1)); } - - mesh_plink_quiesce(sta); } mutex_unlock(&local->sta_mtx); /* remove all interfaces */ list_for_each_entry(sdata, &local->interfaces, list) { - cancel_work_sync(&sdata->work); - - if (!ieee80211_quiesce(sdata)) - continue; - if (!ieee80211_sdata_running(sdata)) continue; - - /* disable beaconing */ - ieee80211_bss_info_change_notify(sdata, - BSS_CHANGED_BEACON_ENABLED); - - if (sdata->vif.type == NL80211_IFTYPE_AP && - rcu_access_pointer(sdata->u.ap.beacon)) - drv_stop_ap(local, sdata); - - if (local->use_chanctx) { - struct ieee80211_chanctx_conf *conf; - - mutex_lock(&local->chanctx_mtx); - conf = rcu_dereference_protected( - sdata->vif.chanctx_conf, - lockdep_is_held(&local->chanctx_mtx)); - if (conf) { - ctx = container_of(conf, - struct ieee80211_chanctx, - conf); - drv_unassign_vif_chanctx(local, sdata, ctx); - } - - mutex_unlock(&local->chanctx_mtx); - } 
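/*
 * The suspend rework in this hunk boils down to a strict teardown order:
 * stop new TX, flush what is in flight, disconnect, and only then assert
 * that no channel context survived.  A toy model of that ordering, with
 * plain counters standing in for the real mac80211 state (toy_local and
 * its helpers are hypothetical, not kernel API):
 */
#include <assert.h>
#include <stdbool.h>

struct toy_local {
	bool queues_stopped;
	int  pending_frames;	/* frames still owned by the "driver" */
	int  num_chanctx;	/* live channel contexts */
};

static void stop_queues(struct toy_local *l)  { l->queues_stopped = true; }
static void flush_queues(struct toy_local *l) { l->pending_frames = 0; }

/* disconnecting every interface is what releases the channel contexts */
static void disconnect_all(struct toy_local *l) { l->num_chanctx = 0; }

static void toy_suspend(struct toy_local *l)
{
	stop_queues(l);		/* no new frames may be queued ... */
	flush_queues(l);	/* ... and everything in flight goes out */
	disconnect_all(l);

	/* mirrors the WARN_ON(!list_empty(&local->chanctx_list)) check */
	assert(l->num_chanctx == 0);
	assert(l->queues_stopped && l->pending_frames == 0);
}

int main(void)
{
	struct toy_local l = { false, 3, 2 };

	toy_suspend(&l);
	return 0;
}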
- drv_remove_interface(local, sdata); - } - - sdata = rtnl_dereference(local->monitor_sdata); - if (sdata) { - if (local->use_chanctx) { - struct ieee80211_chanctx_conf *conf; - - mutex_lock(&local->chanctx_mtx); - conf = rcu_dereference_protected( - sdata->vif.chanctx_conf, - lockdep_is_held(&local->chanctx_mtx)); - if (conf) { - ctx = container_of(conf, - struct ieee80211_chanctx, - conf); - drv_unassign_vif_chanctx(local, sdata, ctx); - } - - mutex_unlock(&local->chanctx_mtx); - } - drv_remove_interface(local, sdata); } - mutex_lock(&local->chanctx_mtx); - list_for_each_entry(ctx, &local->chanctx_list, list) - drv_remove_chanctx(local, ctx); - mutex_unlock(&local->chanctx_mtx); + /* + * We disconnected on all interfaces before suspend, so all channel + * contexts should be released. + */ + WARN_ON(!list_empty(&local->chanctx_list)); /* stop hardware - this must stop RX */ if (local->open_count) @@ -204,3 +130,13 @@ int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan) * ieee80211_reconfig(), which is also needed for hardware * hang/firmware failure/etc. recovery. */ + +void ieee80211_report_wowlan_wakeup(struct ieee80211_vif *vif, + struct cfg80211_wowlan_wakeup *wakeup, + gfp_t gfp) +{ + struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); + + cfg80211_report_wowlan_wakeup(&sdata->wdev, wakeup, gfp); +} +EXPORT_SYMBOL(ieee80211_report_wowlan_wakeup);
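Aside, not part of the patch: a minimal sketch of how a WoWLAN-capable driver might use the ieee80211_report_wowlan_wakeup() helper exported just above when it resumes and finds the hardware woke the host. The mydrv_* name is hypothetical; the struct cfg80211_wowlan_wakeup fields are the standard ones.

#include <net/mac80211.h>

/* Hypothetical driver resume path: tell cfg80211/userspace why the
 * hardware woke us. pattern_idx = -1 means "no pattern matched". */
static void mydrv_report_wakeup(struct ieee80211_vif *vif, bool magic)
{
        struct cfg80211_wowlan_wakeup wakeup = {
                .magic_pkt = magic,
                .pattern_idx = -1,
        };

        ieee80211_report_wowlan_wakeup(vif, &wakeup, GFP_KERNEL);
}

diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c index dd88381c53b7..0d51877efdb7 100644 --- a/net/mac80211/rate.c +++ b/net/mac80211/rate.c @@ -252,6 +252,25 @@ rate_lowest_non_cck_index(struct ieee80211_supported_band *sband, return 0; } +static void __rate_control_send_low(struct ieee80211_hw *hw, + struct ieee80211_supported_band *sband, + struct ieee80211_sta *sta, + struct ieee80211_tx_info *info) +{ + if ((sband->band != IEEE80211_BAND_2GHZ) || + !(info->flags & IEEE80211_TX_CTL_NO_CCK_RATE)) + info->control.rates[0].idx = rate_lowest_index(sband, sta); + else + info->control.rates[0].idx = + rate_lowest_non_cck_index(sband, sta); + + info->control.rates[0].count = + (info->flags & IEEE80211_TX_CTL_NO_ACK) ?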
- 1 : txrc->hw->max_rate_tries; + __rate_control_send_low(txrc->hw, sband, sta, info); + if (!sta && txrc->bss) { mcast_rate = txrc->bss_conf->mcast_rate[sband->band]; if (mcast_rate > 0) { @@ -355,7 +366,8 @@ static bool rate_idx_match_mcs_mask(struct ieee80211_tx_rate *rate, static void rate_idx_match_mask(struct ieee80211_tx_rate *rate, - struct ieee80211_tx_rate_control *txrc, + struct ieee80211_supported_band *sband, + enum nl80211_chan_width chan_width, u32 mask, u8 mcs_mask[IEEE80211_HT_MCS_MASK_LEN]) { @@ -375,27 +387,17 @@ static void rate_idx_match_mask(struct ieee80211_tx_rate *rate, IEEE80211_TX_RC_USE_SHORT_PREAMBLE); alt_rate.count = rate->count; if (rate_idx_match_legacy_mask(&alt_rate, - txrc->sband->n_bitrates, - mask)) { + sband->n_bitrates, mask)) { *rate = alt_rate; return; } } else { - struct sk_buff *skb = txrc->skb; - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; - __le16 fc; - /* handle legacy rates */ - if (rate_idx_match_legacy_mask(rate, txrc->sband->n_bitrates, - mask)) + if (rate_idx_match_legacy_mask(rate, sband->n_bitrates, mask)) return; /* if HT BSS, and we handle a data frame, also try HT rates */ - if (txrc->bss_conf->chandef.width == NL80211_CHAN_WIDTH_20_NOHT) - return; - - fc = hdr->frame_control; - if (!ieee80211_is_data(fc)) + if (chan_width == NL80211_CHAN_WIDTH_20_NOHT) return; alt_rate.idx = 0; @@ -408,7 +410,7 @@ static void rate_idx_match_mask(struct ieee80211_tx_rate *rate, alt_rate.flags |= IEEE80211_TX_RC_MCS; - if (txrc->bss_conf->chandef.width == NL80211_CHAN_WIDTH_40) + if (chan_width == NL80211_CHAN_WIDTH_40) alt_rate.flags |= IEEE80211_TX_RC_40_MHZ_WIDTH; if (rate_idx_match_mcs_mask(&alt_rate, mcs_mask)) { @@ -426,6 +428,228 @@ static void rate_idx_match_mask(struct ieee80211_tx_rate *rate, */ } +static void rate_fixup_ratelist(struct ieee80211_vif *vif, + struct ieee80211_supported_band *sband, + struct ieee80211_tx_info *info, + struct ieee80211_tx_rate *rates, + int max_rates) +{ + struct ieee80211_rate *rate; + bool inval = false; + int i; + + /* + * Set up the RTS/CTS rate as the fastest basic rate + * that is not faster than the data rate unless there + * is no basic rate slower than the data rate, in which + * case we pick the slowest basic rate + * + * XXX: Should this check all retry rates? + */ + if (!(rates[0].flags & IEEE80211_TX_RC_MCS)) { + u32 basic_rates = vif->bss_conf.basic_rates; + s8 baserate = basic_rates ? ffs(basic_rates) - 1 : 0; + + rate = &sband->bitrates[rates[0].idx]; + + for (i = 0; i < sband->n_bitrates; i++) { + /* must be a basic rate */ + if (!(basic_rates & BIT(i))) + continue; + /* must not be faster than the data rate */ + if (sband->bitrates[i].bitrate > rate->bitrate) + continue; + /* maximum */ + if (sband->bitrates[baserate].bitrate < + sband->bitrates[i].bitrate) + baserate = i; + } + + info->control.rts_cts_rate_idx = baserate; + } + + for (i = 0; i < max_rates; i++) { + /* + * make sure there's no valid rate following + * an invalid one, just in case drivers don't + * take the API seriously to stop at -1. + */ + if (inval) { + rates[i].idx = -1; + continue; + } + if (rates[i].idx < 0) { + inval = true; + continue; + } + + /* + * For now assume MCS is already set up correctly, this + * needs to be fixed.
+ */ + if (rates[i].flags & IEEE80211_TX_RC_MCS) { + WARN_ON(rates[i].idx > 76); + + if (!(rates[i].flags & IEEE80211_TX_RC_USE_RTS_CTS) && + info->control.use_cts_prot) + rates[i].flags |= + IEEE80211_TX_RC_USE_CTS_PROTECT; + continue; + } + + if (rates[i].flags & IEEE80211_TX_RC_VHT_MCS) { + WARN_ON(ieee80211_rate_get_vht_mcs(&rates[i]) > 9); + continue; + } + + /* set up RTS protection if desired */ + if (info->control.use_rts) { + rates[i].flags |= IEEE80211_TX_RC_USE_RTS_CTS; + info->control.use_cts_prot = false; + } + + /* RC is busted */ + if (WARN_ON_ONCE(rates[i].idx >= sband->n_bitrates)) { + rates[i].idx = -1; + continue; + } + + rate = &sband->bitrates[rates[i].idx]; + + /* set up short preamble */ + if (info->control.short_preamble && + rate->flags & IEEE80211_RATE_SHORT_PREAMBLE) + rates[i].flags |= IEEE80211_TX_RC_USE_SHORT_PREAMBLE; + + /* set up G protection */ + if (!(rates[i].flags & IEEE80211_TX_RC_USE_RTS_CTS) && + info->control.use_cts_prot && + rate->flags & IEEE80211_RATE_ERP_G) + rates[i].flags |= IEEE80211_TX_RC_USE_CTS_PROTECT; + } +} + + +static void rate_control_fill_sta_table(struct ieee80211_sta *sta, + struct ieee80211_tx_info *info, + struct ieee80211_tx_rate *rates, + int max_rates) +{ + struct ieee80211_sta_rates *ratetbl = NULL; + int i; + + if (sta && !info->control.skip_table) + ratetbl = rcu_dereference(sta->rates); + + /* Fill remaining rate slots with data from the sta rate table. */ + max_rates = min_t(int, max_rates, IEEE80211_TX_RATE_TABLE_SIZE); + for (i = 0; i < max_rates; i++) { + if (i < ARRAY_SIZE(info->control.rates) && + info->control.rates[i].idx >= 0 && + info->control.rates[i].count) { + if (rates != info->control.rates) + rates[i] = info->control.rates[i]; + } else if (ratetbl) { + rates[i].idx = ratetbl->rate[i].idx; + rates[i].flags = ratetbl->rate[i].flags; + if (info->control.use_rts) + rates[i].count = ratetbl->rate[i].count_rts; + else if (info->control.use_cts_prot) + rates[i].count = ratetbl->rate[i].count_cts; + else + rates[i].count = ratetbl->rate[i].count; + } else { + rates[i].idx = -1; + rates[i].count = 0; + } + + if (rates[i].idx < 0 || !rates[i].count) + break; + } +} + +static void rate_control_apply_mask(struct ieee80211_sub_if_data *sdata, + struct ieee80211_sta *sta, + struct ieee80211_supported_band *sband, + struct ieee80211_tx_info *info, + struct ieee80211_tx_rate *rates, + int max_rates) +{ + enum nl80211_chan_width chan_width; + u8 mcs_mask[IEEE80211_HT_MCS_MASK_LEN]; + bool has_mcs_mask; + u32 mask; + int i; + + /* + * Try to enforce the rateidx mask the user wanted. skip this if the + * default mask (allow all rates) is used to save some processing for + * the common case. + */ + mask = sdata->rc_rateidx_mask[info->band]; + has_mcs_mask = sdata->rc_has_mcs_mask[info->band]; + if (mask == (1 << sband->n_bitrates) - 1 && !has_mcs_mask) + return; + + if (has_mcs_mask) + memcpy(mcs_mask, sdata->rc_rateidx_mcs_mask[info->band], + sizeof(mcs_mask)); + else + memset(mcs_mask, 0xff, sizeof(mcs_mask)); + + if (sta) { + /* Filter out rates that the STA does not support */ + mask &= sta->supp_rates[info->band]; + for (i = 0; i < sizeof(mcs_mask); i++) + mcs_mask[i] &= sta->ht_cap.mcs.rx_mask[i]; + } + + /* + * Make sure the rate index selected for each TX rate is + * included in the configured mask and change the rate indexes + * if needed. 
+ */ + chan_width = sdata->vif.bss_conf.chandef.width; + for (i = 0; i < max_rates; i++) { + /* Skip invalid rates */ + if (rates[i].idx < 0) + break; + + rate_idx_match_mask(&rates[i], sband, chan_width, mask, + mcs_mask); + } +} + +void ieee80211_get_tx_rates(struct ieee80211_vif *vif, + struct ieee80211_sta *sta, + struct sk_buff *skb, + struct ieee80211_tx_rate *dest, + int max_rates) +{ + struct ieee80211_sub_if_data *sdata; + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + struct ieee80211_supported_band *sband; + + rate_control_fill_sta_table(sta, info, dest, max_rates); + + if (!vif) + return; + + sdata = vif_to_sdata(vif); + sband = sdata->local->hw.wiphy->bands[info->band]; + + if (ieee80211_is_data(hdr->frame_control)) + rate_control_apply_mask(sdata, sta, sband, info, dest, max_rates); + + if (dest[0].idx < 0) + __rate_control_send_low(&sdata->local->hw, sband, sta, info); + + if (sta) + rate_fixup_ratelist(vif, sband, info, dest, max_rates); +} +EXPORT_SYMBOL(ieee80211_get_tx_rates); +
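Aside, not from the patch: a minimal sketch of how a driver that opts into the new rate-table scheme might consume the ieee80211_get_tx_rates() helper exported just above, looking rates up at dequeue time instead of relying on rates chosen at enqueue time. The mydrv_* name and the descriptor step are hypothetical.

#include <net/mac80211.h>

static void mydrv_tx_dequeue(struct ieee80211_vif *vif,
                             struct ieee80211_sta *sta,
                             struct sk_buff *skb)
{
        struct ieee80211_tx_rate rates[IEEE80211_TX_MAX_RATES];
        int i;

        /* Fill rates[] from info->control.rates and, where enabled,
         * from the station's current rate table; slots past the last
         * valid entry are terminated with idx == -1. */
        ieee80211_get_tx_rates(vif, sta, skb, rates, ARRAY_SIZE(rates));

        for (i = 0; i < ARRAY_SIZE(rates); i++) {
                if (rates[i].idx < 0)
                        break;
                /* program rates[i].idx / rates[i].count into the
                 * hardware TX descriptor here */
        }
}

void rate_control_get_rate(struct ieee80211_sub_if_data *sdata, struct sta_info *sta, struct ieee80211_tx_rate_control *txrc) @@ -435,8 +659,6 @@ void rate_control_get_rate(struct ieee80211_sub_if_data *sdata, struct ieee80211_sta *ista = NULL; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(txrc->skb); int i; - u32 mask; - u8 mcs_mask[IEEE80211_HT_MCS_MASK_LEN]; if (sta && test_sta_flag(sta, WLAN_STA_RATE_CONTROL)) { ista = &sta->sta; @@ -454,37 +676,27 @@ void rate_control_get_rate(struct ieee80211_sub_if_data *sdata, ref->ops->get_rate(ref->priv, ista, priv_sta, txrc); - /* - * Try to enforce the rateidx mask the user wanted. skip this if the - * default mask (allow all rates) is used to save some processing for - * the common case. - */ - mask = sdata->rc_rateidx_mask[info->band]; - memcpy(mcs_mask, sdata->rc_rateidx_mcs_mask[info->band], - sizeof(mcs_mask)); - if (mask != (1 << txrc->sband->n_bitrates) - 1) { - if (sta) { - /* Filter out rates that the STA does not support */ - mask &= sta->sta.supp_rates[info->band]; - for (i = 0; i < sizeof(mcs_mask); i++) - mcs_mask[i] &= sta->sta.ht_cap.mcs.rx_mask[i]; - } - /* - * Make sure the rate index selected for each TX rate is - * included in the configured mask and change the rate indexes - * if needed.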
- */ - for (i = 0; i < IEEE80211_TX_MAX_RATES; i++) { - /* Skip invalid rates */ - if (info->control.rates[i].idx < 0) - break; - rate_idx_match_mask(&info->control.rates[i], txrc, - mask, mcs_mask); - } - } + if (sdata->local->hw.flags & IEEE80211_HW_SUPPORTS_RC_TABLE) + return; + + ieee80211_get_tx_rates(&sdata->vif, ista, txrc->skb, + info->control.rates, + ARRAY_SIZE(info->control.rates)); +} - BUG_ON(info->control.rates[0].idx < 0); +int rate_control_set_rates(struct ieee80211_hw *hw, + struct ieee80211_sta *pubsta, + struct ieee80211_sta_rates *rates) +{ + struct ieee80211_sta_rates *old = rcu_dereference(pubsta->rates); + + rcu_assign_pointer(pubsta->rates, rates); + if (old) + kfree_rcu(old, rcu_head); + + return 0; } +EXPORT_SYMBOL(rate_control_set_rates); int ieee80211_init_rate_ctrl_alg(struct ieee80211_local *local, const char *name) diff --git a/net/mac80211/rate.h b/net/mac80211/rate.h index 301386dabf88..d35a5dd3fb13 100644 --- a/net/mac80211/rate.h +++ b/net/mac80211/rate.h @@ -68,6 +68,8 @@ static inline void rate_control_rate_init(struct sta_info *sta) sband = local->hw.wiphy->bands[chanctx_conf->def.chan->band]; rcu_read_unlock(); + ieee80211_sta_set_rx_nss(sta); + ref->ops->rate_init(ref->priv, sband, ista, priv_sta); set_sta_flag(sta, WLAN_STA_RATE_CONTROL); } diff --git a/net/mac80211/rc80211_minstrel.c b/net/mac80211/rc80211_minstrel.c index 8c5acdc06226..ac7ef5414bde 100644 --- a/net/mac80211/rc80211_minstrel.c +++ b/net/mac80211/rc80211_minstrel.c @@ -55,7 +55,6 @@ #include "rate.h" #include "rc80211_minstrel.h" -#define SAMPLE_COLUMNS 10 #define SAMPLE_TBL(_mi, _idx, _col) \ _mi->sample_table[(_idx * SAMPLE_COLUMNS) + _col] @@ -70,16 +69,75 @@ rix_to_ndx(struct minstrel_sta_info *mi, int rix) return i; } +/* find & sort topmost throughput rates */ +static inline void +minstrel_sort_best_tp_rates(struct minstrel_sta_info *mi, int i, u8 *tp_list) +{ + int j = MAX_THR_RATES; + + while (j > 0 && mi->r[i].cur_tp > mi->r[tp_list[j - 1]].cur_tp) + j--; + if (j < MAX_THR_RATES - 1) + memmove(&tp_list[j + 1], &tp_list[j], MAX_THR_RATES - (j + 1)); + if (j < MAX_THR_RATES) + tp_list[j] = i; +} + +static void +minstrel_set_rate(struct minstrel_sta_info *mi, struct ieee80211_sta_rates *ratetbl, + int offset, int idx) +{ + struct minstrel_rate *r = &mi->r[idx]; + + ratetbl->rate[offset].idx = r->rix; + ratetbl->rate[offset].count = r->adjusted_retry_count; + ratetbl->rate[offset].count_cts = r->retry_count_cts; + ratetbl->rate[offset].count_rts = r->retry_count_rtscts; +} + +static void +minstrel_update_rates(struct minstrel_priv *mp, struct minstrel_sta_info *mi) +{ + struct ieee80211_sta_rates *ratetbl; + int i = 0; + + ratetbl = kzalloc(sizeof(*ratetbl), GFP_ATOMIC); + if (!ratetbl) + return; + + /* Start with max_tp_rate */ + minstrel_set_rate(mi, ratetbl, i++, mi->max_tp_rate[0]); + + if (mp->hw->max_rates >= 3) { + /* At least 3 tx rates supported, use max_tp_rate2 next */ + minstrel_set_rate(mi, ratetbl, i++, mi->max_tp_rate[1]); + } + + if (mp->hw->max_rates >= 2) { + /* At least 2 tx rates supported, use max_prob_rate next */ + minstrel_set_rate(mi, ratetbl, i++, mi->max_prob_rate); + } + + /* Use lowest rate last */ + ratetbl->rate[i].idx = mi->lowest_rix; + ratetbl->rate[i].count = mp->max_retry; + ratetbl->rate[i].count_cts = mp->max_retry; + ratetbl->rate[i].count_rts = mp->max_retry; + + rate_control_set_rates(mp->hw, mi->sta, ratetbl); +} + static void minstrel_update_stats(struct minstrel_priv *mp, struct minstrel_sta_info *mi) { - u32 max_tp = 0, index_max_tp 
= 0, index_max_tp2 = 0; - u32 max_prob = 0, index_max_prob = 0; + u8 tmp_tp_rate[MAX_THR_RATES]; + u8 tmp_prob_rate = 0; u32 usecs; - u32 p; int i; - mi->stats_update = jiffies; + for (i = 0; i < MAX_THR_RATES; i++) + tmp_tp_rate[i] = 0; + for (i = 0; i < mi->n_rates; i++) { struct minstrel_rate *mr = &mi->r[i]; @@ -87,27 +145,32 @@ minstrel_update_stats(struct minstrel_priv *mp, struct minstrel_sta_info *mi) if (!usecs) usecs = 1000000; - /* To avoid rounding issues, probabilities scale from 0 (0%) - * to 18000 (100%) */ - if (mr->attempts) { - p = (mr->success * 18000) / mr->attempts; + if (unlikely(mr->attempts > 0)) { + mr->sample_skipped = 0; + mr->cur_prob = MINSTREL_FRAC(mr->success, mr->attempts); mr->succ_hist += mr->success; mr->att_hist += mr->attempts; - mr->cur_prob = p; - p = ((p * (100 - mp->ewma_level)) + (mr->probability * - mp->ewma_level)) / 100; - mr->probability = p; - mr->cur_tp = p * (1000000 / usecs); - } + mr->probability = minstrel_ewma(mr->probability, + mr->cur_prob, + EWMA_LEVEL); + } else + mr->sample_skipped++; mr->last_success = mr->success; mr->last_attempts = mr->attempts; mr->success = 0; mr->attempts = 0; + /* Update throughput per rate, reset thr. below 10% success */ + if (mr->probability < MINSTREL_FRAC(10, 100)) + mr->cur_tp = 0; + else + mr->cur_tp = mr->probability * (1000000 / usecs); + /* Sample less often below the 10% chance of success. * Sample less often above the 95% chance of success. */ - if ((mr->probability > 17100) || (mr->probability < 1800)) { + if (mr->probability > MINSTREL_FRAC(95, 100) || + mr->probability < MINSTREL_FRAC(10, 100)) { mr->adjusted_retry_count = mr->retry_count >> 1; if (mr->adjusted_retry_count > 2) mr->adjusted_retry_count = 2; @@ -118,35 +181,32 @@ minstrel_update_stats(struct minstrel_priv *mp, struct minstrel_sta_info *mi) } if (!mr->adjusted_retry_count) mr->adjusted_retry_count = 2; - } - for (i = 0; i < mi->n_rates; i++) { - struct minstrel_rate *mr = &mi->r[i]; - if (max_tp < mr->cur_tp) { - index_max_tp = i; - max_tp = mr->cur_tp; - } - if (max_prob < mr->probability) { - index_max_prob = i; - max_prob = mr->probability; + minstrel_sort_best_tp_rates(mi, i, tmp_tp_rate); + + /* To determine the most robust rate (max_prob_rate) used at + * 3rd MRR stage we distinguish between two cases: + * (1) if any success probability >= 95%, out of those rates + * choose the maximum throughput rate as max_prob_rate + * (2) if all success probabilities < 95%, the rate with + * highest success probability is chosen as max_prob_rate */ + if (mr->probability >= MINSTREL_FRAC(95, 100)) { + if (mr->cur_tp >= mi->r[tmp_prob_rate].cur_tp) + tmp_prob_rate = i; + } else { + if (mr->probability >= mi->r[tmp_prob_rate].probability) + tmp_prob_rate = i; } } - max_tp = 0; - for (i = 0; i < mi->n_rates; i++) { - struct minstrel_rate *mr = &mi->r[i]; + /* Assign the new rate set */ + memcpy(mi->max_tp_rate, tmp_tp_rate, sizeof(mi->max_tp_rate)); + mi->max_prob_rate = tmp_prob_rate; - if (i == index_max_tp) - continue; + /* Reset update timer */ + mi->stats_update = jiffies; - if (max_tp < mr->cur_tp) { - index_max_tp2 = i; - max_tp = mr->cur_tp; - } - } - mi->max_tp_rate = index_max_tp; - mi->max_tp_rate2 = index_max_tp2; - mi->max_prob_rate = index_max_prob; + minstrel_update_rates(mp, mi); }
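Aside, not part of the patch: the hunk above replaces the old 0..18000 probability scale with MINSTREL_SCALE fixed point and an EWMA weighted by EWMA_LEVEL/EWMA_DIV = 96/128 (i.e. 75% history), per the rc80211_minstrel.h hunk later in this patch. A standalone sketch of that arithmetic, with illustrative numbers only:

/* Compile with: cc ewma_demo.c && ./a.out */
#include <stdio.h>

#define MINSTREL_SCALE 16
#define MINSTREL_FRAC(val, div) (((val) << MINSTREL_SCALE) / (div))
#define MINSTREL_TRUNC(val) ((val) >> MINSTREL_SCALE)
#define EWMA_LEVEL 96
#define EWMA_DIV 128

static int minstrel_ewma(int old, int new, int weight)
{
        return (new * (EWMA_DIV - weight) + old * weight) / EWMA_DIV;
}

int main(void)
{
        /* 12 of 16 frames ACKed in this interval: 75% */
        int cur_prob = MINSTREL_FRAC(12, 16);
        /* long-term estimate so far: 50% */
        int prob = MINSTREL_FRAC(50, 100);

        prob = minstrel_ewma(prob, cur_prob, EWMA_LEVEL);

        /* prints 56: (75 * 32 + 50 * 96) / 128 = 56.25, truncated */
        printf("%d%%\n", MINSTREL_TRUNC(prob * 100));
        return 0;
}

static void @@ -195,9 +255,9 @@ minstrel_get_retry_count(struct minstrel_rate *mr, { unsigned int retry = mr->adjusted_retry_count; - if (info->control.rates[0].flags & IEEE80211_TX_RC_USE_RTS_CTS) + if (info->control.use_rts) retry = max(2U,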
min(mr->retry_count_rtscts, retry)); - else if (info->control.rates[0].flags & IEEE80211_TX_RC_USE_CTS_PROTECT) + else if (info->control.use_cts_prot) retry = max(2U, min(mr->retry_count_cts, retry)); return retry; } @@ -207,10 +267,10 @@ static int minstrel_get_next_sample(struct minstrel_sta_info *mi) { unsigned int sample_ndx; - sample_ndx = SAMPLE_TBL(mi, mi->sample_idx, mi->sample_column); - mi->sample_idx++; - if ((int) mi->sample_idx > (mi->n_rates - 2)) { - mi->sample_idx = 0; + sample_ndx = SAMPLE_TBL(mi, mi->sample_row, mi->sample_column); + mi->sample_row++; + if ((int) mi->sample_row >= mi->n_rates) { + mi->sample_row = 0; mi->sample_column++; if (mi->sample_column >= SAMPLE_COLUMNS) mi->sample_column = 0; @@ -226,111 +286,96 @@ minstrel_get_rate(void *priv, struct ieee80211_sta *sta, struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); struct minstrel_sta_info *mi = priv_sta; struct minstrel_priv *mp = priv; - struct ieee80211_tx_rate *ar = info->control.rates; - unsigned int ndx, sample_ndx = 0; - bool mrr; - bool sample_slower = false; - bool sample = false; - int i, delta; - int mrr_ndx[3]; - int sample_rate; - + struct ieee80211_tx_rate *rate = &info->control.rates[0]; + struct minstrel_rate *msr, *mr; + unsigned int ndx; + bool mrr_capable; + bool prev_sample = mi->prev_sample; + int delta; + int sampling_ratio; + + /* management/no-ack frames do not use rate control */ if (rate_control_send_low(sta, priv_sta, txrc)) return; - mrr = mp->has_mrr && !txrc->rts && !txrc->bss_conf->use_cts_prot; - - ndx = mi->max_tp_rate; - - if (mrr) - sample_rate = mp->lookaround_rate_mrr; + /* check multi-rate-retry capabilities & adjust lookaround_rate */ + mrr_capable = mp->has_mrr && + !txrc->rts && + !txrc->bss_conf->use_cts_prot; + if (mrr_capable) + sampling_ratio = mp->lookaround_rate_mrr; else - sample_rate = mp->lookaround_rate; + sampling_ratio = mp->lookaround_rate; + /* increase sum packet counter */ mi->packet_count++; - delta = (mi->packet_count * sample_rate / 100) - + + delta = (mi->packet_count * sampling_ratio / 100) - (mi->sample_count + mi->sample_deferred / 2); - /* delta > 0: sampling required */ - if ((delta > 0) && (mrr || !mi->prev_sample)) { - struct minstrel_rate *msr; - if (mi->packet_count >= 10000) { - mi->sample_deferred = 0; - mi->sample_count = 0; - mi->packet_count = 0; - } else if (delta > mi->n_rates * 2) { - /* With multi-rate retry, not every planned sample - * attempt actually gets used, due to the way the retry - * chain is set up - [max_tp,sample,prob,lowest] for - * sample_rate < max_tp. - * - * If there's too much sampling backlog and the link - * starts getting worse, minstrel would start bursting - * out lots of sampling frames, which would result - * in a large throughput loss. */ - mi->sample_count += (delta - mi->n_rates * 2); - } + /* delta < 0: no sampling required */ + mi->prev_sample = false; + if (delta < 0 || (!mrr_capable && prev_sample)) + return; - sample_ndx = minstrel_get_next_sample(mi); - msr = &mi->r[sample_ndx]; - sample = true; - sample_slower = mrr && (msr->perfect_tx_time > - mi->r[ndx].perfect_tx_time); - - if (!sample_slower) { - if (msr->sample_limit != 0) { - ndx = sample_ndx; - mi->sample_count++; - if (msr->sample_limit > 0) - msr->sample_limit--; - } else { - sample = false; - } - } else { - /* Only use IEEE80211_TX_CTL_RATE_CTRL_PROBE to mark - * packets that have the sampling rate deferred to the - * second MRR stage. Increase the sample counter only - * if the deferred sample rate was actually used. 
- * Use the sample_deferred counter to make sure that - * the sampling is not done in large bursts */ - info->flags |= IEEE80211_TX_CTL_RATE_CTRL_PROBE; - mi->sample_deferred++; - } + if (mi->packet_count >= 10000) { + mi->sample_deferred = 0; + mi->sample_count = 0; + mi->packet_count = 0; + } else if (delta > mi->n_rates * 2) { + /* With multi-rate retry, not every planned sample + * attempt actually gets used, due to the way the retry + * chain is set up - [max_tp,sample,prob,lowest] for + * sample_rate < max_tp. + * + * If there's too much sampling backlog and the link + * starts getting worse, minstrel would start bursting + * out lots of sampling frames, which would result + * in a large throughput loss. */ + mi->sample_count += (delta - mi->n_rates * 2); + } + + /* get next random rate sample */ + ndx = minstrel_get_next_sample(mi); + msr = &mi->r[ndx]; + mr = &mi->r[mi->max_tp_rate[0]]; + + /* Decide if direct (1st MRR stage) or indirect (2nd MRR stage) + * rate sampling method should be used. + * Respect such rates that are not sampled for 20 iterations. + */ + if (mrr_capable && + msr->perfect_tx_time > mr->perfect_tx_time && + msr->sample_skipped < 20) { + /* Only use IEEE80211_TX_CTL_RATE_CTRL_PROBE to mark + * packets that have the sampling rate deferred to the + * second MRR stage. Increase the sample counter only + * if the deferred sample rate was actually used. + * Use the sample_deferred counter to make sure that + * the sampling is not done in large bursts */ + info->flags |= IEEE80211_TX_CTL_RATE_CTRL_PROBE; + rate++; + mi->sample_deferred++; + } else { + if (!msr->sample_limit) + return; + + mi->sample_count++; + if (msr->sample_limit > 0) + msr->sample_limit--; } - mi->prev_sample = sample; /* If we're not using MRR and the sampling rate already * has a probability of >95%, we shouldn't be attempting * to use it, as this only wastes precious airtime */ - if (!mrr && sample && (mi->r[ndx].probability > 17100)) - ndx = mi->max_tp_rate; - - ar[0].idx = mi->r[ndx].rix; - ar[0].count = minstrel_get_retry_count(&mi->r[ndx], info); - - if (!mrr) { - if (!sample) - ar[0].count = mp->max_retry; - ar[1].idx = mi->lowest_rix; - ar[1].count = mp->max_retry; + if (!mrr_capable && + (mi->r[ndx].probability > MINSTREL_FRAC(95, 100))) return; - } - /* MRR setup */ - if (sample) { - if (sample_slower) - mrr_ndx[0] = sample_ndx; - else - mrr_ndx[0] = mi->max_tp_rate; - } else { - mrr_ndx[0] = mi->max_tp_rate2; - } - mrr_ndx[1] = mi->max_prob_rate; - mrr_ndx[2] = 0; - for (i = 1; i < 4; i++) { - ar[i].idx = mi->r[mrr_ndx[i - 1]].rix; - ar[i].count = mi->r[mrr_ndx[i - 1]].adjusted_retry_count; - } + mi->prev_sample = true; + + rate->idx = mi->r[ndx].rix; + rate->count = minstrel_get_retry_count(&mi->r[ndx], info); }
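Aside, not from the patch: the init_sample_table() hunk that follows switches the free-slot sentinel from 0 to 0xff so rate index 0 can itself be stored, and fills each of the SAMPLE_COLUMNS columns with a random permutation of all rates. A standalone sketch of the same shuffle, with rand() standing in for get_random_bytes() and the table size chosen arbitrarily:

/* Compile with: cc sample_table_demo.c && ./a.out */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define SAMPLE_COLUMNS 10

static void build_sample_table(unsigned char *tbl, int n_rates)
{
        int col, i, new_idx;

        memset(tbl, 0xff, SAMPLE_COLUMNS * n_rates);
        for (col = 0; col < SAMPLE_COLUMNS; col++) {
                for (i = 0; i < n_rates; i++) {
                        /* random start slot, probe forward until free */
                        new_idx = (i + (rand() & 0xff)) % n_rates;
                        while (tbl[new_idx * SAMPLE_COLUMNS + col] != 0xff)
                                new_idx = (new_idx + 1) % n_rates;
                        tbl[new_idx * SAMPLE_COLUMNS + col] = i;
                }
        }
}

int main(void)
{
        unsigned char tbl[SAMPLE_COLUMNS * 12];
        int i;

        build_sample_table(tbl, 12);
        /* the first column is a permutation of 0..11 */
        for (i = 0; i < 12; i++)
                printf("%d ", tbl[i * SAMPLE_COLUMNS + 0]);
        printf("\n");
        return 0;
}

@@ -351,26 +396,21 @@ static void init_sample_table(struct minstrel_sta_info *mi) { unsigned int i, col, new_idx; - unsigned int n_srates = mi->n_rates - 1; u8 rnd[8]; mi->sample_column = 0; - mi->sample_idx = 0; - memset(mi->sample_table, 0, SAMPLE_COLUMNS * mi->n_rates); + mi->sample_row = 0; + memset(mi->sample_table, 0xff, SAMPLE_COLUMNS * mi->n_rates); for (col = 0; col < SAMPLE_COLUMNS; col++) { - for (i = 0; i < n_srates; i++) { + for (i = 0; i < mi->n_rates; i++) { get_random_bytes(rnd, sizeof(rnd)); - new_idx = (i + rnd[i & 7]) % n_srates; + new_idx = (i + rnd[i & 7]) % mi->n_rates; - while (SAMPLE_TBL(mi, new_idx, col) != 0) - new_idx = (new_idx + 1) % n_srates; + while (SAMPLE_TBL(mi, new_idx, col) != 0xff) + new_idx = (new_idx + 1) % mi->n_rates; - /* Don't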
sample the slowest rate (i.e. slowest base - * rate). We must presume that the slowest rate works - * fine, or else other management frames will also be - * failing and the link will break */ - SAMPLE_TBL(mi, new_idx, col) = i + 1; + SAMPLE_TBL(mi, new_idx, col) = i; } } } @@ -385,12 +425,16 @@ minstrel_rate_init(void *priv, struct ieee80211_supported_band *sband, unsigned int i, n = 0; unsigned int t_slot = 9; /* FIXME: get real slot time */ + mi->sta = sta; mi->lowest_rix = rate_lowest_index(sband, sta); ctl_rate = &sband->bitrates[mi->lowest_rix]; mi->sp_ack_dur = ieee80211_frame_duration(sband->band, 10, ctl_rate->bitrate, !!(ctl_rate->flags & IEEE80211_RATE_ERP_G), 1); + memset(mi->max_tp_rate, 0, sizeof(mi->max_tp_rate)); + mi->max_prob_rate = 0; + for (i = 0; i < sband->n_bitrates; i++) { struct minstrel_rate *mr = &mi->r[n]; unsigned int tx_time = 0, tx_time_cts = 0, tx_time_rtscts = 0; @@ -433,6 +477,8 @@ minstrel_rate_init(void *priv, struct ieee80211_supported_band *sband, } while ((tx_time < mp->segment_size) && (++mr->retry_count < mp->max_retry)); mr->adjusted_retry_count = mr->retry_count; + if (!(sband->bitrates[i].flags & IEEE80211_RATE_ERP_G)) + mr->retry_count_cts = mr->retry_count; } for (i = n; i < sband->n_bitrates; i++) { @@ -444,6 +490,7 @@ minstrel_rate_init(void *priv, struct ieee80211_supported_band *sband, mi->stats_update = jiffies; init_sample_table(mi); + minstrel_update_rates(mp, mi); } static void * @@ -494,6 +541,33 @@ minstrel_free_sta(void *priv, struct ieee80211_sta *sta, void *priv_sta) kfree(mi); } +static void +minstrel_init_cck_rates(struct minstrel_priv *mp) +{ + static const int bitrates[4] = { 10, 20, 55, 110 }; + struct ieee80211_supported_band *sband; + int i, j; + + sband = mp->hw->wiphy->bands[IEEE80211_BAND_2GHZ]; + if (!sband) + return; + + for (i = 0, j = 0; i < sband->n_bitrates; i++) { + struct ieee80211_rate *rate = &sband->bitrates[i]; + + if (rate->flags & IEEE80211_RATE_ERP_G) + continue; + + for (j = 0; j < ARRAY_SIZE(bitrates); j++) { + if (rate->bitrate != bitrates[j]) + continue; + + mp->cck_rates[j] = i; + break; + } + } +} + static void * minstrel_alloc(struct ieee80211_hw *hw, struct dentry *debugfsdir) { @@ -515,9 +589,6 @@ minstrel_alloc(struct ieee80211_hw *hw, struct dentry *debugfsdir) mp->lookaround_rate = 5; mp->lookaround_rate_mrr = 10; - /* moving average weight for EWMA */ - mp->ewma_level = 75; - /* maximum time that the hw is allowed to stay in one MRR segment */ mp->segment_size = 6000; @@ -539,6 +610,8 @@ minstrel_alloc(struct ieee80211_hw *hw, struct dentry *debugfsdir) S_IRUGO | S_IWUGO, debugfsdir, &mp->fixed_rate_idx); #endif + minstrel_init_cck_rates(mp); + return mp; } diff --git a/net/mac80211/rc80211_minstrel.h b/net/mac80211/rc80211_minstrel.h index 5d278eccaef0..f4301f4b2e41 100644 --- a/net/mac80211/rc80211_minstrel.h +++ b/net/mac80211/rc80211_minstrel.h @@ -9,6 +9,29 @@ #ifndef __RC_MINSTREL_H #define __RC_MINSTREL_H +#define EWMA_LEVEL 96 /* ewma weighting factor [/EWMA_DIV] */ +#define EWMA_DIV 128 +#define SAMPLE_COLUMNS 10 /* number of columns in sample table */ + + +/* scaled fraction values */ +#define MINSTREL_SCALE 16 +#define MINSTREL_FRAC(val, div) (((val) << MINSTREL_SCALE) / div) +#define MINSTREL_TRUNC(val) ((val) >> MINSTREL_SCALE) + +/* number of highest throughput rates to consider*/ +#define MAX_THR_RATES 4 + +/* + * Perform EWMA (Exponentially Weighted Moving Average) calculation + */ +static inline int +minstrel_ewma(int old, int new, int weight) +{ + return (new * (EWMA_DIV - 
weight) + old * weight) / EWMA_DIV; +} + + struct minstrel_rate { int bitrate; int rix; @@ -26,6 +49,7 @@ struct minstrel_rate { u32 attempts; u32 last_attempts; u32 last_success; + u8 sample_skipped; /* parts per thousand */ u32 cur_prob; @@ -39,20 +63,21 @@ struct minstrel_rate { }; struct minstrel_sta_info { + struct ieee80211_sta *sta; + unsigned long stats_update; unsigned int sp_ack_dur; unsigned int rate_avg; unsigned int lowest_rix; - unsigned int max_tp_rate; - unsigned int max_tp_rate2; - unsigned int max_prob_rate; + u8 max_tp_rate[MAX_THR_RATES]; + u8 max_prob_rate; unsigned int packet_count; unsigned int sample_count; int sample_deferred; - unsigned int sample_idx; + unsigned int sample_row; unsigned int sample_column; int n_rates; @@ -73,12 +98,13 @@ struct minstrel_priv { unsigned int cw_min; unsigned int cw_max; unsigned int max_retry; - unsigned int ewma_level; unsigned int segment_size; unsigned int update_interval; unsigned int lookaround_rate; unsigned int lookaround_rate_mrr; + u8 cck_rates[4]; + #ifdef CONFIG_MAC80211_DEBUGFS /* * enable fixed rate processing per RC diff --git a/net/mac80211/rc80211_minstrel_debugfs.c b/net/mac80211/rc80211_minstrel_debugfs.c index d5a56226e675..fd0b9ca1570e 100644 --- a/net/mac80211/rc80211_minstrel_debugfs.c +++ b/net/mac80211/rc80211_minstrel_debugfs.c @@ -68,23 +68,25 @@ minstrel_stats_open(struct inode *inode, struct file *file) file->private_data = ms; p = ms->buf; - p += sprintf(p, "rate throughput ewma prob this prob " + p += sprintf(p, "rate throughput ewma prob this prob " "this succ/attempt success attempts\n"); for (i = 0; i < mi->n_rates; i++) { struct minstrel_rate *mr = &mi->r[i]; - *(p++) = (i == mi->max_tp_rate) ? 'T' : ' '; - *(p++) = (i == mi->max_tp_rate2) ? 't' : ' '; + *(p++) = (i == mi->max_tp_rate[0]) ? 'A' : ' '; + *(p++) = (i == mi->max_tp_rate[1]) ? 'B' : ' '; + *(p++) = (i == mi->max_tp_rate[2]) ? 'C' : ' '; + *(p++) = (i == mi->max_tp_rate[3]) ? 'D' : ' '; *(p++) = (i == mi->max_prob_rate) ? 'P' : ' '; p += sprintf(p, "%3u%s", mr->bitrate / 2, (mr->bitrate & 1 ? ".5" : " ")); - tp = mr->cur_tp / ((18000 << 10) / 96); - prob = mr->cur_prob / 18; - eprob = mr->probability / 18; + tp = MINSTREL_TRUNC(mr->cur_tp / 10); + prob = MINSTREL_TRUNC(mr->cur_prob * 1000); + eprob = MINSTREL_TRUNC(mr->probability * 1000); p += sprintf(p, " %6u.%1u %6u.%1u %6u.%1u " - "%3u(%3u) %8llu %8llu\n", + " %3u(%3u) %8llu %8llu\n", tp / 10, tp % 10, eprob / 10, eprob % 10, prob / 10, prob % 10, diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c index 9f9c453bc45d..5b2d3012b983 100644 --- a/net/mac80211/rc80211_minstrel_ht.c +++ b/net/mac80211/rc80211_minstrel_ht.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2010 Felix Fietkau <nbd@openwrt.org> + * Copyright (C) 2010-2013 Felix Fietkau <nbd@openwrt.org> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -17,8 +17,6 @@ #include "rc80211_minstrel_ht.h" #define AVG_PKT_SIZE 1200 -#define SAMPLE_COLUMNS 10 -#define EWMA_LEVEL 75 /* Number of bits for an average sized packet */ #define MCS_NBITS (AVG_PKT_SIZE << 3) @@ -26,11 +24,11 @@ /* Number of symbols for a packet with (bps) bits per symbol */ #define MCS_NSYMS(bps) ((MCS_NBITS + (bps) - 1) / (bps)) -/* Transmission time for a packet containing (syms) symbols */ +/* Transmission time (nanoseconds) for a packet containing (syms) symbols */ #define MCS_SYMBOL_TIME(sgi, syms) \ (sgi ? 
\ - ((syms) * 18 + 4) / 5 : /* syms * 3.6 us */ \ - (syms) << 2 /* syms * 4 us */ \ + ((syms) * 18000 + 4000) / 5 : /* syms * 3.6 us */ \ + ((syms) * 1000) << 2 /* syms * 4 us */ \ ) /* Transmit duration for the raw data part of an average sized packet */ @@ -63,6 +61,30 @@ } \ } +#define CCK_DURATION(_bitrate, _short, _len) \ + (1000 * (10 /* SIFS */ + \ + (_short ? 72 + 24 : 144 + 48) + \ + (8 * (_len + 4) * 10) / (_bitrate))) + +#define CCK_ACK_DURATION(_bitrate, _short) \ + (CCK_DURATION((_bitrate > 10 ? 20 : 10), false, 60) + \ + CCK_DURATION(_bitrate, _short, AVG_PKT_SIZE)) + +#define CCK_DURATION_LIST(_short) \ + CCK_ACK_DURATION(10, _short), \ + CCK_ACK_DURATION(20, _short), \ + CCK_ACK_DURATION(55, _short), \ + CCK_ACK_DURATION(110, _short) + +#define CCK_GROUP \ + [MINSTREL_MAX_STREAMS * MINSTREL_STREAM_GROUPS] = { \ + .streams = 0, \ + .duration = { \ + CCK_DURATION_LIST(false), \ + CCK_DURATION_LIST(true) \ + } \ + } + /* * To enable sufficiently targeted rate sampling, MCS rates are divided into * groups, based on the number of streams and flags (HT40, SGI) that they @@ -95,18 +117,17 @@ const struct mcs_group minstrel_mcs_groups[] = { #if MINSTREL_MAX_STREAMS >= 3 MCS_GROUP(3, 1, 1), #endif + + /* must be last */ + CCK_GROUP }; +#define MINSTREL_CCK_GROUP (ARRAY_SIZE(minstrel_mcs_groups) - 1) + static u8 sample_table[SAMPLE_COLUMNS][MCS_GROUP_RATES]; -/* - * Perform EWMA (Exponentially Weighted Moving Average) calculation - */ -static int -minstrel_ewma(int old, int new, int weight) -{ - return (new * (100 - weight) + old * weight) / 100; -} +static void +minstrel_ht_update_rates(struct minstrel_priv *mp, struct minstrel_ht_sta *mi); /* * Look up an MCS group index based on mac80211 rate information */ @@ -119,6 +140,29 @@ minstrel_ht_get_group_idx(struct ieee80211_tx_rate *rate) !!(rate->flags & IEEE80211_TX_RC_40_MHZ_WIDTH)); } +static struct minstrel_rate_stats * +minstrel_ht_get_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, + struct ieee80211_tx_rate *rate) +{ + int group, idx; + + if (rate->flags & IEEE80211_TX_RC_MCS) { + group = minstrel_ht_get_group_idx(rate); + idx = rate->idx % MCS_GROUP_RATES; + } else { + group = MINSTREL_CCK_GROUP; + + for (idx = 0; idx < ARRAY_SIZE(mp->cck_rates); idx++) + if (rate->idx == mp->cck_rates[idx]) + break; + + /* short preamble */ + if (!(mi->groups[group].supported & BIT(idx))) + idx += 4; + } + return &mi->groups[group].rates[idx]; +} + static inline struct minstrel_rate_stats * minstrel_get_ratestats(struct minstrel_ht_sta *mi, int index) {
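Aside, not from the patch: the minstrel_ht_calc_tp() hunk that follows now tracks airtime in nanoseconds and caps the per-rate success probability at 90% before converting it into a throughput estimate, so a marginally higher raw probability cannot win on collision noise alone. A standalone sketch of the fixed-point arithmetic, with an illustrative airtime value:

/* Compile with: cc calc_tp_demo.c && ./a.out */
#include <stdio.h>

#define MINSTREL_SCALE 16
#define MINSTREL_FRAC(val, div) (((val) << MINSTREL_SCALE) / (div))
#define MINSTREL_TRUNC(val) ((val) >> MINSTREL_SCALE)

int main(void)
{
        unsigned int prob = MINSTREL_FRAC(97, 100); /* 97% measured */
        unsigned int nsecs = 32000;                 /* ~32 us per avg frame */
        unsigned int tp;

        /* cap at 90% to damp collision-related fluctuation */
        if (prob > MINSTREL_FRAC(9, 10))
                prob = MINSTREL_FRAC(9, 10);

        tp = 1000000 * ((prob * 1000) / nsecs);
        printf("cur_tp = %u\n", MINSTREL_TRUNC(tp)); /* prints 28121 */
        return 0;
}

@@ -159,18 +203,32 @@ static void minstrel_ht_calc_tp(struct minstrel_ht_sta *mi, int group, int rate) { struct minstrel_rate_stats *mr; - unsigned int usecs; + unsigned int nsecs = 0; + unsigned int tp; + unsigned int prob; mr = &mi->groups[group].rates[rate]; + prob = mr->probability; - if (mr->probability < MINSTREL_FRAC(1, 10)) { + if (prob < MINSTREL_FRAC(1, 10)) { mr->cur_tp = 0; return; } - usecs = mi->overhead / MINSTREL_TRUNC(mi->avg_ampdu_len); - usecs += minstrel_mcs_groups[group].duration[rate]; - mr->cur_tp = MINSTREL_TRUNC((1000000 / usecs) * mr->probability); + /* + * For the throughput calculation, limit the probability value to 90% to + * account for collision related packet error rate fluctuation + */ + if (prob > MINSTREL_FRAC(9, 10)) + prob = MINSTREL_FRAC(9, 10); + + if (group != MINSTREL_CCK_GROUP) + nsecs = 1000 * mi->overhead / MINSTREL_TRUNC(mi->avg_ampdu_len); + + nsecs += minstrel_mcs_groups[group].duration[rate]; + tp = 1000000 * ((prob * 1000)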
/ nsecs); + + mr->cur_tp = MINSTREL_TRUNC(tp); } /* @@ -189,6 +247,7 @@ minstrel_ht_update_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi) struct minstrel_rate_stats *mr; int cur_prob, cur_prob_tp, cur_tp, cur_tp2; int group, i, index; + bool mi_rates_valid = false; if (mi->ampdu_packets > 0) { mi->avg_ampdu_len = minstrel_ewma(mi->avg_ampdu_len, @@ -199,11 +258,10 @@ minstrel_ht_update_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi) mi->sample_slow = 0; mi->sample_count = 0; - mi->max_tp_rate = 0; - mi->max_tp_rate2 = 0; - mi->max_prob_rate = 0; for (group = 0; group < ARRAY_SIZE(minstrel_mcs_groups); group++) { + bool mg_rates_valid = false; + cur_prob = 0; cur_prob_tp = 0; cur_tp = 0; @@ -213,15 +271,24 @@ minstrel_ht_update_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi) if (!mg->supported) continue; - mg->max_tp_rate = 0; - mg->max_tp_rate2 = 0; - mg->max_prob_rate = 0; mi->sample_count++; for (i = 0; i < MCS_GROUP_RATES; i++) { if (!(mg->supported & BIT(i))) continue; + /* initialize rates selections starting indexes */ + if (!mg_rates_valid) { + mg->max_tp_rate = mg->max_tp_rate2 = + mg->max_prob_rate = i; + if (!mi_rates_valid) { + mi->max_tp_rate = mi->max_tp_rate2 = + mi->max_prob_rate = i; + mi_rates_valid = true; + } + mg_rates_valid = true; + } + mr = &mg->rates[i]; mr->retry_updated = false; index = MCS_GROUP_RATES * group + i; @@ -231,10 +298,6 @@ minstrel_ht_update_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi) if (!mr->cur_tp) continue; - /* ignore the lowest rate of each single-stream group */ - if (!i && minstrel_mcs_groups[group].streams == 1) - continue; - if ((mr->cur_tp > cur_prob_tp && mr->probability > MINSTREL_FRAC(3, 4)) || mr->probability > cur_prob) { mg->max_prob_rate = index; @@ -258,8 +321,8 @@ minstrel_ht_update_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi) } } - /* try to sample up to half of the available rates during each interval */ - mi->sample_count *= 4; + /* try to sample all available rates during each interval */ + mi->sample_count *= 8; cur_prob = 0; cur_prob_tp = 0; @@ -270,20 +333,13 @@ minstrel_ht_update_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi) if (!mg->supported) continue; - mr = minstrel_get_ratestats(mi, mg->max_prob_rate); - if (cur_prob_tp < mr->cur_tp && - minstrel_mcs_groups[group].streams == 1) { - mi->max_prob_rate = mg->max_prob_rate; - cur_prob = mr->cur_prob; - cur_prob_tp = mr->cur_tp; - } - mr = minstrel_get_ratestats(mi, mg->max_tp_rate); if (cur_tp < mr->cur_tp) { mi->max_tp_rate2 = mi->max_tp_rate; cur_tp2 = cur_tp; mi->max_tp_rate = mg->max_tp_rate; cur_tp = mr->cur_tp; + mi->max_prob_streams = minstrel_mcs_groups[group].streams - 1; } mr = minstrel_get_ratestats(mi, mg->max_tp_rate2); @@ -293,11 +349,28 @@ minstrel_ht_update_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi) } } + if (mi->max_prob_streams < 1) + mi->max_prob_streams = 1; + + for (group = 0; group < ARRAY_SIZE(minstrel_mcs_groups); group++) { + mg = &mi->groups[group]; + if (!mg->supported) + continue; + mr = minstrel_get_ratestats(mi, mg->max_prob_rate); + if (cur_prob_tp < mr->cur_tp && + minstrel_mcs_groups[group].streams <= mi->max_prob_streams) { + mi->max_prob_rate = mg->max_prob_rate; + cur_prob = mr->cur_prob; + cur_prob_tp = mr->cur_tp; + } + } + + mi->stats_update = jiffies; } static bool -minstrel_ht_txstat_valid(struct ieee80211_tx_rate *rate) +minstrel_ht_txstat_valid(struct minstrel_priv *mp, struct ieee80211_tx_rate *rate) { if (rate->idx < 0) return 
false; @@ -305,7 +378,13 @@ minstrel_ht_txstat_valid(struct ieee80211_tx_rate *rate) if (!rate->count) return false; - return !!(rate->flags & IEEE80211_TX_RC_MCS); + if (rate->flags & IEEE80211_TX_RC_MCS) + return true; + + return rate->idx == mp->cck_rates[0] || + rate->idx == mp->cck_rates[1] || + rate->idx == mp->cck_rates[2] || + rate->idx == mp->cck_rates[3]; } static void @@ -389,8 +468,7 @@ minstrel_ht_tx_status(void *priv, struct ieee80211_supported_band *sband, struct ieee80211_tx_rate *ar = info->status.rates; struct minstrel_rate_stats *rate, *rate2; struct minstrel_priv *mp = priv; - bool last; - int group; + bool last, update = false; int i; if (!msp->is_ht) @@ -412,20 +490,19 @@ minstrel_ht_tx_status(void *priv, struct ieee80211_supported_band *sband, if (!mi->sample_wait && !mi->sample_tries && mi->sample_count > 0) { mi->sample_wait = 16 + 2 * MINSTREL_TRUNC(mi->avg_ampdu_len); - mi->sample_tries = 2; + mi->sample_tries = 1; mi->sample_count--; } if (info->flags & IEEE80211_TX_CTL_RATE_CTRL_PROBE) mi->sample_packets += info->status.ampdu_len; - last = !minstrel_ht_txstat_valid(&ar[0]); + last = !minstrel_ht_txstat_valid(mp, &ar[0]); for (i = 0; !last; i++) { last = (i == IEEE80211_TX_MAX_RATES - 1) || - !minstrel_ht_txstat_valid(&ar[i + 1]); + !minstrel_ht_txstat_valid(mp, &ar[i + 1]); - group = minstrel_ht_get_group_idx(&ar[i]); - rate = &mi->groups[group].rates[ar[i].idx % 8]; + rate = minstrel_ht_get_stats(mp, mi, &ar[i]); if (last) rate->success += info->status.ampdu_ack_len; @@ -440,20 +517,29 @@ minstrel_ht_tx_status(void *priv, struct ieee80211_supported_band *sband, rate = minstrel_get_ratestats(mi, mi->max_tp_rate); if (rate->attempts > 30 && MINSTREL_FRAC(rate->success, rate->attempts) < - MINSTREL_FRAC(20, 100)) + MINSTREL_FRAC(20, 100)) { minstrel_downgrade_rate(mi, &mi->max_tp_rate, true); + update = true; + } rate2 = minstrel_get_ratestats(mi, mi->max_tp_rate2); if (rate2->attempts > 30 && MINSTREL_FRAC(rate2->success, rate2->attempts) < - MINSTREL_FRAC(20, 100)) + MINSTREL_FRAC(20, 100)) { minstrel_downgrade_rate(mi, &mi->max_tp_rate2, false); + update = true; + } if (time_after(jiffies, mi->stats_update + (mp->update_interval / 2 * HZ) / 1000)) { + update = true; minstrel_ht_update_stats(mp, mi); - if (!(info->flags & IEEE80211_TX_CTL_AMPDU)) + if (!(info->flags & IEEE80211_TX_CTL_AMPDU) && + mi->max_prob_rate / MCS_GROUP_RATES != MINSTREL_CCK_GROUP) minstrel_aggr_check(sta, skb); } + + if (update) + minstrel_ht_update_rates(mp, mi); } static void @@ -467,6 +553,7 @@ minstrel_calc_retransmit(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, unsigned int ctime = 0; unsigned int t_slot = 9; /* FIXME */ unsigned int ampdu_len = MINSTREL_TRUNC(mi->avg_ampdu_len); + unsigned int overhead = 0, overhead_rtscts = 0; mr = minstrel_get_ratestats(mi, index); if (mr->probability < MINSTREL_FRAC(1, 10)) { @@ -480,7 +567,7 @@ minstrel_calc_retransmit(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, mr->retry_updated = true; group = &minstrel_mcs_groups[index / MCS_GROUP_RATES]; - tx_time_data = group->duration[index % MCS_GROUP_RATES] * ampdu_len; + tx_time_data = group->duration[index % MCS_GROUP_RATES] * ampdu_len / 1000; /* Contention time for first 2 tries */ ctime = (t_slot * cw) >> 1; @@ -488,9 +575,14 @@ minstrel_calc_retransmit(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, ctime += (t_slot * cw) >> 1; cw = min((cw << 1) | 1, mp->cw_max); + if (index / MCS_GROUP_RATES != MINSTREL_CCK_GROUP) { + overhead = mi->overhead; + overhead_rtscts = 
mi->overhead_rtscts; + } + + /* Total TX time for data and Contention after first 2 tries */ - tx_time = ctime + 2 * (mi->overhead + tx_time_data); - tx_time_rtscts = ctime + 2 * (mi->overhead_rtscts + tx_time_data); + tx_time = ctime + 2 * (overhead + tx_time_data); + tx_time_rtscts = ctime + 2 * (overhead_rtscts + tx_time_data); /* See how many more tries we can fit inside segment size */ do { @@ -499,8 +591,8 @@ minstrel_calc_retransmit(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, cw = min((cw << 1) | 1, mp->cw_max); /* Total TX time after this try */ - tx_time += ctime + mi->overhead + tx_time_data; - tx_time_rtscts += ctime + mi->overhead_rtscts + tx_time_data; + tx_time += ctime + overhead + tx_time_data; + tx_time_rtscts += ctime + overhead_rtscts + tx_time_data; if (tx_time_rtscts < mp->segment_size) mr->retry_count_rtscts++; @@ -511,29 +603,71 @@ static void minstrel_ht_set_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, - struct ieee80211_tx_rate *rate, int index, - bool sample, bool rtscts) + struct ieee80211_sta_rates *ratetbl, int offset, int index) { const struct mcs_group *group = &minstrel_mcs_groups[index / MCS_GROUP_RATES]; struct minstrel_rate_stats *mr; + u8 idx; + u16 flags; mr = minstrel_get_ratestats(mi, index); if (!mr->retry_updated) minstrel_calc_retransmit(mp, mi, index); - if (sample) - rate->count = 1; - else if (mr->probability < MINSTREL_FRAC(20, 100)) - rate->count = 2; - else if (rtscts) - rate->count = mr->retry_count_rtscts; - else - rate->count = mr->retry_count; + if (mr->probability < MINSTREL_FRAC(20, 100) || !mr->retry_count) { + ratetbl->rate[offset].count = 2; + ratetbl->rate[offset].count_rts = 2; + ratetbl->rate[offset].count_cts = 2; + } else { + ratetbl->rate[offset].count = mr->retry_count; + ratetbl->rate[offset].count_cts = mr->retry_count; + ratetbl->rate[offset].count_rts = mr->retry_count_rtscts; + } + + if (index / MCS_GROUP_RATES == MINSTREL_CCK_GROUP) { + idx = mp->cck_rates[index % ARRAY_SIZE(mp->cck_rates)]; + flags = 0; + } else { + idx = index % MCS_GROUP_RATES + + (group->streams - 1) * MCS_GROUP_RATES; + flags = IEEE80211_TX_RC_MCS | group->flags; + } + + if (offset > 0) { + ratetbl->rate[offset].count = ratetbl->rate[offset].count_rts; + flags |= IEEE80211_TX_RC_USE_RTS_CTS; + } - rate->flags = IEEE80211_TX_RC_MCS | group->flags; - if (rtscts) - rate->flags |= IEEE80211_TX_RC_USE_RTS_CTS; - rate->idx = index % MCS_GROUP_RATES + (group->streams - 1) * MCS_GROUP_RATES; + ratetbl->rate[offset].idx = idx; + ratetbl->rate[offset].flags = flags; +} + +static void +minstrel_ht_update_rates(struct minstrel_priv *mp, struct minstrel_ht_sta *mi) +{ + struct ieee80211_sta_rates *rates; + int i = 0; + + rates = kzalloc(sizeof(*rates), GFP_ATOMIC); + if (!rates) + return; + + /* Start with max_tp_rate */ + minstrel_ht_set_rate(mp, mi, rates, i++, mi->max_tp_rate); + + if (mp->hw->max_rates >= 3) { + /* At least 3 tx rates supported, use max_tp_rate2 next */ + minstrel_ht_set_rate(mp, mi, rates, i++, mi->max_tp_rate2); + } + + if (mp->hw->max_rates >= 2) { + /* At least 2 tx rates supported, use max_prob_rate next */ + minstrel_ht_set_rate(mp, mi, rates, i++, mi->max_prob_rate); + } + + rates->rate[i].idx = -1; + rate_control_set_rates(mp->hw, mi->sta, rates); }
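Aside, not from the patch: the table built by minstrel_ht_update_rates() above is published through rate_control_set_rates(), which swaps the station's rate pointer with rcu_assign_pointer() and reclaims the old table with kfree_rcu() (see the rate.c hunk earlier in this diff). A reader, typically a driver filling a TX descriptor, dereferences it under rcu_read_lock(); the mydrv_* name below is hypothetical.

#include <net/mac80211.h>

static void mydrv_tx_fill_first_rate(struct ieee80211_sta *pubsta,
                                     struct ieee80211_tx_rate *dest)
{
        struct ieee80211_sta_rates *tbl;

        rcu_read_lock();
        tbl = rcu_dereference(pubsta->rates);
        if (tbl) {
                /* a stale table is harmless: it stays valid until all
                 * readers drop out of the RCU read-side section */
                dest->idx = tbl->rate[0].idx;
                dest->count = tbl->rate[0].count;
        }
        rcu_read_unlock();
}

static inline int @@ -548,6 +682,7 @@ minstrel_get_sample_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi) { struct minstrel_rate_stats *mr; struct minstrel_mcs_group_data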
*mg; + unsigned int sample_dur, sample_group; int sample_idx = 0; if (mi->sample_wait > 0) { @@ -558,54 +693,77 @@ minstrel_get_sample_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi) if (!mi->sample_tries) return -1; - mi->sample_tries--; mg = &mi->groups[mi->sample_group]; sample_idx = sample_table[mg->column][mg->index]; mr = &mg->rates[sample_idx]; - sample_idx += mi->sample_group * MCS_GROUP_RATES; + sample_group = mi->sample_group; + sample_idx += sample_group * MCS_GROUP_RATES; minstrel_next_sample_idx(mi); /* * Sampling might add some overhead (RTS, no aggregation) * to the frame. Hence, don't use sampling for the currently - * used max TP rate. + * used rates. */ - if (sample_idx == mi->max_tp_rate) + if (sample_idx == mi->max_tp_rate || + sample_idx == mi->max_tp_rate2 || + sample_idx == mi->max_prob_rate) return -1; + /* - * When not using MRR, do not sample if the probability is already - * higher than 95% to avoid wasting airtime + * Do not sample if the probability is already higher than 95% + * to avoid wasting airtime. */ - if (!mp->has_mrr && (mr->probability > MINSTREL_FRAC(95, 100))) + if (mr->probability > MINSTREL_FRAC(95, 100)) return -1; /* * Make sure that lower rates get sampled only occasionally, * if the link is working perfectly. */ - if (minstrel_get_duration(sample_idx) > - minstrel_get_duration(mi->max_tp_rate)) { + sample_dur = minstrel_get_duration(sample_idx); + if (sample_dur >= minstrel_get_duration(mi->max_tp_rate2) && + (mi->max_prob_streams < + minstrel_mcs_groups[sample_group].streams || + sample_dur >= minstrel_get_duration(mi->max_prob_rate))) { if (mr->sample_skipped < 20) return -1; if (mi->sample_slow++ > 2) return -1; } + mi->sample_tries--; return sample_idx; } static void +minstrel_ht_check_cck_shortpreamble(struct minstrel_priv *mp, + struct minstrel_ht_sta *mi, bool val) +{ + u8 supported = mi->groups[MINSTREL_CCK_GROUP].supported; + + if (!supported || !mi->cck_supported_short) + return; + + if (supported & (mi->cck_supported_short << (val * 4))) + return; + + supported ^= mi->cck_supported_short | (mi->cck_supported_short << 4); + mi->groups[MINSTREL_CCK_GROUP].supported = supported; +} + +static void minstrel_ht_get_rate(void *priv, struct ieee80211_sta *sta, void *priv_sta, struct ieee80211_tx_rate_control *txrc) { + const struct mcs_group *sample_group; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(txrc->skb); - struct ieee80211_tx_rate *ar = info->status.rates; + struct ieee80211_tx_rate *rate = &info->status.rates[0]; struct minstrel_ht_sta_priv *msp = priv_sta; struct minstrel_ht_sta *mi = &msp->ht; struct minstrel_priv *mp = priv; int sample_idx; - bool sample = false; if (rate_control_send_low(sta, priv_sta, txrc)) return; @@ -614,6 +772,7 @@ minstrel_ht_get_rate(void *priv, struct ieee80211_sta *sta, void *priv_sta, return mac80211_minstrel.get_rate(priv, sta, &msp->legacy, txrc); info->flags |= mi->tx_flags; + minstrel_ht_check_cck_shortpreamble(mp, mi, txrc->short_preamble); /* Don't use EAPOL frames for sampling on non-mrr hw */ if (mp->hw->max_rates == 1 && @@ -632,51 +791,6 @@ minstrel_ht_get_rate(void *priv, struct ieee80211_sta *sta, void *priv_sta, } #endif - if (sample_idx >= 0) { - sample = true; - minstrel_ht_set_rate(mp, mi, &ar[0], sample_idx, - true, false); - info->flags |= IEEE80211_TX_CTL_RATE_CTRL_PROBE; - } else { - minstrel_ht_set_rate(mp, mi, &ar[0], mi->max_tp_rate, - false, false); - } - - if (mp->hw->max_rates >= 3) { - /* - * At least 3 tx rates supported, use - * sample_rate -> max_tp_rate 
-> max_prob_rate for sampling and - * max_tp_rate -> max_tp_rate2 -> max_prob_rate by default. - */ - if (sample_idx >= 0) - minstrel_ht_set_rate(mp, mi, &ar[1], mi->max_tp_rate, - false, false); - else - minstrel_ht_set_rate(mp, mi, &ar[1], mi->max_tp_rate2, - false, true); - - minstrel_ht_set_rate(mp, mi, &ar[2], mi->max_prob_rate, - false, !sample); - - ar[3].count = 0; - ar[3].idx = -1; - } else if (mp->hw->max_rates == 2) { - /* - * Only 2 tx rates supported, use - * sample_rate -> max_prob_rate for sampling and - * max_tp_rate -> max_prob_rate by default. - */ - minstrel_ht_set_rate(mp, mi, &ar[1], mi->max_prob_rate, - false, !sample); - - ar[2].count = 0; - ar[2].idx = -1; - } else { - /* Not using MRR, only use the first rate */ - ar[1].count = 0; - ar[1].idx = -1; - } - mi->total_packets++; /* wraparound */ @@ -684,6 +798,40 @@ minstrel_ht_get_rate(void *priv, struct ieee80211_sta *sta, void *priv_sta, mi->total_packets = 0; mi->sample_packets = 0; } + + if (sample_idx < 0) + return; + + sample_group = &minstrel_mcs_groups[sample_idx / MCS_GROUP_RATES]; + info->flags |= IEEE80211_TX_CTL_RATE_CTRL_PROBE; + rate->idx = sample_idx % MCS_GROUP_RATES + + (sample_group->streams - 1) * MCS_GROUP_RATES; + rate->flags = IEEE80211_TX_RC_MCS | sample_group->flags; + rate->count = 1; +} + +static void +minstrel_ht_update_cck(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, + struct ieee80211_supported_band *sband, + struct ieee80211_sta *sta) +{ + int i; + + if (sband->band != IEEE80211_BAND_2GHZ) + return; + + mi->cck_supported = 0; + mi->cck_supported_short = 0; + for (i = 0; i < 4; i++) { + if (!rate_supported(sta, sband->band, mp->cck_rates[i])) + continue; + + mi->cck_supported |= BIT(i); + if (sband->bitrates[i].flags & IEEE80211_RATE_SHORT_PREAMBLE) + mi->cck_supported_short |= BIT(i); + } + + mi->groups[MINSTREL_CCK_GROUP].supported = mi->cck_supported; } static void @@ -699,17 +847,18 @@ minstrel_ht_update_caps(void *priv, struct ieee80211_supported_band *sband, int ack_dur; int stbc; int i; - unsigned int smps; /* fall back to the old minstrel for legacy stations */ if (!sta->ht_cap.ht_supported) goto use_legacy; BUILD_BUG_ON(ARRAY_SIZE(minstrel_mcs_groups) != - MINSTREL_MAX_STREAMS * MINSTREL_STREAM_GROUPS); + MINSTREL_MAX_STREAMS * MINSTREL_STREAM_GROUPS + 1); msp->is_ht = true; memset(mi, 0, sizeof(*mi)); + + mi->sta = sta; mi->stats_update = jiffies; ack_dur = ieee80211_frame_duration(sband->band, 10, 60, 1, 1); @@ -735,28 +884,29 @@ minstrel_ht_update_caps(void *priv, struct ieee80211_supported_band *sband, if (sta_cap & IEEE80211_HT_CAP_LDPC_CODING) mi->tx_flags |= IEEE80211_TX_CTL_LDPC; - smps = (sta_cap & IEEE80211_HT_CAP_SM_PS) >> - IEEE80211_HT_CAP_SM_PS_SHIFT; - for (i = 0; i < ARRAY_SIZE(mi->groups); i++) { - u16 req = 0; - mi->groups[i].supported = 0; - if (minstrel_mcs_groups[i].flags & IEEE80211_TX_RC_SHORT_GI) { - if (minstrel_mcs_groups[i].flags & IEEE80211_TX_RC_40_MHZ_WIDTH) - req |= IEEE80211_HT_CAP_SGI_40; - else - req |= IEEE80211_HT_CAP_SGI_20; + if (i == MINSTREL_CCK_GROUP) { + minstrel_ht_update_cck(mp, mi, sband, sta); + continue; } - if (minstrel_mcs_groups[i].flags & IEEE80211_TX_RC_40_MHZ_WIDTH) - req |= IEEE80211_HT_CAP_SUP_WIDTH_20_40; + if (minstrel_mcs_groups[i].flags & IEEE80211_TX_RC_SHORT_GI) { + if (minstrel_mcs_groups[i].flags & IEEE80211_TX_RC_40_MHZ_WIDTH) { + if (!(sta_cap & IEEE80211_HT_CAP_SGI_40)) + continue; + } else { + if (!(sta_cap & IEEE80211_HT_CAP_SGI_20)) + continue; + } + } - if ((sta_cap & req) != req) + if 
(minstrel_mcs_groups[i].flags & IEEE80211_TX_RC_40_MHZ_WIDTH && + sta->bandwidth < IEEE80211_STA_RX_BW_40) continue; /* Mark MCS > 7 as unsupported if STA is in static SMPS mode */ - if (smps == WLAN_HT_CAP_SM_PS_STATIC && + if (sta->smps_mode == IEEE80211_SMPS_STATIC && minstrel_mcs_groups[i].streams > 1) continue; @@ -770,6 +920,10 @@ minstrel_ht_update_caps(void *priv, struct ieee80211_supported_band *sband, if (!n_supported) goto use_legacy; + /* create an initial rate table with the lowest supported rates */ + minstrel_ht_update_stats(mp, mi); + minstrel_ht_update_rates(mp, mi); + return; use_legacy: diff --git a/net/mac80211/rc80211_minstrel_ht.h b/net/mac80211/rc80211_minstrel_ht.h index 462d2b227ed5..d655586773ac 100644 --- a/net/mac80211/rc80211_minstrel_ht.h +++ b/net/mac80211/rc80211_minstrel_ht.h @@ -16,11 +16,6 @@ #define MINSTREL_MAX_STREAMS 3 #define MINSTREL_STREAM_GROUPS 4 -/* scaled fraction values */ -#define MINSTREL_SCALE 16 -#define MINSTREL_FRAC(val, div) (((val) << MINSTREL_SCALE) / div) -#define MINSTREL_TRUNC(val) ((val) >> MINSTREL_SCALE) - #define MCS_GROUP_RATES 8 struct mcs_group { @@ -70,6 +65,8 @@ struct minstrel_mcs_group_data { }; struct minstrel_ht_sta { + struct ieee80211_sta *sta; + /* ampdu length (average, per sampling interval) */ unsigned int ampdu_len; unsigned int ampdu_packets; @@ -85,6 +82,7 @@ struct minstrel_ht_sta { /* best probability rate */ unsigned int max_prob_rate; + unsigned int max_prob_streams; /* time of last status update */ unsigned long stats_update; @@ -107,8 +105,11 @@ struct minstrel_ht_sta { /* current MCS group to be sampled */ u8 sample_group; + u8 cck_supported; + u8 cck_supported_short; + /* MCS rate group info and statistics */ - struct minstrel_mcs_group_data groups[MINSTREL_MAX_STREAMS * MINSTREL_STREAM_GROUPS]; + struct minstrel_mcs_group_data groups[MINSTREL_MAX_STREAMS * MINSTREL_STREAM_GROUPS + 1]; }; struct minstrel_ht_sta_priv { diff --git a/net/mac80211/rc80211_minstrel_ht_debugfs.c b/net/mac80211/rc80211_minstrel_ht_debugfs.c index e788f76a1dfe..df44a5ad8270 100644 --- a/net/mac80211/rc80211_minstrel_ht_debugfs.c +++ b/net/mac80211/rc80211_minstrel_ht_debugfs.c @@ -15,13 +15,76 @@ #include "rc80211_minstrel.h" #include "rc80211_minstrel_ht.h" +static char * +minstrel_ht_stats_dump(struct minstrel_ht_sta *mi, int i, char *p) +{ + unsigned int max_mcs = MINSTREL_MAX_STREAMS * MINSTREL_STREAM_GROUPS; + const struct mcs_group *mg; + unsigned int j, tp, prob, eprob; + char htmode = '2'; + char gimode = 'L'; + + if (!mi->groups[i].supported) + return p; + + mg = &minstrel_mcs_groups[i]; + if (mg->flags & IEEE80211_TX_RC_40_MHZ_WIDTH) + htmode = '4'; + if (mg->flags & IEEE80211_TX_RC_SHORT_GI) + gimode = 'S'; + + for (j = 0; j < MCS_GROUP_RATES; j++) { + struct minstrel_rate_stats *mr = &mi->groups[i].rates[j]; + static const int bitrates[4] = { 10, 20, 55, 110 }; + int idx = i * MCS_GROUP_RATES + j; + + if (!(mi->groups[i].supported & BIT(j))) + continue; + + if (i == max_mcs) + p += sprintf(p, "CCK/%cP ", j < 4 ? 'L' : 'S'); + else + p += sprintf(p, "HT%c0/%cGI ", htmode, gimode); + + *(p++) = (idx == mi->max_tp_rate) ? 'T' : ' '; + *(p++) = (idx == mi->max_tp_rate2) ? 't' : ' '; + *(p++) = (idx == mi->max_prob_rate) ? 
'P' : ' '; + + if (i == max_mcs) { + int r = bitrates[j % 4]; + p += sprintf(p, " %2u.%1uM", r / 10, r % 10); + } else { + p += sprintf(p, " MCS%-2u", (mg->streams - 1) * + MCS_GROUP_RATES + j); + } + + tp = mr->cur_tp / 10; + prob = MINSTREL_TRUNC(mr->cur_prob * 1000); + eprob = MINSTREL_TRUNC(mr->probability * 1000); + + p += sprintf(p, " %6u.%1u %6u.%1u %6u.%1u " + "%3u %3u(%3u) %8llu %8llu\n", + tp / 10, tp % 10, + eprob / 10, eprob % 10, + prob / 10, prob % 10, + mr->retry_count, + mr->last_success, + mr->last_attempts, + (unsigned long long)mr->succ_hist, + (unsigned long long)mr->att_hist); + } + + return p; +} + static int minstrel_ht_stats_open(struct inode *inode, struct file *file) { struct minstrel_ht_sta_priv *msp = inode->i_private; struct minstrel_ht_sta *mi = &msp->ht; struct minstrel_debugfs_info *ms; - unsigned int i, j, tp, prob, eprob; + unsigned int i; + unsigned int max_mcs = MINSTREL_MAX_STREAMS * MINSTREL_STREAM_GROUPS; char *p; int ret; @@ -38,50 +101,13 @@ minstrel_ht_stats_open(struct inode *inode, struct file *file) file->private_data = ms; p = ms->buf; - p += sprintf(p, "type rate throughput ewma prob this prob " - "this succ/attempt success attempts\n"); - for (i = 0; i < MINSTREL_MAX_STREAMS * MINSTREL_STREAM_GROUPS; i++) { - char htmode = '2'; - char gimode = 'L'; - - if (!mi->groups[i].supported) - continue; - - if (minstrel_mcs_groups[i].flags & IEEE80211_TX_RC_40_MHZ_WIDTH) - htmode = '4'; - if (minstrel_mcs_groups[i].flags & IEEE80211_TX_RC_SHORT_GI) - gimode = 'S'; + p += sprintf(p, "type rate throughput ewma prob this prob " + "retry this succ/attempt success attempts\n"); - for (j = 0; j < MCS_GROUP_RATES; j++) { - struct minstrel_rate_stats *mr = &mi->groups[i].rates[j]; - int idx = i * MCS_GROUP_RATES + j; + p = minstrel_ht_stats_dump(mi, max_mcs, p); + for (i = 0; i < max_mcs; i++) + p = minstrel_ht_stats_dump(mi, i, p); - if (!(mi->groups[i].supported & BIT(j))) - continue; - - p += sprintf(p, "HT%c0/%cGI ", htmode, gimode); - - *(p++) = (idx == mi->max_tp_rate) ? 'T' : ' '; - *(p++) = (idx == mi->max_tp_rate2) ? 't' : ' '; - *(p++) = (idx == mi->max_prob_rate) ? 
'P' : ' '; - p += sprintf(p, "MCS%-2u", (minstrel_mcs_groups[i].streams - 1) * - MCS_GROUP_RATES + j); - - tp = mr->cur_tp / 10; - prob = MINSTREL_TRUNC(mr->cur_prob * 1000); - eprob = MINSTREL_TRUNC(mr->probability * 1000); - - p += sprintf(p, " %6u.%1u %6u.%1u %6u.%1u " - "%3u(%3u) %8llu %8llu\n", - tp / 10, tp % 10, - eprob / 10, eprob % 10, - prob / 10, prob % 10, - mr->last_success, - mr->last_attempts, - (unsigned long long)mr->succ_hist, - (unsigned long long)mr->att_hist); - } - } p += sprintf(p, "\nTotal packet count:: ideal %d " "lookaround %d\n", max(0, (int) mi->total_packets - (int) mi->sample_packets), diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 580704eba8b8..c8447af76ead 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -648,29 +648,11 @@ static ieee80211_rx_result ieee80211_rx_mesh_check(struct ieee80211_rx_data *rx) return RX_CONTINUE; } -#define SEQ_MODULO 0x1000 -#define SEQ_MASK 0xfff - -static inline int seq_less(u16 sq1, u16 sq2) -{ - return ((sq1 - sq2) & SEQ_MASK) > (SEQ_MODULO >> 1); -} - -static inline u16 seq_inc(u16 sq) -{ - return (sq + 1) & SEQ_MASK; -} - -static inline u16 seq_sub(u16 sq1, u16 sq2) -{ - return (sq1 - sq2) & SEQ_MASK; -} - static void ieee80211_release_reorder_frame(struct ieee80211_sub_if_data *sdata, struct tid_ampdu_rx *tid_agg_rx, - int index) + int index, + struct sk_buff_head *frames) { - struct ieee80211_local *local = sdata->local; struct sk_buff *skb = tid_agg_rx->reorder_buf[index]; struct ieee80211_rx_status *status; @@ -684,24 +666,27 @@ static void ieee80211_release_reorder_frame(struct ieee80211_sub_if_data *sdata, tid_agg_rx->reorder_buf[index] = NULL; status = IEEE80211_SKB_RXCB(skb); status->rx_flags |= IEEE80211_RX_DEFERRED_RELEASE; - skb_queue_tail(&local->rx_skb_queue, skb); + __skb_queue_tail(frames, skb); no_frame: - tid_agg_rx->head_seq_num = seq_inc(tid_agg_rx->head_seq_num); + tid_agg_rx->head_seq_num = ieee80211_sn_inc(tid_agg_rx->head_seq_num); } static void ieee80211_release_reorder_frames(struct ieee80211_sub_if_data *sdata, struct tid_ampdu_rx *tid_agg_rx, - u16 head_seq_num) + u16 head_seq_num, + struct sk_buff_head *frames) { int index; lockdep_assert_held(&tid_agg_rx->reorder_lock); - while (seq_less(tid_agg_rx->head_seq_num, head_seq_num)) { - index = seq_sub(tid_agg_rx->head_seq_num, tid_agg_rx->ssn) % + while (ieee80211_sn_less(tid_agg_rx->head_seq_num, head_seq_num)) { + index = ieee80211_sn_sub(tid_agg_rx->head_seq_num, + tid_agg_rx->ssn) % tid_agg_rx->buf_size; - ieee80211_release_reorder_frame(sdata, tid_agg_rx, index); + ieee80211_release_reorder_frame(sdata, tid_agg_rx, index, + frames); } } @@ -717,15 +702,16 @@ static void ieee80211_release_reorder_frames(struct ieee80211_sub_if_data *sdata #define HT_RX_REORDER_BUF_TIMEOUT (HZ / 10) static void ieee80211_sta_reorder_release(struct ieee80211_sub_if_data *sdata, - struct tid_ampdu_rx *tid_agg_rx) + struct tid_ampdu_rx *tid_agg_rx, + struct sk_buff_head *frames) { int index, j; lockdep_assert_held(&tid_agg_rx->reorder_lock); /* release the buffer until next missing frame */ - index = seq_sub(tid_agg_rx->head_seq_num, tid_agg_rx->ssn) % - tid_agg_rx->buf_size; + index = ieee80211_sn_sub(tid_agg_rx->head_seq_num, + tid_agg_rx->ssn) % tid_agg_rx->buf_size; if (!tid_agg_rx->reorder_buf[index] && tid_agg_rx->stored_mpdu_num) { /* @@ -746,24 +732,29 @@ static void ieee80211_sta_reorder_release(struct ieee80211_sub_if_data *sdata, ht_dbg_ratelimited(sdata, "release an RX reorder frame due to timeout on earlier frames\n"); - 
ieee80211_release_reorder_frame(sdata, tid_agg_rx, j); + ieee80211_release_reorder_frame(sdata, tid_agg_rx, j, + frames); /* * Increment the head seq# also for the skipped slots. */ tid_agg_rx->head_seq_num = - (tid_agg_rx->head_seq_num + skipped) & SEQ_MASK; + (tid_agg_rx->head_seq_num + + skipped) & IEEE80211_SN_MASK; skipped = 0; } } else while (tid_agg_rx->reorder_buf[index]) { - ieee80211_release_reorder_frame(sdata, tid_agg_rx, index); - index = seq_sub(tid_agg_rx->head_seq_num, tid_agg_rx->ssn) % + ieee80211_release_reorder_frame(sdata, tid_agg_rx, index, + frames); + index = ieee80211_sn_sub(tid_agg_rx->head_seq_num, + tid_agg_rx->ssn) % tid_agg_rx->buf_size; } if (tid_agg_rx->stored_mpdu_num) { - j = index = seq_sub(tid_agg_rx->head_seq_num, - tid_agg_rx->ssn) % tid_agg_rx->buf_size; + j = index = ieee80211_sn_sub(tid_agg_rx->head_seq_num, + tid_agg_rx->ssn) % + tid_agg_rx->buf_size; for (; j != (index - 1) % tid_agg_rx->buf_size; j = (j + 1) % tid_agg_rx->buf_size) { @@ -788,7 +779,8 @@ static void ieee80211_sta_reorder_release(struct ieee80211_sub_if_data *sdata, */ static bool ieee80211_sta_manage_reorder_buf(struct ieee80211_sub_if_data *sdata, struct tid_ampdu_rx *tid_agg_rx, - struct sk_buff *skb) + struct sk_buff *skb, + struct sk_buff_head *frames) { struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; u16 sc = le16_to_cpu(hdr->seq_ctrl); @@ -803,7 +795,7 @@ static bool ieee80211_sta_manage_reorder_buf(struct ieee80211_sub_if_data *sdata head_seq_num = tid_agg_rx->head_seq_num; /* frame with out of date sequence number */ - if (seq_less(mpdu_seq_num, head_seq_num)) { + if (ieee80211_sn_less(mpdu_seq_num, head_seq_num)) { dev_kfree_skb(skb); goto out; } @@ -812,16 +804,18 @@ static bool ieee80211_sta_manage_reorder_buf(struct ieee80211_sub_if_data *sdata * If the frame sequence number exceeds our buffering window * size, release some previous frames to make room for this one. */ - if (!seq_less(mpdu_seq_num, head_seq_num + buf_size)) { - head_seq_num = seq_inc(seq_sub(mpdu_seq_num, buf_size)); + if (!ieee80211_sn_less(mpdu_seq_num, head_seq_num + buf_size)) { + head_seq_num = ieee80211_sn_inc( + ieee80211_sn_sub(mpdu_seq_num, buf_size)); /* release stored frames up to new head to stack */ ieee80211_release_reorder_frames(sdata, tid_agg_rx, - head_seq_num); + head_seq_num, frames); } /* Now the new frame is always in the range of the reordering buffer */ - index = seq_sub(mpdu_seq_num, tid_agg_rx->ssn) % tid_agg_rx->buf_size; + index = ieee80211_sn_sub(mpdu_seq_num, + tid_agg_rx->ssn) % tid_agg_rx->buf_size; /* check if we already stored this frame */ if (tid_agg_rx->reorder_buf[index]) { @@ -837,7 +831,8 @@ static bool ieee80211_sta_manage_reorder_buf(struct ieee80211_sub_if_data *sdata */ if (mpdu_seq_num == tid_agg_rx->head_seq_num && tid_agg_rx->stored_mpdu_num == 0) { - tid_agg_rx->head_seq_num = seq_inc(tid_agg_rx->head_seq_num); + tid_agg_rx->head_seq_num = + ieee80211_sn_inc(tid_agg_rx->head_seq_num); ret = false; goto out; } @@ -846,7 +841,7 @@ static bool ieee80211_sta_manage_reorder_buf(struct ieee80211_sub_if_data *sdata tid_agg_rx->reorder_buf[index] = skb; tid_agg_rx->reorder_time[index] = jiffies; tid_agg_rx->stored_mpdu_num++; - ieee80211_sta_reorder_release(sdata, tid_agg_rx, frames); + ieee80211_sta_reorder_release(sdata, tid_agg_rx, frames); out: spin_unlock(&tid_agg_rx->reorder_lock); @@ -857,7 +852,8 @@ static bool ieee80211_sta_manage_reorder_buf(struct ieee80211_sub_if_data *sdata * Reorder MPDUs from A-MPDUs, keeping them on a buffer. 
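The seq_less()/seq_inc()/seq_sub() helpers removed from rx.c above become the ieee80211_sn_*() equivalents with the same 12-bit semantics. A minimal user-space sketch of that modular arithmetic (names hypothetical, constants taken from the removed SEQ_MODULO/SEQ_MASK definitions):

#include <stdint.h>
#include <stdio.h>

#define SN_MODULO 0x1000	/* 802.11 sequence numbers are 12 bits wide */
#define SN_MASK   0xfff

/* a precedes b iff the modular distance from b to a exceeds half the space */
static int sn_less(uint16_t a, uint16_t b)
{
	return ((a - b) & SN_MASK) > (SN_MODULO >> 1);
}

static uint16_t sn_inc(uint16_t a) { return (a + 1) & SN_MASK; }
static uint16_t sn_sub(uint16_t a, uint16_t b) { return (a - b) & SN_MASK; }

int main(void)
{
	/* wraparound: 0xffe precedes 0x001 although it is numerically larger */
	printf("%d %d %d\n", sn_less(0xffe, 0x001), sn_inc(0xfff), sn_sub(0x001, 0xffe));
	return 0;
}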
Returns * true if the MPDU was buffered, false if it should be processed. */ -static void ieee80211_rx_reorder_ampdu(struct ieee80211_rx_data *rx) +static void ieee80211_rx_reorder_ampdu(struct ieee80211_rx_data *rx, + struct sk_buff_head *frames) { struct sk_buff *skb = rx->skb; struct ieee80211_local *local = rx->local; @@ -922,11 +918,12 @@ static void ieee80211_rx_reorder_ampdu(struct ieee80211_rx_data *rx) * sure that we cannot get to it any more before doing * anything with it. */ - if (ieee80211_sta_manage_reorder_buf(rx->sdata, tid_agg_rx, skb)) + if (ieee80211_sta_manage_reorder_buf(rx->sdata, tid_agg_rx, skb, + frames)) return; dont_reorder: - skb_queue_tail(&local->rx_skb_queue, skb); + __skb_queue_tail(frames, skb); } static ieee80211_rx_result debug_noinline @@ -1452,6 +1449,10 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx) } } + /* mesh power save support */ + if (ieee80211_vif_is_mesh(&rx->sdata->vif)) + ieee80211_mps_rx_h_sta_process(sta, hdr); + /* * Drop (qos-)data::nullfunc frames silently, since they * are used only to control station power saving mode. @@ -1882,8 +1883,10 @@ ieee80211_deliver_skb(struct ieee80211_rx_data *rx) * 'align' will only take the values 0 or 2 here * since all frames are required to be aligned * to 2-byte boundaries when being passed to - * mac80211. That also explains the __skb_push() - * below. + * mac80211; the code here works just as well if + * that isn't true, but mac80211 assumes it can + * access fields as 2-byte aligned (e.g. for + * compare_ether_addr) */ align = ((unsigned long)(skb->data + sizeof(struct ethhdr))) & 3; if (align) { @@ -2015,7 +2018,7 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx) /* frame is in RMC, don't forward */ if (ieee80211_is_data(hdr->frame_control) && is_multicast_ether_addr(hdr->addr1) && - mesh_rmc_check(hdr->addr3, mesh_hdr, rx->sdata)) + mesh_rmc_check(rx->sdata, hdr->addr3, mesh_hdr)) return RX_DROP_MONITOR; if (!ieee80211_is_data(hdr->frame_control) || @@ -2042,9 +2045,9 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx) } rcu_read_lock(); - mppath = mpp_path_lookup(proxied_addr, sdata); + mppath = mpp_path_lookup(sdata, proxied_addr); if (!mppath) { - mpp_path_add(proxied_addr, mpp_addr, sdata); + mpp_path_add(sdata, proxied_addr, mpp_addr); } else { spin_lock_bh(&mppath->state_lock); if (!ether_addr_equal(mppath->mpp, mpp_addr)) @@ -2082,6 +2085,7 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx) } fwd_hdr = (struct ieee80211_hdr *) fwd_skb->data; + fwd_hdr->frame_control &= ~cpu_to_le16(IEEE80211_FCTL_RETRY); info = IEEE80211_SKB_CB(fwd_skb); memset(info, 0, sizeof(*info)); info->flags |= IEEE80211_TX_INTFL_NEED_TXPROCESSING; @@ -2090,12 +2094,15 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx) if (is_multicast_ether_addr(fwd_hdr->addr1)) { IEEE80211_IFSTA_MESH_CTR_INC(ifmsh, fwded_mcast); memcpy(fwd_hdr->addr2, sdata->vif.addr, ETH_ALEN); - } else if (!mesh_nexthop_lookup(fwd_skb, sdata)) { + /* update power mode indication when forwarding */ + ieee80211_mps_set_frame_flags(sdata, NULL, fwd_hdr); + } else if (!mesh_nexthop_lookup(sdata, fwd_skb)) { + /* mesh power mode flags updated in mesh_nexthop_lookup */ IEEE80211_IFSTA_MESH_CTR_INC(ifmsh, fwded_unicast); } else { /* unable to resolve next hop */ - mesh_path_error_tx(ifmsh->mshcfg.element_ttl, fwd_hdr->addr3, - 0, reason, fwd_hdr->addr2, sdata); + mesh_path_error_tx(sdata, ifmsh->mshcfg.element_ttl, + fwd_hdr->addr3, 0, reason, fwd_hdr->addr2); IEEE80211_IFSTA_MESH_CTR_INC(ifmsh, 
dropped_frames_no_route); kfree_skb(fwd_skb); return RX_DROP_MONITOR; @@ -2177,7 +2184,7 @@ ieee80211_rx_h_data(struct ieee80211_rx_data *rx) } static ieee80211_rx_result debug_noinline -ieee80211_rx_h_ctrl(struct ieee80211_rx_data *rx) +ieee80211_rx_h_ctrl(struct ieee80211_rx_data *rx, struct sk_buff_head *frames) { struct sk_buff *skb = rx->skb; struct ieee80211_bar *bar = (struct ieee80211_bar *)skb->data; @@ -2216,7 +2223,7 @@ ieee80211_rx_h_ctrl(struct ieee80211_rx_data *rx) spin_lock(&tid_agg_rx->reorder_lock); /* release stored frames up to start of BAR */ ieee80211_release_reorder_frames(rx->sdata, tid_agg_rx, - start_seq_num); + start_seq_num, frames); spin_unlock(&tid_agg_rx->reorder_lock); kfree_skb(skb); @@ -2353,38 +2360,34 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) sdata->vif.type != NL80211_IFTYPE_ADHOC) break; - /* verify action & smps_control are present */ + /* verify action & smps_control/chanwidth are present */ if (len < IEEE80211_MIN_ACTION_SIZE + 2) goto invalid; switch (mgmt->u.action.u.ht_smps.action) { case WLAN_HT_ACTION_SMPS: { struct ieee80211_supported_band *sband; - u8 smps; + enum ieee80211_smps_mode smps_mode; /* convert to HT capability */ switch (mgmt->u.action.u.ht_smps.smps_control) { case WLAN_HT_SMPS_CONTROL_DISABLED: - smps = WLAN_HT_CAP_SM_PS_DISABLED; + smps_mode = IEEE80211_SMPS_OFF; break; case WLAN_HT_SMPS_CONTROL_STATIC: - smps = WLAN_HT_CAP_SM_PS_STATIC; + smps_mode = IEEE80211_SMPS_STATIC; break; case WLAN_HT_SMPS_CONTROL_DYNAMIC: - smps = WLAN_HT_CAP_SM_PS_DYNAMIC; + smps_mode = IEEE80211_SMPS_DYNAMIC; break; default: goto invalid; } - smps <<= IEEE80211_HT_CAP_SM_PS_SHIFT; /* if no change do nothing */ - if ((rx->sta->sta.ht_cap.cap & - IEEE80211_HT_CAP_SM_PS) == smps) + if (rx->sta->sta.smps_mode == smps_mode) goto handled; - - rx->sta->sta.ht_cap.cap &= ~IEEE80211_HT_CAP_SM_PS; - rx->sta->sta.ht_cap.cap |= smps; + rx->sta->sta.smps_mode = smps_mode; sband = rx->local->hw.wiphy->bands[status->band]; @@ -2392,11 +2395,82 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) IEEE80211_RC_SMPS_CHANGED); goto handled; } + case WLAN_HT_ACTION_NOTIFY_CHANWIDTH: { + struct ieee80211_supported_band *sband; + u8 chanwidth = mgmt->u.action.u.ht_notify_cw.chanwidth; + enum ieee80211_sta_rx_bandwidth new_bw; + + /* If it doesn't support 40 MHz it can't change ... 
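A reduced sketch of the notify-chanwidth decision that follows (stand-in enum and names; the real code derives the wide value from the peer's capabilities via ieee80211_sta_cur_vht_bw()):

enum rx_bw { RX_BW_20, RX_BW_40, RX_BW_80 };

/* A 20 MHz notification pins the peer to 20 MHz RX; any other value
 * restores whatever width its HT/VHT capabilities allow. */
static enum rx_bw notify_chanwidth_to_bw(int notified_20mhz, enum rx_bw cap_bw)
{
	return notified_20mhz ? RX_BW_20 : cap_bw;
}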
*/ + if (!(rx->sta->sta.ht_cap.cap & + IEEE80211_HT_CAP_SUP_WIDTH_20_40)) + goto handled; + + if (chanwidth == IEEE80211_HT_CHANWIDTH_20MHZ) + new_bw = IEEE80211_STA_RX_BW_20; + else + new_bw = ieee80211_sta_cur_vht_bw(rx->sta); + + if (rx->sta->sta.bandwidth == new_bw) + goto handled; + + sband = rx->local->hw.wiphy->bands[status->band]; + + rate_control_rate_update(local, sband, rx->sta, + IEEE80211_RC_BW_CHANGED); + goto handled; + } default: goto invalid; } break; + case WLAN_CATEGORY_PUBLIC: + if (len < IEEE80211_MIN_ACTION_SIZE + 1) + goto invalid; + if (sdata->vif.type != NL80211_IFTYPE_STATION) + break; + if (!rx->sta) + break; + if (!ether_addr_equal(mgmt->bssid, sdata->u.mgd.bssid)) + break; + if (mgmt->u.action.u.ext_chan_switch.action_code != + WLAN_PUB_ACTION_EXT_CHANSW_ANN) + break; + if (len < offsetof(struct ieee80211_mgmt, + u.action.u.ext_chan_switch.variable)) + goto invalid; + goto queue; + case WLAN_CATEGORY_VHT: + if (sdata->vif.type != NL80211_IFTYPE_STATION && + sdata->vif.type != NL80211_IFTYPE_MESH_POINT && + sdata->vif.type != NL80211_IFTYPE_AP_VLAN && + sdata->vif.type != NL80211_IFTYPE_AP && + sdata->vif.type != NL80211_IFTYPE_ADHOC) + break; + + /* verify action code is present */ + if (len < IEEE80211_MIN_ACTION_SIZE + 1) + goto invalid; + + switch (mgmt->u.action.u.vht_opmode_notif.action_code) { + case WLAN_VHT_ACTION_OPMODE_NOTIF: { + u8 opmode; + + /* verify opmode is present */ + if (len < IEEE80211_MIN_ACTION_SIZE + 2) + goto invalid; + + opmode = mgmt->u.action.u.vht_opmode_notif.operating_mode; + + ieee80211_vht_handle_opmode(rx->sdata, rx->sta, + opmode, status->band, + false); + goto handled; + } + default: + break; + } + break; case WLAN_CATEGORY_BACK: if (sdata->vif.type != NL80211_IFTYPE_STATION && sdata->vif.type != NL80211_IFTYPE_MESH_POINT && @@ -2449,10 +2523,6 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) ieee80211_process_measurement_req(sdata, mgmt, len); goto handled; case WLAN_ACTION_SPCT_CHL_SWITCH: - if (len < (IEEE80211_MIN_ACTION_SIZE + - sizeof(mgmt->u.action.u.chan_switch))) - break; - if (sdata->vif.type != NL80211_IFTYPE_STATION) break; @@ -2486,7 +2556,7 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) case WLAN_SP_MESH_PEERING_CONFIRM: if (!ieee80211_vif_is_mesh(&sdata->vif)) goto invalid; - if (sdata->u.mesh.security != IEEE80211_MESH_SEC_NONE) + if (sdata->u.mesh.user_mpm) /* userspace handles this frame */ break; goto queue; @@ -2609,7 +2679,19 @@ ieee80211_rx_h_action_return(struct ieee80211_rx_data *rx) memset(nskb->cb, 0, sizeof(nskb->cb)); - ieee80211_tx_skb(rx->sdata, nskb); + if (rx->sdata->vif.type == NL80211_IFTYPE_P2P_DEVICE) { + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(nskb); + + info->flags = IEEE80211_TX_CTL_TX_OFFCHAN | + IEEE80211_TX_INTFL_OFFCHAN_TX_OK | + IEEE80211_TX_CTL_NO_CCK_RATE; + if (local->hw.flags & IEEE80211_HW_QUEUE_CONTROL) + info->hw_queue = + local->hw.offchannel_tx_hw_queue; + } + + __ieee80211_tx_skb_tid_band(rx->sdata, nskb, 7, + status->band); } dev_kfree_skb(rx->skb); return RX_QUEUED; @@ -2648,8 +2730,9 @@ ieee80211_rx_h_mgmt(struct ieee80211_rx_data *rx) return RX_DROP_MONITOR; break; case cpu_to_le16(IEEE80211_STYPE_PROBE_REQ): - /* process only for ibss */ - if (sdata->vif.type != NL80211_IFTYPE_ADHOC) + /* process only for ibss and mesh */ + if (sdata->vif.type != NL80211_IFTYPE_ADHOC && + sdata->vif.type != NL80211_IFTYPE_MESH_POINT) return RX_DROP_MONITOR; break; default: @@ -2772,7 +2855,8 @@ static void ieee80211_rx_handlers_result(struct ieee80211_rx_data 
*rx, } } -static void ieee80211_rx_handlers(struct ieee80211_rx_data *rx) +static void ieee80211_rx_handlers(struct ieee80211_rx_data *rx, + struct sk_buff_head *frames) { ieee80211_rx_result res = RX_DROP_MONITOR; struct sk_buff *skb; @@ -2784,15 +2868,9 @@ static void ieee80211_rx_handlers(struct ieee80211_rx_data *rx) goto rxh_next; \ } while (0); - spin_lock(&rx->local->rx_skb_queue.lock); - if (rx->local->running_rx_handler) - goto unlock; - - rx->local->running_rx_handler = true; - - while ((skb = __skb_dequeue(&rx->local->rx_skb_queue))) { - spin_unlock(&rx->local->rx_skb_queue.lock); + spin_lock_bh(&rx->local->rx_path_lock); + while ((skb = __skb_dequeue(frames))) { /* * all the other fields are valid across frames * that belong to an aMPDU since they are on the @@ -2813,7 +2891,12 @@ static void ieee80211_rx_handlers(struct ieee80211_rx_data *rx) #endif CALL_RXH(ieee80211_rx_h_amsdu) CALL_RXH(ieee80211_rx_h_data) - CALL_RXH(ieee80211_rx_h_ctrl); + + /* special treatment -- needs the queue */ + res = ieee80211_rx_h_ctrl(rx, frames); + if (res != RX_CONTINUE) + goto rxh_next; + CALL_RXH(ieee80211_rx_h_mgmt_check) CALL_RXH(ieee80211_rx_h_action) CALL_RXH(ieee80211_rx_h_userspace_mgmt) @@ -2822,20 +2905,20 @@ static void ieee80211_rx_handlers(struct ieee80211_rx_data *rx) rxh_next: ieee80211_rx_handlers_result(rx, res); - spin_lock(&rx->local->rx_skb_queue.lock); + #undef CALL_RXH } - rx->local->running_rx_handler = false; - - unlock: - spin_unlock(&rx->local->rx_skb_queue.lock); + spin_unlock_bh(&rx->local->rx_path_lock); } static void ieee80211_invoke_rx_handlers(struct ieee80211_rx_data *rx) { + struct sk_buff_head reorder_release; ieee80211_rx_result res = RX_DROP_MONITOR; + __skb_queue_head_init(&reorder_release); + #define CALL_RXH(rxh) \ do { \ res = rxh(rx); \ @@ -2845,9 +2928,9 @@ static void ieee80211_invoke_rx_handlers(struct ieee80211_rx_data *rx) CALL_RXH(ieee80211_rx_h_check) - ieee80211_rx_reorder_ampdu(rx); + ieee80211_rx_reorder_ampdu(rx, &reorder_release); - ieee80211_rx_handlers(rx); + ieee80211_rx_handlers(rx, &reorder_release); return; rxh_next: @@ -2862,6 +2945,7 @@ static void ieee80211_invoke_rx_handlers(struct ieee80211_rx_data *rx) */ void ieee80211_release_reorder_timeout(struct sta_info *sta, int tid) { + struct sk_buff_head frames; struct ieee80211_rx_data rx = { .sta = sta, .sdata = sta->sdata, @@ -2877,11 +2961,13 @@ void ieee80211_release_reorder_timeout(struct sta_info *sta, int tid) if (!tid_agg_rx) return; + __skb_queue_head_init(&frames); + spin_lock(&tid_agg_rx->reorder_lock); - ieee80211_sta_reorder_release(sta->sdata, tid_agg_rx); + ieee80211_sta_reorder_release(sta->sdata, tid_agg_rx, &frames); spin_unlock(&tid_agg_rx->reorder_lock); - ieee80211_rx_handlers(&rx); + ieee80211_rx_handlers(&rx, &frames); } /* main receive path */ @@ -2969,7 +3055,8 @@ static int prepare_for_handlers(struct ieee80211_rx_data *rx, !ieee80211_is_probe_resp(hdr->frame_control) && !ieee80211_is_beacon(hdr->frame_control)) return 0; - if (!ether_addr_equal(sdata->vif.addr, hdr->addr1)) + if (!ether_addr_equal(sdata->vif.addr, hdr->addr1) && + !multicast) status->rx_flags &= ~IEEE80211_RX_RA_MATCH; break; default: diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index bf82e69d0601..99b103921a4b 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -27,22 +27,15 @@ #define IEEE80211_PROBE_DELAY (HZ / 33) #define IEEE80211_CHANNEL_TIME (HZ / 33) -#define IEEE80211_PASSIVE_CHANNEL_TIME (HZ / 8) - -static void ieee80211_rx_bss_free(struct cfg80211_bss *cbss) -{ 
- struct ieee80211_bss *bss = (void *)cbss->priv; - - kfree(bss_mesh_id(bss)); - kfree(bss_mesh_cfg(bss)); -} +#define IEEE80211_PASSIVE_CHANNEL_TIME (HZ / 9) void ieee80211_rx_bss_put(struct ieee80211_local *local, struct ieee80211_bss *bss) { if (!bss) return; - cfg80211_put_bss(container_of((void *)bss, struct cfg80211_bss, priv)); + cfg80211_put_bss(local->hw.wiphy, + container_of((void *)bss, struct cfg80211_bss, priv)); } static bool is_uapsd_supported(struct ieee802_11_elems *elems) @@ -65,12 +58,11 @@ static bool is_uapsd_supported(struct ieee802_11_elems *elems) struct ieee80211_bss * ieee80211_bss_info_update(struct ieee80211_local *local, struct ieee80211_rx_status *rx_status, - struct ieee80211_mgmt *mgmt, - size_t len, + struct ieee80211_mgmt *mgmt, size_t len, struct ieee802_11_elems *elems, - struct ieee80211_channel *channel, - bool beacon) + struct ieee80211_channel *channel) { + bool beacon = ieee80211_is_beacon(mgmt->frame_control); struct cfg80211_bss *cbss; struct ieee80211_bss *bss; int clen, srlen; @@ -86,10 +78,12 @@ ieee80211_bss_info_update(struct ieee80211_local *local, if (!cbss) return NULL; - cbss->free_priv = ieee80211_rx_bss_free; bss = (void *)cbss->priv; - bss->device_ts = rx_status->device_timestamp; + if (beacon) + bss->device_ts_beacon = rx_status->device_timestamp; + else + bss->device_ts_presp = rx_status->device_timestamp; if (elems->parse_error) { if (beacon) @@ -104,9 +98,8 @@ ieee80211_bss_info_update(struct ieee80211_local *local, } /* save the ERP value so that it is available at association time */ - if (elems->erp_info && elems->erp_info_len >= 1 && - (!elems->parse_error || - !(bss->valid_data & IEEE80211_BSS_VALID_ERP))) { + if (elems->erp_info && (!elems->parse_error || + !(bss->valid_data & IEEE80211_BSS_VALID_ERP))) { bss->erp_value = elems->erp_info[0]; bss->has_erp_value = true; if (!elems->parse_error) @@ -147,9 +140,6 @@ ieee80211_bss_info_update(struct ieee80211_local *local, bss->valid_data |= IEEE80211_BSS_VALID_WMM; } - if (!beacon) - bss->last_probe_resp = jiffies; - return bss; } @@ -162,7 +152,6 @@ void ieee80211_scan_rx(struct ieee80211_local *local, struct sk_buff *skb) u8 *elements; struct ieee80211_channel *channel; size_t baselen; - bool beacon; struct ieee802_11_elems elems; if (skb->len < 24 || @@ -184,17 +173,15 @@ void ieee80211_scan_rx(struct ieee80211_local *local, struct sk_buff *skb) elements = mgmt->u.probe_resp.variable; baselen = offsetof(struct ieee80211_mgmt, u.probe_resp.variable); - beacon = false; } else { baselen = offsetof(struct ieee80211_mgmt, u.beacon.variable); elements = mgmt->u.beacon.variable; - beacon = true; } if (baselen > skb->len) return; - ieee802_11_parse_elems(elements, skb->len - baselen, &elems); + ieee802_11_parse_elems(elements, skb->len - baselen, false, &elems); channel = ieee80211_get_channel(local->hw.wiphy, rx_status->freq); @@ -203,7 +190,7 @@ void ieee80211_scan_rx(struct ieee80211_local *local, struct sk_buff *skb) bss = ieee80211_bss_info_update(local, rx_status, mgmt, skb->len, &elems, - channel, beacon); + channel); if (bss) ieee80211_rx_bss_put(local, bss); } @@ -343,6 +330,9 @@ static int ieee80211_start_sw_scan(struct ieee80211_local *local) ieee80211_offchannel_stop_vifs(local); + /* ensure nullfunc is transmitted before leaving operating channel */ + ieee80211_flush_queues(local, NULL); + ieee80211_configure_filter(local); /* We need to set power level at maximum rate for scanning. 
*/ @@ -357,6 +347,9 @@ static int ieee80211_start_sw_scan(struct ieee80211_local *local) static bool ieee80211_can_scan(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata) { + if (local->radar_detect_enabled) + return false; + if (!list_empty(&local->roc_list)) return false; @@ -390,7 +383,12 @@ static void ieee80211_scan_state_send_probe(struct ieee80211_local *local, { int i; struct ieee80211_sub_if_data *sdata; - enum ieee80211_band band = local->hw.conf.channel->band; + enum ieee80211_band band = local->hw.conf.chandef.chan->band; + u32 tx_flags; + + tx_flags = IEEE80211_TX_INTFL_OFFCHAN_TX_OK; + if (local->scan_req->no_cck) + tx_flags |= IEEE80211_TX_CTL_NO_CCK_RATE; sdata = rcu_dereference_protected(local->scan_sdata, lockdep_is_held(&local->mtx)); @@ -402,8 +400,7 @@ static void ieee80211_scan_state_send_probe(struct ieee80211_local *local, local->scan_req->ssids[i].ssid_len, local->scan_req->ie, local->scan_req->ie_len, local->scan_req->rates[band], false, - local->scan_req->no_cck, - local->hw.conf.channel, true); + tx_flags, local->hw.conf.chandef.chan, true); /* * After sending probe requests, wait for probe responses @@ -469,7 +466,7 @@ static int __ieee80211_start_scan(struct ieee80211_sub_if_data *sdata, if (local->ops->hw_scan) { __set_bit(SCAN_HW_SCANNING, &local->scanning); } else if ((req->n_channels == 1) && - (req->channels[0] == local->_oper_channel)) { + (req->channels[0] == local->_oper_chandef.chan)) { /* * If we are scanning only on the operating channel * then we do not need to stop normal activities @@ -547,8 +544,6 @@ static void ieee80211_scan_state_decision(struct ieee80211_local *local, bool associated = false; bool tx_empty = true; bool bad_latency; - bool listen_int_exceeded; - unsigned long min_beacon_int = 0; struct ieee80211_sub_if_data *sdata; struct ieee80211_channel *next_chan; enum mac80211_scan_state next_scan_state; @@ -567,11 +562,6 @@ static void ieee80211_scan_state_decision(struct ieee80211_local *local, if (sdata->u.mgd.associated) { associated = true; - if (sdata->vif.bss_conf.beacon_int < - min_beacon_int || min_beacon_int == 0) - min_beacon_int = - sdata->vif.bss_conf.beacon_int; - if (!qdisc_all_tx_empty(sdata->dev)) { tx_empty = false; break; @@ -588,34 +578,19 @@ static void ieee80211_scan_state_decision(struct ieee80211_local *local, * see if we can scan another channel without interfering * with the current traffic situation. * - * Since we don't know if the AP has pending frames for us - * we can only check for our tx queues and use the current - * pm_qos requirements for rx. Hence, if no tx traffic occurs - * at all we will scan as many channels in a row as the pm_qos - * latency allows us to. Additionally we also check for the - * currently negotiated listen interval to prevent losing - * frames unnecessarily. - * - * Otherwise switch back to the operating channel. + * Keep good latency, do not stay off-channel more than 125 ms. 
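The single bound that replaces the old pm_qos and listen-interval checks reads as one predicate; a standalone sketch, with hypothetical names and the kernel's wrap-safe comparison idiom:

#include <stdbool.h>

typedef unsigned long jiffies_t;	/* stand-in for the kernel's jiffies */

static bool time_after_sketch(jiffies_t a, jiffies_t b)
{
	return (long)(b - a) < 0;	/* wrap-safe, as in <linux/jiffies.h> */
}

/* Suspend the scan if visiting the next channel would keep us away from
 * the operating channel for more than 125 ms (HZ / 8). */
static bool scan_latency_bad(jiffies_t now, jiffies_t leave_oper_time,
			     jiffies_t next_chan_time, unsigned long hz)
{
	return time_after_sketch(now + next_chan_time, leave_oper_time + hz / 8);
}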
*/ bad_latency = time_after(jiffies + - ieee80211_scan_get_channel_time(next_chan), - local->leave_oper_channel_time + - usecs_to_jiffies(pm_qos_request(PM_QOS_NETWORK_LATENCY))); - - listen_int_exceeded = time_after(jiffies + - ieee80211_scan_get_channel_time(next_chan), - local->leave_oper_channel_time + - usecs_to_jiffies(min_beacon_int * 1024) * - local->hw.conf.listen_interval); + ieee80211_scan_get_channel_time(next_chan), + local->leave_oper_channel_time + HZ / 8); if (associated && !tx_empty) { if (local->scan_req->flags & NL80211_SCAN_FLAG_LOW_PRIORITY) next_scan_state = SCAN_ABORT; else next_scan_state = SCAN_SUSPEND; - } else if (associated && (bad_latency || listen_int_exceeded)) { + } else if (associated && bad_latency) { next_scan_state = SCAN_SUSPEND; } else { next_scan_state = SCAN_SET_CHANNEL; @@ -692,7 +667,7 @@ static void ieee80211_scan_state_resume(struct ieee80211_local *local, ieee80211_offchannel_stop_vifs(local); if (local->ops->flush) { - drv_flush(local, false); + ieee80211_flush_queues(local, NULL); *next_delay = 0; } else *next_delay = HZ / 10; diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index ca9fde198188..11216bc13b27 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -104,12 +104,24 @@ static void cleanup_single_sta(struct sta_info *sta) * neither mac80211 nor the driver can reference this * sta struct any more except by still existing timers * associated with this station that we clean up below. + * + * Note though that this still uses the sdata and even + * calls the driver in AP and mesh mode, so interfaces + * of those types must call sta_info_flush_cleanup() + * (typically via sta_info_flush()) before deconfiguring + * the driver. + * + * In station mode, nothing happens here so it doesn't + * have to (and doesn't) do that; this is intentional to + * speed up roaming. 
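The ordering this note demands is exactly what the sta_info_flush() inline added to sta_info.h later in this patch provides; restated as a sketch in kernel context:

static inline int sta_info_flush_sketch(struct ieee80211_sub_if_data *sdata)
{
	int ret = sta_info_flush_defer(sdata);	/* unlink STAs, queues call_rcu() */

	rcu_barrier();				/* wait for the call_rcu() callbacks */
	sta_info_flush_cleanup(sdata);		/* drain the per-sdata cleanup queue */

	return ret;
}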
*/ if (test_sta_flag(sta, WLAN_STA_PS_STA)) { if (sta->sdata->vif.type == NL80211_IFTYPE_AP || sta->sdata->vif.type == NL80211_IFTYPE_AP_VLAN) ps = &sdata->bss->ps; + else if (ieee80211_vif_is_mesh(&sdata->vif)) + ps = &sdata->u.mesh.ps; else return; @@ -125,13 +137,8 @@ static void cleanup_single_sta(struct sta_info *sta) ieee80211_purge_tx_queue(&local->hw, &sta->tx_filtered[ac]); } -#ifdef CONFIG_MAC80211_MESH - if (ieee80211_vif_is_mesh(&sdata->vif)) { - mesh_accept_plinks_update(sdata); - mesh_plink_deactivate(sta); - del_timer_sync(&sta->plink_timer); - } -#endif + if (ieee80211_vif_is_mesh(&sdata->vif)) + mesh_sta_cleanup(sta); cancel_work_sync(&sta->drv_unblock_wk); @@ -335,6 +342,11 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, INIT_WORK(&sta->drv_unblock_wk, sta_unblock); INIT_WORK(&sta->ampdu_mlme.work, ieee80211_ba_session_work); mutex_init(&sta->ampdu_mlme.mtx); +#ifdef CONFIG_MAC80211_MESH + if (ieee80211_vif_is_mesh(&sdata->vif) && + !sdata->u.mesh.user_mpm) + init_timer(&sta->plink_timer); +#endif memcpy(sta->sta.addr, addr, ETH_ALEN); sta->local = local; @@ -368,12 +380,9 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, for (i = 0; i < IEEE80211_NUM_TIDS; i++) sta->last_seq_ctrl[i] = cpu_to_le16(USHRT_MAX); - sta_dbg(sdata, "Allocated STA %pM\n", sta->sta.addr); + sta->sta.smps_mode = IEEE80211_SMPS_OFF; -#ifdef CONFIG_MAC80211_MESH - sta->plink_state = NL80211_PLINK_LISTEN; - init_timer(&sta->plink_timer); -#endif + sta_dbg(sdata, "Allocated STA %pM\n", sta->sta.addr); return sta; } @@ -547,6 +556,15 @@ static inline void __bss_tim_clear(u8 *tim, u16 id) tim[id / 8] &= ~(1 << (id % 8)); } +static inline bool __bss_tim_get(u8 *tim, u16 id) +{ + /* + * This format has been mandated by the IEEE specifications, + * so this line may not be changed to use the test_bit() format. 
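A small user-space sketch of the partial-virtual-bitmap indexing these helpers implement (AID n lives in octet n / 8, bit n % 8, LSB first; demo only):

#include <stdint.h>
#include <stdio.h>

static void tim_set(uint8_t *tim, uint16_t id)   { tim[id / 8] |= 1 << (id % 8); }
static void tim_clear(uint8_t *tim, uint16_t id) { tim[id / 8] &= ~(1 << (id % 8)); }
static int  tim_get(const uint8_t *tim, uint16_t id) { return tim[id / 8] & (1 << (id % 8)); }

int main(void)
{
	uint8_t tim[251] = { 0 };	/* enough octets for AIDs up to 2007 */

	tim_set(tim, 13);		/* AID 13: octet 1, bit 5 */
	printf("%d\n", !!tim_get(tim, 13));	/* 1 */
	tim_clear(tim, 13);
	printf("%d\n", !!tim_get(tim, 13));	/* 0 */
	return 0;
}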
+ */ + return tim[id / 8] & (1 << (id % 8)); +} + static unsigned long ieee80211_tids_for_ac(int ac) { /* If we ever support TIDs > 7, this obviously needs to be adjusted */ @@ -569,7 +587,6 @@ void sta_info_recalc_tim(struct sta_info *sta) { struct ieee80211_local *local = sta->local; struct ps_data *ps; - unsigned long flags; bool indicate_tim = false; u8 ignore_for_tim = sta->sta.uapsd_queues; int ac; @@ -582,6 +599,12 @@ void sta_info_recalc_tim(struct sta_info *sta) ps = &sta->sdata->bss->ps; id = sta->sta.aid; +#ifdef CONFIG_MAC80211_MESH + } else if (ieee80211_vif_is_mesh(&sta->sdata->vif)) { + ps = &sta->sdata->u.mesh.ps; + /* TIM map only for PLID <= IEEE80211_MAX_AID */ + id = le16_to_cpu(sta->plid) % IEEE80211_MAX_AID; +#endif } else { return; } @@ -620,7 +643,10 @@ void sta_info_recalc_tim(struct sta_info *sta) } done: - spin_lock_irqsave(&local->tim_lock, flags); + spin_lock_bh(&local->tim_lock); + + if (indicate_tim == __bss_tim_get(ps->tim, id)) + goto out_unlock; if (indicate_tim) __bss_tim_set(ps->tim, id); @@ -633,7 +659,8 @@ void sta_info_recalc_tim(struct sta_info *sta) local->tim_in_locked_section = false; } - spin_unlock_irqrestore(&local->tim_lock, flags); +out_unlock: + spin_unlock_bh(&local->tim_lock); } static bool sta_info_buffer_expired(struct sta_info *sta, struct sk_buff *skb) @@ -740,8 +767,9 @@ static bool sta_info_cleanup_expire_buffered(struct ieee80211_local *local, bool have_buffered = false; int ac; - /* This is only necessary for stations on BSS interfaces */ - if (!sta->sdata->bss) + /* This is only necessary for stations on BSS/MBSS interfaces */ + if (!sta->sdata->bss && + !ieee80211_vif_is_mesh(&sta->sdata->vif)) return false; for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) @@ -755,7 +783,7 @@ int __must_check __sta_info_destroy(struct sta_info *sta) { struct ieee80211_local *local; struct ieee80211_sub_if_data *sdata; - int ret, i; + int ret; might_sleep(); @@ -774,7 +802,7 @@ int __must_check __sta_info_destroy(struct sta_info *sta) * will be sufficient. */ set_sta_flag(sta, WLAN_STA_BLOCK_BA); - ieee80211_sta_tear_down_BA_sessions(sta, false); + ieee80211_sta_tear_down_BA_sessions(sta, AGG_STOP_DESTROY_STA); ret = sta_info_hash_del(local, sta); if (ret) @@ -782,12 +810,8 @@ int __must_check __sta_info_destroy(struct sta_info *sta) list_del_rcu(&sta->list); - mutex_lock(&local->key_mtx); - for (i = 0; i < NUM_DEFAULT_KEYS; i++) - __ieee80211_key_free(key_mtx_dereference(local, sta->gtk[i])); - if (sta->ptk) - __ieee80211_key_free(key_mtx_dereference(local, sta->ptk)); - mutex_unlock(&local->key_mtx); + /* this always calls synchronize_net() */ + ieee80211_free_sta_keys(local, sta); sta->dead = true; @@ -885,20 +909,12 @@ void sta_info_init(struct ieee80211_local *local) void sta_info_stop(struct ieee80211_local *local) { del_timer_sync(&local->sta_cleanup); - sta_info_flush(local, NULL); } -/** - * sta_info_flush - flush matching STA entries from the STA table - * - * Returns the number of removed STA entries. 
- * - * @local: local interface data - * @sdata: matching rule for the net device (sta->dev) or %NULL to match all STAs - */ -int sta_info_flush(struct ieee80211_local *local, - struct ieee80211_sub_if_data *sdata) + +int sta_info_flush_defer(struct ieee80211_sub_if_data *sdata) { + struct ieee80211_local *local = sdata->local; struct sta_info *sta, *tmp; int ret = 0; @@ -906,30 +922,22 @@ int sta_info_flush(struct ieee80211_local *local, mutex_lock(&local->sta_mtx); list_for_each_entry_safe(sta, tmp, &local->sta_list, list) { - if (!sdata || sdata == sta->sdata) { + if (sdata == sta->sdata) { WARN_ON(__sta_info_destroy(sta)); ret++; } } mutex_unlock(&local->sta_mtx); - rcu_barrier(); - - if (sdata) { - ieee80211_cleanup_sdata_stas(sdata); - cancel_work_sync(&sdata->cleanup_stations_wk); - } else { - mutex_lock(&local->iflist_mtx); - list_for_each_entry(sdata, &local->interfaces, list) { - ieee80211_cleanup_sdata_stas(sdata); - cancel_work_sync(&sdata->cleanup_stations_wk); - } - mutex_unlock(&local->iflist_mtx); - } - return ret; } +void sta_info_flush_cleanup(struct ieee80211_sub_if_data *sdata) +{ + ieee80211_cleanup_sdata_stas(sdata); + cancel_work_sync(&sdata->cleanup_stations_wk); +} + void ieee80211_sta_expire(struct ieee80211_sub_if_data *sdata, unsigned long exp_time) { @@ -945,6 +953,11 @@ void ieee80211_sta_expire(struct ieee80211_sub_if_data *sdata, if (time_after(jiffies, sta->last_rx + exp_time)) { sta_dbg(sta->sdata, "expiring inactive STA %pM\n", sta->sta.addr); + + if (ieee80211_vif_is_mesh(&sdata->vif) && + test_sta_flag(sta, WLAN_STA_PS_STA)) + atomic_dec(&sdata->u.mesh.ps.num_sta_ps); + WARN_ON(__sta_info_destroy(sta)); } } @@ -1003,6 +1016,8 @@ static void clear_sta_ps_flags(void *_sta) if (sdata->vif.type == NL80211_IFTYPE_AP || sdata->vif.type == NL80211_IFTYPE_AP_VLAN) ps = &sdata->bss->ps; + else if (ieee80211_vif_is_mesh(&sdata->vif)) + ps = &sdata->u.mesh.ps; else return; @@ -1120,6 +1135,8 @@ static void ieee80211_send_null_response(struct ieee80211_sub_if_data *sdata, drv_allow_buffered_frames(local, sta, BIT(tid), 1, reason, false); + skb->dev = sdata->dev; + rcu_read_lock(); chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf); if (WARN_ON(!chanctx_conf)) { @@ -1380,30 +1397,16 @@ void ieee80211_sta_block_awake(struct ieee80211_hw *hw, } EXPORT_SYMBOL(ieee80211_sta_block_awake); -void ieee80211_sta_eosp_irqsafe(struct ieee80211_sta *pubsta) +void ieee80211_sta_eosp(struct ieee80211_sta *pubsta) { struct sta_info *sta = container_of(pubsta, struct sta_info, sta); struct ieee80211_local *local = sta->local; - struct sk_buff *skb; - struct skb_eosp_msg_data *data; trace_api_eosp(local, pubsta); - skb = alloc_skb(0, GFP_ATOMIC); - if (!skb) { - /* too bad ... but race is better than loss */ - clear_sta_flag(sta, WLAN_STA_SP); - return; - } - - data = (void *)skb->cb; - memcpy(data->sta, pubsta->addr, ETH_ALEN); - memcpy(data->iface, sta->sdata->vif.addr, ETH_ALEN); - skb->pkt_type = IEEE80211_EOSP_MSG; - skb_queue_tail(&local->skb_queue, skb); - tasklet_schedule(&local->tasklet); + clear_sta_flag(sta, WLAN_STA_SP); } -EXPORT_SYMBOL(ieee80211_sta_eosp_irqsafe); +EXPORT_SYMBOL(ieee80211_sta_eosp); void ieee80211_sta_set_buffered(struct ieee80211_sta *pubsta, u8 tid, bool buffered) diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index 37c1889afd3a..adc30045f99e 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -56,6 +56,8 @@ * @WLAN_STA_INSERTED: This station is inserted into the hash table. 
* @WLAN_STA_RATE_CONTROL: rate control was initialized for this station. * @WLAN_STA_TOFFSET_KNOWN: toffset calculated for this station is valid. + * @WLAN_STA_MPSP_OWNER: local STA is owner of a mesh Peer Service Period. + * @WLAN_STA_MPSP_RECIPIENT: local STA is recipient of a MPSP. */ enum ieee80211_sta_info_flags { WLAN_STA_AUTH, @@ -78,6 +80,8 @@ enum ieee80211_sta_info_flags { WLAN_STA_INSERTED, WLAN_STA_RATE_CONTROL, WLAN_STA_TOFFSET_KNOWN, + WLAN_STA_MPSP_OWNER, + WLAN_STA_MPSP_RECIPIENT, }; #define ADDBA_RESP_INTERVAL HZ @@ -92,6 +96,13 @@ enum ieee80211_sta_info_flags { #define HT_AGG_STATE_WANT_START 4 #define HT_AGG_STATE_WANT_STOP 5 +enum ieee80211_agg_stop_reason { + AGG_STOP_DECLINED, + AGG_STOP_LOCAL_REQUEST, + AGG_STOP_PEER_REQUEST, + AGG_STOP_DESTROY_STA, +}; + /** * struct tid_ampdu_tx - TID aggregation information (Tx). * @@ -270,11 +281,12 @@ struct sta_ampdu_mlme { * @plink_state: peer link state * @plink_timeout: timeout of peer link * @plink_timer: peer link watch timer - * @plink_timer_was_running: used by suspend/resume to restore timers * @t_offset: timing offset relative to this host * @t_offset_setpoint: reference timing offset of this sta to be used when * calculating clockdrift - * @ch_width: peer's channel width + * @local_pm: local link-specific power save mode + * @peer_pm: peer-specific power save mode towards local STA + * @nonpeer_pm: STA power save mode towards non-peer neighbors * @debugfs: debug filesystem info * @dead: set to true when sta is unlinked * @uploaded: set to true when sta is uploaded to the driver @@ -282,8 +294,9 @@ struct sta_ampdu_mlme { * @sta: station information we share with the driver * @sta_state: duplicates information about station state (for debug) * @beacon_loss_count: number of times beacon loss has triggered - * @supports_40mhz: tracks whether the station advertised 40 MHz support - * as we overwrite its HT parameters with the currently used value + * @rcu_head: RCU head used for freeing this station struct + * @cur_max_bandwidth: maximum bandwidth to use for TX to the station, + * taken from HT/VHT capabilities or VHT operating mode notification */ struct sta_info { /* General information, mostly static */ @@ -320,7 +333,8 @@ struct sta_info { unsigned long driver_buffered_tids; /* Updated from RX path only, no locking requirements */ - unsigned long rx_packets, rx_bytes; + unsigned long rx_packets; + u64 rx_bytes; unsigned long wep_weak_iv_count; unsigned long last_rx; long last_connected; @@ -340,9 +354,9 @@ struct sta_info { unsigned int fail_avg; /* Updated from TX path only, no locking requirements */ - unsigned long tx_packets; - unsigned long tx_bytes; - unsigned long tx_fragments; + u32 tx_fragments; + u64 tx_packets[IEEE80211_NUM_ACS]; + u64 tx_bytes[IEEE80211_NUM_ACS]; struct ieee80211_tx_rate last_tx_rate; int last_rx_rate_idx; u32 last_rx_rate_flag; @@ -365,13 +379,15 @@ struct sta_info { __le16 reason; u8 plink_retries; bool ignore_plink_timer; - bool plink_timer_was_running; enum nl80211_plink_state plink_state; u32 plink_timeout; struct timer_list plink_timer; s64 t_offset; s64 t_offset_setpoint; - enum nl80211_chan_width ch_width; + /* mesh power save */ + enum nl80211_mesh_power_mode local_pm; + enum nl80211_mesh_power_mode peer_pm; + enum nl80211_mesh_power_mode nonpeer_pm; #endif #ifdef CONFIG_MAC80211_DEBUGFS @@ -381,11 +397,11 @@ struct sta_info { } debugfs; #endif + enum ieee80211_sta_rx_bandwidth cur_max_bandwidth; + unsigned int lost_packets; unsigned int beacon_loss_count; - bool supports_40mhz; 
- /* keep last! */ struct ieee80211_sta sta; }; @@ -548,8 +564,39 @@ void sta_info_recalc_tim(struct sta_info *sta); void sta_info_init(struct ieee80211_local *local); void sta_info_stop(struct ieee80211_local *local); -int sta_info_flush(struct ieee80211_local *local, - struct ieee80211_sub_if_data *sdata); +int sta_info_flush_defer(struct ieee80211_sub_if_data *sdata); + +/** + * sta_info_flush_cleanup - flush the sta_info cleanup queue + * @sdata: the interface + * + * Flushes the sta_info cleanup queue for a given interface; + * this is necessary before the interface is removed or, for + * AP/mesh interfaces, before it is deconfigured. + * + * Note an rcu_barrier() must precede the function, after all + * stations have been flushed/removed to ensure the call_rcu() + * calls that add stations to the cleanup queue have completed. + */ +void sta_info_flush_cleanup(struct ieee80211_sub_if_data *sdata); + +/** + * sta_info_flush - flush matching STA entries from the STA table + * + * Returns the number of removed STA entries. + * + * @sdata: sdata to remove all stations from + */ +static inline int sta_info_flush(struct ieee80211_sub_if_data *sdata) +{ + int ret = sta_info_flush_defer(sdata); + + rcu_barrier(); + sta_info_flush_cleanup(sdata); + + return ret; +} + void sta_set_rate_info_tx(struct sta_info *sta, const struct ieee80211_tx_rate *rate, struct rate_info *rinfo); diff --git a/net/mac80211/status.c b/net/mac80211/status.c index 07d99578a2b1..43439203f4e4 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -335,7 +335,8 @@ static void ieee80211_report_used_skb(struct ieee80211_local *local, if (dropped) acked = false; - if (info->flags & IEEE80211_TX_INTFL_NL80211_FRAME_TX) { + if (info->flags & (IEEE80211_TX_INTFL_NL80211_FRAME_TX | + IEEE80211_TX_INTFL_MLME_CONN_TX)) { struct ieee80211_sub_if_data *sdata = NULL; struct ieee80211_sub_if_data *iter_sdata; u64 cookie = (unsigned long)skb; @@ -357,10 +358,13 @@ static void ieee80211_report_used_skb(struct ieee80211_local *local, sdata = rcu_dereference(local->p2p_sdata); } - if (!sdata) + if (!sdata) { skb->dev = NULL; - else if (ieee80211_is_nullfunc(hdr->frame_control) || - ieee80211_is_qos_nullfunc(hdr->frame_control)) { + } else if (info->flags & IEEE80211_TX_INTFL_MLME_CONN_TX) { + ieee80211_mgd_conn_tx_status(sdata, hdr->frame_control, + acked); + } else if (ieee80211_is_nullfunc(hdr->frame_control) || + ieee80211_is_qos_nullfunc(hdr->frame_control)) { cfg80211_probe_status(sdata->dev, hdr->addr1, cookie, acked, GFP_ATOMIC); } else { @@ -468,6 +472,13 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) return; } + /* mesh Peer Service Period support */ + if (ieee80211_vif_is_mesh(&sta->sdata->vif) && + ieee80211_is_data_qos(fc)) + ieee80211_mpsp_trigger_process( + ieee80211_get_qos_ctl(hdr), + sta, true, acked); + if ((local->hw.flags & IEEE80211_HW_HAS_RATE_CONTROL) && (rates_idx != -1)) sta->last_tx_rate = info->status.rates[rates_idx]; @@ -502,11 +513,7 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) IEEE80211_BAR_CTRL_TID_INFO_MASK) >> IEEE80211_BAR_CTRL_TID_INFO_SHIFT; - if (local->hw.flags & - IEEE80211_HW_TEARDOWN_AGGR_ON_BAR_FAIL) - ieee80211_stop_tx_ba_session(&sta->sta, tid); - else - ieee80211_set_bar_pending(sta, tid, ssn); + ieee80211_set_bar_pending(sta, tid, ssn); } } diff --git a/net/mac80211/tkip.c b/net/mac80211/tkip.c index 57e14d59e12f..3ed801d90f1e 100644 --- a/net/mac80211/tkip.c +++ b/net/mac80211/tkip.c @@ -177,12 +177,11 @@ void 
ieee80211_get_tkip_p1k_iv(struct ieee80211_key_conf *keyconf, struct ieee80211_key *key = (struct ieee80211_key *) container_of(keyconf, struct ieee80211_key, conf); struct tkip_ctx *ctx = &key->u.tkip.tx; - unsigned long flags; - spin_lock_irqsave(&key->u.tkip.txlock, flags); + spin_lock_bh(&key->u.tkip.txlock); ieee80211_compute_tkip_p1k(key, iv32); memcpy(p1k, ctx->p1k, sizeof(ctx->p1k)); - spin_unlock_irqrestore(&key->u.tkip.txlock, flags); + spin_unlock_bh(&key->u.tkip.txlock); } EXPORT_SYMBOL(ieee80211_get_tkip_p1k_iv); @@ -208,12 +207,11 @@ void ieee80211_get_tkip_p2k(struct ieee80211_key_conf *keyconf, const u8 *data = (u8 *)hdr + ieee80211_hdrlen(hdr->frame_control); u32 iv32 = get_unaligned_le32(&data[4]); u16 iv16 = data[2] | (data[0] << 8); - unsigned long flags; - spin_lock_irqsave(&key->u.tkip.txlock, flags); + spin_lock_bh(&key->u.tkip.txlock); ieee80211_compute_tkip_p1k(key, iv32); tkip_mixing_phase2(tk, ctx, iv16, p2k); - spin_unlock_irqrestore(&key->u.tkip.txlock, flags); + spin_unlock_bh(&key->u.tkip.txlock); } EXPORT_SYMBOL(ieee80211_get_tkip_p2k); diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h index a8270b441a6f..c215fafd7a2f 100644 --- a/net/mac80211/trace.h +++ b/net/mac80211/trace.h @@ -28,21 +28,27 @@ #define VIF_PR_FMT " vif:%s(%d%s)" #define VIF_PR_ARG __get_str(vif_name), __entry->vif_type, __entry->p2p ? "/p2p" : "" -#define CHANCTX_ENTRY __field(u32, control_freq) \ - __field(u32, chan_width) \ - __field(u32, center_freq1) \ - __field(u32, center_freq2) \ - __field(u8, rx_chains_static) \ +#define CHANDEF_ENTRY __field(u32, control_freq) \ + __field(u32, chan_width) \ + __field(u32, center_freq1) \ + __field(u32, center_freq2) +#define CHANDEF_ASSIGN(c) \ + __entry->control_freq = (c)->chan ? (c)->chan->center_freq : 0; \ + __entry->chan_width = (c)->width; \ + __entry->center_freq1 = (c)->center_freq1; \ + __entry->center_freq2 = (c)->center_freq2; +#define CHANDEF_PR_FMT " control:%d MHz width:%d center: %d/%d MHz" +#define CHANDEF_PR_ARG __entry->control_freq, __entry->chan_width, \ + __entry->center_freq1, __entry->center_freq2 + +#define CHANCTX_ENTRY CHANDEF_ENTRY \ + __field(u8, rx_chains_static) \ __field(u8, rx_chains_dynamic) -#define CHANCTX_ASSIGN __entry->control_freq = ctx->conf.def.chan->center_freq;\ - __entry->chan_width = ctx->conf.def.width; \ - __entry->center_freq1 = ctx->conf.def.center_freq1; \ - __entry->center_freq2 = ctx->conf.def.center_freq2; \ - __entry->rx_chains_static = ctx->conf.rx_chains_static; \ +#define CHANCTX_ASSIGN CHANDEF_ASSIGN(&ctx->conf.def) \ + __entry->rx_chains_static = ctx->conf.rx_chains_static; \ __entry->rx_chains_dynamic = ctx->conf.rx_chains_dynamic -#define CHANCTX_PR_FMT " control:%d MHz width:%d center: %d/%d MHz chains:%d/%d" -#define CHANCTX_PR_ARG __entry->control_freq, __entry->chan_width, \ - __entry->center_freq1, __entry->center_freq2, \ +#define CHANCTX_PR_FMT CHANDEF_PR_FMT " chains:%d/%d" +#define CHANCTX_PR_ARG CHANDEF_PR_ARG, \ __entry->rx_chains_static, __entry->rx_chains_dynamic @@ -280,8 +286,7 @@ TRACE_EVENT(drv_config, __field(u16, listen_interval) __field(u8, long_frame_max_tx_count) __field(u8, short_frame_max_tx_count) - __field(int, center_freq) - __field(int, channel_type) + CHANDEF_ENTRY __field(int, smps) ), @@ -297,15 +302,13 @@ TRACE_EVENT(drv_config, local->hw.conf.long_frame_max_tx_count; __entry->short_frame_max_tx_count = local->hw.conf.short_frame_max_tx_count; - __entry->center_freq = local->hw.conf.channel ? 
- local->hw.conf.channel->center_freq : 0; - __entry->channel_type = local->hw.conf.channel_type; + CHANDEF_ASSIGN(&local->hw.conf.chandef) __entry->smps = local->hw.conf.smps_mode; ), TP_printk( - LOCAL_PR_FMT " ch:%#x freq:%d", - LOCAL_PR_ARG, __entry->changed, __entry->center_freq + LOCAL_PR_FMT " ch:%#x" CHANDEF_PR_FMT, + LOCAL_PR_ARG, __entry->changed, CHANDEF_PR_ARG ) ); @@ -334,6 +337,7 @@ TRACE_EVENT(drv_bss_info_changed, __field(u16, assoc_cap) __field(u64, sync_tsf) __field(u32, sync_device_ts) + __field(u8, sync_dtim_count) __field(u32, basic_rates) __array(int, mcast_rate, IEEE80211_NUM_BANDS) __field(u16, ht_operation_mode) @@ -341,16 +345,18 @@ TRACE_EVENT(drv_bss_info_changed, __field(s32, cqm_rssi_hyst); __field(u32, channel_width); __field(u32, channel_cfreq1); - __dynamic_array(u32, arp_addr_list, info->arp_addr_cnt); - __field(bool, arp_filter_enabled); + __dynamic_array(u32, arp_addr_list, + info->arp_addr_cnt > IEEE80211_BSS_ARP_ADDR_LIST_LEN ? + IEEE80211_BSS_ARP_ADDR_LIST_LEN : + info->arp_addr_cnt); + __field(int, arp_addr_cnt); __field(bool, qos); __field(bool, idle); __field(bool, ps); __dynamic_array(u8, ssid, info->ssid_len); __field(bool, hidden_ssid); __field(int, txpower) - __field(u8, p2p_ctwindow) - __field(bool, p2p_oppps) + __field(u8, p2p_oppps_ctwindow) ), TP_fast_assign( @@ -370,6 +376,7 @@ TRACE_EVENT(drv_bss_info_changed, __entry->assoc_cap = info->assoc_capability; __entry->sync_tsf = info->sync_tsf; __entry->sync_device_ts = info->sync_device_ts; + __entry->sync_dtim_count = info->sync_dtim_count; __entry->basic_rates = info->basic_rates; memcpy(__entry->mcast_rate, info->mcast_rate, sizeof(__entry->mcast_rate)); @@ -378,17 +385,18 @@ TRACE_EVENT(drv_bss_info_changed, __entry->cqm_rssi_hyst = info->cqm_rssi_hyst; __entry->channel_width = info->chandef.width; __entry->channel_cfreq1 = info->chandef.center_freq1; + __entry->arp_addr_cnt = info->arp_addr_cnt; memcpy(__get_dynamic_array(arp_addr_list), info->arp_addr_list, - sizeof(u32) * info->arp_addr_cnt); - __entry->arp_filter_enabled = info->arp_filter_enabled; + sizeof(u32) * (info->arp_addr_cnt > IEEE80211_BSS_ARP_ADDR_LIST_LEN ? 
+ IEEE80211_BSS_ARP_ADDR_LIST_LEN : + info->arp_addr_cnt)); __entry->qos = info->qos; __entry->idle = info->idle; __entry->ps = info->ps; memcpy(__get_dynamic_array(ssid), info->ssid, info->ssid_len); __entry->hidden_ssid = info->hidden_ssid; __entry->txpower = info->txpower; - __entry->p2p_ctwindow = info->p2p_ctwindow; - __entry->p2p_oppps = info->p2p_oppps; + __entry->p2p_oppps_ctwindow = info->p2p_noa_attr.oppps_ctwindow; ), TP_printk( @@ -418,6 +426,30 @@ TRACE_EVENT(drv_prepare_multicast, ) ); +TRACE_EVENT(drv_set_multicast_list, + TP_PROTO(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, int mc_count), + + TP_ARGS(local, sdata, mc_count), + + TP_STRUCT__entry( + LOCAL_ENTRY + __field(bool, allmulti) + __field(int, mc_count) + ), + + TP_fast_assign( + LOCAL_ASSIGN; + __entry->allmulti = sdata->flags & IEEE80211_SDATA_ALLMULTI; + __entry->mc_count = mc_count; + ), + + TP_printk( + LOCAL_PR_FMT " configure mc filter, count=%d, allmulti=%d", + LOCAL_PR_ARG, __entry->mc_count, __entry->allmulti + ) +); + TRACE_EVENT(drv_configure_filter, TP_PROTO(struct ieee80211_local *local, unsigned int changed_flags, @@ -466,7 +498,7 @@ TRACE_EVENT(drv_set_tim, TP_printk( LOCAL_PR_FMT STA_PR_FMT " set:%d", - LOCAL_PR_ARG, STA_PR_FMT, __entry->set + LOCAL_PR_ARG, STA_PR_ARG, __entry->set ) ); @@ -927,23 +959,26 @@ TRACE_EVENT(drv_get_survey, ); TRACE_EVENT(drv_flush, - TP_PROTO(struct ieee80211_local *local, bool drop), + TP_PROTO(struct ieee80211_local *local, + u32 queues, bool drop), - TP_ARGS(local, drop), + TP_ARGS(local, queues, drop), TP_STRUCT__entry( LOCAL_ENTRY __field(bool, drop) + __field(u32, queues) ), TP_fast_assign( LOCAL_ASSIGN; __entry->drop = drop; + __entry->queues = queues; ), TP_printk( - LOCAL_PR_FMT " drop:%d", - LOCAL_PR_ARG, __entry->drop + LOCAL_PR_FMT " queues:0x%x drop:%d", + LOCAL_PR_ARG, __entry->queues, __entry->drop ) ); @@ -955,23 +990,23 @@ TRACE_EVENT(drv_channel_switch, TP_STRUCT__entry( LOCAL_ENTRY + CHANDEF_ENTRY __field(u64, timestamp) __field(bool, block_tx) - __field(u16, freq) __field(u8, count) ), TP_fast_assign( LOCAL_ASSIGN; + CHANDEF_ASSIGN(&ch_switch->chandef) __entry->timestamp = ch_switch->timestamp; __entry->block_tx = ch_switch->block_tx; - __entry->freq = ch_switch->channel->center_freq; __entry->count = ch_switch->count; ), TP_printk( - LOCAL_PR_FMT " new freq:%u count:%d", - LOCAL_PR_ARG, __entry->freq, __entry->count + LOCAL_PR_FMT " new " CHANDEF_PR_FMT " count:%d", + LOCAL_PR_ARG, CHANDEF_PR_ARG, __entry->count ) ); @@ -1029,15 +1064,17 @@ TRACE_EVENT(drv_remain_on_channel, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, struct ieee80211_channel *chan, - unsigned int duration), + unsigned int duration, + enum ieee80211_roc_type type), - TP_ARGS(local, sdata, chan, duration), + TP_ARGS(local, sdata, chan, duration, type), TP_STRUCT__entry( LOCAL_ENTRY VIF_ENTRY __field(int, center_freq) __field(unsigned int, duration) + __field(u32, type) ), TP_fast_assign( @@ -1045,12 +1082,13 @@ TRACE_EVENT(drv_remain_on_channel, VIF_ASSIGN; __entry->center_freq = chan->center_freq; __entry->duration = duration; + __entry->type = type; ), TP_printk( - LOCAL_PR_FMT VIF_PR_FMT " freq:%dMHz duration:%dms", + LOCAL_PR_FMT VIF_PR_FMT " freq:%dMHz duration:%dms type=%d", LOCAL_PR_ARG, VIF_PR_ARG, - __entry->center_freq, __entry->duration + __entry->center_freq, __entry->duration, __entry->type ) ); @@ -1178,23 +1216,26 @@ TRACE_EVENT(drv_set_rekey_data, TRACE_EVENT(drv_rssi_callback, TP_PROTO(struct ieee80211_local 
*local, + struct ieee80211_sub_if_data *sdata, enum ieee80211_rssi_event rssi_event), - TP_ARGS(local, rssi_event), + TP_ARGS(local, sdata, rssi_event), TP_STRUCT__entry( LOCAL_ENTRY + VIF_ENTRY __field(u32, rssi_event) ), TP_fast_assign( LOCAL_ASSIGN; + VIF_ASSIGN; __entry->rssi_event = rssi_event; ), TP_printk( - LOCAL_PR_FMT " rssi_event:%d", - LOCAL_PR_ARG, __entry->rssi_event + LOCAL_PR_FMT VIF_PR_FMT " rssi_event:%d", + LOCAL_PR_ARG, VIF_PR_ARG, __entry->rssi_event ) ); @@ -1426,6 +1467,14 @@ DEFINE_EVENT(local_only_evt, drv_restart_complete, TP_ARGS(local) ); +#if IS_ENABLED(CONFIG_IPV6) +DEFINE_EVENT(local_sdata_evt, drv_ipv6_addr_change, + TP_PROTO(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata), + TP_ARGS(local, sdata) +); +#endif + /* * Tracing for API calls that drivers call. */ @@ -1660,7 +1709,7 @@ TRACE_EVENT(api_sta_block_awake, TP_printk( LOCAL_PR_FMT STA_PR_FMT " block:%d", - LOCAL_PR_ARG, STA_PR_FMT, __entry->block + LOCAL_PR_ARG, STA_PR_ARG, __entry->block ) ); @@ -1758,7 +1807,7 @@ TRACE_EVENT(api_eosp, TP_printk( LOCAL_PR_FMT STA_PR_FMT, - LOCAL_PR_ARG, STA_PR_FMT + LOCAL_PR_ARG, STA_PR_ARG ) ); @@ -1815,6 +1864,48 @@ TRACE_EVENT(stop_queue, ) ); +TRACE_EVENT(drv_set_default_unicast_key, + TP_PROTO(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + int key_idx), + + TP_ARGS(local, sdata, key_idx), + + TP_STRUCT__entry( + LOCAL_ENTRY + VIF_ENTRY + __field(int, key_idx) + ), + + TP_fast_assign( + LOCAL_ASSIGN; + VIF_ASSIGN; + __entry->key_idx = key_idx; + ), + + TP_printk(LOCAL_PR_FMT VIF_PR_FMT " key_idx:%d", + LOCAL_PR_ARG, VIF_PR_ARG, __entry->key_idx) +); + +TRACE_EVENT(api_radar_detected, + TP_PROTO(struct ieee80211_local *local), + + TP_ARGS(local), + + TP_STRUCT__entry( + LOCAL_ENTRY + ), + + TP_fast_assign( + LOCAL_ASSIGN; + ), + + TP_printk( + LOCAL_PR_FMT " radar detected", + LOCAL_PR_ARG + ) +); + #ifdef CONFIG_MAC80211_MESSAGE_TRACING #undef TRACE_SYSTEM #define TRACE_SYSTEM mac80211_msg diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 467c1d1b66f2..9972e07a2f96 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -48,15 +48,15 @@ static __le16 ieee80211_duration(struct ieee80211_tx_data *tx, struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); /* assume HW handles this */ - if (info->control.rates[0].flags & IEEE80211_TX_RC_MCS) + if (tx->rate.flags & IEEE80211_TX_RC_MCS) return 0; /* uh huh? 
*/ - if (WARN_ON_ONCE(info->control.rates[0].idx < 0)) + if (WARN_ON_ONCE(tx->rate.idx < 0)) return 0; sband = local->hw.wiphy->bands[info->band]; - txrate = &sband->bitrates[info->control.rates[0].idx]; + txrate = &sband->bitrates[tx->rate.idx]; erp = txrate->flags & IEEE80211_RATE_ERP_G; @@ -233,6 +233,7 @@ ieee80211_tx_h_dynamic_ps(struct ieee80211_tx_data *tx) if (local->hw.conf.flags & IEEE80211_CONF_PS) { ieee80211_stop_queues_by_reason(&local->hw, + IEEE80211_MAX_QUEUE_MAP, IEEE80211_QUEUE_STOP_REASON_PS); ifmgd->flags &= ~IEEE80211_STA_NULLFUNC_ACKED; ieee80211_queue_work(&local->hw, @@ -329,6 +330,8 @@ static void purge_old_ps_buffers(struct ieee80211_local *local) if (sdata->vif.type == NL80211_IFTYPE_AP) ps = &sdata->u.ap.ps; + else if (ieee80211_vif_is_mesh(&sdata->vif)) + ps = &sdata->u.mesh.ps; else continue; @@ -372,18 +375,20 @@ ieee80211_tx_h_multicast_ps_buf(struct ieee80211_tx_data *tx) /* * broadcast/multicast frame * - * If any of the associated stations is in power save mode, + * If any of the associated/peer stations is in power save mode, * the frame is buffered to be sent after DTIM beacon frame. * This is done either by the hardware or us. */ - /* powersaving STAs currently only in AP/VLAN mode */ + /* powersaving STAs currently only in AP/VLAN/mesh mode */ if (tx->sdata->vif.type == NL80211_IFTYPE_AP || tx->sdata->vif.type == NL80211_IFTYPE_AP_VLAN) { if (!tx->sdata->bss) return TX_CONTINUE; ps = &tx->sdata->bss->ps; + } else if (ieee80211_vif_is_mesh(&tx->sdata->vif)) { + ps = &tx->sdata->u.mesh.ps; } else { return TX_CONTINUE; } @@ -594,7 +599,8 @@ ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx) break; } - if (unlikely(tx->key && tx->key->flags & KEY_FLAG_TAINTED)) + if (unlikely(tx->key && tx->key->flags & KEY_FLAG_TAINTED && + !ieee80211_is_deauth(hdr->frame_control))) return TX_DROP; if (!skip_hw && tx->key && @@ -611,11 +617,9 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx) struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb); struct ieee80211_hdr *hdr = (void *)tx->skb->data; struct ieee80211_supported_band *sband; - struct ieee80211_rate *rate; - int i; u32 len; - bool inval = false, rts = false, short_preamble = false; struct ieee80211_tx_rate_control txrc; + struct ieee80211_sta_rates *ratetbl = NULL; bool assoc = false; memset(&txrc, 0, sizeof(txrc)); @@ -636,18 +640,23 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx) txrc.max_rate_idx = -1; else txrc.max_rate_idx = fls(txrc.rate_idx_mask) - 1; - memcpy(txrc.rate_idx_mcs_mask, - tx->sdata->rc_rateidx_mcs_mask[info->band], - sizeof(txrc.rate_idx_mcs_mask)); + + if (tx->sdata->rc_has_mcs_mask[info->band]) + txrc.rate_idx_mcs_mask = + tx->sdata->rc_rateidx_mcs_mask[info->band]; + txrc.bss = (tx->sdata->vif.type == NL80211_IFTYPE_AP || tx->sdata->vif.type == NL80211_IFTYPE_MESH_POINT || tx->sdata->vif.type == NL80211_IFTYPE_ADHOC); /* set up RTS protection if desired */ if (len > tx->local->hw.wiphy->rts_threshold) { - txrc.rts = rts = true; + txrc.rts = true; } + info->control.use_rts = txrc.rts; + info->control.use_cts_prot = tx->sdata->vif.bss_conf.use_cts_prot; + /* * Use short preamble if the BSS can handle it, but not for * management frames unless we know the receiver can handle @@ -657,7 +666,9 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx) if (tx->sdata->vif.bss_conf.use_short_preamble && (ieee80211_is_data(hdr->frame_control) || (tx->sta && test_sta_flag(tx->sta, WLAN_STA_SHORT_PREAMBLE)))) - txrc.short_preamble = short_preamble = true; + 
txrc.short_preamble = true; + + info->control.short_preamble = txrc.short_preamble; if (tx->sta) assoc = test_sta_flag(tx->sta, WLAN_STA_ASSOC); @@ -681,16 +692,38 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx) */ rate_control_get_rate(tx->sdata, tx->sta, &txrc); - if (unlikely(info->control.rates[0].idx < 0)) - return TX_DROP; + if (tx->sta && !info->control.skip_table) + ratetbl = rcu_dereference(tx->sta->sta.rates); + + if (unlikely(info->control.rates[0].idx < 0)) { + if (ratetbl) { + struct ieee80211_tx_rate rate = { + .idx = ratetbl->rate[0].idx, + .flags = ratetbl->rate[0].flags, + .count = ratetbl->rate[0].count + }; + + if (ratetbl->rate[0].idx < 0) + return TX_DROP; + + tx->rate = rate; + } else { + return TX_DROP; + } + } else { + tx->rate = info->control.rates[0]; + } if (txrc.reported_rate.idx < 0) { - txrc.reported_rate = info->control.rates[0]; + txrc.reported_rate = tx->rate; if (tx->sta && ieee80211_is_data(hdr->frame_control)) tx->sta->last_tx_rate = txrc.reported_rate; } else if (tx->sta) tx->sta->last_tx_rate = txrc.reported_rate; + if (ratetbl) + return TX_CONTINUE; + if (unlikely(!info->control.rates[0].count)) info->control.rates[0].count = 1; @@ -698,91 +731,6 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx) (info->flags & IEEE80211_TX_CTL_NO_ACK))) info->control.rates[0].count = 1; - if (is_multicast_ether_addr(hdr->addr1)) { - /* - * XXX: verify the rate is in the basic rateset - */ - return TX_CONTINUE; - } - - /* - * set up the RTS/CTS rate as the fastest basic rate - * that is not faster than the data rate - * - * XXX: Should this check all retry rates? - */ - if (!(info->control.rates[0].flags & IEEE80211_TX_RC_MCS)) { - s8 baserate = 0; - - rate = &sband->bitrates[info->control.rates[0].idx]; - - for (i = 0; i < sband->n_bitrates; i++) { - /* must be a basic rate */ - if (!(tx->sdata->vif.bss_conf.basic_rates & BIT(i))) - continue; - /* must not be faster than the data rate */ - if (sband->bitrates[i].bitrate > rate->bitrate) - continue; - /* maximum */ - if (sband->bitrates[baserate].bitrate < - sband->bitrates[i].bitrate) - baserate = i; - } - - info->control.rts_cts_rate_idx = baserate; - } - - for (i = 0; i < IEEE80211_TX_MAX_RATES; i++) { - /* - * make sure there's no valid rate following - * an invalid one, just in case drivers don't - * take the API seriously to stop at -1. - */ - if (inval) { - info->control.rates[i].idx = -1; - continue; - } - if (info->control.rates[i].idx < 0) { - inval = true; - continue; - } - - /* - * For now assume MCS is already set up correctly, this - * needs to be fixed. 
- */ - if (info->control.rates[i].flags & IEEE80211_TX_RC_MCS) { - WARN_ON(info->control.rates[i].idx > 76); - continue; - } - - /* set up RTS protection if desired */ - if (rts) - info->control.rates[i].flags |= - IEEE80211_TX_RC_USE_RTS_CTS; - - /* RC is busted */ - if (WARN_ON_ONCE(info->control.rates[i].idx >= - sband->n_bitrates)) { - info->control.rates[i].idx = -1; - continue; - } - - rate = &sband->bitrates[info->control.rates[i].idx]; - - /* set up short preamble */ - if (short_preamble && - rate->flags & IEEE80211_RATE_SHORT_PREAMBLE) - info->control.rates[i].flags |= - IEEE80211_TX_RC_USE_SHORT_PREAMBLE; - - /* set up G protection */ - if (!rts && tx->sdata->vif.bss_conf.use_cts_prot && - rate->flags & IEEE80211_RATE_ERP_G) - info->control.rates[i].flags |= - IEEE80211_TX_RC_USE_CTS_PROTECT; - } - return TX_CONTINUE; } @@ -986,15 +934,18 @@ static ieee80211_tx_result debug_noinline ieee80211_tx_h_stats(struct ieee80211_tx_data *tx) { struct sk_buff *skb; + int ac = -1; if (!tx->sta) return TX_CONTINUE; - tx->sta->tx_packets++; skb_queue_walk(&tx->skbs, skb) { + ac = skb_get_queue_mapping(skb); tx->sta->tx_fragments++; - tx->sta->tx_bytes += skb->len; + tx->sta->tx_bytes[ac] += skb->len; } + if (ac >= 0) + tx->sta->tx_packets[ac]++; return TX_CONTINUE; } @@ -1225,20 +1176,41 @@ static bool ieee80211_tx_frags(struct ieee80211_local *local, spin_lock_irqsave(&local->queue_stop_reason_lock, flags); if (local->queue_stop_reasons[q] || (!txpending && !skb_queue_empty(&local->pending[q]))) { - /* - * Since queue is stopped, queue up frames for later - * transmission from the tx-pending tasklet when the - * queue is woken again. - */ - if (txpending) - skb_queue_splice_init(skbs, &local->pending[q]); - else - skb_queue_splice_tail_init(skbs, - &local->pending[q]); + if (unlikely(info->flags & + IEEE80211_TX_INTFL_OFFCHAN_TX_OK)) { + if (local->queue_stop_reasons[q] & + ~BIT(IEEE80211_QUEUE_STOP_REASON_OFFCHANNEL)) { + /* + * Drop off-channel frames if queues + * are stopped for any reason other + * than off-channel operation. Never + * queue them. + */ + spin_unlock_irqrestore( + &local->queue_stop_reason_lock, + flags); + ieee80211_purge_tx_queue(&local->hw, + skbs); + return true; + } + } else { - spin_unlock_irqrestore(&local->queue_stop_reason_lock, - flags); - return false; + /* + * Since queue is stopped, queue up frames for + * later transmission from the tx-pending + * tasklet when the queue is woken again. 
+ */ + if (txpending) + skb_queue_splice_init(skbs, + &local->pending[q]); + else + skb_queue_splice_tail_init(skbs, + &local->pending[q]); + + spin_unlock_irqrestore(&local->queue_stop_reason_lock, + flags); + return false; + } } spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags); @@ -1472,12 +1444,14 @@ void ieee80211_xmit(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, hdr = (struct ieee80211_hdr *) skb->data; info->control.vif = &sdata->vif; - if (ieee80211_vif_is_mesh(&sdata->vif) && - ieee80211_is_data(hdr->frame_control) && - !is_multicast_ether_addr(hdr->addr1) && - mesh_nexthop_resolve(skb, sdata)) { - /* skb queued: don't free */ - return; + if (ieee80211_vif_is_mesh(&sdata->vif)) { + if (ieee80211_is_data(hdr->frame_control) && + is_unicast_ether_addr(hdr->addr1)) { + if (mesh_nexthop_resolve(sdata, skb)) + return; /* skb queued: don't free */ + } else { + ieee80211_mps_set_frame_flags(sdata, NULL, hdr); + } } ieee80211_set_qos_hdr(sdata, skb); @@ -1677,7 +1651,7 @@ netdev_tx_t ieee80211_monitor_start_xmit(struct sk_buff *skb, if (chanctx_conf) chan = chanctx_conf->def.chan; else if (!local->use_chanctx) - chan = local->_oper_channel; + chan = local->_oper_chandef.chan; else goto fail_rcu; @@ -1787,16 +1761,16 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb, break; /* fall through */ case NL80211_IFTYPE_AP: + if (sdata->vif.type == NL80211_IFTYPE_AP) + chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf); + if (!chanctx_conf) + goto fail_rcu; fc |= cpu_to_le16(IEEE80211_FCTL_FROMDS); /* DA BSSID SA */ memcpy(hdr.addr1, skb->data, ETH_ALEN); memcpy(hdr.addr2, sdata->vif.addr, ETH_ALEN); memcpy(hdr.addr3, skb->data + ETH_ALEN, ETH_ALEN); hdrlen = 24; - if (sdata->vif.type == NL80211_IFTYPE_AP) - chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf); - if (!chanctx_conf) - goto fail_rcu; band = chanctx_conf->def.chan->band; break; case NL80211_IFTYPE_WDS: @@ -1811,7 +1785,7 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb, * This is the exception! WDS style interfaces are prohibited * when channel contexts are in used so this must be valid */ - band = local->hw.conf.channel->band; + band = local->hw.conf.chandef.chan->band; break; #ifdef CONFIG_MAC80211_MESH case NL80211_IFTYPE_MESH_POINT: @@ -1822,9 +1796,24 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb, } if (!is_multicast_ether_addr(skb->data)) { - mpath = mesh_path_lookup(skb->data, sdata); - if (!mpath) - mppath = mpp_path_lookup(skb->data, sdata); + struct sta_info *next_hop; + bool mpp_lookup = true; + + mpath = mesh_path_lookup(sdata, skb->data); + if (mpath) { + mpp_lookup = false; + next_hop = rcu_dereference(mpath->next_hop); + if (!next_hop || + !(mpath->flags & (MESH_PATH_ACTIVE | + MESH_PATH_RESOLVING))) + mpp_lookup = true; + } + + if (mpp_lookup) + mppath = mpp_path_lookup(sdata, skb->data); + + if (mppath && mpath) + mesh_path_del(mpath->sdata, mpath->dst); } /* @@ -1837,8 +1826,8 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb, !(mppath && !ether_addr_equal(mppath->mpp, skb->data))) { hdrlen = ieee80211_fill_mesh_addresses(&hdr, &fc, skb->data, skb->data + ETH_ALEN); - meshhdrlen = ieee80211_new_mesh_header(&mesh_hdr, - sdata, NULL, NULL); + meshhdrlen = ieee80211_new_mesh_header(sdata, &mesh_hdr, + NULL, NULL); } else { /* DS -> MBSS (802.11-2012 13.11.3.3). 
* For unicast with unknown forwarding information, @@ -1857,18 +1846,14 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb, mesh_da, sdata->vif.addr); if (is_multicast_ether_addr(mesh_da)) /* DA TA mSA AE:SA */ - meshhdrlen = - ieee80211_new_mesh_header(&mesh_hdr, - sdata, - skb->data + ETH_ALEN, - NULL); + meshhdrlen = ieee80211_new_mesh_header( + sdata, &mesh_hdr, + skb->data + ETH_ALEN, NULL); else /* RA TA mDA mSA AE:DA SA */ - meshhdrlen = - ieee80211_new_mesh_header(&mesh_hdr, - sdata, - skb->data, - skb->data + ETH_ALEN); + meshhdrlen = ieee80211_new_mesh_header( + sdata, &mesh_hdr, skb->data, + skb->data + ETH_ALEN); } chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf); @@ -1999,24 +1984,14 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb, skb = skb_clone(skb, GFP_ATOMIC); if (skb) { unsigned long flags; - int id, r; + int id; spin_lock_irqsave(&local->ack_status_lock, flags); - r = idr_get_new_above(&local->ack_status_frames, - orig_skb, 1, &id); - if (r == -EAGAIN) { - idr_pre_get(&local->ack_status_frames, - GFP_ATOMIC); - r = idr_get_new_above(&local->ack_status_frames, - orig_skb, 1, &id); - } - if (WARN_ON(!id) || id > 0xffff) { - idr_remove(&local->ack_status_frames, id); - r = -ERANGE; - } + id = idr_alloc(&local->ack_status_frames, orig_skb, + 1, 0x10000, GFP_ATOMIC); spin_unlock_irqrestore(&local->ack_status_lock, flags); - if (!r) { + if (id >= 0) { info_id = id; info_flags |= IEEE80211_TX_CTL_REQ_TX_STATUS; } else if (skb_shared(skb)) { @@ -2056,7 +2031,7 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb, encaps_data = bridge_tunnel_header; encaps_len = sizeof(bridge_tunnel_header); skip_header_bytes -= 2; - } else if (ethertype >= 0x600) { + } else if (ethertype >= ETH_P_802_3_MIN) { encaps_data = rfc1042_header; encaps_len = sizeof(rfc1042_header); skip_header_bytes -= 2; @@ -2264,9 +2239,8 @@ void ieee80211_tx_pending(unsigned long data) /* functions for drivers to get certain frames */ -static void ieee80211_beacon_add_tim(struct ieee80211_sub_if_data *sdata, - struct ps_data *ps, - struct sk_buff *skb) +static void __ieee80211_beacon_add_tim(struct ieee80211_sub_if_data *sdata, + struct ps_data *ps, struct sk_buff *skb) { u8 *pos, *tim; int aid0 = 0; @@ -2328,6 +2302,29 @@ static void ieee80211_beacon_add_tim(struct ieee80211_sub_if_data *sdata, } } +static int ieee80211_beacon_add_tim(struct ieee80211_sub_if_data *sdata, + struct ps_data *ps, struct sk_buff *skb) +{ + struct ieee80211_local *local = sdata->local; + + /* + * Not very nice, but we want to allow the driver to call + * ieee80211_beacon_get() as a response to the set_tim() + * callback. That, however, is already invoked under the + * sta_lock to guarantee consistent and race-free update + * of the tim bitmap in mac80211 and the driver. + */ + if (local->tim_in_locked_section) { + __ieee80211_beacon_add_tim(sdata, ps, skb); + } else { + spin_lock_bh(&local->tim_lock); + __ieee80211_beacon_add_tim(sdata, ps, skb); + spin_unlock_bh(&local->tim_lock); + } + + return 0; +} + struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw, struct ieee80211_vif *vif, u16 *tim_offset, u16 *tim_length) @@ -2372,22 +2369,7 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw, memcpy(skb_put(skb, beacon->head_len), beacon->head, beacon->head_len); - /* - * Not very nice, but we want to allow the driver to call - * ieee80211_beacon_get() as a response to the set_tim() - * callback. 
That, however, is already invoked under the - * sta_lock to guarantee consistent and race-free update - * of the tim bitmap in mac80211 and the driver. - */ - if (local->tim_in_locked_section) { - ieee80211_beacon_add_tim(sdata, &ap->ps, skb); - } else { - unsigned long flags; - - spin_lock_irqsave(&local->tim_lock, flags); - ieee80211_beacon_add_tim(sdata, &ap->ps, skb); - spin_unlock_irqrestore(&local->tim_lock, flags); - } + ieee80211_beacon_add_tim(sdata, &ap->ps, skb); if (tim_offset) *tim_offset = beacon->head_len; @@ -2402,79 +2384,42 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw, } else if (sdata->vif.type == NL80211_IFTYPE_ADHOC) { struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; struct ieee80211_hdr *hdr; - struct sk_buff *presp = rcu_dereference(ifibss->presp); + struct beacon_data *presp = rcu_dereference(ifibss->presp); if (!presp) goto out; - skb = skb_copy(presp, GFP_ATOMIC); + skb = dev_alloc_skb(local->tx_headroom + presp->head_len); if (!skb) goto out; + skb_reserve(skb, local->tx_headroom); + memcpy(skb_put(skb, presp->head_len), presp->head, + presp->head_len); hdr = (struct ieee80211_hdr *) skb->data; hdr->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_BEACON); } else if (ieee80211_vif_is_mesh(&sdata->vif)) { - struct ieee80211_mgmt *mgmt; struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; - u8 *pos; - int hdr_len = offsetof(struct ieee80211_mgmt, u.beacon) + - sizeof(mgmt->u.beacon); + struct beacon_data *bcn = rcu_dereference(ifmsh->beacon); -#ifdef CONFIG_MAC80211_MESH - if (!sdata->u.mesh.mesh_id_len) + if (!bcn) goto out; -#endif if (ifmsh->sync_ops) ifmsh->sync_ops->adjust_tbtt( sdata); skb = dev_alloc_skb(local->tx_headroom + - hdr_len + - 2 + /* NULL SSID */ - 2 + 8 + /* supported rates */ - 2 + 3 + /* DS params */ - 2 + (IEEE80211_MAX_SUPP_RATES - 8) + - 2 + sizeof(struct ieee80211_ht_cap) + - 2 + sizeof(struct ieee80211_ht_operation) + - 2 + sdata->u.mesh.mesh_id_len + - 2 + sizeof(struct ieee80211_meshconf_ie) + - sdata->u.mesh.ie_len); + bcn->head_len + + 256 + /* TIM IE */ + bcn->tail_len); if (!skb) goto out; - - skb_reserve(skb, local->hw.extra_tx_headroom); - mgmt = (struct ieee80211_mgmt *) skb_put(skb, hdr_len); - memset(mgmt, 0, hdr_len); - mgmt->frame_control = - cpu_to_le16(IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_BEACON); - eth_broadcast_addr(mgmt->da); - memcpy(mgmt->sa, sdata->vif.addr, ETH_ALEN); - memcpy(mgmt->bssid, sdata->vif.addr, ETH_ALEN); - mgmt->u.beacon.beacon_int = - cpu_to_le16(sdata->vif.bss_conf.beacon_int); - mgmt->u.beacon.capab_info |= cpu_to_le16( - sdata->u.mesh.security ? 
WLAN_CAPABILITY_PRIVACY : 0); - - pos = skb_put(skb, 2); - *pos++ = WLAN_EID_SSID; - *pos++ = 0x0; - - band = chanctx_conf->def.chan->band; - - if (ieee80211_add_srates_ie(sdata, skb, true, band) || - mesh_add_ds_params_ie(skb, sdata) || - ieee80211_add_ext_srates_ie(sdata, skb, true, band) || - mesh_add_rsn_ie(skb, sdata) || - mesh_add_ht_cap_ie(skb, sdata) || - mesh_add_ht_oper_ie(skb, sdata) || - mesh_add_meshid_ie(skb, sdata) || - mesh_add_meshconf_ie(skb, sdata) || - mesh_add_vendor_ies(skb, sdata)) { - pr_err("o11s: couldn't add ies!\n"); - goto out; - } + skb_reserve(skb, local->tx_headroom); + memcpy(skb_put(skb, bcn->head_len), bcn->head, bcn->head_len); + ieee80211_beacon_add_tim(sdata, &ifmsh->ps, skb); + memcpy(skb_put(skb, bcn->tail_len), bcn->tail, bcn->tail_len); } else { WARN_ON(1); goto out; @@ -2499,8 +2444,6 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw, txrc.max_rate_idx = -1; else txrc.max_rate_idx = fls(txrc.rate_idx_mask) - 1; - memcpy(txrc.rate_idx_mcs_mask, sdata->rc_rateidx_mcs_mask[band], - sizeof(txrc.rate_idx_mcs_mask)); txrc.bss = true; rate_control_get_rate(sdata, NULL, &txrc); @@ -2724,6 +2667,8 @@ ieee80211_get_buffered_bc(struct ieee80211_hw *hw, goto out; ps = &sdata->u.ap.ps; + } else if (ieee80211_vif_is_mesh(&sdata->vif)) { + ps = &sdata->u.mesh.ps; } else { goto out; } @@ -2747,6 +2692,8 @@ ieee80211_get_buffered_bc(struct ieee80211_hw *hw, cpu_to_le16(IEEE80211_FCTL_MOREDATA); } + if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) + sdata = IEEE80211_DEV_TO_SUB_IF(skb->dev); if (!ieee80211_tx_prepare(sdata, &tx, skb)) break; dev_kfree_skb_any(skb); @@ -2779,6 +2726,8 @@ void __ieee80211_tx_skb_tid_band(struct ieee80211_sub_if_data *sdata, skb_set_queue_mapping(skb, ac); skb->priority = tid; + skb->dev = sdata->dev; + /* * The other path calling ieee80211_xmit is from the tasklet, * and while we can handle concurrent transmissions locking diff --git a/net/mac80211/util.c b/net/mac80211/util.c index f11e8c540db4..3f87fa468b1f 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -453,7 +453,8 @@ void ieee80211_add_pending_skbs_fn(struct ieee80211_local *local, } void ieee80211_stop_queues_by_reason(struct ieee80211_hw *hw, - enum queue_stop_reason reason) + unsigned long queues, + enum queue_stop_reason reason) { struct ieee80211_local *local = hw_to_local(hw); unsigned long flags; @@ -461,7 +462,7 @@ void ieee80211_stop_queues_by_reason(struct ieee80211_hw *hw, spin_lock_irqsave(&local->queue_stop_reason_lock, flags); - for (i = 0; i < hw->queues; i++) + for_each_set_bit(i, &queues, hw->queues) __ieee80211_stop_queue(hw, i, reason); spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags); @@ -469,7 +470,7 @@ void ieee80211_stop_queues_by_reason(struct ieee80211_hw *hw, void ieee80211_stop_queues(struct ieee80211_hw *hw) { - ieee80211_stop_queues_by_reason(hw, + ieee80211_stop_queues_by_reason(hw, IEEE80211_MAX_QUEUE_MAP, IEEE80211_QUEUE_STOP_REASON_DRIVER); } EXPORT_SYMBOL(ieee80211_stop_queues); @@ -484,13 +485,15 @@ int ieee80211_queue_stopped(struct ieee80211_hw *hw, int queue) return true; spin_lock_irqsave(&local->queue_stop_reason_lock, flags); - ret = !!local->queue_stop_reasons[queue]; + ret = test_bit(IEEE80211_QUEUE_STOP_REASON_DRIVER, + &local->queue_stop_reasons[queue]); spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags); return ret; } EXPORT_SYMBOL(ieee80211_queue_stopped); void ieee80211_wake_queues_by_reason(struct ieee80211_hw *hw, + unsigned long queues, enum queue_stop_reason reason) { 
struct ieee80211_local *local = hw_to_local(hw); @@ -499,7 +502,7 @@ void ieee80211_wake_queues_by_reason(struct ieee80211_hw *hw, spin_lock_irqsave(&local->queue_stop_reason_lock, flags); - for (i = 0; i < hw->queues; i++) + for_each_set_bit(i, &queues, hw->queues) __ieee80211_wake_queue(hw, i, reason); spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags); @@ -507,10 +510,42 @@ void ieee80211_wake_queues_by_reason(struct ieee80211_hw *hw, void ieee80211_wake_queues(struct ieee80211_hw *hw) { - ieee80211_wake_queues_by_reason(hw, IEEE80211_QUEUE_STOP_REASON_DRIVER); + ieee80211_wake_queues_by_reason(hw, IEEE80211_MAX_QUEUE_MAP, + IEEE80211_QUEUE_STOP_REASON_DRIVER); } EXPORT_SYMBOL(ieee80211_wake_queues); +void ieee80211_flush_queues(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata) +{ + u32 queues; + + if (!local->ops->flush) + return; + + if (sdata && local->hw.flags & IEEE80211_HW_QUEUE_CONTROL) { + int ac; + + queues = 0; + + for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) + queues |= BIT(sdata->vif.hw_queue[ac]); + if (sdata->vif.cab_queue != IEEE80211_INVAL_HW_QUEUE) + queues |= BIT(sdata->vif.cab_queue); + } else { + /* all queues */ + queues = BIT(local->hw.queues) - 1; + } + + ieee80211_stop_queues_by_reason(&local->hw, IEEE80211_MAX_QUEUE_MAP, + IEEE80211_QUEUE_STOP_REASON_FLUSH); + + drv_flush(local, queues, false); + + ieee80211_wake_queues_by_reason(&local->hw, IEEE80211_MAX_QUEUE_MAP, + IEEE80211_QUEUE_STOP_REASON_FLUSH); +} + void ieee80211_iterate_active_interfaces( struct ieee80211_hw *hw, u32 iter_flags, void (*iterator)(void *data, u8 *mac, @@ -626,7 +661,7 @@ void ieee80211_queue_delayed_work(struct ieee80211_hw *hw, } EXPORT_SYMBOL(ieee80211_queue_delayed_work); -u32 ieee802_11_parse_elems_crc(u8 *start, size_t len, +u32 ieee802_11_parse_elems_crc(u8 *start, size_t len, bool action, struct ieee802_11_elems *elems, u64 filter, u32 crc) { @@ -634,6 +669,7 @@ u32 ieee802_11_parse_elems_crc(u8 *start, size_t len, u8 *pos = start; bool calc_crc = filter != 0; DECLARE_BITMAP(seen_elems, 256); + const u8 *ie; bitmap_zero(seen_elems, 256); memset(elems, 0, sizeof(*elems)); @@ -681,6 +717,12 @@ u32 ieee802_11_parse_elems_crc(u8 *start, size_t len, case WLAN_EID_COUNTRY: case WLAN_EID_PWR_CONSTRAINT: case WLAN_EID_TIMEOUT_INTERVAL: + case WLAN_EID_SECONDARY_CHANNEL_OFFSET: + case WLAN_EID_WIDE_BW_CHANNEL_SWITCH: + /* + * not listing WLAN_EID_CHANNEL_SWITCH_WRAPPER -- it seems possible + * that if the content gets bigger it might be needed more than once + */ if (test_bit(id, seen_elems)) { elems->parse_error = true; left -= elen; @@ -704,17 +746,11 @@ u32 ieee802_11_parse_elems_crc(u8 *start, size_t len, elems->supp_rates = pos; elems->supp_rates_len = elen; break; - case WLAN_EID_FH_PARAMS: - elems->fh_params = pos; - elems->fh_params_len = elen; - break; case WLAN_EID_DS_PARAMS: - elems->ds_params = pos; - elems->ds_params_len = elen; - break; - case WLAN_EID_CF_PARAMS: - elems->cf_params = pos; - elems->cf_params_len = elen; + if (elen >= 1) + elems->ds_params = pos; + else + elem_parse_failed = true; break; case WLAN_EID_TIM: if (elen >= sizeof(struct ieee80211_tim_ie)) { @@ -723,10 +759,6 @@ u32 ieee802_11_parse_elems_crc(u8 *start, size_t len, } else elem_parse_failed = true; break; - case WLAN_EID_IBSS_PARAMS: - elems->ibss_params = pos; - elems->ibss_params_len = elen; - break; case WLAN_EID_CHALLENGE: elems->challenge = pos; elems->challenge_len = elen; @@ -739,11 +771,7 @@ u32 ieee802_11_parse_elems_crc(u8 *start, size_t len, if (calc_crc) crc = 
crc32_be(crc, pos - 2, elen + 2); - if (pos[3] == 1) { - /* OUI Type 1 - WPA IE */ - elems->wpa = pos; - elems->wpa_len = elen; - } else if (elen >= 5 && pos[3] == 2) { + if (elen >= 5 && pos[3] == 2) { /* OUI Type 2 - WMM IE */ if (pos[4] == 0) { elems->wmm_info = pos; @@ -760,8 +788,10 @@ u32 ieee802_11_parse_elems_crc(u8 *start, size_t len, elems->rsn_len = elen; break; case WLAN_EID_ERP_INFO: - elems->erp_info = pos; - elems->erp_info_len = elen; + if (elen >= 1) + elems->erp_info = pos; + else + elem_parse_failed = true; break; case WLAN_EID_EXT_SUPP_RATES: elems->ext_supp_rates = pos; @@ -791,6 +821,12 @@ u32 ieee802_11_parse_elems_crc(u8 *start, size_t len, else elem_parse_failed = true; break; + case WLAN_EID_OPMODE_NOTIF: + if (elen > 0) + elems->opmode_notif = pos; + else + elem_parse_failed = true; + break; case WLAN_EID_MESH_ID: elems->mesh_id = pos; elems->mesh_id_len = elen; @@ -805,6 +841,10 @@ u32 ieee802_11_parse_elems_crc(u8 *start, size_t len, elems->peering = pos; elems->peering_len = elen; break; + case WLAN_EID_MESH_AWAKE_WINDOW: + if (elen >= 2) + elems->awake_window = (void *)pos; + break; case WLAN_EID_PREQ: elems->preq = pos; elems->preq_len = elen; @@ -830,12 +870,47 @@ u32 ieee802_11_parse_elems_crc(u8 *start, size_t len, } elems->ch_switch_ie = (void *)pos; break; - case WLAN_EID_QUIET: - if (!elems->quiet_elem) { - elems->quiet_elem = pos; - elems->quiet_elem_len = elen; + case WLAN_EID_EXT_CHANSWITCH_ANN: + if (elen != sizeof(struct ieee80211_ext_chansw_ie)) { + elem_parse_failed = true; + break; + } + elems->ext_chansw_ie = (void *)pos; + break; + case WLAN_EID_SECONDARY_CHANNEL_OFFSET: + if (elen != sizeof(struct ieee80211_sec_chan_offs_ie)) { + elem_parse_failed = true; + break; + } + elems->sec_chan_offs = (void *)pos; + break; + case WLAN_EID_WIDE_BW_CHANNEL_SWITCH: + if (!action || + elen != sizeof(*elems->wide_bw_chansw_ie)) { + elem_parse_failed = true; + break; + } + elems->wide_bw_chansw_ie = (void *)pos; + break; + case WLAN_EID_CHANNEL_SWITCH_WRAPPER: + if (action) { + elem_parse_failed = true; + break; + } + /* + * This is a bit tricky, but as we only care about + * the wide bandwidth channel switch element, + * just parse it out manually.
+ */ + ie = cfg80211_find_ie(WLAN_EID_WIDE_BW_CHANNEL_SWITCH, + pos, elen); + if (ie) { + if (ie[1] == sizeof(*elems->wide_bw_chansw_ie)) + elems->wide_bw_chansw_ie = + (void *)(ie + 2); + else + elem_parse_failed = true; } - elems->num_of_quiet_elem++; break; case WLAN_EID_COUNTRY: elems->country_elem = pos; @@ -849,8 +924,10 @@ u32 ieee802_11_parse_elems_crc(u8 *start, size_t len, elems->pwr_constr_elem = pos; break; case WLAN_EID_TIMEOUT_INTERVAL: - elems->timeout_int = pos; - elems->timeout_int_len = elen; + if (elen >= sizeof(struct ieee80211_timeout_interval_ie)) + elems->timeout_int = (void *)pos; + else + elem_parse_failed = true; break; default: break; @@ -871,12 +948,6 @@ u32 ieee802_11_parse_elems_crc(u8 *start, size_t len, return crc; } -void ieee802_11_parse_elems(u8 *start, size_t len, - struct ieee802_11_elems *elems) -{ - ieee802_11_parse_elems_crc(start, len, elems, 0, 0); -} - void ieee80211_set_wmm_default(struct ieee80211_sub_if_data *sdata, bool bss_notify) { @@ -1029,8 +1100,9 @@ u32 ieee80211_mandatory_rates(struct ieee80211_local *local, void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata, u16 transaction, u16 auth_alg, u16 status, - u8 *extra, size_t extra_len, const u8 *da, - const u8 *bssid, const u8 *key, u8 key_len, u8 key_idx) + const u8 *extra, size_t extra_len, const u8 *da, + const u8 *bssid, const u8 *key, u8 key_len, u8 key_idx, + u32 tx_flags) { struct ieee80211_local *local = sdata->local; struct sk_buff *skb; @@ -1063,7 +1135,8 @@ void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata, WARN_ON(err); } - IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT; + IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT | + tx_flags; ieee80211_tx_skb(sdata, skb); } @@ -1277,7 +1350,7 @@ struct sk_buff *ieee80211_build_probe_req(struct ieee80211_sub_if_data *sdata, void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst, const u8 *ssid, size_t ssid_len, const u8 *ie, size_t ie_len, - u32 ratemask, bool directed, bool no_cck, + u32 ratemask, bool directed, u32 tx_flags, struct ieee80211_channel *channel, bool scan) { struct sk_buff *skb; @@ -1286,9 +1359,7 @@ void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst, ssid, ssid_len, ie, ie_len, directed); if (skb) { - if (no_cck) - IEEE80211_SKB_CB(skb)->flags |= - IEEE80211_TX_CTL_NO_CCK_RATE; + IEEE80211_SKB_CB(skb)->flags |= tx_flags; if (scan) ieee80211_tx_skb_tid_band(sdata, skb, 7, channel->band); else @@ -1351,6 +1422,25 @@ void ieee80211_stop_device(struct ieee80211_local *local) drv_stop(local); } +static void ieee80211_assign_chanctx(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata) +{ + struct ieee80211_chanctx_conf *conf; + struct ieee80211_chanctx *ctx; + + if (!local->use_chanctx) + return; + + mutex_lock(&local->chanctx_mtx); + conf = rcu_dereference_protected(sdata->vif.chanctx_conf, + lockdep_is_held(&local->chanctx_mtx)); + if (conf) { + ctx = container_of(conf, struct ieee80211_chanctx, conf); + drv_assign_vif_chanctx(local, sdata, ctx); + } + mutex_unlock(&local->chanctx_mtx); +} + int ieee80211_reconfig(struct ieee80211_local *local) { struct ieee80211_hw *hw = &local->hw; @@ -1358,6 +1448,7 @@ int ieee80211_reconfig(struct ieee80211_local *local) struct ieee80211_chanctx *ctx; struct sta_info *sta; int res, i; + bool reconfig_due_to_wowlan = false; #ifdef CONFIG_PM if (local->suspended) @@ -1377,6 +1468,7 @@ int ieee80211_reconfig(struct ieee80211_local *local) * res is 1, which means the driver 
requested * to go through a regular reset on wakeup. */ + reconfig_due_to_wowlan = true; } #endif /* everything else happens only if HW was up & running */ @@ -1413,6 +1505,8 @@ int ieee80211_reconfig(struct ieee80211_local *local) /* add interfaces */ sdata = rtnl_dereference(local->monitor_sdata); if (sdata) { + /* in HW restart it exists already */ + WARN_ON(local->resuming); res = drv_add_interface(local, sdata); if (WARN_ON(res)) { rcu_assign_pointer(local->monitor_sdata, NULL); @@ -1437,36 +1531,14 @@ int ieee80211_reconfig(struct ieee80211_local *local) } list_for_each_entry(sdata, &local->interfaces, list) { - struct ieee80211_chanctx_conf *ctx_conf; - if (!ieee80211_sdata_running(sdata)) continue; - - mutex_lock(&local->chanctx_mtx); - ctx_conf = rcu_dereference_protected(sdata->vif.chanctx_conf, - lockdep_is_held(&local->chanctx_mtx)); - if (ctx_conf) { - ctx = container_of(ctx_conf, struct ieee80211_chanctx, - conf); - drv_assign_vif_chanctx(local, sdata, ctx); - } - mutex_unlock(&local->chanctx_mtx); + ieee80211_assign_chanctx(local, sdata); } sdata = rtnl_dereference(local->monitor_sdata); - if (sdata && local->use_chanctx && ieee80211_sdata_running(sdata)) { - struct ieee80211_chanctx_conf *ctx_conf; - - mutex_lock(&local->chanctx_mtx); - ctx_conf = rcu_dereference_protected(sdata->vif.chanctx_conf, - lockdep_is_held(&local->chanctx_mtx)); - if (ctx_conf) { - ctx = container_of(ctx_conf, struct ieee80211_chanctx, - conf); - drv_assign_vif_chanctx(local, sdata, ctx); - } - mutex_unlock(&local->chanctx_mtx); - } + if (sdata && ieee80211_sdata_running(sdata)) + ieee80211_assign_chanctx(local, sdata); /* add STAs back */ mutex_lock(&local->sta_mtx); @@ -1531,6 +1603,10 @@ int ieee80211_reconfig(struct ieee80211_local *local) changed |= BSS_CHANGED_ASSOC | BSS_CHANGED_ARP_FILTER | BSS_CHANGED_PS; + + if (sdata->u.mgd.dtim_period) + changed |= BSS_CHANGED_DTIM_PERIOD; + mutex_lock(&sdata->u.mgd.mtx); ieee80211_bss_info_change_notify(sdata, changed); mutex_unlock(&sdata->u.mgd.mtx); @@ -1550,9 +1626,11 @@ int ieee80211_reconfig(struct ieee80211_local *local) /* fall through */ case NL80211_IFTYPE_MESH_POINT: - changed |= BSS_CHANGED_BEACON | - BSS_CHANGED_BEACON_ENABLED; - ieee80211_bss_info_change_notify(sdata, changed); + if (sdata->vif.bss_conf.enable_beacon) { + changed |= BSS_CHANGED_BEACON | + BSS_CHANGED_BEACON_ENABLED; + ieee80211_bss_info_change_notify(sdata, changed); + } break; case NL80211_IFTYPE_WDS: break; @@ -1618,6 +1696,9 @@ int ieee80211_reconfig(struct ieee80211_local *local) local->in_reconfig = false; barrier(); + if (local->monitors == local->open_count && local->monitors > 0) + ieee80211_add_virtual_monitor(local); + /* * Clear the WLAN_STA_BLOCK_BA flag so new aggregation * sessions can be established after a resume. @@ -1632,24 +1713,26 @@ int ieee80211_reconfig(struct ieee80211_local *local) mutex_lock(&local->sta_mtx); list_for_each_entry(sta, &local->sta_list, list) { - ieee80211_sta_tear_down_BA_sessions(sta, true); + ieee80211_sta_tear_down_BA_sessions( + sta, AGG_STOP_LOCAL_REQUEST); clear_sta_flag(sta, WLAN_STA_BLOCK_BA); } mutex_unlock(&local->sta_mtx); } - ieee80211_wake_queues_by_reason(hw, - IEEE80211_QUEUE_STOP_REASON_SUSPEND); + ieee80211_wake_queues_by_reason(hw, IEEE80211_MAX_QUEUE_MAP, + IEEE80211_QUEUE_STOP_REASON_SUSPEND); /* * If this is for hw restart things are still running. * We may want to change that later, however. 
*/ - if (!local->suspended) { + if (!local->suspended || reconfig_due_to_wowlan) drv_restart_complete(local); + + if (!local->suspended) return 0; - } #ifdef CONFIG_PM /* first set suspended false, then resuming */ @@ -1657,28 +1740,7 @@ int ieee80211_reconfig(struct ieee80211_local *local) mb(); local->resuming = false; - list_for_each_entry(sdata, &local->interfaces, list) { - switch(sdata->vif.type) { - case NL80211_IFTYPE_STATION: - ieee80211_sta_restart(sdata); - break; - case NL80211_IFTYPE_ADHOC: - ieee80211_ibss_restart(sdata); - break; - case NL80211_IFTYPE_MESH_POINT: - ieee80211_mesh_restart(sdata); - break; - default: - break; - } - } - mod_timer(&local->sta_cleanup, jiffies + 1); - - mutex_lock(&local->sta_mtx); - list_for_each_entry(sta, &local->sta_list, list) - mesh_plink_restart(sta); - mutex_unlock(&local->sta_mtx); #else WARN_ON(1); #endif @@ -1864,7 +1926,7 @@ u8 *ieee80211_ie_build_ht_cap(u8 *pos, struct ieee80211_sta_ht_cap *ht_cap, } u8 *ieee80211_ie_build_vht_cap(u8 *pos, struct ieee80211_sta_vht_cap *vht_cap, - u32 cap) + u32 cap) { __le32 tmp; @@ -1926,7 +1988,7 @@ u8 *ieee80211_ie_build_ht_oper(u8 *pos, struct ieee80211_sta_ht_cap *ht_cap, } void ieee80211_ht_oper_to_chandef(struct ieee80211_channel *control_chan, - struct ieee80211_ht_operation *ht_oper, + const struct ieee80211_ht_operation *ht_oper, struct cfg80211_chan_def *chandef) { enum nl80211_channel_type channel_type; @@ -2030,7 +2092,7 @@ int ieee80211_ave_rssi(struct ieee80211_vif *vif) /* non-managed type interfaces */ return 0; } - return ifmgd->ave_beacon_signal; + return ifmgd->ave_beacon_signal / 16; } EXPORT_SYMBOL_GPL(ieee80211_ave_rssi); @@ -2114,3 +2176,48 @@ u64 ieee80211_calculate_rx_timestamp(struct ieee80211_local *local, return ts; } + +void ieee80211_dfs_cac_cancel(struct ieee80211_local *local) +{ + struct ieee80211_sub_if_data *sdata; + + mutex_lock(&local->iflist_mtx); + list_for_each_entry(sdata, &local->interfaces, list) { + cancel_delayed_work_sync(&sdata->dfs_cac_timer_work); + + if (sdata->wdev.cac_started) { + ieee80211_vif_release_channel(sdata); + cfg80211_cac_event(sdata->dev, + NL80211_RADAR_CAC_ABORTED, + GFP_KERNEL); + } + } + mutex_unlock(&local->iflist_mtx); +} + +void ieee80211_dfs_radar_detected_work(struct work_struct *work) +{ + struct ieee80211_local *local = + container_of(work, struct ieee80211_local, radar_detected_work); + struct cfg80211_chan_def chandef; + + ieee80211_dfs_cac_cancel(local); + + if (local->use_chanctx) + /* currently not handled */ + WARN_ON(1); + else { + chandef = local->hw.conf.chandef; + cfg80211_radar_event(local->hw.wiphy, &chandef, GFP_KERNEL); + } +} + +void ieee80211_radar_detected(struct ieee80211_hw *hw) +{ + struct ieee80211_local *local = hw_to_local(hw); + + trace_api_radar_detected(local); + + ieee80211_queue_work(hw, &local->radar_detected_work); +} +EXPORT_SYMBOL(ieee80211_radar_detected); diff --git a/net/mac80211/vht.c b/net/mac80211/vht.c index f311388aeedf..171344d4eb7c 100644 --- a/net/mac80211/vht.c +++ b/net/mac80211/vht.c @@ -10,26 +10,396 @@ #include <linux/export.h> #include <net/mac80211.h> #include "ieee80211_i.h" +#include "rate.h" -void ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata, - struct ieee80211_supported_band *sband, - struct ieee80211_vht_cap *vht_cap_ie, - struct ieee80211_sta_vht_cap *vht_cap) +static void __check_vhtcap_disable(struct ieee80211_sub_if_data *sdata, + struct ieee80211_sta_vht_cap *vht_cap, + u32 flag) { - if (WARN_ON_ONCE(!vht_cap)) + __le32 le_flag = 
cpu_to_le32(flag); + + if (sdata->u.mgd.vht_capa_mask.vht_cap_info & le_flag && + !(sdata->u.mgd.vht_capa.vht_cap_info & le_flag)) + vht_cap->cap &= ~flag; +} + +void ieee80211_apply_vhtcap_overrides(struct ieee80211_sub_if_data *sdata, + struct ieee80211_sta_vht_cap *vht_cap) +{ + int i; + u16 rxmcs_mask, rxmcs_cap, rxmcs_n, txmcs_mask, txmcs_cap, txmcs_n; + + if (!vht_cap->vht_supported) return; + if (sdata->vif.type != NL80211_IFTYPE_STATION) + return; + + __check_vhtcap_disable(sdata, vht_cap, + IEEE80211_VHT_CAP_RXLDPC); + __check_vhtcap_disable(sdata, vht_cap, + IEEE80211_VHT_CAP_SHORT_GI_80); + __check_vhtcap_disable(sdata, vht_cap, + IEEE80211_VHT_CAP_SHORT_GI_160); + __check_vhtcap_disable(sdata, vht_cap, + IEEE80211_VHT_CAP_TXSTBC); + __check_vhtcap_disable(sdata, vht_cap, + IEEE80211_VHT_CAP_SU_BEAMFORMER_CAPABLE); + __check_vhtcap_disable(sdata, vht_cap, + IEEE80211_VHT_CAP_SU_BEAMFORMEE_CAPABLE); + __check_vhtcap_disable(sdata, vht_cap, + IEEE80211_VHT_CAP_RX_ANTENNA_PATTERN); + __check_vhtcap_disable(sdata, vht_cap, + IEEE80211_VHT_CAP_TX_ANTENNA_PATTERN); + + /* Allow user to decrease AMPDU length exponent */ + if (sdata->u.mgd.vht_capa_mask.vht_cap_info & + cpu_to_le32(IEEE80211_VHT_CAP_MAX_A_MPDU_LENGTH_EXPONENT_MASK)) { + u32 cap, n; + + n = le32_to_cpu(sdata->u.mgd.vht_capa.vht_cap_info) & + IEEE80211_VHT_CAP_MAX_A_MPDU_LENGTH_EXPONENT_MASK; + n >>= IEEE80211_VHT_CAP_MAX_A_MPDU_LENGTH_EXPONENT_SHIFT; + cap = vht_cap->cap & IEEE80211_VHT_CAP_MAX_A_MPDU_LENGTH_EXPONENT_MASK; + cap >>= IEEE80211_VHT_CAP_MAX_A_MPDU_LENGTH_EXPONENT_SHIFT; + + if (n < cap) { + vht_cap->cap &= + ~IEEE80211_VHT_CAP_MAX_A_MPDU_LENGTH_EXPONENT_MASK; + vht_cap->cap |= + n << IEEE80211_VHT_CAP_MAX_A_MPDU_LENGTH_EXPONENT_SHIFT; + } + } + + /* Allow the user to decrease MCSes */ + rxmcs_mask = + le16_to_cpu(sdata->u.mgd.vht_capa_mask.supp_mcs.rx_mcs_map); + rxmcs_n = le16_to_cpu(sdata->u.mgd.vht_capa.supp_mcs.rx_mcs_map); + rxmcs_n &= rxmcs_mask; + rxmcs_cap = le16_to_cpu(vht_cap->vht_mcs.rx_mcs_map); + + txmcs_mask = + le16_to_cpu(sdata->u.mgd.vht_capa_mask.supp_mcs.tx_mcs_map); + txmcs_n = le16_to_cpu(sdata->u.mgd.vht_capa.supp_mcs.tx_mcs_map); + txmcs_n &= txmcs_mask; + txmcs_cap = le16_to_cpu(vht_cap->vht_mcs.tx_mcs_map); + for (i = 0; i < 8; i++) { + u8 m, n, c; + + m = (rxmcs_mask >> 2*i) & IEEE80211_VHT_MCS_NOT_SUPPORTED; + n = (rxmcs_n >> 2*i) & IEEE80211_VHT_MCS_NOT_SUPPORTED; + c = (rxmcs_cap >> 2*i) & IEEE80211_VHT_MCS_NOT_SUPPORTED; + + if (m && ((c != IEEE80211_VHT_MCS_NOT_SUPPORTED && n < c) || + n == IEEE80211_VHT_MCS_NOT_SUPPORTED)) { + rxmcs_cap &= ~(3 << 2*i); + rxmcs_cap |= (rxmcs_n & (3 << 2*i)); + } + + m = (txmcs_mask >> 2*i) & IEEE80211_VHT_MCS_NOT_SUPPORTED; + n = (txmcs_n >> 2*i) & IEEE80211_VHT_MCS_NOT_SUPPORTED; + c = (txmcs_cap >> 2*i) & IEEE80211_VHT_MCS_NOT_SUPPORTED; + + if (m && ((c != IEEE80211_VHT_MCS_NOT_SUPPORTED && n < c) || + n == IEEE80211_VHT_MCS_NOT_SUPPORTED)) { + txmcs_cap &= ~(3 << 2*i); + txmcs_cap |= (txmcs_n & (3 << 2*i)); + } + } + vht_cap->vht_mcs.rx_mcs_map = cpu_to_le16(rxmcs_cap); + vht_cap->vht_mcs.tx_mcs_map = cpu_to_le16(txmcs_cap); +} + +void +ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata, + struct ieee80211_supported_band *sband, + const struct ieee80211_vht_cap *vht_cap_ie, + struct sta_info *sta) +{ + struct ieee80211_sta_vht_cap *vht_cap = &sta->sta.vht_cap; + struct ieee80211_sta_vht_cap own_cap; + u32 cap_info, i; + memset(vht_cap, 0, sizeof(*vht_cap)); + if (!sta->sta.ht_cap.ht_supported) + return; + if 
(!vht_cap_ie || !sband->vht_cap.vht_supported) return; + /* A VHT STA must support 40 MHz */ + if (!(sta->sta.ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40)) + return; + vht_cap->vht_supported = true; - vht_cap->cap = le32_to_cpu(vht_cap_ie->vht_cap_info); + own_cap = sband->vht_cap; + /* + * If user has specified capability overrides, take care + * of that if the station we're setting up is the AP that + * we advertised a restricted capability set to. Override + * our own capabilities and then use those below. + */ + if (sdata->vif.type == NL80211_IFTYPE_STATION && + !test_sta_flag(sta, WLAN_STA_TDLS_PEER)) + ieee80211_apply_vhtcap_overrides(sdata, &own_cap); + + /* take some capabilities as-is */ + cap_info = le32_to_cpu(vht_cap_ie->vht_cap_info); + vht_cap->cap = cap_info; + vht_cap->cap &= IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_3895 | + IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_7991 | + IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_11454 | + IEEE80211_VHT_CAP_RXLDPC | + IEEE80211_VHT_CAP_VHT_TXOP_PS | + IEEE80211_VHT_CAP_HTC_VHT | + IEEE80211_VHT_CAP_MAX_A_MPDU_LENGTH_EXPONENT_MASK | + IEEE80211_VHT_CAP_VHT_LINK_ADAPTATION_VHT_UNSOL_MFB | + IEEE80211_VHT_CAP_VHT_LINK_ADAPTATION_VHT_MRQ_MFB | + IEEE80211_VHT_CAP_RX_ANTENNA_PATTERN | + IEEE80211_VHT_CAP_TX_ANTENNA_PATTERN; + + /* and some based on our own capabilities */ + switch (own_cap.cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK) { + case IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ: + vht_cap->cap |= cap_info & + IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ; + break; + case IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ: + vht_cap->cap |= cap_info & + IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK; + break; + default: + /* nothing */ + break; + } + + /* symmetric capabilities */ + vht_cap->cap |= cap_info & own_cap.cap & + (IEEE80211_VHT_CAP_SHORT_GI_80 | + IEEE80211_VHT_CAP_SHORT_GI_160); + + /* remaining ones */ + if (own_cap.cap & IEEE80211_VHT_CAP_SU_BEAMFORMEE_CAPABLE) { + vht_cap->cap |= cap_info & + (IEEE80211_VHT_CAP_SU_BEAMFORMER_CAPABLE | + IEEE80211_VHT_CAP_BEAMFORMER_ANTENNAS_MAX | + IEEE80211_VHT_CAP_SOUNDING_DIMENSIONS_MAX); + } + + if (own_cap.cap & IEEE80211_VHT_CAP_SU_BEAMFORMER_CAPABLE) + vht_cap->cap |= cap_info & + IEEE80211_VHT_CAP_SU_BEAMFORMEE_CAPABLE; + + if (own_cap.cap & IEEE80211_VHT_CAP_MU_BEAMFORMER_CAPABLE) + vht_cap->cap |= cap_info & + IEEE80211_VHT_CAP_MU_BEAMFORMEE_CAPABLE; + + if (own_cap.cap & IEEE80211_VHT_CAP_MU_BEAMFORMEE_CAPABLE) + vht_cap->cap |= cap_info & + IEEE80211_VHT_CAP_MU_BEAMFORMER_CAPABLE; + + if (own_cap.cap & IEEE80211_VHT_CAP_TXSTBC) + vht_cap->cap |= cap_info & IEEE80211_VHT_CAP_RXSTBC_MASK; + + if (own_cap.cap & IEEE80211_VHT_CAP_RXSTBC_MASK) + vht_cap->cap |= cap_info & IEEE80211_VHT_CAP_TXSTBC; /* Copy peer MCS info, the driver might need them. 
*/ memcpy(&vht_cap->vht_mcs, &vht_cap_ie->supp_mcs, sizeof(struct ieee80211_vht_mcs_info)); + + /* but also restrict MCSes */ + for (i = 0; i < 8; i++) { + u16 own_rx, own_tx, peer_rx, peer_tx; + + own_rx = le16_to_cpu(own_cap.vht_mcs.rx_mcs_map); + own_rx = (own_rx >> i * 2) & IEEE80211_VHT_MCS_NOT_SUPPORTED; + + own_tx = le16_to_cpu(own_cap.vht_mcs.tx_mcs_map); + own_tx = (own_tx >> i * 2) & IEEE80211_VHT_MCS_NOT_SUPPORTED; + + peer_rx = le16_to_cpu(vht_cap->vht_mcs.rx_mcs_map); + peer_rx = (peer_rx >> i * 2) & IEEE80211_VHT_MCS_NOT_SUPPORTED; + + peer_tx = le16_to_cpu(vht_cap->vht_mcs.tx_mcs_map); + peer_tx = (peer_tx >> i * 2) & IEEE80211_VHT_MCS_NOT_SUPPORTED; + + if (peer_tx != IEEE80211_VHT_MCS_NOT_SUPPORTED) { + if (own_rx == IEEE80211_VHT_MCS_NOT_SUPPORTED) + peer_tx = IEEE80211_VHT_MCS_NOT_SUPPORTED; + else if (own_rx < peer_tx) + peer_tx = own_rx; + } + + if (peer_rx != IEEE80211_VHT_MCS_NOT_SUPPORTED) { + if (own_tx == IEEE80211_VHT_MCS_NOT_SUPPORTED) + peer_rx = IEEE80211_VHT_MCS_NOT_SUPPORTED; + else if (own_tx < peer_rx) + peer_rx = own_tx; + } + + vht_cap->vht_mcs.rx_mcs_map &= + ~cpu_to_le16(IEEE80211_VHT_MCS_NOT_SUPPORTED << i * 2); + vht_cap->vht_mcs.rx_mcs_map |= cpu_to_le16(peer_rx << i * 2); + + vht_cap->vht_mcs.tx_mcs_map &= + ~cpu_to_le16(IEEE80211_VHT_MCS_NOT_SUPPORTED << i * 2); + vht_cap->vht_mcs.tx_mcs_map |= cpu_to_le16(peer_tx << i * 2); + } + + /* finally set up the bandwidth */ + switch (vht_cap->cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK) { + case IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ: + case IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ: + sta->cur_max_bandwidth = IEEE80211_STA_RX_BW_160; + break; + default: + sta->cur_max_bandwidth = IEEE80211_STA_RX_BW_80; + } + + sta->sta.bandwidth = ieee80211_sta_cur_vht_bw(sta); +} + +enum ieee80211_sta_rx_bandwidth ieee80211_sta_cur_vht_bw(struct sta_info *sta) +{ + struct ieee80211_sub_if_data *sdata = sta->sdata; + u32 cap = sta->sta.vht_cap.cap; + enum ieee80211_sta_rx_bandwidth bw; + + if (!sta->sta.vht_cap.vht_supported) { + bw = sta->sta.ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40 ? + IEEE80211_STA_RX_BW_40 : IEEE80211_STA_RX_BW_20; + goto check_max; + } + + switch (sdata->vif.bss_conf.chandef.width) { + default: + WARN_ON_ONCE(1); + /* fall through */ + case NL80211_CHAN_WIDTH_20_NOHT: + case NL80211_CHAN_WIDTH_20: + case NL80211_CHAN_WIDTH_40: + bw = sta->sta.ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40 ? + IEEE80211_STA_RX_BW_40 : IEEE80211_STA_RX_BW_20; + break; + case NL80211_CHAN_WIDTH_160: + if ((cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK) == + IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ) { + bw = IEEE80211_STA_RX_BW_160; + break; + } + /* fall through */ + case NL80211_CHAN_WIDTH_80P80: + if ((cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK) == + IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ) { + bw = IEEE80211_STA_RX_BW_160; + break; + } + /* fall through */ + case NL80211_CHAN_WIDTH_80: + bw = IEEE80211_STA_RX_BW_80; + } + + check_max: + if (bw > sta->cur_max_bandwidth) + bw = sta->cur_max_bandwidth; + return bw; +} + +void ieee80211_sta_set_rx_nss(struct sta_info *sta) +{ + u8 ht_rx_nss = 0, vht_rx_nss = 0; + + /* if we received a notification already don't overwrite it */ + if (sta->sta.rx_nss) + return; + + if (sta->sta.ht_cap.ht_supported) { + if (sta->sta.ht_cap.mcs.rx_mask[0]) + ht_rx_nss++; + if (sta->sta.ht_cap.mcs.rx_mask[1]) + ht_rx_nss++; + if (sta->sta.ht_cap.mcs.rx_mask[2]) + ht_rx_nss++; + if (sta->sta.ht_cap.mcs.rx_mask[3]) + ht_rx_nss++; + /* FIXME: consider rx_highest? 
*/ + } + + if (sta->sta.vht_cap.vht_supported) { + int i; + u16 rx_mcs_map; + + rx_mcs_map = le16_to_cpu(sta->sta.vht_cap.vht_mcs.rx_mcs_map); + + for (i = 7; i >= 0; i--) { + u8 mcs = (rx_mcs_map >> (2 * i)) & 3; + + if (mcs != IEEE80211_VHT_MCS_NOT_SUPPORTED) { + vht_rx_nss = i + 1; + break; + } + } + /* FIXME: consider rx_highest? */ + } + + ht_rx_nss = max(ht_rx_nss, vht_rx_nss); + sta->sta.rx_nss = max_t(u8, 1, ht_rx_nss); +} + +void ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata, + struct sta_info *sta, u8 opmode, + enum ieee80211_band band, bool nss_only) +{ + struct ieee80211_local *local = sdata->local; + struct ieee80211_supported_band *sband; + enum ieee80211_sta_rx_bandwidth new_bw; + u32 changed = 0; + u8 nss; + + sband = local->hw.wiphy->bands[band]; + + /* ignore - no support for BF yet */ + if (opmode & IEEE80211_OPMODE_NOTIF_RX_NSS_TYPE_BF) + return; + + nss = opmode & IEEE80211_OPMODE_NOTIF_RX_NSS_MASK; + nss >>= IEEE80211_OPMODE_NOTIF_RX_NSS_SHIFT; + nss += 1; + + if (sta->sta.rx_nss != nss) { + sta->sta.rx_nss = nss; + changed |= IEEE80211_RC_NSS_CHANGED; + } + + if (nss_only) + goto change; + + switch (opmode & IEEE80211_OPMODE_NOTIF_CHANWIDTH_MASK) { + case IEEE80211_OPMODE_NOTIF_CHANWIDTH_20MHZ: + sta->cur_max_bandwidth = IEEE80211_STA_RX_BW_20; + break; + case IEEE80211_OPMODE_NOTIF_CHANWIDTH_40MHZ: + sta->cur_max_bandwidth = IEEE80211_STA_RX_BW_40; + break; + case IEEE80211_OPMODE_NOTIF_CHANWIDTH_80MHZ: + sta->cur_max_bandwidth = IEEE80211_STA_RX_BW_80; + break; + case IEEE80211_OPMODE_NOTIF_CHANWIDTH_160MHZ: + sta->cur_max_bandwidth = IEEE80211_STA_RX_BW_160; + break; + } + + new_bw = ieee80211_sta_cur_vht_bw(sta); + if (new_bw != sta->sta.bandwidth) { + sta->sta.bandwidth = new_bw; + changed |= IEEE80211_RC_NSS_CHANGED; + } + + change: + if (changed) + rate_control_rate_update(local, sband, sta, changed); } diff --git a/net/mac80211/wme.c b/net/mac80211/wme.c index 906f00cd6d2f..afba19cb6f87 100644 --- a/net/mac80211/wme.c +++ b/net/mac80211/wme.c @@ -191,6 +191,15 @@ void ieee80211_set_qos_hdr(struct ieee80211_sub_if_data *sdata, /* qos header is 2 bytes */ *p++ = ack_policy | tid; - *p = ieee80211_vif_is_mesh(&sdata->vif) ? 
- (IEEE80211_QOS_CTL_MESH_CONTROL_PRESENT >> 8) : 0; + if (ieee80211_vif_is_mesh(&sdata->vif)) { + /* preserve RSPI and Mesh PS Level bit */ + *p &= ((IEEE80211_QOS_CTL_RSPI | + IEEE80211_QOS_CTL_MESH_PS_LEVEL) >> 8); + + /* Nulls don't have a mesh header (frame body) */ + if (!ieee80211_is_qos_nullfunc(hdr->frame_control)) + *p |= (IEEE80211_QOS_CTL_MESH_CONTROL_PRESENT >> 8); + } else { + *p = 0; + } } diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c index c175ee866ff4..c7c6d644486f 100644 --- a/net/mac80211/wpa.c +++ b/net/mac80211/wpa.c @@ -181,7 +181,6 @@ static int tkip_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb) struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; struct ieee80211_key *key = tx->key; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); - unsigned long flags; unsigned int hdrlen; int len, tail; u8 *pos; @@ -216,12 +215,12 @@ static int tkip_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb) return 0; /* Increase IV for the frame */ - spin_lock_irqsave(&key->u.tkip.txlock, flags); + spin_lock(&key->u.tkip.txlock); key->u.tkip.tx.iv16++; if (key->u.tkip.tx.iv16 == 0) key->u.tkip.tx.iv32++; pos = ieee80211_tkip_add_iv(pos, key); - spin_unlock_irqrestore(&key->u.tkip.txlock, flags); + spin_unlock(&key->u.tkip.txlock); /* hwaccel - with software IV */ if (info->control.hw_key) diff --git a/net/mac802154/Kconfig b/net/mac802154/Kconfig index a967ddaa4e2f..b33dd76d4307 100644 --- a/net/mac802154/Kconfig +++ b/net/mac802154/Kconfig @@ -1,6 +1,6 @@ config MAC802154 tristate "Generic IEEE 802.15.4 Soft Networking Stack (mac802154)" - depends on IEEE802154 && EXPERIMENTAL + depends on IEEE802154 select CRC_CCITT ---help--- This option enables the hardware independent IEEE 802.15.4 diff --git a/net/mac802154/mac802154.h b/net/mac802154/mac802154.h index a4dcaf1dd4b6..d48422e27110 100644 --- a/net/mac802154/mac802154.h +++ b/net/mac802154/mac802154.h @@ -88,9 +88,7 @@ struct mac802154_sub_if_data { #define mac802154_to_priv(_hw) container_of(_hw, struct mac802154_priv, hw) -#define MAC802154_MAX_XMIT_ATTEMPTS 3 - -#define MAC802154_CHAN_NONE (~(u8)0) /* No channel is assigned */ +#define MAC802154_CHAN_NONE 0xff /* No channel is assigned */ extern struct ieee802154_reduced_mlme_ops mac802154_mlme_reduced; extern struct ieee802154_mlme_ops mac802154_mlme_wpan; @@ -114,5 +112,6 @@ void mac802154_dev_set_ieee_addr(struct net_device *dev); u16 mac802154_dev_get_pan_id(const struct net_device *dev); void mac802154_dev_set_pan_id(struct net_device *dev, u16 val); void mac802154_dev_set_page_channel(struct net_device *dev, u8 page, u8 chan); +u8 mac802154_dev_get_dsn(const struct net_device *dev); #endif /* MAC802154_H */ diff --git a/net/mac802154/mac_cmd.c b/net/mac802154/mac_cmd.c index d8d277006089..a99910d4d52f 100644 --- a/net/mac802154/mac_cmd.c +++ b/net/mac802154/mac_cmd.c @@ -73,4 +73,5 @@ struct ieee802154_mlme_ops mac802154_mlme_wpan = { .start_req = mac802154_mlme_start_req, .get_pan_id = mac802154_dev_get_pan_id, .get_short_addr = mac802154_dev_get_short_addr, + .get_dsn = mac802154_dev_get_dsn, }; diff --git a/net/mac802154/mib.c b/net/mac802154/mib.c index f47781ab0ccc..8ded97cf1c33 100644 --- a/net/mac802154/mib.c +++ b/net/mac802154/mib.c @@ -159,6 +159,15 @@ void mac802154_dev_set_pan_id(struct net_device *dev, u16 val) } } +u8 mac802154_dev_get_dsn(const struct net_device *dev) +{ + struct mac802154_sub_if_data *priv = netdev_priv(dev); + + BUG_ON(dev->type != ARPHRD_IEEE802154); + + return priv->dsn++; +} + static 
void phy_chan_notify(struct work_struct *work) { struct phy_chan_notify_work *nw = container_of(work, @@ -167,9 +176,15 @@ static void phy_chan_notify(struct work_struct *work) struct mac802154_sub_if_data *priv = netdev_priv(nw->dev); int res; + mutex_lock(&priv->hw->phy->pib_lock); res = hw->ops->set_channel(&hw->hw, priv->page, priv->chan); if (res) pr_debug("set_channel failed\n"); + else { + priv->hw->phy->current_channel = priv->chan; + priv->hw->phy->current_page = priv->page; + } + mutex_unlock(&priv->hw->phy->pib_lock); kfree(nw); } @@ -186,8 +201,11 @@ void mac802154_dev_set_page_channel(struct net_device *dev, u8 page, u8 chan) priv->chan = chan; spin_unlock_bh(&priv->mib_lock); + mutex_lock(&priv->hw->phy->pib_lock); if (priv->hw->phy->current_channel != priv->chan || priv->hw->phy->current_page != priv->page) { + mutex_unlock(&priv->hw->phy->pib_lock); + work = kzalloc(sizeof(*work), GFP_ATOMIC); if (!work) return; @@ -195,5 +213,6 @@ void mac802154_dev_set_page_channel(struct net_device *dev, u8 page, u8 chan) INIT_WORK(&work->work, phy_chan_notify); work->dev = dev; queue_work(priv->hw->dev_workqueue, &work->work); - } + } else + mutex_unlock(&priv->hw->phy->pib_lock); } diff --git a/net/mac802154/tx.c b/net/mac802154/tx.c index 4e09d070995a..6d1647399d4f 100644 --- a/net/mac802154/tx.c +++ b/net/mac802154/tx.c @@ -25,6 +25,7 @@ #include <linux/if_arp.h> #include <linux/crc-ccitt.h> +#include <net/ieee802154_netdev.h> #include <net/mac802154.h> #include <net/wpan-phy.h> @@ -39,12 +40,12 @@ struct xmit_work { struct mac802154_priv *priv; u8 chan; u8 page; - u8 xmit_attempts; }; static void mac802154_xmit_worker(struct work_struct *work) { struct xmit_work *xw = container_of(work, struct xmit_work, work); + struct mac802154_sub_if_data *sdata; int res; mutex_lock(&xw->priv->phy->pib_lock); @@ -57,21 +58,23 @@ static void mac802154_xmit_worker(struct work_struct *work) pr_debug("set_channel failed\n"); goto out; } + + xw->priv->phy->current_channel = xw->chan; + xw->priv->phy->current_page = xw->page; } res = xw->priv->ops->xmit(&xw->priv->hw, xw->skb); + if (res) + pr_debug("transmission failed\n"); out: mutex_unlock(&xw->priv->phy->pib_lock); - if (res) { - if (xw->xmit_attempts++ < MAC802154_MAX_XMIT_ATTEMPTS) { - queue_work(xw->priv->dev_workqueue, &xw->work); - return; - } else - pr_debug("transmission failed for %d times", - MAC802154_MAX_XMIT_ATTEMPTS); - } + /* Restart the netif queue on each sub_if_data object. */ + rcu_read_lock(); + list_for_each_entry_rcu(sdata, &xw->priv->slaves, list) + netif_wake_queue(sdata->dev); + rcu_read_unlock(); dev_kfree_skb(xw->skb); @@ -82,6 +85,7 @@ netdev_tx_t mac802154_tx(struct mac802154_priv *priv, struct sk_buff *skb, u8 page, u8 chan) { struct xmit_work *work; + struct mac802154_sub_if_data *sdata; if (!(priv->phy->channels_supported[page] & (1 << chan))) { WARN_ON(1); @@ -109,12 +113,17 @@ netdev_tx_t mac802154_tx(struct mac802154_priv *priv, struct sk_buff *skb, return NETDEV_TX_BUSY; } + /* Stop the netif queue on each sub_if_data object. 
*/ + rcu_read_lock(); + list_for_each_entry_rcu(sdata, &priv->slaves, list) + netif_stop_queue(sdata->dev); + rcu_read_unlock(); + INIT_WORK(&work->work, mac802154_xmit_worker); work->skb = skb; work->priv = priv; work->page = page; work->chan = chan; - work->xmit_attempts = 0; queue_work(priv->dev_workqueue, &work->work); diff --git a/net/mac802154/wpan.c b/net/mac802154/wpan.c index 199b92261e94..2ca2f4dceab7 100644 --- a/net/mac802154/wpan.c +++ b/net/mac802154/wpan.c @@ -41,7 +41,7 @@ static inline int mac802154_fetch_skb_u8(struct sk_buff *skb, u8 *val) return -EINVAL; *val = skb->data[0]; - skb_pull(skb, 1); + skb_pull(skb, 1); return 0; } @@ -137,18 +137,16 @@ static int mac802154_header_create(struct sk_buff *skb, struct ieee802154_addr dev_addr; struct mac802154_sub_if_data *priv = netdev_priv(dev); int pos = 2; - u8 *head; + u8 head[MAC802154_FRAME_HARD_HEADER_LEN]; u16 fc; if (!daddr) return -EINVAL; - head = kzalloc(MAC802154_FRAME_HARD_HEADER_LEN, GFP_KERNEL); - if (head == NULL) - return -ENOMEM; - head[pos++] = mac_cb(skb)->seq; /* DSN/BSN */ fc = mac_cb_type(skb); + if (mac_cb_is_ackreq(skb)) + fc |= IEEE802154_FC_ACK_REQ; if (!saddr) { spin_lock_bh(&priv->mib_lock); @@ -210,7 +208,6 @@ static int mac802154_header_create(struct sk_buff *skb, head[1] = fc >> 8; memcpy(skb_push(skb, pos), head, pos); - kfree(head); return pos; } @@ -363,7 +360,7 @@ void mac802154_wpan_setup(struct net_device *dev) dev->header_ops = &mac802154_header_ops; dev->needed_tailroom = 2; /* FCS */ dev->mtu = IEEE802154_MTU; - dev->tx_queue_len = 10; + dev->tx_queue_len = 300; dev->type = ARPHRD_IEEE802154; dev->flags = IFF_NOARP | IFF_BROADCAST; dev->watchdog_timeo = 0; diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 49e96df5fbc4..56d22cae5906 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -124,9 +124,14 @@ config NF_CONNTRACK_TIMESTAMP If unsure, say `N'. +config NF_CONNTRACK_LABELS + bool + help + This option enables support for assigning user-defined flag bits + to connection tracking entries. It is selected by the connlabel match. + config NF_CT_PROTO_DCCP - tristate 'DCCP protocol connection tracking support (EXPERIMENTAL)' - depends on EXPERIMENTAL + tristate 'DCCP protocol connection tracking support' depends on NETFILTER_ADVANCED default IP_DCCP help @@ -139,8 +144,7 @@ config NF_CT_PROTO_GRE tristate config NF_CT_PROTO_SCTP - tristate 'SCTP protocol connection tracking support (EXPERIMENTAL)' - depends on EXPERIMENTAL + tristate 'SCTP protocol connection tracking support' depends on NETFILTER_ADVANCED default IP_SCTP help @@ -281,8 +285,7 @@ config NF_CONNTRACK_PPTP To compile it as a module, choose M here. If unsure, say N. config NF_CONNTRACK_SANE - tristate "SANE protocol support (EXPERIMENTAL)" - depends on EXPERIMENTAL + tristate "SANE protocol support" depends on NETFILTER_ADVANCED help SANE is a protocol for remote access to scanners as implemented @@ -409,8 +412,7 @@ endif # NF_CONNTRACK # transparent proxy support config NETFILTER_TPROXY - tristate "Transparent proxying support (EXPERIMENTAL)" - depends on EXPERIMENTAL + tristate "Transparent proxying support" depends on IP_NF_MANGLE depends on NETFILTER_ADVANCED help @@ -718,8 +720,7 @@ config NETFILTER_XT_TARGET_TEE this clone be rerouted to another nexthop. 
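A note on what the NF_CONNTRACK_LABELS help text above means by user-defined flag bits: conceptually, each conntrack entry carries a bit vector, the kernel only stores and tests the bits, and the name-for-each-bit mapping lives entirely in userspace. The sketch below illustrates that model only; conn_labels, label_set and label_test are invented names, not the kernel's nf_conntrack_labels API.

#include <stdint.h>

#define CONN_LABEL_BITS 128	/* unlike connmark, not limited to 32 bits */

struct conn_labels {
	uint32_t bits[CONN_LABEL_BITS / 32];
};

/* set a user-defined flag bit on a connection */
static void label_set(struct conn_labels *l, unsigned int bit)
{
	l->bits[bit / 32] |= 1u << (bit % 32);
}

/* test a flag bit; which name maps to which bit is a userspace convention */
static int label_test(const struct conn_labels *l, unsigned int bit)
{
	return !!(l->bits[bit / 32] & (1u << (bit % 32)));
}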
config NETFILTER_XT_TARGET_TPROXY - tristate '"TPROXY" target support (EXPERIMENTAL)' - depends on EXPERIMENTAL + tristate '"TPROXY" target support' depends on NETFILTER_TPROXY depends on NETFILTER_XTABLES depends on NETFILTER_ADVANCED @@ -783,8 +784,7 @@ config NETFILTER_XT_TARGET_TCPMSS To compile it as a module, choose M here. If unsure, say N. config NETFILTER_XT_TARGET_TCPOPTSTRIP - tristate '"TCPOPTSTRIP" target support (EXPERIMENTAL)' - depends on EXPERIMENTAL + tristate '"TCPOPTSTRIP" target support' depends on IP_NF_MANGLE || IP6_NF_MANGLE depends on NETFILTER_ADVANCED help @@ -805,6 +805,15 @@ config NETFILTER_XT_MATCH_ADDRTYPE If you want to compile it as a module, say M here and read <file:Documentation/kbuild/modules.txt>. If unsure, say `N'. +config NETFILTER_XT_MATCH_BPF + tristate '"bpf" match support' + depends on NETFILTER_ADVANCED + help + BPF matching applies a Linux socket filter to each packet and + accepts those for which the filter returns non-zero. + + To compile it as a module, choose M here. If unsure, say N. + config NETFILTER_XT_MATCH_CLUSTER tristate '"cluster" match support' depends on NF_CONNTRACK @@ -842,6 +851,19 @@ config NETFILTER_XT_MATCH_CONNBYTES If you want to compile it as a module, say M here and read <file:Documentation/kbuild/modules.txt>. If unsure, say `N'. +config NETFILTER_XT_MATCH_CONNLABEL + tristate '"connlabel" match support' + select NF_CONNTRACK_LABELS + depends on NF_CONNTRACK + depends on NETFILTER_ADVANCED + ---help--- + This match allows you to test and assign userspace-defined label names + to a connection. The kernel only stores bit values - mapping + names to bits is done by userspace. + + Unlike connmark, more than 32 flag bits may be assigned to a + connection simultaneously. + config NETFILTER_XT_MATCH_CONNLIMIT tristate '"connlimit" match support' depends on NF_CONNTRACK @@ -1145,8 +1167,7 @@ config NETFILTER_XT_MATCH_RECENT Official Website: <http://snowman.net/projects/ipt_recent/> config NETFILTER_XT_MATCH_SCTP - tristate '"sctp" protocol match support (EXPERIMENTAL)' - depends on EXPERIMENTAL + tristate '"sctp" protocol match support' depends on NETFILTER_ADVANCED default IP_SCTP help @@ -1158,8 +1179,7 @@ config NETFILTER_XT_MATCH_SCTP <file:Documentation/kbuild/modules.txt>. If unsure, say `N'.
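The "bpf" match added above takes a pre-compiled classic BPF program from userspace and accepts every packet for which the program returns non-zero. As a rough sketch, not part of this patch set, the following standalone program builds a four-instruction filter matching IPv4 UDP and prints it in the "length,code jt jf k,..." text form that the iptables side of the match consumes; the offset-9 load assumes the filter sees the packet starting at the IP header, which is how the userspace match documents it:

#include <linux/filter.h>	/* BPF_STMT/BPF_JUMP, struct sock_filter */
#include <stddef.h>
#include <stdio.h>

/* Classic BPF: A = ip->protocol (byte at offset 9 from the IP header);
 * if A == IPPROTO_UDP (17) return non-zero (match), else return 0. */
static const struct sock_filter udp_only[] = {
	BPF_STMT(BPF_LD  | BPF_B   | BPF_ABS, 9),
	BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 17, 0, 1),
	BPF_STMT(BPF_RET | BPF_K, 0xffff),	/* match */
	BPF_STMT(BPF_RET | BPF_K, 0),		/* no match */
};

int main(void)
{
	size_t n = sizeof(udp_only) / sizeof(udp_only[0]);
	size_t i;

	/* Emit "len,code jt jf k,..." for iptables -m bpf --bytecode. */
	printf("%zu", n);
	for (i = 0; i < n; i++)
		printf(",%u %u %u %u",
		       (unsigned)udp_only[i].code, (unsigned)udp_only[i].jt,
		       (unsigned)udp_only[i].jf, (unsigned)udp_only[i].k);
	putchar('\n');
	return 0;
}

A rule such as iptables -A INPUT -m bpf --bytecode "$(./bpf_udp)" -j ACCEPT would then run the filter on each packet. Since the kernel side only interprets bytecode, a classic BPF compiler such as the nfbpf_compile utility shipped with iptables can generate the same string instead of hand-written instruction arrays.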
config NETFILTER_XT_MATCH_SOCKET - tristate '"socket" match support (EXPERIMENTAL)' - depends on EXPERIMENTAL + tristate '"socket" match support' depends on NETFILTER_TPROXY depends on NETFILTER_XTABLES depends on NETFILTER_ADVANCED diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 32596978df1d..a1abf87d43bf 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -4,6 +4,7 @@ nf_conntrack-y := nf_conntrack_core.o nf_conntrack_standalone.o nf_conntrack_exp nf_conntrack-$(CONFIG_NF_CONNTRACK_TIMEOUT) += nf_conntrack_timeout.o nf_conntrack-$(CONFIG_NF_CONNTRACK_TIMESTAMP) += nf_conntrack_timestamp.o nf_conntrack-$(CONFIG_NF_CONNTRACK_EVENTS) += nf_conntrack_ecache.o +nf_conntrack-$(CONFIG_NF_CONNTRACK_LABELS) += nf_conntrack_labels.o obj-$(CONFIG_NETFILTER) = netfilter.o @@ -98,9 +99,11 @@ obj-$(CONFIG_NETFILTER_XT_TARGET_IDLETIMER) += xt_IDLETIMER.o # matches obj-$(CONFIG_NETFILTER_XT_MATCH_ADDRTYPE) += xt_addrtype.o +obj-$(CONFIG_NETFILTER_XT_MATCH_BPF) += xt_bpf.o obj-$(CONFIG_NETFILTER_XT_MATCH_CLUSTER) += xt_cluster.o obj-$(CONFIG_NETFILTER_XT_MATCH_COMMENT) += xt_comment.o obj-$(CONFIG_NETFILTER_XT_MATCH_CONNBYTES) += xt_connbytes.o +obj-$(CONFIG_NETFILTER_XT_MATCH_CONNLABEL) += xt_connlabel.o obj-$(CONFIG_NETFILTER_XT_MATCH_CONNLIMIT) += xt_connlimit.o obj-$(CONFIG_NETFILTER_XT_MATCH_CONNTRACK) += xt_conntrack.o obj-$(CONFIG_NETFILTER_XT_MATCH_CPU) += xt_cpu.o diff --git a/net/netfilter/core.c b/net/netfilter/core.c index a9c488b6c50d..07c865a31a3d 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -5,6 +5,7 @@ * way. * * Rusty Russell (C)2000 -- This code is GPL. + * Patrick McHardy (c) 2006-2012 */ #include <linux/kernel.h> #include <linux/netfilter.h> @@ -276,10 +277,30 @@ void (*nf_nat_decode_session_hook)(struct sk_buff *, struct flowi *); EXPORT_SYMBOL(nf_nat_decode_session_hook); #endif +static int __net_init netfilter_net_init(struct net *net) +{ #ifdef CONFIG_PROC_FS -struct proc_dir_entry *proc_net_netfilter; -EXPORT_SYMBOL(proc_net_netfilter); + net->nf.proc_netfilter = proc_net_mkdir(net, "netfilter", + net->proc_net); + if (!net->nf.proc_netfilter) { + if (!net_eq(net, &init_net)) + pr_err("cannot create netfilter proc entry"); + + return -ENOMEM; + } #endif + return 0; +} + +static void __net_exit netfilter_net_exit(struct net *net) +{ + remove_proc_entry("netfilter", net->proc_net); +} + +static struct pernet_operations netfilter_net_ops = { + .init = netfilter_net_init, + .exit = netfilter_net_exit, +}; void __init netfilter_init(void) { @@ -289,11 +310,8 @@ void __init netfilter_init(void) INIT_LIST_HEAD(&nf_hooks[i][h]); } -#ifdef CONFIG_PROC_FS - proc_net_netfilter = proc_mkdir("netfilter", init_net.proc_net); - if (!proc_net_netfilter) + if (register_pernet_subsys(&netfilter_net_ops) < 0) panic("cannot create netfilter proc entry"); -#endif if (netfilter_log_init() < 0) panic("cannot initialize nf_log"); diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h new file mode 100644 index 000000000000..25243379b887 --- /dev/null +++ b/net/netfilter/ipset/ip_set_bitmap_gen.h @@ -0,0 +1,277 @@ +/* Copyright (C) 2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#ifndef __IP_SET_BITMAP_IP_GEN_H +#define __IP_SET_BITMAP_IP_GEN_H + +#define CONCAT(a, b) a##b +#define TOKEN(a,b) CONCAT(a, b) + +#define mtype_do_test TOKEN(MTYPE, _do_test) +#define mtype_gc_test TOKEN(MTYPE, _gc_test) +#define mtype_is_filled TOKEN(MTYPE, _is_filled) +#define mtype_do_add TOKEN(MTYPE, _do_add) +#define mtype_do_del TOKEN(MTYPE, _do_del) +#define mtype_do_list TOKEN(MTYPE, _do_list) +#define mtype_do_head TOKEN(MTYPE, _do_head) +#define mtype_adt_elem TOKEN(MTYPE, _adt_elem) +#define mtype_add_timeout TOKEN(MTYPE, _add_timeout) +#define mtype_gc_init TOKEN(MTYPE, _gc_init) +#define mtype_kadt TOKEN(MTYPE, _kadt) +#define mtype_uadt TOKEN(MTYPE, _uadt) +#define mtype_destroy TOKEN(MTYPE, _destroy) +#define mtype_flush TOKEN(MTYPE, _flush) +#define mtype_head TOKEN(MTYPE, _head) +#define mtype_same_set TOKEN(MTYPE, _same_set) +#define mtype_elem TOKEN(MTYPE, _elem) +#define mtype_test TOKEN(MTYPE, _test) +#define mtype_add TOKEN(MTYPE, _add) +#define mtype_del TOKEN(MTYPE, _del) +#define mtype_list TOKEN(MTYPE, _list) +#define mtype_gc TOKEN(MTYPE, _gc) +#define mtype MTYPE + +#define ext_timeout(e, m) \ + (unsigned long *)((e) + (m)->offset[IPSET_OFFSET_TIMEOUT]) +#define ext_counter(e, m) \ + (struct ip_set_counter *)((e) + (m)->offset[IPSET_OFFSET_COUNTER]) +#define get_ext(map, id) ((map)->extensions + (map)->dsize * (id)) + +static void +mtype_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set)) +{ + struct mtype *map = set->data; + + init_timer(&map->gc); + map->gc.data = (unsigned long) set; + map->gc.function = gc; + map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ; + add_timer(&map->gc); +} + +static void +mtype_destroy(struct ip_set *set) +{ + struct mtype *map = set->data; + + if (SET_WITH_TIMEOUT(set)) + del_timer_sync(&map->gc); + + ip_set_free(map->members); + if (map->dsize) + ip_set_free(map->extensions); + kfree(map); + + set->data = NULL; +} + +static void +mtype_flush(struct ip_set *set) +{ + struct mtype *map = set->data; + + memset(map->members, 0, map->memsize); +} + +static int +mtype_head(struct ip_set *set, struct sk_buff *skb) +{ + const struct mtype *map = set->data; + struct nlattr *nested; + + nested = ipset_nest_start(skb, IPSET_ATTR_DATA); + if (!nested) + goto nla_put_failure; + if (mtype_do_head(skb, map) || + nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)) || + nla_put_net32(skb, IPSET_ATTR_MEMSIZE, + htonl(sizeof(*map) + + map->memsize + + map->dsize * map->elements)) || + (SET_WITH_TIMEOUT(set) && + nla_put_net32(skb, IPSET_ATTR_TIMEOUT, htonl(map->timeout))) || + (SET_WITH_COUNTER(set) && + nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, + htonl(IPSET_FLAG_WITH_COUNTERS)))) + goto nla_put_failure; + ipset_nest_end(skb, nested); + + return 0; +nla_put_failure: + return -EMSGSIZE; +} + +static int +mtype_test(struct ip_set *set, void *value, const struct ip_set_ext *ext, + struct ip_set_ext *mext, u32 flags) +{ + struct mtype *map = set->data; + const struct mtype_adt_elem *e = value; + void *x = get_ext(map, e->id); + int ret = mtype_do_test(e, map); + + if (ret <= 0) + return ret; + if (SET_WITH_TIMEOUT(set) && + ip_set_timeout_expired(ext_timeout(x, map))) + return 0; + if (SET_WITH_COUNTER(set)) + ip_set_update_counter(ext_counter(x, map), ext, mext, flags); + return 1; +} + +static int +mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext, + struct ip_set_ext *mext, u32 flags) +{ + struct mtype *map = set->data; + const struct mtype_adt_elem *e = value; + void *x = 
get_ext(map, e->id); + int ret = mtype_do_add(e, map, flags); + + if (ret == IPSET_ADD_FAILED) { + if (SET_WITH_TIMEOUT(set) && + ip_set_timeout_expired(ext_timeout(x, map))) + ret = 0; + else if (!(flags & IPSET_FLAG_EXIST)) + return -IPSET_ERR_EXIST; + } + + if (SET_WITH_TIMEOUT(set)) +#ifdef IP_SET_BITMAP_STORED_TIMEOUT + mtype_add_timeout(ext_timeout(x, map), e, ext, map, ret); +#else + ip_set_timeout_set(ext_timeout(x, map), ext->timeout); +#endif + + if (SET_WITH_COUNTER(set)) + ip_set_init_counter(ext_counter(x, map), ext); + return 0; +} + +static int +mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext, + struct ip_set_ext *mext, u32 flags) +{ + struct mtype *map = set->data; + const struct mtype_adt_elem *e = value; + const void *x = get_ext(map, e->id); + + if (mtype_do_del(e, map) || + (SET_WITH_TIMEOUT(set) && + ip_set_timeout_expired(ext_timeout(x, map)))) + return -IPSET_ERR_EXIST; + + return 0; +} + +static int +mtype_list(const struct ip_set *set, + struct sk_buff *skb, struct netlink_callback *cb) +{ + struct mtype *map = set->data; + struct nlattr *adt, *nested; + void *x; + u32 id, first = cb->args[2]; + + adt = ipset_nest_start(skb, IPSET_ATTR_ADT); + if (!adt) + return -EMSGSIZE; + for (; cb->args[2] < map->elements; cb->args[2]++) { + id = cb->args[2]; + x = get_ext(map, id); + if (!test_bit(id, map->members) || + (SET_WITH_TIMEOUT(set) && +#ifdef IP_SET_BITMAP_STORED_TIMEOUT + mtype_is_filled((const struct mtype_elem *) x) && +#endif + ip_set_timeout_expired(ext_timeout(x, map)))) + continue; + nested = ipset_nest_start(skb, IPSET_ATTR_DATA); + if (!nested) { + if (id == first) { + nla_nest_cancel(skb, adt); + return -EMSGSIZE; + } else + goto nla_put_failure; + } + if (mtype_do_list(skb, map, id)) + goto nla_put_failure; + if (SET_WITH_TIMEOUT(set)) { +#ifdef IP_SET_BITMAP_STORED_TIMEOUT + if (nla_put_net32(skb, IPSET_ATTR_TIMEOUT, + htonl(ip_set_timeout_stored(map, id, + ext_timeout(x, map))))) + goto nla_put_failure; +#else + if (nla_put_net32(skb, IPSET_ATTR_TIMEOUT, + htonl(ip_set_timeout_get( + ext_timeout(x, map))))) + goto nla_put_failure; +#endif + } + if (SET_WITH_COUNTER(set) && + ip_set_put_counter(skb, ext_counter(x, map))) + goto nla_put_failure; + ipset_nest_end(skb, nested); + } + ipset_nest_end(skb, adt); + + /* Set listing finished */ + cb->args[2] = 0; + + return 0; + +nla_put_failure: + nla_nest_cancel(skb, nested); + ipset_nest_end(skb, adt); + if (unlikely(id == first)) { + cb->args[2] = 0; + return -EMSGSIZE; + } + return 0; +} + +static void +mtype_gc(unsigned long ul_set) +{ + struct ip_set *set = (struct ip_set *) ul_set; + struct mtype *map = set->data; + const void *x; + u32 id; + + /* We run parallel with other readers (test element) + * but adding/deleting new entries is locked out */ + read_lock_bh(&set->lock); + for (id = 0; id < map->elements; id++) + if (mtype_gc_test(id, map)) { + x = get_ext(map, id); + if (ip_set_timeout_expired(ext_timeout(x, map))) + clear_bit(id, map->members); + } + read_unlock_bh(&set->lock); + + map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ; + add_timer(&map->gc); +} + +static const struct ip_set_type_variant mtype = { + .kadt = mtype_kadt, + .uadt = mtype_uadt, + .adt = { + [IPSET_ADD] = mtype_add, + [IPSET_DEL] = mtype_del, + [IPSET_TEST] = mtype_test, + }, + .destroy = mtype_destroy, + .flush = mtype_flush, + .head = mtype_head, + .list = mtype_list, + .same_set = mtype_same_set, +}; + +#endif /* __IP_SET_BITMAP_IP_GEN_H */ diff --git 
a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c index 4a92fd47bd4c..f1a8128bef01 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ip.c +++ b/net/netfilter/ipset/ip_set_bitmap_ip.c @@ -1,6 +1,6 @@ /* Copyright (C) 2000-2002 Joakim Axelsson <gozem@linux.nu> * Patrick Schaaf <bof@bof.de> - * Copyright (C) 2003-2011 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> + * Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -24,31 +24,37 @@ #include <linux/netfilter/ipset/pfxlen.h> #include <linux/netfilter/ipset/ip_set.h> #include <linux/netfilter/ipset/ip_set_bitmap.h> -#define IP_SET_BITMAP_TIMEOUT -#include <linux/netfilter/ipset/ip_set_timeout.h> #define REVISION_MIN 0 -#define REVISION_MAX 0 +#define REVISION_MAX 1 /* Counter support added */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>"); IP_SET_MODULE_DESC("bitmap:ip", REVISION_MIN, REVISION_MAX); MODULE_ALIAS("ip_set_bitmap:ip"); +#define MTYPE bitmap_ip + /* Type structure */ struct bitmap_ip { void *members; /* the set members */ + void *extensions; /* data extensions */ u32 first_ip; /* host byte order, included in range */ u32 last_ip; /* host byte order, included in range */ u32 elements; /* number of max elements in the set */ u32 hosts; /* number of hosts in a subnet */ size_t memsize; /* members size */ + size_t dsize; /* extensions struct size */ + size_t offset[IPSET_OFFSET_MAX]; /* Offsets to extensions */ u8 netmask; /* subnet netmask */ u32 timeout; /* timeout parameter */ struct timer_list gc; /* garbage collection */ }; -/* Base variant */ +/* ADT structure for generic function args */ +struct bitmap_ip_adt_elem { + u16 id; +}; static inline u32 ip_to_id(const struct bitmap_ip *m, u32 ip) @@ -56,188 +62,67 @@ ip_to_id(const struct bitmap_ip *m, u32 ip) return ((ip & ip_set_hostmask(m->netmask)) - m->first_ip)/m->hosts; } -static int -bitmap_ip_test(struct ip_set *set, void *value, u32 timeout, u32 flags) -{ - const struct bitmap_ip *map = set->data; - u16 id = *(u16 *)value; - - return !!test_bit(id, map->members); -} +/* Common functions */ -static int -bitmap_ip_add(struct ip_set *set, void *value, u32 timeout, u32 flags) +static inline int +bitmap_ip_do_test(const struct bitmap_ip_adt_elem *e, struct bitmap_ip *map) { - struct bitmap_ip *map = set->data; - u16 id = *(u16 *)value; - - if (test_and_set_bit(id, map->members)) - return -IPSET_ERR_EXIST; - - return 0; + return !!test_bit(e->id, map->members); } -static int -bitmap_ip_del(struct ip_set *set, void *value, u32 timeout, u32 flags) +static inline int +bitmap_ip_gc_test(u16 id, const struct bitmap_ip *map) { - struct bitmap_ip *map = set->data; - u16 id = *(u16 *)value; - - if (!test_and_clear_bit(id, map->members)) - return -IPSET_ERR_EXIST; - - return 0; -} - -static int -bitmap_ip_list(const struct ip_set *set, - struct sk_buff *skb, struct netlink_callback *cb) -{ - const struct bitmap_ip *map = set->data; - struct nlattr *atd, *nested; - u32 id, first = cb->args[2]; - - atd = ipset_nest_start(skb, IPSET_ATTR_ADT); - if (!atd) - return -EMSGSIZE; - for (; cb->args[2] < map->elements; cb->args[2]++) { - id = cb->args[2]; - if (!test_bit(id, map->members)) - continue; - nested = ipset_nest_start(skb, IPSET_ATTR_DATA); - if (!nested) { - if (id == first) { - nla_nest_cancel(skb, atd); - return -EMSGSIZE; - } else - goto nla_put_failure; - } - if 
(nla_put_ipaddr4(skb, IPSET_ATTR_IP, - htonl(map->first_ip + id * map->hosts))) - goto nla_put_failure; - ipset_nest_end(skb, nested); - } - ipset_nest_end(skb, atd); - /* Set listing finished */ - cb->args[2] = 0; - return 0; - -nla_put_failure: - nla_nest_cancel(skb, nested); - ipset_nest_end(skb, atd); - if (unlikely(id == first)) { - cb->args[2] = 0; - return -EMSGSIZE; - } - return 0; + return !!test_bit(id, map->members); } -/* Timeout variant */ - -static int -bitmap_ip_ttest(struct ip_set *set, void *value, u32 timeout, u32 flags) +static inline int +bitmap_ip_do_add(const struct bitmap_ip_adt_elem *e, struct bitmap_ip *map, + u32 flags) { - const struct bitmap_ip *map = set->data; - const unsigned long *members = map->members; - u16 id = *(u16 *)value; - - return ip_set_timeout_test(members[id]); + return !!test_and_set_bit(e->id, map->members); } -static int -bitmap_ip_tadd(struct ip_set *set, void *value, u32 timeout, u32 flags) +static inline int +bitmap_ip_do_del(const struct bitmap_ip_adt_elem *e, struct bitmap_ip *map) { - struct bitmap_ip *map = set->data; - unsigned long *members = map->members; - u16 id = *(u16 *)value; - - if (ip_set_timeout_test(members[id]) && !(flags & IPSET_FLAG_EXIST)) - return -IPSET_ERR_EXIST; - - members[id] = ip_set_timeout_set(timeout); - - return 0; + return !test_and_clear_bit(e->id, map->members); } -static int -bitmap_ip_tdel(struct ip_set *set, void *value, u32 timeout, u32 flags) +static inline int +bitmap_ip_do_list(struct sk_buff *skb, const struct bitmap_ip *map, u32 id) { - struct bitmap_ip *map = set->data; - unsigned long *members = map->members; - u16 id = *(u16 *)value; - int ret = -IPSET_ERR_EXIST; - - if (ip_set_timeout_test(members[id])) - ret = 0; - - members[id] = IPSET_ELEM_UNSET; - return ret; + return nla_put_ipaddr4(skb, IPSET_ATTR_IP, + htonl(map->first_ip + id * map->hosts)); } -static int -bitmap_ip_tlist(const struct ip_set *set, - struct sk_buff *skb, struct netlink_callback *cb) +static inline int +bitmap_ip_do_head(struct sk_buff *skb, const struct bitmap_ip *map) { - const struct bitmap_ip *map = set->data; - struct nlattr *adt, *nested; - u32 id, first = cb->args[2]; - const unsigned long *members = map->members; - - adt = ipset_nest_start(skb, IPSET_ATTR_ADT); - if (!adt) - return -EMSGSIZE; - for (; cb->args[2] < map->elements; cb->args[2]++) { - id = cb->args[2]; - if (!ip_set_timeout_test(members[id])) - continue; - nested = ipset_nest_start(skb, IPSET_ATTR_DATA); - if (!nested) { - if (id == first) { - nla_nest_cancel(skb, adt); - return -EMSGSIZE; - } else - goto nla_put_failure; - } - if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, - htonl(map->first_ip + id * map->hosts)) || - nla_put_net32(skb, IPSET_ATTR_TIMEOUT, - htonl(ip_set_timeout_get(members[id])))) - goto nla_put_failure; - ipset_nest_end(skb, nested); - } - ipset_nest_end(skb, adt); - - /* Set listing finished */ - cb->args[2] = 0; - - return 0; - -nla_put_failure: - nla_nest_cancel(skb, nested); - ipset_nest_end(skb, adt); - if (unlikely(id == first)) { - cb->args[2] = 0; - return -EMSGSIZE; - } - return 0; + return nla_put_ipaddr4(skb, IPSET_ATTR_IP, htonl(map->first_ip)) || + nla_put_ipaddr4(skb, IPSET_ATTR_IP_TO, htonl(map->last_ip)) || + (map->netmask != 32 && + nla_put_u8(skb, IPSET_ATTR_NETMASK, map->netmask)); } static int bitmap_ip_kadt(struct ip_set *set, const struct sk_buff *skb, const struct xt_action_param *par, - enum ipset_adt adt, const struct ip_set_adt_opt *opt) + enum ipset_adt adt, struct ip_set_adt_opt *opt) { struct bitmap_ip *map 
= set->data; ipset_adtfn adtfn = set->variant->adt[adt]; + struct bitmap_ip_adt_elem e = { }; + struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, map); u32 ip; ip = ntohl(ip4addr(skb, opt->flags & IPSET_DIM_ONE_SRC)); if (ip < map->first_ip || ip > map->last_ip) return -IPSET_ERR_BITMAP_RANGE; - ip = ip_to_id(map, ip); + e.id = ip_to_id(map, ip); - return adtfn(set, &ip, opt_timeout(opt, map), opt->cmdflags); + return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags); } static int @@ -246,33 +131,31 @@ bitmap_ip_uadt(struct ip_set *set, struct nlattr *tb[], { struct bitmap_ip *map = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; - u32 timeout = map->timeout; - u32 ip, ip_to, id; + u32 ip, ip_to; + struct bitmap_ip_adt_elem e = { }; + struct ip_set_ext ext = IP_SET_INIT_UEXT(map); int ret = 0; if (unlikely(!tb[IPSET_ATTR_IP] || - !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT))) + !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES))) return -IPSET_ERR_PROTOCOL; if (tb[IPSET_ATTR_LINENO]) *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); - ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip); + ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip) || + ip_set_get_extensions(set, tb, &ext); if (ret) return ret; if (ip < map->first_ip || ip > map->last_ip) return -IPSET_ERR_BITMAP_RANGE; - if (tb[IPSET_ATTR_TIMEOUT]) { - if (!with_timeout(map->timeout)) - return -IPSET_ERR_TIMEOUT; - timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); - } - if (adt == IPSET_TEST) { - id = ip_to_id(map, ip); - return adtfn(set, &id, timeout, flags); + e.id = ip_to_id(map, ip); + return adtfn(set, &e, &ext, &ext, flags); } if (tb[IPSET_ATTR_IP_TO]) { @@ -297,8 +180,8 @@ bitmap_ip_uadt(struct ip_set *set, struct nlattr *tb[], return -IPSET_ERR_BITMAP_RANGE; for (; !before(ip_to, ip); ip += map->hosts) { - id = ip_to_id(map, ip); - ret = adtfn(set, &id, timeout, flags); + e.id = ip_to_id(map, ip); + ret = adtfn(set, &e, &ext, &ext, flags); if (ret && !ip_set_eexist(ret, flags)) return ret; @@ -308,54 +191,6 @@ bitmap_ip_uadt(struct ip_set *set, struct nlattr *tb[], return ret; } -static void -bitmap_ip_destroy(struct ip_set *set) -{ - struct bitmap_ip *map = set->data; - - if (with_timeout(map->timeout)) - del_timer_sync(&map->gc); - - ip_set_free(map->members); - kfree(map); - - set->data = NULL; -} - -static void -bitmap_ip_flush(struct ip_set *set) -{ - struct bitmap_ip *map = set->data; - - memset(map->members, 0, map->memsize); -} - -static int -bitmap_ip_head(struct ip_set *set, struct sk_buff *skb) -{ - const struct bitmap_ip *map = set->data; - struct nlattr *nested; - - nested = ipset_nest_start(skb, IPSET_ATTR_DATA); - if (!nested) - goto nla_put_failure; - if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, htonl(map->first_ip)) || - nla_put_ipaddr4(skb, IPSET_ATTR_IP_TO, htonl(map->last_ip)) || - (map->netmask != 32 && - nla_put_u8(skb, IPSET_ATTR_NETMASK, map->netmask)) || - nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)) || - nla_put_net32(skb, IPSET_ATTR_MEMSIZE, - htonl(sizeof(*map) + map->memsize)) || - (with_timeout(map->timeout) && - nla_put_net32(skb, IPSET_ATTR_TIMEOUT, htonl(map->timeout)))) - goto nla_put_failure; - ipset_nest_end(skb, nested); - - return 0; -nla_put_failure: - return -EMSGSIZE; -} - static bool bitmap_ip_same_set(const struct ip_set *a, const struct ip_set *b) { @@ -365,70 +200,35 @@ bitmap_ip_same_set(const struct ip_set *a, const struct ip_set *b) return 
x->first_ip == y->first_ip && x->last_ip == y->last_ip && x->netmask == y->netmask && - x->timeout == y->timeout; + x->timeout == y->timeout && + a->extensions == b->extensions; } -static const struct ip_set_type_variant bitmap_ip = { - .kadt = bitmap_ip_kadt, - .uadt = bitmap_ip_uadt, - .adt = { - [IPSET_ADD] = bitmap_ip_add, - [IPSET_DEL] = bitmap_ip_del, - [IPSET_TEST] = bitmap_ip_test, - }, - .destroy = bitmap_ip_destroy, - .flush = bitmap_ip_flush, - .head = bitmap_ip_head, - .list = bitmap_ip_list, - .same_set = bitmap_ip_same_set, +/* Plain variant */ + +struct bitmap_ip_elem { }; -static const struct ip_set_type_variant bitmap_tip = { - .kadt = bitmap_ip_kadt, - .uadt = bitmap_ip_uadt, - .adt = { - [IPSET_ADD] = bitmap_ip_tadd, - [IPSET_DEL] = bitmap_ip_tdel, - [IPSET_TEST] = bitmap_ip_ttest, - }, - .destroy = bitmap_ip_destroy, - .flush = bitmap_ip_flush, - .head = bitmap_ip_head, - .list = bitmap_ip_tlist, - .same_set = bitmap_ip_same_set, +/* Timeout variant */ + +struct bitmap_ipt_elem { + unsigned long timeout; }; -static void -bitmap_ip_gc(unsigned long ul_set) -{ - struct ip_set *set = (struct ip_set *) ul_set; - struct bitmap_ip *map = set->data; - unsigned long *table = map->members; - u32 id; - - /* We run parallel with other readers (test element) - * but adding/deleting new entries is locked out */ - read_lock_bh(&set->lock); - for (id = 0; id < map->elements; id++) - if (ip_set_timeout_expired(table[id])) - table[id] = IPSET_ELEM_UNSET; - read_unlock_bh(&set->lock); - - map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ; - add_timer(&map->gc); -} +/* Plain variant with counter */ -static void -bitmap_ip_gc_init(struct ip_set *set) -{ - struct bitmap_ip *map = set->data; +struct bitmap_ipc_elem { + struct ip_set_counter counter; +}; - init_timer(&map->gc); - map->gc.data = (unsigned long) set; - map->gc.function = bitmap_ip_gc; - map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ; - add_timer(&map->gc); -} +/* Timeout variant with counter */ + +struct bitmap_ipct_elem { + unsigned long timeout; + struct ip_set_counter counter; +}; + +#include "ip_set_bitmap_gen.h" /* Create bitmap:ip type of sets */ @@ -440,6 +240,13 @@ init_map_ip(struct ip_set *set, struct bitmap_ip *map, map->members = ip_set_alloc(map->memsize); if (!map->members) return false; + if (map->dsize) { + map->extensions = ip_set_alloc(map->dsize * elements); + if (!map->extensions) { + kfree(map->members); + return false; + } + } map->first_ip = first_ip; map->last_ip = last_ip; map->elements = elements; @@ -457,13 +264,14 @@ static int bitmap_ip_create(struct ip_set *set, struct nlattr *tb[], u32 flags) { struct bitmap_ip *map; - u32 first_ip, last_ip, hosts; + u32 first_ip, last_ip, hosts, cadt_flags = 0; u64 elements; u8 netmask = 32; int ret; if (unlikely(!tb[IPSET_ATTR_IP] || - !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT))) + !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS))) return -IPSET_ERR_PROTOCOL; ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &first_ip); @@ -526,8 +334,45 @@ bitmap_ip_create(struct ip_set *set, struct nlattr *tb[], u32 flags) if (!map) return -ENOMEM; - if (tb[IPSET_ATTR_TIMEOUT]) { - map->memsize = elements * sizeof(unsigned long); + map->memsize = bitmap_bytes(0, elements - 1); + set->variant = &bitmap_ip; + if (tb[IPSET_ATTR_CADT_FLAGS]) + cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); + if (cadt_flags & IPSET_FLAG_WITH_COUNTERS) { + set->extensions |= IPSET_EXT_COUNTER; + if 
(tb[IPSET_ATTR_TIMEOUT]) { + map->dsize = sizeof(struct bitmap_ipct_elem); + map->offset[IPSET_OFFSET_TIMEOUT] = + offsetof(struct bitmap_ipct_elem, timeout); + map->offset[IPSET_OFFSET_COUNTER] = + offsetof(struct bitmap_ipct_elem, counter); + + if (!init_map_ip(set, map, first_ip, last_ip, + elements, hosts, netmask)) { + kfree(map); + return -ENOMEM; + } + + map->timeout = ip_set_timeout_uget( + tb[IPSET_ATTR_TIMEOUT]); + set->extensions |= IPSET_EXT_TIMEOUT; + + bitmap_ip_gc_init(set, bitmap_ip_gc); + } else { + map->dsize = sizeof(struct bitmap_ipc_elem); + map->offset[IPSET_OFFSET_COUNTER] = + offsetof(struct bitmap_ipc_elem, counter); + + if (!init_map_ip(set, map, first_ip, last_ip, + elements, hosts, netmask)) { + kfree(map); + return -ENOMEM; + } + } + } else if (tb[IPSET_ATTR_TIMEOUT]) { + map->dsize = sizeof(struct bitmap_ipt_elem); + map->offset[IPSET_OFFSET_TIMEOUT] = + offsetof(struct bitmap_ipt_elem, timeout); if (!init_map_ip(set, map, first_ip, last_ip, elements, hosts, netmask)) { @@ -536,19 +381,16 @@ bitmap_ip_create(struct ip_set *set, struct nlattr *tb[], u32 flags) } map->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); - set->variant = &bitmap_tip; + set->extensions |= IPSET_EXT_TIMEOUT; - bitmap_ip_gc_init(set); + bitmap_ip_gc_init(set, bitmap_ip_gc); } else { - map->memsize = bitmap_bytes(0, elements - 1); - + map->dsize = 0; if (!init_map_ip(set, map, first_ip, last_ip, elements, hosts, netmask)) { kfree(map); return -ENOMEM; } - - set->variant = &bitmap_ip; } return 0; } @@ -568,6 +410,7 @@ static struct ip_set_type bitmap_ip_type __read_mostly = { [IPSET_ATTR_CIDR] = { .type = NLA_U8 }, [IPSET_ATTR_NETMASK] = { .type = NLA_U8 }, [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, + [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 }, }, .adt_policy = { [IPSET_ATTR_IP] = { .type = NLA_NESTED }, @@ -575,6 +418,8 @@ static struct ip_set_type bitmap_ip_type __read_mostly = { [IPSET_ATTR_CIDR] = { .type = NLA_U8 }, [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, [IPSET_ATTR_LINENO] = { .type = NLA_U32 }, + [IPSET_ATTR_BYTES] = { .type = NLA_U64 }, + [IPSET_ATTR_PACKETS] = { .type = NLA_U64 }, }, .me = THIS_MODULE, }; diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c index 0f92dc24cb89..3b30e0bef890 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c +++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c @@ -1,7 +1,7 @@ /* Copyright (C) 2000-2002 Joakim Axelsson <gozem@linux.nu> * Patrick Schaaf <bof@bof.de> * Martin Josefsson <gandalf@wlug.westbo.se> - * Copyright (C) 2003-2011 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> + * Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -23,340 +23,208 @@ #include <linux/netfilter/ipset/pfxlen.h> #include <linux/netfilter/ipset/ip_set.h> -#include <linux/netfilter/ipset/ip_set_timeout.h> #include <linux/netfilter/ipset/ip_set_bitmap.h> #define REVISION_MIN 0 -#define REVISION_MAX 0 +#define REVISION_MAX 1 /* Counter support added */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>"); IP_SET_MODULE_DESC("bitmap:ip,mac", REVISION_MIN, REVISION_MAX); MODULE_ALIAS("ip_set_bitmap:ip,mac"); +#define MTYPE bitmap_ipmac +#define IP_SET_BITMAP_STORED_TIMEOUT + enum { - MAC_EMPTY, /* element is not set */ - MAC_FILLED, /* element is set with MAC */ MAC_UNSET, /* element is set, without MAC */ + MAC_FILLED, /* 
element is set with MAC */ }; /* Type structure */ struct bitmap_ipmac { void *members; /* the set members */ + void *extensions; /* MAC + data extensions */ u32 first_ip; /* host byte order, included in range */ u32 last_ip; /* host byte order, included in range */ + u32 elements; /* number of max elements in the set */ u32 timeout; /* timeout value */ struct timer_list gc; /* garbage collector */ + size_t memsize; /* members size */ size_t dsize; /* size of element */ + size_t offset[IPSET_OFFSET_MAX]; /* Offsets to extensions */ }; /* ADT structure for generic function args */ -struct ipmac { - u32 id; /* id in array */ - unsigned char *ether; /* ethernet address */ +struct bitmap_ipmac_adt_elem { + u16 id; + unsigned char *ether; }; -/* Member element without and with timeout */ - -struct ipmac_elem { +struct bitmap_ipmac_elem { unsigned char ether[ETH_ALEN]; - unsigned char match; + unsigned char filled; } __attribute__ ((aligned)); -struct ipmac_telem { - unsigned char ether[ETH_ALEN]; - unsigned char match; - unsigned long timeout; -} __attribute__ ((aligned)); - -static inline void * -bitmap_ipmac_elem(const struct bitmap_ipmac *map, u32 id) +static inline u32 +ip_to_id(const struct bitmap_ipmac *m, u32 ip) { - return (void *)((char *)map->members + id * map->dsize); + return ip - m->first_ip; } -static inline bool -bitmap_timeout(const struct bitmap_ipmac *map, u32 id) +static inline struct bitmap_ipmac_elem * +get_elem(void *extensions, u16 id, size_t dsize) { - const struct ipmac_telem *elem = bitmap_ipmac_elem(map, id); - - return ip_set_timeout_test(elem->timeout); + return (struct bitmap_ipmac_elem *)(extensions + id * dsize); } -static inline bool -bitmap_expired(const struct bitmap_ipmac *map, u32 id) -{ - const struct ipmac_telem *elem = bitmap_ipmac_elem(map, id); - - return ip_set_timeout_expired(elem->timeout); -} +/* Common functions */ static inline int -bitmap_ipmac_exist(const struct ipmac_telem *elem) +bitmap_ipmac_do_test(const struct bitmap_ipmac_adt_elem *e, + const struct bitmap_ipmac *map) { - return elem->match == MAC_UNSET || - (elem->match == MAC_FILLED && - !ip_set_timeout_expired(elem->timeout)); -} + const struct bitmap_ipmac_elem *elem; -/* Base variant */ - -static int -bitmap_ipmac_test(struct ip_set *set, void *value, u32 timeout, u32 flags) -{ - const struct bitmap_ipmac *map = set->data; - const struct ipmac *data = value; - const struct ipmac_elem *elem = bitmap_ipmac_elem(map, data->id); - - switch (elem->match) { - case MAC_UNSET: - /* Trigger kernel to fill out the ethernet address */ - return -EAGAIN; - case MAC_FILLED: - return data->ether == NULL || - ether_addr_equal(data->ether, elem->ether); - } - return 0; -} - -static int -bitmap_ipmac_add(struct ip_set *set, void *value, u32 timeout, u32 flags) -{ - struct bitmap_ipmac *map = set->data; - const struct ipmac *data = value; - struct ipmac_elem *elem = bitmap_ipmac_elem(map, data->id); - - switch (elem->match) { - case MAC_UNSET: - if (!data->ether) - /* Already added without ethernet address */ - return -IPSET_ERR_EXIST; - /* Fill the MAC address */ - memcpy(elem->ether, data->ether, ETH_ALEN); - elem->match = MAC_FILLED; - break; - case MAC_FILLED: - return -IPSET_ERR_EXIST; - case MAC_EMPTY: - if (data->ether) { - memcpy(elem->ether, data->ether, ETH_ALEN); - elem->match = MAC_FILLED; - } else - elem->match = MAC_UNSET; - } - - return 0; -} - -static int -bitmap_ipmac_del(struct ip_set *set, void *value, u32 timeout, u32 flags) -{ - struct bitmap_ipmac *map = set->data; - const struct 
ipmac *data = value; - struct ipmac_elem *elem = bitmap_ipmac_elem(map, data->id); - - if (elem->match == MAC_EMPTY) - return -IPSET_ERR_EXIST; - - elem->match = MAC_EMPTY; - - return 0; + if (!test_bit(e->id, map->members)) + return 0; + elem = get_elem(map->extensions, e->id, map->dsize); + if (elem->filled == MAC_FILLED) + return e->ether == NULL || + ether_addr_equal(e->ether, elem->ether); + /* Trigger kernel to fill out the ethernet address */ + return -EAGAIN; } -static int -bitmap_ipmac_list(const struct ip_set *set, - struct sk_buff *skb, struct netlink_callback *cb) +static inline int +bitmap_ipmac_gc_test(u16 id, const struct bitmap_ipmac *map) { - const struct bitmap_ipmac *map = set->data; - const struct ipmac_elem *elem; - struct nlattr *atd, *nested; - u32 id, first = cb->args[2]; - u32 last = map->last_ip - map->first_ip; - - atd = ipset_nest_start(skb, IPSET_ATTR_ADT); - if (!atd) - return -EMSGSIZE; - for (; cb->args[2] <= last; cb->args[2]++) { - id = cb->args[2]; - elem = bitmap_ipmac_elem(map, id); - if (elem->match == MAC_EMPTY) - continue; - nested = ipset_nest_start(skb, IPSET_ATTR_DATA); - if (!nested) { - if (id == first) { - nla_nest_cancel(skb, atd); - return -EMSGSIZE; - } else - goto nla_put_failure; - } - if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, - htonl(map->first_ip + id)) || - (elem->match == MAC_FILLED && - nla_put(skb, IPSET_ATTR_ETHER, ETH_ALEN, - elem->ether))) - goto nla_put_failure; - ipset_nest_end(skb, nested); - } - ipset_nest_end(skb, atd); - /* Set listing finished */ - cb->args[2] = 0; - - return 0; + const struct bitmap_ipmac_elem *elem; -nla_put_failure: - nla_nest_cancel(skb, nested); - ipset_nest_end(skb, atd); - if (unlikely(id == first)) { - cb->args[2] = 0; - return -EMSGSIZE; - } - return 0; + if (!test_bit(id, map->members)) + return 0; + elem = get_elem(map->extensions, id, map->dsize); + /* Timer not started for the incomplete elements */ + return elem->filled == MAC_FILLED; } -/* Timeout variant */ - -static int -bitmap_ipmac_ttest(struct ip_set *set, void *value, u32 timeout, u32 flags) +static inline int +bitmap_ipmac_is_filled(const struct bitmap_ipmac_elem *elem) { - const struct bitmap_ipmac *map = set->data; - const struct ipmac *data = value; - const struct ipmac_elem *elem = bitmap_ipmac_elem(map, data->id); - - switch (elem->match) { - case MAC_UNSET: - /* Trigger kernel to fill out the ethernet address */ - return -EAGAIN; - case MAC_FILLED: - return (data->ether == NULL || - ether_addr_equal(data->ether, elem->ether)) && - !bitmap_expired(map, data->id); - } - return 0; + return elem->filled == MAC_FILLED; } -static int -bitmap_ipmac_tadd(struct ip_set *set, void *value, u32 timeout, u32 flags) +static inline int +bitmap_ipmac_add_timeout(unsigned long *timeout, + const struct bitmap_ipmac_adt_elem *e, + const struct ip_set_ext *ext, + struct bitmap_ipmac *map, int mode) { - struct bitmap_ipmac *map = set->data; - const struct ipmac *data = value; - struct ipmac_telem *elem = bitmap_ipmac_elem(map, data->id); - bool flag_exist = flags & IPSET_FLAG_EXIST; + u32 t = ext->timeout; - switch (elem->match) { - case MAC_UNSET: - if (!(data->ether || flag_exist)) - /* Already added without ethernet address */ - return -IPSET_ERR_EXIST; - /* Fill the MAC address and activate the timer */ - memcpy(elem->ether, data->ether, ETH_ALEN); - elem->match = MAC_FILLED; - if (timeout == map->timeout) + if (mode == IPSET_ADD_START_STORED_TIMEOUT) { + if (t == map->timeout) /* Timeout was not specified, get stored one */ - timeout = 
elem->timeout; - elem->timeout = ip_set_timeout_set(timeout); - break; - case MAC_FILLED: - if (!(bitmap_expired(map, data->id) || flag_exist)) - return -IPSET_ERR_EXIST; - /* Fall through */ - case MAC_EMPTY: - if (data->ether) { - memcpy(elem->ether, data->ether, ETH_ALEN); - elem->match = MAC_FILLED; - } else - elem->match = MAC_UNSET; + t = *timeout; + ip_set_timeout_set(timeout, t); + } else { /* If MAC is unset yet, we store plain timeout value * because the timer is not activated yet * and we can reuse it later when MAC is filled out, * possibly by the kernel */ - elem->timeout = data->ether ? ip_set_timeout_set(timeout) - : timeout; - break; + if (e->ether) + ip_set_timeout_set(timeout, t); + else + *timeout = t; } - return 0; } -static int -bitmap_ipmac_tdel(struct ip_set *set, void *value, u32 timeout, u32 flags) +static inline int +bitmap_ipmac_do_add(const struct bitmap_ipmac_adt_elem *e, + struct bitmap_ipmac *map, u32 flags) { - struct bitmap_ipmac *map = set->data; - const struct ipmac *data = value; - struct ipmac_telem *elem = bitmap_ipmac_elem(map, data->id); + struct bitmap_ipmac_elem *elem; + + elem = get_elem(map->extensions, e->id, map->dsize); + if (test_and_set_bit(e->id, map->members)) { + if (elem->filled == MAC_FILLED) { + if (e->ether && (flags & IPSET_FLAG_EXIST)) + memcpy(elem->ether, e->ether, ETH_ALEN); + return IPSET_ADD_FAILED; + } else if (!e->ether) + /* Already added without ethernet address */ + return IPSET_ADD_FAILED; + /* Fill the MAC address and trigger the timer activation */ + memcpy(elem->ether, e->ether, ETH_ALEN); + elem->filled = MAC_FILLED; + return IPSET_ADD_START_STORED_TIMEOUT; + } else if (e->ether) { + /* We can store MAC too */ + memcpy(elem->ether, e->ether, ETH_ALEN); + elem->filled = MAC_FILLED; + return 0; + } else { + elem->filled = MAC_UNSET; + /* MAC is not stored yet, don't start timer */ + return IPSET_ADD_STORE_PLAIN_TIMEOUT; + } +} - if (elem->match == MAC_EMPTY || bitmap_expired(map, data->id)) - return -IPSET_ERR_EXIST; +static inline int +bitmap_ipmac_do_del(const struct bitmap_ipmac_adt_elem *e, + struct bitmap_ipmac *map) +{ + return !test_and_clear_bit(e->id, map->members); +} - elem->match = MAC_EMPTY; +static inline unsigned long +ip_set_timeout_stored(struct bitmap_ipmac *map, u32 id, unsigned long *timeout) +{ + const struct bitmap_ipmac_elem *elem = + get_elem(map->extensions, id, map->dsize); - return 0; + return elem->filled == MAC_FILLED ? ip_set_timeout_get(timeout) : + *timeout; } -static int -bitmap_ipmac_tlist(const struct ip_set *set, - struct sk_buff *skb, struct netlink_callback *cb) +static inline int +bitmap_ipmac_do_list(struct sk_buff *skb, const struct bitmap_ipmac *map, + u32 id) { - const struct bitmap_ipmac *map = set->data; - const struct ipmac_telem *elem; - struct nlattr *atd, *nested; - u32 id, first = cb->args[2]; - u32 timeout, last = map->last_ip - map->first_ip; - - atd = ipset_nest_start(skb, IPSET_ATTR_ADT); - if (!atd) - return -EMSGSIZE; - for (; cb->args[2] <= last; cb->args[2]++) { - id = cb->args[2]; - elem = bitmap_ipmac_elem(map, id); - if (!bitmap_ipmac_exist(elem)) - continue; - nested = ipset_nest_start(skb, IPSET_ATTR_DATA); - if (!nested) { - if (id == first) { - nla_nest_cancel(skb, atd); - return -EMSGSIZE; - } else - goto nla_put_failure; - } - if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, - htonl(map->first_ip + id)) || - (elem->match == MAC_FILLED && - nla_put(skb, IPSET_ATTR_ETHER, ETH_ALEN, - elem->ether))) - goto nla_put_failure; - timeout = elem->match == MAC_UNSET ? 
elem->timeout - : ip_set_timeout_get(elem->timeout); - if (nla_put_net32(skb, IPSET_ATTR_TIMEOUT, htonl(timeout))) - goto nla_put_failure; - ipset_nest_end(skb, nested); - } - ipset_nest_end(skb, atd); - /* Set listing finished */ - cb->args[2] = 0; + const struct bitmap_ipmac_elem *elem = + get_elem(map->extensions, id, map->dsize); - return 0; + return nla_put_ipaddr4(skb, IPSET_ATTR_IP, + htonl(map->first_ip + id)) || + (elem->filled == MAC_FILLED && + nla_put(skb, IPSET_ATTR_ETHER, ETH_ALEN, elem->ether)); +} -nla_put_failure: - nla_nest_cancel(skb, nested); - ipset_nest_end(skb, atd); - return -EMSGSIZE; +static inline int +bitmap_ipmac_do_head(struct sk_buff *skb, const struct bitmap_ipmac *map) +{ + return nla_put_ipaddr4(skb, IPSET_ATTR_IP, htonl(map->first_ip)) || + nla_put_ipaddr4(skb, IPSET_ATTR_IP_TO, htonl(map->last_ip)); } static int bitmap_ipmac_kadt(struct ip_set *set, const struct sk_buff *skb, const struct xt_action_param *par, - enum ipset_adt adt, const struct ip_set_adt_opt *opt) + enum ipset_adt adt, struct ip_set_adt_opt *opt) { struct bitmap_ipmac *map = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; - struct ipmac data; + struct bitmap_ipmac_adt_elem e = {}; + struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, map); + u32 ip; /* MAC can be src only */ if (!(opt->flags & IPSET_DIM_TWO_SRC)) return 0; - data.id = ntohl(ip4addr(skb, opt->flags & IPSET_DIM_ONE_SRC)); - if (data.id < map->first_ip || data.id > map->last_ip) + ip = ntohl(ip4addr(skb, opt->flags & IPSET_DIM_ONE_SRC)); + if (ip < map->first_ip || ip > map->last_ip) return -IPSET_ERR_BITMAP_RANGE; /* Backward compatibility: we don't check the second flag */ @@ -364,10 +232,10 @@ bitmap_ipmac_kadt(struct ip_set *set, const struct sk_buff *skb, (skb_mac_header(skb) + ETH_HLEN) > skb->data) return -EINVAL; - data.id -= map->first_ip; - data.ether = eth_hdr(skb)->h_source; + e.id = ip_to_id(map, ip); + e.ether = eth_hdr(skb)->h_source; - return adtfn(set, &data, opt_timeout(opt, map), opt->cmdflags); + return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags); } static int @@ -376,91 +244,39 @@ bitmap_ipmac_uadt(struct ip_set *set, struct nlattr *tb[], { const struct bitmap_ipmac *map = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; - struct ipmac data; - u32 timeout = map->timeout; + struct bitmap_ipmac_adt_elem e = {}; + struct ip_set_ext ext = IP_SET_INIT_UEXT(map); + u32 ip; int ret = 0; if (unlikely(!tb[IPSET_ATTR_IP] || - !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT))) + !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES))) return -IPSET_ERR_PROTOCOL; if (tb[IPSET_ATTR_LINENO]) *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); - ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &data.id); + ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip) || + ip_set_get_extensions(set, tb, &ext); if (ret) return ret; - if (data.id < map->first_ip || data.id > map->last_ip) + if (ip < map->first_ip || ip > map->last_ip) return -IPSET_ERR_BITMAP_RANGE; + e.id = ip_to_id(map, ip); if (tb[IPSET_ATTR_ETHER]) - data.ether = nla_data(tb[IPSET_ATTR_ETHER]); + e.ether = nla_data(tb[IPSET_ATTR_ETHER]); else - data.ether = NULL; - - if (tb[IPSET_ATTR_TIMEOUT]) { - if (!with_timeout(map->timeout)) - return -IPSET_ERR_TIMEOUT; - timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); - } + e.ether = NULL; - data.id -= map->first_ip; - - ret = adtfn(set, &data, timeout, flags); + ret = adtfn(set, &e, &ext, &ext, flags); 
return ip_set_eexist(ret, flags) ? 0 : ret; } -static void -bitmap_ipmac_destroy(struct ip_set *set) -{ - struct bitmap_ipmac *map = set->data; - - if (with_timeout(map->timeout)) - del_timer_sync(&map->gc); - - ip_set_free(map->members); - kfree(map); - - set->data = NULL; -} - -static void -bitmap_ipmac_flush(struct ip_set *set) -{ - struct bitmap_ipmac *map = set->data; - - memset(map->members, 0, - (map->last_ip - map->first_ip + 1) * map->dsize); -} - -static int -bitmap_ipmac_head(struct ip_set *set, struct sk_buff *skb) -{ - const struct bitmap_ipmac *map = set->data; - struct nlattr *nested; - - nested = ipset_nest_start(skb, IPSET_ATTR_DATA); - if (!nested) - goto nla_put_failure; - if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, htonl(map->first_ip)) || - nla_put_ipaddr4(skb, IPSET_ATTR_IP_TO, htonl(map->last_ip)) || - nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)) || - nla_put_net32(skb, IPSET_ATTR_MEMSIZE, - htonl(sizeof(*map) + - ((map->last_ip - map->first_ip + 1) * - map->dsize))) || - (with_timeout(map->timeout) && - nla_put_net32(skb, IPSET_ATTR_TIMEOUT, htonl(map->timeout)))) - goto nla_put_failure; - ipset_nest_end(skb, nested); - - return 0; -nla_put_failure: - return -EMSGSIZE; -} - static bool bitmap_ipmac_same_set(const struct ip_set *a, const struct ip_set *b) { @@ -469,85 +285,64 @@ bitmap_ipmac_same_set(const struct ip_set *a, const struct ip_set *b) return x->first_ip == y->first_ip && x->last_ip == y->last_ip && - x->timeout == y->timeout; + x->timeout == y->timeout && + a->extensions == b->extensions; } -static const struct ip_set_type_variant bitmap_ipmac = { - .kadt = bitmap_ipmac_kadt, - .uadt = bitmap_ipmac_uadt, - .adt = { - [IPSET_ADD] = bitmap_ipmac_add, - [IPSET_DEL] = bitmap_ipmac_del, - [IPSET_TEST] = bitmap_ipmac_test, - }, - .destroy = bitmap_ipmac_destroy, - .flush = bitmap_ipmac_flush, - .head = bitmap_ipmac_head, - .list = bitmap_ipmac_list, - .same_set = bitmap_ipmac_same_set, -}; +/* Plain variant */ -static const struct ip_set_type_variant bitmap_tipmac = { - .kadt = bitmap_ipmac_kadt, - .uadt = bitmap_ipmac_uadt, - .adt = { - [IPSET_ADD] = bitmap_ipmac_tadd, - [IPSET_DEL] = bitmap_ipmac_tdel, - [IPSET_TEST] = bitmap_ipmac_ttest, - }, - .destroy = bitmap_ipmac_destroy, - .flush = bitmap_ipmac_flush, - .head = bitmap_ipmac_head, - .list = bitmap_ipmac_tlist, - .same_set = bitmap_ipmac_same_set, +/* Timeout variant */ + +struct bitmap_ipmact_elem { + struct { + unsigned char ether[ETH_ALEN]; + unsigned char filled; + } __attribute__ ((aligned)); + unsigned long timeout; }; -static void -bitmap_ipmac_gc(unsigned long ul_set) -{ - struct ip_set *set = (struct ip_set *) ul_set; - struct bitmap_ipmac *map = set->data; - struct ipmac_telem *elem; - u32 id, last = map->last_ip - map->first_ip; - - /* We run parallel with other readers (test element) - * but adding/deleting new entries is locked out */ - read_lock_bh(&set->lock); - for (id = 0; id <= last; id++) { - elem = bitmap_ipmac_elem(map, id); - if (elem->match == MAC_FILLED && - ip_set_timeout_expired(elem->timeout)) - elem->match = MAC_EMPTY; - } - read_unlock_bh(&set->lock); +/* Plain variant with counter */ - map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ; - add_timer(&map->gc); -} +struct bitmap_ipmacc_elem { + struct { + unsigned char ether[ETH_ALEN]; + unsigned char filled; + } __attribute__ ((aligned)); + struct ip_set_counter counter; +}; -static void -bitmap_ipmac_gc_init(struct ip_set *set) -{ - struct bitmap_ipmac *map = set->data; +/* Timeout variant with 
counter */ - init_timer(&map->gc); - map->gc.data = (unsigned long) set; - map->gc.function = bitmap_ipmac_gc; - map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ; - add_timer(&map->gc); -} +struct bitmap_ipmacct_elem { + struct { + unsigned char ether[ETH_ALEN]; + unsigned char filled; + } __attribute__ ((aligned)); + unsigned long timeout; + struct ip_set_counter counter; +}; + +#include "ip_set_bitmap_gen.h" /* Create bitmap:ip,mac type of sets */ static bool init_map_ipmac(struct ip_set *set, struct bitmap_ipmac *map, - u32 first_ip, u32 last_ip) + u32 first_ip, u32 last_ip, u32 elements) { map->members = ip_set_alloc((last_ip - first_ip + 1) * map->dsize); if (!map->members) return false; + if (map->dsize) { + map->extensions = ip_set_alloc(map->dsize * elements); + if (!map->extensions) { + kfree(map->members); + return false; + } + } map->first_ip = first_ip; map->last_ip = last_ip; + map->elements = elements; map->timeout = IPSET_NO_TIMEOUT; set->data = map; @@ -560,13 +355,14 @@ static int bitmap_ipmac_create(struct ip_set *set, struct nlattr *tb[], u32 flags) { - u32 first_ip, last_ip; + u32 first_ip, last_ip, cadt_flags = 0; u64 elements; struct bitmap_ipmac *map; int ret; if (unlikely(!tb[IPSET_ATTR_IP] || - !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT))) + !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS))) return -IPSET_ERR_PROTOCOL; ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &first_ip); @@ -601,28 +397,59 @@ bitmap_ipmac_create(struct ip_set *set, struct nlattr *tb[], if (!map) return -ENOMEM; - if (tb[IPSET_ATTR_TIMEOUT]) { - map->dsize = sizeof(struct ipmac_telem); + map->memsize = bitmap_bytes(0, elements - 1); + set->variant = &bitmap_ipmac; + if (tb[IPSET_ATTR_CADT_FLAGS]) + cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); + if (cadt_flags & IPSET_FLAG_WITH_COUNTERS) { + set->extensions |= IPSET_EXT_COUNTER; + if (tb[IPSET_ATTR_TIMEOUT]) { + map->dsize = sizeof(struct bitmap_ipmacct_elem); + map->offset[IPSET_OFFSET_TIMEOUT] = + offsetof(struct bitmap_ipmacct_elem, timeout); + map->offset[IPSET_OFFSET_COUNTER] = + offsetof(struct bitmap_ipmacct_elem, counter); + + if (!init_map_ipmac(set, map, first_ip, last_ip, + elements)) { + kfree(map); + return -ENOMEM; + } + map->timeout = ip_set_timeout_uget( + tb[IPSET_ATTR_TIMEOUT]); + set->extensions |= IPSET_EXT_TIMEOUT; + bitmap_ipmac_gc_init(set, bitmap_ipmac_gc); + } else { + map->dsize = sizeof(struct bitmap_ipmacc_elem); + map->offset[IPSET_OFFSET_COUNTER] = + offsetof(struct bitmap_ipmacc_elem, counter); + + if (!init_map_ipmac(set, map, first_ip, last_ip, + elements)) { + kfree(map); + return -ENOMEM; + } + } + } else if (tb[IPSET_ATTR_TIMEOUT]) { + map->dsize = sizeof(struct bitmap_ipmact_elem); + map->offset[IPSET_OFFSET_TIMEOUT] = + offsetof(struct bitmap_ipmact_elem, timeout); - if (!init_map_ipmac(set, map, first_ip, last_ip)) { + if (!init_map_ipmac(set, map, first_ip, last_ip, elements)) { kfree(map); return -ENOMEM; } - map->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); - - set->variant = &bitmap_tipmac; - - bitmap_ipmac_gc_init(set); + set->extensions |= IPSET_EXT_TIMEOUT; + bitmap_ipmac_gc_init(set, bitmap_ipmac_gc); } else { - map->dsize = sizeof(struct ipmac_elem); + map->dsize = sizeof(struct bitmap_ipmac_elem); - if (!init_map_ipmac(set, map, first_ip, last_ip)) { + if (!init_map_ipmac(set, map, first_ip, last_ip, elements)) { kfree(map); return -ENOMEM; } set->variant = &bitmap_ipmac; - } return 0; } @@ -641,6 
+468,7 @@ static struct ip_set_type bitmap_ipmac_type = { [IPSET_ATTR_IP_TO] = { .type = NLA_NESTED }, [IPSET_ATTR_CIDR] = { .type = NLA_U8 }, [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, + [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 }, }, .adt_policy = { [IPSET_ATTR_IP] = { .type = NLA_NESTED }, @@ -648,6 +476,8 @@ static struct ip_set_type bitmap_ipmac_type = { .len = ETH_ALEN }, [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, [IPSET_ATTR_LINENO] = { .type = NLA_U32 }, + [IPSET_ATTR_BYTES] = { .type = NLA_U64 }, + [IPSET_ATTR_PACKETS] = { .type = NLA_U64 }, }, .me = THIS_MODULE, }; diff --git a/net/netfilter/ipset/ip_set_bitmap_port.c b/net/netfilter/ipset/ip_set_bitmap_port.c index e6b2db76f4c3..8207d1fda528 100644 --- a/net/netfilter/ipset/ip_set_bitmap_port.c +++ b/net/netfilter/ipset/ip_set_bitmap_port.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2003-2011 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> +/* Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -19,205 +19,94 @@ #include <linux/netfilter/ipset/ip_set.h> #include <linux/netfilter/ipset/ip_set_bitmap.h> #include <linux/netfilter/ipset/ip_set_getport.h> -#define IP_SET_BITMAP_TIMEOUT -#include <linux/netfilter/ipset/ip_set_timeout.h> #define REVISION_MIN 0 -#define REVISION_MAX 0 +#define REVISION_MAX 1 /* Counter support added */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>"); IP_SET_MODULE_DESC("bitmap:port", REVISION_MIN, REVISION_MAX); MODULE_ALIAS("ip_set_bitmap:port"); +#define MTYPE bitmap_port + /* Type structure */ struct bitmap_port { void *members; /* the set members */ + void *extensions; /* data extensions */ u16 first_port; /* host byte order, included in range */ u16 last_port; /* host byte order, included in range */ + u32 elements; /* number of max elements in the set */ size_t memsize; /* members size */ + size_t dsize; /* extensions struct size */ + size_t offset[IPSET_OFFSET_MAX]; /* Offsets to extensions */ u32 timeout; /* timeout parameter */ struct timer_list gc; /* garbage collection */ }; -/* Base variant */ +/* ADT structure for generic function args */ +struct bitmap_port_adt_elem { + u16 id; +}; -static int -bitmap_port_test(struct ip_set *set, void *value, u32 timeout, u32 flags) +static inline u16 +port_to_id(const struct bitmap_port *m, u16 port) { - const struct bitmap_port *map = set->data; - u16 id = *(u16 *)value; - - return !!test_bit(id, map->members); + return port - m->first_port; } -static int -bitmap_port_add(struct ip_set *set, void *value, u32 timeout, u32 flags) -{ - struct bitmap_port *map = set->data; - u16 id = *(u16 *)value; - - if (test_and_set_bit(id, map->members)) - return -IPSET_ERR_EXIST; - - return 0; -} +/* Common functions */ -static int -bitmap_port_del(struct ip_set *set, void *value, u32 timeout, u32 flags) +static inline int +bitmap_port_do_test(const struct bitmap_port_adt_elem *e, + const struct bitmap_port *map) { - struct bitmap_port *map = set->data; - u16 id = *(u16 *)value; - - if (!test_and_clear_bit(id, map->members)) - return -IPSET_ERR_EXIST; - - return 0; + return !!test_bit(e->id, map->members); } -static int -bitmap_port_list(const struct ip_set *set, - struct sk_buff *skb, struct netlink_callback *cb) +static inline int +bitmap_port_gc_test(u16 id, const struct bitmap_port *map) { - const struct bitmap_port *map = set->data; - struct nlattr *atd, *nested; - u16 id, first = 
cb->args[2]; - u16 last = map->last_port - map->first_port; - - atd = ipset_nest_start(skb, IPSET_ATTR_ADT); - if (!atd) - return -EMSGSIZE; - for (; cb->args[2] <= last; cb->args[2]++) { - id = cb->args[2]; - if (!test_bit(id, map->members)) - continue; - nested = ipset_nest_start(skb, IPSET_ATTR_DATA); - if (!nested) { - if (id == first) { - nla_nest_cancel(skb, atd); - return -EMSGSIZE; - } else - goto nla_put_failure; - } - if (nla_put_net16(skb, IPSET_ATTR_PORT, - htons(map->first_port + id))) - goto nla_put_failure; - ipset_nest_end(skb, nested); - } - ipset_nest_end(skb, atd); - /* Set listing finished */ - cb->args[2] = 0; - - return 0; - -nla_put_failure: - nla_nest_cancel(skb, nested); - ipset_nest_end(skb, atd); - if (unlikely(id == first)) { - cb->args[2] = 0; - return -EMSGSIZE; - } - return 0; + return !!test_bit(id, map->members); } -/* Timeout variant */ - -static int -bitmap_port_ttest(struct ip_set *set, void *value, u32 timeout, u32 flags) +static inline int +bitmap_port_do_add(const struct bitmap_port_adt_elem *e, + struct bitmap_port *map, u32 flags) { - const struct bitmap_port *map = set->data; - const unsigned long *members = map->members; - u16 id = *(u16 *)value; - - return ip_set_timeout_test(members[id]); + return !!test_and_set_bit(e->id, map->members); } -static int -bitmap_port_tadd(struct ip_set *set, void *value, u32 timeout, u32 flags) +static inline int +bitmap_port_do_del(const struct bitmap_port_adt_elem *e, + struct bitmap_port *map) { - struct bitmap_port *map = set->data; - unsigned long *members = map->members; - u16 id = *(u16 *)value; - - if (ip_set_timeout_test(members[id]) && !(flags & IPSET_FLAG_EXIST)) - return -IPSET_ERR_EXIST; - - members[id] = ip_set_timeout_set(timeout); - - return 0; + return !test_and_clear_bit(e->id, map->members); } -static int -bitmap_port_tdel(struct ip_set *set, void *value, u32 timeout, u32 flags) +static inline int +bitmap_port_do_list(struct sk_buff *skb, const struct bitmap_port *map, u32 id) { - struct bitmap_port *map = set->data; - unsigned long *members = map->members; - u16 id = *(u16 *)value; - int ret = -IPSET_ERR_EXIST; - - if (ip_set_timeout_test(members[id])) - ret = 0; - - members[id] = IPSET_ELEM_UNSET; - return ret; + return nla_put_net16(skb, IPSET_ATTR_PORT, + htons(map->first_port + id)); } -static int -bitmap_port_tlist(const struct ip_set *set, - struct sk_buff *skb, struct netlink_callback *cb) +static inline int +bitmap_port_do_head(struct sk_buff *skb, const struct bitmap_port *map) { - const struct bitmap_port *map = set->data; - struct nlattr *adt, *nested; - u16 id, first = cb->args[2]; - u16 last = map->last_port - map->first_port; - const unsigned long *members = map->members; - - adt = ipset_nest_start(skb, IPSET_ATTR_ADT); - if (!adt) - return -EMSGSIZE; - for (; cb->args[2] <= last; cb->args[2]++) { - id = cb->args[2]; - if (!ip_set_timeout_test(members[id])) - continue; - nested = ipset_nest_start(skb, IPSET_ATTR_DATA); - if (!nested) { - if (id == first) { - nla_nest_cancel(skb, adt); - return -EMSGSIZE; - } else - goto nla_put_failure; - } - if (nla_put_net16(skb, IPSET_ATTR_PORT, - htons(map->first_port + id)) || - nla_put_net32(skb, IPSET_ATTR_TIMEOUT, - htonl(ip_set_timeout_get(members[id])))) - goto nla_put_failure; - ipset_nest_end(skb, nested); - } - ipset_nest_end(skb, adt); - - /* Set listing finished */ - cb->args[2] = 0; - - return 0; - -nla_put_failure: - nla_nest_cancel(skb, nested); - ipset_nest_end(skb, adt); - if (unlikely(id == first)) { - cb->args[2] = 0; - return 
-EMSGSIZE; - } - return 0; + return nla_put_net16(skb, IPSET_ATTR_PORT, htons(map->first_port)) || + nla_put_net16(skb, IPSET_ATTR_PORT_TO, htons(map->last_port)); } static int bitmap_port_kadt(struct ip_set *set, const struct sk_buff *skb, const struct xt_action_param *par, - enum ipset_adt adt, const struct ip_set_adt_opt *opt) + enum ipset_adt adt, struct ip_set_adt_opt *opt) { struct bitmap_port *map = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; + struct bitmap_port_adt_elem e = {}; + struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, map); __be16 __port; u16 port = 0; @@ -230,9 +119,9 @@ bitmap_port_kadt(struct ip_set *set, const struct sk_buff *skb, if (port < map->first_port || port > map->last_port) return -IPSET_ERR_BITMAP_RANGE; - port -= map->first_port; + e.id = port_to_id(map, port); - return adtfn(set, &port, opt_timeout(opt, map), opt->cmdflags); + return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags); } static int @@ -241,14 +130,17 @@ bitmap_port_uadt(struct ip_set *set, struct nlattr *tb[], { struct bitmap_port *map = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; - u32 timeout = map->timeout; + struct bitmap_port_adt_elem e = {}; + struct ip_set_ext ext = IP_SET_INIT_UEXT(map); u32 port; /* wraparound */ - u16 id, port_to; + u16 port_to; int ret = 0; if (unlikely(!ip_set_attr_netorder(tb, IPSET_ATTR_PORT) || !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT))) + !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES))) return -IPSET_ERR_PROTOCOL; if (tb[IPSET_ATTR_LINENO]) @@ -257,16 +149,13 @@ bitmap_port_uadt(struct ip_set *set, struct nlattr *tb[], port = ip_set_get_h16(tb[IPSET_ATTR_PORT]); if (port < map->first_port || port > map->last_port) return -IPSET_ERR_BITMAP_RANGE; - - if (tb[IPSET_ATTR_TIMEOUT]) { - if (!with_timeout(map->timeout)) - return -IPSET_ERR_TIMEOUT; - timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); - } + ret = ip_set_get_extensions(set, tb, &ext); + if (ret) + return ret; if (adt == IPSET_TEST) { - id = port - map->first_port; - return adtfn(set, &id, timeout, flags); + e.id = port_to_id(map, port); + return adtfn(set, &e, &ext, &ext, flags); } if (tb[IPSET_ATTR_PORT_TO]) { @@ -283,8 +172,8 @@ bitmap_port_uadt(struct ip_set *set, struct nlattr *tb[], return -IPSET_ERR_BITMAP_RANGE; for (; port <= port_to; port++) { - id = port - map->first_port; - ret = adtfn(set, &id, timeout, flags); + e.id = port_to_id(map, port); + ret = adtfn(set, &e, &ext, &ext, flags); if (ret && !ip_set_eexist(ret, flags)) return ret; @@ -294,52 +183,6 @@ bitmap_port_uadt(struct ip_set *set, struct nlattr *tb[], return ret; } -static void -bitmap_port_destroy(struct ip_set *set) -{ - struct bitmap_port *map = set->data; - - if (with_timeout(map->timeout)) - del_timer_sync(&map->gc); - - ip_set_free(map->members); - kfree(map); - - set->data = NULL; -} - -static void -bitmap_port_flush(struct ip_set *set) -{ - struct bitmap_port *map = set->data; - - memset(map->members, 0, map->memsize); -} - -static int -bitmap_port_head(struct ip_set *set, struct sk_buff *skb) -{ - const struct bitmap_port *map = set->data; - struct nlattr *nested; - - nested = ipset_nest_start(skb, IPSET_ATTR_DATA); - if (!nested) - goto nla_put_failure; - if (nla_put_net16(skb, IPSET_ATTR_PORT, htons(map->first_port)) || - nla_put_net16(skb, IPSET_ATTR_PORT_TO, htons(map->last_port)) || - nla_put_net32(skb, 
IPSET_ATTR_REFERENCES, htonl(set->ref - 1)) || - nla_put_net32(skb, IPSET_ATTR_MEMSIZE, - htonl(sizeof(*map) + map->memsize)) || - (with_timeout(map->timeout) && - nla_put_net32(skb, IPSET_ATTR_TIMEOUT, htonl(map->timeout)))) - goto nla_put_failure; - ipset_nest_end(skb, nested); - - return 0; -nla_put_failure: - return -EMSGSIZE; -} - static bool bitmap_port_same_set(const struct ip_set *a, const struct ip_set *b) { @@ -348,71 +191,35 @@ bitmap_port_same_set(const struct ip_set *a, const struct ip_set *b) return x->first_port == y->first_port && x->last_port == y->last_port && - x->timeout == y->timeout; + x->timeout == y->timeout && + a->extensions == b->extensions; } -static const struct ip_set_type_variant bitmap_port = { - .kadt = bitmap_port_kadt, - .uadt = bitmap_port_uadt, - .adt = { - [IPSET_ADD] = bitmap_port_add, - [IPSET_DEL] = bitmap_port_del, - [IPSET_TEST] = bitmap_port_test, - }, - .destroy = bitmap_port_destroy, - .flush = bitmap_port_flush, - .head = bitmap_port_head, - .list = bitmap_port_list, - .same_set = bitmap_port_same_set, +/* Plain variant */ + +struct bitmap_port_elem { }; -static const struct ip_set_type_variant bitmap_tport = { - .kadt = bitmap_port_kadt, - .uadt = bitmap_port_uadt, - .adt = { - [IPSET_ADD] = bitmap_port_tadd, - [IPSET_DEL] = bitmap_port_tdel, - [IPSET_TEST] = bitmap_port_ttest, - }, - .destroy = bitmap_port_destroy, - .flush = bitmap_port_flush, - .head = bitmap_port_head, - .list = bitmap_port_tlist, - .same_set = bitmap_port_same_set, +/* Timeout variant */ + +struct bitmap_portt_elem { + unsigned long timeout; }; -static void -bitmap_port_gc(unsigned long ul_set) -{ - struct ip_set *set = (struct ip_set *) ul_set; - struct bitmap_port *map = set->data; - unsigned long *table = map->members; - u32 id; /* wraparound */ - u16 last = map->last_port - map->first_port; - - /* We run parallel with other readers (test element) - * but adding/deleting new entries is locked out */ - read_lock_bh(&set->lock); - for (id = 0; id <= last; id++) - if (ip_set_timeout_expired(table[id])) - table[id] = IPSET_ELEM_UNSET; - read_unlock_bh(&set->lock); - - map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ; - add_timer(&map->gc); -} +/* Plain variant with counter */ -static void -bitmap_port_gc_init(struct ip_set *set) -{ - struct bitmap_port *map = set->data; +struct bitmap_portc_elem { + struct ip_set_counter counter; +}; - init_timer(&map->gc); - map->gc.data = (unsigned long) set; - map->gc.function = bitmap_port_gc; - map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ; - add_timer(&map->gc); -} +/* Timeout variant with counter */ + +struct bitmap_portct_elem { + unsigned long timeout; + struct ip_set_counter counter; +}; + +#include "ip_set_bitmap_gen.h" /* Create bitmap:ip type of sets */ @@ -423,6 +230,13 @@ init_map_port(struct ip_set *set, struct bitmap_port *map, map->members = ip_set_alloc(map->memsize); if (!map->members) return false; + if (map->dsize) { + map->extensions = ip_set_alloc(map->dsize * map->elements); + if (!map->extensions) { + kfree(map->members); + return false; + } + } map->first_port = first_port; map->last_port = last_port; map->timeout = IPSET_NO_TIMEOUT; @@ -434,15 +248,16 @@ init_map_port(struct ip_set *set, struct bitmap_port *map, } static int -bitmap_port_create(struct ip_set *set, struct nlattr *tb[], - u32 flags) +bitmap_port_create(struct ip_set *set, struct nlattr *tb[], u32 flags) { struct bitmap_port *map; u16 first_port, last_port; + u32 cadt_flags = 0; if (unlikely(!ip_set_attr_netorder(tb, 
IPSET_ATTR_PORT) || !ip_set_attr_netorder(tb, IPSET_ATTR_PORT_TO) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT))) + !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS))) return -IPSET_ERR_PROTOCOL; first_port = ip_set_get_h16(tb[IPSET_ATTR_PORT]); @@ -458,28 +273,56 @@ bitmap_port_create(struct ip_set *set, struct nlattr *tb[], if (!map) return -ENOMEM; - if (tb[IPSET_ATTR_TIMEOUT]) { - map->memsize = (last_port - first_port + 1) - * sizeof(unsigned long); - + map->elements = last_port - first_port + 1; + map->memsize = map->elements * sizeof(unsigned long); + set->variant = &bitmap_port; + if (tb[IPSET_ATTR_CADT_FLAGS]) + cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); + if (cadt_flags & IPSET_FLAG_WITH_COUNTERS) { + set->extensions |= IPSET_EXT_COUNTER; + if (tb[IPSET_ATTR_TIMEOUT]) { + map->dsize = sizeof(struct bitmap_portct_elem); + map->offset[IPSET_OFFSET_TIMEOUT] = + offsetof(struct bitmap_portct_elem, timeout); + map->offset[IPSET_OFFSET_COUNTER] = + offsetof(struct bitmap_portct_elem, counter); + if (!init_map_port(set, map, first_port, last_port)) { + kfree(map); + return -ENOMEM; + } + + map->timeout = + ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); + set->extensions |= IPSET_EXT_TIMEOUT; + bitmap_port_gc_init(set, bitmap_port_gc); + } else { + map->dsize = sizeof(struct bitmap_portc_elem); + map->offset[IPSET_OFFSET_COUNTER] = + offsetof(struct bitmap_portc_elem, counter); + if (!init_map_port(set, map, first_port, last_port)) { + kfree(map); + return -ENOMEM; + } + } + } else if (tb[IPSET_ATTR_TIMEOUT]) { + map->dsize = sizeof(struct bitmap_portt_elem); + map->offset[IPSET_OFFSET_TIMEOUT] = + offsetof(struct bitmap_portt_elem, timeout); if (!init_map_port(set, map, first_port, last_port)) { kfree(map); return -ENOMEM; } map->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); - set->variant = &bitmap_tport; - - bitmap_port_gc_init(set); + set->extensions |= IPSET_EXT_TIMEOUT; + bitmap_port_gc_init(set, bitmap_port_gc); } else { - map->memsize = bitmap_bytes(0, last_port - first_port); - pr_debug("memsize: %zu\n", map->memsize); + map->dsize = 0; if (!init_map_port(set, map, first_port, last_port)) { kfree(map); return -ENOMEM; } - set->variant = &bitmap_port; } return 0; } @@ -497,12 +340,15 @@ static struct ip_set_type bitmap_port_type = { [IPSET_ATTR_PORT] = { .type = NLA_U16 }, [IPSET_ATTR_PORT_TO] = { .type = NLA_U16 }, [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, + [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 }, }, .adt_policy = { [IPSET_ATTR_PORT] = { .type = NLA_U16 }, [IPSET_ATTR_PORT_TO] = { .type = NLA_U16 }, [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, [IPSET_ATTR_LINENO] = { .type = NLA_U32 }, + [IPSET_ATTR_BYTES] = { .type = NLA_U64 }, + [IPSET_ATTR_PACKETS] = { .type = NLA_U64 }, }, .me = THIS_MODULE, }; diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index 6d6d8f2b033e..f77139007983 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -1,6 +1,6 @@ /* Copyright (C) 2000-2002 Joakim Axelsson <gozem@linux.nu> * Patrick Schaaf <bof@bof.de> - * Copyright (C) 2003-2011 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> + * Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -15,7 +15,6 @@ #include <linux/ip.h> #include <linux/skbuff.h> #include <linux/spinlock.h> -#include 
<linux/netlink.h> #include <linux/rculist.h> #include <net/netlink.h> @@ -88,14 +87,14 @@ find_set_type(const char *name, u8 family, u8 revision) static bool load_settype(const char *name) { - nfnl_unlock(); + nfnl_unlock(NFNL_SUBSYS_IPSET); pr_debug("try to load ip_set_%s\n", name); if (request_module("ip_set_%s", name) < 0) { pr_warning("Can't find ip_set type %s\n", name); - nfnl_lock(); + nfnl_lock(NFNL_SUBSYS_IPSET); return false; } - nfnl_lock(); + nfnl_lock(NFNL_SUBSYS_IPSET); return true; } @@ -316,6 +315,29 @@ ip_set_get_ipaddr6(struct nlattr *nla, union nf_inet_addr *ipaddr) } EXPORT_SYMBOL_GPL(ip_set_get_ipaddr6); +int +ip_set_get_extensions(struct ip_set *set, struct nlattr *tb[], + struct ip_set_ext *ext) +{ + if (tb[IPSET_ATTR_TIMEOUT]) { + if (!(set->extensions & IPSET_EXT_TIMEOUT)) + return -IPSET_ERR_TIMEOUT; + ext->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); + } + if (tb[IPSET_ATTR_BYTES] || tb[IPSET_ATTR_PACKETS]) { + if (!(set->extensions & IPSET_EXT_COUNTER)) + return -IPSET_ERR_COUNTER; + if (tb[IPSET_ATTR_BYTES]) + ext->bytes = be64_to_cpu(nla_get_be64( + tb[IPSET_ATTR_BYTES])); + if (tb[IPSET_ATTR_PACKETS]) + ext->packets = be64_to_cpu(nla_get_be64( + tb[IPSET_ATTR_PACKETS])); + } + return 0; +} +EXPORT_SYMBOL_GPL(ip_set_get_extensions); + /* * Creating/destroying/renaming/swapping affect the existence and * the properties of a set. All of these can be executed from userspace @@ -366,8 +388,7 @@ ip_set_rcu_get(ip_set_id_t index) int ip_set_test(ip_set_id_t index, const struct sk_buff *skb, - const struct xt_action_param *par, - const struct ip_set_adt_opt *opt) + const struct xt_action_param *par, struct ip_set_adt_opt *opt) { struct ip_set *set = ip_set_rcu_get(index); int ret = 0; @@ -392,7 +413,7 @@ ip_set_test(ip_set_id_t index, const struct sk_buff *skb, ret = 1; } else { /* --return-nomatch: invert matched element */ - if ((opt->flags & IPSET_RETURN_NOMATCH) && + if ((opt->cmdflags & IPSET_FLAG_RETURN_NOMATCH) && (set->type->features & IPSET_TYPE_NOMATCH) && (ret > 0 || ret == -ENOTEMPTY)) ret = -ret; @@ -405,8 +426,7 @@ EXPORT_SYMBOL_GPL(ip_set_test); int ip_set_add(ip_set_id_t index, const struct sk_buff *skb, - const struct xt_action_param *par, - const struct ip_set_adt_opt *opt) + const struct xt_action_param *par, struct ip_set_adt_opt *opt) { struct ip_set *set = ip_set_rcu_get(index); int ret; @@ -428,8 +448,7 @@ EXPORT_SYMBOL_GPL(ip_set_add); int ip_set_del(ip_set_id_t index, const struct sk_buff *skb, - const struct xt_action_param *par, - const struct ip_set_adt_opt *opt) + const struct xt_action_param *par, struct ip_set_adt_opt *opt) { struct ip_set *set = ip_set_rcu_get(index); int ret = 0; @@ -532,7 +551,7 @@ ip_set_nfnl_get(const char *name) ip_set_id_t i, index = IPSET_INVALID_ID; struct ip_set *s; - nfnl_lock(); + nfnl_lock(NFNL_SUBSYS_IPSET); for (i = 0; i < ip_set_max; i++) { s = nfnl_set(i); if (s != NULL && STREQ(s->name, name)) { @@ -541,7 +560,7 @@ ip_set_nfnl_get(const char *name) break; } } - nfnl_unlock(); + nfnl_unlock(NFNL_SUBSYS_IPSET); return index; } @@ -561,13 +580,13 @@ ip_set_nfnl_get_byindex(ip_set_id_t index) if (index > ip_set_max) return IPSET_INVALID_ID; - nfnl_lock(); + nfnl_lock(NFNL_SUBSYS_IPSET); set = nfnl_set(index); if (set) __ip_set_get(set); else index = IPSET_INVALID_ID; - nfnl_unlock(); + nfnl_unlock(NFNL_SUBSYS_IPSET); return index; } @@ -584,11 +603,11 @@ void ip_set_nfnl_put(ip_set_id_t index) { struct ip_set *set; - nfnl_lock(); + nfnl_lock(NFNL_SUBSYS_IPSET); set = nfnl_set(index); if (set != 
NULL) __ip_set_put(set); - nfnl_unlock(); + nfnl_unlock(NFNL_SUBSYS_IPSET); } EXPORT_SYMBOL_GPL(ip_set_nfnl_put); @@ -1085,7 +1104,7 @@ static int dump_init(struct netlink_callback *cb) { struct nlmsghdr *nlh = nlmsg_hdr(cb->skb); - int min_len = NLMSG_SPACE(sizeof(struct nfgenmsg)); + int min_len = nlmsg_total_size(sizeof(struct nfgenmsg)); struct nlattr *cda[IPSET_ATTR_CMD_MAX+1]; struct nlattr *attr = (void *)nlh + min_len; u32 dump_type; @@ -1301,7 +1320,7 @@ call_ad(struct sock *ctnl, struct sk_buff *skb, struct ip_set *set, struct sk_buff *skb2; struct nlmsgerr *errmsg; size_t payload = sizeof(*errmsg) + nlmsg_len(nlh); - int min_len = NLMSG_SPACE(sizeof(struct nfgenmsg)); + int min_len = nlmsg_total_size(sizeof(struct nfgenmsg)); struct nlattr *cda[IPSET_ATTR_CMD_MAX+1]; struct nlattr *cmdattr; u32 *errline; @@ -1470,7 +1489,8 @@ ip_set_utest(struct sock *ctnl, struct sk_buff *skb, if (ret == -EAGAIN) ret = 1; - return ret < 0 ? ret : ret > 0 ? 0 : -IPSET_ERR_EXIST; + return (ret < 0 && ret != -ENOTEMPTY) ? ret : + ret > 0 ? 0 : -IPSET_ERR_EXIST; } /* Get headed data of a set */ @@ -1763,10 +1783,10 @@ ip_set_sockfn_get(struct sock *sk, int optval, void __user *user, int *len) goto done; } req_get->set.name[IPSET_MAXNAMELEN - 1] = '\0'; - nfnl_lock(); + nfnl_lock(NFNL_SUBSYS_IPSET); find_set_and_id(req_get->set.name, &id); req_get->set.index = id; - nfnl_unlock(); + nfnl_unlock(NFNL_SUBSYS_IPSET); goto copy; } case IP_SET_OP_GET_BYINDEX: { @@ -1778,11 +1798,11 @@ ip_set_sockfn_get(struct sock *sk, int optval, void __user *user, int *len) ret = -EINVAL; goto done; } - nfnl_lock(); + nfnl_lock(NFNL_SUBSYS_IPSET); set = nfnl_set(req_get->set.index); strncpy(req_get->set.name, set ? set->name : "", IPSET_MAXNAMELEN); - nfnl_unlock(); + nfnl_unlock(NFNL_SUBSYS_IPSET); goto copy; } default: diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h new file mode 100644 index 000000000000..57beb1762b2d --- /dev/null +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -0,0 +1,1100 @@ +/* Copyright (C) 2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef _IP_SET_HASH_GEN_H +#define _IP_SET_HASH_GEN_H + +#include <linux/rcupdate.h> +#include <linux/jhash.h> +#include <linux/netfilter/ipset/ip_set_timeout.h> +#ifndef rcu_dereference_bh +#define rcu_dereference_bh(p) rcu_dereference(p) +#endif + +#define CONCAT(a, b) a##b +#define TOKEN(a, b) CONCAT(a, b) + +/* Hashing which uses arrays to resolve clashing. The hash table is resized + * (doubled) when searching becomes too long. + * Internally jhash is used with the assumption that the size of the + * stored data is a multiple of sizeof(u32). If storage supports timeout, + * the timeout field must be the last one in the data structure - that field + * is ignored when computing the hash key. + * + * Readers and resizing + * + * Resizing can be triggered by userspace command only, and those + * are serialized by the nfnl mutex. During resizing the set is + * read-locked, so the only possible concurrent operations are + * the kernel side readers. Those must be protected by proper RCU locking. 
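+ *
+ * As an illustrative sketch (editorial addition, not part of the
+ * original comment), the reader side follows the usual RCU pattern,
+ * using the helpers defined later in this file:
+ *
+ *	rcu_read_lock_bh();
+ *	t = rcu_dereference_bh(h->table);
+ *	n = hbucket(t, HKEY(d, h->initval, t->htable_bits));
+ *	... scan slots 0 .. n->pos-1 of n->value for a match ...
+ *	rcu_read_unlock_bh();
+ *
+ * mtype_resize() publishes the doubled table with rcu_assign_pointer()
+ * and waits in synchronize_rcu_bh() before destroying the old table,
+ * so a reader always sees either the old or the new table in full.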
+ */ + +/* Number of elements to store in an initial array block */ +#define AHASH_INIT_SIZE 4 +/* Max number of elements to store in an array block */ +#define AHASH_MAX_SIZE (3*AHASH_INIT_SIZE) + +/* Max number of elements can be tuned */ +#ifdef IP_SET_HASH_WITH_MULTI +#define AHASH_MAX(h) ((h)->ahash_max) + +static inline u8 +tune_ahash_max(u8 curr, u32 multi) +{ + u32 n; + + if (multi < curr) + return curr; + + n = curr + AHASH_INIT_SIZE; + /* Currently, at listing one hash bucket must fit into a message. + * Therefore we have a hard limit here. + */ + return n > curr && n <= 64 ? n : curr; +} +#define TUNE_AHASH_MAX(h, multi) \ + ((h)->ahash_max = tune_ahash_max((h)->ahash_max, multi)) +#else +#define AHASH_MAX(h) AHASH_MAX_SIZE +#define TUNE_AHASH_MAX(h, multi) +#endif + +/* A hash bucket */ +struct hbucket { + void *value; /* the array of the values */ + u8 size; /* size of the array */ + u8 pos; /* position of the first free entry */ +}; + +/* The hash table: the table size stored here in order to make resizing easy */ +struct htable { + u8 htable_bits; /* size of hash table == 2^htable_bits */ + struct hbucket bucket[0]; /* hashtable buckets */ +}; + +#define hbucket(h, i) (&((h)->bucket[i])) + +/* Book-keeping of the prefixes added to the set */ +struct net_prefixes { + u8 cidr; /* the different cidr values in the set */ + u32 nets; /* number of elements per cidr */ +}; + +/* Compute the hash table size */ +static size_t +htable_size(u8 hbits) +{ + size_t hsize; + + /* We must fit both into u32 in jhash and size_t */ + if (hbits > 31) + return 0; + hsize = jhash_size(hbits); + if ((((size_t)-1) - sizeof(struct htable))/sizeof(struct hbucket) + < hsize) + return 0; + + return hsize * sizeof(struct hbucket) + sizeof(struct htable); +} + +/* Compute htable_bits from the user input parameter hashsize */ +static u8 +htable_bits(u32 hashsize) +{ + /* Assume that hashsize == 2^htable_bits */ + u8 bits = fls(hashsize - 1); + if (jhash_size(bits) != hashsize) + /* Round up to the first 2^n value */ + bits = fls(hashsize); + + return bits; +} + +/* Destroy the hashtable part of the set */ +static void +ahash_destroy(struct htable *t) +{ + struct hbucket *n; + u32 i; + + for (i = 0; i < jhash_size(t->htable_bits); i++) { + n = hbucket(t, i); + if (n->size) + /* FIXME: use slab cache */ + kfree(n->value); + } + + ip_set_free(t); +} + +static int +hbucket_elem_add(struct hbucket *n, u8 ahash_max, size_t dsize) +{ + if (n->pos >= n->size) { + void *tmp; + + if (n->size >= ahash_max) + /* Trigger rehashing */ + return -EAGAIN; + + tmp = kzalloc((n->size + AHASH_INIT_SIZE) * dsize, + GFP_ATOMIC); + if (!tmp) + return -ENOMEM; + if (n->size) { + memcpy(tmp, n->value, n->size * dsize); + kfree(n->value); + } + n->value = tmp; + n->size += AHASH_INIT_SIZE; + } + return 0; +} + +#ifdef IP_SET_HASH_WITH_NETS +#ifdef IP_SET_HASH_WITH_NETS_PACKED +/* When cidr is packed with nomatch, cidr - 1 is stored in the entry */ +#define CIDR(cidr) (cidr + 1) +#else +#define CIDR(cidr) (cidr) +#endif + +#define SET_HOST_MASK(family) (family == AF_INET ? 
32 : 128) + +#ifdef IP_SET_HASH_WITH_MULTI +#define NETS_LENGTH(family) (SET_HOST_MASK(family) + 1) +#else +#define NETS_LENGTH(family) SET_HOST_MASK(family) +#endif + +#else +#define NETS_LENGTH(family) 0 +#endif /* IP_SET_HASH_WITH_NETS */ + +#define ext_timeout(e, h) \ +(unsigned long *)(((void *)(e)) + (h)->offset[IPSET_OFFSET_TIMEOUT]) +#define ext_counter(e, h) \ +(struct ip_set_counter *)(((void *)(e)) + (h)->offset[IPSET_OFFSET_COUNTER]) + +#endif /* _IP_SET_HASH_GEN_H */ + +/* Family dependent templates */ + +#undef ahash_data +#undef mtype_data_equal +#undef mtype_do_data_match +#undef mtype_data_set_flags +#undef mtype_data_reset_flags +#undef mtype_data_netmask +#undef mtype_data_list +#undef mtype_data_next +#undef mtype_elem + +#undef mtype_add_cidr +#undef mtype_del_cidr +#undef mtype_ahash_memsize +#undef mtype_flush +#undef mtype_destroy +#undef mtype_gc_init +#undef mtype_same_set +#undef mtype_kadt +#undef mtype_uadt +#undef mtype + +#undef mtype_add +#undef mtype_del +#undef mtype_test_cidrs +#undef mtype_test +#undef mtype_expire +#undef mtype_resize +#undef mtype_head +#undef mtype_list +#undef mtype_gc +#undef mtype_gc_init +#undef mtype_variant +#undef mtype_data_match + +#undef HKEY + +#define mtype_data_equal TOKEN(MTYPE, _data_equal) +#ifdef IP_SET_HASH_WITH_NETS +#define mtype_do_data_match TOKEN(MTYPE, _do_data_match) +#else +#define mtype_do_data_match(d) 1 +#endif +#define mtype_data_set_flags TOKEN(MTYPE, _data_set_flags) +#define mtype_data_reset_flags TOKEN(MTYPE, _data_reset_flags) +#define mtype_data_netmask TOKEN(MTYPE, _data_netmask) +#define mtype_data_list TOKEN(MTYPE, _data_list) +#define mtype_data_next TOKEN(MTYPE, _data_next) +#define mtype_elem TOKEN(MTYPE, _elem) +#define mtype_add_cidr TOKEN(MTYPE, _add_cidr) +#define mtype_del_cidr TOKEN(MTYPE, _del_cidr) +#define mtype_ahash_memsize TOKEN(MTYPE, _ahash_memsize) +#define mtype_flush TOKEN(MTYPE, _flush) +#define mtype_destroy TOKEN(MTYPE, _destroy) +#define mtype_gc_init TOKEN(MTYPE, _gc_init) +#define mtype_same_set TOKEN(MTYPE, _same_set) +#define mtype_kadt TOKEN(MTYPE, _kadt) +#define mtype_uadt TOKEN(MTYPE, _uadt) +#define mtype MTYPE + +#define mtype_elem TOKEN(MTYPE, _elem) +#define mtype_add TOKEN(MTYPE, _add) +#define mtype_del TOKEN(MTYPE, _del) +#define mtype_test_cidrs TOKEN(MTYPE, _test_cidrs) +#define mtype_test TOKEN(MTYPE, _test) +#define mtype_expire TOKEN(MTYPE, _expire) +#define mtype_resize TOKEN(MTYPE, _resize) +#define mtype_head TOKEN(MTYPE, _head) +#define mtype_list TOKEN(MTYPE, _list) +#define mtype_gc TOKEN(MTYPE, _gc) +#define mtype_variant TOKEN(MTYPE, _variant) +#define mtype_data_match TOKEN(MTYPE, _data_match) + +#ifndef HKEY_DATALEN +#define HKEY_DATALEN sizeof(struct mtype_elem) +#endif + +#define HKEY(data, initval, htable_bits) \ +(jhash2((u32 *)(data), HKEY_DATALEN/sizeof(u32), initval) \ + & jhash_mask(htable_bits)) + +#ifndef htype +#define htype HTYPE + +/* The generic hash structure */ +struct htype { + struct htable *table; /* the hash table */ + u32 maxelem; /* max elements in the hash */ + u32 elements; /* current element (vs timeout) */ + u32 initval; /* random jhash init value */ + u32 timeout; /* timeout value, if enabled */ + size_t dsize; /* data struct size */ + size_t offset[IPSET_OFFSET_MAX]; /* Offsets to extensions */ + struct timer_list gc; /* garbage collection when timeout enabled */ + struct mtype_elem next; /* temporary storage for uadd */ +#ifdef IP_SET_HASH_WITH_MULTI + u8 ahash_max; /* max elements in an array block */ +#endif 
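+	/* Editorial sketch, not in the original patch: dsize and offset[]
+	 * above are what the ext_timeout()/ext_counter() macros use to
+	 * reach the extension fields of the per-variant element structs.
+	 * It mirrors what TOKEN(HTYPE, _create)() below sets up, e.g. for
+	 * the counter+timeout variant:
+	 *
+	 *	h->dsize = sizeof(struct hash_ip4ct_elem);
+	 *	h->offset[IPSET_OFFSET_TIMEOUT] =
+	 *		offsetof(struct hash_ip4ct_elem, timeout);
+	 *	h->offset[IPSET_OFFSET_COUNTER] =
+	 *		offsetof(struct hash_ip4ct_elem, counter);
+	 */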
+#ifdef IP_SET_HASH_WITH_NETMASK + u8 netmask; /* netmask value for subnets to store */ +#endif +#ifdef IP_SET_HASH_WITH_RBTREE + struct rb_root rbtree; +#endif +#ifdef IP_SET_HASH_WITH_NETS + struct net_prefixes nets[0]; /* book-keeping of prefixes */ +#endif +}; +#endif + +#ifdef IP_SET_HASH_WITH_NETS +/* Network cidr size book keeping when the hash stores different + * sized networks */ +static void +mtype_add_cidr(struct htype *h, u8 cidr, u8 nets_length) +{ + int i, j; + + /* Add in increasing prefix order, so larger cidr first */ + for (i = 0, j = -1; i < nets_length && h->nets[i].nets; i++) { + if (j != -1) + continue; + else if (h->nets[i].cidr < cidr) + j = i; + else if (h->nets[i].cidr == cidr) { + h->nets[i].nets++; + return; + } + } + if (j != -1) { + for (; i > j; i--) { + h->nets[i].cidr = h->nets[i - 1].cidr; + h->nets[i].nets = h->nets[i - 1].nets; + } + } + h->nets[i].cidr = cidr; + h->nets[i].nets = 1; +} + +static void +mtype_del_cidr(struct htype *h, u8 cidr, u8 nets_length) +{ + u8 i, j; + + for (i = 0; i < nets_length - 1 && h->nets[i].cidr != cidr; i++) + ; + h->nets[i].nets--; + + if (h->nets[i].nets != 0) + return; + + for (j = i; j < nets_length - 1 && h->nets[j].nets; j++) { + h->nets[j].cidr = h->nets[j + 1].cidr; + h->nets[j].nets = h->nets[j + 1].nets; + } +} +#endif + +/* Calculate the actual memory size of the set data */ +static size_t +mtype_ahash_memsize(const struct htype *h, u8 nets_length) +{ + u32 i; + struct htable *t = h->table; + size_t memsize = sizeof(*h) + + sizeof(*t) +#ifdef IP_SET_HASH_WITH_NETS + + sizeof(struct net_prefixes) * nets_length +#endif + + jhash_size(t->htable_bits) * sizeof(struct hbucket); + + for (i = 0; i < jhash_size(t->htable_bits); i++) + memsize += t->bucket[i].size * h->dsize; + + return memsize; +} + +/* Flush a hash type of set: destroy all elements */ +static void +mtype_flush(struct ip_set *set) +{ + struct htype *h = set->data; + struct htable *t = h->table; + struct hbucket *n; + u32 i; + + for (i = 0; i < jhash_size(t->htable_bits); i++) { + n = hbucket(t, i); + if (n->size) { + n->size = n->pos = 0; + /* FIXME: use slab cache */ + kfree(n->value); + } + } +#ifdef IP_SET_HASH_WITH_NETS + memset(h->nets, 0, sizeof(struct net_prefixes) + * NETS_LENGTH(set->family)); +#endif + h->elements = 0; +} + +/* Destroy a hash type of set */ +static void +mtype_destroy(struct ip_set *set) +{ + struct htype *h = set->data; + + if (set->extensions & IPSET_EXT_TIMEOUT) + del_timer_sync(&h->gc); + + ahash_destroy(h->table); +#ifdef IP_SET_HASH_WITH_RBTREE + rbtree_destroy(&h->rbtree); +#endif + kfree(h); + + set->data = NULL; +} + +static void +mtype_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set)) +{ + struct htype *h = set->data; + + init_timer(&h->gc); + h->gc.data = (unsigned long) set; + h->gc.function = gc; + h->gc.expires = jiffies + IPSET_GC_PERIOD(h->timeout) * HZ; + add_timer(&h->gc); + pr_debug("gc initialized, run in every %u\n", + IPSET_GC_PERIOD(h->timeout)); +} + +static bool +mtype_same_set(const struct ip_set *a, const struct ip_set *b) +{ + const struct htype *x = a->data; + const struct htype *y = b->data; + + /* Resizing changes htable_bits, so we ignore it */ + return x->maxelem == y->maxelem && + x->timeout == y->timeout && +#ifdef IP_SET_HASH_WITH_NETMASK + x->netmask == y->netmask && +#endif + a->extensions == b->extensions; +} + +/* Get the ith element from the array block n */ +#define ahash_data(n, i, dsize) \ + ((struct mtype_elem *)((n)->value + ((i) * (dsize)))) + +/* Delete expired 
elements from the hashtable */ +static void +mtype_expire(struct htype *h, u8 nets_length, size_t dsize) +{ + struct htable *t = h->table; + struct hbucket *n; + struct mtype_elem *data; + u32 i; + int j; + + for (i = 0; i < jhash_size(t->htable_bits); i++) { + n = hbucket(t, i); + for (j = 0; j < n->pos; j++) { + data = ahash_data(n, j, dsize); + if (ip_set_timeout_expired(ext_timeout(data, h))) { + pr_debug("expired %u/%u\n", i, j); +#ifdef IP_SET_HASH_WITH_NETS + mtype_del_cidr(h, CIDR(data->cidr), + nets_length); +#endif + if (j != n->pos - 1) + /* Not last one */ + memcpy(data, + ahash_data(n, n->pos - 1, dsize), + dsize); + n->pos--; + h->elements--; + } + } + if (n->pos + AHASH_INIT_SIZE < n->size) { + void *tmp = kzalloc((n->size - AHASH_INIT_SIZE) + * dsize, + GFP_ATOMIC); + if (!tmp) + /* Still try to delete expired elements */ + continue; + n->size -= AHASH_INIT_SIZE; + memcpy(tmp, n->value, n->size * dsize); + kfree(n->value); + n->value = tmp; + } + } +} + +static void +mtype_gc(unsigned long ul_set) +{ + struct ip_set *set = (struct ip_set *) ul_set; + struct htype *h = set->data; + + pr_debug("called\n"); + write_lock_bh(&set->lock); + mtype_expire(h, NETS_LENGTH(set->family), h->dsize); + write_unlock_bh(&set->lock); + + h->gc.expires = jiffies + IPSET_GC_PERIOD(h->timeout) * HZ; + add_timer(&h->gc); +} + +/* Resize a hash: create a new hash table with doubling the hashsize + * and inserting the elements to it. Repeat until we succeed or + * fail due to memory pressures. */ +static int +mtype_resize(struct ip_set *set, bool retried) +{ + struct htype *h = set->data; + struct htable *t, *orig = h->table; + u8 htable_bits = orig->htable_bits; +#ifdef IP_SET_HASH_WITH_NETS + u8 flags; +#endif + struct mtype_elem *data; + struct mtype_elem *d; + struct hbucket *n, *m; + u32 i, j; + int ret; + + /* Try to cleanup once */ + if (SET_WITH_TIMEOUT(set) && !retried) { + i = h->elements; + write_lock_bh(&set->lock); + mtype_expire(set->data, NETS_LENGTH(set->family), + h->dsize); + write_unlock_bh(&set->lock); + if (h->elements < i) + return 0; + } + +retry: + ret = 0; + htable_bits++; + pr_debug("attempt to resize set %s from %u to %u, t %p\n", + set->name, orig->htable_bits, htable_bits, orig); + if (!htable_bits) { + /* In case we have plenty of memory :-) */ + pr_warning("Cannot increase the hashsize of set %s further\n", + set->name); + return -IPSET_ERR_HASH_FULL; + } + t = ip_set_alloc(sizeof(*t) + + jhash_size(htable_bits) * sizeof(struct hbucket)); + if (!t) + return -ENOMEM; + t->htable_bits = htable_bits; + + read_lock_bh(&set->lock); + for (i = 0; i < jhash_size(orig->htable_bits); i++) { + n = hbucket(orig, i); + for (j = 0; j < n->pos; j++) { + data = ahash_data(n, j, h->dsize); +#ifdef IP_SET_HASH_WITH_NETS + flags = 0; + mtype_data_reset_flags(data, &flags); +#endif + m = hbucket(t, HKEY(data, h->initval, htable_bits)); + ret = hbucket_elem_add(m, AHASH_MAX(h), h->dsize); + if (ret < 0) { +#ifdef IP_SET_HASH_WITH_NETS + mtype_data_reset_flags(data, &flags); +#endif + read_unlock_bh(&set->lock); + ahash_destroy(t); + if (ret == -EAGAIN) + goto retry; + return ret; + } + d = ahash_data(m, m->pos++, h->dsize); + memcpy(d, data, h->dsize); +#ifdef IP_SET_HASH_WITH_NETS + mtype_data_reset_flags(d, &flags); +#endif + } + } + + rcu_assign_pointer(h->table, t); + read_unlock_bh(&set->lock); + + /* Give time to other readers of the set */ + synchronize_rcu_bh(); + + pr_debug("set %s resized from %u (%p) to %u (%p)\n", set->name, + orig->htable_bits, orig, t->htable_bits, t); + 
ahash_destroy(orig); + + return 0; +} + +/* Add an element to a hash and update the internal counters when succeeded, + * otherwise report the proper error code. */ +static int +mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext, + struct ip_set_ext *mext, u32 flags) +{ + struct htype *h = set->data; + struct htable *t; + const struct mtype_elem *d = value; + struct mtype_elem *data; + struct hbucket *n; + int i, ret = 0; + int j = AHASH_MAX(h) + 1; + bool flag_exist = flags & IPSET_FLAG_EXIST; + u32 key, multi = 0; + + if (SET_WITH_TIMEOUT(set) && h->elements >= h->maxelem) + /* FIXME: when set is full, we slow down here */ + mtype_expire(h, NETS_LENGTH(set->family), h->dsize); + + if (h->elements >= h->maxelem) { + if (net_ratelimit()) + pr_warning("Set %s is full, maxelem %u reached\n", + set->name, h->maxelem); + return -IPSET_ERR_HASH_FULL; + } + + rcu_read_lock_bh(); + t = rcu_dereference_bh(h->table); + key = HKEY(value, h->initval, t->htable_bits); + n = hbucket(t, key); + for (i = 0; i < n->pos; i++) { + data = ahash_data(n, i, h->dsize); + if (mtype_data_equal(data, d, &multi)) { + if (flag_exist || + (SET_WITH_TIMEOUT(set) && + ip_set_timeout_expired(ext_timeout(data, h)))) { + /* Just the extensions could be overwritten */ + j = i; + goto reuse_slot; + } else { + ret = -IPSET_ERR_EXIST; + goto out; + } + } + /* Reuse first timed out entry */ + if (SET_WITH_TIMEOUT(set) && + ip_set_timeout_expired(ext_timeout(data, h)) && + j != AHASH_MAX(h) + 1) + j = i; + } +reuse_slot: + if (j != AHASH_MAX(h) + 1) { + /* Fill out reused slot */ + data = ahash_data(n, j, h->dsize); +#ifdef IP_SET_HASH_WITH_NETS + mtype_del_cidr(h, CIDR(data->cidr), NETS_LENGTH(set->family)); + mtype_add_cidr(h, CIDR(d->cidr), NETS_LENGTH(set->family)); +#endif + } else { + /* Use/create a new slot */ + TUNE_AHASH_MAX(h, multi); + ret = hbucket_elem_add(n, AHASH_MAX(h), h->dsize); + if (ret != 0) { + if (ret == -EAGAIN) + mtype_data_next(&h->next, d); + goto out; + } + data = ahash_data(n, n->pos++, h->dsize); +#ifdef IP_SET_HASH_WITH_NETS + mtype_add_cidr(h, CIDR(d->cidr), NETS_LENGTH(set->family)); +#endif + h->elements++; + } + memcpy(data, d, sizeof(struct mtype_elem)); +#ifdef IP_SET_HASH_WITH_NETS + mtype_data_set_flags(data, flags); +#endif + if (SET_WITH_TIMEOUT(set)) + ip_set_timeout_set(ext_timeout(data, h), ext->timeout); + if (SET_WITH_COUNTER(set)) + ip_set_init_counter(ext_counter(data, h), ext); + +out: + rcu_read_unlock_bh(); + return ret; +} + +/* Delete an element from the hash: swap it with the last element + * and free up space if possible. 
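+ *
+ * Worked example (editorial addition, not in the original comment):
+ * a bucket holding [A, B, C, D] with pos == 4 deletes B by copying D
+ * over B, giving [A, D, C] with pos == 3; once pos + AHASH_INIT_SIZE
+ * drops below size, the value array is reallocated AHASH_INIT_SIZE
+ * entries smaller.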
+ */ +static int +mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext, + struct ip_set_ext *mext, u32 flags) +{ + struct htype *h = set->data; + struct htable *t = h->table; + const struct mtype_elem *d = value; + struct mtype_elem *data; + struct hbucket *n; + int i; + u32 key, multi = 0; + + key = HKEY(value, h->initval, t->htable_bits); + n = hbucket(t, key); + for (i = 0; i < n->pos; i++) { + data = ahash_data(n, i, h->dsize); + if (!mtype_data_equal(data, d, &multi)) + continue; + if (SET_WITH_TIMEOUT(set) && + ip_set_timeout_expired(ext_timeout(data, h))) + return -IPSET_ERR_EXIST; + if (i != n->pos - 1) + /* Not last one */ + memcpy(data, ahash_data(n, n->pos - 1, h->dsize), + h->dsize); + + n->pos--; + h->elements--; +#ifdef IP_SET_HASH_WITH_NETS + mtype_del_cidr(h, CIDR(d->cidr), NETS_LENGTH(set->family)); +#endif + if (n->pos + AHASH_INIT_SIZE < n->size) { + void *tmp = kzalloc((n->size - AHASH_INIT_SIZE) + * h->dsize, + GFP_ATOMIC); + if (!tmp) + return 0; + n->size -= AHASH_INIT_SIZE; + memcpy(tmp, n->value, n->size * h->dsize); + kfree(n->value); + n->value = tmp; + } + return 0; + } + + return -IPSET_ERR_EXIST; +} + +static inline int +mtype_data_match(struct mtype_elem *data, const struct ip_set_ext *ext, + struct ip_set_ext *mext, struct ip_set *set, u32 flags) +{ + if (SET_WITH_COUNTER(set)) + ip_set_update_counter(ext_counter(data, + (struct htype *)(set->data)), + ext, mext, flags); + return mtype_do_data_match(data); +} + +#ifdef IP_SET_HASH_WITH_NETS +/* Special test function which takes into account the different network + * sizes added to the set */ +static int +mtype_test_cidrs(struct ip_set *set, struct mtype_elem *d, + const struct ip_set_ext *ext, + struct ip_set_ext *mext, u32 flags) +{ + struct htype *h = set->data; + struct htable *t = h->table; + struct hbucket *n; + struct mtype_elem *data; + int i, j = 0; + u32 key, multi = 0; + u8 nets_length = NETS_LENGTH(set->family); + + pr_debug("test by nets\n"); + for (; j < nets_length && h->nets[j].nets && !multi; j++) { + mtype_data_netmask(d, h->nets[j].cidr); + key = HKEY(d, h->initval, t->htable_bits); + n = hbucket(t, key); + for (i = 0; i < n->pos; i++) { + data = ahash_data(n, i, h->dsize); + if (!mtype_data_equal(data, d, &multi)) + continue; + if (SET_WITH_TIMEOUT(set)) { + if (!ip_set_timeout_expired( + ext_timeout(data, h))) + return mtype_data_match(data, ext, + mext, set, + flags); +#ifdef IP_SET_HASH_WITH_MULTI + multi = 0; +#endif + } else + return mtype_data_match(data, ext, + mext, set, flags); + } + } + return 0; +} +#endif + +/* Test whether the element is added to the set */ +static int +mtype_test(struct ip_set *set, void *value, const struct ip_set_ext *ext, + struct ip_set_ext *mext, u32 flags) +{ + struct htype *h = set->data; + struct htable *t = h->table; + struct mtype_elem *d = value; + struct hbucket *n; + struct mtype_elem *data; + int i; + u32 key, multi = 0; + +#ifdef IP_SET_HASH_WITH_NETS + /* If we test an IP address and not a network address, + * try all possible network sizes */ + if (CIDR(d->cidr) == SET_HOST_MASK(set->family)) + return mtype_test_cidrs(set, d, ext, mext, flags); +#endif + + key = HKEY(d, h->initval, t->htable_bits); + n = hbucket(t, key); + for (i = 0; i < n->pos; i++) { + data = ahash_data(n, i, h->dsize); + if (mtype_data_equal(data, d, &multi) && + !(SET_WITH_TIMEOUT(set) && + ip_set_timeout_expired(ext_timeout(data, h)))) + return mtype_data_match(data, ext, mext, set, flags); + } + return 0; +} + +/* Reply a HEADER request: fill out the 
header part of the set */ +static int +mtype_head(struct ip_set *set, struct sk_buff *skb) +{ + const struct htype *h = set->data; + struct nlattr *nested; + size_t memsize; + + read_lock_bh(&set->lock); + memsize = mtype_ahash_memsize(h, NETS_LENGTH(set->family)); + read_unlock_bh(&set->lock); + + nested = ipset_nest_start(skb, IPSET_ATTR_DATA); + if (!nested) + goto nla_put_failure; + if (nla_put_net32(skb, IPSET_ATTR_HASHSIZE, + htonl(jhash_size(h->table->htable_bits))) || + nla_put_net32(skb, IPSET_ATTR_MAXELEM, htonl(h->maxelem))) + goto nla_put_failure; +#ifdef IP_SET_HASH_WITH_NETMASK + if (h->netmask != HOST_MASK && + nla_put_u8(skb, IPSET_ATTR_NETMASK, h->netmask)) + goto nla_put_failure; +#endif + if (nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)) || + nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize)) || + ((set->extensions & IPSET_EXT_TIMEOUT) && + nla_put_net32(skb, IPSET_ATTR_TIMEOUT, htonl(h->timeout))) || + ((set->extensions & IPSET_EXT_COUNTER) && + nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, + htonl(IPSET_FLAG_WITH_COUNTERS)))) + goto nla_put_failure; + ipset_nest_end(skb, nested); + + return 0; +nla_put_failure: + return -EMSGSIZE; +} + +/* Reply a LIST/SAVE request: dump the elements of the specified set */ +static int +mtype_list(const struct ip_set *set, + struct sk_buff *skb, struct netlink_callback *cb) +{ + const struct htype *h = set->data; + const struct htable *t = h->table; + struct nlattr *atd, *nested; + const struct hbucket *n; + const struct mtype_elem *e; + u32 first = cb->args[2]; + /* We assume that one hash bucket fills into one page */ + void *incomplete; + int i; + + atd = ipset_nest_start(skb, IPSET_ATTR_ADT); + if (!atd) + return -EMSGSIZE; + pr_debug("list hash set %s\n", set->name); + for (; cb->args[2] < jhash_size(t->htable_bits); cb->args[2]++) { + incomplete = skb_tail_pointer(skb); + n = hbucket(t, cb->args[2]); + pr_debug("cb->args[2]: %lu, t %p n %p\n", cb->args[2], t, n); + for (i = 0; i < n->pos; i++) { + e = ahash_data(n, i, h->dsize); + if (SET_WITH_TIMEOUT(set) && + ip_set_timeout_expired(ext_timeout(e, h))) + continue; + pr_debug("list hash %lu hbucket %p i %u, data %p\n", + cb->args[2], n, i, e); + nested = ipset_nest_start(skb, IPSET_ATTR_DATA); + if (!nested) { + if (cb->args[2] == first) { + nla_nest_cancel(skb, atd); + return -EMSGSIZE; + } else + goto nla_put_failure; + } + if (mtype_data_list(skb, e)) + goto nla_put_failure; + if (SET_WITH_TIMEOUT(set) && + nla_put_net32(skb, IPSET_ATTR_TIMEOUT, + htonl(ip_set_timeout_get( + ext_timeout(e, h))))) + goto nla_put_failure; + if (SET_WITH_COUNTER(set) && + ip_set_put_counter(skb, ext_counter(e, h))) + goto nla_put_failure; + ipset_nest_end(skb, nested); + } + } + ipset_nest_end(skb, atd); + /* Set listing finished */ + cb->args[2] = 0; + + return 0; + +nla_put_failure: + nlmsg_trim(skb, incomplete); + ipset_nest_end(skb, atd); + if (unlikely(first == cb->args[2])) { + pr_warning("Can't list set %s: one bucket does not fit into " + "a message. 
Please report it!\n", set->name); + cb->args[2] = 0; + return -EMSGSIZE; + } + return 0; +} + +static int +TOKEN(MTYPE, _kadt)(struct ip_set *set, const struct sk_buff *skb, + const struct xt_action_param *par, + enum ipset_adt adt, struct ip_set_adt_opt *opt); + +static int +TOKEN(MTYPE, _uadt)(struct ip_set *set, struct nlattr *tb[], + enum ipset_adt adt, u32 *lineno, u32 flags, bool retried); + +static const struct ip_set_type_variant mtype_variant = { + .kadt = mtype_kadt, + .uadt = mtype_uadt, + .adt = { + [IPSET_ADD] = mtype_add, + [IPSET_DEL] = mtype_del, + [IPSET_TEST] = mtype_test, + }, + .destroy = mtype_destroy, + .flush = mtype_flush, + .head = mtype_head, + .list = mtype_list, + .resize = mtype_resize, + .same_set = mtype_same_set, +}; + +#ifdef IP_SET_EMIT_CREATE +static int +TOKEN(HTYPE, _create)(struct ip_set *set, struct nlattr *tb[], u32 flags) +{ + u32 hashsize = IPSET_DEFAULT_HASHSIZE, maxelem = IPSET_DEFAULT_MAXELEM; + u32 cadt_flags = 0; + u8 hbits; +#ifdef IP_SET_HASH_WITH_NETMASK + u8 netmask; +#endif + size_t hsize; + struct HTYPE *h; + + if (!(set->family == NFPROTO_IPV4 || set->family == NFPROTO_IPV6)) + return -IPSET_ERR_INVALID_FAMILY; +#ifdef IP_SET_HASH_WITH_NETMASK + netmask = set->family == NFPROTO_IPV4 ? 32 : 128; + pr_debug("Create set %s with family %s\n", + set->name, set->family == NFPROTO_IPV4 ? "inet" : "inet6"); +#endif + + if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_HASHSIZE) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_MAXELEM) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS))) + return -IPSET_ERR_PROTOCOL; + + if (tb[IPSET_ATTR_HASHSIZE]) { + hashsize = ip_set_get_h32(tb[IPSET_ATTR_HASHSIZE]); + if (hashsize < IPSET_MIMINAL_HASHSIZE) + hashsize = IPSET_MIMINAL_HASHSIZE; + } + + if (tb[IPSET_ATTR_MAXELEM]) + maxelem = ip_set_get_h32(tb[IPSET_ATTR_MAXELEM]); + +#ifdef IP_SET_HASH_WITH_NETMASK + if (tb[IPSET_ATTR_NETMASK]) { + netmask = nla_get_u8(tb[IPSET_ATTR_NETMASK]); + + if ((set->family == NFPROTO_IPV4 && netmask > 32) || + (set->family == NFPROTO_IPV6 && netmask > 128) || + netmask == 0) + return -IPSET_ERR_INVALID_NETMASK; + } +#endif + + hsize = sizeof(*h); +#ifdef IP_SET_HASH_WITH_NETS + hsize += sizeof(struct net_prefixes) * + (set->family == NFPROTO_IPV4 ? 
32 : 128); +#endif + h = kzalloc(hsize, GFP_KERNEL); + if (!h) + return -ENOMEM; + + h->maxelem = maxelem; +#ifdef IP_SET_HASH_WITH_NETMASK + h->netmask = netmask; +#endif + get_random_bytes(&h->initval, sizeof(h->initval)); + h->timeout = IPSET_NO_TIMEOUT; + + hbits = htable_bits(hashsize); + hsize = htable_size(hbits); + if (hsize == 0) { + kfree(h); + return -ENOMEM; + } + h->table = ip_set_alloc(hsize); + if (!h->table) { + kfree(h); + return -ENOMEM; + } + h->table->htable_bits = hbits; + + set->data = h; + if (set->family == NFPROTO_IPV4) + set->variant = &TOKEN(HTYPE, 4_variant); + else + set->variant = &TOKEN(HTYPE, 6_variant); + + if (tb[IPSET_ATTR_CADT_FLAGS]) + cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); + if (cadt_flags & IPSET_FLAG_WITH_COUNTERS) { + set->extensions |= IPSET_EXT_COUNTER; + if (tb[IPSET_ATTR_TIMEOUT]) { + h->timeout = + ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); + set->extensions |= IPSET_EXT_TIMEOUT; + if (set->family == NFPROTO_IPV4) { + h->dsize = + sizeof(struct TOKEN(HTYPE, 4ct_elem)); + h->offset[IPSET_OFFSET_TIMEOUT] = + offsetof(struct TOKEN(HTYPE, 4ct_elem), + timeout); + h->offset[IPSET_OFFSET_COUNTER] = + offsetof(struct TOKEN(HTYPE, 4ct_elem), + counter); + TOKEN(HTYPE, 4_gc_init)(set, + TOKEN(HTYPE, 4_gc)); + } else { + h->dsize = + sizeof(struct TOKEN(HTYPE, 6ct_elem)); + h->offset[IPSET_OFFSET_TIMEOUT] = + offsetof(struct TOKEN(HTYPE, 6ct_elem), + timeout); + h->offset[IPSET_OFFSET_COUNTER] = + offsetof(struct TOKEN(HTYPE, 6ct_elem), + counter); + TOKEN(HTYPE, 6_gc_init)(set, + TOKEN(HTYPE, 6_gc)); + } + } else { + if (set->family == NFPROTO_IPV4) { + h->dsize = + sizeof(struct TOKEN(HTYPE, 4c_elem)); + h->offset[IPSET_OFFSET_COUNTER] = + offsetof(struct TOKEN(HTYPE, 4c_elem), + counter); + } else { + h->dsize = + sizeof(struct TOKEN(HTYPE, 6c_elem)); + h->offset[IPSET_OFFSET_COUNTER] = + offsetof(struct TOKEN(HTYPE, 6c_elem), + counter); + } + } + } else if (tb[IPSET_ATTR_TIMEOUT]) { + h->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); + set->extensions |= IPSET_EXT_TIMEOUT; + if (set->family == NFPROTO_IPV4) { + h->dsize = sizeof(struct TOKEN(HTYPE, 4t_elem)); + h->offset[IPSET_OFFSET_TIMEOUT] = + offsetof(struct TOKEN(HTYPE, 4t_elem), + timeout); + TOKEN(HTYPE, 4_gc_init)(set, TOKEN(HTYPE, 4_gc)); + } else { + h->dsize = sizeof(struct TOKEN(HTYPE, 6t_elem)); + h->offset[IPSET_OFFSET_TIMEOUT] = + offsetof(struct TOKEN(HTYPE, 6t_elem), + timeout); + TOKEN(HTYPE, 6_gc_init)(set, TOKEN(HTYPE, 6_gc)); + } + } else { + if (set->family == NFPROTO_IPV4) + h->dsize = sizeof(struct TOKEN(HTYPE, 4_elem)); + else + h->dsize = sizeof(struct TOKEN(HTYPE, 6_elem)); + } + + pr_debug("create %s hashsize %u (%u) maxelem %u: %p(%p)\n", + set->name, jhash_size(h->table->htable_bits), + h->table->htable_bits, h->maxelem, set->data, h->table); + + return 0; +} +#endif /* IP_SET_EMIT_CREATE */ diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c index 5c0b78528e55..c74e6e14cd93 100644 --- a/net/netfilter/ipset/ip_set_hash_ip.c +++ b/net/netfilter/ipset/ip_set_hash_ip.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2003-2011 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> +/* Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -21,11 +21,10 @@ #include <linux/netfilter.h> #include <linux/netfilter/ipset/pfxlen.h> #include <linux/netfilter/ipset/ip_set.h> -#include 
<linux/netfilter/ipset/ip_set_timeout.h> #include <linux/netfilter/ipset/ip_set_hash.h> #define REVISION_MIN 0 -#define REVISION_MAX 0 +#define REVISION_MAX 1 /* Counters support */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>"); @@ -33,58 +32,47 @@ IP_SET_MODULE_DESC("hash:ip", REVISION_MIN, REVISION_MAX); MODULE_ALIAS("ip_set_hash:ip"); /* Type specific function prefix */ -#define TYPE hash_ip - -static bool -hash_ip_same_set(const struct ip_set *a, const struct ip_set *b); - -#define hash_ip4_same_set hash_ip_same_set -#define hash_ip6_same_set hash_ip_same_set +#define HTYPE hash_ip +#define IP_SET_HASH_WITH_NETMASK -/* The type variant functions: IPv4 */ +/* IPv4 variants */ -/* Member elements without timeout */ +/* Member elements */ struct hash_ip4_elem { + /* Zero valued IP addresses cannot be stored */ __be32 ip; }; -/* Member elements with timeout support */ -struct hash_ip4_telem { +struct hash_ip4t_elem { __be32 ip; unsigned long timeout; }; -static inline bool -hash_ip4_data_equal(const struct hash_ip4_elem *ip1, - const struct hash_ip4_elem *ip2, - u32 *multi) -{ - return ip1->ip == ip2->ip; -} +struct hash_ip4c_elem { + __be32 ip; + struct ip_set_counter counter; +}; -static inline bool -hash_ip4_data_isnull(const struct hash_ip4_elem *elem) -{ - return elem->ip == 0; -} +struct hash_ip4ct_elem { + __be32 ip; + struct ip_set_counter counter; + unsigned long timeout; +}; -static inline void -hash_ip4_data_copy(struct hash_ip4_elem *dst, const struct hash_ip4_elem *src) -{ - dst->ip = src->ip; -} +/* Common functions */ -/* Zero valued IP addresses cannot be stored */ -static inline void -hash_ip4_data_zero_out(struct hash_ip4_elem *elem) +static inline bool +hash_ip4_data_equal(const struct hash_ip4_elem *e1, + const struct hash_ip4_elem *e2, + u32 *multi) { - elem->ip = 0; + return e1->ip == e2->ip; } static inline bool -hash_ip4_data_list(struct sk_buff *skb, const struct hash_ip4_elem *data) +hash_ip4_data_list(struct sk_buff *skb, const struct hash_ip4_elem *e) { - if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, data->ip)) + if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, e->ip)) goto nla_put_failure; return 0; @@ -92,41 +80,26 @@ nla_put_failure: return 1; } -static bool -hash_ip4_data_tlist(struct sk_buff *skb, const struct hash_ip4_elem *data) +static inline void +hash_ip4_data_next(struct hash_ip4_elem *next, const struct hash_ip4_elem *e) { - const struct hash_ip4_telem *tdata = - (const struct hash_ip4_telem *)data; - - if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, tdata->ip) || - nla_put_net32(skb, IPSET_ATTR_TIMEOUT, - htonl(ip_set_timeout_get(tdata->timeout)))) - goto nla_put_failure; - - return 0; - -nla_put_failure: - return 1; + next->ip = e->ip; } -#define IP_SET_HASH_WITH_NETMASK +#define MTYPE hash_ip4 #define PF 4 #define HOST_MASK 32 -#include <linux/netfilter/ipset/ip_set_ahash.h> - -static inline void -hash_ip4_data_next(struct ip_set_hash *h, const struct hash_ip4_elem *d) -{ - h->next.ip = d->ip; -} +#include "ip_set_hash_gen.h" static int hash_ip4_kadt(struct ip_set *set, const struct sk_buff *skb, const struct xt_action_param *par, - enum ipset_adt adt, const struct ip_set_adt_opt *opt) + enum ipset_adt adt, struct ip_set_adt_opt *opt) { - const struct ip_set_hash *h = set->data; + const struct hash_ip *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_ip4_elem e = {}; + struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h); __be32 ip; ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &ip); @@ -134,43 +107,42 @@ 
hash_ip4_kadt(struct ip_set *set, const struct sk_buff *skb, if (ip == 0) return -EINVAL; - return adtfn(set, &ip, opt_timeout(opt, h), opt->cmdflags); + e.ip = ip; + return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags); } static int hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { - const struct ip_set_hash *h = set->data; + const struct hash_ip *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; - u32 ip, ip_to, hosts, timeout = h->timeout; - __be32 nip; + struct hash_ip4_elem e = {}; + struct ip_set_ext ext = IP_SET_INIT_UEXT(h); + u32 ip, ip_to, hosts; int ret = 0; if (unlikely(!tb[IPSET_ATTR_IP] || - !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT))) + !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES))) return -IPSET_ERR_PROTOCOL; if (tb[IPSET_ATTR_LINENO]) *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); - ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip); + ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip) || + ip_set_get_extensions(set, tb, &ext); if (ret) return ret; ip &= ip_set_hostmask(h->netmask); - if (tb[IPSET_ATTR_TIMEOUT]) { - if (!with_timeout(h->timeout)) - return -IPSET_ERR_TIMEOUT; - timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); - } - if (adt == IPSET_TEST) { - nip = htonl(ip); - if (nip == 0) + e.ip = htonl(ip); + if (e.ip == 0) return -IPSET_ERR_HASH_ELEM; - return adtfn(set, &nip, timeout, flags); + return adtfn(set, &e, &ext, &ext, flags); } ip_to = ip; @@ -193,10 +165,10 @@ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[], if (retried) ip = ntohl(h->next.ip); for (; !before(ip_to, ip); ip += hosts) { - nip = htonl(ip); - if (nip == 0) + e.ip = htonl(ip); + if (e.ip == 0) return -IPSET_ERR_HASH_ELEM; - ret = adtfn(set, &nip, timeout, flags); + ret = adtfn(set, &e, &ext, &ext, flags); if (ret && !ip_set_eexist(ret, flags)) return ret; @@ -206,68 +178,49 @@ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[], return ret; } -static bool -hash_ip_same_set(const struct ip_set *a, const struct ip_set *b) -{ - const struct ip_set_hash *x = a->data; - const struct ip_set_hash *y = b->data; +/* IPv6 variants */ - /* Resizing changes htable_bits, so we ignore it */ - return x->maxelem == y->maxelem && - x->timeout == y->timeout && - x->netmask == y->netmask; -} +/* Member elements */ +struct hash_ip6_elem { + union nf_inet_addr ip; +}; -/* The type variant functions: IPv6 */ +struct hash_ip6t_elem { + union nf_inet_addr ip; + unsigned long timeout; +}; -struct hash_ip6_elem { +struct hash_ip6c_elem { union nf_inet_addr ip; + struct ip_set_counter counter; }; -struct hash_ip6_telem { +struct hash_ip6ct_elem { union nf_inet_addr ip; + struct ip_set_counter counter; unsigned long timeout; }; +/* Common functions */ + static inline bool hash_ip6_data_equal(const struct hash_ip6_elem *ip1, const struct hash_ip6_elem *ip2, u32 *multi) { - return ipv6_addr_cmp(&ip1->ip.in6, &ip2->ip.in6) == 0; -} - -static inline bool -hash_ip6_data_isnull(const struct hash_ip6_elem *elem) -{ - return ipv6_addr_any(&elem->ip.in6); + return ipv6_addr_equal(&ip1->ip.in6, &ip2->ip.in6); } static inline void -hash_ip6_data_copy(struct hash_ip6_elem *dst, const struct hash_ip6_elem *src) +hash_ip6_netmask(union nf_inet_addr *ip, u8 prefix) { - dst->ip.in6 = src->ip.in6; -} - -static inline void -hash_ip6_data_zero_out(struct hash_ip6_elem *elem) -{ - ipv6_addr_set(&elem->ip.in6, 0, 0, 0, 0); -} - -static 
inline void -ip6_netmask(union nf_inet_addr *ip, u8 prefix) -{ - ip->ip6[0] &= ip_set_netmask6(prefix)[0]; - ip->ip6[1] &= ip_set_netmask6(prefix)[1]; - ip->ip6[2] &= ip_set_netmask6(prefix)[2]; - ip->ip6[3] &= ip_set_netmask6(prefix)[3]; + ip6_netmask(ip, prefix); } static bool -hash_ip6_data_list(struct sk_buff *skb, const struct hash_ip6_elem *data) +hash_ip6_data_list(struct sk_buff *skb, const struct hash_ip6_elem *e) { - if (nla_put_ipaddr6(skb, IPSET_ATTR_IP, &data->ip.in6)) + if (nla_put_ipaddr6(skb, IPSET_ATTR_IP, &e->ip.in6)) goto nla_put_failure; return 0; @@ -275,69 +228,55 @@ nla_put_failure: return 1; } -static bool -hash_ip6_data_tlist(struct sk_buff *skb, const struct hash_ip6_elem *data) +static inline void +hash_ip6_data_next(struct hash_ip4_elem *next, const struct hash_ip6_elem *e) { - const struct hash_ip6_telem *e = - (const struct hash_ip6_telem *)data; - - if (nla_put_ipaddr6(skb, IPSET_ATTR_IP, &e->ip.in6) || - nla_put_net32(skb, IPSET_ATTR_TIMEOUT, - htonl(ip_set_timeout_get(e->timeout)))) - goto nla_put_failure; - return 0; - -nla_put_failure: - return 1; } +#undef MTYPE #undef PF #undef HOST_MASK +#undef HKEY_DATALEN +#define MTYPE hash_ip6 #define PF 6 #define HOST_MASK 128 -#include <linux/netfilter/ipset/ip_set_ahash.h> -static inline void -hash_ip6_data_next(struct ip_set_hash *h, const struct hash_ip6_elem *d) -{ -} +#define IP_SET_EMIT_CREATE +#include "ip_set_hash_gen.h" static int hash_ip6_kadt(struct ip_set *set, const struct sk_buff *skb, const struct xt_action_param *par, - enum ipset_adt adt, const struct ip_set_adt_opt *opt) + enum ipset_adt adt, struct ip_set_adt_opt *opt) { - const struct ip_set_hash *h = set->data; + const struct hash_ip *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; - union nf_inet_addr ip; + struct hash_ip6_elem e = {}; + struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h); - ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &ip.in6); - ip6_netmask(&ip, h->netmask); - if (ipv6_addr_any(&ip.in6)) + ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip.in6); + hash_ip6_netmask(&e.ip, h->netmask); + if (ipv6_addr_any(&e.ip.in6)) return -EINVAL; - return adtfn(set, &ip, opt_timeout(opt, h), opt->cmdflags); + return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags); } -static const struct nla_policy hash_ip6_adt_policy[IPSET_ATTR_ADT_MAX + 1] = { - [IPSET_ATTR_IP] = { .type = NLA_NESTED }, - [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, - [IPSET_ATTR_LINENO] = { .type = NLA_U32 }, -}; - static int hash_ip6_uadt(struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { - const struct ip_set_hash *h = set->data; + const struct hash_ip *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; - union nf_inet_addr ip; - u32 timeout = h->timeout; + struct hash_ip6_elem e = {}; + struct ip_set_ext ext = IP_SET_INIT_UEXT(h); int ret; if (unlikely(!tb[IPSET_ATTR_IP] || !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) || tb[IPSET_ATTR_IP_TO] || tb[IPSET_ATTR_CIDR])) return -IPSET_ERR_PROTOCOL; @@ -345,110 +284,20 @@ hash_ip6_uadt(struct ip_set *set, struct nlattr *tb[], if (tb[IPSET_ATTR_LINENO]) *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); - ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &ip); + ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip) || + ip_set_get_extensions(set, tb, &ext); if (ret) return ret; - ip6_netmask(&ip, h->netmask); - if (ipv6_addr_any(&ip.in6)) + 
hash_ip6_netmask(&e.ip, h->netmask); + if (ipv6_addr_any(&e.ip.in6)) return -IPSET_ERR_HASH_ELEM; - if (tb[IPSET_ATTR_TIMEOUT]) { - if (!with_timeout(h->timeout)) - return -IPSET_ERR_TIMEOUT; - timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); - } - - ret = adtfn(set, &ip, timeout, flags); + ret = adtfn(set, &e, &ext, &ext, flags); return ip_set_eexist(ret, flags) ? 0 : ret; } -/* Create hash:ip type of sets */ - -static int -hash_ip_create(struct ip_set *set, struct nlattr *tb[], u32 flags) -{ - u32 hashsize = IPSET_DEFAULT_HASHSIZE, maxelem = IPSET_DEFAULT_MAXELEM; - u8 netmask, hbits; - size_t hsize; - struct ip_set_hash *h; - - if (!(set->family == NFPROTO_IPV4 || set->family == NFPROTO_IPV6)) - return -IPSET_ERR_INVALID_FAMILY; - netmask = set->family == NFPROTO_IPV4 ? 32 : 128; - pr_debug("Create set %s with family %s\n", - set->name, set->family == NFPROTO_IPV4 ? "inet" : "inet6"); - - if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_HASHSIZE) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_MAXELEM) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT))) - return -IPSET_ERR_PROTOCOL; - - if (tb[IPSET_ATTR_HASHSIZE]) { - hashsize = ip_set_get_h32(tb[IPSET_ATTR_HASHSIZE]); - if (hashsize < IPSET_MIMINAL_HASHSIZE) - hashsize = IPSET_MIMINAL_HASHSIZE; - } - - if (tb[IPSET_ATTR_MAXELEM]) - maxelem = ip_set_get_h32(tb[IPSET_ATTR_MAXELEM]); - - if (tb[IPSET_ATTR_NETMASK]) { - netmask = nla_get_u8(tb[IPSET_ATTR_NETMASK]); - - if ((set->family == NFPROTO_IPV4 && netmask > 32) || - (set->family == NFPROTO_IPV6 && netmask > 128) || - netmask == 0) - return -IPSET_ERR_INVALID_NETMASK; - } - - h = kzalloc(sizeof(*h), GFP_KERNEL); - if (!h) - return -ENOMEM; - - h->maxelem = maxelem; - h->netmask = netmask; - get_random_bytes(&h->initval, sizeof(h->initval)); - h->timeout = IPSET_NO_TIMEOUT; - - hbits = htable_bits(hashsize); - hsize = htable_size(hbits); - if (hsize == 0) { - kfree(h); - return -ENOMEM; - } - h->table = ip_set_alloc(hsize); - if (!h->table) { - kfree(h); - return -ENOMEM; - } - h->table->htable_bits = hbits; - - set->data = h; - - if (tb[IPSET_ATTR_TIMEOUT]) { - h->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); - - set->variant = set->family == NFPROTO_IPV4 - ? &hash_ip4_tvariant : &hash_ip6_tvariant; - - if (set->family == NFPROTO_IPV4) - hash_ip4_gc_init(set); - else - hash_ip6_gc_init(set); - } else { - set->variant = set->family == NFPROTO_IPV4 - ? 
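The create routine deleted here (and in each later file) is what IP_SET_EMIT_CREATE now generates; its one sizing decision was rounding the requested hashsize up through htable_bits(). A rough stand-in for that rounding, assuming a power-of-two bucket table and using MIN_HASHSIZE as a placeholder for the kernel's floor constant:

#include <stdio.h>
#include <stdint.h>

#define MIN_HASHSIZE 64u        /* placeholder for the real floor */

static uint8_t htable_bits(uint32_t hashsize)
{
        uint8_t bits = 0;

        if (hashsize < MIN_HASHSIZE)
                hashsize = MIN_HASHSIZE;
        while (bits < 31 && (1u << bits) < hashsize)
                bits++;         /* smallest power of two >= hashsize */
        return bits;
}

int main(void)
{
        printf("hashsize 1000 -> %u bits = %u buckets\n",
               htable_bits(1000), 1u << htable_bits(1000));
        return 0;
}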
&hash_ip4_variant : &hash_ip6_variant; - } - - pr_debug("create %s hashsize %u (%u) maxelem %u: %p(%p)\n", - set->name, jhash_size(h->table->htable_bits), - h->table->htable_bits, h->maxelem, set->data, h->table); - - return 0; -} - static struct ip_set_type hash_ip_type __read_mostly = { .name = "hash:ip", .protocol = IPSET_PROTOCOL, @@ -465,6 +314,7 @@ static struct ip_set_type hash_ip_type __read_mostly = { [IPSET_ATTR_RESIZE] = { .type = NLA_U8 }, [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, [IPSET_ATTR_NETMASK] = { .type = NLA_U8 }, + [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 }, }, .adt_policy = { [IPSET_ATTR_IP] = { .type = NLA_NESTED }, @@ -472,6 +322,8 @@ static struct ip_set_type hash_ip_type __read_mostly = { [IPSET_ATTR_CIDR] = { .type = NLA_U8 }, [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, [IPSET_ATTR_LINENO] = { .type = NLA_U32 }, + [IPSET_ATTR_BYTES] = { .type = NLA_U64 }, + [IPSET_ATTR_PACKETS] = { .type = NLA_U64 }, }, .me = THIS_MODULE, }; diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c index 6283351f4eeb..7a2d2bd98d04 100644 --- a/net/netfilter/ipset/ip_set_hash_ipport.c +++ b/net/netfilter/ipset/ip_set_hash_ipport.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2003-2011 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> +/* Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -21,12 +21,12 @@ #include <linux/netfilter.h> #include <linux/netfilter/ipset/pfxlen.h> #include <linux/netfilter/ipset/ip_set.h> -#include <linux/netfilter/ipset/ip_set_timeout.h> #include <linux/netfilter/ipset/ip_set_getport.h> #include <linux/netfilter/ipset/ip_set_hash.h> #define REVISION_MIN 0 -#define REVISION_MAX 1 /* SCTP and UDPLITE support added */ +/* 1 SCTP and UDPLITE support added */ +#define REVISION_MAX 2 /* Counters support added */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>"); @@ -34,33 +34,45 @@ IP_SET_MODULE_DESC("hash:ip,port", REVISION_MIN, REVISION_MAX); MODULE_ALIAS("ip_set_hash:ip,port"); /* Type specific function prefix */ -#define TYPE hash_ipport +#define HTYPE hash_ipport -static bool -hash_ipport_same_set(const struct ip_set *a, const struct ip_set *b); +/* IPv4 variants */ -#define hash_ipport4_same_set hash_ipport_same_set -#define hash_ipport6_same_set hash_ipport_same_set +/* Member elements */ +struct hash_ipport4_elem { + __be32 ip; + __be16 port; + u8 proto; + u8 padding; +}; -/* The type variant functions: IPv4 */ +struct hash_ipport4t_elem { + __be32 ip; + __be16 port; + u8 proto; + u8 padding; + unsigned long timeout; +}; -/* Member elements without timeout */ -struct hash_ipport4_elem { +struct hash_ipport4c_elem { __be32 ip; __be16 port; u8 proto; u8 padding; + struct ip_set_counter counter; }; -/* Member elements with timeout support */ -struct hash_ipport4_telem { +struct hash_ipport4ct_elem { __be32 ip; __be16 port; u8 proto; u8 padding; + struct ip_set_counter counter; unsigned long timeout; }; +/* Common functions */ + static inline bool hash_ipport4_data_equal(const struct hash_ipport4_elem *ip1, const struct hash_ipport4_elem *ip2, @@ -71,27 +83,6 @@ hash_ipport4_data_equal(const struct hash_ipport4_elem *ip1, ip1->proto == ip2->proto; } -static inline bool -hash_ipport4_data_isnull(const struct hash_ipport4_elem *elem) -{ - return elem->proto == 0; -} - -static inline void -hash_ipport4_data_copy(struct hash_ipport4_elem 
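The policy tables above grow IPSET_ATTR_BYTES and IPSET_ATTR_PACKETS as NLA_U64 on the ADT side and IPSET_ATTR_CADT_FLAGS on create; netlink rejects malformed attributes from such a table before any handler runs. A toy version of that table-driven length check, with illustrative attribute names and sizes:

#include <stdio.h>
#include <stdint.h>
#include <stddef.h>

enum { ATTR_TIMEOUT, ATTR_BYTES, ATTR_PACKETS, ATTR_MAX };

static const size_t policy_len[ATTR_MAX] = {
        [ATTR_TIMEOUT] = sizeof(uint32_t),
        [ATTR_BYTES]   = sizeof(uint64_t),      /* new with counters */
        [ATTR_PACKETS] = sizeof(uint64_t),      /* new with counters */
};

static int validate(int type, size_t len)
{
        return type >= 0 && type < ATTR_MAX && len == policy_len[type]
                ? 0 : -1;
}

int main(void)
{
        printf("BYTES as u32: %s\n", validate(ATTR_BYTES, 4) ? "rejected" : "ok");
        printf("BYTES as u64: %s\n", validate(ATTR_BYTES, 8) ? "rejected" : "ok");
        return 0;
}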
*dst, - const struct hash_ipport4_elem *src) -{ - dst->ip = src->ip; - dst->port = src->port; - dst->proto = src->proto; -} - -static inline void -hash_ipport4_data_zero_out(struct hash_ipport4_elem *elem) -{ - elem->proto = 0; -} - static bool hash_ipport4_data_list(struct sk_buff *skb, const struct hash_ipport4_elem *data) @@ -106,111 +97,91 @@ nla_put_failure: return 1; } -static bool -hash_ipport4_data_tlist(struct sk_buff *skb, - const struct hash_ipport4_elem *data) -{ - const struct hash_ipport4_telem *tdata = - (const struct hash_ipport4_telem *)data; - - if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, tdata->ip) || - nla_put_net16(skb, IPSET_ATTR_PORT, tdata->port) || - nla_put_u8(skb, IPSET_ATTR_PROTO, data->proto) || - nla_put_net32(skb, IPSET_ATTR_TIMEOUT, - htonl(ip_set_timeout_get(tdata->timeout)))) - goto nla_put_failure; - return 0; - -nla_put_failure: - return 1; -} - -#define PF 4 -#define HOST_MASK 32 -#include <linux/netfilter/ipset/ip_set_ahash.h> - static inline void -hash_ipport4_data_next(struct ip_set_hash *h, +hash_ipport4_data_next(struct hash_ipport4_elem *next, const struct hash_ipport4_elem *d) { - h->next.ip = d->ip; - h->next.port = d->port; + next->ip = d->ip; + next->port = d->port; } +#define MTYPE hash_ipport4 +#define PF 4 +#define HOST_MASK 32 +#define HKEY_DATALEN sizeof(struct hash_ipport4_elem) +#include "ip_set_hash_gen.h" + static int hash_ipport4_kadt(struct ip_set *set, const struct sk_buff *skb, const struct xt_action_param *par, - enum ipset_adt adt, const struct ip_set_adt_opt *opt) + enum ipset_adt adt, struct ip_set_adt_opt *opt) { - const struct ip_set_hash *h = set->data; + const struct hash_ipport *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; - struct hash_ipport4_elem data = { }; + struct hash_ipport4_elem e = { }; + struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h); if (!ip_set_get_ip4_port(skb, opt->flags & IPSET_DIM_TWO_SRC, - &data.port, &data.proto)) + &e.port, &e.proto)) return -EINVAL; - ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &data.ip); - - return adtfn(set, &data, opt_timeout(opt, h), opt->cmdflags); + ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip); + return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags); } static int hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { - const struct ip_set_hash *h = set->data; + const struct hash_ipport *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; - struct hash_ipport4_elem data = { }; + struct hash_ipport4_elem e = { }; + struct ip_set_ext ext = IP_SET_INIT_UEXT(h); u32 ip, ip_to, p = 0, port, port_to; - u32 timeout = h->timeout; bool with_ports = false; int ret; if (unlikely(!tb[IPSET_ATTR_IP] || !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) || !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT))) + !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES))) return -IPSET_ERR_PROTOCOL; if (tb[IPSET_ATTR_LINENO]) *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); - ret = ip_set_get_ipaddr4(tb[IPSET_ATTR_IP], &data.ip); + ret = ip_set_get_ipaddr4(tb[IPSET_ATTR_IP], &e.ip) || + ip_set_get_extensions(set, tb, &ext); if (ret) return ret; if (tb[IPSET_ATTR_PORT]) - data.port = nla_get_be16(tb[IPSET_ATTR_PORT]); + e.port = nla_get_be16(tb[IPSET_ATTR_PORT]); else return -IPSET_ERR_PROTOCOL; if (tb[IPSET_ATTR_PROTO]) { - data.proto = 
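hash_ipport4_data_next() now copies the resume point into a caller-supplied next element instead of poking the hash struct directly, but the purpose is unchanged: when an add over a range collides with an internal resize, the command is retried and the loop restarts at h->next rather than re-walking the whole range. A compact model of that resume path, with the failure simulated:

#include <stdio.h>
#include <stdbool.h>
#include <stdint.h>

static uint32_t next_ip;                /* stands in for h->next.ip */

static int add_elem(uint32_t ip, bool *fail_once)
{
        if (ip == 5 && *fail_once) {    /* simulate -EAGAIN on resize */
                *fail_once = false;
                next_ip = ip;
                return -1;
        }
        printf("added %u\n", ip);
        return 0;
}

int main(void)
{
        bool fail_once = true, retried = false;
        uint32_t ip, from = 3, to = 7;

retry:
        /* mirrors: if (retried) ip = ntohl(h->next.ip); */
        ip = retried ? next_ip : from;
        for (; ip <= to; ip++)
                if (add_elem(ip, &fail_once)) {
                        retried = true;
                        goto retry;
                }
        return 0;
}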
nla_get_u8(tb[IPSET_ATTR_PROTO]); - with_ports = ip_set_proto_with_ports(data.proto); + e.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]); + with_ports = ip_set_proto_with_ports(e.proto); - if (data.proto == 0) + if (e.proto == 0) return -IPSET_ERR_INVALID_PROTO; } else return -IPSET_ERR_MISSING_PROTO; - if (!(with_ports || data.proto == IPPROTO_ICMP)) - data.port = 0; - - if (tb[IPSET_ATTR_TIMEOUT]) { - if (!with_timeout(h->timeout)) - return -IPSET_ERR_TIMEOUT; - timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); - } + if (!(with_ports || e.proto == IPPROTO_ICMP)) + e.port = 0; if (adt == IPSET_TEST || !(tb[IPSET_ATTR_IP_TO] || tb[IPSET_ATTR_CIDR] || tb[IPSET_ATTR_PORT_TO])) { - ret = adtfn(set, &data, timeout, flags); + ret = adtfn(set, &e, &ext, &ext, flags); return ip_set_eexist(ret, flags) ? 0 : ret; } - ip_to = ip = ntohl(data.ip); + ip_to = ip = ntohl(e.ip); if (tb[IPSET_ATTR_IP_TO]) { ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &ip_to); if (ret) @@ -225,7 +196,7 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[], ip_set_mask_from_to(ip, ip_to, cidr); } - port_to = port = ntohs(data.port); + port_to = port = ntohs(e.port); if (with_ports && tb[IPSET_ATTR_PORT_TO]) { port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]); if (port > port_to) @@ -238,9 +209,9 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[], p = retried && ip == ntohl(h->next.ip) ? ntohs(h->next.port) : port; for (; p <= port_to; p++) { - data.ip = htonl(ip); - data.port = htons(p); - ret = adtfn(set, &data, timeout, flags); + e.ip = htonl(ip); + e.port = htons(p); + ret = adtfn(set, &e, &ext, &ext, flags); if (ret && !ip_set_eexist(ret, flags)) return ret; @@ -251,63 +222,52 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[], return ret; } -static bool -hash_ipport_same_set(const struct ip_set *a, const struct ip_set *b) -{ - const struct ip_set_hash *x = a->data; - const struct ip_set_hash *y = b->data; +/* IPv6 variants */ - /* Resizing changes htable_bits, so we ignore it */ - return x->maxelem == y->maxelem && - x->timeout == y->timeout; -} +struct hash_ipport6_elem { + union nf_inet_addr ip; + __be16 port; + u8 proto; + u8 padding; +}; -/* The type variant functions: IPv6 */ +struct hash_ipport6t_elem { + union nf_inet_addr ip; + __be16 port; + u8 proto; + u8 padding; + unsigned long timeout; +}; -struct hash_ipport6_elem { +struct hash_ipport6c_elem { union nf_inet_addr ip; __be16 port; u8 proto; u8 padding; + struct ip_set_counter counter; }; -struct hash_ipport6_telem { +struct hash_ipport6ct_elem { union nf_inet_addr ip; __be16 port; u8 proto; u8 padding; + struct ip_set_counter counter; unsigned long timeout; }; +/* Common functions */ + static inline bool hash_ipport6_data_equal(const struct hash_ipport6_elem *ip1, const struct hash_ipport6_elem *ip2, u32 *multi) { - return ipv6_addr_cmp(&ip1->ip.in6, &ip2->ip.in6) == 0 && + return ipv6_addr_equal(&ip1->ip.in6, &ip2->ip.in6) && ip1->port == ip2->port && ip1->proto == ip2->proto; } -static inline bool -hash_ipport6_data_isnull(const struct hash_ipport6_elem *elem) -{ - return elem->proto == 0; -} - -static inline void -hash_ipport6_data_copy(struct hash_ipport6_elem *dst, - const struct hash_ipport6_elem *src) -{ - memcpy(dst, src, sizeof(*dst)); -} - -static inline void -hash_ipport6_data_zero_out(struct hash_ipport6_elem *elem) -{ - elem->proto = 0; -} - static bool hash_ipport6_data_list(struct sk_buff *skb, const struct hash_ipport6_elem *data) @@ -322,66 +282,52 @@ nla_put_failure: return 1; } -static bool 
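The protocol checks survive the rename from data to e unchanged: a port is only stored for port-carrying protocols, or kept for ICMP where the slot encodes type/code; anything else gets port 0 so equal entries hash equal. Sketch using the usual IANA protocol numbers (assumed here, the diff only names the macros):

#include <stdio.h>
#include <stdbool.h>
#include <stdint.h>

static bool proto_with_ports(uint8_t proto)
{
        switch (proto) {
        case 6:                 /* TCP      */
        case 17:                /* UDP      */
        case 132:               /* SCTP     */
        case 136:               /* UDP-Lite */
                return true;
        }
        return false;
}

int main(void)
{
        uint8_t proto = 47;     /* GRE: no port field */
        uint16_t port = 8080;

        /* mirrors: if (!(with_ports || e.proto == IPPROTO_ICMP)) e.port = 0; */
        if (!(proto_with_ports(proto) || proto == 1))
                port = 0;
        printf("stored port: %u\n", port);
        return 0;
}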
-hash_ipport6_data_tlist(struct sk_buff *skb, - const struct hash_ipport6_elem *data) +static inline void +hash_ipport6_data_next(struct hash_ipport4_elem *next, + const struct hash_ipport6_elem *d) { - const struct hash_ipport6_telem *e = - (const struct hash_ipport6_telem *)data; - - if (nla_put_ipaddr6(skb, IPSET_ATTR_IP, &e->ip.in6) || - nla_put_net16(skb, IPSET_ATTR_PORT, data->port) || - nla_put_u8(skb, IPSET_ATTR_PROTO, data->proto) || - nla_put_net32(skb, IPSET_ATTR_TIMEOUT, - htonl(ip_set_timeout_get(e->timeout)))) - goto nla_put_failure; - return 0; - -nla_put_failure: - return 1; + next->port = d->port; } +#undef MTYPE #undef PF #undef HOST_MASK +#undef HKEY_DATALEN +#define MTYPE hash_ipport6 #define PF 6 #define HOST_MASK 128 -#include <linux/netfilter/ipset/ip_set_ahash.h> - -static inline void -hash_ipport6_data_next(struct ip_set_hash *h, - const struct hash_ipport6_elem *d) -{ - h->next.port = d->port; -} +#define HKEY_DATALEN sizeof(struct hash_ipport6_elem) +#define IP_SET_EMIT_CREATE +#include "ip_set_hash_gen.h" static int hash_ipport6_kadt(struct ip_set *set, const struct sk_buff *skb, const struct xt_action_param *par, - enum ipset_adt adt, const struct ip_set_adt_opt *opt) + enum ipset_adt adt, struct ip_set_adt_opt *opt) { - const struct ip_set_hash *h = set->data; + const struct hash_ipport *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; - struct hash_ipport6_elem data = { }; + struct hash_ipport6_elem e = { }; + struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h); if (!ip_set_get_ip6_port(skb, opt->flags & IPSET_DIM_TWO_SRC, - &data.port, &data.proto)) + &e.port, &e.proto)) return -EINVAL; - ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &data.ip.in6); - - return adtfn(set, &data, opt_timeout(opt, h), opt->cmdflags); + ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip.in6); + return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags); } static int hash_ipport6_uadt(struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { - const struct ip_set_hash *h = set->data; + const struct hash_ipport *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; - struct hash_ipport6_elem data = { }; + struct hash_ipport6_elem e = { }; + struct ip_set_ext ext = IP_SET_INIT_UEXT(h); u32 port, port_to; - u32 timeout = h->timeout; bool with_ports = false; int ret; @@ -389,6 +335,8 @@ hash_ipport6_uadt(struct ip_set *set, struct nlattr *tb[], !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) || !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) || !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) || tb[IPSET_ATTR_IP_TO] || tb[IPSET_ATTR_CIDR])) return -IPSET_ERR_PROTOCOL; @@ -396,39 +344,34 @@ hash_ipport6_uadt(struct ip_set *set, struct nlattr *tb[], if (tb[IPSET_ATTR_LINENO]) *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); - ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &data.ip); + ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip) || + ip_set_get_extensions(set, tb, &ext); if (ret) return ret; if (tb[IPSET_ATTR_PORT]) - data.port = nla_get_be16(tb[IPSET_ATTR_PORT]); + e.port = nla_get_be16(tb[IPSET_ATTR_PORT]); else return -IPSET_ERR_PROTOCOL; if (tb[IPSET_ATTR_PROTO]) { - data.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]); - with_ports = ip_set_proto_with_ports(data.proto); + e.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]); + with_ports = ip_set_proto_with_ports(e.proto); - if (data.proto == 0) + if (e.proto == 0) return 
-IPSET_ERR_INVALID_PROTO; } else return -IPSET_ERR_MISSING_PROTO; - if (!(with_ports || data.proto == IPPROTO_ICMPV6)) - data.port = 0; - - if (tb[IPSET_ATTR_TIMEOUT]) { - if (!with_timeout(h->timeout)) - return -IPSET_ERR_TIMEOUT; - timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); - } + if (!(with_ports || e.proto == IPPROTO_ICMPV6)) + e.port = 0; if (adt == IPSET_TEST || !with_ports || !tb[IPSET_ATTR_PORT_TO]) { - ret = adtfn(set, &data, timeout, flags); + ret = adtfn(set, &e, &ext, &ext, flags); return ip_set_eexist(ret, flags) ? 0 : ret; } - port = ntohs(data.port); + port = ntohs(e.port); port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]); if (port > port_to) swap(port, port_to); @@ -436,8 +379,8 @@ hash_ipport6_uadt(struct ip_set *set, struct nlattr *tb[], if (retried) port = ntohs(h->next.port); for (; port <= port_to; port++) { - data.port = htons(port); - ret = adtfn(set, &data, timeout, flags); + e.port = htons(port); + ret = adtfn(set, &e, &ext, &ext, flags); if (ret && !ip_set_eexist(ret, flags)) return ret; @@ -447,78 +390,6 @@ hash_ipport6_uadt(struct ip_set *set, struct nlattr *tb[], return ret; } -/* Create hash:ip type of sets */ - -static int -hash_ipport_create(struct ip_set *set, struct nlattr *tb[], u32 flags) -{ - struct ip_set_hash *h; - u32 hashsize = IPSET_DEFAULT_HASHSIZE, maxelem = IPSET_DEFAULT_MAXELEM; - u8 hbits; - size_t hsize; - - if (!(set->family == NFPROTO_IPV4 || set->family == NFPROTO_IPV6)) - return -IPSET_ERR_INVALID_FAMILY; - - if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_HASHSIZE) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_MAXELEM) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT))) - return -IPSET_ERR_PROTOCOL; - - if (tb[IPSET_ATTR_HASHSIZE]) { - hashsize = ip_set_get_h32(tb[IPSET_ATTR_HASHSIZE]); - if (hashsize < IPSET_MIMINAL_HASHSIZE) - hashsize = IPSET_MIMINAL_HASHSIZE; - } - - if (tb[IPSET_ATTR_MAXELEM]) - maxelem = ip_set_get_h32(tb[IPSET_ATTR_MAXELEM]); - - h = kzalloc(sizeof(*h), GFP_KERNEL); - if (!h) - return -ENOMEM; - - h->maxelem = maxelem; - get_random_bytes(&h->initval, sizeof(h->initval)); - h->timeout = IPSET_NO_TIMEOUT; - - hbits = htable_bits(hashsize); - hsize = htable_size(hbits); - if (hsize == 0) { - kfree(h); - return -ENOMEM; - } - h->table = ip_set_alloc(hsize); - if (!h->table) { - kfree(h); - return -ENOMEM; - } - h->table->htable_bits = hbits; - - set->data = h; - - if (tb[IPSET_ATTR_TIMEOUT]) { - h->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); - - set->variant = set->family == NFPROTO_IPV4 - ? &hash_ipport4_tvariant : &hash_ipport6_tvariant; - - if (set->family == NFPROTO_IPV4) - hash_ipport4_gc_init(set); - else - hash_ipport6_gc_init(set); - } else { - set->variant = set->family == NFPROTO_IPV4 - ? 
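Reversed port ranges are accepted on input; both address families normalise them with swap() before the insertion loop. Spelled out, with swap() as the usual GNU C temp-variable macro:

#include <stdio.h>

#define swap(a, b) do { typeof(a) t__ = (a); (a) = (b); (b) = t__; } while (0)

int main(void)
{
        unsigned port = 1024, port_to = 80, n = 0;

        if (port > port_to)
                swap(port, port_to);    /* same guard as the diff */
        for (unsigned p = port; p <= port_to; p++)
                n++;                    /* the kernel adds htons(p) here */
        printf("would add %u ports (%u..%u)\n", n, port, port_to);
        return 0;
}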
&hash_ipport4_variant : &hash_ipport6_variant; - } - - pr_debug("create %s hashsize %u (%u) maxelem %u: %p(%p)\n", - set->name, jhash_size(h->table->htable_bits), - h->table->htable_bits, h->maxelem, set->data, h->table); - - return 0; -} - static struct ip_set_type hash_ipport_type __read_mostly = { .name = "hash:ip,port", .protocol = IPSET_PROTOCOL, @@ -535,6 +406,7 @@ static struct ip_set_type hash_ipport_type __read_mostly = { [IPSET_ATTR_RESIZE] = { .type = NLA_U8 }, [IPSET_ATTR_PROTO] = { .type = NLA_U8 }, [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, + [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 }, }, .adt_policy = { [IPSET_ATTR_IP] = { .type = NLA_NESTED }, @@ -545,6 +417,8 @@ static struct ip_set_type hash_ipport_type __read_mostly = { [IPSET_ATTR_PROTO] = { .type = NLA_U8 }, [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, [IPSET_ATTR_LINENO] = { .type = NLA_U32 }, + [IPSET_ATTR_BYTES] = { .type = NLA_U64 }, + [IPSET_ATTR_PACKETS] = { .type = NLA_U64 }, }, .me = THIS_MODULE, }; diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c index 6a21271c8d5a..34e8a1acce42 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportip.c +++ b/net/netfilter/ipset/ip_set_hash_ipportip.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2003-2011 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> +/* Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -21,12 +21,12 @@ #include <linux/netfilter.h> #include <linux/netfilter/ipset/pfxlen.h> #include <linux/netfilter/ipset/ip_set.h> -#include <linux/netfilter/ipset/ip_set_timeout.h> #include <linux/netfilter/ipset/ip_set_getport.h> #include <linux/netfilter/ipset/ip_set_hash.h> #define REVISION_MIN 0 -#define REVISION_MAX 1 /* SCTP and UDPLITE support added */ +/* 1 SCTP and UDPLITE support added */ +#define REVISION_MAX 2 /* Counters support added */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>"); @@ -34,32 +34,44 @@ IP_SET_MODULE_DESC("hash:ip,port,ip", REVISION_MIN, REVISION_MAX); MODULE_ALIAS("ip_set_hash:ip,port,ip"); /* Type specific function prefix */ -#define TYPE hash_ipportip +#define HTYPE hash_ipportip -static bool -hash_ipportip_same_set(const struct ip_set *a, const struct ip_set *b); +/* IPv4 variants */ -#define hash_ipportip4_same_set hash_ipportip_same_set -#define hash_ipportip6_same_set hash_ipportip_same_set +/* Member elements */ +struct hash_ipportip4_elem { + __be32 ip; + __be32 ip2; + __be16 port; + u8 proto; + u8 padding; +}; -/* The type variant functions: IPv4 */ +struct hash_ipportip4t_elem { + __be32 ip; + __be32 ip2; + __be16 port; + u8 proto; + u8 padding; + unsigned long timeout; +}; -/* Member elements without timeout */ -struct hash_ipportip4_elem { +struct hash_ipportip4c_elem { __be32 ip; __be32 ip2; __be16 port; u8 proto; u8 padding; + struct ip_set_counter counter; }; -/* Member elements with timeout support */ -struct hash_ipportip4_telem { +struct hash_ipportip4ct_elem { __be32 ip; __be32 ip2; __be16 port; u8 proto; u8 padding; + struct ip_set_counter counter; unsigned long timeout; }; @@ -74,25 +86,6 @@ hash_ipportip4_data_equal(const struct hash_ipportip4_elem *ip1, ip1->proto == ip2->proto; } -static inline bool -hash_ipportip4_data_isnull(const struct hash_ipportip4_elem *elem) -{ - return elem->proto == 0; -} - -static inline void -hash_ipportip4_data_copy(struct hash_ipportip4_elem *dst, - const 
struct hash_ipportip4_elem *src) -{ - memcpy(dst, src, sizeof(*dst)); -} - -static inline void -hash_ipportip4_data_zero_out(struct hash_ipportip4_elem *elem) -{ - elem->proto = 0; -} - static bool hash_ipportip4_data_list(struct sk_buff *skb, const struct hash_ipportip4_elem *data) @@ -108,117 +101,96 @@ nla_put_failure: return 1; } -static bool -hash_ipportip4_data_tlist(struct sk_buff *skb, - const struct hash_ipportip4_elem *data) +static inline void +hash_ipportip4_data_next(struct hash_ipportip4_elem *next, + const struct hash_ipportip4_elem *d) { - const struct hash_ipportip4_telem *tdata = - (const struct hash_ipportip4_telem *)data; - - if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, tdata->ip) || - nla_put_ipaddr4(skb, IPSET_ATTR_IP2, tdata->ip2) || - nla_put_net16(skb, IPSET_ATTR_PORT, tdata->port) || - nla_put_u8(skb, IPSET_ATTR_PROTO, data->proto) || - nla_put_net32(skb, IPSET_ATTR_TIMEOUT, - htonl(ip_set_timeout_get(tdata->timeout)))) - goto nla_put_failure; - return 0; - -nla_put_failure: - return 1; + next->ip = d->ip; + next->port = d->port; } +/* Common functions */ +#define MTYPE hash_ipportip4 #define PF 4 #define HOST_MASK 32 -#include <linux/netfilter/ipset/ip_set_ahash.h> - -static inline void -hash_ipportip4_data_next(struct ip_set_hash *h, - const struct hash_ipportip4_elem *d) -{ - h->next.ip = d->ip; - h->next.port = d->port; -} +#include "ip_set_hash_gen.h" static int hash_ipportip4_kadt(struct ip_set *set, const struct sk_buff *skb, const struct xt_action_param *par, - enum ipset_adt adt, const struct ip_set_adt_opt *opt) + enum ipset_adt adt, struct ip_set_adt_opt *opt) { - const struct ip_set_hash *h = set->data; + const struct hash_ipportip *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; - struct hash_ipportip4_elem data = { }; + struct hash_ipportip4_elem e = { }; + struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h); if (!ip_set_get_ip4_port(skb, opt->flags & IPSET_DIM_TWO_SRC, - &data.port, &data.proto)) + &e.port, &e.proto)) return -EINVAL; - ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &data.ip); - ip4addrptr(skb, opt->flags & IPSET_DIM_THREE_SRC, &data.ip2); - - return adtfn(set, &data, opt_timeout(opt, h), opt->cmdflags); + ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip); + ip4addrptr(skb, opt->flags & IPSET_DIM_THREE_SRC, &e.ip2); + return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags); } static int hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { - const struct ip_set_hash *h = set->data; + const struct hash_ipportip *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; - struct hash_ipportip4_elem data = { }; + struct hash_ipportip4_elem e = { }; + struct ip_set_ext ext = IP_SET_INIT_UEXT(h); u32 ip, ip_to, p = 0, port, port_to; - u32 timeout = h->timeout; bool with_ports = false; int ret; if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] || !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) || !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT))) + !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES))) return -IPSET_ERR_PROTOCOL; if (tb[IPSET_ATTR_LINENO]) *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); - ret = ip_set_get_ipaddr4(tb[IPSET_ATTR_IP], &data.ip); + ret = ip_set_get_ipaddr4(tb[IPSET_ATTR_IP], &e.ip) || + ip_set_get_extensions(set, tb, &ext); if (ret) return ret; - ret = 
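The data_copy()/data_zero_out() pairs vanish across the whole patch: elements are now flat fixed-size structs that the generated code can copy and clear generically (bounded by HKEY_DATALEN where a type defines it, as hash:ip,port does above). A minimal illustration with an invented layout:

#include <stdio.h>
#include <string.h>
#include <stdint.h>

struct elem { uint32_t ip, ip2; uint16_t port; uint8_t proto, padding; };

#define HKEY_DATALEN sizeof(struct elem)        /* as the diff #defines */

int main(void)
{
        struct elem src = { .ip = 1, .port = 80, .proto = 6 }, dst;

        memcpy(&dst, &src, HKEY_DATALEN);       /* replaces *_data_copy()     */
        printf("copied proto %u port %u\n",
               (unsigned)dst.proto, (unsigned)dst.port);
        memset(&dst, 0, HKEY_DATALEN);          /* replaces *_data_zero_out() */
        return 0;
}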
ip_set_get_ipaddr4(tb[IPSET_ATTR_IP2], &data.ip2); + ret = ip_set_get_ipaddr4(tb[IPSET_ATTR_IP2], &e.ip2); if (ret) return ret; if (tb[IPSET_ATTR_PORT]) - data.port = nla_get_be16(tb[IPSET_ATTR_PORT]); + e.port = nla_get_be16(tb[IPSET_ATTR_PORT]); else return -IPSET_ERR_PROTOCOL; if (tb[IPSET_ATTR_PROTO]) { - data.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]); - with_ports = ip_set_proto_with_ports(data.proto); + e.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]); + with_ports = ip_set_proto_with_ports(e.proto); - if (data.proto == 0) + if (e.proto == 0) return -IPSET_ERR_INVALID_PROTO; } else return -IPSET_ERR_MISSING_PROTO; - if (!(with_ports || data.proto == IPPROTO_ICMP)) - data.port = 0; - - if (tb[IPSET_ATTR_TIMEOUT]) { - if (!with_timeout(h->timeout)) - return -IPSET_ERR_TIMEOUT; - timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); - } + if (!(with_ports || e.proto == IPPROTO_ICMP)) + e.port = 0; if (adt == IPSET_TEST || !(tb[IPSET_ATTR_IP_TO] || tb[IPSET_ATTR_CIDR] || tb[IPSET_ATTR_PORT_TO])) { - ret = adtfn(set, &data, timeout, flags); + ret = adtfn(set, &e, &ext, &ext, flags); return ip_set_eexist(ret, flags) ? 0 : ret; } - ip_to = ip = ntohl(data.ip); + ip_to = ip = ntohl(e.ip); if (tb[IPSET_ATTR_IP_TO]) { ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &ip_to); if (ret) @@ -233,7 +205,7 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[], ip_set_mask_from_to(ip, ip_to, cidr); } - port_to = port = ntohs(data.port); + port_to = port = ntohs(e.port); if (with_ports && tb[IPSET_ATTR_PORT_TO]) { port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]); if (port > port_to) @@ -246,9 +218,9 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[], p = retried && ip == ntohl(h->next.ip) ? ntohs(h->next.port) : port; for (; p <= port_to; p++) { - data.ip = htonl(ip); - data.port = htons(p); - ret = adtfn(set, &data, timeout, flags); + e.ip = htonl(ip); + e.port = htons(p); + ret = adtfn(set, &e, &ext, &ext, flags); if (ret && !ip_set_eexist(ret, flags)) return ret; @@ -259,66 +231,57 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[], return ret; } -static bool -hash_ipportip_same_set(const struct ip_set *a, const struct ip_set *b) -{ - const struct ip_set_hash *x = a->data; - const struct ip_set_hash *y = b->data; +/* IPv6 variants */ - /* Resizing changes htable_bits, so we ignore it */ - return x->maxelem == y->maxelem && - x->timeout == y->timeout; -} +struct hash_ipportip6_elem { + union nf_inet_addr ip; + union nf_inet_addr ip2; + __be16 port; + u8 proto; + u8 padding; +}; -/* The type variant functions: IPv6 */ +struct hash_ipportip6t_elem { + union nf_inet_addr ip; + union nf_inet_addr ip2; + __be16 port; + u8 proto; + u8 padding; + unsigned long timeout; +}; -struct hash_ipportip6_elem { +struct hash_ipportip6c_elem { union nf_inet_addr ip; union nf_inet_addr ip2; __be16 port; u8 proto; u8 padding; + struct ip_set_counter counter; }; -struct hash_ipportip6_telem { +struct hash_ipportip6ct_elem { union nf_inet_addr ip; union nf_inet_addr ip2; __be16 port; u8 proto; u8 padding; + struct ip_set_counter counter; unsigned long timeout; }; +/* Common functions */ + static inline bool hash_ipportip6_data_equal(const struct hash_ipportip6_elem *ip1, const struct hash_ipportip6_elem *ip2, u32 *multi) { - return ipv6_addr_cmp(&ip1->ip.in6, &ip2->ip.in6) == 0 && - ipv6_addr_cmp(&ip1->ip2.in6, &ip2->ip2.in6) == 0 && + return ipv6_addr_equal(&ip1->ip.in6, &ip2->ip.in6) && + ipv6_addr_equal(&ip1->ip2.in6, &ip2->ip2.in6) && ip1->port == ip2->port && ip1->proto == ip2->proto; } 
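Every IPv6 data_equal helper switches from ipv6_addr_cmp(a, b) == 0 to ipv6_addr_equal(a, b): a hash lookup needs equality only, and an XOR/OR fold is cheaper than an ordering comparison. A userspace model of the fold:

#include <stdio.h>
#include <stdint.h>

struct in6 { uint32_t s6_addr32[4]; };

static int addr_equal(const struct in6 *a, const struct in6 *b)
{
        return ((a->s6_addr32[0] ^ b->s6_addr32[0]) |
                (a->s6_addr32[1] ^ b->s6_addr32[1]) |
                (a->s6_addr32[2] ^ b->s6_addr32[2]) |
                (a->s6_addr32[3] ^ b->s6_addr32[3])) == 0;
}

int main(void)
{
        struct in6 a = { { 0x20010db8, 0, 0, 1 } }, b = a;

        printf("equal: %d\n", addr_equal(&a, &b));
        return 0;
}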
-static inline bool -hash_ipportip6_data_isnull(const struct hash_ipportip6_elem *elem) -{ - return elem->proto == 0; -} - -static inline void -hash_ipportip6_data_copy(struct hash_ipportip6_elem *dst, - const struct hash_ipportip6_elem *src) -{ - memcpy(dst, src, sizeof(*dst)); -} - -static inline void -hash_ipportip6_data_zero_out(struct hash_ipportip6_elem *elem) -{ - elem->proto = 0; -} - static bool hash_ipportip6_data_list(struct sk_buff *skb, const struct hash_ipportip6_elem *data) @@ -334,68 +297,51 @@ nla_put_failure: return 1; } -static bool -hash_ipportip6_data_tlist(struct sk_buff *skb, - const struct hash_ipportip6_elem *data) +static inline void +hash_ipportip6_data_next(struct hash_ipportip4_elem *next, + const struct hash_ipportip6_elem *d) { - const struct hash_ipportip6_telem *e = - (const struct hash_ipportip6_telem *)data; - - if (nla_put_ipaddr6(skb, IPSET_ATTR_IP, &e->ip.in6) || - nla_put_ipaddr6(skb, IPSET_ATTR_IP2, &data->ip2.in6) || - nla_put_net16(skb, IPSET_ATTR_PORT, data->port) || - nla_put_u8(skb, IPSET_ATTR_PROTO, data->proto) || - nla_put_net32(skb, IPSET_ATTR_TIMEOUT, - htonl(ip_set_timeout_get(e->timeout)))) - goto nla_put_failure; - return 0; - -nla_put_failure: - return 1; + next->port = d->port; } +#undef MTYPE #undef PF #undef HOST_MASK +#define MTYPE hash_ipportip6 #define PF 6 #define HOST_MASK 128 -#include <linux/netfilter/ipset/ip_set_ahash.h> - -static inline void -hash_ipportip6_data_next(struct ip_set_hash *h, - const struct hash_ipportip6_elem *d) -{ - h->next.port = d->port; -} +#define IP_SET_EMIT_CREATE +#include "ip_set_hash_gen.h" static int hash_ipportip6_kadt(struct ip_set *set, const struct sk_buff *skb, const struct xt_action_param *par, - enum ipset_adt adt, const struct ip_set_adt_opt *opt) + enum ipset_adt adt, struct ip_set_adt_opt *opt) { - const struct ip_set_hash *h = set->data; + const struct hash_ipportip *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; - struct hash_ipportip6_elem data = { }; + struct hash_ipportip6_elem e = { }; + struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h); if (!ip_set_get_ip6_port(skb, opt->flags & IPSET_DIM_TWO_SRC, - &data.port, &data.proto)) + &e.port, &e.proto)) return -EINVAL; - ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &data.ip.in6); - ip6addrptr(skb, opt->flags & IPSET_DIM_THREE_SRC, &data.ip2.in6); - - return adtfn(set, &data, opt_timeout(opt, h), opt->cmdflags); + ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip.in6); + ip6addrptr(skb, opt->flags & IPSET_DIM_THREE_SRC, &e.ip2.in6); + return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags); } static int hash_ipportip6_uadt(struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { - const struct ip_set_hash *h = set->data; + const struct hash_ipportip *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; - struct hash_ipportip6_elem data = { }; + struct hash_ipportip6_elem e = { }; + struct ip_set_ext ext = IP_SET_INIT_UEXT(h); u32 port, port_to; - u32 timeout = h->timeout; bool with_ports = false; int ret; @@ -403,6 +349,8 @@ hash_ipportip6_uadt(struct ip_set *set, struct nlattr *tb[], !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) || !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) || !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) || tb[IPSET_ATTR_IP_TO] || tb[IPSET_ATTR_CIDR])) return -IPSET_ERR_PROTOCOL; @@ -410,43 +358,38 @@ hash_ipportip6_uadt(struct ip_set *set, 
struct nlattr *tb[], if (tb[IPSET_ATTR_LINENO]) *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); - ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &data.ip); + ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip) || + ip_set_get_extensions(set, tb, &ext); if (ret) return ret; - ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP2], &data.ip2); + ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP2], &e.ip2); if (ret) return ret; if (tb[IPSET_ATTR_PORT]) - data.port = nla_get_be16(tb[IPSET_ATTR_PORT]); + e.port = nla_get_be16(tb[IPSET_ATTR_PORT]); else return -IPSET_ERR_PROTOCOL; if (tb[IPSET_ATTR_PROTO]) { - data.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]); - with_ports = ip_set_proto_with_ports(data.proto); + e.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]); + with_ports = ip_set_proto_with_ports(e.proto); - if (data.proto == 0) + if (e.proto == 0) return -IPSET_ERR_INVALID_PROTO; } else return -IPSET_ERR_MISSING_PROTO; - if (!(with_ports || data.proto == IPPROTO_ICMPV6)) - data.port = 0; - - if (tb[IPSET_ATTR_TIMEOUT]) { - if (!with_timeout(h->timeout)) - return -IPSET_ERR_TIMEOUT; - timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); - } + if (!(with_ports || e.proto == IPPROTO_ICMPV6)) + e.port = 0; if (adt == IPSET_TEST || !with_ports || !tb[IPSET_ATTR_PORT_TO]) { - ret = adtfn(set, &data, timeout, flags); + ret = adtfn(set, &e, &ext, &ext, flags); return ip_set_eexist(ret, flags) ? 0 : ret; } - port = ntohs(data.port); + port = ntohs(e.port); port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]); if (port > port_to) swap(port, port_to); @@ -454,8 +397,8 @@ hash_ipportip6_uadt(struct ip_set *set, struct nlattr *tb[], if (retried) port = ntohs(h->next.port); for (; port <= port_to; port++) { - data.port = htons(port); - ret = adtfn(set, &data, timeout, flags); + e.port = htons(port); + ret = adtfn(set, &e, &ext, &ext, flags); if (ret && !ip_set_eexist(ret, flags)) return ret; @@ -465,78 +408,6 @@ hash_ipportip6_uadt(struct ip_set *set, struct nlattr *tb[], return ret; } -/* Create hash:ip type of sets */ - -static int -hash_ipportip_create(struct ip_set *set, struct nlattr *tb[], u32 flags) -{ - struct ip_set_hash *h; - u32 hashsize = IPSET_DEFAULT_HASHSIZE, maxelem = IPSET_DEFAULT_MAXELEM; - u8 hbits; - size_t hsize; - - if (!(set->family == NFPROTO_IPV4 || set->family == NFPROTO_IPV6)) - return -IPSET_ERR_INVALID_FAMILY; - - if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_HASHSIZE) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_MAXELEM) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT))) - return -IPSET_ERR_PROTOCOL; - - if (tb[IPSET_ATTR_HASHSIZE]) { - hashsize = ip_set_get_h32(tb[IPSET_ATTR_HASHSIZE]); - if (hashsize < IPSET_MIMINAL_HASHSIZE) - hashsize = IPSET_MIMINAL_HASHSIZE; - } - - if (tb[IPSET_ATTR_MAXELEM]) - maxelem = ip_set_get_h32(tb[IPSET_ATTR_MAXELEM]); - - h = kzalloc(sizeof(*h), GFP_KERNEL); - if (!h) - return -ENOMEM; - - h->maxelem = maxelem; - get_random_bytes(&h->initval, sizeof(h->initval)); - h->timeout = IPSET_NO_TIMEOUT; - - hbits = htable_bits(hashsize); - hsize = htable_size(hbits); - if (hsize == 0) { - kfree(h); - return -ENOMEM; - } - h->table = ip_set_alloc(hsize); - if (!h->table) { - kfree(h); - return -ENOMEM; - } - h->table->htable_bits = hbits; - - set->data = h; - - if (tb[IPSET_ATTR_TIMEOUT]) { - h->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); - - set->variant = set->family == NFPROTO_IPV4 - ? 
&hash_ipportip4_tvariant : &hash_ipportip6_tvariant; - - if (set->family == NFPROTO_IPV4) - hash_ipportip4_gc_init(set); - else - hash_ipportip6_gc_init(set); - } else { - set->variant = set->family == NFPROTO_IPV4 - ? &hash_ipportip4_variant : &hash_ipportip6_variant; - } - - pr_debug("create %s hashsize %u (%u) maxelem %u: %p(%p)\n", - set->name, jhash_size(h->table->htable_bits), - h->table->htable_bits, h->maxelem, set->data, h->table); - - return 0; -} - static struct ip_set_type hash_ipportip_type __read_mostly = { .name = "hash:ip,port,ip", .protocol = IPSET_PROTOCOL, @@ -552,6 +423,7 @@ static struct ip_set_type hash_ipportip_type __read_mostly = { [IPSET_ATTR_PROBES] = { .type = NLA_U8 }, [IPSET_ATTR_RESIZE] = { .type = NLA_U8 }, [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, + [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 }, }, .adt_policy = { [IPSET_ATTR_IP] = { .type = NLA_NESTED }, @@ -563,6 +435,8 @@ static struct ip_set_type hash_ipportip_type __read_mostly = { [IPSET_ATTR_PROTO] = { .type = NLA_U8 }, [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, [IPSET_ATTR_LINENO] = { .type = NLA_U32 }, + [IPSET_ATTR_BYTES] = { .type = NLA_U64 }, + [IPSET_ATTR_PACKETS] = { .type = NLA_U64 }, }, .me = THIS_MODULE, }; diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c index 2d5cd4ee30eb..c6a525373be4 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportnet.c +++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2003-2011 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> +/* Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -21,14 +21,14 @@ #include <linux/netfilter.h> #include <linux/netfilter/ipset/pfxlen.h> #include <linux/netfilter/ipset/ip_set.h> -#include <linux/netfilter/ipset/ip_set_timeout.h> #include <linux/netfilter/ipset/ip_set_getport.h> #include <linux/netfilter/ipset/ip_set_hash.h> #define REVISION_MIN 0 /* 1 SCTP and UDPLITE support added */ /* 2 Range as input support for IPv4 added */ -#define REVISION_MAX 3 /* nomatch flag support added */ +/* 3 nomatch flag support added */ +#define REVISION_MAX 4 /* Counters support added */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>"); @@ -36,23 +36,19 @@ IP_SET_MODULE_DESC("hash:ip,port,net", REVISION_MIN, REVISION_MAX); MODULE_ALIAS("ip_set_hash:ip,port,net"); /* Type specific function prefix */ -#define TYPE hash_ipportnet - -static bool -hash_ipportnet_same_set(const struct ip_set *a, const struct ip_set *b); - -#define hash_ipportnet4_same_set hash_ipportnet_same_set -#define hash_ipportnet6_same_set hash_ipportnet_same_set - -/* The type variant functions: IPv4 */ +#define HTYPE hash_ipportnet /* We squeeze the "nomatch" flag into cidr: we don't support cidr == 0 * However this way we have to store internally cidr - 1, * dancing back and forth. 
*/ #define IP_SET_HASH_WITH_NETS_PACKED +#define IP_SET_HASH_WITH_PROTO +#define IP_SET_HASH_WITH_NETS + +/* IPv4 variants */ -/* Member elements without timeout */ +/* Member elements */ struct hash_ipportnet4_elem { __be32 ip; __be32 ip2; @@ -62,8 +58,7 @@ struct hash_ipportnet4_elem { u8 proto; }; -/* Member elements with timeout support */ -struct hash_ipportnet4_telem { +struct hash_ipportnet4t_elem { __be32 ip; __be32 ip2; __be16 port; @@ -73,6 +68,29 @@ struct hash_ipportnet4_telem { unsigned long timeout; }; +struct hash_ipportnet4c_elem { + __be32 ip; + __be32 ip2; + __be16 port; + u8 cidr:7; + u8 nomatch:1; + u8 proto; + struct ip_set_counter counter; +}; + +struct hash_ipportnet4ct_elem { + __be32 ip; + __be32 ip2; + __be16 port; + u8 cidr:7; + u8 nomatch:1; + u8 proto; + struct ip_set_counter counter; + unsigned long timeout; +}; + +/* Common functions */ + static inline bool hash_ipportnet4_data_equal(const struct hash_ipportnet4_elem *ip1, const struct hash_ipportnet4_elem *ip2, @@ -85,29 +103,22 @@ hash_ipportnet4_data_equal(const struct hash_ipportnet4_elem *ip1, ip1->proto == ip2->proto; } -static inline bool -hash_ipportnet4_data_isnull(const struct hash_ipportnet4_elem *elem) +static inline int +hash_ipportnet4_do_data_match(const struct hash_ipportnet4_elem *elem) { - return elem->proto == 0; + return elem->nomatch ? -ENOTEMPTY : 1; } static inline void -hash_ipportnet4_data_copy(struct hash_ipportnet4_elem *dst, - const struct hash_ipportnet4_elem *src) +hash_ipportnet4_data_set_flags(struct hash_ipportnet4_elem *elem, u32 flags) { - memcpy(dst, src, sizeof(*dst)); + elem->nomatch = !!((flags >> 16) & IPSET_FLAG_NOMATCH); } static inline void -hash_ipportnet4_data_flags(struct hash_ipportnet4_elem *dst, u32 flags) +hash_ipportnet4_data_reset_flags(struct hash_ipportnet4_elem *elem, u8 *flags) { - dst->nomatch = !!(flags & IPSET_FLAG_NOMATCH); -} - -static inline int -hash_ipportnet4_data_match(const struct hash_ipportnet4_elem *elem) -{ - return elem->nomatch ? -ENOTEMPTY : 1; + swap(*flags, elem->nomatch); } static inline void @@ -117,12 +128,6 @@ hash_ipportnet4_data_netmask(struct hash_ipportnet4_elem *elem, u8 cidr) elem->cidr = cidr - 1; } -static inline void -hash_ipportnet4_data_zero_out(struct hash_ipportnet4_elem *elem) -{ - elem->proto = 0; -} - static bool hash_ipportnet4_data_list(struct sk_buff *skb, const struct hash_ipportnet4_elem *data) @@ -143,81 +148,56 @@ nla_put_failure: return 1; } -static bool -hash_ipportnet4_data_tlist(struct sk_buff *skb, - const struct hash_ipportnet4_elem *data) +static inline void +hash_ipportnet4_data_next(struct hash_ipportnet4_elem *next, + const struct hash_ipportnet4_elem *d) { - const struct hash_ipportnet4_telem *tdata = - (const struct hash_ipportnet4_telem *)data; - u32 flags = data->nomatch ? 
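hash:ip,port,net keeps the trick described in the comment above: nomatch steals the top bit of the cidr byte, and the prefix is stored minus one so /1../128 fits in 7 bits (a /0 net is unsupported anyway). The pack/unpack round trip:

#include <stdio.h>

struct elem {
        unsigned char cidr:7;   /* stores real_cidr - 1, so 1..128 fits */
        unsigned char nomatch:1;
};

int main(void)
{
        struct elem e = { 0 };

        e.cidr = 128 - 1;       /* pack a /128 */
        e.nomatch = 1;
        printf("real cidr %d, nomatch %d, bytes %zu\n",
               e.cidr + 1, e.nomatch, sizeof(e));
        return 0;
}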
IPSET_FLAG_NOMATCH : 0; - - if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, tdata->ip) || - nla_put_ipaddr4(skb, IPSET_ATTR_IP2, tdata->ip2) || - nla_put_net16(skb, IPSET_ATTR_PORT, tdata->port) || - nla_put_u8(skb, IPSET_ATTR_CIDR2, data->cidr + 1) || - nla_put_u8(skb, IPSET_ATTR_PROTO, data->proto) || - nla_put_net32(skb, IPSET_ATTR_TIMEOUT, - htonl(ip_set_timeout_get(tdata->timeout))) || - (flags && - nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags)))) - goto nla_put_failure; - return 0; - -nla_put_failure: - return 1; + next->ip = d->ip; + next->port = d->port; + next->ip2 = d->ip2; } -#define IP_SET_HASH_WITH_PROTO -#define IP_SET_HASH_WITH_NETS - +#define MTYPE hash_ipportnet4 #define PF 4 #define HOST_MASK 32 -#include <linux/netfilter/ipset/ip_set_ahash.h> - -static inline void -hash_ipportnet4_data_next(struct ip_set_hash *h, - const struct hash_ipportnet4_elem *d) -{ - h->next.ip = d->ip; - h->next.port = d->port; - h->next.ip2 = d->ip2; -} +#include "ip_set_hash_gen.h" static int hash_ipportnet4_kadt(struct ip_set *set, const struct sk_buff *skb, const struct xt_action_param *par, - enum ipset_adt adt, const struct ip_set_adt_opt *opt) + enum ipset_adt adt, struct ip_set_adt_opt *opt) { - const struct ip_set_hash *h = set->data; + const struct hash_ipportnet *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; - struct hash_ipportnet4_elem data = { + struct hash_ipportnet4_elem e = { .cidr = h->nets[0].cidr ? h->nets[0].cidr - 1 : HOST_MASK - 1 }; + struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h); if (adt == IPSET_TEST) - data.cidr = HOST_MASK - 1; + e.cidr = HOST_MASK - 1; if (!ip_set_get_ip4_port(skb, opt->flags & IPSET_DIM_TWO_SRC, - &data.port, &data.proto)) + &e.port, &e.proto)) return -EINVAL; - ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &data.ip); - ip4addrptr(skb, opt->flags & IPSET_DIM_THREE_SRC, &data.ip2); - data.ip2 &= ip_set_netmask(data.cidr + 1); + ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip); + ip4addrptr(skb, opt->flags & IPSET_DIM_THREE_SRC, &e.ip2); + e.ip2 &= ip_set_netmask(e.cidr + 1); - return adtfn(set, &data, opt_timeout(opt, h), opt->cmdflags); + return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags); } static int hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { - const struct ip_set_hash *h = set->data; + const struct hash_ipportnet *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; - struct hash_ipportnet4_elem data = { .cidr = HOST_MASK - 1 }; + struct hash_ipportnet4_elem e = { .cidr = HOST_MASK - 1 }; + struct ip_set_ext ext = IP_SET_INIT_UEXT(h); u32 ip, ip_to, p = 0, port, port_to; u32 ip2_from, ip2_to, ip2_last, ip2; - u32 timeout = h->timeout; bool with_ports = false; u8 cidr; int ret; @@ -226,13 +206,16 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[], !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) || !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) || !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS))) + !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES))) return -IPSET_ERR_PROTOCOL; if (tb[IPSET_ATTR_LINENO]) *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); - ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip); + ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip) || + ip_set_get_extensions(set, tb, &ext); if (ret) return ret; @@ -244,46 +227,41 @@ 
hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[], cidr = nla_get_u8(tb[IPSET_ATTR_CIDR2]); if (!cidr || cidr > HOST_MASK) return -IPSET_ERR_INVALID_CIDR; - data.cidr = cidr - 1; + e.cidr = cidr - 1; } if (tb[IPSET_ATTR_PORT]) - data.port = nla_get_be16(tb[IPSET_ATTR_PORT]); + e.port = nla_get_be16(tb[IPSET_ATTR_PORT]); else return -IPSET_ERR_PROTOCOL; if (tb[IPSET_ATTR_PROTO]) { - data.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]); - with_ports = ip_set_proto_with_ports(data.proto); + e.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]); + with_ports = ip_set_proto_with_ports(e.proto); - if (data.proto == 0) + if (e.proto == 0) return -IPSET_ERR_INVALID_PROTO; } else return -IPSET_ERR_MISSING_PROTO; - if (!(with_ports || data.proto == IPPROTO_ICMP)) - data.port = 0; + if (!(with_ports || e.proto == IPPROTO_ICMP)) + e.port = 0; - if (tb[IPSET_ATTR_TIMEOUT]) { - if (!with_timeout(h->timeout)) - return -IPSET_ERR_TIMEOUT; - timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); - } - - if (tb[IPSET_ATTR_CADT_FLAGS] && adt == IPSET_ADD) { + if (tb[IPSET_ATTR_CADT_FLAGS]) { u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); if (cadt_flags & IPSET_FLAG_NOMATCH) - flags |= (cadt_flags << 16); + flags |= (IPSET_FLAG_NOMATCH << 16); } with_ports = with_ports && tb[IPSET_ATTR_PORT_TO]; if (adt == IPSET_TEST || !(tb[IPSET_ATTR_CIDR] || tb[IPSET_ATTR_IP_TO] || with_ports || tb[IPSET_ATTR_IP2_TO])) { - data.ip = htonl(ip); - data.ip2 = htonl(ip2_from & ip_set_hostmask(data.cidr + 1)); - ret = adtfn(set, &data, timeout, flags); - return ip_set_eexist(ret, flags) ? 0 : ret; + e.ip = htonl(ip); + e.ip2 = htonl(ip2_from & ip_set_hostmask(e.cidr + 1)); + ret = adtfn(set, &e, &ext, &ext, flags); + return ip_set_enomatch(ret, flags, adt) ? 1 : + ip_set_eexist(ret, flags) ? 0 : ret; } ip_to = ip; @@ -301,7 +279,7 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[], ip_set_mask_from_to(ip, ip_to, cidr); } - port_to = port = ntohs(data.port); + port_to = port = ntohs(e.port); if (tb[IPSET_ATTR_PORT_TO]) { port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]); if (port > port_to) @@ -317,28 +295,27 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[], swap(ip2_from, ip2_to); if (ip2_from + UINT_MAX == ip2_to) return -IPSET_ERR_HASH_RANGE; - } else { - ip_set_mask_from_to(ip2_from, ip2_to, data.cidr + 1); - } + } else + ip_set_mask_from_to(ip2_from, ip2_to, e.cidr + 1); if (retried) ip = ntohl(h->next.ip); for (; !before(ip_to, ip); ip++) { - data.ip = htonl(ip); + e.ip = htonl(ip); p = retried && ip == ntohl(h->next.ip) ? ntohs(h->next.port) : port; for (; p <= port_to; p++) { - data.port = htons(p); + e.port = htons(p); ip2 = retried && ip == ntohl(h->next.ip) && p == ntohs(h->next.port) ? 
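Note the fix folded into the conversion: the old code or'ed the whole cadt_flags word shifted by 16 into flags, while the new code promotes exactly IPSET_FLAG_NOMATCH, keeping the upper half of the flags word a clean per-element flag area (the lower half stays command flags). A sketch with an illustrative flag value:

#include <stdio.h>
#include <stdint.h>

#define FLAG_NOMATCH 0x8        /* illustrative, not the real constant */

int main(void)
{
        uint32_t cadt_flags = FLAG_NOMATCH | 0x4;       /* from userspace */
        uint32_t flags = 0;                             /* command flags  */

        /* new code: promote exactly the one known bit */
        if (cadt_flags & FLAG_NOMATCH)
                flags |= (FLAG_NOMATCH << 16);
        printf("element flags: 0x%x\n", flags >> 16);
        return 0;
}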
ntohl(h->next.ip2) : ip2_from; while (!after(ip2, ip2_to)) { - data.ip2 = htonl(ip2); + e.ip2 = htonl(ip2); ip2_last = ip_set_range_to_cidr(ip2, ip2_to, &cidr); - data.cidr = cidr - 1; - ret = adtfn(set, &data, timeout, flags); + e.cidr = cidr - 1; + ret = adtfn(set, &e, &ext, &ext, flags); if (ret && !ip_set_eexist(ret, flags)) return ret; @@ -351,88 +328,78 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[], return ret; } -static bool -hash_ipportnet_same_set(const struct ip_set *a, const struct ip_set *b) -{ - const struct ip_set_hash *x = a->data; - const struct ip_set_hash *y = b->data; +/* IPv6 variants */ - /* Resizing changes htable_bits, so we ignore it */ - return x->maxelem == y->maxelem && - x->timeout == y->timeout; -} +struct hash_ipportnet6_elem { + union nf_inet_addr ip; + union nf_inet_addr ip2; + __be16 port; + u8 cidr:7; + u8 nomatch:1; + u8 proto; +}; -/* The type variant functions: IPv6 */ +struct hash_ipportnet6t_elem { + union nf_inet_addr ip; + union nf_inet_addr ip2; + __be16 port; + u8 cidr:7; + u8 nomatch:1; + u8 proto; + unsigned long timeout; +}; -struct hash_ipportnet6_elem { +struct hash_ipportnet6c_elem { union nf_inet_addr ip; union nf_inet_addr ip2; __be16 port; u8 cidr:7; u8 nomatch:1; u8 proto; + struct ip_set_counter counter; }; -struct hash_ipportnet6_telem { +struct hash_ipportnet6ct_elem { union nf_inet_addr ip; union nf_inet_addr ip2; __be16 port; u8 cidr:7; u8 nomatch:1; u8 proto; + struct ip_set_counter counter; unsigned long timeout; }; +/* Common functions */ + static inline bool hash_ipportnet6_data_equal(const struct hash_ipportnet6_elem *ip1, const struct hash_ipportnet6_elem *ip2, u32 *multi) { - return ipv6_addr_cmp(&ip1->ip.in6, &ip2->ip.in6) == 0 && - ipv6_addr_cmp(&ip1->ip2.in6, &ip2->ip2.in6) == 0 && + return ipv6_addr_equal(&ip1->ip.in6, &ip2->ip.in6) && + ipv6_addr_equal(&ip1->ip2.in6, &ip2->ip2.in6) && ip1->cidr == ip2->cidr && ip1->port == ip2->port && ip1->proto == ip2->proto; } -static inline bool -hash_ipportnet6_data_isnull(const struct hash_ipportnet6_elem *elem) -{ - return elem->proto == 0; -} - -static inline void -hash_ipportnet6_data_copy(struct hash_ipportnet6_elem *dst, - const struct hash_ipportnet6_elem *src) -{ - memcpy(dst, src, sizeof(*dst)); -} - -static inline void -hash_ipportnet6_data_flags(struct hash_ipportnet6_elem *dst, u32 flags) -{ - dst->nomatch = !!(flags & IPSET_FLAG_NOMATCH); -} - static inline int -hash_ipportnet6_data_match(const struct hash_ipportnet6_elem *elem) +hash_ipportnet6_do_data_match(const struct hash_ipportnet6_elem *elem) { return elem->nomatch ? 
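The inner ip2 loop covers an arbitrary address range with CIDR blocks via ip_set_range_to_cidr(): each step takes the largest prefix that starts at ip2, stays aligned, and fits inside the range. A freestanding version of that decomposition (overflow guards omitted for brevity):

#include <stdio.h>
#include <stdint.h>

static uint32_t range_to_cidr(uint32_t ip, uint32_t last, uint8_t *cidr)
{
        uint8_t bits = 32;
        uint32_t size = 1;

        /* grow while the block stays aligned and inside the range */
        while (bits > 0 && !(ip & size) && ip + (size << 1) - 1 <= last) {
                size <<= 1;
                bits--;
        }
        *cidr = bits;
        return ip + size - 1;   /* last address of the chosen block */
}

int main(void)
{
        uint32_t ip = 10, last = 100, end;
        uint8_t cidr;

        do {
                end = range_to_cidr(ip, last, &cidr);
                printf("%u/%u covers %u..%u\n", ip, (unsigned)cidr, ip, end);
                ip = end + 1;
        } while (end < last);
        return 0;
}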
-ENOTEMPTY : 1; } static inline void -hash_ipportnet6_data_zero_out(struct hash_ipportnet6_elem *elem) +hash_ipportnet6_data_set_flags(struct hash_ipportnet6_elem *elem, u32 flags) { - elem->proto = 0; + elem->nomatch = !!((flags >> 16) & IPSET_FLAG_NOMATCH); } static inline void -ip6_netmask(union nf_inet_addr *ip, u8 prefix) +hash_ipportnet6_data_reset_flags(struct hash_ipportnet6_elem *elem, u8 *flags) { - ip->ip6[0] &= ip_set_netmask6(prefix)[0]; - ip->ip6[1] &= ip_set_netmask6(prefix)[1]; - ip->ip6[2] &= ip_set_netmask6(prefix)[2]; - ip->ip6[3] &= ip_set_netmask6(prefix)[3]; + swap(*flags, elem->nomatch); } static inline void @@ -462,78 +429,58 @@ nla_put_failure: return 1; } -static bool -hash_ipportnet6_data_tlist(struct sk_buff *skb, - const struct hash_ipportnet6_elem *data) +static inline void +hash_ipportnet6_data_next(struct hash_ipportnet4_elem *next, + const struct hash_ipportnet6_elem *d) { - const struct hash_ipportnet6_telem *e = - (const struct hash_ipportnet6_telem *)data; - u32 flags = data->nomatch ? IPSET_FLAG_NOMATCH : 0; - - if (nla_put_ipaddr6(skb, IPSET_ATTR_IP, &e->ip.in6) || - nla_put_ipaddr6(skb, IPSET_ATTR_IP2, &data->ip2.in6) || - nla_put_net16(skb, IPSET_ATTR_PORT, data->port) || - nla_put_u8(skb, IPSET_ATTR_CIDR2, data->cidr + 1) || - nla_put_u8(skb, IPSET_ATTR_PROTO, data->proto) || - nla_put_net32(skb, IPSET_ATTR_TIMEOUT, - htonl(ip_set_timeout_get(e->timeout))) || - (flags && - nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags)))) - goto nla_put_failure; - return 0; - -nla_put_failure: - return 1; + next->port = d->port; } +#undef MTYPE #undef PF #undef HOST_MASK +#define MTYPE hash_ipportnet6 #define PF 6 #define HOST_MASK 128 -#include <linux/netfilter/ipset/ip_set_ahash.h> - -static inline void -hash_ipportnet6_data_next(struct ip_set_hash *h, - const struct hash_ipportnet6_elem *d) -{ - h->next.port = d->port; -} +#define IP_SET_EMIT_CREATE +#include "ip_set_hash_gen.h" static int hash_ipportnet6_kadt(struct ip_set *set, const struct sk_buff *skb, const struct xt_action_param *par, - enum ipset_adt adt, const struct ip_set_adt_opt *opt) + enum ipset_adt adt, struct ip_set_adt_opt *opt) { - const struct ip_set_hash *h = set->data; + const struct hash_ipportnet *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; - struct hash_ipportnet6_elem data = { + struct hash_ipportnet6_elem e = { .cidr = h->nets[0].cidr ? 
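do_data_match() is how a present-but-excluded entry reports itself: -ENOTEMPTY for a nomatch element, 1 for a real member, so the generated lookup can distinguish "absent" from "present but negated". A minimal model:

#include <stdio.h>
#include <errno.h>

struct elem { unsigned nomatch:1; };

static int do_data_match(const struct elem *e)
{
        return e->nomatch ? -ENOTEMPTY : 1;     /* same shape as the diff */
}

int main(void)
{
        struct elem member = { 0 }, excluded = { 1 };

        printf("member matches: %d, excluded matches: %d\n",
               do_data_match(&member) > 0, do_data_match(&excluded) > 0);
        return 0;
}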
h->nets[0].cidr - 1 : HOST_MASK - 1 }; + struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h); if (adt == IPSET_TEST) - data.cidr = HOST_MASK - 1; + e.cidr = HOST_MASK - 1; if (!ip_set_get_ip6_port(skb, opt->flags & IPSET_DIM_TWO_SRC, - &data.port, &data.proto)) + &e.port, &e.proto)) return -EINVAL; - ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &data.ip.in6); - ip6addrptr(skb, opt->flags & IPSET_DIM_THREE_SRC, &data.ip2.in6); - ip6_netmask(&data.ip2, data.cidr + 1); + ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip.in6); + ip6addrptr(skb, opt->flags & IPSET_DIM_THREE_SRC, &e.ip2.in6); + ip6_netmask(&e.ip2, e.cidr + 1); - return adtfn(set, &data, opt_timeout(opt, h), opt->cmdflags); + return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags); } static int hash_ipportnet6_uadt(struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { - const struct ip_set_hash *h = set->data; + const struct hash_ipportnet *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; - struct hash_ipportnet6_elem data = { .cidr = HOST_MASK - 1 }; + struct hash_ipportnet6_elem e = { .cidr = HOST_MASK - 1 }; + struct ip_set_ext ext = IP_SET_INIT_UEXT(h); u32 port, port_to; - u32 timeout = h->timeout; bool with_ports = false; u8 cidr; int ret; @@ -543,6 +490,8 @@ hash_ipportnet6_uadt(struct ip_set *set, struct nlattr *tb[], !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) || !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) || tb[IPSET_ATTR_IP_TO] || tb[IPSET_ATTR_CIDR])) return -IPSET_ERR_PROTOCOL; @@ -552,11 +501,12 @@ hash_ipportnet6_uadt(struct ip_set *set, struct nlattr *tb[], if (tb[IPSET_ATTR_LINENO]) *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); - ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &data.ip); + ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip) || + ip_set_get_extensions(set, tb, &ext); if (ret) return ret; - ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP2], &data.ip2); + ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP2], &e.ip2); if (ret) return ret; @@ -564,46 +514,41 @@ hash_ipportnet6_uadt(struct ip_set *set, struct nlattr *tb[], cidr = nla_get_u8(tb[IPSET_ATTR_CIDR2]); if (!cidr || cidr > HOST_MASK) return -IPSET_ERR_INVALID_CIDR; - data.cidr = cidr - 1; + e.cidr = cidr - 1; } - ip6_netmask(&data.ip2, data.cidr + 1); + ip6_netmask(&e.ip2, e.cidr + 1); if (tb[IPSET_ATTR_PORT]) - data.port = nla_get_be16(tb[IPSET_ATTR_PORT]); + e.port = nla_get_be16(tb[IPSET_ATTR_PORT]); else return -IPSET_ERR_PROTOCOL; if (tb[IPSET_ATTR_PROTO]) { - data.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]); - with_ports = ip_set_proto_with_ports(data.proto); + e.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]); + with_ports = ip_set_proto_with_ports(e.proto); - if (data.proto == 0) + if (e.proto == 0) return -IPSET_ERR_INVALID_PROTO; } else return -IPSET_ERR_MISSING_PROTO; - if (!(with_ports || data.proto == IPPROTO_ICMPV6)) - data.port = 0; - - if (tb[IPSET_ATTR_TIMEOUT]) { - if (!with_timeout(h->timeout)) - return -IPSET_ERR_TIMEOUT; - timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); - } + if (!(with_ports || e.proto == IPPROTO_ICMPV6)) + e.port = 0; - if (tb[IPSET_ATTR_CADT_FLAGS] && adt == IPSET_ADD) { + if (tb[IPSET_ATTR_CADT_FLAGS]) { u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); if (cadt_flags & IPSET_FLAG_NOMATCH) - flags |= (cadt_flags << 16); + flags |= (IPSET_FLAG_NOMATCH << 16); } if (adt == IPSET_TEST 
|| !with_ports || !tb[IPSET_ATTR_PORT_TO]) { - ret = adtfn(set, &data, timeout, flags); - return ip_set_eexist(ret, flags) ? 0 : ret; + ret = adtfn(set, &e, &ext, &ext, flags); + return ip_set_enomatch(ret, flags, adt) ? 1 : + ip_set_eexist(ret, flags) ? 0 : ret; } - port = ntohs(data.port); + port = ntohs(e.port); port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]); if (port > port_to) swap(port, port_to); @@ -611,8 +556,8 @@ hash_ipportnet6_uadt(struct ip_set *set, struct nlattr *tb[], if (retried) port = ntohs(h->next.port); for (; port <= port_to; port++) { - data.port = htons(port); - ret = adtfn(set, &data, timeout, flags); + e.port = htons(port); + ret = adtfn(set, &e, &ext, &ext, flags); if (ret && !ip_set_eexist(ret, flags)) return ret; @@ -622,81 +567,6 @@ hash_ipportnet6_uadt(struct ip_set *set, struct nlattr *tb[], return ret; } -/* Create hash:ip type of sets */ - -static int -hash_ipportnet_create(struct ip_set *set, struct nlattr *tb[], u32 flags) -{ - struct ip_set_hash *h; - u32 hashsize = IPSET_DEFAULT_HASHSIZE, maxelem = IPSET_DEFAULT_MAXELEM; - u8 hbits; - size_t hsize; - - if (!(set->family == NFPROTO_IPV4 || set->family == NFPROTO_IPV6)) - return -IPSET_ERR_INVALID_FAMILY; - - if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_HASHSIZE) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_MAXELEM) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT))) - return -IPSET_ERR_PROTOCOL; - - if (tb[IPSET_ATTR_HASHSIZE]) { - hashsize = ip_set_get_h32(tb[IPSET_ATTR_HASHSIZE]); - if (hashsize < IPSET_MIMINAL_HASHSIZE) - hashsize = IPSET_MIMINAL_HASHSIZE; - } - - if (tb[IPSET_ATTR_MAXELEM]) - maxelem = ip_set_get_h32(tb[IPSET_ATTR_MAXELEM]); - - h = kzalloc(sizeof(*h) - + sizeof(struct ip_set_hash_nets) - * (set->family == NFPROTO_IPV4 ? 32 : 128), GFP_KERNEL); - if (!h) - return -ENOMEM; - - h->maxelem = maxelem; - get_random_bytes(&h->initval, sizeof(h->initval)); - h->timeout = IPSET_NO_TIMEOUT; - - hbits = htable_bits(hashsize); - hsize = htable_size(hbits); - if (hsize == 0) { - kfree(h); - return -ENOMEM; - } - h->table = ip_set_alloc(hsize); - if (!h->table) { - kfree(h); - return -ENOMEM; - } - h->table->htable_bits = hbits; - - set->data = h; - - if (tb[IPSET_ATTR_TIMEOUT]) { - h->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); - - set->variant = set->family == NFPROTO_IPV4 - ? &hash_ipportnet4_tvariant - : &hash_ipportnet6_tvariant; - - if (set->family == NFPROTO_IPV4) - hash_ipportnet4_gc_init(set); - else - hash_ipportnet6_gc_init(set); - } else { - set->variant = set->family == NFPROTO_IPV4 - ? 
&hash_ipportnet4_variant : &hash_ipportnet6_variant; - } - - pr_debug("create %s hashsize %u (%u) maxelem %u: %p(%p)\n", - set->name, jhash_size(h->table->htable_bits), - h->table->htable_bits, h->maxelem, set->data, h->table); - - return 0; -} - static struct ip_set_type hash_ipportnet_type __read_mostly = { .name = "hash:ip,port,net", .protocol = IPSET_PROTOCOL, @@ -713,6 +583,7 @@ static struct ip_set_type hash_ipportnet_type __read_mostly = { [IPSET_ATTR_PROBES] = { .type = NLA_U8 }, [IPSET_ATTR_RESIZE] = { .type = NLA_U8 }, [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, + [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 }, }, .adt_policy = { [IPSET_ATTR_IP] = { .type = NLA_NESTED }, @@ -727,6 +598,8 @@ static struct ip_set_type hash_ipportnet_type __read_mostly = { [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 }, [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, [IPSET_ATTR_LINENO] = { .type = NLA_U32 }, + [IPSET_ATTR_BYTES] = { .type = NLA_U64 }, + [IPSET_ATTR_PACKETS] = { .type = NLA_U64 }, }, .me = THIS_MODULE, }; diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c index 29e94b981f3f..da740ceb56ae 100644 --- a/net/netfilter/ipset/ip_set_hash_net.c +++ b/net/netfilter/ipset/ip_set_hash_net.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2003-2011 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> +/* Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -20,12 +20,12 @@ #include <linux/netfilter.h> #include <linux/netfilter/ipset/pfxlen.h> #include <linux/netfilter/ipset/ip_set.h> -#include <linux/netfilter/ipset/ip_set_timeout.h> #include <linux/netfilter/ipset/ip_set_hash.h> #define REVISION_MIN 0 /* 1 Range as input support for IPv4 added */ -#define REVISION_MAX 2 /* nomatch flag support added */ +/* 2 nomatch flag support added */ +#define REVISION_MAX 3 /* Counters support added */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>"); @@ -33,33 +33,46 @@ IP_SET_MODULE_DESC("hash:net", REVISION_MIN, REVISION_MAX); MODULE_ALIAS("ip_set_hash:net"); /* Type specific function prefix */ -#define TYPE hash_net +#define HTYPE hash_net +#define IP_SET_HASH_WITH_NETS -static bool -hash_net_same_set(const struct ip_set *a, const struct ip_set *b); +/* IPv4 variants */ -#define hash_net4_same_set hash_net_same_set -#define hash_net6_same_set hash_net_same_set +/* Member elements */ +struct hash_net4_elem { + __be32 ip; + u16 padding0; + u8 nomatch; + u8 cidr; +}; -/* The type variant functions: IPv4 */ +struct hash_net4t_elem { + __be32 ip; + u16 padding0; + u8 nomatch; + u8 cidr; + unsigned long timeout; +}; -/* Member elements without timeout */ -struct hash_net4_elem { +struct hash_net4c_elem { __be32 ip; u16 padding0; u8 nomatch; u8 cidr; + struct ip_set_counter counter; }; -/* Member elements with timeout support */ -struct hash_net4_telem { +struct hash_net4ct_elem { __be32 ip; u16 padding0; u8 nomatch; u8 cidr; + struct ip_set_counter counter; unsigned long timeout; }; +/* Common functions */ + static inline bool hash_net4_data_equal(const struct hash_net4_elem *ip1, const struct hash_net4_elem *ip2, @@ -69,31 +82,22 @@ hash_net4_data_equal(const struct hash_net4_elem *ip1, ip1->cidr == ip2->cidr; } -static inline bool -hash_net4_data_isnull(const struct hash_net4_elem *elem) +static inline int +hash_net4_do_data_match(const struct hash_net4_elem *elem) { - return elem->cidr == 0; + return 
elem->nomatch ? -ENOTEMPTY : 1; } static inline void -hash_net4_data_copy(struct hash_net4_elem *dst, - const struct hash_net4_elem *src) +hash_net4_data_set_flags(struct hash_net4_elem *elem, u32 flags) { - dst->ip = src->ip; - dst->cidr = src->cidr; - dst->nomatch = src->nomatch; + elem->nomatch = (flags >> 16) & IPSET_FLAG_NOMATCH; } static inline void -hash_net4_data_flags(struct hash_net4_elem *dst, u32 flags) -{ - dst->nomatch = flags & IPSET_FLAG_NOMATCH; -} - -static inline int -hash_net4_data_match(const struct hash_net4_elem *elem) +hash_net4_data_reset_flags(struct hash_net4_elem *elem, u8 *flags) { - return elem->nomatch ? -ENOTEMPTY : 1; + swap(*flags, elem->nomatch); } static inline void @@ -103,13 +107,6 @@ hash_net4_data_netmask(struct hash_net4_elem *elem, u8 cidr) elem->cidr = cidr; } -/* Zero CIDR values cannot be stored */ -static inline void -hash_net4_data_zero_out(struct hash_net4_elem *elem) -{ - elem->cidr = 0; -} - static bool hash_net4_data_list(struct sk_buff *skb, const struct hash_net4_elem *data) { @@ -126,106 +123,84 @@ nla_put_failure: return 1; } -static bool -hash_net4_data_tlist(struct sk_buff *skb, const struct hash_net4_elem *data) +static inline void +hash_net4_data_next(struct hash_net4_elem *next, + const struct hash_net4_elem *d) { - const struct hash_net4_telem *tdata = - (const struct hash_net4_telem *)data; - u32 flags = data->nomatch ? IPSET_FLAG_NOMATCH : 0; - - if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, tdata->ip) || - nla_put_u8(skb, IPSET_ATTR_CIDR, tdata->cidr) || - nla_put_net32(skb, IPSET_ATTR_TIMEOUT, - htonl(ip_set_timeout_get(tdata->timeout))) || - (flags && - nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags)))) - goto nla_put_failure; - return 0; - -nla_put_failure: - return 1; + next->ip = d->ip; } -#define IP_SET_HASH_WITH_NETS - +#define MTYPE hash_net4 #define PF 4 #define HOST_MASK 32 -#include <linux/netfilter/ipset/ip_set_ahash.h> - -static inline void -hash_net4_data_next(struct ip_set_hash *h, - const struct hash_net4_elem *d) -{ - h->next.ip = d->ip; -} +#include "ip_set_hash_gen.h" static int hash_net4_kadt(struct ip_set *set, const struct sk_buff *skb, const struct xt_action_param *par, - enum ipset_adt adt, const struct ip_set_adt_opt *opt) + enum ipset_adt adt, struct ip_set_adt_opt *opt) { - const struct ip_set_hash *h = set->data; + const struct hash_net *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; - struct hash_net4_elem data = { + struct hash_net4_elem e = { .cidr = h->nets[0].cidr ? 
h->nets[0].cidr : HOST_MASK }; + struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h); - if (data.cidr == 0) + if (e.cidr == 0) return -EINVAL; if (adt == IPSET_TEST) - data.cidr = HOST_MASK; + e.cidr = HOST_MASK; - ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &data.ip); - data.ip &= ip_set_netmask(data.cidr); + ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip); + e.ip &= ip_set_netmask(e.cidr); - return adtfn(set, &data, opt_timeout(opt, h), opt->cmdflags); + return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags); } static int hash_net4_uadt(struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { - const struct ip_set_hash *h = set->data; + const struct hash_net *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; - struct hash_net4_elem data = { .cidr = HOST_MASK }; - u32 timeout = h->timeout; + struct hash_net4_elem e = { .cidr = HOST_MASK }; + struct ip_set_ext ext = IP_SET_INIT_UEXT(h); u32 ip = 0, ip_to, last; int ret; if (unlikely(!tb[IPSET_ATTR_IP] || !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS))) + !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES))) return -IPSET_ERR_PROTOCOL; if (tb[IPSET_ATTR_LINENO]) *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); - ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip); + ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip) || + ip_set_get_extensions(set, tb, &ext); if (ret) return ret; if (tb[IPSET_ATTR_CIDR]) { - data.cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); - if (!data.cidr || data.cidr > HOST_MASK) + e.cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); + if (!e.cidr || e.cidr > HOST_MASK) return -IPSET_ERR_INVALID_CIDR; } - if (tb[IPSET_ATTR_TIMEOUT]) { - if (!with_timeout(h->timeout)) - return -IPSET_ERR_TIMEOUT; - timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); - } - - if (tb[IPSET_ATTR_CADT_FLAGS] && adt == IPSET_ADD) { + if (tb[IPSET_ATTR_CADT_FLAGS]) { u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); if (cadt_flags & IPSET_FLAG_NOMATCH) - flags |= (cadt_flags << 16); + flags |= (IPSET_FLAG_NOMATCH << 16); } if (adt == IPSET_TEST || !tb[IPSET_ATTR_IP_TO]) { - data.ip = htonl(ip & ip_set_hostmask(data.cidr)); - ret = adtfn(set, &data, timeout, flags); - return ip_set_eexist(ret, flags) ? 0 : ret; + e.ip = htonl(ip & ip_set_hostmask(e.cidr)); + ret = adtfn(set, &e, &ext, &ext, flags); + return ip_set_enomatch(ret, flags, adt) ? 1 : + ip_set_eexist(ret, flags) ? 
0 : ret; } ip_to = ip; @@ -241,9 +216,9 @@ hash_net4_uadt(struct ip_set *set, struct nlattr *tb[], if (retried) ip = ntohl(h->next.ip); while (!after(ip, ip_to)) { - data.ip = htonl(ip); - last = ip_set_range_to_cidr(ip, ip_to, &data.cidr); - ret = adtfn(set, &data, timeout, flags); + e.ip = htonl(ip); + last = ip_set_range_to_cidr(ip, ip_to, &e.cidr); + ret = adtfn(set, &e, &ext, &ext, flags); if (ret && !ip_set_eexist(ret, flags)) return ret; else @@ -253,83 +228,67 @@ hash_net4_uadt(struct ip_set *set, struct nlattr *tb[], return ret; } -static bool -hash_net_same_set(const struct ip_set *a, const struct ip_set *b) -{ - const struct ip_set_hash *x = a->data; - const struct ip_set_hash *y = b->data; +/* IPv6 variants */ - /* Resizing changes htable_bits, so we ignore it */ - return x->maxelem == y->maxelem && - x->timeout == y->timeout; -} +struct hash_net6_elem { + union nf_inet_addr ip; + u16 padding0; + u8 nomatch; + u8 cidr; +}; -/* The type variant functions: IPv6 */ +struct hash_net6t_elem { + union nf_inet_addr ip; + u16 padding0; + u8 nomatch; + u8 cidr; + unsigned long timeout; +}; -struct hash_net6_elem { +struct hash_net6c_elem { union nf_inet_addr ip; u16 padding0; u8 nomatch; u8 cidr; + struct ip_set_counter counter; }; -struct hash_net6_telem { +struct hash_net6ct_elem { union nf_inet_addr ip; u16 padding0; u8 nomatch; u8 cidr; + struct ip_set_counter counter; unsigned long timeout; }; +/* Common functions */ + static inline bool hash_net6_data_equal(const struct hash_net6_elem *ip1, const struct hash_net6_elem *ip2, u32 *multi) { - return ipv6_addr_cmp(&ip1->ip.in6, &ip2->ip.in6) == 0 && + return ipv6_addr_equal(&ip1->ip.in6, &ip2->ip.in6) && ip1->cidr == ip2->cidr; } -static inline bool -hash_net6_data_isnull(const struct hash_net6_elem *elem) -{ - return elem->cidr == 0; -} - -static inline void -hash_net6_data_copy(struct hash_net6_elem *dst, - const struct hash_net6_elem *src) -{ - dst->ip.in6 = src->ip.in6; - dst->cidr = src->cidr; - dst->nomatch = src->nomatch; -} - -static inline void -hash_net6_data_flags(struct hash_net6_elem *dst, u32 flags) -{ - dst->nomatch = flags & IPSET_FLAG_NOMATCH; -} - static inline int -hash_net6_data_match(const struct hash_net6_elem *elem) +hash_net6_do_data_match(const struct hash_net6_elem *elem) { return elem->nomatch ? -ENOTEMPTY : 1; } static inline void -hash_net6_data_zero_out(struct hash_net6_elem *elem) +hash_net6_data_set_flags(struct hash_net6_elem *elem, u32 flags) { - elem->cidr = 0; + elem->nomatch = (flags >> 16) & IPSET_FLAG_NOMATCH; } static inline void -ip6_netmask(union nf_inet_addr *ip, u8 prefix) +hash_net6_data_reset_flags(struct hash_net6_elem *elem, u8 *flags) { - ip->ip6[0] &= ip_set_netmask6(prefix)[0]; - ip->ip6[1] &= ip_set_netmask6(prefix)[1]; - ip->ip6[2] &= ip_set_netmask6(prefix)[2]; - ip->ip6[3] &= ip_set_netmask6(prefix)[3]; + swap(*flags, elem->nomatch); } static inline void @@ -355,74 +314,60 @@ nla_put_failure: return 1; } -static bool -hash_net6_data_tlist(struct sk_buff *skb, const struct hash_net6_elem *data) +static inline void +hash_net6_data_next(struct hash_net4_elem *next, + const struct hash_net6_elem *d) { - const struct hash_net6_telem *e = - (const struct hash_net6_telem *)data; - u32 flags = data->nomatch ? 
IPSET_FLAG_NOMATCH : 0; - - if (nla_put_ipaddr6(skb, IPSET_ATTR_IP, &e->ip.in6) || - nla_put_u8(skb, IPSET_ATTR_CIDR, e->cidr) || - nla_put_net32(skb, IPSET_ATTR_TIMEOUT, - htonl(ip_set_timeout_get(e->timeout))) || - (flags && - nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags)))) - goto nla_put_failure; - return 0; - -nla_put_failure: - return 1; } +#undef MTYPE #undef PF #undef HOST_MASK +#define MTYPE hash_net6 #define PF 6 #define HOST_MASK 128 -#include <linux/netfilter/ipset/ip_set_ahash.h> - -static inline void -hash_net6_data_next(struct ip_set_hash *h, - const struct hash_net6_elem *d) -{ -} +#define IP_SET_EMIT_CREATE +#include "ip_set_hash_gen.h" static int hash_net6_kadt(struct ip_set *set, const struct sk_buff *skb, const struct xt_action_param *par, - enum ipset_adt adt, const struct ip_set_adt_opt *opt) + enum ipset_adt adt, struct ip_set_adt_opt *opt) { - const struct ip_set_hash *h = set->data; + const struct hash_net *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; - struct hash_net6_elem data = { + struct hash_net6_elem e = { .cidr = h->nets[0].cidr ? h->nets[0].cidr : HOST_MASK }; + struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h); - if (data.cidr == 0) + if (e.cidr == 0) return -EINVAL; if (adt == IPSET_TEST) - data.cidr = HOST_MASK; + e.cidr = HOST_MASK; - ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &data.ip.in6); - ip6_netmask(&data.ip, data.cidr); + ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip.in6); + ip6_netmask(&e.ip, e.cidr); - return adtfn(set, &data, opt_timeout(opt, h), opt->cmdflags); + return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags); } static int hash_net6_uadt(struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { - const struct ip_set_hash *h = set->data; + const struct hash_net *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; - struct hash_net6_elem data = { .cidr = HOST_MASK }; - u32 timeout = h->timeout; + struct hash_net6_elem e = { .cidr = HOST_MASK }; + struct ip_set_ext ext = IP_SET_INIT_UEXT(h); int ret; if (unlikely(!tb[IPSET_ATTR_IP] || !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS))) + !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES))) return -IPSET_ERR_PROTOCOL; if (unlikely(tb[IPSET_ATTR_IP_TO])) return -IPSET_ERR_HASH_RANGE_UNSUPPORTED; @@ -430,107 +375,29 @@ hash_net6_uadt(struct ip_set *set, struct nlattr *tb[], if (tb[IPSET_ATTR_LINENO]) *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); - ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &data.ip); + ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip) || + ip_set_get_extensions(set, tb, &ext); if (ret) return ret; if (tb[IPSET_ATTR_CIDR]) - data.cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); + e.cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); - if (!data.cidr || data.cidr > HOST_MASK) + if (!e.cidr || e.cidr > HOST_MASK) return -IPSET_ERR_INVALID_CIDR; - ip6_netmask(&data.ip, data.cidr); - - if (tb[IPSET_ATTR_TIMEOUT]) { - if (!with_timeout(h->timeout)) - return -IPSET_ERR_TIMEOUT; - timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); - } + ip6_netmask(&e.ip, e.cidr); - if (tb[IPSET_ATTR_CADT_FLAGS] && adt == IPSET_ADD) { + if (tb[IPSET_ATTR_CADT_FLAGS]) { u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); if (cadt_flags & IPSET_FLAG_NOMATCH) - flags |= (cadt_flags << 16); - } - - ret = adtfn(set, &data, timeout, flags); - - return 
ip_set_eexist(ret, flags) ? 0 : ret; -} - -/* Create hash:ip type of sets */ - -static int -hash_net_create(struct ip_set *set, struct nlattr *tb[], u32 flags) -{ - u32 hashsize = IPSET_DEFAULT_HASHSIZE, maxelem = IPSET_DEFAULT_MAXELEM; - struct ip_set_hash *h; - u8 hbits; - size_t hsize; - - if (!(set->family == NFPROTO_IPV4 || set->family == NFPROTO_IPV6)) - return -IPSET_ERR_INVALID_FAMILY; - - if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_HASHSIZE) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_MAXELEM) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT))) - return -IPSET_ERR_PROTOCOL; - - if (tb[IPSET_ATTR_HASHSIZE]) { - hashsize = ip_set_get_h32(tb[IPSET_ATTR_HASHSIZE]); - if (hashsize < IPSET_MIMINAL_HASHSIZE) - hashsize = IPSET_MIMINAL_HASHSIZE; - } - - if (tb[IPSET_ATTR_MAXELEM]) - maxelem = ip_set_get_h32(tb[IPSET_ATTR_MAXELEM]); - - h = kzalloc(sizeof(*h) - + sizeof(struct ip_set_hash_nets) - * (set->family == NFPROTO_IPV4 ? 32 : 128), GFP_KERNEL); - if (!h) - return -ENOMEM; - - h->maxelem = maxelem; - get_random_bytes(&h->initval, sizeof(h->initval)); - h->timeout = IPSET_NO_TIMEOUT; - - hbits = htable_bits(hashsize); - hsize = htable_size(hbits); - if (hsize == 0) { - kfree(h); - return -ENOMEM; + flags |= (IPSET_FLAG_NOMATCH << 16); } - h->table = ip_set_alloc(hsize); - if (!h->table) { - kfree(h); - return -ENOMEM; - } - h->table->htable_bits = hbits; - - set->data = h; - if (tb[IPSET_ATTR_TIMEOUT]) { - h->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); + ret = adtfn(set, &e, &ext, &ext, flags); - set->variant = set->family == NFPROTO_IPV4 - ? &hash_net4_tvariant : &hash_net6_tvariant; - - if (set->family == NFPROTO_IPV4) - hash_net4_gc_init(set); - else - hash_net6_gc_init(set); - } else { - set->variant = set->family == NFPROTO_IPV4 - ? &hash_net4_variant : &hash_net6_variant; - } - - pr_debug("create %s hashsize %u (%u) maxelem %u: %p(%p)\n", - set->name, jhash_size(h->table->htable_bits), - h->table->htable_bits, h->maxelem, set->data, h->table); - - return 0; + return ip_set_enomatch(ret, flags, adt) ? 1 : + ip_set_eexist(ret, flags) ? 
0 : ret; } static struct ip_set_type hash_net_type __read_mostly = { @@ -548,6 +415,7 @@ static struct ip_set_type hash_net_type __read_mostly = { [IPSET_ATTR_PROBES] = { .type = NLA_U8 }, [IPSET_ATTR_RESIZE] = { .type = NLA_U8 }, [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, + [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 }, }, .adt_policy = { [IPSET_ATTR_IP] = { .type = NLA_NESTED }, @@ -555,6 +423,8 @@ static struct ip_set_type hash_net_type __read_mostly = { [IPSET_ATTR_CIDR] = { .type = NLA_U8 }, [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 }, + [IPSET_ATTR_BYTES] = { .type = NLA_U64 }, + [IPSET_ATTR_PACKETS] = { .type = NLA_U64 }, }, .me = THIS_MODULE, }; diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c index 45a101439bc5..84ae6f6ce624 100644 --- a/net/netfilter/ipset/ip_set_hash_netiface.c +++ b/net/netfilter/ipset/ip_set_hash_netiface.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2011 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> +/* Copyright (C) 2011-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -21,12 +21,12 @@ #include <linux/netfilter.h> #include <linux/netfilter/ipset/pfxlen.h> #include <linux/netfilter/ipset/ip_set.h> -#include <linux/netfilter/ipset/ip_set_timeout.h> #include <linux/netfilter/ipset/ip_set_hash.h> #define REVISION_MIN 0 /* 1 nomatch flag support added */ -#define REVISION_MAX 2 /* /0 support added */ +/* 2 /0 support added */ +#define REVISION_MAX 3 /* Counters support added */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>"); @@ -127,17 +127,14 @@ iface_add(struct rb_root *root, const char **iface) } /* Type specific function prefix */ -#define TYPE hash_netiface - -static bool -hash_netiface_same_set(const struct ip_set *a, const struct ip_set *b); - -#define hash_netiface4_same_set hash_netiface_same_set -#define hash_netiface6_same_set hash_netiface_same_set +#define HTYPE hash_netiface +#define IP_SET_HASH_WITH_NETS +#define IP_SET_HASH_WITH_RBTREE +#define IP_SET_HASH_WITH_MULTI #define STREQ(a, b) (strcmp(a, b) == 0) -/* The type variant functions: IPv4 */ +/* IPv4 variants */ struct hash_netiface4_elem_hashed { __be32 ip; @@ -147,8 +144,6 @@ struct hash_netiface4_elem_hashed { u8 elem; }; -#define HKEY_DATALEN sizeof(struct hash_netiface4_elem_hashed) - /* Member elements without timeout */ struct hash_netiface4_elem { __be32 ip; @@ -159,17 +154,39 @@ struct hash_netiface4_elem { const char *iface; }; -/* Member elements with timeout support */ -struct hash_netiface4_telem { +struct hash_netiface4t_elem { + __be32 ip; + u8 physdev; + u8 cidr; + u8 nomatch; + u8 elem; + const char *iface; + unsigned long timeout; +}; + +struct hash_netiface4c_elem { + __be32 ip; + u8 physdev; + u8 cidr; + u8 nomatch; + u8 elem; + const char *iface; + struct ip_set_counter counter; +}; + +struct hash_netiface4ct_elem { __be32 ip; u8 physdev; u8 cidr; u8 nomatch; u8 elem; const char *iface; + struct ip_set_counter counter; unsigned long timeout; }; +/* Common functions */ + static inline bool hash_netiface4_data_equal(const struct hash_netiface4_elem *ip1, const struct hash_netiface4_elem *ip2, @@ -182,29 +199,22 @@ hash_netiface4_data_equal(const struct hash_netiface4_elem *ip1, ip1->iface == ip2->iface; } -static inline bool -hash_netiface4_data_isnull(const struct hash_netiface4_elem *elem) +static inline int 
+hash_netiface4_do_data_match(const struct hash_netiface4_elem *elem) { - return elem->elem == 0; + return elem->nomatch ? -ENOTEMPTY : 1; } static inline void -hash_netiface4_data_copy(struct hash_netiface4_elem *dst, - const struct hash_netiface4_elem *src) +hash_netiface4_data_set_flags(struct hash_netiface4_elem *elem, u32 flags) { - memcpy(dst, src, sizeof(*dst)); + elem->nomatch = (flags >> 16) & IPSET_FLAG_NOMATCH; } static inline void -hash_netiface4_data_flags(struct hash_netiface4_elem *dst, u32 flags) -{ - dst->nomatch = flags & IPSET_FLAG_NOMATCH; -} - -static inline int -hash_netiface4_data_match(const struct hash_netiface4_elem *elem) +hash_netiface4_data_reset_flags(struct hash_netiface4_elem *elem, u8 *flags) { - return elem->nomatch ? -ENOTEMPTY : 1; + swap(*flags, elem->nomatch); } static inline void @@ -214,12 +224,6 @@ hash_netiface4_data_netmask(struct hash_netiface4_elem *elem, u8 cidr) elem->cidr = cidr; } -static inline void -hash_netiface4_data_zero_out(struct hash_netiface4_elem *elem) -{ - elem->elem = 0; -} - static bool hash_netiface4_data_list(struct sk_buff *skb, const struct hash_netiface4_elem *data) @@ -240,66 +244,40 @@ nla_put_failure: return 1; } -static bool -hash_netiface4_data_tlist(struct sk_buff *skb, - const struct hash_netiface4_elem *data) +static inline void +hash_netiface4_data_next(struct hash_netiface4_elem *next, + const struct hash_netiface4_elem *d) { - const struct hash_netiface4_telem *tdata = - (const struct hash_netiface4_telem *)data; - u32 flags = data->physdev ? IPSET_FLAG_PHYSDEV : 0; - - if (data->nomatch) - flags |= IPSET_FLAG_NOMATCH; - if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, data->ip) || - nla_put_u8(skb, IPSET_ATTR_CIDR, data->cidr) || - nla_put_string(skb, IPSET_ATTR_IFACE, data->iface) || - (flags && - nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags))) || - nla_put_net32(skb, IPSET_ATTR_TIMEOUT, - htonl(ip_set_timeout_get(tdata->timeout)))) - goto nla_put_failure; - - return 0; - -nla_put_failure: - return 1; + next->ip = d->ip; } -#define IP_SET_HASH_WITH_NETS -#define IP_SET_HASH_WITH_RBTREE -#define IP_SET_HASH_WITH_MULTI - +#define MTYPE hash_netiface4 #define PF 4 #define HOST_MASK 32 -#include <linux/netfilter/ipset/ip_set_ahash.h> - -static inline void -hash_netiface4_data_next(struct ip_set_hash *h, - const struct hash_netiface4_elem *d) -{ - h->next.ip = d->ip; -} +#define HKEY_DATALEN sizeof(struct hash_netiface4_elem_hashed) +#include "ip_set_hash_gen.h" static int hash_netiface4_kadt(struct ip_set *set, const struct sk_buff *skb, const struct xt_action_param *par, - enum ipset_adt adt, const struct ip_set_adt_opt *opt) + enum ipset_adt adt, struct ip_set_adt_opt *opt) { - struct ip_set_hash *h = set->data; + struct hash_netiface *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; - struct hash_netiface4_elem data = { + struct hash_netiface4_elem e = { .cidr = h->nets[0].cidr ? h->nets[0].cidr : HOST_MASK, .elem = 1, }; + struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h); int ret; - if (data.cidr == 0) + if (e.cidr == 0) return -EINVAL; if (adt == IPSET_TEST) - data.cidr = HOST_MASK; + e.cidr = HOST_MASK; - ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &data.ip); - data.ip &= ip_set_netmask(data.cidr); + ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip); + e.ip &= ip_set_netmask(e.cidr); #define IFACE(dir) (par->dir ? par->dir->name : NULL) #define PHYSDEV(dir) (nf_bridge->dir ? 
nf_bridge->dir->name : NULL) @@ -311,72 +289,69 @@ hash_netiface4_kadt(struct ip_set *set, const struct sk_buff *skb, if (!nf_bridge) return -EINVAL; - data.iface = SRCDIR ? PHYSDEV(physindev) : PHYSDEV(physoutdev); - data.physdev = 1; + e.iface = SRCDIR ? PHYSDEV(physindev) : PHYSDEV(physoutdev); + e.physdev = 1; #else - data.iface = NULL; + e.iface = NULL; #endif } else - data.iface = SRCDIR ? IFACE(in) : IFACE(out); + e.iface = SRCDIR ? IFACE(in) : IFACE(out); - if (!data.iface) + if (!e.iface) return -EINVAL; - ret = iface_test(&h->rbtree, &data.iface); + ret = iface_test(&h->rbtree, &e.iface); if (adt == IPSET_ADD) { if (!ret) { - ret = iface_add(&h->rbtree, &data.iface); + ret = iface_add(&h->rbtree, &e.iface); if (ret) return ret; } } else if (!ret) return ret; - return adtfn(set, &data, opt_timeout(opt, h), opt->cmdflags); + return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags); } static int hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { - struct ip_set_hash *h = set->data; + struct hash_netiface *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; - struct hash_netiface4_elem data = { .cidr = HOST_MASK, .elem = 1 }; + struct hash_netiface4_elem e = { .cidr = HOST_MASK, .elem = 1 }; + struct ip_set_ext ext = IP_SET_INIT_UEXT(h); u32 ip = 0, ip_to, last; - u32 timeout = h->timeout; char iface[IFNAMSIZ]; int ret; if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IFACE] || !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS))) + !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES))) return -IPSET_ERR_PROTOCOL; if (tb[IPSET_ATTR_LINENO]) *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); - ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip); + ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip) || + ip_set_get_extensions(set, tb, &ext); if (ret) return ret; if (tb[IPSET_ATTR_CIDR]) { - data.cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); - if (data.cidr > HOST_MASK) + e.cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); + if (e.cidr > HOST_MASK) return -IPSET_ERR_INVALID_CIDR; } - if (tb[IPSET_ATTR_TIMEOUT]) { - if (!with_timeout(h->timeout)) - return -IPSET_ERR_TIMEOUT; - timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); - } - strcpy(iface, nla_data(tb[IPSET_ATTR_IFACE])); - data.iface = iface; - ret = iface_test(&h->rbtree, &data.iface); + e.iface = iface; + ret = iface_test(&h->rbtree, &e.iface); if (adt == IPSET_ADD) { if (!ret) { - ret = iface_add(&h->rbtree, &data.iface); + ret = iface_add(&h->rbtree, &e.iface); if (ret) return ret; } @@ -386,14 +361,15 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[], if (tb[IPSET_ATTR_CADT_FLAGS]) { u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); if (cadt_flags & IPSET_FLAG_PHYSDEV) - data.physdev = 1; - if (adt == IPSET_ADD && (cadt_flags & IPSET_FLAG_NOMATCH)) - flags |= (cadt_flags << 16); + e.physdev = 1; + if (cadt_flags & IPSET_FLAG_NOMATCH) + flags |= (IPSET_FLAG_NOMATCH << 16); } if (adt == IPSET_TEST || !tb[IPSET_ATTR_IP_TO]) { - data.ip = htonl(ip & ip_set_hostmask(data.cidr)); - ret = adtfn(set, &data, timeout, flags); - return ip_set_eexist(ret, flags) ? 0 : ret; + e.ip = htonl(ip & ip_set_hostmask(e.cidr)); + ret = adtfn(set, &e, &ext, &ext, flags); + return ip_set_enomatch(ret, flags, adt) ? 1 : + ip_set_eexist(ret, flags) ? 
0 : ret; } if (tb[IPSET_ATTR_IP_TO]) { @@ -404,16 +380,15 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[], swap(ip, ip_to); if (ip + UINT_MAX == ip_to) return -IPSET_ERR_HASH_RANGE; - } else { - ip_set_mask_from_to(ip, ip_to, data.cidr); - } + } else + ip_set_mask_from_to(ip, ip_to, e.cidr); if (retried) ip = ntohl(h->next.ip); while (!after(ip, ip_to)) { - data.ip = htonl(ip); - last = ip_set_range_to_cidr(ip, ip_to, &data.cidr); - ret = adtfn(set, &data, timeout, flags); + e.ip = htonl(ip); + last = ip_set_range_to_cidr(ip, ip_to, &e.cidr); + ret = adtfn(set, &e, &ext, &ext, flags); if (ret && !ip_set_eexist(ret, flags)) return ret; @@ -424,18 +399,7 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[], return ret; } -static bool -hash_netiface_same_set(const struct ip_set *a, const struct ip_set *b) -{ - const struct ip_set_hash *x = a->data; - const struct ip_set_hash *y = b->data; - - /* Resizing changes htable_bits, so we ignore it */ - return x->maxelem == y->maxelem && - x->timeout == y->timeout; -} - -/* The type variant functions: IPv6 */ +/* IPv6 variants */ struct hash_netiface6_elem_hashed { union nf_inet_addr ip; @@ -445,8 +409,6 @@ struct hash_netiface6_elem_hashed { u8 elem; }; -#define HKEY_DATALEN sizeof(struct hash_netiface6_elem_hashed) - struct hash_netiface6_elem { union nf_inet_addr ip; u8 physdev; @@ -456,66 +418,67 @@ struct hash_netiface6_elem { const char *iface; }; -struct hash_netiface6_telem { +struct hash_netiface6t_elem { + union nf_inet_addr ip; + u8 physdev; + u8 cidr; + u8 nomatch; + u8 elem; + const char *iface; + unsigned long timeout; +}; + +struct hash_netiface6c_elem { + union nf_inet_addr ip; + u8 physdev; + u8 cidr; + u8 nomatch; + u8 elem; + const char *iface; + struct ip_set_counter counter; +}; + +struct hash_netiface6ct_elem { union nf_inet_addr ip; u8 physdev; u8 cidr; u8 nomatch; u8 elem; const char *iface; + struct ip_set_counter counter; unsigned long timeout; }; +/* Common functions */ + static inline bool hash_netiface6_data_equal(const struct hash_netiface6_elem *ip1, const struct hash_netiface6_elem *ip2, u32 *multi) { - return ipv6_addr_cmp(&ip1->ip.in6, &ip2->ip.in6) == 0 && + return ipv6_addr_equal(&ip1->ip.in6, &ip2->ip.in6) && ip1->cidr == ip2->cidr && (++*multi) && ip1->physdev == ip2->physdev && ip1->iface == ip2->iface; } -static inline bool -hash_netiface6_data_isnull(const struct hash_netiface6_elem *elem) -{ - return elem->elem == 0; -} - -static inline void -hash_netiface6_data_copy(struct hash_netiface6_elem *dst, - const struct hash_netiface6_elem *src) -{ - memcpy(dst, src, sizeof(*dst)); -} - -static inline void -hash_netiface6_data_flags(struct hash_netiface6_elem *dst, u32 flags) -{ - dst->nomatch = flags & IPSET_FLAG_NOMATCH; -} - static inline int -hash_netiface6_data_match(const struct hash_netiface6_elem *elem) +hash_netiface6_do_data_match(const struct hash_netiface6_elem *elem) { return elem->nomatch ? 
-ENOTEMPTY : 1; } static inline void -hash_netiface6_data_zero_out(struct hash_netiface6_elem *elem) +hash_netiface6_data_set_flags(struct hash_netiface6_elem *elem, u32 flags) { - elem->elem = 0; + elem->nomatch = (flags >> 16) & IPSET_FLAG_NOMATCH; } static inline void -ip6_netmask(union nf_inet_addr *ip, u8 prefix) +hash_netiface6_data_reset_flags(struct hash_netiface6_elem *elem, u8 *flags) { - ip->ip6[0] &= ip_set_netmask6(prefix)[0]; - ip->ip6[1] &= ip_set_netmask6(prefix)[1]; - ip->ip6[2] &= ip_set_netmask6(prefix)[2]; - ip->ip6[3] &= ip_set_netmask6(prefix)[3]; + swap(*flags, elem->nomatch); } static inline void @@ -545,63 +508,45 @@ nla_put_failure: return 1; } -static bool -hash_netiface6_data_tlist(struct sk_buff *skb, - const struct hash_netiface6_elem *data) +static inline void +hash_netiface6_data_next(struct hash_netiface4_elem *next, + const struct hash_netiface6_elem *d) { - const struct hash_netiface6_telem *e = - (const struct hash_netiface6_telem *)data; - u32 flags = data->physdev ? IPSET_FLAG_PHYSDEV : 0; - - if (data->nomatch) - flags |= IPSET_FLAG_NOMATCH; - if (nla_put_ipaddr6(skb, IPSET_ATTR_IP, &e->ip.in6) || - nla_put_u8(skb, IPSET_ATTR_CIDR, data->cidr) || - nla_put_string(skb, IPSET_ATTR_IFACE, data->iface) || - (flags && - nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags))) || - nla_put_net32(skb, IPSET_ATTR_TIMEOUT, - htonl(ip_set_timeout_get(e->timeout)))) - goto nla_put_failure; - return 0; - -nla_put_failure: - return 1; } +#undef MTYPE #undef PF #undef HOST_MASK +#undef HKEY_DATALEN +#define MTYPE hash_netiface6 #define PF 6 #define HOST_MASK 128 -#include <linux/netfilter/ipset/ip_set_ahash.h> - -static inline void -hash_netiface6_data_next(struct ip_set_hash *h, - const struct hash_netiface6_elem *d) -{ -} +#define HKEY_DATALEN sizeof(struct hash_netiface6_elem_hashed) +#define IP_SET_EMIT_CREATE +#include "ip_set_hash_gen.h" static int hash_netiface6_kadt(struct ip_set *set, const struct sk_buff *skb, const struct xt_action_param *par, - enum ipset_adt adt, const struct ip_set_adt_opt *opt) + enum ipset_adt adt, struct ip_set_adt_opt *opt) { - struct ip_set_hash *h = set->data; + struct hash_netiface *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; - struct hash_netiface6_elem data = { + struct hash_netiface6_elem e = { .cidr = h->nets[0].cidr ? h->nets[0].cidr : HOST_MASK, .elem = 1, }; + struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h); int ret; - if (data.cidr == 0) + if (e.cidr == 0) return -EINVAL; if (adt == IPSET_TEST) - data.cidr = HOST_MASK; + e.cidr = HOST_MASK; - ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &data.ip.in6); - ip6_netmask(&data.ip, data.cidr); + ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip.in6); + ip6_netmask(&e.ip, e.cidr); if (opt->cmdflags & IPSET_FLAG_PHYSDEV) { #ifdef CONFIG_BRIDGE_NETFILTER @@ -609,44 +554,46 @@ hash_netiface6_kadt(struct ip_set *set, const struct sk_buff *skb, if (!nf_bridge) return -EINVAL; - data.iface = SRCDIR ? PHYSDEV(physindev) : PHYSDEV(physoutdev); - data.physdev = 1; + e.iface = SRCDIR ? PHYSDEV(physindev) : PHYSDEV(physoutdev); + e.physdev = 1; #else - data.iface = NULL; + e.iface = NULL; #endif } else - data.iface = SRCDIR ? IFACE(in) : IFACE(out); + e.iface = SRCDIR ? 
IFACE(in) : IFACE(out); - if (!data.iface) + if (!e.iface) return -EINVAL; - ret = iface_test(&h->rbtree, &data.iface); + ret = iface_test(&h->rbtree, &e.iface); if (adt == IPSET_ADD) { if (!ret) { - ret = iface_add(&h->rbtree, &data.iface); + ret = iface_add(&h->rbtree, &e.iface); if (ret) return ret; } } else if (!ret) return ret; - return adtfn(set, &data, opt_timeout(opt, h), opt->cmdflags); + return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags); } static int hash_netiface6_uadt(struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { - struct ip_set_hash *h = set->data; + struct hash_netiface *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; - struct hash_netiface6_elem data = { .cidr = HOST_MASK, .elem = 1 }; - u32 timeout = h->timeout; + struct hash_netiface6_elem e = { .cidr = HOST_MASK, .elem = 1 }; + struct ip_set_ext ext = IP_SET_INIT_UEXT(h); char iface[IFNAMSIZ]; int ret; if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IFACE] || !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS))) + !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES))) return -IPSET_ERR_PROTOCOL; if (unlikely(tb[IPSET_ATTR_IP_TO])) return -IPSET_ERR_HASH_RANGE_UNSUPPORTED; @@ -654,28 +601,23 @@ hash_netiface6_uadt(struct ip_set *set, struct nlattr *tb[], if (tb[IPSET_ATTR_LINENO]) *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); - ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &data.ip); + ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip) || + ip_set_get_extensions(set, tb, &ext); if (ret) return ret; if (tb[IPSET_ATTR_CIDR]) - data.cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); - if (data.cidr > HOST_MASK) + e.cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); + if (e.cidr > HOST_MASK) return -IPSET_ERR_INVALID_CIDR; - ip6_netmask(&data.ip, data.cidr); - - if (tb[IPSET_ATTR_TIMEOUT]) { - if (!with_timeout(h->timeout)) - return -IPSET_ERR_TIMEOUT; - timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); - } + ip6_netmask(&e.ip, e.cidr); strcpy(iface, nla_data(tb[IPSET_ATTR_IFACE])); - data.iface = iface; - ret = iface_test(&h->rbtree, &data.iface); + e.iface = iface; + ret = iface_test(&h->rbtree, &e.iface); if (adt == IPSET_ADD) { if (!ret) { - ret = iface_add(&h->rbtree, &data.iface); + ret = iface_add(&h->rbtree, &e.iface); if (ret) return ret; } @@ -685,90 +627,15 @@ hash_netiface6_uadt(struct ip_set *set, struct nlattr *tb[], if (tb[IPSET_ATTR_CADT_FLAGS]) { u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); if (cadt_flags & IPSET_FLAG_PHYSDEV) - data.physdev = 1; - if (adt == IPSET_ADD && (cadt_flags & IPSET_FLAG_NOMATCH)) - flags |= (cadt_flags << 16); + e.physdev = 1; + if (cadt_flags & IPSET_FLAG_NOMATCH) + flags |= (IPSET_FLAG_NOMATCH << 16); } - ret = adtfn(set, &data, timeout, flags); + ret = adtfn(set, &e, &ext, &ext, flags); - return ip_set_eexist(ret, flags) ? 
0 : ret; -} - -/* Create hash:ip type of sets */ - -static int -hash_netiface_create(struct ip_set *set, struct nlattr *tb[], u32 flags) -{ - struct ip_set_hash *h; - u32 hashsize = IPSET_DEFAULT_HASHSIZE, maxelem = IPSET_DEFAULT_MAXELEM; - u8 hbits; - size_t hsize; - - if (!(set->family == NFPROTO_IPV4 || set->family == NFPROTO_IPV6)) - return -IPSET_ERR_INVALID_FAMILY; - - if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_HASHSIZE) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_MAXELEM) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT))) - return -IPSET_ERR_PROTOCOL; - - if (tb[IPSET_ATTR_HASHSIZE]) { - hashsize = ip_set_get_h32(tb[IPSET_ATTR_HASHSIZE]); - if (hashsize < IPSET_MIMINAL_HASHSIZE) - hashsize = IPSET_MIMINAL_HASHSIZE; - } - - if (tb[IPSET_ATTR_MAXELEM]) - maxelem = ip_set_get_h32(tb[IPSET_ATTR_MAXELEM]); - - h = kzalloc(sizeof(*h) - + sizeof(struct ip_set_hash_nets) - * (set->family == NFPROTO_IPV4 ? 32 : 128), GFP_KERNEL); - if (!h) - return -ENOMEM; - - h->maxelem = maxelem; - get_random_bytes(&h->initval, sizeof(h->initval)); - h->timeout = IPSET_NO_TIMEOUT; - h->ahash_max = AHASH_MAX_SIZE; - - hbits = htable_bits(hashsize); - hsize = htable_size(hbits); - if (hsize == 0) { - kfree(h); - return -ENOMEM; - } - h->table = ip_set_alloc(hsize); - if (!h->table) { - kfree(h); - return -ENOMEM; - } - h->table->htable_bits = hbits; - h->rbtree = RB_ROOT; - - set->data = h; - - if (tb[IPSET_ATTR_TIMEOUT]) { - h->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); - - set->variant = set->family == NFPROTO_IPV4 - ? &hash_netiface4_tvariant : &hash_netiface6_tvariant; - - if (set->family == NFPROTO_IPV4) - hash_netiface4_gc_init(set); - else - hash_netiface6_gc_init(set); - } else { - set->variant = set->family == NFPROTO_IPV4 - ? &hash_netiface4_variant : &hash_netiface6_variant; - } - - pr_debug("create %s hashsize %u (%u) maxelem %u: %p(%p)\n", - set->name, jhash_size(h->table->htable_bits), - h->table->htable_bits, h->maxelem, set->data, h->table); - - return 0; + return ip_set_enomatch(ret, flags, adt) ? 1 : + ip_set_eexist(ret, flags) ? 
0 : ret; } static struct ip_set_type hash_netiface_type __read_mostly = { @@ -788,6 +655,7 @@ static struct ip_set_type hash_netiface_type __read_mostly = { [IPSET_ATTR_RESIZE] = { .type = NLA_U8 }, [IPSET_ATTR_PROTO] = { .type = NLA_U8 }, [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, + [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 }, }, .adt_policy = { [IPSET_ATTR_IP] = { .type = NLA_NESTED }, @@ -798,6 +666,8 @@ static struct ip_set_type hash_netiface_type __read_mostly = { [IPSET_ATTR_CIDR] = { .type = NLA_U8 }, [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, [IPSET_ATTR_LINENO] = { .type = NLA_U32 }, + [IPSET_ATTR_BYTES] = { .type = NLA_U64 }, + [IPSET_ATTR_PACKETS] = { .type = NLA_U64 }, }, .me = THIS_MODULE, }; diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c index 7ef700de596c..9a0869853be5 100644 --- a/net/netfilter/ipset/ip_set_hash_netport.c +++ b/net/netfilter/ipset/ip_set_hash_netport.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2003-2011 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> +/* Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -20,14 +20,14 @@ #include <linux/netfilter.h> #include <linux/netfilter/ipset/pfxlen.h> #include <linux/netfilter/ipset/ip_set.h> -#include <linux/netfilter/ipset/ip_set_timeout.h> #include <linux/netfilter/ipset/ip_set_getport.h> #include <linux/netfilter/ipset/ip_set_hash.h> #define REVISION_MIN 0 /* 1 SCTP and UDPLITE support added */ /* 2 Range as input support for IPv4 added */ -#define REVISION_MAX 3 /* nomatch flag support added */ +/* 3 nomatch flag support added */ +#define REVISION_MAX 4 /* Counters support added */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>"); @@ -35,15 +35,9 @@ IP_SET_MODULE_DESC("hash:net,port", REVISION_MIN, REVISION_MAX); MODULE_ALIAS("ip_set_hash:net,port"); /* Type specific function prefix */ -#define TYPE hash_netport - -static bool -hash_netport_same_set(const struct ip_set *a, const struct ip_set *b); - -#define hash_netport4_same_set hash_netport_same_set -#define hash_netport6_same_set hash_netport_same_set - -/* The type variant functions: IPv4 */ +#define HTYPE hash_netport +#define IP_SET_HASH_WITH_PROTO +#define IP_SET_HASH_WITH_NETS /* We squeeze the "nomatch" flag into cidr: we don't support cidr == 0 * However this way we have to store internally cidr - 1, @@ -51,7 +45,9 @@ hash_netport_same_set(const struct ip_set *a, const struct ip_set *b); */ #define IP_SET_HASH_WITH_NETS_PACKED -/* Member elements without timeout */ +/* IPv4 variants */ + +/* Member elements */ struct hash_netport4_elem { __be32 ip; __be16 port; @@ -60,8 +56,7 @@ struct hash_netport4_elem { u8 nomatch:1; }; -/* Member elements with timeout support */ -struct hash_netport4_telem { +struct hash_netport4t_elem { __be32 ip; __be16 port; u8 proto; @@ -70,6 +65,27 @@ struct hash_netport4_telem { unsigned long timeout; }; +struct hash_netport4c_elem { + __be32 ip; + __be16 port; + u8 proto; + u8 cidr:7; + u8 nomatch:1; + struct ip_set_counter counter; +}; + +struct hash_netport4ct_elem { + __be32 ip; + __be16 port; + u8 proto; + u8 cidr:7; + u8 nomatch:1; + struct ip_set_counter counter; + unsigned long timeout; +}; + +/* Common functions */ + static inline bool hash_netport4_data_equal(const struct hash_netport4_elem *ip1, const struct hash_netport4_elem *ip2, @@ -81,33 +97,22 @@ 
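
[Note on the MTYPE/PF/HOST_MASK pattern seen in each file above: the per-family defines are set up and then "ip_set_hash_gen.h" is included once per variant, so one generic template expands into family-specific functions by pasting MTYPE into their names. Below is a cut-down sketch of that token-pasting idiom only — the toy element types, the CONCAT/MFUNC helpers, and the demo main() are invented for illustration and are not the kernel's actual macros.]

#include <stdio.h>

#define CONCAT2(a, b) a##b
#define CONCAT(a, b) CONCAT2(a, b)
/* Derive a per-variant symbol name from the current MTYPE. */
#define MFUNC(name) CONCAT(MTYPE, CONCAT(_, name))

struct toy4_elem { unsigned int ip; unsigned char cidr; };
struct toy6_elem { unsigned int ip6[4]; unsigned char cidr; };

/* First "inclusion" of the generic template, for the IPv4 variant. */
#define MTYPE toy4
static int MFUNC(match)(const struct toy4_elem *e) { return e->cidr != 0; }
#undef MTYPE

/* Second "inclusion", after redefining MTYPE for the IPv6 variant. */
#define MTYPE toy6
static int MFUNC(match)(const struct toy6_elem *e) { return e->cidr != 0; }
#undef MTYPE

int main(void)
{
	struct toy4_elem e4 = { .ip = 0x0a000001, .cidr = 24 };
	struct toy6_elem e6 = { .ip6 = { 0 }, .cidr = 64 };

	/* toy4_match() and toy6_match() were both generated from MFUNC(match). */
	printf("%d %d\n", toy4_match(&e4), toy6_match(&e6));
	return 0;
}

[End of note; the hash_netport4 hunk continues.]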
hash_netport4_data_equal(const struct hash_netport4_elem *ip1, ip1->cidr == ip2->cidr; } -static inline bool -hash_netport4_data_isnull(const struct hash_netport4_elem *elem) +static inline int +hash_netport4_do_data_match(const struct hash_netport4_elem *elem) { - return elem->proto == 0; + return elem->nomatch ? -ENOTEMPTY : 1; } static inline void -hash_netport4_data_copy(struct hash_netport4_elem *dst, - const struct hash_netport4_elem *src) +hash_netport4_data_set_flags(struct hash_netport4_elem *elem, u32 flags) { - dst->ip = src->ip; - dst->port = src->port; - dst->proto = src->proto; - dst->cidr = src->cidr; - dst->nomatch = src->nomatch; + elem->nomatch = !!((flags >> 16) & IPSET_FLAG_NOMATCH); } static inline void -hash_netport4_data_flags(struct hash_netport4_elem *dst, u32 flags) +hash_netport4_data_reset_flags(struct hash_netport4_elem *elem, u8 *flags) { - dst->nomatch = !!(flags & IPSET_FLAG_NOMATCH); -} - -static inline int -hash_netport4_data_match(const struct hash_netport4_elem *elem) -{ - return elem->nomatch ? -ENOTEMPTY : 1; + swap(*flags, elem->nomatch); } static inline void @@ -117,12 +122,6 @@ hash_netport4_data_netmask(struct hash_netport4_elem *elem, u8 cidr) elem->cidr = cidr - 1; } -static inline void -hash_netport4_data_zero_out(struct hash_netport4_elem *elem) -{ - elem->proto = 0; -} - static bool hash_netport4_data_list(struct sk_buff *skb, const struct hash_netport4_elem *data) @@ -142,77 +141,53 @@ nla_put_failure: return 1; } -static bool -hash_netport4_data_tlist(struct sk_buff *skb, - const struct hash_netport4_elem *data) +static inline void +hash_netport4_data_next(struct hash_netport4_elem *next, + const struct hash_netport4_elem *d) { - const struct hash_netport4_telem *tdata = - (const struct hash_netport4_telem *)data; - u32 flags = data->nomatch ? IPSET_FLAG_NOMATCH : 0; - - if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, tdata->ip) || - nla_put_net16(skb, IPSET_ATTR_PORT, tdata->port) || - nla_put_u8(skb, IPSET_ATTR_CIDR, data->cidr + 1) || - nla_put_u8(skb, IPSET_ATTR_PROTO, data->proto) || - nla_put_net32(skb, IPSET_ATTR_TIMEOUT, - htonl(ip_set_timeout_get(tdata->timeout))) || - (flags && - nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags)))) - goto nla_put_failure; - return 0; - -nla_put_failure: - return 1; + next->ip = d->ip; + next->port = d->port; } -#define IP_SET_HASH_WITH_PROTO -#define IP_SET_HASH_WITH_NETS - +#define MTYPE hash_netport4 #define PF 4 #define HOST_MASK 32 -#include <linux/netfilter/ipset/ip_set_ahash.h> - -static inline void -hash_netport4_data_next(struct ip_set_hash *h, - const struct hash_netport4_elem *d) -{ - h->next.ip = d->ip; - h->next.port = d->port; -} +#include "ip_set_hash_gen.h" static int hash_netport4_kadt(struct ip_set *set, const struct sk_buff *skb, const struct xt_action_param *par, - enum ipset_adt adt, const struct ip_set_adt_opt *opt) + enum ipset_adt adt, struct ip_set_adt_opt *opt) { - const struct ip_set_hash *h = set->data; + const struct hash_netport *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; - struct hash_netport4_elem data = { + struct hash_netport4_elem e = { .cidr = h->nets[0].cidr ? 
h->nets[0].cidr - 1 : HOST_MASK - 1 }; + struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h); if (adt == IPSET_TEST) - data.cidr = HOST_MASK - 1; + e.cidr = HOST_MASK - 1; if (!ip_set_get_ip4_port(skb, opt->flags & IPSET_DIM_TWO_SRC, - &data.port, &data.proto)) + &e.port, &e.proto)) return -EINVAL; - ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &data.ip); - data.ip &= ip_set_netmask(data.cidr + 1); + ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip); + e.ip &= ip_set_netmask(e.cidr + 1); - return adtfn(set, &data, opt_timeout(opt, h), opt->cmdflags); + return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags); } static int hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { - const struct ip_set_hash *h = set->data; + const struct hash_netport *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; - struct hash_netport4_elem data = { .cidr = HOST_MASK - 1 }; + struct hash_netport4_elem e = { .cidr = HOST_MASK - 1 }; + struct ip_set_ext ext = IP_SET_INIT_UEXT(h); u32 port, port_to, p = 0, ip = 0, ip_to, last; - u32 timeout = h->timeout; bool with_ports = false; u8 cidr; int ret; @@ -221,13 +196,16 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[], !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) || !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) || !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS))) + !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES))) return -IPSET_ERR_PROTOCOL; if (tb[IPSET_ATTR_LINENO]) *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); - ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip); + ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip) || + ip_set_get_extensions(set, tb, &ext); if (ret) return ret; @@ -235,47 +213,42 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[], cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); if (!cidr || cidr > HOST_MASK) return -IPSET_ERR_INVALID_CIDR; - data.cidr = cidr - 1; + e.cidr = cidr - 1; } if (tb[IPSET_ATTR_PORT]) - data.port = nla_get_be16(tb[IPSET_ATTR_PORT]); + e.port = nla_get_be16(tb[IPSET_ATTR_PORT]); else return -IPSET_ERR_PROTOCOL; if (tb[IPSET_ATTR_PROTO]) { - data.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]); - with_ports = ip_set_proto_with_ports(data.proto); + e.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]); + with_ports = ip_set_proto_with_ports(e.proto); - if (data.proto == 0) + if (e.proto == 0) return -IPSET_ERR_INVALID_PROTO; } else return -IPSET_ERR_MISSING_PROTO; - if (!(with_ports || data.proto == IPPROTO_ICMP)) - data.port = 0; - - if (tb[IPSET_ATTR_TIMEOUT]) { - if (!with_timeout(h->timeout)) - return -IPSET_ERR_TIMEOUT; - timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); - } + if (!(with_ports || e.proto == IPPROTO_ICMP)) + e.port = 0; with_ports = with_ports && tb[IPSET_ATTR_PORT_TO]; - if (tb[IPSET_ATTR_CADT_FLAGS] && adt == IPSET_ADD) { + if (tb[IPSET_ATTR_CADT_FLAGS]) { u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); if (cadt_flags & IPSET_FLAG_NOMATCH) - flags |= (cadt_flags << 16); + flags |= (IPSET_FLAG_NOMATCH << 16); } if (adt == IPSET_TEST || !(with_ports || tb[IPSET_ATTR_IP_TO])) { - data.ip = htonl(ip & ip_set_hostmask(data.cidr + 1)); - ret = adtfn(set, &data, timeout, flags); - return ip_set_eexist(ret, flags) ? 
0 : ret; + e.ip = htonl(ip & ip_set_hostmask(e.cidr + 1)); + ret = adtfn(set, &e, &ext, &ext, flags); + return ip_set_enomatch(ret, flags, adt) ? 1 : + ip_set_eexist(ret, flags) ? 0 : ret; } - port = port_to = ntohs(data.port); + port = port_to = ntohs(e.port); if (tb[IPSET_ATTR_PORT_TO]) { port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]); if (port_to < port) @@ -289,21 +262,20 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[], swap(ip, ip_to); if (ip + UINT_MAX == ip_to) return -IPSET_ERR_HASH_RANGE; - } else { - ip_set_mask_from_to(ip, ip_to, data.cidr + 1); - } + } else + ip_set_mask_from_to(ip, ip_to, e.cidr + 1); if (retried) ip = ntohl(h->next.ip); while (!after(ip, ip_to)) { - data.ip = htonl(ip); + e.ip = htonl(ip); last = ip_set_range_to_cidr(ip, ip_to, &cidr); - data.cidr = cidr - 1; + e.cidr = cidr - 1; p = retried && ip == ntohl(h->next.ip) ? ntohs(h->next.port) : port; for (; p <= port_to; p++) { - data.port = htons(p); - ret = adtfn(set, &data, timeout, flags); + e.port = htons(p); + ret = adtfn(set, &e, &ext, &ext, flags); if (ret && !ip_set_eexist(ret, flags)) return ret; @@ -315,85 +287,73 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[], return ret; } -static bool -hash_netport_same_set(const struct ip_set *a, const struct ip_set *b) -{ - const struct ip_set_hash *x = a->data; - const struct ip_set_hash *y = b->data; +/* IPv6 variants */ - /* Resizing changes htable_bits, so we ignore it */ - return x->maxelem == y->maxelem && - x->timeout == y->timeout; -} +struct hash_netport6_elem { + union nf_inet_addr ip; + __be16 port; + u8 proto; + u8 cidr:7; + u8 nomatch:1; +}; -/* The type variant functions: IPv6 */ +struct hash_netport6t_elem { + union nf_inet_addr ip; + __be16 port; + u8 proto; + u8 cidr:7; + u8 nomatch:1; + unsigned long timeout; +}; -struct hash_netport6_elem { +struct hash_netport6c_elem { union nf_inet_addr ip; __be16 port; u8 proto; u8 cidr:7; u8 nomatch:1; + struct ip_set_counter counter; }; -struct hash_netport6_telem { +struct hash_netport6ct_elem { union nf_inet_addr ip; __be16 port; u8 proto; u8 cidr:7; u8 nomatch:1; + struct ip_set_counter counter; unsigned long timeout; }; +/* Common functions */ + static inline bool hash_netport6_data_equal(const struct hash_netport6_elem *ip1, const struct hash_netport6_elem *ip2, u32 *multi) { - return ipv6_addr_cmp(&ip1->ip.in6, &ip2->ip.in6) == 0 && + return ipv6_addr_equal(&ip1->ip.in6, &ip2->ip.in6) && ip1->port == ip2->port && ip1->proto == ip2->proto && ip1->cidr == ip2->cidr; } -static inline bool -hash_netport6_data_isnull(const struct hash_netport6_elem *elem) -{ - return elem->proto == 0; -} - -static inline void -hash_netport6_data_copy(struct hash_netport6_elem *dst, - const struct hash_netport6_elem *src) -{ - memcpy(dst, src, sizeof(*dst)); -} - -static inline void -hash_netport6_data_flags(struct hash_netport6_elem *dst, u32 flags) -{ - dst->nomatch = !!(flags & IPSET_FLAG_NOMATCH); -} - static inline int -hash_netport6_data_match(const struct hash_netport6_elem *elem) +hash_netport6_do_data_match(const struct hash_netport6_elem *elem) { return elem->nomatch ? 
-ENOTEMPTY : 1; } static inline void -hash_netport6_data_zero_out(struct hash_netport6_elem *elem) +hash_netport6_data_set_flags(struct hash_netport6_elem *elem, u32 flags) { - elem->proto = 0; + elem->nomatch = !!((flags >> 16) & IPSET_FLAG_NOMATCH); } static inline void -ip6_netmask(union nf_inet_addr *ip, u8 prefix) +hash_netport6_data_reset_flags(struct hash_netport6_elem *elem, u8 *flags) { - ip->ip6[0] &= ip_set_netmask6(prefix)[0]; - ip->ip6[1] &= ip_set_netmask6(prefix)[1]; - ip->ip6[2] &= ip_set_netmask6(prefix)[2]; - ip->ip6[3] &= ip_set_netmask6(prefix)[3]; + swap(*flags, elem->nomatch); } static inline void @@ -422,76 +382,57 @@ nla_put_failure: return 1; } -static bool -hash_netport6_data_tlist(struct sk_buff *skb, - const struct hash_netport6_elem *data) +static inline void +hash_netport6_data_next(struct hash_netport4_elem *next, + const struct hash_netport6_elem *d) { - const struct hash_netport6_telem *e = - (const struct hash_netport6_telem *)data; - u32 flags = data->nomatch ? IPSET_FLAG_NOMATCH : 0; - - if (nla_put_ipaddr6(skb, IPSET_ATTR_IP, &e->ip.in6) || - nla_put_net16(skb, IPSET_ATTR_PORT, data->port) || - nla_put_u8(skb, IPSET_ATTR_CIDR, data->cidr + 1) || - nla_put_u8(skb, IPSET_ATTR_PROTO, data->proto) || - nla_put_net32(skb, IPSET_ATTR_TIMEOUT, - htonl(ip_set_timeout_get(e->timeout))) || - (flags && - nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags)))) - goto nla_put_failure; - return 0; - -nla_put_failure: - return 1; + next->port = d->port; } +#undef MTYPE #undef PF #undef HOST_MASK +#define MTYPE hash_netport6 #define PF 6 #define HOST_MASK 128 -#include <linux/netfilter/ipset/ip_set_ahash.h> - -static inline void -hash_netport6_data_next(struct ip_set_hash *h, - const struct hash_netport6_elem *d) -{ - h->next.port = d->port; -} +#define IP_SET_EMIT_CREATE +#include "ip_set_hash_gen.h" static int hash_netport6_kadt(struct ip_set *set, const struct sk_buff *skb, const struct xt_action_param *par, - enum ipset_adt adt, const struct ip_set_adt_opt *opt) + enum ipset_adt adt, struct ip_set_adt_opt *opt) { - const struct ip_set_hash *h = set->data; + const struct hash_netport *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; - struct hash_netport6_elem data = { + struct hash_netport6_elem e = { .cidr = h->nets[0].cidr ? 
h->nets[0].cidr - 1 : HOST_MASK - 1, }; + struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h); if (adt == IPSET_TEST) - data.cidr = HOST_MASK - 1; + e.cidr = HOST_MASK - 1; if (!ip_set_get_ip6_port(skb, opt->flags & IPSET_DIM_TWO_SRC, - &data.port, &data.proto)) + &e.port, &e.proto)) return -EINVAL; - ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &data.ip.in6); - ip6_netmask(&data.ip, data.cidr + 1); + ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip.in6); + ip6_netmask(&e.ip, e.cidr + 1); - return adtfn(set, &data, opt_timeout(opt, h), opt->cmdflags); + return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags); } static int hash_netport6_uadt(struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { - const struct ip_set_hash *h = set->data; + const struct hash_netport *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; - struct hash_netport6_elem data = { .cidr = HOST_MASK - 1 }; + struct hash_netport6_elem e = { .cidr = HOST_MASK - 1 }; + struct ip_set_ext ext = IP_SET_INIT_UEXT(h); u32 port, port_to; - u32 timeout = h->timeout; bool with_ports = false; u8 cidr; int ret; @@ -500,7 +441,9 @@ hash_netport6_uadt(struct ip_set *set, struct nlattr *tb[], !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) || !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) || !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS))) + !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES))) return -IPSET_ERR_PROTOCOL; if (unlikely(tb[IPSET_ATTR_IP_TO])) return -IPSET_ERR_HASH_RANGE_UNSUPPORTED; @@ -508,7 +451,8 @@ hash_netport6_uadt(struct ip_set *set, struct nlattr *tb[], if (tb[IPSET_ATTR_LINENO]) *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); - ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &data.ip); + ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip) || + ip_set_get_extensions(set, tb, &ext); if (ret) return ret; @@ -516,45 +460,40 @@ hash_netport6_uadt(struct ip_set *set, struct nlattr *tb[], cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); if (!cidr || cidr > HOST_MASK) return -IPSET_ERR_INVALID_CIDR; - data.cidr = cidr - 1; + e.cidr = cidr - 1; } - ip6_netmask(&data.ip, data.cidr + 1); + ip6_netmask(&e.ip, e.cidr + 1); if (tb[IPSET_ATTR_PORT]) - data.port = nla_get_be16(tb[IPSET_ATTR_PORT]); + e.port = nla_get_be16(tb[IPSET_ATTR_PORT]); else return -IPSET_ERR_PROTOCOL; if (tb[IPSET_ATTR_PROTO]) { - data.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]); - with_ports = ip_set_proto_with_ports(data.proto); + e.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]); + with_ports = ip_set_proto_with_ports(e.proto); - if (data.proto == 0) + if (e.proto == 0) return -IPSET_ERR_INVALID_PROTO; } else return -IPSET_ERR_MISSING_PROTO; - if (!(with_ports || data.proto == IPPROTO_ICMPV6)) - data.port = 0; - - if (tb[IPSET_ATTR_TIMEOUT]) { - if (!with_timeout(h->timeout)) - return -IPSET_ERR_TIMEOUT; - timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); - } + if (!(with_ports || e.proto == IPPROTO_ICMPV6)) + e.port = 0; - if (tb[IPSET_ATTR_CADT_FLAGS] && adt == IPSET_ADD) { + if (tb[IPSET_ATTR_CADT_FLAGS]) { u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); if (cadt_flags & IPSET_FLAG_NOMATCH) - flags |= (cadt_flags << 16); + flags |= (IPSET_FLAG_NOMATCH << 16); } if (adt == IPSET_TEST || !with_ports || !tb[IPSET_ATTR_PORT_TO]) { - ret = adtfn(set, &data, timeout, flags); - return ip_set_eexist(ret, flags) ? 
0 : ret; + ret = adtfn(set, &e, &ext, &ext, flags); + return ip_set_enomatch(ret, flags, adt) ? 1 : + ip_set_eexist(ret, flags) ? 0 : ret; } - port = ntohs(data.port); + port = ntohs(e.port); port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]); if (port > port_to) swap(port, port_to); @@ -562,8 +501,8 @@ hash_netport6_uadt(struct ip_set *set, struct nlattr *tb[], if (retried) port = ntohs(h->next.port); for (; port <= port_to; port++) { - data.port = htons(port); - ret = adtfn(set, &data, timeout, flags); + e.port = htons(port); + ret = adtfn(set, &e, &ext, &ext, flags); if (ret && !ip_set_eexist(ret, flags)) return ret; @@ -573,80 +512,6 @@ hash_netport6_uadt(struct ip_set *set, struct nlattr *tb[], return ret; } -/* Create hash:ip type of sets */ - -static int -hash_netport_create(struct ip_set *set, struct nlattr *tb[], u32 flags) -{ - struct ip_set_hash *h; - u32 hashsize = IPSET_DEFAULT_HASHSIZE, maxelem = IPSET_DEFAULT_MAXELEM; - u8 hbits; - size_t hsize; - - if (!(set->family == NFPROTO_IPV4 || set->family == NFPROTO_IPV6)) - return -IPSET_ERR_INVALID_FAMILY; - - if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_HASHSIZE) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_MAXELEM) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT))) - return -IPSET_ERR_PROTOCOL; - - if (tb[IPSET_ATTR_HASHSIZE]) { - hashsize = ip_set_get_h32(tb[IPSET_ATTR_HASHSIZE]); - if (hashsize < IPSET_MIMINAL_HASHSIZE) - hashsize = IPSET_MIMINAL_HASHSIZE; - } - - if (tb[IPSET_ATTR_MAXELEM]) - maxelem = ip_set_get_h32(tb[IPSET_ATTR_MAXELEM]); - - h = kzalloc(sizeof(*h) - + sizeof(struct ip_set_hash_nets) - * (set->family == NFPROTO_IPV4 ? 32 : 128), GFP_KERNEL); - if (!h) - return -ENOMEM; - - h->maxelem = maxelem; - get_random_bytes(&h->initval, sizeof(h->initval)); - h->timeout = IPSET_NO_TIMEOUT; - - hbits = htable_bits(hashsize); - hsize = htable_size(hbits); - if (hsize == 0) { - kfree(h); - return -ENOMEM; - } - h->table = ip_set_alloc(hsize); - if (!h->table) { - kfree(h); - return -ENOMEM; - } - h->table->htable_bits = hbits; - - set->data = h; - - if (tb[IPSET_ATTR_TIMEOUT]) { - h->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); - - set->variant = set->family == NFPROTO_IPV4 - ? &hash_netport4_tvariant : &hash_netport6_tvariant; - - if (set->family == NFPROTO_IPV4) - hash_netport4_gc_init(set); - else - hash_netport6_gc_init(set); - } else { - set->variant = set->family == NFPROTO_IPV4 - ? 
&hash_netport4_variant : &hash_netport6_variant; - } - - pr_debug("create %s hashsize %u (%u) maxelem %u: %p(%p)\n", - set->name, jhash_size(h->table->htable_bits), - h->table->htable_bits, h->maxelem, set->data, h->table); - - return 0; -} - static struct ip_set_type hash_netport_type __read_mostly = { .name = "hash:net,port", .protocol = IPSET_PROTOCOL, @@ -663,6 +528,7 @@ static struct ip_set_type hash_netport_type __read_mostly = { [IPSET_ATTR_RESIZE] = { .type = NLA_U8 }, [IPSET_ATTR_PROTO] = { .type = NLA_U8 }, [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, + [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 }, }, .adt_policy = { [IPSET_ATTR_IP] = { .type = NLA_NESTED }, @@ -674,6 +540,8 @@ static struct ip_set_type hash_netport_type __read_mostly = { [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, [IPSET_ATTR_LINENO] = { .type = NLA_U32 }, [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 }, + [IPSET_ATTR_BYTES] = { .type = NLA_U64 }, + [IPSET_ATTR_PACKETS] = { .type = NLA_U64 }, }, .me = THIS_MODULE, }; diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c index 8371c2bac2e4..979b8c90e422 100644 --- a/net/netfilter/ipset/ip_set_list_set.c +++ b/net/netfilter/ipset/ip_set_list_set.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2008-2011 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> +/* Copyright (C) 2008-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -13,30 +13,53 @@ #include <linux/errno.h> #include <linux/netfilter/ipset/ip_set.h> -#include <linux/netfilter/ipset/ip_set_timeout.h> #include <linux/netfilter/ipset/ip_set_list.h> #define REVISION_MIN 0 -#define REVISION_MAX 0 +#define REVISION_MAX 1 /* Counters support added */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>"); IP_SET_MODULE_DESC("list:set", REVISION_MIN, REVISION_MAX); MODULE_ALIAS("ip_set_list:set"); -/* Member elements without and with timeout */ +/* Member elements */ struct set_elem { ip_set_id_t id; }; -struct set_telem { - ip_set_id_t id; +struct sett_elem { + struct { + ip_set_id_t id; + } __attribute__ ((aligned)); + unsigned long timeout; +}; + +struct setc_elem { + struct { + ip_set_id_t id; + } __attribute__ ((aligned)); + struct ip_set_counter counter; +}; + +struct setct_elem { + struct { + ip_set_id_t id; + } __attribute__ ((aligned)); + struct ip_set_counter counter; unsigned long timeout; }; +struct set_adt_elem { + ip_set_id_t id; + ip_set_id_t refid; + int before; +}; + /* Type structure */ struct list_set { size_t dsize; /* element size */ + size_t offset[IPSET_OFFSET_MAX]; /* Offsets to extensions */ u32 size; /* size of set list array */ u32 timeout; /* timeout value */ struct timer_list gc; /* garbage collection */ @@ -49,175 +72,311 @@ list_set_elem(const struct list_set *map, u32 id) return (struct set_elem *)((void *)map->members + id * map->dsize); } -static inline struct set_telem * -list_set_telem(const struct list_set *map, u32 id) -{ - return (struct set_telem *)((void *)map->members + id * map->dsize); -} +#define ext_timeout(e, m) \ +(unsigned long *)((void *)(e) + (m)->offset[IPSET_OFFSET_TIMEOUT]) +#define ext_counter(e, m) \ +(struct ip_set_counter *)((void *)(e) + (m)->offset[IPSET_OFFSET_COUNTER]) -static inline bool -list_set_timeout(const struct list_set *map, u32 id) +static int +list_set_ktest(struct ip_set *set, const struct sk_buff *skb, + const struct xt_action_param *par, + struct 
ip_set_adt_opt *opt, const struct ip_set_ext *ext) { - const struct set_telem *elem = list_set_telem(map, id); + struct list_set *map = set->data; + struct set_elem *e; + u32 i, cmdflags = opt->cmdflags; + int ret; - return ip_set_timeout_test(elem->timeout); + /* Don't lookup sub-counters at all */ + opt->cmdflags &= ~IPSET_FLAG_MATCH_COUNTERS; + if (opt->cmdflags & IPSET_FLAG_SKIP_SUBCOUNTER_UPDATE) + opt->cmdflags &= ~IPSET_FLAG_SKIP_COUNTER_UPDATE; + for (i = 0; i < map->size; i++) { + e = list_set_elem(map, i); + if (e->id == IPSET_INVALID_ID) + return 0; + if (SET_WITH_TIMEOUT(set) && + ip_set_timeout_expired(ext_timeout(e, map))) + continue; + ret = ip_set_test(e->id, skb, par, opt); + if (ret > 0) { + if (SET_WITH_COUNTER(set)) + ip_set_update_counter(ext_counter(e, map), + ext, &opt->ext, + cmdflags); + return ret; + } + } + return 0; } -static inline bool -list_set_expired(const struct list_set *map, u32 id) +static int +list_set_kadd(struct ip_set *set, const struct sk_buff *skb, + const struct xt_action_param *par, + struct ip_set_adt_opt *opt, const struct ip_set_ext *ext) { - const struct set_telem *elem = list_set_telem(map, id); + struct list_set *map = set->data; + struct set_elem *e; + u32 i; + int ret; - return ip_set_timeout_expired(elem->timeout); + for (i = 0; i < map->size; i++) { + e = list_set_elem(map, i); + if (e->id == IPSET_INVALID_ID) + return 0; + if (SET_WITH_TIMEOUT(set) && + ip_set_timeout_expired(ext_timeout(e, map))) + continue; + ret = ip_set_add(e->id, skb, par, opt); + if (ret == 0) + return ret; + } + return 0; } -/* Set list without and with timeout */ - static int -list_set_kadt(struct ip_set *set, const struct sk_buff *skb, +list_set_kdel(struct ip_set *set, const struct sk_buff *skb, const struct xt_action_param *par, - enum ipset_adt adt, const struct ip_set_adt_opt *opt) + struct ip_set_adt_opt *opt, const struct ip_set_ext *ext) { struct list_set *map = set->data; - struct set_elem *elem; + struct set_elem *e; u32 i; int ret; for (i = 0; i < map->size; i++) { - elem = list_set_elem(map, i); - if (elem->id == IPSET_INVALID_ID) + e = list_set_elem(map, i); + if (e->id == IPSET_INVALID_ID) return 0; - if (with_timeout(map->timeout) && list_set_expired(map, i)) + if (SET_WITH_TIMEOUT(set) && + ip_set_timeout_expired(ext_timeout(e, map))) continue; - switch (adt) { - case IPSET_TEST: - ret = ip_set_test(elem->id, skb, par, opt); - if (ret > 0) - return ret; - break; - case IPSET_ADD: - ret = ip_set_add(elem->id, skb, par, opt); - if (ret == 0) - return ret; - break; - case IPSET_DEL: - ret = ip_set_del(elem->id, skb, par, opt); - if (ret == 0) - return ret; - break; - default: - break; - } + ret = ip_set_del(e->id, skb, par, opt); + if (ret == 0) + return ret; + } + return 0; +} + +static int +list_set_kadt(struct ip_set *set, const struct sk_buff *skb, + const struct xt_action_param *par, + enum ipset_adt adt, struct ip_set_adt_opt *opt) +{ + struct list_set *map = set->data; + struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, map); + + switch (adt) { + case IPSET_TEST: + return list_set_ktest(set, skb, par, opt, &ext); + case IPSET_ADD: + return list_set_kadd(set, skb, par, opt, &ext); + case IPSET_DEL: + return list_set_kdel(set, skb, par, opt, &ext); + default: + break; } return -EINVAL; } static bool -id_eq(const struct list_set *map, u32 i, ip_set_id_t id) +id_eq(const struct ip_set *set, u32 i, ip_set_id_t id) { - const struct set_elem *elem; + const struct list_set *map = set->data; + const struct set_elem *e; + + if (i >= map->size) + 
return 0; - if (i < map->size) { - elem = list_set_elem(map, i); - return elem->id == id; + e = list_set_elem(map, i); + return !!(e->id == id && + !(SET_WITH_TIMEOUT(set) && + ip_set_timeout_expired(ext_timeout(e, map)))); +} + +static int +list_set_add(struct ip_set *set, u32 i, struct set_adt_elem *d, + const struct ip_set_ext *ext) +{ + struct list_set *map = set->data; + struct set_elem *e = list_set_elem(map, i); + + if (e->id != IPSET_INVALID_ID) { + if (i == map->size - 1) + /* Last element replaced: e.g. add new,before,last */ + ip_set_put_byindex(e->id); + else { + struct set_elem *x = list_set_elem(map, map->size - 1); + + /* Last element pushed off */ + if (x->id != IPSET_INVALID_ID) + ip_set_put_byindex(x->id); + memmove(list_set_elem(map, i + 1), e, + map->dsize * (map->size - (i + 1))); + } } + e->id = d->id; + if (SET_WITH_TIMEOUT(set)) + ip_set_timeout_set(ext_timeout(e, map), ext->timeout); + if (SET_WITH_COUNTER(set)) + ip_set_init_counter(ext_counter(e, map), ext); return 0; } -static bool -id_eq_timeout(const struct list_set *map, u32 i, ip_set_id_t id) +static int +list_set_del(struct ip_set *set, u32 i) { - const struct set_elem *elem; + struct list_set *map = set->data; + struct set_elem *e = list_set_elem(map, i); - if (i < map->size) { - elem = list_set_elem(map, i); - return !!(elem->id == id && - !(with_timeout(map->timeout) && - list_set_expired(map, i))); - } + ip_set_put_byindex(e->id); + + if (i < map->size - 1) + memmove(e, list_set_elem(map, i + 1), + map->dsize * (map->size - (i + 1))); + /* Last element */ + e = list_set_elem(map, map->size - 1); + e->id = IPSET_INVALID_ID; return 0; } static void -list_elem_add(struct list_set *map, u32 i, ip_set_id_t id) +set_cleanup_entries(struct ip_set *set) { + struct list_set *map = set->data; struct set_elem *e; + u32 i; - for (; i < map->size; i++) { + for (i = 0; i < map->size; i++) { e = list_set_elem(map, i); - swap(e->id, id); - if (e->id == IPSET_INVALID_ID) - break; + if (e->id != IPSET_INVALID_ID && + ip_set_timeout_expired(ext_timeout(e, map))) + list_set_del(set, i); } } -static void -list_elem_tadd(struct list_set *map, u32 i, ip_set_id_t id, - unsigned long timeout) +static int +list_set_utest(struct ip_set *set, void *value, const struct ip_set_ext *ext, + struct ip_set_ext *mext, u32 flags) { - struct set_telem *e; + struct list_set *map = set->data; + struct set_adt_elem *d = value; + struct set_elem *e; + u32 i; + int ret; - for (; i < map->size; i++) { - e = list_set_telem(map, i); - swap(e->id, id); - swap(e->timeout, timeout); + for (i = 0; i < map->size; i++) { + e = list_set_elem(map, i); if (e->id == IPSET_INVALID_ID) - break; + return 0; + else if (SET_WITH_TIMEOUT(set) && + ip_set_timeout_expired(ext_timeout(e, map))) + continue; + else if (e->id != d->id) + continue; + + if (d->before == 0) + return 1; + else if (d->before > 0) + ret = id_eq(set, i + 1, d->refid); + else + ret = i > 0 && id_eq(set, i - 1, d->refid); + return ret; } + return 0; } + static int -list_set_add(struct list_set *map, u32 i, ip_set_id_t id, - unsigned long timeout) +list_set_uadd(struct ip_set *set, void *value, const struct ip_set_ext *ext, + struct ip_set_ext *mext, u32 flags) { - const struct set_elem *e = list_set_elem(map, i); - - if (i == map->size - 1 && e->id != IPSET_INVALID_ID) - /* Last element replaced: e.g. 
add new,before,last */ - ip_set_put_byindex(e->id); - if (with_timeout(map->timeout)) - list_elem_tadd(map, i, id, ip_set_timeout_set(timeout)); - else - list_elem_add(map, i, id); + struct list_set *map = set->data; + struct set_adt_elem *d = value; + struct set_elem *e; + bool flag_exist = flags & IPSET_FLAG_EXIST; + u32 i, ret = 0; - return 0; -} + /* Check already added element */ + for (i = 0; i < map->size; i++) { + e = list_set_elem(map, i); + if (e->id == IPSET_INVALID_ID) + goto insert; + else if (SET_WITH_TIMEOUT(set) && + ip_set_timeout_expired(ext_timeout(e, map))) + continue; + else if (e->id != d->id) + continue; -static int -list_set_del(struct list_set *map, u32 i) -{ - struct set_elem *a = list_set_elem(map, i), *b; - - ip_set_put_byindex(a->id); - - for (; i < map->size - 1; i++) { - b = list_set_elem(map, i + 1); - a->id = b->id; - if (with_timeout(map->timeout)) - ((struct set_telem *)a)->timeout = - ((struct set_telem *)b)->timeout; - a = b; - if (a->id == IPSET_INVALID_ID) - break; + if ((d->before > 1 && !id_eq(set, i + 1, d->refid)) || + (d->before < 0 && + (i == 0 || !id_eq(set, i - 1, d->refid)))) + /* Before/after doesn't match */ + return -IPSET_ERR_REF_EXIST; + if (!flag_exist) + /* Can't re-add */ + return -IPSET_ERR_EXIST; + /* Update extensions */ + if (SET_WITH_TIMEOUT(set)) + ip_set_timeout_set(ext_timeout(e, map), ext->timeout); + if (SET_WITH_COUNTER(set)) + ip_set_init_counter(ext_counter(e, map), ext); + /* Set is already added to the list */ + ip_set_put_byindex(d->id); + return 0; } - /* Last element */ - a->id = IPSET_INVALID_ID; - return 0; +insert: + ret = -IPSET_ERR_LIST_FULL; + for (i = 0; i < map->size && ret == -IPSET_ERR_LIST_FULL; i++) { + e = list_set_elem(map, i); + if (e->id == IPSET_INVALID_ID) + ret = d->before != 0 ? -IPSET_ERR_REF_EXIST + : list_set_add(set, i, d, ext); + else if (e->id != d->refid) + continue; + else if (d->before > 0) + ret = list_set_add(set, i, d, ext); + else if (i + 1 < map->size) + ret = list_set_add(set, i + 1, d, ext); + } + + return ret; } -static void -cleanup_entries(struct list_set *map) +static int +list_set_udel(struct ip_set *set, void *value, const struct ip_set_ext *ext, + struct ip_set_ext *mext, u32 flags) { - struct set_telem *e; + struct list_set *map = set->data; + struct set_adt_elem *d = value; + struct set_elem *e; u32 i; for (i = 0; i < map->size; i++) { - e = list_set_telem(map, i); - if (e->id != IPSET_INVALID_ID && list_set_expired(map, i)) - list_set_del(map, i); + e = list_set_elem(map, i); + if (e->id == IPSET_INVALID_ID) + return d->before != 0 ? 
-IPSET_ERR_REF_EXIST + : -IPSET_ERR_EXIST; + else if (SET_WITH_TIMEOUT(set) && + ip_set_timeout_expired(ext_timeout(e, map))) + continue; + else if (e->id != d->id) + continue; + + if (d->before == 0) + return list_set_del(set, i); + else if (d->before > 0) { + if (!id_eq(set, i + 1, d->refid)) + return -IPSET_ERR_REF_EXIST; + return list_set_del(set, i); + } else if (i == 0 || !id_eq(set, i - 1, d->refid)) + return -IPSET_ERR_REF_EXIST; + else + return list_set_del(set, i); } + return -IPSET_ERR_EXIST; } static int @@ -225,26 +384,27 @@ list_set_uadt(struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { struct list_set *map = set->data; - bool with_timeout = with_timeout(map->timeout); - bool flag_exist = flags & IPSET_FLAG_EXIST; - int before = 0; - u32 timeout = map->timeout; - ip_set_id_t id, refid = IPSET_INVALID_ID; - const struct set_elem *elem; + ipset_adtfn adtfn = set->variant->adt[adt]; + struct set_adt_elem e = { .refid = IPSET_INVALID_ID }; + struct ip_set_ext ext = IP_SET_INIT_UEXT(map); struct ip_set *s; - u32 i; int ret = 0; if (unlikely(!tb[IPSET_ATTR_NAME] || !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS))) + !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES))) return -IPSET_ERR_PROTOCOL; if (tb[IPSET_ATTR_LINENO]) *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); - id = ip_set_get_byname(nla_data(tb[IPSET_ATTR_NAME]), &s); - if (id == IPSET_INVALID_ID) + ret = ip_set_get_extensions(set, tb, &ext); + if (ret) + return ret; + e.id = ip_set_get_byname(nla_data(tb[IPSET_ATTR_NAME]), &s); + if (e.id == IPSET_INVALID_ID) return -IPSET_ERR_NAME; /* "Loop detection" */ if (s->type->features & IPSET_TYPE_NAME) { @@ -254,115 +414,34 @@ list_set_uadt(struct ip_set *set, struct nlattr *tb[], if (tb[IPSET_ATTR_CADT_FLAGS]) { u32 f = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); - before = f & IPSET_FLAG_BEFORE; + e.before = f & IPSET_FLAG_BEFORE; } - if (before && !tb[IPSET_ATTR_NAMEREF]) { + if (e.before && !tb[IPSET_ATTR_NAMEREF]) { ret = -IPSET_ERR_BEFORE; goto finish; } if (tb[IPSET_ATTR_NAMEREF]) { - refid = ip_set_get_byname(nla_data(tb[IPSET_ATTR_NAMEREF]), - &s); - if (refid == IPSET_INVALID_ID) { + e.refid = ip_set_get_byname(nla_data(tb[IPSET_ATTR_NAMEREF]), + &s); + if (e.refid == IPSET_INVALID_ID) { ret = -IPSET_ERR_NAMEREF; goto finish; } - if (!before) - before = -1; - } - if (tb[IPSET_ATTR_TIMEOUT]) { - if (!with_timeout) { - ret = -IPSET_ERR_TIMEOUT; - goto finish; - } - timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); + if (!e.before) + e.before = -1; } - if (with_timeout && adt != IPSET_TEST) - cleanup_entries(map); + if (adt != IPSET_TEST && SET_WITH_TIMEOUT(set)) + set_cleanup_entries(set); - switch (adt) { - case IPSET_TEST: - for (i = 0; i < map->size && !ret; i++) { - elem = list_set_elem(map, i); - if (elem->id == IPSET_INVALID_ID || - (before != 0 && i + 1 >= map->size)) - break; - else if (with_timeout && list_set_expired(map, i)) - continue; - else if (before > 0 && elem->id == id) - ret = id_eq_timeout(map, i + 1, refid); - else if (before < 0 && elem->id == refid) - ret = id_eq_timeout(map, i + 1, id); - else if (before == 0 && elem->id == id) - ret = 1; - } - break; - case IPSET_ADD: - for (i = 0; i < map->size; i++) { - elem = list_set_elem(map, i); - if (elem->id != id) - continue; - if (!(with_timeout && flag_exist)) { - ret = 
-IPSET_ERR_EXIST; - goto finish; - } else { - struct set_telem *e = list_set_telem(map, i); - - if ((before > 1 && - !id_eq(map, i + 1, refid)) || - (before < 0 && - (i == 0 || !id_eq(map, i - 1, refid)))) { - ret = -IPSET_ERR_EXIST; - goto finish; - } - e->timeout = ip_set_timeout_set(timeout); - ip_set_put_byindex(id); - ret = 0; - goto finish; - } - } - ret = -IPSET_ERR_LIST_FULL; - for (i = 0; i < map->size && ret == -IPSET_ERR_LIST_FULL; i++) { - elem = list_set_elem(map, i); - if (elem->id == IPSET_INVALID_ID) - ret = before != 0 ? -IPSET_ERR_REF_EXIST - : list_set_add(map, i, id, timeout); - else if (elem->id != refid) - continue; - else if (before > 0) - ret = list_set_add(map, i, id, timeout); - else if (i + 1 < map->size) - ret = list_set_add(map, i + 1, id, timeout); - } - break; - case IPSET_DEL: - ret = -IPSET_ERR_EXIST; - for (i = 0; i < map->size && ret == -IPSET_ERR_EXIST; i++) { - elem = list_set_elem(map, i); - if (elem->id == IPSET_INVALID_ID) { - ret = before != 0 ? -IPSET_ERR_REF_EXIST - : -IPSET_ERR_EXIST; - break; - } else if (elem->id == id && - (before == 0 || - (before > 0 && id_eq(map, i + 1, refid)))) - ret = list_set_del(map, i); - else if (elem->id == refid && - before < 0 && id_eq(map, i + 1, id)) - ret = list_set_del(map, i + 1); - } - break; - default: - break; - } + ret = adtfn(set, &e, &ext, &ext, flags); finish: - if (refid != IPSET_INVALID_ID) - ip_set_put_byindex(refid); + if (e.refid != IPSET_INVALID_ID) + ip_set_put_byindex(e.refid); if (adt != IPSET_ADD || ret) - ip_set_put_byindex(id); + ip_set_put_byindex(e.id); return ip_set_eexist(ret, flags) ? 0 : ret; } @@ -371,14 +450,14 @@ static void list_set_flush(struct ip_set *set) { struct list_set *map = set->data; - struct set_elem *elem; + struct set_elem *e; u32 i; for (i = 0; i < map->size; i++) { - elem = list_set_elem(map, i); - if (elem->id != IPSET_INVALID_ID) { - ip_set_put_byindex(elem->id); - elem->id = IPSET_INVALID_ID; + e = list_set_elem(map, i); + if (e->id != IPSET_INVALID_ID) { + ip_set_put_byindex(e->id); + e->id = IPSET_INVALID_ID; } } } @@ -388,7 +467,7 @@ list_set_destroy(struct ip_set *set) { struct list_set *map = set->data; - if (with_timeout(map->timeout)) + if (SET_WITH_TIMEOUT(set)) del_timer_sync(&map->gc); list_set_flush(set); kfree(map); @@ -406,8 +485,11 @@ list_set_head(struct ip_set *set, struct sk_buff *skb) if (!nested) goto nla_put_failure; if (nla_put_net32(skb, IPSET_ATTR_SIZE, htonl(map->size)) || - (with_timeout(map->timeout) && + (SET_WITH_TIMEOUT(set) && nla_put_net32(skb, IPSET_ATTR_TIMEOUT, htonl(map->timeout))) || + (SET_WITH_COUNTER(set) && + nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, + htonl(IPSET_FLAG_WITH_COUNTERS))) || nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)) || nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(sizeof(*map) + map->size * map->dsize))) @@ -436,7 +518,8 @@ list_set_list(const struct ip_set *set, e = list_set_elem(map, i); if (e->id == IPSET_INVALID_ID) goto finish; - if (with_timeout(map->timeout) && list_set_expired(map, i)) + if (SET_WITH_TIMEOUT(set) && + ip_set_timeout_expired(ext_timeout(e, map))) continue; nested = ipset_nest_start(skb, IPSET_ATTR_DATA); if (!nested) { @@ -449,13 +532,14 @@ list_set_list(const struct ip_set *set, if (nla_put_string(skb, IPSET_ATTR_NAME, ip_set_name_byindex(e->id))) goto nla_put_failure; - if (with_timeout(map->timeout)) { - const struct set_telem *te = - (const struct set_telem *) e; - __be32 to = htonl(ip_set_timeout_get(te->timeout)); - if (nla_put_net32(skb, IPSET_ATTR_TIMEOUT, 
to)) - goto nla_put_failure; - } + if (SET_WITH_TIMEOUT(set) && + nla_put_net32(skb, IPSET_ATTR_TIMEOUT, + htonl(ip_set_timeout_get( + ext_timeout(e, map))))) + goto nla_put_failure; + if (SET_WITH_COUNTER(set) && + ip_set_put_counter(skb, ext_counter(e, map))) + goto nla_put_failure; ipset_nest_end(skb, nested); } finish: @@ -481,12 +565,18 @@ list_set_same_set(const struct ip_set *a, const struct ip_set *b) const struct list_set *y = b->data; return x->size == y->size && - x->timeout == y->timeout; + x->timeout == y->timeout && + a->extensions == b->extensions; } -static const struct ip_set_type_variant list_set = { +static const struct ip_set_type_variant set_variant = { .kadt = list_set_kadt, .uadt = list_set_uadt, + .adt = { + [IPSET_ADD] = list_set_uadd, + [IPSET_DEL] = list_set_udel, + [IPSET_TEST] = list_set_utest, + }, .destroy = list_set_destroy, .flush = list_set_flush, .head = list_set_head, @@ -501,7 +591,7 @@ list_set_gc(unsigned long ul_set) struct list_set *map = set->data; write_lock_bh(&set->lock); - cleanup_entries(map); + set_cleanup_entries(set); write_unlock_bh(&set->lock); map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ; @@ -509,20 +599,20 @@ list_set_gc(unsigned long ul_set) } static void -list_set_gc_init(struct ip_set *set) +list_set_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set)) { struct list_set *map = set->data; init_timer(&map->gc); map->gc.data = (unsigned long) set; - map->gc.function = list_set_gc; + map->gc.function = gc; map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ; add_timer(&map->gc); } /* Create list:set type of sets */ -static bool +static struct list_set * init_list_set(struct ip_set *set, u32 size, size_t dsize, unsigned long timeout) { @@ -532,7 +622,7 @@ init_list_set(struct ip_set *set, u32 size, size_t dsize, map = kzalloc(sizeof(*map) + size * dsize, GFP_KERNEL); if (!map) - return false; + return NULL; map->size = size; map->dsize = dsize; @@ -544,16 +634,19 @@ init_list_set(struct ip_set *set, u32 size, size_t dsize, e->id = IPSET_INVALID_ID; } - return true; + return map; } static int list_set_create(struct ip_set *set, struct nlattr *tb[], u32 flags) { - u32 size = IP_SET_LIST_DEFAULT_SIZE; + struct list_set *map; + u32 size = IP_SET_LIST_DEFAULT_SIZE, cadt_flags = 0; + unsigned long timeout = 0; if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_SIZE) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT))) + !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS))) return -IPSET_ERR_PROTOCOL; if (tb[IPSET_ATTR_SIZE]) @@ -561,18 +654,46 @@ list_set_create(struct ip_set *set, struct nlattr *tb[], u32 flags) if (size < IP_SET_LIST_MIN_SIZE) size = IP_SET_LIST_MIN_SIZE; - if (tb[IPSET_ATTR_TIMEOUT]) { - if (!init_list_set(set, size, sizeof(struct set_telem), - ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]))) + if (tb[IPSET_ATTR_CADT_FLAGS]) + cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); + if (tb[IPSET_ATTR_TIMEOUT]) + timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); + set->variant = &set_variant; + if (cadt_flags & IPSET_FLAG_WITH_COUNTERS) { + set->extensions |= IPSET_EXT_COUNTER; + if (tb[IPSET_ATTR_TIMEOUT]) { + map = init_list_set(set, size, + sizeof(struct setct_elem), timeout); + if (!map) + return -ENOMEM; + set->extensions |= IPSET_EXT_TIMEOUT; + map->offset[IPSET_OFFSET_TIMEOUT] = + offsetof(struct setct_elem, timeout); + map->offset[IPSET_OFFSET_COUNTER] = + offsetof(struct setct_elem, counter); + list_set_gc_init(set, 
list_set_gc); + } else { + map = init_list_set(set, size, + sizeof(struct setc_elem), 0); + if (!map) + return -ENOMEM; + map->offset[IPSET_OFFSET_COUNTER] = + offsetof(struct setc_elem, counter); + } + } else if (tb[IPSET_ATTR_TIMEOUT]) { + map = init_list_set(set, size, + sizeof(struct sett_elem), timeout); + if (!map) return -ENOMEM; - - list_set_gc_init(set); + set->extensions |= IPSET_EXT_TIMEOUT; + map->offset[IPSET_OFFSET_TIMEOUT] = + offsetof(struct sett_elem, timeout); + list_set_gc_init(set, list_set_gc); } else { - if (!init_list_set(set, size, sizeof(struct set_elem), - IPSET_NO_TIMEOUT)) + map = init_list_set(set, size, sizeof(struct set_elem), 0); + if (!map) return -ENOMEM; } - set->variant = &list_set; return 0; } @@ -588,6 +709,7 @@ static struct ip_set_type list_set_type __read_mostly = { .create_policy = { [IPSET_ATTR_SIZE] = { .type = NLA_U32 }, [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, + [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 }, }, .adt_policy = { [IPSET_ATTR_NAME] = { .type = NLA_STRING, @@ -597,6 +719,8 @@ static struct ip_set_type list_set_type __read_mostly = { [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, [IPSET_ATTR_LINENO] = { .type = NLA_U32 }, [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 }, + [IPSET_ATTR_BYTES] = { .type = NLA_U64 }, + [IPSET_ATTR_PACKETS] = { .type = NLA_U64 }, }, .me = THIS_MODULE, }; diff --git a/net/netfilter/ipvs/ip_vs_app.c b/net/netfilter/ipvs/ip_vs_app.c index 9713e6e86d47..dfd7b65b3d2a 100644 --- a/net/netfilter/ipvs/ip_vs_app.c +++ b/net/netfilter/ipvs/ip_vs_app.c @@ -58,6 +58,18 @@ static inline void ip_vs_app_put(struct ip_vs_app *app) module_put(app->module); } +static void ip_vs_app_inc_destroy(struct ip_vs_app *inc) +{ + kfree(inc->timeout_table); + kfree(inc); +} + +static void ip_vs_app_inc_rcu_free(struct rcu_head *head) +{ + struct ip_vs_app *inc = container_of(head, struct ip_vs_app, rcu_head); + + ip_vs_app_inc_destroy(inc); +} /* * Allocate/initialize app incarnation and register it in proto apps. 
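[Editor's note] The ip_vs_app.c hunk just above converts incarnation teardown to the standard RCU deferred-free idiom: the object is unlinked from all reader-visible lists first, and the actual kfree() runs via call_rcu() only after every pre-existing RCU reader has finished. A minimal sketch of the idiom follows; the struct and function names are illustrative, not part of the patch:

    #include <linux/kernel.h>
    #include <linux/rcupdate.h>
    #include <linux/slab.h>

    struct item {
            int payload;
            struct rcu_head rcu_head;       /* storage for the RCU callback */
    };

    static void item_rcu_free(struct rcu_head *head)
    {
            struct item *p = container_of(head, struct item, rcu_head);

            kfree(p);                       /* safe: grace period has elapsed */
    }

    static void item_release(struct item *p)
    {
            /* caller has already unlinked p from all RCU-visible lists */
            call_rcu(&p->rcu_head, item_rcu_free);
    }

The matching rcu_barrier() added to ip_vs_conn_cleanup() further down is the other half of the contract: a module must wait for its outstanding call_rcu() callbacks to run before its code and caches can go away.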
@@ -106,8 +118,7 @@ ip_vs_app_inc_new(struct net *net, struct ip_vs_app *app, __u16 proto, return 0; out: - kfree(inc->timeout_table); - kfree(inc); + ip_vs_app_inc_destroy(inc); return ret; } @@ -131,8 +142,7 @@ ip_vs_app_inc_release(struct net *net, struct ip_vs_app *inc) list_del(&inc->a_list); - kfree(inc->timeout_table); - kfree(inc); + call_rcu(&inc->rcu_head, ip_vs_app_inc_rcu_free); } @@ -144,9 +154,9 @@ int ip_vs_app_inc_get(struct ip_vs_app *inc) { int result; - atomic_inc(&inc->usecnt); - if (unlikely((result = ip_vs_app_get(inc->app)) != 1)) - atomic_dec(&inc->usecnt); + result = ip_vs_app_get(inc->app); + if (result) + atomic_inc(&inc->usecnt); return result; } @@ -156,8 +166,8 @@ int ip_vs_app_inc_get(struct ip_vs_app *inc) */ void ip_vs_app_inc_put(struct ip_vs_app *inc) { - ip_vs_app_put(inc->app); atomic_dec(&inc->usecnt); + ip_vs_app_put(inc->app); } @@ -218,6 +228,7 @@ out_unlock: /* * ip_vs_app unregistration routine * We are sure there are no app incarnations attached to services + * Caller should use synchronize_rcu() or rcu_barrier() */ void unregister_ip_vs_app(struct net *net, struct ip_vs_app *app) { @@ -341,14 +352,14 @@ static inline void vs_seq_update(struct ip_vs_conn *cp, struct ip_vs_seq *vseq, unsigned int flag, __u32 seq, int diff) { /* spinlock is to keep updating cp->flags atomic */ - spin_lock(&cp->lock); + spin_lock_bh(&cp->lock); if (!(cp->flags & flag) || after(seq, vseq->init_seq)) { vseq->previous_delta = vseq->delta; vseq->delta += diff; vseq->init_seq = seq; cp->flags |= flag; } - spin_unlock(&cp->lock); + spin_unlock_bh(&cp->lock); } static inline int app_tcp_pkt_out(struct ip_vs_conn *cp, struct sk_buff *skb, @@ -605,12 +616,12 @@ int __net_init ip_vs_app_net_init(struct net *net) struct netns_ipvs *ipvs = net_ipvs(net); INIT_LIST_HEAD(&ipvs->app_list); - proc_net_fops_create(net, "ip_vs_app", 0, &ip_vs_app_fops); + proc_create("ip_vs_app", 0, net->proc_net, &ip_vs_app_fops); return 0; } void __net_exit ip_vs_app_net_cleanup(struct net *net) { unregister_ip_vs_app(net, NULL /* all */); - proc_net_remove(net, "ip_vs_app"); + remove_proc_entry("ip_vs_app", net->proc_net); } diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index 30e764ad021f..a083bda322b6 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -79,51 +79,21 @@ static unsigned int ip_vs_conn_rnd __read_mostly; struct ip_vs_aligned_lock { - rwlock_t l; + spinlock_t l; } __attribute__((__aligned__(SMP_CACHE_BYTES))); /* lock array for conn table */ static struct ip_vs_aligned_lock __ip_vs_conntbl_lock_array[CT_LOCKARRAY_SIZE] __cacheline_aligned; -static inline void ct_read_lock(unsigned int key) -{ - read_lock(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l); -} - -static inline void ct_read_unlock(unsigned int key) -{ - read_unlock(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l); -} - -static inline void ct_write_lock(unsigned int key) -{ - write_lock(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l); -} - -static inline void ct_write_unlock(unsigned int key) -{ - write_unlock(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l); -} - -static inline void ct_read_lock_bh(unsigned int key) -{ - read_lock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l); -} - -static inline void ct_read_unlock_bh(unsigned int key) -{ - read_unlock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l); -} - static inline void ct_write_lock_bh(unsigned int key) { - 
write_lock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l); + spin_lock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l); } static inline void ct_write_unlock_bh(unsigned int key) { - write_unlock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l); + spin_unlock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l); } @@ -197,13 +167,13 @@ static inline int ip_vs_conn_hash(struct ip_vs_conn *cp) /* Hash by protocol, client address and port */ hash = ip_vs_conn_hashkey_conn(cp); - ct_write_lock(hash); + ct_write_lock_bh(hash); spin_lock(&cp->lock); if (!(cp->flags & IP_VS_CONN_F_HASHED)) { - hlist_add_head(&cp->c_list, &ip_vs_conn_tab[hash]); cp->flags |= IP_VS_CONN_F_HASHED; atomic_inc(&cp->refcnt); + hlist_add_head_rcu(&cp->c_list, &ip_vs_conn_tab[hash]); ret = 1; } else { pr_err("%s(): request for already hashed, called from %pF\n", @@ -212,7 +182,7 @@ static inline int ip_vs_conn_hash(struct ip_vs_conn *cp) } spin_unlock(&cp->lock); - ct_write_unlock(hash); + ct_write_unlock_bh(hash); return ret; } @@ -220,7 +190,7 @@ static inline int ip_vs_conn_hash(struct ip_vs_conn *cp) /* * UNhashes ip_vs_conn from ip_vs_conn_tab. - * returns bool success. + * returns bool success. Caller should hold conn reference. */ static inline int ip_vs_conn_unhash(struct ip_vs_conn *cp) { @@ -230,11 +200,11 @@ static inline int ip_vs_conn_unhash(struct ip_vs_conn *cp) /* unhash it and decrease its reference counter */ hash = ip_vs_conn_hashkey_conn(cp); - ct_write_lock(hash); + ct_write_lock_bh(hash); spin_lock(&cp->lock); if (cp->flags & IP_VS_CONN_F_HASHED) { - hlist_del(&cp->c_list); + hlist_del_rcu(&cp->c_list); cp->flags &= ~IP_VS_CONN_F_HASHED; atomic_dec(&cp->refcnt); ret = 1; @@ -242,7 +212,37 @@ static inline int ip_vs_conn_unhash(struct ip_vs_conn *cp) ret = 0; spin_unlock(&cp->lock); - ct_write_unlock(hash); + ct_write_unlock_bh(hash); + + return ret; +} + +/* Try to unlink ip_vs_conn from ip_vs_conn_tab. + * returns bool success. + */ +static inline bool ip_vs_conn_unlink(struct ip_vs_conn *cp) +{ + unsigned int hash; + bool ret; + + hash = ip_vs_conn_hashkey_conn(cp); + + ct_write_lock_bh(hash); + spin_lock(&cp->lock); + + if (cp->flags & IP_VS_CONN_F_HASHED) { + ret = false; + /* Decrease refcnt and unlink conn only if we are last user */ + if (atomic_cmpxchg(&cp->refcnt, 1, 0) == 1) { + hlist_del_rcu(&cp->c_list); + cp->flags &= ~IP_VS_CONN_F_HASHED; + ret = true; + } + } else + ret = atomic_read(&cp->refcnt) ? 
false : true; + + spin_unlock(&cp->lock); + ct_write_unlock_bh(hash); return ret; } @@ -259,28 +259,28 @@ __ip_vs_conn_in_get(const struct ip_vs_conn_param *p) { unsigned int hash; struct ip_vs_conn *cp; - struct hlist_node *n; hash = ip_vs_conn_hashkey_param(p, false); - ct_read_lock(hash); + rcu_read_lock(); - hlist_for_each_entry(cp, n, &ip_vs_conn_tab[hash], c_list) { - if (cp->af == p->af && - p->cport == cp->cport && p->vport == cp->vport && + hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[hash], c_list) { + if (p->cport == cp->cport && p->vport == cp->vport && + cp->af == p->af && ip_vs_addr_equal(p->af, p->caddr, &cp->caddr) && ip_vs_addr_equal(p->af, p->vaddr, &cp->vaddr) && ((!p->cport) ^ (!(cp->flags & IP_VS_CONN_F_NO_CPORT))) && p->protocol == cp->protocol && ip_vs_conn_net_eq(cp, p->net)) { + if (!__ip_vs_conn_get(cp)) + continue; /* HIT */ - atomic_inc(&cp->refcnt); - ct_read_unlock(hash); + rcu_read_unlock(); return cp; } } - ct_read_unlock(hash); + rcu_read_unlock(); return NULL; } @@ -344,18 +344,19 @@ struct ip_vs_conn *ip_vs_ct_in_get(const struct ip_vs_conn_param *p) { unsigned int hash; struct ip_vs_conn *cp; - struct hlist_node *n; hash = ip_vs_conn_hashkey_param(p, false); - ct_read_lock(hash); + rcu_read_lock(); - hlist_for_each_entry(cp, n, &ip_vs_conn_tab[hash], c_list) { - if (!ip_vs_conn_net_eq(cp, p->net)) - continue; - if (p->pe_data && p->pe->ct_match) { - if (p->pe == cp->pe && p->pe->ct_match(p, cp)) - goto out; + hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[hash], c_list) { + if (unlikely(p->pe_data && p->pe->ct_match)) { + if (!ip_vs_conn_net_eq(cp, p->net)) + continue; + if (p->pe == cp->pe && p->pe->ct_match(p, cp)) { + if (__ip_vs_conn_get(cp)) + goto out; + } continue; } @@ -365,17 +366,18 @@ struct ip_vs_conn *ip_vs_ct_in_get(const struct ip_vs_conn_param *p) * p->vaddr is a fwmark */ ip_vs_addr_equal(p->protocol == IPPROTO_IP ? 
AF_UNSPEC : p->af, p->vaddr, &cp->vaddr) && - p->cport == cp->cport && p->vport == cp->vport && + p->vport == cp->vport && p->cport == cp->cport && cp->flags & IP_VS_CONN_F_TEMPLATE && - p->protocol == cp->protocol) - goto out; + p->protocol == cp->protocol && + ip_vs_conn_net_eq(cp, p->net)) { + if (__ip_vs_conn_get(cp)) + goto out; + } } cp = NULL; out: - if (cp) - atomic_inc(&cp->refcnt); - ct_read_unlock(hash); + rcu_read_unlock(); IP_VS_DBG_BUF(9, "template lookup/in %s %s:%d->%s:%d %s\n", ip_vs_proto_name(p->protocol), @@ -394,30 +396,30 @@ struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p) { unsigned int hash; struct ip_vs_conn *cp, *ret=NULL; - struct hlist_node *n; /* * Check for "full" addressed entries */ hash = ip_vs_conn_hashkey_param(p, true); - ct_read_lock(hash); + rcu_read_lock(); - hlist_for_each_entry(cp, n, &ip_vs_conn_tab[hash], c_list) { - if (cp->af == p->af && - p->vport == cp->cport && p->cport == cp->dport && + hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[hash], c_list) { + if (p->vport == cp->cport && p->cport == cp->dport && + cp->af == p->af && ip_vs_addr_equal(p->af, p->vaddr, &cp->caddr) && ip_vs_addr_equal(p->af, p->caddr, &cp->daddr) && p->protocol == cp->protocol && ip_vs_conn_net_eq(cp, p->net)) { + if (!__ip_vs_conn_get(cp)) + continue; /* HIT */ - atomic_inc(&cp->refcnt); ret = cp; break; } } - ct_read_unlock(hash); + rcu_read_unlock(); IP_VS_DBG_BUF(9, "lookup/out %s %s:%d->%s:%d %s\n", ip_vs_proto_name(p->protocol), @@ -460,13 +462,13 @@ void ip_vs_conn_put(struct ip_vs_conn *cp) void ip_vs_conn_fill_cport(struct ip_vs_conn *cp, __be16 cport) { if (ip_vs_conn_unhash(cp)) { - spin_lock(&cp->lock); + spin_lock_bh(&cp->lock); if (cp->flags & IP_VS_CONN_F_NO_CPORT) { atomic_dec(&ip_vs_conn_no_cport_cnt); cp->flags &= ~IP_VS_CONN_F_NO_CPORT; cp->cport = cport; } - spin_unlock(&cp->lock); + spin_unlock_bh(&cp->lock); /* hash on new dport */ ip_vs_conn_hash(cp); @@ -552,7 +554,7 @@ ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest) return; /* Increase the refcnt counter of the dest */ - atomic_inc(&dest->refcnt); + ip_vs_dest_hold(dest); conn_flags = atomic_read(&dest->conn_flags); if (cp->protocol != IPPROTO_UDP) @@ -609,20 +611,22 @@ ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest) * Check if there is a destination for the connection, if so * bind the connection to the destination. 
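[Editor's note] The connection-table hunks above replace the per-bucket reader/writer locks with RCU, and two idioms carry the conversion: a lookup may only take a reference via an atomic_inc_not_zero()-style helper (a connection whose refcnt already hit zero is on its way to being freed), while the expiry path unlinks an entry only if it can atomically drop the last reference, which is what the atomic_cmpxchg(&cp->refcnt, 1, 0) in ip_vs_conn_unlink() does. A hedged sketch of the lookup half, with illustrative names:

    #include <linux/rculist.h>
    #include <linux/atomic.h>

    struct conn {
            atomic_t refcnt;
            struct hlist_node node;
            int key;
    };

    static struct conn *conn_lookup(struct hlist_head *bucket, int key)
    {
            struct conn *c;

            rcu_read_lock();
            hlist_for_each_entry_rcu(c, bucket, node) {
                    if (c->key != key)
                            continue;
                    /* A racing expiry may have dropped the last ref;
                     * only a 0 -> nonzero transition is forbidden. */
                    if (!atomic_inc_not_zero(&c->refcnt))
                            continue;
                    rcu_read_unlock();
                    return c;       /* caller now owns one reference */
            }
            rcu_read_unlock();
            return NULL;
    }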
*/ -struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp) +void ip_vs_try_bind_dest(struct ip_vs_conn *cp) { struct ip_vs_dest *dest; + rcu_read_lock(); dest = ip_vs_find_dest(ip_vs_conn_net(cp), cp->af, &cp->daddr, cp->dport, &cp->vaddr, cp->vport, cp->protocol, cp->fwmark, cp->flags); if (dest) { struct ip_vs_proto_data *pd; - spin_lock(&cp->lock); + spin_lock_bh(&cp->lock); if (cp->dest) { - spin_unlock(&cp->lock); - return dest; + spin_unlock_bh(&cp->lock); + rcu_read_unlock(); + return; } /* Applications work depending on the forwarding method @@ -631,7 +635,7 @@ struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp) ip_vs_unbind_app(cp); ip_vs_bind_dest(cp, dest); - spin_unlock(&cp->lock); + spin_unlock_bh(&cp->lock); /* Update its packet transmitter */ cp->packet_xmit = NULL; @@ -646,7 +650,7 @@ struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp) if (pd && atomic_read(&pd->appcnt)) ip_vs_bind_app(cp, pd->pp); } - return dest; + rcu_read_unlock(); } @@ -698,12 +702,7 @@ static inline void ip_vs_unbind_dest(struct ip_vs_conn *cp) dest->flags &= ~IP_VS_DEST_F_OVERLOAD; } - /* - * Simply decrease the refcnt of the dest, because the - * dest will be either in service's destination list - * or in the trash. - */ - atomic_dec(&dest->refcnt); + ip_vs_dest_put(dest); } static int expire_quiescent_template(struct netns_ipvs *ipvs, @@ -760,44 +759,38 @@ int ip_vs_check_template(struct ip_vs_conn *ct) * Simply decrease the refcnt of the template, * don't restart its timer. */ - atomic_dec(&ct->refcnt); + __ip_vs_conn_put(ct); return 0; } return 1; } +static void ip_vs_conn_rcu_free(struct rcu_head *head) +{ + struct ip_vs_conn *cp = container_of(head, struct ip_vs_conn, + rcu_head); + + ip_vs_pe_put(cp->pe); + kfree(cp->pe_data); + kmem_cache_free(ip_vs_conn_cachep, cp); +} + static void ip_vs_conn_expire(unsigned long data) { struct ip_vs_conn *cp = (struct ip_vs_conn *)data; struct net *net = ip_vs_conn_net(cp); struct netns_ipvs *ipvs = net_ipvs(net); - cp->timeout = 60*HZ; - - /* - * hey, I'm using it - */ - atomic_inc(&cp->refcnt); - /* * do I control anybody? */ if (atomic_read(&cp->n_control)) goto expire_later; - /* - * unhash it if it is hashed in the conn table - */ - if (!ip_vs_conn_unhash(cp) && !(cp->flags & IP_VS_CONN_F_ONE_PACKET)) - goto expire_later; - - /* - * refcnt==1 implies I'm the only one referrer - */ - if (likely(atomic_read(&cp->refcnt) == 1)) { + /* Unlink conn if not referenced anymore */ + if (likely(ip_vs_conn_unlink(cp))) { /* delete the timer if it is activated by other users */ - if (timer_pending(&cp->timer)) - del_timer(&cp->timer); + del_timer(&cp->timer); /* does anybody control me? 
*/ if (cp->control) @@ -814,38 +807,41 @@ static void ip_vs_conn_expire(unsigned long data) ip_vs_conn_drop_conntrack(cp); } - ip_vs_pe_put(cp->pe); - kfree(cp->pe_data); if (unlikely(cp->app != NULL)) ip_vs_unbind_app(cp); ip_vs_unbind_dest(cp); if (cp->flags & IP_VS_CONN_F_NO_CPORT) atomic_dec(&ip_vs_conn_no_cport_cnt); + call_rcu(&cp->rcu_head, ip_vs_conn_rcu_free); atomic_dec(&ipvs->conn_count); - - kmem_cache_free(ip_vs_conn_cachep, cp); return; } - /* hash it back to the table */ - ip_vs_conn_hash(cp); - expire_later: - IP_VS_DBG(7, "delayed: conn->refcnt-1=%d conn->n_control=%d\n", - atomic_read(&cp->refcnt)-1, + IP_VS_DBG(7, "delayed: conn->refcnt=%d conn->n_control=%d\n", + atomic_read(&cp->refcnt), atomic_read(&cp->n_control)); + atomic_inc(&cp->refcnt); + cp->timeout = 60*HZ; + if (ipvs->sync_state & IP_VS_STATE_MASTER) ip_vs_sync_conn(net, cp, sysctl_sync_threshold(ipvs)); ip_vs_conn_put(cp); } - +/* Modify timer, so that it expires as soon as possible. + * Can be called without reference only if under RCU lock. + */ void ip_vs_conn_expire_now(struct ip_vs_conn *cp) { - if (del_timer(&cp->timer)) - mod_timer(&cp->timer, jiffies); + /* Using mod_timer_pending will ensure the timer is not + * modified after the final del_timer in ip_vs_conn_expire. + */ + if (timer_pending(&cp->timer) && + time_after(cp->timer.expires, jiffies)) + mod_timer_pending(&cp->timer, jiffies); } @@ -862,7 +858,7 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p, struct ip_vs_proto_data *pd = ip_vs_proto_data_get(p->net, p->protocol); - cp = kmem_cache_zalloc(ip_vs_conn_cachep, GFP_ATOMIC); + cp = kmem_cache_alloc(ip_vs_conn_cachep, GFP_ATOMIC); if (cp == NULL) { IP_VS_ERR_RL("%s(): no memory\n", __func__); return NULL; @@ -873,13 +869,13 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p, ip_vs_conn_net_set(cp, p->net); cp->af = p->af; cp->protocol = p->protocol; - ip_vs_addr_copy(p->af, &cp->caddr, p->caddr); + ip_vs_addr_set(p->af, &cp->caddr, p->caddr); cp->cport = p->cport; - ip_vs_addr_copy(p->af, &cp->vaddr, p->vaddr); + ip_vs_addr_set(p->af, &cp->vaddr, p->vaddr); cp->vport = p->vport; /* proto should only be IPPROTO_IP if d_addr is a fwmark */ - ip_vs_addr_copy(p->protocol == IPPROTO_IP ? AF_UNSPEC : p->af, - &cp->daddr, daddr); + ip_vs_addr_set(p->protocol == IPPROTO_IP ? 
AF_UNSPEC : p->af, + &cp->daddr, daddr); cp->dport = dport; cp->flags = flags; cp->fwmark = fwmark; @@ -888,6 +884,10 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p, cp->pe = p->pe; cp->pe_data = p->pe_data; cp->pe_data_len = p->pe_data_len; + } else { + cp->pe = NULL; + cp->pe_data = NULL; + cp->pe_data_len = 0; } spin_lock_init(&cp->lock); @@ -898,18 +898,28 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p, */ atomic_set(&cp->refcnt, 1); + cp->control = NULL; atomic_set(&cp->n_control, 0); atomic_set(&cp->in_pkts, 0); + cp->packet_xmit = NULL; + cp->app = NULL; + cp->app_data = NULL; + /* reset struct ip_vs_seq */ + cp->in_seq.delta = 0; + cp->out_seq.delta = 0; + atomic_inc(&ipvs->conn_count); if (flags & IP_VS_CONN_F_NO_CPORT) atomic_inc(&ip_vs_conn_no_cport_cnt); /* Bind the connection with a destination server */ + cp->dest = NULL; ip_vs_bind_dest(cp, dest); /* Set its state and timeout */ cp->state = 0; + cp->old_state = 0; cp->timeout = 3*HZ; cp->sync_endtime = jiffies & ~3UL; @@ -954,27 +964,31 @@ static void *ip_vs_conn_array(struct seq_file *seq, loff_t pos) int idx; struct ip_vs_conn *cp; struct ip_vs_iter_state *iter = seq->private; - struct hlist_node *n; for (idx = 0; idx < ip_vs_conn_tab_size; idx++) { - ct_read_lock_bh(idx); - hlist_for_each_entry(cp, n, &ip_vs_conn_tab[idx], c_list) { + hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[idx], c_list) { + /* __ip_vs_conn_get() is not needed by + * ip_vs_conn_seq_show and ip_vs_conn_sync_seq_show + */ if (pos-- == 0) { iter->l = &ip_vs_conn_tab[idx]; return cp; } } - ct_read_unlock_bh(idx); + rcu_read_unlock(); + rcu_read_lock(); } return NULL; } static void *ip_vs_conn_seq_start(struct seq_file *seq, loff_t *pos) + __acquires(RCU) { struct ip_vs_iter_state *iter = seq->private; iter->l = NULL; + rcu_read_lock(); return *pos ? ip_vs_conn_array(seq, *pos - 1) :SEQ_START_TOKEN; } @@ -991,31 +1005,27 @@ static void *ip_vs_conn_seq_next(struct seq_file *seq, void *v, loff_t *pos) return ip_vs_conn_array(seq, 0); /* more on same hash chain? */ - if ((e = cp->c_list.next)) + e = rcu_dereference(hlist_next_rcu(&cp->c_list)); + if (e) return hlist_entry(e, struct ip_vs_conn, c_list); idx = l - ip_vs_conn_tab; - ct_read_unlock_bh(idx); - while (++idx < ip_vs_conn_tab_size) { - ct_read_lock_bh(idx); - hlist_for_each_entry(cp, e, &ip_vs_conn_tab[idx], c_list) { + hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[idx], c_list) { iter->l = &ip_vs_conn_tab[idx]; return cp; } - ct_read_unlock_bh(idx); + rcu_read_unlock(); + rcu_read_lock(); } iter->l = NULL; return NULL; } static void ip_vs_conn_seq_stop(struct seq_file *seq, void *v) + __releases(RCU) { - struct ip_vs_iter_state *iter = seq->private; - struct hlist_head *l = iter->l; - - if (l) - ct_read_unlock_bh(l - ip_vs_conn_tab); + rcu_read_unlock(); } static int ip_vs_conn_seq_show(struct seq_file *seq, void *v) @@ -1194,21 +1204,20 @@ static inline int todrop_entry(struct ip_vs_conn *cp) void ip_vs_random_dropentry(struct net *net) { int idx; - struct ip_vs_conn *cp; + struct ip_vs_conn *cp, *cp_c; /* * Randomly scan 1/32 of the whole table every second */ for (idx = 0; idx < (ip_vs_conn_tab_size>>5); idx++) { unsigned int hash = net_random() & ip_vs_conn_tab_mask; - struct hlist_node *n; /* * Lock is actually needed in this loop. 
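[Editor's note] ip_vs_conn_expire_now(), rewritten above, switches from del_timer()+mod_timer() to mod_timer_pending(). The distinction matters once droppers such as this loop run under plain rcu_read_lock() without a connection reference: mod_timer_pending() only advances a timer that is still pending, so it can never re-arm the timer after ip_vs_conn_expire() has executed its final del_timer() and queued the connection for RCU freeing. A sketch of the guarded fast-expire, with an illustrative struct:

    #include <linux/timer.h>
    #include <linux/jiffies.h>

    struct conn {
            struct timer_list timer;
    };

    /* Safe under rcu_read_lock() even without holding a reference. */
    static void conn_expire_now(struct conn *c)
    {
            if (timer_pending(&c->timer) &&
                time_after(c->timer.expires, jiffies))
                    mod_timer_pending(&c->timer, jiffies);
    }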
*/ - ct_write_lock_bh(hash); + rcu_read_lock(); - hlist_for_each_entry(cp, n, &ip_vs_conn_tab[hash], c_list) { + hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[hash], c_list) { if (cp->flags & IP_VS_CONN_F_TEMPLATE) /* connection template */ continue; @@ -1235,12 +1244,15 @@ void ip_vs_random_dropentry(struct net *net) IP_VS_DBG(4, "del connection\n"); ip_vs_conn_expire_now(cp); - if (cp->control) { + cp_c = cp->control; + /* cp->control is valid only with reference to cp */ + if (cp_c && __ip_vs_conn_get(cp)) { IP_VS_DBG(4, "del conn template\n"); - ip_vs_conn_expire_now(cp->control); + ip_vs_conn_expire_now(cp_c); + __ip_vs_conn_put(cp); } } - ct_write_unlock_bh(hash); + rcu_read_unlock(); } } @@ -1251,29 +1263,30 @@ void ip_vs_random_dropentry(struct net *net) static void ip_vs_conn_flush(struct net *net) { int idx; - struct ip_vs_conn *cp; + struct ip_vs_conn *cp, *cp_c; struct netns_ipvs *ipvs = net_ipvs(net); flush_again: for (idx = 0; idx < ip_vs_conn_tab_size; idx++) { - struct hlist_node *n; - /* * Lock is actually needed in this loop. */ - ct_write_lock_bh(idx); + rcu_read_lock(); - hlist_for_each_entry(cp, n, &ip_vs_conn_tab[idx], c_list) { + hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[idx], c_list) { if (!ip_vs_conn_net_eq(cp, net)) continue; IP_VS_DBG(4, "del connection\n"); ip_vs_conn_expire_now(cp); - if (cp->control) { + cp_c = cp->control; + /* cp->control is valid only with reference to cp */ + if (cp_c && __ip_vs_conn_get(cp)) { IP_VS_DBG(4, "del conn template\n"); - ip_vs_conn_expire_now(cp->control); + ip_vs_conn_expire_now(cp_c); + __ip_vs_conn_put(cp); } } - ct_write_unlock_bh(idx); + rcu_read_unlock(); } /* the counter may be not NULL, because maybe some conn entries @@ -1292,8 +1305,8 @@ int __net_init ip_vs_conn_net_init(struct net *net) atomic_set(&ipvs->conn_count, 0); - proc_net_fops_create(net, "ip_vs_conn", 0, &ip_vs_conn_fops); - proc_net_fops_create(net, "ip_vs_conn_sync", 0, &ip_vs_conn_sync_fops); + proc_create("ip_vs_conn", 0, net->proc_net, &ip_vs_conn_fops); + proc_create("ip_vs_conn_sync", 0, net->proc_net, &ip_vs_conn_sync_fops); return 0; } @@ -1301,8 +1314,8 @@ void __net_exit ip_vs_conn_net_cleanup(struct net *net) { /* flush all the connection entries first */ ip_vs_conn_flush(net); - proc_net_remove(net, "ip_vs_conn"); - proc_net_remove(net, "ip_vs_conn_sync"); + remove_proc_entry("ip_vs_conn", net->proc_net); + remove_proc_entry("ip_vs_conn_sync", net->proc_net); } int __init ip_vs_conn_init(void) @@ -1340,7 +1353,7 @@ int __init ip_vs_conn_init(void) INIT_HLIST_HEAD(&ip_vs_conn_tab[idx]); for (idx = 0; idx < CT_LOCKARRAY_SIZE; idx++) { - rwlock_init(&__ip_vs_conntbl_lock_array[idx].l); + spin_lock_init(&__ip_vs_conntbl_lock_array[idx].l); } /* calculate the random value for connection hash */ @@ -1351,6 +1364,8 @@ int __init ip_vs_conn_init(void) void ip_vs_conn_cleanup(void) { + /* Wait all ip_vs_conn_rcu_free() callbacks to complete */ + rcu_barrier(); /* Release the empty cache */ kmem_cache_destroy(ip_vs_conn_cachep); vfree(ip_vs_conn_tab); diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 47edf5a40a59..085b5880ab0d 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -69,10 +69,7 @@ EXPORT_SYMBOL(ip_vs_conn_put); EXPORT_SYMBOL(ip_vs_get_debug_level); #endif -int ip_vs_net_id __read_mostly; -#ifdef IP_VS_GENERIC_NETNS -EXPORT_SYMBOL(ip_vs_net_id); -#endif +static int ip_vs_net_id __read_mostly; /* netns cnt used for uniqueness */ static atomic_t ipvs_netns_cnt = 
ATOMIC_INIT(0); @@ -206,7 +203,7 @@ ip_vs_conn_fill_param_persist(const struct ip_vs_service *svc, { ip_vs_conn_fill_param(svc->net, svc->af, protocol, caddr, cport, vaddr, vport, p); - p->pe = svc->pe; + p->pe = rcu_dereference(svc->pe); if (p->pe && p->pe->fill_param) return p->pe->fill_param(p, skb); @@ -238,7 +235,8 @@ ip_vs_sched_persist(struct ip_vs_service *svc, /* Mask saddr with the netmask to adjust template granularity */ #ifdef CONFIG_IP_VS_IPV6 if (svc->af == AF_INET6) - ipv6_addr_prefix(&snet.in6, &iph->saddr.in6, svc->netmask); + ipv6_addr_prefix(&snet.in6, &iph->saddr.in6, + (__force __u32) svc->netmask); else #endif snet.ip = iph->saddr.ip & svc->netmask; @@ -299,12 +297,15 @@ ip_vs_sched_persist(struct ip_vs_service *svc, /* Check if a template already exists */ ct = ip_vs_ct_in_get(¶m); if (!ct || !ip_vs_check_template(ct)) { + struct ip_vs_scheduler *sched; + /* * No template found or the dest of the connection * template is not available. * return *ignored=0 i.e. ICMP and NF_DROP */ - dest = svc->scheduler->schedule(svc, skb); + sched = rcu_dereference(svc->scheduler); + dest = sched->schedule(svc, skb); if (!dest) { IP_VS_DBG(1, "p-schedule: no dest found.\n"); kfree(param.pe_data); @@ -394,6 +395,7 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, { struct ip_vs_protocol *pp = pd->pp; struct ip_vs_conn *cp = NULL; + struct ip_vs_scheduler *sched; struct ip_vs_dest *dest; __be16 _ports[2], *pptr; unsigned int flags; @@ -449,7 +451,8 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, return NULL; } - dest = svc->scheduler->schedule(svc, skb); + sched = rcu_dereference(svc->scheduler); + dest = sched->schedule(svc, skb); if (dest == NULL) { IP_VS_DBG(1, "Schedule: no dest found.\n"); return NULL; @@ -507,7 +510,6 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, pptr = frag_safe_skb_hp(skb, iph->len, sizeof(_ports), _ports, iph); if (pptr == NULL) { - ip_vs_service_put(svc); return NF_DROP; } @@ -533,8 +535,6 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, IP_VS_CONN_F_ONE_PACKET : 0; union nf_inet_addr daddr = { .all = { 0, 0, 0, 0 } }; - ip_vs_service_put(svc); - /* create a new connection entry */ IP_VS_DBG(6, "%s(): create a cache_bypass entry\n", __func__); { @@ -571,12 +571,8 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, * listed in the ipvs table), pass the packets, because it is * not ipvs job to decide to drop the packets. 
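[Editor's note] In the ip_vs_core.c hunks the per-call ip_vs_service_put() pairs disappear because service lookups now happen inside an RCU read-side section, and fields that can be replaced at runtime (svc->scheduler, svc->pe) are fetched with rcu_dereference(), paired with rcu_assign_pointer() on the update side. A generic sketch of that read path, with illustrative names and assuming the pointer is never NULL for an active service:

    #include <linux/rcupdate.h>

    struct sched_ops {
            int (*schedule)(void *ctx);
    };

    struct service {
            struct sched_ops __rcu *scheduler;
    };

    static int service_schedule(struct service *svc, void *ctx)
    {
            struct sched_ops *ops;
            int ret;

            rcu_read_lock();
            /* paired with rcu_assign_pointer() when the
             * scheduler is swapped on a live service */
            ops = rcu_dereference(svc->scheduler);
            ret = ops->schedule(ctx);
            rcu_read_unlock();
            return ret;
    }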
*/ - if ((svc->port == FTPPORT) && (pptr[1] != FTPPORT)) { - ip_vs_service_put(svc); + if ((svc->port == FTPPORT) && (pptr[1] != FTPPORT)) return NF_ACCEPT; - } - - ip_vs_service_put(svc); /* * Notify the client that the destination is unreachable, and @@ -588,9 +584,9 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, #ifdef CONFIG_IP_VS_IPV6 if (svc->af == AF_INET6) { if (!skb->dev) { - struct net *net = dev_net(skb_dst(skb)->dev); + struct net *net_ = dev_net(skb_dst(skb)->dev); - skb->dev = net->loopback_dev; + skb->dev = net_->loopback_dev; } icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0); } else @@ -643,8 +639,11 @@ static inline enum ip_defrag_users ip_vs_defrag_user(unsigned int hooknum) static inline int ip_vs_gather_frags(struct sk_buff *skb, u_int32_t user) { - int err = ip_defrag(skb, user); + int err; + local_bh_disable(); + err = ip_defrag(skb, user); + local_bh_enable(); if (!err) ip_send_check(ip_hdr(skb)); @@ -1164,9 +1163,8 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af) sizeof(_ports), _ports, &iph); if (pptr == NULL) return NF_ACCEPT; /* Not for me */ - if (ip_vs_lookup_real_service(net, af, iph.protocol, - &iph.saddr, - pptr[0])) { + if (ip_vs_has_real_service(net, af, iph.protocol, &iph.saddr, + pptr[0])) { /* * Notify the real server: there is no * existing entry if it is not RST @@ -1181,9 +1179,6 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af) iph.len)))) { #ifdef CONFIG_IP_VS_IPV6 if (af == AF_INET6) { - struct net *net = - dev_net(skb_dst(skb)->dev); - if (!skb->dev) skb->dev = net->loopback_dev; icmpv6_send(skb, @@ -1226,13 +1221,7 @@ ip_vs_local_reply4(unsigned int hooknum, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - unsigned int verdict; - - /* Disable BH in LOCAL_OUT until all places are fixed */ - local_bh_disable(); - verdict = ip_vs_out(hooknum, skb, AF_INET); - local_bh_enable(); - return verdict; + return ip_vs_out(hooknum, skb, AF_INET); } #ifdef CONFIG_IP_VS_IPV6 @@ -1259,13 +1248,7 @@ ip_vs_local_reply6(unsigned int hooknum, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - unsigned int verdict; - - /* Disable BH in LOCAL_OUT until all places are fixed */ - local_bh_disable(); - verdict = ip_vs_out(hooknum, skb, AF_INET6); - local_bh_enable(); - return verdict; + return ip_vs_out(hooknum, skb, AF_INET6); } #endif @@ -1394,19 +1377,20 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum) skb_reset_network_header(skb); IP_VS_DBG(12, "ICMP for IPIP %pI4->%pI4: mtu=%u\n", &ip_hdr(skb)->saddr, &ip_hdr(skb)->daddr, mtu); - rcu_read_lock(); ipv4_update_pmtu(skb, dev_net(skb->dev), mtu, 0, 0, 0, 0); - rcu_read_unlock(); /* Client uses PMTUD? */ if (!(cih->frag_off & htons(IP_DF))) goto ignore_ipip; /* Prefer the resulting PMTU */ if (dest) { - spin_lock(&dest->dst_lock); - if (dest->dst_cache) - mtu = dst_mtu(dest->dst_cache); - spin_unlock(&dest->dst_lock); + struct ip_vs_dest_dst *dest_dst; + + rcu_read_lock(); + dest_dst = rcu_dereference(dest->dest_dst); + if (dest_dst) + mtu = dst_mtu(dest_dst->dst_cache); + rcu_read_unlock(); } if (mtu > 68 + sizeof(struct iphdr)) mtu -= sizeof(struct iphdr); @@ -1577,7 +1561,8 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) } /* ipvs enabled in this netns ? 
*/ net = skb_net(skb); - if (!net_ipvs(net)->enable) + ipvs = net_ipvs(net); + if (unlikely(sysctl_backup_only(ipvs) || !ipvs->enable)) return NF_ACCEPT; ip_vs_fill_iph_skb(af, skb, &iph); @@ -1654,7 +1639,6 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) } IP_VS_DBG_PKT(11, af, pp, skb, 0, "Incoming packet"); - ipvs = net_ipvs(net); /* Check the server status */ if (cp->dest && !(cp->dest->flags & IP_VS_DEST_F_AVAILABLE)) { /* the destination server is not available */ @@ -1722,13 +1706,7 @@ ip_vs_local_request4(unsigned int hooknum, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - unsigned int verdict; - - /* Disable BH in LOCAL_OUT until all places are fixed */ - local_bh_disable(); - verdict = ip_vs_in(hooknum, skb, AF_INET); - local_bh_enable(); - return verdict; + return ip_vs_in(hooknum, skb, AF_INET); } #ifdef CONFIG_IP_VS_IPV6 @@ -1787,13 +1765,7 @@ ip_vs_local_request6(unsigned int hooknum, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - unsigned int verdict; - - /* Disable BH in LOCAL_OUT until all places are fixed */ - local_bh_disable(); - verdict = ip_vs_in(hooknum, skb, AF_INET6); - local_bh_enable(); - return verdict; + return ip_vs_in(hooknum, skb, AF_INET6); } #endif @@ -1815,13 +1787,15 @@ ip_vs_forward_icmp(unsigned int hooknum, struct sk_buff *skb, { int r; struct net *net; + struct netns_ipvs *ipvs; if (ip_hdr(skb)->protocol != IPPROTO_ICMP) return NF_ACCEPT; /* ipvs enabled in this netns ? */ net = skb_net(skb); - if (!net_ipvs(net)->enable) + ipvs = net_ipvs(net); + if (unlikely(sysctl_backup_only(ipvs) || !ipvs->enable)) return NF_ACCEPT; return ip_vs_in_icmp(skb, &r, hooknum); @@ -1835,6 +1809,7 @@ ip_vs_forward_icmp_v6(unsigned int hooknum, struct sk_buff *skb, { int r; struct net *net; + struct netns_ipvs *ipvs; struct ip_vs_iphdr iphdr; ip_vs_fill_iph_skb(AF_INET6, skb, &iphdr); @@ -1843,7 +1818,8 @@ ip_vs_forward_icmp_v6(unsigned int hooknum, struct sk_buff *skb, /* ipvs enabled in this netns ? */ net = skb_net(skb); - if (!net_ipvs(net)->enable) + ipvs = net_ipvs(net); + if (unlikely(sysctl_backup_only(ipvs) || !ipvs->enable)) return NF_ACCEPT; return ip_vs_in_icmp_v6(skb, &r, hooknum, &iphdr); diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index ec664cbb119f..5b142fb16480 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -55,9 +55,6 @@ /* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. 
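The enable check above is extended so that a node configured as sync backup passes packets through untouched; fetching the per-netns state once and folding both tests into a single unlikely() keeps the fast path cheap. A sketch under assumed names (struct netns_state and get_state() stand in for netns_ipvs and net_ipvs(), which take a struct net in the real code):

#include <linux/netfilter.h>
#include <linux/skbuff.h>

struct netns_state {
	int enable;
	int backup_only;
};

struct netns_state *get_state(const struct sk_buff *skb);	/* assumed */

static unsigned int hook_func(struct sk_buff *skb)
{
	struct netns_state *s = get_state(skb);

	if (unlikely(s->backup_only || !s->enable))
		return NF_ACCEPT;	/* not ours: pass through untouched */

	/* ... normal load-balancing path ... */
	return NF_ACCEPT;
}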
*/ static DEFINE_MUTEX(__ip_vs_mutex); -/* lock for service table */ -static DEFINE_RWLOCK(__ip_vs_svc_lock); - /* sysctl variables */ #ifdef CONFIG_IP_VS_DEBUG @@ -71,7 +68,7 @@ int ip_vs_get_debug_level(void) /* Protos */ -static void __ip_vs_del_service(struct ip_vs_service *svc); +static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup); #ifdef CONFIG_IP_VS_IPV6 @@ -257,9 +254,9 @@ ip_vs_use_count_dec(void) #define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1) /* the service table hashed by <protocol, addr, port> */ -static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE]; +static struct hlist_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE]; /* the service table hashed by fwmark */ -static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE]; +static struct hlist_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE]; /* @@ -271,16 +268,18 @@ ip_vs_svc_hashkey(struct net *net, int af, unsigned int proto, { register unsigned int porth = ntohs(port); __be32 addr_fold = addr->ip; + __u32 ahash; #ifdef CONFIG_IP_VS_IPV6 if (af == AF_INET6) addr_fold = addr->ip6[0]^addr->ip6[1]^ addr->ip6[2]^addr->ip6[3]; #endif - addr_fold ^= ((size_t)net>>8); + ahash = ntohl(addr_fold); + ahash ^= ((size_t) net >> 8); - return (proto^ntohl(addr_fold)^(porth>>IP_VS_SVC_TAB_BITS)^porth) - & IP_VS_SVC_TAB_MASK; + return (proto ^ ahash ^ (porth >> IP_VS_SVC_TAB_BITS) ^ porth) & + IP_VS_SVC_TAB_MASK; } /* @@ -312,13 +311,13 @@ static int ip_vs_svc_hash(struct ip_vs_service *svc) */ hash = ip_vs_svc_hashkey(svc->net, svc->af, svc->protocol, &svc->addr, svc->port); - list_add(&svc->s_list, &ip_vs_svc_table[hash]); + hlist_add_head_rcu(&svc->s_list, &ip_vs_svc_table[hash]); } else { /* * Hash it by fwmark in svc_fwm_table */ hash = ip_vs_svc_fwm_hashkey(svc->net, svc->fwmark); - list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]); + hlist_add_head_rcu(&svc->f_list, &ip_vs_svc_fwm_table[hash]); } svc->flags |= IP_VS_SVC_F_HASHED; @@ -342,10 +341,10 @@ static int ip_vs_svc_unhash(struct ip_vs_service *svc) if (svc->fwmark == 0) { /* Remove it from the svc_table table */ - list_del(&svc->s_list); + hlist_del_rcu(&svc->s_list); } else { /* Remove it from the svc_fwm_table table */ - list_del(&svc->f_list); + hlist_del_rcu(&svc->f_list); } svc->flags &= ~IP_VS_SVC_F_HASHED; @@ -367,7 +366,7 @@ __ip_vs_service_find(struct net *net, int af, __u16 protocol, /* Check for "full" addressed entries */ hash = ip_vs_svc_hashkey(net, af, protocol, vaddr, vport); - list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){ + hlist_for_each_entry_rcu(svc, &ip_vs_svc_table[hash], s_list) { if ((svc->af == af) && ip_vs_addr_equal(af, &svc->addr, vaddr) && (svc->port == vport) @@ -394,7 +393,7 @@ __ip_vs_svc_fwm_find(struct net *net, int af, __u32 fwmark) /* Check for fwmark addressed entries */ hash = ip_vs_svc_fwm_hashkey(net, fwmark); - list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) { + hlist_for_each_entry_rcu(svc, &ip_vs_svc_fwm_table[hash], f_list) { if (svc->fwmark == fwmark && svc->af == af && net_eq(svc->net, net)) { /* HIT */ @@ -405,15 +404,14 @@ __ip_vs_svc_fwm_find(struct net *net, int af, __u32 fwmark) return NULL; } +/* Find service, called under RCU lock */ struct ip_vs_service * -ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol, - const union nf_inet_addr *vaddr, __be16 vport) +ip_vs_service_find(struct net *net, int af, __u32 fwmark, __u16 protocol, + const union nf_inet_addr *vaddr, __be16 vport) { struct ip_vs_service *svc; struct netns_ipvs *ipvs = net_ipvs(net); - 
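The table conversion above swaps list_head buckets for hlist_head so lookups can run locklessly: writers (serialized by __ip_vs_mutex) insert with hlist_add_head_rcu(), readers walk with hlist_for_each_entry_rcu() under rcu_read_lock(). A self-contained sketch of the same shape, with illustrative struct svc and table names:

#include <linux/rculist.h>
#include <linux/mutex.h>
#include <linux/types.h>

#define TAB_BITS	8
#define TAB_SIZE	(1 << TAB_BITS)
#define TAB_MASK	(TAB_SIZE - 1)

struct svc {
	struct hlist_node node;
	u32 key;
};

static struct hlist_head svc_table[TAB_SIZE];
static DEFINE_MUTEX(svc_mutex);			/* serializes all writers */

static void svc_hash(struct svc *s)		/* called with svc_mutex held */
{
	hlist_add_head_rcu(&s->node, &svc_table[s->key & TAB_MASK]);
}

static struct svc *svc_find(u32 key)		/* called under rcu_read_lock() */
{
	struct svc *s;

	hlist_for_each_entry_rcu(s, &svc_table[key & TAB_MASK], node)
		if (s->key == key)
			return s;
	return NULL;
}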
read_lock(&__ip_vs_svc_lock); - /* * Check the table hashed by fwmark first */ @@ -449,10 +447,6 @@ ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol, } out: - if (svc) - atomic_inc(&svc->usecnt); - read_unlock(&__ip_vs_svc_lock); - IP_VS_DBG_BUF(9, "lookup service: fwm %u %s %s:%u %s\n", fwmark, ip_vs_proto_name(protocol), IP_VS_DBG_ADDR(af, vaddr), ntohs(vport), @@ -469,6 +463,13 @@ __ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc) dest->svc = svc; } +static void ip_vs_service_free(struct ip_vs_service *svc) +{ + if (svc->stats.cpustats) + free_percpu(svc->stats.cpustats); + kfree(svc); +} + static void __ip_vs_unbind_svc(struct ip_vs_dest *dest) { @@ -476,12 +477,11 @@ __ip_vs_unbind_svc(struct ip_vs_dest *dest) dest->svc = NULL; if (atomic_dec_and_test(&svc->refcnt)) { - IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n", + IP_VS_DBG_BUF(3, "Removing service %u/%s:%u\n", svc->fwmark, IP_VS_DBG_ADDR(svc->af, &svc->addr), - ntohs(svc->port), atomic_read(&svc->usecnt)); - free_percpu(svc->stats.cpustats); - kfree(svc); + ntohs(svc->port)); + ip_vs_service_free(svc); } } @@ -506,17 +506,13 @@ static inline unsigned int ip_vs_rs_hashkey(int af, & IP_VS_RTAB_MASK; } -/* - * Hashes ip_vs_dest in rs_table by <proto,addr,port>. - * should be called with locked tables. - */ -static int ip_vs_rs_hash(struct netns_ipvs *ipvs, struct ip_vs_dest *dest) +/* Hash ip_vs_dest in rs_table by <proto,addr,port>. */ +static void ip_vs_rs_hash(struct netns_ipvs *ipvs, struct ip_vs_dest *dest) { unsigned int hash; - if (!list_empty(&dest->d_list)) { - return 0; - } + if (dest->in_rs_table) + return; /* * Hash by proto,addr,port, @@ -524,64 +520,51 @@ static int ip_vs_rs_hash(struct netns_ipvs *ipvs, struct ip_vs_dest *dest) */ hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port); - list_add(&dest->d_list, &ipvs->rs_table[hash]); - - return 1; + hlist_add_head_rcu(&dest->d_list, &ipvs->rs_table[hash]); + dest->in_rs_table = 1; } -/* - * UNhashes ip_vs_dest from rs_table. - * should be called with locked tables. - */ -static int ip_vs_rs_unhash(struct ip_vs_dest *dest) +/* Unhash ip_vs_dest from rs_table. */ +static void ip_vs_rs_unhash(struct ip_vs_dest *dest) { /* * Remove it from the rs_table table. */ - if (!list_empty(&dest->d_list)) { - list_del_init(&dest->d_list); + if (dest->in_rs_table) { + hlist_del_rcu(&dest->d_list); + dest->in_rs_table = 0; } - - return 1; } -/* - * Lookup real service by <proto,addr,port> in the real service table. 
- */ -struct ip_vs_dest * -ip_vs_lookup_real_service(struct net *net, int af, __u16 protocol, - const union nf_inet_addr *daddr, - __be16 dport) +/* Check if real service by <proto,addr,port> is present */ +bool ip_vs_has_real_service(struct net *net, int af, __u16 protocol, + const union nf_inet_addr *daddr, __be16 dport) { struct netns_ipvs *ipvs = net_ipvs(net); unsigned int hash; struct ip_vs_dest *dest; - /* - * Check for "full" addressed entries - * Return the first found entry - */ + /* Check for "full" addressed entries */ hash = ip_vs_rs_hashkey(af, daddr, dport); - read_lock(&ipvs->rs_lock); - list_for_each_entry(dest, &ipvs->rs_table[hash], d_list) { - if ((dest->af == af) - && ip_vs_addr_equal(af, &dest->addr, daddr) - && (dest->port == dport) - && ((dest->protocol == protocol) || - dest->vfwmark)) { + rcu_read_lock(); + hlist_for_each_entry_rcu(dest, &ipvs->rs_table[hash], d_list) { + if (dest->port == dport && + dest->af == af && + ip_vs_addr_equal(af, &dest->addr, daddr) && + (dest->protocol == protocol || dest->vfwmark)) { /* HIT */ - read_unlock(&ipvs->rs_lock); - return dest; + rcu_read_unlock(); + return true; } } - read_unlock(&ipvs->rs_lock); + rcu_read_unlock(); - return NULL; + return false; } -/* - * Lookup destination by {addr,port} in the given service +/* Lookup destination by {addr,port} in the given service + * Called under RCU lock. */ static struct ip_vs_dest * ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr, @@ -592,7 +575,7 @@ ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr, /* * Find the destination for the given service */ - list_for_each_entry(dest, &svc->destinations, n_list) { + list_for_each_entry_rcu(dest, &svc->destinations, n_list) { if ((dest->af == svc->af) && ip_vs_addr_equal(svc->af, &dest->addr, daddr) && (dest->port == dport)) { @@ -606,13 +589,11 @@ ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr, /* * Find destination by {daddr,dport,vaddr,protocol} - * Cretaed to be used in ip_vs_process_message() in + * Created to be used in ip_vs_process_message() in * the backup synchronization daemon. It finds the * destination to be bound to the received connection * on the backup. - * - * ip_vs_lookup_real_service() looked promissing, but - * seems not working as expected. + * Called under RCU lock, no refcnt is returned. 
*/ struct ip_vs_dest *ip_vs_find_dest(struct net *net, int af, const union nf_inet_addr *daddr, @@ -625,7 +606,7 @@ struct ip_vs_dest *ip_vs_find_dest(struct net *net, int af, struct ip_vs_service *svc; __be16 port = dport; - svc = ip_vs_service_get(net, af, fwmark, protocol, vaddr, vport); + svc = ip_vs_service_find(net, af, fwmark, protocol, vaddr, vport); if (!svc) return NULL; if (fwmark && (flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ) @@ -633,12 +614,31 @@ struct ip_vs_dest *ip_vs_find_dest(struct net *net, int af, dest = ip_vs_lookup_dest(svc, daddr, port); if (!dest) dest = ip_vs_lookup_dest(svc, daddr, port ^ dport); - if (dest) - atomic_inc(&dest->refcnt); - ip_vs_service_put(svc); return dest; } +void ip_vs_dest_dst_rcu_free(struct rcu_head *head) +{ + struct ip_vs_dest_dst *dest_dst = container_of(head, + struct ip_vs_dest_dst, + rcu_head); + + dst_release(dest_dst->dst_cache); + kfree(dest_dst); +} + +/* Release dest_dst and dst_cache for dest in user context */ +static void __ip_vs_dst_cache_reset(struct ip_vs_dest *dest) +{ + struct ip_vs_dest_dst *old; + + old = rcu_dereference_protected(dest->dest_dst, 1); + if (old) { + RCU_INIT_POINTER(dest->dest_dst, NULL); + call_rcu(&old->rcu_head, ip_vs_dest_dst_rcu_free); + } +} + /* * Lookup dest by {svc,addr,port} in the destination trash. * The destination trash is used to hold the destinations that are removed @@ -653,19 +653,25 @@ static struct ip_vs_dest * ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr, __be16 dport) { - struct ip_vs_dest *dest, *nxt; + struct ip_vs_dest *dest; struct netns_ipvs *ipvs = net_ipvs(svc->net); /* * Find the destination in trash */ - list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, n_list) { + spin_lock_bh(&ipvs->dest_trash_lock); + list_for_each_entry(dest, &ipvs->dest_trash, t_list) { IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, " "dest->refcnt=%d\n", dest->vfwmark, IP_VS_DBG_ADDR(svc->af, &dest->addr), ntohs(dest->port), atomic_read(&dest->refcnt)); + /* We can not reuse dest while in grace period + * because conns still can use dest->svc + */ + if (test_bit(IP_VS_DEST_STATE_REMOVING, &dest->state)) + continue; if (dest->af == svc->af && ip_vs_addr_equal(svc->af, &dest->addr, daddr) && dest->port == dport && @@ -675,29 +681,27 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr, (ip_vs_addr_equal(svc->af, &dest->vaddr, &svc->addr) && dest->vport == svc->port))) { /* HIT */ - return dest; - } - - /* - * Try to purge the destination from trash if not referenced - */ - if (atomic_read(&dest->refcnt) == 1) { - IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u " - "from trash\n", - dest->vfwmark, - IP_VS_DBG_ADDR(svc->af, &dest->addr), - ntohs(dest->port)); - list_del(&dest->n_list); - ip_vs_dst_reset(dest); - __ip_vs_unbind_svc(dest); - free_percpu(dest->stats.cpustats); - kfree(dest); + list_del(&dest->t_list); + ip_vs_dest_hold(dest); + goto out; } } - return NULL; + dest = NULL; + +out: + spin_unlock_bh(&ipvs->dest_trash_lock); + + return dest; } +static void ip_vs_dest_free(struct ip_vs_dest *dest) +{ + __ip_vs_dst_cache_reset(dest); + __ip_vs_unbind_svc(dest); + free_percpu(dest->stats.cpustats); + kfree(dest); +} /* * Clean up all the destinations in the trash @@ -706,19 +710,18 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr, * When the ip_vs_control_clearup is activated by ipvs module exit, * the service tables must have been flushed and all the connections * are expired, 
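The dest_dst hunks above show the canonical RCU replace-and-defer-free: the writer detaches the cached entry with RCU_INIT_POINTER() and hands the old one to call_rcu(), so a reader that fetched the pointer just before the swap can keep using it for the rest of its read-side section. A minimal sketch with illustrative struct names (the real callback also drops the dst refcount before kfree):

#include <linux/kernel.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct cache {
	struct rcu_head rcu_head;
	/* ... cached route, cookie, ... */
};

struct holder {
	struct cache __rcu *cache;
};

static void cache_rcu_free(struct rcu_head *head)
{
	struct cache *c = container_of(head, struct cache, rcu_head);

	kfree(c);			/* runs only after all readers left */
}

static void cache_reset(struct holder *h)	/* writer side, serialized */
{
	struct cache *old = rcu_dereference_protected(h->cache, 1);

	if (old) {
		RCU_INIT_POINTER(h->cache, NULL);
		call_rcu(&old->rcu_head, cache_rcu_free);
	}
}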
and the refcnt of each destination in the trash must - * be 1, so we simply release them here. + * be 0, so we simply release them here. */ static void ip_vs_trash_cleanup(struct net *net) { struct ip_vs_dest *dest, *nxt; struct netns_ipvs *ipvs = net_ipvs(net); - list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, n_list) { - list_del(&dest->n_list); - ip_vs_dst_reset(dest); - __ip_vs_unbind_svc(dest); - free_percpu(dest->stats.cpustats); - kfree(dest); + del_timer_sync(&ipvs->dest_trash_timer); + /* No need to use dest_trash_lock */ + list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, t_list) { + list_del(&dest->t_list); + ip_vs_dest_free(dest); } } @@ -768,6 +771,7 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest, struct ip_vs_dest_user_kern *udest, int add) { struct netns_ipvs *ipvs = net_ipvs(svc->net); + struct ip_vs_scheduler *sched; int conn_flags; /* set the weight and the flags */ @@ -783,9 +787,7 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest, * Put the real service in rs_table if not present. * For now only for NAT! */ - write_lock_bh(&ipvs->rs_lock); ip_vs_rs_hash(ipvs, dest); - write_unlock_bh(&ipvs->rs_lock); } atomic_set(&dest->conn_flags, conn_flags); @@ -809,27 +811,20 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest, dest->l_threshold = udest->l_threshold; spin_lock_bh(&dest->dst_lock); - ip_vs_dst_reset(dest); + __ip_vs_dst_cache_reset(dest); spin_unlock_bh(&dest->dst_lock); - if (add) - ip_vs_start_estimator(svc->net, &dest->stats); - - write_lock_bh(&__ip_vs_svc_lock); - - /* Wait until all other svc users go away */ - IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0); - + sched = rcu_dereference_protected(svc->scheduler, 1); if (add) { - list_add(&dest->n_list, &svc->destinations); + ip_vs_start_estimator(svc->net, &dest->stats); + list_add_rcu(&dest->n_list, &svc->destinations); svc->num_dests++; + if (sched->add_dest) + sched->add_dest(svc, dest); + } else { + if (sched->upd_dest) + sched->upd_dest(svc, dest); } - - /* call the update_service, because server weight may be changed */ - if (svc->scheduler->update_service) - svc->scheduler->update_service(svc); - - write_unlock_bh(&__ip_vs_svc_lock); } @@ -881,7 +876,7 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest, atomic_set(&dest->persistconns, 0); atomic_set(&dest->refcnt, 1); - INIT_LIST_HEAD(&dest->d_list); + INIT_HLIST_NODE(&dest->d_list); spin_lock_init(&dest->dst_lock); spin_lock_init(&dest->stats.lock); __ip_vs_update_dest(svc, dest, udest, 1); @@ -923,10 +918,10 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) ip_vs_addr_copy(svc->af, &daddr, &udest->addr); - /* - * Check if the dest already exists in the list - */ + /* We use function that requires RCU lock */ + rcu_read_lock(); dest = ip_vs_lookup_dest(svc, &daddr, dport); + rcu_read_unlock(); if (dest != NULL) { IP_VS_DBG(1, "%s(): dest already exists\n", __func__); @@ -948,11 +943,6 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) IP_VS_DBG_ADDR(svc->af, &dest->vaddr), ntohs(dest->vport)); - /* - * Get the destination from the trash - */ - list_del(&dest->n_list); - __ip_vs_update_dest(svc, dest, udest, 1); ret = 0; } else { @@ -992,10 +982,10 @@ ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) ip_vs_addr_copy(svc->af, &daddr, &udest->addr); - /* - * Lookup the destination list - */ + /* We use function that requires RCU lock */ + rcu_read_lock(); 
dest = ip_vs_lookup_dest(svc, &daddr, dport); + rcu_read_unlock(); if (dest == NULL) { IP_VS_DBG(1, "%s(): dest doesn't exist\n", __func__); @@ -1008,11 +998,21 @@ ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) return 0; } +static void ip_vs_dest_wait_readers(struct rcu_head *head) +{ + struct ip_vs_dest *dest = container_of(head, struct ip_vs_dest, + rcu_head); + + /* End of grace period after unlinking */ + clear_bit(IP_VS_DEST_STATE_REMOVING, &dest->state); +} + /* * Delete a destination (must be already unlinked from the service) */ -static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest) +static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest, + bool cleanup) { struct netns_ipvs *ipvs = net_ipvs(net); @@ -1021,38 +1021,24 @@ static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest) /* * Remove it from the d-linked list with the real services. */ - write_lock_bh(&ipvs->rs_lock); ip_vs_rs_unhash(dest); - write_unlock_bh(&ipvs->rs_lock); - /* - * Decrease the refcnt of the dest, and free the dest - * if nobody refers to it (refcnt=0). Otherwise, throw - * the destination into the trash. - */ - if (atomic_dec_and_test(&dest->refcnt)) { - IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u\n", - dest->vfwmark, - IP_VS_DBG_ADDR(dest->af, &dest->addr), - ntohs(dest->port)); - ip_vs_dst_reset(dest); - /* simply decrease svc->refcnt here, let the caller check - and release the service if nobody refers to it. - Only user context can release destination and service, - and only one user context can update virtual service at a - time, so the operation here is OK */ - atomic_dec(&dest->svc->refcnt); - free_percpu(dest->stats.cpustats); - kfree(dest); - } else { - IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, " - "dest->refcnt=%d\n", - IP_VS_DBG_ADDR(dest->af, &dest->addr), - ntohs(dest->port), - atomic_read(&dest->refcnt)); - list_add(&dest->n_list, &ipvs->dest_trash); - atomic_inc(&dest->refcnt); + if (!cleanup) { + set_bit(IP_VS_DEST_STATE_REMOVING, &dest->state); + call_rcu(&dest->rcu_head, ip_vs_dest_wait_readers); } + + spin_lock_bh(&ipvs->dest_trash_lock); + IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, dest->refcnt=%d\n", + IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port), + atomic_read(&dest->refcnt)); + if (list_empty(&ipvs->dest_trash) && !cleanup) + mod_timer(&ipvs->dest_trash_timer, + jiffies + IP_VS_DEST_TRASH_PERIOD); + /* dest lives in trash without reference */ + list_add(&dest->t_list, &ipvs->dest_trash); + spin_unlock_bh(&ipvs->dest_trash_lock); + ip_vs_dest_put(dest); } @@ -1068,14 +1054,16 @@ static void __ip_vs_unlink_dest(struct ip_vs_service *svc, /* * Remove it from the d-linked destination list. 
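ip_vs_dest_wait_readers() above is a grace-period marker rather than a destructor: a dest entering the trash sets a REMOVING bit and queues the callback, and the bit is cleared only once no RCU reader can still see the object, which is what lets ip_vs_trash_get_dest() and the expire timer skip entries that are not yet reusable. The shape of that pattern, with illustrative names:

#include <linux/kernel.h>
#include <linux/rcupdate.h>
#include <linux/bitops.h>

#define OBJ_REMOVING	0		/* bit number in obj->state */

struct obj {
	unsigned long state;
	struct rcu_head rcu_head;
};

static void obj_wait_readers(struct rcu_head *head)
{
	struct obj *o = container_of(head, struct obj, rcu_head);

	clear_bit(OBJ_REMOVING, &o->state);	/* grace period is over */
}

static void obj_unlink(struct obj *o)
{
	/* ... remove o from all RCU-visible lists first ... */
	set_bit(OBJ_REMOVING, &o->state);
	call_rcu(&o->rcu_head, obj_wait_readers);
}

static bool obj_reusable(const struct obj *o)	/* trash/reuse side */
{
	return !test_bit(OBJ_REMOVING, &o->state);
}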
*/ - list_del(&dest->n_list); + list_del_rcu(&dest->n_list); svc->num_dests--; - /* - * Call the update_service function of its scheduler - */ - if (svcupd && svc->scheduler->update_service) - svc->scheduler->update_service(svc); + if (svcupd) { + struct ip_vs_scheduler *sched; + + sched = rcu_dereference_protected(svc->scheduler, 1); + if (sched->del_dest) + sched->del_dest(svc, dest); + } } @@ -1090,37 +1078,56 @@ ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) EnterFunction(2); + /* We use function that requires RCU lock */ + rcu_read_lock(); dest = ip_vs_lookup_dest(svc, &udest->addr, dport); + rcu_read_unlock(); if (dest == NULL) { IP_VS_DBG(1, "%s(): destination not found!\n", __func__); return -ENOENT; } - write_lock_bh(&__ip_vs_svc_lock); - - /* - * Wait until all other svc users go away. - */ - IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0); - /* * Unlink dest from the service */ __ip_vs_unlink_dest(svc, dest, 1); - write_unlock_bh(&__ip_vs_svc_lock); - /* * Delete the destination */ - __ip_vs_del_dest(svc->net, dest); + __ip_vs_del_dest(svc->net, dest, false); LeaveFunction(2); return 0; } +static void ip_vs_dest_trash_expire(unsigned long data) +{ + struct net *net = (struct net *) data; + struct netns_ipvs *ipvs = net_ipvs(net); + struct ip_vs_dest *dest, *next; + + spin_lock(&ipvs->dest_trash_lock); + list_for_each_entry_safe(dest, next, &ipvs->dest_trash, t_list) { + /* Skip if dest is in grace period */ + if (test_bit(IP_VS_DEST_STATE_REMOVING, &dest->state)) + continue; + if (atomic_read(&dest->refcnt) > 0) + continue; + IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u from trash\n", + dest->vfwmark, + IP_VS_DBG_ADDR(dest->svc->af, &dest->addr), + ntohs(dest->port)); + list_del(&dest->t_list); + ip_vs_dest_free(dest); + } + if (!list_empty(&ipvs->dest_trash)) + mod_timer(&ipvs->dest_trash_timer, + jiffies + IP_VS_DEST_TRASH_PERIOD); + spin_unlock(&ipvs->dest_trash_lock); +} /* * Add a service into the service hash table @@ -1157,9 +1164,13 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u, } #ifdef CONFIG_IP_VS_IPV6 - if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) { - ret = -EINVAL; - goto out_err; + if (u->af == AF_INET6) { + __u32 plen = (__force __u32) u->netmask; + + if (plen < 1 || plen > 128) { + ret = -EINVAL; + goto out_err; + } } #endif @@ -1176,7 +1187,6 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u, } /* I'm the first user of the service */ - atomic_set(&svc->usecnt, 0); atomic_set(&svc->refcnt, 0); svc->af = u->af; @@ -1190,7 +1200,7 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u, svc->net = net; INIT_LIST_HEAD(&svc->destinations); - rwlock_init(&svc->sched_lock); + spin_lock_init(&svc->sched_lock); spin_lock_init(&svc->stats.lock); /* Bind the scheduler */ @@ -1200,7 +1210,7 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u, sched = NULL; /* Bind the ct retriever */ - ip_vs_bind_pe(svc, pe); + RCU_INIT_POINTER(svc->pe, pe); pe = NULL; /* Update the virtual service counters */ @@ -1216,9 +1226,7 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u, ipvs->num_services++; /* Hash the service into the service table */ - write_lock_bh(&__ip_vs_svc_lock); ip_vs_svc_hash(svc); - write_unlock_bh(&__ip_vs_svc_lock); *svc_p = svc; /* Now there is a service - full throttle */ @@ -1228,15 +1236,8 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u, out_err: if (svc != NULL) { - 
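The netmask validation above exists because the field is typed __be32 but, for IPv6 services, actually carries a host-order prefix length; the __force cast strips the endianness annotation (satisfying sparse) before the 1..128 range check. A sketch of that check in isolation:

#include <linux/types.h>
#include <linux/socket.h>
#include <linux/errno.h>

static int validate_netmask(int af, __be32 netmask)
{
	if (af == AF_INET6) {
		__u32 plen = (__force __u32) netmask;	/* prefix length */

		if (plen < 1 || plen > 128)
			return -EINVAL;
	}
	return 0;	/* AF_INET: any 32-bit mask is acceptable */
}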
ip_vs_unbind_scheduler(svc); - if (svc->inc) { - local_bh_disable(); - ip_vs_app_inc_put(svc->inc); - local_bh_enable(); - } - if (svc->stats.cpustats) - free_percpu(svc->stats.cpustats); - kfree(svc); + ip_vs_unbind_scheduler(svc, sched); + ip_vs_service_free(svc); } ip_vs_scheduler_put(sched); ip_vs_pe_put(pe); @@ -1280,18 +1281,27 @@ ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u) } #ifdef CONFIG_IP_VS_IPV6 - if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) { - ret = -EINVAL; - goto out; + if (u->af == AF_INET6) { + __u32 plen = (__force __u32) u->netmask; + + if (plen < 1 || plen > 128) { + ret = -EINVAL; + goto out; + } } #endif - write_lock_bh(&__ip_vs_svc_lock); - - /* - * Wait until all other svc users go away. - */ - IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0); + old_sched = rcu_dereference_protected(svc->scheduler, 1); + if (sched != old_sched) { + /* Bind the new scheduler */ + ret = ip_vs_bind_scheduler(svc, sched); + if (ret) { + old_sched = sched; + goto out; + } + /* Unbind the old scheduler on success */ + ip_vs_unbind_scheduler(svc, old_sched); + } /* * Set the flags and timeout value @@ -1300,57 +1310,30 @@ ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u) svc->timeout = u->timeout * HZ; svc->netmask = u->netmask; - old_sched = svc->scheduler; - if (sched != old_sched) { - /* - * Unbind the old scheduler - */ - if ((ret = ip_vs_unbind_scheduler(svc))) { - old_sched = sched; - goto out_unlock; - } - - /* - * Bind the new scheduler - */ - if ((ret = ip_vs_bind_scheduler(svc, sched))) { - /* - * If ip_vs_bind_scheduler fails, restore the old - * scheduler. - * The main reason of failure is out of memory. - * - * The question is if the old scheduler can be - * restored all the time. TODO: if it cannot be - * restored some time, we must delete the service, - * otherwise the system may crash. - */ - ip_vs_bind_scheduler(svc, old_sched); - old_sched = sched; - goto out_unlock; - } - } - - old_pe = svc->pe; - if (pe != old_pe) { - ip_vs_unbind_pe(svc); - ip_vs_bind_pe(svc, pe); - } + old_pe = rcu_dereference_protected(svc->pe, 1); + if (pe != old_pe) + rcu_assign_pointer(svc->pe, pe); -out_unlock: - write_unlock_bh(&__ip_vs_svc_lock); out: ip_vs_scheduler_put(old_sched); ip_vs_pe_put(old_pe); return ret; } +static void ip_vs_service_rcu_free(struct rcu_head *head) +{ + struct ip_vs_service *svc; + + svc = container_of(head, struct ip_vs_service, rcu_head); + ip_vs_service_free(svc); +} /* * Delete a service from the service list * - The service must be unlinked, unlocked and not referenced! 
* - We are called under _bh lock */ -static void __ip_vs_del_service(struct ip_vs_service *svc) +static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup) { struct ip_vs_dest *dest, *nxt; struct ip_vs_scheduler *old_sched; @@ -1366,27 +1349,20 @@ static void __ip_vs_del_service(struct ip_vs_service *svc) ip_vs_stop_estimator(svc->net, &svc->stats); /* Unbind scheduler */ - old_sched = svc->scheduler; - ip_vs_unbind_scheduler(svc); + old_sched = rcu_dereference_protected(svc->scheduler, 1); + ip_vs_unbind_scheduler(svc, old_sched); ip_vs_scheduler_put(old_sched); - /* Unbind persistence engine */ - old_pe = svc->pe; - ip_vs_unbind_pe(svc); + /* Unbind persistence engine, keep svc->pe */ + old_pe = rcu_dereference_protected(svc->pe, 1); ip_vs_pe_put(old_pe); - /* Unbind app inc */ - if (svc->inc) { - ip_vs_app_inc_put(svc->inc); - svc->inc = NULL; - } - /* * Unlink the whole destination list */ list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) { __ip_vs_unlink_dest(svc, dest, 0); - __ip_vs_del_dest(svc->net, dest); + __ip_vs_del_dest(svc->net, dest, cleanup); } /* @@ -1400,13 +1376,12 @@ static void __ip_vs_del_service(struct ip_vs_service *svc) /* * Free the service if nobody refers to it */ - if (atomic_read(&svc->refcnt) == 0) { - IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n", + if (atomic_dec_and_test(&svc->refcnt)) { + IP_VS_DBG_BUF(3, "Removing service %u/%s:%u\n", svc->fwmark, IP_VS_DBG_ADDR(svc->af, &svc->addr), - ntohs(svc->port), atomic_read(&svc->usecnt)); - free_percpu(svc->stats.cpustats); - kfree(svc); + ntohs(svc->port)); + call_rcu(&svc->rcu_head, ip_vs_service_rcu_free); } /* decrease the module use count */ @@ -1416,23 +1391,16 @@ static void __ip_vs_del_service(struct ip_vs_service *svc) /* * Unlink a service from list and try to delete it if its refcnt reached 0 */ -static void ip_vs_unlink_service(struct ip_vs_service *svc) +static void ip_vs_unlink_service(struct ip_vs_service *svc, bool cleanup) { + /* Hold svc to avoid double release from dest_trash */ + atomic_inc(&svc->refcnt); /* * Unhash it from the service table */ - write_lock_bh(&__ip_vs_svc_lock); - ip_vs_svc_unhash(svc); - /* - * Wait until all the svc users go away. 
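The free path above changes from "free if refcnt happens to be 0" to the safer atomic_dec_and_test() plus an RCU-deferred kfree, so a lockless lookup that raced with deletion can finish reading the service before the memory goes away. A minimal sketch with illustrative names:

#include <linux/kernel.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/atomic.h>

struct svc {
	atomic_t refcnt;
	struct rcu_head rcu_head;
	/* ... */
};

static void svc_rcu_free(struct rcu_head *head)
{
	kfree(container_of(head, struct svc, rcu_head));
}

static void svc_put(struct svc *s)
{
	if (atomic_dec_and_test(&s->refcnt))		/* last reference */
		call_rcu(&s->rcu_head, svc_rcu_free);	/* free after GP */
}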
- */ - IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0); - - __ip_vs_del_service(svc); - - write_unlock_bh(&__ip_vs_svc_lock); + __ip_vs_del_service(svc, cleanup); } /* @@ -1442,7 +1410,7 @@ static int ip_vs_del_service(struct ip_vs_service *svc) { if (svc == NULL) return -EEXIST; - ip_vs_unlink_service(svc); + ip_vs_unlink_service(svc, false); return 0; } @@ -1451,19 +1419,20 @@ static int ip_vs_del_service(struct ip_vs_service *svc) /* * Flush all the virtual services */ -static int ip_vs_flush(struct net *net) +static int ip_vs_flush(struct net *net, bool cleanup) { int idx; - struct ip_vs_service *svc, *nxt; + struct ip_vs_service *svc; + struct hlist_node *n; /* * Flush the service table hashed by <netns,protocol,addr,port> */ for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { - list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx], - s_list) { + hlist_for_each_entry_safe(svc, n, &ip_vs_svc_table[idx], + s_list) { if (net_eq(svc->net, net)) - ip_vs_unlink_service(svc); + ip_vs_unlink_service(svc, cleanup); } } @@ -1471,10 +1440,10 @@ static int ip_vs_flush(struct net *net) * Flush the service table hashed by fwmark */ for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { - list_for_each_entry_safe(svc, nxt, - &ip_vs_svc_fwm_table[idx], f_list) { + hlist_for_each_entry_safe(svc, n, &ip_vs_svc_fwm_table[idx], + f_list) { if (net_eq(svc->net, net)) - ip_vs_unlink_service(svc); + ip_vs_unlink_service(svc, cleanup); } } @@ -1490,32 +1459,32 @@ void ip_vs_service_net_cleanup(struct net *net) EnterFunction(2); /* Check for "full" addressed entries */ mutex_lock(&__ip_vs_mutex); - ip_vs_flush(net); + ip_vs_flush(net, true); mutex_unlock(&__ip_vs_mutex); LeaveFunction(2); } -/* - * Release dst hold by dst_cache - */ + +/* Put all references for device (dst_cache) */ static inline void -__ip_vs_dev_reset(struct ip_vs_dest *dest, struct net_device *dev) +ip_vs_forget_dev(struct ip_vs_dest *dest, struct net_device *dev) { + struct ip_vs_dest_dst *dest_dst; + spin_lock_bh(&dest->dst_lock); - if (dest->dst_cache && dest->dst_cache->dev == dev) { + dest_dst = rcu_dereference_protected(dest->dest_dst, 1); + if (dest_dst && dest_dst->dst_cache->dev == dev) { IP_VS_DBG_BUF(3, "Reset dev:%s dest %s:%u ,dest->refcnt=%d\n", dev->name, IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port), atomic_read(&dest->refcnt)); - ip_vs_dst_reset(dest); + __ip_vs_dst_cache_reset(dest); } spin_unlock_bh(&dest->dst_lock); } -/* - * Netdev event receiver - * Currently only NETDEV_UNREGISTER is handled, i.e. if we hold a reference to - * a device that is "unregister" it must be released. 
+/* Netdev event receiver + * Currently only NETDEV_DOWN is handled to release refs to cached dsts */ static int ip_vs_dst_event(struct notifier_block *this, unsigned long event, void *ptr) @@ -1527,35 +1496,37 @@ static int ip_vs_dst_event(struct notifier_block *this, unsigned long event, struct ip_vs_dest *dest; unsigned int idx; - if (event != NETDEV_UNREGISTER || !ipvs) + if (event != NETDEV_DOWN || !ipvs) return NOTIFY_DONE; IP_VS_DBG(3, "%s() dev=%s\n", __func__, dev->name); EnterFunction(2); mutex_lock(&__ip_vs_mutex); for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { - list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { + hlist_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { if (net_eq(svc->net, net)) { list_for_each_entry(dest, &svc->destinations, n_list) { - __ip_vs_dev_reset(dest, dev); + ip_vs_forget_dev(dest, dev); } } } - list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { + hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { if (net_eq(svc->net, net)) { list_for_each_entry(dest, &svc->destinations, n_list) { - __ip_vs_dev_reset(dest, dev); + ip_vs_forget_dev(dest, dev); } } } } - list_for_each_entry(dest, &ipvs->dest_trash, n_list) { - __ip_vs_dev_reset(dest, dev); + spin_lock_bh(&ipvs->dest_trash_lock); + list_for_each_entry(dest, &ipvs->dest_trash, t_list) { + ip_vs_forget_dev(dest, dev); } + spin_unlock_bh(&ipvs->dest_trash_lock); mutex_unlock(&__ip_vs_mutex); LeaveFunction(2); return NOTIFY_DONE; @@ -1568,12 +1539,10 @@ static int ip_vs_zero_service(struct ip_vs_service *svc) { struct ip_vs_dest *dest; - write_lock_bh(&__ip_vs_svc_lock); list_for_each_entry(dest, &svc->destinations, n_list) { ip_vs_zero_stats(&dest->stats); } ip_vs_zero_stats(&svc->stats); - write_unlock_bh(&__ip_vs_svc_lock); return 0; } @@ -1583,14 +1552,14 @@ static int ip_vs_zero_all(struct net *net) struct ip_vs_service *svc; for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { - list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { + hlist_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { if (net_eq(svc->net, net)) ip_vs_zero_service(svc); } } for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { - list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { + hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { if (net_eq(svc->net, net)) ip_vs_zero_service(svc); } @@ -1808,6 +1777,12 @@ static struct ctl_table vs_vars[] = { .mode = 0644, .proc_handler = proc_dointvec, }, + { + .procname = "backup_only", + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, #ifdef CONFIG_IP_VS_DEBUG { .procname = "debug_level", @@ -1912,7 +1887,7 @@ static struct ctl_table vs_vars[] = { struct ip_vs_iter { struct seq_net_private p; /* Do not move this, netns depends upon it*/ - struct list_head *table; + struct hlist_head *table; int bucket; }; @@ -1945,7 +1920,7 @@ static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos) /* look in hash by protocol */ for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { - list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { + hlist_for_each_entry_rcu(svc, &ip_vs_svc_table[idx], s_list) { if (net_eq(svc->net, net) && pos-- == 0) { iter->table = ip_vs_svc_table; iter->bucket = idx; @@ -1956,7 +1931,8 @@ static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos) /* keep looking in fwmark */ for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { - list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { + hlist_for_each_entry_rcu(svc, &ip_vs_svc_fwm_table[idx], + f_list) { if 
(net_eq(svc->net, net) && pos-- == 0) { iter->table = ip_vs_svc_fwm_table; iter->bucket = idx; @@ -1969,17 +1945,16 @@ static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos) } static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos) -__acquires(__ip_vs_svc_lock) + __acquires(RCU) { - - read_lock_bh(&__ip_vs_svc_lock); + rcu_read_lock(); return *pos ? ip_vs_info_array(seq, *pos - 1) : SEQ_START_TOKEN; } static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos) { - struct list_head *e; + struct hlist_node *e; struct ip_vs_iter *iter; struct ip_vs_service *svc; @@ -1992,13 +1967,14 @@ static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos) if (iter->table == ip_vs_svc_table) { /* next service in table hashed by protocol */ - if ((e = svc->s_list.next) != &ip_vs_svc_table[iter->bucket]) - return list_entry(e, struct ip_vs_service, s_list); - + e = rcu_dereference(hlist_next_rcu(&svc->s_list)); + if (e) + return hlist_entry(e, struct ip_vs_service, s_list); while (++iter->bucket < IP_VS_SVC_TAB_SIZE) { - list_for_each_entry(svc,&ip_vs_svc_table[iter->bucket], - s_list) { + hlist_for_each_entry_rcu(svc, + &ip_vs_svc_table[iter->bucket], + s_list) { return svc; } } @@ -2009,13 +1985,15 @@ static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos) } /* next service in hashed by fwmark */ - if ((e = svc->f_list.next) != &ip_vs_svc_fwm_table[iter->bucket]) - return list_entry(e, struct ip_vs_service, f_list); + e = rcu_dereference(hlist_next_rcu(&svc->f_list)); + if (e) + return hlist_entry(e, struct ip_vs_service, f_list); scan_fwmark: while (++iter->bucket < IP_VS_SVC_TAB_SIZE) { - list_for_each_entry(svc, &ip_vs_svc_fwm_table[iter->bucket], - f_list) + hlist_for_each_entry_rcu(svc, + &ip_vs_svc_fwm_table[iter->bucket], + f_list) return svc; } @@ -2023,9 +2001,9 @@ static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos) } static void ip_vs_info_seq_stop(struct seq_file *seq, void *v) -__releases(__ip_vs_svc_lock) + __releases(RCU) { - read_unlock_bh(&__ip_vs_svc_lock); + rcu_read_unlock(); } @@ -2043,6 +2021,7 @@ static int ip_vs_info_seq_show(struct seq_file *seq, void *v) const struct ip_vs_service *svc = v; const struct ip_vs_iter *iter = seq->private; const struct ip_vs_dest *dest; + struct ip_vs_scheduler *sched = rcu_dereference(svc->scheduler); if (iter->table == ip_vs_svc_table) { #ifdef CONFIG_IP_VS_IPV6 @@ -2051,18 +2030,18 @@ static int ip_vs_info_seq_show(struct seq_file *seq, void *v) ip_vs_proto_name(svc->protocol), &svc->addr.in6, ntohs(svc->port), - svc->scheduler->name); + sched->name); else #endif seq_printf(seq, "%s %08X:%04X %s %s ", ip_vs_proto_name(svc->protocol), ntohl(svc->addr.ip), ntohs(svc->port), - svc->scheduler->name, + sched->name, (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":""); } else { seq_printf(seq, "FWM %08X %s %s", - svc->fwmark, svc->scheduler->name, + svc->fwmark, sched->name, (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":""); } @@ -2073,7 +2052,7 @@ static int ip_vs_info_seq_show(struct seq_file *seq, void *v) else seq_putc(seq, '\n'); - list_for_each_entry(dest, &svc->destinations, n_list) { + list_for_each_entry_rcu(dest, &svc->destinations, n_list) { #ifdef CONFIG_IP_VS_IPV6 if (dest->af == AF_INET6) seq_printf(seq, @@ -2167,7 +2146,7 @@ static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v) { struct net *net = seq_file_single_net(seq); struct ip_vs_stats *tot_stats = &net_ipvs(net)->tot_stats; - struct ip_vs_cpu_stats *cpustats = 
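The seq_file hunks above swap the read-locked walk for an RCU one: ->start takes rcu_read_lock(), ->stop drops it, and the __acquires(RCU)/__releases(RCU) sparse annotations document that the lock is held across ->next and ->show. A compilable skeleton of that start/stop pairing over a trivial static array (the real iterator walks the RCU hash tables instead):

#include <linux/kernel.h>
#include <linux/seq_file.h>
#include <linux/rcupdate.h>

static const char *items[] = { "one", "two", "three" };

static void *demo_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(RCU)
{
	rcu_read_lock();	/* protects the whole start..stop walk */
	return *pos < ARRAY_SIZE(items) ? (void *)&items[*pos] : NULL;
}

static void *demo_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	++*pos;
	return *pos < ARRAY_SIZE(items) ? (void *)&items[*pos] : NULL;
}

static void demo_seq_stop(struct seq_file *seq, void *v)
	__releases(RCU)
{
	rcu_read_unlock();
}

static int demo_seq_show(struct seq_file *seq, void *v)
{
	seq_printf(seq, "%s\n", *(const char **)v);
	return 0;
}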
tot_stats->cpustats; + struct ip_vs_cpu_stats __percpu *cpustats = tot_stats->cpustats; struct ip_vs_stats_user rates; int i; @@ -2383,7 +2362,7 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) if (cmd == IP_VS_SO_SET_FLUSH) { /* Flush the virtual service */ - ret = ip_vs_flush(net); + ret = ip_vs_flush(net, false); goto out_unlock; } else if (cmd == IP_VS_SO_SET_TIMEOUT) { /* Set timeout values for (tcp tcpfin udp) */ @@ -2418,11 +2397,13 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) } /* Lookup the exact service by <protocol, addr, port> or fwmark */ + rcu_read_lock(); if (usvc.fwmark == 0) svc = __ip_vs_service_find(net, usvc.af, usvc.protocol, &usvc.addr, usvc.port); else svc = __ip_vs_svc_fwm_find(net, usvc.af, usvc.fwmark); + rcu_read_unlock(); if (cmd != IP_VS_SO_SET_ADD && (svc == NULL || svc->protocol != usvc.protocol)) { @@ -2474,11 +2455,14 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) static void ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src) { + struct ip_vs_scheduler *sched; + + sched = rcu_dereference_protected(src->scheduler, 1); dst->protocol = src->protocol; dst->addr = src->addr.ip; dst->port = src->port; dst->fwmark = src->fwmark; - strlcpy(dst->sched_name, src->scheduler->name, sizeof(dst->sched_name)); + strlcpy(dst->sched_name, sched->name, sizeof(dst->sched_name)); dst->flags = src->flags; dst->timeout = src->timeout / HZ; dst->netmask = src->netmask; @@ -2497,7 +2481,7 @@ __ip_vs_get_service_entries(struct net *net, int ret = 0; for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { - list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { + hlist_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { /* Only expose IPv4 entries to old interface */ if (svc->af != AF_INET || !net_eq(svc->net, net)) continue; @@ -2516,7 +2500,7 @@ __ip_vs_get_service_entries(struct net *net, } for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { - list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { + hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { /* Only expose IPv4 entries to old interface */ if (svc->af != AF_INET || !net_eq(svc->net, net)) continue; @@ -2545,11 +2529,13 @@ __ip_vs_get_dest_entries(struct net *net, const struct ip_vs_get_dests *get, union nf_inet_addr addr = { .ip = get->addr }; int ret = 0; + rcu_read_lock(); if (get->fwmark) svc = __ip_vs_svc_fwm_find(net, AF_INET, get->fwmark); else svc = __ip_vs_service_find(net, AF_INET, get->protocol, &addr, get->port); + rcu_read_unlock(); if (svc) { int count = 0; @@ -2732,12 +2718,14 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) entry = (struct ip_vs_service_entry *)arg; addr.ip = entry->addr; + rcu_read_lock(); if (entry->fwmark) svc = __ip_vs_svc_fwm_find(net, AF_INET, entry->fwmark); else svc = __ip_vs_service_find(net, AF_INET, entry->protocol, &addr, entry->port); + rcu_read_unlock(); if (svc) { ip_vs_copy_service(entry, svc); if (copy_to_user(user, entry, sizeof(*entry)) != 0) @@ -2894,6 +2882,8 @@ nla_put_failure: static int ip_vs_genl_fill_service(struct sk_buff *skb, struct ip_vs_service *svc) { + struct ip_vs_scheduler *sched; + struct ip_vs_pe *pe; struct nlattr *nl_service; struct ip_vs_flags flags = { .flags = svc->flags, .mask = ~0 }; @@ -2910,16 +2900,17 @@ static int ip_vs_genl_fill_service(struct sk_buff *skb, } else { if (nla_put_u16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol) || nla_put(skb, IPVS_SVC_ATTR_ADDR, 
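The one-line change above adds the __percpu address-space annotation to the cpustats pointer, which lets sparse flag any direct dereference; accesses must go through per_cpu_ptr() or the this_cpu_* helpers. A sketch of a reader summing such stats (it ignores the u64_stats seqcount the real IPVS code needs to avoid torn reads on 32-bit):

#include <linux/percpu.h>
#include <linux/cpumask.h>
#include <linux/types.h>

struct cpu_stats {
	u64 packets;
};

static u64 sum_packets(struct cpu_stats __percpu *stats)
{
	u64 total = 0;
	int cpu;

	for_each_possible_cpu(cpu)
		total += per_cpu_ptr(stats, cpu)->packets;
	return total;
}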
sizeof(svc->addr), &svc->addr) || - nla_put_u16(skb, IPVS_SVC_ATTR_PORT, svc->port)) + nla_put_be16(skb, IPVS_SVC_ATTR_PORT, svc->port)) goto nla_put_failure; } - if (nla_put_string(skb, IPVS_SVC_ATTR_SCHED_NAME, svc->scheduler->name) || - (svc->pe && - nla_put_string(skb, IPVS_SVC_ATTR_PE_NAME, svc->pe->name)) || + sched = rcu_dereference_protected(svc->scheduler, 1); + pe = rcu_dereference_protected(svc->pe, 1); + if (nla_put_string(skb, IPVS_SVC_ATTR_SCHED_NAME, sched->name) || + (pe && nla_put_string(skb, IPVS_SVC_ATTR_PE_NAME, pe->name)) || nla_put(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags) || nla_put_u32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ) || - nla_put_u32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask)) + nla_put_be32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask)) goto nla_put_failure; if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &svc->stats)) goto nla_put_failure; @@ -2965,7 +2956,7 @@ static int ip_vs_genl_dump_services(struct sk_buff *skb, mutex_lock(&__ip_vs_mutex); for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) { - list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) { + hlist_for_each_entry(svc, &ip_vs_svc_table[i], s_list) { if (++idx <= start || !net_eq(svc->net, net)) continue; if (ip_vs_genl_dump_service(skb, svc, cb) < 0) { @@ -2976,7 +2967,7 @@ static int ip_vs_genl_dump_services(struct sk_buff *skb, } for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) { - list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) { + hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) { if (++idx <= start || !net_eq(svc->net, net)) continue; if (ip_vs_genl_dump_service(skb, svc, cb) < 0) { @@ -3032,15 +3023,17 @@ static int ip_vs_genl_parse_service(struct net *net, } else { usvc->protocol = nla_get_u16(nla_protocol); nla_memcpy(&usvc->addr, nla_addr, sizeof(usvc->addr)); - usvc->port = nla_get_u16(nla_port); + usvc->port = nla_get_be16(nla_port); usvc->fwmark = 0; } + rcu_read_lock(); if (usvc->fwmark) svc = __ip_vs_svc_fwm_find(net, usvc->af, usvc->fwmark); else svc = __ip_vs_service_find(net, usvc->af, usvc->protocol, &usvc->addr, usvc->port); + rcu_read_unlock(); *ret_svc = svc; /* If a full entry was requested, check for the additional fields */ @@ -3070,7 +3063,7 @@ static int ip_vs_genl_parse_service(struct net *net, usvc->sched_name = nla_data(nla_sched); usvc->pe_name = nla_pe ? 
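The nla_put_u16 to nla_put_be16 (and u32 to be32) conversions above are type hygiene, not wire-format changes: the emitted bytes are identical, but the be-typed helpers accept __be16/__be32 directly so sparse endianness checking stays intact. A sketch with an illustrative attribute enum:

#include <net/netlink.h>

enum {
	DEMO_ATTR_UNSPEC,
	DEMO_ATTR_PORT,		/* __be16, network order */
	DEMO_ATTR_NETMASK,	/* __be32 */
};

static int demo_fill(struct sk_buff *skb, __be16 port, __be32 mask)
{
	if (nla_put_be16(skb, DEMO_ATTR_PORT, port) ||
	    nla_put_be32(skb, DEMO_ATTR_NETMASK, mask))
		return -EMSGSIZE;
	return 0;
}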
nla_data(nla_pe) : NULL; usvc->timeout = nla_get_u32(nla_timeout); - usvc->netmask = nla_get_u32(nla_netmask); + usvc->netmask = nla_get_be32(nla_netmask); } return 0; @@ -3096,7 +3089,7 @@ static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest) return -EMSGSIZE; if (nla_put(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr) || - nla_put_u16(skb, IPVS_DEST_ATTR_PORT, dest->port) || + nla_put_be16(skb, IPVS_DEST_ATTR_PORT, dest->port) || nla_put_u32(skb, IPVS_DEST_ATTR_FWD_METHOD, (atomic_read(&dest->conn_flags) & IP_VS_CONN_F_FWD_MASK)) || @@ -3205,7 +3198,7 @@ static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest, memset(udest, 0, sizeof(*udest)); nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr)); - udest->port = nla_get_u16(nla_port); + udest->port = nla_get_be16(nla_port); /* If a full entry was requested, check for the additional fields */ if (full_entry) { @@ -3230,8 +3223,8 @@ static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest, return 0; } -static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __be32 state, - const char *mcast_ifn, __be32 syncid) +static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __u32 state, + const char *mcast_ifn, __u32 syncid) { struct nlattr *nl_daemon; @@ -3252,8 +3245,8 @@ nla_put_failure: return -EMSGSIZE; } -static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __be32 state, - const char *mcast_ifn, __be32 syncid, +static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __u32 state, + const char *mcast_ifn, __u32 syncid, struct netlink_callback *cb) { void *hdr; @@ -3392,7 +3385,7 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info) mutex_lock(&__ip_vs_mutex); if (cmd == IPVS_CMD_FLUSH) { - ret = ip_vs_flush(net); + ret = ip_vs_flush(net, false); goto out; } else if (cmd == IPVS_CMD_SET_CONFIG) { ret = ip_vs_genl_set_config(net, info->attrs); @@ -3741,6 +3734,7 @@ static int __net_init ip_vs_control_net_init_sysctl(struct net *net) tbl[idx++].data = &ipvs->sysctl_nat_icmp_send; ipvs->sysctl_pmtu_disc = 1; tbl[idx++].data = &ipvs->sysctl_pmtu_disc; + tbl[idx++].data = &ipvs->sysctl_backup_only; ipvs->sysctl_hdr = register_net_sysctl(net, "net/ipv4/vs", tbl); @@ -3783,13 +3777,14 @@ int __net_init ip_vs_control_net_init(struct net *net) int idx; struct netns_ipvs *ipvs = net_ipvs(net); - rwlock_init(&ipvs->rs_lock); - /* Initialize rs_table */ for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++) - INIT_LIST_HEAD(&ipvs->rs_table[idx]); + INIT_HLIST_HEAD(&ipvs->rs_table[idx]); INIT_LIST_HEAD(&ipvs->dest_trash); + spin_lock_init(&ipvs->dest_trash_lock); + setup_timer(&ipvs->dest_trash_timer, ip_vs_dest_trash_expire, + (unsigned long) net); atomic_set(&ipvs->ftpsvc_counter, 0); atomic_set(&ipvs->nullsvc_counter, 0); @@ -3800,10 +3795,10 @@ int __net_init ip_vs_control_net_init(struct net *net) spin_lock_init(&ipvs->tot_stats.lock); - proc_net_fops_create(net, "ip_vs", 0, &ip_vs_info_fops); - proc_net_fops_create(net, "ip_vs_stats", 0, &ip_vs_stats_fops); - proc_net_fops_create(net, "ip_vs_stats_percpu", 0, - &ip_vs_stats_percpu_fops); + proc_create("ip_vs", 0, net->proc_net, &ip_vs_info_fops); + proc_create("ip_vs_stats", 0, net->proc_net, &ip_vs_stats_fops); + proc_create("ip_vs_stats_percpu", 0, net->proc_net, + &ip_vs_stats_percpu_fops); if (ip_vs_control_net_init_sysctl(net)) goto err; @@ -3819,12 +3814,16 @@ void __net_exit ip_vs_control_net_cleanup(struct net *net) { struct netns_ipvs *ipvs = net_ipvs(net); + /* Some dest can be in grace period even before cleanup, we 
have to + * defer ip_vs_trash_cleanup until ip_vs_dest_wait_readers is called. + */ + rcu_barrier(); ip_vs_trash_cleanup(net); ip_vs_stop_estimator(net, &ipvs->tot_stats); ip_vs_control_net_cleanup_sysctl(net); - proc_net_remove(net, "ip_vs_stats_percpu"); - proc_net_remove(net, "ip_vs_stats"); - proc_net_remove(net, "ip_vs"); + remove_proc_entry("ip_vs_stats_percpu", net->proc_net); + remove_proc_entry("ip_vs_stats", net->proc_net); + remove_proc_entry("ip_vs", net->proc_net); free_percpu(ipvs->tot_stats.cpustats); } @@ -3864,10 +3863,10 @@ int __init ip_vs_control_init(void) EnterFunction(2); - /* Initialize svc_table, ip_vs_svc_fwm_table, rs_table */ + /* Initialize svc_table, ip_vs_svc_fwm_table */ for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { - INIT_LIST_HEAD(&ip_vs_svc_table[idx]); - INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]); + INIT_HLIST_HEAD(&ip_vs_svc_table[idx]); + INIT_HLIST_HEAD(&ip_vs_svc_fwm_table[idx]); } smp_wmb(); /* Do we really need it now ? */ diff --git a/net/netfilter/ipvs/ip_vs_dh.c b/net/netfilter/ipvs/ip_vs_dh.c index 7f3b0cc00b7a..ccab120df45e 100644 --- a/net/netfilter/ipvs/ip_vs_dh.c +++ b/net/netfilter/ipvs/ip_vs_dh.c @@ -51,7 +51,7 @@ * IPVS DH bucket */ struct ip_vs_dh_bucket { - struct ip_vs_dest *dest; /* real server (cache) */ + struct ip_vs_dest __rcu *dest; /* real server (cache) */ }; /* @@ -64,6 +64,10 @@ struct ip_vs_dh_bucket { #define IP_VS_DH_TAB_SIZE (1 << IP_VS_DH_TAB_BITS) #define IP_VS_DH_TAB_MASK (IP_VS_DH_TAB_SIZE - 1) +struct ip_vs_dh_state { + struct ip_vs_dh_bucket buckets[IP_VS_DH_TAB_SIZE]; + struct rcu_head rcu_head; +}; /* * Returns hash value for IPVS DH entry @@ -85,10 +89,9 @@ static inline unsigned int ip_vs_dh_hashkey(int af, const union nf_inet_addr *ad * Get ip_vs_dest associated with supplied parameters. */ static inline struct ip_vs_dest * -ip_vs_dh_get(int af, struct ip_vs_dh_bucket *tbl, - const union nf_inet_addr *addr) +ip_vs_dh_get(int af, struct ip_vs_dh_state *s, const union nf_inet_addr *addr) { - return (tbl[ip_vs_dh_hashkey(af, addr)]).dest; + return rcu_dereference(s->buckets[ip_vs_dh_hashkey(af, addr)].dest); } @@ -96,25 +99,30 @@ ip_vs_dh_get(int af, struct ip_vs_dh_bucket *tbl, * Assign all the hash buckets of the specified table with the service. */ static int -ip_vs_dh_assign(struct ip_vs_dh_bucket *tbl, struct ip_vs_service *svc) +ip_vs_dh_reassign(struct ip_vs_dh_state *s, struct ip_vs_service *svc) { int i; struct ip_vs_dh_bucket *b; struct list_head *p; struct ip_vs_dest *dest; + bool empty; - b = tbl; + b = &s->buckets[0]; p = &svc->destinations; + empty = list_empty(p); for (i=0; i<IP_VS_DH_TAB_SIZE; i++) { - if (list_empty(p)) { - b->dest = NULL; - } else { + dest = rcu_dereference_protected(b->dest, 1); + if (dest) + ip_vs_dest_put(dest); + if (empty) + RCU_INIT_POINTER(b->dest, NULL); + else { if (p == &svc->destinations) p = p->next; dest = list_entry(p, struct ip_vs_dest, n_list); - atomic_inc(&dest->refcnt); - b->dest = dest; + ip_vs_dest_hold(dest); + RCU_INIT_POINTER(b->dest, dest); p = p->next; } @@ -127,16 +135,18 @@ ip_vs_dh_assign(struct ip_vs_dh_bucket *tbl, struct ip_vs_service *svc) /* * Flush all the hash buckets of the specified table. 
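The rcu_barrier() added above is the complement of all the call_rcu() users introduced in this patch: synchronize_rcu() only waits for readers, while rcu_barrier() also waits for every already-queued callback to execute, so ip_vs_dest_wait_readers() cannot fire after the trash has been torn down. The generic teardown shape:

#include <linux/rcupdate.h>

static void demo_cleanup(void)
{
	/* 1. stop anything that can queue new call_rcu() callbacks */

	/* 2. wait for readers AND for all pending callbacks to run;
	 *    a plain synchronize_rcu() would only cover the readers
	 */
	rcu_barrier();

	/* 3. now the remaining objects can be freed synchronously */
}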
*/ -static void ip_vs_dh_flush(struct ip_vs_dh_bucket *tbl) +static void ip_vs_dh_flush(struct ip_vs_dh_state *s) { int i; struct ip_vs_dh_bucket *b; + struct ip_vs_dest *dest; - b = tbl; + b = &s->buckets[0]; for (i=0; i<IP_VS_DH_TAB_SIZE; i++) { - if (b->dest) { - atomic_dec(&b->dest->refcnt); - b->dest = NULL; + dest = rcu_dereference_protected(b->dest, 1); + if (dest) { + ip_vs_dest_put(dest); + RCU_INIT_POINTER(b->dest, NULL); } b++; } @@ -145,51 +155,46 @@ static void ip_vs_dh_flush(struct ip_vs_dh_bucket *tbl) static int ip_vs_dh_init_svc(struct ip_vs_service *svc) { - struct ip_vs_dh_bucket *tbl; + struct ip_vs_dh_state *s; /* allocate the DH table for this service */ - tbl = kmalloc(sizeof(struct ip_vs_dh_bucket)*IP_VS_DH_TAB_SIZE, - GFP_KERNEL); - if (tbl == NULL) + s = kzalloc(sizeof(struct ip_vs_dh_state), GFP_KERNEL); + if (s == NULL) return -ENOMEM; - svc->sched_data = tbl; + svc->sched_data = s; IP_VS_DBG(6, "DH hash table (memory=%Zdbytes) allocated for " "current service\n", sizeof(struct ip_vs_dh_bucket)*IP_VS_DH_TAB_SIZE); - /* assign the hash buckets with the updated service */ - ip_vs_dh_assign(tbl, svc); + /* assign the hash buckets with current dests */ + ip_vs_dh_reassign(s, svc); return 0; } -static int ip_vs_dh_done_svc(struct ip_vs_service *svc) +static void ip_vs_dh_done_svc(struct ip_vs_service *svc) { - struct ip_vs_dh_bucket *tbl = svc->sched_data; + struct ip_vs_dh_state *s = svc->sched_data; /* got to clean up hash buckets here */ - ip_vs_dh_flush(tbl); + ip_vs_dh_flush(s); /* release the table itself */ - kfree(svc->sched_data); + kfree_rcu(s, rcu_head); IP_VS_DBG(6, "DH hash table (memory=%Zdbytes) released\n", sizeof(struct ip_vs_dh_bucket)*IP_VS_DH_TAB_SIZE); - - return 0; } -static int ip_vs_dh_update_svc(struct ip_vs_service *svc) +static int ip_vs_dh_dest_changed(struct ip_vs_service *svc, + struct ip_vs_dest *dest) { - struct ip_vs_dh_bucket *tbl = svc->sched_data; - - /* got to clean up hash buckets here */ - ip_vs_dh_flush(tbl); + struct ip_vs_dh_state *s = svc->sched_data; /* assign the hash buckets with the updated service */ - ip_vs_dh_assign(tbl, svc); + ip_vs_dh_reassign(s, svc); return 0; } @@ -212,19 +217,20 @@ static struct ip_vs_dest * ip_vs_dh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) { struct ip_vs_dest *dest; - struct ip_vs_dh_bucket *tbl; + struct ip_vs_dh_state *s; struct ip_vs_iphdr iph; ip_vs_fill_iph_addr_only(svc->af, skb, &iph); IP_VS_DBG(6, "%s(): Scheduling...\n", __func__); - tbl = (struct ip_vs_dh_bucket *)svc->sched_data; - dest = ip_vs_dh_get(svc->af, tbl, &iph.daddr); + s = (struct ip_vs_dh_state *) svc->sched_data; + dest = ip_vs_dh_get(svc->af, s, &iph.daddr); if (!dest || !(dest->flags & IP_VS_DEST_F_AVAILABLE) || atomic_read(&dest->weight) <= 0 || is_overloaded(dest)) { + ip_vs_scheduler_err(svc, "no destination available"); return NULL; } @@ -248,7 +254,8 @@ static struct ip_vs_scheduler ip_vs_dh_scheduler = .n_list = LIST_HEAD_INIT(ip_vs_dh_scheduler.n_list), .init_service = ip_vs_dh_init_svc, .done_service = ip_vs_dh_done_svc, - .update_service = ip_vs_dh_update_svc, + .add_dest = ip_vs_dh_dest_changed, + .del_dest = ip_vs_dh_dest_changed, .schedule = ip_vs_dh_schedule, }; @@ -262,6 +269,7 @@ static int __init ip_vs_dh_init(void) static void __exit ip_vs_dh_cleanup(void) { unregister_ip_vs_scheduler(&ip_vs_dh_scheduler); + synchronize_rcu(); } diff --git a/net/netfilter/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c index 0fac6017b6fb..6bee6d0c73a5 100644 --- a/net/netfilter/ipvs/ip_vs_est.c 
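The DH scheduler above keeps its per-service state in one kzalloc'ed blob and retires it with kfree_rcu(), which covers the common "free after grace period, nothing else to do" case without writing an explicit callback; the module exit additionally runs synchronize_rcu() so a CPU still inside ->schedule() finishes before the module text can vanish. A condensed sketch with illustrative names:

#include <linux/rcupdate.h>
#include <linux/slab.h>

struct sched_state {
	struct rcu_head rcu_head;
	/* ... hash buckets holding RCU pointers to dests ... */
};

static void sched_done_svc(struct sched_state *s)
{
	/* drop bucket references first, then free after a grace period */
	kfree_rcu(s, rcu_head);
}

static void sched_module_exit(void)
{
	/* unregister the scheduler, then wait out in-flight callers */
	synchronize_rcu();
}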
+++ b/net/netfilter/ipvs/ip_vs_est.c @@ -56,7 +56,7 @@ * Make a summary from each cpu */ static void ip_vs_read_cpu_stats(struct ip_vs_stats_user *sum, - struct ip_vs_cpu_stats *stats) + struct ip_vs_cpu_stats __percpu *stats) { int i; diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c index 4f53a5f04437..77c173282f38 100644 --- a/net/netfilter/ipvs/ip_vs_ftp.c +++ b/net/netfilter/ipvs/ip_vs_ftp.c @@ -267,10 +267,12 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp, * hopefully it will succeed on the retransmitted * packet. */ + rcu_read_lock(); ret = nf_nat_mangle_tcp_packet(skb, ct, ctinfo, iph->ihl * 4, start-data, end-start, buf, buf_len); + rcu_read_unlock(); if (ret) { ip_vs_nfct_expect_related(skb, ct, n_cp, IPPROTO_TCP, 0, 0); @@ -480,6 +482,7 @@ static int __init ip_vs_ftp_init(void) int rv; rv = register_pernet_subsys(&ip_vs_ftp_ops); + /* rcu_barrier() is called by netns on error */ return rv; } @@ -489,6 +492,7 @@ static int __init ip_vs_ftp_init(void) static void __exit ip_vs_ftp_exit(void) { unregister_pernet_subsys(&ip_vs_ftp_ops); + /* rcu_barrier() is called by netns */ } diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c index fdd89b9564ea..5ea26bd87743 100644 --- a/net/netfilter/ipvs/ip_vs_lblc.c +++ b/net/netfilter/ipvs/ip_vs_lblc.c @@ -90,11 +90,12 @@ * IP address and its destination server */ struct ip_vs_lblc_entry { - struct list_head list; + struct hlist_node list; int af; /* address family */ union nf_inet_addr addr; /* destination IP address */ - struct ip_vs_dest *dest; /* real server (cache) */ + struct ip_vs_dest __rcu *dest; /* real server (cache) */ unsigned long lastuse; /* last used time */ + struct rcu_head rcu_head; }; @@ -102,12 +103,14 @@ struct ip_vs_lblc_entry { * IPVS lblc hash table */ struct ip_vs_lblc_table { - struct list_head bucket[IP_VS_LBLC_TAB_SIZE]; /* hash bucket */ + struct rcu_head rcu_head; + struct hlist_head bucket[IP_VS_LBLC_TAB_SIZE]; /* hash bucket */ + struct timer_list periodic_timer; /* collect stale entries */ atomic_t entries; /* number of entries */ int max_size; /* maximum size of entries */ - struct timer_list periodic_timer; /* collect stale entries */ int rover; /* rover for expire check */ int counter; /* counter for no expire */ + bool dead; }; @@ -129,13 +132,16 @@ static ctl_table vs_vars_table[] = { static inline void ip_vs_lblc_free(struct ip_vs_lblc_entry *en) { - list_del(&en->list); + struct ip_vs_dest *dest; + + hlist_del_rcu(&en->list); /* * We don't kfree dest because it is referred either by its service * or the trash dest list. */ - atomic_dec(&en->dest->refcnt); - kfree(en); + dest = rcu_dereference_protected(en->dest, 1); + ip_vs_dest_put(dest); + kfree_rcu(en, rcu_head); } @@ -165,15 +171,12 @@ ip_vs_lblc_hash(struct ip_vs_lblc_table *tbl, struct ip_vs_lblc_entry *en) { unsigned int hash = ip_vs_lblc_hashkey(en->af, &en->addr); - list_add(&en->list, &tbl->bucket[hash]); + hlist_add_head_rcu(&en->list, &tbl->bucket[hash]); atomic_inc(&tbl->entries); } -/* - * Get ip_vs_lblc_entry associated with supplied parameters. Called under read - * lock - */ +/* Get ip_vs_lblc_entry associated with supplied parameters. 
*/ static inline struct ip_vs_lblc_entry * ip_vs_lblc_get(int af, struct ip_vs_lblc_table *tbl, const union nf_inet_addr *addr) @@ -181,7 +184,7 @@ ip_vs_lblc_get(int af, struct ip_vs_lblc_table *tbl, unsigned int hash = ip_vs_lblc_hashkey(af, addr); struct ip_vs_lblc_entry *en; - list_for_each_entry(en, &tbl->bucket[hash], list) + hlist_for_each_entry_rcu(en, &tbl->bucket[hash], list) if (ip_vs_addr_equal(af, &en->addr, addr)) return en; @@ -191,7 +194,7 @@ ip_vs_lblc_get(int af, struct ip_vs_lblc_table *tbl, /* * Create or update an ip_vs_lblc_entry, which is a mapping of a destination IP - * address to a server. Called under write lock. + * address to a server. Called under spin lock. */ static inline struct ip_vs_lblc_entry * ip_vs_lblc_new(struct ip_vs_lblc_table *tbl, const union nf_inet_addr *daddr, @@ -209,14 +212,20 @@ ip_vs_lblc_new(struct ip_vs_lblc_table *tbl, const union nf_inet_addr *daddr, ip_vs_addr_copy(dest->af, &en->addr, daddr); en->lastuse = jiffies; - atomic_inc(&dest->refcnt); - en->dest = dest; + ip_vs_dest_hold(dest); + RCU_INIT_POINTER(en->dest, dest); ip_vs_lblc_hash(tbl, en); - } else if (en->dest != dest) { - atomic_dec(&en->dest->refcnt); - atomic_inc(&dest->refcnt); - en->dest = dest; + } else { + struct ip_vs_dest *old_dest; + + old_dest = rcu_dereference_protected(en->dest, 1); + if (old_dest != dest) { + ip_vs_dest_put(old_dest); + ip_vs_dest_hold(dest); + /* No ordering constraints for refcnt */ + RCU_INIT_POINTER(en->dest, dest); + } } return en; @@ -226,17 +235,22 @@ ip_vs_lblc_new(struct ip_vs_lblc_table *tbl, const union nf_inet_addr *daddr, /* * Flush all the entries of the specified table. */ -static void ip_vs_lblc_flush(struct ip_vs_lblc_table *tbl) +static void ip_vs_lblc_flush(struct ip_vs_service *svc) { - struct ip_vs_lblc_entry *en, *nxt; + struct ip_vs_lblc_table *tbl = svc->sched_data; + struct ip_vs_lblc_entry *en; + struct hlist_node *next; int i; + spin_lock_bh(&svc->sched_lock); + tbl->dead = 1; for (i=0; i<IP_VS_LBLC_TAB_SIZE; i++) { - list_for_each_entry_safe(en, nxt, &tbl->bucket[i], list) { + hlist_for_each_entry_safe(en, next, &tbl->bucket[i], list) { ip_vs_lblc_free(en); atomic_dec(&tbl->entries); } } + spin_unlock_bh(&svc->sched_lock); } static int sysctl_lblc_expiration(struct ip_vs_service *svc) @@ -252,15 +266,16 @@ static int sysctl_lblc_expiration(struct ip_vs_service *svc) static inline void ip_vs_lblc_full_check(struct ip_vs_service *svc) { struct ip_vs_lblc_table *tbl = svc->sched_data; - struct ip_vs_lblc_entry *en, *nxt; + struct ip_vs_lblc_entry *en; + struct hlist_node *next; unsigned long now = jiffies; int i, j; for (i=0, j=tbl->rover; i<IP_VS_LBLC_TAB_SIZE; i++) { j = (j + 1) & IP_VS_LBLC_TAB_MASK; - write_lock(&svc->sched_lock); - list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) { + spin_lock(&svc->sched_lock); + hlist_for_each_entry_safe(en, next, &tbl->bucket[j], list) { if (time_before(now, en->lastuse + sysctl_lblc_expiration(svc))) @@ -269,7 +284,7 @@ static inline void ip_vs_lblc_full_check(struct ip_vs_service *svc) ip_vs_lblc_free(en); atomic_dec(&tbl->entries); } - write_unlock(&svc->sched_lock); + spin_unlock(&svc->sched_lock); } tbl->rover = j; } @@ -293,7 +308,8 @@ static void ip_vs_lblc_check_expire(unsigned long data) unsigned long now = jiffies; int goal; int i, j; - struct ip_vs_lblc_entry *en, *nxt; + struct ip_vs_lblc_entry *en; + struct hlist_node *next; if ((tbl->counter % COUNT_FOR_FULL_EXPIRATION) == 0) { /* do full expiration check */ @@ -314,8 +330,8 @@ static void 
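With the hlist conversion above, ip_vs_lblc_get() no longer takes any lock on the fast path: hlist_for_each_entry_rcu() is safe against concurrent hlist_add_head_rcu()/hlist_del_rcu() provided the caller sits inside an RCU read-side critical section, which the IPVS packet paths already do. Reader-side sketch matching the patched lookup (kernel fragment, illustrative):

/* Lockless lookup; the returned entry is only guaranteed to exist
 * until the surrounding rcu_read_unlock(). */
static struct ip_vs_lblc_entry *
lblc_lookup_sketch(struct ip_vs_lblc_table *tbl, int af,
                   const union nf_inet_addr *addr)
{
    unsigned int hash = ip_vs_lblc_hashkey(af, addr);
    struct ip_vs_lblc_entry *en;

    hlist_for_each_entry_rcu(en, &tbl->bucket[hash], list)
        if (ip_vs_addr_equal(af, &en->addr, addr))
            return en;
    return NULL;
}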
ip_vs_lblc_check_expire(unsigned long data) for (i=0, j=tbl->rover; i<IP_VS_LBLC_TAB_SIZE; i++) { j = (j + 1) & IP_VS_LBLC_TAB_MASK; - write_lock(&svc->sched_lock); - list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) { + spin_lock(&svc->sched_lock); + hlist_for_each_entry_safe(en, next, &tbl->bucket[j], list) { if (time_before(now, en->lastuse + ENTRY_TIMEOUT)) continue; @@ -323,7 +339,7 @@ static void ip_vs_lblc_check_expire(unsigned long data) atomic_dec(&tbl->entries); goal--; } - write_unlock(&svc->sched_lock); + spin_unlock(&svc->sched_lock); if (goal <= 0) break; } @@ -354,11 +370,12 @@ static int ip_vs_lblc_init_svc(struct ip_vs_service *svc) * Initialize the hash buckets */ for (i=0; i<IP_VS_LBLC_TAB_SIZE; i++) { - INIT_LIST_HEAD(&tbl->bucket[i]); + INIT_HLIST_HEAD(&tbl->bucket[i]); } tbl->max_size = IP_VS_LBLC_TAB_SIZE*16; tbl->rover = 0; tbl->counter = 1; + tbl->dead = 0; /* * Hook periodic timer for garbage collection @@ -371,7 +388,7 @@ static int ip_vs_lblc_init_svc(struct ip_vs_service *svc) } -static int ip_vs_lblc_done_svc(struct ip_vs_service *svc) +static void ip_vs_lblc_done_svc(struct ip_vs_service *svc) { struct ip_vs_lblc_table *tbl = svc->sched_data; @@ -379,14 +396,12 @@ static int ip_vs_lblc_done_svc(struct ip_vs_service *svc) del_timer_sync(&tbl->periodic_timer); /* got to clean up table entries here */ - ip_vs_lblc_flush(tbl); + ip_vs_lblc_flush(svc); /* release the table itself */ - kfree(tbl); + kfree_rcu(tbl, rcu_head); IP_VS_DBG(6, "LBLC hash table (memory=%Zdbytes) released\n", sizeof(*tbl)); - - return 0; } @@ -408,7 +423,7 @@ __ip_vs_lblc_schedule(struct ip_vs_service *svc) * The server with weight=0 is quiesced and will not receive any * new connection. */ - list_for_each_entry(dest, &svc->destinations, n_list) { + list_for_each_entry_rcu(dest, &svc->destinations, n_list) { if (dest->flags & IP_VS_DEST_F_OVERLOAD) continue; if (atomic_read(&dest->weight) > 0) { @@ -423,7 +438,7 @@ __ip_vs_lblc_schedule(struct ip_vs_service *svc) * Find the destination with the least load. */ nextstage: - list_for_each_entry_continue(dest, &svc->destinations, n_list) { + list_for_each_entry_continue_rcu(dest, &svc->destinations, n_list) { if (dest->flags & IP_VS_DEST_F_OVERLOAD) continue; @@ -457,7 +472,7 @@ is_overloaded(struct ip_vs_dest *dest, struct ip_vs_service *svc) if (atomic_read(&dest->activeconns) > atomic_read(&dest->weight)) { struct ip_vs_dest *d; - list_for_each_entry(d, &svc->destinations, n_list) { + list_for_each_entry_rcu(d, &svc->destinations, n_list) { if (atomic_read(&d->activeconns)*2 < atomic_read(&d->weight)) { return 1; @@ -484,7 +499,6 @@ ip_vs_lblc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) IP_VS_DBG(6, "%s(): Scheduling...\n", __func__); /* First look in our cache */ - read_lock(&svc->sched_lock); en = ip_vs_lblc_get(svc->af, tbl, &iph.daddr); if (en) { /* We only hold a read lock, but this is atomic */ @@ -499,14 +513,11 @@ ip_vs_lblc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) * free up entries from the trash at any time. 
*/ - if (en->dest->flags & IP_VS_DEST_F_AVAILABLE) - dest = en->dest; + dest = rcu_dereference(en->dest); + if ((dest->flags & IP_VS_DEST_F_AVAILABLE) && + atomic_read(&dest->weight) > 0 && !is_overloaded(dest, svc)) + goto out; } - read_unlock(&svc->sched_lock); - - /* If the destination has a weight and is not overloaded, use it */ - if (dest && atomic_read(&dest->weight) > 0 && !is_overloaded(dest, svc)) - goto out; /* No cache entry or it is invalid, time to schedule */ dest = __ip_vs_lblc_schedule(svc); @@ -516,9 +527,10 @@ ip_vs_lblc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) } /* If we fail to create a cache entry, we'll just use the valid dest */ - write_lock(&svc->sched_lock); - ip_vs_lblc_new(tbl, &iph.daddr, dest); - write_unlock(&svc->sched_lock); + spin_lock_bh(&svc->sched_lock); + if (!tbl->dead) + ip_vs_lblc_new(tbl, &iph.daddr, dest); + spin_unlock_bh(&svc->sched_lock); out: IP_VS_DBG_BUF(6, "LBLC: destination IP address %s --> server %s:%d\n", @@ -621,6 +633,7 @@ static void __exit ip_vs_lblc_cleanup(void) { unregister_ip_vs_scheduler(&ip_vs_lblc_scheduler); unregister_pernet_subsys(&ip_vs_lblc_ops); + synchronize_rcu(); } diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c index c03b6a3ade2f..50123c2ab484 100644 --- a/net/netfilter/ipvs/ip_vs_lblcr.c +++ b/net/netfilter/ipvs/ip_vs_lblcr.c @@ -89,40 +89,44 @@ */ struct ip_vs_dest_set_elem { struct list_head list; /* list link */ - struct ip_vs_dest *dest; /* destination server */ + struct ip_vs_dest __rcu *dest; /* destination server */ + struct rcu_head rcu_head; }; struct ip_vs_dest_set { atomic_t size; /* set size */ unsigned long lastmod; /* last modified time */ struct list_head list; /* destination list */ - rwlock_t lock; /* lock for this list */ }; -static struct ip_vs_dest_set_elem * -ip_vs_dest_set_insert(struct ip_vs_dest_set *set, struct ip_vs_dest *dest) +static void ip_vs_dest_set_insert(struct ip_vs_dest_set *set, + struct ip_vs_dest *dest, bool check) { struct ip_vs_dest_set_elem *e; - list_for_each_entry(e, &set->list, list) { - if (e->dest == dest) - /* already existed */ - return NULL; + if (check) { + list_for_each_entry(e, &set->list, list) { + struct ip_vs_dest *d; + + d = rcu_dereference_protected(e->dest, 1); + if (d == dest) + /* already existed */ + return; + } } e = kmalloc(sizeof(*e), GFP_ATOMIC); if (e == NULL) - return NULL; + return; - atomic_inc(&dest->refcnt); - e->dest = dest; + ip_vs_dest_hold(dest); + RCU_INIT_POINTER(e->dest, dest); - list_add(&e->list, &set->list); + list_add_rcu(&e->list, &set->list); atomic_inc(&set->size); set->lastmod = jiffies; - return e; } static void @@ -131,13 +135,16 @@ ip_vs_dest_set_erase(struct ip_vs_dest_set *set, struct ip_vs_dest *dest) struct ip_vs_dest_set_elem *e; list_for_each_entry(e, &set->list, list) { - if (e->dest == dest) { + struct ip_vs_dest *d; + + d = rcu_dereference_protected(e->dest, 1); + if (d == dest) { /* HIT */ atomic_dec(&set->size); set->lastmod = jiffies; - atomic_dec(&e->dest->refcnt); - list_del(&e->list); - kfree(e); + ip_vs_dest_put(dest); + list_del_rcu(&e->list); + kfree_rcu(e, rcu_head); break; } } @@ -147,17 +154,18 @@ static void ip_vs_dest_set_eraseall(struct ip_vs_dest_set *set) { struct ip_vs_dest_set_elem *e, *ep; - write_lock(&set->lock); list_for_each_entry_safe(e, ep, &set->list, list) { + struct ip_vs_dest *d; + + d = rcu_dereference_protected(e->dest, 1); /* * We don't kfree dest because it is referred either * by its service or by the trash dest list. 
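The new dead flag above closes the race between service teardown and a packet that already looked up the old table: ip_vs_lblc_flush() sets dead = 1 under sched_lock before freeing entries, and the scheduler re-checks the flag under the same lock before calling ip_vs_lblc_new(), so nothing can be inserted into a table that is about to be reclaimed by kfree_rcu(). A runnable pthread rendering of that handshake (all names ours):

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct cache {
    pthread_mutex_t lock;
    bool dead;        /* set once teardown has started */
    int entries;
};

static void cache_flush(struct cache *c)
{
    pthread_mutex_lock(&c->lock);
    c->dead = true;   /* late inserters must observe this */
    c->entries = 0;
    pthread_mutex_unlock(&c->lock);
}

static bool cache_insert(struct cache *c)
{
    bool ok = false;

    pthread_mutex_lock(&c->lock);
    if (!c->dead) {   /* re-check under the lock, as the patch does */
        c->entries++;
        ok = true;
    }
    pthread_mutex_unlock(&c->lock);
    return ok;
}

int main(void)
{
    struct cache c = { PTHREAD_MUTEX_INITIALIZER, false, 0 };

    printf("insert before flush: %d\n", cache_insert(&c));
    cache_flush(&c);
    printf("insert after flush:  %d\n", cache_insert(&c));
    return 0;
}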
*/ - atomic_dec(&e->dest->refcnt); - list_del(&e->list); - kfree(e); + ip_vs_dest_put(d); + list_del_rcu(&e->list); + kfree_rcu(e, rcu_head); } - write_unlock(&set->lock); } /* get weighted least-connection node in the destination set */ @@ -171,8 +179,8 @@ static inline struct ip_vs_dest *ip_vs_dest_set_min(struct ip_vs_dest_set *set) return NULL; /* select the first destination server, whose weight > 0 */ - list_for_each_entry(e, &set->list, list) { - least = e->dest; + list_for_each_entry_rcu(e, &set->list, list) { + least = rcu_dereference(e->dest); if (least->flags & IP_VS_DEST_F_OVERLOAD) continue; @@ -186,8 +194,8 @@ static inline struct ip_vs_dest *ip_vs_dest_set_min(struct ip_vs_dest_set *set) /* find the destination with the weighted least load */ nextstage: - list_for_each_entry(e, &set->list, list) { - dest = e->dest; + list_for_each_entry_continue_rcu(e, &set->list, list) { + dest = rcu_dereference(e->dest); if (dest->flags & IP_VS_DEST_F_OVERLOAD) continue; @@ -224,7 +232,7 @@ static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set) /* select the first destination server, whose weight > 0 */ list_for_each_entry(e, &set->list, list) { - most = e->dest; + most = rcu_dereference_protected(e->dest, 1); if (atomic_read(&most->weight) > 0) { moh = ip_vs_dest_conn_overhead(most); goto nextstage; @@ -234,8 +242,8 @@ static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set) /* find the destination with the weighted most load */ nextstage: - list_for_each_entry(e, &set->list, list) { - dest = e->dest; + list_for_each_entry_continue(e, &set->list, list) { + dest = rcu_dereference_protected(e->dest, 1); doh = ip_vs_dest_conn_overhead(dest); /* moh/mw < doh/dw ==> moh*dw < doh*mw, where mw,dw>0 */ if ((moh * atomic_read(&dest->weight) < @@ -262,11 +270,12 @@ static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set) * IP address and its destination server set */ struct ip_vs_lblcr_entry { - struct list_head list; + struct hlist_node list; int af; /* address family */ union nf_inet_addr addr; /* destination IP address */ struct ip_vs_dest_set set; /* destination server set */ unsigned long lastuse; /* last used time */ + struct rcu_head rcu_head; }; @@ -274,12 +283,14 @@ struct ip_vs_lblcr_entry { * IPVS lblcr hash table */ struct ip_vs_lblcr_table { - struct list_head bucket[IP_VS_LBLCR_TAB_SIZE]; /* hash bucket */ + struct rcu_head rcu_head; + struct hlist_head bucket[IP_VS_LBLCR_TAB_SIZE]; /* hash bucket */ atomic_t entries; /* number of entries */ int max_size; /* maximum size of entries */ struct timer_list periodic_timer; /* collect stale entries */ int rover; /* rover for expire check */ int counter; /* counter for no expire */ + bool dead; }; @@ -302,9 +313,9 @@ static ctl_table vs_vars_table[] = { static inline void ip_vs_lblcr_free(struct ip_vs_lblcr_entry *en) { - list_del(&en->list); + hlist_del_rcu(&en->list); ip_vs_dest_set_eraseall(&en->set); - kfree(en); + kfree_rcu(en, rcu_head); } @@ -334,15 +345,12 @@ ip_vs_lblcr_hash(struct ip_vs_lblcr_table *tbl, struct ip_vs_lblcr_entry *en) { unsigned int hash = ip_vs_lblcr_hashkey(en->af, &en->addr); - list_add(&en->list, &tbl->bucket[hash]); + hlist_add_head_rcu(&en->list, &tbl->bucket[hash]); atomic_inc(&tbl->entries); } -/* - * Get ip_vs_lblcr_entry associated with supplied parameters. Called under - * read lock. - */ +/* Get ip_vs_lblcr_entry associated with supplied parameters. 
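Both ip_vs_dest_set_min() and ip_vs_dest_set_max() above compare load-to-weight ratios by cross-multiplying: for positive weights, loh/lw < doh/dw exactly when loh*dw < doh*lw, which avoids integer division and its truncation. The selection loop boiled down to runnable C (types are ours; the widening cast guards the multiply):

#include <stdio.h>

struct dst { int overhead, weight; };

/* Pick the smallest overhead/weight ratio without dividing,
 * mirroring the comparison in ip_vs_dest_set_min()/_max(). */
static int pick_least(const struct dst *d, int n)
{
    int least = -1, i;

    for (i = 0; i < n; i++) {
        if (d[i].weight <= 0)
            continue;    /* weight 0 means quiesced */
        if (least < 0 ||
            (long long)d[i].overhead * d[least].weight <
            (long long)d[least].overhead * d[i].weight)
            least = i;
    }
    return least;
}

int main(void)
{
    struct dst d[] = { { 10, 1 }, { 30, 5 }, { 8, 0 } };

    printf("least loaded: index %d\n", pick_least(d, 3));  /* 1 */
    return 0;
}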
*/ static inline struct ip_vs_lblcr_entry * ip_vs_lblcr_get(int af, struct ip_vs_lblcr_table *tbl, const union nf_inet_addr *addr) @@ -350,7 +358,7 @@ ip_vs_lblcr_get(int af, struct ip_vs_lblcr_table *tbl, unsigned int hash = ip_vs_lblcr_hashkey(af, addr); struct ip_vs_lblcr_entry *en; - list_for_each_entry(en, &tbl->bucket[hash], list) + hlist_for_each_entry_rcu(en, &tbl->bucket[hash], list) if (ip_vs_addr_equal(af, &en->addr, addr)) return en; @@ -360,7 +368,7 @@ ip_vs_lblcr_get(int af, struct ip_vs_lblcr_table *tbl, /* * Create or update an ip_vs_lblcr_entry, which is a mapping of a destination - * IP address to a server. Called under write lock. + * IP address to a server. Called under spin lock. */ static inline struct ip_vs_lblcr_entry * ip_vs_lblcr_new(struct ip_vs_lblcr_table *tbl, const union nf_inet_addr *daddr, @@ -381,14 +389,14 @@ ip_vs_lblcr_new(struct ip_vs_lblcr_table *tbl, const union nf_inet_addr *daddr, /* initialize its dest set */ atomic_set(&(en->set.size), 0); INIT_LIST_HEAD(&en->set.list); - rwlock_init(&en->set.lock); + + ip_vs_dest_set_insert(&en->set, dest, false); ip_vs_lblcr_hash(tbl, en); + return en; } - write_lock(&en->set.lock); - ip_vs_dest_set_insert(&en->set, dest); - write_unlock(&en->set.lock); + ip_vs_dest_set_insert(&en->set, dest, true); return en; } @@ -397,17 +405,21 @@ ip_vs_lblcr_new(struct ip_vs_lblcr_table *tbl, const union nf_inet_addr *daddr, /* * Flush all the entries of the specified table. */ -static void ip_vs_lblcr_flush(struct ip_vs_lblcr_table *tbl) +static void ip_vs_lblcr_flush(struct ip_vs_service *svc) { + struct ip_vs_lblcr_table *tbl = svc->sched_data; int i; - struct ip_vs_lblcr_entry *en, *nxt; + struct ip_vs_lblcr_entry *en; + struct hlist_node *next; - /* No locking required, only called during cleanup. 
*/ + spin_lock_bh(&svc->sched_lock); + tbl->dead = 1; for (i=0; i<IP_VS_LBLCR_TAB_SIZE; i++) { - list_for_each_entry_safe(en, nxt, &tbl->bucket[i], list) { + hlist_for_each_entry_safe(en, next, &tbl->bucket[i], list) { ip_vs_lblcr_free(en); } } + spin_unlock_bh(&svc->sched_lock); } static int sysctl_lblcr_expiration(struct ip_vs_service *svc) @@ -425,13 +437,14 @@ static inline void ip_vs_lblcr_full_check(struct ip_vs_service *svc) struct ip_vs_lblcr_table *tbl = svc->sched_data; unsigned long now = jiffies; int i, j; - struct ip_vs_lblcr_entry *en, *nxt; + struct ip_vs_lblcr_entry *en; + struct hlist_node *next; for (i=0, j=tbl->rover; i<IP_VS_LBLCR_TAB_SIZE; i++) { j = (j + 1) & IP_VS_LBLCR_TAB_MASK; - write_lock(&svc->sched_lock); - list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) { + spin_lock(&svc->sched_lock); + hlist_for_each_entry_safe(en, next, &tbl->bucket[j], list) { if (time_after(en->lastuse + sysctl_lblcr_expiration(svc), now)) continue; @@ -439,7 +452,7 @@ static inline void ip_vs_lblcr_full_check(struct ip_vs_service *svc) ip_vs_lblcr_free(en); atomic_dec(&tbl->entries); } - write_unlock(&svc->sched_lock); + spin_unlock(&svc->sched_lock); } tbl->rover = j; } @@ -463,7 +476,8 @@ static void ip_vs_lblcr_check_expire(unsigned long data) unsigned long now = jiffies; int goal; int i, j; - struct ip_vs_lblcr_entry *en, *nxt; + struct ip_vs_lblcr_entry *en; + struct hlist_node *next; if ((tbl->counter % COUNT_FOR_FULL_EXPIRATION) == 0) { /* do full expiration check */ @@ -484,8 +498,8 @@ static void ip_vs_lblcr_check_expire(unsigned long data) for (i=0, j=tbl->rover; i<IP_VS_LBLCR_TAB_SIZE; i++) { j = (j + 1) & IP_VS_LBLCR_TAB_MASK; - write_lock(&svc->sched_lock); - list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) { + spin_lock(&svc->sched_lock); + hlist_for_each_entry_safe(en, next, &tbl->bucket[j], list) { if (time_before(now, en->lastuse+ENTRY_TIMEOUT)) continue; @@ -493,7 +507,7 @@ static void ip_vs_lblcr_check_expire(unsigned long data) atomic_dec(&tbl->entries); goal--; } - write_unlock(&svc->sched_lock); + spin_unlock(&svc->sched_lock); if (goal <= 0) break; } @@ -523,11 +537,12 @@ static int ip_vs_lblcr_init_svc(struct ip_vs_service *svc) * Initialize the hash buckets */ for (i=0; i<IP_VS_LBLCR_TAB_SIZE; i++) { - INIT_LIST_HEAD(&tbl->bucket[i]); + INIT_HLIST_HEAD(&tbl->bucket[i]); } tbl->max_size = IP_VS_LBLCR_TAB_SIZE*16; tbl->rover = 0; tbl->counter = 1; + tbl->dead = 0; /* * Hook periodic timer for garbage collection @@ -540,7 +555,7 @@ static int ip_vs_lblcr_init_svc(struct ip_vs_service *svc) } -static int ip_vs_lblcr_done_svc(struct ip_vs_service *svc) +static void ip_vs_lblcr_done_svc(struct ip_vs_service *svc) { struct ip_vs_lblcr_table *tbl = svc->sched_data; @@ -548,14 +563,12 @@ static int ip_vs_lblcr_done_svc(struct ip_vs_service *svc) del_timer_sync(&tbl->periodic_timer); /* got to clean up table entries here */ - ip_vs_lblcr_flush(tbl); + ip_vs_lblcr_flush(svc); /* release the table itself */ - kfree(tbl); + kfree_rcu(tbl, rcu_head); IP_VS_DBG(6, "LBLCR hash table (memory=%Zdbytes) released\n", sizeof(*tbl)); - - return 0; } @@ -577,7 +590,7 @@ __ip_vs_lblcr_schedule(struct ip_vs_service *svc) * The server with weight=0 is quiesced and will not receive any * new connection. 
*/ - list_for_each_entry(dest, &svc->destinations, n_list) { + list_for_each_entry_rcu(dest, &svc->destinations, n_list) { if (dest->flags & IP_VS_DEST_F_OVERLOAD) continue; @@ -593,7 +606,7 @@ __ip_vs_lblcr_schedule(struct ip_vs_service *svc) * Find the destination with the least load. */ nextstage: - list_for_each_entry_continue(dest, &svc->destinations, n_list) { + list_for_each_entry_continue_rcu(dest, &svc->destinations, n_list) { if (dest->flags & IP_VS_DEST_F_OVERLOAD) continue; @@ -627,7 +640,7 @@ is_overloaded(struct ip_vs_dest *dest, struct ip_vs_service *svc) if (atomic_read(&dest->activeconns) > atomic_read(&dest->weight)) { struct ip_vs_dest *d; - list_for_each_entry(d, &svc->destinations, n_list) { + list_for_each_entry_rcu(d, &svc->destinations, n_list) { if (atomic_read(&d->activeconns)*2 < atomic_read(&d->weight)) { return 1; @@ -646,7 +659,7 @@ ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) { struct ip_vs_lblcr_table *tbl = svc->sched_data; struct ip_vs_iphdr iph; - struct ip_vs_dest *dest = NULL; + struct ip_vs_dest *dest; struct ip_vs_lblcr_entry *en; ip_vs_fill_iph_addr_only(svc->af, skb, &iph); @@ -654,53 +667,46 @@ ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) IP_VS_DBG(6, "%s(): Scheduling...\n", __func__); /* First look in our cache */ - read_lock(&svc->sched_lock); en = ip_vs_lblcr_get(svc->af, tbl, &iph.daddr); if (en) { - /* We only hold a read lock, but this is atomic */ en->lastuse = jiffies; /* Get the least loaded destination */ - read_lock(&en->set.lock); dest = ip_vs_dest_set_min(&en->set); - read_unlock(&en->set.lock); /* More than one destination + enough time passed by, cleanup */ if (atomic_read(&en->set.size) > 1 && - time_after(jiffies, en->set.lastmod + + time_after(jiffies, en->set.lastmod + sysctl_lblcr_expiration(svc))) { - struct ip_vs_dest *m; + spin_lock_bh(&svc->sched_lock); + if (atomic_read(&en->set.size) > 1) { + struct ip_vs_dest *m; - write_lock(&en->set.lock); - m = ip_vs_dest_set_max(&en->set); - if (m) - ip_vs_dest_set_erase(&en->set, m); - write_unlock(&en->set.lock); + m = ip_vs_dest_set_max(&en->set); + if (m) + ip_vs_dest_set_erase(&en->set, m); + } + spin_unlock_bh(&svc->sched_lock); } /* If the destination is not overloaded, use it */ - if (dest && !is_overloaded(dest, svc)) { - read_unlock(&svc->sched_lock); + if (dest && !is_overloaded(dest, svc)) goto out; - } /* The cache entry is invalid, time to schedule */ dest = __ip_vs_lblcr_schedule(svc); if (!dest) { ip_vs_scheduler_err(svc, "no destination available"); - read_unlock(&svc->sched_lock); return NULL; } /* Update our cache entry */ - write_lock(&en->set.lock); - ip_vs_dest_set_insert(&en->set, dest); - write_unlock(&en->set.lock); - } - read_unlock(&svc->sched_lock); - - if (dest) + spin_lock_bh(&svc->sched_lock); + if (!tbl->dead) + ip_vs_dest_set_insert(&en->set, dest, true); + spin_unlock_bh(&svc->sched_lock); goto out; + } /* No cache entry, time to schedule */ dest = __ip_vs_lblcr_schedule(svc); @@ -710,9 +716,10 @@ ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) } /* If we fail to create a cache entry, we'll just use the valid dest */ - write_lock(&svc->sched_lock); - ip_vs_lblcr_new(tbl, &iph.daddr, dest); - write_unlock(&svc->sched_lock); + spin_lock_bh(&svc->sched_lock); + if (!tbl->dead) + ip_vs_lblcr_new(tbl, &iph.daddr, dest); + spin_unlock_bh(&svc->sched_lock); out: IP_VS_DBG_BUF(6, "LBLCR: destination IP address %s --> server %s:%d\n", @@ -814,6 +821,7 @@ static void __exit 
ip_vs_lblcr_cleanup(void) { unregister_ip_vs_scheduler(&ip_vs_lblcr_scheduler); unregister_pernet_subsys(&ip_vs_lblcr_ops); + synchronize_rcu(); } diff --git a/net/netfilter/ipvs/ip_vs_lc.c b/net/netfilter/ipvs/ip_vs_lc.c index f391819c0cca..5128e338a749 100644 --- a/net/netfilter/ipvs/ip_vs_lc.c +++ b/net/netfilter/ipvs/ip_vs_lc.c @@ -42,7 +42,7 @@ ip_vs_lc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) * served, but no new connection is assigned to the server. */ - list_for_each_entry(dest, &svc->destinations, n_list) { + list_for_each_entry_rcu(dest, &svc->destinations, n_list) { if ((dest->flags & IP_VS_DEST_F_OVERLOAD) || atomic_read(&dest->weight) == 0) continue; @@ -84,6 +84,7 @@ static int __init ip_vs_lc_init(void) static void __exit ip_vs_lc_cleanup(void) { unregister_ip_vs_scheduler(&ip_vs_lc_scheduler); + synchronize_rcu(); } module_init(ip_vs_lc_init); diff --git a/net/netfilter/ipvs/ip_vs_nq.c b/net/netfilter/ipvs/ip_vs_nq.c index 984d9c137d84..646cfd4baa73 100644 --- a/net/netfilter/ipvs/ip_vs_nq.c +++ b/net/netfilter/ipvs/ip_vs_nq.c @@ -75,7 +75,7 @@ ip_vs_nq_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) * new connections. */ - list_for_each_entry(dest, &svc->destinations, n_list) { + list_for_each_entry_rcu(dest, &svc->destinations, n_list) { if (dest->flags & IP_VS_DEST_F_OVERLOAD || !atomic_read(&dest->weight)) @@ -133,6 +133,7 @@ static int __init ip_vs_nq_init(void) static void __exit ip_vs_nq_cleanup(void) { unregister_ip_vs_scheduler(&ip_vs_nq_scheduler); + synchronize_rcu(); } module_init(ip_vs_nq_init); diff --git a/net/netfilter/ipvs/ip_vs_pe.c b/net/netfilter/ipvs/ip_vs_pe.c index 5cf859ccb31b..1a82b29ce8ea 100644 --- a/net/netfilter/ipvs/ip_vs_pe.c +++ b/net/netfilter/ipvs/ip_vs_pe.c @@ -13,20 +13,8 @@ /* IPVS pe list */ static LIST_HEAD(ip_vs_pe); -/* lock for service table */ -static DEFINE_SPINLOCK(ip_vs_pe_lock); - -/* Bind a service with a pe */ -void ip_vs_bind_pe(struct ip_vs_service *svc, struct ip_vs_pe *pe) -{ - svc->pe = pe; -} - -/* Unbind a service from its pe */ -void ip_vs_unbind_pe(struct ip_vs_service *svc) -{ - svc->pe = NULL; -} +/* semaphore for IPVS PEs. */ +static DEFINE_MUTEX(ip_vs_pe_mutex); /* Get pe in the pe list by name */ struct ip_vs_pe *__ip_vs_pe_getbyname(const char *pe_name) @@ -36,9 +24,8 @@ struct ip_vs_pe *__ip_vs_pe_getbyname(const char *pe_name) IP_VS_DBG(10, "%s(): pe_name \"%s\"\n", __func__, pe_name); - spin_lock_bh(&ip_vs_pe_lock); - - list_for_each_entry(pe, &ip_vs_pe, n_list) { + rcu_read_lock(); + list_for_each_entry_rcu(pe, &ip_vs_pe, n_list) { /* Test and get the modules atomically */ if (pe->module && !try_module_get(pe->module)) { @@ -47,14 +34,14 @@ struct ip_vs_pe *__ip_vs_pe_getbyname(const char *pe_name) } if (strcmp(pe_name, pe->name)==0) { /* HIT */ - spin_unlock_bh(&ip_vs_pe_lock); + rcu_read_unlock(); return pe; } if (pe->module) module_put(pe->module); } + rcu_read_unlock(); - spin_unlock_bh(&ip_vs_pe_lock); return NULL; } @@ -83,22 +70,13 @@ int register_ip_vs_pe(struct ip_vs_pe *pe) /* increase the module use count */ ip_vs_use_count_inc(); - spin_lock_bh(&ip_vs_pe_lock); - - if (!list_empty(&pe->n_list)) { - spin_unlock_bh(&ip_vs_pe_lock); - ip_vs_use_count_dec(); - pr_err("%s(): [%s] pe already linked\n", - __func__, pe->name); - return -EINVAL; - } - + mutex_lock(&ip_vs_pe_mutex); /* Make sure that the pe with this name doesn't exist * in the pe list. 
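ip_vs_pe.c above splits what the old spinlock did in two: writers (register/unregister) serialize on a mutex and publish with list_add_rcu()/list_del_rcu(), while __ip_vs_pe_getbyname() walks the list under rcu_read_lock() and pins each candidate with try_module_get() before the name compare, so a PE module cannot be unloaded between the match and the return. The lookup shape, condensed from the hunks above (kernel fragment, illustrative):

rcu_read_lock();
list_for_each_entry_rcu(pe, &ip_vs_pe, n_list) {
    /* pin the module first; skip it if it is mid-unload */
    if (pe->module && !try_module_get(pe->module))
        continue;
    if (strcmp(pe_name, pe->name) == 0) {
        rcu_read_unlock();
        return pe;            /* caller now owns a module ref */
    }
    if (pe->module)
        module_put(pe->module);
}
rcu_read_unlock();
return NULL;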
*/ list_for_each_entry(tmp, &ip_vs_pe, n_list) { if (strcmp(tmp->name, pe->name) == 0) { - spin_unlock_bh(&ip_vs_pe_lock); + mutex_unlock(&ip_vs_pe_mutex); ip_vs_use_count_dec(); pr_err("%s(): [%s] pe already existed " "in the system\n", __func__, pe->name); @@ -106,8 +84,8 @@ int register_ip_vs_pe(struct ip_vs_pe *pe) } } /* Add it into the d-linked pe list */ - list_add(&pe->n_list, &ip_vs_pe); - spin_unlock_bh(&ip_vs_pe_lock); + list_add_rcu(&pe->n_list, &ip_vs_pe); + mutex_unlock(&ip_vs_pe_mutex); pr_info("[%s] pe registered.\n", pe->name); @@ -118,17 +96,10 @@ EXPORT_SYMBOL_GPL(register_ip_vs_pe); /* Unregister a pe from the pe list */ int unregister_ip_vs_pe(struct ip_vs_pe *pe) { - spin_lock_bh(&ip_vs_pe_lock); - if (list_empty(&pe->n_list)) { - spin_unlock_bh(&ip_vs_pe_lock); - pr_err("%s(): [%s] pe is not in the list. failed\n", - __func__, pe->name); - return -EINVAL; - } - + mutex_lock(&ip_vs_pe_mutex); /* Remove it from the d-linked pe list */ - list_del(&pe->n_list); - spin_unlock_bh(&ip_vs_pe_lock); + list_del_rcu(&pe->n_list); + mutex_unlock(&ip_vs_pe_mutex); /* decrease the module use count */ ip_vs_use_count_dec(); diff --git a/net/netfilter/ipvs/ip_vs_pe_sip.c b/net/netfilter/ipvs/ip_vs_pe_sip.c index 12475ef88daf..9ef22bdce9f1 100644 --- a/net/netfilter/ipvs/ip_vs_pe_sip.c +++ b/net/netfilter/ipvs/ip_vs_pe_sip.c @@ -13,7 +13,8 @@ static const char *ip_vs_dbg_callid(char *buf, size_t buf_len, const char *callid, size_t callid_len, int *idx) { - size_t len = min(min(callid_len, (size_t)64), buf_len - *idx - 1); + size_t max_len = 64; + size_t len = min3(max_len, callid_len, buf_len - *idx - 1); memcpy(buf + *idx, callid, len); buf[*idx+len] = '\0'; *idx += len + 1; @@ -37,14 +38,10 @@ static int get_callid(const char *dptr, unsigned int dataoff, if (ret > 0) break; if (!ret) - return 0; + return -EINVAL; dataoff += *matchoff; } - /* Empty callid is useless */ - if (!*matchlen) - return -EINVAL; - /* Too large is useless */ if (*matchlen > IP_VS_PEDATA_MAXLEN) return -EINVAL; @@ -172,6 +169,7 @@ static int __init ip_vs_sip_init(void) static void __exit ip_vs_sip_cleanup(void) { unregister_ip_vs_pe(&ip_vs_sip_pe); + synchronize_rcu(); } module_init(ip_vs_sip_init); diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c index ae8ec6f27688..86464881cd20 100644 --- a/net/netfilter/ipvs/ip_vs_proto_sctp.c +++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c @@ -27,9 +27,10 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, if (sch == NULL) return 0; net = skb_net(skb); + rcu_read_lock(); if ((sch->type == SCTP_CID_INIT) && - (svc = ip_vs_service_get(net, af, skb->mark, iph->protocol, - &iph->daddr, sh->dest))) { + (svc = ip_vs_service_find(net, af, skb->mark, iph->protocol, + &iph->daddr, sh->dest))) { int ignored; if (ip_vs_todrop(net_ipvs(net))) { @@ -37,7 +38,7 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, * It seems that we are very loaded. 
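The ip_vs_pe_sip.c cleanup above replaces nested min() calls with min3() and gives the 64-byte cap a typed variable, dropping the (size_t)64 cast; the bound reads as "at most 64 bytes, at most the Call-ID length, at most the room left before the terminating NUL". The same clamp as a runnable userspace snippet (MIN3 is our macro; the kernel takes min3() from linux/kernel.h):

#include <stdio.h>
#include <string.h>

#define MIN(a, b)     ((a) < (b) ? (a) : (b))
#define MIN3(a, b, c) MIN(MIN(a, b), c)

/* Copy a bounded, NUL-terminated Call-ID into buf at *idx and
 * advance *idx past the NUL -- the clamp ip_vs_dbg_callid() uses. */
static const char *dbg_callid(char *buf, size_t buf_len,
                              const char *callid, size_t callid_len,
                              size_t *idx)
{
    size_t max_len = 64;
    size_t len = MIN3(max_len, callid_len, buf_len - *idx - 1);
    const char *start = buf + *idx;

    memcpy(buf + *idx, callid, len);
    buf[*idx + len] = '\0';
    *idx += len + 1;
    return start;
}

int main(void)
{
    char buf[32];
    size_t idx = 0;

    puts(dbg_callid(buf, sizeof(buf), "abcdef1234@host", 15, &idx));
    return 0;
}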
* We have to drop this packet :( */ - ip_vs_service_put(svc); + rcu_read_unlock(); *verdict = NF_DROP; return 0; } @@ -49,14 +50,13 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, if (!*cpp && ignored <= 0) { if (!ignored) *verdict = ip_vs_leave(svc, skb, pd, iph); - else { - ip_vs_service_put(svc); + else *verdict = NF_DROP; - } + rcu_read_unlock(); return 0; } - ip_vs_service_put(svc); } + rcu_read_unlock(); /* NF_ACCEPT */ return 1; } @@ -208,7 +208,7 @@ enum ipvs_sctp_event_t { IP_VS_SCTP_EVE_LAST }; -static enum ipvs_sctp_event_t sctp_events[255] = { +static enum ipvs_sctp_event_t sctp_events[256] = { IP_VS_SCTP_EVE_DATA_CLI, IP_VS_SCTP_EVE_INIT_CLI, IP_VS_SCTP_EVE_INIT_ACK_CLI, @@ -906,7 +906,7 @@ set_sctp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp, sctp_chunkhdr_t _sctpch, *sch; unsigned char chunk_type; int event, next_state; - int ihl; + int ihl, cofs; #ifdef CONFIG_IP_VS_IPV6 ihl = cp->af == AF_INET ? ip_hdrlen(skb) : sizeof(struct ipv6hdr); @@ -914,8 +914,8 @@ set_sctp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp, ihl = ip_hdrlen(skb); #endif - sch = skb_header_pointer(skb, ihl + sizeof(sctp_sctphdr_t), - sizeof(_sctpch), &_sctpch); + cofs = ihl + sizeof(sctp_sctphdr_t); + sch = skb_header_pointer(skb, cofs, sizeof(_sctpch), &_sctpch); if (sch == NULL) return; @@ -933,10 +933,12 @@ set_sctp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp, */ if ((sch->type == SCTP_CID_COOKIE_ECHO) || (sch->type == SCTP_CID_COOKIE_ACK)) { - sch = skb_header_pointer(skb, (ihl + sizeof(sctp_sctphdr_t) + - sch->length), sizeof(_sctpch), &_sctpch); - if (sch) { - if (sch->type == SCTP_CID_ABORT) + int clen = ntohs(sch->length); + + if (clen >= sizeof(sctp_chunkhdr_t)) { + sch = skb_header_pointer(skb, cofs + ALIGN(clen, 4), + sizeof(_sctpch), &_sctpch); + if (sch && sch->type == SCTP_CID_ABORT) chunk_type = sch->type; } } @@ -992,9 +994,9 @@ static void sctp_state_transition(struct ip_vs_conn *cp, int direction, const struct sk_buff *skb, struct ip_vs_proto_data *pd) { - spin_lock(&cp->lock); + spin_lock_bh(&cp->lock); set_sctp_state(pd, cp, direction, skb); - spin_unlock(&cp->lock); + spin_unlock_bh(&cp->lock); } static inline __u16 sctp_app_hashkey(__be16 port) @@ -1014,30 +1016,25 @@ static int sctp_register_app(struct net *net, struct ip_vs_app *inc) hash = sctp_app_hashkey(port); - spin_lock_bh(&ipvs->sctp_app_lock); list_for_each_entry(i, &ipvs->sctp_apps[hash], p_list) { if (i->port == port) { ret = -EEXIST; goto out; } } - list_add(&inc->p_list, &ipvs->sctp_apps[hash]); + list_add_rcu(&inc->p_list, &ipvs->sctp_apps[hash]); atomic_inc(&pd->appcnt); out: - spin_unlock_bh(&ipvs->sctp_app_lock); return ret; } static void sctp_unregister_app(struct net *net, struct ip_vs_app *inc) { - struct netns_ipvs *ipvs = net_ipvs(net); struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_SCTP); - spin_lock_bh(&ipvs->sctp_app_lock); atomic_dec(&pd->appcnt); - list_del(&inc->p_list); - spin_unlock_bh(&ipvs->sctp_app_lock); + list_del_rcu(&inc->p_list); } static int sctp_app_conn_bind(struct ip_vs_conn *cp) @@ -1053,12 +1050,12 @@ static int sctp_app_conn_bind(struct ip_vs_conn *cp) /* Lookup application incarnations and bind the right one */ hash = sctp_app_hashkey(cp->vport); - spin_lock(&ipvs->sctp_app_lock); - list_for_each_entry(inc, &ipvs->sctp_apps[hash], p_list) { + rcu_read_lock(); + list_for_each_entry_rcu(inc, &ipvs->sctp_apps[hash], p_list) { if (inc->port == cp->vport) { if (unlikely(!ip_vs_app_inc_get(inc))) break; - 
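Two plain bug fixes sit among the RCU conversion above: sctp_events[] grows from 255 to 256 slots because it is indexed by the 8-bit chunk type, whose range is 0..255, and the cookie/abort follow-up chunk is now located with ALIGN(clen, 4), since SCTP chunks are padded to 4-byte boundaries on the wire. Both points in a runnable illustration (constants ours):

#include <stdio.h>

#define ALIGN4(x) (((x) + 3U) & ~3U)

int main(void)
{
    /* An 8-bit index spans 0..255, so the lookup table needs 256
     * entries; with only 255, events[255] reads out of bounds. */
    unsigned char events[256] = { 0 };
    unsigned char type = 255;

    events[type] = 1;

    /* A chunk of length 17 is followed by padding to 20, which is
     * where skb_header_pointer() must look for the next chunk. */
    printf("events[255]=%u, next chunk offset after len 17: %u\n",
           events[type], ALIGN4(17));
    return 0;
}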
spin_unlock(&ipvs->sctp_app_lock); + rcu_read_unlock(); IP_VS_DBG_BUF(9, "%s: Binding conn %s:%u->" "%s:%u to app %s on port %u\n", @@ -1074,7 +1071,7 @@ static int sctp_app_conn_bind(struct ip_vs_conn *cp) goto out; } } - spin_unlock(&ipvs->sctp_app_lock); + rcu_read_unlock(); out: return result; } @@ -1088,7 +1085,6 @@ static int __ip_vs_sctp_init(struct net *net, struct ip_vs_proto_data *pd) struct netns_ipvs *ipvs = net_ipvs(net); ip_vs_init_hash_table(ipvs->sctp_apps, SCTP_APP_TAB_SIZE); - spin_lock_init(&ipvs->sctp_app_lock); pd->timeout_table = ip_vs_create_timeout_table((int *)sctp_timeouts, sizeof(sctp_timeouts)); if (!pd->timeout_table) diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c index 9af653a75825..50a15944c6c1 100644 --- a/net/netfilter/ipvs/ip_vs_proto_tcp.c +++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c @@ -47,9 +47,10 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, } net = skb_net(skb); /* No !th->ack check to allow scheduling on SYN+ACK for Active FTP */ + rcu_read_lock(); if (th->syn && - (svc = ip_vs_service_get(net, af, skb->mark, iph->protocol, - &iph->daddr, th->dest))) { + (svc = ip_vs_service_find(net, af, skb->mark, iph->protocol, + &iph->daddr, th->dest))) { int ignored; if (ip_vs_todrop(net_ipvs(net))) { @@ -57,7 +58,7 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, * It seems that we are very loaded. * We have to drop this packet :( */ - ip_vs_service_put(svc); + rcu_read_unlock(); *verdict = NF_DROP; return 0; } @@ -70,14 +71,13 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, if (!*cpp && ignored <= 0) { if (!ignored) *verdict = ip_vs_leave(svc, skb, pd, iph); - else { - ip_vs_service_put(svc); + else *verdict = NF_DROP; - } + rcu_read_unlock(); return 0; } - ip_vs_service_put(svc); } + rcu_read_unlock(); /* NF_ACCEPT */ return 1; } @@ -557,9 +557,9 @@ tcp_state_transition(struct ip_vs_conn *cp, int direction, if (th == NULL) return; - spin_lock(&cp->lock); + spin_lock_bh(&cp->lock); set_tcp_state(pd, cp, direction, th); - spin_unlock(&cp->lock); + spin_unlock_bh(&cp->lock); } static inline __u16 tcp_app_hashkey(__be16 port) @@ -580,18 +580,16 @@ static int tcp_register_app(struct net *net, struct ip_vs_app *inc) hash = tcp_app_hashkey(port); - spin_lock_bh(&ipvs->tcp_app_lock); list_for_each_entry(i, &ipvs->tcp_apps[hash], p_list) { if (i->port == port) { ret = -EEXIST; goto out; } } - list_add(&inc->p_list, &ipvs->tcp_apps[hash]); + list_add_rcu(&inc->p_list, &ipvs->tcp_apps[hash]); atomic_inc(&pd->appcnt); out: - spin_unlock_bh(&ipvs->tcp_app_lock); return ret; } @@ -599,13 +597,10 @@ static int tcp_register_app(struct net *net, struct ip_vs_app *inc) static void tcp_unregister_app(struct net *net, struct ip_vs_app *inc) { - struct netns_ipvs *ipvs = net_ipvs(net); struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_TCP); - spin_lock_bh(&ipvs->tcp_app_lock); atomic_dec(&pd->appcnt); - list_del(&inc->p_list); - spin_unlock_bh(&ipvs->tcp_app_lock); + list_del_rcu(&inc->p_list); } @@ -624,12 +619,12 @@ tcp_app_conn_bind(struct ip_vs_conn *cp) /* Lookup application incarnations and bind the right one */ hash = tcp_app_hashkey(cp->vport); - spin_lock(&ipvs->tcp_app_lock); - list_for_each_entry(inc, &ipvs->tcp_apps[hash], p_list) { + rcu_read_lock(); + list_for_each_entry_rcu(inc, &ipvs->tcp_apps[hash], p_list) { if (inc->port == cp->vport) { if (unlikely(!ip_vs_app_inc_get(inc))) break; - 
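The conn_schedule paths for SCTP and TCP above (UDP follows the same pattern below) trade per-packet reference counting for RCU pinning: the ip_vs_service_get()/ip_vs_service_put() pairs become a single ip_vs_service_find() inside one rcu_read_lock() section, and every early exit now only needs the unlock. The resulting shape (kernel fragment, condensed and illustrative):

rcu_read_lock();
svc = ip_vs_service_find(net, af, skb->mark, iph->protocol,
                         &iph->daddr, th->dest);
if (svc) {
    /* schedule the connection; svc is pinned only by the RCU
     * read-side section and must not be used past the unlock */
}
rcu_read_unlock();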
spin_unlock(&ipvs->tcp_app_lock); + rcu_read_unlock(); IP_VS_DBG_BUF(9, "%s(): Binding conn %s:%u->" "%s:%u to app %s on port %u\n", @@ -646,7 +641,7 @@ tcp_app_conn_bind(struct ip_vs_conn *cp) goto out; } } - spin_unlock(&ipvs->tcp_app_lock); + rcu_read_unlock(); out: return result; @@ -660,11 +655,11 @@ void ip_vs_tcp_conn_listen(struct net *net, struct ip_vs_conn *cp) { struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_TCP); - spin_lock(&cp->lock); + spin_lock_bh(&cp->lock); cp->state = IP_VS_TCP_S_LISTEN; cp->timeout = (pd ? pd->timeout_table[IP_VS_TCP_S_LISTEN] : tcp_timeouts[IP_VS_TCP_S_LISTEN]); - spin_unlock(&cp->lock); + spin_unlock_bh(&cp->lock); } /* --------------------------------------------- @@ -676,7 +671,6 @@ static int __ip_vs_tcp_init(struct net *net, struct ip_vs_proto_data *pd) struct netns_ipvs *ipvs = net_ipvs(net); ip_vs_init_hash_table(ipvs->tcp_apps, TCP_APP_TAB_SIZE); - spin_lock_init(&ipvs->tcp_app_lock); pd->timeout_table = ip_vs_create_timeout_table((int *)tcp_timeouts, sizeof(tcp_timeouts)); if (!pd->timeout_table) diff --git a/net/netfilter/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c index 503a842c90d2..b62a3c0ff9bf 100644 --- a/net/netfilter/ipvs/ip_vs_proto_udp.c +++ b/net/netfilter/ipvs/ip_vs_proto_udp.c @@ -44,8 +44,9 @@ udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, return 0; } net = skb_net(skb); - svc = ip_vs_service_get(net, af, skb->mark, iph->protocol, - &iph->daddr, uh->dest); + rcu_read_lock(); + svc = ip_vs_service_find(net, af, skb->mark, iph->protocol, + &iph->daddr, uh->dest); if (svc) { int ignored; @@ -54,7 +55,7 @@ udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, * It seems that we are very loaded. * We have to drop this packet :( */ - ip_vs_service_put(svc); + rcu_read_unlock(); *verdict = NF_DROP; return 0; } @@ -67,14 +68,13 @@ udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, if (!*cpp && ignored <= 0) { if (!ignored) *verdict = ip_vs_leave(svc, skb, pd, iph); - else { - ip_vs_service_put(svc); + else *verdict = NF_DROP; - } + rcu_read_unlock(); return 0; } - ip_vs_service_put(svc); } + rcu_read_unlock(); /* NF_ACCEPT */ return 1; } @@ -359,19 +359,16 @@ static int udp_register_app(struct net *net, struct ip_vs_app *inc) hash = udp_app_hashkey(port); - - spin_lock_bh(&ipvs->udp_app_lock); list_for_each_entry(i, &ipvs->udp_apps[hash], p_list) { if (i->port == port) { ret = -EEXIST; goto out; } } - list_add(&inc->p_list, &ipvs->udp_apps[hash]); + list_add_rcu(&inc->p_list, &ipvs->udp_apps[hash]); atomic_inc(&pd->appcnt); out: - spin_unlock_bh(&ipvs->udp_app_lock); return ret; } @@ -380,12 +377,9 @@ static void udp_unregister_app(struct net *net, struct ip_vs_app *inc) { struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_UDP); - struct netns_ipvs *ipvs = net_ipvs(net); - spin_lock_bh(&ipvs->udp_app_lock); atomic_dec(&pd->appcnt); - list_del(&inc->p_list); - spin_unlock_bh(&ipvs->udp_app_lock); + list_del_rcu(&inc->p_list); } @@ -403,12 +397,12 @@ static int udp_app_conn_bind(struct ip_vs_conn *cp) /* Lookup application incarnations and bind the right one */ hash = udp_app_hashkey(cp->vport); - spin_lock(&ipvs->udp_app_lock); - list_for_each_entry(inc, &ipvs->udp_apps[hash], p_list) { + rcu_read_lock(); + list_for_each_entry_rcu(inc, &ipvs->udp_apps[hash], p_list) { if (inc->port == cp->vport) { if (unlikely(!ip_vs_app_inc_get(inc))) break; - spin_unlock(&ipvs->udp_app_lock); + rcu_read_unlock(); IP_VS_DBG_BUF(9, 
"%s(): Binding conn %s:%u->" "%s:%u to app %s on port %u\n", @@ -425,7 +419,7 @@ static int udp_app_conn_bind(struct ip_vs_conn *cp) goto out; } } - spin_unlock(&ipvs->udp_app_lock); + rcu_read_unlock(); out: return result; @@ -467,7 +461,6 @@ static int __udp_init(struct net *net, struct ip_vs_proto_data *pd) struct netns_ipvs *ipvs = net_ipvs(net); ip_vs_init_hash_table(ipvs->udp_apps, UDP_APP_TAB_SIZE); - spin_lock_init(&ipvs->udp_app_lock); pd->timeout_table = ip_vs_create_timeout_table((int *)udp_timeouts, sizeof(udp_timeouts)); if (!pd->timeout_table) diff --git a/net/netfilter/ipvs/ip_vs_rr.c b/net/netfilter/ipvs/ip_vs_rr.c index c49b388d1085..c35986c793d9 100644 --- a/net/netfilter/ipvs/ip_vs_rr.c +++ b/net/netfilter/ipvs/ip_vs_rr.c @@ -35,9 +35,18 @@ static int ip_vs_rr_init_svc(struct ip_vs_service *svc) } -static int ip_vs_rr_update_svc(struct ip_vs_service *svc) +static int ip_vs_rr_del_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest) { - svc->sched_data = &svc->destinations; + struct list_head *p; + + spin_lock_bh(&svc->sched_lock); + p = (struct list_head *) svc->sched_data; + /* dest is already unlinked, so p->prev is not valid but + * p->next is valid, use it to reach previous entry. + */ + if (p == &dest->n_list) + svc->sched_data = p->next->prev; + spin_unlock_bh(&svc->sched_lock); return 0; } @@ -48,36 +57,41 @@ static int ip_vs_rr_update_svc(struct ip_vs_service *svc) static struct ip_vs_dest * ip_vs_rr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) { - struct list_head *p, *q; - struct ip_vs_dest *dest; + struct list_head *p; + struct ip_vs_dest *dest, *last; + int pass = 0; IP_VS_DBG(6, "%s(): Scheduling...\n", __func__); - write_lock(&svc->sched_lock); - p = (struct list_head *)svc->sched_data; - p = p->next; - q = p; + spin_lock_bh(&svc->sched_lock); + p = (struct list_head *) svc->sched_data; + last = dest = list_entry(p, struct ip_vs_dest, n_list); + do { - /* skip list head */ - if (q == &svc->destinations) { - q = q->next; - continue; + list_for_each_entry_continue_rcu(dest, + &svc->destinations, + n_list) { + if (!(dest->flags & IP_VS_DEST_F_OVERLOAD) && + atomic_read(&dest->weight) > 0) + /* HIT */ + goto out; + if (dest == last) + goto stop; } - - dest = list_entry(q, struct ip_vs_dest, n_list); - if (!(dest->flags & IP_VS_DEST_F_OVERLOAD) && - atomic_read(&dest->weight) > 0) - /* HIT */ - goto out; - q = q->next; - } while (q != p); - write_unlock(&svc->sched_lock); + pass++; + /* Previous dest could be unlinked, do not loop forever. + * If we stay at head there is no need for 2nd pass. 
+ */ + } while (pass < 2 && p != &svc->destinations); + +stop: + spin_unlock_bh(&svc->sched_lock); ip_vs_scheduler_err(svc, "no destination available"); return NULL; out: - svc->sched_data = q; - write_unlock(&svc->sched_lock); + svc->sched_data = &dest->n_list; + spin_unlock_bh(&svc->sched_lock); IP_VS_DBG_BUF(6, "RR: server %s:%u " "activeconns %d refcnt %d weight %d\n", IP_VS_DBG_ADDR(svc->af, &dest->addr), ntohs(dest->port), @@ -94,7 +108,8 @@ static struct ip_vs_scheduler ip_vs_rr_scheduler = { .module = THIS_MODULE, .n_list = LIST_HEAD_INIT(ip_vs_rr_scheduler.n_list), .init_service = ip_vs_rr_init_svc, - .update_service = ip_vs_rr_update_svc, + .add_dest = NULL, + .del_dest = ip_vs_rr_del_dest, .schedule = ip_vs_rr_schedule, }; @@ -106,6 +121,7 @@ static int __init ip_vs_rr_init(void) static void __exit ip_vs_rr_cleanup(void) { unregister_ip_vs_scheduler(&ip_vs_rr_scheduler); + synchronize_rcu(); } module_init(ip_vs_rr_init); diff --git a/net/netfilter/ipvs/ip_vs_sched.c b/net/netfilter/ipvs/ip_vs_sched.c index d6bf20d6cdbe..4dbcda6258bc 100644 --- a/net/netfilter/ipvs/ip_vs_sched.c +++ b/net/netfilter/ipvs/ip_vs_sched.c @@ -35,8 +35,8 @@ EXPORT_SYMBOL(ip_vs_scheduler_err); */ static LIST_HEAD(ip_vs_schedulers); -/* lock for service table */ -static DEFINE_SPINLOCK(ip_vs_sched_lock); +/* semaphore for schedulers */ +static DEFINE_MUTEX(ip_vs_sched_mutex); /* @@ -47,8 +47,6 @@ int ip_vs_bind_scheduler(struct ip_vs_service *svc, { int ret; - svc->scheduler = scheduler; - if (scheduler->init_service) { ret = scheduler->init_service(svc); if (ret) { @@ -56,7 +54,7 @@ int ip_vs_bind_scheduler(struct ip_vs_service *svc, return ret; } } - + rcu_assign_pointer(svc->scheduler, scheduler); return 0; } @@ -64,22 +62,19 @@ int ip_vs_bind_scheduler(struct ip_vs_service *svc, /* * Unbind a service with its scheduler */ -int ip_vs_unbind_scheduler(struct ip_vs_service *svc) +void ip_vs_unbind_scheduler(struct ip_vs_service *svc, + struct ip_vs_scheduler *sched) { - struct ip_vs_scheduler *sched = svc->scheduler; + struct ip_vs_scheduler *cur_sched; - if (!sched) - return 0; + cur_sched = rcu_dereference_protected(svc->scheduler, 1); + /* This check proves that old 'sched' was installed */ + if (!cur_sched) + return; - if (sched->done_service) { - if (sched->done_service(svc) != 0) { - pr_err("%s(): done error\n", __func__); - return -EINVAL; - } - } - - svc->scheduler = NULL; - return 0; + if (sched->done_service) + sched->done_service(svc); + /* svc->scheduler can not be set to NULL */ } @@ -92,7 +87,7 @@ static struct ip_vs_scheduler *ip_vs_sched_getbyname(const char *sched_name) IP_VS_DBG(2, "%s(): sched_name \"%s\"\n", __func__, sched_name); - spin_lock_bh(&ip_vs_sched_lock); + mutex_lock(&ip_vs_sched_mutex); list_for_each_entry(sched, &ip_vs_schedulers, n_list) { /* @@ -106,14 +101,14 @@ static struct ip_vs_scheduler *ip_vs_sched_getbyname(const char *sched_name) } if (strcmp(sched_name, sched->name)==0) { /* HIT */ - spin_unlock_bh(&ip_vs_sched_lock); + mutex_unlock(&ip_vs_sched_mutex); return sched; } if (sched->module) module_put(sched->module); } - spin_unlock_bh(&ip_vs_sched_lock); + mutex_unlock(&ip_vs_sched_mutex); return NULL; } @@ -153,21 +148,21 @@ void ip_vs_scheduler_put(struct ip_vs_scheduler *scheduler) void ip_vs_scheduler_err(struct ip_vs_service *svc, const char *msg) { + struct ip_vs_scheduler *sched; + + sched = rcu_dereference(svc->scheduler); if (svc->fwmark) { IP_VS_ERR_RL("%s: FWM %u 0x%08X - %s\n", - svc->scheduler->name, svc->fwmark, - svc->fwmark, msg); + sched->name, 
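The ip_vs_rr.c rework above deserves a close look. The old update_service callback reset the cursor to the list head on every change; the new del_dest callback only repairs the cursor when the destination being removed is the current one, using p->next->prev. That works because list_del_rcu() has already relinked the neighbours but deliberately keeps the removed entry's next pointer valid for concurrent readers, so from the dead node, next->prev lands on its live predecessor. A runnable miniature of that property (our own list type, unlinking the way list_del_rcu() does):

#include <stdio.h>

struct node { struct node *prev, *next; const char *name; };

/* Unlink n but keep n->next usable, as list_del_rcu() does
 * (the kernel poisons n->prev instead of clearing it). */
static void del_rcu_style(struct node *n)
{
    n->next->prev = n->prev;
    n->prev->next = n->next;
    n->prev = NULL;
}

int main(void)
{
    struct node head = { &head, &head, "head" };
    struct node a, b, c;
    struct node *nodes[3] = { &a, &b, &c };
    const char *names[3] = { "a", "b", "c" };
    struct node *p = &head;
    int i;

    for (i = 0; i < 3; i++) {    /* build head<->a<->b<->c */
        nodes[i]->name = names[i];
        nodes[i]->prev = p;
        nodes[i]->next = p->next;
        p->next->prev = nodes[i];
        p->next = nodes[i];
        p = nodes[i];
    }
    del_rcu_style(&b);
    /* from the removed node, next->prev is the live predecessor */
    printf("b->next->prev is %s\n", b.next->prev->name);    /* a */
    return 0;
}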
svc->fwmark, svc->fwmark, msg); #ifdef CONFIG_IP_VS_IPV6 } else if (svc->af == AF_INET6) { IP_VS_ERR_RL("%s: %s [%pI6c]:%d - %s\n", - svc->scheduler->name, - ip_vs_proto_name(svc->protocol), + sched->name, ip_vs_proto_name(svc->protocol), &svc->addr.in6, ntohs(svc->port), msg); #endif } else { IP_VS_ERR_RL("%s: %s %pI4:%d - %s\n", - svc->scheduler->name, - ip_vs_proto_name(svc->protocol), + sched->name, ip_vs_proto_name(svc->protocol), &svc->addr.ip, ntohs(svc->port), msg); } } @@ -192,10 +187,10 @@ int register_ip_vs_scheduler(struct ip_vs_scheduler *scheduler) /* increase the module use count */ ip_vs_use_count_inc(); - spin_lock_bh(&ip_vs_sched_lock); + mutex_lock(&ip_vs_sched_mutex); if (!list_empty(&scheduler->n_list)) { - spin_unlock_bh(&ip_vs_sched_lock); + mutex_unlock(&ip_vs_sched_mutex); ip_vs_use_count_dec(); pr_err("%s(): [%s] scheduler already linked\n", __func__, scheduler->name); @@ -208,7 +203,7 @@ int register_ip_vs_scheduler(struct ip_vs_scheduler *scheduler) */ list_for_each_entry(sched, &ip_vs_schedulers, n_list) { if (strcmp(scheduler->name, sched->name) == 0) { - spin_unlock_bh(&ip_vs_sched_lock); + mutex_unlock(&ip_vs_sched_mutex); ip_vs_use_count_dec(); pr_err("%s(): [%s] scheduler already existed " "in the system\n", __func__, scheduler->name); @@ -219,7 +214,7 @@ int register_ip_vs_scheduler(struct ip_vs_scheduler *scheduler) * Add it into the d-linked scheduler list */ list_add(&scheduler->n_list, &ip_vs_schedulers); - spin_unlock_bh(&ip_vs_sched_lock); + mutex_unlock(&ip_vs_sched_mutex); pr_info("[%s] scheduler registered.\n", scheduler->name); @@ -237,9 +232,9 @@ int unregister_ip_vs_scheduler(struct ip_vs_scheduler *scheduler) return -EINVAL; } - spin_lock_bh(&ip_vs_sched_lock); + mutex_lock(&ip_vs_sched_mutex); if (list_empty(&scheduler->n_list)) { - spin_unlock_bh(&ip_vs_sched_lock); + mutex_unlock(&ip_vs_sched_mutex); pr_err("%s(): [%s] scheduler is not in the list. failed\n", __func__, scheduler->name); return -EINVAL; @@ -249,7 +244,7 @@ int unregister_ip_vs_scheduler(struct ip_vs_scheduler *scheduler) * Remove it from the d-linked scheduler list */ list_del(&scheduler->n_list); - spin_unlock_bh(&ip_vs_sched_lock); + mutex_unlock(&ip_vs_sched_mutex); /* decrease the module use count */ ip_vs_use_count_dec(); diff --git a/net/netfilter/ipvs/ip_vs_sed.c b/net/netfilter/ipvs/ip_vs_sed.c index 89ead246ed3d..f3205925359a 100644 --- a/net/netfilter/ipvs/ip_vs_sed.c +++ b/net/netfilter/ipvs/ip_vs_sed.c @@ -79,7 +79,7 @@ ip_vs_sed_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) * new connections. */ - list_for_each_entry(dest, &svc->destinations, n_list) { + list_for_each_entry_rcu(dest, &svc->destinations, n_list) { if (!(dest->flags & IP_VS_DEST_F_OVERLOAD) && atomic_read(&dest->weight) > 0) { least = dest; @@ -94,7 +94,7 @@ ip_vs_sed_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) * Find the destination with the least load. 
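In ip_vs_sched.c above, ip_vs_bind_scheduler() now runs init_service() first and publishes last through rcu_assign_pointer(), whose release semantics guarantee that any reader who sees the new svc->scheduler also sees the fully initialized sched_data behind it. The same publish/observe shape in portable C11 atomics (a userspace analogue, not the kernel primitives):

#include <stdatomic.h>
#include <stdio.h>

struct sched { int ready; };

static _Atomic(struct sched *) current_sched;

/* Publisher: initialize everything, then release-store the pointer,
 * like init_service() followed by rcu_assign_pointer(). */
static void bind_scheduler(struct sched *s)
{
    s->ready = 1;
    atomic_store_explicit(&current_sched, s, memory_order_release);
}

/* Reader: if the acquire load sees the pointer, ready is 1. */
static int reader(void)
{
    struct sched *s =
        atomic_load_explicit(&current_sched, memory_order_acquire);
    return s ? s->ready : -1;
}

int main(void)
{
    static struct sched s;

    bind_scheduler(&s);
    printf("reader sees ready=%d\n", reader());
    return 0;
}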
*/ nextstage: - list_for_each_entry_continue(dest, &svc->destinations, n_list) { + list_for_each_entry_continue_rcu(dest, &svc->destinations, n_list) { if (dest->flags & IP_VS_DEST_F_OVERLOAD) continue; doh = ip_vs_sed_dest_overhead(dest); @@ -134,6 +134,7 @@ static int __init ip_vs_sed_init(void) static void __exit ip_vs_sed_cleanup(void) { unregister_ip_vs_scheduler(&ip_vs_sed_scheduler); + synchronize_rcu(); } module_init(ip_vs_sed_init); diff --git a/net/netfilter/ipvs/ip_vs_sh.c b/net/netfilter/ipvs/ip_vs_sh.c index e33126994628..0df269d7c99f 100644 --- a/net/netfilter/ipvs/ip_vs_sh.c +++ b/net/netfilter/ipvs/ip_vs_sh.c @@ -53,7 +53,7 @@ * IPVS SH bucket */ struct ip_vs_sh_bucket { - struct ip_vs_dest *dest; /* real server (cache) */ + struct ip_vs_dest __rcu *dest; /* real server (cache) */ }; /* @@ -66,6 +66,10 @@ struct ip_vs_sh_bucket { #define IP_VS_SH_TAB_SIZE (1 << IP_VS_SH_TAB_BITS) #define IP_VS_SH_TAB_MASK (IP_VS_SH_TAB_SIZE - 1) +struct ip_vs_sh_state { + struct ip_vs_sh_bucket buckets[IP_VS_SH_TAB_SIZE]; + struct rcu_head rcu_head; +}; /* * Returns hash value for IPVS SH entry @@ -87,10 +91,9 @@ static inline unsigned int ip_vs_sh_hashkey(int af, const union nf_inet_addr *ad * Get ip_vs_dest associated with supplied parameters. */ static inline struct ip_vs_dest * -ip_vs_sh_get(int af, struct ip_vs_sh_bucket *tbl, - const union nf_inet_addr *addr) +ip_vs_sh_get(int af, struct ip_vs_sh_state *s, const union nf_inet_addr *addr) { - return (tbl[ip_vs_sh_hashkey(af, addr)]).dest; + return rcu_dereference(s->buckets[ip_vs_sh_hashkey(af, addr)].dest); } @@ -98,27 +101,32 @@ ip_vs_sh_get(int af, struct ip_vs_sh_bucket *tbl, * Assign all the hash buckets of the specified table with the service. */ static int -ip_vs_sh_assign(struct ip_vs_sh_bucket *tbl, struct ip_vs_service *svc) +ip_vs_sh_reassign(struct ip_vs_sh_state *s, struct ip_vs_service *svc) { int i; struct ip_vs_sh_bucket *b; struct list_head *p; struct ip_vs_dest *dest; int d_count; + bool empty; - b = tbl; + b = &s->buckets[0]; p = &svc->destinations; + empty = list_empty(p); d_count = 0; for (i=0; i<IP_VS_SH_TAB_SIZE; i++) { - if (list_empty(p)) { - b->dest = NULL; - } else { + dest = rcu_dereference_protected(b->dest, 1); + if (dest) + ip_vs_dest_put(dest); + if (empty) + RCU_INIT_POINTER(b->dest, NULL); + else { if (p == &svc->destinations) p = p->next; dest = list_entry(p, struct ip_vs_dest, n_list); - atomic_inc(&dest->refcnt); - b->dest = dest; + ip_vs_dest_hold(dest); + RCU_INIT_POINTER(b->dest, dest); IP_VS_DBG_BUF(6, "assigned i: %d dest: %s weight: %d\n", i, IP_VS_DBG_ADDR(svc->af, &dest->addr), @@ -140,16 +148,18 @@ ip_vs_sh_assign(struct ip_vs_sh_bucket *tbl, struct ip_vs_service *svc) /* * Flush all the hash buckets of the specified table. 
*/ -static void ip_vs_sh_flush(struct ip_vs_sh_bucket *tbl) +static void ip_vs_sh_flush(struct ip_vs_sh_state *s) { int i; struct ip_vs_sh_bucket *b; + struct ip_vs_dest *dest; - b = tbl; + b = &s->buckets[0]; for (i=0; i<IP_VS_SH_TAB_SIZE; i++) { - if (b->dest) { - atomic_dec(&b->dest->refcnt); - b->dest = NULL; + dest = rcu_dereference_protected(b->dest, 1); + if (dest) { + ip_vs_dest_put(dest); + RCU_INIT_POINTER(b->dest, NULL); } b++; } @@ -158,51 +168,46 @@ static void ip_vs_sh_flush(struct ip_vs_sh_bucket *tbl) static int ip_vs_sh_init_svc(struct ip_vs_service *svc) { - struct ip_vs_sh_bucket *tbl; + struct ip_vs_sh_state *s; /* allocate the SH table for this service */ - tbl = kmalloc(sizeof(struct ip_vs_sh_bucket)*IP_VS_SH_TAB_SIZE, - GFP_KERNEL); - if (tbl == NULL) + s = kzalloc(sizeof(struct ip_vs_sh_state), GFP_KERNEL); + if (s == NULL) return -ENOMEM; - svc->sched_data = tbl; + svc->sched_data = s; IP_VS_DBG(6, "SH hash table (memory=%Zdbytes) allocated for " "current service\n", sizeof(struct ip_vs_sh_bucket)*IP_VS_SH_TAB_SIZE); - /* assign the hash buckets with the updated service */ - ip_vs_sh_assign(tbl, svc); + /* assign the hash buckets with current dests */ + ip_vs_sh_reassign(s, svc); return 0; } -static int ip_vs_sh_done_svc(struct ip_vs_service *svc) +static void ip_vs_sh_done_svc(struct ip_vs_service *svc) { - struct ip_vs_sh_bucket *tbl = svc->sched_data; + struct ip_vs_sh_state *s = svc->sched_data; /* got to clean up hash buckets here */ - ip_vs_sh_flush(tbl); + ip_vs_sh_flush(s); /* release the table itself */ - kfree(svc->sched_data); + kfree_rcu(s, rcu_head); IP_VS_DBG(6, "SH hash table (memory=%Zdbytes) released\n", sizeof(struct ip_vs_sh_bucket)*IP_VS_SH_TAB_SIZE); - - return 0; } -static int ip_vs_sh_update_svc(struct ip_vs_service *svc) +static int ip_vs_sh_dest_changed(struct ip_vs_service *svc, + struct ip_vs_dest *dest) { - struct ip_vs_sh_bucket *tbl = svc->sched_data; - - /* got to clean up hash buckets here */ - ip_vs_sh_flush(tbl); + struct ip_vs_sh_state *s = svc->sched_data; /* assign the hash buckets with the updated service */ - ip_vs_sh_assign(tbl, svc); + ip_vs_sh_reassign(s, svc); return 0; } @@ -225,15 +230,15 @@ static struct ip_vs_dest * ip_vs_sh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) { struct ip_vs_dest *dest; - struct ip_vs_sh_bucket *tbl; + struct ip_vs_sh_state *s; struct ip_vs_iphdr iph; ip_vs_fill_iph_addr_only(svc->af, skb, &iph); IP_VS_DBG(6, "ip_vs_sh_schedule(): Scheduling...\n"); - tbl = (struct ip_vs_sh_bucket *)svc->sched_data; - dest = ip_vs_sh_get(svc->af, tbl, &iph.saddr); + s = (struct ip_vs_sh_state *) svc->sched_data; + dest = ip_vs_sh_get(svc->af, s, &iph.saddr); if (!dest || !(dest->flags & IP_VS_DEST_F_AVAILABLE) || atomic_read(&dest->weight) <= 0 @@ -262,7 +267,9 @@ static struct ip_vs_scheduler ip_vs_sh_scheduler = .n_list = LIST_HEAD_INIT(ip_vs_sh_scheduler.n_list), .init_service = ip_vs_sh_init_svc, .done_service = ip_vs_sh_done_svc, - .update_service = ip_vs_sh_update_svc, + .add_dest = ip_vs_sh_dest_changed, + .del_dest = ip_vs_sh_dest_changed, + .upd_dest = ip_vs_sh_dest_changed, .schedule = ip_vs_sh_schedule, }; @@ -276,6 +283,7 @@ static int __init ip_vs_sh_init(void) static void __exit ip_vs_sh_cleanup(void) { unregister_ip_vs_scheduler(&ip_vs_sh_scheduler); + synchronize_rcu(); } diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index 44fd10c539ac..f6046d9af8d3 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -246,7 
+246,7 @@ struct ip_vs_sync_thread_data { struct ip_vs_sync_mesg_v0 { __u8 nr_conns; __u8 syncid; - __u16 size; + __be16 size; /* ip_vs_sync_conn entries start here */ }; @@ -255,7 +255,7 @@ struct ip_vs_sync_mesg_v0 { struct ip_vs_sync_mesg { __u8 reserved; /* must be zero */ __u8 syncid; - __u16 size; + __be16 size; __u8 nr_conns; __s8 version; /* SYNC_PROTO_VER */ __u16 spare; @@ -335,7 +335,7 @@ ip_vs_sync_buff_create(struct netns_ipvs *ipvs) sb->mesg->reserved = 0; /* old nr_conns i.e. must be zero now */ sb->mesg->version = SYNC_PROTO_VER; sb->mesg->syncid = ipvs->master_syncid; - sb->mesg->size = sizeof(struct ip_vs_sync_mesg); + sb->mesg->size = htons(sizeof(struct ip_vs_sync_mesg)); sb->mesg->nr_conns = 0; sb->mesg->spare = 0; sb->head = (unsigned char *)sb->mesg + sizeof(struct ip_vs_sync_mesg); @@ -418,7 +418,7 @@ ip_vs_sync_buff_create_v0(struct netns_ipvs *ipvs) mesg = (struct ip_vs_sync_mesg_v0 *)sb->mesg; mesg->nr_conns = 0; mesg->syncid = ipvs->master_syncid; - mesg->size = sizeof(struct ip_vs_sync_mesg_v0); + mesg->size = htons(sizeof(struct ip_vs_sync_mesg_v0)); sb->head = (unsigned char *)mesg + sizeof(struct ip_vs_sync_mesg_v0); sb->end = (unsigned char *)mesg + ipvs->send_mesg_maxlen; sb->firstuse = jiffies; @@ -531,9 +531,9 @@ static void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp, if (!ip_vs_sync_conn_needed(ipvs, cp, pkts)) return; - spin_lock(&ipvs->sync_buff_lock); + spin_lock_bh(&ipvs->sync_buff_lock); if (!(ipvs->sync_state & IP_VS_STATE_MASTER)) { - spin_unlock(&ipvs->sync_buff_lock); + spin_unlock_bh(&ipvs->sync_buff_lock); return; } @@ -552,7 +552,7 @@ static void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp, if (!buff) { buff = ip_vs_sync_buff_create_v0(ipvs); if (!buff) { - spin_unlock(&ipvs->sync_buff_lock); + spin_unlock_bh(&ipvs->sync_buff_lock); pr_err("ip_vs_sync_buff_create failed.\n"); return; } @@ -582,7 +582,7 @@ static void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp, } m->nr_conns++; - m->size += len; + m->size = htons(ntohs(m->size) + len); buff->head += len; /* check if there is a space for next one */ @@ -590,7 +590,7 @@ static void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp, sb_queue_tail(ipvs, ms); ms->sync_buff = NULL; } - spin_unlock(&ipvs->sync_buff_lock); + spin_unlock_bh(&ipvs->sync_buff_lock); /* synchronize its controller if it has */ cp = cp->control; @@ -641,9 +641,9 @@ sloop: pe_name_len = strnlen(cp->pe->name, IP_VS_PENAME_MAXLEN); } - spin_lock(&ipvs->sync_buff_lock); + spin_lock_bh(&ipvs->sync_buff_lock); if (!(ipvs->sync_state & IP_VS_STATE_MASTER)) { - spin_unlock(&ipvs->sync_buff_lock); + spin_unlock_bh(&ipvs->sync_buff_lock); return; } @@ -683,7 +683,7 @@ sloop: if (!buff) { buff = ip_vs_sync_buff_create(ipvs); if (!buff) { - spin_unlock(&ipvs->sync_buff_lock); + spin_unlock_bh(&ipvs->sync_buff_lock); pr_err("ip_vs_sync_buff_create failed.\n"); return; } @@ -693,7 +693,7 @@ sloop: p = buff->head; buff->head += pad + len; - m->size += pad + len; + m->size = htons(ntohs(m->size) + pad + len); /* Add ev. padding from prev. 
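The ip_vs_sync.c hunks above stop flipping the message size between byte orders in place: the field is declared __be16, set with htons() at buffer creation, and grown with m->size = htons(ntohs(m->size) + len) on every append, so the header is always wire-ready and the receive path can validate without mutating the buffer. The accounting in a runnable sketch (the struct is a simplified stand-in, not the real ip_vs_sync_mesg):

#include <arpa/inet.h>
#include <stdint.h>
#include <stdio.h>

struct sync_mesg {
    uint8_t  nr_conns;
    uint16_t size;    /* big-endian in memory and on the wire */
};

static void mesg_init(struct sync_mesg *m)
{
    m->nr_conns = 0;
    m->size = htons(sizeof(*m));
}

static void mesg_append(struct sync_mesg *m, unsigned int len)
{
    m->nr_conns++;
    m->size = htons(ntohs(m->size) + len);  /* stays big-endian */
}

int main(void)
{
    struct sync_mesg m;

    mesg_init(&m);
    mesg_append(&m, 36);
    printf("size in host order: %u\n", (unsigned)ntohs(m.size));
    return 0;
}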
sync_conn */ while (pad--) *(p++) = 0; @@ -750,7 +750,7 @@ sloop: } } - spin_unlock(&ipvs->sync_buff_lock); + spin_unlock_bh(&ipvs->sync_buff_lock); control: /* synchronize its controller if it has */ @@ -843,7 +843,7 @@ static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param, kfree(param->pe_data); dest = cp->dest; - spin_lock(&cp->lock); + spin_lock_bh(&cp->lock); if ((cp->flags ^ flags) & IP_VS_CONN_F_INACTIVE && !(flags & IP_VS_CONN_F_TEMPLATE) && dest) { if (flags & IP_VS_CONN_F_INACTIVE) { @@ -857,24 +857,21 @@ static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param, flags &= IP_VS_CONN_F_BACKUP_UPD_MASK; flags |= cp->flags & ~IP_VS_CONN_F_BACKUP_UPD_MASK; cp->flags = flags; - spin_unlock(&cp->lock); - if (!dest) { - dest = ip_vs_try_bind_dest(cp); - if (dest) - atomic_dec(&dest->refcnt); - } + spin_unlock_bh(&cp->lock); + if (!dest) + ip_vs_try_bind_dest(cp); } else { /* * Find the appropriate destination for the connection. * If it is not found the connection will remain unbound * but still handled. */ + rcu_read_lock(); dest = ip_vs_find_dest(net, type, daddr, dport, param->vaddr, param->vport, protocol, fwmark, flags); cp = ip_vs_conn_new(param, daddr, dport, flags, dest, fwmark); - if (dest) - atomic_dec(&dest->refcnt); + rcu_read_unlock(); if (!cp) { if (param->pe_data) kfree(param->pe_data); @@ -1178,10 +1175,8 @@ static void ip_vs_process_message(struct net *net, __u8 *buffer, IP_VS_DBG(2, "BACKUP, message header too short\n"); return; } - /* Convert size back to host byte order */ - m2->size = ntohs(m2->size); - if (buflen != m2->size) { + if (buflen != ntohs(m2->size)) { IP_VS_DBG(2, "BACKUP, bogus message size\n"); return; } @@ -1547,10 +1542,7 @@ ip_vs_send_sync_msg(struct socket *sock, struct ip_vs_sync_mesg *msg) int msize; int ret; - msize = msg->size; - - /* Put size in network byte order */ - msg->size = htons(msg->size); + msize = ntohs(msg->size); ret = ip_vs_send_async(sock, (char *)msg, msize); if (ret >= 0 || ret == -EAGAIN) @@ -1692,11 +1684,7 @@ static int sync_thread_backup(void *data) break; } - /* disable bottom half, because it accesses the data - shared by softirq while getting/creating conns */ - local_bh_disable(); ip_vs_process_message(tinfo->net, tinfo->buf, len); - local_bh_enable(); } } diff --git a/net/netfilter/ipvs/ip_vs_wlc.c b/net/netfilter/ipvs/ip_vs_wlc.c index bc1bfc48a17f..c60a81c4ce9a 100644 --- a/net/netfilter/ipvs/ip_vs_wlc.c +++ b/net/netfilter/ipvs/ip_vs_wlc.c @@ -51,7 +51,7 @@ ip_vs_wlc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) * new connections. */ - list_for_each_entry(dest, &svc->destinations, n_list) { + list_for_each_entry_rcu(dest, &svc->destinations, n_list) { if (!(dest->flags & IP_VS_DEST_F_OVERLOAD) && atomic_read(&dest->weight) > 0) { least = dest; @@ -66,7 +66,7 @@ ip_vs_wlc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) * Find the destination with the least load. 
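[Editor's note] The sync-message hunks above switch ip_vs_sync_mesg.size to __be16 and keep it in network byte order at all times, updating it with htons(ntohs(m->size) + len) instead of fixing the byte order once just before transmission. A minimal userspace sketch of that discipline (toy struct and lengths, not the kernel's):

#include <arpa/inet.h>
#include <stdint.h>
#include <stdio.h>

struct mesg_hdr {
	uint8_t  nr_conns;
	uint8_t  syncid;
	uint16_t size;			/* always network byte order */
};

/* Grow the on-wire size field without ever leaving it in host order,
 * mirroring m->size = htons(ntohs(m->size) + len) in the hunks above. */
static void append_conn(struct mesg_hdr *m, uint16_t len)
{
	m->size = htons(ntohs(m->size) + len);
	m->nr_conns++;
}

int main(void)
{
	struct mesg_hdr m = { .size = htons(sizeof(m)) };

	append_conn(&m, 36);
	printf("on-wire size: %u bytes\n", (unsigned)ntohs(m.size));
	return 0;
}

The payoff is that the field is valid for transmission at every point, which is why ip_vs_send_sync_msg() above loses its convert-before-send step and ip_vs_process_message() stops converting in place.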
*/ nextstage: - list_for_each_entry_continue(dest, &svc->destinations, n_list) { + list_for_each_entry_continue_rcu(dest, &svc->destinations, n_list) { if (dest->flags & IP_VS_DEST_F_OVERLOAD) continue; doh = ip_vs_dest_conn_overhead(dest); @@ -106,6 +106,7 @@ static int __init ip_vs_wlc_init(void) static void __exit ip_vs_wlc_cleanup(void) { unregister_ip_vs_scheduler(&ip_vs_wlc_scheduler); + synchronize_rcu(); } module_init(ip_vs_wlc_init); diff --git a/net/netfilter/ipvs/ip_vs_wrr.c b/net/netfilter/ipvs/ip_vs_wrr.c index 231be7dd547a..0e68555bceb9 100644 --- a/net/netfilter/ipvs/ip_vs_wrr.c +++ b/net/netfilter/ipvs/ip_vs_wrr.c @@ -29,14 +29,45 @@ #include <net/ip_vs.h> +/* The WRR algorithm depends on some calculations: + * - mw: maximum weight + * - di: weight step, greatest common divisor from all weights + * - cw: current required weight + * As a result, all weights are in the [di..mw] range with a step=di. + * + * First, we start with cw = mw and select dests with weight >= cw. + * Then cw is reduced by di and all dests are checked again. + * Last pass should be with cw = di. We have mw/di passes in total: + * + * pass 1: cw = max weight + * pass 2: cw = max weight - di + * pass 3: cw = max weight - 2 * di + * ... + * last pass: cw = di + * + * Weights are supposed to be >= di but we run in parallel with + * weight changes, so it is possible for some dest weight to be reduced + * below di, which is bad if it is the only available dest. + * + * So, we modify how mw is calculated, now it is reduced by (di - 1), + * so that the last cw is 1, to catch such dests with weight below di: + * pass 1: cw = max weight - (di - 1) + * pass 2: cw = max weight - di - (di - 1) + * pass 3: cw = max weight - 2 * di - (di - 1) + * ... + * last pass: cw = 1 + * + */ + /* * current destination pointer for weighted round-robin scheduling */ struct ip_vs_wrr_mark { - struct list_head *cl; /* current list head */ + struct ip_vs_dest *cl; /* current dest or head */ int cw; /* current weight */ int mw; /* maximum weight */ int di; /* decreasing interval */ + struct rcu_head rcu_head; }; @@ -88,36 +119,41 @@ static int ip_vs_wrr_init_svc(struct ip_vs_service *svc) if (mark == NULL) return -ENOMEM; - mark->cl = &svc->destinations; - mark->cw = 0; - mark->mw = ip_vs_wrr_max_weight(svc); + mark->cl = list_entry(&svc->destinations, struct ip_vs_dest, n_list); mark->di = ip_vs_wrr_gcd_weight(svc); + mark->mw = ip_vs_wrr_max_weight(svc) - (mark->di - 1); + mark->cw = mark->mw; svc->sched_data = mark; return 0; } -static int ip_vs_wrr_done_svc(struct ip_vs_service *svc) +static void ip_vs_wrr_done_svc(struct ip_vs_service *svc) { + struct ip_vs_wrr_mark *mark = svc->sched_data; + /* * Release the mark variable */ - kfree(svc->sched_data); - - return 0; + kfree_rcu(mark, rcu_head); } -static int ip_vs_wrr_update_svc(struct ip_vs_service *svc) +static int ip_vs_wrr_dest_changed(struct ip_vs_service *svc, + struct ip_vs_dest *dest) { struct ip_vs_wrr_mark *mark = svc->sched_data; - mark->cl = &svc->destinations; - mark->mw = ip_vs_wrr_max_weight(svc); + spin_lock_bh(&svc->sched_lock); + mark->cl = list_entry(&svc->destinations, struct ip_vs_dest, n_list); mark->di = ip_vs_wrr_gcd_weight(svc); - if (mark->cw > mark->mw) - mark->cw = 0; + mark->mw = ip_vs_wrr_max_weight(svc) - (mark->di - 1); + if (mark->cw > mark->mw || !mark->cw) + mark->cw = mark->mw; + else if (mark->di > 1) + mark->cw = (mark->cw / mark->di) * mark->di + 1; + spin_unlock_bh(&svc->sched_lock); return 0; } @@ -128,80 +164,79 @@ static int ip_vs_wrr_update_svc(struct
ip_vs_service *svc) static struct ip_vs_dest * ip_vs_wrr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) { - struct ip_vs_dest *dest; + struct ip_vs_dest *dest, *last, *stop = NULL; struct ip_vs_wrr_mark *mark = svc->sched_data; - struct list_head *p; + bool last_pass = false, restarted = false; IP_VS_DBG(6, "%s(): Scheduling...\n", __func__); - /* - * This loop will always terminate, because mark->cw in (0, max_weight] - * and at least one server has its weight equal to max_weight. - */ - write_lock(&svc->sched_lock); - p = mark->cl; + spin_lock_bh(&svc->sched_lock); + dest = mark->cl; + /* No available dests? */ + if (mark->mw == 0) + goto err_noavail; + last = dest; + /* Stop only after all dests were checked for weight >= 1 (last pass) */ while (1) { - if (mark->cl == &svc->destinations) { - /* it is at the head of the destination list */ - - if (mark->cl == mark->cl->next) { - /* no dest entry */ - ip_vs_scheduler_err(svc, - "no destination available: " - "no destinations present"); - dest = NULL; - goto out; - } - - mark->cl = svc->destinations.next; - mark->cw -= mark->di; - if (mark->cw <= 0) { - mark->cw = mark->mw; - /* - * Still zero, which means no available servers. - */ - if (mark->cw == 0) { - mark->cl = &svc->destinations; - ip_vs_scheduler_err(svc, - "no destination available"); - dest = NULL; - goto out; - } - } - } else - mark->cl = mark->cl->next; - - if (mark->cl != &svc->destinations) { - /* not at the head of the list */ - dest = list_entry(mark->cl, struct ip_vs_dest, n_list); + list_for_each_entry_continue_rcu(dest, + &svc->destinations, + n_list) { if (!(dest->flags & IP_VS_DEST_F_OVERLOAD) && - atomic_read(&dest->weight) >= mark->cw) { - /* got it */ - break; - } + atomic_read(&dest->weight) >= mark->cw) + goto found; + if (dest == stop) + goto err_over; } - - if (mark->cl == p && mark->cw == mark->di) { - /* back to the start, and no dest is found. - It is only possible when all dests are OVERLOADED */ - dest = NULL; - ip_vs_scheduler_err(svc, - "no destination available: " - "all destinations are overloaded"); - goto out; + mark->cw -= mark->di; + if (mark->cw <= 0) { + mark->cw = mark->mw; + /* Stop if we tried last pass from first dest: + * 1. last_pass: we started checks when cw > di but + * then all dests were checked for w >= 1 + * 2. last was head: the first and only traversal + * was for weight >= 1, for all dests. + */ + if (last_pass || + &last->n_list == &svc->destinations) + goto err_over; + restarted = true; + } + last_pass = mark->cw <= mark->di; + if (last_pass && restarted && + &last->n_list != &svc->destinations) { + /* First traversal was for w >= 1 but only + * for dests after 'last', now do the same + * for all dests up to 'last'. 
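[Editor's note] The comment block in the ip_vs_wrr.c hunk above describes the pass structure in prose; here is a small standalone sketch that prints which destinations each pass would serve under the reduced-mw rule. gcd(), the weight array and the printing are stand-ins, not kernel code:

#include <stdio.h>

static int gcd(int a, int b)
{
	while (b) {
		int t = a % b;
		a = b;
		b = t;
	}
	return a;
}

int main(void)
{
	int w[] = { 4, 3, 2 };		/* destination weights */
	int n = 3, i, pass = 1, di, mw = 0, cw;

	di = w[0];			/* ip_vs_wrr_gcd_weight() analogue */
	for (i = 1; i < n; i++)
		di = gcd(di, w[i]);
	for (i = 0; i < n; i++)		/* ip_vs_wrr_max_weight() analogue */
		if (w[i] > mw)
			mw = w[i];
	mw -= di - 1;			/* so the last pass runs with cw = 1 */

	for (cw = mw; cw >= 1; cw -= di, pass++) {
		printf("pass %d (cw=%d):", pass, cw);
		for (i = 0; i < n; i++)
			if (w[i] >= cw)
				printf(" w=%d", w[i]);
		printf("\n");
	}
	return 0;
}

With weights 4:3:2 this prints four passes serving 1, 2, 3 and 3 destinations respectively, i.e. the weight-4 dest is picked in every pass and the 4:3:2 service ratio falls out of the pass structure.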
+ */ + stop = last; } } +found: IP_VS_DBG_BUF(6, "WRR: server %s:%u " "activeconns %d refcnt %d weight %d\n", IP_VS_DBG_ADDR(svc->af, &dest->addr), ntohs(dest->port), atomic_read(&dest->activeconns), atomic_read(&dest->refcnt), atomic_read(&dest->weight)); + mark->cl = dest; out: - write_unlock(&svc->sched_lock); + spin_unlock_bh(&svc->sched_lock); return dest; + +err_noavail: + mark->cl = dest; + dest = NULL; + ip_vs_scheduler_err(svc, "no destination available"); + goto out; + +err_over: + mark->cl = dest; + dest = NULL; + ip_vs_scheduler_err(svc, "no destination available: " + "all destinations are overloaded"); + goto out; } @@ -212,7 +247,9 @@ static struct ip_vs_scheduler ip_vs_wrr_scheduler = { .n_list = LIST_HEAD_INIT(ip_vs_wrr_scheduler.n_list), .init_service = ip_vs_wrr_init_svc, .done_service = ip_vs_wrr_done_svc, - .update_service = ip_vs_wrr_update_svc, + .add_dest = ip_vs_wrr_dest_changed, + .del_dest = ip_vs_wrr_dest_changed, + .upd_dest = ip_vs_wrr_dest_changed, .schedule = ip_vs_wrr_schedule, }; @@ -224,6 +261,7 @@ static int __init ip_vs_wrr_init(void) static void __exit ip_vs_wrr_cleanup(void) { unregister_ip_vs_scheduler(&ip_vs_wrr_scheduler); + synchronize_rcu(); } module_init(ip_vs_wrr_init); diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index ee6b7a9f1ec2..b75ff6429a04 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -17,6 +17,8 @@ * - not all connections have destination server, for example, * connections in backup server when fwmark is used * - bypass connections use daddr from packet + * - we can use dst without ref while sending in RCU section, we use + * ref when returning NF_ACCEPT for NAT-ed packet via loopback * LOCAL_OUT rules: * - skb->dev is NULL, skb->protocol is not set (both are set in POST_ROUTING) * - skb->pkt_type is not set yet @@ -51,39 +53,54 @@ enum { */ IP_VS_RT_MODE_CONNECT = 8, /* Always bind route to saddr */ IP_VS_RT_MODE_KNOWN_NH = 16,/* Route via remote addr */ + IP_VS_RT_MODE_TUNNEL = 32,/* Tunnel mode */ }; +static inline struct ip_vs_dest_dst *ip_vs_dest_dst_alloc(void) +{ + return kmalloc(sizeof(struct ip_vs_dest_dst), GFP_ATOMIC); +} + +static inline void ip_vs_dest_dst_free(struct ip_vs_dest_dst *dest_dst) +{ + kfree(dest_dst); +} + /* * Destination cache to speed up outgoing route lookup */ static inline void -__ip_vs_dst_set(struct ip_vs_dest *dest, u32 rtos, struct dst_entry *dst, - u32 dst_cookie) +__ip_vs_dst_set(struct ip_vs_dest *dest, struct ip_vs_dest_dst *dest_dst, + struct dst_entry *dst, u32 dst_cookie) { - struct dst_entry *old_dst; + struct ip_vs_dest_dst *old; + + old = rcu_dereference_protected(dest->dest_dst, + lockdep_is_held(&dest->dst_lock)); - old_dst = dest->dst_cache; - dest->dst_cache = dst; - dest->dst_rtos = rtos; - dest->dst_cookie = dst_cookie; - dst_release(old_dst); + if (dest_dst) { + dest_dst->dst_cache = dst; + dest_dst->dst_cookie = dst_cookie; + } + rcu_assign_pointer(dest->dest_dst, dest_dst); + + if (old) + call_rcu(&old->rcu_head, ip_vs_dest_dst_rcu_free); } -static inline struct dst_entry * -__ip_vs_dst_check(struct ip_vs_dest *dest, u32 rtos) +static inline struct ip_vs_dest_dst * +__ip_vs_dst_check(struct ip_vs_dest *dest) { - struct dst_entry *dst = dest->dst_cache; + struct ip_vs_dest_dst *dest_dst = rcu_dereference(dest->dest_dst); + struct dst_entry *dst; - if (!dst) + if (!dest_dst) return NULL; - if ((dst->obsolete || rtos != dest->dst_rtos) && - dst->ops->check(dst, dest->dst_cookie) == NULL) { - dest->dst_cache = 
NULL; - dst_release(dst); + dst = dest_dst->dst_cache; + if (dst->obsolete && + dst->ops->check(dst, dest_dst->dst_cookie) == NULL) return NULL; - } - dst_hold(dst); - return dst; + return dest_dst; } static inline bool @@ -104,7 +121,7 @@ __mtu_check_toobig_v6(const struct sk_buff *skb, u32 mtu) /* Get route to daddr, update *saddr, optionally bind route to saddr */ static struct rtable *do_output_route4(struct net *net, __be32 daddr, - u32 rtos, int rt_mode, __be32 *saddr) + int rt_mode, __be32 *saddr) { struct flowi4 fl4; struct rtable *rt; @@ -113,7 +130,6 @@ static struct rtable *do_output_route4(struct net *net, __be32 daddr, memset(&fl4, 0, sizeof(fl4)); fl4.daddr = daddr; fl4.saddr = (rt_mode & IP_VS_RT_MODE_CONNECT) ? *saddr : 0; - fl4.flowi4_tos = rtos; fl4.flowi4_flags = (rt_mode & IP_VS_RT_MODE_KNOWN_NH) ? FLOWI_FLAG_KNOWN_NH : 0; @@ -124,7 +140,7 @@ retry: if (PTR_ERR(rt) == -EINVAL && *saddr && rt_mode & IP_VS_RT_MODE_CONNECT && !loop) { *saddr = 0; - flowi4_update_output(&fl4, 0, rtos, daddr, 0); + flowi4_update_output(&fl4, 0, 0, daddr, 0); goto retry; } IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n", &daddr); @@ -132,7 +148,7 @@ retry: } else if (!*saddr && rt_mode & IP_VS_RT_MODE_CONNECT && fl4.saddr) { ip_rt_put(rt); *saddr = fl4.saddr; - flowi4_update_output(&fl4, 0, rtos, daddr, fl4.saddr); + flowi4_update_output(&fl4, 0, 0, daddr, fl4.saddr); loop++; goto retry; } @@ -141,113 +157,140 @@ retry: } /* Get route to destination or remote server */ -static struct rtable * +static int __ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest, - __be32 daddr, u32 rtos, int rt_mode, __be32 *ret_saddr) + __be32 daddr, int rt_mode, __be32 *ret_saddr) { struct net *net = dev_net(skb_dst(skb)->dev); + struct netns_ipvs *ipvs = net_ipvs(net); + struct ip_vs_dest_dst *dest_dst; struct rtable *rt; /* Route to the other host */ struct rtable *ort; /* Original route */ - int local; + struct iphdr *iph; + __be16 df; + int mtu; + int local, noref = 1; if (dest) { - spin_lock(&dest->dst_lock); - if (!(rt = (struct rtable *) - __ip_vs_dst_check(dest, rtos))) { - rt = do_output_route4(net, dest->addr.ip, rtos, - rt_mode, &dest->dst_saddr.ip); + dest_dst = __ip_vs_dst_check(dest); + if (likely(dest_dst)) + rt = (struct rtable *) dest_dst->dst_cache; + else { + dest_dst = ip_vs_dest_dst_alloc(); + spin_lock_bh(&dest->dst_lock); + if (!dest_dst) { + __ip_vs_dst_set(dest, NULL, NULL, 0); + spin_unlock_bh(&dest->dst_lock); + goto err_unreach; + } + rt = do_output_route4(net, dest->addr.ip, rt_mode, + &dest_dst->dst_saddr.ip); if (!rt) { - spin_unlock(&dest->dst_lock); - return NULL; + __ip_vs_dst_set(dest, NULL, NULL, 0); + spin_unlock_bh(&dest->dst_lock); + ip_vs_dest_dst_free(dest_dst); + goto err_unreach; } - __ip_vs_dst_set(dest, rtos, dst_clone(&rt->dst), 0); - IP_VS_DBG(10, "new dst %pI4, src %pI4, refcnt=%d, " - "rtos=%X\n", - &dest->addr.ip, &dest->dst_saddr.ip, - atomic_read(&rt->dst.__refcnt), rtos); + __ip_vs_dst_set(dest, dest_dst, &rt->dst, 0); + spin_unlock_bh(&dest->dst_lock); + IP_VS_DBG(10, "new dst %pI4, src %pI4, refcnt=%d\n", + &dest->addr.ip, &dest_dst->dst_saddr.ip, + atomic_read(&rt->dst.__refcnt)); } daddr = dest->addr.ip; if (ret_saddr) - *ret_saddr = dest->dst_saddr.ip; - spin_unlock(&dest->dst_lock); + *ret_saddr = dest_dst->dst_saddr.ip; } else { __be32 saddr = htonl(INADDR_ANY); + noref = 0; + /* For such unconfigured boxes avoid many route lookups * for performance reasons because we do not remember saddr */ rt_mode &= ~IP_VS_RT_MODE_CONNECT; - rt = 
do_output_route4(net, daddr, rtos, rt_mode, &saddr); + rt = do_output_route4(net, daddr, rt_mode, &saddr); if (!rt) - return NULL; + goto err_unreach; if (ret_saddr) *ret_saddr = saddr; } - local = rt->rt_flags & RTCF_LOCAL; + local = (rt->rt_flags & RTCF_LOCAL) ? 1 : 0; if (!((local ? IP_VS_RT_MODE_LOCAL : IP_VS_RT_MODE_NON_LOCAL) & rt_mode)) { IP_VS_DBG_RL("Stopping traffic to %s address, dest: %pI4\n", (rt->rt_flags & RTCF_LOCAL) ? "local":"non-local", &daddr); - ip_rt_put(rt); - return NULL; - } - if (local && !(rt_mode & IP_VS_RT_MODE_RDR) && - !((ort = skb_rtable(skb)) && ort->rt_flags & RTCF_LOCAL)) { - IP_VS_DBG_RL("Redirect from non-local address %pI4 to local " - "requires NAT method, dest: %pI4\n", - &ip_hdr(skb)->daddr, &daddr); - ip_rt_put(rt); - return NULL; + goto err_put; } - if (unlikely(!local && ipv4_is_loopback(ip_hdr(skb)->saddr))) { - IP_VS_DBG_RL("Stopping traffic from loopback address %pI4 " - "to non-local address, dest: %pI4\n", - &ip_hdr(skb)->saddr, &daddr); - ip_rt_put(rt); - return NULL; + iph = ip_hdr(skb); + if (likely(!local)) { + if (unlikely(ipv4_is_loopback(iph->saddr))) { + IP_VS_DBG_RL("Stopping traffic from loopback address " + "%pI4 to non-local address, dest: %pI4\n", + &iph->saddr, &daddr); + goto err_put; + } + } else { + ort = skb_rtable(skb); + if (!(rt_mode & IP_VS_RT_MODE_RDR) && + !(ort->rt_flags & RTCF_LOCAL)) { + IP_VS_DBG_RL("Redirect from non-local address %pI4 to " + "local requires NAT method, dest: %pI4\n", + &iph->daddr, &daddr); + goto err_put; + } + /* skb to local stack, preserve old route */ + if (!noref) + ip_rt_put(rt); + return local; } - return rt; -} - -/* Reroute packet to local IPv4 stack after DNAT */ -static int -__ip_vs_reroute_locally(struct sk_buff *skb) -{ - struct rtable *rt = skb_rtable(skb); - struct net_device *dev = rt->dst.dev; - struct net *net = dev_net(dev); - struct iphdr *iph = ip_hdr(skb); - - if (rt_is_input_route(rt)) { - unsigned long orefdst = skb->_skb_refdst; - - if (ip_route_input(skb, iph->daddr, iph->saddr, - iph->tos, skb->dev)) - return 0; - refdst_drop(orefdst); + if (likely(!(rt_mode & IP_VS_RT_MODE_TUNNEL))) { + mtu = dst_mtu(&rt->dst); + df = iph->frag_off & htons(IP_DF); } else { - struct flowi4 fl4 = { - .daddr = iph->daddr, - .saddr = iph->saddr, - .flowi4_tos = RT_TOS(iph->tos), - .flowi4_mark = skb->mark, - }; - - rt = ip_route_output_key(net, &fl4); - if (IS_ERR(rt)) - return 0; - if (!(rt->rt_flags & RTCF_LOCAL)) { - ip_rt_put(rt); - return 0; + struct sock *sk = skb->sk; + + mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr); + if (mtu < 68) { + IP_VS_DBG_RL("%s(): mtu less than 68\n", __func__); + goto err_put; } - /* Drop old route. */ - skb_dst_drop(skb); - skb_dst_set(skb, &rt->dst); + ort = skb_rtable(skb); + if (!skb->dev && sk && sk->sk_state != TCP_TIME_WAIT) + ort->dst.ops->update_pmtu(&ort->dst, sk, NULL, mtu); + /* MTU check allowed? */ + df = sysctl_pmtu_disc(ipvs) ? 
iph->frag_off & htons(IP_DF) : 0; } - return 1; + + /* MTU checking */ + if (unlikely(df && skb->len > mtu && !skb_is_gso(skb))) { + icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); + IP_VS_DBG(1, "frag needed for %pI4\n", &iph->saddr); + goto err_put; + } + + skb_dst_drop(skb); + if (noref) { + if (!local) + skb_dst_set_noref_force(skb, &rt->dst); + else + skb_dst_set(skb, dst_clone(&rt->dst)); + } else + skb_dst_set(skb, &rt->dst); + + return local; + +err_put: + if (!noref) + ip_rt_put(rt); + return -1; + +err_unreach: + dst_link_failure(skb); + return -1; } #ifdef CONFIG_IP_VS_IPV6 @@ -294,44 +337,57 @@ out_err: /* * Get route to destination or remote server */ -static struct rt6_info * +static int __ip_vs_get_out_rt_v6(struct sk_buff *skb, struct ip_vs_dest *dest, struct in6_addr *daddr, struct in6_addr *ret_saddr, - int do_xfrm, int rt_mode) + struct ip_vs_iphdr *ipvsh, int do_xfrm, int rt_mode) { struct net *net = dev_net(skb_dst(skb)->dev); + struct ip_vs_dest_dst *dest_dst; struct rt6_info *rt; /* Route to the other host */ struct rt6_info *ort; /* Original route */ struct dst_entry *dst; - int local; + int mtu; + int local, noref = 1; if (dest) { - spin_lock(&dest->dst_lock); - rt = (struct rt6_info *)__ip_vs_dst_check(dest, 0); - if (!rt) { + dest_dst = __ip_vs_dst_check(dest); + if (likely(dest_dst)) + rt = (struct rt6_info *) dest_dst->dst_cache; + else { u32 cookie; + dest_dst = ip_vs_dest_dst_alloc(); + spin_lock_bh(&dest->dst_lock); + if (!dest_dst) { + __ip_vs_dst_set(dest, NULL, NULL, 0); + spin_unlock_bh(&dest->dst_lock); + goto err_unreach; + } dst = __ip_vs_route_output_v6(net, &dest->addr.in6, - &dest->dst_saddr.in6, + &dest_dst->dst_saddr.in6, do_xfrm); if (!dst) { - spin_unlock(&dest->dst_lock); - return NULL; + __ip_vs_dst_set(dest, NULL, NULL, 0); + spin_unlock_bh(&dest->dst_lock); + ip_vs_dest_dst_free(dest_dst); + goto err_unreach; } rt = (struct rt6_info *) dst; cookie = rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0; - __ip_vs_dst_set(dest, 0, dst_clone(&rt->dst), cookie); + __ip_vs_dst_set(dest, dest_dst, &rt->dst, cookie); + spin_unlock_bh(&dest->dst_lock); IP_VS_DBG(10, "new dst %pI6, src %pI6, refcnt=%d\n", - &dest->addr.in6, &dest->dst_saddr.in6, + &dest->addr.in6, &dest_dst->dst_saddr.in6, atomic_read(&rt->dst.__refcnt)); } if (ret_saddr) - *ret_saddr = dest->dst_saddr.in6; - spin_unlock(&dest->dst_lock); + *ret_saddr = dest_dst->dst_saddr.in6; } else { + noref = 0; dst = __ip_vs_route_output_v6(net, daddr, ret_saddr, do_xfrm); if (!dst) - return NULL; + goto err_unreach; rt = (struct rt6_info *) dst; } @@ -340,86 +396,137 @@ __ip_vs_get_out_rt_v6(struct sk_buff *skb, struct ip_vs_dest *dest, rt_mode)) { IP_VS_DBG_RL("Stopping traffic to %s address, dest: %pI6c\n", local ? 
"local":"non-local", daddr); - dst_release(&rt->dst); - return NULL; + goto err_put; } - if (local && !(rt_mode & IP_VS_RT_MODE_RDR) && - !((ort = (struct rt6_info *) skb_dst(skb)) && - __ip_vs_is_local_route6(ort))) { - IP_VS_DBG_RL("Redirect from non-local address %pI6c to local " - "requires NAT method, dest: %pI6c\n", - &ipv6_hdr(skb)->daddr, daddr); - dst_release(&rt->dst); - return NULL; + if (likely(!local)) { + if (unlikely((!skb->dev || skb->dev->flags & IFF_LOOPBACK) && + ipv6_addr_type(&ipv6_hdr(skb)->saddr) & + IPV6_ADDR_LOOPBACK)) { + IP_VS_DBG_RL("Stopping traffic from loopback address " + "%pI6c to non-local address, " + "dest: %pI6c\n", + &ipv6_hdr(skb)->saddr, daddr); + goto err_put; + } + } else { + ort = (struct rt6_info *) skb_dst(skb); + if (!(rt_mode & IP_VS_RT_MODE_RDR) && + !__ip_vs_is_local_route6(ort)) { + IP_VS_DBG_RL("Redirect from non-local address %pI6c " + "to local requires NAT method, " + "dest: %pI6c\n", + &ipv6_hdr(skb)->daddr, daddr); + goto err_put; + } + /* skb to local stack, preserve old route */ + if (!noref) + dst_release(&rt->dst); + return local; } - if (unlikely(!local && (!skb->dev || skb->dev->flags & IFF_LOOPBACK) && - ipv6_addr_type(&ipv6_hdr(skb)->saddr) & - IPV6_ADDR_LOOPBACK)) { - IP_VS_DBG_RL("Stopping traffic from loopback address %pI6c " - "to non-local address, dest: %pI6c\n", - &ipv6_hdr(skb)->saddr, daddr); - dst_release(&rt->dst); - return NULL; + + /* MTU checking */ + if (likely(!(rt_mode & IP_VS_RT_MODE_TUNNEL))) + mtu = dst_mtu(&rt->dst); + else { + struct sock *sk = skb->sk; + + mtu = dst_mtu(&rt->dst) - sizeof(struct ipv6hdr); + if (mtu < IPV6_MIN_MTU) { + IP_VS_DBG_RL("%s(): mtu less than %d\n", __func__, + IPV6_MIN_MTU); + goto err_put; + } + ort = (struct rt6_info *) skb_dst(skb); + if (!skb->dev && sk && sk->sk_state != TCP_TIME_WAIT) + ort->dst.ops->update_pmtu(&ort->dst, sk, NULL, mtu); } - return rt; + if (unlikely(__mtu_check_toobig_v6(skb, mtu))) { + if (!skb->dev) + skb->dev = net->loopback_dev; + /* only send ICMP too big on first fragment */ + if (!ipvsh->fragoffs) + icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); + IP_VS_DBG(1, "frag needed for %pI6c\n", &ipv6_hdr(skb)->saddr); + goto err_put; + } + + skb_dst_drop(skb); + if (noref) { + if (!local) + skb_dst_set_noref_force(skb, &rt->dst); + else + skb_dst_set(skb, dst_clone(&rt->dst)); + } else + skb_dst_set(skb, &rt->dst); + + return local; + +err_put: + if (!noref) + dst_release(&rt->dst); + return -1; + +err_unreach: + dst_link_failure(skb); + return -1; } #endif -/* - * Release dest->dst_cache before a dest is removed - */ -void -ip_vs_dst_reset(struct ip_vs_dest *dest) +/* return NF_ACCEPT to allow forwarding or other NF_xxx on error */ +static inline int ip_vs_tunnel_xmit_prepare(struct sk_buff *skb, + struct ip_vs_conn *cp) { - struct dst_entry *old_dst; + int ret = NF_ACCEPT; + + skb->ipvs_property = 1; + if (unlikely(cp->flags & IP_VS_CONN_F_NFCT)) + ret = ip_vs_confirm_conntrack(skb); + if (ret == NF_ACCEPT) { + nf_reset(skb); + skb_forward_csum(skb); + } + return ret; +} + +/* return NF_STOLEN (sent) or NF_ACCEPT if local=1 (not sent) */ +static inline int ip_vs_nat_send_or_cont(int pf, struct sk_buff *skb, + struct ip_vs_conn *cp, int local) +{ + int ret = NF_STOLEN; - old_dst = dest->dst_cache; - dest->dst_cache = NULL; - dst_release(old_dst); - dest->dst_saddr.ip = 0; + skb->ipvs_property = 1; + if (likely(!(cp->flags & IP_VS_CONN_F_NFCT))) + ip_vs_notrack(skb); + else + ip_vs_update_conntrack(skb, cp, 1); + if (!local) { + skb_forward_csum(skb); + 
NF_HOOK(pf, NF_INET_LOCAL_OUT, skb, NULL, skb_dst(skb)->dev, + dst_output); + } else + ret = NF_ACCEPT; + return ret; } -#define IP_VS_XMIT_TUNNEL(skb, cp) \ -({ \ - int __ret = NF_ACCEPT; \ - \ - (skb)->ipvs_property = 1; \ - if (unlikely((cp)->flags & IP_VS_CONN_F_NFCT)) \ - __ret = ip_vs_confirm_conntrack(skb); \ - if (__ret == NF_ACCEPT) { \ - nf_reset(skb); \ - skb_forward_csum(skb); \ - } \ - __ret; \ -}) - -#define IP_VS_XMIT_NAT(pf, skb, cp, local) \ -do { \ - (skb)->ipvs_property = 1; \ - if (likely(!((cp)->flags & IP_VS_CONN_F_NFCT))) \ - ip_vs_notrack(skb); \ - else \ - ip_vs_update_conntrack(skb, cp, 1); \ - if (local) \ - return NF_ACCEPT; \ - skb_forward_csum(skb); \ - NF_HOOK(pf, NF_INET_LOCAL_OUT, (skb), NULL, \ - skb_dst(skb)->dev, dst_output); \ -} while (0) - -#define IP_VS_XMIT(pf, skb, cp, local) \ -do { \ - (skb)->ipvs_property = 1; \ - if (likely(!((cp)->flags & IP_VS_CONN_F_NFCT))) \ - ip_vs_notrack(skb); \ - if (local) \ - return NF_ACCEPT; \ - skb_forward_csum(skb); \ - NF_HOOK(pf, NF_INET_LOCAL_OUT, (skb), NULL, \ - skb_dst(skb)->dev, dst_output); \ -} while (0) +/* return NF_STOLEN (sent) or NF_ACCEPT if local=1 (not sent) */ +static inline int ip_vs_send_or_cont(int pf, struct sk_buff *skb, + struct ip_vs_conn *cp, int local) +{ + int ret = NF_STOLEN; + + skb->ipvs_property = 1; + if (likely(!(cp->flags & IP_VS_CONN_F_NFCT))) + ip_vs_notrack(skb); + if (!local) { + skb_forward_csum(skb); + NF_HOOK(pf, NF_INET_LOCAL_OUT, skb, NULL, skb_dst(skb)->dev, + dst_output); + } else + ret = NF_ACCEPT; + return ret; +} /* @@ -430,7 +537,7 @@ ip_vs_null_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh) { /* we do not touch skb and do not need pskb ptr */ - IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 1); + return ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 1); } @@ -443,52 +550,29 @@ int ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh) { - struct rtable *rt; /* Route to the other host */ struct iphdr *iph = ip_hdr(skb); - int mtu; EnterFunction(10); - if (!(rt = __ip_vs_get_out_rt(skb, NULL, iph->daddr, RT_TOS(iph->tos), - IP_VS_RT_MODE_NON_LOCAL, NULL))) - goto tx_error_icmp; - - /* MTU checking */ - mtu = dst_mtu(&rt->dst); - if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF)) && - !skb_is_gso(skb)) { - ip_rt_put(rt); - icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); - IP_VS_DBG_RL("%s(): frag needed\n", __func__); + rcu_read_lock(); + if (__ip_vs_get_out_rt(skb, NULL, iph->daddr, IP_VS_RT_MODE_NON_LOCAL, + NULL) < 0) goto tx_error; - } - /* - * Call ip_send_check because we are not sure it is called - * after ip_defrag. Is copy-on-write needed? 
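[Editor's note] The hunks above replace the IP_VS_XMIT* statement macros with inline functions that return the Netfilter verdict. The macros hid a return NF_ACCEPT inside their expansion, which skips any cleanup the caller still owes; in this patch every xmit path is newly bracketed by rcu_read_lock()/rcu_read_unlock(), so the verdict must come back to the caller. A toy illustration of the difference, with stand-in types and verdict values:

#include <stdio.h>

#define NF_ACCEPT 1
#define NF_STOLEN 2

/* old style: a hidden early return in whoever expands the macro */
#define XMIT_MACRO(local)		\
do {					\
	if (local)			\
		return NF_ACCEPT;	\
	/* ... transmit ... */		\
} while (0)

/* new style: the verdict is an ordinary return value */
static int xmit_func(int local)
{
	if (local)
		return NF_ACCEPT;
	/* ... transmit ... */
	return NF_STOLEN;
}

static int caller(int local)
{
	int rc = xmit_func(local);

	/* cleanup (e.g. unlocking) reliably runs before returning */
	return rc;
}

int main(void)
{
	printf("local=1 -> %d, local=0 -> %d\n", caller(1), caller(0));
	return 0;
}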
- */ - if (unlikely((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)) { - ip_rt_put(rt); - return NF_STOLEN; - } - ip_send_check(ip_hdr(skb)); - - /* drop old route */ - skb_dst_drop(skb); - skb_dst_set(skb, &rt->dst); + ip_send_check(iph); /* Another hack: avoid icmp_send in ip_fragment */ skb->local_df = 1; - IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 0); + ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 0); + rcu_read_unlock(); LeaveFunction(10); return NF_STOLEN; - tx_error_icmp: - dst_link_failure(skb); tx_error: kfree_skb(skb); + rcu_read_unlock(); LeaveFunction(10); return NF_STOLEN; } @@ -496,60 +580,27 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, #ifdef CONFIG_IP_VS_IPV6 int ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, - struct ip_vs_protocol *pp, struct ip_vs_iphdr *iph) + struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh) { - struct rt6_info *rt; /* Route to the other host */ - int mtu; - EnterFunction(10); - rt = __ip_vs_get_out_rt_v6(skb, NULL, &iph->daddr.in6, NULL, 0, - IP_VS_RT_MODE_NON_LOCAL); - if (!rt) - goto tx_error_icmp; - - /* MTU checking */ - mtu = dst_mtu(&rt->dst); - if (__mtu_check_toobig_v6(skb, mtu)) { - if (!skb->dev) { - struct net *net = dev_net(skb_dst(skb)->dev); - - skb->dev = net->loopback_dev; - } - /* only send ICMP too big on first fragment */ - if (!iph->fragoffs) - icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); - dst_release(&rt->dst); - IP_VS_DBG_RL("%s(): frag needed\n", __func__); + rcu_read_lock(); + if (__ip_vs_get_out_rt_v6(skb, NULL, &ipvsh->daddr.in6, NULL, + ipvsh, 0, IP_VS_RT_MODE_NON_LOCAL) < 0) goto tx_error; - } - - /* - * Call ip_send_check because we are not sure it is called - * after ip_defrag. Is copy-on-write needed? - */ - skb = skb_share_check(skb, GFP_ATOMIC); - if (unlikely(skb == NULL)) { - dst_release(&rt->dst); - return NF_STOLEN; - } - - /* drop old route */ - skb_dst_drop(skb); - skb_dst_set(skb, &rt->dst); /* Another hack: avoid icmp_send in ip_fragment */ skb->local_df = 1; - IP_VS_XMIT(NFPROTO_IPV6, skb, cp, 0); + ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 0); + rcu_read_unlock(); LeaveFunction(10); return NF_STOLEN; - tx_error_icmp: - dst_link_failure(skb); tx_error: kfree_skb(skb); + rcu_read_unlock(); LeaveFunction(10); return NF_STOLEN; } @@ -564,29 +615,30 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh) { struct rtable *rt; /* Route to the other host */ - int mtu; - struct iphdr *iph = ip_hdr(skb); - int local; + int local, rc, was_input; EnterFunction(10); + rcu_read_lock(); /* check if it is a connection of no-client-port */ if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) { __be16 _pt, *p; - p = skb_header_pointer(skb, iph->ihl*4, sizeof(_pt), &_pt); + + p = skb_header_pointer(skb, ipvsh->len, sizeof(_pt), &_pt); if (p == NULL) goto tx_error; ip_vs_conn_fill_cport(cp, *p); IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p)); } - if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip, - RT_TOS(iph->tos), - IP_VS_RT_MODE_LOCAL | - IP_VS_RT_MODE_NON_LOCAL | - IP_VS_RT_MODE_RDR, NULL))) - goto tx_error_icmp; - local = rt->rt_flags & RTCF_LOCAL; + was_input = rt_is_input_route(skb_rtable(skb)); + local = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip, + IP_VS_RT_MODE_LOCAL | + IP_VS_RT_MODE_NON_LOCAL | + IP_VS_RT_MODE_RDR, NULL); + if (local < 0) + goto tx_error; + rt = skb_rtable(skb); /* * Avoid duplicate tuple in reply direction for NAT traffic * to local address when connection is sync-ed @@ -600,57 +652,31 @@ 
ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, IP_VS_DBG_RL_PKT(10, AF_INET, pp, skb, 0, "ip_vs_nat_xmit(): " "stopping DNAT to local address"); - goto tx_error_put; + goto tx_error; } } #endif /* From world but DNAT to loopback address? */ - if (local && ipv4_is_loopback(cp->daddr.ip) && - rt_is_input_route(skb_rtable(skb))) { + if (local && ipv4_is_loopback(cp->daddr.ip) && was_input) { IP_VS_DBG_RL_PKT(1, AF_INET, pp, skb, 0, "ip_vs_nat_xmit(): " "stopping DNAT to loopback address"); - goto tx_error_put; - } - - /* MTU checking */ - mtu = dst_mtu(&rt->dst); - if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF)) && - !skb_is_gso(skb)) { - icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); - IP_VS_DBG_RL_PKT(0, AF_INET, pp, skb, 0, - "ip_vs_nat_xmit(): frag needed for"); - goto tx_error_put; + goto tx_error; } /* copy-on-write the packet before mangling it */ if (!skb_make_writable(skb, sizeof(struct iphdr))) - goto tx_error_put; + goto tx_error; if (skb_cow(skb, rt->dst.dev->hard_header_len)) - goto tx_error_put; + goto tx_error; /* mangle the packet */ if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp, ipvsh)) - goto tx_error_put; + goto tx_error; ip_hdr(skb)->daddr = cp->daddr.ip; ip_send_check(ip_hdr(skb)); - if (!local) { - /* drop old route */ - skb_dst_drop(skb); - skb_dst_set(skb, &rt->dst); - } else { - ip_rt_put(rt); - /* - * Some IPv4 replies get local address from routes, - * not from iph, so while we DNAT after routing - * we need this second input/output route. - */ - if (!__ip_vs_reroute_locally(skb)) - goto tx_error; - } - IP_VS_DBG_PKT(10, AF_INET, pp, skb, 0, "After DNAT"); /* FIXME: when application helper enlarges the packet and the length @@ -660,49 +686,48 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, /* Another hack: avoid icmp_send in ip_fragment */ skb->local_df = 1; - IP_VS_XMIT_NAT(NFPROTO_IPV4, skb, cp, local); + rc = ip_vs_nat_send_or_cont(NFPROTO_IPV4, skb, cp, local); + rcu_read_unlock(); LeaveFunction(10); - return NF_STOLEN; + return rc; - tx_error_icmp: - dst_link_failure(skb); tx_error: kfree_skb(skb); + rcu_read_unlock(); LeaveFunction(10); return NF_STOLEN; - tx_error_put: - ip_rt_put(rt); - goto tx_error; } #ifdef CONFIG_IP_VS_IPV6 int ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, - struct ip_vs_protocol *pp, struct ip_vs_iphdr *iph) + struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh) { struct rt6_info *rt; /* Route to the other host */ - int mtu; - int local; + int local, rc; EnterFunction(10); + rcu_read_lock(); /* check if it is a connection of no-client-port */ - if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT && !iph->fragoffs)) { + if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT && !ipvsh->fragoffs)) { __be16 _pt, *p; - p = skb_header_pointer(skb, iph->len, sizeof(_pt), &_pt); + p = skb_header_pointer(skb, ipvsh->len, sizeof(_pt), &_pt); if (p == NULL) goto tx_error; ip_vs_conn_fill_cport(cp, *p); IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p)); } - if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL, - 0, (IP_VS_RT_MODE_LOCAL | - IP_VS_RT_MODE_NON_LOCAL | - IP_VS_RT_MODE_RDR)))) - goto tx_error_icmp; - local = __ip_vs_is_local_route6(rt); + local = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL, + ipvsh, 0, + IP_VS_RT_MODE_LOCAL | + IP_VS_RT_MODE_NON_LOCAL | + IP_VS_RT_MODE_RDR); + if (local < 0) + goto tx_error; + rt = (struct rt6_info *) skb_dst(skb); /* * Avoid duplicate tuple in reply direction for NAT traffic * to local address when connection is 
sync-ed @@ -716,7 +741,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, IP_VS_DBG_RL_PKT(10, AF_INET6, pp, skb, 0, "ip_vs_nat_xmit_v6(): " "stopping DNAT to local address"); - goto tx_error_put; + goto tx_error; } } #endif @@ -727,46 +752,21 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, IP_VS_DBG_RL_PKT(1, AF_INET6, pp, skb, 0, "ip_vs_nat_xmit_v6(): " "stopping DNAT to loopback address"); - goto tx_error_put; - } - - /* MTU checking */ - mtu = dst_mtu(&rt->dst); - if (__mtu_check_toobig_v6(skb, mtu)) { - if (!skb->dev) { - struct net *net = dev_net(skb_dst(skb)->dev); - - skb->dev = net->loopback_dev; - } - /* only send ICMP too big on first fragment */ - if (!iph->fragoffs) - icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); - IP_VS_DBG_RL_PKT(0, AF_INET6, pp, skb, 0, - "ip_vs_nat_xmit_v6(): frag needed for"); - goto tx_error_put; + goto tx_error; } /* copy-on-write the packet before mangling it */ if (!skb_make_writable(skb, sizeof(struct ipv6hdr))) - goto tx_error_put; + goto tx_error; if (skb_cow(skb, rt->dst.dev->hard_header_len)) - goto tx_error_put; + goto tx_error; /* mangle the packet */ - if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp, iph)) + if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp, ipvsh)) goto tx_error; ipv6_hdr(skb)->daddr = cp->daddr.in6; - if (!local || !skb->dev) { - /* drop the old route when skb is not shared */ - skb_dst_drop(skb); - skb_dst_set(skb, &rt->dst); - } else { - /* destined to loopback, do we need to change route? */ - dst_release(&rt->dst); - } - IP_VS_DBG_PKT(10, AF_INET6, pp, skb, 0, "After DNAT"); /* FIXME: when application helper enlarges the packet and the length @@ -776,20 +776,17 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, /* Another hack: avoid icmp_send in ip_fragment */ skb->local_df = 1; - IP_VS_XMIT_NAT(NFPROTO_IPV6, skb, cp, local); + rc = ip_vs_nat_send_or_cont(NFPROTO_IPV6, skb, cp, local); + rcu_read_unlock(); LeaveFunction(10); - return NF_STOLEN; + return rc; -tx_error_icmp: - dst_link_failure(skb); tx_error: LeaveFunction(10); kfree_skb(skb); + rcu_read_unlock(); return NF_STOLEN; -tx_error_put: - dst_release(&rt->dst); - goto tx_error; } #endif @@ -826,56 +823,40 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, __be16 df; struct iphdr *iph; /* Our new IP header */ unsigned int max_headroom; /* The extra header space needed */ - int mtu; - int ret; + int ret, local; EnterFunction(10); - if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip, - RT_TOS(tos), IP_VS_RT_MODE_LOCAL | - IP_VS_RT_MODE_NON_LOCAL | - IP_VS_RT_MODE_CONNECT, - &saddr))) - goto tx_error_icmp; - if (rt->rt_flags & RTCF_LOCAL) { - ip_rt_put(rt); - IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 1); + rcu_read_lock(); + local = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip, + IP_VS_RT_MODE_LOCAL | + IP_VS_RT_MODE_NON_LOCAL | + IP_VS_RT_MODE_CONNECT | + IP_VS_RT_MODE_TUNNEL, &saddr); + if (local < 0) + goto tx_error; + if (local) { + rcu_read_unlock(); + return ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 1); } + rt = skb_rtable(skb); tdev = rt->dst.dev; - mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr); - if (mtu < 68) { - IP_VS_DBG_RL("%s(): mtu less than 68\n", __func__); - goto tx_error_put; - } - if (rt_is_output_route(skb_rtable(skb))) - skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); - /* Copy DF, reset fragment offset and MF */ df = sysctl_pmtu_disc(ipvs) ? 
old_iph->frag_off & htons(IP_DF) : 0; - if (df && mtu < ntohs(old_iph->tot_len) && !skb_is_gso(skb)) { - icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); - IP_VS_DBG_RL("%s(): frag needed\n", __func__); - goto tx_error_put; - } - /* * Okay, now see if we can stuff it in the buffer as-is. */ max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct iphdr); - if (skb_headroom(skb) < max_headroom - || skb_cloned(skb) || skb_shared(skb)) { + if (skb_headroom(skb) < max_headroom || skb_cloned(skb)) { struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom); - if (!new_skb) { - ip_rt_put(rt); - kfree_skb(skb); - IP_VS_ERR_RL("%s(): no memory\n", __func__); - return NF_STOLEN; - } + + if (!new_skb) + goto tx_error; consume_skb(skb); skb = new_skb; old_iph = ip_hdr(skb); @@ -890,10 +871,6 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, skb_reset_network_header(skb); memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); - /* drop old route */ - skb_dst_drop(skb); - skb_dst_set(skb, &rt->dst); - /* * Push down and install the IPIP header. */ @@ -911,25 +888,22 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, /* Another hack: avoid icmp_send in ip_fragment */ skb->local_df = 1; - ret = IP_VS_XMIT_TUNNEL(skb, cp); + ret = ip_vs_tunnel_xmit_prepare(skb, cp); if (ret == NF_ACCEPT) ip_local_out(skb); else if (ret == NF_DROP) kfree_skb(skb); + rcu_read_unlock(); LeaveFunction(10); return NF_STOLEN; - tx_error_icmp: - dst_link_failure(skb); tx_error: kfree_skb(skb); + rcu_read_unlock(); LeaveFunction(10); return NF_STOLEN; -tx_error_put: - ip_rt_put(rt); - goto tx_error; } #ifdef CONFIG_IP_VS_IPV6 @@ -943,60 +917,37 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, struct ipv6hdr *old_iph = ipv6_hdr(skb); struct ipv6hdr *iph; /* Our new IP header */ unsigned int max_headroom; /* The extra header space needed */ - int mtu; - int ret; + int ret, local; EnterFunction(10); - if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, - &saddr, 1, (IP_VS_RT_MODE_LOCAL | - IP_VS_RT_MODE_NON_LOCAL)))) - goto tx_error_icmp; - if (__ip_vs_is_local_route6(rt)) { - dst_release(&rt->dst); - IP_VS_XMIT(NFPROTO_IPV6, skb, cp, 1); + rcu_read_lock(); + local = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, + &saddr, ipvsh, 1, + IP_VS_RT_MODE_LOCAL | + IP_VS_RT_MODE_NON_LOCAL | + IP_VS_RT_MODE_TUNNEL); + if (local < 0) + goto tx_error; + if (local) { + rcu_read_unlock(); + return ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 1); } + rt = (struct rt6_info *) skb_dst(skb); tdev = rt->dst.dev; - mtu = dst_mtu(&rt->dst) - sizeof(struct ipv6hdr); - if (mtu < IPV6_MIN_MTU) { - IP_VS_DBG_RL("%s(): mtu less than %d\n", __func__, - IPV6_MIN_MTU); - goto tx_error_put; - } - if (skb_dst(skb)) - skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); - - /* MTU checking: Notice that 'mtu' have been adjusted before hand */ - if (__mtu_check_toobig_v6(skb, mtu)) { - if (!skb->dev) { - struct net *net = dev_net(skb_dst(skb)->dev); - - skb->dev = net->loopback_dev; - } - /* only send ICMP too big on first fragment */ - if (!ipvsh->fragoffs) - icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); - IP_VS_DBG_RL("%s(): frag needed\n", __func__); - goto tx_error_put; - } - /* * Okay, now see if we can stuff it in the buffer as-is. 
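[Editor's note] The tunnel hunks above drop the per-caller MTU checks; __ip_vs_get_out_rt() now performs them once, subtracting the outer header in tunnel mode and honouring sysctl_pmtu_disc for the DF bit. A standalone sketch of that arithmetic (the constants and the fit test are illustrative, not kernel API):

#include <stdbool.h>
#include <stdio.h>

#define IPHDR_LEN    20	/* outer IPv4 header, no options */
#define IPV4_MIN_MTU 68	/* the "mtu less than 68" check above */

/* Would an IPIP-encapsulated packet of 'len' bytes fit the path MTU? */
static bool tunnel_fits(unsigned int len, unsigned int path_mtu,
			bool df_set, bool gso)
{
	unsigned int mtu = path_mtu - IPHDR_LEN;	/* room for encap */

	if (mtu < IPV4_MIN_MTU)
		return false;
	if (df_set && len > mtu && !gso)
		return false;	/* caller sends ICMP_FRAG_NEEDED */
	return true;
}

int main(void)
{
	printf("1480 over 1500: %d\n", tunnel_fits(1480, 1500, true, false));
	printf("1490 over 1500: %d\n", tunnel_fits(1490, 1500, true, false));
	return 0;
}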
*/ max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct ipv6hdr); - if (skb_headroom(skb) < max_headroom - || skb_cloned(skb) || skb_shared(skb)) { + if (skb_headroom(skb) < max_headroom || skb_cloned(skb)) { struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom); - if (!new_skb) { - dst_release(&rt->dst); - kfree_skb(skb); - IP_VS_ERR_RL("%s(): no memory\n", __func__); - return NF_STOLEN; - } + + if (!new_skb) + goto tx_error; consume_skb(skb); skb = new_skb; old_iph = ipv6_hdr(skb); @@ -1008,10 +959,6 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, skb_reset_network_header(skb); memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); - /* drop old route */ - skb_dst_drop(skb); - skb_dst_set(skb, &rt->dst); - /* * Push down and install the IPIP header. */ @@ -1029,25 +976,22 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, /* Another hack: avoid icmp_send in ip_fragment */ skb->local_df = 1; - ret = IP_VS_XMIT_TUNNEL(skb, cp); + ret = ip_vs_tunnel_xmit_prepare(skb, cp); if (ret == NF_ACCEPT) ip6_local_out(skb); else if (ret == NF_DROP) kfree_skb(skb); + rcu_read_unlock(); LeaveFunction(10); return NF_STOLEN; -tx_error_icmp: - dst_link_failure(skb); tx_error: kfree_skb(skb); + rcu_read_unlock(); LeaveFunction(10); return NF_STOLEN; -tx_error_put: - dst_release(&rt->dst); - goto tx_error; } #endif @@ -1060,59 +1004,36 @@ int ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh) { - struct rtable *rt; /* Route to the other host */ - struct iphdr *iph = ip_hdr(skb); - int mtu; + int local; EnterFunction(10); - if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip, - RT_TOS(iph->tos), - IP_VS_RT_MODE_LOCAL | - IP_VS_RT_MODE_NON_LOCAL | - IP_VS_RT_MODE_KNOWN_NH, NULL))) - goto tx_error_icmp; - if (rt->rt_flags & RTCF_LOCAL) { - ip_rt_put(rt); - IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 1); - } - - /* MTU checking */ - mtu = dst_mtu(&rt->dst); - if ((iph->frag_off & htons(IP_DF)) && skb->len > mtu && - !skb_is_gso(skb)) { - icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); - ip_rt_put(rt); - IP_VS_DBG_RL("%s(): frag needed\n", __func__); + rcu_read_lock(); + local = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip, + IP_VS_RT_MODE_LOCAL | + IP_VS_RT_MODE_NON_LOCAL | + IP_VS_RT_MODE_KNOWN_NH, NULL); + if (local < 0) goto tx_error; + if (local) { + rcu_read_unlock(); + return ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 1); } - /* - * Call ip_send_check because we are not sure it is called - * after ip_defrag. Is copy-on-write needed? 
- */ - if (unlikely((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)) { - ip_rt_put(rt); - return NF_STOLEN; - } ip_send_check(ip_hdr(skb)); - /* drop old route */ - skb_dst_drop(skb); - skb_dst_set(skb, &rt->dst); - /* Another hack: avoid icmp_send in ip_fragment */ skb->local_df = 1; - IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 0); + ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 0); + rcu_read_unlock(); LeaveFunction(10); return NF_STOLEN; - tx_error_icmp: - dst_link_failure(skb); tx_error: kfree_skb(skb); + rcu_read_unlock(); LeaveFunction(10); return NF_STOLEN; } @@ -1120,64 +1041,36 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, #ifdef CONFIG_IP_VS_IPV6 int ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, - struct ip_vs_protocol *pp, struct ip_vs_iphdr *iph) + struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh) { - struct rt6_info *rt; /* Route to the other host */ - int mtu; + int local; EnterFunction(10); - if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL, - 0, (IP_VS_RT_MODE_LOCAL | - IP_VS_RT_MODE_NON_LOCAL)))) - goto tx_error_icmp; - if (__ip_vs_is_local_route6(rt)) { - dst_release(&rt->dst); - IP_VS_XMIT(NFPROTO_IPV6, skb, cp, 1); - } - - /* MTU checking */ - mtu = dst_mtu(&rt->dst); - if (__mtu_check_toobig_v6(skb, mtu)) { - if (!skb->dev) { - struct net *net = dev_net(skb_dst(skb)->dev); - - skb->dev = net->loopback_dev; - } - /* only send ICMP too big on first fragment */ - if (!iph->fragoffs) - icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); - dst_release(&rt->dst); - IP_VS_DBG_RL("%s(): frag needed\n", __func__); + rcu_read_lock(); + local = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL, + ipvsh, 0, + IP_VS_RT_MODE_LOCAL | + IP_VS_RT_MODE_NON_LOCAL); + if (local < 0) goto tx_error; + if (local) { + rcu_read_unlock(); + return ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 1); } - /* - * Call ip_send_check because we are not sure it is called - * after ip_defrag. Is copy-on-write needed? - */ - skb = skb_share_check(skb, GFP_ATOMIC); - if (unlikely(skb == NULL)) { - dst_release(&rt->dst); - return NF_STOLEN; - } - - /* drop old route */ - skb_dst_drop(skb); - skb_dst_set(skb, &rt->dst); - /* Another hack: avoid icmp_send in ip_fragment */ skb->local_df = 1; - IP_VS_XMIT(NFPROTO_IPV6, skb, cp, 0); + ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 0); + rcu_read_unlock(); LeaveFunction(10); return NF_STOLEN; -tx_error_icmp: - dst_link_failure(skb); tx_error: kfree_skb(skb); + rcu_read_unlock(); LeaveFunction(10); return NF_STOLEN; } @@ -1194,10 +1087,9 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_iphdr *iph) { struct rtable *rt; /* Route to the other host */ - int mtu; int rc; int local; - int rt_mode; + int rt_mode, was_input; EnterFunction(10); @@ -1217,16 +1109,17 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, /* * mangle and send the packet here (only for VS/NAT) */ + was_input = rt_is_input_route(skb_rtable(skb)); /* LOCALNODE from FORWARD hook is not supported */ rt_mode = (hooknum != NF_INET_FORWARD) ? 
IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL | IP_VS_RT_MODE_RDR : IP_VS_RT_MODE_NON_LOCAL; - if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip, - RT_TOS(ip_hdr(skb)->tos), - rt_mode, NULL))) - goto tx_error_icmp; - local = rt->rt_flags & RTCF_LOCAL; + rcu_read_lock(); + local = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip, rt_mode, NULL); + if (local < 0) + goto tx_error; + rt = skb_rtable(skb); /* * Avoid duplicate tuple in reply direction for NAT traffic @@ -1241,82 +1134,51 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, IP_VS_DBG(10, "%s(): " "stopping DNAT to local address %pI4\n", __func__, &cp->daddr.ip); - goto tx_error_put; + goto tx_error; } } #endif /* From world but DNAT to loopback address? */ - if (local && ipv4_is_loopback(cp->daddr.ip) && - rt_is_input_route(skb_rtable(skb))) { + if (local && ipv4_is_loopback(cp->daddr.ip) && was_input) { IP_VS_DBG(1, "%s(): " "stopping DNAT to loopback %pI4\n", __func__, &cp->daddr.ip); - goto tx_error_put; - } - - /* MTU checking */ - mtu = dst_mtu(&rt->dst); - if ((skb->len > mtu) && (ip_hdr(skb)->frag_off & htons(IP_DF)) && - !skb_is_gso(skb)) { - icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); - IP_VS_DBG_RL("%s(): frag needed\n", __func__); - goto tx_error_put; + goto tx_error; } /* copy-on-write the packet before mangling it */ if (!skb_make_writable(skb, offset)) - goto tx_error_put; + goto tx_error; if (skb_cow(skb, rt->dst.dev->hard_header_len)) - goto tx_error_put; + goto tx_error; ip_vs_nat_icmp(skb, pp, cp, 0); - if (!local) { - /* drop the old route when skb is not shared */ - skb_dst_drop(skb); - skb_dst_set(skb, &rt->dst); - } else { - ip_rt_put(rt); - /* - * Some IPv4 replies get local address from routes, - * not from iph, so while we DNAT after routing - * we need this second input/output route. - */ - if (!__ip_vs_reroute_locally(skb)) - goto tx_error; - } - /* Another hack: avoid icmp_send in ip_fragment */ skb->local_df = 1; - IP_VS_XMIT_NAT(NFPROTO_IPV4, skb, cp, local); - - rc = NF_STOLEN; + rc = ip_vs_nat_send_or_cont(NFPROTO_IPV4, skb, cp, local); + rcu_read_unlock(); goto out; - tx_error_icmp: - dst_link_failure(skb); tx_error: - dev_kfree_skb(skb); + kfree_skb(skb); + rcu_read_unlock(); rc = NF_STOLEN; out: LeaveFunction(10); return rc; - tx_error_put: - ip_rt_put(rt); - goto tx_error; } #ifdef CONFIG_IP_VS_IPV6 int ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp, int offset, unsigned int hooknum, - struct ip_vs_iphdr *iph) + struct ip_vs_iphdr *ipvsh) { struct rt6_info *rt; /* Route to the other host */ - int mtu; int rc; int local; int rt_mode; @@ -1328,7 +1190,7 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, translate address/port back */ if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) { if (cp->packet_xmit) - rc = cp->packet_xmit(skb, cp, pp, iph); + rc = cp->packet_xmit(skb, cp, pp, ipvsh); else rc = NF_ACCEPT; /* do not touch skb anymore */ @@ -1344,11 +1206,12 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, rt_mode = (hooknum != NF_INET_FORWARD) ? 
IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL | IP_VS_RT_MODE_RDR : IP_VS_RT_MODE_NON_LOCAL; - if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL, - 0, rt_mode))) - goto tx_error_icmp; - - local = __ip_vs_is_local_route6(rt); + rcu_read_lock(); + local = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL, + ipvsh, 0, rt_mode); + if (local < 0) + goto tx_error; + rt = (struct rt6_info *) skb_dst(skb); /* * Avoid duplicate tuple in reply direction for NAT traffic * to local address when connection is sync-ed @@ -1362,7 +1225,7 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, IP_VS_DBG(10, "%s(): " "stopping DNAT to local address %pI6\n", __func__, &cp->daddr.in6); - goto tx_error_put; + goto tx_error; } } #endif @@ -1373,60 +1236,31 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, IP_VS_DBG(1, "%s(): " "stopping DNAT to loopback %pI6\n", __func__, &cp->daddr.in6); - goto tx_error_put; - } - - /* MTU checking */ - mtu = dst_mtu(&rt->dst); - if (__mtu_check_toobig_v6(skb, mtu)) { - if (!skb->dev) { - struct net *net = dev_net(skb_dst(skb)->dev); - - skb->dev = net->loopback_dev; - } - /* only send ICMP too big on first fragment */ - if (!iph->fragoffs) - icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); - IP_VS_DBG_RL("%s(): frag needed\n", __func__); - goto tx_error_put; + goto tx_error; } /* copy-on-write the packet before mangling it */ if (!skb_make_writable(skb, offset)) - goto tx_error_put; + goto tx_error; if (skb_cow(skb, rt->dst.dev->hard_header_len)) - goto tx_error_put; + goto tx_error; ip_vs_nat_icmp_v6(skb, pp, cp, 0); - if (!local || !skb->dev) { - /* drop the old route when skb is not shared */ - skb_dst_drop(skb); - skb_dst_set(skb, &rt->dst); - } else { - /* destined to loopback, do we need to change route? 
*/ - dst_release(&rt->dst); - } - /* Another hack: avoid icmp_send in ip_fragment */ skb->local_df = 1; - IP_VS_XMIT_NAT(NFPROTO_IPV6, skb, cp, local); - - rc = NF_STOLEN; + rc = ip_vs_nat_send_or_cont(NFPROTO_IPV6, skb, cp, local); + rcu_read_unlock(); goto out; -tx_error_icmp: - dst_link_failure(skb); tx_error: - dev_kfree_skb(skb); + kfree_skb(skb); + rcu_read_unlock(); rc = NF_STOLEN; out: LeaveFunction(10); return rc; -tx_error_put: - dst_release(&rt->dst); - goto tx_error; } #endif diff --git a/net/netfilter/nf_conntrack_acct.c b/net/netfilter/nf_conntrack_acct.c index 7df424e2d10c..2d3030ab5b61 100644 --- a/net/netfilter/nf_conntrack_acct.c +++ b/net/netfilter/nf_conntrack_acct.c @@ -106,36 +106,26 @@ static void nf_conntrack_acct_fini_sysctl(struct net *net) } #endif -int nf_conntrack_acct_init(struct net *net) +int nf_conntrack_acct_pernet_init(struct net *net) { - int ret; - net->ct.sysctl_acct = nf_ct_acct; + return nf_conntrack_acct_init_sysctl(net); +} - if (net_eq(net, &init_net)) { - ret = nf_ct_extend_register(&acct_extend); - if (ret < 0) { - printk(KERN_ERR "nf_conntrack_acct: Unable to register extension\n"); - goto out_extend_register; - } - } +void nf_conntrack_acct_pernet_fini(struct net *net) +{ + nf_conntrack_acct_fini_sysctl(net); +} - ret = nf_conntrack_acct_init_sysctl(net); +int nf_conntrack_acct_init(void) +{ + int ret = nf_ct_extend_register(&acct_extend); if (ret < 0) - goto out_sysctl; - - return 0; - -out_sysctl: - if (net_eq(net, &init_net)) - nf_ct_extend_unregister(&acct_extend); -out_extend_register: + pr_err("nf_conntrack_acct: Unable to register extension\n"); return ret; } -void nf_conntrack_acct_fini(struct net *net) +void nf_conntrack_acct_fini(void) { - nf_conntrack_acct_fini_sysctl(net); - if (net_eq(net, &init_net)) - nf_ct_extend_unregister(&acct_extend); + nf_ct_extend_unregister(&acct_extend); } diff --git a/net/netfilter/nf_conntrack_amanda.c b/net/netfilter/nf_conntrack_amanda.c index c514fe6033d2..b8b95f4027ca 100644 --- a/net/netfilter/nf_conntrack_amanda.c +++ b/net/netfilter/nf_conntrack_amanda.c @@ -2,6 +2,7 @@ * * (C) 2002 by Brian J. 
Murrell <netfilter@interlinx.bc.ca> * based on HW's ip_conntrack_irc.c as well as other modules + * (C) 2006 Patrick McHardy <kaber@trash.net> * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -145,6 +146,7 @@ static int amanda_help(struct sk_buff *skb, exp = nf_ct_expect_alloc(ct); if (exp == NULL) { + nf_ct_helper_log(skb, ct, "cannot alloc expectation"); ret = NF_DROP; goto out; } @@ -158,8 +160,10 @@ static int amanda_help(struct sk_buff *skb, if (nf_nat_amanda && ct->status & IPS_NAT_MASK) ret = nf_nat_amanda(skb, ctinfo, protoff, off - dataoff, len, exp); - else if (nf_ct_expect_related(exp) != 0) + else if (nf_ct_expect_related(exp) != 0) { + nf_ct_helper_log(skb, ct, "cannot add expectation"); ret = NF_DROP; + } nf_ct_expect_put(exp); } diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index e4a0c4fb3a7c..0283baedcdfb 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -5,6 +5,7 @@ /* (C) 1999-2001 Paul `Rusty' Russell * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org> * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org> + * (C) 2005-2012 Patrick McHardy <kaber@trash.net> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -45,8 +46,10 @@ #include <net/netfilter/nf_conntrack_zones.h> #include <net/netfilter/nf_conntrack_timestamp.h> #include <net/netfilter/nf_conntrack_timeout.h> +#include <net/netfilter/nf_conntrack_labels.h> #include <net/netfilter/nf_nat.h> #include <net/netfilter/nf_nat_core.h> +#include <net/netfilter/nf_nat_helper.h> #define NF_CONNTRACK_VERSION "0.5.0" @@ -263,7 +266,7 @@ static void death_by_event(unsigned long ul_conntrack) if (nf_conntrack_event(IPCT_DESTROY, ct) < 0) { /* bad luck, let's retry again */ ecache->timeout.expires = jiffies + - (random32() % net->ct.sysctl_events_retry_timeout); + (prandom_u32() % net->ct.sysctl_events_retry_timeout); add_timer(&ecache->timeout); return; } @@ -282,7 +285,7 @@ void nf_ct_dying_timeout(struct nf_conn *ct) /* set a new timer to retry event delivery */ setup_timer(&ecache->timeout, death_by_event, (unsigned long)ct); ecache->timeout.expires = jiffies + - (random32() % net->ct.sysctl_events_retry_timeout); + (prandom_u32() % net->ct.sysctl_events_retry_timeout); add_timer(&ecache->timeout); } EXPORT_SYMBOL_GPL(nf_ct_dying_timeout); @@ -763,6 +766,7 @@ void nf_conntrack_free(struct nf_conn *ct) } EXPORT_SYMBOL_GPL(nf_conntrack_free); + /* Allocate a new conntrack: we return -ENOMEM if classification failed due to stress. Otherwise it really is unclassifiable. */ static struct nf_conntrack_tuple_hash * @@ -809,6 +813,7 @@ init_conntrack(struct net *net, struct nf_conn *tmpl, nf_ct_acct_ext_add(ct, GFP_ATOMIC); nf_ct_tstamp_ext_add(ct, GFP_ATOMIC); + nf_ct_labels_ext_add(ct); ecache = tmpl ? nf_ct_ecache_find(tmpl) : NULL; nf_ct_ecache_ext_add(ct, ecache ? 
ecache->ctmask : 0, @@ -1256,7 +1261,7 @@ void nf_ct_iterate_cleanup(struct net *net, EXPORT_SYMBOL_GPL(nf_ct_iterate_cleanup); struct __nf_ct_flush_report { - u32 pid; + u32 portid; int report; }; @@ -1271,7 +1276,7 @@ static int kill_report(struct nf_conn *i, void *data) /* If we fail to deliver the event, death_by_timeout() will retry */ if (nf_conntrack_event_report(IPCT_DESTROY, i, - fr->pid, fr->report) < 0) + fr->portid, fr->report) < 0) return 1; /* Avoid the delivery of the destroy event in death_by_timeout(). */ @@ -1294,10 +1299,10 @@ void nf_ct_free_hashtable(void *hash, unsigned int size) } EXPORT_SYMBOL_GPL(nf_ct_free_hashtable); -void nf_conntrack_flush_report(struct net *net, u32 pid, int report) +void nf_conntrack_flush_report(struct net *net, u32 portid, int report) { struct __nf_ct_flush_report fr = { - .pid = pid, + .portid = portid, .report = report, }; nf_ct_iterate_cleanup(net, kill_report, &fr); @@ -1331,57 +1336,78 @@ static int untrack_refs(void) return cnt; } -static void nf_conntrack_cleanup_init_net(void) +void nf_conntrack_cleanup_start(void) +{ + RCU_INIT_POINTER(ip_ct_attach, NULL); +} + +void nf_conntrack_cleanup_end(void) { + RCU_INIT_POINTER(nf_ct_destroy, NULL); while (untrack_refs() > 0) schedule(); #ifdef CONFIG_NF_CONNTRACK_ZONES nf_ct_extend_unregister(&nf_ct_zone_extend); #endif + nf_conntrack_proto_fini(); + nf_conntrack_labels_fini(); + nf_conntrack_helper_fini(); + nf_conntrack_timeout_fini(); + nf_conntrack_ecache_fini(); + nf_conntrack_tstamp_fini(); + nf_conntrack_acct_fini(); + nf_conntrack_expect_fini(); } -static void nf_conntrack_cleanup_net(struct net *net) +/* + * Mishearing the voices in his head, our hero wonders how he's + * supposed to kill the mall. + */ +void nf_conntrack_cleanup_net(struct net *net) { - i_see_dead_people: - nf_ct_iterate_cleanup(net, kill_all, NULL); - nf_ct_release_dying_list(net); - if (atomic_read(&net->ct.count) != 0) { - schedule(); - goto i_see_dead_people; - } + LIST_HEAD(single); - nf_ct_free_hashtable(net->ct.hash, net->ct.htable_size); - nf_conntrack_helper_fini(net); - nf_conntrack_timeout_fini(net); - nf_conntrack_ecache_fini(net); - nf_conntrack_tstamp_fini(net); - nf_conntrack_acct_fini(net); - nf_conntrack_expect_fini(net); - kmem_cache_destroy(net->ct.nf_conntrack_cachep); - kfree(net->ct.slabname); - free_percpu(net->ct.stat); + list_add(&net->exit_list, &single); + nf_conntrack_cleanup_net_list(&single); } -/* Mishearing the voices in his head, our hero wonders how he's - supposed to kill the mall. */ -void nf_conntrack_cleanup(struct net *net) +void nf_conntrack_cleanup_net_list(struct list_head *net_exit_list) { - if (net_eq(net, &init_net)) - RCU_INIT_POINTER(ip_ct_attach, NULL); + int busy; + struct net *net; - /* This makes sure all current packets have passed through - netfilter framework. Roll on, two-stage module - delete... */ + /* + * This makes sure all current packets have passed through + * netfilter framework. Roll on, two-stage module + * delete... 
+ */ synchronize_net(); - nf_conntrack_proto_fini(net); - nf_conntrack_cleanup_net(net); -} +i_see_dead_people: + busy = 0; + list_for_each_entry(net, net_exit_list, exit_list) { + nf_ct_iterate_cleanup(net, kill_all, NULL); + nf_ct_release_dying_list(net); + if (atomic_read(&net->ct.count) != 0) + busy = 1; + } + if (busy) { + schedule(); + goto i_see_dead_people; + } -void nf_conntrack_cleanup_end(void) -{ - RCU_INIT_POINTER(nf_ct_destroy, NULL); - nf_conntrack_cleanup_init_net(); + list_for_each_entry(net, net_exit_list, exit_list) { + nf_ct_free_hashtable(net->ct.hash, net->ct.htable_size); + nf_conntrack_proto_pernet_fini(net); + nf_conntrack_helper_pernet_fini(net); + nf_conntrack_ecache_pernet_fini(net); + nf_conntrack_tstamp_pernet_fini(net); + nf_conntrack_acct_pernet_fini(net); + nf_conntrack_expect_pernet_fini(net); + kmem_cache_destroy(net->ct.nf_conntrack_cachep); + kfree(net->ct.slabname); + free_percpu(net->ct.stat); + } } void *nf_ct_alloc_hashtable(unsigned int *sizep, int nulls) @@ -1474,7 +1500,7 @@ void nf_ct_untracked_status_or(unsigned long bits) } EXPORT_SYMBOL_GPL(nf_ct_untracked_status_or); -static int nf_conntrack_init_init_net(void) +int nf_conntrack_init_start(void) { int max_factor = 8; int ret, cpu; @@ -1501,11 +1527,44 @@ static int nf_conntrack_init_init_net(void) printk(KERN_INFO "nf_conntrack version %s (%u buckets, %d max)\n", NF_CONNTRACK_VERSION, nf_conntrack_htable_size, nf_conntrack_max); + + ret = nf_conntrack_expect_init(); + if (ret < 0) + goto err_expect; + + ret = nf_conntrack_acct_init(); + if (ret < 0) + goto err_acct; + + ret = nf_conntrack_tstamp_init(); + if (ret < 0) + goto err_tstamp; + + ret = nf_conntrack_ecache_init(); + if (ret < 0) + goto err_ecache; + + ret = nf_conntrack_timeout_init(); + if (ret < 0) + goto err_timeout; + + ret = nf_conntrack_helper_init(); + if (ret < 0) + goto err_helper; + + ret = nf_conntrack_labels_init(); + if (ret < 0) + goto err_labels; + #ifdef CONFIG_NF_CONNTRACK_ZONES ret = nf_ct_extend_register(&nf_ct_zone_extend); if (ret < 0) goto err_extend; #endif + ret = nf_conntrack_proto_init(); + if (ret < 0) + goto err_proto; + /* Set up fake conntrack: to never be deleted, not in any hashes */ for_each_possible_cpu(cpu) { struct nf_conn *ct = &per_cpu(nf_conntrack_untracked, cpu); @@ -1516,12 +1575,38 @@ static int nf_conntrack_init_init_net(void) nf_ct_untracked_status_or(IPS_CONFIRMED | IPS_UNTRACKED); return 0; +err_proto: #ifdef CONFIG_NF_CONNTRACK_ZONES + nf_ct_extend_unregister(&nf_ct_zone_extend); err_extend: #endif + nf_conntrack_labels_fini(); +err_labels: + nf_conntrack_helper_fini(); +err_helper: + nf_conntrack_timeout_fini(); +err_timeout: + nf_conntrack_ecache_fini(); +err_ecache: + nf_conntrack_tstamp_fini(); +err_tstamp: + nf_conntrack_acct_fini(); +err_acct: + nf_conntrack_expect_fini(); +err_expect: return ret; } +void nf_conntrack_init_end(void) +{ + /* For use by REJECT target */ + RCU_INIT_POINTER(ip_ct_attach, nf_conntrack_attach); + RCU_INIT_POINTER(nf_ct_destroy, destroy_conntrack); + + /* Howto get NAT offsets */ + RCU_INIT_POINTER(nf_ct_nat_offset, NULL); +} + /* * We need to use special "null" values, not used in hash table */ @@ -1529,7 +1614,7 @@ err_extend: #define DYING_NULLS_VAL ((1<<30)+1) #define TEMPLATE_NULLS_VAL ((1<<30)+2) -static int nf_conntrack_init_net(struct net *net) +int nf_conntrack_init_net(struct net *net) { int ret; @@ -1565,35 +1650,36 @@ static int nf_conntrack_init_net(struct net *net) printk(KERN_ERR "Unable to create nf_conntrack_hash\n"); goto err_hash; } - 
ret = nf_conntrack_expect_init(net); + ret = nf_conntrack_expect_pernet_init(net); if (ret < 0) goto err_expect; - ret = nf_conntrack_acct_init(net); + ret = nf_conntrack_acct_pernet_init(net); if (ret < 0) goto err_acct; - ret = nf_conntrack_tstamp_init(net); + ret = nf_conntrack_tstamp_pernet_init(net); if (ret < 0) goto err_tstamp; - ret = nf_conntrack_ecache_init(net); + ret = nf_conntrack_ecache_pernet_init(net); if (ret < 0) goto err_ecache; - ret = nf_conntrack_timeout_init(net); - if (ret < 0) - goto err_timeout; - ret = nf_conntrack_helper_init(net); + ret = nf_conntrack_helper_pernet_init(net); if (ret < 0) goto err_helper; + ret = nf_conntrack_proto_pernet_init(net); + if (ret < 0) + goto err_proto; return 0; + +err_proto: + nf_conntrack_helper_pernet_fini(net); err_helper: - nf_conntrack_timeout_fini(net); -err_timeout: - nf_conntrack_ecache_fini(net); + nf_conntrack_ecache_pernet_fini(net); err_ecache: - nf_conntrack_tstamp_fini(net); + nf_conntrack_tstamp_pernet_fini(net); err_tstamp: - nf_conntrack_acct_fini(net); + nf_conntrack_acct_pernet_fini(net); err_acct: - nf_conntrack_expect_fini(net); + nf_conntrack_expect_pernet_fini(net); err_expect: nf_ct_free_hashtable(net->ct.hash, net->ct.htable_size); err_hash: @@ -1610,38 +1696,3 @@ s16 (*nf_ct_nat_offset)(const struct nf_conn *ct, enum ip_conntrack_dir dir, u32 seq); EXPORT_SYMBOL_GPL(nf_ct_nat_offset); - -int nf_conntrack_init(struct net *net) -{ - int ret; - - if (net_eq(net, &init_net)) { - ret = nf_conntrack_init_init_net(); - if (ret < 0) - goto out_init_net; - } - ret = nf_conntrack_proto_init(net); - if (ret < 0) - goto out_proto; - ret = nf_conntrack_init_net(net); - if (ret < 0) - goto out_net; - - if (net_eq(net, &init_net)) { - /* For use by REJECT target */ - RCU_INIT_POINTER(ip_ct_attach, nf_conntrack_attach); - RCU_INIT_POINTER(nf_ct_destroy, destroy_conntrack); - - /* Howto get NAT offsets */ - RCU_INIT_POINTER(nf_ct_nat_offset, NULL); - } - return 0; - -out_net: - nf_conntrack_proto_fini(net); -out_proto: - if (net_eq(net, &init_net)) - nf_conntrack_cleanup_init_net(); -out_init_net: - return ret; -} diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c index faa978f1714b..1df176146567 100644 --- a/net/netfilter/nf_conntrack_ecache.c +++ b/net/netfilter/nf_conntrack_ecache.c @@ -1,8 +1,10 @@ /* Event cache for netfilter. 
*/ -/* (C) 1999-2001 Paul `Rusty' Russell - * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org> - * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org> +/* + * (C) 2005 Harald Welte <laforge@gnumonks.org> + * (C) 2005 Patrick McHardy <kaber@trash.net> + * (C) 2005-2006 Netfilter Core Team <coreteam@netfilter.org> + * (C) 2005 USAGI/WIDE Project <http://www.linux-ipv6.org> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -233,38 +235,27 @@ static void nf_conntrack_event_fini_sysctl(struct net *net) } #endif /* CONFIG_SYSCTL */ -int nf_conntrack_ecache_init(struct net *net) +int nf_conntrack_ecache_pernet_init(struct net *net) { - int ret; - net->ct.sysctl_events = nf_ct_events; net->ct.sysctl_events_retry_timeout = nf_ct_events_retry_timeout; + return nf_conntrack_event_init_sysctl(net); +} - if (net_eq(net, &init_net)) { - ret = nf_ct_extend_register(&event_extend); - if (ret < 0) { - printk(KERN_ERR "nf_ct_event: Unable to register " - "event extension.\n"); - goto out_extend_register; - } - } +void nf_conntrack_ecache_pernet_fini(struct net *net) +{ + nf_conntrack_event_fini_sysctl(net); +} - ret = nf_conntrack_event_init_sysctl(net); +int nf_conntrack_ecache_init(void) +{ + int ret = nf_ct_extend_register(&event_extend); if (ret < 0) - goto out_sysctl; - - return 0; - -out_sysctl: - if (net_eq(net, &init_net)) - nf_ct_extend_unregister(&event_extend); -out_extend_register: + pr_err("nf_ct_event: Unable to register event extension.\n"); return ret; } -void nf_conntrack_ecache_fini(struct net *net) +void nf_conntrack_ecache_fini(void) { - nf_conntrack_event_fini_sysctl(net); - if (net_eq(net, &init_net)) - nf_ct_extend_unregister(&event_extend); + nf_ct_extend_unregister(&event_extend); } diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index 527651a53a45..c63b618cd619 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -3,6 +3,7 @@ /* (C) 1999-2001 Paul `Rusty' Russell * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org> * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org> + * (c) 2005-2012 Patrick McHardy <kaber@trash.net> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -40,7 +41,7 @@ static struct kmem_cache *nf_ct_expect_cachep __read_mostly; /* nf_conntrack_expect helper functions */ void nf_ct_unlink_expect_report(struct nf_conntrack_expect *exp, - u32 pid, int report) + u32 portid, int report) { struct nf_conn_help *master_help = nfct_help(exp->master); struct net *net = nf_ct_exp_net(exp); @@ -54,7 +55,7 @@ void nf_ct_unlink_expect_report(struct nf_conntrack_expect *exp, hlist_del(&exp->lnode); master_help->expecting[exp->class]--; - nf_ct_expect_event_report(IPEXP_DESTROY, exp, pid, report); + nf_ct_expect_event_report(IPEXP_DESTROY, exp, portid, report); nf_ct_expect_put(exp); NF_CT_STAT_INC(net, expect_delete); @@ -90,14 +91,13 @@ __nf_ct_expect_find(struct net *net, u16 zone, const struct nf_conntrack_tuple *tuple) { struct nf_conntrack_expect *i; - struct hlist_node *n; unsigned int h; if (!net->ct.expect_count) return NULL; h = nf_ct_expect_dst_hash(tuple); - hlist_for_each_entry_rcu(i, n, &net->ct.expect_hash[h], hnode) { + hlist_for_each_entry_rcu(i, &net->ct.expect_hash[h], hnode) { if (nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask) && nf_ct_zone(i->master) == zone) 
return i; @@ -130,14 +130,13 @@ nf_ct_find_expectation(struct net *net, u16 zone, const struct nf_conntrack_tuple *tuple) { struct nf_conntrack_expect *i, *exp = NULL; - struct hlist_node *n; unsigned int h; if (!net->ct.expect_count) return NULL; h = nf_ct_expect_dst_hash(tuple); - hlist_for_each_entry(i, n, &net->ct.expect_hash[h], hnode) { + hlist_for_each_entry(i, &net->ct.expect_hash[h], hnode) { if (!(i->flags & NF_CT_EXPECT_INACTIVE) && nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask) && nf_ct_zone(i->master) == zone) { @@ -172,13 +171,13 @@ void nf_ct_remove_expectations(struct nf_conn *ct) { struct nf_conn_help *help = nfct_help(ct); struct nf_conntrack_expect *exp; - struct hlist_node *n, *next; + struct hlist_node *next; /* Optimization: most connection never expect any others. */ if (!help) return; - hlist_for_each_entry_safe(exp, n, next, &help->expectations, lnode) { + hlist_for_each_entry_safe(exp, next, &help->expectations, lnode) { if (del_timer(&exp->timeout)) { nf_ct_unlink_expect(exp); nf_ct_expect_put(exp); @@ -348,9 +347,8 @@ static void evict_oldest_expect(struct nf_conn *master, { struct nf_conn_help *master_help = nfct_help(master); struct nf_conntrack_expect *exp, *last = NULL; - struct hlist_node *n; - hlist_for_each_entry(exp, n, &master_help->expectations, lnode) { + hlist_for_each_entry(exp, &master_help->expectations, lnode) { if (exp->class == new->class) last = exp; } @@ -369,7 +367,7 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect) struct nf_conn_help *master_help = nfct_help(master); struct nf_conntrack_helper *helper; struct net *net = nf_ct_exp_net(expect); - struct hlist_node *n, *next; + struct hlist_node *next; unsigned int h; int ret = 1; @@ -378,7 +376,7 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect) goto out; } h = nf_ct_expect_dst_hash(&expect->tuple); - hlist_for_each_entry_safe(i, n, next, &net->ct.expect_hash[h], hnode) { + hlist_for_each_entry_safe(i, next, &net->ct.expect_hash[h], hnode) { if (expect_matches(i, expect)) { if (del_timer(&i->timeout)) { nf_ct_unlink_expect(i); @@ -415,7 +413,7 @@ out: } int nf_ct_expect_related_report(struct nf_conntrack_expect *expect, - u32 pid, int report) + u32 portid, int report) { int ret; @@ -428,7 +426,7 @@ int nf_ct_expect_related_report(struct nf_conntrack_expect *expect, if (ret < 0) goto out; spin_unlock_bh(&nf_conntrack_lock); - nf_ct_expect_event_report(IPEXP_NEW, expect, pid, report); + nf_ct_expect_event_report(IPEXP_NEW, expect, portid, report); return ret; out: spin_unlock_bh(&nf_conntrack_lock); @@ -571,7 +569,8 @@ static int exp_proc_init(struct net *net) #ifdef CONFIG_NF_CONNTRACK_PROCFS struct proc_dir_entry *proc; - proc = proc_net_fops_create(net, "nf_conntrack_expect", 0440, &exp_file_ops); + proc = proc_create("nf_conntrack_expect", 0440, net->proc_net, + &exp_file_ops); if (!proc) return -ENOMEM; #endif /* CONFIG_NF_CONNTRACK_PROCFS */ @@ -581,59 +580,56 @@ static int exp_proc_init(struct net *net) static void exp_proc_remove(struct net *net) { #ifdef CONFIG_NF_CONNTRACK_PROCFS - proc_net_remove(net, "nf_conntrack_expect"); + remove_proc_entry("nf_conntrack_expect", net->proc_net); #endif /* CONFIG_NF_CONNTRACK_PROCFS */ } module_param_named(expect_hashsize, nf_ct_expect_hsize, uint, 0400); -int nf_conntrack_expect_init(struct net *net) +int nf_conntrack_expect_pernet_init(struct net *net) { int err = -ENOMEM; - if (net_eq(net, &init_net)) { - if (!nf_ct_expect_hsize) { - nf_ct_expect_hsize = net->ct.htable_size / 256; - if 
(!nf_ct_expect_hsize) - nf_ct_expect_hsize = 1; - } - nf_ct_expect_max = nf_ct_expect_hsize * 4; - } - net->ct.expect_count = 0; net->ct.expect_hash = nf_ct_alloc_hashtable(&nf_ct_expect_hsize, 0); if (net->ct.expect_hash == NULL) goto err1; - if (net_eq(net, &init_net)) { - nf_ct_expect_cachep = kmem_cache_create("nf_conntrack_expect", - sizeof(struct nf_conntrack_expect), - 0, 0, NULL); - if (!nf_ct_expect_cachep) - goto err2; - } - err = exp_proc_init(net); if (err < 0) - goto err3; + goto err2; return 0; - -err3: - if (net_eq(net, &init_net)) - kmem_cache_destroy(nf_ct_expect_cachep); err2: nf_ct_free_hashtable(net->ct.expect_hash, nf_ct_expect_hsize); err1: return err; } -void nf_conntrack_expect_fini(struct net *net) +void nf_conntrack_expect_pernet_fini(struct net *net) { exp_proc_remove(net); - if (net_eq(net, &init_net)) { - rcu_barrier(); /* Wait for call_rcu() before destroy */ - kmem_cache_destroy(nf_ct_expect_cachep); - } nf_ct_free_hashtable(net->ct.expect_hash, nf_ct_expect_hsize); } + +int nf_conntrack_expect_init(void) +{ + if (!nf_ct_expect_hsize) { + nf_ct_expect_hsize = nf_conntrack_htable_size / 256; + if (!nf_ct_expect_hsize) + nf_ct_expect_hsize = 1; + } + nf_ct_expect_max = nf_ct_expect_hsize * 4; + nf_ct_expect_cachep = kmem_cache_create("nf_conntrack_expect", + sizeof(struct nf_conntrack_expect), + 0, 0, NULL); + if (!nf_ct_expect_cachep) + return -ENOMEM; + return 0; +} + +void nf_conntrack_expect_fini(void) +{ + rcu_barrier(); /* Wait for call_rcu() before destroy */ + kmem_cache_destroy(nf_ct_expect_cachep); +} diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c index 1ce3befb7c8a..6b217074237b 100644 --- a/net/netfilter/nf_conntrack_ftp.c +++ b/net/netfilter/nf_conntrack_ftp.c @@ -3,6 +3,7 @@ /* (C) 1999-2001 Paul `Rusty' Russell * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org> + * (C) 2006-2012 Patrick McHardy <kaber@trash.net> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -435,8 +436,8 @@ skip_nl_seq: connection tracking, not packet filtering. However, it is necessary for accurate tracking in this case. */ - pr_debug("conntrack_ftp: partial %s %u+%u\n", - search[dir][i].pattern, ntohl(th->seq), datalen); + nf_ct_helper_log(skb, ct, "partial matching of `%s'", + search[dir][i].pattern); ret = NF_DROP; goto out; } else if (found == 0) { /* No match */ @@ -450,6 +451,7 @@ skip_nl_seq: exp = nf_ct_expect_alloc(ct); if (exp == NULL) { + nf_ct_helper_log(skb, ct, "cannot alloc expectation"); ret = NF_DROP; goto out; } @@ -500,9 +502,10 @@ skip_nl_seq: protoff, matchoff, matchlen, exp); else { /* Can't expect this? Best to drop packet now. */ - if (nf_ct_expect_related(exp) != 0) + if (nf_ct_expect_related(exp) != 0) { + nf_ct_helper_log(skb, ct, "cannot add expectation"); ret = NF_DROP; - else + } else ret = NF_ACCEPT; } diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c index 962795e839ab..bdebd03bc8cd 100644 --- a/net/netfilter/nf_conntrack_h323_main.c +++ b/net/netfilter/nf_conntrack_h323_main.c @@ -2,6 +2,7 @@ * H.323 connection tracking helper * * Copyright (c) 2006 Jing Min Zhao <zhaojingmin@users.sourceforge.net> + * Copyright (c) 2006-2012 Patrick McHardy <kaber@trash.net> * * This source code is licensed under General Public License version 2. 
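[Editor's aside — not part of the diff.] The mechanical hlist_for_each_entry*() conversions running through the expect and helper hunks above all come from the iterator API dropping its explicit struct hlist_node cursor; the entry pointer is now derived internally via container_of(). A minimal sketch of the before/after, using a hypothetical demo_entry table (none of the names below are from this patch set):

	#include <linux/list.h>

	struct demo_entry {
		struct hlist_node hnode;
		int key;
	};

	static struct hlist_head demo_hash[16];

	static struct demo_entry *demo_find(int key)
	{
		struct demo_entry *e;

		/*
		 * Old API threaded a cursor:
		 *	struct hlist_node *n;
		 *	hlist_for_each_entry(e, n, &demo_hash[key & 15], hnode)
		 * New API derives the entry from the node internally:
		 */
		hlist_for_each_entry(e, &demo_hash[key & 15], hnode) {
			if (e->key == key)
				return e;
		}
		return NULL;
	}

The same transformation applies to the _rcu and _safe variants seen in the hunks; only the cursor argument disappears, the list heads and member names are unchanged.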
* @@ -623,7 +624,7 @@ static int h245_help(struct sk_buff *skb, unsigned int protoff, drop: spin_unlock_bh(&nf_h323_lock); - net_info_ratelimited("nf_ct_h245: packet dropped\n"); + nf_ct_helper_log(skb, ct, "cannot process H.245 message"); return NF_DROP; } @@ -1197,7 +1198,7 @@ static int q931_help(struct sk_buff *skb, unsigned int protoff, drop: spin_unlock_bh(&nf_h323_lock); - net_info_ratelimited("nf_ct_q931: packet dropped\n"); + nf_ct_helper_log(skb, ct, "cannot process Q.931 message"); return NF_DROP; } @@ -1795,7 +1796,7 @@ static int ras_help(struct sk_buff *skb, unsigned int protoff, drop: spin_unlock_bh(&nf_h323_lock); - net_info_ratelimited("nf_ct_ras: packet dropped\n"); + nf_ct_helper_log(skb, ct, "cannot process RAS message"); return NF_DROP; } diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index 884f2b39319a..974a2a4adefa 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -3,6 +3,7 @@ /* (C) 1999-2001 Paul `Rusty' Russell * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org> * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org> + * (C) 2006-2012 Patrick McHardy <kaber@trash.net> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -28,6 +29,7 @@ #include <net/netfilter/nf_conntrack_helper.h> #include <net/netfilter/nf_conntrack_core.h> #include <net/netfilter/nf_conntrack_extend.h> +#include <net/netfilter/nf_log.h> static DEFINE_MUTEX(nf_ct_helper_mutex); struct hlist_head *nf_ct_helper_hash __read_mostly; @@ -115,14 +117,13 @@ __nf_ct_helper_find(const struct nf_conntrack_tuple *tuple) { struct nf_conntrack_helper *helper; struct nf_conntrack_tuple_mask mask = { .src.u.all = htons(0xFFFF) }; - struct hlist_node *n; unsigned int h; if (!nf_ct_helper_count) return NULL; h = helper_hash(tuple); - hlist_for_each_entry_rcu(helper, n, &nf_ct_helper_hash[h], hnode) { + hlist_for_each_entry_rcu(helper, &nf_ct_helper_hash[h], hnode) { if (nf_ct_tuple_src_mask_cmp(tuple, &helper->tuple, &mask)) return helper; } @@ -133,11 +134,10 @@ struct nf_conntrack_helper * __nf_conntrack_helper_find(const char *name, u16 l3num, u8 protonum) { struct nf_conntrack_helper *h; - struct hlist_node *n; unsigned int i; for (i = 0; i < nf_ct_helper_hsize; i++) { - hlist_for_each_entry_rcu(h, n, &nf_ct_helper_hash[i], hnode) { + hlist_for_each_entry_rcu(h, &nf_ct_helper_hash[i], hnode) { if (!strcmp(h->name, name) && h->tuple.src.l3num == l3num && h->tuple.dst.protonum == protonum) @@ -236,7 +236,9 @@ int __nf_ct_try_assign_helper(struct nf_conn *ct, struct nf_conn *tmpl, /* We only allow helper re-assignment of the same sort since * we cannot reallocate the helper extension area. */ - if (help->helper != helper) { + struct nf_conntrack_helper *tmp = rcu_dereference(help->helper); + + if (tmp && tmp->help != helper->help) { RCU_INIT_POINTER(help->helper, NULL); goto out; } @@ -332,11 +334,37 @@ nf_ct_helper_expectfn_find_by_symbol(const void *symbol) } EXPORT_SYMBOL_GPL(nf_ct_helper_expectfn_find_by_symbol); +__printf(3, 4) +void nf_ct_helper_log(struct sk_buff *skb, const struct nf_conn *ct, + const char *fmt, ...) 
+{ + const struct nf_conn_help *help; + const struct nf_conntrack_helper *helper; + struct va_format vaf; + va_list args; + + va_start(args, fmt); + + vaf.fmt = fmt; + vaf.va = &args; + + /* Called from the helper function, this call never fails */ + help = nfct_help(ct); + + /* rcu_read_lock()ed by nf_hook_slow */ + helper = rcu_dereference(help->helper); + + nf_log_packet(nf_ct_net(ct), nf_ct_l3num(ct), 0, skb, NULL, NULL, NULL, + "nf_ct_%s: dropping packet: %pV ", helper->name, &vaf); + + va_end(args); +} +EXPORT_SYMBOL_GPL(nf_ct_helper_log); + int nf_conntrack_helper_register(struct nf_conntrack_helper *me) { int ret = 0; struct nf_conntrack_helper *cur; - struct hlist_node *n; unsigned int h = helper_hash(&me->tuple); BUG_ON(me->expect_policy == NULL); @@ -344,7 +372,7 @@ int nf_conntrack_helper_register(struct nf_conntrack_helper *me) BUG_ON(strlen(me->name) > NF_CT_HELPER_NAME_LEN - 1); mutex_lock(&nf_ct_helper_mutex); - hlist_for_each_entry(cur, n, &nf_ct_helper_hash[h], hnode) { + hlist_for_each_entry(cur, &nf_ct_helper_hash[h], hnode) { if (strncmp(cur->name, me->name, NF_CT_HELPER_NAME_LEN) == 0 && cur->tuple.src.l3num == me->tuple.src.l3num && cur->tuple.dst.protonum == me->tuple.dst.protonum) { @@ -365,13 +393,13 @@ static void __nf_conntrack_helper_unregister(struct nf_conntrack_helper *me, { struct nf_conntrack_tuple_hash *h; struct nf_conntrack_expect *exp; - const struct hlist_node *n, *next; + const struct hlist_node *next; const struct hlist_nulls_node *nn; unsigned int i; /* Get rid of expectations */ for (i = 0; i < nf_ct_expect_hsize; i++) { - hlist_for_each_entry_safe(exp, n, next, + hlist_for_each_entry_safe(exp, next, &net->ct.expect_hash[i], hnode) { struct nf_conn_help *help = nfct_help(exp->master); if ((rcu_dereference_protected( @@ -423,44 +451,41 @@ static struct nf_ct_ext_type helper_extend __read_mostly = { .id = NF_CT_EXT_HELPER, }; -int nf_conntrack_helper_init(struct net *net) +int nf_conntrack_helper_pernet_init(struct net *net) { - int err; - net->ct.auto_assign_helper_warned = false; net->ct.sysctl_auto_assign_helper = nf_ct_auto_assign_helper; + return nf_conntrack_helper_init_sysctl(net); +} - if (net_eq(net, &init_net)) { - nf_ct_helper_hsize = 1; /* gets rounded up to use one page */ - nf_ct_helper_hash = - nf_ct_alloc_hashtable(&nf_ct_helper_hsize, 0); - if (!nf_ct_helper_hash) - return -ENOMEM; +void nf_conntrack_helper_pernet_fini(struct net *net) +{ + nf_conntrack_helper_fini_sysctl(net); +} - err = nf_ct_extend_register(&helper_extend); - if (err < 0) - goto err1; +int nf_conntrack_helper_init(void) +{ + int ret; + nf_ct_helper_hsize = 1; /* gets rounded up to use one page */ + nf_ct_helper_hash = + nf_ct_alloc_hashtable(&nf_ct_helper_hsize, 0); + if (!nf_ct_helper_hash) + return -ENOMEM; + + ret = nf_ct_extend_register(&helper_extend); + if (ret < 0) { + pr_err("nf_ct_helper: Unable to register helper extension.\n"); + goto out_extend; } - err = nf_conntrack_helper_init_sysctl(net); - if (err < 0) - goto out_sysctl; - return 0; - -out_sysctl: - if (net_eq(net, &init_net)) - nf_ct_extend_unregister(&helper_extend); -err1: +out_extend: nf_ct_free_hashtable(nf_ct_helper_hash, nf_ct_helper_hsize); - return err; + return ret; } -void nf_conntrack_helper_fini(struct net *net) +void nf_conntrack_helper_fini(void) { - nf_conntrack_helper_fini_sysctl(net); - if (net_eq(net, &init_net)) { - nf_ct_extend_unregister(&helper_extend); - nf_ct_free_hashtable(nf_ct_helper_hash, nf_ct_helper_hsize); - } + nf_ct_extend_unregister(&helper_extend); + 
nf_ct_free_hashtable(nf_ct_helper_hash, nf_ct_helper_hsize); } diff --git a/net/netfilter/nf_conntrack_irc.c b/net/netfilter/nf_conntrack_irc.c index 3b20aa77cfc8..0fd2976db7ee 100644 --- a/net/netfilter/nf_conntrack_irc.c +++ b/net/netfilter/nf_conntrack_irc.c @@ -1,6 +1,7 @@ /* IRC extension for IP connection tracking, Version 1.21 * (C) 2000-2002 by Harald Welte <laforge@gnumonks.org> * based on RR's ip_conntrack_ftp.c + * (C) 2006-2012 Patrick McHardy <kaber@trash.net> * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -194,6 +195,8 @@ static int help(struct sk_buff *skb, unsigned int protoff, exp = nf_ct_expect_alloc(ct); if (exp == NULL) { + nf_ct_helper_log(skb, ct, + "cannot alloc expectation"); ret = NF_DROP; goto out; } @@ -210,8 +213,11 @@ static int help(struct sk_buff *skb, unsigned int protoff, addr_beg_p - ib_ptr, addr_end_p - addr_beg_p, exp); - else if (nf_ct_expect_related(exp) != 0) + else if (nf_ct_expect_related(exp) != 0) { + nf_ct_helper_log(skb, ct, + "cannot add expectation"); ret = NF_DROP; + } nf_ct_expect_put(exp); goto out; } diff --git a/net/netfilter/nf_conntrack_labels.c b/net/netfilter/nf_conntrack_labels.c new file mode 100644 index 000000000000..8fe2e99428b7 --- /dev/null +++ b/net/netfilter/nf_conntrack_labels.c @@ -0,0 +1,112 @@ +/* + * test/set flag bits stored in conntrack extension area. + * + * (C) 2013 Astaro GmbH & Co KG + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/ctype.h> +#include <linux/export.h> +#include <linux/jhash.h> +#include <linux/spinlock.h> +#include <linux/types.h> +#include <linux/slab.h> + +#include <net/netfilter/nf_conntrack_ecache.h> +#include <net/netfilter/nf_conntrack_labels.h> + +static unsigned int label_bits(const struct nf_conn_labels *l) +{ + unsigned int longs = l->words; + return longs * BITS_PER_LONG; +} + +bool nf_connlabel_match(const struct nf_conn *ct, u16 bit) +{ + struct nf_conn_labels *labels = nf_ct_labels_find(ct); + + if (!labels) + return false; + + return bit < label_bits(labels) && test_bit(bit, labels->bits); +} +EXPORT_SYMBOL_GPL(nf_connlabel_match); + +int nf_connlabel_set(struct nf_conn *ct, u16 bit) +{ + struct nf_conn_labels *labels = nf_ct_labels_find(ct); + + if (!labels || bit >= label_bits(labels)) + return -ENOSPC; + + if (test_bit(bit, labels->bits)) + return 0; + + if (test_and_set_bit(bit, labels->bits)) + nf_conntrack_event_cache(IPCT_LABEL, ct); + + return 0; +} +EXPORT_SYMBOL_GPL(nf_connlabel_set); + +#if IS_ENABLED(CONFIG_NF_CT_NETLINK) +static void replace_u32(u32 *address, u32 mask, u32 new) +{ + u32 old, tmp; + + do { + old = *address; + tmp = (old & mask) ^ new; + } while (cmpxchg(address, old, tmp) != old); +} + +int nf_connlabels_replace(struct nf_conn *ct, + const u32 *data, + const u32 *mask, unsigned int words32) +{ + struct nf_conn_labels *labels; + unsigned int size, i; + u32 *dst; + + labels = nf_ct_labels_find(ct); + if (!labels) + return -ENOSPC; + + size = labels->words * sizeof(long); + if (size < (words32 * sizeof(u32))) + words32 = size / sizeof(u32); + + dst = (u32 *) labels->bits; + if (words32) { + for (i = 0; i < words32; i++) + replace_u32(&dst[i], mask ? 
~mask[i] : 0, data[i]); + } + + size /= sizeof(u32); + for (i = words32; i < size; i++) /* pad */ + replace_u32(&dst[i], 0, 0); + + nf_conntrack_event_cache(IPCT_LABEL, ct); + return 0; +} +EXPORT_SYMBOL_GPL(nf_connlabels_replace); +#endif + +static struct nf_ct_ext_type labels_extend __read_mostly = { + .len = sizeof(struct nf_conn_labels), + .align = __alignof__(struct nf_conn_labels), + .id = NF_CT_EXT_LABELS, +}; + +int nf_conntrack_labels_init(void) +{ + return nf_ct_extend_register(&labels_extend); +} + +void nf_conntrack_labels_fini(void) +{ + nf_ct_extend_unregister(&labels_extend); +} diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 627b0e50b238..6d0f8a17c5b7 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -43,6 +43,7 @@ #include <net/netfilter/nf_conntrack_acct.h> #include <net/netfilter/nf_conntrack_zones.h> #include <net/netfilter/nf_conntrack_timestamp.h> +#include <net/netfilter/nf_conntrack_labels.h> #ifdef CONFIG_NF_NAT_NEEDED #include <net/netfilter/nf_nat_core.h> #include <net/netfilter/nf_nat_l4proto.h> @@ -323,6 +324,40 @@ nla_put_failure: #define ctnetlink_dump_secctx(a, b) (0) #endif +#ifdef CONFIG_NF_CONNTRACK_LABELS +static int ctnetlink_label_size(const struct nf_conn *ct) +{ + struct nf_conn_labels *labels = nf_ct_labels_find(ct); + + if (!labels) + return 0; + return nla_total_size(labels->words * sizeof(long)); +} + +static int +ctnetlink_dump_labels(struct sk_buff *skb, const struct nf_conn *ct) +{ + struct nf_conn_labels *labels = nf_ct_labels_find(ct); + unsigned int len, i; + + if (!labels) + return 0; + + len = labels->words * sizeof(long); + i = 0; + do { + if (labels->bits[i] != 0) + return nla_put(skb, CTA_LABELS, len, labels->bits); + i++; + } while (i < labels->words); + + return 0; +} +#else +#define ctnetlink_dump_labels(a, b) (0) +#define ctnetlink_label_size(a) (0) +#endif + #define master_tuple(ct) &(ct->master->tuplehash[IP_CT_DIR_ORIGINAL].tuple) static inline int @@ -463,6 +498,7 @@ ctnetlink_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type, ctnetlink_dump_helpinfo(skb, ct) < 0 || ctnetlink_dump_mark(skb, ct) < 0 || ctnetlink_dump_secctx(skb, ct) < 0 || + ctnetlink_dump_labels(skb, ct) < 0 || ctnetlink_dump_id(skb, ct) < 0 || ctnetlink_dump_use(skb, ct) < 0 || ctnetlink_dump_master(skb, ct) < 0 || @@ -561,6 +597,7 @@ ctnetlink_nlmsg_size(const struct nf_conn *ct) + nla_total_size(sizeof(u_int32_t)) /* CTA_MARK */ #endif + ctnetlink_proto_size(ct) + + ctnetlink_label_size(ct) ; } @@ -662,6 +699,9 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item) && ctnetlink_dump_secctx(skb, ct) < 0) goto nla_put_failure; #endif + if (events & (1 << IPCT_LABEL) && + ctnetlink_dump_labels(skb, ct) < 0) + goto nla_put_failure; if (events & (1 << IPCT_RELATED) && ctnetlink_dump_master(skb, ct) < 0) @@ -921,6 +961,7 @@ ctnetlink_parse_help(const struct nlattr *attr, char **helper_name, return 0; } +#define __CTA_LABELS_MAX_LENGTH ((XT_CONNLABEL_MAXBIT + 1) / BITS_PER_BYTE) static const struct nla_policy ct_nla_policy[CTA_MAX+1] = { [CTA_TUPLE_ORIG] = { .type = NLA_NESTED }, [CTA_TUPLE_REPLY] = { .type = NLA_NESTED }, @@ -937,6 +978,10 @@ static const struct nla_policy ct_nla_policy[CTA_MAX+1] = { [CTA_NAT_SEQ_ADJ_REPLY] = { .type = NLA_NESTED }, [CTA_ZONE] = { .type = NLA_U16 }, [CTA_MARK_MASK] = { .type = NLA_U32 }, + [CTA_LABELS] = { .type = NLA_BINARY, + .len = __CTA_LABELS_MAX_LENGTH }, + [CTA_LABELS_MASK] = { .type = NLA_BINARY, + 
.len = __CTA_LABELS_MAX_LENGTH }, }; static int @@ -1211,13 +1256,13 @@ ctnetlink_parse_nat_setup(struct nf_conn *ct, if (!parse_nat_setup) { #ifdef CONFIG_MODULES rcu_read_unlock(); - nfnl_unlock(); + nfnl_unlock(NFNL_SUBSYS_CTNETLINK); if (request_module("nf-nat") < 0) { - nfnl_lock(); + nfnl_lock(NFNL_SUBSYS_CTNETLINK); rcu_read_lock(); return -EOPNOTSUPP; } - nfnl_lock(); + nfnl_lock(NFNL_SUBSYS_CTNETLINK); rcu_read_lock(); if (nfnetlink_parse_nat_setup_hook) return -EAGAIN; @@ -1229,13 +1274,13 @@ ctnetlink_parse_nat_setup(struct nf_conn *ct, if (err == -EAGAIN) { #ifdef CONFIG_MODULES rcu_read_unlock(); - nfnl_unlock(); + nfnl_unlock(NFNL_SUBSYS_CTNETLINK); if (request_module("nf-nat-%u", nf_ct_l3num(ct)) < 0) { - nfnl_lock(); + nfnl_lock(NFNL_SUBSYS_CTNETLINK); rcu_read_lock(); return -EOPNOTSUPP; } - nfnl_lock(); + nfnl_lock(NFNL_SUBSYS_CTNETLINK); rcu_read_lock(); #else err = -EOPNOTSUPP; @@ -1465,6 +1510,31 @@ ctnetlink_change_nat_seq_adj(struct nf_conn *ct, #endif static int +ctnetlink_attach_labels(struct nf_conn *ct, const struct nlattr * const cda[]) +{ +#ifdef CONFIG_NF_CONNTRACK_LABELS + size_t len = nla_len(cda[CTA_LABELS]); + const void *mask = cda[CTA_LABELS_MASK]; + + if (len & (sizeof(u32)-1)) /* must be multiple of u32 */ + return -EINVAL; + + if (mask) { + if (nla_len(cda[CTA_LABELS_MASK]) == 0 || + nla_len(cda[CTA_LABELS_MASK]) != len) + return -EINVAL; + mask = nla_data(cda[CTA_LABELS_MASK]); + } + + len /= sizeof(u32); + + return nf_connlabels_replace(ct, nla_data(cda[CTA_LABELS]), mask, len); +#else + return -EOPNOTSUPP; +#endif +} + +static int ctnetlink_change_conntrack(struct nf_conn *ct, const struct nlattr * const cda[]) { @@ -1510,6 +1580,11 @@ ctnetlink_change_conntrack(struct nf_conn *ct, return err; } #endif + if (cda[CTA_LABELS]) { + err = ctnetlink_attach_labels(ct, cda); + if (err < 0) + return err; + } return 0; } @@ -1598,6 +1673,8 @@ ctnetlink_create_conntrack(struct net *net, u16 zone, nf_ct_acct_ext_add(ct, GFP_ATOMIC); nf_ct_tstamp_ext_add(ct, GFP_ATOMIC); nf_ct_ecache_ext_add(ct, 0, 0, GFP_ATOMIC); + nf_ct_labels_ext_add(ct); + /* we must add conntrack extensions before confirmation. 
*/ ct->status |= IPS_CONFIRMED; @@ -1705,6 +1782,9 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, if (nlh->nlmsg_flags & NLM_F_CREATE) { enum ip_conntrack_events events; + if (!cda[CTA_TUPLE_ORIG] || !cda[CTA_TUPLE_REPLY]) + return -EINVAL; + ct = ctnetlink_create_conntrack(net, zone, cda, &otuple, &rtuple, u3); if (IS_ERR(ct)) @@ -1716,6 +1796,10 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, else events = IPCT_NEW; + if (cda[CTA_LABELS] && + ctnetlink_attach_labels(ct, cda) == 0) + events |= (1 << IPCT_LABEL); + nf_conntrack_eventmask_report((1 << IPCT_REPLY) | (1 << IPCT_ASSURED) | (1 << IPCT_HELPER) | @@ -1983,6 +2067,8 @@ ctnetlink_nfqueue_build(struct sk_buff *skb, struct nf_conn *ct) if (ct->mark && ctnetlink_dump_mark(skb, ct) < 0) goto nla_put_failure; #endif + if (ctnetlink_dump_labels(skb, ct) < 0) + goto nla_put_failure; rcu_read_unlock(); return 0; @@ -2011,6 +2097,11 @@ ctnetlink_nfqueue_parse_ct(const struct nlattr *cda[], struct nf_conn *ct) if (err < 0) return err; } + if (cda[CTA_LABELS]) { + err = ctnetlink_attach_labels(ct, cda); + if (err < 0) + return err; + } #if defined(CONFIG_NF_CONNTRACK_MARK) if (cda[CTA_MARK]) ct->mark = ntohl(nla_get_be32(cda[CTA_MARK])); @@ -2279,14 +2370,13 @@ ctnetlink_exp_dump_table(struct sk_buff *skb, struct netlink_callback *cb) struct net *net = sock_net(skb->sk); struct nf_conntrack_expect *exp, *last; struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh); - struct hlist_node *n; u_int8_t l3proto = nfmsg->nfgen_family; rcu_read_lock(); last = (struct nf_conntrack_expect *)cb->args[1]; for (; cb->args[0] < nf_ct_expect_hsize; cb->args[0]++) { restart: - hlist_for_each_entry(exp, n, &net->ct.expect_hash[cb->args[0]], + hlist_for_each_entry(exp, &net->ct.expect_hash[cb->args[0]], hnode) { if (l3proto && exp->tuple.src.l3num != l3proto) continue; @@ -2319,6 +2409,92 @@ out: return skb->len; } +static int +ctnetlink_exp_ct_dump_table(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct nf_conntrack_expect *exp, *last; + struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh); + struct nf_conn *ct = cb->data; + struct nf_conn_help *help = nfct_help(ct); + u_int8_t l3proto = nfmsg->nfgen_family; + + if (cb->args[0]) + return 0; + + rcu_read_lock(); + last = (struct nf_conntrack_expect *)cb->args[1]; +restart: + hlist_for_each_entry(exp, &help->expectations, lnode) { + if (l3proto && exp->tuple.src.l3num != l3proto) + continue; + if (cb->args[1]) { + if (exp != last) + continue; + cb->args[1] = 0; + } + if (ctnetlink_exp_fill_info(skb, NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + IPCTNL_MSG_EXP_NEW, + exp) < 0) { + if (!atomic_inc_not_zero(&exp->use)) + continue; + cb->args[1] = (unsigned long)exp; + goto out; + } + } + if (cb->args[1]) { + cb->args[1] = 0; + goto restart; + } + cb->args[0] = 1; +out: + rcu_read_unlock(); + if (last) + nf_ct_expect_put(last); + + return skb->len; +} + +static int ctnetlink_dump_exp_ct(struct sock *ctnl, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const cda[]) +{ + int err; + struct net *net = sock_net(ctnl); + struct nfgenmsg *nfmsg = nlmsg_data(nlh); + u_int8_t u3 = nfmsg->nfgen_family; + struct nf_conntrack_tuple tuple; + struct nf_conntrack_tuple_hash *h; + struct nf_conn *ct; + u16 zone = 0; + struct netlink_dump_control c = { + .dump = ctnetlink_exp_ct_dump_table, + .done = ctnetlink_exp_done, + }; + + err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_MASTER, u3); + if (err < 0) + return err; + + if (cda[CTA_EXPECT_ZONE]) { + err = 
ctnetlink_parse_zone(cda[CTA_EXPECT_ZONE], &zone); + if (err < 0) + return err; + } + + h = nf_conntrack_find_get(net, zone, &tuple); + if (!h) + return -ENOENT; + + ct = nf_ct_tuplehash_to_ctrack(h); + c.data = ct; + + err = netlink_dump_start(ctnl, skb, nlh, &c); + nf_ct_put(ct); + + return err; +} + static const struct nla_policy exp_nla_policy[CTA_EXPECT_MAX+1] = { [CTA_EXPECT_MASTER] = { .type = NLA_NESTED }, [CTA_EXPECT_TUPLE] = { .type = NLA_NESTED }, @@ -2349,11 +2525,15 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb, int err; if (nlh->nlmsg_flags & NLM_F_DUMP) { - struct netlink_dump_control c = { - .dump = ctnetlink_exp_dump_table, - .done = ctnetlink_exp_done, - }; - return netlink_dump_start(ctnl, skb, nlh, &c); + if (cda[CTA_EXPECT_MASTER]) + return ctnetlink_dump_exp_ct(ctnl, skb, nlh, cda); + else { + struct netlink_dump_control c = { + .dump = ctnetlink_exp_dump_table, + .done = ctnetlink_exp_done, + }; + return netlink_dump_start(ctnl, skb, nlh, &c); + } } err = ctnetlink_parse_zone(cda[CTA_EXPECT_ZONE], &zone); @@ -2419,7 +2599,7 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, struct nf_conntrack_expect *exp; struct nf_conntrack_tuple tuple; struct nfgenmsg *nfmsg = nlmsg_data(nlh); - struct hlist_node *n, *next; + struct hlist_node *next; u_int8_t u3 = nfmsg->nfgen_family; unsigned int i; u16 zone; @@ -2466,7 +2646,7 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, /* delete all expectations for this helper */ spin_lock_bh(&nf_conntrack_lock); for (i = 0; i < nf_ct_expect_hsize; i++) { - hlist_for_each_entry_safe(exp, n, next, + hlist_for_each_entry_safe(exp, next, &net->ct.expect_hash[i], hnode) { m_help = nfct_help(exp->master); @@ -2484,7 +2664,7 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, /* This basically means we have to flush everything*/ spin_lock_bh(&nf_conntrack_lock); for (i = 0; i < nf_ct_expect_hsize; i++) { - hlist_for_each_entry_safe(exp, n, next, + hlist_for_each_entry_safe(exp, next, &net->ct.expect_hash[i], hnode) { if (del_timer(&exp->timeout)) { diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c index cc7669ef0b95..7bd03decd36c 100644 --- a/net/netfilter/nf_conntrack_pptp.c +++ b/net/netfilter/nf_conntrack_pptp.c @@ -11,10 +11,12 @@ * * Development of this code funded by Astaro AG (http://www.astaro.com/) * + * (C) 2006-2012 Patrick McHardy <kaber@trash.net> + * * Limitations: * - We blindly assume that control connections are always * established in PNS->PAC direction. 
This is a violation - * of RFFC2673 + * of RFC 2637 * - We can only support one single call within each session * TODO: * - testing of incoming PPTP calls diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index 51e928db48c8..0ab9636ac57e 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -3,6 +3,7 @@ /* (C) 1999-2001 Paul `Rusty' Russell * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org> * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org> + * (C) 2006-2012 Patrick McHardy <kaber@trash.net> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -212,8 +213,7 @@ static void nf_ct_l3proto_unregister_sysctl(struct net *net, #endif } -static int -nf_conntrack_l3proto_register_net(struct nf_conntrack_l3proto *proto) +int nf_ct_l3proto_register(struct nf_conntrack_l3proto *proto) { int ret = 0; struct nf_conntrack_l3proto *old; @@ -242,8 +242,9 @@ out_unlock: return ret; } +EXPORT_SYMBOL_GPL(nf_ct_l3proto_register); -int nf_conntrack_l3proto_register(struct net *net, +int nf_ct_l3proto_pernet_register(struct net *net, struct nf_conntrack_l3proto *proto) { int ret = 0; @@ -254,22 +255,11 @@ int nf_conntrack_l3proto_register(struct net *net, return ret; } - ret = nf_ct_l3proto_register_sysctl(net, proto); - if (ret < 0) - return ret; - - if (net == &init_net) { - ret = nf_conntrack_l3proto_register_net(proto); - if (ret < 0) - nf_ct_l3proto_unregister_sysctl(net, proto); - } - - return ret; + return nf_ct_l3proto_register_sysctl(net, proto); } -EXPORT_SYMBOL_GPL(nf_conntrack_l3proto_register); +EXPORT_SYMBOL_GPL(nf_ct_l3proto_pernet_register); -static void -nf_conntrack_l3proto_unregister_net(struct nf_conntrack_l3proto *proto) +void nf_ct_l3proto_unregister(struct nf_conntrack_l3proto *proto) { BUG_ON(proto->l3proto >= AF_MAX); @@ -283,19 +273,17 @@ nf_conntrack_l3proto_unregister_net(struct nf_conntrack_l3proto *proto) synchronize_rcu(); } +EXPORT_SYMBOL_GPL(nf_ct_l3proto_unregister); -void nf_conntrack_l3proto_unregister(struct net *net, +void nf_ct_l3proto_pernet_unregister(struct net *net, struct nf_conntrack_l3proto *proto) { - if (net == &init_net) - nf_conntrack_l3proto_unregister_net(proto); - nf_ct_l3proto_unregister_sysctl(net, proto); /* Remove all contrack entries for this protocol */ nf_ct_iterate_cleanup(net, kill_l3proto, proto); } -EXPORT_SYMBOL_GPL(nf_conntrack_l3proto_unregister); +EXPORT_SYMBOL_GPL(nf_ct_l3proto_pernet_unregister); static struct nf_proto_net *nf_ct_l4proto_net(struct net *net, struct nf_conntrack_l4proto *l4proto) @@ -376,8 +364,7 @@ void nf_ct_l4proto_unregister_sysctl(struct net *net, /* FIXME: Allow NULL functions and sub in pointers to generic for them. 
--RR */ -static int -nf_conntrack_l4proto_register_net(struct nf_conntrack_l4proto *l4proto) +int nf_ct_l4proto_register(struct nf_conntrack_l4proto *l4proto) { int ret = 0; @@ -431,8 +418,9 @@ out_unlock: mutex_unlock(&nf_ct_proto_mutex); return ret; } +EXPORT_SYMBOL_GPL(nf_ct_l4proto_register); -int nf_conntrack_l4proto_register(struct net *net, +int nf_ct_l4proto_pernet_register(struct net *net, struct nf_conntrack_l4proto *l4proto) { int ret = 0; @@ -452,22 +440,13 @@ int nf_conntrack_l4proto_register(struct net *net, if (ret < 0) goto out; - if (net == &init_net) { - ret = nf_conntrack_l4proto_register_net(l4proto); - if (ret < 0) { - nf_ct_l4proto_unregister_sysctl(net, pn, l4proto); - goto out; - } - } - pn->users++; out: return ret; } -EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_register); +EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_register); -static void -nf_conntrack_l4proto_unregister_net(struct nf_conntrack_l4proto *l4proto) +void nf_ct_l4proto_unregister(struct nf_conntrack_l4proto *l4proto) { BUG_ON(l4proto->l3proto >= PF_MAX); @@ -482,15 +461,13 @@ nf_conntrack_l4proto_unregister_net(struct nf_conntrack_l4proto *l4proto) synchronize_rcu(); } +EXPORT_SYMBOL_GPL(nf_ct_l4proto_unregister); -void nf_conntrack_l4proto_unregister(struct net *net, +void nf_ct_l4proto_pernet_unregister(struct net *net, struct nf_conntrack_l4proto *l4proto) { struct nf_proto_net *pn = NULL; - if (net == &init_net) - nf_conntrack_l4proto_unregister_net(l4proto); - pn = nf_ct_l4proto_net(net, l4proto); if (pn == NULL) return; @@ -501,11 +478,10 @@ void nf_conntrack_l4proto_unregister(struct net *net, /* Remove all contrack entries for this protocol */ nf_ct_iterate_cleanup(net, kill_l4proto, l4proto); } -EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_unregister); +EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_unregister); -int nf_conntrack_proto_init(struct net *net) +int nf_conntrack_proto_pernet_init(struct net *net) { - unsigned int i; int err; struct nf_proto_net *pn = nf_ct_l4proto_net(net, &nf_conntrack_l4proto_generic); @@ -520,19 +496,12 @@ int nf_conntrack_proto_init(struct net *net) if (err < 0) return err; - if (net == &init_net) { - for (i = 0; i < AF_MAX; i++) - rcu_assign_pointer(nf_ct_l3protos[i], - &nf_conntrack_l3proto_generic); - } - pn->users++; return 0; } -void nf_conntrack_proto_fini(struct net *net) +void nf_conntrack_proto_pernet_fini(struct net *net) { - unsigned int i; struct nf_proto_net *pn = nf_ct_l4proto_net(net, &nf_conntrack_l4proto_generic); @@ -540,9 +509,21 @@ void nf_conntrack_proto_fini(struct net *net) nf_ct_l4proto_unregister_sysctl(net, pn, &nf_conntrack_l4proto_generic); - if (net == &init_net) { - /* free l3proto protocol tables */ - for (i = 0; i < PF_MAX; i++) - kfree(nf_ct_protos[i]); - } +} + +int nf_conntrack_proto_init(void) +{ + unsigned int i; + for (i = 0; i < AF_MAX; i++) + rcu_assign_pointer(nf_ct_l3protos[i], + &nf_conntrack_l3proto_generic); + return 0; +} + +void nf_conntrack_proto_fini(void) +{ + unsigned int i; + /* free l3proto protocol tables */ + for (i = 0; i < PF_MAX; i++) + kfree(nf_ct_protos[i]); } diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c index a8ae287bc7af..a99b6c3427b0 100644 --- a/net/netfilter/nf_conntrack_proto_dccp.c +++ b/net/netfilter/nf_conntrack_proto_dccp.c @@ -456,7 +456,8 @@ static bool dccp_new(struct nf_conn *ct, const struct sk_buff *skb, out_invalid: if (LOG_INVALID(net, IPPROTO_DCCP)) - nf_log_packet(nf_ct_l3num(ct), 0, skb, NULL, NULL, NULL, msg); + nf_log_packet(net, nf_ct_l3num(ct), 0, 
skb, NULL, NULL, + NULL, msg); return false; } @@ -542,13 +543,13 @@ static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb, spin_unlock_bh(&ct->lock); if (LOG_INVALID(net, IPPROTO_DCCP)) - nf_log_packet(pf, 0, skb, NULL, NULL, NULL, + nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, "nf_ct_dccp: invalid packet ignored "); return NF_ACCEPT; case CT_DCCP_INVALID: spin_unlock_bh(&ct->lock); if (LOG_INVALID(net, IPPROTO_DCCP)) - nf_log_packet(pf, 0, skb, NULL, NULL, NULL, + nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, "nf_ct_dccp: invalid state transition "); return -NF_ACCEPT; } @@ -613,7 +614,7 @@ static int dccp_error(struct net *net, struct nf_conn *tmpl, out_invalid: if (LOG_INVALID(net, IPPROTO_DCCP)) - nf_log_packet(pf, 0, skb, NULL, NULL, NULL, msg); + nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, msg); return -NF_ACCEPT; } @@ -935,32 +936,27 @@ static struct nf_conntrack_l4proto dccp_proto6 __read_mostly = { static __net_init int dccp_net_init(struct net *net) { int ret = 0; - ret = nf_conntrack_l4proto_register(net, - &dccp_proto4); + ret = nf_ct_l4proto_pernet_register(net, &dccp_proto4); if (ret < 0) { - pr_err("nf_conntrack_l4proto_dccp4 :protocol register failed.\n"); + pr_err("nf_conntrack_dccp4: pernet registration failed.\n"); goto out; } - ret = nf_conntrack_l4proto_register(net, - &dccp_proto6); + ret = nf_ct_l4proto_pernet_register(net, &dccp_proto6); if (ret < 0) { - pr_err("nf_conntrack_l4proto_dccp6 :protocol register failed.\n"); + pr_err("nf_conntrack_dccp6: pernet registration failed.\n"); goto cleanup_dccp4; } return 0; cleanup_dccp4: - nf_conntrack_l4proto_unregister(net, - &dccp_proto4); + nf_ct_l4proto_pernet_unregister(net, &dccp_proto4); out: return ret; } static __net_exit void dccp_net_exit(struct net *net) { - nf_conntrack_l4proto_unregister(net, - &dccp_proto6); - nf_conntrack_l4proto_unregister(net, - &dccp_proto4); + nf_ct_l4proto_pernet_unregister(net, &dccp_proto6); + nf_ct_l4proto_pernet_unregister(net, &dccp_proto4); } static struct pernet_operations dccp_net_ops = { @@ -972,11 +968,33 @@ static struct pernet_operations dccp_net_ops = { static int __init nf_conntrack_proto_dccp_init(void) { - return register_pernet_subsys(&dccp_net_ops); + int ret; + + ret = register_pernet_subsys(&dccp_net_ops); + if (ret < 0) + goto out_pernet; + + ret = nf_ct_l4proto_register(&dccp_proto4); + if (ret < 0) + goto out_dccp4; + + ret = nf_ct_l4proto_register(&dccp_proto6); + if (ret < 0) + goto out_dccp6; + + return 0; +out_dccp6: + nf_ct_l4proto_unregister(&dccp_proto4); +out_dccp4: + unregister_pernet_subsys(&dccp_net_ops); +out_pernet: + return ret; } static void __exit nf_conntrack_proto_dccp_fini(void) { + nf_ct_l4proto_unregister(&dccp_proto6); + nf_ct_l4proto_unregister(&dccp_proto4); unregister_pernet_subsys(&dccp_net_ops); } diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c index b09b7af7f6f8..9d9c0dade602 100644 --- a/net/netfilter/nf_conntrack_proto_gre.c +++ b/net/netfilter/nf_conntrack_proto_gre.c @@ -21,6 +21,7 @@ * * Development of this code funded by Astaro AG (http://www.astaro.com/) * + * (C) 2006-2012 Patrick McHardy <kaber@trash.net> */ #include <linux/module.h> @@ -397,15 +398,15 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_gre4 __read_mostly = { static int proto_gre_net_init(struct net *net) { int ret = 0; - ret = nf_conntrack_l4proto_register(net, &nf_conntrack_l4proto_gre4); + ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_gre4); if (ret < 0) - 
pr_err("nf_conntrack_l4proto_gre4 :protocol register failed.\n"); + pr_err("nf_conntrack_gre4: pernet registration failed.\n"); return ret; } static void proto_gre_net_exit(struct net *net) { - nf_conntrack_l4proto_unregister(net, &nf_conntrack_l4proto_gre4); + nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_gre4); nf_ct_gre_keymap_flush(net); } @@ -418,11 +419,26 @@ static struct pernet_operations proto_gre_net_ops = { static int __init nf_ct_proto_gre_init(void) { - return register_pernet_subsys(&proto_gre_net_ops); + int ret; + + ret = register_pernet_subsys(&proto_gre_net_ops); + if (ret < 0) + goto out_pernet; + + ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_gre4); + if (ret < 0) + goto out_gre4; + + return 0; +out_gre4: + unregister_pernet_subsys(&proto_gre_net_ops); +out_pernet: + return ret; } static void __exit nf_ct_proto_gre_fini(void) { + nf_ct_l4proto_unregister(&nf_conntrack_l4proto_gre4); unregister_pernet_subsys(&proto_gre_net_ops); } diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index c746d61f83ed..1314d33f6bcf 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -1,6 +1,9 @@ /* * Connection tracking protocol helper module for SCTP. * + * Copyright (c) 2004 Kiran Kumar Immidi <immidi_kiran@yahoo.com> + * Copyright (c) 2004-2012 Patrick McHardy <kaber@trash.net> + * * SCTP is defined in RFC 2960. References to various sections in this code * are to this RFC. * @@ -853,33 +856,28 @@ static int sctp_net_init(struct net *net) { int ret = 0; - ret = nf_conntrack_l4proto_register(net, - &nf_conntrack_l4proto_sctp4); + ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_sctp4); if (ret < 0) { - pr_err("nf_conntrack_l4proto_sctp4 :protocol register failed.\n"); + pr_err("nf_conntrack_sctp4: pernet registration failed.\n"); goto out; } - ret = nf_conntrack_l4proto_register(net, - &nf_conntrack_l4proto_sctp6); + ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_sctp6); if (ret < 0) { - pr_err("nf_conntrack_l4proto_sctp6 :protocol register failed.\n"); + pr_err("nf_conntrack_sctp6: pernet registration failed.\n"); goto cleanup_sctp4; } return 0; cleanup_sctp4: - nf_conntrack_l4proto_unregister(net, - &nf_conntrack_l4proto_sctp4); + nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_sctp4); out: return ret; } static void sctp_net_exit(struct net *net) { - nf_conntrack_l4proto_unregister(net, - &nf_conntrack_l4proto_sctp6); - nf_conntrack_l4proto_unregister(net, - &nf_conntrack_l4proto_sctp4); + nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_sctp6); + nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_sctp4); } static struct pernet_operations sctp_net_ops = { @@ -891,11 +889,33 @@ static struct pernet_operations sctp_net_ops = { static int __init nf_conntrack_proto_sctp_init(void) { - return register_pernet_subsys(&sctp_net_ops); + int ret; + + ret = register_pernet_subsys(&sctp_net_ops); + if (ret < 0) + goto out_pernet; + + ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_sctp4); + if (ret < 0) + goto out_sctp4; + + ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_sctp6); + if (ret < 0) + goto out_sctp6; + + return 0; +out_sctp6: + nf_ct_l4proto_unregister(&nf_conntrack_l4proto_sctp4); +out_sctp4: + unregister_pernet_subsys(&sctp_net_ops); +out_pernet: + return ret; } static void __exit nf_conntrack_proto_sctp_fini(void) { + nf_ct_l4proto_unregister(&nf_conntrack_l4proto_sctp6); + 
nf_ct_l4proto_unregister(&nf_conntrack_l4proto_sctp4); unregister_pernet_subsys(&sctp_net_ops); } diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 83876e9877f1..4d4d8f1d01fc 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -1,5 +1,7 @@ /* (C) 1999-2001 Paul `Rusty' Russell * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> + * (C) 2002-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> + * (C) 2006-2012 Patrick McHardy <kaber@trash.net> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -720,7 +722,7 @@ static bool tcp_in_window(const struct nf_conn *ct, tn->tcp_be_liberal) res = true; if (!res && LOG_INVALID(net, IPPROTO_TCP)) - nf_log_packet(pf, 0, skb, NULL, NULL, NULL, + nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, "nf_ct_tcp: %s ", before(seq, sender->td_maxend + 1) ? after(end, sender->td_end - receiver->td_maxwin - 1) ? @@ -772,7 +774,7 @@ static int tcp_error(struct net *net, struct nf_conn *tmpl, th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph); if (th == NULL) { if (LOG_INVALID(net, IPPROTO_TCP)) - nf_log_packet(pf, 0, skb, NULL, NULL, NULL, + nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, "nf_ct_tcp: short packet "); return -NF_ACCEPT; } @@ -780,7 +782,7 @@ static int tcp_error(struct net *net, struct nf_conn *tmpl, /* Not whole TCP header or malformed packet */ if (th->doff*4 < sizeof(struct tcphdr) || tcplen < th->doff*4) { if (LOG_INVALID(net, IPPROTO_TCP)) - nf_log_packet(pf, 0, skb, NULL, NULL, NULL, + nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, "nf_ct_tcp: truncated/malformed packet "); return -NF_ACCEPT; } @@ -793,7 +795,7 @@ static int tcp_error(struct net *net, struct nf_conn *tmpl, if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING && nf_checksum(skb, hooknum, dataoff, IPPROTO_TCP, pf)) { if (LOG_INVALID(net, IPPROTO_TCP)) - nf_log_packet(pf, 0, skb, NULL, NULL, NULL, + nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, "nf_ct_tcp: bad TCP checksum "); return -NF_ACCEPT; } @@ -802,7 +804,7 @@ static int tcp_error(struct net *net, struct nf_conn *tmpl, tcpflags = (tcp_flag_byte(th) & ~(TCPHDR_ECE|TCPHDR_CWR|TCPHDR_PSH)); if (!tcp_valid_flags[tcpflags]) { if (LOG_INVALID(net, IPPROTO_TCP)) - nf_log_packet(pf, 0, skb, NULL, NULL, NULL, + nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, "nf_ct_tcp: invalid TCP flag combination "); return -NF_ACCEPT; } @@ -949,7 +951,7 @@ static int tcp_packet(struct nf_conn *ct, } spin_unlock_bh(&ct->lock); if (LOG_INVALID(net, IPPROTO_TCP)) - nf_log_packet(pf, 0, skb, NULL, NULL, NULL, + nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, "nf_ct_tcp: invalid packet ignored in " "state %s ", tcp_conntrack_names[old_state]); return NF_ACCEPT; @@ -959,7 +961,7 @@ static int tcp_packet(struct nf_conn *ct, dir, get_conntrack_index(th), old_state); spin_unlock_bh(&ct->lock); if (LOG_INVALID(net, IPPROTO_TCP)) - nf_log_packet(pf, 0, skb, NULL, NULL, NULL, + nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, "nf_ct_tcp: invalid state "); return -NF_ACCEPT; case TCP_CONNTRACK_CLOSE: @@ -969,8 +971,8 @@ static int tcp_packet(struct nf_conn *ct, /* Invalid RST */ spin_unlock_bh(&ct->lock); if (LOG_INVALID(net, IPPROTO_TCP)) - nf_log_packet(pf, 0, skb, NULL, NULL, NULL, - "nf_ct_tcp: invalid RST "); + nf_log_packet(net, pf, 0, skb, NULL, NULL, + NULL, "nf_ct_tcp: invalid RST "); return -NF_ACCEPT; } if (index == TCP_RST_SET 
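[Editor's aside — not part of the diff.] The repeated one-line conversions in the DCCP, TCP and (below) UDP/UDP-lite trackers all stem from nf_log_packet() gaining the network namespace as its first parameter, so invalid-packet logging resolves per-netns loggers. A hedged sketch of the call-site pattern; demo_error() is hypothetical, while the nf_log_packet() signature and the LOG_INVALID() guard are taken from the hunks:

	#include <linux/skbuff.h>
	#include <net/netfilter/nf_log.h>

	static int demo_error(struct net *net, struct sk_buff *skb, u_int8_t pf)
	{
		/* Old call: nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "..."); */
		if (LOG_INVALID(net, IPPROTO_UDP))
			nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
				      "nf_ct_demo: malformed packet ");
		return -NF_ACCEPT;
	}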
diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c index 59623cc56e8d..9d7721cbce4b 100644 --- a/net/netfilter/nf_conntrack_proto_udp.c +++ b/net/netfilter/nf_conntrack_proto_udp.c @@ -1,5 +1,6 @@ /* (C) 1999-2001 Paul `Rusty' Russell * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> + * (C) 2006-2012 Patrick McHardy <kaber@trash.net> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -119,7 +120,7 @@ static int udp_error(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, hdr = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr); if (hdr == NULL) { if (LOG_INVALID(net, IPPROTO_UDP)) - nf_log_packet(pf, 0, skb, NULL, NULL, NULL, + nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, "nf_ct_udp: short packet "); return -NF_ACCEPT; } @@ -127,7 +128,7 @@ static int udp_error(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, /* Truncated/malformed packets */ if (ntohs(hdr->len) > udplen || ntohs(hdr->len) < sizeof(*hdr)) { if (LOG_INVALID(net, IPPROTO_UDP)) - nf_log_packet(pf, 0, skb, NULL, NULL, NULL, + nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, "nf_ct_udp: truncated/malformed packet "); return -NF_ACCEPT; } @@ -143,7 +144,7 @@ static int udp_error(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING && nf_checksum(skb, hooknum, dataoff, IPPROTO_UDP, pf)) { if (LOG_INVALID(net, IPPROTO_UDP)) - nf_log_packet(pf, 0, skb, NULL, NULL, NULL, + nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, "nf_ct_udp: bad UDP checksum "); return -NF_ACCEPT; } diff --git a/net/netfilter/nf_conntrack_proto_udplite.c b/net/netfilter/nf_conntrack_proto_udplite.c index 4b66df209286..2750e6c69f82 100644 --- a/net/netfilter/nf_conntrack_proto_udplite.c +++ b/net/netfilter/nf_conntrack_proto_udplite.c @@ -131,7 +131,7 @@ static int udplite_error(struct net *net, struct nf_conn *tmpl, hdr = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr); if (hdr == NULL) { if (LOG_INVALID(net, IPPROTO_UDPLITE)) - nf_log_packet(pf, 0, skb, NULL, NULL, NULL, + nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, "nf_ct_udplite: short packet "); return -NF_ACCEPT; } @@ -141,7 +141,7 @@ static int udplite_error(struct net *net, struct nf_conn *tmpl, cscov = udplen; else if (cscov < sizeof(*hdr) || cscov > udplen) { if (LOG_INVALID(net, IPPROTO_UDPLITE)) - nf_log_packet(pf, 0, skb, NULL, NULL, NULL, + nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, "nf_ct_udplite: invalid checksum coverage "); return -NF_ACCEPT; } @@ -149,7 +149,7 @@ static int udplite_error(struct net *net, struct nf_conn *tmpl, /* UDPLITE mandates checksums */ if (!hdr->check) { if (LOG_INVALID(net, IPPROTO_UDPLITE)) - nf_log_packet(pf, 0, skb, NULL, NULL, NULL, + nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, "nf_ct_udplite: checksum missing "); return -NF_ACCEPT; } @@ -159,7 +159,7 @@ static int udplite_error(struct net *net, struct nf_conn *tmpl, nf_checksum_partial(skb, hooknum, dataoff, cscov, IPPROTO_UDP, pf)) { if (LOG_INVALID(net, IPPROTO_UDPLITE)) - nf_log_packet(pf, 0, skb, NULL, NULL, NULL, + nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, "nf_ct_udplite: bad UDPLite checksum "); return -NF_ACCEPT; } @@ -336,30 +336,28 @@ static int udplite_net_init(struct net *net) { int ret = 0; - ret = nf_conntrack_l4proto_register(net, - &nf_conntrack_l4proto_udplite4); + ret = nf_ct_l4proto_pernet_register(net, 
&nf_conntrack_l4proto_udplite4); if (ret < 0) { - pr_err("nf_conntrack_l4proto_udplite4 :protocol register failed.\n"); + pr_err("nf_conntrack_udplite4: pernet registration failed.\n"); goto out; } - ret = nf_conntrack_l4proto_register(net, - &nf_conntrack_l4proto_udplite6); + ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_udplite6); if (ret < 0) { - pr_err("nf_conntrack_l4proto_udplite4 :protocol register failed.\n"); + pr_err("nf_conntrack_udplite6: pernet registration failed.\n"); goto cleanup_udplite4; } return 0; cleanup_udplite4: - nf_conntrack_l4proto_unregister(net, &nf_conntrack_l4proto_udplite4); + nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_udplite4); out: return ret; } static void udplite_net_exit(struct net *net) { - nf_conntrack_l4proto_unregister(net, &nf_conntrack_l4proto_udplite6); - nf_conntrack_l4proto_unregister(net, &nf_conntrack_l4proto_udplite4); + nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_udplite6); + nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_udplite4); } static struct pernet_operations udplite_net_ops = { @@ -371,11 +369,33 @@ static struct pernet_operations udplite_net_ops = { static int __init nf_conntrack_proto_udplite_init(void) { - return register_pernet_subsys(&udplite_net_ops); + int ret; + + ret = register_pernet_subsys(&udplite_net_ops); + if (ret < 0) + goto out_pernet; + + ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_udplite4); + if (ret < 0) + goto out_udplite4; + + ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_udplite6); + if (ret < 0) + goto out_udplite6; + + return 0; +out_udplite6: + nf_ct_l4proto_unregister(&nf_conntrack_l4proto_udplite4); +out_udplite4: + unregister_pernet_subsys(&udplite_net_ops); +out_pernet: + return ret; } static void __exit nf_conntrack_proto_udplite_exit(void) { + nf_ct_l4proto_unregister(&nf_conntrack_l4proto_udplite6); + nf_ct_l4proto_unregister(&nf_conntrack_l4proto_udplite4); unregister_pernet_subsys(&udplite_net_ops); } diff --git a/net/netfilter/nf_conntrack_sane.c b/net/netfilter/nf_conntrack_sane.c index 295429f39088..4a2134fd3fcb 100644 --- a/net/netfilter/nf_conntrack_sane.c +++ b/net/netfilter/nf_conntrack_sane.c @@ -138,6 +138,7 @@ static int help(struct sk_buff *skb, exp = nf_ct_expect_alloc(ct); if (exp == NULL) { + nf_ct_helper_log(skb, ct, "cannot alloc expectation"); ret = NF_DROP; goto out; } @@ -151,8 +152,10 @@ static int help(struct sk_buff *skb, nf_ct_dump_tuple(&exp->tuple); /* Can't expect this? Best to drop packet now. 
*/ - if (nf_ct_expect_related(exp) != 0) + if (nf_ct_expect_related(exp) != 0) { + nf_ct_helper_log(skb, ct, "cannot add expectation"); ret = NF_DROP; + } nf_ct_expect_put(exp); diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index df8f4f284481..e0c4373b4747 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -855,11 +855,11 @@ static int refresh_signalling_expectation(struct nf_conn *ct, { struct nf_conn_help *help = nfct_help(ct); struct nf_conntrack_expect *exp; - struct hlist_node *n, *next; + struct hlist_node *next; int found = 0; spin_lock_bh(&nf_conntrack_lock); - hlist_for_each_entry_safe(exp, n, next, &help->expectations, lnode) { + hlist_for_each_entry_safe(exp, next, &help->expectations, lnode) { if (exp->class != SIP_EXPECT_SIGNALLING || !nf_inet_addr_cmp(&exp->tuple.dst.u3, addr) || exp->tuple.dst.protonum != proto || @@ -881,10 +881,10 @@ static void flush_expectations(struct nf_conn *ct, bool media) { struct nf_conn_help *help = nfct_help(ct); struct nf_conntrack_expect *exp; - struct hlist_node *n, *next; + struct hlist_node *next; spin_lock_bh(&nf_conntrack_lock); - hlist_for_each_entry_safe(exp, n, next, &help->expectations, lnode) { + hlist_for_each_entry_safe(exp, next, &help->expectations, lnode) { if ((exp->class != SIP_EXPECT_SIGNALLING) ^ media) continue; if (!del_timer(&exp->timeout)) @@ -1095,8 +1095,10 @@ static int process_sdp(struct sk_buff *skb, unsigned int protoff, port = simple_strtoul(*dptr + mediaoff, NULL, 10); if (port == 0) continue; - if (port < 1024 || port > 65535) + if (port < 1024 || port > 65535) { + nf_ct_helper_log(skb, ct, "wrong port %u", port); return NF_DROP; + } /* The media description overrides the session description. */ maddr_len = 0; @@ -1107,15 +1109,20 @@ static int process_sdp(struct sk_buff *skb, unsigned int protoff, memcpy(&rtp_addr, &maddr, sizeof(rtp_addr)); } else if (caddr_len) memcpy(&rtp_addr, &caddr, sizeof(rtp_addr)); - else + else { + nf_ct_helper_log(skb, ct, "cannot parse SDP message"); return NF_DROP; + } ret = set_expected_rtp_rtcp(skb, protoff, dataoff, dptr, datalen, &rtp_addr, htons(port), t->class, mediaoff, medialen); - if (ret != NF_ACCEPT) + if (ret != NF_ACCEPT) { + nf_ct_helper_log(skb, ct, + "cannot add expectation for voice"); return ret; + } /* Update media connection address if present */ if (maddr_len && nf_nat_sdp_addr && ct->status & IPS_NAT_MASK) { @@ -1123,8 +1130,10 @@ static int process_sdp(struct sk_buff *skb, unsigned int protoff, dptr, datalen, mediaoff, SDP_HDR_CONNECTION, SDP_HDR_MEDIA, &rtp_addr); - if (ret != NF_ACCEPT) + if (ret != NF_ACCEPT) { + nf_ct_helper_log(skb, ct, "cannot mangle SDP"); return ret; + } } i++; } @@ -1258,9 +1267,10 @@ static int process_register_request(struct sk_buff *skb, unsigned int protoff, ret = ct_sip_parse_header_uri(ct, *dptr, NULL, *datalen, SIP_HDR_CONTACT, NULL, &matchoff, &matchlen, &daddr, &port); - if (ret < 0) + if (ret < 0) { + nf_ct_helper_log(skb, ct, "cannot parse contact"); return NF_DROP; - else if (ret == 0) + } else if (ret == 0) return NF_ACCEPT; /* We don't support third-party registrations */ @@ -1273,8 +1283,10 @@ static int process_register_request(struct sk_buff *skb, unsigned int protoff, if (ct_sip_parse_numerical_param(ct, *dptr, matchoff + matchlen, *datalen, - "expires=", NULL, NULL, &expires) < 0) + "expires=", NULL, NULL, &expires) < 0) { + nf_ct_helper_log(skb, ct, "cannot parse expires"); return NF_DROP; + } if (expires == 0) { ret = NF_ACCEPT; @@ -1282,8 
+1294,10 @@ static int process_register_request(struct sk_buff *skb, unsigned int protoff, } exp = nf_ct_expect_alloc(ct); - if (!exp) + if (!exp) { + nf_ct_helper_log(skb, ct, "cannot alloc expectation"); return NF_DROP; + } saddr = NULL; if (sip_direct_signalling) @@ -1300,9 +1314,10 @@ static int process_register_request(struct sk_buff *skb, unsigned int protoff, ret = nf_nat_sip_expect(skb, protoff, dataoff, dptr, datalen, exp, matchoff, matchlen); else { - if (nf_ct_expect_related(exp) != 0) + if (nf_ct_expect_related(exp) != 0) { + nf_ct_helper_log(skb, ct, "cannot add expectation"); ret = NF_DROP; - else + } else ret = NF_ACCEPT; } nf_ct_expect_put(exp); @@ -1356,9 +1371,10 @@ static int process_register_response(struct sk_buff *skb, unsigned int protoff, SIP_HDR_CONTACT, &in_contact, &matchoff, &matchlen, &addr, &port); - if (ret < 0) + if (ret < 0) { + nf_ct_helper_log(skb, ct, "cannot parse contact"); return NF_DROP; - else if (ret == 0) + } else if (ret == 0) break; /* We don't support third-party registrations */ @@ -1373,8 +1389,10 @@ static int process_register_response(struct sk_buff *skb, unsigned int protoff, matchoff + matchlen, *datalen, "expires=", NULL, NULL, &c_expires); - if (ret < 0) + if (ret < 0) { + nf_ct_helper_log(skb, ct, "cannot parse expires"); return NF_DROP; + } if (c_expires == 0) break; if (refresh_signalling_expectation(ct, &addr, proto, port, @@ -1408,15 +1426,21 @@ static int process_sip_response(struct sk_buff *skb, unsigned int protoff, if (*datalen < strlen("SIP/2.0 200")) return NF_ACCEPT; code = simple_strtoul(*dptr + strlen("SIP/2.0 "), NULL, 10); - if (!code) + if (!code) { + nf_ct_helper_log(skb, ct, "cannot get code"); return NF_DROP; + } if (ct_sip_get_header(ct, *dptr, 0, *datalen, SIP_HDR_CSEQ, - &matchoff, &matchlen) <= 0) + &matchoff, &matchlen) <= 0) { + nf_ct_helper_log(skb, ct, "cannot parse cseq"); return NF_DROP; + } cseq = simple_strtoul(*dptr + matchoff, NULL, 10); - if (!cseq) + if (!cseq) { + nf_ct_helper_log(skb, ct, "cannot get cseq"); return NF_DROP; + } matchend = matchoff + matchlen + 1; for (i = 0; i < ARRAY_SIZE(sip_handlers); i++) { @@ -1440,8 +1464,25 @@ static int process_sip_request(struct sk_buff *skb, unsigned int protoff, { enum ip_conntrack_info ctinfo; struct nf_conn *ct = nf_ct_get(skb, &ctinfo); + struct nf_ct_sip_master *ct_sip_info = nfct_help_data(ct); + enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); unsigned int matchoff, matchlen; unsigned int cseq, i; + union nf_inet_addr addr; + __be16 port; + + /* Many Cisco IP phones use a high source port for SIP requests, but + * listen for the response on port 5060. If we are the local + * router for one of these phones, save the port number from the + * Via: header so that nf_nat_sip can redirect the responses to + * the correct port. 
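The NAT side picks this saved port up in map_addr() and nf_nat_sip_expect() (see the nf_nat_sip.c hunks further down): wherever the reply source port is chosen, forced_dport wins when it is set. Both sites use GCC's two-operand conditional; as a sketch:

	/* "a ? : b" is GNU C for "a ? a : b" with a evaluated once.
	 * forced_dport is network byte order; 0 doubles as "unset". */
	__be16 srcport = ct_sip_info->forced_dport ? :
			 ct->tuplehash[dir].tuple.src.u.udp.port;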
+ */ + if (ct_sip_parse_header_uri(ct, *dptr, NULL, *datalen, + SIP_HDR_VIA_UDP, NULL, &matchoff, + &matchlen, &addr, &port) > 0 && + port != ct->tuplehash[dir].tuple.src.u.udp.port && + nf_inet_addr_cmp(&addr, &ct->tuplehash[dir].tuple.src.u3)) + ct_sip_info->forced_dport = port; for (i = 0; i < ARRAY_SIZE(sip_handlers); i++) { const struct sip_handler *handler; @@ -1454,11 +1495,15 @@ static int process_sip_request(struct sk_buff *skb, unsigned int protoff, continue; if (ct_sip_get_header(ct, *dptr, 0, *datalen, SIP_HDR_CSEQ, - &matchoff, &matchlen) <= 0) + &matchoff, &matchlen) <= 0) { + nf_ct_helper_log(skb, ct, "cannot parse cseq"); return NF_DROP; + } cseq = simple_strtoul(*dptr + matchoff, NULL, 10); - if (!cseq) + if (!cseq) { + nf_ct_helper_log(skb, ct, "cannot get cseq"); return NF_DROP; + } return handler->request(skb, protoff, dataoff, dptr, datalen, cseq); @@ -1481,8 +1526,10 @@ static int process_sip_msg(struct sk_buff *skb, struct nf_conn *ct, if (ret == NF_ACCEPT && ct->status & IPS_NAT_MASK) { nf_nat_sip = rcu_dereference(nf_nat_sip_hook); if (nf_nat_sip && !nf_nat_sip(skb, protoff, dataoff, - dptr, datalen)) + dptr, datalen)) { + nf_ct_helper_log(skb, ct, "cannot NAT SIP message"); ret = NF_DROP; + } } return ret; @@ -1547,10 +1594,11 @@ static int sip_help_tcp(struct sk_buff *skb, unsigned int protoff, msglen = origlen = end - dptr; if (msglen > datalen) - return NF_DROP; + return NF_ACCEPT; ret = process_sip_msg(skb, ct, protoff, dataoff, &dptr, &msglen); + /* process_sip_* functions report why this packet is dropped */ if (ret != NF_ACCEPT) break; diff = msglen - origlen; diff --git a/net/netfilter/nf_conntrack_snmp.c b/net/netfilter/nf_conntrack_snmp.c index 6e545e26289e..87b95a2c270c 100644 --- a/net/netfilter/nf_conntrack_snmp.c +++ b/net/netfilter/nf_conntrack_snmp.c @@ -16,6 +16,7 @@ #include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack_helper.h> #include <net/netfilter/nf_conntrack_expect.h> +#include <linux/netfilter/nf_conntrack_snmp.h> #define SNMP_PORT 161 diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index e7185c684816..bd700b4013c1 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -1,5 +1,6 @@ /* (C) 1999-2001 Paul `Rusty' Russell * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> + * (C) 2005-2012 Patrick McHardy <kaber@trash.net> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -366,7 +367,7 @@ static int nf_conntrack_standalone_init_proc(struct net *net) { struct proc_dir_entry *pde; - pde = proc_net_fops_create(net, "nf_conntrack", 0440, &ct_file_ops); + pde = proc_create("nf_conntrack", 0440, net->proc_net, &ct_file_ops); if (!pde) goto out_nf_conntrack; @@ -377,7 +378,7 @@ static int nf_conntrack_standalone_init_proc(struct net *net) return 0; out_stat_nf_conntrack: - proc_net_remove(net, "nf_conntrack"); + remove_proc_entry("nf_conntrack", net->proc_net); out_nf_conntrack: return -ENOMEM; } @@ -385,7 +386,7 @@ out_nf_conntrack: static void nf_conntrack_standalone_fini_proc(struct net *net) { remove_proc_entry("nf_conntrack", net->proc_net_stat); - proc_net_remove(net, "nf_conntrack"); + remove_proc_entry("nf_conntrack", net->proc_net); } #else static int nf_conntrack_standalone_init_proc(struct net *net) @@ -472,13 +473,6 @@ static int nf_conntrack_standalone_init_sysctl(struct net *net) { struct ctl_table *table; - if 
(net_eq(net, &init_net)) { - nf_ct_netfilter_header = - register_net_sysctl(&init_net, "net", nf_ct_netfilter_table); - if (!nf_ct_netfilter_header) - goto out; - } - table = kmemdup(nf_ct_sysctl_table, sizeof(nf_ct_sysctl_table), GFP_KERNEL); if (!table) @@ -502,10 +496,6 @@ static int nf_conntrack_standalone_init_sysctl(struct net *net) out_unregister_netfilter: kfree(table); out_kmemdup: - if (net_eq(net, &init_net)) - unregister_net_sysctl_table(nf_ct_netfilter_header); -out: - printk(KERN_ERR "nf_conntrack: can't register to sysctl.\n"); return -ENOMEM; } @@ -513,8 +503,6 @@ static void nf_conntrack_standalone_fini_sysctl(struct net *net) { struct ctl_table *table; - if (net_eq(net, &init_net)) - unregister_net_sysctl_table(nf_ct_netfilter_header); table = net->ct.sysctl_header->ctl_table_arg; unregister_net_sysctl_table(net->ct.sysctl_header); kfree(table); @@ -530,51 +518,90 @@ static void nf_conntrack_standalone_fini_sysctl(struct net *net) } #endif /* CONFIG_SYSCTL */ -static int nf_conntrack_net_init(struct net *net) +static int nf_conntrack_pernet_init(struct net *net) { int ret; - ret = nf_conntrack_init(net); + ret = nf_conntrack_init_net(net); if (ret < 0) goto out_init; + ret = nf_conntrack_standalone_init_proc(net); if (ret < 0) goto out_proc; + net->ct.sysctl_checksum = 1; net->ct.sysctl_log_invalid = 0; ret = nf_conntrack_standalone_init_sysctl(net); if (ret < 0) goto out_sysctl; + return 0; out_sysctl: nf_conntrack_standalone_fini_proc(net); out_proc: - nf_conntrack_cleanup(net); + nf_conntrack_cleanup_net(net); out_init: return ret; } -static void nf_conntrack_net_exit(struct net *net) +static void nf_conntrack_pernet_exit(struct list_head *net_exit_list) { - nf_conntrack_standalone_fini_sysctl(net); - nf_conntrack_standalone_fini_proc(net); - nf_conntrack_cleanup(net); + struct net *net; + + list_for_each_entry(net, net_exit_list, exit_list) { + nf_conntrack_standalone_fini_sysctl(net); + nf_conntrack_standalone_fini_proc(net); + } + nf_conntrack_cleanup_net_list(net_exit_list); } static struct pernet_operations nf_conntrack_net_ops = { - .init = nf_conntrack_net_init, - .exit = nf_conntrack_net_exit, + .init = nf_conntrack_pernet_init, + .exit_batch = nf_conntrack_pernet_exit, }; static int __init nf_conntrack_standalone_init(void) { - return register_pernet_subsys(&nf_conntrack_net_ops); + int ret = nf_conntrack_init_start(); + if (ret < 0) + goto out_start; + +#ifdef CONFIG_SYSCTL + nf_ct_netfilter_header = + register_net_sysctl(&init_net, "net", nf_ct_netfilter_table); + if (!nf_ct_netfilter_header) { + pr_err("nf_conntrack: can't register to sysctl.\n"); + ret = -ENOMEM; + goto out_sysctl; + } +#endif + + ret = register_pernet_subsys(&nf_conntrack_net_ops); + if (ret < 0) + goto out_pernet; + + nf_conntrack_init_end(); + return 0; + +out_pernet: +#ifdef CONFIG_SYSCTL + unregister_net_sysctl_table(nf_ct_netfilter_header); +out_sysctl: +#endif + nf_conntrack_cleanup_end(); +out_start: + return ret; } static void __exit nf_conntrack_standalone_fini(void) { + nf_conntrack_cleanup_start(); unregister_pernet_subsys(&nf_conntrack_net_ops); +#ifdef CONFIG_SYSCTL + unregister_net_sysctl_table(nf_ct_netfilter_header); +#endif nf_conntrack_cleanup_end(); } diff --git a/net/netfilter/nf_conntrack_tftp.c b/net/netfilter/nf_conntrack_tftp.c index 81fc61c05263..e68ab4fbd71f 100644 --- a/net/netfilter/nf_conntrack_tftp.c +++ b/net/netfilter/nf_conntrack_tftp.c @@ -1,5 +1,5 @@ /* (C) 2001-2002 Magnus Boden <mb@ozaba.mine.nu> - * + * (C) 2006-2012 Patrick McHardy <kaber@trash.net> 
* This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. @@ -60,8 +60,10 @@ static int tftp_help(struct sk_buff *skb, nf_ct_dump_tuple(&ct->tuplehash[IP_CT_DIR_REPLY].tuple); exp = nf_ct_expect_alloc(ct); - if (exp == NULL) + if (exp == NULL) { + nf_ct_helper_log(skb, ct, "cannot alloc expectation"); return NF_DROP; + } tuple = &ct->tuplehash[IP_CT_DIR_REPLY].tuple; nf_ct_expect_init(exp, NF_CT_EXPECT_CLASS_DEFAULT, nf_ct_l3num(ct), @@ -74,8 +76,10 @@ static int tftp_help(struct sk_buff *skb, nf_nat_tftp = rcu_dereference(nf_nat_tftp_hook); if (nf_nat_tftp && ct->status & IPS_NAT_MASK) ret = nf_nat_tftp(skb, ctinfo, exp); - else if (nf_ct_expect_related(exp) != 0) + else if (nf_ct_expect_related(exp) != 0) { + nf_ct_helper_log(skb, ct, "cannot add expectation"); ret = NF_DROP; + } nf_ct_expect_put(exp); break; case TFTP_OPCODE_DATA: diff --git a/net/netfilter/nf_conntrack_timeout.c b/net/netfilter/nf_conntrack_timeout.c index a878ce5b252c..93da609d9d29 100644 --- a/net/netfilter/nf_conntrack_timeout.c +++ b/net/netfilter/nf_conntrack_timeout.c @@ -37,24 +37,15 @@ static struct nf_ct_ext_type timeout_extend __read_mostly = { .id = NF_CT_EXT_TIMEOUT, }; -int nf_conntrack_timeout_init(struct net *net) +int nf_conntrack_timeout_init(void) { - int ret = 0; - - if (net_eq(net, &init_net)) { - ret = nf_ct_extend_register(&timeout_extend); - if (ret < 0) { - printk(KERN_ERR "nf_ct_timeout: Unable to register " - "timeout extension.\n"); - return ret; - } - } - - return 0; + int ret = nf_ct_extend_register(&timeout_extend); + if (ret < 0) + pr_err("nf_ct_timeout: Unable to register timeout extension.\n"); + return ret; } -void nf_conntrack_timeout_fini(struct net *net) +void nf_conntrack_timeout_fini(void) { - if (net_eq(net, &init_net)) - nf_ct_extend_unregister(&timeout_extend); + nf_ct_extend_unregister(&timeout_extend); } diff --git a/net/netfilter/nf_conntrack_timestamp.c b/net/netfilter/nf_conntrack_timestamp.c index 7ea8026f07c9..902fb0a6b38a 100644 --- a/net/netfilter/nf_conntrack_timestamp.c +++ b/net/netfilter/nf_conntrack_timestamp.c @@ -88,37 +88,28 @@ static void nf_conntrack_tstamp_fini_sysctl(struct net *net) } #endif -int nf_conntrack_tstamp_init(struct net *net) +int nf_conntrack_tstamp_pernet_init(struct net *net) { - int ret; - net->ct.sysctl_tstamp = nf_ct_tstamp; + return nf_conntrack_tstamp_init_sysctl(net); +} - if (net_eq(net, &init_net)) { - ret = nf_ct_extend_register(&tstamp_extend); - if (ret < 0) { - printk(KERN_ERR "nf_ct_tstamp: Unable to register " - "extension\n"); - goto out_extend_register; - } - } +void nf_conntrack_tstamp_pernet_fini(struct net *net) +{ + nf_conntrack_tstamp_fini_sysctl(net); + nf_ct_extend_unregister(&tstamp_extend); +} - ret = nf_conntrack_tstamp_init_sysctl(net); +int nf_conntrack_tstamp_init(void) +{ + int ret; + ret = nf_ct_extend_register(&tstamp_extend); if (ret < 0) - goto out_sysctl; - - return 0; - -out_sysctl: - if (net_eq(net, &init_net)) - nf_ct_extend_unregister(&tstamp_extend); -out_extend_register: + pr_err("nf_ct_tstamp: Unable to register extension\n"); return ret; } -void nf_conntrack_tstamp_fini(struct net *net) +void nf_conntrack_tstamp_fini(void) { - nf_conntrack_tstamp_fini_sysctl(net); - if (net_eq(net, &init_net)) - nf_ct_extend_unregister(&tstamp_extend); + nf_ct_extend_unregister(&tstamp_extend); } diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index 9e312695c818..388656d5a9ec 
100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -16,7 +16,6 @@ #define NF_LOG_PREFIXLEN 128 #define NFLOGGER_NAME_LEN 64 -static const struct nf_logger __rcu *nf_loggers[NFPROTO_NUMPROTO] __read_mostly; static struct list_head nf_loggers_l[NFPROTO_NUMPROTO] __read_mostly; static DEFINE_MUTEX(nf_log_mutex); @@ -32,13 +31,46 @@ static struct nf_logger *__find_logger(int pf, const char *str_logger) return NULL; } +void nf_log_set(struct net *net, u_int8_t pf, const struct nf_logger *logger) +{ + const struct nf_logger *log; + + if (pf == NFPROTO_UNSPEC) + return; + + mutex_lock(&nf_log_mutex); + log = rcu_dereference_protected(net->nf.nf_loggers[pf], + lockdep_is_held(&nf_log_mutex)); + if (log == NULL) + rcu_assign_pointer(net->nf.nf_loggers[pf], logger); + + mutex_unlock(&nf_log_mutex); +} +EXPORT_SYMBOL(nf_log_set); + +void nf_log_unset(struct net *net, const struct nf_logger *logger) +{ + int i; + const struct nf_logger *log; + + mutex_lock(&nf_log_mutex); + for (i = 0; i < NFPROTO_NUMPROTO; i++) { + log = rcu_dereference_protected(net->nf.nf_loggers[i], + lockdep_is_held(&nf_log_mutex)); + if (log == logger) + RCU_INIT_POINTER(net->nf.nf_loggers[i], NULL); + } + mutex_unlock(&nf_log_mutex); + synchronize_rcu(); +} +EXPORT_SYMBOL(nf_log_unset); + /* return EEXIST if the same logger is registered, 0 on success. */ int nf_log_register(u_int8_t pf, struct nf_logger *logger) { - const struct nf_logger *llog; int i; - if (pf >= ARRAY_SIZE(nf_loggers)) + if (pf >= ARRAY_SIZE(init_net.nf.nf_loggers)) return -EINVAL; for (i = 0; i < ARRAY_SIZE(logger->list); i++) @@ -52,10 +84,6 @@ int nf_log_register(u_int8_t pf, struct nf_logger *logger) } else { /* register at end of list to honor first register win */ list_add_tail(&logger->list[pf], &nf_loggers_l[pf]); - llog = rcu_dereference_protected(nf_loggers[pf], - lockdep_is_held(&nf_log_mutex)); - if (llog == NULL) - rcu_assign_pointer(nf_loggers[pf], logger); } mutex_unlock(&nf_log_mutex); @@ -66,49 +94,43 @@ EXPORT_SYMBOL(nf_log_register); void nf_log_unregister(struct nf_logger *logger) { - const struct nf_logger *c_logger; int i; mutex_lock(&nf_log_mutex); - for (i = 0; i < ARRAY_SIZE(nf_loggers); i++) { - c_logger = rcu_dereference_protected(nf_loggers[i], - lockdep_is_held(&nf_log_mutex)); - if (c_logger == logger) - RCU_INIT_POINTER(nf_loggers[i], NULL); + for (i = 0; i < NFPROTO_NUMPROTO; i++) list_del(&logger->list[i]); - } mutex_unlock(&nf_log_mutex); - - synchronize_rcu(); } EXPORT_SYMBOL(nf_log_unregister); -int nf_log_bind_pf(u_int8_t pf, const struct nf_logger *logger) +int nf_log_bind_pf(struct net *net, u_int8_t pf, + const struct nf_logger *logger) { - if (pf >= ARRAY_SIZE(nf_loggers)) + if (pf >= ARRAY_SIZE(net->nf.nf_loggers)) return -EINVAL; mutex_lock(&nf_log_mutex); if (__find_logger(pf, logger->name) == NULL) { mutex_unlock(&nf_log_mutex); return -ENOENT; } - rcu_assign_pointer(nf_loggers[pf], logger); + rcu_assign_pointer(net->nf.nf_loggers[pf], logger); mutex_unlock(&nf_log_mutex); return 0; } EXPORT_SYMBOL(nf_log_bind_pf); -void nf_log_unbind_pf(u_int8_t pf) +void nf_log_unbind_pf(struct net *net, u_int8_t pf) { - if (pf >= ARRAY_SIZE(nf_loggers)) + if (pf >= ARRAY_SIZE(net->nf.nf_loggers)) return; mutex_lock(&nf_log_mutex); - RCU_INIT_POINTER(nf_loggers[pf], NULL); + RCU_INIT_POINTER(net->nf.nf_loggers[pf], NULL); mutex_unlock(&nf_log_mutex); } EXPORT_SYMBOL(nf_log_unbind_pf); -void nf_log_packet(u_int8_t pf, +void nf_log_packet(struct net *net, + u_int8_t pf, unsigned int hooknum, const struct sk_buff 
*skb, const struct net_device *in, @@ -121,7 +143,7 @@ void nf_log_packet(u_int8_t pf, const struct nf_logger *logger; rcu_read_lock(); - logger = rcu_dereference(nf_loggers[pf]); + logger = rcu_dereference(net->nf.nf_loggers[pf]); if (logger) { va_start(args, fmt); vsnprintf(prefix, sizeof(prefix), fmt, args); @@ -135,9 +157,11 @@ EXPORT_SYMBOL(nf_log_packet); #ifdef CONFIG_PROC_FS static void *seq_start(struct seq_file *seq, loff_t *pos) { + struct net *net = seq_file_net(seq); + mutex_lock(&nf_log_mutex); - if (*pos >= ARRAY_SIZE(nf_loggers)) + if (*pos >= ARRAY_SIZE(net->nf.nf_loggers)) return NULL; return pos; @@ -145,9 +169,11 @@ static void *seq_start(struct seq_file *seq, loff_t *pos) static void *seq_next(struct seq_file *s, void *v, loff_t *pos) { + struct net *net = seq_file_net(s); + (*pos)++; - if (*pos >= ARRAY_SIZE(nf_loggers)) + if (*pos >= ARRAY_SIZE(net->nf.nf_loggers)) return NULL; return pos; @@ -164,8 +190,9 @@ static int seq_show(struct seq_file *s, void *v) const struct nf_logger *logger; struct nf_logger *t; int ret; + struct net *net = seq_file_net(s); - logger = rcu_dereference_protected(nf_loggers[*pos], + logger = rcu_dereference_protected(net->nf.nf_loggers[*pos], lockdep_is_held(&nf_log_mutex)); if (!logger) @@ -199,7 +226,8 @@ static const struct seq_operations nflog_seq_ops = { static int nflog_open(struct inode *inode, struct file *file) { - return seq_open(file, &nflog_seq_ops); + return seq_open_net(inode, file, &nflog_seq_ops, + sizeof(struct seq_net_private)); } static const struct file_operations nflog_file_ops = { @@ -207,7 +235,7 @@ static const struct file_operations nflog_file_ops = { .open = nflog_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release, + .release = seq_release_net, }; @@ -216,7 +244,6 @@ static const struct file_operations nflog_file_ops = { #ifdef CONFIG_SYSCTL static char nf_log_sysctl_fnames[NFPROTO_NUMPROTO-NFPROTO_UNSPEC][3]; static struct ctl_table nf_log_sysctl_table[NFPROTO_NUMPROTO+1]; -static struct ctl_table_header *nf_log_dir_header; static int nf_log_proc_dostring(ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) @@ -226,6 +253,7 @@ static int nf_log_proc_dostring(ctl_table *table, int write, size_t size = *lenp; int r = 0; int tindex = (unsigned long)table->extra1; + struct net *net = current->nsproxy->net_ns; if (write) { if (size > sizeof(buf)) @@ -234,7 +262,7 @@ static int nf_log_proc_dostring(ctl_table *table, int write, return -EFAULT; if (!strcmp(buf, "NONE")) { - nf_log_unbind_pf(tindex); + nf_log_unbind_pf(net, tindex); return 0; } mutex_lock(&nf_log_mutex); @@ -243,11 +271,11 @@ static int nf_log_proc_dostring(ctl_table *table, int write, mutex_unlock(&nf_log_mutex); return -ENOENT; } - rcu_assign_pointer(nf_loggers[tindex], logger); + rcu_assign_pointer(net->nf.nf_loggers[tindex], logger); mutex_unlock(&nf_log_mutex); } else { mutex_lock(&nf_log_mutex); - logger = rcu_dereference_protected(nf_loggers[tindex], + logger = rcu_dereference_protected(net->nf.nf_loggers[tindex], lockdep_is_held(&nf_log_mutex)); if (!logger) table->data = "NONE"; @@ -260,49 +288,111 @@ static int nf_log_proc_dostring(ctl_table *table, int write, return r; } -static __init int netfilter_log_sysctl_init(void) +static int netfilter_log_sysctl_init(struct net *net) { int i; - - for (i = NFPROTO_UNSPEC; i < NFPROTO_NUMPROTO; i++) { - snprintf(nf_log_sysctl_fnames[i-NFPROTO_UNSPEC], 3, "%d", i); - nf_log_sysctl_table[i].procname = - nf_log_sysctl_fnames[i-NFPROTO_UNSPEC]; - 
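The sysctl handling continuing below uses the standard per-netns table pattern: init_net keeps the static nf_log_sysctl_table (and fills in the file names once), while every other namespace gets its own kmemdup() copy, freed again on unregister. Roughly:

	struct ctl_table *table = nf_log_sysctl_table;

	/* only non-init_net namespaces own their copy, so teardown
	 * must free exactly what was duplicated here */
	if (!net_eq(net, &init_net)) {
		table = kmemdup(nf_log_sysctl_table,
				sizeof(nf_log_sysctl_table), GFP_KERNEL);
		if (!table)
			return -ENOMEM;
	}
	net->nf.nf_log_dir_header =
		register_net_sysctl(net, "net/netfilter/nf_log", table);

This pairs with moving the logger array into struct net: nf_log_packet() and all its callers now carry a struct net so the right namespace's logger fires.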
nf_log_sysctl_table[i].data = NULL; - nf_log_sysctl_table[i].maxlen = - NFLOGGER_NAME_LEN * sizeof(char); - nf_log_sysctl_table[i].mode = 0644; - nf_log_sysctl_table[i].proc_handler = nf_log_proc_dostring; - nf_log_sysctl_table[i].extra1 = (void *)(unsigned long) i; + struct ctl_table *table; + + table = nf_log_sysctl_table; + if (!net_eq(net, &init_net)) { + table = kmemdup(nf_log_sysctl_table, + sizeof(nf_log_sysctl_table), + GFP_KERNEL); + if (!table) + goto err_alloc; + } else { + for (i = NFPROTO_UNSPEC; i < NFPROTO_NUMPROTO; i++) { + snprintf(nf_log_sysctl_fnames[i], + 3, "%d", i); + nf_log_sysctl_table[i].procname = + nf_log_sysctl_fnames[i]; + nf_log_sysctl_table[i].data = NULL; + nf_log_sysctl_table[i].maxlen = + NFLOGGER_NAME_LEN * sizeof(char); + nf_log_sysctl_table[i].mode = 0644; + nf_log_sysctl_table[i].proc_handler = + nf_log_proc_dostring; + nf_log_sysctl_table[i].extra1 = + (void *)(unsigned long) i; + } } - nf_log_dir_header = register_net_sysctl(&init_net, "net/netfilter/nf_log", - nf_log_sysctl_table); - if (!nf_log_dir_header) - return -ENOMEM; + net->nf.nf_log_dir_header = register_net_sysctl(net, + "net/netfilter/nf_log", + table); + if (!net->nf.nf_log_dir_header) + goto err_reg; return 0; + +err_reg: + if (!net_eq(net, &init_net)) + kfree(table); +err_alloc: + return -ENOMEM; +} + +static void netfilter_log_sysctl_exit(struct net *net) +{ + struct ctl_table *table; + + table = net->nf.nf_log_dir_header->ctl_table_arg; + unregister_net_sysctl_table(net->nf.nf_log_dir_header); + if (!net_eq(net, &init_net)) + kfree(table); } #else -static __init int netfilter_log_sysctl_init(void) +static int netfilter_log_sysctl_init(struct net *net) { return 0; } + +static void netfilter_log_sysctl_exit(struct net *net) +{ +} #endif /* CONFIG_SYSCTL */ -int __init netfilter_log_init(void) +static int __net_init nf_log_net_init(struct net *net) { - int i, r; + int ret = -ENOMEM; + #ifdef CONFIG_PROC_FS if (!proc_create("nf_log", S_IRUGO, - proc_net_netfilter, &nflog_file_ops)) - return -1; + net->nf.proc_netfilter, &nflog_file_ops)) + return ret; #endif + ret = netfilter_log_sysctl_init(net); + if (ret < 0) + goto out_sysctl; + + return 0; - /* Errors will trigger panic, unroll on error is unnecessary. */ - r = netfilter_log_sysctl_init(); - if (r < 0) - return r; +out_sysctl: + /* For init_net: errors will trigger panic, don't unroll on error. */ + if (!net_eq(net, &init_net)) + remove_proc_entry("nf_log", net->nf.proc_netfilter); + + return ret; +} + +static void __net_exit nf_log_net_exit(struct net *net) +{ + netfilter_log_sysctl_exit(net); + remove_proc_entry("nf_log", net->nf.proc_netfilter); +} + +static struct pernet_operations nf_log_net_ops = { + .init = nf_log_net_init, + .exit = nf_log_net_exit, +}; + +int __init netfilter_log_init(void) +{ + int i, ret; + + ret = register_pernet_subsys(&nf_log_net_ops); + if (ret < 0) + return ret; for (i = NFPROTO_UNSPEC; i < NFPROTO_NUMPROTO; i++) INIT_LIST_HEAD(&(nf_loggers_l[i])); diff --git a/net/netfilter/nf_nat_amanda.c b/net/netfilter/nf_nat_amanda.c index 42d337881171..eb772380a202 100644 --- a/net/netfilter/nf_nat_amanda.c +++ b/net/netfilter/nf_nat_amanda.c @@ -1,6 +1,7 @@ /* Amanda extension for TCP NAT alteration. * (C) 2002 by Brian J. 
Murrell <netfilter@interlinx.bc.ca> * based on a copy of HW's ip_nat_irc.c as well as other modules + * (C) 2006-2012 Patrick McHardy <kaber@trash.net> * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -56,15 +57,19 @@ static unsigned int help(struct sk_buff *skb, } } - if (port == 0) + if (port == 0) { + nf_ct_helper_log(skb, exp->master, "all ports in use"); return NF_DROP; + } sprintf(buffer, "%u", port); ret = nf_nat_mangle_udp_packet(skb, exp->master, ctinfo, protoff, matchoff, matchlen, buffer, strlen(buffer)); - if (ret != NF_ACCEPT) + if (ret != NF_ACCEPT) { + nf_ct_helper_log(skb, exp->master, "cannot mangle packet"); nf_ct_unexpect_related(exp); + } return ret; } diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index 5f2f9109f461..038eee5c8f85 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -87,9 +87,11 @@ int nf_xfrm_me_harder(struct sk_buff *skb, unsigned int family) struct flowi fl; unsigned int hh_len; struct dst_entry *dst; + int err; - if (xfrm_decode_session(skb, &fl, family) < 0) - return -1; + err = xfrm_decode_session(skb, &fl, family); + if (err < 0) + return err; dst = skb_dst(skb); if (dst->xfrm) @@ -98,7 +100,7 @@ int nf_xfrm_me_harder(struct sk_buff *skb, unsigned int family) dst = xfrm_lookup(dev_net(dst->dev), dst, &fl, skb->sk, 0); if (IS_ERR(dst)) - return -1; + return PTR_ERR(dst); skb_dst_drop(skb); skb_dst_set(skb, dst); @@ -107,7 +109,7 @@ int nf_xfrm_me_harder(struct sk_buff *skb, unsigned int family) hh_len = skb_dst(skb)->dev->hard_header_len; if (skb_headroom(skb) < hh_len && pskb_expand_head(skb, hh_len - skb_headroom(skb), 0, GFP_ATOMIC)) - return -1; + return -ENOMEM; return 0; } EXPORT_SYMBOL(nf_xfrm_me_harder); @@ -191,9 +193,8 @@ find_appropriate_src(struct net *net, u16 zone, unsigned int h = hash_by_src(net, zone, tuple); const struct nf_conn_nat *nat; const struct nf_conn *ct; - const struct hlist_node *n; - hlist_for_each_entry_rcu(nat, n, &net->ct.nat_bysource[h], bysource) { + hlist_for_each_entry_rcu(nat, &net->ct.nat_bysource[h], bysource) { ct = nat->ct; if (same_src(ct, tuple) && nf_ct_zone(ct) == zone) { /* Copy source part from reply tuple. */ @@ -468,33 +469,22 @@ EXPORT_SYMBOL_GPL(nf_nat_packet); struct nf_nat_proto_clean { u8 l3proto; u8 l4proto; - bool hash; }; -/* Clear NAT section of all conntracks, in case we're loaded again. */ -static int nf_nat_proto_clean(struct nf_conn *i, void *data) +/* kill conntracks with affected NAT section */ +static int nf_nat_proto_remove(struct nf_conn *i, void *data) { const struct nf_nat_proto_clean *clean = data; struct nf_conn_nat *nat = nfct_nat(i); if (!nat) return 0; - if (!(i->status & IPS_SRC_NAT_DONE)) - return 0; + if ((clean->l3proto && nf_ct_l3num(i) != clean->l3proto) || (clean->l4proto && nf_ct_protonum(i) != clean->l4proto)) return 0; - if (clean->hash) { - spin_lock_bh(&nf_nat_lock); - hlist_del_rcu(&nat->bysource); - spin_unlock_bh(&nf_nat_lock); - } else { - memset(nat, 0, sizeof(*nat)); - i->status &= ~(IPS_NAT_MASK | IPS_NAT_DONE_MASK | - IPS_SEQ_ADJUST); - } - return 0; + return i->status & IPS_NAT_MASK ? 
1 : 0; } static void nf_nat_l4proto_clean(u8 l3proto, u8 l4proto) @@ -506,16 +496,8 @@ static void nf_nat_l4proto_clean(u8 l3proto, u8 l4proto) struct net *net; rtnl_lock(); - /* Step 1 - remove from bysource hash */ - clean.hash = true; for_each_net(net) - nf_ct_iterate_cleanup(net, nf_nat_proto_clean, &clean); - synchronize_rcu(); - - /* Step 2 - clean NAT section */ - clean.hash = false; - for_each_net(net) - nf_ct_iterate_cleanup(net, nf_nat_proto_clean, &clean); + nf_ct_iterate_cleanup(net, nf_nat_proto_remove, &clean); rtnl_unlock(); } @@ -527,16 +509,9 @@ static void nf_nat_l3proto_clean(u8 l3proto) struct net *net; rtnl_lock(); - /* Step 1 - remove from bysource hash */ - clean.hash = true; - for_each_net(net) - nf_ct_iterate_cleanup(net, nf_nat_proto_clean, &clean); - synchronize_rcu(); - /* Step 2 - clean NAT section */ - clean.hash = false; for_each_net(net) - nf_ct_iterate_cleanup(net, nf_nat_proto_clean, &clean); + nf_ct_iterate_cleanup(net, nf_nat_proto_remove, &clean); rtnl_unlock(); } @@ -774,7 +749,7 @@ static void __net_exit nf_nat_net_exit(struct net *net) { struct nf_nat_proto_clean clean = {}; - nf_ct_iterate_cleanup(net, &nf_nat_proto_clean, &clean); + nf_ct_iterate_cleanup(net, &nf_nat_proto_remove, &clean); synchronize_rcu(); nf_ct_free_hashtable(net->ct.nat_bysource, net->ct.nat_htable_size); } diff --git a/net/netfilter/nf_nat_ftp.c b/net/netfilter/nf_nat_ftp.c index e839b97b2863..e84a578dbe35 100644 --- a/net/netfilter/nf_nat_ftp.c +++ b/net/netfilter/nf_nat_ftp.c @@ -96,8 +96,10 @@ static unsigned int nf_nat_ftp(struct sk_buff *skb, } } - if (port == 0) + if (port == 0) { + nf_ct_helper_log(skb, ct, "all ports in use"); return NF_DROP; + } buflen = nf_nat_ftp_fmt_cmd(ct, type, buffer, sizeof(buffer), &newaddr, port); @@ -113,6 +115,7 @@ static unsigned int nf_nat_ftp(struct sk_buff *skb, return NF_ACCEPT; out: + nf_ct_helper_log(skb, ct, "cannot mangle packet"); nf_ct_unexpect_related(exp); return NF_DROP; } diff --git a/net/netfilter/nf_nat_helper.c b/net/netfilter/nf_nat_helper.c index 23c2b38676a6..5fea563afe30 100644 --- a/net/netfilter/nf_nat_helper.c +++ b/net/netfilter/nf_nat_helper.c @@ -2,6 +2,7 @@ * * (C) 2000-2002 Harald Welte <laforge@netfilter.org> * (C) 2003-2006 Netfilter Core Team <coreteam@netfilter.org> + * (C) 2007-2012 Patrick McHardy <kaber@trash.net> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as diff --git a/net/netfilter/nf_nat_irc.c b/net/netfilter/nf_nat_irc.c index 1fedee6e7fb6..f02b3605823e 100644 --- a/net/netfilter/nf_nat_irc.c +++ b/net/netfilter/nf_nat_irc.c @@ -56,14 +56,18 @@ static unsigned int help(struct sk_buff *skb, } } - if (port == 0) + if (port == 0) { + nf_ct_helper_log(skb, exp->master, "all ports in use"); return NF_DROP; + } ret = nf_nat_mangle_tcp_packet(skb, exp->master, ctinfo, protoff, matchoff, matchlen, buffer, strlen(buffer)); - if (ret != NF_ACCEPT) + if (ret != NF_ACCEPT) { + nf_ct_helper_log(skb, exp->master, "cannot mangle packet"); nf_ct_unexpect_related(exp); + } return ret; } diff --git a/net/netfilter/nf_nat_proto_sctp.c b/net/netfilter/nf_nat_proto_sctp.c index e64faa5ca893..396e55d46f90 100644 --- a/net/netfilter/nf_nat_proto_sctp.c +++ b/net/netfilter/nf_nat_proto_sctp.c @@ -36,7 +36,7 @@ sctp_manip_pkt(struct sk_buff *skb, { struct sk_buff *frag; sctp_sctphdr_t *hdr; - __be32 crc32; + __u32 crc32; if (!skb_make_writable(skb, hdroff + sizeof(*hdr))) return false; @@ -55,8 +55,7 @@ sctp_manip_pkt(struct 
sk_buff *skb, skb_walk_frags(skb, frag) crc32 = sctp_update_cksum((u8 *)frag->data, skb_headlen(frag), crc32); - crc32 = sctp_end_cksum(crc32); - hdr->checksum = crc32; + hdr->checksum = sctp_end_cksum(crc32); return true; } diff --git a/net/netfilter/nf_nat_sip.c b/net/netfilter/nf_nat_sip.c index 16303c752213..96ccdf78a29f 100644 --- a/net/netfilter/nf_nat_sip.c +++ b/net/netfilter/nf_nat_sip.c @@ -95,6 +95,7 @@ static int map_addr(struct sk_buff *skb, unsigned int protoff, enum ip_conntrack_info ctinfo; struct nf_conn *ct = nf_ct_get(skb, &ctinfo); enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); + struct nf_ct_sip_master *ct_sip_info = nfct_help_data(ct); char buffer[INET6_ADDRSTRLEN + sizeof("[]:nnnnn")]; unsigned int buflen; union nf_inet_addr newaddr; @@ -107,7 +108,8 @@ static int map_addr(struct sk_buff *skb, unsigned int protoff, } else if (nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.dst.u3, addr) && ct->tuplehash[dir].tuple.dst.u.udp.port == port) { newaddr = ct->tuplehash[!dir].tuple.src.u3; - newport = ct->tuplehash[!dir].tuple.src.u.udp.port; + newport = ct_sip_info->forced_dport ? : + ct->tuplehash[!dir].tuple.src.u.udp.port; } else return 1; @@ -144,6 +146,7 @@ static unsigned int nf_nat_sip(struct sk_buff *skb, unsigned int protoff, enum ip_conntrack_info ctinfo; struct nf_conn *ct = nf_ct_get(skb, &ctinfo); enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); + struct nf_ct_sip_master *ct_sip_info = nfct_help_data(ct); unsigned int coff, matchoff, matchlen; enum sip_header_types hdr; union nf_inet_addr addr; @@ -156,8 +159,10 @@ static unsigned int nf_nat_sip(struct sk_buff *skb, unsigned int protoff, &matchoff, &matchlen, &addr, &port) > 0 && !map_addr(skb, protoff, dataoff, dptr, datalen, - matchoff, matchlen, &addr, port)) + matchoff, matchlen, &addr, port)) { + nf_ct_helper_log(skb, ct, "cannot mangle SIP message"); return NF_DROP; + } request = 1; } else request = 0; @@ -190,8 +195,10 @@ static unsigned int nf_nat_sip(struct sk_buff *skb, unsigned int protoff, olen = *datalen; if (!map_addr(skb, protoff, dataoff, dptr, datalen, - matchoff, matchlen, &addr, port)) + matchoff, matchlen, &addr, port)) { + nf_ct_helper_log(skb, ct, "cannot mangle Via header"); return NF_DROP; + } matchend = matchoff + matchlen + *datalen - olen; @@ -206,8 +213,10 @@ static unsigned int nf_nat_sip(struct sk_buff *skb, unsigned int protoff, &ct->tuplehash[!dir].tuple.dst.u3, true); if (!mangle_packet(skb, protoff, dataoff, dptr, datalen, - poff, plen, buffer, buflen)) + poff, plen, buffer, buflen)) { + nf_ct_helper_log(skb, ct, "cannot mangle maddr"); return NF_DROP; + } } /* The received= parameter (RFC 2361) contains the address @@ -222,6 +231,8 @@ static unsigned int nf_nat_sip(struct sk_buff *skb, unsigned int protoff, false); if (!mangle_packet(skb, protoff, dataoff, dptr, datalen, - poff, plen, buffer, buflen)) + poff, plen, buffer, buflen)) { + nf_ct_helper_log(skb, ct, "cannot mangle received"); return NF_DROP; + } } @@ -235,8 +245,10 @@ static unsigned int nf_nat_sip(struct sk_buff *skb, unsigned int protoff, __be16 p = ct->tuplehash[!dir].tuple.src.u.udp.port; buflen = sprintf(buffer, "%u", ntohs(p)); if (!mangle_packet(skb, protoff, dataoff, dptr, datalen, - poff, plen, buffer, buflen)) + poff, plen, buffer, buflen)) { + nf_ct_helper_log(skb, ct, "cannot mangle rport"); return NF_DROP; + } } } @@ -250,13 +262,36 @@ next: &addr, &port) > 0) { if (!map_addr(skb, protoff, dataoff, dptr, datalen, matchoff, matchlen, - &addr, port)) + &addr, port)) { + nf_ct_helper_log(skb, ct, "cannot mangle contact"); return NF_DROP; + } } if

(!map_sip_addr(skb, protoff, dataoff, dptr, datalen, SIP_HDR_FROM) || - !map_sip_addr(skb, protoff, dataoff, dptr, datalen, SIP_HDR_TO)) + !map_sip_addr(skb, protoff, dataoff, dptr, datalen, SIP_HDR_TO)) { + nf_ct_helper_log(skb, ct, "cannot mangle SIP from/to"); return NF_DROP; + } + + /* Mangle destination port for Cisco phones, then fix up checksums */ + if (dir == IP_CT_DIR_REPLY && ct_sip_info->forced_dport) { + struct udphdr *uh; + + if (!skb_make_writable(skb, skb->len)) { + nf_ct_helper_log(skb, ct, "cannot mangle packet"); + return NF_DROP; + } + + uh = (void *)skb->data + protoff; + uh->dest = ct_sip_info->forced_dport; + + if (!nf_nat_mangle_udp_packet(skb, ct, ctinfo, protoff, + 0, 0, NULL, 0)) { + nf_ct_helper_log(skb, ct, "cannot mangle packet"); + return NF_DROP; + } + } return NF_ACCEPT; } @@ -311,8 +346,10 @@ static unsigned int nf_nat_sip_expect(struct sk_buff *skb, unsigned int protoff, enum ip_conntrack_info ctinfo; struct nf_conn *ct = nf_ct_get(skb, &ctinfo); enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); + struct nf_ct_sip_master *ct_sip_info = nfct_help_data(ct); union nf_inet_addr newaddr; u_int16_t port; + __be16 srcport; char buffer[INET6_ADDRSTRLEN + sizeof("[]:nnnnn")]; unsigned int buflen; @@ -326,8 +363,9 @@ static unsigned int nf_nat_sip_expect(struct sk_buff *skb, unsigned int protoff, /* If the signalling port matches the connection's source port in the * original direction, try to use the destination port in the opposite * direction. */ - if (exp->tuple.dst.u.udp.port == - ct->tuplehash[dir].tuple.src.u.udp.port) + srcport = ct_sip_info->forced_dport ? : + ct->tuplehash[dir].tuple.src.u.udp.port; + if (exp->tuple.dst.u.udp.port == srcport) port = ntohs(ct->tuplehash[!dir].tuple.dst.u.udp.port); else port = ntohs(exp->tuple.dst.u.udp.port); @@ -351,15 +389,19 @@ static unsigned int nf_nat_sip_expect(struct sk_buff *skb, unsigned int protoff, } } - if (port == 0) + if (port == 0) { + nf_ct_helper_log(skb, ct, "all ports in use for SIP"); return NF_DROP; + } if (!nf_inet_addr_cmp(&exp->tuple.dst.u3, &exp->saved_addr) || exp->tuple.dst.u.udp.port != exp->saved_proto.udp.port) { buflen = sip_sprintf_addr_port(ct, buffer, &newaddr, port); if (!mangle_packet(skb, protoff, dataoff, dptr, datalen, - matchoff, matchlen, buffer, buflen)) + matchoff, matchlen, buffer, buflen)) { + nf_ct_helper_log(skb, ct, "cannot mangle packet"); goto err; + } } return NF_ACCEPT; @@ -552,14 +594,18 @@ static unsigned int nf_nat_sdp_media(struct sk_buff *skb, unsigned int protoff, } } - if (port == 0) + if (port == 0) { + nf_ct_helper_log(skb, ct, "all ports in use for SDP media"); goto err1; + } /* Update media port. 
*/ if (rtp_exp->tuple.dst.u.udp.port != rtp_exp->saved_proto.udp.port && !nf_nat_sdp_port(skb, protoff, dataoff, dptr, datalen, - mediaoff, medialen, port)) + mediaoff, medialen, port)) { + nf_ct_helper_log(skb, ct, "cannot mangle SDP message"); goto err2; + } return NF_ACCEPT; diff --git a/net/netfilter/nf_nat_tftp.c b/net/netfilter/nf_nat_tftp.c index ccabbda71a3e..7f67e1d5310d 100644 --- a/net/netfilter/nf_nat_tftp.c +++ b/net/netfilter/nf_nat_tftp.c @@ -28,8 +28,10 @@ static unsigned int help(struct sk_buff *skb, = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.udp.port; exp->dir = IP_CT_DIR_REPLY; exp->expectfn = nf_nat_follow_master; - if (nf_ct_expect_related(exp) != 0) + if (nf_ct_expect_related(exp) != 0) { + nf_ct_helper_log(skb, exp->master, "cannot add expectation"); return NF_DROP; + } return NF_ACCEPT; } diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index d812c1235b30..5d24b1fdb593 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -1,3 +1,8 @@ +/* + * Rusty Russell (C)2000 -- This code is GPL. + * Patrick McHardy (c) 2006-2012 + */ + #include <linux/kernel.h> #include <linux/slab.h> #include <linux/init.h> @@ -40,7 +45,7 @@ void nf_unregister_queue_handler(void) } EXPORT_SYMBOL(nf_unregister_queue_handler); -static void nf_queue_entry_release_refs(struct nf_queue_entry *entry) +void nf_queue_entry_release_refs(struct nf_queue_entry *entry) { /* Release those devices we held, or Alexey will kill me. */ if (entry->indev) @@ -60,12 +65,41 @@ static void nf_queue_entry_release_refs(struct nf_queue_entry *entry) /* Drop reference to owner of hook which queued us. */ module_put(entry->elem->owner); } +EXPORT_SYMBOL_GPL(nf_queue_entry_release_refs); + +/* Bump dev refs so they don't vanish while packet is out */ +bool nf_queue_entry_get_refs(struct nf_queue_entry *entry) +{ + if (!try_module_get(entry->elem->owner)) + return false; + + if (entry->indev) + dev_hold(entry->indev); + if (entry->outdev) + dev_hold(entry->outdev); +#ifdef CONFIG_BRIDGE_NETFILTER + if (entry->skb->nf_bridge) { + struct nf_bridge_info *nf_bridge = entry->skb->nf_bridge; + struct net_device *physdev; + + physdev = nf_bridge->physindev; + if (physdev) + dev_hold(physdev); + physdev = nf_bridge->physoutdev; + if (physdev) + dev_hold(physdev); + } +#endif + + return true; +} +EXPORT_SYMBOL_GPL(nf_queue_entry_get_refs); /* * Any packet that leaves via this function must come back * through nf_reinject(). */ -static int __nf_queue(struct sk_buff *skb, +int nf_queue(struct sk_buff *skb, struct nf_hook_ops *elem, u_int8_t pf, unsigned int hook, struct net_device *indev, @@ -75,10 +109,6 @@ static int __nf_queue(struct sk_buff *skb, { int status = -ENOENT; struct nf_queue_entry *entry = NULL; -#ifdef CONFIG_BRIDGE_NETFILTER - struct net_device *physindev; - struct net_device *physoutdev; -#endif const struct nf_afinfo *afinfo; const struct nf_queue_handler *qh; @@ -109,28 +139,13 @@ static int __nf_queue(struct sk_buff *skb, .indev = indev, .outdev = outdev, .okfn = okfn, + .size = sizeof(*entry) + afinfo->route_key_size, }; - /* If it's going away, ignore hook. 
*/ - if (!try_module_get(entry->elem->owner)) { + if (!nf_queue_entry_get_refs(entry)) { status = -ECANCELED; goto err_unlock; } - /* Bump dev refs so they don't vanish while packet is out */ - if (indev) - dev_hold(indev); - if (outdev) - dev_hold(outdev); -#ifdef CONFIG_BRIDGE_NETFILTER - if (skb->nf_bridge) { - physindev = skb->nf_bridge->physindev; - if (physindev) - dev_hold(physindev); - physoutdev = skb->nf_bridge->physoutdev; - if (physoutdev) - dev_hold(physoutdev); - } -#endif skb_dst_force(skb); afinfo->saveroute(skb, entry); status = qh->outfn(entry, queuenum); @@ -151,87 +166,6 @@ err: return status; } -#ifdef CONFIG_BRIDGE_NETFILTER -/* When called from bridge netfilter, skb->data must point to MAC header - * before calling skb_gso_segment(). Else, original MAC header is lost - * and segmented skbs will be sent to wrong destination. - */ -static void nf_bridge_adjust_skb_data(struct sk_buff *skb) -{ - if (skb->nf_bridge) - __skb_push(skb, skb->network_header - skb->mac_header); -} - -static void nf_bridge_adjust_segmented_data(struct sk_buff *skb) -{ - if (skb->nf_bridge) - __skb_pull(skb, skb->network_header - skb->mac_header); -} -#else -#define nf_bridge_adjust_skb_data(s) do {} while (0) -#define nf_bridge_adjust_segmented_data(s) do {} while (0) -#endif - -int nf_queue(struct sk_buff *skb, - struct nf_hook_ops *elem, - u_int8_t pf, unsigned int hook, - struct net_device *indev, - struct net_device *outdev, - int (*okfn)(struct sk_buff *), - unsigned int queuenum) -{ - struct sk_buff *segs; - int err = -EINVAL; - unsigned int queued; - - if (!skb_is_gso(skb)) - return __nf_queue(skb, elem, pf, hook, indev, outdev, okfn, - queuenum); - - switch (pf) { - case NFPROTO_IPV4: - skb->protocol = htons(ETH_P_IP); - break; - case NFPROTO_IPV6: - skb->protocol = htons(ETH_P_IPV6); - break; - } - - nf_bridge_adjust_skb_data(skb); - segs = skb_gso_segment(skb, 0); - /* Does not use PTR_ERR to limit the number of error codes that can be - * returned by nf_queue. For instance, callers rely on -ECANCELED to mean - * 'ignore this hook'. 
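The block being removed here used to segment GSO skbs inside nf_queue() before handing them to the handler; after this change that job belongs to the queue handler itself, which can reuse the newly exported nf_queue_entry_get_refs()/nf_queue_entry_release_refs() pair per segment. A handler-side sketch; enqueue_to_userspace() is purely hypothetical:

static int queue_one(struct nf_queue_entry *entry, unsigned int queuenum)
{
	/* pin the hook's module and the devices while the packet
	 * is parked outside the stack */
	if (!nf_queue_entry_get_refs(entry))
		return -ECANCELED;	/* hook owner is unloading */

	if (enqueue_to_userspace(entry, queuenum) < 0) {	/* hypothetical */
		nf_queue_entry_release_refs(entry);
		return -ENOSPC;
	}
	return 0;
}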
- */ - if (IS_ERR(segs)) - goto out_err; - queued = 0; - err = 0; - do { - struct sk_buff *nskb = segs->next; - - segs->next = NULL; - if (err == 0) { - nf_bridge_adjust_segmented_data(segs); - err = __nf_queue(segs, elem, pf, hook, indev, - outdev, okfn, queuenum); - } - if (err == 0) - queued++; - else - kfree_skb(segs); - segs = nskb; - } while (segs); - - if (queued) { - kfree_skb(skb); - return 0; - } - out_err: - nf_bridge_adjust_segmented_data(skb); - return err; -} - void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict) { struct sk_buff *skb = entry->skb; @@ -271,9 +205,9 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict) local_bh_enable(); break; case NF_QUEUE: - err = __nf_queue(skb, elem, entry->pf, entry->hook, - entry->indev, entry->outdev, entry->okfn, - verdict >> NF_VERDICT_QBITS); + err = nf_queue(skb, elem, entry->pf, entry->hook, + entry->indev, entry->outdev, entry->okfn, + verdict >> NF_VERDICT_QBITS); if (err < 0) { if (err == -ECANCELED) goto next_hook; diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 58a09b7c3f6d..572d87dc116f 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -24,10 +24,9 @@ #include <linux/skbuff.h> #include <asm/uaccess.h> #include <net/sock.h> -#include <net/netlink.h> #include <linux/init.h> -#include <linux/netlink.h> +#include <net/netlink.h> #include <linux/netfilter/nfnetlink.h> MODULE_LICENSE("GPL"); @@ -36,8 +35,10 @@ MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_NETFILTER); static char __initdata nfversion[] = "0.30"; -static const struct nfnetlink_subsystem __rcu *subsys_table[NFNL_SUBSYS_COUNT]; -static DEFINE_MUTEX(nfnl_mutex); +static struct { + struct mutex mutex; + const struct nfnetlink_subsystem __rcu *subsys; +} table[NFNL_SUBSYS_COUNT]; static const int nfnl_group2type[NFNLGRP_MAX+1] = { [NFNLGRP_CONNTRACK_NEW] = NFNL_SUBSYS_CTNETLINK, @@ -48,27 +49,27 @@ static const int nfnl_group2type[NFNLGRP_MAX+1] = { [NFNLGRP_CONNTRACK_EXP_DESTROY] = NFNL_SUBSYS_CTNETLINK_EXP, }; -void nfnl_lock(void) +void nfnl_lock(__u8 subsys_id) { - mutex_lock(&nfnl_mutex); + mutex_lock(&table[subsys_id].mutex); } EXPORT_SYMBOL_GPL(nfnl_lock); -void nfnl_unlock(void) +void nfnl_unlock(__u8 subsys_id) { - mutex_unlock(&nfnl_mutex); + mutex_unlock(&table[subsys_id].mutex); } EXPORT_SYMBOL_GPL(nfnl_unlock); int nfnetlink_subsys_register(const struct nfnetlink_subsystem *n) { - nfnl_lock(); - if (subsys_table[n->subsys_id]) { - nfnl_unlock(); + nfnl_lock(n->subsys_id); + if (table[n->subsys_id].subsys) { + nfnl_unlock(n->subsys_id); return -EBUSY; } - rcu_assign_pointer(subsys_table[n->subsys_id], n); - nfnl_unlock(); + rcu_assign_pointer(table[n->subsys_id].subsys, n); + nfnl_unlock(n->subsys_id); return 0; } @@ -76,9 +77,9 @@ EXPORT_SYMBOL_GPL(nfnetlink_subsys_register); int nfnetlink_subsys_unregister(const struct nfnetlink_subsystem *n) { - nfnl_lock(); - subsys_table[n->subsys_id] = NULL; - nfnl_unlock(); + nfnl_lock(n->subsys_id); + table[n->subsys_id].subsys = NULL; + nfnl_unlock(n->subsys_id); synchronize_rcu(); return 0; } @@ -91,7 +92,7 @@ static inline const struct nfnetlink_subsystem *nfnetlink_get_subsys(u_int16_t t if (subsys_id >= NFNL_SUBSYS_COUNT) return NULL; - return rcu_dereference(subsys_table[subsys_id]); + return rcu_dereference(table[subsys_id].subsys); } static inline const struct nfnl_callback * @@ -111,22 +112,30 @@ int nfnetlink_has_listeners(struct net *net, unsigned int group) } EXPORT_SYMBOL_GPL(nfnetlink_has_listeners); -int 
nfnetlink_send(struct sk_buff *skb, struct net *net, u32 pid, +struct sk_buff *nfnetlink_alloc_skb(struct net *net, unsigned int size, + u32 dst_portid, gfp_t gfp_mask) +{ + return netlink_alloc_skb(net->nfnl, size, dst_portid, gfp_mask); +} +EXPORT_SYMBOL_GPL(nfnetlink_alloc_skb); + +int nfnetlink_send(struct sk_buff *skb, struct net *net, u32 portid, unsigned int group, int echo, gfp_t flags) { - return nlmsg_notify(net->nfnl, skb, pid, group, echo, flags); + return nlmsg_notify(net->nfnl, skb, portid, group, echo, flags); } EXPORT_SYMBOL_GPL(nfnetlink_send); -int nfnetlink_set_err(struct net *net, u32 pid, u32 group, int error) +int nfnetlink_set_err(struct net *net, u32 portid, u32 group, int error) { - return netlink_set_err(net->nfnl, pid, group, error); + return netlink_set_err(net->nfnl, portid, group, error); } EXPORT_SYMBOL_GPL(nfnetlink_set_err); -int nfnetlink_unicast(struct sk_buff *skb, struct net *net, u_int32_t pid, int flags) +int nfnetlink_unicast(struct sk_buff *skb, struct net *net, u32 portid, + int flags) { - return netlink_unicast(net->nfnl, skb, pid, flags); + return netlink_unicast(net->nfnl, skb, portid, flags); } EXPORT_SYMBOL_GPL(nfnetlink_unicast); @@ -142,7 +151,7 @@ static int nfnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) return -EPERM; /* All the messages must at least contain nfgenmsg */ - if (nlh->nlmsg_len < NLMSG_LENGTH(sizeof(struct nfgenmsg))) + if (nlmsg_len(nlh) < sizeof(struct nfgenmsg)) return 0; type = nlh->nlmsg_type; @@ -170,11 +179,12 @@ replay: } { - int min_len = NLMSG_SPACE(sizeof(struct nfgenmsg)); + int min_len = nlmsg_total_size(sizeof(struct nfgenmsg)); u_int8_t cb_id = NFNL_MSG_TYPE(nlh->nlmsg_type); struct nlattr *cda[ss->cb[cb_id].attr_count + 1]; struct nlattr *attr = (void *)nlh + min_len; int attrlen = nlh->nlmsg_len - min_len; + __u8 subsys_id = NFNL_SUBSYS_ID(type); err = nla_parse(cda, ss->cb[cb_id].attr_count, attr, attrlen, ss->cb[cb_id].policy); @@ -189,10 +199,9 @@ replay: rcu_read_unlock(); } else { rcu_read_unlock(); - nfnl_lock(); - if (rcu_dereference_protected( - subsys_table[NFNL_SUBSYS_ID(type)], - lockdep_is_held(&nfnl_mutex)) != ss || + nfnl_lock(subsys_id); + if (rcu_dereference_protected(table[subsys_id].subsys, + lockdep_is_held(&table[subsys_id].mutex)) != ss || nfnetlink_find_client(type, ss) != nc) err = -EAGAIN; else if (nc->call) @@ -200,7 +209,7 @@ replay: (const struct nlattr **)cda); else err = -EINVAL; - nfnl_unlock(); + nfnl_unlock(subsys_id); } if (err == -EAGAIN) goto replay; @@ -267,6 +276,11 @@ static struct pernet_operations nfnetlink_net_ops = { static int __init nfnetlink_init(void) { + int i; + + for (i=0; i<NFNL_SUBSYS_COUNT; i++) + mutex_init(&table[i].mutex); + pr_info("Netfilter messages via NETLINK v%s.\n", nfversion); return register_pernet_subsys(&nfnetlink_net_ops); } diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c index 589d686f0b4c..dc3fd5d44464 100644 --- a/net/netfilter/nfnetlink_acct.c +++ b/net/netfilter/nfnetlink_acct.c @@ -49,6 +49,8 @@ nfnl_acct_new(struct sock *nfnl, struct sk_buff *skb, return -EINVAL; acct_name = nla_data(tb[NFACCT_NAME]); + if (strlen(acct_name) == 0) + return -EINVAL; list_for_each_entry(nfacct, &nfnl_acct_list, head) { if (strncmp(nfacct->name, acct_name, NFACCT_NAME_MAX) != 0) diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c index 945950a8b1f1..a191b6db657e 100644 --- a/net/netfilter/nfnetlink_cthelper.c +++ b/net/netfilter/nfnetlink_cthelper.c @@ -282,7 +282,6 @@ 
nfnl_cthelper_new(struct sock *nfnl, struct sk_buff *skb, const char *helper_name; struct nf_conntrack_helper *cur, *helper = NULL; struct nf_conntrack_tuple tuple; - struct hlist_node *n; int ret = 0, i; if (!tb[NFCTH_NAME] || !tb[NFCTH_TUPLE]) @@ -296,7 +295,7 @@ nfnl_cthelper_new(struct sock *nfnl, struct sk_buff *skb, rcu_read_lock(); for (i = 0; i < nf_ct_helper_hsize && !helper; i++) { - hlist_for_each_entry_rcu(cur, n, &nf_ct_helper_hash[i], hnode) { + hlist_for_each_entry_rcu(cur, &nf_ct_helper_hash[i], hnode) { /* skip non-userspace conntrack helpers. */ if (!(cur->flags & NF_CT_HELPER_F_USERSPACE)) @@ -452,13 +451,12 @@ static int nfnl_cthelper_dump_table(struct sk_buff *skb, struct netlink_callback *cb) { struct nf_conntrack_helper *cur, *last; - struct hlist_node *n; rcu_read_lock(); last = (struct nf_conntrack_helper *)cb->args[1]; for (; cb->args[0] < nf_ct_helper_hsize; cb->args[0]++) { restart: - hlist_for_each_entry_rcu(cur, n, + hlist_for_each_entry_rcu(cur, &nf_ct_helper_hash[cb->args[0]], hnode) { /* skip non-userspace conntrack helpers. */ @@ -495,7 +493,6 @@ nfnl_cthelper_get(struct sock *nfnl, struct sk_buff *skb, { int ret = -ENOENT, i; struct nf_conntrack_helper *cur; - struct hlist_node *n; struct sk_buff *skb2; char *helper_name = NULL; struct nf_conntrack_tuple tuple; @@ -520,7 +517,7 @@ nfnl_cthelper_get(struct sock *nfnl, struct sk_buff *skb, } for (i = 0; i < nf_ct_helper_hsize; i++) { - hlist_for_each_entry_rcu(cur, n, &nf_ct_helper_hash[i], hnode) { + hlist_for_each_entry_rcu(cur, &nf_ct_helper_hash[i], hnode) { /* skip non-userspace conntrack helpers. */ if (!(cur->flags & NF_CT_HELPER_F_USERSPACE)) @@ -568,7 +565,7 @@ nfnl_cthelper_del(struct sock *nfnl, struct sk_buff *skb, { char *helper_name = NULL; struct nf_conntrack_helper *cur; - struct hlist_node *n, *tmp; + struct hlist_node *tmp; struct nf_conntrack_tuple tuple; bool tuple_set = false, found = false; int i, j = 0, ret; @@ -585,7 +582,7 @@ nfnl_cthelper_del(struct sock *nfnl, struct sk_buff *skb, } for (i = 0; i < nf_ct_helper_hsize; i++) { - hlist_for_each_entry_safe(cur, n, tmp, &nf_ct_helper_hash[i], + hlist_for_each_entry_safe(cur, tmp, &nf_ct_helper_hash[i], hnode) { /* skip non-userspace conntrack helpers. */ if (!(cur->flags & NF_CT_HELPER_F_USERSPACE)) @@ -654,13 +651,13 @@ err_out: static void __exit nfnl_cthelper_exit(void) { struct nf_conntrack_helper *cur; - struct hlist_node *n, *tmp; + struct hlist_node *tmp; int i; nfnetlink_subsys_unregister(&nfnl_cthelper_subsys); for (i=0; i<nf_ct_helper_hsize; i++) { - hlist_for_each_entry_safe(cur, n, tmp, &nf_ct_helper_hash[i], + hlist_for_each_entry_safe(cur, tmp, &nf_ct_helper_hash[i], hnode) { /* skip non-userspace conntrack helpers. */ if (!(cur->flags & NF_CT_HELPER_F_USERSPACE)) diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index 92fd8eca0d31..faf1e9300d8a 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -3,6 +3,7 @@ * nfetlink. 
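[Note on the nfnetlink_cthelper hunks above: they track the kernel-wide hlist iterator change, in which hlist_for_each_entry() and its _rcu/_safe variants dropped the separate struct hlist_node * cursor and now derive the node from the entry pointer itself. A minimal sketch of the newer calling convention; demo_item and demo_find are illustrative names, not from this patch.]

#include <linux/list.h>

struct demo_item {
        int key;
        struct hlist_node node;
};

/* old API: hlist_for_each_entry(it, pos, head, node) carried an extra
 * 'struct hlist_node *pos' cursor; the new form below drops it.
 */
static struct demo_item *demo_find(struct hlist_head *head, int key)
{
        struct demo_item *it;

        hlist_for_each_entry(it, head, node) {
                if (it->key == key)
                        return it;
        }
        return NULL;
}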
* * (C) 2005 by Harald Welte <laforge@netfilter.org> + * (C) 2006-2012 Patrick McHardy <kaber@trash.net> * * Based on the old ipv4-only ipt_ULOG.c: * (C) 2000-2004 by Harald Welte <laforge@netfilter.org> @@ -19,7 +20,7 @@ #include <linux/ipv6.h> #include <linux/netdevice.h> #include <linux/netfilter.h> -#include <linux/netlink.h> +#include <net/netlink.h> #include <linux/netfilter/nfnetlink.h> #include <linux/netfilter/nfnetlink_log.h> #include <linux/spinlock.h> @@ -32,6 +33,7 @@ #include <linux/slab.h> #include <net/sock.h> #include <net/netfilter/nf_log.h> +#include <net/netns/generic.h> #include <net/netfilter/nfnetlink_log.h> #include <linux/atomic.h> @@ -56,6 +58,7 @@ struct nfulnl_instance { unsigned int qlen; /* number of nlmsgs in skb */ struct sk_buff *skb; /* pre-allocatd skb */ struct timer_list timer; + struct net *net; struct user_namespace *peer_user_ns; /* User namespace of the peer process */ int peer_portid; /* PORTID of the peer process */ @@ -71,27 +74,35 @@ struct nfulnl_instance { struct rcu_head rcu; }; -static DEFINE_SPINLOCK(instances_lock); -static atomic_t global_seq; - #define INSTANCE_BUCKETS 16 -static struct hlist_head instance_table[INSTANCE_BUCKETS]; static unsigned int hash_init; +static int nfnl_log_net_id __read_mostly; + +struct nfnl_log_net { + spinlock_t instances_lock; + struct hlist_head instance_table[INSTANCE_BUCKETS]; + atomic_t global_seq; +}; + +static struct nfnl_log_net *nfnl_log_pernet(struct net *net) +{ + return net_generic(net, nfnl_log_net_id); +} + static inline u_int8_t instance_hashfn(u_int16_t group_num) { return ((group_num & 0xff) % INSTANCE_BUCKETS); } static struct nfulnl_instance * -__instance_lookup(u_int16_t group_num) +__instance_lookup(struct nfnl_log_net *log, u_int16_t group_num) { struct hlist_head *head; - struct hlist_node *pos; struct nfulnl_instance *inst; - head = &instance_table[instance_hashfn(group_num)]; - hlist_for_each_entry_rcu(inst, pos, head, hlist) { + head = &log->instance_table[instance_hashfn(group_num)]; + hlist_for_each_entry_rcu(inst, head, hlist) { if (inst->group_num == group_num) return inst; } @@ -105,12 +116,12 @@ instance_get(struct nfulnl_instance *inst) } static struct nfulnl_instance * -instance_lookup_get(u_int16_t group_num) +instance_lookup_get(struct nfnl_log_net *log, u_int16_t group_num) { struct nfulnl_instance *inst; rcu_read_lock_bh(); - inst = __instance_lookup(group_num); + inst = __instance_lookup(log, group_num); if (inst && !atomic_inc_not_zero(&inst->use)) inst = NULL; rcu_read_unlock_bh(); @@ -120,7 +131,11 @@ instance_lookup_get(u_int16_t group_num) static void nfulnl_instance_free_rcu(struct rcu_head *head) { - kfree(container_of(head, struct nfulnl_instance, rcu)); + struct nfulnl_instance *inst = + container_of(head, struct nfulnl_instance, rcu); + + put_net(inst->net); + kfree(inst); module_put(THIS_MODULE); } @@ -134,13 +149,15 @@ instance_put(struct nfulnl_instance *inst) static void nfulnl_timer(unsigned long data); static struct nfulnl_instance * -instance_create(u_int16_t group_num, int portid, struct user_namespace *user_ns) +instance_create(struct net *net, u_int16_t group_num, + int portid, struct user_namespace *user_ns) { struct nfulnl_instance *inst; + struct nfnl_log_net *log = nfnl_log_pernet(net); int err; - spin_lock_bh(&instances_lock); - if (__instance_lookup(group_num)) { + spin_lock_bh(&log->instances_lock); + if (__instance_lookup(log, group_num)) { err = -EEXIST; goto out_unlock; } @@ -164,6 +181,7 @@ instance_create(u_int16_t group_num, int portid, 
struct user_namespace *user_ns) setup_timer(&inst->timer, nfulnl_timer, (unsigned long)inst); + inst->net = get_net(net); inst->peer_user_ns = user_ns; inst->peer_portid = portid; inst->group_num = group_num; @@ -175,14 +193,15 @@ instance_create(u_int16_t group_num, int portid, struct user_namespace *user_ns) inst->copy_range = NFULNL_COPY_RANGE_MAX; hlist_add_head_rcu(&inst->hlist, - &instance_table[instance_hashfn(group_num)]); + &log->instance_table[instance_hashfn(group_num)]); + - spin_unlock_bh(&instances_lock); + spin_unlock_bh(&log->instances_lock); return inst; out_unlock: - spin_unlock_bh(&instances_lock); + spin_unlock_bh(&log->instances_lock); return ERR_PTR(err); } @@ -211,11 +230,12 @@ __instance_destroy(struct nfulnl_instance *inst) } static inline void -instance_destroy(struct nfulnl_instance *inst) +instance_destroy(struct nfnl_log_net *log, + struct nfulnl_instance *inst) { - spin_lock_bh(&instances_lock); + spin_lock_bh(&log->instances_lock); __instance_destroy(inst); - spin_unlock_bh(&instances_lock); + spin_unlock_bh(&log->instances_lock); } static int @@ -299,7 +319,7 @@ nfulnl_set_flags(struct nfulnl_instance *inst, u_int16_t flags) } static struct sk_buff * -nfulnl_alloc_skb(unsigned int inst_size, unsigned int pkt_size) +nfulnl_alloc_skb(u32 peer_portid, unsigned int inst_size, unsigned int pkt_size) { struct sk_buff *skb; unsigned int n; @@ -308,13 +328,14 @@ nfulnl_alloc_skb(unsigned int inst_size, unsigned int pkt_size) * message. WARNING: has to be <= 128k due to slab restrictions */ n = max(inst_size, pkt_size); - skb = alloc_skb(n, GFP_ATOMIC); + skb = nfnetlink_alloc_skb(&init_net, n, peer_portid, GFP_ATOMIC); if (!skb) { if (n > pkt_size) { /* try to allocate only as much as we need for current * packet */ - skb = alloc_skb(pkt_size, GFP_ATOMIC); + skb = nfnetlink_alloc_skb(&init_net, pkt_size, + peer_portid, GFP_ATOMIC); if (!skb) pr_err("nfnetlink_log: can't even alloc %u bytes\n", pkt_size); @@ -337,7 +358,7 @@ __nfulnl_send(struct nfulnl_instance *inst) if (!nlh) goto out; } - status = nfnetlink_unicast(inst->skb, &init_net, inst->peer_portid, + status = nfnetlink_unicast(inst->skb, inst->net, inst->peer_portid, MSG_DONTWAIT); inst->qlen = 0; @@ -371,7 +392,8 @@ nfulnl_timer(unsigned long data) /* This is an inline function, we don't really care about a long * list of arguments */ static inline int -__build_packet_message(struct nfulnl_instance *inst, +__build_packet_message(struct nfnl_log_net *log, + struct nfulnl_instance *inst, const struct sk_buff *skb, unsigned int data_len, u_int8_t pf, @@ -537,7 +559,7 @@ __build_packet_message(struct nfulnl_instance *inst, /* global sequence number */ if ((inst->flags & NFULNL_CFG_F_SEQ_GLOBAL) && nla_put_be32(inst->skb, NFULA_SEQ_GLOBAL, - htonl(atomic_inc_return(&global_seq)))) + htonl(atomic_inc_return(&log->global_seq)))) goto nla_put_failure; if (data_len) { @@ -593,13 +615,15 @@ nfulnl_log_packet(u_int8_t pf, const struct nf_loginfo *li; unsigned int qthreshold; unsigned int plen; + struct net *net = dev_net(in ? in : out); + struct nfnl_log_net *log = nfnl_log_pernet(net); if (li_user && li_user->type == NF_LOG_TYPE_ULOG) li = li_user; else li = &default_loginfo; - inst = instance_lookup_get(li->u.ulog.group); + inst = instance_lookup_get(log, li->u.ulog.group); if (!inst) return; @@ -610,7 +634,7 @@ nfulnl_log_packet(u_int8_t pf, /* FIXME: do we want to make the size calculation conditional based on * what is actually present? way more branches and checks, but more * memory efficient... 
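[Note on the nfnetlink_log hunks above: the global instance table, lock and sequence counter move into state looked up per network namespace via net_generic(). The idiom reduced to a sketch; demo_net, demo_net_id and demo_pernet are hypothetical placeholders, not names from the patch.]

#include <linux/list.h>
#include <linux/spinlock.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>

static int demo_net_id __read_mostly;

struct demo_net {
        spinlock_t lock;
        struct hlist_head table[16];
};

static struct demo_net *demo_pernet(struct net *net)
{
        /* per-namespace storage, allocated by the core using .size below */
        return net_generic(net, demo_net_id);
}

static int __net_init demo_net_init(struct net *net)
{
        struct demo_net *d = demo_pernet(net);
        int i;

        spin_lock_init(&d->lock);
        for (i = 0; i < 16; i++)
                INIT_HLIST_HEAD(&d->table[i]);
        return 0;
}

static struct pernet_operations demo_net_ops = {
        .init = demo_net_init,
        .id   = &demo_net_id,
        .size = sizeof(struct demo_net),
};

/* module init would call register_pernet_subsys(&demo_net_ops);
 * module exit, unregister_pernet_subsys(&demo_net_ops).
 */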
*/ - size = NLMSG_SPACE(sizeof(struct nfgenmsg)) + size = nlmsg_total_size(sizeof(struct nfgenmsg)) + nla_total_size(sizeof(struct nfulnl_msg_packet_hdr)) + nla_total_size(sizeof(u_int32_t)) /* ifindex */ + nla_total_size(sizeof(u_int32_t)) /* ifindex */ @@ -674,14 +698,15 @@ nfulnl_log_packet(u_int8_t pf, } if (!inst->skb) { - inst->skb = nfulnl_alloc_skb(inst->nlbufsiz, size); + inst->skb = nfulnl_alloc_skb(inst->peer_portid, inst->nlbufsiz, + size); if (!inst->skb) goto alloc_failure; } inst->qlen++; - __build_packet_message(inst, skb, data_len, pf, + __build_packet_message(log, inst, skb, data_len, pf, hooknum, in, out, prefix, plen); if (inst->qlen >= qthreshold) @@ -710,24 +735,24 @@ nfulnl_rcv_nl_event(struct notifier_block *this, unsigned long event, void *ptr) { struct netlink_notify *n = ptr; + struct nfnl_log_net *log = nfnl_log_pernet(n->net); if (event == NETLINK_URELEASE && n->protocol == NETLINK_NETFILTER) { int i; /* destroy all instances for this portid */ - spin_lock_bh(&instances_lock); + spin_lock_bh(&log->instances_lock); for (i = 0; i < INSTANCE_BUCKETS; i++) { - struct hlist_node *tmp, *t2; + struct hlist_node *t2; struct nfulnl_instance *inst; - struct hlist_head *head = &instance_table[i]; + struct hlist_head *head = &log->instance_table[i]; - hlist_for_each_entry_safe(inst, tmp, t2, head, hlist) { - if ((net_eq(n->net, &init_net)) && - (n->portid == inst->peer_portid)) + hlist_for_each_entry_safe(inst, t2, head, hlist) { + if (n->portid == inst->peer_portid) __instance_destroy(inst); } } - spin_unlock_bh(&instances_lock); + spin_unlock_bh(&log->instances_lock); } return NOTIFY_DONE; } @@ -768,6 +793,8 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb, u_int16_t group_num = ntohs(nfmsg->res_id); struct nfulnl_instance *inst; struct nfulnl_msg_config_cmd *cmd = NULL; + struct net *net = sock_net(ctnl); + struct nfnl_log_net *log = nfnl_log_pernet(net); int ret = 0; if (nfula[NFULA_CFG_CMD]) { @@ -777,14 +804,14 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb, /* Commands without queue context */ switch (cmd->command) { case NFULNL_CFG_CMD_PF_BIND: - return nf_log_bind_pf(pf, &nfulnl_logger); + return nf_log_bind_pf(net, pf, &nfulnl_logger); case NFULNL_CFG_CMD_PF_UNBIND: - nf_log_unbind_pf(pf); + nf_log_unbind_pf(net, pf); return 0; } } - inst = instance_lookup_get(group_num); + inst = instance_lookup_get(log, group_num); if (inst && inst->peer_portid != NETLINK_CB(skb).portid) { ret = -EPERM; goto out_put; @@ -798,9 +825,9 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb, goto out_put; } - inst = instance_create(group_num, + inst = instance_create(net, group_num, NETLINK_CB(skb).portid, - sk_user_ns(NETLINK_CB(skb).ssk)); + sk_user_ns(NETLINK_CB(skb).sk)); if (IS_ERR(inst)) { ret = PTR_ERR(inst); goto out; @@ -812,7 +839,7 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb, goto out; } - instance_destroy(inst); + instance_destroy(log, inst); goto out_put; default: ret = -ENOTSUPP; @@ -895,55 +922,68 @@ static const struct nfnetlink_subsystem nfulnl_subsys = { #ifdef CONFIG_PROC_FS struct iter_state { + struct seq_net_private p; unsigned int bucket; }; -static struct hlist_node *get_first(struct iter_state *st) +static struct hlist_node *get_first(struct net *net, struct iter_state *st) { + struct nfnl_log_net *log; if (!st) return NULL; + log = nfnl_log_pernet(net); + for (st->bucket = 0; st->bucket < INSTANCE_BUCKETS; st->bucket++) { - if (!hlist_empty(&instance_table[st->bucket])) - return 
rcu_dereference_bh(hlist_first_rcu(&instance_table[st->bucket])); + struct hlist_head *head = &log->instance_table[st->bucket]; + + if (!hlist_empty(head)) + return rcu_dereference_bh(hlist_first_rcu(head)); } return NULL; } -static struct hlist_node *get_next(struct iter_state *st, struct hlist_node *h) +static struct hlist_node *get_next(struct net *net, struct iter_state *st, + struct hlist_node *h) { h = rcu_dereference_bh(hlist_next_rcu(h)); while (!h) { + struct nfnl_log_net *log; + struct hlist_head *head; + if (++st->bucket >= INSTANCE_BUCKETS) return NULL; - h = rcu_dereference_bh(hlist_first_rcu(&instance_table[st->bucket])); + log = nfnl_log_pernet(net); + head = &log->instance_table[st->bucket]; + h = rcu_dereference_bh(hlist_first_rcu(head)); } return h; } -static struct hlist_node *get_idx(struct iter_state *st, loff_t pos) +static struct hlist_node *get_idx(struct net *net, struct iter_state *st, + loff_t pos) { struct hlist_node *head; - head = get_first(st); + head = get_first(net, st); if (head) - while (pos && (head = get_next(st, head))) + while (pos && (head = get_next(net, st, head))) pos--; return pos ? NULL : head; } -static void *seq_start(struct seq_file *seq, loff_t *pos) +static void *seq_start(struct seq_file *s, loff_t *pos) __acquires(rcu_bh) { rcu_read_lock_bh(); - return get_idx(seq->private, *pos); + return get_idx(seq_file_net(s), s->private, *pos); } static void *seq_next(struct seq_file *s, void *v, loff_t *pos) { (*pos)++; - return get_next(s->private, v); + return get_next(seq_file_net(s), s->private, v); } static void seq_stop(struct seq_file *s, void *v) @@ -972,8 +1012,8 @@ static const struct seq_operations nful_seq_ops = { static int nful_open(struct inode *inode, struct file *file) { - return seq_open_private(file, &nful_seq_ops, - sizeof(struct iter_state)); + return seq_open_net(inode, file, &nful_seq_ops, + sizeof(struct iter_state)); } static const struct file_operations nful_file_ops = { @@ -981,17 +1021,43 @@ static const struct file_operations nful_file_ops = { .open = nful_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release_private, + .release = seq_release_net, }; #endif /* PROC_FS */ -static int __init nfnetlink_log_init(void) +static int __net_init nfnl_log_net_init(struct net *net) { - int i, status = -ENOMEM; + unsigned int i; + struct nfnl_log_net *log = nfnl_log_pernet(net); for (i = 0; i < INSTANCE_BUCKETS; i++) - INIT_HLIST_HEAD(&instance_table[i]); + INIT_HLIST_HEAD(&log->instance_table[i]); + spin_lock_init(&log->instances_lock); + +#ifdef CONFIG_PROC_FS + if (!proc_create("nfnetlink_log", 0440, + net->nf.proc_netfilter, &nful_file_ops)) + return -ENOMEM; +#endif + return 0; +} + +static void __net_exit nfnl_log_net_exit(struct net *net) +{ + remove_proc_entry("nfnetlink_log", net->nf.proc_netfilter); +} + +static struct pernet_operations nfnl_log_net_ops = { + .init = nfnl_log_net_init, + .exit = nfnl_log_net_exit, + .id = &nfnl_log_net_id, + .size = sizeof(struct nfnl_log_net), +}; + +static int __init nfnetlink_log_init(void) +{ + int status = -ENOMEM; /* it's not really all that important to have a random value, so * we can do this from the init function, even if there hasn't @@ -1001,29 +1067,25 @@ static int __init nfnetlink_log_init(void) netlink_register_notifier(&nfulnl_rtnl_notifier); status = nfnetlink_subsys_register(&nfulnl_subsys); if (status < 0) { - printk(KERN_ERR "log: failed to create netlink socket\n"); + pr_err("log: failed to create netlink socket\n"); goto cleanup_netlink_notifier; } 
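[Note on the /proc iterator conversion above: seq_open_private()/seq_release_private become seq_open_net()/seq_release_net, which requires the iterator state to begin with struct seq_net_private so that seq_file_net() can recover the opener's namespace. A compressed, hedged sketch of that shape; all demo_* names are assumptions.]

#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <net/net_namespace.h>

struct demo_iter {
        struct seq_net_private p;       /* must stay the first member */
        unsigned int bucket;
};

static void *demo_seq_start(struct seq_file *s, loff_t *pos)
{
        struct net *net = seq_file_net(s);      /* opener's namespace */

        /* ...walk per-net state, e.g. net_generic(net, ...), from *pos;
         * elided here, so this stub ends iteration immediately.
         */
        return NULL;
}

static void *demo_seq_next(struct seq_file *s, void *v, loff_t *pos)
{
        (*pos)++;
        return NULL;
}

static void demo_seq_stop(struct seq_file *s, void *v)
{
}

static int demo_seq_show(struct seq_file *s, void *v)
{
        return 0;
}

static const struct seq_operations demo_seq_ops = {
        .start = demo_seq_start,
        .next  = demo_seq_next,
        .stop  = demo_seq_stop,
        .show  = demo_seq_show,
};

static int demo_open(struct inode *inode, struct file *file)
{
        return seq_open_net(inode, file, &demo_seq_ops,
                            sizeof(struct demo_iter));
}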
status = nf_log_register(NFPROTO_UNSPEC, &nfulnl_logger); if (status < 0) { - printk(KERN_ERR "log: failed to register logger\n"); + pr_err("log: failed to register logger\n"); goto cleanup_subsys; } -#ifdef CONFIG_PROC_FS - if (!proc_create("nfnetlink_log", 0440, - proc_net_netfilter, &nful_file_ops)) { - status = -ENOMEM; + status = register_pernet_subsys(&nfnl_log_net_ops); + if (status < 0) { + pr_err("log: failed to register pernet ops\n"); goto cleanup_logger; } -#endif return status; -#ifdef CONFIG_PROC_FS cleanup_logger: nf_log_unregister(&nfulnl_logger); -#endif cleanup_subsys: nfnetlink_subsys_unregister(&nfulnl_subsys); cleanup_netlink_notifier: @@ -1033,10 +1095,8 @@ cleanup_netlink_notifier: static void __exit nfnetlink_log_fini(void) { + unregister_pernet_subsys(&nfnl_log_net_ops); nf_log_unregister(&nfulnl_logger); -#ifdef CONFIG_PROC_FS - remove_proc_entry("nfnetlink_log", proc_net_netfilter); -#endif nfnetlink_subsys_unregister(&nfulnl_subsys); netlink_unregister_notifier(&nfulnl_rtnl_notifier); } diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c index 3158d87b56a8..2e0e835baf72 100644 --- a/net/netfilter/nfnetlink_queue_core.c +++ b/net/netfilter/nfnetlink_queue_core.c @@ -30,6 +30,7 @@ #include <linux/list.h> #include <net/sock.h> #include <net/netfilter/nf_queue.h> +#include <net/netns/generic.h> #include <net/netfilter/nfnetlink_queue.h> #include <linux/atomic.h> @@ -66,25 +67,32 @@ struct nfqnl_instance { typedef int (*nfqnl_cmpfn)(struct nf_queue_entry *, unsigned long); -static DEFINE_SPINLOCK(instances_lock); +static int nfnl_queue_net_id __read_mostly; #define INSTANCE_BUCKETS 16 -static struct hlist_head instance_table[INSTANCE_BUCKETS] __read_mostly; +struct nfnl_queue_net { + spinlock_t instances_lock; + struct hlist_head instance_table[INSTANCE_BUCKETS]; +}; + +static struct nfnl_queue_net *nfnl_queue_pernet(struct net *net) +{ + return net_generic(net, nfnl_queue_net_id); +} static inline u_int8_t instance_hashfn(u_int16_t queue_num) { - return ((queue_num >> 8) | queue_num) % INSTANCE_BUCKETS; + return ((queue_num >> 8) ^ queue_num) % INSTANCE_BUCKETS; } static struct nfqnl_instance * -instance_lookup(u_int16_t queue_num) +instance_lookup(struct nfnl_queue_net *q, u_int16_t queue_num) { struct hlist_head *head; - struct hlist_node *pos; struct nfqnl_instance *inst; - head = &instance_table[instance_hashfn(queue_num)]; - hlist_for_each_entry_rcu(inst, pos, head, hlist) { + head = &q->instance_table[instance_hashfn(queue_num)]; + hlist_for_each_entry_rcu(inst, head, hlist) { if (inst->queue_num == queue_num) return inst; } @@ -92,14 +100,15 @@ instance_lookup(u_int16_t queue_num) } static struct nfqnl_instance * -instance_create(u_int16_t queue_num, int portid) +instance_create(struct nfnl_queue_net *q, u_int16_t queue_num, + int portid) { struct nfqnl_instance *inst; unsigned int h; int err; - spin_lock(&instances_lock); - if (instance_lookup(queue_num)) { + spin_lock(&q->instances_lock); + if (instance_lookup(q, queue_num)) { err = -EEXIST; goto out_unlock; } @@ -113,7 +122,7 @@ instance_create(u_int16_t queue_num, int portid) inst->queue_num = queue_num; inst->peer_portid = portid; inst->queue_maxlen = NFQNL_QMAX_DEFAULT; - inst->copy_range = 0xfffff; + inst->copy_range = 0xffff; inst->copy_mode = NFQNL_COPY_NONE; spin_lock_init(&inst->lock); INIT_LIST_HEAD(&inst->queue_list); @@ -124,16 +133,16 @@ instance_create(u_int16_t queue_num, int portid) } h = instance_hashfn(queue_num); - hlist_add_head_rcu(&inst->hlist, 
&instance_table[h]); + hlist_add_head_rcu(&inst->hlist, &q->instance_table[h]); - spin_unlock(&instances_lock); + spin_unlock(&q->instances_lock); return inst; out_free: kfree(inst); out_unlock: - spin_unlock(&instances_lock); + spin_unlock(&q->instances_lock); return ERR_PTR(err); } @@ -159,11 +168,11 @@ __instance_destroy(struct nfqnl_instance *inst) } static void -instance_destroy(struct nfqnl_instance *inst) +instance_destroy(struct nfnl_queue_net *q, struct nfqnl_instance *inst) { - spin_lock(&instances_lock); + spin_lock(&q->instances_lock); __instance_destroy(inst); - spin_unlock(&instances_lock); + spin_unlock(&q->instances_lock); } static inline void @@ -218,14 +227,71 @@ nfqnl_flush(struct nfqnl_instance *queue, nfqnl_cmpfn cmpfn, unsigned long data) spin_unlock_bh(&queue->lock); } +static void +nfqnl_zcopy(struct sk_buff *to, const struct sk_buff *from, int len, int hlen) +{ + int i, j = 0; + int plen = 0; /* length of skb->head fragment */ + struct page *page; + unsigned int offset; + + /* dont bother with small payloads */ + if (len <= skb_tailroom(to)) { + skb_copy_bits(from, 0, skb_put(to, len), len); + return; + } + + if (hlen) { + skb_copy_bits(from, 0, skb_put(to, hlen), hlen); + len -= hlen; + } else { + plen = min_t(int, skb_headlen(from), len); + if (plen) { + page = virt_to_head_page(from->head); + offset = from->data - (unsigned char *)page_address(page); + __skb_fill_page_desc(to, 0, page, offset, plen); + get_page(page); + j = 1; + len -= plen; + } + } + + to->truesize += len + plen; + to->len += len + plen; + to->data_len += len + plen; + + for (i = 0; i < skb_shinfo(from)->nr_frags; i++) { + if (!len) + break; + skb_shinfo(to)->frags[j] = skb_shinfo(from)->frags[i]; + skb_shinfo(to)->frags[j].size = min_t(int, skb_shinfo(to)->frags[j].size, len); + len -= skb_shinfo(to)->frags[j].size; + skb_frag_ref(to, j); + j++; + } + skb_shinfo(to)->nr_frags = j; +} + +static int nfqnl_put_packet_info(struct sk_buff *nlskb, struct sk_buff *packet) +{ + __u32 flags = 0; + + if (packet->ip_summed == CHECKSUM_PARTIAL) + flags = NFQA_SKB_CSUMNOTREADY; + if (skb_is_gso(packet)) + flags |= NFQA_SKB_GSO; + + return flags ? 
nla_put_be32(nlskb, NFQA_SKB_INFO, htonl(flags)) : 0; +} + static struct sk_buff * nfqnl_build_packet_message(struct nfqnl_instance *queue, struct nf_queue_entry *entry, __be32 **packet_id_ptr) { - sk_buff_data_t old_tail; size_t size; size_t data_len = 0, cap_len = 0; + int hlen = 0; struct sk_buff *skb; struct nlattr *nla; struct nfqnl_msg_packet_hdr *pmsg; @@ -237,7 +303,7 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue, struct nf_conn *ct = NULL; enum ip_conntrack_info uninitialized_var(ctinfo); - size = NLMSG_SPACE(sizeof(struct nfgenmsg)) + size = nlmsg_total_size(sizeof(struct nfgenmsg)) + nla_total_size(sizeof(struct nfqnl_msg_packet_hdr)) + nla_total_size(sizeof(u_int32_t)) /* ifindex */ + nla_total_size(sizeof(u_int32_t)) /* ifindex */ @@ -247,8 +313,11 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue, #endif + nla_total_size(sizeof(u_int32_t)) /* mark */ + nla_total_size(sizeof(struct nfqnl_msg_packet_hw)) - + nla_total_size(sizeof(struct nfqnl_msg_packet_timestamp) - + nla_total_size(sizeof(u_int32_t))); /* cap_len */ + + nla_total_size(sizeof(u_int32_t)) /* skbinfo */ + + nla_total_size(sizeof(u_int32_t)); /* cap_len */ + + if (entskb->tstamp.tv64) + size += nla_total_size(sizeof(struct nfqnl_msg_packet_timestamp)); outdev = entry->outdev; @@ -258,7 +327,8 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue, break; case NFQNL_COPY_PACKET: - if (entskb->ip_summed == CHECKSUM_PARTIAL && + if (!(queue->flags & NFQA_CFG_F_GSO) && + entskb->ip_summed == CHECKSUM_PARTIAL && skb_checksum_help(entskb)) return NULL; @@ -266,7 +336,16 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue, if (data_len == 0 || data_len > entskb->len) data_len = entskb->len; - size += nla_total_size(data_len); + + if (!entskb->head_frag || + skb_headlen(entskb) < L1_CACHE_BYTES || + skb_shinfo(entskb)->nr_frags >= MAX_SKB_FRAGS) + hlen = skb_headlen(entskb); + + if (skb_has_frag_list(entskb)) + hlen = entskb->len; + hlen = min_t(int, data_len, hlen); + size += sizeof(struct nlattr) + hlen; cap_len = entskb->len; break; } @@ -274,11 +353,11 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue, if (queue->flags & NFQA_CFG_F_CONNTRACK) ct = nfqnl_ct_get(entskb, &size, &ctinfo); - skb = alloc_skb(size, GFP_ATOMIC); + skb = nfnetlink_alloc_skb(&init_net, size, queue->peer_portid, + GFP_ATOMIC); if (!skb) return NULL; - old_tail = skb->tail; nlh = nlmsg_put(skb, 0, 0, NFNL_SUBSYS_QUEUE << 8 | NFQNL_MSG_PACKET, sizeof(struct nfgenmsg), 0); @@ -383,31 +462,29 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue, goto nla_put_failure; } + if (ct && nfqnl_ct_put(skb, ct, ctinfo) < 0) + goto nla_put_failure; + + if (cap_len > 0 && nla_put_be32(skb, NFQA_CAP_LEN, htonl(cap_len))) + goto nla_put_failure; + + if (nfqnl_put_packet_info(skb, entskb)) + goto nla_put_failure; + if (data_len) { struct nlattr *nla; - int sz = nla_attr_size(data_len); - if (skb_tailroom(skb) < nla_total_size(data_len)) { - printk(KERN_WARNING "nf_queue: no tailroom!\n"); - kfree_skb(skb); - return NULL; - } + if (skb_tailroom(skb) < sizeof(*nla) + hlen) + goto nla_put_failure; - nla = (struct nlattr *)skb_put(skb, nla_total_size(data_len)); + nla = (struct nlattr *)skb_put(skb, sizeof(*nla)); nla->nla_type = NFQA_PAYLOAD; - nla->nla_len = sz; + nla->nla_len = nla_attr_size(data_len); - if (skb_copy_bits(entskb, 0, nla_data(nla), data_len)) - BUG(); + nfqnl_zcopy(skb, entskb, data_len, hlen); } - if (ct && nfqnl_ct_put(skb, ct, ctinfo) < 0) - goto nla_put_failure; - - if (cap_len > 0 && 
nla_put_be32(skb, NFQA_CAP_LEN, htonl(cap_len))) - goto nla_put_failure; - - nlh->nlmsg_len = skb->tail - old_tail; + nlh->nlmsg_len = skb->len; return skb; nla_put_failure: @@ -417,26 +494,14 @@ nla_put_failure: } static int -nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum) +__nfqnl_enqueue_packet(struct net *net, struct nfqnl_instance *queue, + struct nf_queue_entry *entry) { struct sk_buff *nskb; - struct nfqnl_instance *queue; int err = -ENOBUFS; __be32 *packet_id_ptr; int failopen = 0; - /* rcu_read_lock()ed by nf_hook_slow() */ - queue = instance_lookup(queuenum); - if (!queue) { - err = -ESRCH; - goto err_out; - } - - if (queue->copy_mode == NFQNL_COPY_NONE) { - err = -EINVAL; - goto err_out; - } - nskb = nfqnl_build_packet_message(queue, entry, &packet_id_ptr); if (nskb == NULL) { err = -ENOMEM; @@ -463,7 +528,7 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum) *packet_id_ptr = htonl(entry->id); /* nfnetlink_unicast will either free the nskb or add it to a socket */ - err = nfnetlink_unicast(nskb, &init_net, queue->peer_portid, MSG_DONTWAIT); + err = nfnetlink_unicast(nskb, net, queue->peer_portid, MSG_DONTWAIT); if (err < 0) { queue->queue_user_dropped++; goto err_out_unlock; @@ -484,6 +549,141 @@ err_out: return err; } +static struct nf_queue_entry * +nf_queue_entry_dup(struct nf_queue_entry *e) +{ + struct nf_queue_entry *entry = kmemdup(e, e->size, GFP_ATOMIC); + if (entry) { + if (nf_queue_entry_get_refs(entry)) + return entry; + kfree(entry); + } + return NULL; +} + +#ifdef CONFIG_BRIDGE_NETFILTER +/* When called from bridge netfilter, skb->data must point to MAC header + * before calling skb_gso_segment(). Else, original MAC header is lost + * and segmented skbs will be sent to wrong destination. + */ +static void nf_bridge_adjust_skb_data(struct sk_buff *skb) +{ + if (skb->nf_bridge) + __skb_push(skb, skb->network_header - skb->mac_header); +} + +static void nf_bridge_adjust_segmented_data(struct sk_buff *skb) +{ + if (skb->nf_bridge) + __skb_pull(skb, skb->network_header - skb->mac_header); +} +#else +#define nf_bridge_adjust_skb_data(s) do {} while (0) +#define nf_bridge_adjust_segmented_data(s) do {} while (0) +#endif + +static void free_entry(struct nf_queue_entry *entry) +{ + nf_queue_entry_release_refs(entry); + kfree(entry); +} + +static int +__nfqnl_enqueue_packet_gso(struct net *net, struct nfqnl_instance *queue, + struct sk_buff *skb, struct nf_queue_entry *entry) +{ + int ret = -ENOMEM; + struct nf_queue_entry *entry_seg; + + nf_bridge_adjust_segmented_data(skb); + + if (skb->next == NULL) { /* last packet, no need to copy entry */ + struct sk_buff *gso_skb = entry->skb; + entry->skb = skb; + ret = __nfqnl_enqueue_packet(net, queue, entry); + if (ret) + entry->skb = gso_skb; + return ret; + } + + skb->next = NULL; + + entry_seg = nf_queue_entry_dup(entry); + if (entry_seg) { + entry_seg->skb = skb; + ret = __nfqnl_enqueue_packet(net, queue, entry_seg); + if (ret) + free_entry(entry_seg); + } + return ret; +} + +static int +nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum) +{ + unsigned int queued; + struct nfqnl_instance *queue; + struct sk_buff *skb, *segs; + int err = -ENOBUFS; + struct net *net = dev_net(entry->indev ? 
+ entry->indev : entry->outdev); + struct nfnl_queue_net *q = nfnl_queue_pernet(net); + + /* rcu_read_lock()ed by nf_hook_slow() */ + queue = instance_lookup(q, queuenum); + if (!queue) + return -ESRCH; + + if (queue->copy_mode == NFQNL_COPY_NONE) + return -EINVAL; + + if ((queue->flags & NFQA_CFG_F_GSO) || !skb_is_gso(entry->skb)) + return __nfqnl_enqueue_packet(net, queue, entry); + + skb = entry->skb; + + switch (entry->pf) { + case NFPROTO_IPV4: + skb->protocol = htons(ETH_P_IP); + break; + case NFPROTO_IPV6: + skb->protocol = htons(ETH_P_IPV6); + break; + } + + nf_bridge_adjust_skb_data(skb); + segs = skb_gso_segment(skb, 0); + /* Does not use PTR_ERR to limit the number of error codes that can be + * returned by nf_queue. For instance, callers rely on -ECANCELED to + * mean 'ignore this hook'. + */ + if (IS_ERR(segs)) + goto out_err; + queued = 0; + err = 0; + do { + struct sk_buff *nskb = segs->next; + if (err == 0) + err = __nfqnl_enqueue_packet_gso(net, queue, + segs, entry); + if (err == 0) + queued++; + else + kfree_skb(segs); + segs = nskb; + } while (segs); + + if (queued) { + if (err) /* some segments are already queued */ + free_entry(entry); + kfree_skb(skb); + return 0; + } + out_err: + nf_bridge_adjust_segmented_data(skb); + return err; +} + static int nfqnl_mangle(void *data, int data_len, struct nf_queue_entry *e, int diff) { @@ -576,18 +776,18 @@ dev_cmp(struct nf_queue_entry *entry, unsigned long ifindex) /* drop all packets with either indev or outdev == ifindex from all queue * instances */ static void -nfqnl_dev_drop(int ifindex) +nfqnl_dev_drop(struct net *net, int ifindex) { int i; + struct nfnl_queue_net *q = nfnl_queue_pernet(net); rcu_read_lock(); for (i = 0; i < INSTANCE_BUCKETS; i++) { - struct hlist_node *tmp; struct nfqnl_instance *inst; - struct hlist_head *head = &instance_table[i]; + struct hlist_head *head = &q->instance_table[i]; - hlist_for_each_entry_rcu(inst, tmp, head, hlist) + hlist_for_each_entry_rcu(inst, head, hlist) nfqnl_flush(inst, dev_cmp, ifindex); } @@ -602,12 +802,9 @@ nfqnl_rcv_dev_event(struct notifier_block *this, { struct net_device *dev = ptr; - if (!net_eq(dev_net(dev), &init_net)) - return NOTIFY_DONE; - /* Drop any packets associated with the downed device */ if (event == NETDEV_DOWN) - nfqnl_dev_drop(dev->ifindex); + nfqnl_dev_drop(dev_net(dev), dev->ifindex); return NOTIFY_DONE; } @@ -620,24 +817,24 @@ nfqnl_rcv_nl_event(struct notifier_block *this, unsigned long event, void *ptr) { struct netlink_notify *n = ptr; + struct nfnl_queue_net *q = nfnl_queue_pernet(n->net); if (event == NETLINK_URELEASE && n->protocol == NETLINK_NETFILTER) { int i; /* destroy all instances for this portid */ - spin_lock(&instances_lock); + spin_lock(&q->instances_lock); for (i = 0; i < INSTANCE_BUCKETS; i++) { - struct hlist_node *tmp, *t2; + struct hlist_node *t2; struct nfqnl_instance *inst; - struct hlist_head *head = &instance_table[i]; + struct hlist_head *head = &q->instance_table[i]; - hlist_for_each_entry_safe(inst, tmp, t2, head, hlist) { - if ((n->net == &init_net) && - (n->portid == inst->peer_portid)) + hlist_for_each_entry_safe(inst, t2, head, hlist) { + if (n->portid == inst->peer_portid) __instance_destroy(inst); } } - spin_unlock(&instances_lock); + spin_unlock(&q->instances_lock); } return NOTIFY_DONE; } @@ -658,11 +855,12 @@ static const struct nla_policy nfqa_verdict_batch_policy[NFQA_MAX+1] = { [NFQA_MARK] = { .type = NLA_U32 }, }; -static struct nfqnl_instance *verdict_instance_lookup(u16 queue_num, int nlportid) +static struct 
nfqnl_instance * +verdict_instance_lookup(struct nfnl_queue_net *q, u16 queue_num, int nlportid) { struct nfqnl_instance *queue; - queue = instance_lookup(queue_num); + queue = instance_lookup(q, queue_num); if (!queue) return ERR_PTR(-ENODEV); @@ -706,7 +904,11 @@ nfqnl_recv_verdict_batch(struct sock *ctnl, struct sk_buff *skb, LIST_HEAD(batch_list); u16 queue_num = ntohs(nfmsg->res_id); - queue = verdict_instance_lookup(queue_num, NETLINK_CB(skb).portid); + struct net *net = sock_net(ctnl); + struct nfnl_queue_net *q = nfnl_queue_pernet(net); + + queue = verdict_instance_lookup(q, queue_num, + NETLINK_CB(skb).portid); if (IS_ERR(queue)) return PTR_ERR(queue); @@ -754,10 +956,13 @@ nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb, enum ip_conntrack_info uninitialized_var(ctinfo); struct nf_conn *ct = NULL; - queue = instance_lookup(queue_num); - if (!queue) + struct net *net = sock_net(ctnl); + struct nfnl_queue_net *q = nfnl_queue_pernet(net); - queue = verdict_instance_lookup(queue_num, NETLINK_CB(skb).portid); + queue = instance_lookup(q, queue_num); + if (!queue) + queue = verdict_instance_lookup(q, queue_num, + NETLINK_CB(skb).portid); if (IS_ERR(queue)) return PTR_ERR(queue); @@ -821,6 +1026,8 @@ nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb, u_int16_t queue_num = ntohs(nfmsg->res_id); struct nfqnl_instance *queue; struct nfqnl_msg_config_cmd *cmd = NULL; + struct net *net = sock_net(ctnl); + struct nfnl_queue_net *q = nfnl_queue_pernet(net); int ret = 0; if (nfqa[NFQA_CFG_CMD]) { @@ -834,7 +1041,7 @@ nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb, } rcu_read_lock(); - queue = instance_lookup(queue_num); + queue = instance_lookup(q, queue_num); if (queue && queue->peer_portid != NETLINK_CB(skb).portid) { ret = -EPERM; goto err_out_unlock; @@ -847,7 +1054,8 @@ nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb, ret = -EBUSY; goto err_out_unlock; } - queue = instance_create(queue_num, NETLINK_CB(skb).portid); + queue = instance_create(q, queue_num, + NETLINK_CB(skb).portid); if (IS_ERR(queue)) { ret = PTR_ERR(queue); goto err_out_unlock; @@ -858,7 +1066,7 @@ nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb, ret = -ENODEV; goto err_out_unlock; } - instance_destroy(queue); + instance_destroy(q, queue); break; case NFQNL_CFG_CMD_PF_BIND: case NFQNL_CFG_CMD_PF_UNBIND: @@ -952,19 +1160,24 @@ static const struct nfnetlink_subsystem nfqnl_subsys = { #ifdef CONFIG_PROC_FS struct iter_state { + struct seq_net_private p; unsigned int bucket; }; static struct hlist_node *get_first(struct seq_file *seq) { struct iter_state *st = seq->private; + struct net *net; + struct nfnl_queue_net *q; if (!st) return NULL; + net = seq_file_net(seq); + q = nfnl_queue_pernet(net); for (st->bucket = 0; st->bucket < INSTANCE_BUCKETS; st->bucket++) { - if (!hlist_empty(&instance_table[st->bucket])) - return instance_table[st->bucket].first; + if (!hlist_empty(&q->instance_table[st->bucket])) + return q->instance_table[st->bucket].first; } return NULL; } @@ -972,13 +1185,17 @@ static struct hlist_node *get_first(struct seq_file *seq) static struct hlist_node *get_next(struct seq_file *seq, struct hlist_node *h) { struct iter_state *st = seq->private; + struct net *net = seq_file_net(seq); h = h->next; while (!h) { + struct nfnl_queue_net *q; + if (++st->bucket >= INSTANCE_BUCKETS) return NULL; - h = instance_table[st->bucket].first; + q = nfnl_queue_pernet(net); + h = q->instance_table[st->bucket].first; } return h; } @@ -994,11 +1211,11 @@ static struct hlist_node 
*get_idx(struct seq_file *seq, loff_t pos) return pos ? NULL : head; } -static void *seq_start(struct seq_file *seq, loff_t *pos) - __acquires(instances_lock) +static void *seq_start(struct seq_file *s, loff_t *pos) + __acquires(nfnl_queue_pernet(seq_file_net(s))->instances_lock) { - spin_lock(&instances_lock); - return get_idx(seq, *pos); + spin_lock(&nfnl_queue_pernet(seq_file_net(s))->instances_lock); + return get_idx(s, *pos); } static void *seq_next(struct seq_file *s, void *v, loff_t *pos) @@ -1008,9 +1225,9 @@ static void *seq_next(struct seq_file *s, void *v, loff_t *pos) } static void seq_stop(struct seq_file *s, void *v) - __releases(instances_lock) + __releases(nfnl_queue_pernet(seq_file_net(s))->instances_lock) { - spin_unlock(&instances_lock); + spin_unlock(&nfnl_queue_pernet(seq_file_net(s))->instances_lock); } static int seq_show(struct seq_file *s, void *v) @@ -1034,7 +1251,7 @@ static const struct seq_operations nfqnl_seq_ops = { static int nfqnl_open(struct inode *inode, struct file *file) { - return seq_open_private(file, &nfqnl_seq_ops, + return seq_open_net(inode, file, &nfqnl_seq_ops, sizeof(struct iter_state)); } @@ -1043,39 +1260,63 @@ static const struct file_operations nfqnl_file_ops = { .open = nfqnl_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release_private, + .release = seq_release_net, }; #endif /* PROC_FS */ -static int __init nfnetlink_queue_init(void) +static int __net_init nfnl_queue_net_init(struct net *net) { - int i, status = -ENOMEM; + unsigned int i; + struct nfnl_queue_net *q = nfnl_queue_pernet(net); for (i = 0; i < INSTANCE_BUCKETS; i++) - INIT_HLIST_HEAD(&instance_table[i]); + INIT_HLIST_HEAD(&q->instance_table[i]); + + spin_lock_init(&q->instances_lock); + +#ifdef CONFIG_PROC_FS + if (!proc_create("nfnetlink_queue", 0440, + net->nf.proc_netfilter, &nfqnl_file_ops)) + return -ENOMEM; +#endif + return 0; +} + +static void __net_exit nfnl_queue_net_exit(struct net *net) +{ + remove_proc_entry("nfnetlink_queue", net->nf.proc_netfilter); +} + +static struct pernet_operations nfnl_queue_net_ops = { + .init = nfnl_queue_net_init, + .exit = nfnl_queue_net_exit, + .id = &nfnl_queue_net_id, + .size = sizeof(struct nfnl_queue_net), +}; + +static int __init nfnetlink_queue_init(void) +{ + int status = -ENOMEM; netlink_register_notifier(&nfqnl_rtnl_notifier); status = nfnetlink_subsys_register(&nfqnl_subsys); if (status < 0) { - printk(KERN_ERR "nf_queue: failed to create netlink socket\n"); + pr_err("nf_queue: failed to create netlink socket\n"); goto cleanup_netlink_notifier; } -#ifdef CONFIG_PROC_FS - if (!proc_create("nfnetlink_queue", 0440, - proc_net_netfilter, &nfqnl_file_ops)) + status = register_pernet_subsys(&nfnl_queue_net_ops); + if (status < 0) { + pr_err("nf_queue: failed to register pernet ops\n"); goto cleanup_subsys; -#endif - + } register_netdevice_notifier(&nfqnl_dev_notifier); nf_register_queue_handler(&nfqh); return status; -#ifdef CONFIG_PROC_FS cleanup_subsys: nfnetlink_subsys_unregister(&nfqnl_subsys); -#endif cleanup_netlink_notifier: netlink_unregister_notifier(&nfqnl_rtnl_notifier); return status; @@ -1085,9 +1326,7 @@ static void __exit nfnetlink_queue_fini(void) { nf_unregister_queue_handler(); unregister_netdevice_notifier(&nfqnl_dev_notifier); -#ifdef CONFIG_PROC_FS - remove_proc_entry("nfnetlink_queue", proc_net_netfilter); -#endif + unregister_pernet_subsys(&nfnl_queue_net_ops); nfnetlink_subsys_unregister(&nfqnl_subsys); netlink_unregister_notifier(&nfqnl_rtnl_notifier); diff --git 
a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 7b3a9e5999c0..8b03028cca69 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -2,6 +2,7 @@ * x_tables core - Backend for {ip,ip6,arp}_tables * * Copyright (C) 2006-2006 Harald Welte <laforge@netfilter.org> + * Copyright (C) 2006-2012 Patrick McHardy <kaber@trash.net> * * Based on existing ip_tables code which is * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling @@ -999,7 +1000,7 @@ static int xt_table_open(struct inode *inode, struct file *file) sizeof(struct xt_names_priv)); if (!ret) { priv = ((struct seq_file *)file->private_data)->private; - priv->af = (unsigned long)PDE(inode)->data; + priv->af = (unsigned long)PDE_DATA(inode); } return ret; } @@ -1147,7 +1148,7 @@ static int xt_match_open(struct inode *inode, struct file *file) seq = file->private_data; seq->private = trav; - trav->nfproto = (unsigned long)PDE(inode)->data; + trav->nfproto = (unsigned long)PDE_DATA(inode); return 0; } @@ -1211,7 +1212,7 @@ static int xt_target_open(struct inode *inode, struct file *file) seq = file->private_data; seq->private = trav; - trav->nfproto = (unsigned long)PDE(inode)->data; + trav->nfproto = (unsigned long)PDE_DATA(inode); return 0; } @@ -1323,12 +1324,12 @@ int xt_proto_init(struct net *net, u_int8_t af) out_remove_matches: strlcpy(buf, xt_prefix[af], sizeof(buf)); strlcat(buf, FORMAT_MATCHES, sizeof(buf)); - proc_net_remove(net, buf); + remove_proc_entry(buf, net->proc_net); out_remove_tables: strlcpy(buf, xt_prefix[af], sizeof(buf)); strlcat(buf, FORMAT_TABLES, sizeof(buf)); - proc_net_remove(net, buf); + remove_proc_entry(buf, net->proc_net); out: return -1; #endif @@ -1342,15 +1343,15 @@ void xt_proto_fini(struct net *net, u_int8_t af) strlcpy(buf, xt_prefix[af], sizeof(buf)); strlcat(buf, FORMAT_TABLES, sizeof(buf)); - proc_net_remove(net, buf); + remove_proc_entry(buf, net->proc_net); strlcpy(buf, xt_prefix[af], sizeof(buf)); strlcat(buf, FORMAT_TARGETS, sizeof(buf)); - proc_net_remove(net, buf); + remove_proc_entry(buf, net->proc_net); strlcpy(buf, xt_prefix[af], sizeof(buf)); strlcat(buf, FORMAT_MATCHES, sizeof(buf)); - proc_net_remove(net, buf); + remove_proc_entry(buf, net->proc_net); #endif /*CONFIG_PROC_FS*/ } EXPORT_SYMBOL_GPL(xt_proto_fini); diff --git a/net/netfilter/xt_AUDIT.c b/net/netfilter/xt_AUDIT.c index ba92824086f3..3228d7f24eb4 100644 --- a/net/netfilter/xt_AUDIT.c +++ b/net/netfilter/xt_AUDIT.c @@ -124,6 +124,9 @@ audit_tg(struct sk_buff *skb, const struct xt_action_param *par) const struct xt_audit_info *info = par->targinfo; struct audit_buffer *ab; + if (audit_enabled == 0) + goto errout; + ab = audit_log_start(NULL, GFP_ATOMIC, AUDIT_NETFILTER_PKT); if (ab == NULL) goto errout; diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c index bde009ed8d3b..a60261cb0e80 100644 --- a/net/netfilter/xt_CT.c +++ b/net/netfilter/xt_CT.c @@ -20,12 +20,8 @@ #include <net/netfilter/nf_conntrack_timeout.h> #include <net/netfilter/nf_conntrack_zones.h> -static unsigned int xt_ct_target_v0(struct sk_buff *skb, - const struct xt_action_param *par) +static inline int xt_ct_target(struct sk_buff *skb, struct nf_conn *ct) { - const struct xt_ct_target_info *info = par->targinfo; - struct nf_conn *ct = info->ct; - /* Previously seen (loopback)? Ignore. 
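[Note on the x_tables proc hunks above: dereferencing PDE(inode)->data gives way to the opaque PDE_DATA(inode) accessor, part of hiding struct proc_dir_entry from modules. A small sketch of the accessor in an open handler; demo_show/demo_open are illustrative, not the patch's code, which uses its own seq_operations.]

#include <linux/proc_fs.h>
#include <linux/seq_file.h>

static int demo_show(struct seq_file *m, void *v)
{
        /* m->private carries the pointer handed to proc_create_data() */
        seq_printf(m, "af=%lu\n", (unsigned long)m->private);
        return 0;
}

static int demo_open(struct inode *inode, struct file *file)
{
        return single_open(file, demo_show, PDE_DATA(inode));
}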
*/ if (skb->nfct != NULL) return XT_CONTINUE; @@ -37,21 +33,22 @@ static unsigned int xt_ct_target_v0(struct sk_buff *skb, return XT_CONTINUE; } -static unsigned int xt_ct_target_v1(struct sk_buff *skb, +static unsigned int xt_ct_target_v0(struct sk_buff *skb, const struct xt_action_param *par) { - const struct xt_ct_target_info_v1 *info = par->targinfo; + const struct xt_ct_target_info *info = par->targinfo; struct nf_conn *ct = info->ct; - /* Previously seen (loopback)? Ignore. */ - if (skb->nfct != NULL) - return XT_CONTINUE; + return xt_ct_target(skb, ct); +} - atomic_inc(&ct->ct_general.use); - skb->nfct = &ct->ct_general; - skb->nfctinfo = IP_CT_NEW; +static unsigned int xt_ct_target_v1(struct sk_buff *skb, + const struct xt_action_param *par) +{ + const struct xt_ct_target_info_v1 *info = par->targinfo; + struct nf_conn *ct = info->ct; - return XT_CONTINUE; + return xt_ct_target(skb, ct); } static u8 xt_ct_find_proto(const struct xt_tgchk_param *par) @@ -104,67 +101,6 @@ xt_ct_set_helper(struct nf_conn *ct, const char *helper_name, return 0; } -static int xt_ct_tg_check_v0(const struct xt_tgchk_param *par) -{ - struct xt_ct_target_info *info = par->targinfo; - struct nf_conntrack_tuple t; - struct nf_conn *ct; - int ret = -EOPNOTSUPP; - - if (info->flags & ~XT_CT_NOTRACK) - return -EINVAL; - - if (info->flags & XT_CT_NOTRACK) { - ct = nf_ct_untracked_get(); - atomic_inc(&ct->ct_general.use); - goto out; - } - -#ifndef CONFIG_NF_CONNTRACK_ZONES - if (info->zone) - goto err1; -#endif - - ret = nf_ct_l3proto_try_module_get(par->family); - if (ret < 0) - goto err1; - - memset(&t, 0, sizeof(t)); - ct = nf_conntrack_alloc(par->net, info->zone, &t, &t, GFP_KERNEL); - ret = PTR_ERR(ct); - if (IS_ERR(ct)) - goto err2; - - ret = 0; - if ((info->ct_events || info->exp_events) && - !nf_ct_ecache_ext_add(ct, info->ct_events, info->exp_events, - GFP_KERNEL)) - goto err3; - - if (info->helper[0]) { - ret = xt_ct_set_helper(ct, info->helper, par); - if (ret < 0) - goto err3; - } - - __set_bit(IPS_TEMPLATE_BIT, &ct->status); - __set_bit(IPS_CONFIRMED_BIT, &ct->status); - - /* Overload tuple linked list to put us in template list. 
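[Note on the xt_CT rework in these hunks: the revision-specific checkentry/destroy handlers collapse onto common helpers, and, as the hunks that follow show, the v0 entry points repack their legacy layout into the v1 structure before delegating. The adapter shape abstracted into a sketch; cfg_v0/cfg_v1/do_check are illustrative types, not the patch's.]

#include <linux/errno.h>
#include <linux/string.h>
#include <linux/types.h>

struct cfg_v0 { u32 flags; char helper[16]; };
struct cfg_v1 { u32 flags; char helper[16]; u32 timeout; };

static int do_check(const struct cfg_v1 *cfg)
{
        return (cfg->flags & ~1u) ? -EINVAL : 0;   /* shared validation */
}

static int check_v0(const struct cfg_v0 *old)
{
        struct cfg_v1 tmp = { .flags = old->flags }; /* repack v0 -> v1 */

        memcpy(tmp.helper, old->helper, sizeof(tmp.helper));
        return do_check(&tmp);
}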
*/ - hlist_nulls_add_head_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode, - &par->net->ct.tmpl); -out: - info->ct = ct; - return 0; - -err3: - nf_conntrack_free(ct); -err2: - nf_ct_l3proto_module_put(par->family); -err1: - return ret; -} - #ifdef CONFIG_NF_CONNTRACK_TIMEOUT static void __xt_ct_tg_timeout_put(struct ctnl_timeout *timeout) { @@ -242,16 +178,13 @@ out: #endif } -static int xt_ct_tg_check_v1(const struct xt_tgchk_param *par) +static int xt_ct_tg_check(const struct xt_tgchk_param *par, + struct xt_ct_target_info_v1 *info) { - struct xt_ct_target_info_v1 *info = par->targinfo; struct nf_conntrack_tuple t; struct nf_conn *ct; int ret = -EOPNOTSUPP; - if (info->flags & ~XT_CT_NOTRACK) - return -EINVAL; - if (info->flags & XT_CT_NOTRACK) { ct = nf_ct_untracked_get(); atomic_inc(&ct->ct_general.use); @@ -309,20 +242,49 @@ err1: return ret; } -static void xt_ct_tg_destroy_v0(const struct xt_tgdtor_param *par) +static int xt_ct_tg_check_v0(const struct xt_tgchk_param *par) { struct xt_ct_target_info *info = par->targinfo; - struct nf_conn *ct = info->ct; - struct nf_conn_help *help; + struct xt_ct_target_info_v1 info_v1 = { + .flags = info->flags, + .zone = info->zone, + .ct_events = info->ct_events, + .exp_events = info->exp_events, + }; + int ret; - if (!nf_ct_is_untracked(ct)) { - help = nfct_help(ct); - if (help) - module_put(help->helper->me); + if (info->flags & ~XT_CT_NOTRACK) + return -EINVAL; - nf_ct_l3proto_module_put(par->family); - } - nf_ct_put(info->ct); + memcpy(info_v1.helper, info->helper, sizeof(info->helper)); + + ret = xt_ct_tg_check(par, &info_v1); + if (ret < 0) + return ret; + + info->ct = info_v1.ct; + + return ret; +} + +static int xt_ct_tg_check_v1(const struct xt_tgchk_param *par) +{ + struct xt_ct_target_info_v1 *info = par->targinfo; + + if (info->flags & ~XT_CT_NOTRACK) + return -EINVAL; + + return xt_ct_tg_check(par, par->targinfo); +} + +static int xt_ct_tg_check_v2(const struct xt_tgchk_param *par) +{ + struct xt_ct_target_info_v1 *info = par->targinfo; + + if (info->flags & ~XT_CT_MASK) + return -EINVAL; + + return xt_ct_tg_check(par, par->targinfo); } static void xt_ct_destroy_timeout(struct nf_conn *ct) @@ -343,9 +305,9 @@ static void xt_ct_destroy_timeout(struct nf_conn *ct) #endif } -static void xt_ct_tg_destroy_v1(const struct xt_tgdtor_param *par) +static void xt_ct_tg_destroy(const struct xt_tgdtor_param *par, + struct xt_ct_target_info_v1 *info) { - struct xt_ct_target_info_v1 *info = par->targinfo; struct nf_conn *ct = info->ct; struct nf_conn_help *help; @@ -361,6 +323,26 @@ static void xt_ct_tg_destroy_v1(const struct xt_tgdtor_param *par) nf_ct_put(info->ct); } +static void xt_ct_tg_destroy_v0(const struct xt_tgdtor_param *par) +{ + struct xt_ct_target_info *info = par->targinfo; + struct xt_ct_target_info_v1 info_v1 = { + .flags = info->flags, + .zone = info->zone, + .ct_events = info->ct_events, + .exp_events = info->exp_events, + .ct = info->ct, + }; + memcpy(info_v1.helper, info->helper, sizeof(info->helper)); + + xt_ct_tg_destroy(par, &info_v1); +} + +static void xt_ct_tg_destroy_v1(const struct xt_tgdtor_param *par) +{ + xt_ct_tg_destroy(par, par->targinfo); +} + static struct xt_target xt_ct_tg_reg[] __read_mostly = { { .name = "CT", @@ -383,6 +365,17 @@ static struct xt_target xt_ct_tg_reg[] __read_mostly = { .table = "raw", .me = THIS_MODULE, }, + { + .name = "CT", + .family = NFPROTO_UNSPEC, + .revision = 2, + .targetsize = sizeof(struct xt_ct_target_info_v1), + .checkentry = xt_ct_tg_check_v2, + .destroy = xt_ct_tg_destroy_v1, + 
.target = xt_ct_target_v1, + .table = "raw", + .me = THIS_MODULE, + }, }; static unsigned int diff --git a/net/netfilter/xt_LOG.c b/net/netfilter/xt_LOG.c index fa40096940a1..fe573f6c9e91 100644 --- a/net/netfilter/xt_LOG.c +++ b/net/netfilter/xt_LOG.c @@ -474,7 +474,14 @@ ipt_log_packet(u_int8_t pf, const struct nf_loginfo *loginfo, const char *prefix) { - struct sbuff *m = sb_open(); + struct sbuff *m; + struct net *net = dev_net(in ? in : out); + + /* FIXME: Disabled from containers until syslog ns is supported */ + if (!net_eq(net, &init_net)) + return; + + m = sb_open(); if (!loginfo) loginfo = &default_loginfo; @@ -798,7 +805,14 @@ ip6t_log_packet(u_int8_t pf, const struct nf_loginfo *loginfo, const char *prefix) { - struct sbuff *m = sb_open(); + struct sbuff *m; + struct net *net = dev_net(in ? in : out); + + /* FIXME: Disabled from containers until syslog ns is supported */ + if (!net_eq(net, &init_net)) + return; + + m = sb_open(); if (!loginfo) loginfo = &default_loginfo; @@ -893,23 +907,55 @@ static struct nf_logger ip6t_log_logger __read_mostly = { }; #endif +static int __net_init log_net_init(struct net *net) +{ + nf_log_set(net, NFPROTO_IPV4, &ipt_log_logger); +#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) + nf_log_set(net, NFPROTO_IPV6, &ip6t_log_logger); +#endif + return 0; +} + +static void __net_exit log_net_exit(struct net *net) +{ + nf_log_unset(net, &ipt_log_logger); +#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) + nf_log_unset(net, &ip6t_log_logger); +#endif +} + +static struct pernet_operations log_net_ops = { + .init = log_net_init, + .exit = log_net_exit, +}; + static int __init log_tg_init(void) { int ret; + ret = register_pernet_subsys(&log_net_ops); + if (ret < 0) + goto err_pernet; + ret = xt_register_targets(log_tg_regs, ARRAY_SIZE(log_tg_regs)); if (ret < 0) - return ret; + goto err_target; nf_log_register(NFPROTO_IPV4, &ipt_log_logger); #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) nf_log_register(NFPROTO_IPV6, &ip6t_log_logger); #endif return 0; + +err_target: + unregister_pernet_subsys(&log_net_ops); +err_pernet: + return ret; } static void __exit log_tg_exit(void) { + unregister_pernet_subsys(&log_net_ops); nf_log_unregister(&ipt_log_logger); #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) nf_log_unregister(&ip6t_log_logger); diff --git a/net/netfilter/xt_NFQUEUE.c b/net/netfilter/xt_NFQUEUE.c index 817f9e9f2b16..1e2fae32f81b 100644 --- a/net/netfilter/xt_NFQUEUE.c +++ b/net/netfilter/xt_NFQUEUE.c @@ -76,22 +76,31 @@ static u32 hash_v6(const struct sk_buff *skb) } #endif -static unsigned int -nfqueue_tg_v1(struct sk_buff *skb, const struct xt_action_param *par) +static u32 +nfqueue_hash(const struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_NFQ_info_v1 *info = par->targinfo; u32 queue = info->queuenum; - if (info->queues_total > 1) { - if (par->family == NFPROTO_IPV4) - queue = (((u64) hash_v4(skb) * info->queues_total) >> - 32) + queue; + if (par->family == NFPROTO_IPV4) + queue += ((u64) hash_v4(skb) * info->queues_total) >> 32; #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) - else if (par->family == NFPROTO_IPV6) - queue = (((u64) hash_v6(skb) * info->queues_total) >> - 32) + queue; + else if (par->family == NFPROTO_IPV6) + queue += ((u64) hash_v6(skb) * info->queues_total) >> 32; #endif - } + + return queue; +} + +static unsigned int +nfqueue_tg_v1(struct sk_buff *skb, const struct xt_action_param *par) +{ + const struct xt_NFQ_info_v1 *info = par->targinfo; + u32 queue = info->queuenum; + + if (info->queues_total > 1) + queue = nfqueue_hash(skb, par); + return 
NF_QUEUE_NR(queue); } @@ -108,7 +117,7 @@ nfqueue_tg_v2(struct sk_buff *skb, const struct xt_action_param *par) static int nfqueue_tg_check(const struct xt_tgchk_param *par) { - const struct xt_NFQ_info_v2 *info = par->targinfo; + const struct xt_NFQ_info_v3 *info = par->targinfo; u32 maxid; if (unlikely(!rnd_inited)) { @@ -125,11 +134,32 @@ static int nfqueue_tg_check(const struct xt_tgchk_param *par) info->queues_total, maxid); return -ERANGE; } - if (par->target->revision == 2 && info->bypass > 1) + if (par->target->revision == 2 && info->flags > 1) return -EINVAL; + if (par->target->revision == 3 && info->flags & ~NFQ_FLAG_MASK) + return -EINVAL; + return 0; } +static unsigned int +nfqueue_tg_v3(struct sk_buff *skb, const struct xt_action_param *par) +{ + const struct xt_NFQ_info_v3 *info = par->targinfo; + u32 queue = info->queuenum; + + if (info->queues_total > 1) { + if (info->flags & NFQ_FLAG_CPU_FANOUT) { + int cpu = smp_processor_id(); + + queue = info->queuenum + cpu % info->queues_total; + } else + queue = nfqueue_hash(skb, par); + } + + return NF_QUEUE_NR(queue); +} + static struct xt_target nfqueue_tg_reg[] __read_mostly = { { .name = "NFQUEUE", @@ -156,6 +186,15 @@ static struct xt_target nfqueue_tg_reg[] __read_mostly = { .targetsize = sizeof(struct xt_NFQ_info_v2), .me = THIS_MODULE, }, + { + .name = "NFQUEUE", + .revision = 3, + .family = NFPROTO_UNSPEC, + .checkentry = nfqueue_tg_check, + .target = nfqueue_tg_v3, + .targetsize = sizeof(struct xt_NFQ_info_v3), + .me = THIS_MODULE, + }, }; static int __init nfqueue_tg_init(void) diff --git a/net/netfilter/xt_RATEEST.c b/net/netfilter/xt_RATEEST.c index f264032b8c56..370adf622cef 100644 --- a/net/netfilter/xt_RATEEST.c +++ b/net/netfilter/xt_RATEEST.c @@ -43,12 +43,11 @@ static void xt_rateest_hash_insert(struct xt_rateest *est) struct xt_rateest *xt_rateest_lookup(const char *name) { struct xt_rateest *est; - struct hlist_node *n; unsigned int h; h = xt_rateest_hash(name); mutex_lock(&xt_rateest_mutex); - hlist_for_each_entry(est, n, &rateest_hash[h], list) { + hlist_for_each_entry(est, &rateest_hash[h], list) { if (strcmp(est->name, name) == 0) { est->refcnt++; mutex_unlock(&xt_rateest_mutex); diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c index 71a266de5fb4..a75240f0d42b 100644 --- a/net/netfilter/xt_TCPMSS.c +++ b/net/netfilter/xt_TCPMSS.c @@ -2,6 +2,7 @@ * This is a module which is used for setting the MSS option in TCP packets. * * Copyright (C) 2000 Marc Boucher <marc@mbsi.ca> + * Copyright (C) 2007 Patrick McHardy <kaber@trash.net> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as diff --git a/net/netfilter/xt_bpf.c b/net/netfilter/xt_bpf.c new file mode 100644 index 000000000000..12d4da8e6c77 --- /dev/null +++ b/net/netfilter/xt_bpf.c @@ -0,0 +1,73 @@ +/* Xtables module to match packets using a BPF filter. + * Copyright 2013 Google Inc. + * Written by Willem de Bruijn <willemb@google.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
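[Note on the NFQUEUE revision-3 hunks above: NFQ_FLAG_CPU_FANOUT spreads packets across the queue range by CPU id instead of by flow hash. The selection logic restated standalone; pick_queue is a hypothetical wrapper around what the hunks compute inline, and the caller is assumed to run with preemption disabled, as a netfilter hook does.]

#include <linux/smp.h>
#include <linux/types.h>

static u32 pick_queue(u32 base, u32 total, bool cpu_fanout, u32 flow_hash)
{
        if (total <= 1)
                return base;
        if (cpu_fanout)                 /* NFQ_FLAG_CPU_FANOUT behaviour */
                return base + smp_processor_id() % total;
        /* hash fanout: map a 32-bit hash uniformly onto [0, total) */
        return base + (u32)(((u64)flow_hash * total) >> 32);
}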
+ */ + +#include <linux/module.h> +#include <linux/skbuff.h> +#include <linux/filter.h> + +#include <linux/netfilter/xt_bpf.h> +#include <linux/netfilter/x_tables.h> + +MODULE_AUTHOR("Willem de Bruijn <willemb@google.com>"); +MODULE_DESCRIPTION("Xtables: BPF filter match"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("ipt_bpf"); +MODULE_ALIAS("ip6t_bpf"); + +static int bpf_mt_check(const struct xt_mtchk_param *par) +{ + struct xt_bpf_info *info = par->matchinfo; + struct sock_fprog program; + + program.len = info->bpf_program_num_elem; + program.filter = (struct sock_filter __user *) info->bpf_program; + if (sk_unattached_filter_create(&info->filter, &program)) { + pr_info("bpf: check failed: parse error\n"); + return -EINVAL; + } + + return 0; +} + +static bool bpf_mt(const struct sk_buff *skb, struct xt_action_param *par) +{ + const struct xt_bpf_info *info = par->matchinfo; + + return SK_RUN_FILTER(info->filter, skb); +} + +static void bpf_mt_destroy(const struct xt_mtdtor_param *par) +{ + const struct xt_bpf_info *info = par->matchinfo; + sk_unattached_filter_destroy(info->filter); +} + +static struct xt_match bpf_mt_reg __read_mostly = { + .name = "bpf", + .revision = 0, + .family = NFPROTO_UNSPEC, + .checkentry = bpf_mt_check, + .match = bpf_mt, + .destroy = bpf_mt_destroy, + .matchsize = sizeof(struct xt_bpf_info), + .me = THIS_MODULE, +}; + +static int __init bpf_mt_init(void) +{ + return xt_register_match(&bpf_mt_reg); +} + +static void __exit bpf_mt_exit(void) +{ + xt_unregister_match(&bpf_mt_reg); +} + +module_init(bpf_mt_init); +module_exit(bpf_mt_exit); diff --git a/net/netfilter/xt_connlabel.c b/net/netfilter/xt_connlabel.c new file mode 100644 index 000000000000..9f8719df2001 --- /dev/null +++ b/net/netfilter/xt_connlabel.c @@ -0,0 +1,99 @@ +/* + * (C) 2013 Astaro GmbH & Co KG + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
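[Note on the new xt_bpf match above: it validates the user-supplied classic BPF program once at rule insertion via sk_unattached_filter_create() and then runs it against every packet with SK_RUN_FILTER(), a non-zero return meaning the rule matches. For illustration, a one-instruction "accept everything" classic BPF program of the kind such a filter is built from; the userspace rule syntax in the comment is an assumption about the iptables extension, not taken from this patch.]

#include <linux/filter.h>

/* BPF_RET|BPF_K with k = 0xffffffff returns the whole packet length,
 * i.e. every packet matches. Userspace might load something like
 *   iptables -A OUTPUT -m bpf --bytecode '1,6 0 0 4294967295' -j ACCEPT
 * (hypothetical example of the 'len,code jt jf k' encoding).
 */
static struct sock_filter demo_insns[] = {
        { 0x06, 0, 0, 0xffffffff },     /* BPF_RET | BPF_K */
};

static struct sock_fprog demo_prog = {
        .len    = 1,                    /* number of instructions */
        /* in-kernel callers cast, as bpf_mt_check() does above */
        .filter = (struct sock_filter __user *)demo_insns,
};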
+ */ + +#include <linux/module.h> +#include <linux/skbuff.h> +#include <net/netfilter/nf_conntrack.h> +#include <net/netfilter/nf_conntrack_labels.h> +#include <linux/netfilter/x_tables.h> + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Florian Westphal <fw@strlen.de>"); +MODULE_DESCRIPTION("Xtables: add/match connection tracking labels"); +MODULE_ALIAS("ipt_connlabel"); +MODULE_ALIAS("ip6t_connlabel"); + +static bool +connlabel_mt(const struct sk_buff *skb, struct xt_action_param *par) +{ + const struct xt_connlabel_mtinfo *info = par->matchinfo; + enum ip_conntrack_info ctinfo; + struct nf_conn *ct; + bool invert = info->options & XT_CONNLABEL_OP_INVERT; + + ct = nf_ct_get(skb, &ctinfo); + if (ct == NULL || nf_ct_is_untracked(ct)) + return invert; + + if (info->options & XT_CONNLABEL_OP_SET) + return (nf_connlabel_set(ct, info->bit) == 0) ^ invert; + + return nf_connlabel_match(ct, info->bit) ^ invert; +} + +static int connlabel_mt_check(const struct xt_mtchk_param *par) +{ + const int options = XT_CONNLABEL_OP_INVERT | + XT_CONNLABEL_OP_SET; + struct xt_connlabel_mtinfo *info = par->matchinfo; + int ret; + size_t words; + + if (info->bit > XT_CONNLABEL_MAXBIT) + return -ERANGE; + + if (info->options & ~options) { + pr_err("Unknown options in mask %x\n", info->options); + return -EINVAL; + } + + ret = nf_ct_l3proto_try_module_get(par->family); + if (ret < 0) { + pr_info("cannot load conntrack support for proto=%u\n", + par->family); + return ret; + } + + par->net->ct.labels_used++; + words = BITS_TO_LONGS(info->bit+1); + if (words > par->net->ct.label_words) + par->net->ct.label_words = words; + + return ret; +} + +static void connlabel_mt_destroy(const struct xt_mtdtor_param *par) +{ + par->net->ct.labels_used--; + if (par->net->ct.labels_used == 0) + par->net->ct.label_words = 0; + nf_ct_l3proto_module_put(par->family); +} + +static struct xt_match connlabels_mt_reg __read_mostly = { + .name = "connlabel", + .family = NFPROTO_UNSPEC, + .checkentry = connlabel_mt_check, + .match = connlabel_mt, + .matchsize = sizeof(struct xt_connlabel_mtinfo), + .destroy = connlabel_mt_destroy, + .me = THIS_MODULE, +}; + +static int __init connlabel_mt_init(void) +{ + return xt_register_match(&connlabels_mt_reg); +} + +static void __exit connlabel_mt_exit(void) +{ + xt_unregister_match(&connlabels_mt_reg); +} + +module_init(connlabel_mt_init); +module_exit(connlabel_mt_exit); diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c index 70b5591a2586..c40b2695633b 100644 --- a/net/netfilter/xt_connlimit.c +++ b/net/netfilter/xt_connlimit.c @@ -101,7 +101,7 @@ static int count_them(struct net *net, { const struct nf_conntrack_tuple_hash *found; struct xt_connlimit_conn *conn; - struct hlist_node *pos, *n; + struct hlist_node *n; struct nf_conn *found_ct; struct hlist_head *hash; bool addit = true; @@ -115,7 +115,7 @@ static int count_them(struct net *net, rcu_read_lock(); /* check the saved connections */ - hlist_for_each_entry_safe(conn, pos, n, hash, node) { + hlist_for_each_entry_safe(conn, n, hash, node) { found = nf_conntrack_find_get(net, NF_CT_DEFAULT_ZONE, &conn->tuple); found_ct = NULL; @@ -258,14 +258,14 @@ static void connlimit_mt_destroy(const struct xt_mtdtor_param *par) { const struct xt_connlimit_info *info = par->matchinfo; struct xt_connlimit_conn *conn; - struct hlist_node *pos, *n; + struct hlist_node *n; struct hlist_head *hash = info->data->iphash; unsigned int i; nf_ct_l3proto_module_put(par->family); for (i = 0; i < ARRAY_SIZE(info->data->iphash); ++i) { - 
hlist_for_each_entry_safe(conn, pos, n, &hash[i], node) { + hlist_for_each_entry_safe(conn, n, &hash[i], node) { hlist_del(&conn->node); kfree(conn); } diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c index 61805d7b38aa..188404b9b002 100644 --- a/net/netfilter/xt_conntrack.c +++ b/net/netfilter/xt_conntrack.c @@ -3,6 +3,7 @@ * information. (Superset of Rusty's minimalistic state match.) * * (C) 2001 Marc Boucher (marc@mbsi.ca). + * (C) 2006-2012 Patrick McHardy <kaber@trash.net> * Copyright © CC Computer Consultants GmbH, 2007 - 2008 * * This program is free software; you can redistribute it and/or modify diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index a9d7af953ceb..9ff035c71403 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -3,6 +3,7 @@ * separately for each hashbucket (sourceip/sourceport/dstip/dstport) * * (C) 2003-2004 by Harald Welte <laforge@netfilter.org> + * (C) 2006-2012 Patrick McHardy <kaber@trash.net> * Copyright © CC Computer Consultants GmbH, 2007 - 2008 * * Development of this code was funded by Astaro AG, http://www.astaro.com/ @@ -107,6 +108,7 @@ struct xt_hashlimit_htable { /* seq_file stuff */ struct proc_dir_entry *pde; + const char *name; struct net *net; struct hlist_head hash[0]; /* hashtable itself */ @@ -141,11 +143,10 @@ dsthash_find(const struct xt_hashlimit_htable *ht, const struct dsthash_dst *dst) { struct dsthash_ent *ent; - struct hlist_node *pos; u_int32_t hash = hash_dst(ht, dst); if (!hlist_empty(&ht->hash[hash])) { - hlist_for_each_entry_rcu(ent, pos, &ht->hash[hash], node) + hlist_for_each_entry_rcu(ent, &ht->hash[hash], node) if (dst_cmp(ent, dst)) { spin_lock(&ent->lock); return ent; @@ -254,6 +255,11 @@ static int htable_create(struct net *net, struct xt_hashlimit_mtinfo1 *minfo, hinfo->count = 0; hinfo->family = family; hinfo->rnd_initialized = false; + hinfo->name = kstrdup(minfo->name, GFP_KERNEL); + if (!hinfo->name) { + vfree(hinfo); + return -ENOMEM; + } spin_lock_init(&hinfo->lock); hinfo->pde = proc_create_data(minfo->name, 0, @@ -261,6 +267,7 @@ static int htable_create(struct net *net, struct xt_hashlimit_mtinfo1 *minfo, hashlimit_net->ipt_hashlimit : hashlimit_net->ip6t_hashlimit, &dl_file_ops, hinfo); if (hinfo->pde == NULL) { + kfree(hinfo->name); vfree(hinfo); return -ENOMEM; } @@ -297,8 +304,8 @@ static void htable_selective_cleanup(struct xt_hashlimit_htable *ht, spin_lock_bh(&ht->lock); for (i = 0; i < ht->cfg.size; i++) { struct dsthash_ent *dh; - struct hlist_node *pos, *n; - hlist_for_each_entry_safe(dh, pos, n, &ht->hash[i], node) { + struct hlist_node *n; + hlist_for_each_entry_safe(dh, n, &ht->hash[i], node) { if ((*select)(ht, dh)) dsthash_free(ht, dh); } @@ -331,9 +338,10 @@ static void htable_destroy(struct xt_hashlimit_htable *hinfo) parent = hashlimit_net->ip6t_hashlimit; if(parent != NULL) - remove_proc_entry(hinfo->pde->name, parent); + remove_proc_entry(hinfo->name, parent); htable_selective_cleanup(hinfo, select_all); + kfree(hinfo->name); vfree(hinfo); } @@ -343,10 +351,9 @@ static struct xt_hashlimit_htable *htable_find_get(struct net *net, { struct hashlimit_net *hashlimit_net = hashlimit_pernet(net); struct xt_hashlimit_htable *hinfo; - struct hlist_node *pos; - hlist_for_each_entry(hinfo, pos, &hashlimit_net->htables, node) { - if (!strcmp(name, hinfo->pde->name) && + hlist_for_each_entry(hinfo, &hashlimit_net->htables, node) { + if (!strcmp(name, hinfo->name) && hinfo->family == family) { hinfo->use++; return hinfo; @@ -821,10 
+828,9 @@ static int dl_seq_show(struct seq_file *s, void *v) struct xt_hashlimit_htable *htable = s->private; unsigned int *bucket = (unsigned int *)v; struct dsthash_ent *ent; - struct hlist_node *pos; if (!hlist_empty(&htable->hash[*bucket])) { - hlist_for_each_entry(ent, pos, &htable->hash[*bucket], node) + hlist_for_each_entry(ent, &htable->hash[*bucket], node) if (dl_seq_real_show(ent, htable->family, s)) return -1; } @@ -844,7 +850,7 @@ static int dl_proc_open(struct inode *inode, struct file *file) if (!ret) { struct seq_file *sf = file->private_data; - sf->private = PDE(inode)->data; + sf->private = PDE_DATA(inode); } return ret; } @@ -867,7 +873,7 @@ static int __net_init hashlimit_proc_net_init(struct net *net) #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) hashlimit_net->ip6t_hashlimit = proc_mkdir("ip6t_hashlimit", net->proc_net); if (!hashlimit_net->ip6t_hashlimit) { - proc_net_remove(net, "ipt_hashlimit"); + remove_proc_entry("ipt_hashlimit", net->proc_net); return -ENOMEM; } #endif @@ -877,7 +883,6 @@ static int __net_init hashlimit_proc_net_init(struct net *net) static void __net_exit hashlimit_proc_net_exit(struct net *net) { struct xt_hashlimit_htable *hinfo; - struct hlist_node *pos; struct proc_dir_entry *pde; struct hashlimit_net *hashlimit_net = hashlimit_pernet(net); @@ -890,16 +895,16 @@ static void __net_exit hashlimit_proc_net_exit(struct net *net) if (pde == NULL) pde = hashlimit_net->ip6t_hashlimit; - hlist_for_each_entry(hinfo, pos, &hashlimit_net->htables, node) - remove_proc_entry(hinfo->pde->name, pde); + hlist_for_each_entry(hinfo, &hashlimit_net->htables, node) + remove_proc_entry(hinfo->name, pde); hashlimit_net->ipt_hashlimit = NULL; hashlimit_net->ip6t_hashlimit = NULL; mutex_unlock(&hashlimit_mutex); - proc_net_remove(net, "ipt_hashlimit"); + remove_proc_entry("ipt_hashlimit", net->proc_net); #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) - proc_net_remove(net, "ip6t_hashlimit"); + remove_proc_entry("ip6t_hashlimit", net->proc_net); #endif } diff --git a/net/netfilter/xt_limit.c b/net/netfilter/xt_limit.c index a4c1e4528cac..bef850596558 100644 --- a/net/netfilter/xt_limit.c +++ b/net/netfilter/xt_limit.c @@ -1,5 +1,6 @@ /* (C) 1999 Jérôme de Vivie <devivie@info.enserb.u-bordeaux.fr> * (C) 1999 Hervé Eychenne <eychenne@info.enserb.u-bordeaux.fr> + * (C) 2006-2012 Patrick McHardy <kaber@trash.net> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as diff --git a/net/netfilter/xt_osf.c b/net/netfilter/xt_osf.c index a5e673d32bda..647d989a01e6 100644 --- a/net/netfilter/xt_osf.c +++ b/net/netfilter/xt_osf.c @@ -201,6 +201,7 @@ xt_osf_match_packet(const struct sk_buff *skb, struct xt_action_param *p) unsigned char opts[MAX_IPOPTLEN]; const struct xt_osf_finger *kf; const struct xt_osf_user_finger *f; + struct net *net = dev_net(p->in ? 
p->in : p->out); if (!info) return false; @@ -325,7 +326,7 @@ xt_osf_match_packet(const struct sk_buff *skb, struct xt_action_param *p) fcount++; if (info->flags & XT_OSF_LOG) - nf_log_packet(p->family, p->hooknum, skb, + nf_log_packet(net, p->family, p->hooknum, skb, p->in, p->out, NULL, "%s [%s:%s] : %pI4:%d -> %pI4:%d hops=%d\n", f->genre, f->version, f->subtype, @@ -341,7 +342,8 @@ xt_osf_match_packet(const struct sk_buff *skb, struct xt_action_param *p) rcu_read_unlock(); if (!fcount && (info->flags & XT_OSF_LOG)) - nf_log_packet(p->family, p->hooknum, skb, p->in, p->out, NULL, + nf_log_packet(net, p->family, p->hooknum, skb, p->in, + p->out, NULL, "Remote OS is not known: %pI4:%u -> %pI4:%u\n", &ip->saddr, ntohs(tcp->source), &ip->daddr, ntohs(tcp->dest)); diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c index 978efc9b555a..1e657cf715c4 100644 --- a/net/netfilter/xt_recent.c +++ b/net/netfilter/xt_recent.c @@ -401,8 +401,7 @@ static int recent_mt_check(const struct xt_mtchk_param *par, ret = -ENOMEM; goto out; } - pde->uid = uid; - pde->gid = gid; + proc_set_user(pde, uid, gid); #endif spin_lock_bh(&recent_lock); list_add_tail(&t->list, &recent_net->tables); @@ -525,14 +524,13 @@ static const struct seq_operations recent_seq_ops = { static int recent_seq_open(struct inode *inode, struct file *file) { - struct proc_dir_entry *pde = PDE(inode); struct recent_iter_state *st; st = __seq_open_private(file, &recent_seq_ops, sizeof(*st)); if (st == NULL) return -ENOMEM; - st->table = pde->data; + st->table = PDE_DATA(inode); return 0; } @@ -540,8 +538,7 @@ static ssize_t recent_mt_proc_write(struct file *file, const char __user *input, size_t size, loff_t *loff) { - const struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode); - struct recent_table *t = pde->data; + struct recent_table *t = PDE_DATA(file_inode(file)); struct recent_entry *e; char buf[sizeof("+b335:1d35:1e55:dead:c0de:1715:5afe:c0de")]; const char *c = buf; @@ -643,7 +640,7 @@ static void __net_exit recent_proc_net_exit(struct net *net) recent_net->xt_recent = NULL; spin_unlock_bh(&recent_lock); - proc_net_remove(net, "xt_recent"); + remove_proc_entry("xt_recent", net->proc_net); } #else static inline int recent_proc_net_init(struct net *net) diff --git a/net/netfilter/xt_set.c b/net/netfilter/xt_set.c index 865a9e54f3ad..31790e789e22 100644 --- a/net/netfilter/xt_set.c +++ b/net/netfilter/xt_set.c @@ -1,7 +1,7 @@ /* Copyright (C) 2000-2002 Joakim Axelsson <gozem@linux.nu> * Patrick Schaaf <bof@bof.de> * Martin Josefsson <gandalf@wlug.westbo.se> - * Copyright (C) 2003-2011 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> + * Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -30,7 +30,7 @@ MODULE_ALIAS("ip6t_SET"); static inline int match_set(ip_set_id_t index, const struct sk_buff *skb, const struct xt_action_param *par, - const struct ip_set_adt_opt *opt, int inv) + struct ip_set_adt_opt *opt, int inv) { if (ip_set_test(index, skb, par, opt)) inv = !inv; @@ -38,20 +38,12 @@ match_set(ip_set_id_t index, const struct sk_buff *skb, } #define ADT_OPT(n, f, d, fs, cfs, t) \ -const struct ip_set_adt_opt n = { \ - .family = f, \ - .dim = d, \ - .flags = fs, \ - .cmdflags = cfs, \ - .timeout = t, \ -} -#define ADT_MOPT(n, f, d, fs, cfs, t) \ struct ip_set_adt_opt n = { \ .family = f, \ .dim = d, \ .flags = fs, \ .cmdflags = cfs, \ - .timeout = t, \ + 
.ext.timeout = t, \ } /* Revision 0 interface: backward compatible with netfilter/iptables */ @@ -197,6 +189,9 @@ set_match_v1(const struct sk_buff *skb, struct xt_action_param *par) ADT_OPT(opt, par->family, info->match_set.dim, info->match_set.flags, 0, UINT_MAX); + if (opt.flags & IPSET_RETURN_NOMATCH) + opt.cmdflags |= IPSET_FLAG_RETURN_NOMATCH; + return match_set(info->match_set.index, skb, par, &opt, info->match_set.flags & IPSET_INV_MATCH); } @@ -305,15 +300,15 @@ static unsigned int set_target_v2(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_set_info_target_v2 *info = par->targinfo; - ADT_MOPT(add_opt, par->family, info->add_set.dim, - info->add_set.flags, info->flags, info->timeout); + ADT_OPT(add_opt, par->family, info->add_set.dim, + info->add_set.flags, info->flags, info->timeout); ADT_OPT(del_opt, par->family, info->del_set.dim, info->del_set.flags, 0, UINT_MAX); /* Normalize to fit into jiffies */ - if (add_opt.timeout != IPSET_NO_TIMEOUT && - add_opt.timeout > UINT_MAX/MSEC_PER_SEC) - add_opt.timeout = UINT_MAX/MSEC_PER_SEC; + if (add_opt.ext.timeout != IPSET_NO_TIMEOUT && + add_opt.ext.timeout > UINT_MAX/MSEC_PER_SEC) + add_opt.ext.timeout = UINT_MAX/MSEC_PER_SEC; if (info->add_set.index != IPSET_INVALID_ID) ip_set_add(info->add_set.index, skb, par, &add_opt); if (info->del_set.index != IPSET_INVALID_ID) @@ -325,6 +320,52 @@ set_target_v2(struct sk_buff *skb, const struct xt_action_param *par) #define set_target_v2_checkentry set_target_v1_checkentry #define set_target_v2_destroy set_target_v1_destroy +/* Revision 3 match */ + +static bool +match_counter(u64 counter, const struct ip_set_counter_match *info) +{ + switch (info->op) { + case IPSET_COUNTER_NONE: + return true; + case IPSET_COUNTER_EQ: + return counter == info->value; + case IPSET_COUNTER_NE: + return counter != info->value; + case IPSET_COUNTER_LT: + return counter < info->value; + case IPSET_COUNTER_GT: + return counter > info->value; + } + return false; +} + +static bool +set_match_v3(const struct sk_buff *skb, struct xt_action_param *par) +{ + const struct xt_set_info_match_v3 *info = par->matchinfo; + ADT_OPT(opt, par->family, info->match_set.dim, + info->match_set.flags, info->flags, UINT_MAX); + int ret; + + if (info->packets.op != IPSET_COUNTER_NONE || + info->bytes.op != IPSET_COUNTER_NONE) + opt.cmdflags |= IPSET_FLAG_MATCH_COUNTERS; + + ret = match_set(info->match_set.index, skb, par, &opt, + info->match_set.flags & IPSET_INV_MATCH); + + if (!(ret && opt.cmdflags & IPSET_FLAG_MATCH_COUNTERS)) + return ret; + + if (!match_counter(opt.ext.packets, &info->packets)) + return 0; + return match_counter(opt.ext.bytes, &info->bytes); +} + +#define set_match_v3_checkentry set_match_v1_checkentry +#define set_match_v3_destroy set_match_v1_destroy + static struct xt_match set_matches[] __read_mostly = { { .name = "set", @@ -377,6 +418,27 @@ static struct xt_match set_matches[] __read_mostly = { .destroy = set_match_v1_destroy, .me = THIS_MODULE }, + /* counters support: update, match */ + { + .name = "set", + .family = NFPROTO_IPV4, + .revision = 3, + .match = set_match_v3, + .matchsize = sizeof(struct xt_set_info_match_v3), + .checkentry = set_match_v3_checkentry, + .destroy = set_match_v3_destroy, + .me = THIS_MODULE + }, + { + .name = "set", + .family = NFPROTO_IPV6, + .revision = 3, + .match = set_match_v3, + .matchsize = sizeof(struct xt_set_info_match_v3), + .checkentry = set_match_v3_checkentry, + .destroy = set_match_v3_destroy, + .me = THIS_MODULE + }, }; static struct 
xt_target set_targets[] __read_mostly = { diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c index 847d495cd4de..8a6c6ea466d8 100644 --- a/net/netlabel/netlabel_unlabeled.c +++ b/net/netlabel/netlabel_unlabeled.c @@ -1189,8 +1189,6 @@ static int netlbl_unlabel_staticlist(struct sk_buff *skb, struct netlbl_unlhsh_walk_arg cb_arg; u32 skip_bkt = cb->args[0]; u32 skip_chain = cb->args[1]; - u32 skip_addr4 = cb->args[2]; - u32 skip_addr6 = cb->args[3]; u32 iter_bkt; u32 iter_chain = 0, iter_addr4 = 0, iter_addr6 = 0; struct netlbl_unlhsh_iface *iface; @@ -1215,7 +1213,7 @@ static int netlbl_unlabel_staticlist(struct sk_buff *skb, continue; netlbl_af4list_foreach_rcu(addr4, &iface->addr4_list) { - if (iter_addr4++ < skip_addr4) + if (iter_addr4++ < cb->args[2]) continue; if (netlbl_unlabel_staticlist_gen( NLBL_UNLABEL_C_STATICLIST, @@ -1231,7 +1229,7 @@ static int netlbl_unlabel_staticlist(struct sk_buff *skb, #if IS_ENABLED(CONFIG_IPV6) netlbl_af6list_foreach_rcu(addr6, &iface->addr6_list) { - if (iter_addr6++ < skip_addr6) + if (iter_addr6++ < cb->args[3]) continue; if (netlbl_unlabel_staticlist_gen( NLBL_UNLABEL_C_STATICLIST, @@ -1250,10 +1248,10 @@ static int netlbl_unlabel_staticlist(struct sk_buff *skb, unlabel_staticlist_return: rcu_read_unlock(); - cb->args[0] = skip_bkt; - cb->args[1] = skip_chain; - cb->args[2] = skip_addr4; - cb->args[3] = skip_addr6; + cb->args[0] = iter_bkt; + cb->args[1] = iter_chain; + cb->args[2] = iter_addr4; + cb->args[3] = iter_addr6; return skb->len; } @@ -1273,12 +1271,9 @@ static int netlbl_unlabel_staticlistdef(struct sk_buff *skb, { struct netlbl_unlhsh_walk_arg cb_arg; struct netlbl_unlhsh_iface *iface; - u32 skip_addr4 = cb->args[0]; - u32 skip_addr6 = cb->args[1]; - u32 iter_addr4 = 0; + u32 iter_addr4 = 0, iter_addr6 = 0; struct netlbl_af4list *addr4; #if IS_ENABLED(CONFIG_IPV6) - u32 iter_addr6 = 0; struct netlbl_af6list *addr6; #endif @@ -1292,7 +1287,7 @@ static int netlbl_unlabel_staticlistdef(struct sk_buff *skb, goto unlabel_staticlistdef_return; netlbl_af4list_foreach_rcu(addr4, &iface->addr4_list) { - if (iter_addr4++ < skip_addr4) + if (iter_addr4++ < cb->args[0]) continue; if (netlbl_unlabel_staticlist_gen(NLBL_UNLABEL_C_STATICLISTDEF, iface, @@ -1305,7 +1300,7 @@ static int netlbl_unlabel_staticlistdef(struct sk_buff *skb, } #if IS_ENABLED(CONFIG_IPV6) netlbl_af6list_foreach_rcu(addr6, &iface->addr6_list) { - if (iter_addr6++ < skip_addr6) + if (iter_addr6++ < cb->args[1]) continue; if (netlbl_unlabel_staticlist_gen(NLBL_UNLABEL_C_STATICLISTDEF, iface, @@ -1320,8 +1315,8 @@ static int netlbl_unlabel_staticlistdef(struct sk_buff *skb, unlabel_staticlistdef_return: rcu_read_unlock(); - cb->args[0] = skip_addr4; - cb->args[1] = skip_addr6; + cb->args[0] = iter_addr4; + cb->args[1] = iter_addr6; return skb->len; } diff --git a/net/netlink/Kconfig b/net/netlink/Kconfig new file mode 100644 index 000000000000..2c5e95e9bfbd --- /dev/null +++ b/net/netlink/Kconfig @@ -0,0 +1,19 @@ +# +# Netlink Sockets +# + +config NETLINK_MMAP + bool "NETLINK: mmaped IO" + ---help--- + This option enables support for memory mapped netlink IO. This + reduces overhead by avoiding copying data between kernel- and + userspace. + + If unsure, say N. + +config NETLINK_DIAG + tristate "NETLINK: socket monitoring interface" + default n + ---help--- + Support for NETLINK socket monitoring interface used by the ss tool. + If unsure, say Y. 
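The NETLINK_MMAP option configured above is driven from userspace by a setsockopt()/mmap() pair that the af_netlink.c changes below implement. What follows is a minimal sketch of that calling sequence, not taken from the patch itself: it assumes the uapi half of this series exports struct nl_mmap_req and the NETLINK_RX_RING/NETLINK_TX_RING option names through <linux/netlink.h>, a 4 KiB PAGE_SIZE, and a purely illustrative ring geometry; error handling is trimmed.

#include <unistd.h>
#include <sys/socket.h>
#include <sys/mman.h>
#include <linux/netlink.h>

#ifndef SOL_NETLINK
#define SOL_NETLINK 270	/* not exposed by every libc header set */
#endif

int main(void)
{
	/* Geometry must pass the netlink_set_ring() checks below: block size
	 * a multiple of PAGE_SIZE, frame size >= NL_MMAP_HDRLEN and aligned
	 * to NL_MMAP_MSG_ALIGNMENT, and frame_nr == block_nr * frames_per_block.
	 */
	struct nl_mmap_req req = {
		.nm_block_size	= 4096,
		.nm_block_nr	= 64,
		.nm_frame_size	= 2048,
		.nm_frame_nr	= 64 * (4096 / 2048),
	};
	void *rx;
	int fd;

	fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
	if (fd < 0)
		return 1;

	/* Requires CAP_NET_ADMIN, per the NETLINK_RX_RING case added to
	 * netlink_setsockopt() below. */
	if (setsockopt(fd, SOL_NETLINK, NETLINK_RX_RING, &req, sizeof(req)) < 0)
		return 1;

	/* netlink_mmap() demands that the length cover every configured
	 * ring; only the RX ring exists here, so map exactly its size. */
	rx = mmap(NULL, req.nm_block_size * req.nm_block_nr,
		  PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (rx == MAP_FAILED)
		return 1;

	/* Each frame starts with a struct nl_mmap_hdr; its nm_status field
	 * (NL_MMAP_STATUS_UNUSED, _VALID, ...) hands frame ownership back
	 * and forth between kernel and userspace. */

	munmap(rx, req.nm_block_size * req.nm_block_nr);
	close(fd);
	return 0;
}

When both rings are configured, a single mmap() must span the RX ring followed by the TX ring, in that order, since netlink_mmap() inserts the two pg_vecs back to back.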
diff --git a/net/netlink/Makefile b/net/netlink/Makefile index bdd6ddf4e95b..e837917f6c03 100644 --- a/net/netlink/Makefile +++ b/net/netlink/Makefile @@ -3,3 +3,6 @@ # obj-y := af_netlink.o genetlink.o + +obj-$(CONFIG_NETLINK_DIAG) += netlink_diag.o +netlink_diag-y := diag.o diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index c0353d55d56f..12ac6b47a35c 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -3,6 +3,7 @@ * * Authors: Alan Cox <alan@lxorguk.ukuu.org.uk> * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru> + * Patrick McHardy <kaber@trash.net> * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -55,87 +56,45 @@ #include <linux/types.h> #include <linux/audit.h> #include <linux/mutex.h> +#include <linux/vmalloc.h> +#include <asm/cacheflush.h> #include <net/net_namespace.h> #include <net/sock.h> #include <net/scm.h> #include <net/netlink.h> -#define NLGRPSZ(x) (ALIGN(x, sizeof(unsigned long) * 8) / 8) -#define NLGRPLONGS(x) (NLGRPSZ(x)/sizeof(unsigned long)) - -struct netlink_sock { - /* struct sock has to be the first member of netlink_sock */ - struct sock sk; - u32 portid; - u32 dst_portid; - u32 dst_group; - u32 flags; - u32 subscriptions; - u32 ngroups; - unsigned long *groups; - unsigned long state; - wait_queue_head_t wait; - struct netlink_callback *cb; - struct mutex *cb_mutex; - struct mutex cb_def_mutex; - void (*netlink_rcv)(struct sk_buff *skb); - void (*netlink_bind)(int group); - struct module *module; -}; +#include "af_netlink.h" struct listeners { struct rcu_head rcu; unsigned long masks[0]; }; +/* state bits */ +#define NETLINK_CONGESTED 0x0 + +/* flags */ #define NETLINK_KERNEL_SOCKET 0x1 #define NETLINK_RECV_PKTINFO 0x2 #define NETLINK_BROADCAST_SEND_ERROR 0x4 #define NETLINK_RECV_NO_ENOBUFS 0x8 -static inline struct netlink_sock *nlk_sk(struct sock *sk) -{ - return container_of(sk, struct netlink_sock, sk); -} - static inline int netlink_is_kernel(struct sock *sk) { return nlk_sk(sk)->flags & NETLINK_KERNEL_SOCKET; } -struct nl_portid_hash { - struct hlist_head *table; - unsigned long rehash_time; - - unsigned int mask; - unsigned int shift; - - unsigned int entries; - unsigned int max_shift; - - u32 rnd; -}; - -struct netlink_table { - struct nl_portid_hash hash; - struct hlist_head mc_list; - struct listeners __rcu *listeners; - unsigned int flags; - unsigned int groups; - struct mutex *cb_mutex; - struct module *module; - void (*bind)(int group); - int registered; -}; - -static struct netlink_table *nl_table; +struct netlink_table *nl_table; +EXPORT_SYMBOL_GPL(nl_table); static DECLARE_WAIT_QUEUE_HEAD(nl_table_wait); static int netlink_dump(struct sock *sk); +static void netlink_skb_destructor(struct sk_buff *skb); -static DEFINE_RWLOCK(nl_table_lock); +DEFINE_RWLOCK(nl_table_lock); +EXPORT_SYMBOL_GPL(nl_table_lock); static atomic_t nl_table_users = ATOMIC_INIT(0); #define nl_deref_protected(X) rcu_dereference_protected(X, lockdep_is_held(&nl_table_lock)); @@ -152,6 +111,599 @@ static inline struct hlist_head *nl_portid_hashfn(struct nl_portid_hash *hash, u return &hash->table[jhash_1word(portid, hash->rnd) & hash->mask]; } +static void netlink_overrun(struct sock *sk) +{ + struct netlink_sock *nlk = nlk_sk(sk); + + if (!(nlk->flags & NETLINK_RECV_NO_ENOBUFS)) { + if (!test_and_set_bit(NETLINK_CONGESTED, &nlk_sk(sk)->state)) { + sk->sk_err = ENOBUFS; + sk->sk_error_report(sk); + } + } + atomic_inc(&sk->sk_drops); +} + +static void 
netlink_rcv_wake(struct sock *sk) +{ + struct netlink_sock *nlk = nlk_sk(sk); + + if (skb_queue_empty(&sk->sk_receive_queue)) + clear_bit(NETLINK_CONGESTED, &nlk->state); + if (!test_bit(NETLINK_CONGESTED, &nlk->state)) + wake_up_interruptible(&nlk->wait); +} + +#ifdef CONFIG_NETLINK_MMAP +static bool netlink_skb_is_mmaped(const struct sk_buff *skb) +{ + return NETLINK_CB(skb).flags & NETLINK_SKB_MMAPED; +} + +static bool netlink_rx_is_mmaped(struct sock *sk) +{ + return nlk_sk(sk)->rx_ring.pg_vec != NULL; +} + +static bool netlink_tx_is_mmaped(struct sock *sk) +{ + return nlk_sk(sk)->tx_ring.pg_vec != NULL; +} + +static __pure struct page *pgvec_to_page(const void *addr) +{ + if (is_vmalloc_addr(addr)) + return vmalloc_to_page(addr); + else + return virt_to_page(addr); +} + +static void free_pg_vec(void **pg_vec, unsigned int order, unsigned int len) +{ + unsigned int i; + + for (i = 0; i < len; i++) { + if (pg_vec[i] != NULL) { + if (is_vmalloc_addr(pg_vec[i])) + vfree(pg_vec[i]); + else + free_pages((unsigned long)pg_vec[i], order); + } + } + kfree(pg_vec); +} + +static void *alloc_one_pg_vec_page(unsigned long order) +{ + void *buffer; + gfp_t gfp_flags = GFP_KERNEL | __GFP_COMP | __GFP_ZERO | + __GFP_NOWARN | __GFP_NORETRY; + + buffer = (void *)__get_free_pages(gfp_flags, order); + if (buffer != NULL) + return buffer; + + buffer = vzalloc((1 << order) * PAGE_SIZE); + if (buffer != NULL) + return buffer; + + gfp_flags &= ~__GFP_NORETRY; + return (void *)__get_free_pages(gfp_flags, order); +} + +static void **alloc_pg_vec(struct netlink_sock *nlk, + struct nl_mmap_req *req, unsigned int order) +{ + unsigned int block_nr = req->nm_block_nr; + unsigned int i; + void **pg_vec, *ptr; + + pg_vec = kcalloc(block_nr, sizeof(void *), GFP_KERNEL); + if (pg_vec == NULL) + return NULL; + + for (i = 0; i < block_nr; i++) { + pg_vec[i] = ptr = alloc_one_pg_vec_page(order); + if (pg_vec[i] == NULL) + goto err1; + } + + return pg_vec; +err1: + free_pg_vec(pg_vec, order, block_nr); + return NULL; +} + +static int netlink_set_ring(struct sock *sk, struct nl_mmap_req *req, + bool closing, bool tx_ring) +{ + struct netlink_sock *nlk = nlk_sk(sk); + struct netlink_ring *ring; + struct sk_buff_head *queue; + void **pg_vec = NULL; + unsigned int order = 0; + int err; + + ring = tx_ring ? &nlk->tx_ring : &nlk->rx_ring; + queue = tx_ring ? 
&sk->sk_write_queue : &sk->sk_receive_queue; + + if (!closing) { + if (atomic_read(&nlk->mapped)) + return -EBUSY; + if (atomic_read(&ring->pending)) + return -EBUSY; + } + + if (req->nm_block_nr) { + if (ring->pg_vec != NULL) + return -EBUSY; + + if ((int)req->nm_block_size <= 0) + return -EINVAL; + if (!IS_ALIGNED(req->nm_block_size, PAGE_SIZE)) + return -EINVAL; + if (req->nm_frame_size < NL_MMAP_HDRLEN) + return -EINVAL; + if (!IS_ALIGNED(req->nm_frame_size, NL_MMAP_MSG_ALIGNMENT)) + return -EINVAL; + + ring->frames_per_block = req->nm_block_size / + req->nm_frame_size; + if (ring->frames_per_block == 0) + return -EINVAL; + if (ring->frames_per_block * req->nm_block_nr != + req->nm_frame_nr) + return -EINVAL; + + order = get_order(req->nm_block_size); + pg_vec = alloc_pg_vec(nlk, req, order); + if (pg_vec == NULL) + return -ENOMEM; + } else { + if (req->nm_frame_nr) + return -EINVAL; + } + + err = -EBUSY; + mutex_lock(&nlk->pg_vec_lock); + if (closing || atomic_read(&nlk->mapped) == 0) { + err = 0; + spin_lock_bh(&queue->lock); + + ring->frame_max = req->nm_frame_nr - 1; + ring->head = 0; + ring->frame_size = req->nm_frame_size; + ring->pg_vec_pages = req->nm_block_size / PAGE_SIZE; + + swap(ring->pg_vec_len, req->nm_block_nr); + swap(ring->pg_vec_order, order); + swap(ring->pg_vec, pg_vec); + + __skb_queue_purge(queue); + spin_unlock_bh(&queue->lock); + + WARN_ON(atomic_read(&nlk->mapped)); + } + mutex_unlock(&nlk->pg_vec_lock); + + if (pg_vec) + free_pg_vec(pg_vec, order, req->nm_block_nr); + return err; +} + +static void netlink_mm_open(struct vm_area_struct *vma) +{ + struct file *file = vma->vm_file; + struct socket *sock = file->private_data; + struct sock *sk = sock->sk; + + if (sk) + atomic_inc(&nlk_sk(sk)->mapped); +} + +static void netlink_mm_close(struct vm_area_struct *vma) +{ + struct file *file = vma->vm_file; + struct socket *sock = file->private_data; + struct sock *sk = sock->sk; + + if (sk) + atomic_dec(&nlk_sk(sk)->mapped); +} + +static const struct vm_operations_struct netlink_mmap_ops = { + .open = netlink_mm_open, + .close = netlink_mm_close, +}; + +static int netlink_mmap(struct file *file, struct socket *sock, + struct vm_area_struct *vma) +{ + struct sock *sk = sock->sk; + struct netlink_sock *nlk = nlk_sk(sk); + struct netlink_ring *ring; + unsigned long start, size, expected; + unsigned int i; + int err = -EINVAL; + + if (vma->vm_pgoff) + return -EINVAL; + + mutex_lock(&nlk->pg_vec_lock); + + expected = 0; + for (ring = &nlk->rx_ring; ring <= &nlk->tx_ring; ring++) { + if (ring->pg_vec == NULL) + continue; + expected += ring->pg_vec_len * ring->pg_vec_pages * PAGE_SIZE; + } + + if (expected == 0) + goto out; + + size = vma->vm_end - vma->vm_start; + if (size != expected) + goto out; + + start = vma->vm_start; + for (ring = &nlk->rx_ring; ring <= &nlk->tx_ring; ring++) { + if (ring->pg_vec == NULL) + continue; + + for (i = 0; i < ring->pg_vec_len; i++) { + struct page *page; + void *kaddr = ring->pg_vec[i]; + unsigned int pg_num; + + for (pg_num = 0; pg_num < ring->pg_vec_pages; pg_num++) { + page = pgvec_to_page(kaddr); + err = vm_insert_page(vma, start, page); + if (err < 0) + goto out; + start += PAGE_SIZE; + kaddr += PAGE_SIZE; + } + } + } + + atomic_inc(&nlk->mapped); + vma->vm_ops = &netlink_mmap_ops; + err = 0; +out: + mutex_unlock(&nlk->pg_vec_lock); + return err; +} + +static void netlink_frame_flush_dcache(const struct nl_mmap_hdr *hdr) +{ +#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1 + struct page *p_start, *p_end; + + /* First page is flushed through 
netlink_{get,set}_status */ + p_start = pgvec_to_page((void *)hdr + PAGE_SIZE); + p_end = pgvec_to_page((void *)hdr + NL_MMAP_HDRLEN + hdr->nm_len - 1); + while (p_start <= p_end) { + flush_dcache_page(p_start); + p_start++; + } +#endif +} + +static enum nl_mmap_status netlink_get_status(const struct nl_mmap_hdr *hdr) +{ + smp_rmb(); + flush_dcache_page(pgvec_to_page(hdr)); + return hdr->nm_status; +} + +static void netlink_set_status(struct nl_mmap_hdr *hdr, + enum nl_mmap_status status) +{ + hdr->nm_status = status; + flush_dcache_page(pgvec_to_page(hdr)); + smp_wmb(); +} + +static struct nl_mmap_hdr * +__netlink_lookup_frame(const struct netlink_ring *ring, unsigned int pos) +{ + unsigned int pg_vec_pos, frame_off; + + pg_vec_pos = pos / ring->frames_per_block; + frame_off = pos % ring->frames_per_block; + + return ring->pg_vec[pg_vec_pos] + (frame_off * ring->frame_size); +} + +static struct nl_mmap_hdr * +netlink_lookup_frame(const struct netlink_ring *ring, unsigned int pos, + enum nl_mmap_status status) +{ + struct nl_mmap_hdr *hdr; + + hdr = __netlink_lookup_frame(ring, pos); + if (netlink_get_status(hdr) != status) + return NULL; + + return hdr; +} + +static struct nl_mmap_hdr * +netlink_current_frame(const struct netlink_ring *ring, + enum nl_mmap_status status) +{ + return netlink_lookup_frame(ring, ring->head, status); +} + +static struct nl_mmap_hdr * +netlink_previous_frame(const struct netlink_ring *ring, + enum nl_mmap_status status) +{ + unsigned int prev; + + prev = ring->head ? ring->head - 1 : ring->frame_max; + return netlink_lookup_frame(ring, prev, status); +} + +static void netlink_increment_head(struct netlink_ring *ring) +{ + ring->head = ring->head != ring->frame_max ? ring->head + 1 : 0; +} + +static void netlink_forward_ring(struct netlink_ring *ring) +{ + unsigned int head = ring->head, pos = head; + const struct nl_mmap_hdr *hdr; + + do { + hdr = __netlink_lookup_frame(ring, pos); + if (hdr->nm_status == NL_MMAP_STATUS_UNUSED) + break; + if (hdr->nm_status != NL_MMAP_STATUS_SKIP) + break; + netlink_increment_head(ring); + } while (ring->head != head); +} + +static bool netlink_dump_space(struct netlink_sock *nlk) +{ + struct netlink_ring *ring = &nlk->rx_ring; + struct nl_mmap_hdr *hdr; + unsigned int n; + + hdr = netlink_current_frame(ring, NL_MMAP_STATUS_UNUSED); + if (hdr == NULL) + return false; + + n = ring->head + ring->frame_max / 2; + if (n > ring->frame_max) + n -= ring->frame_max; + + hdr = __netlink_lookup_frame(ring, n); + + return hdr->nm_status == NL_MMAP_STATUS_UNUSED; +} + +static unsigned int netlink_poll(struct file *file, struct socket *sock, + poll_table *wait) +{ + struct sock *sk = sock->sk; + struct netlink_sock *nlk = nlk_sk(sk); + unsigned int mask; + int err; + + if (nlk->rx_ring.pg_vec != NULL) { + /* Memory mapped sockets don't call recvmsg(), so flow control + * for dumps is performed here. A dump is allowed to continue + * if at least half the ring is unused. 
+ */ + while (nlk->cb != NULL && netlink_dump_space(nlk)) { + err = netlink_dump(sk); + if (err < 0) { + sk->sk_err = err; + sk->sk_error_report(sk); + break; + } + } + netlink_rcv_wake(sk); + } + + mask = datagram_poll(file, sock, wait); + + spin_lock_bh(&sk->sk_receive_queue.lock); + if (nlk->rx_ring.pg_vec) { + netlink_forward_ring(&nlk->rx_ring); + if (!netlink_previous_frame(&nlk->rx_ring, NL_MMAP_STATUS_UNUSED)) + mask |= POLLIN | POLLRDNORM; + } + spin_unlock_bh(&sk->sk_receive_queue.lock); + + spin_lock_bh(&sk->sk_write_queue.lock); + if (nlk->tx_ring.pg_vec) { + if (netlink_current_frame(&nlk->tx_ring, NL_MMAP_STATUS_UNUSED)) + mask |= POLLOUT | POLLWRNORM; + } + spin_unlock_bh(&sk->sk_write_queue.lock); + + return mask; +} + +static struct nl_mmap_hdr *netlink_mmap_hdr(struct sk_buff *skb) +{ + return (struct nl_mmap_hdr *)(skb->head - NL_MMAP_HDRLEN); +} + +static void netlink_ring_setup_skb(struct sk_buff *skb, struct sock *sk, + struct netlink_ring *ring, + struct nl_mmap_hdr *hdr) +{ + unsigned int size; + void *data; + + size = ring->frame_size - NL_MMAP_HDRLEN; + data = (void *)hdr + NL_MMAP_HDRLEN; + + skb->head = data; + skb->data = data; + skb_reset_tail_pointer(skb); + skb->end = skb->tail + size; + skb->len = 0; + + skb->destructor = netlink_skb_destructor; + NETLINK_CB(skb).flags |= NETLINK_SKB_MMAPED; + NETLINK_CB(skb).sk = sk; +} + +static int netlink_mmap_sendmsg(struct sock *sk, struct msghdr *msg, + u32 dst_portid, u32 dst_group, + struct sock_iocb *siocb) +{ + struct netlink_sock *nlk = nlk_sk(sk); + struct netlink_ring *ring; + struct nl_mmap_hdr *hdr; + struct sk_buff *skb; + unsigned int maxlen; + bool excl = true; + int err = 0, len = 0; + + /* Netlink messages are validated by the receiver before processing. + * In order to avoid userspace changing the contents of the message + * after validation, the socket and the ring may only be used by a + * single process, otherwise we fall back to copying. 
+ */ + if (atomic_long_read(&sk->sk_socket->file->f_count) > 2 || + atomic_read(&nlk->mapped) > 1) + excl = false; + + mutex_lock(&nlk->pg_vec_lock); + + ring = &nlk->tx_ring; + maxlen = ring->frame_size - NL_MMAP_HDRLEN; + + do { + hdr = netlink_current_frame(ring, NL_MMAP_STATUS_VALID); + if (hdr == NULL) { + if (!(msg->msg_flags & MSG_DONTWAIT) && + atomic_read(&nlk->tx_ring.pending)) + schedule(); + continue; + } + if (hdr->nm_len > maxlen) { + err = -EINVAL; + goto out; + } + + netlink_frame_flush_dcache(hdr); + + if (likely(dst_portid == 0 && dst_group == 0 && excl)) { + skb = alloc_skb_head(GFP_KERNEL); + if (skb == NULL) { + err = -ENOBUFS; + goto out; + } + sock_hold(sk); + netlink_ring_setup_skb(skb, sk, ring, hdr); + NETLINK_CB(skb).flags |= NETLINK_SKB_TX; + __skb_put(skb, hdr->nm_len); + netlink_set_status(hdr, NL_MMAP_STATUS_RESERVED); + atomic_inc(&ring->pending); + } else { + skb = alloc_skb(hdr->nm_len, GFP_KERNEL); + if (skb == NULL) { + err = -ENOBUFS; + goto out; + } + __skb_put(skb, hdr->nm_len); + memcpy(skb->data, (void *)hdr + NL_MMAP_HDRLEN, hdr->nm_len); + netlink_set_status(hdr, NL_MMAP_STATUS_UNUSED); + } + + netlink_increment_head(ring); + + NETLINK_CB(skb).portid = nlk->portid; + NETLINK_CB(skb).dst_group = dst_group; + NETLINK_CB(skb).creds = siocb->scm->creds; + + err = security_netlink_send(sk, skb); + if (err) { + kfree_skb(skb); + goto out; + } + + if (unlikely(dst_group)) { + atomic_inc(&skb->users); + netlink_broadcast(sk, skb, dst_portid, dst_group, + GFP_KERNEL); + } + err = netlink_unicast(sk, skb, dst_portid, + msg->msg_flags & MSG_DONTWAIT); + if (err < 0) + goto out; + len += err; + + } while (hdr != NULL || + (!(msg->msg_flags & MSG_DONTWAIT) && + atomic_read(&nlk->tx_ring.pending))); + + if (len > 0) + err = len; +out: + mutex_unlock(&nlk->pg_vec_lock); + return err; +} + +static void netlink_queue_mmaped_skb(struct sock *sk, struct sk_buff *skb) +{ + struct nl_mmap_hdr *hdr; + + hdr = netlink_mmap_hdr(skb); + hdr->nm_len = skb->len; + hdr->nm_group = NETLINK_CB(skb).dst_group; + hdr->nm_pid = NETLINK_CB(skb).creds.pid; + hdr->nm_uid = from_kuid(sk_user_ns(sk), NETLINK_CB(skb).creds.uid); + hdr->nm_gid = from_kgid(sk_user_ns(sk), NETLINK_CB(skb).creds.gid); + netlink_frame_flush_dcache(hdr); + netlink_set_status(hdr, NL_MMAP_STATUS_VALID); + + NETLINK_CB(skb).flags |= NETLINK_SKB_DELIVERED; + kfree_skb(skb); +} + +static void netlink_ring_set_copied(struct sock *sk, struct sk_buff *skb) +{ + struct netlink_sock *nlk = nlk_sk(sk); + struct netlink_ring *ring = &nlk->rx_ring; + struct nl_mmap_hdr *hdr; + + spin_lock_bh(&sk->sk_receive_queue.lock); + hdr = netlink_current_frame(ring, NL_MMAP_STATUS_UNUSED); + if (hdr == NULL) { + spin_unlock_bh(&sk->sk_receive_queue.lock); + kfree_skb(skb); + netlink_overrun(sk); + return; + } + netlink_increment_head(ring); + __skb_queue_tail(&sk->sk_receive_queue, skb); + spin_unlock_bh(&sk->sk_receive_queue.lock); + + hdr->nm_len = skb->len; + hdr->nm_group = NETLINK_CB(skb).dst_group; + hdr->nm_pid = NETLINK_CB(skb).creds.pid; + hdr->nm_uid = from_kuid(sk_user_ns(sk), NETLINK_CB(skb).creds.uid); + hdr->nm_gid = from_kgid(sk_user_ns(sk), NETLINK_CB(skb).creds.gid); + netlink_set_status(hdr, NL_MMAP_STATUS_COPY); +} + +#else /* CONFIG_NETLINK_MMAP */ +#define netlink_skb_is_mmaped(skb) false +#define netlink_rx_is_mmaped(sk) false +#define netlink_tx_is_mmaped(sk) false +#define netlink_mmap sock_no_mmap +#define netlink_poll datagram_poll +#define netlink_mmap_sendmsg(sk, msg, dst_portid, dst_group, siocb) 0 
+#endif /* CONFIG_NETLINK_MMAP */ + static void netlink_destroy_callback(struct netlink_callback *cb) { kfree_skb(cb->skb); @@ -164,6 +716,53 @@ static void netlink_consume_callback(struct netlink_callback *cb) kfree(cb); } +static void netlink_skb_destructor(struct sk_buff *skb) +{ +#ifdef CONFIG_NETLINK_MMAP + struct nl_mmap_hdr *hdr; + struct netlink_ring *ring; + struct sock *sk; + + /* If a packet from the kernel to userspace was freed because of an + * error without being delivered to userspace, the kernel must reset + * the status. In the direction userspace to kernel, the status is + * always reset here after the packet was processed and freed. + */ + if (netlink_skb_is_mmaped(skb)) { + hdr = netlink_mmap_hdr(skb); + sk = NETLINK_CB(skb).sk; + + if (NETLINK_CB(skb).flags & NETLINK_SKB_TX) { + netlink_set_status(hdr, NL_MMAP_STATUS_UNUSED); + ring = &nlk_sk(sk)->tx_ring; + } else { + if (!(NETLINK_CB(skb).flags & NETLINK_SKB_DELIVERED)) { + hdr->nm_len = 0; + netlink_set_status(hdr, NL_MMAP_STATUS_VALID); + } + ring = &nlk_sk(sk)->rx_ring; + } + + WARN_ON(atomic_read(&ring->pending) == 0); + atomic_dec(&ring->pending); + sock_put(sk); + + skb->data = NULL; + } +#endif + if (skb->sk != NULL) + sock_rfree(skb); +} + +static void netlink_skb_set_owner_r(struct sk_buff *skb, struct sock *sk) +{ + WARN_ON(skb->sk != NULL); + skb->sk = sk; + skb->destructor = netlink_skb_destructor; + atomic_add(skb->truesize, &sk->sk_rmem_alloc); + sk_mem_charge(sk, skb->truesize); +} + static void netlink_sock_destruct(struct sock *sk) { struct netlink_sock *nlk = nlk_sk(sk); @@ -177,6 +776,18 @@ static void netlink_sock_destruct(struct sock *sk) } skb_queue_purge(&sk->sk_receive_queue); +#ifdef CONFIG_NETLINK_MMAP + if (1) { + struct nl_mmap_req req; + + memset(&req, 0, sizeof(req)); + if (nlk->rx_ring.pg_vec) + netlink_set_ring(sk, &req, true, false); + memset(&req, 0, sizeof(req)); + if (nlk->tx_ring.pg_vec) + netlink_set_ring(sk, &req, true, true); + } +#endif /* CONFIG_NETLINK_MMAP */ if (!sock_flag(sk, SOCK_DEAD)) { printk(KERN_ERR "Freeing alive netlink socket %p\n", sk); @@ -248,11 +859,10 @@ static struct sock *netlink_lookup(struct net *net, int protocol, u32 portid) struct nl_portid_hash *hash = &nl_table[protocol].hash; struct hlist_head *head; struct sock *sk; - struct hlist_node *node; read_lock(&nl_table_lock); head = nl_portid_hashfn(hash, portid); - sk_for_each(sk, node, head) { + sk_for_each(sk, head) { if (net_eq(sock_net(sk), net) && (nlk_sk(sk)->portid == portid)) { sock_hold(sk); goto found; @@ -312,9 +922,9 @@ static int nl_portid_hash_rehash(struct nl_portid_hash *hash, int grow) for (i = 0; i <= omask; i++) { struct sock *sk; - struct hlist_node *node, *tmp; + struct hlist_node *tmp; - sk_for_each_safe(sk, node, tmp, &otable[i]) + sk_for_each_safe(sk, tmp, &otable[i]) __sk_add_node(sk, nl_portid_hashfn(hash, nlk_sk(sk)->portid)); } @@ -344,7 +954,6 @@ static void netlink_update_listeners(struct sock *sk) { struct netlink_table *tbl = &nl_table[sk->sk_protocol]; - struct hlist_node *node; unsigned long mask; unsigned int i; struct listeners *listeners; @@ -355,7 +964,7 @@ netlink_update_listeners(struct sock *sk) for (i = 0; i < NLGRPLONGS(tbl->groups); i++) { mask = 0; - sk_for_each_bound(sk, node, &tbl->mc_list) { + sk_for_each_bound(sk, &tbl->mc_list) { if (i < NLGRPLONGS(nlk_sk(sk)->ngroups)) mask |= nlk_sk(sk)->groups[i]; } @@ -371,18 +980,17 @@ static int netlink_insert(struct sock *sk, struct net *net, u32 portid) struct hlist_head *head; int err = -EADDRINUSE; struct sock 
*osk; - struct hlist_node *node; int len; netlink_table_grab(); head = nl_portid_hashfn(hash, portid); len = 0; - sk_for_each(osk, node, head) { + sk_for_each(osk, head) { if (net_eq(sock_net(osk), net) && (nlk_sk(osk)->portid == portid)) break; len++; } - if (node) + if (osk) goto err; err = -EBUSY; @@ -443,6 +1051,9 @@ static int __netlink_create(struct net *net, struct socket *sock, mutex_init(nlk->cb_mutex); } init_waitqueue_head(&nlk->wait); +#ifdef CONFIG_NETLINK_MMAP + mutex_init(&nlk->pg_vec_lock); +#endif sk->sk_destruct = netlink_sock_destruct; sk->sk_protocol = protocol; @@ -575,7 +1186,6 @@ static int netlink_autobind(struct socket *sock) struct nl_portid_hash *hash = &nl_table[sk->sk_protocol].hash; struct hlist_head *head; struct sock *osk; - struct hlist_node *node; s32 portid = task_tgid_vnr(current); int err; static s32 rover = -4097; @@ -584,7 +1194,7 @@ retry: cond_resched(); netlink_table_grab(); head = nl_portid_hashfn(hash, portid); - sk_for_each(osk, node, head) { + sk_for_each(osk, head) { if (!net_eq(sock_net(osk), net)) continue; if (nlk_sk(osk)->portid == portid) { @@ -775,19 +1385,6 @@ static int netlink_getname(struct socket *sock, struct sockaddr *addr, return 0; } -static void netlink_overrun(struct sock *sk) -{ - struct netlink_sock *nlk = nlk_sk(sk); - - if (!(nlk->flags & NETLINK_RECV_NO_ENOBUFS)) { - if (!test_and_set_bit(0, &nlk_sk(sk)->state)) { - sk->sk_err = ENOBUFS; - sk->sk_error_report(sk); - } - } - atomic_inc(&sk->sk_drops); -} - static struct sock *netlink_getsockbyportid(struct sock *ssk, u32 portid) { struct sock *sock; @@ -809,7 +1406,7 @@ static struct sock *netlink_getsockbyportid(struct sock *ssk, u32 portid) struct sock *netlink_getsockbyfilp(struct file *filp) { - struct inode *inode = filp->f_path.dentry->d_inode; + struct inode *inode = file_inode(filp); struct sock *sock; if (!S_ISSOCK(inode->i_mode)) @@ -840,8 +1437,9 @@ int netlink_attachskb(struct sock *sk, struct sk_buff *skb, nlk = nlk_sk(sk); - if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf || - test_bit(0, &nlk->state)) { + if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf || + test_bit(NETLINK_CONGESTED, &nlk->state)) && + !netlink_skb_is_mmaped(skb)) { DECLARE_WAITQUEUE(wait, current); if (!*timeo) { if (!ssk || netlink_is_kernel(ssk)) @@ -855,7 +1453,7 @@ int netlink_attachskb(struct sock *sk, struct sk_buff *skb, add_wait_queue(&nlk->wait, &wait); if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf || - test_bit(0, &nlk->state)) && + test_bit(NETLINK_CONGESTED, &nlk->state)) && !sock_flag(sk, SOCK_DEAD)) *timeo = schedule_timeout(*timeo); @@ -869,7 +1467,7 @@ int netlink_attachskb(struct sock *sk, struct sk_buff *skb, } return 1; } - skb_set_owner_r(skb, sk); + netlink_skb_set_owner_r(skb, sk); return 0; } @@ -877,7 +1475,14 @@ static int __netlink_sendskb(struct sock *sk, struct sk_buff *skb) { int len = skb->len; - skb_queue_tail(&sk->sk_receive_queue, skb); +#ifdef CONFIG_NETLINK_MMAP + if (netlink_skb_is_mmaped(skb)) + netlink_queue_mmaped_skb(sk, skb); + else if (netlink_rx_is_mmaped(sk)) + netlink_ring_set_copied(sk, skb); + else +#endif /* CONFIG_NETLINK_MMAP */ + skb_queue_tail(&sk->sk_receive_queue, skb); sk->sk_data_ready(sk, len); return len; } @@ -900,7 +1505,9 @@ static struct sk_buff *netlink_trim(struct sk_buff *skb, gfp_t allocation) { int delta; - skb_orphan(skb); + WARN_ON(skb->sk != NULL); + if (netlink_skb_is_mmaped(skb)) + return skb; delta = skb->end - skb->tail; if (delta * 2 < skb->truesize) @@ -920,16 +1527,6 @@ static struct sk_buff 
*netlink_trim(struct sk_buff *skb, gfp_t allocation) return skb; } -static void netlink_rcv_wake(struct sock *sk) -{ - struct netlink_sock *nlk = nlk_sk(sk); - - if (skb_queue_empty(&sk->sk_receive_queue)) - clear_bit(0, &nlk->state); - if (!test_bit(0, &nlk->state)) - wake_up_interruptible(&nlk->wait); -} - static int netlink_unicast_kernel(struct sock *sk, struct sk_buff *skb, struct sock *ssk) { @@ -939,8 +1536,8 @@ static int netlink_unicast_kernel(struct sock *sk, struct sk_buff *skb, ret = -ECONNREFUSED; if (nlk->netlink_rcv != NULL) { ret = skb->len; - skb_set_owner_r(skb, sk); - NETLINK_CB(skb).ssk = ssk; + netlink_skb_set_owner_r(skb, sk); + NETLINK_CB(skb).sk = ssk; nlk->netlink_rcv(skb); consume_skb(skb); } else { @@ -986,6 +1583,69 @@ retry: } EXPORT_SYMBOL(netlink_unicast); +struct sk_buff *netlink_alloc_skb(struct sock *ssk, unsigned int size, + u32 dst_portid, gfp_t gfp_mask) +{ +#ifdef CONFIG_NETLINK_MMAP + struct sock *sk = NULL; + struct sk_buff *skb; + struct netlink_ring *ring; + struct nl_mmap_hdr *hdr; + unsigned int maxlen; + + sk = netlink_getsockbyportid(ssk, dst_portid); + if (IS_ERR(sk)) + goto out; + + ring = &nlk_sk(sk)->rx_ring; + /* fast-path without atomic ops for common case: non-mmaped receiver */ + if (ring->pg_vec == NULL) + goto out_put; + + skb = alloc_skb_head(gfp_mask); + if (skb == NULL) + goto err1; + + spin_lock_bh(&sk->sk_receive_queue.lock); + /* check again under lock */ + if (ring->pg_vec == NULL) + goto out_free; + + maxlen = ring->frame_size - NL_MMAP_HDRLEN; + if (maxlen < size) + goto out_free; + + netlink_forward_ring(ring); + hdr = netlink_current_frame(ring, NL_MMAP_STATUS_UNUSED); + if (hdr == NULL) + goto err2; + netlink_ring_setup_skb(skb, sk, ring, hdr); + netlink_set_status(hdr, NL_MMAP_STATUS_RESERVED); + atomic_inc(&ring->pending); + netlink_increment_head(ring); + + spin_unlock_bh(&sk->sk_receive_queue.lock); + return skb; + +err2: + kfree_skb(skb); + spin_unlock_bh(&sk->sk_receive_queue.lock); + netlink_overrun(sk); +err1: + sock_put(sk); + return NULL; + +out_free: + kfree_skb(skb); + spin_unlock_bh(&sk->sk_receive_queue.lock); +out_put: + sock_put(sk); +out: +#endif + return alloc_skb(size, gfp_mask); +} +EXPORT_SYMBOL_GPL(netlink_alloc_skb); + int netlink_has_listeners(struct sock *sk, unsigned int group) { int res = 0; @@ -1010,8 +1670,8 @@ static int netlink_broadcast_deliver(struct sock *sk, struct sk_buff *skb) struct netlink_sock *nlk = nlk_sk(sk); if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf && - !test_bit(0, &nlk->state)) { - skb_set_owner_r(skb, sk); + !test_bit(NETLINK_CONGESTED, &nlk->state)) { + netlink_skb_set_owner_r(skb, sk); __netlink_sendskb(sk, skb); return atomic_read(&sk->sk_rmem_alloc) > (sk->sk_rcvbuf >> 1); } @@ -1101,7 +1761,6 @@ int netlink_broadcast_filtered(struct sock *ssk, struct sk_buff *skb, u32 portid { struct net *net = sock_net(ssk); struct netlink_broadcast_data info; - struct hlist_node *node; struct sock *sk; skb = netlink_trim(skb, allocation); @@ -1124,7 +1783,7 @@ int netlink_broadcast_filtered(struct sock *ssk, struct sk_buff *skb, u32 portid netlink_lock_table(); - sk_for_each_bound(sk, node, &nl_table[ssk->sk_protocol].mc_list) + sk_for_each_bound(sk, &nl_table[ssk->sk_protocol].mc_list) do_one_broadcast(sk, &info); consume_skb(skb); @@ -1200,7 +1859,6 @@ out: int netlink_set_err(struct sock *ssk, u32 portid, u32 group, int code) { struct netlink_set_err_data info; - struct hlist_node *node; struct sock *sk; int ret = 0; @@ -1212,7 +1870,7 @@ int netlink_set_err(struct sock 
*ssk, u32 portid, u32 group, int code) read_lock(&nl_table_lock); - sk_for_each_bound(sk, node, &nl_table[ssk->sk_protocol].mc_list) + sk_for_each_bound(sk, &nl_table[ssk->sk_protocol].mc_list) ret += do_one_set_err(sk, &info); read_unlock(&nl_table_lock); @@ -1248,7 +1906,8 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname, if (level != SOL_NETLINK) return -ENOPROTOOPT; - if (optlen >= sizeof(int) && + if (optname != NETLINK_RX_RING && optname != NETLINK_TX_RING && + optlen >= sizeof(int) && get_user(val, (unsigned int __user *)optval)) return -EFAULT; @@ -1290,13 +1949,32 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname, case NETLINK_NO_ENOBUFS: if (val) { nlk->flags |= NETLINK_RECV_NO_ENOBUFS; - clear_bit(0, &nlk->state); + clear_bit(NETLINK_CONGESTED, &nlk->state); wake_up_interruptible(&nlk->wait); } else { nlk->flags &= ~NETLINK_RECV_NO_ENOBUFS; } err = 0; break; +#ifdef CONFIG_NETLINK_MMAP + case NETLINK_RX_RING: + case NETLINK_TX_RING: { + struct nl_mmap_req req; + + /* Rings might consume more memory than queue limits, require + * CAP_NET_ADMIN. + */ + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + if (optlen < sizeof(req)) + return -EINVAL; + if (copy_from_user(&req, optval, sizeof(req))) + return -EFAULT; + err = netlink_set_ring(sk, &req, false, + optname == NETLINK_TX_RING); + break; + } +#endif /* CONFIG_NETLINK_MMAP */ default: err = -ENOPROTOOPT; } @@ -1407,6 +2085,13 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, goto out; } + if (netlink_tx_is_mmaped(sk) && + msg->msg_iov->iov_base == NULL) { + err = netlink_mmap_sendmsg(sk, msg, dst_portid, dst_group, + siocb); + goto out; + } + err = -EMSGSIZE; if (len > sk->sk_sndbuf - 32) goto out; @@ -1676,10 +2361,9 @@ int netlink_change_ngroups(struct sock *sk, unsigned int groups) void __netlink_clear_multicast_users(struct sock *ksk, unsigned int group) { struct sock *sk; - struct hlist_node *node; struct netlink_table *tbl = &nl_table[ksk->sk_protocol]; - sk_for_each_bound(sk, node, &tbl->mc_list) + sk_for_each_bound(sk, &tbl->mc_list) netlink_update_socket_mc(nlk_sk(sk), group, 0); } @@ -1702,7 +2386,7 @@ struct nlmsghdr * __nlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, int type, int len, int flags) { struct nlmsghdr *nlh; - int size = NLMSG_LENGTH(len); + int size = nlmsg_msg_size(len); nlh = (struct nlmsghdr*)skb_put(skb, NLMSG_ALIGN(size)); nlh->nlmsg_type = type; @@ -1711,7 +2395,7 @@ __nlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, int type, int len, int fla nlh->nlmsg_pid = portid; nlh->nlmsg_seq = seq; if (!__builtin_constant_p(size) || NLMSG_ALIGN(size) - size != 0) - memset(NLMSG_DATA(nlh) + len, 0, NLMSG_ALIGN(size) - size); + memset(nlmsg_data(nlh) + len, 0, NLMSG_ALIGN(size) - size); return nlh; } EXPORT_SYMBOL(__nlmsg_put); @@ -1740,9 +2424,13 @@ static int netlink_dump(struct sock *sk) alloc_size = max_t(int, cb->min_dump_alloc, NLMSG_GOODSIZE); - skb = sock_rmalloc(sk, alloc_size, 0, GFP_KERNEL); + if (!netlink_rx_is_mmaped(sk) && + atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) + goto errout_skb; + skb = netlink_alloc_skb(sk, alloc_size, nlk->portid, GFP_KERNEL); if (!skb) goto errout_skb; + netlink_skb_set_owner_r(skb, sk); len = cb->dump(skb, cb); @@ -1797,13 +2485,25 @@ int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb, if (cb == NULL) return -ENOBUFS; + /* Memory mapped dump requests need to be copied to avoid looping + * on the pending state in netlink_mmap_sendmsg() while the CB holds + * a reference 
to the skb. + */ + if (netlink_skb_is_mmaped(skb)) { + skb = skb_copy(skb, GFP_KERNEL); + if (skb == NULL) { + kfree(cb); + return -ENOBUFS; + } + } else + atomic_inc(&skb->users); + cb->dump = control->dump; cb->done = control->done; cb->nlh = nlh; cb->data = control->data; cb->module = control->module; cb->min_dump_alloc = control->min_dump_alloc; - atomic_inc(&skb->users); cb->skb = skb; sk = netlink_lookup(sock_net(ssk), ssk->sk_protocol, NETLINK_CB(skb).portid); @@ -1857,7 +2557,8 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err) if (err) payload += nlmsg_len(nlh); - skb = nlmsg_new(payload, GFP_KERNEL); + skb = netlink_alloc_skb(in_skb->sk, nlmsg_total_size(payload), + NETLINK_CB(in_skb).portid, GFP_KERNEL); if (!skb) { struct sock *sk; @@ -1974,14 +2675,13 @@ static struct sock *netlink_seq_socket_idx(struct seq_file *seq, loff_t pos) struct nl_seq_iter *iter = seq->private; int i, j; struct sock *s; - struct hlist_node *node; loff_t off = 0; for (i = 0; i < MAX_LINKS; i++) { struct nl_portid_hash *hash = &nl_table[i].hash; for (j = 0; j <= hash->mask; j++) { - sk_for_each(s, node, &hash->table[j]) { + sk_for_each(s, &hash->table[j]) { if (sock_net(s) != seq_file_net(seq)) continue; if (off == pos) { @@ -2124,7 +2824,7 @@ static const struct proto_ops netlink_ops = { .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = netlink_getname, - .poll = datagram_poll, + .poll = netlink_poll, .ioctl = sock_no_ioctl, .listen = sock_no_listen, .shutdown = sock_no_shutdown, @@ -2132,7 +2832,7 @@ static const struct proto_ops netlink_ops = { .getsockopt = netlink_getsockopt, .sendmsg = netlink_sendmsg, .recvmsg = netlink_recvmsg, - .mmap = sock_no_mmap, + .mmap = netlink_mmap, .sendpage = sock_no_sendpage, }; @@ -2145,7 +2845,7 @@ static const struct net_proto_family netlink_family_ops = { static int __net_init netlink_net_init(struct net *net) { #ifdef CONFIG_PROC_FS - if (!proc_net_fops_create(net, "netlink", 0, &netlink_seq_fops)) + if (!proc_create("netlink", 0, net->proc_net, &netlink_seq_fops)) return -ENOMEM; #endif return 0; @@ -2154,7 +2854,7 @@ static int __net_init netlink_net_init(struct net *net) static void __net_exit netlink_net_exit(struct net *net) { #ifdef CONFIG_PROC_FS - proc_net_remove(net, "netlink"); + remove_proc_entry("netlink", net->proc_net); #endif } @@ -2185,7 +2885,6 @@ static struct pernet_operations __net_initdata netlink_net_ops = { static int __init netlink_proto_init(void) { - struct sk_buff *dummy_skb; int i; unsigned long limit; unsigned int order; @@ -2194,7 +2893,7 @@ static int __init netlink_proto_init(void) if (err != 0) goto out; - BUILD_BUG_ON(sizeof(struct netlink_skb_parms) > sizeof(dummy_skb->cb)); + BUILD_BUG_ON(sizeof(struct netlink_skb_parms) > FIELD_SIZEOF(struct sk_buff, cb)); nl_table = kcalloc(MAX_LINKS, sizeof(*nl_table), GFP_KERNEL); if (!nl_table) diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h new file mode 100644 index 000000000000..ed8522265f4e --- /dev/null +++ b/net/netlink/af_netlink.h @@ -0,0 +1,82 @@ +#ifndef _AF_NETLINK_H +#define _AF_NETLINK_H + +#include <net/sock.h> + +#define NLGRPSZ(x) (ALIGN(x, sizeof(unsigned long) * 8) / 8) +#define NLGRPLONGS(x) (NLGRPSZ(x)/sizeof(unsigned long)) + +struct netlink_ring { + void **pg_vec; + unsigned int head; + unsigned int frames_per_block; + unsigned int frame_size; + unsigned int frame_max; + + unsigned int pg_vec_order; + unsigned int pg_vec_pages; + unsigned int pg_vec_len; + + atomic_t pending; +}; + +struct netlink_sock { + 
/* struct sock has to be the first member of netlink_sock */ + struct sock sk; + u32 portid; + u32 dst_portid; + u32 dst_group; + u32 flags; + u32 subscriptions; + u32 ngroups; + unsigned long *groups; + unsigned long state; + wait_queue_head_t wait; + struct netlink_callback *cb; + struct mutex *cb_mutex; + struct mutex cb_def_mutex; + void (*netlink_rcv)(struct sk_buff *skb); + void (*netlink_bind)(int group); + struct module *module; +#ifdef CONFIG_NETLINK_MMAP + struct mutex pg_vec_lock; + struct netlink_ring rx_ring; + struct netlink_ring tx_ring; + atomic_t mapped; +#endif /* CONFIG_NETLINK_MMAP */ +}; + +static inline struct netlink_sock *nlk_sk(struct sock *sk) +{ + return container_of(sk, struct netlink_sock, sk); +} + +struct nl_portid_hash { + struct hlist_head *table; + unsigned long rehash_time; + + unsigned int mask; + unsigned int shift; + + unsigned int entries; + unsigned int max_shift; + + u32 rnd; +}; + +struct netlink_table { + struct nl_portid_hash hash; + struct hlist_head mc_list; + struct listeners __rcu *listeners; + unsigned int flags; + unsigned int groups; + struct mutex *cb_mutex; + struct module *module; + void (*bind)(int group); + int registered; +}; + +extern struct netlink_table *nl_table; +extern rwlock_t nl_table_lock; + +#endif diff --git a/net/netlink/diag.c b/net/netlink/diag.c new file mode 100644 index 000000000000..1af29624b92f --- /dev/null +++ b/net/netlink/diag.c @@ -0,0 +1,227 @@ +#include <linux/module.h> + +#include <net/sock.h> +#include <linux/netlink.h> +#include <linux/sock_diag.h> +#include <linux/netlink_diag.h> + +#include "af_netlink.h" + +#ifdef CONFIG_NETLINK_MMAP +static int sk_diag_put_ring(struct netlink_ring *ring, int nl_type, + struct sk_buff *nlskb) +{ + struct netlink_diag_ring ndr; + + ndr.ndr_block_size = ring->pg_vec_pages << PAGE_SHIFT; + ndr.ndr_block_nr = ring->pg_vec_len; + ndr.ndr_frame_size = ring->frame_size; + ndr.ndr_frame_nr = ring->frame_max + 1; + + return nla_put(nlskb, nl_type, sizeof(ndr), &ndr); +} + +static int sk_diag_put_rings_cfg(struct sock *sk, struct sk_buff *nlskb) +{ + struct netlink_sock *nlk = nlk_sk(sk); + int ret; + + mutex_lock(&nlk->pg_vec_lock); + ret = sk_diag_put_ring(&nlk->rx_ring, NETLINK_DIAG_RX_RING, nlskb); + if (!ret) + ret = sk_diag_put_ring(&nlk->tx_ring, NETLINK_DIAG_TX_RING, + nlskb); + mutex_unlock(&nlk->pg_vec_lock); + + return ret; +} +#else +static int sk_diag_put_rings_cfg(struct sock *sk, struct sk_buff *nlskb) +{ + return 0; +} +#endif + +static int sk_diag_dump_groups(struct sock *sk, struct sk_buff *nlskb) +{ + struct netlink_sock *nlk = nlk_sk(sk); + + if (nlk->groups == NULL) + return 0; + + return nla_put(nlskb, NETLINK_DIAG_GROUPS, NLGRPSZ(nlk->ngroups), + nlk->groups); +} + +static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, + struct netlink_diag_req *req, + u32 portid, u32 seq, u32 flags, int sk_ino) +{ + struct nlmsghdr *nlh; + struct netlink_diag_msg *rep; + struct netlink_sock *nlk = nlk_sk(sk); + + nlh = nlmsg_put(skb, portid, seq, SOCK_DIAG_BY_FAMILY, sizeof(*rep), + flags); + if (!nlh) + return -EMSGSIZE; + + rep = nlmsg_data(nlh); + rep->ndiag_family = AF_NETLINK; + rep->ndiag_type = sk->sk_type; + rep->ndiag_protocol = sk->sk_protocol; + rep->ndiag_state = sk->sk_state; + + rep->ndiag_ino = sk_ino; + rep->ndiag_portid = nlk->portid; + rep->ndiag_dst_portid = nlk->dst_portid; + rep->ndiag_dst_group = nlk->dst_group; + sock_diag_save_cookie(sk, rep->ndiag_cookie); + + if ((req->ndiag_show & NDIAG_SHOW_GROUPS) && + sk_diag_dump_groups(sk, skb)) + 
goto out_nlmsg_trim; + + if ((req->ndiag_show & NDIAG_SHOW_MEMINFO) && + sock_diag_put_meminfo(sk, skb, NETLINK_DIAG_MEMINFO)) + goto out_nlmsg_trim; + + if ((req->ndiag_show & NDIAG_SHOW_RING_CFG) && + sk_diag_put_rings_cfg(sk, skb)) + goto out_nlmsg_trim; + + return nlmsg_end(skb, nlh); + +out_nlmsg_trim: + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; +} + +static int __netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, + int protocol, int s_num) +{ + struct netlink_table *tbl = &nl_table[protocol]; + struct nl_portid_hash *hash = &tbl->hash; + struct net *net = sock_net(skb->sk); + struct netlink_diag_req *req; + struct sock *sk; + int ret = 0, num = 0, i; + + req = nlmsg_data(cb->nlh); + + for (i = 0; i <= hash->mask; i++) { + sk_for_each(sk, &hash->table[i]) { + if (!net_eq(sock_net(sk), net)) + continue; + if (num < s_num) { + num++; + continue; + } + + if (sk_diag_fill(sk, skb, req, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + NLM_F_MULTI, + sock_i_ino(sk)) < 0) { + ret = 1; + goto done; + } + + num++; + } + } + + sk_for_each_bound(sk, &tbl->mc_list) { + if (sk_hashed(sk)) + continue; + if (!net_eq(sock_net(sk), net)) + continue; + if (num < s_num) { + num++; + continue; + } + + if (sk_diag_fill(sk, skb, req, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + NLM_F_MULTI, + sock_i_ino(sk)) < 0) { + ret = 1; + goto done; + } + num++; + } +done: + cb->args[0] = num; + cb->args[1] = protocol; + + return ret; +} + +static int netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct netlink_diag_req *req; + int s_num = cb->args[0]; + + req = nlmsg_data(cb->nlh); + + read_lock(&nl_table_lock); + + if (req->sdiag_protocol == NDIAG_PROTO_ALL) { + int i; + + for (i = cb->args[1]; i < MAX_LINKS; i++) { + if (__netlink_diag_dump(skb, cb, i, s_num)) + break; + s_num = 0; + } + } else { + if (req->sdiag_protocol >= MAX_LINKS) { + read_unlock(&nl_table_lock); + return -ENOENT; + } + + __netlink_diag_dump(skb, cb, req->sdiag_protocol, s_num); + } + + read_unlock(&nl_table_lock); + + return skb->len; +} + +static int netlink_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h) +{ + int hdrlen = sizeof(struct netlink_diag_req); + struct net *net = sock_net(skb->sk); + + if (nlmsg_len(h) < hdrlen) + return -EINVAL; + + if (h->nlmsg_flags & NLM_F_DUMP) { + struct netlink_dump_control c = { + .dump = netlink_diag_dump, + }; + return netlink_dump_start(net->diag_nlsk, skb, h, &c); + } else + return -EOPNOTSUPP; +} + +static const struct sock_diag_handler netlink_diag_handler = { + .family = AF_NETLINK, + .dump = netlink_diag_handler_dump, +}; + +static int __init netlink_diag_init(void) +{ + return sock_diag_register(&netlink_diag_handler); +} + +static void __exit netlink_diag_exit(void) +{ + sock_diag_unregister(&netlink_diag_handler); +} + +module_init(netlink_diag_init); +module_exit(netlink_diag_exit); +MODULE_LICENSE("GPL"); +MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 16 /* AF_NETLINK */); diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index f2aabb6f4105..2fd6dbea327a 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -16,10 +16,12 @@ #include <linux/skbuff.h> #include <linux/mutex.h> #include <linux/bitmap.h> +#include <linux/rwsem.h> #include <net/sock.h> #include <net/genetlink.h> static DEFINE_MUTEX(genl_mutex); /* serialization of message processing */ +static DECLARE_RWSEM(cb_lock); void genl_lock(void) { @@ -41,6 +43,18 @@ int lockdep_genl_is_held(void) 
EXPORT_SYMBOL(lockdep_genl_is_held); #endif +static void genl_lock_all(void) +{ + down_write(&cb_lock); + genl_lock(); +} + +static void genl_unlock_all(void) +{ + genl_unlock(); + up_write(&cb_lock); +} + #define GENL_FAM_TAB_SIZE 16 #define GENL_FAM_TAB_MASK (GENL_FAM_TAB_SIZE - 1) @@ -142,8 +156,9 @@ int genl_register_mc_group(struct genl_family *family, int err = 0; BUG_ON(grp->name[0] == '\0'); + BUG_ON(memchr(grp->name, '\0', GENL_NAMSIZ) == NULL); - genl_lock(); + genl_lock_all(); /* special-case our own group */ if (grp == &notify_grp) @@ -212,7 +227,7 @@ int genl_register_mc_group(struct genl_family *family, genl_ctrl_event(CTRL_CMD_NEWMCAST_GRP, grp); out: - genl_unlock(); + genl_unlock_all(); return err; } EXPORT_SYMBOL(genl_register_mc_group); @@ -254,9 +269,9 @@ static void __genl_unregister_mc_group(struct genl_family *family, void genl_unregister_mc_group(struct genl_family *family, struct genl_multicast_group *grp) { - genl_lock(); + genl_lock_all(); __genl_unregister_mc_group(family, grp); - genl_unlock(); + genl_unlock_all(); } EXPORT_SYMBOL(genl_unregister_mc_group); @@ -302,9 +317,9 @@ int genl_register_ops(struct genl_family *family, struct genl_ops *ops) if (ops->policy) ops->flags |= GENL_CMD_CAP_HASPOL; - genl_lock(); + genl_lock_all(); list_add_tail(&ops->ops_list, &family->ops_list); - genl_unlock(); + genl_unlock_all(); genl_ctrl_event(CTRL_CMD_NEWOPS, ops); err = 0; @@ -333,16 +348,16 @@ int genl_unregister_ops(struct genl_family *family, struct genl_ops *ops) { struct genl_ops *rc; - genl_lock(); + genl_lock_all(); list_for_each_entry(rc, &family->ops_list, ops_list) { if (rc == ops) { list_del(&ops->ops_list); - genl_unlock(); + genl_unlock_all(); genl_ctrl_event(CTRL_CMD_DELOPS, ops); return 0; } } - genl_unlock(); + genl_unlock_all(); return -ENOENT; } @@ -372,7 +387,7 @@ int genl_register_family(struct genl_family *family) INIT_LIST_HEAD(&family->ops_list); INIT_LIST_HEAD(&family->mcast_groups); - genl_lock(); + genl_lock_all(); if (genl_family_find_byname(family->name)) { err = -EEXIST; @@ -393,7 +408,7 @@ int genl_register_family(struct genl_family *family) goto errout_locked; } - if (family->maxattr) { + if (family->maxattr && !family->parallel_ops) { family->attrbuf = kmalloc((family->maxattr+1) * sizeof(struct nlattr *), GFP_KERNEL); if (family->attrbuf == NULL) { @@ -404,14 +419,14 @@ int genl_register_family(struct genl_family *family) family->attrbuf = NULL; list_add_tail(&family->family_list, genl_family_chain(family->id)); - genl_unlock(); + genl_unlock_all(); genl_ctrl_event(CTRL_CMD_NEWFAMILY, family); return 0; errout_locked: - genl_unlock(); + genl_unlock_all(); errout: return err; } @@ -475,7 +490,7 @@ int genl_unregister_family(struct genl_family *family) { struct genl_family *rc; - genl_lock(); + genl_lock_all(); genl_unregister_mc_groups(family); @@ -485,14 +500,14 @@ int genl_unregister_family(struct genl_family *family) list_del(&rc->family_list); INIT_LIST_HEAD(&family->ops_list); - genl_unlock(); + genl_unlock_all(); kfree(family->attrbuf); genl_ctrl_event(CTRL_CMD_DELFAMILY, family); return 0; } - genl_unlock(); + genl_unlock_all(); return -ENOENT; } @@ -529,19 +544,17 @@ void *genlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, } EXPORT_SYMBOL(genlmsg_put); -static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) +static int genl_family_rcv_msg(struct genl_family *family, + struct sk_buff *skb, + struct nlmsghdr *nlh) { struct genl_ops *ops; - struct genl_family *family; struct net *net = sock_net(skb->sk); struct genl_info 
info; struct genlmsghdr *hdr = nlmsg_data(nlh); + struct nlattr **attrbuf; int hdrlen, err; - family = genl_family_find_byid(nlh->nlmsg_type); - if (family == NULL) - return -ENOENT; - /* this family doesn't exist in this netns */ if (!family->netnsok && !net_eq(net, &init_net)) return -ENOENT; @@ -559,29 +572,33 @@ static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) return -EPERM; if (nlh->nlmsg_flags & NLM_F_DUMP) { + struct netlink_dump_control c = { + .dump = ops->dumpit, + .done = ops->done, + }; + if (ops->dumpit == NULL) return -EOPNOTSUPP; - genl_unlock(); - { - struct netlink_dump_control c = { - .dump = ops->dumpit, - .done = ops->done, - }; - err = netlink_dump_start(net->genl_sock, skb, nlh, &c); - } - genl_lock(); - return err; + return netlink_dump_start(net->genl_sock, skb, nlh, &c); } if (ops->doit == NULL) return -EOPNOTSUPP; - if (family->attrbuf) { - err = nlmsg_parse(nlh, hdrlen, family->attrbuf, family->maxattr, + if (family->maxattr && family->parallel_ops) { + attrbuf = kmalloc((family->maxattr+1) * + sizeof(struct nlattr *), GFP_KERNEL); + if (attrbuf == NULL) + return -ENOMEM; + } else + attrbuf = family->attrbuf; + + if (attrbuf) { + err = nlmsg_parse(nlh, hdrlen, attrbuf, family->maxattr, ops->policy); if (err < 0) - return err; + goto out; } info.snd_seq = nlh->nlmsg_seq; @@ -589,14 +606,14 @@ static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) info.nlhdr = nlh; info.genlhdr = nlmsg_data(nlh); info.userhdr = nlmsg_data(nlh) + GENL_HDRLEN; - info.attrs = family->attrbuf; + info.attrs = attrbuf; genl_info_net_set(&info, net); memset(&info.user_ptr, 0, sizeof(info.user_ptr)); if (family->pre_doit) { err = family->pre_doit(ops, skb, &info); if (err) - return err; + goto out; } err = ops->doit(skb, &info); @@ -604,14 +621,38 @@ static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) if (family->post_doit) family->post_doit(ops, skb, &info); +out: + if (family->parallel_ops) + kfree(attrbuf); + + return err; +} + +static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) +{ + struct genl_family *family; + int err; + + family = genl_family_find_byid(nlh->nlmsg_type); + if (family == NULL) + return -ENOENT; + + if (!family->parallel_ops) + genl_lock(); + + err = genl_family_rcv_msg(family, skb, nlh); + + if (!family->parallel_ops) + genl_unlock(); + return err; } static void genl_rcv(struct sk_buff *skb) { - genl_lock(); + down_read(&cb_lock); netlink_rcv_skb(skb, &genl_rcv_msg); - genl_unlock(); + up_read(&cb_lock); } /************************************************************************** @@ -917,7 +958,6 @@ static int __net_init genl_pernet_init(struct net *net) { struct netlink_kernel_cfg cfg = { .input = genl_rcv, - .cb_mutex = &genl_mutex, .flags = NL_CFG_F_NONROOT_RECV, }; diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index 7261eb81974f..ec0c80fde69f 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -104,10 +104,9 @@ static void nr_remove_socket(struct sock *sk) static void nr_kill_by_device(struct net_device *dev) { struct sock *s; - struct hlist_node *node; spin_lock_bh(&nr_list_lock); - sk_for_each(s, node, &nr_list) + sk_for_each(s, &nr_list) if (nr_sk(s)->device == dev) nr_disconnect(s, ENETUNREACH); spin_unlock_bh(&nr_list_lock); @@ -149,10 +148,9 @@ static void nr_insert_socket(struct sock *sk) static struct sock *nr_find_listener(ax25_address *addr) { struct sock *s; - struct hlist_node *node; spin_lock_bh(&nr_list_lock); - sk_for_each(s, node, &nr_list) + sk_for_each(s, 
&nr_list) if (!ax25cmp(&nr_sk(s)->source_addr, addr) && s->sk_state == TCP_LISTEN) { bh_lock_sock(s); @@ -170,10 +168,9 @@ found: static struct sock *nr_find_socket(unsigned char index, unsigned char id) { struct sock *s; - struct hlist_node *node; spin_lock_bh(&nr_list_lock); - sk_for_each(s, node, &nr_list) { + sk_for_each(s, &nr_list) { struct nr_sock *nr = nr_sk(s); if (nr->my_index == index && nr->my_id == id) { @@ -194,10 +191,9 @@ static struct sock *nr_find_peer(unsigned char index, unsigned char id, ax25_address *dest) { struct sock *s; - struct hlist_node *node; spin_lock_bh(&nr_list_lock); - sk_for_each(s, node, &nr_list) { + sk_for_each(s, &nr_list) { struct nr_sock *nr = nr_sk(s); if (nr->your_index == index && nr->your_id == id && @@ -838,6 +834,8 @@ static int nr_getname(struct socket *sock, struct sockaddr *uaddr, struct sock *sk = sock->sk; struct nr_sock *nr = nr_sk(sk); + memset(&sax->fsa_ax25, 0, sizeof(struct sockaddr_ax25)); + lock_sock(sk); if (peer != 0) { if (sk->sk_state != TCP_ESTABLISHED) { @@ -1177,6 +1175,7 @@ static int nr_recvmsg(struct kiocb *iocb, struct socket *sock, } if (sax != NULL) { + memset(sax, 0, sizeof(*sax)); sax->sax25_family = AF_NETROM; skb_copy_from_linear_data_offset(skb, 7, sax->sax25_call.ax25_call, AX25_ADDR_LEN); @@ -1452,9 +1451,9 @@ static int __init nr_proto_init(void) nr_loopback_init(); - proc_net_fops_create(&init_net, "nr", S_IRUGO, &nr_info_fops); - proc_net_fops_create(&init_net, "nr_neigh", S_IRUGO, &nr_neigh_fops); - proc_net_fops_create(&init_net, "nr_nodes", S_IRUGO, &nr_nodes_fops); + proc_create("nr", S_IRUGO, init_net.proc_net, &nr_info_fops); + proc_create("nr_neigh", S_IRUGO, init_net.proc_net, &nr_neigh_fops); + proc_create("nr_nodes", S_IRUGO, init_net.proc_net, &nr_nodes_fops); out: return rc; fail: @@ -1482,9 +1481,9 @@ static void __exit nr_exit(void) { int i; - proc_net_remove(&init_net, "nr"); - proc_net_remove(&init_net, "nr_neigh"); - proc_net_remove(&init_net, "nr_nodes"); + remove_proc_entry("nr", init_net.proc_net); + remove_proc_entry("nr_neigh", init_net.proc_net); + remove_proc_entry("nr_nodes", init_net.proc_net); nr_loopback_clear(); nr_rt_free(); diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c index 70ffff76a967..b976d5eff2de 100644 --- a/net/netrom/nr_route.c +++ b/net/netrom/nr_route.c @@ -49,10 +49,9 @@ static struct nr_node *nr_node_get(ax25_address *callsign) { struct nr_node *found = NULL; struct nr_node *nr_node; - struct hlist_node *node; spin_lock_bh(&nr_node_list_lock); - nr_node_for_each(nr_node, node, &nr_node_list) + nr_node_for_each(nr_node, &nr_node_list) if (ax25cmp(callsign, &nr_node->callsign) == 0) { nr_node_hold(nr_node); found = nr_node; @@ -67,10 +66,9 @@ static struct nr_neigh *nr_neigh_get_dev(ax25_address *callsign, { struct nr_neigh *found = NULL; struct nr_neigh *nr_neigh; - struct hlist_node *node; spin_lock_bh(&nr_neigh_list_lock); - nr_neigh_for_each(nr_neigh, node, &nr_neigh_list) + nr_neigh_for_each(nr_neigh, &nr_neigh_list) if (ax25cmp(callsign, &nr_neigh->callsign) == 0 && nr_neigh->dev == dev) { nr_neigh_hold(nr_neigh); @@ -114,10 +112,9 @@ static int __must_check nr_add_node(ax25_address *nr, const char *mnemonic, */ if (nr_neigh != NULL && nr_neigh->failed != 0 && quality == 0) { struct nr_node *nr_nodet; - struct hlist_node *node; spin_lock_bh(&nr_node_list_lock); - nr_node_for_each(nr_nodet, node, &nr_node_list) { + nr_node_for_each(nr_nodet, &nr_node_list) { nr_node_lock(nr_nodet); for (i = 0; i < nr_nodet->count; i++) if 
(nr_nodet->routes[i].neighbour == nr_neigh) @@ -485,11 +482,11 @@ static int nr_dec_obs(void) { struct nr_neigh *nr_neigh; struct nr_node *s; - struct hlist_node *node, *nodet; + struct hlist_node *nodet; int i; spin_lock_bh(&nr_node_list_lock); - nr_node_for_each_safe(s, node, nodet, &nr_node_list) { + nr_node_for_each_safe(s, nodet, &nr_node_list) { nr_node_lock(s); for (i = 0; i < s->count; i++) { switch (s->routes[i].obs_count) { @@ -540,15 +537,15 @@ static int nr_dec_obs(void) void nr_rt_device_down(struct net_device *dev) { struct nr_neigh *s; - struct hlist_node *node, *nodet, *node2, *node2t; + struct hlist_node *nodet, *node2t; struct nr_node *t; int i; spin_lock_bh(&nr_neigh_list_lock); - nr_neigh_for_each_safe(s, node, nodet, &nr_neigh_list) { + nr_neigh_for_each_safe(s, nodet, &nr_neigh_list) { if (s->dev == dev) { spin_lock_bh(&nr_node_list_lock); - nr_node_for_each_safe(t, node2, node2t, &nr_node_list) { + nr_node_for_each_safe(t, node2t, &nr_node_list) { nr_node_lock(t); for (i = 0; i < t->count; i++) { if (t->routes[i].neighbour == s) { @@ -737,11 +734,10 @@ int nr_rt_ioctl(unsigned int cmd, void __user *arg) void nr_link_failed(ax25_cb *ax25, int reason) { struct nr_neigh *s, *nr_neigh = NULL; - struct hlist_node *node; struct nr_node *nr_node = NULL; spin_lock_bh(&nr_neigh_list_lock); - nr_neigh_for_each(s, node, &nr_neigh_list) { + nr_neigh_for_each(s, &nr_neigh_list) { if (s->ax25 == ax25) { nr_neigh_hold(s); nr_neigh = s; @@ -761,7 +757,7 @@ void nr_link_failed(ax25_cb *ax25, int reason) return; } spin_lock_bh(&nr_node_list_lock); - nr_node_for_each(nr_node, node, &nr_node_list) { + nr_node_for_each(nr_node, &nr_node_list) { nr_node_lock(nr_node); if (nr_node->which < nr_node->count && nr_node->routes[nr_node->which].neighbour == nr_neigh) @@ -1013,16 +1009,16 @@ void __exit nr_rt_free(void) { struct nr_neigh *s = NULL; struct nr_node *t = NULL; - struct hlist_node *node, *nodet; + struct hlist_node *nodet; spin_lock_bh(&nr_neigh_list_lock); spin_lock_bh(&nr_node_list_lock); - nr_node_for_each_safe(t, node, nodet, &nr_node_list) { + nr_node_for_each_safe(t, nodet, &nr_node_list) { nr_node_lock(t); nr_remove_node_locked(t); nr_node_unlock(t); } - nr_neigh_for_each_safe(s, node, nodet, &nr_neigh_list) { + nr_neigh_for_each_safe(s, nodet, &nr_neigh_list) { while(s->count) { s->count--; nr_neigh_put(s); diff --git a/net/nfc/Kconfig b/net/nfc/Kconfig index 60c3bbb63e8e..5948b2fc72f6 100644 --- a/net/nfc/Kconfig +++ b/net/nfc/Kconfig @@ -4,6 +4,7 @@ menuconfig NFC depends on NET + depends on RFKILL || !RFKILL tristate "NFC subsystem support" default n help @@ -15,6 +16,5 @@ menuconfig NFC source "net/nfc/nci/Kconfig" source "net/nfc/hci/Kconfig" -source "net/nfc/llcp/Kconfig" source "drivers/nfc/Kconfig" diff --git a/net/nfc/Makefile b/net/nfc/Makefile index d1a117c2c401..fb799deaed4f 100644 --- a/net/nfc/Makefile +++ b/net/nfc/Makefile @@ -5,6 +5,8 @@ obj-$(CONFIG_NFC) += nfc.o obj-$(CONFIG_NFC_NCI) += nci/ obj-$(CONFIG_NFC_HCI) += hci/ +#obj-$(CONFIG_NFC_LLCP) += llcp/ + +nfc-objs := core.o netlink.o af_nfc.o rawsock.o llcp_core.o llcp_commands.o \ + llcp_sock.o -nfc-objs := core.o netlink.o af_nfc.o rawsock.o -nfc-$(CONFIG_NFC_LLCP) += llcp/llcp.o llcp/commands.o llcp/sock.o diff --git a/net/nfc/core.c b/net/nfc/core.c index aa64ea441676..40d2527693da 100644 --- a/net/nfc/core.c +++ b/net/nfc/core.c @@ -27,6 +27,7 @@ #include <linux/kernel.h> #include <linux/module.h> #include <linux/slab.h> +#include <linux/rfkill.h> #include <linux/nfc.h> #include <net/genetlink.h> @@ 
-58,6 +59,11 @@ int nfc_dev_up(struct nfc_dev *dev) device_lock(&dev->dev); + if (dev->rfkill && rfkill_blocked(dev->rfkill)) { + rc = -ERFKILL; + goto error; + } + if (!device_is_registered(&dev->dev)) { rc = -ENODEV; goto error; @@ -117,6 +123,24 @@ error: return rc; } +static int nfc_rfkill_set_block(void *data, bool blocked) +{ + struct nfc_dev *dev = data; + + pr_debug("%s blocked %d", dev_name(&dev->dev), blocked); + + if (!blocked) + return 0; + + nfc_dev_down(dev); + + return 0; +} + +static const struct rfkill_ops nfc_rfkill_ops = { + .set_block = nfc_rfkill_set_block, +}; + /** * nfc_start_poll - start polling for nfc targets * @@ -143,6 +167,11 @@ int nfc_start_poll(struct nfc_dev *dev, u32 im_protocols, u32 tm_protocols) goto error; } + if (!dev->dev_up) { + rc = -ENODEV; + goto error; + } + if (dev->polling) { rc = -EBUSY; goto error; @@ -338,7 +367,7 @@ int nfc_activate_target(struct nfc_dev *dev, u32 target_idx, u32 protocol) dev->active_target = target; dev->rf_mode = NFC_RF_INITIATOR; - if (dev->ops->check_presence) + if (dev->ops->check_presence && !dev->shutting_down) mod_timer(&dev->check_pres_timer, jiffies + msecs_to_jiffies(NFC_CHECK_PRES_FREQ_MS)); } @@ -429,7 +458,7 @@ int nfc_data_exchange(struct nfc_dev *dev, u32 target_idx, struct sk_buff *skb, rc = dev->ops->im_transceive(dev, dev->active_target, skb, cb, cb_context); - if (!rc && dev->ops->check_presence) + if (!rc && dev->ops->check_presence && !dev->shutting_down) mod_timer(&dev->check_pres_timer, jiffies + msecs_to_jiffies(NFC_CHECK_PRES_FREQ_MS)); } else if (dev->rf_mode == NFC_RF_TARGET && dev->ops->tm_send != NULL) { @@ -684,11 +713,6 @@ static void nfc_release(struct device *d) pr_debug("dev_name=%s\n", dev_name(&dev->dev)); - if (dev->ops->check_presence) { - del_timer_sync(&dev->check_pres_timer); - cancel_work_sync(&dev->check_pres_work); - } - nfc_genl_data_exit(&dev->genl_data); kfree(dev->targets); kfree(dev); @@ -706,15 +730,16 @@ static void nfc_check_pres_work(struct work_struct *work) rc = dev->ops->check_presence(dev, dev->active_target); if (rc == -EOPNOTSUPP) goto exit; - if (!rc) { - mod_timer(&dev->check_pres_timer, jiffies + - msecs_to_jiffies(NFC_CHECK_PRES_FREQ_MS)); - } else { + if (rc) { u32 active_target_idx = dev->active_target->idx; device_unlock(&dev->dev); nfc_target_lost(dev, active_target_idx); return; } + + if (!dev->shutting_down) + mod_timer(&dev->check_pres_timer, jiffies + + msecs_to_jiffies(NFC_CHECK_PRES_FREQ_MS)); } exit: @@ -734,10 +759,10 @@ struct class nfc_class = { }; EXPORT_SYMBOL(nfc_class); -static int match_idx(struct device *d, void *data) +static int match_idx(struct device *d, const void *data) { struct nfc_dev *dev = to_nfc_dev(d); - unsigned int *idx = data; + const unsigned int *idx = data; return dev->idx == *idx; } @@ -761,6 +786,7 @@ struct nfc_dev *nfc_get_device(unsigned int idx) */ struct nfc_dev *nfc_allocate_device(struct nfc_ops *ops, u32 supported_protocols, + u32 supported_se, int tx_headroom, int tx_tailroom) { struct nfc_dev *dev; @@ -778,6 +804,8 @@ struct nfc_dev *nfc_allocate_device(struct nfc_ops *ops, dev->ops = ops; dev->supported_protocols = supported_protocols; + dev->supported_se = supported_se; + dev->active_se = NFC_SE_NONE; dev->tx_headroom = tx_headroom; dev->tx_tailroom = tx_tailroom; @@ -836,6 +864,15 @@ int nfc_register_device(struct nfc_dev *dev) pr_debug("The userspace won't be notified that the device %s was added\n", dev_name(&dev->dev)); + dev->rfkill = rfkill_alloc(dev_name(&dev->dev), &dev->dev, + RFKILL_TYPE_NFC, 
&nfc_rfkill_ops, dev); + if (dev->rfkill) { + if (rfkill_register(dev->rfkill) < 0) { + rfkill_destroy(dev->rfkill); + dev->rfkill = NULL; + } + } + return 0; } EXPORT_SYMBOL(nfc_register_device); @@ -853,26 +890,32 @@ void nfc_unregister_device(struct nfc_dev *dev) id = dev->idx; - mutex_lock(&nfc_devlist_mutex); - nfc_devlist_generation++; + if (dev->rfkill) { + rfkill_unregister(dev->rfkill); + rfkill_destroy(dev->rfkill); + } - /* lock to avoid unregistering a device while an operation - is in progress */ - device_lock(&dev->dev); - device_del(&dev->dev); - device_unlock(&dev->dev); + if (dev->ops->check_presence) { + device_lock(&dev->dev); + dev->shutting_down = true; + device_unlock(&dev->dev); + del_timer_sync(&dev->check_pres_timer); + cancel_work_sync(&dev->check_pres_work); + } - mutex_unlock(&nfc_devlist_mutex); + rc = nfc_genl_device_removed(dev); + if (rc) + pr_debug("The userspace won't be notified that the device %s " + "was removed\n", dev_name(&dev->dev)); nfc_llcp_unregister_device(dev); - rc = nfc_genl_device_removed(dev); - if (rc) - pr_debug("The userspace won't be notified that the device %s was removed\n", - dev_name(&dev->dev)); + mutex_lock(&nfc_devlist_mutex); + nfc_devlist_generation++; + device_del(&dev->dev); + mutex_unlock(&nfc_devlist_mutex); ida_simple_remove(&nfc_index_ida, id); - } EXPORT_SYMBOL(nfc_unregister_device); diff --git a/net/nfc/hci/command.c b/net/nfc/hci/command.c index 7d99410e6c1a..64f922be9281 100644 --- a/net/nfc/hci/command.c +++ b/net/nfc/hci/command.c @@ -280,14 +280,19 @@ static int nfc_hci_delete_pipe(struct nfc_hci_dev *hdev, u8 pipe) static int nfc_hci_clear_all_pipes(struct nfc_hci_dev *hdev) { u8 param[2]; + size_t param_len = 2; /* TODO: Find out what the identity reference data is * and fill param with it. HCI spec 6.1.3.5 */ pr_debug("\n"); + if (test_bit(NFC_HCI_QUIRK_SHORT_CLEAR, &hdev->quirks)) + param_len = 0; + return nfc_hci_execute_cmd(hdev, NFC_HCI_ADMIN_PIPE, - NFC_HCI_ADM_CLEAR_ALL_PIPE, param, 2, NULL); + NFC_HCI_ADM_CLEAR_ALL_PIPE, param, param_len, + NULL); } int nfc_hci_disconnect_gate(struct nfc_hci_dev *hdev, u8 gate) diff --git a/net/nfc/hci/core.c b/net/nfc/hci/core.c index 7bea574d5934..91020b210d87 100644 --- a/net/nfc/hci/core.c +++ b/net/nfc/hci/core.c @@ -57,6 +57,8 @@ static void nfc_hci_msg_tx_work(struct work_struct *work) int r = 0; mutex_lock(&hdev->msg_tx_mutex); + if (hdev->shutting_down) + goto exit; if (hdev->cmd_pending_msg) { if (timer_pending(&hdev->cmd_timer) == 0) { @@ -295,6 +297,12 @@ void nfc_hci_event_received(struct nfc_hci_dev *hdev, u8 pipe, u8 event, goto exit; } + if (hdev->ops->event_received) { + r = hdev->ops->event_received(hdev, gate, event, skb); + if (r <= 0) + goto exit_noskb; + } + switch (event) { case NFC_HCI_EVT_TARGET_DISCOVERED: if (skb->len < 1) { /* no status data? */ @@ -320,17 +328,15 @@ void nfc_hci_event_received(struct nfc_hci_dev *hdev, u8 pipe, u8 event, r = nfc_hci_target_discovered(hdev, gate); break; default: - if (hdev->ops->event_received) { - hdev->ops->event_received(hdev, gate, event, skb); - return; - } - + pr_info("Discarded unknown event %x to gate %x\n", event, gate); + r = -EINVAL; break; } exit: kfree_skb(skb); +exit_noskb: if (r) { /* TODO: There was an error dispatching the event, * how to propagate up to nfc core? 
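/* Editor's sketch, not part of the patch above: with the reordered dispatch
 * in nfc_hci_event_received(), a driver's event_received hook now runs
 * before the HCI core's standard event handling. Judging from the hunk,
 * returning 1 defers to the core, which runs its event switch and frees the
 * skb; returning 0 or a negative value branches to exit_noskb, so the driver
 * keeps ownership of the skb. A minimal hypothetical hook, where
 * foo_hci_event_received(), FOO_PROPRIETARY_GATE and foo_handle_evt() are
 * illustrative names only:
 */
static int foo_hci_event_received(struct nfc_hci_dev *hdev, u8 gate,
				  u8 event, struct sk_buff *skb)
{
	if (gate != FOO_PROPRIETARY_GATE)
		return 1;	/* let the core's switch handle standard events */

	foo_handle_evt(hdev, event, skb);	/* driver consumes the skb */
	return 0;	/* core skips its processing and its kfree_skb() */
}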
@@ -669,8 +675,10 @@ static int hci_tm_send(struct nfc_dev *nfc_dev, struct sk_buff *skb) if (hdev->ops->tm_send) return hdev->ops->tm_send(hdev, skb); - else - return -ENOTSUPP; + + kfree_skb(skb); + + return -ENOTSUPP; } static int hci_check_presence(struct nfc_dev *nfc_dev, @@ -787,7 +795,9 @@ static struct nfc_ops hci_nfc_ops = { struct nfc_hci_dev *nfc_hci_allocate_device(struct nfc_hci_ops *ops, struct nfc_hci_init_data *init_data, + unsigned long quirks, u32 protocols, + u32 supported_se, const char *llc_name, int tx_headroom, int tx_tailroom, @@ -813,7 +823,7 @@ struct nfc_hci_dev *nfc_hci_allocate_device(struct nfc_hci_ops *ops, return NULL; } - hdev->ndev = nfc_allocate_device(&hci_nfc_ops, protocols, + hdev->ndev = nfc_allocate_device(&hci_nfc_ops, protocols, supported_se, tx_headroom + HCI_CMDS_HEADROOM, tx_tailroom); if (!hdev->ndev) { @@ -830,6 +840,8 @@ struct nfc_hci_dev *nfc_hci_allocate_device(struct nfc_hci_ops *ops, memset(hdev->gate2pipe, NFC_HCI_INVALID_PIPE, sizeof(hdev->gate2pipe)); + hdev->quirks = quirks; + return hdev; } EXPORT_SYMBOL(nfc_hci_allocate_device); @@ -868,6 +880,28 @@ void nfc_hci_unregister_device(struct nfc_hci_dev *hdev) { struct hci_msg *msg, *n; + mutex_lock(&hdev->msg_tx_mutex); + + if (hdev->cmd_pending_msg) { + if (hdev->cmd_pending_msg->cb) + hdev->cmd_pending_msg->cb( + hdev->cmd_pending_msg->cb_context, + NULL, -ESHUTDOWN); + kfree(hdev->cmd_pending_msg); + hdev->cmd_pending_msg = NULL; + } + + hdev->shutting_down = true; + + mutex_unlock(&hdev->msg_tx_mutex); + + del_timer_sync(&hdev->cmd_timer); + cancel_work_sync(&hdev->msg_tx_work); + + cancel_work_sync(&hdev->msg_rx_work); + + nfc_unregister_device(hdev->ndev); + skb_queue_purge(&hdev->rx_hcp_frags); skb_queue_purge(&hdev->msg_rx_queue); @@ -876,13 +910,6 @@ void nfc_hci_unregister_device(struct nfc_hci_dev *hdev) skb_queue_purge(&msg->msg_frags); kfree(msg); } - - del_timer_sync(&hdev->cmd_timer); - - nfc_unregister_device(hdev->ndev); - - cancel_work_sync(&hdev->msg_tx_work); - cancel_work_sync(&hdev->msg_rx_work); } EXPORT_SYMBOL(nfc_hci_unregister_device); diff --git a/net/nfc/hci/hcp.c b/net/nfc/hci/hcp.c index bc308a7ca609..b6b4109f2343 100644 --- a/net/nfc/hci/hcp.c +++ b/net/nfc/hci/hcp.c @@ -105,6 +105,13 @@ int nfc_hci_hcp_message_tx(struct nfc_hci_dev *hdev, u8 pipe, } mutex_lock(&hdev->msg_tx_mutex); + + if (hdev->shutting_down) { + err = -ESHUTDOWN; + mutex_unlock(&hdev->msg_tx_mutex); + goto out_skb_err; + } + list_add_tail(&cmd->msg_l, &hdev->msg_tx_queue); mutex_unlock(&hdev->msg_tx_mutex); diff --git a/net/nfc/llcp/llcp.h b/net/nfc/llcp.h index 0d62366f8cc3..ff8c434f7df8 100644 --- a/net/nfc/llcp/llcp.h +++ b/net/nfc/llcp.h @@ -31,6 +31,7 @@ enum llcp_state { #define LLCP_MAX_LTO 0xff #define LLCP_MAX_RW 15 #define LLCP_MAX_MIUX 0x7ff +#define LLCP_MAX_MIU (LLCP_MAX_MIUX + 128) #define LLCP_WKS_NUM_SAP 16 #define LLCP_SDP_NUM_SAP 16 @@ -46,6 +47,19 @@ struct llcp_sock_list { rwlock_t lock; }; +struct nfc_llcp_sdp_tlv { + u8 *tlv; + u8 tlv_len; + + char *uri; + u8 tid; + u8 sap; + + unsigned long time; + + struct hlist_node node; +}; + struct nfc_llcp_local { struct list_head list; struct nfc_dev *dev; @@ -86,6 +100,12 @@ struct nfc_llcp_local { u8 remote_opt; u16 remote_wks; + struct mutex sdreq_lock; + struct hlist_head pending_sdreqs; + struct timer_list sdreq_timer; + struct work_struct sdreq_timeout_work; + u8 sdreq_next_tid; + /* sockets array */ struct llcp_sock_list sockets; struct llcp_sock_list connecting_sockets; @@ -105,7 +125,12 @@ struct nfc_llcp_sock { 
char *service_name; size_t service_name_len; u8 rw; - u16 miu; + __be16 miux; + + + /* Remote link parameters */ + u8 remote_rw; + u16 remote_miu; /* Link variables */ u8 send_n; @@ -121,7 +146,6 @@ struct nfc_llcp_sock { struct sk_buff_head tx_queue; struct sk_buff_head tx_pending_queue; - struct sk_buff_head tx_backlog_queue; struct list_head accept_queue; struct sock *parent; @@ -139,6 +163,7 @@ struct nfc_llcp_ui_cb { #define LLCP_HEADER_SIZE 2 #define LLCP_SEQUENCE_SIZE 1 +#define LLCP_AGF_PDU_HEADER_SIZE 2 /* LLCP versions: 1.1 is 1.0 plus SDP */ #define LLCP_VERSION_10 0x10 @@ -187,6 +212,7 @@ struct nfc_llcp_ui_cb { void nfc_llcp_sock_link(struct llcp_sock_list *l, struct sock *s); void nfc_llcp_sock_unlink(struct llcp_sock_list *l, struct sock *s); +void nfc_llcp_socket_remote_param_init(struct nfc_llcp_sock *sock); struct nfc_llcp_local *nfc_llcp_find_local(struct nfc_dev *dev); struct nfc_llcp_local *nfc_llcp_local_get(struct nfc_llcp_local *local); int nfc_llcp_local_put(struct nfc_llcp_local *local); @@ -214,12 +240,20 @@ int nfc_llcp_parse_connection_tlv(struct nfc_llcp_sock *sock, /* Commands API */ void nfc_llcp_recv(void *data, struct sk_buff *skb, int err); u8 *nfc_llcp_build_tlv(u8 type, u8 *value, u8 value_length, u8 *tlv_length); +struct nfc_llcp_sdp_tlv *nfc_llcp_build_sdres_tlv(u8 tid, u8 sap); +struct nfc_llcp_sdp_tlv *nfc_llcp_build_sdreq_tlv(u8 tid, char *uri, + size_t uri_len); +void nfc_llcp_free_sdp_tlv(struct nfc_llcp_sdp_tlv *sdp); +void nfc_llcp_free_sdp_tlv_list(struct hlist_head *sdp_head); void nfc_llcp_recv(void *data, struct sk_buff *skb, int err); int nfc_llcp_disconnect(struct nfc_llcp_sock *sock); int nfc_llcp_send_symm(struct nfc_dev *dev); int nfc_llcp_send_connect(struct nfc_llcp_sock *sock); int nfc_llcp_send_cc(struct nfc_llcp_sock *sock); -int nfc_llcp_send_snl(struct nfc_llcp_local *local, u8 tid, u8 sap); +int nfc_llcp_send_snl_sdres(struct nfc_llcp_local *local, + struct hlist_head *tlv_list, size_t tlvs_len); +int nfc_llcp_send_snl_sdreq(struct nfc_llcp_local *local, + struct hlist_head *tlv_list, size_t tlvs_len); int nfc_llcp_send_dm(struct nfc_llcp_local *local, u8 ssap, u8 dsap, u8 reason); int nfc_llcp_send_disconnect(struct nfc_llcp_sock *sock); int nfc_llcp_send_i_frame(struct nfc_llcp_sock *sock, diff --git a/net/nfc/llcp/Kconfig b/net/nfc/llcp/Kconfig deleted file mode 100644 index a1a41cd68255..000000000000 --- a/net/nfc/llcp/Kconfig +++ /dev/null @@ -1,7 +0,0 @@ -config NFC_LLCP - depends on NFC - bool "NFC LLCP support" - default n - help - Say Y here if you want to build support for a kernel NFC LLCP - implementation.
\ No newline at end of file diff --git a/net/nfc/llcp/commands.c b/net/nfc/llcp_commands.c index df24be48d4da..c1b23eef83ca 100644 --- a/net/nfc/llcp/commands.c +++ b/net/nfc/llcp_commands.c @@ -26,7 +26,7 @@ #include <net/nfc/nfc.h> -#include "../nfc.h" +#include "nfc.h" #include "llcp.h" static u8 llcp_tlv_length[LLCP_TLV_MAX] = { @@ -117,6 +117,88 @@ u8 *nfc_llcp_build_tlv(u8 type, u8 *value, u8 value_length, u8 *tlv_length) return tlv; } +struct nfc_llcp_sdp_tlv *nfc_llcp_build_sdres_tlv(u8 tid, u8 sap) +{ + struct nfc_llcp_sdp_tlv *sdres; + u8 value[2]; + + sdres = kzalloc(sizeof(struct nfc_llcp_sdp_tlv), GFP_KERNEL); + if (sdres == NULL) + return NULL; + + value[0] = tid; + value[1] = sap; + + sdres->tlv = nfc_llcp_build_tlv(LLCP_TLV_SDRES, value, 2, + &sdres->tlv_len); + if (sdres->tlv == NULL) { + kfree(sdres); + return NULL; + } + + sdres->tid = tid; + sdres->sap = sap; + + INIT_HLIST_NODE(&sdres->node); + + return sdres; +} + +struct nfc_llcp_sdp_tlv *nfc_llcp_build_sdreq_tlv(u8 tid, char *uri, + size_t uri_len) +{ + struct nfc_llcp_sdp_tlv *sdreq; + + pr_debug("uri: %s, len: %zu\n", uri, uri_len); + + sdreq = kzalloc(sizeof(struct nfc_llcp_sdp_tlv), GFP_KERNEL); + if (sdreq == NULL) + return NULL; + + sdreq->tlv_len = uri_len + 3; + + if (uri[uri_len - 1] == 0) + sdreq->tlv_len--; + + sdreq->tlv = kzalloc(sdreq->tlv_len + 1, GFP_KERNEL); + if (sdreq->tlv == NULL) { + kfree(sdreq); + return NULL; + } + + sdreq->tlv[0] = LLCP_TLV_SDREQ; + sdreq->tlv[1] = sdreq->tlv_len - 2; + sdreq->tlv[2] = tid; + + sdreq->tid = tid; + sdreq->uri = sdreq->tlv + 3; + memcpy(sdreq->uri, uri, uri_len); + + sdreq->time = jiffies; + + INIT_HLIST_NODE(&sdreq->node); + + return sdreq; +} + +void nfc_llcp_free_sdp_tlv(struct nfc_llcp_sdp_tlv *sdp) +{ + kfree(sdp->tlv); + kfree(sdp); +} + +void nfc_llcp_free_sdp_tlv_list(struct hlist_head *head) +{ + struct nfc_llcp_sdp_tlv *sdp; + struct hlist_node *n; + + hlist_for_each_entry_safe(sdp, n, head, node) { + hlist_del(&sdp->node); + + nfc_llcp_free_sdp_tlv(sdp); + } +} + int nfc_llcp_parse_gb_tlv(struct nfc_llcp_local *local, u8 *tlv_array, u16 tlv_array_len) { @@ -184,10 +266,10 @@ int nfc_llcp_parse_connection_tlv(struct nfc_llcp_sock *sock, switch (type) { case LLCP_TLV_MIUX: - sock->miu = llcp_tlv_miux(tlv) + 128; + sock->remote_miu = llcp_tlv_miux(tlv) + 128; break; case LLCP_TLV_RW: - sock->rw = llcp_tlv_rw(tlv); + sock->remote_rw = llcp_tlv_rw(tlv); break; case LLCP_TLV_SN: break; @@ -200,7 +282,8 @@ int nfc_llcp_parse_connection_tlv(struct nfc_llcp_sock *sock, tlv += length + 2; } - pr_debug("sock %p rw %d miu %d\n", sock, sock->rw, sock->miu); + pr_debug("sock %p rw %d miu %d\n", sock, + sock->remote_rw, sock->remote_miu); return 0; } @@ -304,6 +387,8 @@ int nfc_llcp_send_symm(struct nfc_dev *dev) skb = llcp_add_header(skb, 0, 0, LLCP_PDU_SYMM); + __net_timestamp(skb); + nfc_llcp_send_to_raw_sock(local, skb, NFC_LLCP_DIRECTION_TX); return nfc_data_exchange(dev, local->target_idx, skb, @@ -316,9 +401,9 @@ int nfc_llcp_send_connect(struct nfc_llcp_sock *sock) struct sk_buff *skb; u8 *service_name_tlv = NULL, service_name_tlv_length; u8 *miux_tlv = NULL, miux_tlv_length; - u8 *rw_tlv = NULL, rw_tlv_length; + u8 *rw_tlv = NULL, rw_tlv_length, rw; int err; - u16 size = 0; + u16 size = 0, miux; pr_debug("Sending CONNECT\n"); @@ -334,11 +419,16 @@ int nfc_llcp_send_connect(struct nfc_llcp_sock *sock) size += service_name_tlv_length; } - miux_tlv = nfc_llcp_build_tlv(LLCP_TLV_MIUX, (u8 *)&local->miux, 0, + /* If the socket parameters are not set, use the 
local ones */ + miux = be16_to_cpu(sock->miux) > LLCP_MAX_MIUX ? + local->miux : sock->miux; + rw = sock->rw > LLCP_MAX_RW ? local->rw : sock->rw; + + miux_tlv = nfc_llcp_build_tlv(LLCP_TLV_MIUX, (u8 *)&miux, 0, &miux_tlv_length); size += miux_tlv_length; - rw_tlv = nfc_llcp_build_tlv(LLCP_TLV_RW, &local->rw, 0, &rw_tlv_length); + rw_tlv = nfc_llcp_build_tlv(LLCP_TLV_RW, &rw, 0, &rw_tlv_length); size += rw_tlv_length; pr_debug("SKB size %d SN length %zu\n", size, sock->service_name_len); @@ -375,9 +465,9 @@ int nfc_llcp_send_cc(struct nfc_llcp_sock *sock) struct nfc_llcp_local *local; struct sk_buff *skb; u8 *miux_tlv = NULL, miux_tlv_length; - u8 *rw_tlv = NULL, rw_tlv_length; + u8 *rw_tlv = NULL, rw_tlv_length, rw; int err; - u16 size = 0; + u16 size = 0, miux; pr_debug("Sending CC\n"); @@ -385,11 +475,16 @@ int nfc_llcp_send_cc(struct nfc_llcp_sock *sock) if (local == NULL) return -ENODEV; - miux_tlv = nfc_llcp_build_tlv(LLCP_TLV_MIUX, (u8 *)&local->miux, 0, + /* If the socket parameters are not set, use the local ones */ + miux = be16_to_cpu(sock->miux) > LLCP_MAX_MIUX ? + local->miux : sock->miux; + rw = sock->rw > LLCP_MAX_RW ? local->rw : sock->rw; + + miux_tlv = nfc_llcp_build_tlv(LLCP_TLV_MIUX, (u8 *)&miux, 0, &miux_tlv_length); size += miux_tlv_length; - rw_tlv = nfc_llcp_build_tlv(LLCP_TLV_RW, &local->rw, 0, &rw_tlv_length); + rw_tlv = nfc_llcp_build_tlv(LLCP_TLV_RW, &rw, 0, &rw_tlv_length); size += rw_tlv_length; skb = llcp_allocate_pdu(sock, LLCP_PDU_CC, size); @@ -414,48 +509,90 @@ error_tlv: return err; } -int nfc_llcp_send_snl(struct nfc_llcp_local *local, u8 tid, u8 sap) +static struct sk_buff *nfc_llcp_allocate_snl(struct nfc_llcp_local *local, + size_t tlv_length) { struct sk_buff *skb; struct nfc_dev *dev; - u8 *sdres_tlv = NULL, sdres_tlv_length, sdres[2]; u16 size = 0; - pr_debug("Sending SNL tid 0x%x sap 0x%x\n", tid, sap); - if (local == NULL) - return -ENODEV; + return ERR_PTR(-ENODEV); dev = local->dev; if (dev == NULL) - return -ENODEV; - - sdres[0] = tid; - sdres[1] = sap; - sdres_tlv = nfc_llcp_build_tlv(LLCP_TLV_SDRES, sdres, 0, - &sdres_tlv_length); - if (sdres_tlv == NULL) - return -ENOMEM; + return ERR_PTR(-ENODEV); size += LLCP_HEADER_SIZE; size += dev->tx_headroom + dev->tx_tailroom + NFC_HEADER_SIZE; - size += sdres_tlv_length; + size += tlv_length; skb = alloc_skb(size, GFP_KERNEL); - if (skb == NULL) { - kfree(sdres_tlv); - return -ENOMEM; - } + if (skb == NULL) + return ERR_PTR(-ENOMEM); skb_reserve(skb, dev->tx_headroom + NFC_HEADER_SIZE); skb = llcp_add_header(skb, LLCP_SAP_SDP, LLCP_SAP_SDP, LLCP_PDU_SNL); - memcpy(skb_put(skb, sdres_tlv_length), sdres_tlv, sdres_tlv_length); + return skb; +} + +int nfc_llcp_send_snl_sdres(struct nfc_llcp_local *local, + struct hlist_head *tlv_list, size_t tlvs_len) +{ + struct nfc_llcp_sdp_tlv *sdp; + struct hlist_node *n; + struct sk_buff *skb; + + skb = nfc_llcp_allocate_snl(local, tlvs_len); + if (IS_ERR(skb)) + return PTR_ERR(skb); + + hlist_for_each_entry_safe(sdp, n, tlv_list, node) { + memcpy(skb_put(skb, sdp->tlv_len), sdp->tlv, sdp->tlv_len); + + hlist_del(&sdp->node); + + nfc_llcp_free_sdp_tlv(sdp); + } skb_queue_tail(&local->tx_queue, skb); - kfree(sdres_tlv); + return 0; +} + +int nfc_llcp_send_snl_sdreq(struct nfc_llcp_local *local, + struct hlist_head *tlv_list, size_t tlvs_len) +{ + struct nfc_llcp_sdp_tlv *sdreq; + struct hlist_node *n; + struct sk_buff *skb; + + skb = nfc_llcp_allocate_snl(local, tlvs_len); + if (IS_ERR(skb)) + return PTR_ERR(skb); + + mutex_lock(&local->sdreq_lock); + + if 
(hlist_empty(&local->pending_sdreqs)) + mod_timer(&local->sdreq_timer, + jiffies + msecs_to_jiffies(3 * local->remote_lto)); + + hlist_for_each_entry_safe(sdreq, n, tlv_list, node) { + pr_debug("tid %d for %s\n", sdreq->tid, sdreq->uri); + + memcpy(skb_put(skb, sdreq->tlv_len), sdreq->tlv, + sdreq->tlv_len); + + hlist_del(&sdreq->node); + + hlist_add_head(&sdreq->node, &local->pending_sdreqs); + } + + mutex_unlock(&local->sdreq_lock); + + skb_queue_tail(&local->tx_queue, skb); return 0; } @@ -521,6 +658,7 @@ int nfc_llcp_send_i_frame(struct nfc_llcp_sock *sock, struct nfc_llcp_local *local; size_t frag_len = 0, remaining_len; u8 *msg_data, *msg_ptr; + u16 remote_miu; pr_debug("Send I frame len %zd\n", len); @@ -530,8 +668,8 @@ int nfc_llcp_send_i_frame(struct nfc_llcp_sock *sock, /* Remote is ready but has not acknowledged our frames */ if((sock->remote_ready && - skb_queue_len(&sock->tx_pending_queue) >= sock->rw && - skb_queue_len(&sock->tx_queue) >= 2 * sock->rw)) { + skb_queue_len(&sock->tx_pending_queue) >= sock->remote_rw && + skb_queue_len(&sock->tx_queue) >= 2 * sock->remote_rw)) { pr_err("Pending queue is full %d frames\n", skb_queue_len(&sock->tx_pending_queue)); return -ENOBUFS; @@ -539,7 +677,7 @@ int nfc_llcp_send_i_frame(struct nfc_llcp_sock *sock, /* Remote is not ready and we've been queueing enough frames */ if ((!sock->remote_ready && - skb_queue_len(&sock->tx_queue) >= 2 * sock->rw)) { + skb_queue_len(&sock->tx_queue) >= 2 * sock->remote_rw)) { pr_err("Tx queue is full %d frames\n", skb_queue_len(&sock->tx_queue)); return -ENOBUFS; @@ -557,9 +695,11 @@ int nfc_llcp_send_i_frame(struct nfc_llcp_sock *sock, remaining_len = len; msg_ptr = msg_data; - while (remaining_len > 0) { + do { + remote_miu = sock->remote_miu > LLCP_MAX_MIU ? + local->remote_miu : sock->remote_miu; - frag_len = min_t(size_t, sock->miu, remaining_len); + frag_len = min_t(size_t, remote_miu, remaining_len); pr_debug("Fragment %zd bytes remaining %zd", frag_len, remaining_len); @@ -571,7 +711,8 @@ int nfc_llcp_send_i_frame(struct nfc_llcp_sock *sock, skb_put(pdu, LLCP_SEQUENCE_SIZE); - memcpy(skb_put(pdu, frag_len), msg_ptr, frag_len); + if (likely(frag_len > 0)) + memcpy(skb_put(pdu, frag_len), msg_ptr, frag_len); skb_queue_tail(&sock->tx_queue, pdu); @@ -583,7 +724,7 @@ int nfc_llcp_send_i_frame(struct nfc_llcp_sock *sock, remaining_len -= frag_len; msg_ptr += frag_len; - } + } while (remaining_len > 0); kfree(msg_data); @@ -597,6 +738,7 @@ int nfc_llcp_send_ui_frame(struct nfc_llcp_sock *sock, u8 ssap, u8 dsap, struct nfc_llcp_local *local; size_t frag_len = 0, remaining_len; u8 *msg_ptr, *msg_data; + u16 remote_miu; int err; pr_debug("Send UI frame len %zd\n", len); @@ -617,9 +759,11 @@ int nfc_llcp_send_ui_frame(struct nfc_llcp_sock *sock, u8 ssap, u8 dsap, remaining_len = len; msg_ptr = msg_data; - while (remaining_len > 0) { + do { + remote_miu = sock->remote_miu > LLCP_MAX_MIU ? 
+ local->remote_miu : sock->remote_miu; - frag_len = min_t(size_t, sock->miu, remaining_len); + frag_len = min_t(size_t, remote_miu, remaining_len); pr_debug("Fragment %zd bytes remaining %zd", frag_len, remaining_len); @@ -633,14 +777,15 @@ int nfc_llcp_send_ui_frame(struct nfc_llcp_sock *sock, u8 ssap, u8 dsap, pdu = llcp_add_header(pdu, dsap, ssap, LLCP_PDU_UI); - memcpy(skb_put(pdu, frag_len), msg_ptr, frag_len); + if (likely(frag_len > 0)) + memcpy(skb_put(pdu, frag_len), msg_ptr, frag_len); /* No need to check for the peer RW for UI frames */ skb_queue_tail(&local->tx_queue, pdu); remaining_len -= frag_len; msg_ptr += frag_len; - } + } while (remaining_len > 0); kfree(msg_data); diff --git a/net/nfc/llcp/llcp.c b/net/nfc/llcp_core.c index ec43914c92a9..158bdbf668cc 100644 --- a/net/nfc/llcp/llcp.c +++ b/net/nfc/llcp_core.c @@ -24,13 +24,15 @@ #include <linux/list.h> #include <linux/nfc.h> -#include "../nfc.h" +#include "nfc.h" #include "llcp.h" static u8 llcp_magic[3] = {0x46, 0x66, 0x6d}; static struct list_head llcp_devices; +static void nfc_llcp_rx_skb(struct nfc_llcp_local *local, struct sk_buff *skb); + void nfc_llcp_sock_link(struct llcp_sock_list *l, struct sock *sk) { write_lock(&l->lock); @@ -45,6 +47,12 @@ void nfc_llcp_sock_unlink(struct llcp_sock_list *l, struct sock *sk) write_unlock(&l->lock); } +void nfc_llcp_socket_remote_param_init(struct nfc_llcp_sock *sock) +{ + sock->remote_rw = LLCP_DEFAULT_RW; + sock->remote_miu = LLCP_MAX_MIU + 1; +} + static void nfc_llcp_socket_purge(struct nfc_llcp_sock *sock) { struct nfc_llcp_local *local = sock->local; @@ -54,7 +62,6 @@ static void nfc_llcp_socket_purge(struct nfc_llcp_sock *sock) skb_queue_purge(&sock->tx_queue); skb_queue_purge(&sock->tx_pending_queue); - skb_queue_purge(&sock->tx_backlog_queue); if (local == NULL) return; @@ -69,17 +76,18 @@ static void nfc_llcp_socket_purge(struct nfc_llcp_sock *sock) } } -static void nfc_llcp_socket_release(struct nfc_llcp_local *local, bool listen) +static void nfc_llcp_socket_release(struct nfc_llcp_local *local, bool device, + int err) { struct sock *sk; - struct hlist_node *node, *tmp; + struct hlist_node *tmp; struct nfc_llcp_sock *llcp_sock; skb_queue_purge(&local->tx_queue); write_lock(&local->sockets.lock); - sk_for_each_safe(sk, node, tmp, &local->sockets.head) { + sk_for_each_safe(sk, tmp, &local->sockets.head) { llcp_sock = nfc_llcp_sock(sk); bh_lock_sock(sk); @@ -101,39 +109,51 @@ static void nfc_llcp_socket_release(struct nfc_llcp_local *local, bool listen) nfc_llcp_accept_unlink(accept_sk); + if (err) + accept_sk->sk_err = err; accept_sk->sk_state = LLCP_CLOSED; + accept_sk->sk_state_change(sk); bh_unlock_sock(accept_sk); - - sock_orphan(accept_sk); } - - if (listen == true) { - bh_unlock_sock(sk); - continue; - } - } - - /* - * If we have a connection less socket bound, we keep it alive - * if the device is still present. 
- */ - if (sk->sk_state == LLCP_BOUND && sk->sk_type == SOCK_DGRAM && - listen == true) { - bh_unlock_sock(sk); - continue; } + if (err) + sk->sk_err = err; sk->sk_state = LLCP_CLOSED; + sk->sk_state_change(sk); bh_unlock_sock(sk); - sock_orphan(sk); - sk_del_node_init(sk); } write_unlock(&local->sockets.lock); + + /* If we still have a device, we keep the RAW sockets alive */ + if (device == true) + return; + + write_lock(&local->raw_sockets.lock); + + sk_for_each_safe(sk, tmp, &local->raw_sockets.head) { + llcp_sock = nfc_llcp_sock(sk); + + bh_lock_sock(sk); + + nfc_llcp_socket_purge(llcp_sock); + + if (err) + sk->sk_err = err; + sk->sk_state = LLCP_CLOSED; + sk->sk_state_change(sk); + + bh_unlock_sock(sk); + + sk_del_node_init(sk); + } + + write_unlock(&local->raw_sockets.lock); } struct nfc_llcp_local *nfc_llcp_local_get(struct nfc_llcp_local *local) @@ -143,20 +163,28 @@ struct nfc_llcp_local *nfc_llcp_local_get(struct nfc_llcp_local *local) return local; } -static void local_release(struct kref *ref) +static void local_cleanup(struct nfc_llcp_local *local) { - struct nfc_llcp_local *local; - - local = container_of(ref, struct nfc_llcp_local, ref); - - list_del(&local->list); - nfc_llcp_socket_release(local, false); + nfc_llcp_socket_release(local, false, ENXIO); del_timer_sync(&local->link_timer); skb_queue_purge(&local->tx_queue); cancel_work_sync(&local->tx_work); cancel_work_sync(&local->rx_work); cancel_work_sync(&local->timeout_work); kfree_skb(local->rx_pending); + del_timer_sync(&local->sdreq_timer); + cancel_work_sync(&local->sdreq_timeout_work); + nfc_llcp_free_sdp_tlv_list(&local->pending_sdreqs); +} + +static void local_release(struct kref *ref) +{ + struct nfc_llcp_local *local; + + local = container_of(ref, struct nfc_llcp_local, ref); + + list_del(&local->list); + local_cleanup(local); kfree(local); } @@ -172,7 +200,6 @@ static struct nfc_llcp_sock *nfc_llcp_sock_get(struct nfc_llcp_local *local, u8 ssap, u8 dsap) { struct sock *sk; - struct hlist_node *node; struct nfc_llcp_sock *llcp_sock, *tmp_sock; pr_debug("ssap dsap %d %d\n", ssap, dsap); @@ -184,7 +211,7 @@ static struct nfc_llcp_sock *nfc_llcp_sock_get(struct nfc_llcp_local *local, llcp_sock = NULL; - sk_for_each(sk, node, &local->sockets.head) { + sk_for_each(sk, &local->sockets.head) { tmp_sock = nfc_llcp_sock(sk); if (tmp_sock->ssap == ssap && tmp_sock->dsap == dsap) { @@ -225,6 +252,47 @@ static void nfc_llcp_symm_timer(unsigned long data) schedule_work(&local->timeout_work); } +static void nfc_llcp_sdreq_timeout_work(struct work_struct *work) +{ + unsigned long time; + HLIST_HEAD(nl_sdres_list); + struct hlist_node *n; + struct nfc_llcp_sdp_tlv *sdp; + struct nfc_llcp_local *local = container_of(work, struct nfc_llcp_local, + sdreq_timeout_work); + + mutex_lock(&local->sdreq_lock); + + time = jiffies - msecs_to_jiffies(3 * local->remote_lto); + + hlist_for_each_entry_safe(sdp, n, &local->pending_sdreqs, node) { + if (time_after(sdp->time, time)) + continue; + + sdp->sap = LLCP_SDP_UNBOUND; + + hlist_del(&sdp->node); + + hlist_add_head(&sdp->node, &nl_sdres_list); + } + + if (!hlist_empty(&local->pending_sdreqs)) + mod_timer(&local->sdreq_timer, + jiffies + msecs_to_jiffies(3 * local->remote_lto)); + + mutex_unlock(&local->sdreq_lock); + + if (!hlist_empty(&nl_sdres_list)) + nfc_genl_llc_send_sdres(local->dev, &nl_sdres_list); +} + +static void nfc_llcp_sdreq_timer(unsigned long data) +{ + struct nfc_llcp_local *local = (struct nfc_llcp_local *) data; + + schedule_work(&local->sdreq_timeout_work); +} + 
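/* Editor's sketch, not part of the patch above: the aging rule behind
 * nfc_llcp_sdreq_timeout_work(). Each pending SDREQ TLV is stamped with
 * jiffies when built (sdp->time in nfc_llcp_build_sdreq_tlv()), and the
 * worker expires entries once three remote link timeouts have elapsed,
 * reporting them to user space as unbound via nfc_genl_llc_send_sdres().
 * The per-entry test, restated as a hypothetical helper foo_sdreq_expired():
 */
static bool foo_sdreq_expired(const struct nfc_llcp_sdp_tlv *sdp,
			      const struct nfc_llcp_local *local)
{
	/* local->remote_lto is kept in milliseconds; !time_after() mirrors
	 * the worker's check and is safe across jiffies wraparound. */
	return !time_after(sdp->time,
			   jiffies - msecs_to_jiffies(3 * local->remote_lto));
}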
struct nfc_llcp_local *nfc_llcp_find_local(struct nfc_dev *dev) { struct nfc_llcp_local *local, *n; @@ -273,7 +341,6 @@ struct nfc_llcp_sock *nfc_llcp_sock_from_sn(struct nfc_llcp_local *local, u8 *sn, size_t sn_len) { struct sock *sk; - struct hlist_node *node; struct nfc_llcp_sock *llcp_sock, *tmp_sock; pr_debug("sn %zd %p\n", sn_len, sn); @@ -285,7 +352,7 @@ struct nfc_llcp_sock *nfc_llcp_sock_from_sn(struct nfc_llcp_local *local, llcp_sock = NULL; - sk_for_each(sk, node, &local->sockets.head) { + sk_for_each(sk, &local->sockets.head) { tmp_sock = nfc_llcp_sock(sk); pr_debug("llcp sock %p\n", tmp_sock); @@ -550,14 +617,13 @@ int nfc_llcp_set_remote_gb(struct nfc_dev *dev, u8 *gb, u8 gb_len) pr_err("No LLCP device\n"); return -ENODEV; } + if (gb_len < 3) + return -EINVAL; memset(local->remote_gb, 0, NFC_MAX_GT_LEN); memcpy(local->remote_gb, gb, gb_len); local->remote_gb_len = gb_len; - if (local->remote_gb == NULL || local->remote_gb_len == 0) - return -ENODEV; - if (memcmp(local->remote_gb, llcp_magic, 3)) { pr_err("MAC does not support LLCP\n"); return -EINVAL; @@ -603,14 +669,13 @@ static void nfc_llcp_set_nrns(struct nfc_llcp_sock *sock, struct sk_buff *pdu) void nfc_llcp_send_to_raw_sock(struct nfc_llcp_local *local, struct sk_buff *skb, u8 direction) { - struct hlist_node *node; struct sk_buff *skb_copy = NULL, *nskb; struct sock *sk; u8 *data; read_lock(&local->raw_sockets.lock); - sk_for_each(sk, node, &local->raw_sockets.head) { + sk_for_each(sk, &local->raw_sockets.head) { if (sk->sk_state != LLCP_BOUND) continue; @@ -668,6 +733,8 @@ static void nfc_llcp_tx_work(struct work_struct *work) if (ptype == LLCP_PDU_I) copy_skb = skb_copy(skb, GFP_ATOMIC); + __net_timestamp(skb); + nfc_llcp_send_to_raw_sock(local, skb, NFC_LLCP_DIRECTION_TX); @@ -697,11 +764,10 @@ static struct nfc_llcp_sock *nfc_llcp_connecting_sock_get(struct nfc_llcp_local { struct sock *sk; struct nfc_llcp_sock *llcp_sock; - struct hlist_node *node; read_lock(&local->connecting_sockets.lock); - sk_for_each(sk, node, &local->connecting_sockets.head) { + sk_for_each(sk, &local->connecting_sockets.head) { llcp_sock = nfc_llcp_sock(sk); if (llcp_sock->ssap == ssap) { @@ -770,8 +836,6 @@ static void nfc_llcp_recv_ui(struct nfc_llcp_local *local, ui_cb->dsap = dsap; ui_cb->ssap = ssap; - printk("%s %d %d\n", __func__, dsap, ssap); - pr_debug("%d %d\n", dsap, ssap); /* We're looking for a bound socket, not a client one */ @@ -781,9 +845,14 @@ static void nfc_llcp_recv_ui(struct nfc_llcp_local *local, /* There is no sequence with UI frames */ skb_pull(skb, LLCP_HEADER_SIZE); - if (sock_queue_rcv_skb(&llcp_sock->sk, skb)) { - pr_err("receive queue is full\n"); - skb_queue_head(&llcp_sock->tx_backlog_queue, skb); + if (!sock_queue_rcv_skb(&llcp_sock->sk, skb)) { + /* + * UI frames will be freed from the socket layer, so we + * need to keep them alive until someone receives them. 
+ */ + skb_get(skb); + } else { + pr_err("Receive queue is full\n"); } nfc_llcp_sock_put(llcp_sock); @@ -863,7 +932,9 @@ static void nfc_llcp_recv_connect(struct nfc_llcp_local *local, new_sock = nfc_llcp_sock(new_sk); new_sock->dev = local->dev; new_sock->local = nfc_llcp_local_get(local); - new_sock->miu = local->remote_miu; + new_sock->rw = sock->rw; + new_sock->miux = sock->miux; + new_sock->remote_miu = local->remote_miu; new_sock->nfc_protocol = sock->nfc_protocol; new_sock->dsap = ssap; new_sock->target_idx = local->target_idx; @@ -917,11 +988,11 @@ int nfc_llcp_queue_i_frames(struct nfc_llcp_sock *sock) pr_debug("Remote ready %d tx queue len %d remote rw %d", sock->remote_ready, skb_queue_len(&sock->tx_pending_queue), - sock->rw); + sock->remote_rw); /* Try to queue some I frames for transmission */ while (sock->remote_ready && - skb_queue_len(&sock->tx_pending_queue) < sock->rw) { + skb_queue_len(&sock->tx_pending_queue) < sock->remote_rw) { struct sk_buff *pdu; pdu = skb_dequeue(&sock->tx_queue); @@ -976,9 +1047,14 @@ static void nfc_llcp_recv_hdlc(struct nfc_llcp_local *local, pr_err("Received out of sequence I PDU\n"); skb_pull(skb, LLCP_HEADER_SIZE + LLCP_SEQUENCE_SIZE); - if (sock_queue_rcv_skb(&llcp_sock->sk, skb)) { - pr_err("receive queue is full\n"); - skb_queue_head(&llcp_sock->tx_backlog_queue, skb); + if (!sock_queue_rcv_skb(&llcp_sock->sk, skb)) { + /* + * I frames will be freed from the socket layer, so we + * need to keep them alive until someone receives them. + */ + skb_get(skb); + } else { + pr_err("Receive queue is full\n"); } } @@ -1030,6 +1106,12 @@ static void nfc_llcp_recv_disc(struct nfc_llcp_local *local, dsap = nfc_llcp_dsap(skb); ssap = nfc_llcp_ssap(skb); + if ((dsap == 0) && (ssap == 0)) { + pr_debug("Connection termination"); + nfc_dep_link_down(local->dev); + return; + } + llcp_sock = nfc_llcp_sock_get(local, dsap, ssap); if (llcp_sock == NULL) { nfc_llcp_send_dm(local, dsap, ssap, LLCP_DM_NOCONN); @@ -1136,6 +1218,10 @@ static void nfc_llcp_recv_snl(struct nfc_llcp_local *local, u16 tlv_len, offset; char *service_name; size_t service_name_len; + struct nfc_llcp_sdp_tlv *sdp; + HLIST_HEAD(llc_sdres_list); + size_t sdres_tlvs_len; + HLIST_HEAD(nl_sdres_list); dsap = nfc_llcp_dsap(skb); ssap = nfc_llcp_ssap(skb); @@ -1150,6 +1236,7 @@ static void nfc_llcp_recv_snl(struct nfc_llcp_local *local, tlv = &skb->data[LLCP_HEADER_SIZE]; tlv_len = skb->len - LLCP_HEADER_SIZE; offset = 0; + sdres_tlvs_len = 0; while (offset < tlv_len) { type = tlv[0]; @@ -1167,14 +1254,14 @@ static void nfc_llcp_recv_snl(struct nfc_llcp_local *local, !strncmp(service_name, "urn:nfc:sn:sdp", service_name_len)) { sap = 1; - goto send_snl; + goto add_snl; } llcp_sock = nfc_llcp_sock_from_sn(local, service_name, service_name_len); if (!llcp_sock) { sap = 0; - goto send_snl; + goto add_snl; } /* @@ -1191,7 +1278,7 @@ static void nfc_llcp_recv_snl(struct nfc_llcp_local *local, if (sap == LLCP_SAP_MAX) { sap = 0; - goto send_snl; + goto add_snl; } client_count = @@ -1208,8 +1295,37 @@ static void nfc_llcp_recv_snl(struct nfc_llcp_local *local, pr_debug("%p %d\n", llcp_sock, sap); -send_snl: - nfc_llcp_send_snl(local, tid, sap); +add_snl: + sdp = nfc_llcp_build_sdres_tlv(tid, sap); + if (sdp == NULL) + goto exit; + + sdres_tlvs_len += sdp->tlv_len; + hlist_add_head(&sdp->node, &llc_sdres_list); + break; + + case LLCP_TLV_SDRES: + mutex_lock(&local->sdreq_lock); + + pr_debug("LLCP_TLV_SDRES: searching tid %d\n", tlv[2]); + + hlist_for_each_entry(sdp, &local->pending_sdreqs, node) { + if 
(sdp->tid != tlv[2]) + continue; + + sdp->sap = tlv[3]; + + pr_debug("Found: uri=%s, sap=%d\n", + sdp->uri, sdp->sap); + + hlist_del(&sdp->node); + + hlist_add_head(&sdp->node, &nl_sdres_list); + + break; + } + + mutex_unlock(&local->sdreq_lock); break; default: @@ -1220,21 +1336,63 @@ send_snl: offset += length + 2; tlv += length + 2; } + +exit: + if (!hlist_empty(&nl_sdres_list)) + nfc_genl_llc_send_sdres(local->dev, &nl_sdres_list); + + if (!hlist_empty(&llc_sdres_list)) + nfc_llcp_send_snl_sdres(local, &llc_sdres_list, sdres_tlvs_len); } -static void nfc_llcp_rx_work(struct work_struct *work) +static void nfc_llcp_recv_agf(struct nfc_llcp_local *local, struct sk_buff *skb) { - struct nfc_llcp_local *local = container_of(work, struct nfc_llcp_local, - rx_work); - u8 dsap, ssap, ptype; - struct sk_buff *skb; + u8 ptype; + u16 pdu_len; + struct sk_buff *new_skb; - skb = local->rx_pending; - if (skb == NULL) { - pr_debug("No pending SKB\n"); + if (skb->len <= LLCP_HEADER_SIZE) { + pr_err("Malformed AGF PDU\n"); return; } + skb_pull(skb, LLCP_HEADER_SIZE); + + while (skb->len > LLCP_AGF_PDU_HEADER_SIZE) { + pdu_len = skb->data[0] << 8 | skb->data[1]; + + skb_pull(skb, LLCP_AGF_PDU_HEADER_SIZE); + + if (pdu_len < LLCP_HEADER_SIZE || pdu_len > skb->len) { + pr_err("Malformed AGF PDU\n"); + return; + } + + ptype = nfc_llcp_ptype(skb); + + if (ptype == LLCP_PDU_SYMM || ptype == LLCP_PDU_AGF) + goto next; + + new_skb = nfc_alloc_recv_skb(pdu_len, GFP_KERNEL); + if (new_skb == NULL) { + pr_err("Could not allocate PDU\n"); + return; + } + + memcpy(skb_put(new_skb, pdu_len), skb->data, pdu_len); + + nfc_llcp_rx_skb(local, new_skb); + + kfree_skb(new_skb); +next: + skb_pull(skb, pdu_len); + } +} + +static void nfc_llcp_rx_skb(struct nfc_llcp_local *local, struct sk_buff *skb) +{ + u8 dsap, ssap, ptype; + ptype = nfc_llcp_ptype(skb); dsap = nfc_llcp_dsap(skb); ssap = nfc_llcp_ssap(skb); @@ -1245,8 +1403,6 @@ static void nfc_llcp_rx_work(struct work_struct *work) print_hex_dump(KERN_DEBUG, "LLCP Rx: ", DUMP_PREFIX_OFFSET, 16, 1, skb->data, skb->len, true); - nfc_llcp_send_to_raw_sock(local, skb, NFC_LLCP_DIRECTION_RX); - switch (ptype) { case LLCP_PDU_SYMM: pr_debug("SYMM\n"); @@ -1289,13 +1445,43 @@ static void nfc_llcp_rx_work(struct work_struct *work) nfc_llcp_recv_hdlc(local, skb); break; + case LLCP_PDU_AGF: + pr_debug("AGF frame\n"); + nfc_llcp_recv_agf(local, skb); + break; } +} + +static void nfc_llcp_rx_work(struct work_struct *work) +{ + struct nfc_llcp_local *local = container_of(work, struct nfc_llcp_local, + rx_work); + struct sk_buff *skb; + + skb = local->rx_pending; + if (skb == NULL) { + pr_debug("No pending SKB\n"); + return; + } + + __net_timestamp(skb); + + nfc_llcp_send_to_raw_sock(local, skb, NFC_LLCP_DIRECTION_RX); + + nfc_llcp_rx_skb(local, skb); schedule_work(&local->tx_work); kfree_skb(local->rx_pending); local->rx_pending = NULL; } +static void __nfc_llcp_recv(struct nfc_llcp_local *local, struct sk_buff *skb) +{ + local->rx_pending = skb; + del_timer(&local->link_timer); + schedule_work(&local->rx_work); +} + void nfc_llcp_recv(void *data, struct sk_buff *skb, int err) { struct nfc_llcp_local *local = (struct nfc_llcp_local *) data; @@ -1306,9 +1492,7 @@ void nfc_llcp_recv(void *data, struct sk_buff *skb, int err) return; } - local->rx_pending = skb_get(skb); - del_timer(&local->link_timer); - schedule_work(&local->rx_work); + __nfc_llcp_recv(local, skb); } int nfc_llcp_data_received(struct nfc_dev *dev, struct sk_buff *skb) @@ -1319,9 +1503,7 @@ int 
nfc_llcp_data_received(struct nfc_dev *dev, struct sk_buff *skb) if (local == NULL) return -ENODEV; - local->rx_pending = skb_get(skb); - del_timer(&local->link_timer); - schedule_work(&local->rx_work); + __nfc_llcp_recv(local, skb); return 0; } @@ -1334,8 +1516,11 @@ void nfc_llcp_mac_is_down(struct nfc_dev *dev) if (local == NULL) return; + local->remote_miu = LLCP_DEFAULT_MIU; + local->remote_lto = LLCP_DEFAULT_LTO; + /* Close and purge all existing sockets */ - nfc_llcp_socket_release(local, true); + nfc_llcp_socket_release(local, true, 0); } void nfc_llcp_mac_is_up(struct nfc_dev *dev, u32 target_idx, @@ -1400,6 +1585,13 @@ int nfc_llcp_register_device(struct nfc_dev *ndev) local->remote_miu = LLCP_DEFAULT_MIU; local->remote_lto = LLCP_DEFAULT_LTO; + mutex_init(&local->sdreq_lock); + INIT_HLIST_HEAD(&local->pending_sdreqs); + init_timer(&local->sdreq_timer); + local->sdreq_timer.data = (unsigned long) local; + local->sdreq_timer.function = nfc_llcp_sdreq_timer; + INIT_WORK(&local->sdreq_timeout_work, nfc_llcp_sdreq_timeout_work); + list_add(&local->list, &llcp_devices); return 0; @@ -1414,6 +1606,8 @@ void nfc_llcp_unregister_device(struct nfc_dev *dev) return; } + local_cleanup(local); + nfc_llcp_local_put(local); } diff --git a/net/nfc/llcp/sock.c b/net/nfc/llcp_sock.c index fea22eb41b82..380253eccb74 100644 --- a/net/nfc/llcp/sock.c +++ b/net/nfc/llcp_sock.c @@ -24,7 +24,7 @@ #include <linux/module.h> #include <linux/nfc.h> -#include "../nfc.h" +#include "nfc.h" #include "llcp.h" static int sock_wait_state(struct sock *sk, int state, unsigned long timeo) @@ -223,6 +223,156 @@ error: return ret; } +static int nfc_llcp_setsockopt(struct socket *sock, int level, int optname, + char __user *optval, unsigned int optlen) +{ + struct sock *sk = sock->sk; + struct nfc_llcp_sock *llcp_sock = nfc_llcp_sock(sk); + u32 opt; + int err = 0; + + pr_debug("%p optname %d\n", sk, optname); + + if (level != SOL_NFC) + return -ENOPROTOOPT; + + lock_sock(sk); + + switch (optname) { + case NFC_LLCP_RW: + if (sk->sk_state == LLCP_CONNECTED || + sk->sk_state == LLCP_BOUND || + sk->sk_state == LLCP_LISTEN) { + err = -EINVAL; + break; + } + + if (get_user(opt, (u32 __user *) optval)) { + err = -EFAULT; + break; + } + + if (opt > LLCP_MAX_RW) { + err = -EINVAL; + break; + } + + llcp_sock->rw = (u8) opt; + + break; + + case NFC_LLCP_MIUX: + if (sk->sk_state == LLCP_CONNECTED || + sk->sk_state == LLCP_BOUND || + sk->sk_state == LLCP_LISTEN) { + err = -EINVAL; + break; + } + + if (get_user(opt, (u32 __user *) optval)) { + err = -EFAULT; + break; + } + + if (opt > LLCP_MAX_MIUX) { + err = -EINVAL; + break; + } + + llcp_sock->miux = cpu_to_be16((u16) opt); + + break; + + default: + err = -ENOPROTOOPT; + break; + } + + release_sock(sk); + + pr_debug("%p rw %d miux %d\n", llcp_sock, + llcp_sock->rw, llcp_sock->miux); + + return err; +} + +static int nfc_llcp_getsockopt(struct socket *sock, int level, int optname, + char __user *optval, int __user *optlen) +{ + struct nfc_llcp_local *local; + struct sock *sk = sock->sk; + struct nfc_llcp_sock *llcp_sock = nfc_llcp_sock(sk); + int len, err = 0; + u16 miux, remote_miu; + u8 rw; + + pr_debug("%p optname %d\n", sk, optname); + + if (level != SOL_NFC) + return -ENOPROTOOPT; + + if (get_user(len, optlen)) + return -EFAULT; + + local = llcp_sock->local; + if (!local) + return -ENODEV; + + len = min_t(u32, len, sizeof(u32)); + + lock_sock(sk); + + switch (optname) { + case NFC_LLCP_RW: + rw = llcp_sock->rw > LLCP_MAX_RW ? 
local->rw : llcp_sock->rw; + if (put_user(rw, (u32 __user *) optval)) + err = -EFAULT; + + break; + + case NFC_LLCP_MIUX: + miux = be16_to_cpu(llcp_sock->miux) > LLCP_MAX_MIUX ? + be16_to_cpu(local->miux) : be16_to_cpu(llcp_sock->miux); + + if (put_user(miux, (u32 __user *) optval)) + err = -EFAULT; + + break; + + case NFC_LLCP_REMOTE_MIU: + remote_miu = llcp_sock->remote_miu > LLCP_MAX_MIU ? + local->remote_miu : llcp_sock->remote_miu; + + if (put_user(remote_miu, (u32 __user *) optval)) + err = -EFAULT; + + break; + + case NFC_LLCP_REMOTE_LTO: + if (put_user(local->remote_lto / 10, (u32 __user *) optval)) + err = -EFAULT; + + break; + + case NFC_LLCP_REMOTE_RW: + if (put_user(llcp_sock->remote_rw, (u32 __user *) optval)) + err = -EFAULT; + + break; + + default: + err = -ENOPROTOOPT; + break; + } + + release_sock(sk); + + if (put_user(len, optlen)) + return -EFAULT; + + return err; +} + void nfc_llcp_accept_unlink(struct sock *sk) { struct nfc_llcp_sock *llcp_sock = nfc_llcp_sock(sk); @@ -270,7 +420,9 @@ struct sock *nfc_llcp_accept_dequeue(struct sock *parent, } if (sk->sk_state == LLCP_CONNECTED || !newsock) { - nfc_llcp_accept_unlink(sk); + list_del_init(&lsk->accept_queue); + sock_put(sk); + if (newsock) sock_graft(sk, newsock); @@ -278,6 +430,8 @@ struct sock *nfc_llcp_accept_dequeue(struct sock *parent, pr_debug("Returning sk state %d\n", sk->sk_state); + sk_acceptq_removed(parent); + return sk; } @@ -354,12 +508,13 @@ static int llcp_sock_getname(struct socket *sock, struct sockaddr *uaddr, pr_debug("%p %d %d %d\n", sk, llcp_sock->target_idx, llcp_sock->dsap, llcp_sock->ssap); - uaddr->sa_family = AF_NFC; - + memset(llcp_addr, 0, sizeof(*llcp_addr)); *len = sizeof(struct sockaddr_nfc_llcp); + llcp_addr->sa_family = AF_NFC; llcp_addr->dev_idx = llcp_sock->dev->idx; llcp_addr->target_idx = llcp_sock->target_idx; + llcp_addr->nfc_protocol = llcp_sock->nfc_protocol; llcp_addr->dsap = llcp_sock->dsap; llcp_addr->ssap = llcp_sock->ssap; llcp_addr->service_name_len = llcp_sock->service_name_len; @@ -401,7 +556,8 @@ static unsigned int llcp_sock_poll(struct file *file, struct socket *sock, return llcp_accept_poll(sk); if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) - mask |= POLLERR; + mask |= POLLERR | + (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? 
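
/*
 * Editorial sketch, not part of the patch: how the socket options added
 * above look from userspace. A minimal sketch assuming kernel headers
 * that provide SOL_NFC and the NFC_LLCP_* option names; all values are
 * passed as u32, and RW/MIUX changes are only accepted before the
 * socket is bound, listening or connected.
 */
#include <stdint.h>
#include <stdio.h>
#include <sys/socket.h>
#include <linux/nfc.h>

static int tune_llcp_sock(int fd)
{
	uint32_t rw = 4;	/* receive window, at most LLCP_MAX_RW (15) */
	uint32_t miux = 1024;	/* MIU extension, at most LLCP_MAX_MIUX (2047) */

	if (setsockopt(fd, SOL_NFC, NFC_LLCP_RW, &rw, sizeof(rw)) < 0)
		return -1;
	return setsockopt(fd, SOL_NFC, NFC_LLCP_MIUX, &miux, sizeof(miux));
}

static void show_remote_params(int fd)
{
	uint32_t v;
	socklen_t len = sizeof(v);

	if (getsockopt(fd, SOL_NFC, NFC_LLCP_REMOTE_MIU, &v, &len) == 0)
		printf("remote MIU %u\n", v);
	if (getsockopt(fd, SOL_NFC, NFC_LLCP_REMOTE_RW, &v, &len) == 0)
		printf("remote RW %u\n", v);
}
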
POLLPRI : 0); if (!skb_queue_empty(&sk->sk_receive_queue)) mask |= POLLIN | POLLRDNORM; @@ -462,8 +618,6 @@ static int llcp_sock_release(struct socket *sock) nfc_llcp_accept_unlink(accept_sk); release_sock(accept_sk); - - sock_orphan(accept_sk); } } @@ -541,7 +695,7 @@ static int llcp_sock_connect(struct socket *sock, struct sockaddr *_addr, llcp_sock->dev = dev; llcp_sock->local = nfc_llcp_local_get(local); - llcp_sock->miu = llcp_sock->local->remote_miu; + llcp_sock->remote_miu = llcp_sock->local->remote_miu; llcp_sock->ssap = nfc_llcp_get_local_ssap(local); if (llcp_sock->ssap == LLCP_SAP_MAX) { ret = -ENOMEM; @@ -644,6 +798,8 @@ static int llcp_sock_recvmsg(struct kiocb *iocb, struct socket *sock, pr_debug("%p %zu\n", sk, len); + msg->msg_namelen = 0; + lock_sock(sk); if (sk->sk_state == LLCP_CLOSED && @@ -672,25 +828,28 @@ static int llcp_sock_recvmsg(struct kiocb *iocb, struct socket *sock, copied = min_t(unsigned int, rlen, len); cskb = skb; - if (memcpy_toiovec(msg->msg_iov, cskb->data, copied)) { + if (skb_copy_datagram_iovec(cskb, 0, msg->msg_iov, copied)) { if (!(flags & MSG_PEEK)) skb_queue_head(&sk->sk_receive_queue, skb); return -EFAULT; } + sock_recv_timestamp(msg, sk, skb); + if (sk->sk_type == SOCK_DGRAM && msg->msg_name) { struct nfc_llcp_ui_cb *ui_cb = nfc_llcp_ui_skb_cb(skb); - struct sockaddr_nfc_llcp sockaddr; + struct sockaddr_nfc_llcp *sockaddr = + (struct sockaddr_nfc_llcp *) msg->msg_name; - pr_debug("Datagram socket %d %d\n", ui_cb->dsap, ui_cb->ssap); + msg->msg_namelen = sizeof(struct sockaddr_nfc_llcp); - sockaddr.sa_family = AF_NFC; - sockaddr.nfc_protocol = NFC_PROTO_NFC_DEP; - sockaddr.dsap = ui_cb->dsap; - sockaddr.ssap = ui_cb->ssap; + pr_debug("Datagram socket %d %d\n", ui_cb->dsap, ui_cb->ssap); - memcpy(msg->msg_name, &sockaddr, sizeof(sockaddr)); - msg->msg_namelen = sizeof(sockaddr); + memset(sockaddr, 0, sizeof(*sockaddr)); + sockaddr->sa_family = AF_NFC; + sockaddr->nfc_protocol = NFC_PROTO_NFC_DEP; + sockaddr->dsap = ui_cb->dsap; + sockaddr->ssap = ui_cb->ssap; } /* Mark read part of skb as used */ @@ -733,8 +892,8 @@ static const struct proto_ops llcp_sock_ops = { .ioctl = sock_no_ioctl, .listen = llcp_sock_listen, .shutdown = sock_no_shutdown, - .setsockopt = sock_no_setsockopt, - .getsockopt = sock_no_getsockopt, + .setsockopt = nfc_llcp_setsockopt, + .getsockopt = nfc_llcp_getsockopt, .sendmsg = llcp_sock_sendmsg, .recvmsg = llcp_sock_recvmsg, .mmap = sock_no_mmap, @@ -798,15 +957,15 @@ struct sock *nfc_llcp_sock_alloc(struct socket *sock, int type, gfp_t gfp) llcp_sock->ssap = 0; llcp_sock->dsap = LLCP_SAP_SDP; - llcp_sock->rw = LLCP_DEFAULT_RW; - llcp_sock->miu = LLCP_DEFAULT_MIU; + llcp_sock->rw = LLCP_MAX_RW + 1; + llcp_sock->miux = cpu_to_be16(LLCP_MAX_MIUX + 1); llcp_sock->send_n = llcp_sock->send_ack_n = 0; llcp_sock->recv_n = llcp_sock->recv_ack_n = 0; llcp_sock->remote_ready = 1; llcp_sock->reserved_ssap = LLCP_SAP_MAX; + nfc_llcp_socket_remote_param_init(llcp_sock); skb_queue_head_init(&llcp_sock->tx_queue); skb_queue_head_init(&llcp_sock->tx_pending_queue); - skb_queue_head_init(&llcp_sock->tx_backlog_queue); INIT_LIST_HEAD(&llcp_sock->accept_queue); if (sock != NULL) @@ -821,7 +980,6 @@ void nfc_llcp_sock_free(struct nfc_llcp_sock *sock) skb_queue_purge(&sock->tx_queue); skb_queue_purge(&sock->tx_pending_queue); - skb_queue_purge(&sock->tx_backlog_queue); list_del_init(&sock->accept_queue); diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c index 5f98dc1bf039..48ada0ec749e 100644 --- a/net/nfc/nci/core.c +++ b/net/nfc/nci/core.c 
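
/*
 * Editorial sketch, not part of the patch: receiving an LLCP datagram
 * with the peer's SAPs filled into msg_name, matching the recvmsg()
 * changes above. A minimal sketch assuming fd is a bound AF_NFC
 * SOCK_DGRAM socket and <linux/nfc.h> declares sockaddr_nfc_llcp.
 */
#include <stdio.h>
#include <sys/socket.h>
#include <sys/uio.h>
#include <linux/nfc.h>

static void recv_llcp_dgram(int fd)
{
	char buf[1024];
	struct sockaddr_nfc_llcp from;
	struct iovec iov = { .iov_base = buf, .iov_len = sizeof(buf) };
	struct msghdr msg = {
		.msg_name = &from,
		.msg_namelen = sizeof(from),
		.msg_iov = &iov,
		.msg_iovlen = 1,
	};
	ssize_t n = recvmsg(fd, &msg, 0);

	if (n >= 0 && msg.msg_namelen == sizeof(from))
		printf("%zd bytes from ssap %u to dsap %u\n",
		       n, (unsigned)from.ssap, (unsigned)from.dsap);
}
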
@@ -658,6 +658,7 @@ static struct nfc_ops nci_nfc_ops = { */ struct nci_dev *nci_allocate_device(struct nci_ops *ops, __u32 supported_protocols, + __u32 supported_se, int tx_headroom, int tx_tailroom) { struct nci_dev *ndev; @@ -680,6 +681,7 @@ struct nci_dev *nci_allocate_device(struct nci_ops *ops, ndev->nfc_dev = nfc_allocate_device(&nci_nfc_ops, supported_protocols, + supported_se, tx_headroom + NCI_DATA_HDR_SIZE, tx_tailroom); if (!ndev->nfc_dev) diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c index 3568ae16786d..f0c4d61f37c0 100644 --- a/net/nfc/netlink.c +++ b/net/nfc/netlink.c @@ -28,8 +28,7 @@ #include <linux/slab.h> #include "nfc.h" - -#include "llcp/llcp.h" +#include "llcp.h" static struct genl_multicast_group nfc_genl_event_mcgrp = { .name = NFC_GENL_MCAST_EVENT_NAME, @@ -53,6 +52,15 @@ static const struct nla_policy nfc_genl_policy[NFC_ATTR_MAX + 1] = { [NFC_ATTR_DEVICE_POWERED] = { .type = NLA_U8 }, [NFC_ATTR_IM_PROTOCOLS] = { .type = NLA_U32 }, [NFC_ATTR_TM_PROTOCOLS] = { .type = NLA_U32 }, + [NFC_ATTR_LLC_PARAM_LTO] = { .type = NLA_U8 }, + [NFC_ATTR_LLC_PARAM_RW] = { .type = NLA_U8 }, + [NFC_ATTR_LLC_PARAM_MIUX] = { .type = NLA_U16 }, + [NFC_ATTR_LLC_SDP] = { .type = NLA_NESTED }, +}; + +static const struct nla_policy nfc_sdp_genl_policy[NFC_SDP_ATTR_MAX + 1] = { + [NFC_SDP_ATTR_URI] = { .type = NLA_STRING }, + [NFC_SDP_ATTR_SAP] = { .type = NLA_U8 }, }; static int nfc_genl_send_target(struct sk_buff *msg, struct nfc_target *target, @@ -348,6 +356,74 @@ free_msg: return -EMSGSIZE; } +int nfc_genl_llc_send_sdres(struct nfc_dev *dev, struct hlist_head *sdres_list) +{ + struct sk_buff *msg; + struct nlattr *sdp_attr, *uri_attr; + struct nfc_llcp_sdp_tlv *sdres; + struct hlist_node *n; + void *hdr; + int rc = -EMSGSIZE; + int i; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + hdr = genlmsg_put(msg, 0, 0, &nfc_genl_family, 0, + NFC_EVENT_LLC_SDRES); + if (!hdr) + goto free_msg; + + if (nla_put_u32(msg, NFC_ATTR_DEVICE_INDEX, dev->idx)) + goto nla_put_failure; + + sdp_attr = nla_nest_start(msg, NFC_ATTR_LLC_SDP); + if (sdp_attr == NULL) { + rc = -ENOMEM; + goto nla_put_failure; + } + + i = 1; + hlist_for_each_entry_safe(sdres, n, sdres_list, node) { + pr_debug("uri: %s, sap: %d\n", sdres->uri, sdres->sap); + + uri_attr = nla_nest_start(msg, i++); + if (uri_attr == NULL) { + rc = -ENOMEM; + goto nla_put_failure; + } + + if (nla_put_u8(msg, NFC_SDP_ATTR_SAP, sdres->sap)) + goto nla_put_failure; + + if (nla_put_string(msg, NFC_SDP_ATTR_URI, sdres->uri)) + goto nla_put_failure; + + nla_nest_end(msg, uri_attr); + + hlist_del(&sdres->node); + + nfc_llcp_free_sdp_tlv(sdres); + } + + nla_nest_end(msg, sdp_attr); + + genlmsg_end(msg, hdr); + + return genlmsg_multicast(msg, 0, nfc_genl_event_mcgrp.id, GFP_ATOMIC); + +nla_put_failure: + genlmsg_cancel(msg, hdr); + +free_msg: + nlmsg_free(msg); + + nfc_llcp_free_sdp_tlv_list(sdres_list); + + return rc; +} + static int nfc_genl_send_device(struct sk_buff *msg, struct nfc_dev *dev, u32 portid, u32 seq, struct netlink_callback *cb, @@ -366,6 +442,7 @@ static int nfc_genl_send_device(struct sk_buff *msg, struct nfc_dev *dev, if (nla_put_string(msg, NFC_ATTR_DEVICE_NAME, nfc_device_name(dev)) || nla_put_u32(msg, NFC_ATTR_DEVICE_INDEX, dev->idx) || nla_put_u32(msg, NFC_ATTR_PROTOCOLS, dev->supported_protocols) || + nla_put_u32(msg, NFC_ATTR_SE, dev->supported_se) || nla_put_u8(msg, NFC_ATTR_DEVICE_POWERED, dev->dev_up) || nla_put_u8(msg, NFC_ATTR_RF_MODE, dev->rf_mode)) goto nla_put_failure; @@ -858,6 
+935,96 @@ exit: return rc; } +static int nfc_genl_llc_sdreq(struct sk_buff *skb, struct genl_info *info) +{ + struct nfc_dev *dev; + struct nfc_llcp_local *local; + struct nlattr *attr, *sdp_attrs[NFC_SDP_ATTR_MAX+1]; + u32 idx; + u8 tid; + char *uri; + int rc = 0, rem; + size_t uri_len, tlvs_len; + struct hlist_head sdreq_list; + struct nfc_llcp_sdp_tlv *sdreq; + + if (!info->attrs[NFC_ATTR_DEVICE_INDEX] || + !info->attrs[NFC_ATTR_LLC_SDP]) + return -EINVAL; + + idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]); + + dev = nfc_get_device(idx); + if (!dev) { + rc = -ENODEV; + goto exit; + } + + device_lock(&dev->dev); + + if (dev->dep_link_up == false) { + rc = -ENOLINK; + goto exit; + } + + local = nfc_llcp_find_local(dev); + if (!local) { + nfc_put_device(dev); + rc = -ENODEV; + goto exit; + } + + INIT_HLIST_HEAD(&sdreq_list); + + tlvs_len = 0; + + nla_for_each_nested(attr, info->attrs[NFC_ATTR_LLC_SDP], rem) { + rc = nla_parse_nested(sdp_attrs, NFC_SDP_ATTR_MAX, attr, + nfc_sdp_genl_policy); + + if (rc != 0) { + rc = -EINVAL; + goto exit; + } + + if (!sdp_attrs[NFC_SDP_ATTR_URI]) + continue; + + uri_len = nla_len(sdp_attrs[NFC_SDP_ATTR_URI]); + if (uri_len == 0) + continue; + + uri = nla_data(sdp_attrs[NFC_SDP_ATTR_URI]); + if (uri == NULL || *uri == 0) + continue; + + tid = local->sdreq_next_tid++; + + sdreq = nfc_llcp_build_sdreq_tlv(tid, uri, uri_len); + if (sdreq == NULL) { + rc = -ENOMEM; + goto exit; + } + + tlvs_len += sdreq->tlv_len; + + hlist_add_head(&sdreq->node, &sdreq_list); + } + + if (hlist_empty(&sdreq_list)) { + rc = -EINVAL; + goto exit; + } + + rc = nfc_llcp_send_snl_sdreq(local, &sdreq_list, tlvs_len); +exit: + device_unlock(&dev->dev); + + nfc_put_device(dev); + + return rc; +} + static struct genl_ops nfc_genl_ops[] = { { .cmd = NFC_CMD_GET_DEVICE, @@ -912,6 +1079,11 @@ static struct genl_ops nfc_genl_ops[] = { .doit = nfc_genl_llc_set_params, .policy = nfc_genl_policy, }, + { + .cmd = NFC_CMD_LLC_SDREQ, + .doit = nfc_genl_llc_sdreq, + .policy = nfc_genl_policy, + }, }; diff --git a/net/nfc/nfc.h b/net/nfc/nfc.h index 87d914d2876a..afa1f84ba040 100644 --- a/net/nfc/nfc.h +++ b/net/nfc/nfc.h @@ -46,7 +46,7 @@ struct nfc_rawsock { #define to_rawsock_sk(_tx_work) \ ((struct sock *) container_of(_tx_work, struct nfc_rawsock, tx_work)) -#ifdef CONFIG_NFC_LLCP +struct nfc_llcp_sdp_tlv; void nfc_llcp_mac_is_down(struct nfc_dev *dev); void nfc_llcp_mac_is_up(struct nfc_dev *dev, u32 target_idx, @@ -59,60 +59,8 @@ int nfc_llcp_data_received(struct nfc_dev *dev, struct sk_buff *skb); struct nfc_llcp_local *nfc_llcp_find_local(struct nfc_dev *dev); int __init nfc_llcp_init(void); void nfc_llcp_exit(void); - -#else - -static inline void nfc_llcp_mac_is_down(struct nfc_dev *dev) -{ -} - -static inline void nfc_llcp_mac_is_up(struct nfc_dev *dev, u32 target_idx, - u8 comm_mode, u8 rf_mode) -{ -} - -static inline int nfc_llcp_register_device(struct nfc_dev *dev) -{ - return 0; -} - -static inline void nfc_llcp_unregister_device(struct nfc_dev *dev) -{ -} - -static inline int nfc_llcp_set_remote_gb(struct nfc_dev *dev, - u8 *gb, u8 gb_len) -{ - return 0; -} - -static inline u8 *nfc_llcp_general_bytes(struct nfc_dev *dev, size_t *gb_len) -{ - *gb_len = 0; - return NULL; -} - -static inline int nfc_llcp_data_received(struct nfc_dev *dev, - struct sk_buff *skb) -{ - return 0; -} - -static inline struct nfc_llcp_local *nfc_llcp_find_local(struct nfc_dev *dev) -{ - return NULL; -} - -static inline int nfc_llcp_init(void) -{ - return 0; -} - -static inline void nfc_llcp_exit(void) 
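
/*
 * Editorial sketch, not part of the patch: the shape of a userspace
 * service-discovery request that the nfc_genl_llc_sdreq() handler above
 * would parse. A minimal sketch assuming libnl-3 and <linux/nfc.h>;
 * socket setup and "nfc" family resolution are assumed already done.
 */
#include <stdint.h>
#include <netlink/netlink.h>
#include <netlink/genl/genl.h>
#include <linux/nfc.h>

static int send_sdreq(struct nl_sock *sk, int family, uint32_t dev_idx,
		      const char *uri)
{
	struct nl_msg *msg;
	struct nlattr *sdp, *entry;
	int err;

	msg = nlmsg_alloc();
	if (!msg)
		return -1;

	if (!genlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ, family, 0, 0,
			 NFC_CMD_LLC_SDREQ, 0))
		goto err;

	nla_put_u32(msg, NFC_ATTR_DEVICE_INDEX, dev_idx);

	/* One nested entry per URI; the kernel walks these with
	 * nla_for_each_nested() and nla_parse_nested(). */
	sdp = nla_nest_start(msg, NFC_ATTR_LLC_SDP);
	entry = nla_nest_start(msg, 1);
	if (!sdp || !entry)
		goto err;
	nla_put_string(msg, NFC_SDP_ATTR_URI, uri);
	nla_nest_end(msg, entry);
	nla_nest_end(msg, sdp);

	err = nl_send_auto(sk, msg);
	nlmsg_free(msg);
	return err < 0 ? -1 : 0;
err:
	nlmsg_free(msg);
	return -1;
}
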
-{ -} - -#endif +void nfc_llcp_free_sdp_tlv(struct nfc_llcp_sdp_tlv *sdp); +void nfc_llcp_free_sdp_tlv_list(struct hlist_head *head); int __init rawsock_init(void); void rawsock_exit(void); @@ -144,6 +92,8 @@ int nfc_genl_dep_link_down_event(struct nfc_dev *dev); int nfc_genl_tm_activated(struct nfc_dev *dev, u32 protocol); int nfc_genl_tm_deactivated(struct nfc_dev *dev); +int nfc_genl_llc_send_sdres(struct nfc_dev *dev, struct hlist_head *sdres_list); + struct nfc_dev *nfc_get_device(unsigned int idx); static inline void nfc_put_device(struct nfc_dev *dev) diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index ac2defeeba83..894b6cbdd929 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -58,7 +58,7 @@ static int __pop_vlan_tci(struct sk_buff *skb, __be16 *current_tci) if (skb->ip_summed == CHECKSUM_COMPLETE) skb->csum = csum_sub(skb->csum, csum_partial(skb->data - + ETH_HLEN, VLAN_HLEN, 0)); + + (2 * ETH_ALEN), VLAN_HLEN, 0)); vhdr = (struct vlan_hdr *)(skb->data + ETH_HLEN); *current_tci = vhdr->h_vlan_TCI; @@ -98,7 +98,7 @@ static int pop_vlan(struct sk_buff *skb) if (unlikely(err)) return err; - __vlan_hwaccel_put_tag(skb, ntohs(tci)); + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), ntohs(tci)); return 0; } @@ -110,15 +110,15 @@ static int push_vlan(struct sk_buff *skb, const struct ovs_action_push_vlan *vla /* push down current VLAN tag */ current_tag = vlan_tx_tag_get(skb); - if (!__vlan_put_tag(skb, current_tag)) + if (!__vlan_put_tag(skb, skb->vlan_proto, current_tag)) return -ENOMEM; if (skb->ip_summed == CHECKSUM_COMPLETE) skb->csum = csum_add(skb->csum, csum_partial(skb->data - + ETH_HLEN, VLAN_HLEN, 0)); + + (2 * ETH_ALEN), VLAN_HLEN, 0)); } - __vlan_hwaccel_put_tag(skb, ntohs(vlan->vlan_tci) & ~VLAN_TAG_PRESENT); + __vlan_hwaccel_put_tag(skb, vlan->vlan_tpid, ntohs(vlan->vlan_tci) & ~VLAN_TAG_PRESENT); return 0; } diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index f996db343247..d12d6b8b5e8b 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -44,6 +44,7 @@ #include <linux/netfilter_ipv4.h> #include <linux/inetdevice.h> #include <linux/list.h> +#include <linux/lockdep.h> #include <linux/openvswitch.h> #include <linux/rculist.h> #include <linux/dmi.h> @@ -55,39 +56,61 @@ #include "datapath.h" #include "flow.h" #include "vport-internal_dev.h" +#include "vport-netdev.h" -/** - * struct ovs_net - Per net-namespace data for ovs. - * @dps: List of datapaths to enable dumping them all out. - * Protected by genl_mutex. - */ -struct ovs_net { - struct list_head dps; -}; - -static int ovs_net_id __read_mostly; #define REHASH_FLOW_INTERVAL (10 * 60 * HZ) static void rehash_flow_table(struct work_struct *work); static DECLARE_DELAYED_WORK(rehash_flow_wq, rehash_flow_table); +int ovs_net_id __read_mostly; + +static void ovs_notify(struct sk_buff *skb, struct genl_info *info, + struct genl_multicast_group *grp) +{ + genl_notify(skb, genl_info_net(info), info->snd_portid, + grp->id, info->nlhdr, GFP_KERNEL); +} + /** * DOC: Locking: * - * Writes to device state (add/remove datapath, port, set operations on vports, - * etc.) are protected by RTNL. - * - * Writes to other state (flow table modifications, set miscellaneous datapath - * parameters, etc.) are protected by genl_mutex. The RTNL lock nests inside - * genl_mutex. + * All writes e.g. 
Writes to device state (add/remove datapath, port, set + * operations on vports, etc.), Writes to other state (flow table + * modifications, set miscellaneous datapath parameters, etc.) are protected + * by ovs_lock. * * Reads are protected by RCU. * * There are a few special cases (mostly stats) that have their own * synchronization but they nest under all of above and don't interact with * each other. + * + * The RTNL lock nests inside ovs_mutex. */ +static DEFINE_MUTEX(ovs_mutex); + +void ovs_lock(void) +{ + mutex_lock(&ovs_mutex); +} + +void ovs_unlock(void) +{ + mutex_unlock(&ovs_mutex); +} + +#ifdef CONFIG_LOCKDEP +int lockdep_ovsl_is_held(void) +{ + if (debug_locks) + return lockdep_is_held(&ovs_mutex); + else + return 1; +} +#endif + static struct vport *new_vport(const struct vport_parms *); static int queue_gso_packets(struct net *, int dp_ifindex, struct sk_buff *, const struct dp_upcall_info *); @@ -95,7 +118,7 @@ static int queue_userspace_packet(struct net *, int dp_ifindex, struct sk_buff *, const struct dp_upcall_info *); -/* Must be called with rcu_read_lock, genl_mutex, or RTNL lock. */ +/* Must be called with rcu_read_lock or ovs_mutex. */ static struct datapath *get_dp(struct net *net, int dp_ifindex) { struct datapath *dp = NULL; @@ -113,10 +136,10 @@ static struct datapath *get_dp(struct net *net, int dp_ifindex) return dp; } -/* Must be called with rcu_read_lock or RTNL lock. */ +/* Must be called with rcu_read_lock or ovs_mutex. */ const char *ovs_dp_name(const struct datapath *dp) { - struct vport *vport = ovs_vport_rtnl_rcu(dp, OVSP_LOCAL); + struct vport *vport = ovs_vport_ovsl_rcu(dp, OVSP_LOCAL); return vport->ops->get_name(vport); } @@ -129,7 +152,7 @@ static int get_dpifindex(struct datapath *dp) local = ovs_vport_rcu(dp, OVSP_LOCAL); if (local) - ifindex = local->ops->get_ifindex(local); + ifindex = netdev_vport_priv(local)->dev->ifindex; else ifindex = 0; @@ -158,18 +181,17 @@ static struct hlist_head *vport_hash_bucket(const struct datapath *dp, struct vport *ovs_lookup_vport(const struct datapath *dp, u16 port_no) { struct vport *vport; - struct hlist_node *n; struct hlist_head *head; head = vport_hash_bucket(dp, port_no); - hlist_for_each_entry_rcu(vport, n, head, dp_hash_node) { + hlist_for_each_entry_rcu(vport, head, dp_hash_node) { if (vport->port_no == port_no) return vport; } return NULL; } -/* Called with RTNL lock and genl_lock. */ +/* Called with ovs_mutex. */ static struct vport *new_vport(const struct vport_parms *parms) { struct vport *vport; @@ -181,14 +203,12 @@ static struct vport *new_vport(const struct vport_parms *parms) hlist_add_head_rcu(&vport->dp_hash_node, head); } - return vport; } -/* Called with RTNL lock. */ void ovs_dp_detach_port(struct vport *p) { - ASSERT_RTNL(); + ASSERT_OVSL(); /* First drop references to device. 
*/ hlist_del_rcu(&p->dp_hash_node); @@ -251,7 +271,8 @@ static struct genl_family dp_packet_genl_family = { .name = OVS_PACKET_FAMILY, .version = OVS_PACKET_VERSION, .maxattr = OVS_PACKET_ATTR_MAX, - .netnsok = true + .netnsok = true, + .parallel_ops = true, }; int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb, @@ -301,7 +322,7 @@ static int queue_gso_packets(struct net *net, int dp_ifindex, struct sk_buff *segs, *nskb; int err; - segs = skb_gso_segment(skb, NETIF_F_SG | NETIF_F_HW_CSUM); + segs = __skb_gso_segment(skb, NETIF_F_SG | NETIF_F_HW_CSUM, false); if (IS_ERR(segs)) return PTR_ERR(segs); @@ -338,6 +359,35 @@ static int queue_gso_packets(struct net *net, int dp_ifindex, return err; } +static size_t key_attr_size(void) +{ + return nla_total_size(4) /* OVS_KEY_ATTR_PRIORITY */ + + nla_total_size(4) /* OVS_KEY_ATTR_IN_PORT */ + + nla_total_size(4) /* OVS_KEY_ATTR_SKB_MARK */ + + nla_total_size(12) /* OVS_KEY_ATTR_ETHERNET */ + + nla_total_size(2) /* OVS_KEY_ATTR_ETHERTYPE */ + + nla_total_size(4) /* OVS_KEY_ATTR_8021Q */ + + nla_total_size(0) /* OVS_KEY_ATTR_ENCAP */ + + nla_total_size(2) /* OVS_KEY_ATTR_ETHERTYPE */ + + nla_total_size(40) /* OVS_KEY_ATTR_IPV6 */ + + nla_total_size(2) /* OVS_KEY_ATTR_ICMPV6 */ + + nla_total_size(28); /* OVS_KEY_ATTR_ND */ +} + +static size_t upcall_msg_size(const struct sk_buff *skb, + const struct nlattr *userdata) +{ + size_t size = NLMSG_ALIGN(sizeof(struct ovs_header)) + + nla_total_size(skb->len) /* OVS_PACKET_ATTR_PACKET */ + + nla_total_size(key_attr_size()); /* OVS_PACKET_ATTR_KEY */ + + /* OVS_PACKET_ATTR_USERDATA */ + if (userdata) + size += NLA_ALIGN(userdata->nla_len); + + return size; +} + static int queue_userspace_packet(struct net *net, int dp_ifindex, struct sk_buff *skb, const struct dp_upcall_info *upcall_info) @@ -346,7 +396,6 @@ static int queue_userspace_packet(struct net *net, int dp_ifindex, struct sk_buff *nskb = NULL; struct sk_buff *user_skb; /* to be queued to userspace */ struct nlattr *nla; - unsigned int len; int err; if (vlan_tx_tag_present(skb)) { @@ -354,7 +403,7 @@ static int queue_userspace_packet(struct net *net, int dp_ifindex, if (!nskb) return -ENOMEM; - nskb = __vlan_put_tag(nskb, vlan_tx_tag_get(nskb)); + nskb = __vlan_put_tag(nskb, nskb->vlan_proto, vlan_tx_tag_get(nskb)); if (!nskb) return -ENOMEM; @@ -367,13 +416,7 @@ static int queue_userspace_packet(struct net *net, int dp_ifindex, goto out; } - len = sizeof(struct ovs_header); - len += nla_total_size(skb->len); - len += nla_total_size(FLOW_BUFSIZE); - if (upcall_info->cmd == OVS_PACKET_CMD_ACTION) - len += nla_total_size(8); - - user_skb = genlmsg_new(len, GFP_ATOMIC); + user_skb = genlmsg_new(upcall_msg_size(skb, upcall_info->userdata), GFP_ATOMIC); if (!user_skb) { err = -ENOMEM; goto out; @@ -388,13 +431,15 @@ static int queue_userspace_packet(struct net *net, int dp_ifindex, nla_nest_end(user_skb, nla); if (upcall_info->userdata) - nla_put_u64(user_skb, OVS_PACKET_ATTR_USERDATA, - nla_get_u64(upcall_info->userdata)); + __nla_put(user_skb, OVS_PACKET_ATTR_USERDATA, + nla_len(upcall_info->userdata), + nla_data(upcall_info->userdata)); nla = __nla_reserve(user_skb, OVS_PACKET_ATTR_PACKET, skb->len); skb_copy_and_csum_dev(skb, nla_data(nla)); + genlmsg_end(user_skb, upcall); err = genlmsg_unicast(net, user_skb, upcall_info->portid); out: @@ -402,13 +447,13 @@ out: return err; } -/* Called with genl_mutex. */ +/* Called with ovs_mutex. 
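
/*
 * Editorial sketch, not part of the patch: upcall_msg_size() and
 * key_attr_size() above precompute the genl message size as a sum of
 * netlink attribute totals. Each attribute occupies a 4-byte header
 * plus its payload, rounded up to a 4-byte boundary. A standalone
 * model of that arithmetic (macro values mirror the uapi definitions):
 */
#include <stdio.h>

#define NLA_ALIGNTO	4
#define NLA_ALIGN(len)	(((len) + NLA_ALIGNTO - 1) & ~(NLA_ALIGNTO - 1))
#define NLA_HDRLEN	4	/* struct nlattr: u16 len + u16 type */

static int nla_total_size_model(int payload)
{
	return NLA_ALIGN(NLA_HDRLEN + payload);
}

int main(void)
{
	/* e.g. the 40-byte OVS_KEY_ATTR_IPV6 payload occupies 44 bytes */
	printf("%d\n", nla_total_size_model(40));
	return 0;
}
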
*/ static int flush_flows(struct datapath *dp) { struct flow_table *old_table; struct flow_table *new_table; - old_table = genl_dereference(dp->table); + old_table = ovsl_dereference(dp->table); new_table = ovs_flow_tbl_alloc(TBL_MIN_BUCKETS); if (!new_table) return -ENOMEM; @@ -544,7 +589,7 @@ static int validate_userspace(const struct nlattr *attr) { static const struct nla_policy userspace_policy[OVS_USERSPACE_ATTR_MAX + 1] = { [OVS_USERSPACE_ATTR_PID] = {.type = NLA_U32 }, - [OVS_USERSPACE_ATTR_USERDATA] = {.type = NLA_U64 }, + [OVS_USERSPACE_ATTR_USERDATA] = {.type = NLA_UNSPEC }, }; struct nlattr *a[OVS_USERSPACE_ATTR_MAX + 1]; int error; @@ -661,8 +706,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) err = -EINVAL; if (!a[OVS_PACKET_ATTR_PACKET] || !a[OVS_PACKET_ATTR_KEY] || - !a[OVS_PACKET_ATTR_ACTIONS] || - nla_len(a[OVS_PACKET_ATTR_PACKET]) < ETH_HLEN) + !a[OVS_PACKET_ATTR_ACTIONS]) goto err; len = nla_len(a[OVS_PACKET_ATTR_PACKET]); @@ -672,7 +716,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) goto err; skb_reserve(packet, NET_IP_ALIGN); - memcpy(__skb_put(packet, len), nla_data(a[OVS_PACKET_ATTR_PACKET]), len); + nla_memcpy(__skb_put(packet, len), a[OVS_PACKET_ATTR_PACKET], len); skb_reset_mac_header(packet); eth = eth_hdr(packet); @@ -680,7 +724,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) /* Normally, setting the skb 'protocol' field would be handled by a * call to eth_type_trans(), but it assumes there's a sending * device, which we may not have. */ - if (ntohs(eth->h_proto) >= 1536) + if (ntohs(eth->h_proto) >= ETH_P_802_3_MIN) packet->protocol = eth->h_proto; else packet->protocol = htons(ETH_P_802_2); @@ -743,7 +787,7 @@ err: } static const struct nla_policy packet_policy[OVS_PACKET_ATTR_MAX + 1] = { - [OVS_PACKET_ATTR_PACKET] = { .type = NLA_UNSPEC }, + [OVS_PACKET_ATTR_PACKET] = { .len = ETH_HLEN }, [OVS_PACKET_ATTR_KEY] = { .type = NLA_NESTED }, [OVS_PACKET_ATTR_ACTIONS] = { .type = NLA_NESTED }, }; @@ -759,7 +803,7 @@ static struct genl_ops dp_packet_genl_ops[] = { static void get_dp_stats(struct datapath *dp, struct ovs_dp_stats *stats) { int i; - struct flow_table *table = genl_dereference(dp->table); + struct flow_table *table = ovsl_dereference(dp->table); stats->n_flows = ovs_flow_tbl_count(table); @@ -794,14 +838,25 @@ static struct genl_family dp_flow_genl_family = { .name = OVS_FLOW_FAMILY, .version = OVS_FLOW_VERSION, .maxattr = OVS_FLOW_ATTR_MAX, - .netnsok = true + .netnsok = true, + .parallel_ops = true, }; static struct genl_multicast_group ovs_dp_flow_multicast_group = { .name = OVS_FLOW_MCGROUP }; -/* Called with genl_lock. */ +static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts) +{ + return NLMSG_ALIGN(sizeof(struct ovs_header)) + + nla_total_size(key_attr_size()) /* OVS_FLOW_ATTR_KEY */ + + nla_total_size(sizeof(struct ovs_flow_stats)) /* OVS_FLOW_ATTR_STATS */ + + nla_total_size(1) /* OVS_FLOW_ATTR_TCP_FLAGS */ + + nla_total_size(8) /* OVS_FLOW_ATTR_USED */ + + nla_total_size(acts->actions_len); /* OVS_FLOW_ATTR_ACTIONS */ +} + +/* Called with ovs_mutex. 
*/ static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp, struct sk_buff *skb, u32 portid, u32 seq, u32 flags, u8 cmd) @@ -815,8 +870,7 @@ static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp, u8 tcp_flags; int err; - sf_acts = rcu_dereference_protected(flow->sf_acts, - lockdep_genl_is_held()); + sf_acts = ovsl_dereference(flow->sf_acts); ovs_header = genlmsg_put(skb, portid, seq, &dp_flow_genl_family, flags, cmd); if (!ovs_header) @@ -879,25 +933,10 @@ error: static struct sk_buff *ovs_flow_cmd_alloc_info(struct sw_flow *flow) { const struct sw_flow_actions *sf_acts; - int len; - - sf_acts = rcu_dereference_protected(flow->sf_acts, - lockdep_genl_is_held()); - - /* OVS_FLOW_ATTR_KEY */ - len = nla_total_size(FLOW_BUFSIZE); - /* OVS_FLOW_ATTR_ACTIONS */ - len += nla_total_size(sf_acts->actions_len); - /* OVS_FLOW_ATTR_STATS */ - len += nla_total_size(sizeof(struct ovs_flow_stats)); - /* OVS_FLOW_ATTR_TCP_FLAGS */ - len += nla_total_size(1); - /* OVS_FLOW_ATTR_USED */ - len += nla_total_size(8); - len += NLMSG_ALIGN(sizeof(struct ovs_header)); + sf_acts = ovsl_dereference(flow->sf_acts); - return genlmsg_new(len, GFP_KERNEL); + return genlmsg_new(ovs_flow_cmd_msg_size(sf_acts), GFP_KERNEL); } static struct sk_buff *ovs_flow_cmd_build_info(struct sw_flow *flow, @@ -946,12 +985,13 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) goto error; } + ovs_lock(); dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); error = -ENODEV; if (!dp) - goto error; + goto err_unlock_ovs; - table = genl_dereference(dp->table); + table = ovsl_dereference(dp->table); flow = ovs_flow_tbl_lookup(table, &key, key_len); if (!flow) { struct sw_flow_actions *acts; @@ -959,7 +999,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) /* Bail out if we're not allowed to create a new flow. */ error = -ENOENT; if (info->genlhdr->cmd == OVS_FLOW_CMD_SET) - goto error; + goto err_unlock_ovs; /* Expand table, if necessary, to make room. */ if (ovs_flow_tbl_need_to_expand(table)) { @@ -969,7 +1009,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) if (!IS_ERR(new_table)) { rcu_assign_pointer(dp->table, new_table); ovs_flow_tbl_deferred_destroy(table); - table = genl_dereference(dp->table); + table = ovsl_dereference(dp->table); } } @@ -977,7 +1017,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) flow = ovs_flow_alloc(); if (IS_ERR(flow)) { error = PTR_ERR(flow); - goto error; + goto err_unlock_ovs; } flow->key = key; clear_stats(flow); @@ -1010,11 +1050,10 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) error = -EEXIST; if (info->genlhdr->cmd == OVS_FLOW_CMD_NEW && info->nlhdr->nlmsg_flags & (NLM_F_CREATE | NLM_F_EXCL)) - goto error; + goto err_unlock_ovs; /* Update actions. 
*/ - old_acts = rcu_dereference_protected(flow->sf_acts, - lockdep_genl_is_held()); + old_acts = ovsl_dereference(flow->sf_acts); acts_attrs = a[OVS_FLOW_ATTR_ACTIONS]; if (acts_attrs && (old_acts->actions_len != nla_len(acts_attrs) || @@ -1025,7 +1064,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) new_acts = ovs_flow_actions_alloc(acts_attrs); error = PTR_ERR(new_acts); if (IS_ERR(new_acts)) - goto error; + goto err_unlock_ovs; rcu_assign_pointer(flow->sf_acts, new_acts); ovs_flow_deferred_free_acts(old_acts); @@ -1041,11 +1080,10 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) spin_unlock_bh(&flow->lock); } } + ovs_unlock(); if (!IS_ERR(reply)) - genl_notify(reply, genl_info_net(info), info->snd_portid, - ovs_dp_flow_multicast_group.id, info->nlhdr, - GFP_KERNEL); + ovs_notify(reply, info, &ovs_dp_flow_multicast_group); else netlink_set_err(sock_net(skb->sk)->genl_sock, 0, ovs_dp_flow_multicast_group.id, PTR_ERR(reply)); @@ -1053,6 +1091,8 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) error_free_flow: ovs_flow_free(flow); +err_unlock_ovs: + ovs_unlock(); error: return error; } @@ -1075,21 +1115,32 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info) if (err) return err; + ovs_lock(); dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); - if (!dp) - return -ENODEV; + if (!dp) { + err = -ENODEV; + goto unlock; + } - table = genl_dereference(dp->table); + table = ovsl_dereference(dp->table); flow = ovs_flow_tbl_lookup(table, &key, key_len); - if (!flow) - return -ENOENT; + if (!flow) { + err = -ENOENT; + goto unlock; + } reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid, info->snd_seq, OVS_FLOW_CMD_NEW); - if (IS_ERR(reply)) - return PTR_ERR(reply); + if (IS_ERR(reply)) { + err = PTR_ERR(reply); + goto unlock; + } + ovs_unlock(); return genlmsg_reply(reply, info); +unlock: + ovs_unlock(); + return err; } static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info) @@ -1104,25 +1155,33 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info) int err; int key_len; + ovs_lock(); dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); - if (!dp) - return -ENODEV; - - if (!a[OVS_FLOW_ATTR_KEY]) - return flush_flows(dp); + if (!dp) { + err = -ENODEV; + goto unlock; + } + if (!a[OVS_FLOW_ATTR_KEY]) { + err = flush_flows(dp); + goto unlock; + } err = ovs_flow_from_nlattrs(&key, &key_len, a[OVS_FLOW_ATTR_KEY]); if (err) - return err; + goto unlock; - table = genl_dereference(dp->table); + table = ovsl_dereference(dp->table); flow = ovs_flow_tbl_lookup(table, &key, key_len); - if (!flow) - return -ENOENT; + if (!flow) { + err = -ENOENT; + goto unlock; + } reply = ovs_flow_cmd_alloc_info(flow); - if (!reply) - return -ENOMEM; + if (!reply) { + err = -ENOMEM; + goto unlock; + } ovs_flow_tbl_remove(table, flow); @@ -1131,10 +1190,13 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info) BUG_ON(err < 0); ovs_flow_deferred_free(flow); + ovs_unlock(); - genl_notify(reply, genl_info_net(info), info->snd_portid, - ovs_dp_flow_multicast_group.id, info->nlhdr, GFP_KERNEL); + ovs_notify(reply, info, &ovs_dp_flow_multicast_group); return 0; +unlock: + ovs_unlock(); + return err; } static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) @@ -1143,11 +1205,14 @@ static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) struct datapath *dp; struct flow_table *table; + ovs_lock(); 
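
/*
 * Editorial sketch, not part of the patch: the ovsl_dereference() /
 * lockdep_ovsl_is_held() pattern used throughout these hunks, modelled
 * in userspace with a pthread mutex and an assertion standing in for
 * lockdep. Names are illustrative.
 */
#include <assert.h>
#include <pthread.h>

static pthread_mutex_t ovs_mutex_model = PTHREAD_MUTEX_INITIALIZER;
static int ovs_mutex_held;	/* stand-in for lockdep_is_held() */

static void ovs_lock_model(void)
{
	pthread_mutex_lock(&ovs_mutex_model);
	ovs_mutex_held = 1;
}

static void ovs_unlock_model(void)
{
	ovs_mutex_held = 0;
	pthread_mutex_unlock(&ovs_mutex_model);
}

/* A dereference that is only legal while the writer lock is held,
 * the moral equivalent of ovsl_dereference(p). */
#define ovsl_deref_model(p)	(assert(ovs_mutex_held), (p))
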
dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); - if (!dp) + if (!dp) { + ovs_unlock(); return -ENODEV; + } - table = genl_dereference(dp->table); + table = ovsl_dereference(dp->table); for (;;) { struct sw_flow *flow; @@ -1168,6 +1233,7 @@ static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) cb->args[0] = bucket; cb->args[1] = obj; } + ovs_unlock(); return skb->len; } @@ -1206,13 +1272,24 @@ static struct genl_family dp_datapath_genl_family = { .name = OVS_DATAPATH_FAMILY, .version = OVS_DATAPATH_VERSION, .maxattr = OVS_DP_ATTR_MAX, - .netnsok = true + .netnsok = true, + .parallel_ops = true, }; static struct genl_multicast_group ovs_dp_datapath_multicast_group = { .name = OVS_DATAPATH_MCGROUP }; +static size_t ovs_dp_cmd_msg_size(void) +{ + size_t msgsize = NLMSG_ALIGN(sizeof(struct ovs_header)); + + msgsize += nla_total_size(IFNAMSIZ); + msgsize += nla_total_size(sizeof(struct ovs_dp_stats)); + + return msgsize; +} + static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb, u32 portid, u32 seq, u32 flags, u8 cmd) { @@ -1251,7 +1328,7 @@ static struct sk_buff *ovs_dp_cmd_build_info(struct datapath *dp, u32 portid, struct sk_buff *skb; int retval; - skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + skb = genlmsg_new(ovs_dp_cmd_msg_size(), GFP_KERNEL); if (!skb) return ERR_PTR(-ENOMEM); @@ -1263,7 +1340,7 @@ static struct sk_buff *ovs_dp_cmd_build_info(struct datapath *dp, u32 portid, return skb; } -/* Called with genl_mutex and optionally with RTNL lock also. */ +/* Called with ovs_mutex. */ static struct datapath *lookup_datapath(struct net *net, struct ovs_header *ovs_header, struct nlattr *a[OVS_DP_ATTR_MAX + 1]) @@ -1297,12 +1374,12 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) if (!a[OVS_DP_ATTR_NAME] || !a[OVS_DP_ATTR_UPCALL_PID]) goto err; - rtnl_lock(); + ovs_lock(); err = -ENOMEM; dp = kzalloc(sizeof(*dp), GFP_KERNEL); if (dp == NULL) - goto err_unlock_rtnl; + goto err_unlock_ovs; ovs_dp_set_net(dp, hold_net(sock_net(skb->sk))); @@ -1353,55 +1430,49 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) ovs_net = net_generic(ovs_dp_get_net(dp), ovs_net_id); list_add_tail(&dp->list_node, &ovs_net->dps); - rtnl_unlock(); - genl_notify(reply, genl_info_net(info), info->snd_portid, - ovs_dp_datapath_multicast_group.id, info->nlhdr, - GFP_KERNEL); + ovs_unlock(); + + ovs_notify(reply, info, &ovs_dp_datapath_multicast_group); return 0; err_destroy_local_port: - ovs_dp_detach_port(ovs_vport_rtnl(dp, OVSP_LOCAL)); + ovs_dp_detach_port(ovs_vport_ovsl(dp, OVSP_LOCAL)); err_destroy_ports_array: kfree(dp->ports); err_destroy_percpu: free_percpu(dp->stats_percpu); err_destroy_table: - ovs_flow_tbl_destroy(genl_dereference(dp->table)); + ovs_flow_tbl_destroy(ovsl_dereference(dp->table)); err_free_dp: release_net(ovs_dp_get_net(dp)); kfree(dp); -err_unlock_rtnl: - rtnl_unlock(); +err_unlock_ovs: + ovs_unlock(); err: return err; } -/* Called with genl_mutex. */ +/* Called with ovs_mutex. 
 */
 static void __dp_destroy(struct datapath *dp)
 {
 	int i;
 
-	rtnl_lock();
-
 	for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
 		struct vport *vport;
-		struct hlist_node *node, *n;
+		struct hlist_node *n;
 
-		hlist_for_each_entry_safe(vport, node, n, &dp->ports[i], dp_hash_node)
+		hlist_for_each_entry_safe(vport, n, &dp->ports[i], dp_hash_node)
 			if (vport->port_no != OVSP_LOCAL)
 				ovs_dp_detach_port(vport);
 	}
 
 	list_del(&dp->list_node);
-	ovs_dp_detach_port(ovs_vport_rtnl(dp, OVSP_LOCAL));
 
-	/* rtnl_unlock() will wait until all the references to devices that
-	 * are pending unregistration have been dropped.  We do it here to
-	 * ensure that any internal devices (which contain DP pointers) are
-	 * fully destroyed before freeing the datapath.
+	/* OVSP_LOCAL is the datapath's internal port.  We need to make sure
+	 * that all ports in the datapath are destroyed before the datapath
+	 * itself is freed.
 	 */
-	rtnl_unlock();
+	ovs_dp_detach_port(ovs_vport_ovsl(dp, OVSP_LOCAL));
 
 	call_rcu(&dp->rcu, destroy_dp_rcu);
 }
 
@@ -1412,24 +1483,27 @@ static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info)
 {
 	struct sk_buff *reply;
 	struct datapath *dp;
 	int err;
 
+	ovs_lock();
 	dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
 	err = PTR_ERR(dp);
 	if (IS_ERR(dp))
-		return err;
+		goto unlock;
 
 	reply = ovs_dp_cmd_build_info(dp, info->snd_portid,
 				      info->snd_seq, OVS_DP_CMD_DEL);
 	err = PTR_ERR(reply);
 	if (IS_ERR(reply))
-		return err;
+		goto unlock;
 
 	__dp_destroy(dp);
+	ovs_unlock();
 
-	genl_notify(reply, genl_info_net(info), info->snd_portid,
-		    ovs_dp_datapath_multicast_group.id, info->nlhdr,
-		    GFP_KERNEL);
+	ovs_notify(reply, info, &ovs_dp_datapath_multicast_group);
 
 	return 0;
+unlock:
+	ovs_unlock();
+	return err;
 }
 
 static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info)
 {
@@ -1438,9 +1512,11 @@ static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info)
 	struct datapath *dp;
 	int err;
 
+	ovs_lock();
 	dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
+	err = PTR_ERR(dp);
 	if (IS_ERR(dp))
-		return PTR_ERR(dp);
+		goto unlock;
 
 	reply = ovs_dp_cmd_build_info(dp, info->snd_portid,
 				      info->snd_seq, OVS_DP_CMD_NEW);
@@ -1448,31 +1524,45 @@ static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info)
 		err = PTR_ERR(reply);
 		netlink_set_err(sock_net(skb->sk)->genl_sock, 0,
 				ovs_dp_datapath_multicast_group.id, err);
-		return 0;
+		err = 0;
+		goto unlock;
 	}
 
-	genl_notify(reply, genl_info_net(info), info->snd_portid,
-		    ovs_dp_datapath_multicast_group.id, info->nlhdr,
-		    GFP_KERNEL);
+	ovs_unlock();
+	ovs_notify(reply, info, &ovs_dp_datapath_multicast_group);
 
 	return 0;
+unlock:
+	ovs_unlock();
+	return err;
 }
 
 static int ovs_dp_cmd_get(struct sk_buff *skb, struct genl_info *info)
 {
 	struct sk_buff *reply;
 	struct datapath *dp;
+	int err;
 
+	ovs_lock();
 	dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
-	if (IS_ERR(dp))
-		return PTR_ERR(dp);
+	if (IS_ERR(dp)) {
+		err = PTR_ERR(dp);
+		goto unlock;
+	}
 
 	reply = ovs_dp_cmd_build_info(dp, info->snd_portid,
 				      info->snd_seq, OVS_DP_CMD_NEW);
-	if (IS_ERR(reply))
-		return PTR_ERR(reply);
+	if (IS_ERR(reply)) {
+		err = PTR_ERR(reply);
+		goto unlock;
+	}
+	ovs_unlock();
 
 	return genlmsg_reply(reply, info);
+
+unlock:
+	ovs_unlock();
+	return err;
 }
 
 static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
 {
@@ -1482,6 +1572,7 @@ static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
 	int skip = cb->args[0];
 	int i = 0;
 
+	ovs_lock();
 	list_for_each_entry(dp, &ovs_net->dps, list_node) {
 		if (i >= skip &&
 		    ovs_dp_cmd_fill_info(dp, skb,
NETLINK_CB(cb->skb).portid, @@ -1490,6 +1581,7 @@ static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) break; i++; } + ovs_unlock(); cb->args[0] = i; @@ -1535,14 +1627,15 @@ static struct genl_family dp_vport_genl_family = { .name = OVS_VPORT_FAMILY, .version = OVS_VPORT_VERSION, .maxattr = OVS_VPORT_ATTR_MAX, - .netnsok = true + .netnsok = true, + .parallel_ops = true, }; struct genl_multicast_group ovs_dp_vport_multicast_group = { .name = OVS_VPORT_MCGROUP }; -/* Called with RTNL lock or RCU read lock. */ +/* Called with ovs_mutex or RCU read lock. */ static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb, u32 portid, u32 seq, u32 flags, u8 cmd) { @@ -1581,7 +1674,7 @@ error: return err; } -/* Called with RTNL lock or RCU read lock. */ +/* Called with ovs_mutex or RCU read lock. */ struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, u32 portid, u32 seq, u8 cmd) { @@ -1593,14 +1686,12 @@ struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, u32 portid, return ERR_PTR(-ENOMEM); retval = ovs_vport_cmd_fill_info(vport, skb, portid, seq, 0, cmd); - if (retval < 0) { - kfree_skb(skb); - return ERR_PTR(retval); - } + BUG_ON(retval < 0); + return skb; } -/* Called with RTNL lock or RCU read lock. */ +/* Called with ovs_mutex or RCU read lock. */ static struct vport *lookup_vport(struct net *net, struct ovs_header *ovs_header, struct nlattr *a[OVS_VPORT_ATTR_MAX + 1]) @@ -1626,9 +1717,9 @@ static struct vport *lookup_vport(struct net *net, if (!dp) return ERR_PTR(-ENODEV); - vport = ovs_vport_rtnl_rcu(dp, port_no); + vport = ovs_vport_ovsl_rcu(dp, port_no); if (!vport) - return ERR_PTR(-ENOENT); + return ERR_PTR(-ENODEV); return vport; } else return ERR_PTR(-EINVAL); @@ -1650,7 +1741,7 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info) !a[OVS_VPORT_ATTR_UPCALL_PID]) goto exit; - rtnl_lock(); + ovs_lock(); dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); err = -ENODEV; if (!dp) @@ -1663,7 +1754,7 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info) if (port_no >= DP_MAX_PORTS) goto exit_unlock; - vport = ovs_vport_rtnl_rcu(dp, port_no); + vport = ovs_vport_ovsl(dp, port_no); err = -EBUSY; if (vport) goto exit_unlock; @@ -1673,7 +1764,7 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info) err = -EFBIG; goto exit_unlock; } - vport = ovs_vport_rtnl(dp, port_no); + vport = ovs_vport_ovsl(dp, port_no); if (!vport) break; } @@ -1691,6 +1782,7 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info) if (IS_ERR(vport)) goto exit_unlock; + err = 0; reply = ovs_vport_cmd_build_info(vport, info->snd_portid, info->snd_seq, OVS_VPORT_CMD_NEW); if (IS_ERR(reply)) { @@ -1698,11 +1790,11 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info) ovs_dp_detach_port(vport); goto exit_unlock; } - genl_notify(reply, genl_info_net(info), info->snd_portid, - ovs_dp_vport_multicast_group.id, info->nlhdr, GFP_KERNEL); + + ovs_notify(reply, info, &ovs_dp_vport_multicast_group); exit_unlock: - rtnl_unlock(); + ovs_unlock(); exit: return err; } @@ -1714,7 +1806,7 @@ static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info) struct vport *vport; int err; - rtnl_lock(); + ovs_lock(); vport = lookup_vport(sock_net(skb->sk), info->userhdr, a); err = PTR_ERR(vport); if (IS_ERR(vport)) @@ -1725,26 +1817,35 @@ static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info) nla_get_u32(a[OVS_VPORT_ATTR_TYPE]) != 
vport->ops->type)
 		err = -EINVAL;
 
+	reply = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!reply) {
+		err = -ENOMEM;
+		goto exit_unlock;
+	}
+
 	if (!err && a[OVS_VPORT_ATTR_OPTIONS])
 		err = ovs_vport_set_options(vport, a[OVS_VPORT_ATTR_OPTIONS]);
 	if (err)
-		goto exit_unlock;
+		goto exit_free;
+
 	if (a[OVS_VPORT_ATTR_UPCALL_PID])
 		vport->upcall_portid = nla_get_u32(a[OVS_VPORT_ATTR_UPCALL_PID]);
 
-	reply = ovs_vport_cmd_build_info(vport, info->snd_portid, info->snd_seq,
-					 OVS_VPORT_CMD_NEW);
-	if (IS_ERR(reply)) {
-		netlink_set_err(sock_net(skb->sk)->genl_sock, 0,
-				ovs_dp_vport_multicast_group.id, PTR_ERR(reply));
-		goto exit_unlock;
-	}
+	err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid,
+				      info->snd_seq, 0, OVS_VPORT_CMD_NEW);
+	BUG_ON(err < 0);
+
+	ovs_unlock();
+	ovs_notify(reply, info, &ovs_dp_vport_multicast_group);
+	return 0;
 
-	genl_notify(reply, genl_info_net(info), info->snd_portid,
-		    ovs_dp_vport_multicast_group.id, info->nlhdr, GFP_KERNEL);
 
+exit_free:
+	kfree_skb(reply);
 exit_unlock:
-	rtnl_unlock();
+	ovs_unlock();
 	return err;
 }
 
@@ -1755,7 +1856,7 @@ static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)
 	struct vport *vport;
 	int err;
 
-	rtnl_lock();
+	ovs_lock();
 	vport = lookup_vport(sock_net(skb->sk), info->userhdr, a);
 	err = PTR_ERR(vport);
 	if (IS_ERR(vport))
@@ -1772,13 +1873,13 @@ static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)
 	if (IS_ERR(reply))
 		goto exit_unlock;
 
+	err = 0;
 	ovs_dp_detach_port(vport);
 
-	genl_notify(reply, genl_info_net(info), info->snd_portid,
-		    ovs_dp_vport_multicast_group.id, info->nlhdr, GFP_KERNEL);
+	ovs_notify(reply, info, &ovs_dp_vport_multicast_group);
 
 exit_unlock:
-	rtnl_unlock();
+	ovs_unlock();
 	return err;
 }
 
@@ -1825,10 +1926,9 @@ static int ovs_vport_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
 	rcu_read_lock();
 	for (i = bucket; i < DP_VPORT_HASH_BUCKETS; i++) {
 		struct vport *vport;
-		struct hlist_node *n;
 
 		j = 0;
-		hlist_for_each_entry_rcu(vport, n, &dp->ports[i], dp_hash_node) {
+		hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node) {
 			if (j >= skip &&
 			    ovs_vport_cmd_fill_info(vport, skb,
 						    NETLINK_CB(cb->skb).portid,
@@ -1939,13 +2039,13 @@ static void rehash_flow_table(struct work_struct *work)
 	struct datapath *dp;
 	struct net *net;
 
-	genl_lock();
+	ovs_lock();
 	rtnl_lock();
 	for_each_net(net) {
 		struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
 
 		list_for_each_entry(dp, &ovs_net->dps, list_node) {
-			struct flow_table *old_table = genl_dereference(dp->table);
+			struct flow_table *old_table = ovsl_dereference(dp->table);
 			struct flow_table *new_table;
 
 			new_table = ovs_flow_tbl_rehash(old_table);
@@ -1956,8 +2056,7 @@ static void rehash_flow_table(struct work_struct *work)
 		}
 	}
 	rtnl_unlock();
-	genl_unlock();
-
+	ovs_unlock();
 	schedule_delayed_work(&rehash_flow_wq, REHASH_FLOW_INTERVAL);
 }
 
@@ -1966,18 +2065,21 @@ static int __net_init ovs_init_net(struct net *net)
 {
 	struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
 
 	INIT_LIST_HEAD(&ovs_net->dps);
+	INIT_WORK(&ovs_net->dp_notify_work, ovs_dp_notify_wq);
 	return 0;
 }
 
 static void __net_exit ovs_exit_net(struct net *net)
 {
-	struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
 	struct datapath *dp, *dp_next;
+	struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
 
-	genl_lock();
+	ovs_lock();
 	list_for_each_entry_safe(dp, dp_next, &ovs_net->dps, list_node)
 		__dp_destroy(dp);
-	genl_unlock();
+	ovs_unlock();
+
+	cancel_work_sync(&ovs_net->dp_notify_work);
 }
 
 static struct pernet_operations ovs_net_ops = {
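
/*
 * Editorial sketch, not part of the patch: the BUILD_BUG_ON() change in
 * the dp_init() hunk just below drops the dummy_skb local by measuring
 * the cb[] member through a null pointer. A userspace equivalent of
 * that compile-time check, using C11 _Static_assert (the struct sizes
 * here are illustrative, not the real kernel layouts):
 */
#include <stddef.h>

#define FIELD_SIZEOF(t, f)	(sizeof(((t *)0)->f))

struct skb_model    { char cb[48]; };	/* like sk_buff::cb */
struct ovs_cb_model { void *flow; };	/* like ovs_skb_cb */

_Static_assert(sizeof(struct ovs_cb_model) <=
	       FIELD_SIZEOF(struct skb_model, cb),
	       "ovs_skb_cb must fit inside skb->cb");
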
@@ -1989,10 +2091,9 @@ static struct pernet_operations ovs_net_ops = {
 
 static int __init dp_init(void)
 {
-	struct sk_buff *dummy_skb;
 	int err;
 
-	BUILD_BUG_ON(sizeof(struct ovs_skb_cb) > sizeof(dummy_skb->cb));
+	BUILD_BUG_ON(sizeof(struct ovs_skb_cb) > FIELD_SIZEOF(struct sk_buff, cb));
 
 	pr_info("Open vSwitch switching datapath\n");
 
diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
index 031dfbf37c93..16b840695216 100644
--- a/net/openvswitch/datapath.h
+++ b/net/openvswitch/datapath.h
@@ -57,10 +57,9 @@ struct dp_stats_percpu {
  * struct datapath - datapath for flow-based packet switching
  * @rcu: RCU callback head for deferred destruction.
  * @list_node: Element in global 'dps' list.
- * @n_flows: Number of flows currently in flow table.
- * @table: Current flow table.  Protected by genl_lock and RCU.
+ * @table: Current flow table.  Protected by ovs_mutex and RCU.
  * @ports: Hash table for ports.  %OVSP_LOCAL port always exists.  Protected by
- *	RTNL and RCU.
+ *	ovs_mutex and RCU.
  * @stats_percpu: Per-CPU datapath statistics.
  * @net: Reference to net namespace.
  *
@@ -86,26 +85,6 @@ struct datapath {
 #endif
 };
 
-struct vport *ovs_lookup_vport(const struct datapath *dp, u16 port_no);
-
-static inline struct vport *ovs_vport_rcu(const struct datapath *dp, int port_no)
-{
-	WARN_ON_ONCE(!rcu_read_lock_held());
-	return ovs_lookup_vport(dp, port_no);
-}
-
-static inline struct vport *ovs_vport_rtnl_rcu(const struct datapath *dp, int port_no)
-{
-	WARN_ON_ONCE(!rcu_read_lock_held() && !rtnl_is_locked());
-	return ovs_lookup_vport(dp, port_no);
-}
-
-static inline struct vport *ovs_vport_rtnl(const struct datapath *dp, int port_no)
-{
-	ASSERT_RTNL();
-	return ovs_lookup_vport(dp, port_no);
-}
-
 /**
  * struct ovs_skb_cb - OVS data in skb CB
  * @flow: The flow associated with this packet.  May be %NULL if no flow.
@@ -119,7 +98,7 @@ struct ovs_skb_cb {
  * struct dp_upcall - metadata to include with a packet to send to userspace
  * @cmd: One of %OVS_PACKET_CMD_*.
  * @key: Becomes %OVS_PACKET_ATTR_KEY.  Must be nonnull.
- * @userdata: If nonnull, its u64 value is extracted and passed to userspace as
+ * @userdata: If nonnull, its variable-length value is passed to userspace as
  *	%OVS_PACKET_ATTR_USERDATA.
 * @pid: Netlink PID to which packet should be sent.  If @pid is 0 then no
 *	packet is sent and the packet is accounted in the datapath's @n_lost
@@ -132,6 +111,30 @@ struct dp_upcall_info {
 	u32 portid;
 };
 
+/**
+ * struct ovs_net - Per net-namespace data for ovs.
+ * @dps: List of datapaths to enable dumping them all out.
+ *	Protected by ovs_mutex.
+ */ +struct ovs_net { + struct list_head dps; + struct work_struct dp_notify_work; +}; + +extern int ovs_net_id; +void ovs_lock(void); +void ovs_unlock(void); + +#ifdef CONFIG_LOCKDEP +int lockdep_ovsl_is_held(void); +#else +#define lockdep_ovsl_is_held() 1 +#endif + +#define ASSERT_OVSL() WARN_ON(unlikely(!lockdep_ovsl_is_held())) +#define ovsl_dereference(p) \ + rcu_dereference_protected(p, lockdep_ovsl_is_held()) + static inline struct net *ovs_dp_get_net(struct datapath *dp) { return read_pnet(&dp->net); @@ -142,6 +145,26 @@ static inline void ovs_dp_set_net(struct datapath *dp, struct net *net) write_pnet(&dp->net, net); } +struct vport *ovs_lookup_vport(const struct datapath *dp, u16 port_no); + +static inline struct vport *ovs_vport_rcu(const struct datapath *dp, int port_no) +{ + WARN_ON_ONCE(!rcu_read_lock_held()); + return ovs_lookup_vport(dp, port_no); +} + +static inline struct vport *ovs_vport_ovsl_rcu(const struct datapath *dp, int port_no) +{ + WARN_ON_ONCE(!rcu_read_lock_held() && !lockdep_ovsl_is_held()); + return ovs_lookup_vport(dp, port_no); +} + +static inline struct vport *ovs_vport_ovsl(const struct datapath *dp, int port_no) +{ + ASSERT_OVSL(); + return ovs_lookup_vport(dp, port_no); +} + extern struct notifier_block ovs_dp_device_notifier; extern struct genl_multicast_group ovs_dp_vport_multicast_group; @@ -155,4 +178,5 @@ struct sk_buff *ovs_vport_cmd_build_info(struct vport *, u32 pid, u32 seq, u8 cmd); int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb); +void ovs_dp_notify_wq(struct work_struct *work); #endif /* datapath.h */ diff --git a/net/openvswitch/dp_notify.c b/net/openvswitch/dp_notify.c index 5558350e0d33..ef4feec6cd84 100644 --- a/net/openvswitch/dp_notify.c +++ b/net/openvswitch/dp_notify.c @@ -18,46 +18,78 @@ #include <linux/netdevice.h> #include <net/genetlink.h> +#include <net/netns/generic.h> #include "datapath.h" #include "vport-internal_dev.h" #include "vport-netdev.h" +static void dp_detach_port_notify(struct vport *vport) +{ + struct sk_buff *notify; + struct datapath *dp; + + dp = vport->dp; + notify = ovs_vport_cmd_build_info(vport, 0, 0, + OVS_VPORT_CMD_DEL); + ovs_dp_detach_port(vport); + if (IS_ERR(notify)) { + netlink_set_err(ovs_dp_get_net(dp)->genl_sock, 0, + ovs_dp_vport_multicast_group.id, + PTR_ERR(notify)); + return; + } + + genlmsg_multicast_netns(ovs_dp_get_net(dp), notify, 0, + ovs_dp_vport_multicast_group.id, + GFP_KERNEL); +} + +void ovs_dp_notify_wq(struct work_struct *work) +{ + struct ovs_net *ovs_net = container_of(work, struct ovs_net, dp_notify_work); + struct datapath *dp; + + ovs_lock(); + list_for_each_entry(dp, &ovs_net->dps, list_node) { + int i; + + for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) { + struct vport *vport; + struct hlist_node *n; + + hlist_for_each_entry_safe(vport, n, &dp->ports[i], dp_hash_node) { + struct netdev_vport *netdev_vport; + + if (vport->ops->type != OVS_VPORT_TYPE_NETDEV) + continue; + + netdev_vport = netdev_vport_priv(vport); + if (netdev_vport->dev->reg_state == NETREG_UNREGISTERED || + netdev_vport->dev->reg_state == NETREG_UNREGISTERING) + dp_detach_port_notify(vport); + } + } + } + ovs_unlock(); +} + static int dp_device_event(struct notifier_block *unused, unsigned long event, void *ptr) { + struct ovs_net *ovs_net; struct net_device *dev = ptr; - struct vport *vport; + struct vport *vport = NULL; - if (ovs_is_internal_dev(dev)) - vport = ovs_internal_dev_get_vport(dev); - else + if (!ovs_is_internal_dev(dev)) vport = ovs_netdev_get_vport(dev); if (!vport) return 
NOTIFY_DONE; - switch (event) { - case NETDEV_UNREGISTER: - if (!ovs_is_internal_dev(dev)) { - struct sk_buff *notify; - struct datapath *dp = vport->dp; - - notify = ovs_vport_cmd_build_info(vport, 0, 0, - OVS_VPORT_CMD_DEL); - ovs_dp_detach_port(vport); - if (IS_ERR(notify)) { - netlink_set_err(ovs_dp_get_net(dp)->genl_sock, 0, - ovs_dp_vport_multicast_group.id, - PTR_ERR(notify)); - break; - } - - genlmsg_multicast_netns(ovs_dp_get_net(dp), notify, 0, - ovs_dp_vport_multicast_group.id, - GFP_KERNEL); - } - break; + if (event == NETDEV_UNREGISTER) { + ovs_net = net_generic(dev_net(dev), ovs_net_id); + queue_work(system_wq, &ovs_net->dp_notify_work); } return NOTIFY_DONE; diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index c3294cebc4f2..b15321a2228c 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -211,7 +211,7 @@ struct sw_flow_actions *ovs_flow_actions_alloc(const struct nlattr *actions) return ERR_PTR(-ENOMEM); sfa->actions_len = actions_len; - memcpy(sfa->actions, nla_data(actions), actions_len); + nla_memcpy(sfa->actions, actions, actions_len); return sfa; } @@ -299,10 +299,10 @@ void ovs_flow_tbl_destroy(struct flow_table *table) for (i = 0; i < table->n_buckets; i++) { struct sw_flow *flow; struct hlist_head *head = flex_array_get(table->buckets, i); - struct hlist_node *node, *n; + struct hlist_node *n; int ver = table->node_ver; - hlist_for_each_entry_safe(flow, node, n, head, hash_node[ver]) { + hlist_for_each_entry_safe(flow, n, head, hash_node[ver]) { hlist_del_rcu(&flow->hash_node[ver]); ovs_flow_free(flow); } @@ -332,7 +332,6 @@ struct sw_flow *ovs_flow_tbl_next(struct flow_table *table, u32 *bucket, u32 *la { struct sw_flow *flow; struct hlist_head *head; - struct hlist_node *n; int ver; int i; @@ -340,7 +339,7 @@ struct sw_flow *ovs_flow_tbl_next(struct flow_table *table, u32 *bucket, u32 *la while (*bucket < table->n_buckets) { i = 0; head = flex_array_get(table->buckets, *bucket); - hlist_for_each_entry_rcu(flow, n, head, hash_node[ver]) { + hlist_for_each_entry_rcu(flow, head, hash_node[ver]) { if (i < *last) { i++; continue; @@ -367,11 +366,10 @@ static void flow_table_copy_flows(struct flow_table *old, struct flow_table *new for (i = 0; i < old->n_buckets; i++) { struct sw_flow *flow; struct hlist_head *head; - struct hlist_node *n; head = flex_array_get(old->buckets, i); - hlist_for_each_entry(flow, n, head, hash_node[old_ver]) + hlist_for_each_entry(flow, head, hash_node[old_ver]) ovs_flow_tbl_insert(new, flow); } old->keep_flows = true; @@ -468,7 +466,7 @@ static __be16 parse_ethertype(struct sk_buff *skb) proto = *(__be16 *) skb->data; __skb_pull(skb, sizeof(__be16)); - if (ntohs(proto) >= 1536) + if (ntohs(proto) >= ETH_P_802_3_MIN) return proto; if (skb->len < sizeof(struct llc_snap_hdr)) @@ -484,7 +482,11 @@ static __be16 parse_ethertype(struct sk_buff *skb) return htons(ETH_P_802_2); __skb_pull(skb, sizeof(struct llc_snap_hdr)); - return llc->ethertype; + + if (ntohs(llc->ethertype) >= ETH_P_802_3_MIN) + return llc->ethertype; + + return htons(ETH_P_802_2); } static int parse_icmpv6(struct sk_buff *skb, struct sw_flow_key *key, @@ -766,14 +768,13 @@ struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *table, struct sw_flow_key *key, int key_len) { struct sw_flow *flow; - struct hlist_node *n; struct hlist_head *head; u32 hash; hash = ovs_flow_hash(key, key_len); head = find_bucket(table, hash); - hlist_for_each_entry_rcu(flow, n, head, hash_node[table->node_ver]) { + hlist_for_each_entry_rcu(flow, head, 
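
/*
 * Editorial sketch, not part of the patch: the parse_ethertype() change
 * above replaces the literal 1536 with ETH_P_802_3_MIN and also applies
 * the same rule to the SNAP-encapsulated ethertype. The classification
 * rule in isolation (constants mirror <linux/if_ether.h> values):
 */
#include <stdint.h>
#include <arpa/inet.h>

#define ETH_P_802_3_MIN_MODEL	0x0600	/* smallest valid EtherType */
#define ETH_P_802_2_MODEL	0x0004	/* "no EtherType" marker */

/* Values below 0x0600 are 802.3 frame lengths, not EtherTypes. */
static uint16_t classify_ethertype(uint16_t proto_be)
{
	if (ntohs(proto_be) >= ETH_P_802_3_MIN_MODEL)
		return proto_be;
	return htons(ETH_P_802_2_MODEL);
}
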
hash_node[table->node_ver]) { if (flow->hash == hash && !memcmp(&flow->key, key, key_len)) { @@ -794,9 +795,9 @@ void ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow) void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow) { + BUG_ON(table->count == 0); hlist_del_rcu(&flow->hash_node[table->node_ver]); table->count--; - BUG_ON(table->count < 0); } /* The size of the argument for each %OVS_KEY_ATTR_* Netlink attribute. */ @@ -1037,7 +1038,7 @@ int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp, if (attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) { swkey->eth.type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]); - if (ntohs(swkey->eth.type) < 1536) + if (ntohs(swkey->eth.type) < ETH_P_802_3_MIN) return -EINVAL; attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE); } else { diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h index a7bb60ff3b5b..0875fde65b9c 100644 --- a/net/openvswitch/flow.h +++ b/net/openvswitch/flow.h @@ -138,27 +138,6 @@ int ovs_flow_extract(struct sk_buff *, u16 in_port, struct sw_flow_key *, void ovs_flow_used(struct sw_flow *, struct sk_buff *); u64 ovs_flow_used_time(unsigned long flow_jiffies); -/* Upper bound on the length of a nlattr-formatted flow key. The longest - * nlattr-formatted flow key would be: - * - * struct pad nl hdr total - * ------ --- ------ ----- - * OVS_KEY_ATTR_PRIORITY 4 -- 4 8 - * OVS_KEY_ATTR_IN_PORT 4 -- 4 8 - * OVS_KEY_ATTR_SKB_MARK 4 -- 4 8 - * OVS_KEY_ATTR_ETHERNET 12 -- 4 16 - * OVS_KEY_ATTR_ETHERTYPE 2 2 4 8 (outer VLAN ethertype) - * OVS_KEY_ATTR_8021Q 4 -- 4 8 - * OVS_KEY_ATTR_ENCAP 0 -- 4 4 (VLAN encapsulation) - * OVS_KEY_ATTR_ETHERTYPE 2 2 4 8 (inner VLAN ethertype) - * OVS_KEY_ATTR_IPV6 40 -- 4 44 - * OVS_KEY_ATTR_ICMPV6 2 2 4 8 - * OVS_KEY_ATTR_ND 28 -- 4 32 - * ------------------------------------------------- - * total 152 - */ -#define FLOW_BUFSIZE 152 - int ovs_flow_to_nlattrs(const struct sw_flow_key *, struct sk_buff *); int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp, const struct nlattr *); diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c index 5d460c37df07..84e0a0379186 100644 --- a/net/openvswitch/vport-internal_dev.c +++ b/net/openvswitch/vport-internal_dev.c @@ -63,17 +63,6 @@ static struct rtnl_link_stats64 *internal_dev_get_stats(struct net_device *netde return stats; } -static int internal_dev_mac_addr(struct net_device *dev, void *p) -{ - struct sockaddr *addr = p; - - if (!is_valid_ether_addr(addr->sa_data)) - return -EADDRNOTAVAIL; - dev->addr_assign_type &= ~NET_ADDR_RANDOM; - memcpy(dev->dev_addr, addr->sa_data, dev->addr_len); - return 0; -} - /* Called with rcu_read_lock_bh. 
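
The parse_ethertype() hunks above replace the literal 1536 with ETH_P_802_3_MIN (0x0600), the boundary between an 802.3 length field and a real EtherType. A one-liner capturing the check (the helper name is illustrative):

#include <linux/if_ether.h>
#include <linux/types.h>

/* Values below ETH_P_802_3_MIN in h_proto are an 802.3 frame length,
 * not an EtherType, so the payload is LLC/SNAP rather than Ethernet II. */
static bool eth_proto_is_ethertype(__be16 proto)
{
        return ntohs(proto) >= ETH_P_802_3_MIN;
}
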
*/ static int internal_dev_xmit(struct sk_buff *skb, struct net_device *netdev) { @@ -98,7 +87,7 @@ static int internal_dev_stop(struct net_device *netdev) static void internal_dev_getinfo(struct net_device *netdev, struct ethtool_drvinfo *info) { - strcpy(info->driver, "openvswitch"); + strlcpy(info->driver, "openvswitch", sizeof(info->driver)); } static const struct ethtool_ops internal_dev_ethtool_ops = { @@ -127,7 +116,7 @@ static const struct net_device_ops internal_dev_netdev_ops = { .ndo_open = internal_dev_open, .ndo_stop = internal_dev_stop, .ndo_start_xmit = internal_dev_xmit, - .ndo_set_mac_address = internal_dev_mac_addr, + .ndo_set_mac_address = eth_mac_addr, .ndo_change_mtu = internal_dev_change_mtu, .ndo_get_stats64 = internal_dev_get_stats, }; @@ -139,6 +128,7 @@ static void do_setup(struct net_device *netdev) netdev->netdev_ops = &internal_dev_netdev_ops; netdev->priv_flags &= ~IFF_TX_SKB_SHARING; + netdev->priv_flags |= IFF_LIVE_ADDR_CHANGE; netdev->destructor = internal_dev_destructor; SET_ETHTOOL_OPS(netdev, &internal_dev_ethtool_ops); netdev->tx_queue_len = 0; @@ -147,7 +137,7 @@ static void do_setup(struct net_device *netdev) NETIF_F_HIGHDMA | NETIF_F_HW_CSUM | NETIF_F_TSO; netdev->vlan_features = netdev->features; - netdev->features |= NETIF_F_HW_VLAN_TX; + netdev->features |= NETIF_F_HW_VLAN_CTAG_TX; netdev->hw_features = netdev->features & ~NETIF_F_LLTX; eth_hw_addr_random(netdev); } @@ -183,16 +173,19 @@ static struct vport *internal_dev_create(const struct vport_parms *parms) if (vport->port_no == OVSP_LOCAL) netdev_vport->dev->features |= NETIF_F_NETNS_LOCAL; + rtnl_lock(); err = register_netdevice(netdev_vport->dev); if (err) goto error_free_netdev; dev_set_promiscuity(netdev_vport->dev, 1); + rtnl_unlock(); netif_start_queue(netdev_vport->dev); return vport; error_free_netdev: + rtnl_unlock(); free_netdev(netdev_vport->dev); error_free_vport: ovs_vport_free(vport); @@ -205,10 +198,13 @@ static void internal_dev_destroy(struct vport *vport) struct netdev_vport *netdev_vport = netdev_vport_priv(vport); netif_stop_queue(netdev_vport->dev); + rtnl_lock(); dev_set_promiscuity(netdev_vport->dev, -1); /* unregister_netdevice() waits for an RCU grace period. */ unregister_netdevice(netdev_vport->dev); + + rtnl_unlock(); } static int internal_dev_recv(struct vport *vport, struct sk_buff *skb) @@ -236,7 +232,6 @@ const struct vport_ops ovs_internal_vport_ops = { .create = internal_dev_create, .destroy = internal_dev_destroy, .get_name = ovs_netdev_get_name, - .get_ifindex = ovs_netdev_get_ifindex, .send = internal_dev_recv, }; diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c index 670cbc3518de..4f01c6d2ffa4 100644 --- a/net/openvswitch/vport-netdev.c +++ b/net/openvswitch/vport-netdev.c @@ -43,8 +43,7 @@ static void netdev_port_receive(struct vport *vport, struct sk_buff *skb) /* Make our own copy of the packet. Otherwise we will mangle the * packet for anyone who came before us (e.g. tcpdump via AF_PACKET). - * (No one comes after us, since we tell handle_bridge() that we took - * the packet.) 
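
The internal-dev hunks above drop the driver-private MAC handler in favor of the generic eth_mac_addr(), which refuses to change the address of a running interface unless IFF_LIVE_ADDR_CHANGE is set; hence the extra priv_flags bit in do_setup(). A sketch of the same wiring for a hypothetical device (hypo_* names are assumptions):

#include <linux/etherdevice.h>
#include <linux/netdevice.h>

static const struct net_device_ops hypo_netdev_ops = {
        .ndo_set_mac_address = eth_mac_addr,    /* generic validate + copy */
};

static void hypo_setup(struct net_device *dev)
{
        ether_setup(dev);
        dev->netdev_ops = &hypo_netdev_ops;
        /* allow eth_mac_addr() while the interface is up */
        dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
}
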
*/ + */ skb = skb_share_check(skb, GFP_ATOMIC); if (unlikely(!skb)) return; @@ -101,16 +100,20 @@ static struct vport *netdev_create(const struct vport_parms *parms) goto error_put; } + rtnl_lock(); err = netdev_rx_handler_register(netdev_vport->dev, netdev_frame_hook, vport); if (err) - goto error_put; + goto error_unlock; dev_set_promiscuity(netdev_vport->dev, 1); netdev_vport->dev->priv_flags |= IFF_OVS_DATAPATH; + rtnl_unlock(); return vport; +error_unlock: + rtnl_unlock(); error_put: dev_put(netdev_vport->dev); error_free_vport: @@ -132,9 +135,11 @@ static void netdev_destroy(struct vport *vport) { struct netdev_vport *netdev_vport = netdev_vport_priv(vport); + rtnl_lock(); netdev_vport->dev->priv_flags &= ~IFF_OVS_DATAPATH; netdev_rx_handler_unregister(netdev_vport->dev); dev_set_promiscuity(netdev_vport->dev, -1); + rtnl_unlock(); call_rcu(&netdev_vport->rcu, free_port_rcu); } @@ -145,12 +150,6 @@ const char *ovs_netdev_get_name(const struct vport *vport) return netdev_vport->dev->name; } -int ovs_netdev_get_ifindex(const struct vport *vport) -{ - const struct netdev_vport *netdev_vport = netdev_vport_priv(vport); - return netdev_vport->dev->ifindex; -} - static unsigned int packet_length(const struct sk_buff *skb) { unsigned int length = skb->len - ETH_HLEN; @@ -201,6 +200,5 @@ const struct vport_ops ovs_netdev_vport_ops = { .create = netdev_create, .destroy = netdev_destroy, .get_name = ovs_netdev_get_name, - .get_ifindex = ovs_netdev_get_ifindex, .send = netdev_send, }; diff --git a/net/openvswitch/vport-netdev.h b/net/openvswitch/vport-netdev.h index 6478079b3417..a3cb3a32cd77 100644 --- a/net/openvswitch/vport-netdev.h +++ b/net/openvswitch/vport-netdev.h @@ -40,6 +40,5 @@ netdev_vport_priv(const struct vport *vport) const char *ovs_netdev_get_name(const struct vport *); const char *ovs_netdev_get_config(const struct vport *); -int ovs_netdev_get_ifindex(const struct vport *); #endif /* vport_netdev.h */ diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c index 70af0bedbac4..720623190eaa 100644 --- a/net/openvswitch/vport.c +++ b/net/openvswitch/vport.c @@ -40,7 +40,7 @@ static const struct vport_ops *vport_ops_list[] = { &ovs_internal_vport_ops, }; -/* Protected by RCU read lock for reading, RTNL lock for writing. */ +/* Protected by RCU read lock for reading, ovs_mutex for writing. */ static struct hlist_head *dev_table; #define VPORT_HASH_BUCKETS 1024 @@ -80,15 +80,14 @@ static struct hlist_head *hash_bucket(struct net *net, const char *name) * * @name: name of port to find * - * Must be called with RTNL or RCU read lock. + * Must be called with ovs or RCU read lock. */ struct vport *ovs_vport_locate(struct net *net, const char *name) { struct hlist_head *bucket = hash_bucket(net, name); struct vport *vport; - struct hlist_node *node; - hlist_for_each_entry_rcu(vport, node, bucket, hash_node) + hlist_for_each_entry_rcu(vport, bucket, hash_node) if (!strcmp(name, vport->ops->get_name(vport)) && net_eq(ovs_dp_get_net(vport->dp), net)) return vport; @@ -129,7 +128,7 @@ struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *ops, vport->ops = ops; INIT_HLIST_NODE(&vport->dp_hash_node); - vport->percpu_stats = alloc_percpu(struct vport_percpu_stats); + vport->percpu_stats = alloc_percpu(struct pcpu_tstats); if (!vport->percpu_stats) { kfree(vport); return ERR_PTR(-ENOMEM); @@ -162,7 +161,7 @@ void ovs_vport_free(struct vport *vport) * @parms: Information about new vport. 
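
Now that vport setup no longer runs under RTNL implicitly, netdev_create() above takes rtnl_lock() itself around netdev_rx_handler_register(), which asserts RTNL internally. A minimal sketch of that locking shape (hypo_* names are illustrative):

#include <linux/netdevice.h>
#include <linux/rtnetlink.h>

static rx_handler_result_t hypo_frame_hook(struct sk_buff **pskb)
{
        return RX_HANDLER_PASS;         /* real hook would steal the skb */
}

static int hypo_attach(struct net_device *dev, void *priv)
{
        int err;

        rtnl_lock();
        err = netdev_rx_handler_register(dev, hypo_frame_hook, priv);
        if (!err)
                dev_set_promiscuity(dev, 1);
        rtnl_unlock();
        return err;
}
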
* * Creates a new vport with the specified configuration (which is dependent on - * device type). RTNL lock must be held. + * device type). ovs_mutex must be held. */ struct vport *ovs_vport_add(const struct vport_parms *parms) { @@ -170,8 +169,6 @@ struct vport *ovs_vport_add(const struct vport_parms *parms) int err = 0; int i; - ASSERT_RTNL(); - for (i = 0; i < ARRAY_SIZE(vport_ops_list); i++) { if (vport_ops_list[i]->type == parms->type) { struct hlist_head *bucket; @@ -202,12 +199,10 @@ out: * @port: New configuration. * * Modifies an existing device with the specified configuration (which is - * dependent on device type). RTNL lock must be held. + * dependent on device type). ovs_mutex must be held. */ int ovs_vport_set_options(struct vport *vport, struct nlattr *options) { - ASSERT_RTNL(); - if (!vport->ops->set_options) return -EOPNOTSUPP; return vport->ops->set_options(vport, options); @@ -219,11 +214,11 @@ int ovs_vport_set_options(struct vport *vport, struct nlattr *options) * @vport: vport to delete. * * Detaches @vport from its datapath and destroys it. It is possible to fail - * for reasons such as lack of memory. RTNL lock must be held. + * for reasons such as lack of memory. ovs_mutex must be held. */ void ovs_vport_del(struct vport *vport) { - ASSERT_RTNL(); + ASSERT_OVSL(); hlist_del_rcu(&vport->hash_node); @@ -238,7 +233,7 @@ void ovs_vport_del(struct vport *vport) * * Retrieves transmit, receive, and error stats for the given device. * - * Must be called with RTNL lock or rcu_read_lock. + * Must be called with ovs_mutex or rcu_read_lock. */ void ovs_vport_get_stats(struct vport *vport, struct ovs_vport_stats *stats) { @@ -265,16 +260,16 @@ void ovs_vport_get_stats(struct vport *vport, struct ovs_vport_stats *stats) spin_unlock_bh(&vport->stats_lock); for_each_possible_cpu(i) { - const struct vport_percpu_stats *percpu_stats; - struct vport_percpu_stats local_stats; + const struct pcpu_tstats *percpu_stats; + struct pcpu_tstats local_stats; unsigned int start; percpu_stats = per_cpu_ptr(vport->percpu_stats, i); do { - start = u64_stats_fetch_begin_bh(&percpu_stats->sync); + start = u64_stats_fetch_begin_bh(&percpu_stats->syncp); local_stats = *percpu_stats; - } while (u64_stats_fetch_retry_bh(&percpu_stats->sync, start)); + } while (u64_stats_fetch_retry_bh(&percpu_stats->syncp, start)); stats->rx_bytes += local_stats.rx_bytes; stats->rx_packets += local_stats.rx_packets; @@ -297,22 +292,24 @@ void ovs_vport_get_stats(struct vport *vport, struct ovs_vport_stats *stats) * negative error code if a real error occurred. If an error occurs, @skb is * left unmodified. * - * Must be called with RTNL lock or rcu_read_lock. + * Must be called with ovs_mutex or rcu_read_lock. */ int ovs_vport_get_options(const struct vport *vport, struct sk_buff *skb) { struct nlattr *nla; + int err; + + if (!vport->ops->get_options) + return 0; nla = nla_nest_start(skb, OVS_VPORT_ATTR_OPTIONS); if (!nla) return -EMSGSIZE; - if (vport->ops->get_options) { - int err = vport->ops->get_options(vport, skb); - if (err) { - nla_nest_cancel(skb, nla); - return err; - } + err = vport->ops->get_options(vport, skb); + if (err) { + nla_nest_cancel(skb, nla); + return err; } nla_nest_end(skb, nla); @@ -326,18 +323,17 @@ int ovs_vport_get_options(const struct vport *vport, struct sk_buff *skb) * @skb: skb that was received * * Must be called with rcu_read_lock. The packet cannot be shared and - * skb->data should point to the Ethernet header. 
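
The ovs_vport_get_stats() hunk above is the reader half of the u64_stats_sync pattern: on 32-bit SMP a 64-bit counter can be torn mid-update, so the snapshot is retried until the sequence count is stable. A self-contained sketch (hypo_* name is an assumption; struct pcpu_tstats comes from linux/if_tunnel.h, presumably why the vport.h hunk adds that include):

#include <linux/if_tunnel.h>
#include <linux/u64_stats_sync.h>

static u64 hypo_total_rx_bytes(struct pcpu_tstats __percpu *stats)
{
        u64 total = 0;
        int cpu;

        for_each_possible_cpu(cpu) {
                const struct pcpu_tstats *s = per_cpu_ptr(stats, cpu);
                struct pcpu_tstats snap;
                unsigned int start;

                do {    /* retry if a writer raced with the copy */
                        start = u64_stats_fetch_begin_bh(&s->syncp);
                        snap = *s;
                } while (u64_stats_fetch_retry_bh(&s->syncp, start));
                total += snap.rx_bytes;
        }
        return total;
}
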
The caller must have already - * called compute_ip_summed() to initialize the checksumming fields. + * skb->data should point to the Ethernet header. */ void ovs_vport_receive(struct vport *vport, struct sk_buff *skb) { - struct vport_percpu_stats *stats; + struct pcpu_tstats *stats; stats = this_cpu_ptr(vport->percpu_stats); - u64_stats_update_begin(&stats->sync); + u64_stats_update_begin(&stats->syncp); stats->rx_packets++; stats->rx_bytes += skb->len; - u64_stats_update_end(&stats->sync); + u64_stats_update_end(&stats->syncp); ovs_dp_process_received_packet(vport, skb); } @@ -348,7 +344,7 @@ void ovs_vport_receive(struct vport *vport, struct sk_buff *skb) * @vport: vport on which to send the packet * @skb: skb to send * - * Sends the given packet and returns the length of data sent. Either RTNL + * Sends the given packet and returns the length of data sent. Either ovs * lock or rcu_read_lock must be held. */ int ovs_vport_send(struct vport *vport, struct sk_buff *skb) @@ -356,14 +352,14 @@ int ovs_vport_send(struct vport *vport, struct sk_buff *skb) int sent = vport->ops->send(vport, skb); if (likely(sent)) { - struct vport_percpu_stats *stats; + struct pcpu_tstats *stats; stats = this_cpu_ptr(vport->percpu_stats); - u64_stats_update_begin(&stats->sync); + u64_stats_update_begin(&stats->syncp); stats->tx_packets++; stats->tx_bytes += sent; - u64_stats_update_end(&stats->sync); + u64_stats_update_end(&stats->syncp); } return sent; } diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h index 3f7961ea3c56..68a377bc0841 100644 --- a/net/openvswitch/vport.h +++ b/net/openvswitch/vport.h @@ -19,6 +19,7 @@ #ifndef VPORT_H #define VPORT_H 1 +#include <linux/if_tunnel.h> #include <linux/list.h> #include <linux/netlink.h> #include <linux/openvswitch.h> @@ -50,14 +51,6 @@ int ovs_vport_send(struct vport *, struct sk_buff *); /* The following definitions are for implementers of vport devices: */ -struct vport_percpu_stats { - u64 rx_bytes; - u64 rx_packets; - u64 tx_bytes; - u64 tx_packets; - struct u64_stats_sync sync; -}; - struct vport_err_stats { u64 rx_dropped; u64 rx_errors; @@ -68,10 +61,10 @@ struct vport_err_stats { /** * struct vport - one port within a datapath * @rcu: RCU callback head for deferred destruction. - * @port_no: Index into @dp's @ports array. * @dp: Datapath to which this port belongs. * @upcall_portid: The Netlink port to use for packets received on this port that * miss the flow table. + * @port_no: Index into @dp's @ports array. * @hash_node: Element in @dev_table hash table in vport.c. * @dp_hash_node: Element in @datapath->ports hash table in datapath.c. * @ops: Class structure. @@ -81,15 +74,15 @@ struct vport_err_stats { */ struct vport { struct rcu_head rcu; - u16 port_no; struct datapath *dp; u32 upcall_portid; + u16 port_no; struct hlist_node hash_node; struct hlist_node dp_hash_node; const struct vport_ops *ops; - struct vport_percpu_stats __percpu *percpu_stats; + struct pcpu_tstats __percpu *percpu_stats; spinlock_t stats_lock; struct vport_err_stats err_stats; @@ -131,24 +124,22 @@ struct vport_parms { * have any configuration. * @get_name: Get the device's name. * @get_config: Get the device's configuration. - * @get_ifindex: Get the system interface index associated with the device. * May be null if the device does not have an ifindex. * @send: Send a packet on the device. Returns the length of the packet sent. */ struct vport_ops { enum ovs_vport_type type; - /* Called with RTNL lock. */ + /* Called with ovs_mutex. 
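
ovs_vport_receive() and ovs_vport_send() above are the writer half of the same pattern: bump the per-CPU counters inside an update section so readers can detect torn 64-bit stores. A sketch of the hot-path helper (hypo_* name is illustrative):

#include <linux/if_tunnel.h>
#include <linux/u64_stats_sync.h>

static void hypo_count_rx(struct pcpu_tstats __percpu *stats,
                          unsigned int len)
{
        struct pcpu_tstats *s = this_cpu_ptr(stats);

        u64_stats_update_begin(&s->syncp);
        s->rx_packets++;
        s->rx_bytes += len;
        u64_stats_update_end(&s->syncp);
}
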
*/ struct vport *(*create)(const struct vport_parms *); void (*destroy)(struct vport *); int (*set_options)(struct vport *, struct nlattr *); int (*get_options)(const struct vport *, struct sk_buff *); - /* Called with rcu_read_lock or RTNL lock. */ + /* Called with rcu_read_lock or ovs_mutex. */ const char *(*get_name)(const struct vport *); void (*get_config)(const struct vport *, void *); - int (*get_ifindex)(const struct vport *); int (*send)(struct vport *, struct sk_buff *); }; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index c111bd0e083a..8ec1bca7f859 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -158,10 +158,16 @@ struct packet_mreq_max { unsigned char mr_address[MAX_ADDR_LEN]; }; +union tpacket_uhdr { + struct tpacket_hdr *h1; + struct tpacket2_hdr *h2; + struct tpacket3_hdr *h3; + void *raw; +}; + static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, int closing, int tx_ring); - #define V3_ALIGNMENT (8) #define BLK_HDR_LEN (ALIGN(sizeof(struct tpacket_block_desc), V3_ALIGNMENT)) @@ -181,6 +187,8 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, struct packet_sock; static int tpacket_snd(struct packet_sock *po, struct msghdr *msg); +static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pt, struct net_device *orig_dev); static void *packet_previous_frame(struct packet_sock *po, struct packet_ring_buffer *rb, @@ -288,11 +296,7 @@ static inline __pure struct page *pgv_to_page(void *addr) static void __packet_set_status(struct packet_sock *po, void *frame, int status) { - union { - struct tpacket_hdr *h1; - struct tpacket2_hdr *h2; - void *raw; - } h; + union tpacket_uhdr h; h.raw = frame; switch (po->tp_version) { @@ -315,11 +319,7 @@ static void __packet_set_status(struct packet_sock *po, void *frame, int status) static int __packet_get_status(struct packet_sock *po, void *frame) { - union { - struct tpacket_hdr *h1; - struct tpacket2_hdr *h2; - void *raw; - } h; + union tpacket_uhdr h; smp_rmb(); @@ -339,17 +339,66 @@ static int __packet_get_status(struct packet_sock *po, void *frame) } } +static __u32 tpacket_get_timestamp(struct sk_buff *skb, struct timespec *ts, + unsigned int flags) +{ + struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb); + + if (shhwtstamps) { + if ((flags & SOF_TIMESTAMPING_SYS_HARDWARE) && + ktime_to_timespec_cond(shhwtstamps->syststamp, ts)) + return TP_STATUS_TS_SYS_HARDWARE; + if ((flags & SOF_TIMESTAMPING_RAW_HARDWARE) && + ktime_to_timespec_cond(shhwtstamps->hwtstamp, ts)) + return TP_STATUS_TS_RAW_HARDWARE; + } + + if (ktime_to_timespec_cond(skb->tstamp, ts)) + return TP_STATUS_TS_SOFTWARE; + + return 0; +} + +static __u32 __packet_set_timestamp(struct packet_sock *po, void *frame, + struct sk_buff *skb) +{ + union tpacket_uhdr h; + struct timespec ts; + __u32 ts_status; + + if (!(ts_status = tpacket_get_timestamp(skb, &ts, po->tp_tstamp))) + return 0; + + h.raw = frame; + switch (po->tp_version) { + case TPACKET_V1: + h.h1->tp_sec = ts.tv_sec; + h.h1->tp_usec = ts.tv_nsec / NSEC_PER_USEC; + break; + case TPACKET_V2: + h.h2->tp_sec = ts.tv_sec; + h.h2->tp_nsec = ts.tv_nsec; + break; + case TPACKET_V3: + default: + WARN(1, "TPACKET version not supported.\n"); + BUG(); + } + + /* one flush is safe, as both fields always lie on the same cacheline */ + flush_dcache_page(pgv_to_page(&h.h1->tp_sec)); + smp_wmb(); + + return ts_status; +} + static void *packet_lookup_frame(struct packet_sock *po, struct packet_ring_buffer *rb, 
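
tpacket_get_timestamp() above returns a TP_STATUS_TS_* bit identifying which clock filled the timestamp, and that bit is OR'd into tp_status. A hypothetical userspace RX-ring consumer can then tell the sources apart (assumes a TPACKET_V2 ring; the helper name is illustrative):

#include <stdio.h>
#include <linux/if_packet.h>

static void show_timestamp(const struct tpacket2_hdr *h)
{
        const char *src = "none";

        if (h->tp_status & TP_STATUS_TS_RAW_HARDWARE)
                src = "raw hw";
        else if (h->tp_status & TP_STATUS_TS_SYS_HARDWARE)
                src = "sys hw";
        else if (h->tp_status & TP_STATUS_TS_SOFTWARE)
                src = "software";
        printf("%s %u.%09u\n", src, h->tp_sec, h->tp_nsec);
}
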
unsigned int position, int status) { unsigned int pg_vec_pos, frame_offset; - union { - struct tpacket_hdr *h1; - struct tpacket2_hdr *h2; - void *raw; - } h; + union tpacket_uhdr h; pg_vec_pos = position / rb->frames_per_block; frame_offset = position % rb->frames_per_block; @@ -479,7 +528,7 @@ static void init_prb_bdqc(struct packet_sock *po, p1->hdrlen = po->tp_hdrlen; p1->version = po->tp_version; p1->last_kactive_blk_num = 0; - po->stats_u.stats3.tp_freeze_q_cnt = 0; + po->stats.stats3.tp_freeze_q_cnt = 0; if (req_u->req3.tp_retire_blk_tov) p1->retire_blk_tov = req_u->req3.tp_retire_blk_tov; else @@ -647,7 +696,7 @@ static void prb_close_block(struct tpacket_kbdq_core *pkc1, struct tpacket3_hdr *last_pkt; struct tpacket_hdr_v1 *h1 = &pbd1->hdr.bh1; - if (po->stats.tp_drops) + if (po->stats.stats3.tp_drops) status |= TP_STATUS_LOSING; last_pkt = (struct tpacket3_hdr *)pkc1->prev; @@ -693,36 +742,33 @@ static void prb_open_block(struct tpacket_kbdq_core *pkc1, smp_rmb(); - if (likely(TP_STATUS_KERNEL == BLOCK_STATUS(pbd1))) { + /* We could have just memset this but we will lose the + * flexibility of making the priv area sticky + */ - /* We could have just memset this but we will lose the - * flexibility of making the priv area sticky - */ - BLOCK_SNUM(pbd1) = pkc1->knxt_seq_num++; - BLOCK_NUM_PKTS(pbd1) = 0; - BLOCK_LEN(pbd1) = BLK_PLUS_PRIV(pkc1->blk_sizeof_priv); - getnstimeofday(&ts); - h1->ts_first_pkt.ts_sec = ts.tv_sec; - h1->ts_first_pkt.ts_nsec = ts.tv_nsec; - pkc1->pkblk_start = (char *)pbd1; - pkc1->nxt_offset = pkc1->pkblk_start + BLK_PLUS_PRIV(pkc1->blk_sizeof_priv); - BLOCK_O2FP(pbd1) = (__u32)BLK_PLUS_PRIV(pkc1->blk_sizeof_priv); - BLOCK_O2PRIV(pbd1) = BLK_HDR_LEN; - pbd1->version = pkc1->version; - pkc1->prev = pkc1->nxt_offset; - pkc1->pkblk_end = pkc1->pkblk_start + pkc1->kblk_size; - prb_thaw_queue(pkc1); - _prb_refresh_rx_retire_blk_timer(pkc1); + BLOCK_SNUM(pbd1) = pkc1->knxt_seq_num++; + BLOCK_NUM_PKTS(pbd1) = 0; + BLOCK_LEN(pbd1) = BLK_PLUS_PRIV(pkc1->blk_sizeof_priv); - smp_wmb(); + getnstimeofday(&ts); - return; - } + h1->ts_first_pkt.ts_sec = ts.tv_sec; + h1->ts_first_pkt.ts_nsec = ts.tv_nsec; + + pkc1->pkblk_start = (char *)pbd1; + pkc1->nxt_offset = pkc1->pkblk_start + BLK_PLUS_PRIV(pkc1->blk_sizeof_priv); + + BLOCK_O2FP(pbd1) = (__u32)BLK_PLUS_PRIV(pkc1->blk_sizeof_priv); + BLOCK_O2PRIV(pbd1) = BLK_HDR_LEN; + + pbd1->version = pkc1->version; + pkc1->prev = pkc1->nxt_offset; + pkc1->pkblk_end = pkc1->pkblk_start + pkc1->kblk_size; + + prb_thaw_queue(pkc1); + _prb_refresh_rx_retire_blk_timer(pkc1); - WARN(1, "ERROR block:%p is NOT FREE status:%d kactive_blk_num:%d\n", - pbd1, BLOCK_STATUS(pbd1), pkc1->kactive_blk_num); - dump_stack(); - BUG(); + smp_wmb(); } /* @@ -752,7 +798,7 @@ static void prb_freeze_queue(struct tpacket_kbdq_core *pkc, struct packet_sock *po) { pkc->reset_pending_on_curr_blk = 1; - po->stats_u.stats3.tp_freeze_q_cnt++; + po->stats.stats3.tp_freeze_q_cnt++; } #define TOTAL_PKT_LEN_INCL_ALIGN(length) (ALIGN((length), V3_ALIGNMENT)) @@ -813,10 +859,6 @@ static void prb_retire_current_block(struct tpacket_kbdq_core *pkc, prb_close_block(pkc, pbd, po, status); return; } - - WARN(1, "ERROR-pbd[%d]:%p\n", pkc->kactive_blk_num, pbd); - dump_stack(); - BUG(); } static int prb_curr_blk_in_use(struct tpacket_kbdq_core *pkc, @@ -973,11 +1015,11 @@ static void *packet_current_rx_frame(struct packet_sock *po, static void *prb_lookup_block(struct packet_sock *po, struct packet_ring_buffer *rb, - unsigned int previous, + unsigned int idx, int status) { 
struct tpacket_kbdq_core *pkc = GET_PBDQC_FROM_RB(rb); - struct tpacket_block_desc *pbd = GET_PBLOCK_DESC(pkc, previous); + struct tpacket_block_desc *pbd = GET_PBLOCK_DESC(pkc, idx); if (status != BLOCK_STATUS(pbd)) return NULL; @@ -1041,6 +1083,29 @@ static void packet_increment_head(struct packet_ring_buffer *buff) buff->head = buff->head != buff->frame_max ? buff->head+1 : 0; } +static bool packet_rcv_has_room(struct packet_sock *po, struct sk_buff *skb) +{ + struct sock *sk = &po->sk; + bool has_room; + + if (po->prot_hook.func != tpacket_rcv) + return (atomic_read(&sk->sk_rmem_alloc) + skb->truesize) + <= sk->sk_rcvbuf; + + spin_lock(&sk->sk_receive_queue.lock); + if (po->tp_version == TPACKET_V3) + has_room = prb_lookup_block(po, &po->rx_ring, + po->rx_ring.prb_bdqc.kactive_blk_num, + TP_STATUS_KERNEL); + else + has_room = packet_lookup_frame(po, &po->rx_ring, + po->rx_ring.head, + TP_STATUS_KERNEL); + spin_unlock(&sk->sk_receive_queue.lock); + + return has_room; +} + static void packet_sock_destruct(struct sock *sk) { skb_queue_purge(&sk->sk_error_queue); @@ -1066,16 +1131,16 @@ static int fanout_rr_next(struct packet_fanout *f, unsigned int num) return x; } -static struct sock *fanout_demux_hash(struct packet_fanout *f, struct sk_buff *skb, unsigned int num) +static unsigned int fanout_demux_hash(struct packet_fanout *f, + struct sk_buff *skb, + unsigned int num) { - u32 idx, hash = skb->rxhash; - - idx = ((u64)hash * num) >> 32; - - return f->arr[idx]; + return (((u64)skb->rxhash) * num) >> 32; } -static struct sock *fanout_demux_lb(struct packet_fanout *f, struct sk_buff *skb, unsigned int num) +static unsigned int fanout_demux_lb(struct packet_fanout *f, + struct sk_buff *skb, + unsigned int num) { int cur, old; @@ -1083,14 +1148,40 @@ static struct sock *fanout_demux_lb(struct packet_fanout *f, struct sk_buff *skb while ((old = atomic_cmpxchg(&f->rr_cur, cur, fanout_rr_next(f, num))) != cur) cur = old; - return f->arr[cur]; + return cur; +} + +static unsigned int fanout_demux_cpu(struct packet_fanout *f, + struct sk_buff *skb, + unsigned int num) +{ + return smp_processor_id() % num; } -static struct sock *fanout_demux_cpu(struct packet_fanout *f, struct sk_buff *skb, unsigned int num) +static unsigned int fanout_demux_rollover(struct packet_fanout *f, + struct sk_buff *skb, + unsigned int idx, unsigned int skip, + unsigned int num) { - unsigned int cpu = smp_processor_id(); + unsigned int i, j; + + i = j = min_t(int, f->next[idx], num - 1); + do { + if (i != skip && packet_rcv_has_room(pkt_sk(f->arr[i]), skb)) { + if (i != j) + f->next[idx] = i; + return i; + } + if (++i == num) + i = 0; + } while (i != j); + + return idx; +} - return f->arr[cpu % num]; +static bool fanout_has_flag(struct packet_fanout *f, u16 flag) +{ + return f->flags & (flag >> 8); } static int packet_rcv_fanout(struct sk_buff *skb, struct net_device *dev, @@ -1099,7 +1190,7 @@ static int packet_rcv_fanout(struct sk_buff *skb, struct net_device *dev, struct packet_fanout *f = pt->af_packet_priv; unsigned int num = f->num_members; struct packet_sock *po; - struct sock *sk; + unsigned int idx; if (!net_eq(dev_net(dev), read_pnet(&f->net)) || !num) { @@ -1110,23 +1201,31 @@ static int packet_rcv_fanout(struct sk_buff *skb, struct net_device *dev, switch (f->type) { case PACKET_FANOUT_HASH: default: - if (f->defrag) { + if (fanout_has_flag(f, PACKET_FANOUT_FLAG_DEFRAG)) { skb = ip_check_defrag(skb, IP_DEFRAG_AF_PACKET); if (!skb) return 0; } skb_get_rxhash(skb); - sk = fanout_demux_hash(f, skb, num); + idx = 
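
fanout_demux_hash() above maps a 32-bit rxhash onto [0, num) with a multiply-and-shift instead of a modulo, which avoids a division and stays uniform when the hash is uniform. The trick in isolation (the function name is illustrative):

#include <linux/types.h>

/* top 32 bits of hash * num == floor(hash / 2^32 * num) */
static unsigned int scale_to(u32 hash, unsigned int num)
{
        return ((u64)hash * num) >> 32;
}
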
fanout_demux_hash(f, skb, num); break; case PACKET_FANOUT_LB: - sk = fanout_demux_lb(f, skb, num); + idx = fanout_demux_lb(f, skb, num); break; case PACKET_FANOUT_CPU: - sk = fanout_demux_cpu(f, skb, num); + idx = fanout_demux_cpu(f, skb, num); + break; + case PACKET_FANOUT_ROLLOVER: + idx = fanout_demux_rollover(f, skb, 0, (unsigned int) -1, num); break; } - po = pkt_sk(sk); + po = pkt_sk(f->arr[idx]); + if (fanout_has_flag(f, PACKET_FANOUT_FLAG_ROLLOVER) && + unlikely(!packet_rcv_has_room(po, skb))) { + idx = fanout_demux_rollover(f, skb, idx, idx, num); + po = pkt_sk(f->arr[idx]); + } return po->prot_hook.func(skb, dev, &po->prot_hook, orig_dev); } @@ -1175,10 +1274,13 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags) struct packet_sock *po = pkt_sk(sk); struct packet_fanout *f, *match; u8 type = type_flags & 0xff; - u8 defrag = (type_flags & PACKET_FANOUT_FLAG_DEFRAG) ? 1 : 0; + u8 flags = type_flags >> 8; int err; switch (type) { + case PACKET_FANOUT_ROLLOVER: + if (type_flags & PACKET_FANOUT_FLAG_ROLLOVER) + return -EINVAL; case PACKET_FANOUT_HASH: case PACKET_FANOUT_LB: case PACKET_FANOUT_CPU: @@ -1203,7 +1305,7 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags) } } err = -EINVAL; - if (match && match->defrag != defrag) + if (match && match->flags != flags) goto out; if (!match) { err = -ENOMEM; @@ -1213,7 +1315,7 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags) write_pnet(&match->net, sock_net(sk)); match->id = id; match->type = type; - match->defrag = defrag; + match->flags = flags; atomic_set(&match->rr_cur, 0); INIT_LIST_HEAD(&match->list); spin_lock_init(&match->lock); @@ -1443,13 +1545,14 @@ retry: skb->dev = dev; skb->priority = sk->sk_priority; skb->mark = sk->sk_mark; - err = sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags); - if (err < 0) - goto out_unlock; + + sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags); if (unlikely(extra_len == 4)) skb->no_fcs = 1; + skb_probe_transport_header(skb, 0); + dev_queue_xmit(skb); rcu_read_unlock(); return len; @@ -1577,7 +1680,7 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev, nf_reset(skb); spin_lock(&sk->sk_receive_queue.lock); - po->stats.tp_packets++; + po->stats.stats1.tp_packets++; skb->dropcount = atomic_read(&sk->sk_drops); __skb_queue_tail(&sk->sk_receive_queue, skb); spin_unlock(&sk->sk_receive_queue.lock); @@ -1586,7 +1689,7 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev, drop_n_acct: spin_lock(&sk->sk_receive_queue.lock); - po->stats.tp_drops++; + po->stats.stats1.tp_drops++; atomic_inc(&sk->sk_drops); spin_unlock(&sk->sk_receive_queue.lock); @@ -1606,21 +1709,15 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct sock *sk; struct packet_sock *po; struct sockaddr_ll *sll; - union { - struct tpacket_hdr *h1; - struct tpacket2_hdr *h2; - struct tpacket3_hdr *h3; - void *raw; - } h; + union tpacket_uhdr h; u8 *skb_head = skb->data; int skb_len = skb->len; unsigned int snaplen, res; unsigned long status = TP_STATUS_USER; unsigned short macoff, netoff, hdrlen; struct sk_buff *copy_skb = NULL; - struct timeval tv; struct timespec ts; - struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb); + __u32 ts_status; if (skb->pkt_type == PACKET_LOOPBACK) goto drop; @@ -1692,10 +1789,10 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, * Anyways, moving it for V1/V2 only as V3 doesn't need this * at packet level. 
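
fanout_add() above unpacks the PACKET_FANOUT argument as group id in the low 16 bits and mode-plus-flags in the high 16, with the new rollover behaviour selected by a flag bit. A hypothetical userspace setup enabling hash fanout with rollover fallback (join_fanout is an illustrative name):

#include <sys/socket.h>
#include <linux/if_packet.h>

static int join_fanout(int fd, unsigned short id)
{
        int arg = id |
                  ((PACKET_FANOUT_HASH | PACKET_FANOUT_FLAG_ROLLOVER) << 16);

        return setsockopt(fd, SOL_PACKET, PACKET_FANOUT, &arg, sizeof(arg));
}
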
*/ - if (po->stats.tp_drops) + if (po->stats.stats1.tp_drops) status |= TP_STATUS_LOSING; } - po->stats.tp_packets++; + po->stats.stats1.tp_packets++; if (copy_skb) { status |= TP_STATUS_COPY; __skb_queue_tail(&sk->sk_receive_queue, copy_skb); @@ -1704,24 +1801,19 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, skb_copy_bits(skb, 0, h.raw + macoff, snaplen); + if (!(ts_status = tpacket_get_timestamp(skb, &ts, po->tp_tstamp))) + getnstimeofday(&ts); + + status |= ts_status; + switch (po->tp_version) { case TPACKET_V1: h.h1->tp_len = skb->len; h.h1->tp_snaplen = snaplen; h.h1->tp_mac = macoff; h.h1->tp_net = netoff; - if ((po->tp_tstamp & SOF_TIMESTAMPING_SYS_HARDWARE) - && shhwtstamps->syststamp.tv64) - tv = ktime_to_timeval(shhwtstamps->syststamp); - else if ((po->tp_tstamp & SOF_TIMESTAMPING_RAW_HARDWARE) - && shhwtstamps->hwtstamp.tv64) - tv = ktime_to_timeval(shhwtstamps->hwtstamp); - else if (skb->tstamp.tv64) - tv = ktime_to_timeval(skb->tstamp); - else - do_gettimeofday(&tv); - h.h1->tp_sec = tv.tv_sec; - h.h1->tp_usec = tv.tv_usec; + h.h1->tp_sec = ts.tv_sec; + h.h1->tp_usec = ts.tv_nsec / NSEC_PER_USEC; hdrlen = sizeof(*h.h1); break; case TPACKET_V2: @@ -1729,16 +1821,6 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, h.h2->tp_snaplen = snaplen; h.h2->tp_mac = macoff; h.h2->tp_net = netoff; - if ((po->tp_tstamp & SOF_TIMESTAMPING_SYS_HARDWARE) - && shhwtstamps->syststamp.tv64) - ts = ktime_to_timespec(shhwtstamps->syststamp); - else if ((po->tp_tstamp & SOF_TIMESTAMPING_RAW_HARDWARE) - && shhwtstamps->hwtstamp.tv64) - ts = ktime_to_timespec(shhwtstamps->hwtstamp); - else if (skb->tstamp.tv64) - ts = ktime_to_timespec(skb->tstamp); - else - getnstimeofday(&ts); h.h2->tp_sec = ts.tv_sec; h.h2->tp_nsec = ts.tv_nsec; if (vlan_tx_tag_present(skb)) { @@ -1759,16 +1841,6 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, h.h3->tp_snaplen = snaplen; h.h3->tp_mac = macoff; h.h3->tp_net = netoff; - if ((po->tp_tstamp & SOF_TIMESTAMPING_SYS_HARDWARE) - && shhwtstamps->syststamp.tv64) - ts = ktime_to_timespec(shhwtstamps->syststamp); - else if ((po->tp_tstamp & SOF_TIMESTAMPING_RAW_HARDWARE) - && shhwtstamps->hwtstamp.tv64) - ts = ktime_to_timespec(shhwtstamps->hwtstamp); - else if (skb->tstamp.tv64) - ts = ktime_to_timespec(skb->tstamp); - else - getnstimeofday(&ts); h.h3->tp_sec = ts.tv_sec; h.h3->tp_nsec = ts.tv_nsec; hdrlen = sizeof(*h.h3); @@ -1819,7 +1891,7 @@ drop: return 0; ring_is_full: - po->stats.tp_drops++; + po->stats.stats1.tp_drops++; spin_unlock(&sk->sk_receive_queue.lock); sk->sk_data_ready(sk, 0); @@ -1833,10 +1905,14 @@ static void tpacket_destruct_skb(struct sk_buff *skb) void *ph; if (likely(po->tx_ring.pg_vec)) { + __u32 ts; + ph = skb_shinfo(skb)->destructor_arg; BUG_ON(atomic_read(&po->tx_ring.pending) == 0); atomic_dec(&po->tx_ring.pending); - __packet_set_status(po, ph, TP_STATUS_AVAILABLE); + + ts = __packet_set_timestamp(po, ph, skb); + __packet_set_status(po, ph, TP_STATUS_AVAILABLE | ts); } sock_wfree(skb); @@ -1846,11 +1922,7 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb, void *frame, struct net_device *dev, int size_max, __be16 proto, unsigned char *addr, int hlen) { - union { - struct tpacket_hdr *h1; - struct tpacket2_hdr *h2; - void *raw; - } ph; + union tpacket_uhdr ph; int to_write, offset, len, tp_len, nr_frags, len_max; struct socket *sock = po->sk.sk_socket; struct page *page; @@ -1863,6 +1935,7 @@ static int tpacket_fill_skb(struct packet_sock *po, struct 
sk_buff *skb, skb->dev = dev; skb->priority = po->sk.sk_priority; skb->mark = po->sk.sk_mark; + sock_tx_timestamp(&po->sk, &skb_shinfo(skb)->tx_flags); skb_shinfo(skb)->destructor_arg = ph.raw; switch (po->tp_version) { @@ -1880,6 +1953,7 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb, skb_reserve(skb, hlen); skb_reset_network_header(skb); + skb_probe_transport_header(skb, 0); if (po->tp_tx_has_off) { int off_min, off_max, off; @@ -2247,9 +2321,8 @@ static int packet_snd(struct socket *sock, err = skb_copy_datagram_from_iovec(skb, offset, msg->msg_iov, 0, len); if (err) goto out_free; - err = sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags); - if (err < 0) - goto out_free; + + sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags); if (!gso_type && (len > dev->mtu + reserve + extra_len)) { /* Earlier code assumed this would be a VLAN pkt, @@ -2289,6 +2362,8 @@ static int packet_snd(struct socket *sock, len += vnet_hdr_len; } + skb_probe_transport_header(skb, reserve); + if (unlikely(extra_len == 4)) skb->no_fcs = 1; @@ -3165,8 +3240,7 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, struct sock *sk = sock->sk; struct packet_sock *po = pkt_sk(sk); void *data = &val; - struct tpacket_stats st; - union tpacket_stats_u st_u; + union tpacket_stats_u st; if (level != SOL_PACKET) return -ENOPROTOOPT; @@ -3180,22 +3254,18 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, switch (optname) { case PACKET_STATISTICS: spin_lock_bh(&sk->sk_receive_queue.lock); + memcpy(&st, &po->stats, sizeof(st)); + memset(&po->stats, 0, sizeof(po->stats)); + spin_unlock_bh(&sk->sk_receive_queue.lock); + if (po->tp_version == TPACKET_V3) { lv = sizeof(struct tpacket_stats_v3); - memcpy(&st_u.stats3, &po->stats, - sizeof(struct tpacket_stats)); - st_u.stats3.tp_freeze_q_cnt = - po->stats_u.stats3.tp_freeze_q_cnt; - st_u.stats3.tp_packets += po->stats.tp_drops; - data = &st_u.stats3; + data = &st.stats3; } else { lv = sizeof(struct tpacket_stats); - st = po->stats; - st.tp_packets += st.tp_drops; - data = &st; + data = &st.stats1; } - memset(&po->stats, 0, sizeof(st)); - spin_unlock_bh(&sk->sk_receive_queue.lock); + break; case PACKET_AUXDATA: val = po->auxdata; @@ -3240,7 +3310,8 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, case PACKET_FANOUT: val = (po->fanout ? 
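
The PACKET_STATISTICS hunk above reworks the counters around union tpacket_stats_u: V1/V2 sockets get struct tpacket_stats, TPACKET_V3 sockets struct tpacket_stats_v3, and reading still zeroes the counters. A hypothetical userspace poller for a V1/V2 socket (dump_stats is an illustrative name):

#include <stdio.h>
#include <sys/socket.h>
#include <linux/if_packet.h>

/* counters are reset on read, so call this periodically */
static void dump_stats(int fd)
{
        struct tpacket_stats st;
        socklen_t len = sizeof(st);

        if (!getsockopt(fd, SOL_PACKET, PACKET_STATISTICS, &st, &len))
                fprintf(stderr, "packets=%u drops=%u\n",
                        st.tp_packets, st.tp_drops);
}
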
((u32)po->fanout->id | - ((u32)po->fanout->type << 16)) : + ((u32)po->fanout->type << 16) | + ((u32)po->fanout->flags << 24)) : 0); break; case PACKET_TX_HAS_OFF: @@ -3263,12 +3334,11 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, static int packet_notifier(struct notifier_block *this, unsigned long msg, void *data) { struct sock *sk; - struct hlist_node *node; struct net_device *dev = data; struct net *net = dev_net(dev); rcu_read_lock(); - sk_for_each_rcu(sk, node, &net->packet.sklist) { + sk_for_each_rcu(sk, &net->packet.sklist) { struct packet_sock *po = pkt_sk(sk); switch (msg) { @@ -3828,7 +3898,7 @@ static int __net_init packet_net_init(struct net *net) mutex_init(&net->packet.sklist_lock); INIT_HLIST_HEAD(&net->packet.sklist); - if (!proc_net_fops_create(net, "packet", 0, &packet_seq_fops)) + if (!proc_create("packet", 0, net->proc_net, &packet_seq_fops)) return -ENOMEM; return 0; @@ -3836,7 +3906,7 @@ static int __net_init packet_net_init(struct net *net) static void __net_exit packet_net_exit(struct net *net) { - proc_net_remove(net, "packet"); + remove_proc_entry("packet", net->proc_net); } static struct pernet_operations packet_net_ops = { diff --git a/net/packet/diag.c b/net/packet/diag.c index 8db6e21c46bd..a9584a2f6d69 100644 --- a/net/packet/diag.c +++ b/net/packet/diag.c @@ -125,8 +125,10 @@ static int pdiag_put_fanout(struct packet_sock *po, struct sk_buff *nlskb) return ret; } -static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct packet_diag_req *req, - u32 portid, u32 seq, u32 flags, int sk_ino) +static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, + struct packet_diag_req *req, + struct user_namespace *user_ns, + u32 portid, u32 seq, u32 flags, int sk_ino) { struct nlmsghdr *nlh; struct packet_diag_msg *rp; @@ -147,6 +149,11 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct packet_diag pdiag_put_info(po, skb)) goto out_nlmsg_trim; + if ((req->pdiag_show & PACKET_SHOW_INFO) && + nla_put_u32(skb, PACKET_DIAG_UID, + from_kuid_munged(user_ns, sock_i_uid(sk)))) + goto out_nlmsg_trim; + if ((req->pdiag_show & PACKET_SHOW_MCLIST) && pdiag_put_mclist(po, skb)) goto out_nlmsg_trim; @@ -159,6 +166,14 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct packet_diag pdiag_put_fanout(po, skb)) goto out_nlmsg_trim; + if ((req->pdiag_show & PACKET_SHOW_MEMINFO) && + sock_diag_put_meminfo(sk, skb, PACKET_DIAG_MEMINFO)) + goto out_nlmsg_trim; + + if ((req->pdiag_show & PACKET_SHOW_FILTER) && + sock_diag_put_filterinfo(user_ns, sk, skb, PACKET_DIAG_FILTER)) + goto out_nlmsg_trim; + return nlmsg_end(skb, nlh); out_nlmsg_trim: @@ -172,21 +187,22 @@ static int packet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) struct packet_diag_req *req; struct net *net; struct sock *sk; - struct hlist_node *node; net = sock_net(skb->sk); req = nlmsg_data(cb->nlh); mutex_lock(&net->packet.sklist_lock); - sk_for_each(sk, node, &net->packet.sklist) { + sk_for_each(sk, &net->packet.sklist) { if (!net_eq(sock_net(sk), net)) continue; if (num < s_num) goto next; - if (sk_diag_fill(sk, skb, req, NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, NLM_F_MULTI, - sock_i_ino(sk)) < 0) + if (sk_diag_fill(sk, skb, req, + sk_user_ns(NETLINK_CB(cb->skb).sk), + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, NLM_F_MULTI, + sock_i_ino(sk)) < 0) goto done; next: num++; diff --git a/net/packet/internal.h b/net/packet/internal.h index e84cab8cb7a9..c4e4b4561207 100644 --- a/net/packet/internal.h +++ 
b/net/packet/internal.h @@ -54,6 +54,7 @@ struct pgv { struct packet_ring_buffer { struct pgv *pg_vec; + unsigned int head; unsigned int frames_per_block; unsigned int frame_size; @@ -63,8 +64,9 @@ struct packet_ring_buffer { unsigned int pg_vec_pages; unsigned int pg_vec_len; - struct tpacket_kbdq_core prb_bdqc; atomic_t pending; + + struct tpacket_kbdq_core prb_bdqc; }; extern struct mutex fanout_mutex; @@ -77,10 +79,11 @@ struct packet_fanout { unsigned int num_members; u16 id; u8 type; - u8 defrag; + u8 flags; atomic_t rr_cur; struct list_head list; struct sock *arr[PACKET_FANOUT_MAX]; + int next[PACKET_FANOUT_MAX]; spinlock_t lock; atomic_t sk_ref; struct packet_type prot_hook ____cacheline_aligned_in_smp; @@ -90,8 +93,7 @@ struct packet_sock { /* struct sock has to be the first member of packet_sock */ struct sock sk; struct packet_fanout *fanout; - struct tpacket_stats stats; - union tpacket_stats_u stats_u; + union tpacket_stats_u stats; struct packet_ring_buffer rx_ring; struct packet_ring_buffer tx_ring; int copy_thresh; diff --git a/net/phonet/pep.c b/net/phonet/pep.c index 576f22c9c76e..e77411735de8 100644 --- a/net/phonet/pep.c +++ b/net/phonet/pep.c @@ -640,11 +640,10 @@ static struct sock *pep_find_pipe(const struct hlist_head *hlist, const struct sockaddr_pn *dst, u8 pipe_handle) { - struct hlist_node *node; struct sock *sknode; u16 dobj = pn_sockaddr_get_object(dst); - sk_for_each(sknode, node, hlist) { + sk_for_each(sknode, hlist) { struct pep_sock *pnnode = pep_sk(sknode); /* Ports match, but addresses might not: */ diff --git a/net/phonet/pn_dev.c b/net/phonet/pn_dev.c index 5bf6341e2dd4..45a7df6575de 100644 --- a/net/phonet/pn_dev.c +++ b/net/phonet/pn_dev.c @@ -320,7 +320,7 @@ static int __net_init phonet_init_net(struct net *net) { struct phonet_net *pnn = phonet_pernet(net); - if (!proc_net_fops_create(net, "phonet", 0, &pn_sock_seq_fops)) + if (!proc_create("phonet", 0, net->proc_net, &pn_sock_seq_fops)) return -ENOMEM; INIT_LIST_HEAD(&pnn->pndevs.list); @@ -331,7 +331,7 @@ static int __net_init phonet_init_net(struct net *net) static void __net_exit phonet_exit_net(struct net *net) { - proc_net_remove(net, "phonet"); + remove_proc_entry("phonet", net->proc_net); } static struct pernet_operations phonet_net_ops = { @@ -348,7 +348,7 @@ int __init phonet_device_init(void) if (err) return err; - proc_net_fops_create(&init_net, "pnresource", 0, &pn_res_seq_fops); + proc_create("pnresource", 0, init_net.proc_net, &pn_res_seq_fops); register_netdevice_notifier(&phonet_device_notifier); err = phonet_netlink_register(); if (err) @@ -361,7 +361,7 @@ void phonet_device_exit(void) rtnl_unregister_all(PF_PHONET); unregister_netdevice_notifier(&phonet_device_notifier); unregister_pernet_subsys(&phonet_net_ops); - proc_net_remove(&init_net, "pnresource"); + remove_proc_entry("pnresource", init_net.proc_net); } int phonet_route_add(struct net_device *dev, u8 daddr) diff --git a/net/phonet/pn_netlink.c b/net/phonet/pn_netlink.c index 0193630d3061..dc15f4300808 100644 --- a/net/phonet/pn_netlink.c +++ b/net/phonet/pn_netlink.c @@ -61,7 +61,7 @@ static const struct nla_policy ifa_phonet_policy[IFA_MAX+1] = { [IFA_LOCAL] = { .type = NLA_U8 }, }; -static int addr_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *attr) +static int addr_doit(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); struct nlattr *tb[IFA_MAX+1]; @@ -224,7 +224,7 @@ static const struct nla_policy rtm_phonet_policy[RTA_MAX+1] = { [RTA_OIF] = { .type = NLA_U32 }, }; -static int 
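
The phonet hunks above (and the rose/rxrpc ones further down) all make the same mechanical switch from proc_net_fops_create()/proc_net_remove() to proc_create()/remove_proc_entry() rooted at the namespace's proc_net directory. The shape of a per-netns conversion, with hypothetical names (hypo_*; real code would point hypo_seq_fops at seq_file ops):

#include <linux/proc_fs.h>
#include <net/net_namespace.h>

static const struct file_operations hypo_seq_fops; /* seq_file ops elsewhere */

static int __net_init hypo_net_init(struct net *net)
{
        if (!proc_create("hypo", 0444, net->proc_net, &hypo_seq_fops))
                return -ENOMEM;
        return 0;
}

static void __net_exit hypo_net_exit(struct net *net)
{
        remove_proc_entry("hypo", net->proc_net);
}
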
route_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *attr) +static int route_doit(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); struct nlattr *tb[RTA_MAX+1]; diff --git a/net/phonet/socket.c b/net/phonet/socket.c index b7e982782255..1afd1381cdc7 100644 --- a/net/phonet/socket.c +++ b/net/phonet/socket.c @@ -76,7 +76,6 @@ static struct hlist_head *pn_hash_list(u16 obj) */ struct sock *pn_find_sock_by_sa(struct net *net, const struct sockaddr_pn *spn) { - struct hlist_node *node; struct sock *sknode; struct sock *rval = NULL; u16 obj = pn_sockaddr_get_object(spn); @@ -84,7 +83,7 @@ struct sock *pn_find_sock_by_sa(struct net *net, const struct sockaddr_pn *spn) struct hlist_head *hlist = pn_hash_list(obj); rcu_read_lock(); - sk_for_each_rcu(sknode, node, hlist) { + sk_for_each_rcu(sknode, hlist) { struct pn_sock *pn = pn_sk(sknode); BUG_ON(!pn->sobject); /* unbound socket */ @@ -120,10 +119,9 @@ void pn_deliver_sock_broadcast(struct net *net, struct sk_buff *skb) rcu_read_lock(); for (h = 0; h < PN_HASHSIZE; h++) { - struct hlist_node *node; struct sock *sknode; - sk_for_each(sknode, node, hlist) { + sk_for_each(sknode, hlist) { struct sk_buff *clone; if (!net_eq(sock_net(sknode), net)) @@ -543,12 +541,11 @@ static struct sock *pn_sock_get_idx(struct seq_file *seq, loff_t pos) { struct net *net = seq_file_net(seq); struct hlist_head *hlist = pnsocks.hlist; - struct hlist_node *node; struct sock *sknode; unsigned int h; for (h = 0; h < PN_HASHSIZE; h++) { - sk_for_each_rcu(sknode, node, hlist) { + sk_for_each_rcu(sknode, hlist) { if (!net_eq(net, sock_net(sknode))) continue; if (!pos) diff --git a/net/rds/Kconfig b/net/rds/Kconfig index ec753b3ae72a..f2c670ba7b9b 100644 --- a/net/rds/Kconfig +++ b/net/rds/Kconfig @@ -1,7 +1,7 @@ config RDS - tristate "The RDS Protocol (EXPERIMENTAL)" - depends on INET && EXPERIMENTAL + tristate "The RDS Protocol" + depends on INET ---help--- The RDS (Reliable Datagram Sockets) protocol provides reliable, sequenced delivery of datagrams over Infiniband, iWARP, diff --git a/net/rds/bind.c b/net/rds/bind.c index 637bde56c9db..b5ad65a0067e 100644 --- a/net/rds/bind.c +++ b/net/rds/bind.c @@ -52,13 +52,12 @@ static struct rds_sock *rds_bind_lookup(__be32 addr, __be16 port, struct rds_sock *insert) { struct rds_sock *rs; - struct hlist_node *node; struct hlist_head *head = hash_to_bucket(addr, port); u64 cmp; u64 needle = ((u64)be32_to_cpu(addr) << 32) | be16_to_cpu(port); rcu_read_lock(); - hlist_for_each_entry_rcu(rs, node, head, rs_bound_node) { + hlist_for_each_entry_rcu(rs, head, rs_bound_node) { cmp = ((u64)be32_to_cpu(rs->rs_bound_addr) << 32) | be16_to_cpu(rs->rs_bound_port); diff --git a/net/rds/connection.c b/net/rds/connection.c index 9e07c756d1f9..642ad42c416b 100644 --- a/net/rds/connection.c +++ b/net/rds/connection.c @@ -69,9 +69,8 @@ static struct rds_connection *rds_conn_lookup(struct hlist_head *head, struct rds_transport *trans) { struct rds_connection *conn, *ret = NULL; - struct hlist_node *pos; - hlist_for_each_entry_rcu(conn, pos, head, c_hash_node) { + hlist_for_each_entry_rcu(conn, head, c_hash_node) { if (conn->c_faddr == faddr && conn->c_laddr == laddr && conn->c_trans == trans) { ret = conn; @@ -376,7 +375,6 @@ static void rds_conn_message_info(struct socket *sock, unsigned int len, int want_send) { struct hlist_head *head; - struct hlist_node *pos; struct list_head *list; struct rds_connection *conn; struct rds_message *rm; @@ -390,7 +388,7 @@ static void rds_conn_message_info(struct socket 
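
rds_bind_lookup() above compares a single u64 "needle" per node instead of two separate fields: the IPv4 address sits in the high half and the port in the low half. The key construction on its own (bind_key is an illustrative name):

#include <linux/types.h>
#include <asm/byteorder.h>

static u64 bind_key(__be32 addr, __be16 port)
{
        return ((u64)be32_to_cpu(addr) << 32) | be16_to_cpu(port);
}
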
*sock, unsigned int len, for (i = 0, head = rds_conn_hash; i < ARRAY_SIZE(rds_conn_hash); i++, head++) { - hlist_for_each_entry_rcu(conn, pos, head, c_hash_node) { + hlist_for_each_entry_rcu(conn, head, c_hash_node) { if (want_send) list = &conn->c_send_queue; else @@ -439,7 +437,6 @@ void rds_for_each_conn_info(struct socket *sock, unsigned int len, { uint64_t buffer[(item_len + 7) / 8]; struct hlist_head *head; - struct hlist_node *pos; struct rds_connection *conn; size_t i; @@ -450,7 +447,7 @@ void rds_for_each_conn_info(struct socket *sock, unsigned int len, for (i = 0, head = rds_conn_hash; i < ARRAY_SIZE(rds_conn_hash); i++, head++) { - hlist_for_each_entry_rcu(conn, pos, head, c_hash_node) { + hlist_for_each_entry_rcu(conn, head, c_hash_node) { /* XXX no c_lock usage.. */ if (!visitor(conn, buffer)) diff --git a/net/rds/message.c b/net/rds/message.c index f0a4658f3273..aba232f9f308 100644 --- a/net/rds/message.c +++ b/net/rds/message.c @@ -82,10 +82,7 @@ static void rds_message_purge(struct rds_message *rm) void rds_message_put(struct rds_message *rm) { rdsdebug("put rm %p ref %d\n", rm, atomic_read(&rm->m_refcount)); - if (atomic_read(&rm->m_refcount) == 0) { -printk(KERN_CRIT "danger refcount zero on %p\n", rm); -WARN_ON(1); - } + WARN(!atomic_read(&rm->m_refcount), "danger refcount zero on %p\n", rm); if (atomic_dec_and_test(&rm->m_refcount)) { BUG_ON(!list_empty(&rm->m_sock_item)); BUG_ON(!list_empty(&rm->m_conn_item)); @@ -197,6 +194,9 @@ struct rds_message *rds_message_alloc(unsigned int extra_len, gfp_t gfp) { struct rds_message *rm; + if (extra_len > KMALLOC_MAX_SIZE - sizeof(struct rds_message)) + return NULL; + rm = kzalloc(sizeof(struct rds_message) + extra_len, gfp); if (!rm) goto out; diff --git a/net/rds/stats.c b/net/rds/stats.c index 7be790d60b90..73be187d389e 100644 --- a/net/rds/stats.c +++ b/net/rds/stats.c @@ -87,6 +87,7 @@ void rds_stats_info_copy(struct rds_info_iterator *iter, for (i = 0; i < nr; i++) { BUG_ON(strlen(names[i]) >= sizeof(ctr.name)); strncpy(ctr.name, names[i], sizeof(ctr.name) - 1); + ctr.name[sizeof(ctr.name) - 1] = '\0'; ctr.value = values[i]; rds_info_copy(iter, &ctr, sizeof(ctr)); diff --git a/net/rfkill/core.c b/net/rfkill/core.c index 9b9be5279f5d..1cec5e4f3a5e 100644 --- a/net/rfkill/core.c +++ b/net/rfkill/core.c @@ -587,7 +587,7 @@ static ssize_t rfkill_name_show(struct device *dev, static const char *rfkill_get_type_str(enum rfkill_type type) { - BUILD_BUG_ON(NUM_RFKILL_TYPES != RFKILL_TYPE_FM + 1); + BUILD_BUG_ON(NUM_RFKILL_TYPES != RFKILL_TYPE_NFC + 1); switch (type) { case RFKILL_TYPE_WLAN: @@ -604,6 +604,8 @@ static const char *rfkill_get_type_str(enum rfkill_type type) return "gps"; case RFKILL_TYPE_FM: return "fm"; + case RFKILL_TYPE_NFC: + return "nfc"; default: BUG(); } diff --git a/net/rfkill/input.c b/net/rfkill/input.c index c9d931e7ffec..b85107b5ef62 100644 --- a/net/rfkill/input.c +++ b/net/rfkill/input.c @@ -148,11 +148,9 @@ static unsigned long rfkill_ratelimit(const unsigned long last) static void rfkill_schedule_ratelimited(void) { - if (delayed_work_pending(&rfkill_op_work)) - return; - schedule_delayed_work(&rfkill_op_work, - rfkill_ratelimit(rfkill_last_scheduled)); - rfkill_last_scheduled = jiffies; + if (schedule_delayed_work(&rfkill_op_work, + rfkill_ratelimit(rfkill_last_scheduled))) + rfkill_last_scheduled = jiffies; } static void rfkill_schedule_global_op(enum rfkill_sched_op op) diff --git a/net/rfkill/rfkill-gpio.c b/net/rfkill/rfkill-gpio.c index 78fc0937948d..fb076cd6f808 100644 --- 
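
The rds_stats hunk above fixes a classic strncpy() pitfall: when the source fills the buffer, strncpy() leaves the destination unterminated, so the copy now pins the final byte. As a reusable helper (the name is illustrative):

#include <string.h>

static void copy_counter_name(char *dst, size_t size, const char *src)
{
        strncpy(dst, src, size - 1);
        dst[size - 1] = '\0';
}
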
a/net/rfkill/rfkill-gpio.c +++ b/net/rfkill/rfkill-gpio.c @@ -131,6 +131,7 @@ static int rfkill_gpio_probe(struct platform_device *pdev) rfkill->pwr_clk = clk_get(&pdev->dev, pdata->power_clk_name); if (IS_ERR(rfkill->pwr_clk)) { pr_warn("%s: can't find pwr_clk.\n", __func__); + ret = PTR_ERR(rfkill->pwr_clk); goto fail_shutdown_name; } } @@ -152,9 +153,11 @@ static int rfkill_gpio_probe(struct platform_device *pdev) } rfkill->rfkill_dev = rfkill_alloc(pdata->name, &pdev->dev, pdata->type, - &rfkill_gpio_ops, rfkill); - if (!rfkill->rfkill_dev) + &rfkill_gpio_ops, rfkill); + if (!rfkill->rfkill_dev) { + ret = -ENOMEM; goto fail_shutdown; + } ret = rfkill_register(rfkill->rfkill_dev); if (ret < 0) diff --git a/net/rfkill/rfkill-regulator.c b/net/rfkill/rfkill-regulator.c index 4b5ab21ecb24..d11ac79246e4 100644 --- a/net/rfkill/rfkill-regulator.c +++ b/net/rfkill/rfkill-regulator.c @@ -51,7 +51,7 @@ static int rfkill_regulator_set_block(void *data, bool blocked) return 0; } -struct rfkill_ops rfkill_regulator_ops = { +static struct rfkill_ops rfkill_regulator_ops = { .set_block = rfkill_regulator_set_block, }; diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index c4719ce604c2..9c8347451597 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -165,10 +165,9 @@ static void rose_remove_socket(struct sock *sk) void rose_kill_by_neigh(struct rose_neigh *neigh) { struct sock *s; - struct hlist_node *node; spin_lock_bh(&rose_list_lock); - sk_for_each(s, node, &rose_list) { + sk_for_each(s, &rose_list) { struct rose_sock *rose = rose_sk(s); if (rose->neighbour == neigh) { @@ -186,10 +185,9 @@ void rose_kill_by_neigh(struct rose_neigh *neigh) static void rose_kill_by_device(struct net_device *dev) { struct sock *s; - struct hlist_node *node; spin_lock_bh(&rose_list_lock); - sk_for_each(s, node, &rose_list) { + sk_for_each(s, &rose_list) { struct rose_sock *rose = rose_sk(s); if (rose->device == dev) { @@ -246,10 +244,9 @@ static void rose_insert_socket(struct sock *sk) static struct sock *rose_find_listener(rose_address *addr, ax25_address *call) { struct sock *s; - struct hlist_node *node; spin_lock_bh(&rose_list_lock); - sk_for_each(s, node, &rose_list) { + sk_for_each(s, &rose_list) { struct rose_sock *rose = rose_sk(s); if (!rosecmp(&rose->source_addr, addr) && @@ -258,7 +255,7 @@ static struct sock *rose_find_listener(rose_address *addr, ax25_address *call) goto found; } - sk_for_each(s, node, &rose_list) { + sk_for_each(s, &rose_list) { struct rose_sock *rose = rose_sk(s); if (!rosecmp(&rose->source_addr, addr) && @@ -278,10 +275,9 @@ found: struct sock *rose_find_socket(unsigned int lci, struct rose_neigh *neigh) { struct sock *s; - struct hlist_node *node; spin_lock_bh(&rose_list_lock); - sk_for_each(s, node, &rose_list) { + sk_for_each(s, &rose_list) { struct rose_sock *rose = rose_sk(s); if (rose->lci == lci && rose->neighbour == neigh) @@ -1257,6 +1253,7 @@ static int rose_recvmsg(struct kiocb *iocb, struct socket *sock, skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied); if (srose != NULL) { + memset(srose, 0, msg->msg_namelen); srose->srose_family = AF_ROSE; srose->srose_addr = rose->dest_addr; srose->srose_call = rose->dest_call; @@ -1575,10 +1572,13 @@ static int __init rose_proto_init(void) rose_add_loopback_neigh(); - proc_net_fops_create(&init_net, "rose", S_IRUGO, &rose_info_fops); - proc_net_fops_create(&init_net, "rose_neigh", S_IRUGO, &rose_neigh_fops); - proc_net_fops_create(&init_net, "rose_nodes", S_IRUGO, &rose_nodes_fops); - proc_net_fops_create(&init_net, 
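
The rfkill-gpio hunks above fix probe paths that jumped to the error exit without setting a return code, so failure returned 0. A minimal sketch of the corrected shape under assumed resources (hypo_probe and the 64-byte buffer are illustrative; real code would stash clk/buf in drvdata):

#include <linux/clk.h>
#include <linux/err.h>
#include <linux/platform_device.h>
#include <linux/slab.h>

static int hypo_probe(struct platform_device *pdev)
{
        struct clk *clk;
        void *buf;
        int ret;

        clk = clk_get(&pdev->dev, NULL);
        if (IS_ERR(clk))
                return PTR_ERR(clk);    /* never fall through with 0 */

        buf = kzalloc(64, GFP_KERNEL);
        if (!buf) {
                ret = -ENOMEM;          /* the bug was a goto without this */
                goto err_clk;
        }
        return 0;

err_clk:
        clk_put(clk);
        return ret;
}
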
"rose_routes", S_IRUGO, &rose_routes_fops); + proc_create("rose", S_IRUGO, init_net.proc_net, &rose_info_fops); + proc_create("rose_neigh", S_IRUGO, init_net.proc_net, + &rose_neigh_fops); + proc_create("rose_nodes", S_IRUGO, init_net.proc_net, + &rose_nodes_fops); + proc_create("rose_routes", S_IRUGO, init_net.proc_net, + &rose_routes_fops); out: return rc; fail: @@ -1605,10 +1605,10 @@ static void __exit rose_exit(void) { int i; - proc_net_remove(&init_net, "rose"); - proc_net_remove(&init_net, "rose_neigh"); - proc_net_remove(&init_net, "rose_nodes"); - proc_net_remove(&init_net, "rose_routes"); + remove_proc_entry("rose", init_net.proc_net); + remove_proc_entry("rose_neigh", init_net.proc_net); + remove_proc_entry("rose_nodes", init_net.proc_net); + remove_proc_entry("rose_routes", init_net.proc_net); rose_loopback_clear(); rose_rt_free(); diff --git a/net/rxrpc/Kconfig b/net/rxrpc/Kconfig index 0d3103c4f11c..23dcef12b986 100644 --- a/net/rxrpc/Kconfig +++ b/net/rxrpc/Kconfig @@ -4,7 +4,7 @@ config AF_RXRPC tristate "RxRPC session sockets" - depends on INET && EXPERIMENTAL + depends on INET select CRYPTO select KEYS help diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index 05996d0dd828..e61aa6001c65 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -10,6 +10,7 @@ */ #include <linux/module.h> +#include <linux/kernel.h> #include <linux/net.h> #include <linux/slab.h> #include <linux/skbuff.h> @@ -792,10 +793,9 @@ static const struct net_proto_family rxrpc_family_ops = { */ static int __init af_rxrpc_init(void) { - struct sk_buff *dummy_skb; int ret = -1; - BUILD_BUG_ON(sizeof(struct rxrpc_skb_priv) > sizeof(dummy_skb->cb)); + BUILD_BUG_ON(sizeof(struct rxrpc_skb_priv) > FIELD_SIZEOF(struct sk_buff, cb)); rxrpc_epoch = htonl(get_seconds()); @@ -839,8 +839,9 @@ static int __init af_rxrpc_init(void) } #ifdef CONFIG_PROC_FS - proc_net_fops_create(&init_net, "rxrpc_calls", 0, &rxrpc_call_seq_fops); - proc_net_fops_create(&init_net, "rxrpc_conns", 0, &rxrpc_connection_seq_fops); + proc_create("rxrpc_calls", 0, init_net.proc_net, &rxrpc_call_seq_fops); + proc_create("rxrpc_conns", 0, init_net.proc_net, + &rxrpc_connection_seq_fops); #endif return 0; @@ -878,8 +879,8 @@ static void __exit af_rxrpc_exit(void) _debug("flush scheduled work"); flush_workqueue(rxrpc_workqueue); - proc_net_remove(&init_net, "rxrpc_conns"); - proc_net_remove(&init_net, "rxrpc_calls"); + remove_proc_entry("rxrpc_conns", init_net.proc_net); + remove_proc_entry("rxrpc_calls", init_net.proc_net); destroy_workqueue(rxrpc_workqueue); kmem_cache_destroy(rxrpc_call_jar); _leave(""); diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 65d240cbf74b..fd7072827a40 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -485,8 +485,9 @@ errout: return err; } -struct tc_action *tcf_action_init_1(struct nlattr *nla, struct nlattr *est, - char *name, int ovr, int bind) +struct tc_action *tcf_action_init_1(struct net *net, struct nlattr *nla, + struct nlattr *est, char *name, int ovr, + int bind) { struct tc_action *a; struct tc_action_ops *a_o; @@ -542,9 +543,9 @@ struct tc_action *tcf_action_init_1(struct nlattr *nla, struct nlattr *est, /* backward compatibility for policer */ if (name == NULL) - err = a_o->init(tb[TCA_ACT_OPTIONS], est, a, ovr, bind); + err = a_o->init(net, tb[TCA_ACT_OPTIONS], est, a, ovr, bind); else - err = a_o->init(nla, est, a, ovr, bind); + err = a_o->init(net, nla, est, a, ovr, bind); if (err < 0) goto err_free; @@ -566,8 +567,9 @@ err_out: return ERR_PTR(err); } -struct 
tc_action *tcf_action_init(struct nlattr *nla, struct nlattr *est, - char *name, int ovr, int bind) +struct tc_action *tcf_action_init(struct net *net, struct nlattr *nla, + struct nlattr *est, char *name, int ovr, + int bind) { struct nlattr *tb[TCA_ACT_MAX_PRIO + 1]; struct tc_action *head = NULL, *act, *act_prev = NULL; @@ -579,7 +581,7 @@ struct tc_action *tcf_action_init(struct nlattr *nla, struct nlattr *est, return ERR_PTR(err); for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) { - act = tcf_action_init_1(tb[i], est, name, ovr, bind); + act = tcf_action_init_1(net, tb[i], est, name, ovr, bind); if (IS_ERR(act)) goto err; act->order = i; @@ -960,7 +962,7 @@ tcf_action_add(struct net *net, struct nlattr *nla, struct nlmsghdr *n, struct tc_action *a; u32 seq = n->nlmsg_seq; - act = tcf_action_init(nla, NULL, NULL, ovr, 0); + act = tcf_action_init(net, nla, NULL, NULL, ovr, 0); if (act == NULL) goto done; if (IS_ERR(act)) { @@ -980,7 +982,7 @@ done: return ret; } -static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n, void *arg) +static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n) { struct net *net = sock_net(skb->sk); struct nlattr *tca[TCA_ACT_MAX + 1]; diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c index 2c8ad7c86e43..3a4c0caa1f7d 100644 --- a/net/sched/act_csum.c +++ b/net/sched/act_csum.c @@ -51,7 +51,7 @@ static const struct nla_policy csum_policy[TCA_CSUM_MAX + 1] = { [TCA_CSUM_PARMS] = { .len = sizeof(struct tc_csum), }, }; -static int tcf_csum_init(struct nlattr *nla, struct nlattr *est, +static int tcf_csum_init(struct net *n, struct nlattr *nla, struct nlattr *est, struct tc_action *a, int ovr, int bind) { struct nlattr *tb[TCA_CSUM_MAX + 1]; @@ -166,15 +166,17 @@ static int tcf_csum_ipv4_igmp(struct sk_buff *skb, return 1; } -static int tcf_csum_ipv6_icmp(struct sk_buff *skb, struct ipv6hdr *ip6h, +static int tcf_csum_ipv6_icmp(struct sk_buff *skb, unsigned int ihl, unsigned int ipl) { struct icmp6hdr *icmp6h; + const struct ipv6hdr *ip6h; icmp6h = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*icmp6h)); if (icmp6h == NULL) return 0; + ip6h = ipv6_hdr(skb); icmp6h->icmp6_cksum = 0; skb->csum = csum_partial(icmp6h, ipl - ihl, 0); icmp6h->icmp6_cksum = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr, @@ -186,15 +188,17 @@ static int tcf_csum_ipv6_icmp(struct sk_buff *skb, struct ipv6hdr *ip6h, return 1; } -static int tcf_csum_ipv4_tcp(struct sk_buff *skb, struct iphdr *iph, +static int tcf_csum_ipv4_tcp(struct sk_buff *skb, unsigned int ihl, unsigned int ipl) { struct tcphdr *tcph; + const struct iphdr *iph; tcph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*tcph)); if (tcph == NULL) return 0; + iph = ip_hdr(skb); tcph->check = 0; skb->csum = csum_partial(tcph, ipl - ihl, 0); tcph->check = tcp_v4_check(ipl - ihl, @@ -205,15 +209,17 @@ static int tcf_csum_ipv4_tcp(struct sk_buff *skb, struct iphdr *iph, return 1; } -static int tcf_csum_ipv6_tcp(struct sk_buff *skb, struct ipv6hdr *ip6h, +static int tcf_csum_ipv6_tcp(struct sk_buff *skb, unsigned int ihl, unsigned int ipl) { struct tcphdr *tcph; + const struct ipv6hdr *ip6h; tcph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*tcph)); if (tcph == NULL) return 0; + ip6h = ipv6_hdr(skb); tcph->check = 0; skb->csum = csum_partial(tcph, ipl - ihl, 0); tcph->check = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr, @@ -225,10 +231,11 @@ static int tcf_csum_ipv6_tcp(struct sk_buff *skb, struct ipv6hdr *ip6h, return 1; } -static int tcf_csum_ipv4_udp(struct sk_buff *skb, struct iphdr *iph, +static int 
tcf_csum_ipv4_udp(struct sk_buff *skb, unsigned int ihl, unsigned int ipl, int udplite) { struct udphdr *udph; + const struct iphdr *iph; u16 ul; /* @@ -242,6 +249,7 @@ static int tcf_csum_ipv4_udp(struct sk_buff *skb, struct iphdr *iph, if (udph == NULL) return 0; + iph = ip_hdr(skb); ul = ntohs(udph->len); if (udplite || udph->check) { @@ -276,10 +284,11 @@ ignore_obscure_skb: return 1; } -static int tcf_csum_ipv6_udp(struct sk_buff *skb, struct ipv6hdr *ip6h, +static int tcf_csum_ipv6_udp(struct sk_buff *skb, unsigned int ihl, unsigned int ipl, int udplite) { struct udphdr *udph; + const struct ipv6hdr *ip6h; u16 ul; /* @@ -293,6 +302,7 @@ static int tcf_csum_ipv6_udp(struct sk_buff *skb, struct ipv6hdr *ip6h, if (udph == NULL) return 0; + ip6h = ipv6_hdr(skb); ul = ntohs(udph->len); udph->check = 0; @@ -328,7 +338,7 @@ ignore_obscure_skb: static int tcf_csum_ipv4(struct sk_buff *skb, u32 update_flags) { - struct iphdr *iph; + const struct iphdr *iph; int ntkoff; ntkoff = skb_network_offset(skb); @@ -353,19 +363,19 @@ static int tcf_csum_ipv4(struct sk_buff *skb, u32 update_flags) break; case IPPROTO_TCP: if (update_flags & TCA_CSUM_UPDATE_FLAG_TCP) - if (!tcf_csum_ipv4_tcp(skb, iph, iph->ihl * 4, + if (!tcf_csum_ipv4_tcp(skb, iph->ihl * 4, ntohs(iph->tot_len))) goto fail; break; case IPPROTO_UDP: if (update_flags & TCA_CSUM_UPDATE_FLAG_UDP) - if (!tcf_csum_ipv4_udp(skb, iph, iph->ihl * 4, + if (!tcf_csum_ipv4_udp(skb, iph->ihl * 4, ntohs(iph->tot_len), 0)) goto fail; break; case IPPROTO_UDPLITE: if (update_flags & TCA_CSUM_UPDATE_FLAG_UDPLITE) - if (!tcf_csum_ipv4_udp(skb, iph, iph->ihl * 4, + if (!tcf_csum_ipv4_udp(skb, iph->ihl * 4, ntohs(iph->tot_len), 1)) goto fail; break; @@ -377,7 +387,7 @@ static int tcf_csum_ipv4(struct sk_buff *skb, u32 update_flags) pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) goto fail; - ip_send_check(iph); + ip_send_check(ip_hdr(skb)); } return 1; @@ -456,6 +466,7 @@ static int tcf_csum_ipv6(struct sk_buff *skb, u32 update_flags) ixhl = ipv6_optlen(ip6xh); if (!pskb_may_pull(skb, hl + ixhl + ntkoff)) goto fail; + ip6xh = (void *)(skb_network_header(skb) + hl); if ((nexthdr == NEXTHDR_HOP) && !(tcf_csum_ipv6_hopopts(ip6xh, ixhl, &pl))) goto fail; @@ -464,25 +475,25 @@ static int tcf_csum_ipv6(struct sk_buff *skb, u32 update_flags) break; case IPPROTO_ICMPV6: if (update_flags & TCA_CSUM_UPDATE_FLAG_ICMP) - if (!tcf_csum_ipv6_icmp(skb, ip6h, + if (!tcf_csum_ipv6_icmp(skb, hl, pl + sizeof(*ip6h))) goto fail; goto done; case IPPROTO_TCP: if (update_flags & TCA_CSUM_UPDATE_FLAG_TCP) - if (!tcf_csum_ipv6_tcp(skb, ip6h, + if (!tcf_csum_ipv6_tcp(skb, hl, pl + sizeof(*ip6h))) goto fail; goto done; case IPPROTO_UDP: if (update_flags & TCA_CSUM_UPDATE_FLAG_UDP) - if (!tcf_csum_ipv6_udp(skb, ip6h, hl, + if (!tcf_csum_ipv6_udp(skb, hl, pl + sizeof(*ip6h), 0)) goto fail; goto done; case IPPROTO_UDPLITE: if (update_flags & TCA_CSUM_UPDATE_FLAG_UDPLITE) - if (!tcf_csum_ipv6_udp(skb, ip6h, hl, + if (!tcf_csum_ipv6_udp(skb, hl, pl + sizeof(*ip6h), 1)) goto fail; goto done; diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c index 05d60859d8e3..fd2b3cff5fa2 100644 --- a/net/sched/act_gact.c +++ b/net/sched/act_gact.c @@ -58,8 +58,9 @@ static const struct nla_policy gact_policy[TCA_GACT_MAX + 1] = { [TCA_GACT_PROB] = { .len = sizeof(struct tc_gact_p) }, }; -static int tcf_gact_init(struct nlattr *nla, struct nlattr *est, - struct tc_action *a, int ovr, int bind) +static int tcf_gact_init(struct net *net, struct nlattr *nla, + struct nlattr *est, struct tc_action *a, + int 
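Dropping the iph/ip6h parameters from the tcf_csum helpers is more than a cleanup: tcf_csum_skb_nextlayer() can trigger a pskb_may_pull(), which may reallocate skb->data and leave any previously saved header pointer dangling. Re-reading the header with ip_hdr()/ipv6_hdr() after the helper returns, as the hunks above do, avoids that. Condensed to the TCP/IPv4 case (a sketch of the same pattern, not a new helper):

/* Sketch mirroring the patched helper above. */
static int example_ipv4_tcp(struct sk_buff *skb, unsigned int ihl,
                            unsigned int ipl)
{
        struct tcphdr *tcph;
        const struct iphdr *iph;

        tcph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*tcph));
        if (tcph == NULL)
                return 0;

        iph = ip_hdr(skb);      /* re-read: skb->data may have moved */
        tcph->check = 0;
        skb->csum = csum_partial(tcph, ipl - ihl, 0);
        tcph->check = tcp_v4_check(ipl - ihl, iph->saddr, iph->daddr,
                                   skb->csum);
        return 1;
}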
ovr, int bind) { struct nlattr *tb[TCA_GACT_MAX + 1]; struct tc_gact *parm; diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index 58fb3c7aab9e..60d88b6b9560 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -8,7 +8,7 @@ * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. * - * Copyright: Jamal Hadi Salim (2002-4) + * Copyright: Jamal Hadi Salim (2002-13) */ #include <linux/types.h> @@ -102,7 +102,7 @@ static const struct nla_policy ipt_policy[TCA_IPT_MAX + 1] = { [TCA_IPT_TARG] = { .len = sizeof(struct xt_entry_target) }, }; -static int tcf_ipt_init(struct nlattr *nla, struct nlattr *est, +static int tcf_ipt_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action *a, int ovr, int bind) { struct nlattr *tb[TCA_IPT_MAX + 1]; @@ -207,10 +207,8 @@ static int tcf_ipt(struct sk_buff *skb, const struct tc_action *a, struct tcf_ipt *ipt = a->priv; struct xt_action_param par; - if (skb_cloned(skb)) { - if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) - return TC_ACT_UNSPEC; - } + if (skb_unclone(skb, GFP_ATOMIC)) + return TC_ACT_UNSPEC; spin_lock(&ipt->tcf_lock); @@ -305,17 +303,44 @@ static struct tc_action_ops act_ipt_ops = { .walk = tcf_generic_walker }; -MODULE_AUTHOR("Jamal Hadi Salim(2002-4)"); +static struct tc_action_ops act_xt_ops = { + .kind = "xt", + .hinfo = &ipt_hash_info, + .type = TCA_ACT_IPT, + .capab = TCA_CAP_NONE, + .owner = THIS_MODULE, + .act = tcf_ipt, + .dump = tcf_ipt_dump, + .cleanup = tcf_ipt_cleanup, + .lookup = tcf_hash_search, + .init = tcf_ipt_init, + .walk = tcf_generic_walker +}; + +MODULE_AUTHOR("Jamal Hadi Salim(2002-13)"); MODULE_DESCRIPTION("Iptables target actions"); MODULE_LICENSE("GPL"); +MODULE_ALIAS("act_xt"); static int __init ipt_init_module(void) { - return tcf_register_action(&act_ipt_ops); + int ret1, ret2; + ret1 = tcf_register_action(&act_xt_ops); + if (ret1 < 0) + printk("Failed to load xt action\n"); + ret2 = tcf_register_action(&act_ipt_ops); + if (ret2 < 0) + printk("Failed to load ipt action\n"); + + if (ret1 < 0 && ret2 < 0) + return ret1; + else + return 0; } static void __exit ipt_cleanup_module(void) { + tcf_unregister_action(&act_xt_ops); tcf_unregister_action(&act_ipt_ops); } diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 9c0fd0c78814..5d676edc22a6 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -62,8 +62,9 @@ static const struct nla_policy mirred_policy[TCA_MIRRED_MAX + 1] = { [TCA_MIRRED_PARMS] = { .len = sizeof(struct tc_mirred) }, }; -static int tcf_mirred_init(struct nlattr *nla, struct nlattr *est, - struct tc_action *a, int ovr, int bind) +static int tcf_mirred_init(struct net *net, struct nlattr *nla, + struct nlattr *est, struct tc_action *a, int ovr, + int bind) { struct nlattr *tb[TCA_MIRRED_MAX + 1]; struct tc_mirred *parm; @@ -88,7 +89,7 @@ static int tcf_mirred_init(struct nlattr *nla, struct nlattr *est, return -EINVAL; } if (parm->ifindex) { - dev = __dev_get_by_index(&init_net, parm->ifindex); + dev = __dev_get_by_index(net, parm->ifindex); if (dev == NULL) return -ENODEV; switch (dev->type) { diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c index b5d029eb44f2..876f0ef29694 100644 --- a/net/sched/act_nat.c +++ b/net/sched/act_nat.c @@ -44,7 +44,7 @@ static const struct nla_policy nat_policy[TCA_NAT_MAX + 1] = { [TCA_NAT_PARMS] = { .len = sizeof(struct tc_nat) }, }; -static int tcf_nat_init(struct nlattr *nla, struct nlattr *est, +static int tcf_nat_init(struct net *net, 
struct nlattr *nla, struct nlattr *est, struct tc_action *a, int ovr, int bind) { struct nlattr *tb[TCA_NAT_MAX + 1]; diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index 45c53ab067a6..7ed78c9e505c 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -38,8 +38,9 @@ static const struct nla_policy pedit_policy[TCA_PEDIT_MAX + 1] = { [TCA_PEDIT_PARMS] = { .len = sizeof(struct tc_pedit) }, }; -static int tcf_pedit_init(struct nlattr *nla, struct nlattr *est, - struct tc_action *a, int ovr, int bind) +static int tcf_pedit_init(struct net *net, struct nlattr *nla, + struct nlattr *est, struct tc_action *a, + int ovr, int bind) { struct nlattr *tb[TCA_PEDIT_MAX + 1]; struct tc_pedit *parm; @@ -130,8 +131,7 @@ static int tcf_pedit(struct sk_buff *skb, const struct tc_action *a, int i, munged = 0; unsigned int off; - if (skb_cloned(skb) && - pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) + if (skb_unclone(skb, GFP_ATOMIC)) return p->tcf_action; off = skb_network_offset(skb); diff --git a/net/sched/act_police.c b/net/sched/act_police.c index a9de23297d47..823463adbd21 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -22,8 +22,23 @@ #include <net/act_api.h> #include <net/netlink.h> -#define L2T(p, L) qdisc_l2t((p)->tcfp_R_tab, L) -#define L2T_P(p, L) qdisc_l2t((p)->tcfp_P_tab, L) +struct tcf_police { + struct tcf_common common; + int tcfp_result; + u32 tcfp_ewma_rate; + s64 tcfp_burst; + u32 tcfp_mtu; + s64 tcfp_toks; + s64 tcfp_ptoks; + s64 tcfp_mtu_ptoks; + s64 tcfp_t_c; + struct psched_ratecfg rate; + bool rate_present; + struct psched_ratecfg peak; + bool peak_present; +}; +#define to_police(pc) \ + container_of(pc, struct tcf_police, common) #define POL_TAB_MASK 15 static struct tcf_common *tcf_police_ht[POL_TAB_MASK + 1]; @@ -108,10 +123,6 @@ static void tcf_police_destroy(struct tcf_police *p) write_unlock_bh(&police_lock); gen_kill_estimator(&p->tcf_bstats, &p->tcf_rate_est); - if (p->tcfp_R_tab) - qdisc_put_rtab(p->tcfp_R_tab); - if (p->tcfp_P_tab) - qdisc_put_rtab(p->tcfp_P_tab); /* * gen_estimator est_timer() might access p->tcf_lock * or bstats, wait a RCU grace period before freeing p @@ -130,8 +141,9 @@ static const struct nla_policy police_policy[TCA_POLICE_MAX + 1] = { [TCA_POLICE_RESULT] = { .type = NLA_U32 }, }; -static int tcf_act_police_locate(struct nlattr *nla, struct nlattr *est, - struct tc_action *a, int ovr, int bind) +static int tcf_act_police_locate(struct net *net, struct nlattr *nla, + struct nlattr *est, struct tc_action *a, + int ovr, int bind) { unsigned int h; int ret = 0, err; @@ -211,26 +223,36 @@ override: } /* No failure allowed after this point */ - if (R_tab != NULL) { - qdisc_put_rtab(police->tcfp_R_tab); - police->tcfp_R_tab = R_tab; + police->tcfp_mtu = parm->mtu; + if (police->tcfp_mtu == 0) { + police->tcfp_mtu = ~0; + if (R_tab) + police->tcfp_mtu = 255 << R_tab->rate.cell_log; + } + if (R_tab) { + police->rate_present = true; + psched_ratecfg_precompute(&police->rate, R_tab->rate.rate); + qdisc_put_rtab(R_tab); + } else { + police->rate_present = false; } - if (P_tab != NULL) { - qdisc_put_rtab(police->tcfp_P_tab); - police->tcfp_P_tab = P_tab; + if (P_tab) { + police->peak_present = true; + psched_ratecfg_precompute(&police->peak, P_tab->rate.rate); + qdisc_put_rtab(P_tab); + } else { + police->peak_present = false; } if (tb[TCA_POLICE_RESULT]) police->tcfp_result = nla_get_u32(tb[TCA_POLICE_RESULT]); - police->tcfp_toks = police->tcfp_burst = parm->burst; - police->tcfp_mtu = parm->mtu; - if (police->tcfp_mtu == 0) 
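Both act_ipt and act_pedit now use skb_unclone(), which folds the clone test and the head expansion into a single call. Its behavior is equivalent to the open-coded sequence it replaces (shown for comparison; use the real helper in new code):

/* Equivalent to skb_unclone(skb, pri). */
static inline int example_unclone(struct sk_buff *skb, gfp_t pri)
{
        if (skb_cloned(skb))
                return pskb_expand_head(skb, 0, 0, pri);
        return 0;
}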
{ - police->tcfp_mtu = ~0; - if (police->tcfp_R_tab) - police->tcfp_mtu = 255<<police->tcfp_R_tab->rate.cell_log; + police->tcfp_burst = PSCHED_TICKS2NS(parm->burst); + police->tcfp_toks = police->tcfp_burst; + if (police->peak_present) { + police->tcfp_mtu_ptoks = (s64) psched_l2t_ns(&police->peak, + police->tcfp_mtu); + police->tcfp_ptoks = police->tcfp_mtu_ptoks; } - if (police->tcfp_P_tab) - police->tcfp_ptoks = L2T_P(police, police->tcfp_mtu); police->tcf_action = parm->action; if (tb[TCA_POLICE_AVRATE]) @@ -240,7 +262,7 @@ override: if (ret != ACT_P_CREATED) return ret; - police->tcfp_t_c = psched_get_time(); + police->tcfp_t_c = ktime_to_ns(ktime_get()); police->tcf_index = parm->index ? parm->index : tcf_hash_new_index(&police_idx_gen, &police_hash_info); h = tcf_hash(police->tcf_index, POL_TAB_MASK); @@ -286,9 +308,9 @@ static int tcf_act_police(struct sk_buff *skb, const struct tc_action *a, struct tcf_result *res) { struct tcf_police *police = a->priv; - psched_time_t now; - long toks; - long ptoks = 0; + s64 now; + s64 toks; + s64 ptoks = 0; spin_lock(&police->tcf_lock); @@ -304,24 +326,25 @@ static int tcf_act_police(struct sk_buff *skb, const struct tc_action *a, } if (qdisc_pkt_len(skb) <= police->tcfp_mtu) { - if (police->tcfp_R_tab == NULL) { + if (!police->rate_present) { spin_unlock(&police->tcf_lock); return police->tcfp_result; } - now = psched_get_time(); - toks = psched_tdiff_bounded(now, police->tcfp_t_c, - police->tcfp_burst); - if (police->tcfp_P_tab) { + now = ktime_to_ns(ktime_get()); + toks = min_t(s64, now - police->tcfp_t_c, + police->tcfp_burst); + if (police->peak_present) { ptoks = toks + police->tcfp_ptoks; - if (ptoks > (long)L2T_P(police, police->tcfp_mtu)) - ptoks = (long)L2T_P(police, police->tcfp_mtu); - ptoks -= L2T_P(police, qdisc_pkt_len(skb)); + if (ptoks > police->tcfp_mtu_ptoks) + ptoks = police->tcfp_mtu_ptoks; + ptoks -= (s64) psched_l2t_ns(&police->peak, + qdisc_pkt_len(skb)); } toks += police->tcfp_toks; - if (toks > (long)police->tcfp_burst) + if (toks > police->tcfp_burst) toks = police->tcfp_burst; - toks -= L2T(police, qdisc_pkt_len(skb)); + toks -= (s64) psched_l2t_ns(&police->rate, qdisc_pkt_len(skb)); if ((toks|ptoks) >= 0) { police->tcfp_t_c = now; police->tcfp_toks = toks; @@ -347,15 +370,15 @@ tcf_act_police_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) .index = police->tcf_index, .action = police->tcf_action, .mtu = police->tcfp_mtu, - .burst = police->tcfp_burst, + .burst = PSCHED_NS2TICKS(police->tcfp_burst), .refcnt = police->tcf_refcnt - ref, .bindcnt = police->tcf_bindcnt - bind, }; - if (police->tcfp_R_tab) - opt.rate = police->tcfp_R_tab->rate; - if (police->tcfp_P_tab) - opt.peakrate = police->tcfp_P_tab->rate; + if (police->rate_present) + opt.rate.rate = psched_ratecfg_getrate(&police->rate); + if (police->peak_present) + opt.peakrate.rate = psched_ratecfg_getrate(&police->peak); if (nla_put(skb, TCA_POLICE_TBF, sizeof(opt), &opt)) goto nla_put_failure; if (police->tcfp_result && diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c index 3714f60f0b3c..7725eb4ab756 100644 --- a/net/sched/act_simple.c +++ b/net/sched/act_simple.c @@ -95,8 +95,9 @@ static const struct nla_policy simple_policy[TCA_DEF_MAX + 1] = { [TCA_DEF_DATA] = { .type = NLA_STRING, .len = SIMP_MAX_DATA }, }; -static int tcf_simp_init(struct nlattr *nla, struct nlattr *est, - struct tc_action *a, int ovr, int bind) +static int tcf_simp_init(struct net *net, struct nlattr *nla, + struct nlattr *est, struct tc_action *a, + int 
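With the rate tables gone, the policer accounts tokens directly in nanoseconds: credit accrues as wall-clock time since the last check-point (capped at the burst), and each packet costs psched_l2t_ns() nanoseconds at the configured rate. The conforming test from tcf_act_police(), reduced to the single-rate case:

/* Hedged sketch; the real code also updates state and the peak bucket. */
static bool example_conforms(struct tcf_police *p, unsigned int len, s64 now)
{
        s64 toks = min_t(s64, now - p->tcfp_t_c, p->tcfp_burst);

        toks += p->tcfp_toks;
        if (toks > p->tcfp_burst)
                toks = p->tcfp_burst;
        toks -= (s64) psched_l2t_ns(&p->rate, len);
        return toks >= 0;
}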
ovr, int bind) { struct nlattr *tb[TCA_DEF_MAX + 1]; struct tc_defact *parm; diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c index 476e0fac6712..cb4221171f93 100644 --- a/net/sched/act_skbedit.c +++ b/net/sched/act_skbedit.c @@ -67,8 +67,9 @@ static const struct nla_policy skbedit_policy[TCA_SKBEDIT_MAX + 1] = { [TCA_SKBEDIT_MARK] = { .len = sizeof(u32) }, }; -static int tcf_skbedit_init(struct nlattr *nla, struct nlattr *est, - struct tc_action *a, int ovr, int bind) +static int tcf_skbedit_init(struct net *net, struct nlattr *nla, + struct nlattr *est, struct tc_action *a, + int ovr, int bind) { struct nlattr *tb[TCA_SKBEDIT_MAX + 1]; struct tc_skbedit *parm; diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index ff55ed6c49b2..8e118af90973 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -22,7 +22,6 @@ #include <linux/skbuff.h> #include <linux/init.h> #include <linux/kmod.h> -#include <linux/netlink.h> #include <linux/err.h> #include <linux/slab.h> #include <net/net_namespace.h> @@ -118,7 +117,7 @@ static inline u32 tcf_auto_prio(struct tcf_proto *tp) /* Add/change/delete/get a filter node */ -static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n, void *arg) +static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n) { struct net *net = sock_net(skb->sk); struct nlattr *tca[TCA_MAX + 1]; @@ -141,7 +140,12 @@ static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n, void *arg) if ((n->nlmsg_type != RTM_GETTFILTER) && !capable(CAP_NET_ADMIN)) return -EPERM; + replay: + err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, NULL); + if (err < 0) + return err; + t = nlmsg_data(n); protocol = TC_H_MIN(t->tcm_info); prio = TC_H_MAJ(t->tcm_info); @@ -164,10 +168,6 @@ replay: if (dev == NULL) return -ENODEV; - err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, NULL); - if (err < 0) - return err; - /* Find qdisc */ if (!parent) { q = dev->qdisc; @@ -321,7 +321,7 @@ replay: } } - err = tp->ops->change(skb, tp, cl, t->tcm_handle, tca, &fh); + err = tp->ops->change(net, skb, tp, cl, t->tcm_handle, tca, &fh); if (err == 0) { if (tp_created) { spin_lock_bh(root_lock); @@ -427,7 +427,7 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb) const struct Qdisc_class_ops *cops; struct tcf_dump_args arg; - if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm))) + if (nlmsg_len(cb->nlh) < sizeof(*tcm)) return skb->len; dev = __dev_get_by_index(net, tcm->tcm_ifindex); if (!dev) @@ -508,7 +508,7 @@ void tcf_exts_destroy(struct tcf_proto *tp, struct tcf_exts *exts) } EXPORT_SYMBOL(tcf_exts_destroy); -int tcf_exts_validate(struct tcf_proto *tp, struct nlattr **tb, +int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb, struct nlattr *rate_tlv, struct tcf_exts *exts, const struct tcf_ext_map *map) { @@ -519,7 +519,7 @@ int tcf_exts_validate(struct tcf_proto *tp, struct nlattr **tb, struct tc_action *act; if (map->police && tb[map->police]) { - act = tcf_action_init_1(tb[map->police], rate_tlv, + act = tcf_action_init_1(net, tb[map->police], rate_tlv, "police", TCA_ACT_NOREPLACE, TCA_ACT_BIND); if (IS_ERR(act)) @@ -528,8 +528,9 @@ int tcf_exts_validate(struct tcf_proto *tp, struct nlattr **tb, act->type = TCA_OLD_COMPAT; exts->action = act; } else if (map->action && tb[map->action]) { - act = tcf_action_init(tb[map->action], rate_tlv, NULL, - TCA_ACT_NOREPLACE, TCA_ACT_BIND); + act = tcf_action_init(net, tb[map->action], rate_tlv, + NULL, TCA_ACT_NOREPLACE, + TCA_ACT_BIND); if (IS_ERR(act)) return PTR_ERR(act); diff 
--git a/net/sched/cls_basic.c b/net/sched/cls_basic.c index 344a11b342e5..d76a35d0dc85 100644 --- a/net/sched/cls_basic.c +++ b/net/sched/cls_basic.c @@ -132,15 +132,16 @@ static const struct nla_policy basic_policy[TCA_BASIC_MAX + 1] = { [TCA_BASIC_EMATCHES] = { .type = NLA_NESTED }, }; -static int basic_set_parms(struct tcf_proto *tp, struct basic_filter *f, - unsigned long base, struct nlattr **tb, +static int basic_set_parms(struct net *net, struct tcf_proto *tp, + struct basic_filter *f, unsigned long base, + struct nlattr **tb, struct nlattr *est) { int err = -EINVAL; struct tcf_exts e; struct tcf_ematch_tree t; - err = tcf_exts_validate(tp, tb, est, &e, &basic_ext_map); + err = tcf_exts_validate(net, tp, tb, est, &e, &basic_ext_map); if (err < 0) return err; @@ -162,7 +163,7 @@ errout: return err; } -static int basic_change(struct sk_buff *in_skb, +static int basic_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, unsigned long *arg) { @@ -182,7 +183,7 @@ static int basic_change(struct sk_buff *in_skb, if (f != NULL) { if (handle && f->handle != handle) return -EINVAL; - return basic_set_parms(tp, f, base, tb, tca[TCA_RATE]); + return basic_set_parms(net, tp, f, base, tb, tca[TCA_RATE]); } err = -ENOBUFS; @@ -208,7 +209,7 @@ static int basic_change(struct sk_buff *in_skb, f->handle = head->hgenerator; } - err = basic_set_parms(tp, f, base, tb, tca[TCA_RATE]); + err = basic_set_parms(net, tp, f, base, tb, tca[TCA_RATE]); if (err < 0) goto errout; diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c index 6db7855b9029..3a294eb98d61 100644 --- a/net/sched/cls_cgroup.c +++ b/net/sched/cls_cgroup.c @@ -178,7 +178,7 @@ static const struct nla_policy cgroup_policy[TCA_CGROUP_MAX + 1] = { [TCA_CGROUP_EMATCHES] = { .type = NLA_NESTED }, }; -static int cls_cgroup_change(struct sk_buff *in_skb, +static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, unsigned long *arg) @@ -215,7 +215,8 @@ static int cls_cgroup_change(struct sk_buff *in_skb, if (err < 0) return err; - err = tcf_exts_validate(tp, tb, tca[TCA_RATE], &e, &cgroup_ext_map); + err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, + &cgroup_ext_map); if (err < 0) return err; diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c index ce82d0cb1b47..7881e2fccbc2 100644 --- a/net/sched/cls_flow.c +++ b/net/sched/cls_flow.c @@ -351,7 +351,7 @@ static const struct nla_policy flow_policy[TCA_FLOW_MAX + 1] = { [TCA_FLOW_PERTURB] = { .type = NLA_U32 }, }; -static int flow_change(struct sk_buff *in_skb, +static int flow_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, unsigned long *arg) @@ -393,11 +393,11 @@ static int flow_change(struct sk_buff *in_skb, return -EOPNOTSUPP; if ((keymask & (FLOW_KEY_SKUID|FLOW_KEY_SKGID)) && - sk_user_ns(NETLINK_CB(in_skb).ssk) != &init_user_ns) + sk_user_ns(NETLINK_CB(in_skb).sk) != &init_user_ns) return -EOPNOTSUPP; } - err = tcf_exts_validate(tp, tb, tca[TCA_RATE], &e, &flow_ext_map); + err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, &flow_ext_map); if (err < 0) return err; diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c index 4075a0aef2aa..9b97172db84a 100644 --- a/net/sched/cls_fw.c +++ b/net/sched/cls_fw.c @@ -192,7 +192,7 @@ static const struct nla_policy fw_policy[TCA_FW_MAX + 1] = { }; static int -fw_change_attrs(struct tcf_proto *tp, struct 
fw_filter *f, +fw_change_attrs(struct net *net, struct tcf_proto *tp, struct fw_filter *f, struct nlattr **tb, struct nlattr **tca, unsigned long base) { struct fw_head *head = (struct fw_head *)tp->root; @@ -200,11 +200,10 @@ fw_change_attrs(struct tcf_proto *tp, struct fw_filter *f, u32 mask; int err; - err = tcf_exts_validate(tp, tb, tca[TCA_RATE], &e, &fw_ext_map); + err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, &fw_ext_map); if (err < 0) return err; - err = -EINVAL; if (tb[TCA_FW_CLASSID]) { f->res.classid = nla_get_u32(tb[TCA_FW_CLASSID]); tcf_bind_filter(tp, &f->res, base); @@ -218,6 +217,7 @@ fw_change_attrs(struct tcf_proto *tp, struct fw_filter *f, } #endif /* CONFIG_NET_CLS_IND */ + err = -EINVAL; if (tb[TCA_FW_MASK]) { mask = nla_get_u32(tb[TCA_FW_MASK]); if (mask != head->mask) @@ -233,7 +233,7 @@ errout: return err; } -static int fw_change(struct sk_buff *in_skb, +static int fw_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, @@ -255,7 +255,7 @@ static int fw_change(struct sk_buff *in_skb, if (f != NULL) { if (f->id != handle && handle) return -EINVAL; - return fw_change_attrs(tp, f, tb, tca, base); + return fw_change_attrs(net, tp, f, tb, tca, base); } if (!handle) @@ -282,7 +282,7 @@ static int fw_change(struct sk_buff *in_skb, f->id = handle; - err = fw_change_attrs(tp, f, tb, tca, base); + err = fw_change_attrs(net, tp, f, tb, tca, base); if (err < 0) goto errout; diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c index c10d57bf98f2..37da567d833e 100644 --- a/net/sched/cls_route.c +++ b/net/sched/cls_route.c @@ -335,9 +335,10 @@ static const struct nla_policy route4_policy[TCA_ROUTE4_MAX + 1] = { [TCA_ROUTE4_IIF] = { .type = NLA_U32 }, }; -static int route4_set_parms(struct tcf_proto *tp, unsigned long base, - struct route4_filter *f, u32 handle, struct route4_head *head, - struct nlattr **tb, struct nlattr *est, int new) +static int route4_set_parms(struct net *net, struct tcf_proto *tp, + unsigned long base, struct route4_filter *f, + u32 handle, struct route4_head *head, + struct nlattr **tb, struct nlattr *est, int new) { int err; u32 id = 0, to = 0, nhandle = 0x8000; @@ -346,7 +347,7 @@ static int route4_set_parms(struct tcf_proto *tp, unsigned long base, struct route4_bucket *b; struct tcf_exts e; - err = tcf_exts_validate(tp, tb, est, &e, &route_ext_map); + err = tcf_exts_validate(net, tp, tb, est, &e, &route_ext_map); if (err < 0) return err; @@ -427,7 +428,7 @@ errout: return err; } -static int route4_change(struct sk_buff *in_skb, +static int route4_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, @@ -457,7 +458,7 @@ static int route4_change(struct sk_buff *in_skb, if (f->bkt) old_handle = f->handle; - err = route4_set_parms(tp, base, f, handle, head, tb, + err = route4_set_parms(net, tp, base, f, handle, head, tb, tca[TCA_RATE], 0); if (err < 0) return err; @@ -480,7 +481,7 @@ static int route4_change(struct sk_buff *in_skb, if (f == NULL) goto errout; - err = route4_set_parms(tp, base, f, handle, head, tb, + err = route4_set_parms(net, tp, base, f, handle, head, tb, tca[TCA_RATE], 1); if (err < 0) goto errout; diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h index 494bbb90924a..252d8b05872e 100644 --- a/net/sched/cls_rsvp.h +++ b/net/sched/cls_rsvp.h @@ -416,7 +416,7 @@ static const struct nla_policy rsvp_policy[TCA_RSVP_MAX + 1] = { [TCA_RSVP_PINFO] = { .len = sizeof(struct tc_rsvp_pinfo) 
}, }; -static int rsvp_change(struct sk_buff *in_skb, +static int rsvp_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, @@ -440,7 +440,7 @@ static int rsvp_change(struct sk_buff *in_skb, if (err < 0) return err; - err = tcf_exts_validate(tp, tb, tca[TCA_RATE], &e, &rsvp_ext_map); + err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, &rsvp_ext_map); if (err < 0) return err; diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c index a1293b4ab7a1..b86535a40169 100644 --- a/net/sched/cls_tcindex.c +++ b/net/sched/cls_tcindex.c @@ -197,9 +197,10 @@ static const struct nla_policy tcindex_policy[TCA_TCINDEX_MAX + 1] = { }; static int -tcindex_set_parms(struct tcf_proto *tp, unsigned long base, u32 handle, - struct tcindex_data *p, struct tcindex_filter_result *r, - struct nlattr **tb, struct nlattr *est) +tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, + u32 handle, struct tcindex_data *p, + struct tcindex_filter_result *r, struct nlattr **tb, + struct nlattr *est) { int err, balloc = 0; struct tcindex_filter_result new_filter_result, *old_r = r; @@ -208,7 +209,7 @@ tcindex_set_parms(struct tcf_proto *tp, unsigned long base, u32 handle, struct tcindex_filter *f = NULL; /* make gcc behave */ struct tcf_exts e; - err = tcf_exts_validate(tp, tb, est, &e, &tcindex_ext_map); + err = tcf_exts_validate(net, tp, tb, est, &e, &tcindex_ext_map); if (err < 0) return err; @@ -332,7 +333,7 @@ errout: } static int -tcindex_change(struct sk_buff *in_skb, +tcindex_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, unsigned long *arg) { @@ -353,7 +354,8 @@ tcindex_change(struct sk_buff *in_skb, if (err < 0) return err; - return tcindex_set_parms(tp, base, handle, p, r, tb, tca[TCA_RATE]); + return tcindex_set_parms(net, tp, base, handle, p, r, tb, + tca[TCA_RATE]); } diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index c7c27bc91b5a..eb07a1e536e6 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -488,15 +488,15 @@ static const struct nla_policy u32_policy[TCA_U32_MAX + 1] = { [TCA_U32_MARK] = { .len = sizeof(struct tc_u32_mark) }, }; -static int u32_set_parms(struct tcf_proto *tp, unsigned long base, - struct tc_u_hnode *ht, +static int u32_set_parms(struct net *net, struct tcf_proto *tp, + unsigned long base, struct tc_u_hnode *ht, struct tc_u_knode *n, struct nlattr **tb, struct nlattr *est) { int err; struct tcf_exts e; - err = tcf_exts_validate(tp, tb, est, &e, &u32_ext_map); + err = tcf_exts_validate(net, tp, tb, est, &e, &u32_ext_map); if (err < 0) return err; @@ -544,7 +544,7 @@ errout: return err; } -static int u32_change(struct sk_buff *in_skb, +static int u32_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, unsigned long *arg) @@ -570,7 +570,8 @@ static int u32_change(struct sk_buff *in_skb, if (TC_U32_KEY(n->handle) == 0) return -EINVAL; - return u32_set_parms(tp, base, n->ht_up, n, tb, tca[TCA_RATE]); + return u32_set_parms(net, tp, base, n->ht_up, n, tb, + tca[TCA_RATE]); } if (tb[TCA_U32_DIVISOR]) { @@ -656,7 +657,7 @@ static int u32_change(struct sk_buff *in_skb, } #endif - err = u32_set_parms(tp, base, ht, n, tb, tca[TCA_RATE]); + err = u32_set_parms(net, tp, base, ht, n, tb, tca[TCA_RATE]); if (err == 0) { struct tc_u_knode **ins; for (ins = &ht->ht[TC_U32_HASH(handle)]; *ins; ins = &(*ins)->next) diff --git 
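The common thread through the cls_* hunks is that every classifier ->change() and the shared tcf_exts_validate() now carry the caller's struct net down to action creation, so actions resolve devices and state in the correct namespace rather than init_net (the mirred hunk earlier is the payoff). Schematically:

/* Illustrative call chain only; names abbreviated: */
tc_ctl_tfilter(skb, n)                    /* net = sock_net(skb->sk) */
  -> tp->ops->change(net, skb, tp, ...)   /* e.g. u32_change()       */
    -> tcf_exts_validate(net, tp, ...)
      -> tcf_action_init_1(net, nla, ...)
        -> a_o->init(net, ...)            /* e.g. tcf_mirred_init(): */
                                          /* __dev_get_by_index(net) */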
a/net/sched/em_ipset.c b/net/sched/em_ipset.c index 3130320997e2..938b7cbf5627 100644 --- a/net/sched/em_ipset.c +++ b/net/sched/em_ipset.c @@ -83,7 +83,7 @@ static int em_ipset_match(struct sk_buff *skb, struct tcf_ematch *em, opt.dim = set->dim; opt.flags = set->flags; opt.cmdflags = 0; - opt.timeout = ~0u; + opt.ext.timeout = ~0u; network_offset = skb_network_offset(skb); skb_pull(skb, network_offset); diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index d84f7e734cd7..2b935e7cfe7b 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -493,7 +493,7 @@ void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc) } EXPORT_SYMBOL(qdisc_watchdog_init); -void qdisc_watchdog_schedule(struct qdisc_watchdog *wd, psched_time_t expires) +void qdisc_watchdog_schedule_ns(struct qdisc_watchdog *wd, u64 expires) { if (test_bit(__QDISC_STATE_DEACTIVATED, &qdisc_root_sleeping(wd->qdisc)->state)) @@ -502,10 +502,10 @@ void qdisc_watchdog_schedule(struct qdisc_watchdog *wd, psched_time_t expires) qdisc_throttled(wd->qdisc); hrtimer_start(&wd->timer, - ns_to_ktime(PSCHED_TICKS2NS(expires)), + ns_to_ktime(expires), HRTIMER_MODE_ABS); } -EXPORT_SYMBOL(qdisc_watchdog_schedule); +EXPORT_SYMBOL(qdisc_watchdog_schedule_ns); void qdisc_watchdog_cancel(struct qdisc_watchdog *wd) { @@ -545,7 +545,7 @@ static void qdisc_class_hash_free(struct hlist_head *h, unsigned int n) void qdisc_class_hash_grow(struct Qdisc *sch, struct Qdisc_class_hash *clhash) { struct Qdisc_class_common *cl; - struct hlist_node *n, *next; + struct hlist_node *next; struct hlist_head *nhash, *ohash; unsigned int nsize, nmask, osize; unsigned int i, h; @@ -564,7 +564,7 @@ void qdisc_class_hash_grow(struct Qdisc *sch, struct Qdisc_class_hash *clhash) sch_tree_lock(sch); for (i = 0; i < osize; i++) { - hlist_for_each_entry_safe(cl, n, next, &ohash[i], hnode) { + hlist_for_each_entry_safe(cl, next, &ohash[i], hnode) { h = qdisc_class_hash(cl->classid, nmask); hlist_add_head(&cl->hnode, &nhash[h]); } @@ -971,13 +971,13 @@ check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w) * Delete/get qdisc. */ -static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg) +static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n) { struct net *net = sock_net(skb->sk); struct tcmsg *tcm = nlmsg_data(n); struct nlattr *tca[TCA_MAX + 1]; struct net_device *dev; - u32 clid = tcm->tcm_parent; + u32 clid; struct Qdisc *q = NULL; struct Qdisc *p = NULL; int err; @@ -985,14 +985,15 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg) if ((n->nlmsg_type != RTM_GETQDISC) && !capable(CAP_NET_ADMIN)) return -EPERM; - dev = __dev_get_by_index(net, tcm->tcm_ifindex); - if (!dev) - return -ENODEV; - err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL); if (err < 0) return err; + dev = __dev_get_by_index(net, tcm->tcm_ifindex); + if (!dev) + return -ENODEV; + + clid = tcm->tcm_parent; if (clid) { if (clid != TC_H_ROOT) { if (TC_H_MAJ(clid) != TC_H_MAJ(TC_H_INGRESS)) { @@ -1038,7 +1039,7 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg) * Create/change qdisc. */ -static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg) +static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n) { struct net *net = sock_net(skb->sk); struct tcmsg *tcm; @@ -1053,6 +1054,10 @@ static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg) replay: /* Reinit, just in case something touches this. 
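qdisc_watchdog_schedule() becomes a nanosecond-based primitive here. Callers still holding psched ticks can convert at the boundary; a trivial wrapper of this shape (an assumption about how tick-based callers keep working, not part of the hunks shown) suffices:

/* Assumed compatibility wrapper: tick-based callers convert once. */
static inline void example_watchdog_schedule(struct qdisc_watchdog *wd,
                                             psched_time_t expires)
{
        qdisc_watchdog_schedule_ns(wd, PSCHED_TICKS2NS(expires));
}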
*/ + err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL); + if (err < 0) + return err; + tcm = nlmsg_data(n); clid = tcm->tcm_parent; q = p = NULL; @@ -1061,9 +1066,6 @@ replay: if (!dev) return -ENODEV; - err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL); - if (err < 0) - return err; if (clid) { if (clid != TC_H_ROOT) { @@ -1372,7 +1374,7 @@ done: -static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg) +static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n) { struct net *net = sock_net(skb->sk); struct tcmsg *tcm = nlmsg_data(n); @@ -1382,22 +1384,22 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg) const struct Qdisc_class_ops *cops; unsigned long cl = 0; unsigned long new_cl; - u32 portid = tcm->tcm_parent; - u32 clid = tcm->tcm_handle; - u32 qid = TC_H_MAJ(clid); + u32 portid; + u32 clid; + u32 qid; int err; if ((n->nlmsg_type != RTM_GETTCLASS) && !capable(CAP_NET_ADMIN)) return -EPERM; - dev = __dev_get_by_index(net, tcm->tcm_ifindex); - if (!dev) - return -ENODEV; - err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL); if (err < 0) return err; + dev = __dev_get_by_index(net, tcm->tcm_ifindex); + if (!dev) + return -ENODEV; + /* parent == TC_H_UNSPEC - unspecified parent. parent == TC_H_ROOT - class is root, which has no parent. @@ -1413,6 +1415,10 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg) /* Step 1. Determine qdisc handle X:0 */ + portid = tcm->tcm_parent; + clid = tcm->tcm_handle; + qid = TC_H_MAJ(clid); + if (portid != TC_H_ROOT) { u32 qid1 = TC_H_MAJ(portid); @@ -1636,7 +1642,7 @@ static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb) struct net_device *dev; int t, s_t; - if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm))) + if (nlmsg_len(cb->nlh) < sizeof(*tcm)) return 0; dev = dev_get_by_index(net, tcm->tcm_ifindex); if (!dev) @@ -1768,7 +1774,7 @@ static int __net_init psched_net_init(struct net *net) { struct proc_dir_entry *e; - e = proc_net_fops_create(net, "psched", 0, &psched_fops); + e = proc_create("psched", 0, net->proc_net, &psched_fops); if (e == NULL) return -ENOMEM; @@ -1777,7 +1783,7 @@ static int __net_init psched_net_init(struct net *net) static void __net_exit psched_net_exit(struct net *net) { - proc_net_remove(net, "psched"); + remove_proc_entry("psched", net->proc_net); } #else static int __net_init psched_net_init(struct net *net) diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 0e19948470b8..1bc210ffcba2 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -962,8 +962,11 @@ cbq_dequeue(struct Qdisc *sch) cbq_update(q); if ((incr -= incr2) < 0) incr = 0; + q->now += incr; + } else { + if (now > q->now) + q->now = now; } - q->now += incr; q->now_rt = now; for (;;) { @@ -1041,14 +1044,13 @@ static void cbq_adjust_levels(struct cbq_class *this) static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio) { struct cbq_class *cl; - struct hlist_node *n; unsigned int h; if (q->quanta[prio] == 0) return; for (h = 0; h < q->clhash.hashsize; h++) { - hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) { + hlist_for_each_entry(cl, &q->clhash.hash[h], common.hnode) { /* BUGGGG... Beware! This expression suffer of * arithmetic overflows! 
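The rtnetlink handlers are also reordered so that nlmsg_parse() validates the message before any tcmsg field is trusted; previously tcm_parent/tcm_handle were read from an unparsed payload. After the change each handler follows the same validated-first shape (condensed sketch):

static int example_ctl(struct sk_buff *skb, struct nlmsghdr *n)
{
        struct net *net = sock_net(skb->sk);
        struct nlattr *tca[TCA_MAX + 1];
        struct tcmsg *tcm;
        struct net_device *dev;
        int err;

        err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
        if (err < 0)
                return err;             /* parse before trusting tcm */

        tcm = nlmsg_data(n);
        dev = __dev_get_by_index(net, tcm->tcm_ifindex);
        if (!dev)
                return -ENODEV;
        /* only now read tcm->tcm_parent, tcm->tcm_handle, ... */
        return 0;
}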
*/ @@ -1087,10 +1089,9 @@ static void cbq_sync_defmap(struct cbq_class *cl) continue; for (h = 0; h < q->clhash.hashsize; h++) { - struct hlist_node *n; struct cbq_class *c; - hlist_for_each_entry(c, n, &q->clhash.hash[h], + hlist_for_each_entry(c, &q->clhash.hash[h], common.hnode) { if (c->split == split && c->level < level && c->defmap & (1<<i)) { @@ -1210,7 +1211,6 @@ cbq_reset(struct Qdisc *sch) { struct cbq_sched_data *q = qdisc_priv(sch); struct cbq_class *cl; - struct hlist_node *n; int prio; unsigned int h; @@ -1228,7 +1228,7 @@ cbq_reset(struct Qdisc *sch) q->active[prio] = NULL; for (h = 0; h < q->clhash.hashsize; h++) { - hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) { + hlist_for_each_entry(cl, &q->clhash.hash[h], common.hnode) { qdisc_reset(cl->q); cl->next_alive = NULL; @@ -1697,7 +1697,7 @@ static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl) static void cbq_destroy(struct Qdisc *sch) { struct cbq_sched_data *q = qdisc_priv(sch); - struct hlist_node *n, *next; + struct hlist_node *next; struct cbq_class *cl; unsigned int h; @@ -1710,11 +1710,11 @@ static void cbq_destroy(struct Qdisc *sch) * be bound to classes which have been destroyed already. --TGR '04 */ for (h = 0; h < q->clhash.hashsize; h++) { - hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) + hlist_for_each_entry(cl, &q->clhash.hash[h], common.hnode) tcf_destroy_chain(&cl->filter_list); } for (h = 0; h < q->clhash.hashsize; h++) { - hlist_for_each_entry_safe(cl, n, next, &q->clhash.hash[h], + hlist_for_each_entry_safe(cl, next, &q->clhash.hash[h], common.hnode) cbq_destroy_class(sch, cl); } @@ -2013,14 +2013,13 @@ static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg) { struct cbq_sched_data *q = qdisc_priv(sch); struct cbq_class *cl; - struct hlist_node *n; unsigned int h; if (arg->stop) return; for (h = 0; h < q->clhash.hashsize; h++) { - hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) { + hlist_for_each_entry(cl, &q->clhash.hash[h], common.hnode) { if (arg->count < arg->skip) { arg->count++; continue; diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c index cc37dd52ecf9..ef53ab8d0aae 100644 --- a/net/sched/sch_choke.c +++ b/net/sched/sch_choke.c @@ -80,7 +80,7 @@ struct choke_sched_data { /* deliver a random number between 0 and N - 1 */ static u32 random_N(unsigned int N) { - return reciprocal_divide(random32(), N); + return reciprocal_divide(prandom_u32(), N); } /* number of elements in queue including holes */ diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c index 71e50c80315f..759b308d1a8d 100644 --- a/net/sched/sch_drr.c +++ b/net/sched/sch_drr.c @@ -293,14 +293,13 @@ static void drr_walk(struct Qdisc *sch, struct qdisc_walker *arg) { struct drr_sched *q = qdisc_priv(sch); struct drr_class *cl; - struct hlist_node *n; unsigned int i; if (arg->stop) return; for (i = 0; i < q->clhash.hashsize; i++) { - hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode) { + hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) { if (arg->count < arg->skip) { arg->count++; continue; @@ -451,11 +450,10 @@ static void drr_reset_qdisc(struct Qdisc *sch) { struct drr_sched *q = qdisc_priv(sch); struct drr_class *cl; - struct hlist_node *n; unsigned int i; for (i = 0; i < q->clhash.hashsize; i++) { - hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode) { + hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) { if (cl->qdisc->q.qlen) list_del(&cl->alist); qdisc_reset(cl->qdisc); @@ -468,13 +466,13 @@ static void 
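The recurring deletion of struct hlist_node *n across cbq, drr, hfsc, htb and qfq tracks the hlist iterator change in this kernel cycle: hlist_for_each_entry() now takes only the typed cursor, while the _safe variant still needs a spare next pointer. Schematically (the two forms cannot coexist; the old one is shown for contrast):

/* old */ hlist_for_each_entry(cl, n, head, common.hnode)
/* new */ hlist_for_each_entry(cl, head, common.hnode)
/* new, deletion-safe */
          hlist_for_each_entry_safe(cl, next, head, common.hnode)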
drr_destroy_qdisc(struct Qdisc *sch) { struct drr_sched *q = qdisc_priv(sch); struct drr_class *cl; - struct hlist_node *n, *next; + struct hlist_node *next; unsigned int i; tcf_destroy_chain(&q->filter_list); for (i = 0; i < q->clhash.hashsize; i++) { - hlist_for_each_entry_safe(cl, n, next, &q->clhash.hash[i], + hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i], common.hnode) drr_destroy_class(sch, cl); } diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c index 4e606fcb2534..55786283a3df 100644 --- a/net/sched/sch_fq_codel.c +++ b/net/sched/sch_fq_codel.c @@ -195,7 +195,7 @@ static int fq_codel_enqueue(struct sk_buff *skb, struct Qdisc *sch) flow->deficit = q->quantum; flow->dropped = 0; } - if (++sch->q.qlen < sch->limit) + if (++sch->q.qlen <= sch->limit) return NET_XMIT_SUCCESS; q->drop_overlimit++; diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 5d81a4478514..eac7e0ee23c1 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -25,6 +25,7 @@ #include <linux/rcupdate.h> #include <linux/list.h> #include <linux/slab.h> +#include <net/sch_generic.h> #include <net/pkt_sched.h> #include <net/dst.h> @@ -896,3 +897,39 @@ void dev_shutdown(struct net_device *dev) WARN_ON(timer_pending(&dev->watchdog_timer)); } + +void psched_ratecfg_precompute(struct psched_ratecfg *r, u32 rate) +{ + u64 factor; + u64 mult; + int shift; + + r->rate_bps = (u64)rate << 3; + r->shift = 0; + r->mult = 1; + /* + * Calibrate mult, shift so that token counting is accurate + * for smallest packet size (64 bytes). Token (time in ns) is + * computed as (bytes * 8) * NSEC_PER_SEC / rate_bps. It will + * work as long as the smallest packet transfer time can be + * accurately represented in nanosec. + */ + if (r->rate_bps > 0) { + /* + * Higher shift gives better accuracy. Find the largest + * shift such that mult fits in 32 bits. 
+ */ + for (shift = 0; shift < 16; shift++) { + r->shift = shift; + factor = 8LLU * NSEC_PER_SEC * (1 << r->shift); + mult = div64_u64(factor, r->rate_bps); + if (mult > UINT_MAX) + break; + } + + r->shift = shift - 1; + factor = 8LLU * NSEC_PER_SEC * (1 << r->shift); + r->mult = div64_u64(factor, r->rate_bps); + } +} +EXPORT_SYMBOL(psched_ratecfg_precompute); diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index 6c2ec4510540..9facea03faeb 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -1389,7 +1389,6 @@ static void hfsc_walk(struct Qdisc *sch, struct qdisc_walker *arg) { struct hfsc_sched *q = qdisc_priv(sch); - struct hlist_node *n; struct hfsc_class *cl; unsigned int i; @@ -1397,7 +1396,7 @@ hfsc_walk(struct Qdisc *sch, struct qdisc_walker *arg) return; for (i = 0; i < q->clhash.hashsize; i++) { - hlist_for_each_entry(cl, n, &q->clhash.hash[i], + hlist_for_each_entry(cl, &q->clhash.hash[i], cl_common.hnode) { if (arg->count < arg->skip) { arg->count++; @@ -1523,11 +1522,10 @@ hfsc_reset_qdisc(struct Qdisc *sch) { struct hfsc_sched *q = qdisc_priv(sch); struct hfsc_class *cl; - struct hlist_node *n; unsigned int i; for (i = 0; i < q->clhash.hashsize; i++) { - hlist_for_each_entry(cl, n, &q->clhash.hash[i], cl_common.hnode) + hlist_for_each_entry(cl, &q->clhash.hash[i], cl_common.hnode) hfsc_reset_class(cl); } q->eligible = RB_ROOT; @@ -1540,16 +1538,16 @@ static void hfsc_destroy_qdisc(struct Qdisc *sch) { struct hfsc_sched *q = qdisc_priv(sch); - struct hlist_node *n, *next; + struct hlist_node *next; struct hfsc_class *cl; unsigned int i; for (i = 0; i < q->clhash.hashsize; i++) { - hlist_for_each_entry(cl, n, &q->clhash.hash[i], cl_common.hnode) + hlist_for_each_entry(cl, &q->clhash.hash[i], cl_common.hnode) tcf_destroy_chain(&cl->filter_list); } for (i = 0; i < q->clhash.hashsize; i++) { - hlist_for_each_entry_safe(cl, n, next, &q->clhash.hash[i], + hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i], cl_common.hnode) hfsc_destroy_class(sch, cl); } @@ -1564,12 +1562,11 @@ hfsc_dump_qdisc(struct Qdisc *sch, struct sk_buff *skb) unsigned char *b = skb_tail_pointer(skb); struct tc_hfsc_qopt qopt; struct hfsc_class *cl; - struct hlist_node *n; unsigned int i; sch->qstats.backlog = 0; for (i = 0; i < q->clhash.hashsize; i++) { - hlist_for_each_entry(cl, n, &q->clhash.hash[i], cl_common.hnode) + hlist_for_each_entry(cl, &q->clhash.hash[i], cl_common.hnode) sch->qstats.backlog += cl->qdisc->qstats.backlog; } diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 79e8ed4ac7ce..79b1876b6cd2 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -38,6 +38,7 @@ #include <linux/workqueue.h> #include <linux/slab.h> #include <net/netlink.h> +#include <net/sch_generic.h> #include <net/pkt_sched.h> /* HTB algorithm. 
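psched_ratecfg_precompute() reduces byte-to-nanosecond conversion to one multiply and shift: l2t_ns(len) = (len * mult) >> shift, with mult = 8 * NSEC_PER_SEC * 2^shift / rate_bps, choosing the largest shift for which mult still fits in 32 bits. A standalone userspace check of that arithmetic (the rate is an arbitrary example value):

#include <stdint.h>
#include <stdio.h>

#define NSEC_PER_SEC 1000000000ULL

int main(void)
{
        uint64_t rate_bps = 8ULL * 1000 * 1000; /* 1 Mbyte/s */
        uint32_t mult = 1;
        int shift;

        for (shift = 0; shift < 16; shift++) {
                uint64_t m = (8ULL * NSEC_PER_SEC << shift) / rate_bps;
                if (m > UINT32_MAX)
                        break;
                mult = (uint32_t)m;
        }
        shift--; /* largest shift whose mult fits in 32 bits */

        /* 1500 bytes at 1 Mbyte/s -> prints 1500000 ns (1.5 ms) */
        printf("%llu ns\n",
               (unsigned long long)(((uint64_t)1500 * mult) >> shift));
        return 0;
}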
@@ -71,12 +72,6 @@ enum htb_cmode { HTB_CAN_SEND /* class can send */ }; -struct htb_rate_cfg { - u64 rate_bps; - u32 mult; - u32 shift; -}; - /* interior & leaf nodes; props specific to leaves are marked L: */ struct htb_class { struct Qdisc_class_common common; @@ -124,8 +119,8 @@ struct htb_class { int filter_cnt; /* token bucket parameters */ - struct htb_rate_cfg rate; - struct htb_rate_cfg ceil; + struct psched_ratecfg rate; + struct psched_ratecfg ceil; s64 buffer, cbuffer; /* token bucket depth/rate */ psched_tdiff_t mbuffer; /* max wait time */ s64 tokens, ctokens; /* current number of tokens */ @@ -168,45 +163,6 @@ struct htb_sched { struct work_struct work; }; -static u64 l2t_ns(struct htb_rate_cfg *r, unsigned int len) -{ - return ((u64)len * r->mult) >> r->shift; -} - -static void htb_precompute_ratedata(struct htb_rate_cfg *r) -{ - u64 factor; - u64 mult; - int shift; - - r->shift = 0; - r->mult = 1; - /* - * Calibrate mult, shift so that token counting is accurate - * for smallest packet size (64 bytes). Token (time in ns) is - * computed as (bytes * 8) * NSEC_PER_SEC / rate_bps. It will - * work as long as the smallest packet transfer time can be - * accurately represented in nanosec. - */ - if (r->rate_bps > 0) { - /* - * Higher shift gives better accuracy. Find the largest - * shift such that mult fits in 32 bits. - */ - for (shift = 0; shift < 16; shift++) { - r->shift = shift; - factor = 8LLU * NSEC_PER_SEC * (1 << r->shift); - mult = div64_u64(factor, r->rate_bps); - if (mult > UINT_MAX) - break; - } - - r->shift = shift - 1; - factor = 8LLU * NSEC_PER_SEC * (1 << r->shift); - r->mult = div64_u64(factor, r->rate_bps); - } -} - /* find class in global hash table using given handle */ static inline struct htb_class *htb_find(u32 handle, struct Qdisc *sch) { @@ -632,7 +588,7 @@ static inline void htb_accnt_tokens(struct htb_class *cl, int bytes, s64 diff) if (toks > cl->buffer) toks = cl->buffer; - toks -= (s64) l2t_ns(&cl->rate, bytes); + toks -= (s64) psched_l2t_ns(&cl->rate, bytes); if (toks <= -cl->mbuffer) toks = 1 - cl->mbuffer; @@ -645,7 +601,7 @@ static inline void htb_accnt_ctokens(struct htb_class *cl, int bytes, s64 diff) if (toks > cl->cbuffer) toks = cl->cbuffer; - toks -= (s64) l2t_ns(&cl->ceil, bytes); + toks -= (s64) psched_l2t_ns(&cl->ceil, bytes); if (toks <= -cl->mbuffer) toks = 1 - cl->mbuffer; @@ -993,11 +949,10 @@ static void htb_reset(struct Qdisc *sch) { struct htb_sched *q = qdisc_priv(sch); struct htb_class *cl; - struct hlist_node *n; unsigned int i; for (i = 0; i < q->clhash.hashsize; i++) { - hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode) { + hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) { if (cl->level) memset(&cl->un.inner, 0, sizeof(cl->un.inner)); else { @@ -1026,6 +981,7 @@ static const struct nla_policy htb_policy[TCA_HTB_MAX + 1] = { [TCA_HTB_INIT] = { .len = sizeof(struct tc_htb_glob) }, [TCA_HTB_CTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE }, [TCA_HTB_RTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE }, + [TCA_HTB_DIRECT_QLEN] = { .type = NLA_U32 }, }; static void htb_work_func(struct work_struct *work) @@ -1039,7 +995,7 @@ static void htb_work_func(struct work_struct *work) static int htb_init(struct Qdisc *sch, struct nlattr *opt) { struct htb_sched *q = qdisc_priv(sch); - struct nlattr *tb[TCA_HTB_INIT + 1]; + struct nlattr *tb[TCA_HTB_MAX + 1]; struct tc_htb_glob *gopt; int err; int i; @@ -1047,20 +1003,16 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt) if (!opt) return -EINVAL; - err 
= nla_parse_nested(tb, TCA_HTB_INIT, opt, htb_policy); + err = nla_parse_nested(tb, TCA_HTB_MAX, opt, htb_policy); if (err < 0) return err; - if (tb[TCA_HTB_INIT] == NULL) { - pr_err("HTB: hey probably you have bad tc tool ?\n"); + if (!tb[TCA_HTB_INIT]) return -EINVAL; - } + gopt = nla_data(tb[TCA_HTB_INIT]); - if (gopt->version != HTB_VER >> 16) { - pr_err("HTB: need tc/htb version %d (minor is %d), you have %d\n", - HTB_VER >> 16, HTB_VER & 0xffff, gopt->version); + if (gopt->version != HTB_VER >> 16) return -EINVAL; - } err = qdisc_class_hash_init(&q->clhash); if (err < 0) @@ -1072,10 +1024,13 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt) INIT_WORK(&q->work, htb_work_func); skb_queue_head_init(&q->direct_queue); - q->direct_qlen = qdisc_dev(sch)->tx_queue_len; - if (q->direct_qlen < 2) /* some devices have zero tx_queue_len */ - q->direct_qlen = 2; - + if (tb[TCA_HTB_DIRECT_QLEN]) + q->direct_qlen = nla_get_u32(tb[TCA_HTB_DIRECT_QLEN]); + else { + q->direct_qlen = qdisc_dev(sch)->tx_queue_len; + if (q->direct_qlen < 2) /* some devices have zero tx_queue_len */ + q->direct_qlen = 2; + } if ((q->rate2quantum = gopt->rate2quantum) < 1) q->rate2quantum = 1; q->defcls = gopt->defcls; @@ -1101,7 +1056,8 @@ static int htb_dump(struct Qdisc *sch, struct sk_buff *skb) nest = nla_nest_start(skb, TCA_OPTIONS); if (nest == NULL) goto nla_put_failure; - if (nla_put(skb, TCA_HTB_INIT, sizeof(gopt), &gopt)) + if (nla_put(skb, TCA_HTB_INIT, sizeof(gopt), &gopt) || + nla_put_u32(skb, TCA_HTB_DIRECT_QLEN, q->direct_qlen)) goto nla_put_failure; nla_nest_end(skb, nest); @@ -1134,9 +1090,9 @@ static int htb_dump_class(struct Qdisc *sch, unsigned long arg, memset(&opt, 0, sizeof(opt)); - opt.rate.rate = cl->rate.rate_bps >> 3; + opt.rate.rate = psched_ratecfg_getrate(&cl->rate); opt.buffer = PSCHED_NS2TICKS(cl->buffer); - opt.ceil.rate = cl->ceil.rate_bps >> 3; + opt.ceil.rate = psched_ratecfg_getrate(&cl->ceil); opt.cbuffer = PSCHED_NS2TICKS(cl->cbuffer); opt.quantum = cl->quantum; opt.prio = cl->prio; @@ -1262,7 +1218,7 @@ static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl) static void htb_destroy(struct Qdisc *sch) { struct htb_sched *q = qdisc_priv(sch); - struct hlist_node *n, *next; + struct hlist_node *next; struct htb_class *cl; unsigned int i; @@ -1276,11 +1232,11 @@ static void htb_destroy(struct Qdisc *sch) tcf_destroy_chain(&q->filter_list); for (i = 0; i < q->clhash.hashsize; i++) { - hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode) + hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) tcf_destroy_chain(&cl->filter_list); } for (i = 0; i < q->clhash.hashsize; i++) { - hlist_for_each_entry_safe(cl, n, next, &q->clhash.hash[i], + hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i], common.hnode) htb_destroy_class(sch, cl); } @@ -1356,7 +1312,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, struct htb_sched *q = qdisc_priv(sch); struct htb_class *cl = (struct htb_class *)*arg, *parent; struct nlattr *opt = tca[TCA_OPTIONS]; - struct nlattr *tb[__TCA_HTB_MAX]; + struct nlattr *tb[TCA_HTB_MAX + 1]; struct tc_htb_opt *hopt; /* extract all subattrs from opt attr */ @@ -1459,8 +1415,8 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, cl->parent = parent; /* set class to be in HTB_CAN_SEND state */ - cl->tokens = hopt->buffer; - cl->ctokens = hopt->cbuffer; + cl->tokens = PSCHED_TICKS2NS(hopt->buffer); + cl->ctokens = PSCHED_TICKS2NS(hopt->cbuffer); cl->mbuffer = 60 * PSCHED_TICKS_PER_SEC; /* 1min */ cl->t_c = 
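HTB's buffer/cbuffer move from psched ticks to nanoseconds through PSCHED_TICKS2NS()/PSCHED_NS2TICKS(). Assuming the pkt_sched.h definitions with PSCHED_SHIFT == 6, one tick is 64 ns (stated as an assumption; the macros are not shown in these hunks):

/* Assumed: PSCHED_TICKS2NS(x) == ((s64)(x) << 6),
 *          PSCHED_NS2TICKS(x) == ((x) >> 6);
 * e.g. a 10000-tick buffer from userspace becomes 640000 ns of credit.
 */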
psched_get_time(); cl->cmode = HTB_CAN_SEND; @@ -1503,17 +1459,11 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, cl->prio = TC_HTB_NUMPRIO - 1; } - cl->buffer = hopt->buffer; - cl->cbuffer = hopt->cbuffer; - - cl->rate.rate_bps = (u64)hopt->rate.rate << 3; - cl->ceil.rate_bps = (u64)hopt->ceil.rate << 3; - - htb_precompute_ratedata(&cl->rate); - htb_precompute_ratedata(&cl->ceil); + psched_ratecfg_precompute(&cl->rate, hopt->rate.rate); + psched_ratecfg_precompute(&cl->ceil, hopt->ceil.rate); - cl->buffer = hopt->buffer << PSCHED_SHIFT; - cl->cbuffer = hopt->buffer << PSCHED_SHIFT; + cl->buffer = PSCHED_TICKS2NS(hopt->buffer); + cl->cbuffer = PSCHED_TICKS2NS(hopt->buffer); sch_tree_unlock(sch); @@ -1566,14 +1516,13 @@ static void htb_walk(struct Qdisc *sch, struct qdisc_walker *arg) { struct htb_sched *q = qdisc_priv(sch); struct htb_class *cl; - struct hlist_node *n; unsigned int i; if (arg->stop) return; for (i = 0; i < q->clhash.hashsize; i++) { - hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode) { + hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) { if (arg->count < arg->skip) { arg->count++; continue; diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index 6ed37652a4c3..d51852bba01c 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -276,9 +276,8 @@ static struct qfq_aggregate *qfq_find_agg(struct qfq_sched *q, u32 lmax, u32 weight) { struct qfq_aggregate *agg; - struct hlist_node *n; - hlist_for_each_entry(agg, n, &q->nonfull_aggs, nonfull_next) + hlist_for_each_entry(agg, &q->nonfull_aggs, nonfull_next) if (agg->lmax == lmax && agg->class_weight == weight) return agg; @@ -299,6 +298,10 @@ static void qfq_update_agg(struct qfq_sched *q, struct qfq_aggregate *agg, new_num_classes == q->max_agg_classes - 1) /* agg no more full */ hlist_add_head(&agg->nonfull_next, &q->nonfull_aggs); + /* The next assignment may let + * agg->initial_budget > agg->budgetmax + * hold, we will take it into account in charge_actual_service(). + */ agg->budgetmax = new_num_classes * agg->lmax; new_agg_weight = agg->class_weight * new_num_classes; agg->inv_w = ONE_FP/new_agg_weight; @@ -670,14 +673,13 @@ static void qfq_walk(struct Qdisc *sch, struct qdisc_walker *arg) { struct qfq_sched *q = qdisc_priv(sch); struct qfq_class *cl; - struct hlist_node *n; unsigned int i; if (arg->stop) return; for (i = 0; i < q->clhash.hashsize; i++) { - hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode) { + hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) { if (arg->count < arg->skip) { arg->count++; continue; @@ -819,7 +821,7 @@ static void qfq_make_eligible(struct qfq_sched *q) unsigned long old_vslot = q->oldV >> q->min_slot_shift; if (vslot != old_vslot) { - unsigned long mask = (1UL << fls(vslot ^ old_vslot)) - 1; + unsigned long mask = (1ULL << fls(vslot ^ old_vslot)) - 1; qfq_move_groups(q, mask, IR, ER); qfq_move_groups(q, mask, IB, EB); } @@ -990,12 +992,23 @@ static inline struct sk_buff *qfq_peek_skb(struct qfq_aggregate *agg, /* Update F according to the actual service received by the aggregate. 
*/ static inline void charge_actual_service(struct qfq_aggregate *agg) { - /* compute the service received by the aggregate */ - u32 service_received = agg->initial_budget - agg->budget; + /* Compute the service received by the aggregate, taking into + * account that, after decreasing the number of classes in + * agg, it may happen that + * agg->initial_budget - agg->budget > agg->budgetmax + */ + u32 service_received = min(agg->budgetmax, + agg->initial_budget - agg->budget); agg->F = agg->S + (u64)service_received * agg->inv_w; } +static inline void qfq_update_agg_ts(struct qfq_sched *q, + struct qfq_aggregate *agg, + enum update_reason reason); + +static void qfq_schedule_agg(struct qfq_sched *q, struct qfq_aggregate *agg); + static struct sk_buff *qfq_dequeue(struct Qdisc *sch) { struct qfq_sched *q = qdisc_priv(sch); @@ -1023,7 +1036,7 @@ static struct sk_buff *qfq_dequeue(struct Qdisc *sch) in_serv_agg->initial_budget = in_serv_agg->budget = in_serv_agg->budgetmax; - if (!list_empty(&in_serv_agg->active)) + if (!list_empty(&in_serv_agg->active)) { /* * Still active: reschedule for * service. Possible optimization: if no other * handle it, we would need to maintain an * extra num_active_aggs field. */ - qfq_activate_agg(q, in_serv_agg, requeue); - else if (sch->q.qlen == 0) { /* no aggregate to serve */ + qfq_update_agg_ts(q, in_serv_agg, requeue); + qfq_schedule_agg(q, in_serv_agg); + } else if (sch->q.qlen == 0) { /* no aggregate to serve */ q->in_serv_agg = NULL; return NULL; } @@ -1054,7 +1068,15 @@ static struct sk_buff *qfq_dequeue(struct Qdisc *sch) qdisc_bstats_update(sch, skb); agg_dequeue(in_serv_agg, cl, len); - in_serv_agg->budget -= len; + /* If lmax is lowered, through qfq_change_class, for a class + * owning pending packets with larger size than the new value + * of lmax, then the following condition may hold. + */ + if (unlikely(in_serv_agg->budget < len)) + in_serv_agg->budget = 0; + else + in_serv_agg->budget -= len; + q->V += (u64)len * IWSUM; pr_debug("qfq dequeue: len %u F %lld now %lld\n", len, (unsigned long long) in_serv_agg->F, @@ -1219,17 +1241,11 @@ static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch) cl->deficit = agg->lmax; list_add_tail(&cl->alist, &agg->active); - if (list_first_entry(&agg->active, struct qfq_class, alist) != cl) - return err; /* aggregate was not empty, nothing else to do */ + if (list_first_entry(&agg->active, struct qfq_class, alist) != cl || + q->in_serv_agg == agg) + return err; /* non-empty or in service, nothing else to do */ - /* recharge budget */ - agg->initial_budget = agg->budget = agg->budgetmax; - - qfq_update_agg_ts(q, agg, enqueue); - if (q->in_serv_agg == NULL) - q->in_serv_agg = agg; - else if (agg != q->in_serv_agg) - qfq_schedule_agg(q, agg); + qfq_activate_agg(q, agg, enqueue); return err; } @@ -1263,7 +1279,8 @@ static void qfq_schedule_agg(struct qfq_sched *q, struct qfq_aggregate *agg) /* group was surely ineligible, remove */ __clear_bit(grp->index, &q->bitmaps[IR]); __clear_bit(grp->index, &q->bitmaps[IB]); - } else if (!q->bitmaps[ER] && qfq_gt(roundedS, q->V)) + } else if (!q->bitmaps[ER] && qfq_gt(roundedS, q->V) && + q->in_serv_agg == NULL) q->V = roundedS; grp->S = roundedS; @@ -1286,8 +1303,15 @@ skip_update: static void qfq_activate_agg(struct qfq_sched *q, struct qfq_aggregate *agg, enum update_reason reason) { + agg->initial_budget = agg->budget = agg->budgetmax; /* recharge budg. 
*/ + qfq_update_agg_ts(q, agg, reason); - qfq_schedule_agg(q, agg); + if (q->in_serv_agg == NULL) { /* no aggr. in service or scheduled */ + q->in_serv_agg = agg; /* start serving this aggregate */ + /* update V: to be in service, agg must be eligible */ + q->oldV = q->V = agg->S; + } else if (agg != q->in_serv_agg) + qfq_schedule_agg(q, agg); } static void qfq_slot_remove(struct qfq_sched *q, struct qfq_group *grp, @@ -1359,8 +1383,6 @@ static void qfq_deactivate_agg(struct qfq_sched *q, struct qfq_aggregate *agg) __set_bit(grp->index, &q->bitmaps[s]); } } - - qfq_update_eligible(q); } static void qfq_qlen_notify(struct Qdisc *sch, unsigned long arg) @@ -1376,11 +1398,10 @@ static unsigned int qfq_drop_from_slot(struct qfq_sched *q, struct hlist_head *slot) { struct qfq_aggregate *agg; - struct hlist_node *n; struct qfq_class *cl; unsigned int len; - hlist_for_each_entry(agg, n, slot, next) { + hlist_for_each_entry(agg, slot, next) { list_for_each_entry(cl, &agg->active, alist) { if (!cl->qdisc->ops->drop) @@ -1459,11 +1480,10 @@ static void qfq_reset_qdisc(struct Qdisc *sch) { struct qfq_sched *q = qdisc_priv(sch); struct qfq_class *cl; - struct hlist_node *n; unsigned int i; for (i = 0; i < q->clhash.hashsize; i++) { - hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode) { + hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) { if (cl->qdisc->q.qlen > 0) qfq_deactivate_class(q, cl); @@ -1477,13 +1497,13 @@ static void qfq_destroy_qdisc(struct Qdisc *sch) { struct qfq_sched *q = qdisc_priv(sch); struct qfq_class *cl; - struct hlist_node *n, *next; + struct hlist_node *next; unsigned int i; tcf_destroy_chain(&q->filter_list); for (i = 0; i < q->clhash.hashsize; i++) { - hlist_for_each_entry_safe(cl, n, next, &q->clhash.hash[i], + hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i], common.hnode) { qfq_destroy_class(sch, cl); } diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index 4b056c15e90c..c8388f3c3426 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -19,6 +19,7 @@ #include <linux/errno.h> #include <linux/skbuff.h> #include <net/netlink.h> +#include <net/sch_generic.h> #include <net/pkt_sched.h> @@ -100,23 +101,21 @@ struct tbf_sched_data { /* Parameters */ u32 limit; /* Maximal length of backlog: bytes */ - u32 buffer; /* Token bucket depth/rate: MUST BE >= MTU/B */ - u32 mtu; + s64 buffer; /* Token bucket depth/rate: MUST BE >= MTU/B */ + s64 mtu; u32 max_size; - struct qdisc_rate_table *R_tab; - struct qdisc_rate_table *P_tab; + struct psched_ratecfg rate; + struct psched_ratecfg peak; + bool peak_present; /* Variables */ - long tokens; /* Current number of B tokens */ - long ptokens; /* Current number of P tokens */ - psched_time_t t_c; /* Time check-point */ + s64 tokens; /* Current number of B tokens */ + s64 ptokens; /* Current number of P tokens */ + s64 t_c; /* Time check-point */ struct Qdisc *qdisc; /* Inner qdisc, default - bfifo queue */ struct qdisc_watchdog watchdog; /* Watchdog timer */ }; -#define L2T(q, L) qdisc_l2t((q)->R_tab, L) -#define L2T_P(q, L) qdisc_l2t((q)->P_tab, L) - static int tbf_enqueue(struct sk_buff *skb, struct Qdisc *sch) { struct tbf_sched_data *q = qdisc_priv(sch); @@ -156,24 +155,24 @@ static struct sk_buff *tbf_dequeue(struct Qdisc *sch) skb = q->qdisc->ops->peek(q->qdisc); if (skb) { - psched_time_t now; - long toks; - long ptoks = 0; + s64 now; + s64 toks; + s64 ptoks = 0; unsigned int len = qdisc_pkt_len(skb); - now = psched_get_time(); - toks = psched_tdiff_bounded(now, q->t_c, q->buffer); + now = 
ktime_to_ns(ktime_get()); + toks = min_t(s64, now - q->t_c, q->buffer); - if (q->P_tab) { + if (q->peak_present) { ptoks = toks + q->ptokens; - if (ptoks > (long)q->mtu) + if (ptoks > q->mtu) ptoks = q->mtu; - ptoks -= L2T_P(q, len); + ptoks -= (s64) psched_l2t_ns(&q->peak, len); } toks += q->tokens; - if (toks > (long)q->buffer) + if (toks > q->buffer) toks = q->buffer; - toks -= L2T(q, len); + toks -= (s64) psched_l2t_ns(&q->rate, len); if ((toks|ptoks) >= 0) { skb = qdisc_dequeue_peeked(q->qdisc); @@ -189,8 +188,8 @@ static struct sk_buff *tbf_dequeue(struct Qdisc *sch) return skb; } - qdisc_watchdog_schedule(&q->watchdog, - now + max_t(long, -toks, -ptoks)); + qdisc_watchdog_schedule_ns(&q->watchdog, + now + max_t(long, -toks, -ptoks)); /* Maybe we have a shorter packet in the queue, which can be sent now. It sounds cool, @@ -214,7 +213,7 @@ static void tbf_reset(struct Qdisc *sch) qdisc_reset(q->qdisc); sch->q.qlen = 0; - q->t_c = psched_get_time(); + q->t_c = ktime_to_ns(ktime_get()); q->tokens = q->buffer; q->ptokens = q->mtu; qdisc_watchdog_cancel(&q->watchdog); @@ -293,14 +292,19 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt) q->qdisc = child; } q->limit = qopt->limit; - q->mtu = qopt->mtu; + q->mtu = PSCHED_TICKS2NS(qopt->mtu); q->max_size = max_size; - q->buffer = qopt->buffer; + q->buffer = PSCHED_TICKS2NS(qopt->buffer); q->tokens = q->buffer; q->ptokens = q->mtu; - swap(q->R_tab, rtab); - swap(q->P_tab, ptab); + psched_ratecfg_precompute(&q->rate, rtab->rate.rate); + if (ptab) { + psched_ratecfg_precompute(&q->peak, ptab->rate.rate); + q->peak_present = true; + } else { + q->peak_present = false; + } sch_tree_unlock(sch); err = 0; @@ -319,7 +323,7 @@ static int tbf_init(struct Qdisc *sch, struct nlattr *opt) if (opt == NULL) return -EINVAL; - q->t_c = psched_get_time(); + q->t_c = ktime_to_ns(ktime_get()); qdisc_watchdog_init(&q->watchdog, sch); q->qdisc = &noop_qdisc; @@ -331,12 +335,6 @@ static void tbf_destroy(struct Qdisc *sch) struct tbf_sched_data *q = qdisc_priv(sch); qdisc_watchdog_cancel(&q->watchdog); - - if (q->P_tab) - qdisc_put_rtab(q->P_tab); - if (q->R_tab) - qdisc_put_rtab(q->R_tab); - qdisc_destroy(q->qdisc); } @@ -352,13 +350,13 @@ static int tbf_dump(struct Qdisc *sch, struct sk_buff *skb) goto nla_put_failure; opt.limit = q->limit; - opt.rate = q->R_tab->rate; - if (q->P_tab) - opt.peakrate = q->P_tab->rate; + opt.rate.rate = psched_ratecfg_getrate(&q->rate); + if (q->peak_present) + opt.peakrate.rate = psched_ratecfg_getrate(&q->peak); else memset(&opt.peakrate, 0, sizeof(opt.peakrate)); - opt.mtu = q->mtu; - opt.buffer = q->buffer; + opt.mtu = PSCHED_NS2TICKS(q->mtu); + opt.buffer = PSCHED_NS2TICKS(q->buffer); if (nla_put(skb, TCA_TBF_PARMS, sizeof(opt), &opt)) goto nla_put_failure; diff --git a/net/sctp/associola.c b/net/sctp/associola.c index b45ed1f96921..91cfd8f94a19 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -66,13 +66,6 @@ static void sctp_assoc_bh_rcv(struct work_struct *work); static void sctp_assoc_free_asconf_acks(struct sctp_association *asoc); static void sctp_assoc_free_asconf_queue(struct sctp_association *asoc); -/* Keep track of the new idr low so that we don't re-use association id - * numbers too fast. It is protected by they idr spin lock is in the - * range of 1 - INT_MAX. - */ -static u32 idr_low = 1; - - /* 1st Level Abstractions. */ /* Initialize a new association from provided memory. 
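The TBF conversion above drops the psched tick rate tables in favour of nanosecond arithmetic: token counts become time (s64 nanoseconds), and psched_l2t_ns() returns how long the configured rate needs to emit a given length. A simplified model of the admission test in tbf_dequeue(), where len_to_ns() is an assumed stand-in for psched_l2t_ns():

        /* Time to emit `len` bytes at `rate` bytes per second. */
        static s64 len_to_ns(u64 rate, unsigned int len)
        {
                return div64_u64((u64)len * NSEC_PER_SEC, rate);
        }

        /* Token-bucket test: credit accrues with elapsed time, capped at
         * the bucket depth `buffer`, and one packet costs its wire time.
         */
        static bool tbf_admit(s64 *tokens, s64 buffer, s64 elapsed_ns,
                              u64 rate, unsigned int len)
        {
                s64 toks = min_t(s64, elapsed_ns, buffer) + *tokens;

                if (toks > buffer)
                        toks = buffer;
                toks -= len_to_ns(rate, len);
                if (toks < 0)
                        return false;   /* watchdog sleeps for -toks ns */
                *tokens = toks;
                return true;
        }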
*/ @@ -104,8 +97,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a /* Initialize the object handling fields. */ atomic_set(&asoc->base.refcnt, 1); - asoc->base.dead = 0; - asoc->base.malloced = 0; + asoc->base.dead = false; /* Initialize the bind addr area. */ sctp_bind_addr_init(&asoc->base.bind_addr, ep->base.bind_addr.port); @@ -371,7 +363,6 @@ struct sctp_association *sctp_association_new(const struct sctp_endpoint *ep, if (!sctp_association_init(asoc, ep, sk, scope, gfp)) goto fail_init; - asoc->base.malloced = 1; SCTP_DBG_OBJCNT_INC(assoc); SCTP_DEBUG_PRINTK("Created asoc %p\n", asoc); @@ -409,7 +400,7 @@ void sctp_association_free(struct sctp_association *asoc) /* Mark as dead, so other users can know this structure is * going away. */ - asoc->base.dead = 1; + asoc->base.dead = true; /* Dispose of any data lying around in the outqueue. */ sctp_outq_free(&asoc->outqueue); @@ -434,8 +425,7 @@ void sctp_association_free(struct sctp_association *asoc) * on our state. */ for (i = SCTP_EVENT_TIMEOUT_NONE; i < SCTP_NUM_TIMEOUT_TYPES; ++i) { - if (timer_pending(&asoc->timers[i]) && - del_timer(&asoc->timers[i])) + if (del_timer(&asoc->timers[i])) sctp_association_put(asoc); } @@ -485,10 +475,8 @@ static void sctp_association_destroy(struct sctp_association *asoc) WARN_ON(atomic_read(&asoc->rmem_alloc)); - if (asoc->base.malloced) { - kfree(asoc); - SCTP_DBG_OBJCNT_DEC(assoc); - } + kfree(asoc); + SCTP_DBG_OBJCNT_DEC(assoc); } /* Change the primary destination address for the peer. */ @@ -1080,7 +1068,7 @@ struct sctp_transport *sctp_assoc_lookup_tsn(struct sctp_association *asoc, transports) { if (transport == active) - break; + continue; list_for_each_entry(chunk, &transport->transmitted, transmitted_list) { if (key == chunk->subh.data_hdr->tsn) { @@ -1497,7 +1485,7 @@ void sctp_assoc_rwnd_increase(struct sctp_association *asoc, unsigned int len) /* Stop the SACK timer. */ timer = &asoc->timers[SCTP_EVENT_TIMEOUT_SACK]; - if (timer_pending(timer) && del_timer(timer)) + if (del_timer(timer)) sctp_association_put(asoc); } } @@ -1592,32 +1580,26 @@ int sctp_assoc_lookup_laddr(struct sctp_association *asoc, /* Set an association id for a given association */ int sctp_assoc_set_id(struct sctp_association *asoc, gfp_t gfp) { - int assoc_id; - int error = 0; + bool preload = gfp & __GFP_WAIT; + int ret; /* If the id is already assigned, keep it. 
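The timer hunks in sctp_association_free() above rely on del_timer()'s return value: it returns 1 exactly when the timer was still pending and has now been stopped, so the previous timer_pending() && del_timer() combination tested the same condition twice. The resulting idiom, as an illustrative fragment:

        /* del_timer() == 1 means the timer was pending and is now off,
         * which is precisely when the reference taken when arming it
         * must be dropped.
         */
        if (del_timer(&transport->T3_rtx_timer))
                sctp_transport_put(transport);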
*/ if (asoc->assoc_id) - return error; -retry: - if (unlikely(!idr_pre_get(&sctp_assocs_id, gfp))) - return -ENOMEM; + return 0; + if (preload) + idr_preload(gfp); spin_lock_bh(&sctp_assocs_id_lock); - error = idr_get_new_above(&sctp_assocs_id, (void *)asoc, - idr_low, &assoc_id); - if (!error) { - idr_low = assoc_id + 1; - if (idr_low == INT_MAX) - idr_low = 1; - } + /* 0 is not a valid assoc_id, must be >= 1 */ + ret = idr_alloc_cyclic(&sctp_assocs_id, asoc, 1, 0, GFP_NOWAIT); spin_unlock_bh(&sctp_assocs_id_lock); - if (error == -EAGAIN) - goto retry; - else if (error) - return error; + if (preload) + idr_preload_end(); + if (ret < 0) + return ret; - asoc->assoc_id = (sctp_assoc_t) assoc_id; - return error; + asoc->assoc_id = (sctp_assoc_t)ret; + return 0; } /* Free the ASCONF queue */ diff --git a/net/sctp/auth.c b/net/sctp/auth.c index d8420ae614dc..ba1dfc3f8def 100644 --- a/net/sctp/auth.c +++ b/net/sctp/auth.c @@ -200,27 +200,28 @@ static struct sctp_auth_bytes *sctp_auth_make_key_vector( struct sctp_auth_bytes *new; __u32 len; __u32 offset = 0; + __u16 random_len, hmacs_len, chunks_len = 0; - len = ntohs(random->param_hdr.length) + ntohs(hmacs->param_hdr.length); - if (chunks) - len += ntohs(chunks->param_hdr.length); + random_len = ntohs(random->param_hdr.length); + hmacs_len = ntohs(hmacs->param_hdr.length); + if (chunks) + chunks_len = ntohs(chunks->param_hdr.length); - new = kmalloc(sizeof(struct sctp_auth_bytes) + len, gfp); + len = random_len + hmacs_len + chunks_len; + + new = sctp_auth_create_key(len, gfp); if (!new) return NULL; - new->len = len; - - memcpy(new->data, random, ntohs(random->param_hdr.length)); - offset += ntohs(random->param_hdr.length); + memcpy(new->data, random, random_len); + offset += random_len; if (chunks) { - memcpy(new->data + offset, chunks, - ntohs(chunks->param_hdr.length)); - offset += ntohs(chunks->param_hdr.length); + memcpy(new->data + offset, chunks, chunks_len); + offset += chunks_len; } - memcpy(new->data + offset, hmacs, ntohs(hmacs->param_hdr.length)); + memcpy(new->data + offset, hmacs, hmacs_len); return new; } @@ -350,8 +351,8 @@ static struct sctp_auth_bytes *sctp_auth_asoc_create_secret( secret = sctp_auth_asoc_set_secret(ep_key, first_vector, last_vector, gfp); out: - kfree(local_key_vector); - kfree(peer_key_vector); + sctp_auth_key_put(local_key_vector); + sctp_auth_key_put(peer_key_vector); return secret; } diff --git a/net/sctp/bind_addr.c b/net/sctp/bind_addr.c index d886b3bf84f5..41145fe31813 100644 --- a/net/sctp/bind_addr.c +++ b/net/sctp/bind_addr.c @@ -131,8 +131,6 @@ int sctp_bind_addr_dup(struct sctp_bind_addr *dest, */ void sctp_bind_addr_init(struct sctp_bind_addr *bp, __u16 port) { - bp->malloced = 0; - INIT_LIST_HEAD(&bp->address_list); bp->port = port; } @@ -155,11 +153,6 @@ void sctp_bind_addr_free(struct sctp_bind_addr *bp) { /* Empty the bind address list. */ sctp_bind_addr_clean(bp); - - if (bp->malloced) { - kfree(bp); - SCTP_DBG_OBJCNT_DEC(bind_addr); - } } /* Add an address to the bind address list in the SCTP_bind_addr structure. */ diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c index 1a9c5fb77310..5fbd7bc6bb11 100644 --- a/net/sctp/endpointola.c +++ b/net/sctp/endpointola.c @@ -121,8 +121,7 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep, /* Initialize the basic object fields. */ atomic_set(&ep->base.refcnt, 1); - ep->base.dead = 0; - ep->base.malloced = 1; + ep->base.dead = false; /* Create an input queue. 
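sctp_assoc_set_id() above moves from the removed idr_pre_get()/idr_get_new_above() retry loop to the preload-based idr API: preallocate outside the spinlock with the caller's gfp mask, then allocate atomically under the lock. A minimal sketch of the pattern; my_idr, my_lock and assign_id() are placeholders:

        static int assign_id(struct idr *my_idr, spinlock_t *my_lock,
                             void *obj, gfp_t gfp)
        {
                bool preload = gfp & __GFP_WAIT;
                int id;

                if (preload)
                        idr_preload(gfp);       /* stock per-cpu id nodes */
                spin_lock_bh(my_lock);
                /* ids start at 1; 0 stays invalid, allocation is cyclic */
                id = idr_alloc_cyclic(my_idr, obj, 1, 0, GFP_NOWAIT);
                spin_unlock_bh(my_lock);
                if (preload)
                        idr_preload_end();
                return id;                      /* negative errno on failure */
        }

The preload step makes the GFP_NOWAIT allocation under the lock unlikely to fail, which is what replaces the old -EAGAIN retry loop.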
*/ sctp_inq_init(&ep->base.inqueue); @@ -151,13 +150,11 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep, ep->rcvbuf_policy = net->sctp.rcvbuf_policy; /* Initialize the secret key used with cookie. */ - get_random_bytes(&ep->secret_key[0], SCTP_SECRET_SIZE); - ep->last_key = ep->current_key = 0; - ep->key_changed_at = jiffies; + get_random_bytes(ep->secret_key, sizeof(ep->secret_key)); /* SCTP-AUTH extensions*/ INIT_LIST_HEAD(&ep->endpoint_shared_keys); - null_key = sctp_auth_shkey_create(0, GFP_KERNEL); + null_key = sctp_auth_shkey_create(0, gfp); if (!null_key) goto nomem; @@ -200,7 +197,7 @@ struct sctp_endpoint *sctp_endpoint_new(struct sock *sk, gfp_t gfp) goto fail; if (!sctp_endpoint_init(ep, sk, gfp)) goto fail_init; - ep->base.malloced = 1; + SCTP_DBG_OBJCNT_INC(ep); return ep; @@ -236,7 +233,7 @@ void sctp_endpoint_add_asoc(struct sctp_endpoint *ep, */ void sctp_endpoint_free(struct sctp_endpoint *ep) { - ep->base.dead = 1; + ep->base.dead = true; ep->base.sk->sk_state = SCTP_SS_CLOSED; @@ -249,8 +246,6 @@ void sctp_endpoint_free(struct sctp_endpoint *ep) /* Final destructor for endpoint. */ static void sctp_endpoint_destroy(struct sctp_endpoint *ep) { - int i; - SCTP_ASSERT(ep->base.dead, "Endpoint is not dead", return); /* Free up the HMAC transform. */ @@ -273,8 +268,7 @@ static void sctp_endpoint_destroy(struct sctp_endpoint *ep) sctp_inq_free(&ep->base.inqueue); sctp_bind_addr_free(&ep->base.bind_addr); - for (i = 0; i < SCTP_HOW_MANY_SECRETS; ++i) - memset(&ep->secret_key[i], 0, SCTP_SECRET_SIZE); + memset(ep->secret_key, 0, sizeof(ep->secret_key)); /* Remove and free the port */ if (sctp_sk(ep->base.sk)->bind_hash) @@ -284,11 +278,8 @@ static void sctp_endpoint_destroy(struct sctp_endpoint *ep) if (ep->base.sk) sock_put(ep->base.sk); - /* Finally, free up our memory. */ - if (ep->base.malloced) { - kfree(ep); - SCTP_DBG_OBJCNT_DEC(ep); - } + kfree(ep); + SCTP_DBG_OBJCNT_DEC(ep); } /* Hold a reference to an endpoint. 
*/ @@ -337,7 +328,6 @@ static struct sctp_association *__sctp_endpoint_lookup_assoc( struct sctp_transport *t = NULL; struct sctp_hashbucket *head; struct sctp_ep_common *epb; - struct hlist_node *node; int hash; int rport; @@ -355,7 +345,7 @@ static struct sctp_association *__sctp_endpoint_lookup_assoc( rport); head = &sctp_assoc_hashtable[hash]; read_lock(&head->lock); - sctp_for_each_hentry(epb, node, &head->chain) { + sctp_for_each_hentry(epb, &head->chain) { tmp = sctp_assoc(epb); if (tmp->ep != ep || rport != tmp->peer.port) continue; diff --git a/net/sctp/input.c b/net/sctp/input.c index 8bd3c279427e..4b2c83146aa7 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -468,8 +468,7 @@ void sctp_icmp_proto_unreachable(struct sock *sk, } else { struct net *net = sock_net(sk); - if (timer_pending(&t->proto_unreach_timer) && - del_timer(&t->proto_unreach_timer)) + if (del_timer(&t->proto_unreach_timer)) sctp_association_put(asoc); sctp_do_sm(net, SCTP_EVENT_T_OTHER, @@ -785,13 +784,12 @@ static struct sctp_endpoint *__sctp_rcv_lookup_endpoint(struct net *net, struct sctp_hashbucket *head; struct sctp_ep_common *epb; struct sctp_endpoint *ep; - struct hlist_node *node; int hash; hash = sctp_ep_hashfn(net, ntohs(laddr->v4.sin_port)); head = &sctp_ep_hashtable[hash]; read_lock(&head->lock); - sctp_for_each_hentry(epb, node, &head->chain) { + sctp_for_each_hentry(epb, &head->chain) { ep = sctp_ep(epb); if (sctp_endpoint_is_match(ep, net, laddr)) goto hit; @@ -877,7 +875,6 @@ static struct sctp_association *__sctp_lookup_association( struct sctp_ep_common *epb; struct sctp_association *asoc; struct sctp_transport *transport; - struct hlist_node *node; int hash; /* Optimize here for direct hit, only listening connections can @@ -887,7 +884,7 @@ static struct sctp_association *__sctp_lookup_association( ntohs(peer->v4.sin_port)); head = &sctp_assoc_hashtable[hash]; read_lock(&head->lock); - sctp_for_each_hentry(epb, node, &head->chain) { + sctp_for_each_hentry(epb, &head->chain) { asoc = sctp_assoc(epb); transport = sctp_assoc_is_match(asoc, net, local, peer); if (transport) diff --git a/net/sctp/inqueue.c b/net/sctp/inqueue.c index 2d5ad280de38..3221d073448c 100644 --- a/net/sctp/inqueue.c +++ b/net/sctp/inqueue.c @@ -58,8 +58,6 @@ void sctp_inq_init(struct sctp_inq *queue) /* Create a task for delivering data. */ INIT_WORK(&queue->immediate, NULL); - - queue->malloced = 0; } /* Release the memory associated with an SCTP inqueue. */ @@ -80,11 +78,6 @@ void sctp_inq_free(struct sctp_inq *queue) sctp_chunk_free(queue->in_progress); queue->in_progress = NULL; } - - if (queue->malloced) { - /* Dump the master memory segment. */ - kfree(queue); - } } /* Put a new packet in an SCTP inqueue. diff --git a/net/sctp/output.c b/net/sctp/output.c index f5200a2ad852..bbef4a7a9b56 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -136,7 +136,7 @@ struct sctp_packet *sctp_packet_init(struct sctp_packet *packet, packet->overhead = overhead; sctp_packet_reset(packet); packet->vtag = 0; - packet->malloced = 0; + return packet; } @@ -151,9 +151,6 @@ void sctp_packet_free(struct sctp_packet *packet) list_del_init(&chunk->list); sctp_chunk_free(chunk); } - - if (packet->malloced) - kfree(packet); } /* This routine tries to append the chunk to the offered packet. 
If adding diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index 9bcdbd02d777..32a4625fef77 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -217,8 +217,6 @@ void sctp_outq_init(struct sctp_association *asoc, struct sctp_outq *q) q->outstanding_bytes = 0; q->empty = 1; q->cork = 0; - - q->malloced = 0; q->out_qlen = 0; } @@ -295,10 +293,6 @@ void sctp_outq_free(struct sctp_outq *q) { /* Throw away leftover chunks. */ __sctp_outq_teardown(q); - - /* If we were kmalloc()'d, free the memory. */ - if (q->malloced) - kfree(q); } /* Put a new chunk in an sctp_outq. */ @@ -707,11 +701,10 @@ redo: /* Cork the outqueue so queued chunks are really queued. */ int sctp_outq_uncork(struct sctp_outq *q) { - int error = 0; if (q->cork) q->cork = 0; - error = sctp_outq_flush(q, 0); - return error; + + return sctp_outq_flush(q, 0); } @@ -1700,10 +1693,8 @@ static void sctp_check_transmitted(struct sctp_outq *q, * address. */ if (!transport->flight_size) { - if (timer_pending(&transport->T3_rtx_timer) && - del_timer(&transport->T3_rtx_timer)) { + if (del_timer(&transport->T3_rtx_timer)) sctp_transport_put(transport); - } } else if (restart_timer) { if (!mod_timer(&transport->T3_rtx_timer, jiffies + transport->rto)) diff --git a/net/sctp/probe.c b/net/sctp/probe.c index 5f7518de2fd1..e62c22535be4 100644 --- a/net/sctp/probe.c +++ b/net/sctp/probe.c @@ -63,7 +63,7 @@ static struct { struct timespec tstart; } sctpw; -static void printl(const char *fmt, ...) +static __printf(1, 2) void printl(const char *fmt, ...) { va_list args; int len; @@ -122,12 +122,12 @@ static const struct file_operations sctpprobe_fops = { .llseek = noop_llseek, }; -sctp_disposition_t jsctp_sf_eat_sack(struct net *net, - const struct sctp_endpoint *ep, - const struct sctp_association *asoc, - const sctp_subtype_t type, - void *arg, - sctp_cmd_seq_t *commands) +static sctp_disposition_t jsctp_sf_eat_sack(struct net *net, + const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const sctp_subtype_t type, + void *arg, + sctp_cmd_seq_t *commands) { struct sctp_transport *sp; static __u32 lcwnd = 0; @@ -183,13 +183,20 @@ static __init int sctpprobe_init(void) { int ret = -ENOMEM; + /* Warning: if the function signature of sctp_sf_eat_sack_6_2, + * has been changed, you also have to change the signature of + * jsctp_sf_eat_sack, otherwise you end up right here! 
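As the probe.c comment above explains, jsctp_sf_eat_sack must keep the exact prototype of the probed sctp_sf_eat_sack_6_2, and the new BUILD_BUG_ON enforces that at compile time. The general shape of the guard, sketched with probed_fn and handler_fn as placeholders:

        static struct jprobe my_probe = {
                .kp.symbol_name = "probed_fn",
                .entry          = handler_fn,
        };

        static int __init my_probe_init(void)
        {
                /* __same_type() wraps __builtin_types_compatible_p(); the
                 * build breaks as soon as the handler's prototype drifts
                 * from the probed function's.
                 */
                BUILD_BUG_ON(!__same_type(probed_fn, handler_fn));
                return register_jprobe(&my_probe);
        }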
+ */ + BUILD_BUG_ON(__same_type(sctp_sf_eat_sack_6_2, + jsctp_sf_eat_sack) == 0); + init_waitqueue_head(&sctpw.wait); spin_lock_init(&sctpw.lock); if (kfifo_alloc(&sctpw.fifo, bufsize, GFP_KERNEL)) return ret; - if (!proc_net_fops_create(&init_net, procname, S_IRUSR, - &sctpprobe_fops)) + if (!proc_create(procname, S_IRUSR, init_net.proc_net, + &sctpprobe_fops)) goto free_kfifo; ret = register_jprobe(&sctp_recv_probe); @@ -201,7 +208,7 @@ static __init int sctpprobe_init(void) return 0; remove_proc: - proc_net_remove(&init_net, procname); + remove_proc_entry(procname, init_net.proc_net); free_kfifo: kfifo_free(&sctpw.fifo); return ret; @@ -210,7 +217,7 @@ free_kfifo: static __exit void sctpprobe_exit(void) { kfifo_free(&sctpw.fifo); - proc_net_remove(&init_net, procname); + remove_proc_entry(procname, init_net.proc_net); unregister_jprobe(&sctp_recv_probe); } diff --git a/net/sctp/proc.c b/net/sctp/proc.c index 8c19e97262ca..4e45ee35d0db 100644 --- a/net/sctp/proc.c +++ b/net/sctp/proc.c @@ -213,7 +213,6 @@ static int sctp_eps_seq_show(struct seq_file *seq, void *v) struct sctp_ep_common *epb; struct sctp_endpoint *ep; struct sock *sk; - struct hlist_node *node; int hash = *(loff_t *)v; if (hash >= sctp_ep_hashsize) @@ -222,7 +221,7 @@ static int sctp_eps_seq_show(struct seq_file *seq, void *v) head = &sctp_ep_hashtable[hash]; sctp_local_bh_disable(); read_lock(&head->lock); - sctp_for_each_hentry(epb, node, &head->chain) { + sctp_for_each_hentry(epb, &head->chain) { ep = sctp_ep(epb); sk = epb->sk; if (!net_eq(sock_net(sk), seq_file_net(seq))) @@ -296,7 +295,8 @@ static void * sctp_assocs_seq_start(struct seq_file *seq, loff_t *pos) seq_printf(seq, " ASSOC SOCK STY SST ST HBKT " "ASSOC-ID TX_QUEUE RX_QUEUE UID INODE LPORT " "RPORT LADDRS <-> RADDRS " - "HBINT INS OUTS MAXRT T1X T2X RTXC\n"); + "HBINT INS OUTS MAXRT T1X T2X RTXC " + "wmema wmemq sndbuf rcvbuf\n"); return (void *)pos; } @@ -321,7 +321,6 @@ static int sctp_assocs_seq_show(struct seq_file *seq, void *v) struct sctp_ep_common *epb; struct sctp_association *assoc; struct sock *sk; - struct hlist_node *node; int hash = *(loff_t *)v; if (hash >= sctp_assoc_hashsize) @@ -330,7 +329,7 @@ static int sctp_assocs_seq_show(struct seq_file *seq, void *v) head = &sctp_assoc_hashtable[hash]; sctp_local_bh_disable(); read_lock(&head->lock); - sctp_for_each_hentry(epb, node, &head->chain) { + sctp_for_each_hentry(epb, &head->chain) { assoc = sctp_assoc(epb); sk = epb->sk; if (!net_eq(sock_net(sk), seq_file_net(seq))) @@ -351,11 +350,16 @@ static int sctp_assocs_seq_show(struct seq_file *seq, void *v) sctp_seq_dump_local_addrs(seq, epb); seq_printf(seq, "<-> "); sctp_seq_dump_remote_addrs(seq, assoc); - seq_printf(seq, "\t%8lu %5d %5d %4d %4d %4d %8d ", + seq_printf(seq, "\t%8lu %5d %5d %4d %4d %4d %8d " + "%8d %8d %8d %8d", assoc->hbinterval, assoc->c.sinit_max_instreams, assoc->c.sinit_num_ostreams, assoc->max_retrans, assoc->init_retries, assoc->shutdown_retries, - assoc->rtx_data_chunks); + assoc->rtx_data_chunks, + atomic_read(&sk->sk_wmem_alloc), + sk->sk_wmem_queued, + sk->sk_sndbuf, + sk->sk_rcvbuf); seq_printf(seq, "\n"); } read_unlock(&head->lock); @@ -436,7 +440,6 @@ static int sctp_remaddr_seq_show(struct seq_file *seq, void *v) struct sctp_hashbucket *head; struct sctp_ep_common *epb; struct sctp_association *assoc; - struct hlist_node *node; struct sctp_transport *tsp; int hash = *(loff_t *)v; @@ -447,7 +450,7 @@ static int sctp_remaddr_seq_show(struct seq_file *seq, void *v) sctp_local_bh_disable(); read_lock(&head->lock); 
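The many one-line sctp_for_each_hentry() and hlist_for_each_entry() edits across these files all stem from the same hlist API change in this release: the iterator now derives the typed entry itself via hlist_entry_safe(), so the separate struct hlist_node cursor disappears. Old versus new, as an illustrative fragment (process() is a placeholder):

        struct sctp_ep_common *epb;
        struct hlist_node *pos;

        /* Before: a raw hlist_node cursor had to be threaded through. */
        hlist_for_each_entry(epb, pos, &head->chain, node)
                process(epb);

        /* After: the entry pointer itself is the cursor; the pos
         * variable can simply be deleted, as in the hunks above.
         */
        hlist_for_each_entry(epb, &head->chain, node)
                process(epb);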
rcu_read_lock(); - sctp_for_each_hentry(epb, node, &head->chain) { + sctp_for_each_hentry(epb, &head->chain) { if (!net_eq(sock_net(epb->sk), seq_file_net(seq))) continue; assoc = sctp_assoc(epb); diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index f898b1c58bd2..eaee00c61139 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -595,7 +595,7 @@ static void sctp_v4_ecn_capable(struct sock *sk) INET_ECN_xmit(sk); } -void sctp_addr_wq_timeout_handler(unsigned long arg) +static void sctp_addr_wq_timeout_handler(unsigned long arg) { struct net *net = (struct net *)arg; struct sctp_sockaddr_entry *addrw, *temp; @@ -1403,7 +1403,7 @@ SCTP_STATIC __init int sctp_init(void) /* Allocate and initialize the endpoint hash table. */ sctp_ep_hashsize = 64; - sctp_ep_hashtable = (struct sctp_hashbucket *) + sctp_ep_hashtable = kmalloc(64 * sizeof(struct sctp_hashbucket), GFP_KERNEL); if (!sctp_ep_hashtable) { pr_err("Failed endpoint_hash alloc\n"); diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index e1c5fc2be6b8..cf579e71cff0 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -1201,7 +1201,7 @@ nodata: * specifically, max(asoc->pathmtu, SCTP_DEFAULT_MAXSEGMENT) * This is a helper function to allocate an error chunk for * for those invalid parameter codes in which we may not want - * to report all the errors, if the incomming chunk is large + * to report all the errors, if the incoming chunk is large */ static inline struct sctp_chunk *sctp_make_op_error_fixed( const struct sctp_association *asoc, @@ -1589,8 +1589,6 @@ static sctp_cookie_param_t *sctp_pack_cookie(const struct sctp_endpoint *ep, struct sctp_signed_cookie *cookie; struct scatterlist sg; int headersize, bodysize; - unsigned int keylen; - char *key; /* Header size is static data prior to the actual cookie, including * any padding. @@ -1650,12 +1648,11 @@ static sctp_cookie_param_t *sctp_pack_cookie(const struct sctp_endpoint *ep, /* Sign the message. */ sg_init_one(&sg, &cookie->c, bodysize); - keylen = SCTP_SECRET_SIZE; - key = (char *)ep->secret_key[ep->current_key]; desc.tfm = sctp_sk(ep->base.sk)->hmac; desc.flags = 0; - if (crypto_hash_setkey(desc.tfm, key, keylen) || + if (crypto_hash_setkey(desc.tfm, ep->secret_key, + sizeof(ep->secret_key)) || crypto_hash_digest(&desc, &sg, bodysize, cookie->signature)) goto free_cookie; } @@ -1682,8 +1679,7 @@ struct sctp_association *sctp_unpack_cookie( int headersize, bodysize, fixed_size; __u8 *digest = ep->digest; struct scatterlist sg; - unsigned int keylen, len; - char *key; + unsigned int len; sctp_scope_t scope; struct sk_buff *skb = chunk->skb; struct timeval tv; @@ -1718,34 +1714,21 @@ struct sctp_association *sctp_unpack_cookie( goto no_hmac; /* Check the signature. */ - keylen = SCTP_SECRET_SIZE; sg_init_one(&sg, bear_cookie, bodysize); - key = (char *)ep->secret_key[ep->current_key]; desc.tfm = sctp_sk(ep->base.sk)->hmac; desc.flags = 0; memset(digest, 0x00, SCTP_SIGNATURE_SIZE); - if (crypto_hash_setkey(desc.tfm, key, keylen) || + if (crypto_hash_setkey(desc.tfm, ep->secret_key, + sizeof(ep->secret_key)) || crypto_hash_digest(&desc, &sg, bodysize, digest)) { *error = -SCTP_IERROR_NOMEM; goto fail; } if (memcmp(digest, cookie->signature, SCTP_SIGNATURE_SIZE)) { - /* Try the previous key. 
*/ - key = (char *)ep->secret_key[ep->last_key]; - memset(digest, 0x00, SCTP_SIGNATURE_SIZE); - if (crypto_hash_setkey(desc.tfm, key, keylen) || - crypto_hash_digest(&desc, &sg, bodysize, digest)) { - *error = -SCTP_IERROR_NOMEM; - goto fail; - } - - if (memcmp(digest, cookie->signature, SCTP_SIGNATURE_SIZE)) { - /* Yikes! Still bad signature! */ - *error = -SCTP_IERROR_BAD_SIG; - goto fail; - } + *error = -SCTP_IERROR_BAD_SIG; + goto fail; } no_hmac: diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index c9577754a708..8aab894aeabe 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -674,10 +674,8 @@ static void sctp_cmd_t3_rtx_timers_stop(sctp_cmd_seq_t *cmds, list_for_each_entry(t, &asoc->peer.transport_addr_list, transports) { - if (timer_pending(&t->T3_rtx_timer) && - del_timer(&t->T3_rtx_timer)) { + if (del_timer(&t->T3_rtx_timer)) sctp_transport_put(t); - } } } @@ -1517,7 +1515,7 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, case SCTP_CMD_TIMER_STOP: timer = &asoc->timers[cmd->obj.to]; - if (timer_pending(timer) && del_timer(timer)) + if (del_timer(timer)) sctp_association_put(asoc); break; diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 5131fcfedb03..de1a0138317f 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -2082,7 +2082,7 @@ sctp_disposition_t sctp_sf_do_5_2_4_dupcook(struct net *net, } /* Delete the tempory new association. */ - sctp_add_cmd_sf(commands, SCTP_CMD_NEW_ASOC, SCTP_ASOC(new_asoc)); + sctp_add_cmd_sf(commands, SCTP_CMD_SET_ASOC, SCTP_ASOC(new_asoc)); sctp_add_cmd_sf(commands, SCTP_CMD_DELETE_TCB, SCTP_NULL()); /* Restore association pointer to provide SCTP command interpeter diff --git a/net/sctp/socket.c b/net/sctp/socket.c index cedd9bf67b8c..f631c5ff4dbf 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -1119,9 +1119,10 @@ static int __sctp_connect(struct sock* sk, /* Make sure the destination port is correctly set * in all addresses. */ - if (asoc && asoc->peer.port && asoc->peer.port != port) + if (asoc && asoc->peer.port && asoc->peer.port != port) { + err = -EINVAL; goto out_free; - + } /* Check if there already is a matching association on the * endpoint (other than the one created here). 
@@ -5653,6 +5654,9 @@ static int sctp_getsockopt_assoc_stats(struct sock *sk, int len, if (len < sizeof(sctp_assoc_t)) return -EINVAL; + /* Allow the struct to grow and fill in as much as possible */ + len = min_t(size_t, len, sizeof(sas)); + if (copy_from_user(&sas, optval, len)) return -EFAULT; @@ -5686,9 +5690,6 @@ static int sctp_getsockopt_assoc_stats(struct sock *sk, int len, /* Mark beginning of a new observation period */ asoc->stats.max_obs_rto = asoc->rto_min; - /* Allow the struct to grow and fill in as much as possible */ - len = min_t(size_t, len, sizeof(sas)); - if (put_user(len, optlen)) return -EFAULT; @@ -5882,8 +5883,7 @@ static struct sctp_bind_bucket *sctp_bucket_create( static long sctp_get_port_local(struct sock *sk, union sctp_addr *addr) { struct sctp_bind_hashbucket *head; /* hash list */ - struct sctp_bind_bucket *pp; /* hash list port iterator */ - struct hlist_node *node; + struct sctp_bind_bucket *pp; unsigned short snum; int ret; @@ -5910,7 +5910,7 @@ static long sctp_get_port_local(struct sock *sk, union sctp_addr *addr) index = sctp_phashfn(sock_net(sk), rover); head = &sctp_port_hashtable[index]; sctp_spin_lock(&head->lock); - sctp_for_each_hentry(pp, node, &head->chain) + sctp_for_each_hentry(pp, &head->chain) if ((pp->port == rover) && net_eq(sock_net(sk), pp->net)) goto next; @@ -5938,7 +5938,7 @@ static long sctp_get_port_local(struct sock *sk, union sctp_addr *addr) */ head = &sctp_port_hashtable[sctp_phashfn(sock_net(sk), snum)]; sctp_spin_lock(&head->lock); - sctp_for_each_hentry(pp, node, &head->chain) { + sctp_for_each_hentry(pp, &head->chain) { if ((pp->port == snum) && net_eq(pp->net, sock_net(sk))) goto pp_found; } @@ -5970,7 +5970,7 @@ pp_found: * that this port/socket (sk) combination are already * in an endpoint. */ - sk_for_each_bound(sk2, node, &pp->owner) { + sk_for_each_bound(sk2, &pp->owner) { struct sctp_endpoint *ep2; ep2 = sctp_sk(sk2)->ep; @@ -6186,7 +6186,8 @@ unsigned int sctp_poll(struct file *file, struct socket *sock, poll_table *wait) /* Is there any exceptional events? */ if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) - mask |= POLLERR; + mask |= POLLERR | + (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0); if (sk->sk_shutdown & RCV_SHUTDOWN) mask |= POLLRDHUP | POLLIN | POLLRDNORM; if (sk->sk_shutdown == SHUTDOWN_MASK) diff --git a/net/sctp/ssnmap.c b/net/sctp/ssnmap.c index 442ad4ed6315..da8603523808 100644 --- a/net/sctp/ssnmap.c +++ b/net/sctp/ssnmap.c @@ -41,8 +41,6 @@ #include <net/sctp/sctp.h> #include <net/sctp/sm.h> -#define MAX_KMALLOC_SIZE 131072 - static struct sctp_ssnmap *sctp_ssnmap_init(struct sctp_ssnmap *map, __u16 in, __u16 out); @@ -65,7 +63,7 @@ struct sctp_ssnmap *sctp_ssnmap_new(__u16 in, __u16 out, int size; size = sctp_ssnmap_size(in, out); - if (size <= MAX_KMALLOC_SIZE) + if (size <= KMALLOC_MAX_SIZE) retval = kmalloc(size, gfp); else retval = (struct sctp_ssnmap *) @@ -76,13 +74,12 @@ struct sctp_ssnmap *sctp_ssnmap_new(__u16 in, __u16 out, if (!sctp_ssnmap_init(retval, in, out)) goto fail_map; - retval->malloced = 1; SCTP_DBG_OBJCNT_INC(ssnmap); return retval; fail_map: - if (size <= MAX_KMALLOC_SIZE) + if (size <= KMALLOC_MAX_SIZE) kfree(retval); else free_pages((unsigned long)retval, get_order(size)); @@ -120,14 +117,16 @@ void sctp_ssnmap_clear(struct sctp_ssnmap *map) /* Dispose of a ssnmap. 
*/ void sctp_ssnmap_free(struct sctp_ssnmap *map) { - if (map && map->malloced) { - int size; - - size = sctp_ssnmap_size(map->in.len, map->out.len); - if (size <= MAX_KMALLOC_SIZE) - kfree(map); - else - free_pages((unsigned long)map, get_order(size)); - SCTP_DBG_OBJCNT_DEC(ssnmap); - } + int size; + + if (unlikely(!map)) + return; + + size = sctp_ssnmap_size(map->in.len, map->out.len); + if (size <= KMALLOC_MAX_SIZE) + kfree(map); + else + free_pages((unsigned long)map, get_order(size)); + + SCTP_DBG_OBJCNT_DEC(ssnmap); } diff --git a/net/sctp/transport.c b/net/sctp/transport.c index 4e45bb68aef0..098f1d5f769e 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -123,7 +123,6 @@ struct sctp_transport *sctp_transport_new(struct net *net, if (!sctp_transport_init(net, transport, addr, gfp)) goto fail_init; - transport->malloced = 1; SCTP_DBG_OBJCNT_INC(transport); return transport; @@ -151,13 +150,11 @@ void sctp_transport_free(struct sctp_transport *transport) * structure hang around in memory since we know * the tranport is going away. */ - if (timer_pending(&transport->T3_rtx_timer) && - del_timer(&transport->T3_rtx_timer)) + if (del_timer(&transport->T3_rtx_timer)) sctp_transport_put(transport); /* Delete the ICMP proto unreachable timer if it's active. */ - if (timer_pending(&transport->proto_unreach_timer) && - del_timer(&transport->proto_unreach_timer)) + if (del_timer(&transport->proto_unreach_timer)) sctp_association_put(transport->asoc); sctp_transport_put(transport); @@ -168,10 +165,6 @@ static void sctp_transport_destroy_rcu(struct rcu_head *head) struct sctp_transport *transport; transport = container_of(head, struct sctp_transport, rcu); - if (transport->asoc) - sctp_association_put(transport->asoc); - - sctp_packet_free(&transport->packet); dst_release(transport->dst); kfree(transport); @@ -186,6 +179,11 @@ static void sctp_transport_destroy(struct sctp_transport *transport) SCTP_ASSERT(transport->dead, "Transport is not dead", return); call_rcu(&transport->rcu, sctp_transport_destroy_rcu); + + sctp_packet_free(&transport->packet); + + if (transport->asoc) + sctp_association_put(transport->asoc); } /* Start T3_rtx timer if it is not already running and update the heartbeat @@ -654,10 +652,9 @@ void sctp_transport_reset(struct sctp_transport *t) void sctp_transport_immediate_rtx(struct sctp_transport *t) { /* Stop pending T3_rtx_timer */ - if (timer_pending(&t->T3_rtx_timer)) { - (void)del_timer(&t->T3_rtx_timer); + if (del_timer(&t->T3_rtx_timer)) sctp_transport_put(t); - } + sctp_retransmit(&t->asoc->outqueue, t, SCTP_RTXR_T3_RTX); if (!timer_pending(&t->T3_rtx_timer)) { if (!mod_timer(&t->T3_rtx_timer, jiffies + t->rto)) diff --git a/net/sctp/tsnmap.c b/net/sctp/tsnmap.c index 5f25e0c92c31..396c45174e5b 100644 --- a/net/sctp/tsnmap.c +++ b/net/sctp/tsnmap.c @@ -51,7 +51,7 @@ static void sctp_tsnmap_update(struct sctp_tsnmap *map); static void sctp_tsnmap_find_gap_ack(unsigned long *map, __u16 off, __u16 len, __u16 *start, __u16 *end); -static int sctp_tsnmap_grow(struct sctp_tsnmap *map, u16 gap); +static int sctp_tsnmap_grow(struct sctp_tsnmap *map, u16 size); /* Initialize a block of memory as a tsnmap. 
*/ struct sctp_tsnmap *sctp_tsnmap_init(struct sctp_tsnmap *map, __u16 len, @@ -124,7 +124,7 @@ int sctp_tsnmap_mark(struct sctp_tsnmap *map, __u32 tsn, gap = tsn - map->base_tsn; - if (gap >= map->len && !sctp_tsnmap_grow(map, gap)) + if (gap >= map->len && !sctp_tsnmap_grow(map, gap + 1)) return -ENOMEM; if (!sctp_tsnmap_has_gap(map) && gap == 0) { @@ -360,23 +360,24 @@ __u16 sctp_tsnmap_num_gabs(struct sctp_tsnmap *map, return ngaps; } -static int sctp_tsnmap_grow(struct sctp_tsnmap *map, u16 gap) +static int sctp_tsnmap_grow(struct sctp_tsnmap *map, u16 size) { unsigned long *new; unsigned long inc; u16 len; - if (gap >= SCTP_TSN_MAP_SIZE) + if (size > SCTP_TSN_MAP_SIZE) return 0; - inc = ALIGN((gap - map->len),BITS_PER_LONG) + SCTP_TSN_MAP_INCREMENT; + inc = ALIGN((size - map->len), BITS_PER_LONG) + SCTP_TSN_MAP_INCREMENT; len = min_t(u16, map->len + inc, SCTP_TSN_MAP_SIZE); new = kzalloc(len>>3, GFP_ATOMIC); if (!new) return 0; - bitmap_copy(new, map->tsn_map, map->max_tsn_seen - map->base_tsn); + bitmap_copy(new, map->tsn_map, + map->max_tsn_seen - map->cumulative_tsn_ack_point); kfree(map->tsn_map); map->tsn_map = new; map->len = len; diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c index ada17464b65b..04e3d470f877 100644 --- a/net/sctp/ulpqueue.c +++ b/net/sctp/ulpqueue.c @@ -68,7 +68,6 @@ struct sctp_ulpq *sctp_ulpq_init(struct sctp_ulpq *ulpq, skb_queue_head_init(&ulpq->reasm); skb_queue_head_init(&ulpq->lobby); ulpq->pd_mode = 0; - ulpq->malloced = 0; return ulpq; } @@ -96,8 +95,6 @@ void sctp_ulpq_flush(struct sctp_ulpq *ulpq) void sctp_ulpq_free(struct sctp_ulpq *ulpq) { sctp_ulpq_flush(ulpq); - if (ulpq->malloced) - kfree(ulpq); } /* Process an incoming DATA chunk. */ @@ -106,6 +103,7 @@ int sctp_ulpq_tail_data(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk, { struct sk_buff_head temp; struct sctp_ulpevent *event; + int event_eor = 0; /* Create an event from the incoming chunk. */ event = sctp_ulpevent_make_rcvmsg(chunk->asoc, chunk, gfp); @@ -127,10 +125,12 @@ int sctp_ulpq_tail_data(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk, /* Send event to the ULP. 'event' is the sctp_ulpevent for * very first SKB on the 'temp' list. */ - if (event) + if (event) { + event_eor = (event->msg_flags & MSG_EOR) ? 1 : 0; sctp_ulpq_tail_event(ulpq, event); + } - return 0; + return event_eor; } /* Add a new event for propagation to the ULP. 
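The sctp_tsnmap_grow() hunk above renames the parameter from gap to size and passes gap + 1 at the call site, fixing an off-by-one: a TSN at bit offset gap needs gap + 1 bits of map. A worked example with assumed numbers:

        /* Example (values assumed for illustration):
         * base_tsn = 1000 and map->len = 64 cover TSNs 1000..1063.
         * Marking tsn = 1064 gives gap = 64, so the map must be able to
         * hold gap + 1 = 65 offsets; growing it to only `gap` bits kept
         * the new TSN just out of range.
         */
        if (gap >= map->len && !sctp_tsnmap_grow(map, gap + 1))
                return -ENOMEM;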
*/ @@ -540,14 +540,19 @@ static struct sctp_ulpevent *sctp_ulpq_retrieve_partial(struct sctp_ulpq *ulpq) ctsn = cevent->tsn; switch (cevent->msg_flags & SCTP_DATA_FRAG_MASK) { + case SCTP_DATA_FIRST_FRAG: + if (!first_frag) + return NULL; + goto done; case SCTP_DATA_MIDDLE_FRAG: if (!first_frag) { first_frag = pos; next_tsn = ctsn + 1; last_frag = pos; - } else if (next_tsn == ctsn) + } else if (next_tsn == ctsn) { next_tsn++; - else + last_frag = pos; + } else goto done; break; case SCTP_DATA_LAST_FRAG: @@ -651,6 +656,14 @@ static struct sctp_ulpevent *sctp_ulpq_retrieve_first(struct sctp_ulpq *ulpq) } else goto done; break; + + case SCTP_DATA_LAST_FRAG: + if (!first_frag) + return NULL; + else + goto done; + break; + default: return NULL; } @@ -962,20 +975,43 @@ static __u16 sctp_ulpq_renege_list(struct sctp_ulpq *ulpq, struct sk_buff_head *list, __u16 needed) { __u16 freed = 0; - __u32 tsn; - struct sk_buff *skb; + __u32 tsn, last_tsn; + struct sk_buff *skb, *flist, *last; struct sctp_ulpevent *event; struct sctp_tsnmap *tsnmap; tsnmap = &ulpq->asoc->peer.tsn_map; - while ((skb = __skb_dequeue_tail(list)) != NULL) { - freed += skb_headlen(skb); + while ((skb = skb_peek_tail(list)) != NULL) { event = sctp_skb2event(skb); tsn = event->tsn; + /* Don't renege below the Cumulative TSN ACK Point. */ + if (TSN_lte(tsn, sctp_tsnmap_get_ctsn(tsnmap))) + break; + + /* Events in ordering queue may have multiple fragments + * corresponding to additional TSNs. Sum the total + * freed space; find the last TSN. + */ + freed += skb_headlen(skb); + flist = skb_shinfo(skb)->frag_list; + for (last = flist; flist; flist = flist->next) { + last = flist; + freed += skb_headlen(last); + } + if (last) + last_tsn = sctp_skb2event(last)->tsn; + else + last_tsn = tsn; + + /* Unlink the event, then renege all applicable TSNs. */ + __skb_unlink(skb, list); sctp_ulpevent_free(event); - sctp_tsnmap_renege(tsnmap, tsn); + while (TSN_lte(tsn, last_tsn)) { + sctp_tsnmap_renege(tsnmap, tsn); + tsn++; + } if (freed >= needed) return freed; } @@ -1002,16 +1038,28 @@ void sctp_ulpq_partial_delivery(struct sctp_ulpq *ulpq, struct sctp_ulpevent *event; struct sctp_association *asoc; struct sctp_sock *sp; + __u32 ctsn; + struct sk_buff *skb; asoc = ulpq->asoc; sp = sctp_sk(asoc->base.sk); /* If the association is already in Partial Delivery mode - * we have noting to do. + * we have nothing to do. */ if (ulpq->pd_mode) return; + /* Data must be at or below the Cumulative TSN ACK Point to + * start partial delivery. + */ + skb = skb_peek(&asoc->ulpq.reasm); + if (skb != NULL) { + ctsn = sctp_skb2event(skb)->tsn; + if (!TSN_lte(ctsn, sctp_tsnmap_get_ctsn(&asoc->peer.tsn_map))) + return; + } + /* If the user enabled fragment interleave socket option, * multiple associations can enter partial delivery. * Otherwise, we can only enter partial delivery if the @@ -1054,12 +1102,16 @@ void sctp_ulpq_renege(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk, } /* If able to free enough room, accept this chunk. */ if (chunk && (freed >= needed)) { - __u32 tsn; - tsn = ntohl(chunk->subh.data_hdr->tsn); - sctp_tsnmap_mark(&asoc->peer.tsn_map, tsn, chunk->transport); - sctp_ulpq_tail_data(ulpq, chunk, gfp); - - sctp_ulpq_partial_delivery(ulpq, gfp); + int retval; + retval = sctp_ulpq_tail_data(ulpq, chunk, gfp); + /* + * Enter partial delivery if chunk has not been + * delivered; otherwise, drain the reassembly queue. 
+ */ + if (retval <= 0) + sctp_ulpq_partial_delivery(ulpq, gfp); + else if (retval == 1) + sctp_ulpq_reasm_drain(ulpq); } sk_mem_reclaim(asoc->base.sk); diff --git a/net/socket.c b/net/socket.c index 1bbc37b7a312..6b94633ca61d 100644 --- a/net/socket.c +++ b/net/socket.c @@ -69,7 +69,6 @@ #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <linux/mutex.h> -#include <linux/wanrouter.h> #include <linux/if_bridge.h> #include <linux/if_frad.h> #include <linux/if_vlan.h> @@ -370,16 +369,15 @@ struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname) file = alloc_file(&path, FMODE_READ | FMODE_WRITE, &socket_file_ops); - if (unlikely(!file)) { + if (unlikely(IS_ERR(file))) { /* drop dentry, keep inode */ ihold(path.dentry->d_inode); path_put(&path); - return ERR_PTR(-ENFILE); + return file; } sock->file = file; file->f_flags = O_RDWR | (flags & O_NONBLOCK); - file->f_pos = 0; file->private_data = sock; return file; } @@ -602,7 +600,7 @@ void sock_release(struct socket *sock) } EXPORT_SYMBOL(sock_release); -int sock_tx_timestamp(struct sock *sk, __u8 *tx_flags) +void sock_tx_timestamp(struct sock *sk, __u8 *tx_flags) { *tx_flags = 0; if (sock_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE)) @@ -611,7 +609,6 @@ int sock_tx_timestamp(struct sock *sk, __u8 *tx_flags) *tx_flags |= SKBTX_SW_TSTAMP; if (sock_flag(sk, SOCK_WIFI_STATUS)) *tx_flags |= SKBTX_WIFI_STATUS; - return 0; } EXPORT_SYMBOL(sock_tx_timestamp); @@ -684,16 +681,6 @@ int kernel_sendmsg(struct socket *sock, struct msghdr *msg, } EXPORT_SYMBOL(kernel_sendmsg); -static int ktime2ts(ktime_t kt, struct timespec *ts) -{ - if (kt.tv64) { - *ts = ktime_to_timespec(kt); - return 1; - } else { - return 0; - } -} - /* * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP) */ @@ -726,17 +713,15 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk, memset(ts, 0, sizeof(ts)); - if (skb->tstamp.tv64 && - sock_flag(sk, SOCK_TIMESTAMPING_SOFTWARE)) { - skb_get_timestampns(skb, ts + 0); + if (sock_flag(sk, SOCK_TIMESTAMPING_SOFTWARE) && + ktime_to_timespec_cond(skb->tstamp, ts + 0)) empty = 0; - } if (shhwtstamps) { if (sock_flag(sk, SOCK_TIMESTAMPING_SYS_HARDWARE) && - ktime2ts(shhwtstamps->syststamp, ts + 1)) + ktime_to_timespec_cond(shhwtstamps->syststamp, ts + 1)) empty = 0; if (sock_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE) && - ktime2ts(shhwtstamps->hwtstamp, ts + 2)) + ktime_to_timespec_cond(shhwtstamps->hwtstamp, ts + 2)) empty = 0; } if (!empty) @@ -1175,15 +1160,6 @@ static int sock_mmap(struct file *file, struct vm_area_struct *vma) static int sock_close(struct inode *inode, struct file *filp) { - /* - * It was possible the inode is NULL we were - * closing an unfinished socket. 
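The sock_alloc_file() fix above matters because alloc_file() signals failure through an error pointer, never NULL, so the old !file check could not fire and every failure was flattened to -ENFILE. The ERR_PTR idiom it now follows, sketched around a hypothetical allocator:

        struct file *file = alloc_something();   /* hypothetical helper */
        if (IS_ERR(file)) {
                cleanup_partial_state();          /* hypothetical undo */
                return file;                      /* keep the real errno */
        }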
- */ - - if (!inode) { - printk(KERN_DEBUG "sock_close: NULL inode\n"); - return 0; - } sock_release(SOCKET_I(inode)); return 0; } @@ -2840,7 +2816,7 @@ static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32) } ifr = compat_alloc_user_space(buf_size); - rxnfc = (void *)ifr + ALIGN(sizeof(struct ifreq), 8); + rxnfc = (void __user *)ifr + ALIGN(sizeof(struct ifreq), 8); if (copy_in_user(&ifr->ifr_name, &ifr32->ifr_name, IFNAMSIZ)) return -EFAULT; @@ -2864,12 +2840,12 @@ static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32) offsetof(struct ethtool_rxnfc, fs.ring_cookie)); if (copy_in_user(rxnfc, compat_rxnfc, - (void *)(&rxnfc->fs.m_ext + 1) - - (void *)rxnfc) || + (void __user *)(&rxnfc->fs.m_ext + 1) - + (void __user *)rxnfc) || copy_in_user(&rxnfc->fs.ring_cookie, &compat_rxnfc->fs.ring_cookie, - (void *)(&rxnfc->fs.location + 1) - - (void *)&rxnfc->fs.ring_cookie) || + (void __user *)(&rxnfc->fs.location + 1) - + (void __user *)&rxnfc->fs.ring_cookie) || copy_in_user(&rxnfc->rule_cnt, &compat_rxnfc->rule_cnt, sizeof(rxnfc->rule_cnt))) return -EFAULT; @@ -2881,12 +2857,12 @@ static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32) if (convert_out) { if (copy_in_user(compat_rxnfc, rxnfc, - (const void *)(&rxnfc->fs.m_ext + 1) - - (const void *)rxnfc) || + (const void __user *)(&rxnfc->fs.m_ext + 1) - + (const void __user *)rxnfc) || copy_in_user(&compat_rxnfc->fs.ring_cookie, &rxnfc->fs.ring_cookie, - (const void *)(&rxnfc->fs.location + 1) - - (const void *)&rxnfc->fs.ring_cookie) || + (const void __user *)(&rxnfc->fs.location + 1) - + (const void __user *)&rxnfc->fs.ring_cookie) || copy_in_user(&compat_rxnfc->rule_cnt, &rxnfc->rule_cnt, sizeof(rxnfc->rule_cnt))) return -EFAULT; diff --git a/net/sunrpc/Kconfig b/net/sunrpc/Kconfig index 03d03e37a7d5..241b54f30204 100644 --- a/net/sunrpc/Kconfig +++ b/net/sunrpc/Kconfig @@ -3,6 +3,7 @@ config SUNRPC config SUNRPC_GSS tristate + select OID_REGISTRY config SUNRPC_BACKCHANNEL bool @@ -10,7 +11,7 @@ config SUNRPC_BACKCHANNEL config SUNRPC_XPRT_RDMA tristate - depends on SUNRPC && INFINIBAND && INFINIBAND_ADDR_TRANS && EXPERIMENTAL + depends on SUNRPC && INFINIBAND && INFINIBAND_ADDR_TRANS default SUNRPC && INFINIBAND help This option allows the NFS client and server to support @@ -24,7 +25,6 @@ config SUNRPC_XPRT_RDMA config SUNRPC_SWAP bool depends on SUNRPC - select NETVM config RPCSEC_GSS_KRB5 tristate "Secure RPC: Kerberos V mechanism" diff --git a/net/sunrpc/addr.c b/net/sunrpc/addr.c index d11418f97f1f..a622ad64acd8 100644 --- a/net/sunrpc/addr.c +++ b/net/sunrpc/addr.c @@ -17,7 +17,8 @@ */ #include <net/ipv6.h> -#include <linux/sunrpc/clnt.h> +#include <linux/sunrpc/addr.h> +#include <linux/sunrpc/msg_prot.h> #include <linux/slab.h> #include <linux/export.h> diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index b5c067bccc45..ed2fdd210c0b 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -82,7 +82,7 @@ MODULE_PARM_DESC(auth_hashtable_size, "RPC credential cache hashtable size"); static u32 pseudoflavor_to_flavor(u32 flavor) { - if (flavor >= RPC_AUTH_MAXFLAVOR) + if (flavor > RPC_AUTH_MAXFLAVOR) return RPC_AUTH_GSS; return flavor; } @@ -124,6 +124,79 @@ rpcauth_unregister(const struct rpc_authops *ops) EXPORT_SYMBOL_GPL(rpcauth_unregister); /** + * rpcauth_get_pseudoflavor - check if security flavor is supported + * @flavor: a security flavor + * @info: a GSS mech OID, quality of protection, and service value + * + * Verifies that an appropriate kernel module is 
available or already loaded. + * Returns an equivalent pseudoflavor, or RPC_AUTH_MAXFLAVOR if "flavor" is + * not supported locally. + */ +rpc_authflavor_t +rpcauth_get_pseudoflavor(rpc_authflavor_t flavor, struct rpcsec_gss_info *info) +{ + const struct rpc_authops *ops; + rpc_authflavor_t pseudoflavor; + + ops = auth_flavors[flavor]; + if (ops == NULL) + request_module("rpc-auth-%u", flavor); + spin_lock(&rpc_authflavor_lock); + ops = auth_flavors[flavor]; + if (ops == NULL || !try_module_get(ops->owner)) { + spin_unlock(&rpc_authflavor_lock); + return RPC_AUTH_MAXFLAVOR; + } + spin_unlock(&rpc_authflavor_lock); + + pseudoflavor = flavor; + if (ops->info2flavor != NULL) + pseudoflavor = ops->info2flavor(info); + + module_put(ops->owner); + return pseudoflavor; +} +EXPORT_SYMBOL_GPL(rpcauth_get_pseudoflavor); + +/** + * rpcauth_get_gssinfo - find GSS tuple matching a GSS pseudoflavor + * @pseudoflavor: GSS pseudoflavor to match + * @info: rpcsec_gss_info structure to fill in + * + * Returns zero and fills in "info" if pseudoflavor matches a + * supported mechanism. + */ +int +rpcauth_get_gssinfo(rpc_authflavor_t pseudoflavor, struct rpcsec_gss_info *info) +{ + rpc_authflavor_t flavor = pseudoflavor_to_flavor(pseudoflavor); + const struct rpc_authops *ops; + int result; + + if (flavor >= RPC_AUTH_MAXFLAVOR) + return -EINVAL; + + ops = auth_flavors[flavor]; + if (ops == NULL) + request_module("rpc-auth-%u", flavor); + spin_lock(&rpc_authflavor_lock); + ops = auth_flavors[flavor]; + if (ops == NULL || !try_module_get(ops->owner)) { + spin_unlock(&rpc_authflavor_lock); + return -ENOENT; + } + spin_unlock(&rpc_authflavor_lock); + + result = -ENOENT; + if (ops->flavor2info != NULL) + result = ops->flavor2info(pseudoflavor, info); + + module_put(ops->owner); + return result; +} +EXPORT_SYMBOL_GPL(rpcauth_get_gssinfo); + +/** * rpcauth_list_flavors - discover registered flavors and pseudoflavors * @array: array to fill in * @size: size of "array" @@ -407,15 +480,14 @@ rpcauth_lookup_credcache(struct rpc_auth *auth, struct auth_cred * acred, { LIST_HEAD(free); struct rpc_cred_cache *cache = auth->au_credcache; - struct hlist_node *pos; struct rpc_cred *cred = NULL, *entry, *new; unsigned int nr; - nr = hash_long(acred->uid, cache->hashbits); + nr = hash_long(from_kuid(&init_user_ns, acred->uid), cache->hashbits); rcu_read_lock(); - hlist_for_each_entry_rcu(entry, pos, &cache->hashtable[nr], cr_hash) { + hlist_for_each_entry_rcu(entry, &cache->hashtable[nr], cr_hash) { if (!entry->cr_ops->crmatch(acred, entry, flags)) continue; spin_lock(&cache->lock); @@ -439,7 +511,7 @@ rpcauth_lookup_credcache(struct rpc_auth *auth, struct auth_cred * acred, } spin_lock(&cache->lock); - hlist_for_each_entry(entry, pos, &cache->hashtable[nr], cr_hash) { + hlist_for_each_entry(entry, &cache->hashtable[nr], cr_hash) { if (!entry->cr_ops->crmatch(acred, entry, flags)) continue; cred = get_rpccred(entry); @@ -519,8 +591,8 @@ rpcauth_bind_root_cred(struct rpc_task *task, int lookupflags) { struct rpc_auth *auth = task->tk_client->cl_auth; struct auth_cred acred = { - .uid = 0, - .gid = 0, + .uid = GLOBAL_ROOT_UID, + .gid = GLOBAL_ROOT_GID, }; dprintk("RPC: %5u looking up %s cred\n", diff --git a/net/sunrpc/auth_generic.c b/net/sunrpc/auth_generic.c index 6ed6f201b022..b6badafc6494 100644 --- a/net/sunrpc/auth_generic.c +++ b/net/sunrpc/auth_generic.c @@ -18,8 +18,8 @@ # define RPCDBG_FACILITY RPCDBG_AUTH #endif -#define RPC_MACHINE_CRED_USERID ((uid_t)0) -#define RPC_MACHINE_CRED_GROUPID ((gid_t)0) +#define 
RPC_MACHINE_CRED_USERID GLOBAL_ROOT_UID +#define RPC_MACHINE_CRED_GROUPID GLOBAL_ROOT_GID struct generic_cred { struct rpc_cred gc_base; @@ -96,7 +96,9 @@ generic_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags) dprintk("RPC: allocated %s cred %p for uid %d gid %d\n", gcred->acred.machine_cred ? "machine" : "generic", - gcred, acred->uid, acred->gid); + gcred, + from_kuid(&init_user_ns, acred->uid), + from_kgid(&init_user_ns, acred->gid)); return &gcred->gc_base; } @@ -129,8 +131,8 @@ machine_cred_match(struct auth_cred *acred, struct generic_cred *gcred, int flag { if (!gcred->acred.machine_cred || gcred->acred.principal != acred->principal || - gcred->acred.uid != acred->uid || - gcred->acred.gid != acred->gid) + !uid_eq(gcred->acred.uid, acred->uid) || + !gid_eq(gcred->acred.gid, acred->gid)) return 0; return 1; } @@ -147,8 +149,8 @@ generic_match(struct auth_cred *acred, struct rpc_cred *cred, int flags) if (acred->machine_cred) return machine_cred_match(acred, gcred, flags); - if (gcred->acred.uid != acred->uid || - gcred->acred.gid != acred->gid || + if (!uid_eq(gcred->acred.uid, acred->uid) || + !gid_eq(gcred->acred.gid, acred->gid) || gcred->acred.machine_cred != 0) goto out_nomatch; diff --git a/net/sunrpc/auth_gss/Makefile b/net/sunrpc/auth_gss/Makefile index 9e4cb59ef9f0..14e9e53e63d5 100644 --- a/net/sunrpc/auth_gss/Makefile +++ b/net/sunrpc/auth_gss/Makefile @@ -5,7 +5,8 @@ obj-$(CONFIG_SUNRPC_GSS) += auth_rpcgss.o auth_rpcgss-y := auth_gss.o gss_generic_token.o \ - gss_mech_switch.o svcauth_gss.o + gss_mech_switch.o svcauth_gss.o \ + gss_rpc_upcall.o gss_rpc_xdr.o obj-$(CONFIG_RPCSEC_GSS_KRB5) += rpcsec_gss_krb5.o diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 6e5c824b040b..7da6b457f66a 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -238,7 +238,7 @@ gss_fill_context(const void *p, const void *end, struct gss_cl_ctx *ctx, struct p = ERR_PTR(-EFAULT); goto err; } - ret = gss_import_sec_context(p, seclen, gm, &ctx->gc_gss_ctx, GFP_NOFS); + ret = gss_import_sec_context(p, seclen, gm, &ctx->gc_gss_ctx, NULL, GFP_NOFS); if (ret < 0) { p = ERR_PTR(ret); goto err; @@ -247,8 +247,7 @@ gss_fill_context(const void *p, const void *end, struct gss_cl_ctx *ctx, struct __func__, ctx->gc_expiry, now, timeout); return q; err: - dprintk("RPC: %s returns %ld gc_expiry %lu now %lu timeout %u\n", - __func__, -PTR_ERR(p), ctx->gc_expiry, now, timeout); + dprintk("RPC: %s returns error %ld\n", __func__, -PTR_ERR(p)); return p; } @@ -256,7 +255,7 @@ err: struct gss_upcall_msg { atomic_t count; - uid_t uid; + kuid_t uid; struct rpc_pipe_msg msg; struct list_head list; struct gss_auth *auth; @@ -303,11 +302,11 @@ gss_release_msg(struct gss_upcall_msg *gss_msg) } static struct gss_upcall_msg * -__gss_find_upcall(struct rpc_pipe *pipe, uid_t uid) +__gss_find_upcall(struct rpc_pipe *pipe, kuid_t uid) { struct gss_upcall_msg *pos; list_for_each_entry(pos, &pipe->in_downcall, list) { - if (pos->uid != uid) + if (!uid_eq(pos->uid, uid)) continue; atomic_inc(&pos->count); dprintk("RPC: %s found msg %p\n", __func__, pos); @@ -395,8 +394,11 @@ gss_upcall_callback(struct rpc_task *task) static void gss_encode_v0_msg(struct gss_upcall_msg *gss_msg) { - gss_msg->msg.data = &gss_msg->uid; - gss_msg->msg.len = sizeof(gss_msg->uid); + uid_t uid = from_kuid(&init_user_ns, gss_msg->uid); + memcpy(gss_msg->databuf, &uid, sizeof(uid)); + gss_msg->msg.data = gss_msg->databuf; + gss_msg->msg.len = sizeof(uid); + 
BUG_ON(sizeof(uid) > UPCALL_BUF_LEN); } static void gss_encode_v1_msg(struct gss_upcall_msg *gss_msg, @@ -409,7 +411,7 @@ static void gss_encode_v1_msg(struct gss_upcall_msg *gss_msg, gss_msg->msg.len = sprintf(gss_msg->databuf, "mech=%s uid=%d ", mech->gm_name, - gss_msg->uid); + from_kuid(&init_user_ns, gss_msg->uid)); p += gss_msg->msg.len; if (clnt->cl_principal) { len = sprintf(p, "target=%s ", clnt->cl_principal); @@ -445,7 +447,7 @@ static void gss_encode_msg(struct gss_upcall_msg *gss_msg, static struct gss_upcall_msg * gss_alloc_msg(struct gss_auth *gss_auth, struct rpc_clnt *clnt, - uid_t uid, const char *service_name) + kuid_t uid, const char *service_name) { struct gss_upcall_msg *gss_msg; int vers; @@ -475,7 +477,7 @@ gss_setup_upcall(struct rpc_clnt *clnt, struct gss_auth *gss_auth, struct rpc_cr struct gss_cred *gss_cred = container_of(cred, struct gss_cred, gc_base); struct gss_upcall_msg *gss_new, *gss_msg; - uid_t uid = cred->cr_uid; + kuid_t uid = cred->cr_uid; gss_new = gss_alloc_msg(gss_auth, clnt, uid, gss_cred->gc_principal); if (IS_ERR(gss_new)) @@ -517,7 +519,7 @@ gss_refresh_upcall(struct rpc_task *task) int err = 0; dprintk("RPC: %5u %s for uid %u\n", - task->tk_pid, __func__, cred->cr_uid); + task->tk_pid, __func__, from_kuid(&init_user_ns, cred->cr_uid)); gss_msg = gss_setup_upcall(task->tk_client, gss_auth, cred); if (PTR_ERR(gss_msg) == -EAGAIN) { /* XXX: warning on the first, under the assumption we @@ -549,7 +551,8 @@ gss_refresh_upcall(struct rpc_task *task) gss_release_msg(gss_msg); out: dprintk("RPC: %5u %s for uid %u result %d\n", - task->tk_pid, __func__, cred->cr_uid, err); + task->tk_pid, __func__, + from_kuid(&init_user_ns, cred->cr_uid), err); return err; } @@ -562,7 +565,8 @@ gss_create_upcall(struct gss_auth *gss_auth, struct gss_cred *gss_cred) DEFINE_WAIT(wait); int err = 0; - dprintk("RPC: %s for uid %u\n", __func__, cred->cr_uid); + dprintk("RPC: %s for uid %u\n", + __func__, from_kuid(&init_user_ns, cred->cr_uid)); retry: gss_msg = gss_setup_upcall(gss_auth->client, gss_auth, cred); if (PTR_ERR(gss_msg) == -EAGAIN) { @@ -604,7 +608,7 @@ out_intr: gss_release_msg(gss_msg); out: dprintk("RPC: %s for uid %u result %d\n", - __func__, cred->cr_uid, err); + __func__, from_kuid(&init_user_ns, cred->cr_uid), err); return err; } @@ -616,9 +620,10 @@ gss_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) const void *p, *end; void *buf; struct gss_upcall_msg *gss_msg; - struct rpc_pipe *pipe = RPC_I(filp->f_dentry->d_inode)->pipe; + struct rpc_pipe *pipe = RPC_I(file_inode(filp))->pipe; struct gss_cl_ctx *ctx; - uid_t uid; + uid_t id; + kuid_t uid; ssize_t err = -EFBIG; if (mlen > MSG_BUF_MAXSIZE) @@ -633,12 +638,18 @@ gss_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) goto err; end = (const void *)((char *)buf + mlen); - p = simple_get_bytes(buf, end, &uid, sizeof(uid)); + p = simple_get_bytes(buf, end, &id, sizeof(id)); if (IS_ERR(p)) { err = PTR_ERR(p); goto err; } + uid = make_kuid(&init_user_ns, id); + if (!uid_valid(uid)) { + err = -EINVAL; + goto err; + } + err = -ENOMEM; ctx = gss_alloc_context(); if (ctx == NULL) @@ -856,8 +867,7 @@ gss_create(struct rpc_clnt *clnt, rpc_authflavor_t flavor) err = -EINVAL; gss_auth->mech = gss_mech_get_by_pseudoflavor(flavor); if (!gss_auth->mech) { - printk(KERN_WARNING "%s: Pseudoflavor %d not found!\n", - __func__, flavor); + dprintk("RPC: Pseudoflavor %d not found!\n", flavor); goto err_free; } gss_auth->service = gss_pseudoflavor_to_service(gss_auth->mech, flavor); 
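(A note on the pattern running through these auth_gss.c hunks: every uid that crosses the user/kernel boundary is now converted through init_user_ns and compared with uid_eq() instead of ==. Below is a minimal self-contained sketch of that discipline; the helper name uid_round_trips is invented for illustration and is not part of the patch:)

#include <linux/uidgid.h>
#include <linux/user_namespace.h>

/* Convert a kernel uid to its wire representation and back, validating
 * the result, as the dprintk and pipe-message hunks above do. */
static bool uid_round_trips(kuid_t kuid)
{
	uid_t id = from_kuid(&init_user_ns, kuid);	/* kuid_t -> wire id */
	kuid_t back = make_kuid(&init_user_ns, id);	/* wire id -> kuid_t */

	if (!uid_valid(back))		/* ids from userspace must be checked */
		return false;
	return uid_eq(back, kuid);	/* kuid_t is opaque; do not use == */
}

(gss_pipe_downcall above follows exactly this shape: the raw id read from the pipe goes through make_kuid() and is rejected with -EINVAL when uid_valid() fails.)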
@@ -1059,7 +1069,8 @@ gss_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags) int err = -ENOMEM; dprintk("RPC: %s for uid %d, flavor %d\n", - __func__, acred->uid, auth->au_flavor); + __func__, from_kuid(&init_user_ns, acred->uid), + auth->au_flavor); if (!(cred = kzalloc(sizeof(*cred), GFP_NOFS))) goto out_err; @@ -1115,7 +1126,7 @@ out: } if (gss_cred->gc_principal != NULL) return 0; - return rc->cr_uid == acred->uid; + return uid_eq(rc->cr_uid, acred->uid); } /* @@ -1154,7 +1165,7 @@ gss_marshal(struct rpc_task *task, __be32 *p) /* We compute the checksum for the verifier over the xdr-encoded bytes * starting with the xid and ending at the end of the credential: */ - iov.iov_base = xprt_skip_transport_header(task->tk_xprt, + iov.iov_base = xprt_skip_transport_header(req->rq_xprt, req->rq_snd_buf.head[0].iov_base); iov.iov_len = (u8 *)p - (u8 *)iov.iov_base; xdr_buf_from_iov(&iov, &verf_buf); @@ -1629,6 +1640,8 @@ static const struct rpc_authops authgss_ops = { .pipes_create = gss_pipes_dentries_create, .pipes_destroy = gss_pipes_dentries_destroy, .list_pseudoflavors = gss_mech_list_pseudoflavors, + .info2flavor = gss_mech_info2flavor, + .flavor2info = gss_mech_flavor2info, }; static const struct rpc_credops gss_credops = { @@ -1721,6 +1734,7 @@ static void __exit exit_rpcsec_gss(void) rcu_barrier(); /* Wait for completion of call_rcu()'s */ } +MODULE_ALIAS("rpc-auth-6"); MODULE_LICENSE("GPL"); module_param_named(expired_cred_retry_delay, gss_expired_cred_retry_delay, diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c index d3611f11a8df..0d3c158ef8fa 100644 --- a/net/sunrpc/auth_gss/gss_krb5_mech.c +++ b/net/sunrpc/auth_gss/gss_krb5_mech.c @@ -679,6 +679,7 @@ out_err: static int gss_import_sec_context_kerberos(const void *p, size_t len, struct gss_ctx *ctx_id, + time_t *endtime, gfp_t gfp_mask) { const void *end = (const void *)((const char *)p + len); @@ -694,9 +695,11 @@ gss_import_sec_context_kerberos(const void *p, size_t len, else ret = gss_import_v2_context(p, end, ctx, gfp_mask); - if (ret == 0) + if (ret == 0) { ctx_id->internal_ctx_id = ctx; - else + if (endtime) + *endtime = ctx->endtime; + } else kfree(ctx); dprintk("RPC: %s: returning %d\n", __func__, ret); @@ -729,16 +732,19 @@ static const struct gss_api_ops gss_kerberos_ops = { static struct pf_desc gss_kerberos_pfs[] = { [0] = { .pseudoflavor = RPC_AUTH_GSS_KRB5, + .qop = GSS_C_QOP_DEFAULT, .service = RPC_GSS_SVC_NONE, .name = "krb5", }, [1] = { .pseudoflavor = RPC_AUTH_GSS_KRB5I, + .qop = GSS_C_QOP_DEFAULT, .service = RPC_GSS_SVC_INTEGRITY, .name = "krb5i", }, [2] = { .pseudoflavor = RPC_AUTH_GSS_KRB5P, + .qop = GSS_C_QOP_DEFAULT, .service = RPC_GSS_SVC_PRIVACY, .name = "krb5p", }, @@ -750,11 +756,12 @@ MODULE_ALIAS("rpc-auth-gss-krb5p"); MODULE_ALIAS("rpc-auth-gss-390003"); MODULE_ALIAS("rpc-auth-gss-390004"); MODULE_ALIAS("rpc-auth-gss-390005"); +MODULE_ALIAS("rpc-auth-gss-1.2.840.113554.1.2.2"); static struct gss_api_mech gss_kerberos_mech = { .gm_name = "krb5", .gm_owner = THIS_MODULE, - .gm_oid = {9, (void *)"\x2a\x86\x48\x86\xf7\x12\x01\x02\x02"}, + .gm_oid = { 9, "\x2a\x86\x48\x86\xf7\x12\x01\x02\x02" }, .gm_ops = &gss_kerberos_ops, .gm_pf_num = ARRAY_SIZE(gss_kerberos_pfs), .gm_pfs = gss_kerberos_pfs, diff --git a/net/sunrpc/auth_gss/gss_krb5_wrap.c b/net/sunrpc/auth_gss/gss_krb5_wrap.c index 107c4528654f..1da52d1406fc 100644 --- a/net/sunrpc/auth_gss/gss_krb5_wrap.c +++ b/net/sunrpc/auth_gss/gss_krb5_wrap.c @@ -130,8 +130,8 @@ gss_krb5_make_confounder(char 
*p, u32 conflen) /* initialize to random value */ if (i == 0) { - i = random32(); - i = (i << 32) | random32(); + i = prandom_u32(); + i = (i << 32) | prandom_u32(); } switch (conflen) { @@ -574,6 +574,8 @@ gss_unwrap_kerberos_v2(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf) buf->head[0].iov_len -= GSS_KRB5_TOK_HDR_LEN + headskip; buf->len -= GSS_KRB5_TOK_HDR_LEN + headskip; + /* Trim off the checksum blob */ + xdr_buf_trim(buf, GSS_KRB5_TOK_HDR_LEN + tailskip); return GSS_S_COMPLETE; } diff --git a/net/sunrpc/auth_gss/gss_mech_switch.c b/net/sunrpc/auth_gss/gss_mech_switch.c index b174fcd9ff4c..defa9d33925c 100644 --- a/net/sunrpc/auth_gss/gss_mech_switch.c +++ b/net/sunrpc/auth_gss/gss_mech_switch.c @@ -36,6 +36,7 @@ #include <linux/types.h> #include <linux/slab.h> #include <linux/module.h> +#include <linux/oid_registry.h> #include <linux/sunrpc/msg_prot.h> #include <linux/sunrpc/gss_asn1.h> #include <linux/sunrpc/auth_gss.h> @@ -102,8 +103,13 @@ out: return status; } -int -gss_mech_register(struct gss_api_mech *gm) +/** + * gss_mech_register - register a GSS mechanism + * @gm: GSS mechanism handle + * + * Returns zero if successful, or a negative errno. + */ +int gss_mech_register(struct gss_api_mech *gm) { int status; @@ -116,11 +122,14 @@ gss_mech_register(struct gss_api_mech *gm) dprintk("RPC: registered gss mechanism %s\n", gm->gm_name); return 0; } - EXPORT_SYMBOL_GPL(gss_mech_register); -void -gss_mech_unregister(struct gss_api_mech *gm) +/** + * gss_mech_unregister - release a GSS mechanism + * @gm: GSS mechanism handle + * + */ +void gss_mech_unregister(struct gss_api_mech *gm) { spin_lock(&registered_mechs_lock); list_del(&gm->gm_list); @@ -128,19 +137,15 @@ gss_mech_unregister(struct gss_api_mech *gm) dprintk("RPC: unregistered gss mechanism %s\n", gm->gm_name); gss_mech_free(gm); } - EXPORT_SYMBOL_GPL(gss_mech_unregister); -struct gss_api_mech * -gss_mech_get(struct gss_api_mech *gm) +static struct gss_api_mech *gss_mech_get(struct gss_api_mech *gm) { __module_get(gm->gm_owner); return gm; } -EXPORT_SYMBOL_GPL(gss_mech_get); - -struct gss_api_mech * +static struct gss_api_mech * _gss_mech_get_by_name(const char *name) { struct gss_api_mech *pos, *gm = NULL; @@ -169,12 +174,16 @@ struct gss_api_mech * gss_mech_get_by_name(const char *name) } return gm; } -EXPORT_SYMBOL_GPL(gss_mech_get_by_name); -struct gss_api_mech * -gss_mech_get_by_OID(struct xdr_netobj *obj) +struct gss_api_mech *gss_mech_get_by_OID(struct rpcsec_gss_oid *obj) { struct gss_api_mech *pos, *gm = NULL; + char buf[32]; + + if (sprint_oid(obj->data, obj->len, buf, sizeof(buf)) < 0) + return NULL; + dprintk("RPC: %s(%s)\n", __func__, buf); + request_module("rpc-auth-gss-%s", buf); spin_lock(&registered_mechs_lock); list_for_each_entry(pos, &registered_mechs, gm_list) { @@ -188,11 +197,8 @@ gss_mech_get_by_OID(struct xdr_netobj *obj) } spin_unlock(&registered_mechs_lock); return gm; - } -EXPORT_SYMBOL_GPL(gss_mech_get_by_OID); - static inline int mech_supports_pseudoflavor(struct gss_api_mech *gm, u32 pseudoflavor) { @@ -205,7 +211,7 @@ mech_supports_pseudoflavor(struct gss_api_mech *gm, u32 pseudoflavor) return 0; } -struct gss_api_mech *_gss_mech_get_by_pseudoflavor(u32 pseudoflavor) +static struct gss_api_mech *_gss_mech_get_by_pseudoflavor(u32 pseudoflavor) { struct gss_api_mech *gm = NULL, *pos; @@ -237,8 +243,6 @@ gss_mech_get_by_pseudoflavor(u32 pseudoflavor) return gm; } -EXPORT_SYMBOL_GPL(gss_mech_get_by_pseudoflavor); - /** * gss_mech_list_pseudoflavors - Discover registered GSS pseudoflavors * @array: 
array to fill in @@ -268,19 +272,82 @@ int gss_mech_list_pseudoflavors(rpc_authflavor_t *array_ptr, int size) return i; } -u32 -gss_svc_to_pseudoflavor(struct gss_api_mech *gm, u32 service) +/** + * gss_svc_to_pseudoflavor - map a GSS service number to a pseudoflavor + * @gm: GSS mechanism handle + * @qop: GSS quality-of-protection value + * @service: GSS service value + * + * Returns a matching security flavor, or RPC_AUTH_MAXFLAVOR if none is found. + */ +rpc_authflavor_t gss_svc_to_pseudoflavor(struct gss_api_mech *gm, u32 qop, + u32 service) { int i; for (i = 0; i < gm->gm_pf_num; i++) { - if (gm->gm_pfs[i].service == service) { + if (gm->gm_pfs[i].qop == qop && + gm->gm_pfs[i].service == service) { return gm->gm_pfs[i].pseudoflavor; } } - return RPC_AUTH_MAXFLAVOR; /* illegal value */ + return RPC_AUTH_MAXFLAVOR; +} + +/** + * gss_mech_info2flavor - look up a pseudoflavor given a GSS tuple + * @info: a GSS mech OID, quality of protection, and service value + * + * Returns a matching pseudoflavor, or RPC_AUTH_MAXFLAVOR if the tuple is + * not supported. + */ +rpc_authflavor_t gss_mech_info2flavor(struct rpcsec_gss_info *info) +{ + rpc_authflavor_t pseudoflavor; + struct gss_api_mech *gm; + + gm = gss_mech_get_by_OID(&info->oid); + if (gm == NULL) + return RPC_AUTH_MAXFLAVOR; + + pseudoflavor = gss_svc_to_pseudoflavor(gm, info->qop, info->service); + + gss_mech_put(gm); + return pseudoflavor; +} + +/** + * gss_mech_flavor2info - look up a GSS tuple for a given pseudoflavor + * @pseudoflavor: GSS pseudoflavor to match + * @info: rpcsec_gss_info structure to fill in + * + * Returns zero and fills in "info" if pseudoflavor matches a + * supported mechanism. Otherwise a negative errno is returned. + */ +int gss_mech_flavor2info(rpc_authflavor_t pseudoflavor, + struct rpcsec_gss_info *info) +{ + struct gss_api_mech *gm; + int i; + + gm = gss_mech_get_by_pseudoflavor(pseudoflavor); + if (gm == NULL) + return -ENOENT; + + for (i = 0; i < gm->gm_pf_num; i++) { + if (gm->gm_pfs[i].pseudoflavor == pseudoflavor) { + memcpy(info->oid.data, gm->gm_oid.data, gm->gm_oid.len); + info->oid.len = gm->gm_oid.len; + info->qop = gm->gm_pfs[i].qop; + info->service = gm->gm_pfs[i].service; + gss_mech_put(gm); + return 0; + } + } + + gss_mech_put(gm); + return -ENOENT; } -EXPORT_SYMBOL_GPL(gss_svc_to_pseudoflavor); u32 gss_pseudoflavor_to_service(struct gss_api_mech *gm, u32 pseudoflavor) @@ -294,8 +361,6 @@ gss_pseudoflavor_to_service(struct gss_api_mech *gm, u32 pseudoflavor) return 0; } -EXPORT_SYMBOL_GPL(gss_pseudoflavor_to_service); - char * gss_service_to_auth_domain_name(struct gss_api_mech *gm, u32 service) { @@ -308,8 +373,6 @@ gss_service_to_auth_domain_name(struct gss_api_mech *gm, u32 service) return NULL; } -EXPORT_SYMBOL_GPL(gss_service_to_auth_domain_name); - void gss_mech_put(struct gss_api_mech * gm) { @@ -317,22 +380,21 @@ gss_mech_put(struct gss_api_mech * gm) module_put(gm->gm_owner); } -EXPORT_SYMBOL_GPL(gss_mech_put); - /* The mech could probably be determined from the token instead, but it's just * as easy for now to pass it in. 
*/ int gss_import_sec_context(const void *input_token, size_t bufsize, struct gss_api_mech *mech, struct gss_ctx **ctx_id, + time_t *endtime, gfp_t gfp_mask) { if (!(*ctx_id = kzalloc(sizeof(**ctx_id), gfp_mask))) return -ENOMEM; (*ctx_id)->mech_type = gss_mech_get(mech); - return mech->gm_ops - ->gss_import_sec_context(input_token, bufsize, *ctx_id, gfp_mask); + return mech->gm_ops->gss_import_sec_context(input_token, bufsize, + *ctx_id, endtime, gfp_mask); } /* gss_get_mic: compute a mic over message and return mic_token. */ diff --git a/net/sunrpc/auth_gss/gss_rpc_upcall.c b/net/sunrpc/auth_gss/gss_rpc_upcall.c new file mode 100644 index 000000000000..d304f41260f2 --- /dev/null +++ b/net/sunrpc/auth_gss/gss_rpc_upcall.c @@ -0,0 +1,358 @@ +/* + * linux/net/sunrpc/gss_rpc_upcall.c + * + * Copyright (C) 2012 Simo Sorce <simo@redhat.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include <linux/types.h> +#include <linux/un.h> + +#include <linux/sunrpc/svcauth.h> +#include "gss_rpc_upcall.h" + +#define GSSPROXY_SOCK_PATHNAME "/var/run/gssproxy.sock" + +#define GSSPROXY_PROGRAM (400112u) +#define GSSPROXY_VERS_1 (1u) + +/* + * Encoding/Decoding functions + */ + +enum { + GSSX_NULL = 0, /* Unused */ + GSSX_INDICATE_MECHS = 1, + GSSX_GET_CALL_CONTEXT = 2, + GSSX_IMPORT_AND_CANON_NAME = 3, + GSSX_EXPORT_CRED = 4, + GSSX_IMPORT_CRED = 5, + GSSX_ACQUIRE_CRED = 6, + GSSX_STORE_CRED = 7, + GSSX_INIT_SEC_CONTEXT = 8, + GSSX_ACCEPT_SEC_CONTEXT = 9, + GSSX_RELEASE_HANDLE = 10, + GSSX_GET_MIC = 11, + GSSX_VERIFY = 12, + GSSX_WRAP = 13, + GSSX_UNWRAP = 14, + GSSX_WRAP_SIZE_LIMIT = 15, +}; + +#define PROC(proc, name) \ +[GSSX_##proc] = { \ + .p_proc = GSSX_##proc, \ + .p_encode = (kxdreproc_t)gssx_enc_##name, \ + .p_decode = (kxdrdproc_t)gssx_dec_##name, \ + .p_arglen = GSSX_ARG_##name##_sz, \ + .p_replen = GSSX_RES_##name##_sz, \ + .p_statidx = GSSX_##proc, \ + .p_name = #proc, \ +} + +static struct rpc_procinfo gssp_procedures[] = { + PROC(INDICATE_MECHS, indicate_mechs), + PROC(GET_CALL_CONTEXT, get_call_context), + PROC(IMPORT_AND_CANON_NAME, import_and_canon_name), + PROC(EXPORT_CRED, export_cred), + PROC(IMPORT_CRED, import_cred), + PROC(ACQUIRE_CRED, acquire_cred), + PROC(STORE_CRED, store_cred), + PROC(INIT_SEC_CONTEXT, init_sec_context), + PROC(ACCEPT_SEC_CONTEXT, accept_sec_context), + PROC(RELEASE_HANDLE, release_handle), + PROC(GET_MIC, get_mic), + PROC(VERIFY, verify), + PROC(WRAP, wrap), + PROC(UNWRAP, unwrap), + PROC(WRAP_SIZE_LIMIT, wrap_size_limit), +}; + + + +/* + * Common transport functions + */ + +static const struct rpc_program gssp_program; + +static int gssp_rpc_create(struct net *net, struct rpc_clnt **_clnt) +{ + static const struct sockaddr_un gssp_localaddr = { + .sun_family = AF_LOCAL, + .sun_path = GSSPROXY_SOCK_PATHNAME, + }; + struct rpc_create_args args = { + .net = net, + .protocol = XPRT_TRANSPORT_LOCAL, 
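+ /* XPRT_TRANSPORT_LOCAL selects the AF_LOCAL (unix socket) RPC + * transport; the address below points the client at gssproxy's + * listening socket. */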
+ .address = (struct sockaddr *)&gssp_localaddr, + .addrsize = sizeof(gssp_localaddr), + .servername = "localhost", + .program = &gssp_program, + .version = GSSPROXY_VERS_1, + .authflavor = RPC_AUTH_NULL, + /* + * Note we want connection to be done in the caller's + * filesystem namespace. We therefore turn off the idle + * timeout, which would result in reconnections being + * done without the correct namespace: + */ + .flags = RPC_CLNT_CREATE_NOPING | + RPC_CLNT_CREATE_NO_IDLE_TIMEOUT + }; + struct rpc_clnt *clnt; + int result = 0; + + clnt = rpc_create(&args); + if (IS_ERR(clnt)) { + dprintk("RPC: failed to create AF_LOCAL gssproxy " + "client (errno %ld).\n", PTR_ERR(clnt)); + result = -PTR_ERR(clnt); + *_clnt = NULL; + goto out; + } + + dprintk("RPC: created new gssp local client (gssp_local_clnt: " + "%p)\n", clnt); + *_clnt = clnt; + +out: + return result; +} + +void init_gssp_clnt(struct sunrpc_net *sn) +{ + mutex_init(&sn->gssp_lock); + sn->gssp_clnt = NULL; + init_waitqueue_head(&sn->gssp_wq); +} + +int set_gssp_clnt(struct net *net) +{ + struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); + struct rpc_clnt *clnt; + int ret; + + mutex_lock(&sn->gssp_lock); + ret = gssp_rpc_create(net, &clnt); + if (!ret) { + if (sn->gssp_clnt) + rpc_shutdown_client(sn->gssp_clnt); + sn->gssp_clnt = clnt; + } + mutex_unlock(&sn->gssp_lock); + wake_up(&sn->gssp_wq); + return ret; +} + +void clear_gssp_clnt(struct sunrpc_net *sn) +{ + mutex_lock(&sn->gssp_lock); + if (sn->gssp_clnt) { + rpc_shutdown_client(sn->gssp_clnt); + sn->gssp_clnt = NULL; + } + mutex_unlock(&sn->gssp_lock); +} + +static struct rpc_clnt *get_gssp_clnt(struct sunrpc_net *sn) +{ + struct rpc_clnt *clnt; + + mutex_lock(&sn->gssp_lock); + clnt = sn->gssp_clnt; + if (clnt) + atomic_inc(&clnt->cl_count); + mutex_unlock(&sn->gssp_lock); + return clnt; +} + +static int gssp_call(struct net *net, struct rpc_message *msg) +{ + struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); + struct rpc_clnt *clnt; + int status; + + clnt = get_gssp_clnt(sn); + if (!clnt) + return -EIO; + status = rpc_call_sync(clnt, msg, 0); + if (status < 0) { + dprintk("gssp: rpc_call returned error %d\n", -status); + switch (status) { + case -EPROTONOSUPPORT: + status = -EINVAL; + break; + case -ECONNREFUSED: + case -ETIMEDOUT: + case -ENOTCONN: + status = -EAGAIN; + break; + case -ERESTARTSYS: + if (signalled ()) + status = -EINTR; + break; + default: + break; + } + } + rpc_release_client(clnt); + return status; +} + + +/* + * Public functions + */ + +/* numbers somewhat arbitrary but large enough for current needs */ +#define GSSX_MAX_OUT_HANDLE 128 +#define GSSX_MAX_SRC_PRINC 256 +#define GSSX_KMEMBUF (GSSX_max_output_handle_sz + \ + GSSX_max_oid_sz + \ + GSSX_max_princ_sz + \ + sizeof(struct svc_cred)) + +int gssp_accept_sec_context_upcall(struct net *net, + struct gssp_upcall_data *data) +{ + struct gssx_ctx ctxh = { + .state = data->in_handle + }; + struct gssx_arg_accept_sec_context arg = { + .input_token = data->in_token, + }; + struct gssx_ctx rctxh = { + /* + * pass in the max length we expect for each of these + * buffers but let the xdr code kmalloc them: + */ + .exported_context_token.len = GSSX_max_output_handle_sz, + .mech.len = GSS_OID_MAX_LEN, + .src_name.display_name.len = GSSX_max_princ_sz + }; + struct gssx_res_accept_sec_context res = { + .context_handle = &rctxh, + .output_token = &data->out_token + }; + struct rpc_message msg = { + .rpc_proc = &gssp_procedures[GSSX_ACCEPT_SEC_CONTEXT], + .rpc_argp = &arg, + .rpc_resp = &res, + 
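/* A NULL cred means the RPC layer falls back to the client's + * default credential (AUTH_NULL for this client). */ +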
.rpc_cred = NULL, /* FIXME ? */ + }; + struct xdr_netobj client_name = { 0 , NULL }; + int ret; + + if (data->in_handle.len != 0) + arg.context_handle = &ctxh; + res.output_token->len = GSSX_max_output_token_sz; + + /* use nfs/ for targ_name ? */ + + ret = gssp_call(net, &msg); + + /* we need to fetch all data even in case of error so + * that we can free special structures if they have been allocated */ + data->major_status = res.status.major_status; + data->minor_status = res.status.minor_status; + if (res.context_handle) { + data->out_handle = rctxh.exported_context_token; + data->mech_oid.len = rctxh.mech.len; + memcpy(data->mech_oid.data, rctxh.mech.data, + data->mech_oid.len); + client_name = rctxh.src_name.display_name; + } + + if (res.options.count == 1) { + gssx_buffer *value = &res.options.data[0].value; + /* Currently we only decode CREDS_VALUE; if we add + * anything else we'll have to loop and match on the + * option name */ + if (value->len == 1) { + /* steal group info from struct svc_cred */ + data->creds = *(struct svc_cred *)value->data; + data->found_creds = 1; + } + /* whether we use it or not, free data */ + kfree(value->data); + } + + if (res.options.count != 0) { + kfree(res.options.data); + } + + /* convert to GSS_NT_HOSTBASED_SERVICE form and set into creds */ + if (data->found_creds && client_name.data != NULL) { + char *c; + + data->creds.cr_principal = kstrndup(client_name.data, + client_name.len, GFP_KERNEL); + if (data->creds.cr_principal) { + /* terminate and remove realm part */ + c = strchr(data->creds.cr_principal, '@'); + if (c) { + *c = '\0'; + + /* change service-hostname delimiter */ + c = strchr(data->creds.cr_principal, '/'); + if (c) *c = '@'; + } + if (!c) { + /* not a service principal */ + kfree(data->creds.cr_principal); + data->creds.cr_principal = NULL; + } + } + } + kfree(client_name.data); + + return ret; +} + +void gssp_free_upcall_data(struct gssp_upcall_data *data) +{ + kfree(data->in_handle.data); + kfree(data->out_handle.data); + kfree(data->out_token.data); + kfree(data->mech_oid.data); + free_svc_cred(&data->creds); +} + +/* + * Initialization stuff + */ + +static const struct rpc_version gssp_version1 = { + .number = GSSPROXY_VERS_1, + .nrprocs = ARRAY_SIZE(gssp_procedures), + .procs = gssp_procedures, +}; + +static const struct rpc_version *gssp_version[] = { + NULL, + &gssp_version1, +}; + +static struct rpc_stat gssp_stats; + +static const struct rpc_program gssp_program = { + .name = "gssproxy", + .number = GSSPROXY_PROGRAM, + .nrvers = ARRAY_SIZE(gssp_version), + .version = gssp_version, + .stats = &gssp_stats, +}; diff --git a/net/sunrpc/auth_gss/gss_rpc_upcall.h b/net/sunrpc/auth_gss/gss_rpc_upcall.h new file mode 100644 index 000000000000..1e542aded90a --- /dev/null +++ b/net/sunrpc/auth_gss/gss_rpc_upcall.h @@ -0,0 +1,48 @@ +/* + * linux/net/sunrpc/gss_rpc_upcall.h + * + * Copyright (C) 2012 Simo Sorce <simo@redhat.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#ifndef _GSS_RPC_UPCALL_H +#define _GSS_RPC_UPCALL_H + +#include <linux/sunrpc/gss_api.h> +#include <linux/sunrpc/auth_gss.h> +#include "gss_rpc_xdr.h" +#include "../netns.h" + +struct gssp_upcall_data { + struct xdr_netobj in_handle; + struct gssp_in_token in_token; + struct xdr_netobj out_handle; + struct xdr_netobj out_token; + struct rpcsec_gss_oid mech_oid; + struct svc_cred creds; + int found_creds; + int major_status; + int minor_status; +}; + +int gssp_accept_sec_context_upcall(struct net *net, + struct gssp_upcall_data *data); +void gssp_free_upcall_data(struct gssp_upcall_data *data); + +void init_gssp_clnt(struct sunrpc_net *); +int set_gssp_clnt(struct net *); +void clear_gssp_clnt(struct sunrpc_net *); +#endif /* _GSS_RPC_UPCALL_H */ diff --git a/net/sunrpc/auth_gss/gss_rpc_xdr.c b/net/sunrpc/auth_gss/gss_rpc_xdr.c new file mode 100644 index 000000000000..357f613df7ff --- /dev/null +++ b/net/sunrpc/auth_gss/gss_rpc_xdr.c @@ -0,0 +1,840 @@ +/* + * GSS Proxy upcall module + * + * Copyright (C) 2012 Simo Sorce <simo@redhat.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include <linux/sunrpc/svcauth.h> +#include "gss_rpc_xdr.h" + +static int gssx_enc_bool(struct xdr_stream *xdr, int v) +{ + __be32 *p; + + p = xdr_reserve_space(xdr, 4); + if (unlikely(p == NULL)) + return -ENOSPC; + *p = v ? 
xdr_one : xdr_zero; + return 0; +} + +static int gssx_dec_bool(struct xdr_stream *xdr, u32 *v) +{ + __be32 *p; + + p = xdr_inline_decode(xdr, 4); + if (unlikely(p == NULL)) + return -ENOSPC; + *v = be32_to_cpu(*p); + return 0; +} + +static int gssx_enc_buffer(struct xdr_stream *xdr, + gssx_buffer *buf) +{ + __be32 *p; + + p = xdr_reserve_space(xdr, sizeof(u32) + buf->len); + if (!p) + return -ENOSPC; + xdr_encode_opaque(p, buf->data, buf->len); + return 0; +} + +static int gssx_enc_in_token(struct xdr_stream *xdr, + struct gssp_in_token *in) +{ + __be32 *p; + + p = xdr_reserve_space(xdr, 4); + if (!p) + return -ENOSPC; + *p = cpu_to_be32(in->page_len); + + /* all we need to do is to write pages */ + xdr_write_pages(xdr, in->pages, in->page_base, in->page_len); + + return 0; +} + + +static int gssx_dec_buffer(struct xdr_stream *xdr, + gssx_buffer *buf) +{ + u32 length; + __be32 *p; + + p = xdr_inline_decode(xdr, 4); + if (unlikely(p == NULL)) + return -ENOSPC; + + length = be32_to_cpup(p); + p = xdr_inline_decode(xdr, length); + if (unlikely(p == NULL)) + return -ENOSPC; + + if (buf->len == 0) { + /* we intentionally are not interested in this buffer */ + return 0; + } + if (length > buf->len) + return -ENOSPC; + + if (!buf->data) { + buf->data = kmemdup(p, length, GFP_KERNEL); + if (!buf->data) + return -ENOMEM; + } else { + memcpy(buf->data, p, length); + } + buf->len = length; + return 0; +} + +static int gssx_enc_option(struct xdr_stream *xdr, + struct gssx_option *opt) +{ + int err; + + err = gssx_enc_buffer(xdr, &opt->option); + if (err) + return err; + err = gssx_enc_buffer(xdr, &opt->value); + return err; +} + +static int gssx_dec_option(struct xdr_stream *xdr, + struct gssx_option *opt) +{ + int err; + + err = gssx_dec_buffer(xdr, &opt->option); + if (err) + return err; + err = gssx_dec_buffer(xdr, &opt->value); + return err; +} + +static int dummy_enc_opt_array(struct xdr_stream *xdr, + struct gssx_option_array *oa) +{ + __be32 *p; + + if (oa->count != 0) + return -EINVAL; + + p = xdr_reserve_space(xdr, 4); + if (!p) + return -ENOSPC; + *p = 0; + + return 0; +} + +static int dummy_dec_opt_array(struct xdr_stream *xdr, + struct gssx_option_array *oa) +{ + struct gssx_option dummy; + u32 count, i; + __be32 *p; + + p = xdr_inline_decode(xdr, 4); + if (unlikely(p == NULL)) + return -ENOSPC; + count = be32_to_cpup(p++); + memset(&dummy, 0, sizeof(dummy)); + for (i = 0; i < count; i++) { + gssx_dec_option(xdr, &dummy); + } + + oa->count = 0; + oa->data = NULL; + return 0; +} + +static int get_s32(void **p, void *max, s32 *res) +{ + void *base = *p; + void *next = (void *)((char *)base + sizeof(s32)); + if (unlikely(next > max || next < base)) + return -EINVAL; + memcpy(res, base, sizeof(s32)); + *p = next; + return 0; +} + +static int gssx_dec_linux_creds(struct xdr_stream *xdr, + struct svc_cred *creds) +{ + u32 length; + __be32 *p; + void *q, *end; + s32 tmp; + int N, i, err; + + p = xdr_inline_decode(xdr, 4); + if (unlikely(p == NULL)) + return -ENOSPC; + + length = be32_to_cpup(p); + + /* FIXME: we do not want to use the scratch buffer for this one + * may need to use functions that allows us to access an io vector + * directly */ + p = xdr_inline_decode(xdr, length); + if (unlikely(p == NULL)) + return -ENOSPC; + + q = p; + end = q + length; + + /* uid */ + err = get_s32(&q, end, &tmp); + if (err) + return err; + creds->cr_uid = make_kuid(&init_user_ns, tmp); + + /* gid */ + err = get_s32(&q, end, &tmp); + if (err) + return err; + creds->cr_gid = make_kgid(&init_user_ns, tmp); + + 
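/* Note that cr_uid and cr_gid are not run through uid_valid()/ + * gid_valid() at this point, unlike the supplementary gids below. */ +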
/* number of additional gid's */ + err = get_s32(&q, end, &tmp); + if (err) + return err; + N = tmp; + creds->cr_group_info = groups_alloc(N); + if (creds->cr_group_info == NULL) + return -ENOMEM; + + /* gid's */ + for (i = 0; i < N; i++) { + kgid_t kgid; + err = get_s32(&q, end, &tmp); + if (err) + goto out_free_groups; + err = -EINVAL; + kgid = make_kgid(&init_user_ns, tmp); + if (!gid_valid(kgid)) + goto out_free_groups; + GROUP_AT(creds->cr_group_info, i) = kgid; + } + + return 0; +out_free_groups: + groups_free(creds->cr_group_info); + return err; +} + +static int gssx_dec_option_array(struct xdr_stream *xdr, + struct gssx_option_array *oa) +{ + struct svc_cred *creds; + u32 count, i; + __be32 *p; + int err; + + p = xdr_inline_decode(xdr, 4); + if (unlikely(p == NULL)) + return -ENOSPC; + count = be32_to_cpup(p++); + if (!count) + return 0; + + /* we recognize only 1 currently: CREDS_VALUE */ + oa->count = 1; + + oa->data = kmalloc(sizeof(struct gssx_option), GFP_KERNEL); + if (!oa->data) + return -ENOMEM; + + creds = kmalloc(sizeof(struct svc_cred), GFP_KERNEL); + if (!creds) { + kfree(oa->data); + return -ENOMEM; + } + + oa->data[0].option.data = CREDS_VALUE; + oa->data[0].option.len = sizeof(CREDS_VALUE); + oa->data[0].value.data = (void *)creds; + oa->data[0].value.len = 0; + + for (i = 0; i < count; i++) { + gssx_buffer dummy = { 0, NULL }; + u32 length; + + /* option buffer */ + p = xdr_inline_decode(xdr, 4); + if (unlikely(p == NULL)) + return -ENOSPC; + + length = be32_to_cpup(p); + p = xdr_inline_decode(xdr, length); + if (unlikely(p == NULL)) + return -ENOSPC; + + if (length == sizeof(CREDS_VALUE) && + memcmp(p, CREDS_VALUE, sizeof(CREDS_VALUE)) == 0) { + /* We have creds here. parse them */ + err = gssx_dec_linux_creds(xdr, creds); + if (err) + return err; + oa->data[0].value.len = 1; /* presence */ + } else { + /* consume uninteresting buffer */ + err = gssx_dec_buffer(xdr, &dummy); + if (err) + return err; + } + } + return 0; +} + +static int gssx_dec_status(struct xdr_stream *xdr, + struct gssx_status *status) +{ + __be32 *p; + int err; + + /* status->major_status */ + p = xdr_inline_decode(xdr, 8); + if (unlikely(p == NULL)) + return -ENOSPC; + p = xdr_decode_hyper(p, &status->major_status); + + /* status->mech */ + err = gssx_dec_buffer(xdr, &status->mech); + if (err) + return err; + + /* status->minor_status */ + p = xdr_inline_decode(xdr, 8); + if (unlikely(p == NULL)) + return -ENOSPC; + p = xdr_decode_hyper(p, &status->minor_status); + + /* status->major_status_string */ + err = gssx_dec_buffer(xdr, &status->major_status_string); + if (err) + return err; + + /* status->minor_status_string */ + err = gssx_dec_buffer(xdr, &status->minor_status_string); + if (err) + return err; + + /* status->server_ctx */ + err = gssx_dec_buffer(xdr, &status->server_ctx); + if (err) + return err; + + /* we assume we have no options for now, so simply consume them */ + /* status->options */ + err = dummy_dec_opt_array(xdr, &status->options); + + return err; +} + +static int gssx_enc_call_ctx(struct xdr_stream *xdr, + struct gssx_call_ctx *ctx) +{ + struct gssx_option opt; + __be32 *p; + int err; + + /* ctx->locale */ + err = gssx_enc_buffer(xdr, &ctx->locale); + if (err) + return err; + + /* ctx->server_ctx */ + err = gssx_enc_buffer(xdr, &ctx->server_ctx); + if (err) + return err; + + /* we always want to ask for lucid contexts */ + /* ctx->options */ + p = xdr_reserve_space(xdr, 4); + *p = cpu_to_be32(2); + + /* we want a lucid_v1 context */ + opt.option.data = LUCID_OPTION; + 
opt.option.len = sizeof(LUCID_OPTION); + opt.value.data = LUCID_VALUE; + opt.value.len = sizeof(LUCID_VALUE); + err = gssx_enc_option(xdr, &opt); + + /* ..and user creds */ + opt.option.data = CREDS_OPTION; + opt.option.len = sizeof(CREDS_OPTION); + opt.value.data = CREDS_VALUE; + opt.value.len = sizeof(CREDS_VALUE); + err = gssx_enc_option(xdr, &opt); + + return err; +} + +static int gssx_dec_name_attr(struct xdr_stream *xdr, + struct gssx_name_attr *attr) +{ + int err; + + /* attr->attr */ + err = gssx_dec_buffer(xdr, &attr->attr); + if (err) + return err; + + /* attr->value */ + err = gssx_dec_buffer(xdr, &attr->value); + if (err) + return err; + + /* attr->extensions */ + err = dummy_dec_opt_array(xdr, &attr->extensions); + + return err; +} + +static int dummy_enc_nameattr_array(struct xdr_stream *xdr, + struct gssx_name_attr_array *naa) +{ + __be32 *p; + + if (naa->count != 0) + return -EINVAL; + + p = xdr_reserve_space(xdr, 4); + if (!p) + return -ENOSPC; + *p = 0; + + return 0; +} + +static int dummy_dec_nameattr_array(struct xdr_stream *xdr, + struct gssx_name_attr_array *naa) +{ + struct gssx_name_attr dummy; + u32 count, i; + __be32 *p; + + p = xdr_inline_decode(xdr, 4); + if (unlikely(p == NULL)) + return -ENOSPC; + count = be32_to_cpup(p++); + for (i = 0; i < count; i++) { + gssx_dec_name_attr(xdr, &dummy); + } + + naa->count = 0; + naa->data = NULL; + return 0; +} + +static struct xdr_netobj zero_netobj = {}; + +static struct gssx_name_attr_array zero_name_attr_array = {}; + +static struct gssx_option_array zero_option_array = {}; + +static int gssx_enc_name(struct xdr_stream *xdr, + struct gssx_name *name) +{ + int err; + + /* name->display_name */ + err = gssx_enc_buffer(xdr, &name->display_name); + if (err) + return err; + + /* name->name_type */ + err = gssx_enc_buffer(xdr, &zero_netobj); + if (err) + return err; + + /* name->exported_name */ + err = gssx_enc_buffer(xdr, &zero_netobj); + if (err) + return err; + + /* name->exported_composite_name */ + err = gssx_enc_buffer(xdr, &zero_netobj); + if (err) + return err; + + /* leave name_attributes empty for now, will add once we have any + * to pass up at all */ + /* name->name_attributes */ + err = dummy_enc_nameattr_array(xdr, &zero_name_attr_array); + if (err) + return err; + + /* leave options empty for now, will add once we have any options + * to pass up at all */ + /* name->extensions */ + err = dummy_enc_opt_array(xdr, &zero_option_array); + + return err; +} + +static int gssx_dec_name(struct xdr_stream *xdr, + struct gssx_name *name) +{ + struct xdr_netobj dummy_netobj; + struct gssx_name_attr_array dummy_name_attr_array; + struct gssx_option_array dummy_option_array; + int err; + + /* name->display_name */ + err = gssx_dec_buffer(xdr, &name->display_name); + if (err) + return err; + + /* name->name_type */ + err = gssx_dec_buffer(xdr, &dummy_netobj); + if (err) + return err; + + /* name->exported_name */ + err = gssx_dec_buffer(xdr, &dummy_netobj); + if (err) + return err; + + /* name->exported_composite_name */ + err = gssx_dec_buffer(xdr, &dummy_netobj); + if (err) + return err; + + /* we assume we have no attributes for now, so simply consume them */ + /* name->name_attributes */ + err = dummy_dec_nameattr_array(xdr, &dummy_name_attr_array); + if (err) + return err; + + /* we assume we have no options for now, so simply consume them */ + /* name->extensions */ + err = dummy_dec_opt_array(xdr, &dummy_option_array); + + return err; +} + +static int dummy_enc_credel_array(struct xdr_stream *xdr, + struct 
gssx_cred_element_array *cea) +{ + __be32 *p; + + if (cea->count != 0) + return -EINVAL; + + p = xdr_reserve_space(xdr, 4); + if (!p) + return -ENOSPC; + *p = 0; + + return 0; +} + +static int gssx_enc_cred(struct xdr_stream *xdr, + struct gssx_cred *cred) +{ + int err; + + /* cred->desired_name */ + err = gssx_enc_name(xdr, &cred->desired_name); + if (err) + return err; + + /* cred->elements */ + err = dummy_enc_credel_array(xdr, &cred->elements); + + /* cred->cred_handle_reference */ + err = gssx_enc_buffer(xdr, &cred->cred_handle_reference); + if (err) + return err; + + /* cred->needs_release */ + err = gssx_enc_bool(xdr, cred->needs_release); + + return err; +} + +static int gssx_enc_ctx(struct xdr_stream *xdr, + struct gssx_ctx *ctx) +{ + __be32 *p; + int err; + + /* ctx->exported_context_token */ + err = gssx_enc_buffer(xdr, &ctx->exported_context_token); + if (err) + return err; + + /* ctx->state */ + err = gssx_enc_buffer(xdr, &ctx->state); + if (err) + return err; + + /* ctx->need_release */ + err = gssx_enc_bool(xdr, ctx->need_release); + if (err) + return err; + + /* ctx->mech */ + err = gssx_enc_buffer(xdr, &ctx->mech); + if (err) + return err; + + /* ctx->src_name */ + err = gssx_enc_name(xdr, &ctx->src_name); + if (err) + return err; + + /* ctx->targ_name */ + err = gssx_enc_name(xdr, &ctx->targ_name); + if (err) + return err; + + /* ctx->lifetime */ + p = xdr_reserve_space(xdr, 8+8); + if (!p) + return -ENOSPC; + p = xdr_encode_hyper(p, ctx->lifetime); + + /* ctx->ctx_flags */ + p = xdr_encode_hyper(p, ctx->ctx_flags); + + /* ctx->locally_initiated */ + err = gssx_enc_bool(xdr, ctx->locally_initiated); + if (err) + return err; + + /* ctx->open */ + err = gssx_enc_bool(xdr, ctx->open); + if (err) + return err; + + /* leave options empty for now, will add once we have any options + * to pass up at all */ + /* ctx->options */ + err = dummy_enc_opt_array(xdr, &ctx->options); + + return err; +} + +static int gssx_dec_ctx(struct xdr_stream *xdr, + struct gssx_ctx *ctx) +{ + __be32 *p; + int err; + + /* ctx->exported_context_token */ + err = gssx_dec_buffer(xdr, &ctx->exported_context_token); + if (err) + return err; + + /* ctx->state */ + err = gssx_dec_buffer(xdr, &ctx->state); + if (err) + return err; + + /* ctx->need_release */ + err = gssx_dec_bool(xdr, &ctx->need_release); + if (err) + return err; + + /* ctx->mech */ + err = gssx_dec_buffer(xdr, &ctx->mech); + if (err) + return err; + + /* ctx->src_name */ + err = gssx_dec_name(xdr, &ctx->src_name); + if (err) + return err; + + /* ctx->targ_name */ + err = gssx_dec_name(xdr, &ctx->targ_name); + if (err) + return err; + + /* ctx->lifetime */ + p = xdr_inline_decode(xdr, 8+8); + if (unlikely(p == NULL)) + return -ENOSPC; + p = xdr_decode_hyper(p, &ctx->lifetime); + + /* ctx->ctx_flags */ + p = xdr_decode_hyper(p, &ctx->ctx_flags); + + /* ctx->locally_initiated */ + err = gssx_dec_bool(xdr, &ctx->locally_initiated); + if (err) + return err; + + /* ctx->open */ + err = gssx_dec_bool(xdr, &ctx->open); + if (err) + return err; + + /* we assume we have no options for now, so simply consume them */ + /* ctx->options */ + err = dummy_dec_opt_array(xdr, &ctx->options); + + return err; +} + +static int gssx_enc_cb(struct xdr_stream *xdr, struct gssx_cb *cb) +{ + __be32 *p; + int err; + + /* cb->initiator_addrtype */ + p = xdr_reserve_space(xdr, 8); + if (!p) + return -ENOSPC; + p = xdr_encode_hyper(p, cb->initiator_addrtype); + + /* cb->initiator_address */ + err = gssx_enc_buffer(xdr, &cb->initiator_address); + if (err) + return err; + 
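/* Each channel-binding address is a 64-bit address type followed + * by an opaque buffer; the acceptor fields below mirror the + * initiator encoding above. */ +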
+ /* cb->acceptor_addrtype */ + p = xdr_reserve_space(xdr, 8); + if (!p) + return -ENOSPC; + p = xdr_encode_hyper(p, cb->acceptor_addrtype); + + /* cb->acceptor_address */ + err = gssx_enc_buffer(xdr, &cb->acceptor_address); + if (err) + return err; + + /* cb->application_data */ + err = gssx_enc_buffer(xdr, &cb->application_data); + + return err; +} + +void gssx_enc_accept_sec_context(struct rpc_rqst *req, + struct xdr_stream *xdr, + struct gssx_arg_accept_sec_context *arg) +{ + int err; + + err = gssx_enc_call_ctx(xdr, &arg->call_ctx); + if (err) + goto done; + + /* arg->context_handle */ + if (arg->context_handle) { + err = gssx_enc_ctx(xdr, arg->context_handle); + if (err) + goto done; + } else { + err = gssx_enc_bool(xdr, 0); + } + + /* arg->cred_handle */ + if (arg->cred_handle) { + err = gssx_enc_cred(xdr, arg->cred_handle); + if (err) + goto done; + } else { + err = gssx_enc_bool(xdr, 0); + } + + /* arg->input_token */ + err = gssx_enc_in_token(xdr, &arg->input_token); + if (err) + goto done; + + /* arg->input_cb */ + if (arg->input_cb) { + err = gssx_enc_cb(xdr, arg->input_cb); + if (err) + goto done; + } else { + err = gssx_enc_bool(xdr, 0); + } + + err = gssx_enc_bool(xdr, arg->ret_deleg_cred); + if (err) + goto done; + + /* leave options empty for now, will add once we have any options + * to pass up at all */ + /* arg->options */ + err = dummy_enc_opt_array(xdr, &arg->options); + +done: + if (err) + dprintk("RPC: gssx_enc_accept_sec_context: %d\n", err); +} + +int gssx_dec_accept_sec_context(struct rpc_rqst *rqstp, + struct xdr_stream *xdr, + struct gssx_res_accept_sec_context *res) +{ + u32 value_follows; + int err; + + /* res->status */ + err = gssx_dec_status(xdr, &res->status); + if (err) + return err; + + /* res->context_handle */ + err = gssx_dec_bool(xdr, &value_follows); + if (err) + return err; + if (value_follows) { + err = gssx_dec_ctx(xdr, res->context_handle); + if (err) + return err; + } else { + res->context_handle = NULL; + } + + /* res->output_token */ + err = gssx_dec_bool(xdr, &value_follows); + if (err) + return err; + if (value_follows) { + err = gssx_dec_buffer(xdr, res->output_token); + if (err) + return err; + } else { + res->output_token = NULL; + } + + /* res->delegated_cred_handle */ + err = gssx_dec_bool(xdr, &value_follows); + if (err) + return err; + if (value_follows) { + /* we do not support upcall servers sending this data. */ + return -EINVAL; + } + + /* res->options */ + err = gssx_dec_option_array(xdr, &res->options); + + return err; +} diff --git a/net/sunrpc/auth_gss/gss_rpc_xdr.h b/net/sunrpc/auth_gss/gss_rpc_xdr.h new file mode 100644 index 000000000000..1c98b27d870c --- /dev/null +++ b/net/sunrpc/auth_gss/gss_rpc_xdr.h @@ -0,0 +1,264 @@ +/* + * GSS Proxy upcall module + * + * Copyright (C) 2012 Simo Sorce <simo@redhat.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ + +#ifndef _LINUX_GSS_RPC_XDR_H +#define _LINUX_GSS_RPC_XDR_H + +#include <linux/sunrpc/xdr.h> +#include <linux/sunrpc/clnt.h> +#include <linux/sunrpc/xprtsock.h> + +#ifdef RPC_DEBUG +# define RPCDBG_FACILITY RPCDBG_AUTH +#endif + +#define LUCID_OPTION "exported_context_type" +#define LUCID_VALUE "linux_lucid_v1" +#define CREDS_OPTION "exported_creds_type" +#define CREDS_VALUE "linux_creds_v1" + +typedef struct xdr_netobj gssx_buffer; +typedef struct xdr_netobj utf8string; +typedef struct xdr_netobj gssx_OID; + +enum gssx_cred_usage { + GSSX_C_INITIATE = 1, + GSSX_C_ACCEPT = 2, + GSSX_C_BOTH = 3, +}; + +struct gssx_option { + gssx_buffer option; + gssx_buffer value; +}; + +struct gssx_option_array { + u32 count; + struct gssx_option *data; +}; + +struct gssx_status { + u64 major_status; + gssx_OID mech; + u64 minor_status; + utf8string major_status_string; + utf8string minor_status_string; + gssx_buffer server_ctx; + struct gssx_option_array options; +}; + +struct gssx_call_ctx { + utf8string locale; + gssx_buffer server_ctx; + struct gssx_option_array options; +}; + +struct gssx_name_attr { + gssx_buffer attr; + gssx_buffer value; + struct gssx_option_array extensions; +}; + +struct gssx_name_attr_array { + u32 count; + struct gssx_name_attr *data; +}; + +struct gssx_name { + gssx_buffer display_name; +}; +typedef struct gssx_name gssx_name; + +struct gssx_cred_element { + gssx_name MN; + gssx_OID mech; + u32 cred_usage; + u64 initiator_time_rec; + u64 acceptor_time_rec; + struct gssx_option_array options; +}; + +struct gssx_cred_element_array { + u32 count; + struct gssx_cred_element *data; +}; + +struct gssx_cred { + gssx_name desired_name; + struct gssx_cred_element_array elements; + gssx_buffer cred_handle_reference; + u32 needs_release; +}; + +struct gssx_ctx { + gssx_buffer exported_context_token; + gssx_buffer state; + u32 need_release; + gssx_OID mech; + gssx_name src_name; + gssx_name targ_name; + u64 lifetime; + u64 ctx_flags; + u32 locally_initiated; + u32 open; + struct gssx_option_array options; +}; + +struct gssx_cb { + u64 initiator_addrtype; + gssx_buffer initiator_address; + u64 acceptor_addrtype; + gssx_buffer acceptor_address; + gssx_buffer application_data; +}; + + +/* This structure is not defined in the protocol. 
+ * It is used in the kernel to carry around a big buffer + * as a set of pages */ +struct gssp_in_token { + struct page **pages; /* Array of contiguous pages */ + unsigned int page_base; /* Start of page data */ + unsigned int page_len; /* Length of page data */ +}; + +struct gssx_arg_accept_sec_context { + struct gssx_call_ctx call_ctx; + struct gssx_ctx *context_handle; + struct gssx_cred *cred_handle; + struct gssp_in_token input_token; + struct gssx_cb *input_cb; + u32 ret_deleg_cred; + struct gssx_option_array options; +}; + +struct gssx_res_accept_sec_context { + struct gssx_status status; + struct gssx_ctx *context_handle; + gssx_buffer *output_token; + /* struct gssx_cred *delegated_cred_handle; not used in kernel */ + struct gssx_option_array options; +}; + + + +#define gssx_enc_indicate_mechs NULL +#define gssx_dec_indicate_mechs NULL +#define gssx_enc_get_call_context NULL +#define gssx_dec_get_call_context NULL +#define gssx_enc_import_and_canon_name NULL +#define gssx_dec_import_and_canon_name NULL +#define gssx_enc_export_cred NULL +#define gssx_dec_export_cred NULL +#define gssx_enc_import_cred NULL +#define gssx_dec_import_cred NULL +#define gssx_enc_acquire_cred NULL +#define gssx_dec_acquire_cred NULL +#define gssx_enc_store_cred NULL +#define gssx_dec_store_cred NULL +#define gssx_enc_init_sec_context NULL +#define gssx_dec_init_sec_context NULL +void gssx_enc_accept_sec_context(struct rpc_rqst *req, + struct xdr_stream *xdr, + struct gssx_arg_accept_sec_context *args); +int gssx_dec_accept_sec_context(struct rpc_rqst *rqstp, + struct xdr_stream *xdr, + struct gssx_res_accept_sec_context *res); +#define gssx_enc_release_handle NULL +#define gssx_dec_release_handle NULL +#define gssx_enc_get_mic NULL +#define gssx_dec_get_mic NULL +#define gssx_enc_verify NULL +#define gssx_dec_verify NULL +#define gssx_enc_wrap NULL +#define gssx_dec_wrap NULL +#define gssx_enc_unwrap NULL +#define gssx_dec_unwrap NULL +#define gssx_enc_wrap_size_limit NULL +#define gssx_dec_wrap_size_limit NULL + +/* non implemented calls are set to 0 size */ +#define GSSX_ARG_indicate_mechs_sz 0 +#define GSSX_RES_indicate_mechs_sz 0 +#define GSSX_ARG_get_call_context_sz 0 +#define GSSX_RES_get_call_context_sz 0 +#define GSSX_ARG_import_and_canon_name_sz 0 +#define GSSX_RES_import_and_canon_name_sz 0 +#define GSSX_ARG_export_cred_sz 0 +#define GSSX_RES_export_cred_sz 0 +#define GSSX_ARG_import_cred_sz 0 +#define GSSX_RES_import_cred_sz 0 +#define GSSX_ARG_acquire_cred_sz 0 +#define GSSX_RES_acquire_cred_sz 0 +#define GSSX_ARG_store_cred_sz 0 +#define GSSX_RES_store_cred_sz 0 +#define GSSX_ARG_init_sec_context_sz 0 +#define GSSX_RES_init_sec_context_sz 0 + +#define GSSX_default_in_call_ctx_sz (4 + 4 + 4 + \ + 8 + sizeof(LUCID_OPTION) + sizeof(LUCID_VALUE) + \ + 8 + sizeof(CREDS_OPTION) + sizeof(CREDS_VALUE)) +#define GSSX_default_in_ctx_hndl_sz (4 + 4+8 + 4 + 4 + 6*4 + 6*4 + 8 + 8 + \ + 4 + 4 + 4) +#define GSSX_default_in_cred_sz 4 /* we send in no cred_handle */ +#define GSSX_default_in_token_sz 4 /* does *not* include token data */ +#define GSSX_default_in_cb_sz 4 /* we do not use channel bindings */ +#define GSSX_ARG_accept_sec_context_sz (GSSX_default_in_call_ctx_sz + \ + GSSX_default_in_ctx_hndl_sz + \ + GSSX_default_in_cred_sz + \ + GSSX_default_in_token_sz + \ + GSSX_default_in_cb_sz + \ + 4 /* no deleg creds boolean */ + \ + 4) /* empty options */ + +/* somewhat arbitrary numbers but large enough (we ignore some of the data + * sent down, but it is part of the protocol so we need enough space to 
take + * it in) */ +#define GSSX_default_status_sz 8 + 24 + 8 + 256 + 256 + 16 + 4 +#define GSSX_max_output_handle_sz 128 +#define GSSX_max_oid_sz 16 +#define GSSX_max_princ_sz 256 +#define GSSX_default_ctx_sz (GSSX_max_output_handle_sz + \ + 16 + 4 + GSSX_max_oid_sz + \ + 2 * GSSX_max_princ_sz + \ + 8 + 8 + 4 + 4 + 4) +#define GSSX_max_output_token_sz 1024 +#define GSSX_max_creds_sz (4 + 4 + 4 + NGROUPS_MAX * 4) +#define GSSX_RES_accept_sec_context_sz (GSSX_default_status_sz + \ + GSSX_default_ctx_sz + \ + GSSX_max_output_token_sz + \ + 4 + GSSX_max_creds_sz) + +#define GSSX_ARG_release_handle_sz 0 +#define GSSX_RES_release_handle_sz 0 +#define GSSX_ARG_get_mic_sz 0 +#define GSSX_RES_get_mic_sz 0 +#define GSSX_ARG_verify_sz 0 +#define GSSX_RES_verify_sz 0 +#define GSSX_ARG_wrap_sz 0 +#define GSSX_RES_wrap_sz 0 +#define GSSX_ARG_unwrap_sz 0 +#define GSSX_RES_unwrap_sz 0 +#define GSSX_ARG_wrap_size_limit_sz 0 +#define GSSX_RES_wrap_size_limit_sz 0 + + + +#endif /* _LINUX_GSS_RPC_XDR_H */ diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index 73e957386600..871c73c92165 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -48,8 +48,8 @@ #include <linux/sunrpc/svcauth.h> #include <linux/sunrpc/svcauth_gss.h> #include <linux/sunrpc/cache.h> +#include "gss_rpc_upcall.h" -#include "../netns.h" #ifdef RPC_DEBUG # define RPCDBG_FACILITY RPCDBG_AUTH @@ -182,12 +182,6 @@ static void rsi_request(struct cache_detail *cd, (*bpp)[-1] = '\n'; } -static int rsi_upcall(struct cache_detail *cd, struct cache_head *h) -{ - return sunrpc_cache_pipe_upcall(cd, h, rsi_request); -} - - static int rsi_parse(struct cache_detail *cd, char *mesg, int mlen) { @@ -275,7 +269,7 @@ static struct cache_detail rsi_cache_template = { .hash_size = RSI_HASHMAX, .name = "auth.rpcsec.init", .cache_put = rsi_put, - .cache_upcall = rsi_upcall, + .cache_request = rsi_request, .cache_parse = rsi_parse, .match = rsi_match, .init = rsi_init, @@ -418,6 +412,7 @@ static int rsc_parse(struct cache_detail *cd, { /* contexthandle expiry [ uid gid N <n gids> mechname ...mechdata... ] */ char *buf = mesg; + int id; int len, rv; struct rsc rsci, *rscp = NULL; time_t expiry; @@ -444,7 +439,7 @@ static int rsc_parse(struct cache_detail *cd, goto out; /* uid, or NEGATIVE */ - rv = get_int(&mesg, &rsci.cred.cr_uid); + rv = get_int(&mesg, &id); if (rv == -EINVAL) goto out; if (rv == -ENOENT) @@ -452,9 +447,21 @@ static int rsc_parse(struct cache_detail *cd, else { int N, i; + /* + * NOTE: we skip uid_valid()/gid_valid() checks here: + * instead, * -1 id's are later mapped to the + * (export-specific) anonymous id by nfsd_setuser. + * + * (But supplementary gid's get no such special + * treatment so are checked for validity here.) 
+ */ + /* uid */ + rsci.cred.cr_uid = make_kuid(&init_user_ns, id); + /* gid */ - if (get_int(&mesg, &rsci.cred.cr_gid)) + if (get_int(&mesg, &id)) goto out; + rsci.cred.cr_gid = make_kgid(&init_user_ns, id); /* number of additional gid's */ if (get_int(&mesg, &N)) @@ -467,11 +474,10 @@ static int rsc_parse(struct cache_detail *cd, /* gid's */ status = -EINVAL; for (i=0; i<N; i++) { - gid_t gid; kgid_t kgid; - if (get_int(&mesg, &gid)) + if (get_int(&mesg, &id)) goto out; - kgid = make_kgid(&init_user_ns, gid); + kgid = make_kgid(&init_user_ns, id); if (!gid_valid(kgid)) goto out; GROUP_AT(rsci.cred.cr_group_info, i) = kgid; @@ -491,7 +497,8 @@ static int rsc_parse(struct cache_detail *cd, len = qword_get(&mesg, buf, mlen); if (len < 0) goto out; - status = gss_import_sec_context(buf, len, gm, &rsci.mechctx, GFP_KERNEL); + status = gss_import_sec_context(buf, len, gm, &rsci.mechctx, + NULL, GFP_KERNEL); if (status) goto out; @@ -499,8 +506,10 @@ static int rsc_parse(struct cache_detail *cd, len = qword_get(&mesg, buf, mlen); if (len > 0) { rsci.cred.cr_principal = kstrdup(buf, GFP_KERNEL); - if (!rsci.cred.cr_principal) + if (!rsci.cred.cr_principal) { + status = -ENOMEM; goto out; + } } } @@ -817,13 +826,17 @@ read_u32_from_xdr_buf(struct xdr_buf *buf, int base, u32 *obj) * The server uses base of head iovec as read pointer, while the * client uses separate pointer. */ static int -unwrap_integ_data(struct xdr_buf *buf, u32 seq, struct gss_ctx *ctx) +unwrap_integ_data(struct svc_rqst *rqstp, struct xdr_buf *buf, u32 seq, struct gss_ctx *ctx) { int stat = -EINVAL; u32 integ_len, maj_stat; struct xdr_netobj mic; struct xdr_buf integ_buf; + /* Did we already verify the signature on the original pass through? */ + if (rqstp->rq_deferred) + return 0; + integ_len = svc_getnl(&buf->head[0]); if (integ_len & 3) return stat; @@ -846,6 +859,8 @@ unwrap_integ_data(struct xdr_buf *buf, u32 seq, struct gss_ctx *ctx) goto out; if (svc_getnl(&buf->head[0]) != seq) goto out; + /* trim off the mic at the end before returning */ + xdr_buf_trim(buf, mic.len + 4); stat = 0; out: kfree(mic.data); @@ -975,13 +990,10 @@ gss_write_init_verf(struct cache_detail *cd, struct svc_rqst *rqstp, } static inline int -gss_read_verf(struct rpc_gss_wire_cred *gc, - struct kvec *argv, __be32 *authp, - struct xdr_netobj *in_handle, - struct xdr_netobj *in_token) +gss_read_common_verf(struct rpc_gss_wire_cred *gc, + struct kvec *argv, __be32 *authp, + struct xdr_netobj *in_handle) { - struct xdr_netobj tmpobj; - /* Read the verifier; should be NULL: */ *authp = rpc_autherr_badverf; if (argv->iov_len < 2 * 4) @@ -997,6 +1009,23 @@ gss_read_verf(struct rpc_gss_wire_cred *gc, if (dup_netobj(in_handle, &gc->gc_ctx)) return SVC_CLOSE; *authp = rpc_autherr_badverf; + + return 0; +} + +static inline int +gss_read_verf(struct rpc_gss_wire_cred *gc, + struct kvec *argv, __be32 *authp, + struct xdr_netobj *in_handle, + struct xdr_netobj *in_token) +{ + struct xdr_netobj tmpobj; + int res; + + res = gss_read_common_verf(gc, argv, authp, in_handle); + if (res) + return res; + if (svc_safe_getnetobj(argv, &tmpobj)) { kfree(in_handle->data); return SVC_DENIED; @@ -1009,6 +1038,40 @@ gss_read_verf(struct rpc_gss_wire_cred *gc, return 0; } +/* Ok this is really heavily depending on a set of semantics in + * how rqstp is set up by svc_recv and pages laid down by the + * server when reading a request. 
We are basically guaranteed that + * the token lays all down linearly across a set of pages, starting + * at iov_base in rq_arg.head[0] which happens to be the first of a + * set of pages stored in rq_pages[]. + * rq_arg.head[0].iov_base will provide us the page_base to pass + * to the upcall. + */ +static inline int +gss_read_proxy_verf(struct svc_rqst *rqstp, + struct rpc_gss_wire_cred *gc, __be32 *authp, + struct xdr_netobj *in_handle, + struct gssp_in_token *in_token) +{ + struct kvec *argv = &rqstp->rq_arg.head[0]; + u32 inlen; + int res; + + res = gss_read_common_verf(gc, argv, authp, in_handle); + if (res) + return res; + + inlen = svc_getnl(argv); + if (inlen > (argv->iov_len + rqstp->rq_arg.page_len)) + return SVC_DENIED; + + in_token->pages = rqstp->rq_pages; + in_token->page_base = (ulong)argv->iov_base & ~PAGE_MASK; + in_token->page_len = inlen; + + return 0; +} + static inline int gss_write_resv(struct kvec *resv, size_t size_limit, struct xdr_netobj *out_handle, struct xdr_netobj *out_token, @@ -1036,7 +1099,7 @@ gss_write_resv(struct kvec *resv, size_t size_limit, * the upcall results are available, write the verifier and result. * Otherwise, drop the request pending an answer to the upcall. */ -static int svcauth_gss_handle_init(struct svc_rqst *rqstp, +static int svcauth_gss_legacy_init(struct svc_rqst *rqstp, struct rpc_gss_wire_cred *gc, __be32 *authp) { struct kvec *argv = &rqstp->rq_arg.head[0]; @@ -1076,6 +1139,287 @@ out: return ret; } +static int gss_proxy_save_rsc(struct cache_detail *cd, + struct gssp_upcall_data *ud, + uint64_t *handle) +{ + struct rsc rsci, *rscp = NULL; + static atomic64_t ctxhctr; + long long ctxh; + struct gss_api_mech *gm = NULL; + time_t expiry; + int status = -EINVAL; + + memset(&rsci, 0, sizeof(rsci)); + /* context handle */ + status = -ENOMEM; + /* the handle needs to be just a unique id, + * use a static counter */ + ctxh = atomic64_inc_return(&ctxhctr); + + /* make a copy for the caller */ + *handle = ctxh; + + /* make a copy for the rsc cache */ + if (dup_to_netobj(&rsci.handle, (char *)handle, sizeof(uint64_t))) + goto out; + rscp = rsc_lookup(cd, &rsci); + if (!rscp) + goto out; + + /* creds */ + if (!ud->found_creds) { + /* userspace seem buggy, we should always get at least a + * mapping to nobody */ + dprintk("RPC: No creds found, marking Negative!\n"); + set_bit(CACHE_NEGATIVE, &rsci.h.flags); + } else { + + /* steal creds */ + rsci.cred = ud->creds; + memset(&ud->creds, 0, sizeof(struct svc_cred)); + + status = -EOPNOTSUPP; + /* get mech handle from OID */ + gm = gss_mech_get_by_OID(&ud->mech_oid); + if (!gm) + goto out; + + status = -EINVAL; + /* mech-specific data: */ + status = gss_import_sec_context(ud->out_handle.data, + ud->out_handle.len, + gm, &rsci.mechctx, + &expiry, GFP_KERNEL); + if (status) + goto out; + } + + rsci.h.expiry_time = expiry; + rscp = rsc_update(cd, &rsci, rscp); + status = 0; +out: + gss_mech_put(gm); + rsc_free(&rsci); + if (rscp) + cache_put(&rscp->h, cd); + else + status = -ENOMEM; + return status; +} + +static int svcauth_gss_proxy_init(struct svc_rqst *rqstp, + struct rpc_gss_wire_cred *gc, __be32 *authp) +{ + struct kvec *resv = &rqstp->rq_res.head[0]; + struct xdr_netobj cli_handle; + struct gssp_upcall_data ud; + uint64_t handle; + int status; + int ret; + struct net *net = rqstp->rq_xprt->xpt_net; + struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); + + memset(&ud, 0, sizeof(ud)); + ret = gss_read_proxy_verf(rqstp, gc, authp, + &ud.in_handle, &ud.in_token); + if (ret) + return ret; + + 
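/* Default to closing the connection: every failure below jumps to + * the "out" label, which frees the upcall data before returning. */ +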
ret = SVC_CLOSE; + + /* Perform synchronous upcall to gss-proxy */ + status = gssp_accept_sec_context_upcall(net, &ud); + if (status) + goto out; + + dprintk("RPC: svcauth_gss: gss major status = %d\n", + ud.major_status); + + switch (ud.major_status) { + case GSS_S_CONTINUE_NEEDED: + cli_handle = ud.out_handle; + break; + case GSS_S_COMPLETE: + status = gss_proxy_save_rsc(sn->rsc_cache, &ud, &handle); + if (status) + goto out; + cli_handle.data = (u8 *)&handle; + cli_handle.len = sizeof(handle); + break; + default: + ret = SVC_CLOSE; + goto out; + } + + /* Got an answer to the upcall; use it: */ + if (gss_write_init_verf(sn->rsc_cache, rqstp, + &cli_handle, &ud.major_status)) + goto out; + if (gss_write_resv(resv, PAGE_SIZE, + &cli_handle, &ud.out_token, + ud.major_status, ud.minor_status)) + goto out; + + ret = SVC_COMPLETE; +out: + gssp_free_upcall_data(&ud); + return ret; +} + +static DEFINE_SPINLOCK(use_gssp_lock); + +static bool use_gss_proxy(struct net *net) +{ + struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); + + if (sn->use_gss_proxy != -1) + return sn->use_gss_proxy; + spin_lock(&use_gssp_lock); + /* + * If you wanted gss-proxy, you should have said so before + * starting to accept requests: + */ + sn->use_gss_proxy = 0; + spin_unlock(&use_gssp_lock); + return 0; +} + +#ifdef CONFIG_PROC_FS + +static int set_gss_proxy(struct net *net, int type) +{ + struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); + int ret = 0; + + WARN_ON_ONCE(type != 0 && type != 1); + spin_lock(&use_gssp_lock); + if (sn->use_gss_proxy == -1 || sn->use_gss_proxy == type) + sn->use_gss_proxy = type; + else + ret = -EBUSY; + spin_unlock(&use_gssp_lock); + wake_up(&sn->gssp_wq); + return ret; +} + +static inline bool gssp_ready(struct sunrpc_net *sn) +{ + switch (sn->use_gss_proxy) { + case -1: + return false; + case 0: + return true; + case 1: + return sn->gssp_clnt; + } + WARN_ON_ONCE(1); + return false; +} + +static int wait_for_gss_proxy(struct net *net) +{ + struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); + + return wait_event_interruptible(sn->gssp_wq, gssp_ready(sn)); +} + + +static ssize_t write_gssp(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct net *net = PDE_DATA(file->f_path.dentry->d_inode); + char tbuf[20]; + unsigned long i; + int res; + + if (*ppos || count > sizeof(tbuf)-1) + return -EINVAL; + if (copy_from_user(tbuf, buf, count)) + return -EFAULT; + + tbuf[count] = 0; + res = kstrtoul(tbuf, 0, &i); + if (res) + return res; + if (i != 1) + return -EINVAL; + res = set_gss_proxy(net, 1); + if (res) + return res; + res = set_gssp_clnt(net); + if (res) + return res; + return count; +} + +static ssize_t read_gssp(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + struct net *net = PDE_DATA(file->f_path.dentry->d_inode); + unsigned long p = *ppos; + char tbuf[10]; + size_t len; + int ret; + + ret = wait_for_gss_proxy(net); + if (ret) + return ret; + + snprintf(tbuf, sizeof(tbuf), "%d\n", use_gss_proxy(net)); + len = strlen(tbuf); + if (p >= len) + return 0; + len -= p; + if (len > count) + len = count; + if (copy_to_user(buf, (void *)(tbuf+p), len)) + return -EFAULT; + *ppos += len; + return len; +} + +static const struct file_operations use_gss_proxy_ops = { + .open = nonseekable_open, + .write = write_gssp, + .read = read_gssp, +}; + +static int create_use_gss_proxy_proc_entry(struct net *net) +{ + struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); + struct proc_dir_entry **p = &sn->use_gssp_proc; + +
sn->use_gss_proxy = -1; + *p = proc_create_data("use-gss-proxy", S_IFREG|S_IRUSR|S_IWUSR, + sn->proc_net_rpc, + &use_gss_proxy_ops, net); + if (!*p) + return -ENOMEM; + init_gssp_clnt(sn); + return 0; +} + +static void destroy_use_gss_proxy_proc_entry(struct net *net) +{ + struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); + + if (sn->use_gssp_proc) { + remove_proc_entry("use-gss-proxy", sn->proc_net_rpc); + clear_gssp_clnt(sn); + } +} +#else /* CONFIG_PROC_FS */ + +static int create_use_gss_proxy_proc_entry(struct net *net) +{ + return 0; +} + +static void destroy_use_gss_proxy_proc_entry(struct net *net) {} + +#endif /* CONFIG_PROC_FS */ + /* * Accept an rpcsec packet. * If context establishment, punt to user space @@ -1142,7 +1486,10 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp) switch (gc->gc_proc) { case RPC_GSS_PROC_INIT: case RPC_GSS_PROC_CONTINUE_INIT: - return svcauth_gss_handle_init(rqstp, gc, authp); + if (use_gss_proxy(SVC_NET(rqstp))) + return svcauth_gss_proxy_init(rqstp, gc, authp); + else + return svcauth_gss_legacy_init(rqstp, gc, authp); case RPC_GSS_PROC_DATA: case RPC_GSS_PROC_DESTROY: /* Look up the context, and check the verifier: */ @@ -1190,7 +1537,7 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp) /* placeholders for length and seq. number: */ svc_putnl(resv, 0); svc_putnl(resv, 0); - if (unwrap_integ_data(&rqstp->rq_arg, + if (unwrap_integ_data(rqstp, &rqstp->rq_arg, gc->gc_seq, rsci->mechctx)) goto garbage_args; break; @@ -1208,7 +1555,9 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp) svcdata->rsci = rsci; cache_get(&rsci->h); rqstp->rq_cred.cr_flavor = gss_svc_to_pseudoflavor( - rsci->mechctx->mech_type, gc->gc_svc); + rsci->mechctx->mech_type, + GSS_C_QOP_DEFAULT, + gc->gc_svc); ret = SVC_OK; goto out; } @@ -1517,7 +1866,12 @@ gss_svc_init_net(struct net *net) rv = rsi_cache_create_net(net); if (rv) goto out1; + rv = create_use_gss_proxy_proc_entry(net); + if (rv) + goto out2; return 0; +out2: + destroy_use_gss_proxy_proc_entry(net); out1: rsc_cache_destroy_net(net); return rv; @@ -1526,6 +1880,7 @@ out1: void gss_svc_shutdown_net(struct net *net) { + destroy_use_gss_proxy_proc_entry(net); rsi_cache_destroy_net(net); rsc_cache_destroy_net(net); } diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c index 52c5abdee211..dc37021fc3e5 100644 --- a/net/sunrpc/auth_unix.c +++ b/net/sunrpc/auth_unix.c @@ -18,8 +18,8 @@ struct unx_cred { struct rpc_cred uc_base; - gid_t uc_gid; - gid_t uc_gids[NFS_NGROUPS]; + kgid_t uc_gid; + kgid_t uc_gids[NFS_NGROUPS]; }; #define uc_uid uc_base.cr_uid @@ -65,7 +65,8 @@ unx_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags) unsigned int i; dprintk("RPC: allocating UNIX cred for uid %d gid %d\n", - acred->uid, acred->gid); + from_kuid(&init_user_ns, acred->uid), + from_kgid(&init_user_ns, acred->gid)); if (!(cred = kmalloc(sizeof(*cred), GFP_NOFS))) return ERR_PTR(-ENOMEM); @@ -79,13 +80,10 @@ unx_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags) groups = NFS_NGROUPS; cred->uc_gid = acred->gid; - for (i = 0; i < groups; i++) { - gid_t gid; - gid = from_kgid(&init_user_ns, GROUP_AT(acred->group_info, i)); - cred->uc_gids[i] = gid; - } + for (i = 0; i < groups; i++) + cred->uc_gids[i] = GROUP_AT(acred->group_info, i); if (i < NFS_NGROUPS) - cred->uc_gids[i] = NOGROUP; + cred->uc_gids[i] = INVALID_GID; return &cred->uc_base; } @@ -123,21 +121,17 @@ unx_match(struct auth_cred *acred, struct rpc_cred *rcred, int flags) unsigned int i; - if 
(cred->uc_uid != acred->uid || cred->uc_gid != acred->gid) + if (!uid_eq(cred->uc_uid, acred->uid) || !gid_eq(cred->uc_gid, acred->gid)) return 0; if (acred->group_info != NULL) groups = acred->group_info->ngroups; if (groups > NFS_NGROUPS) groups = NFS_NGROUPS; - for (i = 0; i < groups ; i++) { - gid_t gid; - gid = from_kgid(&init_user_ns, GROUP_AT(acred->group_info, i)); - if (cred->uc_gids[i] != gid) + for (i = 0; i < groups ; i++) + if (!gid_eq(cred->uc_gids[i], GROUP_AT(acred->group_info, i))) return 0; - } - if (groups < NFS_NGROUPS && - cred->uc_gids[groups] != NOGROUP) + if (groups < NFS_NGROUPS && gid_valid(cred->uc_gids[groups])) return 0; return 1; } @@ -163,11 +157,11 @@ unx_marshal(struct rpc_task *task, __be32 *p) */ p = xdr_encode_array(p, clnt->cl_nodename, clnt->cl_nodelen); - *p++ = htonl((u32) cred->uc_uid); - *p++ = htonl((u32) cred->uc_gid); + *p++ = htonl((u32) from_kuid(&init_user_ns, cred->uc_uid)); + *p++ = htonl((u32) from_kgid(&init_user_ns, cred->uc_gid)); hold = p++; - for (i = 0; i < 16 && cred->uc_gids[i] != (gid_t) NOGROUP; i++) - *p++ = htonl((u32) cred->uc_gids[i]); + for (i = 0; i < 16 && gid_valid(cred->uc_gids[i]); i++) + *p++ = htonl((u32) from_kgid(&init_user_ns, cred->uc_gids[i])); *hold = htonl(p - hold - 1); /* gid array length */ *base = htonl((p - base - 1) << 2); /* cred length */ diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 9afa4393c217..80fe5c86efd1 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -196,9 +196,9 @@ EXPORT_SYMBOL_GPL(sunrpc_cache_update); static int cache_make_upcall(struct cache_detail *cd, struct cache_head *h) { - if (!cd->cache_upcall) - return -EINVAL; - return cd->cache_upcall(cd, h); + if (cd->cache_upcall) + return cd->cache_upcall(cd, h); + return sunrpc_cache_pipe_upcall(cd, h); } static inline int cache_is_valid(struct cache_detail *detail, struct cache_head *h) @@ -670,13 +670,13 @@ static void cache_revisit_request(struct cache_head *item) { struct cache_deferred_req *dreq; struct list_head pending; - struct hlist_node *lp, *tmp; + struct hlist_node *tmp; int hash = DFR_HASH(item); INIT_LIST_HEAD(&pending); spin_lock(&cache_defer_lock); - hlist_for_each_entry_safe(dreq, lp, tmp, &cache_defer_hash[hash], hash) + hlist_for_each_entry_safe(dreq, tmp, &cache_defer_hash[hash], hash) if (dreq->item == item) { __unhash_deferred_req(dreq); list_add(&dreq->recent, &pending); @@ -750,12 +750,24 @@ struct cache_reader { int offset; /* if non-0, we have a refcnt on next request */ }; +static int cache_request(struct cache_detail *detail, + struct cache_request *crq) +{ + char *bp = crq->buf; + int len = PAGE_SIZE; + + detail->cache_request(detail, crq->item, &bp, &len); + if (len < 0) + return -EAGAIN; + return PAGE_SIZE - len; +} + static ssize_t cache_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos, struct cache_detail *cd) { struct cache_reader *rp = filp->private_data; struct cache_request *rq; - struct inode *inode = filp->f_path.dentry->d_inode; + struct inode *inode = file_inode(filp); int err; if (count == 0) @@ -784,6 +796,13 @@ static ssize_t cache_read(struct file *filp, char __user *buf, size_t count, rq->readers++; spin_unlock(&queue_lock); + if (rq->len == 0) { + err = cache_request(cd, rq); + if (err < 0) + goto out; + rq->len = err; + } + if (rp->offset == 0 && !test_bit(CACHE_PENDING, &rq->item->flags)) { err = -EAGAIN; spin_lock(&queue_lock); @@ -886,7 +905,7 @@ static ssize_t cache_write(struct file *filp, const char __user *buf, struct cache_detail *cd) { struct 
address_space *mapping = filp->f_mapping; - struct inode *inode = filp->f_path.dentry->d_inode; + struct inode *inode = file_inode(filp); ssize_t ret = -EINVAL; if (!cd->cache_parse) @@ -967,8 +986,10 @@ static int cache_open(struct inode *inode, struct file *filp, nonseekable_open(inode, filp); if (filp->f_mode & FMODE_READ) { rp = kmalloc(sizeof(*rp), GFP_KERNEL); - if (!rp) + if (!rp) { + module_put(cd->owner); return -ENOMEM; + } rp->offset = 0; rp->q.reader = 1; atomic_inc(&cd->readers); @@ -1140,17 +1161,14 @@ static bool cache_listeners_exist(struct cache_detail *detail) * * Each request is at most one page long. */ -int sunrpc_cache_pipe_upcall(struct cache_detail *detail, struct cache_head *h, - void (*cache_request)(struct cache_detail *, - struct cache_head *, - char **, - int *)) +int sunrpc_cache_pipe_upcall(struct cache_detail *detail, struct cache_head *h) { char *buf; struct cache_request *crq; - char *bp; - int len; + + if (!detail->cache_request) + return -EINVAL; if (!cache_listeners_exist(detail)) { warn_no_listener(detail); @@ -1167,19 +1185,10 @@ int sunrpc_cache_pipe_upcall(struct cache_detail *detail, struct cache_head *h, return -EAGAIN; } - bp = buf; len = PAGE_SIZE; - - cache_request(detail, h, &bp, &len); - - if (len < 0) { - kfree(buf); - kfree(crq); - return -EAGAIN; - } crq->q.reader = 0; crq->item = cache_get(h); crq->buf = buf; - crq->len = PAGE_SIZE - len; + crq->len = 0; crq->readers = 0; spin_lock(&queue_lock); list_add_tail(&crq->q.list, &detail->queue); @@ -1201,7 +1210,6 @@ EXPORT_SYMBOL_GPL(sunrpc_cache_pipe_upcall); * key and content are both parsed by cache */ -#define isodigit(c) (isdigit(c) && c <= '7') int qword_get(char **bpp, char *dest, int bufsize) { /* return bytes copied, or -1 on error */ @@ -1454,7 +1462,7 @@ static ssize_t write_flush(struct file *file, const char __user *buf, static ssize_t cache_read_procfs(struct file *filp, char __user *buf, size_t count, loff_t *ppos) { - struct cache_detail *cd = PDE(filp->f_path.dentry->d_inode)->data; + struct cache_detail *cd = PDE_DATA(file_inode(filp)); return cache_read(filp, buf, count, ppos, cd); } @@ -1462,14 +1470,14 @@ static ssize_t cache_read_procfs(struct file *filp, char __user *buf, static ssize_t cache_write_procfs(struct file *filp, const char __user *buf, size_t count, loff_t *ppos) { - struct cache_detail *cd = PDE(filp->f_path.dentry->d_inode)->data; + struct cache_detail *cd = PDE_DATA(file_inode(filp)); return cache_write(filp, buf, count, ppos, cd); } static unsigned int cache_poll_procfs(struct file *filp, poll_table *wait) { - struct cache_detail *cd = PDE(filp->f_path.dentry->d_inode)->data; + struct cache_detail *cd = PDE_DATA(file_inode(filp)); return cache_poll(filp, wait, cd); } @@ -1477,22 +1485,22 @@ static unsigned int cache_poll_procfs(struct file *filp, poll_table *wait) static long cache_ioctl_procfs(struct file *filp, unsigned int cmd, unsigned long arg) { - struct inode *inode = filp->f_path.dentry->d_inode; - struct cache_detail *cd = PDE(inode)->data; + struct inode *inode = file_inode(filp); + struct cache_detail *cd = PDE_DATA(inode); return cache_ioctl(inode, filp, cmd, arg, cd); } static int cache_open_procfs(struct inode *inode, struct file *filp) { - struct cache_detail *cd = PDE(inode)->data; + struct cache_detail *cd = PDE_DATA(inode); return cache_open(inode, filp, cd); } static int cache_release_procfs(struct inode *inode, struct file *filp) { - struct cache_detail *cd = PDE(inode)->data; + struct cache_detail *cd = PDE_DATA(inode); return 
cache_release(inode, filp, cd); } @@ -1510,14 +1518,14 @@ static const struct file_operations cache_file_operations_procfs = { static int content_open_procfs(struct inode *inode, struct file *filp) { - struct cache_detail *cd = PDE(inode)->data; + struct cache_detail *cd = PDE_DATA(inode); return content_open(inode, filp, cd); } static int content_release_procfs(struct inode *inode, struct file *filp) { - struct cache_detail *cd = PDE(inode)->data; + struct cache_detail *cd = PDE_DATA(inode); return content_release(inode, filp, cd); } @@ -1531,14 +1539,14 @@ static const struct file_operations content_file_operations_procfs = { static int open_flush_procfs(struct inode *inode, struct file *filp) { - struct cache_detail *cd = PDE(inode)->data; + struct cache_detail *cd = PDE_DATA(inode); return open_flush(inode, filp, cd); } static int release_flush_procfs(struct inode *inode, struct file *filp) { - struct cache_detail *cd = PDE(inode)->data; + struct cache_detail *cd = PDE_DATA(inode); return release_flush(inode, filp, cd); } @@ -1546,7 +1554,7 @@ static int release_flush_procfs(struct inode *inode, struct file *filp) static ssize_t read_flush_procfs(struct file *filp, char __user *buf, size_t count, loff_t *ppos) { - struct cache_detail *cd = PDE(filp->f_path.dentry->d_inode)->data; + struct cache_detail *cd = PDE_DATA(file_inode(filp)); return read_flush(filp, buf, count, ppos, cd); } @@ -1555,7 +1563,7 @@ static ssize_t write_flush_procfs(struct file *filp, const char __user *buf, size_t count, loff_t *ppos) { - struct cache_detail *cd = PDE(filp->f_path.dentry->d_inode)->data; + struct cache_detail *cd = PDE_DATA(file_inode(filp)); return write_flush(filp, buf, count, ppos, cd); } @@ -1605,7 +1613,7 @@ static int create_cache_proc_entries(struct cache_detail *cd, struct net *net) if (p == NULL) goto out_nomem; - if (cd->cache_upcall || cd->cache_parse) { + if (cd->cache_request || cd->cache_parse) { p = proc_create_data("channel", S_IFREG|S_IRUSR|S_IWUSR, cd->u.procfs.proc_ent, &cache_file_operations_procfs, cd); @@ -1614,7 +1622,7 @@ static int create_cache_proc_entries(struct cache_detail *cd, struct net *net) goto out_nomem; } if (cd->cache_show) { - p = proc_create_data("content", S_IFREG|S_IRUSR|S_IWUSR, + p = proc_create_data("content", S_IFREG|S_IRUSR, cd->u.procfs.proc_ent, &content_file_operations_procfs, cd); cd->u.procfs.content_ent = p; @@ -1686,7 +1694,7 @@ EXPORT_SYMBOL_GPL(cache_destroy_net); static ssize_t cache_read_pipefs(struct file *filp, char __user *buf, size_t count, loff_t *ppos) { - struct cache_detail *cd = RPC_I(filp->f_path.dentry->d_inode)->private; + struct cache_detail *cd = RPC_I(file_inode(filp))->private; return cache_read(filp, buf, count, ppos, cd); } @@ -1694,14 +1702,14 @@ static ssize_t cache_read_pipefs(struct file *filp, char __user *buf, static ssize_t cache_write_pipefs(struct file *filp, const char __user *buf, size_t count, loff_t *ppos) { - struct cache_detail *cd = RPC_I(filp->f_path.dentry->d_inode)->private; + struct cache_detail *cd = RPC_I(file_inode(filp))->private; return cache_write(filp, buf, count, ppos, cd); } static unsigned int cache_poll_pipefs(struct file *filp, poll_table *wait) { - struct cache_detail *cd = RPC_I(filp->f_path.dentry->d_inode)->private; + struct cache_detail *cd = RPC_I(file_inode(filp))->private; return cache_poll(filp, wait, cd); } @@ -1709,7 +1717,7 @@ static unsigned int cache_poll_pipefs(struct file *filp, poll_table *wait) static long cache_ioctl_pipefs(struct file *filp, unsigned int cmd, unsigned 
long arg) { - struct inode *inode = filp->f_dentry->d_inode; + struct inode *inode = file_inode(filp); struct cache_detail *cd = RPC_I(inode)->private; return cache_ioctl(inode, filp, cmd, arg, cd); @@ -1778,7 +1786,7 @@ static int release_flush_pipefs(struct inode *inode, struct file *filp) static ssize_t read_flush_pipefs(struct file *filp, char __user *buf, size_t count, loff_t *ppos) { - struct cache_detail *cd = RPC_I(filp->f_path.dentry->d_inode)->private; + struct cache_detail *cd = RPC_I(file_inode(filp))->private; return read_flush(filp, buf, count, ppos, cd); } @@ -1787,7 +1795,7 @@ static ssize_t write_flush_pipefs(struct file *filp, const char __user *buf, size_t count, loff_t *ppos) { - struct cache_detail *cd = RPC_I(filp->f_path.dentry->d_inode)->private; + struct cache_detail *cd = RPC_I(file_inode(filp))->private; return write_flush(filp, buf, count, ppos, cd); } diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 507b5e84fbdb..5a750b9c3640 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -33,6 +33,7 @@ #include <linux/rcupdate.h> #include <linux/sunrpc/clnt.h> +#include <linux/sunrpc/addr.h> #include <linux/sunrpc/rpc_pipe_fs.h> #include <linux/sunrpc/metrics.h> #include <linux/sunrpc/bc_xprt.h> @@ -303,10 +304,8 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, stru err = rpciod_up(); if (err) goto out_no_rpciod; - err = -EINVAL; - if (!xprt) - goto out_no_xprt; + err = -EINVAL; if (args->version >= program->nrvers) goto out_err; version = program->version[args->version]; @@ -361,7 +360,7 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, stru auth = rpcauth_create(args->authflavor, clnt); if (IS_ERR(auth)) { - printk(KERN_INFO "RPC: Couldn't create auth handle (flavor %u)\n", + dprintk("RPC: Couldn't create auth handle (flavor %u)\n", args->authflavor); err = PTR_ERR(auth); goto out_no_auth; @@ -381,10 +380,9 @@ out_no_principal: out_no_stats: kfree(clnt); out_err: - xprt_put(xprt); -out_no_xprt: rpciod_down(); out_no_rpciod: + xprt_put(xprt); return ERR_PTR(err); } @@ -413,6 +411,10 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args) }; char servername[48]; + if (args->flags & RPC_CLNT_CREATE_INFINITE_SLOTS) + xprtargs.flags |= XPRT_CREATE_INFINITE_SLOTS; + if (args->flags & RPC_CLNT_CREATE_NO_IDLE_TIMEOUT) + xprtargs.flags |= XPRT_CREATE_NO_IDLE_TIMEOUT; /* * If the caller chooses not to specify a hostname, whip * up a string representation of the passed-in address. 
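The two new RPC_CLNT_CREATE_* flags above are simply translated into the corresponding XPRT_CREATE_* transport flags at client-creation time. As a rough sketch of how a caller might request them: only rpc_create(), struct rpc_create_args and the two flags come from this patch series; the surrounding function, its parameters, and the other argument values are hypothetical.

/* Hypothetical caller sketch (not part of this patch). */
static struct rpc_clnt *example_create_clnt(struct net *net,
					    struct sockaddr *sap, size_t salen,
					    const struct rpc_program *prog)
{
	struct rpc_create_args args = {
		.net		= net,
		.protocol	= XPRT_TRANSPORT_TCP,
		.address	= sap,
		.addrsize	= salen,
		.program	= prog,
		.version	= 0,
		.authflavor	= RPC_AUTH_UNIX,
		/* grow the slot table on demand; never autodisconnect */
		.flags		= RPC_CLNT_CREATE_INFINITE_SLOTS |
				  RPC_CLNT_CREATE_NO_IDLE_TIMEOUT,
	};

	/* .servername is left unset; rpc_create() derives one from @sap */
	return rpc_create(&args);
}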
@@ -511,7 +513,7 @@ static struct rpc_clnt *__rpc_clone_client(struct rpc_create_args *args, new = rpc_new_client(args, xprt); if (IS_ERR(new)) { err = PTR_ERR(new); - goto out_put; + goto out_err; } atomic_inc(&clnt->cl_count); @@ -524,8 +526,6 @@ static struct rpc_clnt *__rpc_clone_client(struct rpc_create_args *args, new->cl_chatty = clnt->cl_chatty; return new; -out_put: - xprt_put(xprt); out_err: dprintk("RPC: %s: returned error %d\n", __func__, err); return ERR_PTR(err); @@ -683,6 +683,7 @@ rpc_release_client(struct rpc_clnt *clnt) if (atomic_dec_and_test(&clnt->cl_count)) rpc_free_auth(clnt); } +EXPORT_SYMBOL_GPL(rpc_release_client); /** * rpc_bind_new_program - bind a new RPC program to an existing client @@ -1196,6 +1197,21 @@ size_t rpc_max_payload(struct rpc_clnt *clnt) EXPORT_SYMBOL_GPL(rpc_max_payload); /** + * rpc_get_timeout - Get timeout for transport in units of HZ + * @clnt: RPC client to query + */ +unsigned long rpc_get_timeout(struct rpc_clnt *clnt) +{ + unsigned long ret; + + rcu_read_lock(); + ret = rcu_dereference(clnt->cl_xprt)->timeout->to_initval; + rcu_read_unlock(); + return ret; +} +EXPORT_SYMBOL_GPL(rpc_get_timeout); + +/** * rpc_force_rebind - force transport to check that remote port is unchanged * @clnt: client to rebind * @@ -1290,6 +1306,8 @@ call_reserve(struct rpc_task *task) xprt_reserve(task); } +static void call_retry_reserve(struct rpc_task *task); + /* * 1b. Grok the result of xprt_reserve() */ @@ -1331,7 +1349,7 @@ call_reserveresult(struct rpc_task *task) case -ENOMEM: rpc_delay(task, HZ >> 2); case -EAGAIN: /* woken up; retry */ - task->tk_action = call_reserve; + task->tk_action = call_retry_reserve; return; case -EIO: /* probably a shutdown */ break; @@ -1344,6 +1362,19 @@ call_reserveresult(struct rpc_task *task) } /* + * 1c. Retry reserving an RPC call slot + */ +static void +call_retry_reserve(struct rpc_task *task) +{ + dprint_status(task); + + task->tk_status = 0; + task->tk_action = call_reserveresult; + xprt_retry_reserve(task); +} + +/* * 2. 
Bind and/or refresh the credentials */ static void @@ -1400,7 +1431,7 @@ call_allocate(struct rpc_task *task) { unsigned int slack = task->tk_rqstp->rq_cred->cr_auth->au_cslack; struct rpc_rqst *req = task->tk_rqstp; - struct rpc_xprt *xprt = task->tk_xprt; + struct rpc_xprt *xprt = req->rq_xprt; struct rpc_procinfo *proc = task->tk_msg.rpc_proc; dprint_status(task); @@ -1508,7 +1539,7 @@ rpc_xdr_encode(struct rpc_task *task) static void call_bind(struct rpc_task *task) { - struct rpc_xprt *xprt = task->tk_xprt; + struct rpc_xprt *xprt = task->tk_rqstp->rq_xprt; dprint_status(task); @@ -1602,7 +1633,7 @@ retry_timeout: static void call_connect(struct rpc_task *task) { - struct rpc_xprt *xprt = task->tk_xprt; + struct rpc_xprt *xprt = task->tk_rqstp->rq_xprt; dprintk("RPC: %5u call_connect xprt %p %s connected\n", task->tk_pid, xprt, @@ -1628,22 +1659,26 @@ call_connect_status(struct rpc_task *task) dprint_status(task); - task->tk_status = 0; - if (status >= 0 || status == -EAGAIN) { - clnt->cl_stats->netreconn++; - task->tk_action = call_transmit; - return; - } - trace_rpc_connect_status(task, status); switch (status) { /* if soft mounted, test if we've timed out */ case -ETIMEDOUT: task->tk_action = call_timeout; - break; - default: - rpc_exit(task, -EIO); + return; + case -ECONNREFUSED: + case -ECONNRESET: + case -ENETUNREACH: + if (RPC_IS_SOFTCONN(task)) + break; + /* retry with existing socket, after a delay */ + case 0: + case -EAGAIN: + task->tk_status = 0; + clnt->cl_stats->netreconn++; + task->tk_action = call_transmit; + return; } + rpc_exit(task, status); } /* @@ -1685,7 +1720,7 @@ call_transmit(struct rpc_task *task) if (rpc_reply_expected(task)) return; task->tk_action = rpc_exit_task; - rpc_wake_up_queued_task(&task->tk_xprt->pending, task); + rpc_wake_up_queued_task(&task->tk_rqstp->rq_xprt->pending, task); } /* @@ -1784,7 +1819,7 @@ call_bc_transmit(struct rpc_task *task) */ printk(KERN_NOTICE "RPC: Could not send backchannel reply " "error: %d\n", task->tk_status); - xprt_conditional_disconnect(task->tk_xprt, + xprt_conditional_disconnect(req->rq_xprt, req->rq_connect_cookie); break; default: @@ -1836,7 +1871,7 @@ call_status(struct rpc_task *task) case -ETIMEDOUT: task->tk_action = call_timeout; if (task->tk_client->cl_discrtry) - xprt_conditional_disconnect(task->tk_xprt, + xprt_conditional_disconnect(req->rq_xprt, req->rq_connect_cookie); break; case -ECONNRESET: @@ -1991,7 +2026,7 @@ out_retry: if (task->tk_rqstp == req) { req->rq_reply_bytes_recvd = req->rq_rcv_buf.len = 0; if (task->tk_client->cl_discrtry) - xprt_conditional_disconnect(task->tk_xprt, + xprt_conditional_disconnect(req->rq_xprt, req->rq_connect_cookie); } } @@ -2005,7 +2040,7 @@ rpc_encode_header(struct rpc_task *task) /* FIXME: check buffer size? 
*/ - p = xprt_skip_transport_header(task->tk_xprt, p); + p = xprt_skip_transport_header(req->rq_xprt, p); *p++ = req->rq_xid; /* XID */ *p++ = htonl(RPC_CALL); /* CALL */ *p++ = htonl(RPC_VERSION); /* RPC version */ diff --git a/net/sunrpc/netns.h b/net/sunrpc/netns.h index ce7bd449173d..7111a4c9113b 100644 --- a/net/sunrpc/netns.h +++ b/net/sunrpc/netns.h @@ -23,6 +23,12 @@ struct sunrpc_net { struct rpc_clnt *rpcb_local_clnt4; spinlock_t rpcb_clnt_lock; unsigned int rpcb_users; + + struct mutex gssp_lock; + wait_queue_head_t gssp_wq; + struct rpc_clnt *gssp_clnt; + int use_gss_proxy; + struct proc_dir_entry *use_gssp_proc; }; extern int sunrpc_net_id; diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index fd10981ea792..a9129f8d7070 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -284,7 +284,7 @@ out: static ssize_t rpc_pipe_read(struct file *filp, char __user *buf, size_t len, loff_t *offset) { - struct inode *inode = filp->f_path.dentry->d_inode; + struct inode *inode = file_inode(filp); struct rpc_pipe *pipe; struct rpc_pipe_msg *msg; int res = 0; @@ -328,7 +328,7 @@ out_unlock: static ssize_t rpc_pipe_write(struct file *filp, const char __user *buf, size_t len, loff_t *offset) { - struct inode *inode = filp->f_path.dentry->d_inode; + struct inode *inode = file_inode(filp); int res; mutex_lock(&inode->i_mutex); @@ -342,7 +342,7 @@ rpc_pipe_write(struct file *filp, const char __user *buf, size_t len, loff_t *of static unsigned int rpc_pipe_poll(struct file *filp, struct poll_table_struct *wait) { - struct inode *inode = filp->f_path.dentry->d_inode; + struct inode *inode = file_inode(filp); struct rpc_inode *rpci = RPC_I(inode); unsigned int mask = POLLOUT | POLLWRNORM; @@ -360,7 +360,7 @@ rpc_pipe_poll(struct file *filp, struct poll_table_struct *wait) static long rpc_pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { - struct inode *inode = filp->f_path.dentry->d_inode; + struct inode *inode = file_inode(filp); struct rpc_pipe *pipe; int len; @@ -830,7 +830,7 @@ static int rpc_rmdir_depopulate(struct dentry *dentry, * responses to upcalls. They will result in calls to @msg->downcall. * * The @private argument passed here will be available to all these methods - * from the file pointer, via RPC_I(file->f_dentry->d_inode)->private. + * from the file pointer, via RPC_I(file_inode(file))->private. */ struct dentry *rpc_mkpipe_dentry(struct dentry *parent, const char *name, void *private, struct rpc_pipe *pipe) @@ -1174,6 +1174,8 @@ static struct file_system_type rpc_pipe_fs_type = { .mount = rpc_mount, .kill_sb = rpc_kill_sb, }; +MODULE_ALIAS_FS("rpc_pipefs"); +MODULE_ALIAS("rpc_pipefs"); static void init_once(void *foo) @@ -1218,6 +1220,3 @@ void unregister_rpc_pipefs(void) kmem_cache_destroy(rpc_inode_cachep); unregister_filesystem(&rpc_pipe_fs_type); } - -/* Make 'mount -t rpc_pipefs ...' autoload this module. 
*/ -MODULE_ALIAS("rpc_pipefs"); diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index 795a0f4e920b..3df764dc330c 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -26,6 +26,7 @@ #include <net/ipv6.h> #include <linux/sunrpc/clnt.h> +#include <linux/sunrpc/addr.h> #include <linux/sunrpc/sched.h> #include <linux/sunrpc/xprtsock.h> diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index fb20f25ddec9..f8529fc8e542 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -180,6 +180,8 @@ static void __rpc_add_wait_queue(struct rpc_wait_queue *queue, list_add_tail(&task->u.tk_wait.list, &queue->tasks[0]); task->tk_waitqueue = queue; queue->qlen++; + /* barrier matches the read in rpc_wake_up_task_queue_locked() */ + smp_wmb(); rpc_set_queued(task); dprintk("RPC: %5u added to queue %p \"%s\"\n", @@ -430,8 +432,11 @@ static void __rpc_do_wake_up_task(struct rpc_wait_queue *queue, struct rpc_task */ static void rpc_wake_up_task_queue_locked(struct rpc_wait_queue *queue, struct rpc_task *task) { - if (RPC_IS_QUEUED(task) && task->tk_waitqueue == queue) - __rpc_do_wake_up_task(queue, task); + if (RPC_IS_QUEUED(task)) { + smp_rmb(); + if (task->tk_waitqueue == queue) + __rpc_do_wake_up_task(queue, task); + } } /* diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c index bc2068ee795b..21b75cb08c03 100644 --- a/net/sunrpc/stats.c +++ b/net/sunrpc/stats.c @@ -64,7 +64,7 @@ static int rpc_proc_show(struct seq_file *seq, void *v) { static int rpc_proc_open(struct inode *inode, struct file *file) { - return single_open(file, rpc_proc_show, PDE(inode)->data); + return single_open(file, rpc_proc_show, PDE_DATA(inode)); } static const struct file_operations rpc_proc_fops = { diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index dbf12ac5ecb7..89a588b4478b 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -515,15 +515,6 @@ EXPORT_SYMBOL_GPL(svc_create_pooled); void svc_shutdown_net(struct svc_serv *serv, struct net *net) { - /* - * The set of xprts (contained in the sv_tempsocks and - * sv_permsocks lists) is now constant, since it is modified - * only by accepting new sockets (done by service threads in - * svc_recv) or aging old ones (done by sv_temptimer), or - * configuration changes (excluded by whatever locking the - * caller is using--nfsd_mutex in the case of nfsd). So it's - * safe to traverse those lists and shut everything down: - */ svc_close_net(serv, net); if (serv->sv_shutdown) @@ -1042,6 +1033,7 @@ static void svc_unregister(const struct svc_serv *serv, struct net *net) /* * dprintk the given error with the address of the client that caused it. */ +#ifdef RPC_DEBUG static __printf(2, 3) void svc_printk(struct svc_rqst *rqstp, const char *fmt, ...) { @@ -1058,6 +1050,9 @@ void svc_printk(struct svc_rqst *rqstp, const char *fmt, ...) va_end(args); } +#else +static __printf(2,3) void svc_printk(struct svc_rqst *rqstp, const char *fmt, ...) {} +#endif /* * Common routine for processing the RPC request. diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index b8e47fac7315..80a6640f329b 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -499,7 +499,8 @@ void svc_wake_up(struct svc_serv *serv) rqstp->rq_xprt = NULL; */ wake_up(&rqstp->rq_wait); - } + } else + pool->sp_task_pending = 1; spin_unlock_bh(&pool->sp_lock); } } @@ -634,7 +635,13 @@ struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout) * long for cache updates. 
*/ rqstp->rq_chandle.thread_wait = 1*HZ; + pool->sp_task_pending = 0; } else { + if (pool->sp_task_pending) { + pool->sp_task_pending = 0; + spin_unlock_bh(&pool->sp_lock); + return ERR_PTR(-EAGAIN); + } /* No data pending. Go to sleep */ svc_thread_enqueue(pool, rqstp); @@ -856,7 +863,6 @@ static void svc_age_temp_xprts(unsigned long closure) struct svc_serv *serv = (struct svc_serv *)closure; struct svc_xprt *xprt; struct list_head *le, *next; - LIST_HEAD(to_be_aged); dprintk("svc_age_temp_xprts\n"); @@ -877,25 +883,15 @@ static void svc_age_temp_xprts(unsigned long closure) if (atomic_read(&xprt->xpt_ref.refcount) > 1 || test_bit(XPT_BUSY, &xprt->xpt_flags)) continue; - svc_xprt_get(xprt); - list_move(le, &to_be_aged); + list_del_init(le); set_bit(XPT_CLOSE, &xprt->xpt_flags); set_bit(XPT_DETACHED, &xprt->xpt_flags); - } - spin_unlock_bh(&serv->sv_lock); - - while (!list_empty(&to_be_aged)) { - le = to_be_aged.next; - /* fiddling the xpt_list node is safe 'cos we're XPT_DETACHED */ - list_del_init(le); - xprt = list_entry(le, struct svc_xprt, xpt_list); - dprintk("queuing xprt %p for closing\n", xprt); /* a thread will dequeue and close it soon */ svc_xprt_enqueue(xprt); - svc_xprt_put(xprt); } + spin_unlock_bh(&serv->sv_lock); mod_timer(&serv->sv_temptimer, jiffies + svc_conn_age_period * HZ); } @@ -959,21 +955,24 @@ void svc_close_xprt(struct svc_xprt *xprt) } EXPORT_SYMBOL_GPL(svc_close_xprt); -static void svc_close_list(struct svc_serv *serv, struct list_head *xprt_list, struct net *net) +static int svc_close_list(struct svc_serv *serv, struct list_head *xprt_list, struct net *net) { struct svc_xprt *xprt; + int ret = 0; spin_lock(&serv->sv_lock); list_for_each_entry(xprt, xprt_list, xpt_list) { if (xprt->xpt_net != net) continue; + ret++; set_bit(XPT_CLOSE, &xprt->xpt_flags); - set_bit(XPT_BUSY, &xprt->xpt_flags); + svc_xprt_enqueue(xprt); } spin_unlock(&serv->sv_lock); + return ret; } -static void svc_clear_pools(struct svc_serv *serv, struct net *net) +static struct svc_xprt *svc_dequeue_net(struct svc_serv *serv, struct net *net) { struct svc_pool *pool; struct svc_xprt *xprt; @@ -988,42 +987,46 @@ static void svc_clear_pools(struct svc_serv *serv, struct net *net) if (xprt->xpt_net != net) continue; list_del_init(&xprt->xpt_ready); + spin_unlock_bh(&pool->sp_lock); + return xprt; } spin_unlock_bh(&pool->sp_lock); } + return NULL; } -static void svc_clear_list(struct svc_serv *serv, struct list_head *xprt_list, struct net *net) +static void svc_clean_up_xprts(struct svc_serv *serv, struct net *net) { struct svc_xprt *xprt; - struct svc_xprt *tmp; - LIST_HEAD(victims); - spin_lock(&serv->sv_lock); - list_for_each_entry_safe(xprt, tmp, xprt_list, xpt_list) { - if (xprt->xpt_net != net) - continue; - list_move(&xprt->xpt_list, &victims); - } - spin_unlock(&serv->sv_lock); - - list_for_each_entry_safe(xprt, tmp, &victims, xpt_list) + while ((xprt = svc_dequeue_net(serv, net))) { + set_bit(XPT_CLOSE, &xprt->xpt_flags); svc_delete_xprt(xprt); + } } +/* + * Server threads may still be running (especially in the case where the + * service is still running in other network namespaces). + * + * So we shut down sockets the same way we would on a running server, by + * setting XPT_CLOSE, enqueuing, and letting a thread pick it up to do + * the close. 
If no such threads are + * running, svc_clean_up_xprts() does a simple version of a + * server's main event loop; in the case where there are other + * threads, we may need to wait a little while and then check again to + * see if they're done. + */ void svc_close_net(struct svc_serv *serv, struct net *net) { - svc_close_list(serv, &serv->sv_tempsocks, net); - svc_close_list(serv, &serv->sv_permsocks, net); + int delay = 0; - svc_clear_pools(serv, net); - /* - * At this point the sp_sockets lists will stay empty, since - * svc_xprt_enqueue will not add new entries without taking the - * sp_lock and checking XPT_BUSY. - */ - svc_clear_list(serv, &serv->sv_tempsocks, net); - svc_clear_list(serv, &serv->sv_permsocks, net); + while (svc_close_list(serv, &serv->sv_permsocks, net) + + svc_close_list(serv, &serv->sv_tempsocks, net)) { + + svc_clean_up_xprts(serv, net); + msleep(delay++); + } } /* diff --git a/net/sunrpc/svcauth.c b/net/sunrpc/svcauth.c index 7963569fc04f..2af7b0cba43a 100644 --- a/net/sunrpc/svcauth.c +++ b/net/sunrpc/svcauth.c @@ -138,13 +138,12 @@ auth_domain_lookup(char *name, struct auth_domain *new) { struct auth_domain *hp; struct hlist_head *head; - struct hlist_node *np; head = &auth_domain_table[hash_str(name, DN_HASHBITS)]; spin_lock(&auth_domain_lock); - hlist_for_each_entry(hp, np, head, hash) { + hlist_for_each_entry(hp, head, hash) { if (strcmp(hp->name, name)==0) { kref_get(&hp->ref); spin_unlock(&auth_domain_lock); diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index 4d0129203733..c3f9e1ef7f53 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c @@ -6,6 +6,7 @@ #include <linux/sunrpc/svcsock.h> #include <linux/sunrpc/svcauth.h> #include <linux/sunrpc/gss_api.h> +#include <linux/sunrpc/addr.h> #include <linux/err.h> #include <linux/seq_file.h> #include <linux/hash.h> @@ -17,7 +18,6 @@ #include <linux/user_namespace.h> #define RPCDBG_FACILITY RPCDBG_AUTH -#include <linux/sunrpc/clnt.h> #include "netns.h" @@ -157,11 +157,6 @@ static void ip_map_request(struct cache_detail *cd, (*bpp)[-1] = '\n'; } -static int ip_map_upcall(struct cache_detail *cd, struct cache_head *h) -{ - return sunrpc_cache_pipe_upcall(cd, h, ip_map_request); -} - static struct ip_map *__ip_map_lookup(struct cache_detail *cd, char *class, struct in6_addr *addr); static int __ip_map_update(struct cache_detail *cd, struct ip_map *ipm, struct unix_domain *udom, time_t expiry); @@ -415,10 +410,15 @@ svcauth_unix_info_release(struct svc_xprt *xpt) struct unix_gid { struct cache_head h; - uid_t uid; + kuid_t uid; struct group_info *gi; }; +static int unix_gid_hash(kuid_t uid) +{ + return hash_long(from_kuid(&init_user_ns, uid), GID_HASHBITS); +} + static void unix_gid_put(struct kref *kref) { struct cache_head *item = container_of(kref, struct cache_head, ref); @@ -433,7 +433,7 @@ static int unix_gid_match(struct cache_head *corig, struct cache_head *cnew) { struct unix_gid *orig = container_of(corig, struct unix_gid, h); struct unix_gid *new = container_of(cnew, struct unix_gid, h); - return orig->uid == new->uid; + return uid_eq(orig->uid, new->uid); } static void unix_gid_init(struct cache_head *cnew, struct cache_head *citem) { @@ -465,23 +465,19 @@ static void unix_gid_request(struct cache_detail *cd, char tuid[20]; struct unix_gid *ug = container_of(h, struct unix_gid, h); - snprintf(tuid, 20, "%u", ug->uid); + snprintf(tuid, 20, "%u", from_kuid(&init_user_ns, ug->uid)); qword_add(bpp, blen, tuid); (*bpp)[-1] = '\n'; } -static
int unix_gid_upcall(struct cache_detail *cd, struct cache_head *h) -{ - return sunrpc_cache_pipe_upcall(cd, h, unix_gid_request); -} - -static struct unix_gid *unix_gid_lookup(struct cache_detail *cd, uid_t uid); +static struct unix_gid *unix_gid_lookup(struct cache_detail *cd, kuid_t uid); static int unix_gid_parse(struct cache_detail *cd, char *mesg, int mlen) { /* uid expiry Ngid gid0 gid1 ... gidN-1 */ - int uid; + int id; + kuid_t uid; int gids; int rv; int i; @@ -493,9 +489,12 @@ static int unix_gid_parse(struct cache_detail *cd, return -EINVAL; mesg[mlen-1] = 0; - rv = get_int(&mesg, &uid); + rv = get_int(&mesg, &id); if (rv) return -EINVAL; + uid = make_kuid(&init_user_ns, id); + if (!uid_valid(uid)) + return -EINVAL; ug.uid = uid; expiry = get_expiry(&mesg); @@ -530,7 +529,7 @@ static int unix_gid_parse(struct cache_detail *cd, ug.h.expiry_time = expiry; ch = sunrpc_cache_update(cd, &ug.h, &ugp->h, - hash_long(uid, GID_HASHBITS)); + unix_gid_hash(uid)); if (!ch) err = -ENOMEM; else { @@ -549,7 +548,7 @@ static int unix_gid_show(struct seq_file *m, struct cache_detail *cd, struct cache_head *h) { - struct user_namespace *user_ns = current_user_ns(); + struct user_namespace *user_ns = &init_user_ns; struct unix_gid *ug; int i; int glen; @@ -565,7 +564,7 @@ static int unix_gid_show(struct seq_file *m, else glen = 0; - seq_printf(m, "%u %d:", ug->uid, glen); + seq_printf(m, "%u %d:", from_kuid_munged(user_ns, ug->uid), glen); for (i = 0; i < glen; i++) seq_printf(m, " %d", from_kgid_munged(user_ns, GROUP_AT(ug->gi, i))); seq_printf(m, "\n"); @@ -577,7 +576,7 @@ static struct cache_detail unix_gid_cache_template = { .hash_size = GID_HASHMAX, .name = "auth.unix.gid", .cache_put = unix_gid_put, - .cache_upcall = unix_gid_upcall, + .cache_request = unix_gid_request, .cache_parse = unix_gid_parse, .cache_show = unix_gid_show, .match = unix_gid_match, @@ -615,20 +614,20 @@ void unix_gid_cache_destroy(struct net *net) cache_destroy_net(cd, net); } -static struct unix_gid *unix_gid_lookup(struct cache_detail *cd, uid_t uid) +static struct unix_gid *unix_gid_lookup(struct cache_detail *cd, kuid_t uid) { struct unix_gid ug; struct cache_head *ch; ug.uid = uid; - ch = sunrpc_cache_lookup(cd, &ug.h, hash_long(uid, GID_HASHBITS)); + ch = sunrpc_cache_lookup(cd, &ug.h, unix_gid_hash(uid)); if (ch) return container_of(ch, struct unix_gid, h); else return NULL; } -static struct group_info *unix_gid_find(uid_t uid, struct svc_rqst *rqstp) +static struct group_info *unix_gid_find(kuid_t uid, struct svc_rqst *rqstp) { struct unix_gid *ug; struct group_info *gi; @@ -750,8 +749,8 @@ svcauth_null_accept(struct svc_rqst *rqstp, __be32 *authp) } /* Signal that mapping to nobody uid/gid is required */ - cred->cr_uid = (uid_t) -1; - cred->cr_gid = (gid_t) -1; + cred->cr_uid = INVALID_UID; + cred->cr_gid = INVALID_GID; cred->cr_group_info = groups_alloc(0); if (cred->cr_group_info == NULL) return SVC_CLOSE; /* kmalloc failure - client must retry */ @@ -812,8 +811,10 @@ svcauth_unix_accept(struct svc_rqst *rqstp, __be32 *authp) argv->iov_base = (void*)((__be32*)argv->iov_base + slen); /* skip machname */ argv->iov_len -= slen*4; - cred->cr_uid = svc_getnl(argv); /* uid */ - cred->cr_gid = svc_getnl(argv); /* gid */ + cred->cr_uid = make_kuid(&init_user_ns, svc_getnl(argv)); /* uid */ + cred->cr_gid = make_kgid(&init_user_ns, svc_getnl(argv)); /* gid */ + if (!uid_valid(cred->cr_uid) || !gid_valid(cred->cr_gid)) + goto badcred; slen = svc_getnl(argv); /* gids length */ if (slen > 16 || (len -= (slen + 2)*4) < 0) 
goto badcred; @@ -874,7 +875,7 @@ static struct cache_detail ip_map_cache_template = { .hash_size = IP_HASHMAX, .name = "auth.unix.ip", .cache_put = ip_map_put, - .cache_upcall = ip_map_upcall, + .cache_request = ip_map_request, .cache_parse = ip_map_parse, .cache_show = ip_map_show, .match = ip_map_match, diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index 56055632f151..75edcfad6e26 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -879,6 +879,47 @@ xdr_buf_subsegment(struct xdr_buf *buf, struct xdr_buf *subbuf, } EXPORT_SYMBOL_GPL(xdr_buf_subsegment); +/** + * xdr_buf_trim - lop at most "len" bytes off the end of "buf" + * @buf: buf to be trimmed + * @len: number of bytes to reduce "buf" by + * + * Trim an xdr_buf by the given number of bytes by fixing up the lengths. Note + * that it's possible that we'll trim less than that amount if the xdr_buf is + * too small, or if (for instance) it's all in the head and the parser has + * already read too far into it. + */ +void xdr_buf_trim(struct xdr_buf *buf, unsigned int len) +{ + size_t cur; + unsigned int trim = len; + + if (buf->tail[0].iov_len) { + cur = min_t(size_t, buf->tail[0].iov_len, trim); + buf->tail[0].iov_len -= cur; + trim -= cur; + if (!trim) + goto fix_len; + } + + if (buf->page_len) { + cur = min_t(unsigned int, buf->page_len, trim); + buf->page_len -= cur; + trim -= cur; + if (!trim) + goto fix_len; + } + + if (buf->head[0].iov_len) { + cur = min_t(size_t, buf->head[0].iov_len, trim); + buf->head[0].iov_len -= cur; + trim -= cur; + } +fix_len: + buf->len -= (len - trim); +} +EXPORT_SYMBOL_GPL(xdr_buf_trim); + static void __read_bytes_from_xdr_buf(struct xdr_buf *subbuf, void *obj, unsigned int len) { unsigned int this_len; diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 33811db8788a..095363eee764 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -430,21 +430,23 @@ __xprt_put_cong(struct rpc_xprt *xprt, struct rpc_rqst *req) */ void xprt_release_rqst_cong(struct rpc_task *task) { - __xprt_put_cong(task->tk_xprt, task->tk_rqstp); + struct rpc_rqst *req = task->tk_rqstp; + + __xprt_put_cong(req->rq_xprt, req); } EXPORT_SYMBOL_GPL(xprt_release_rqst_cong); /** * xprt_adjust_cwnd - adjust transport congestion window + * @xprt: pointer to xprt * @task: recently completed RPC request used to adjust window * @result: result code of completed RPC request * * We use a time-smoothed congestion estimator to avoid heavy oscillation. */ -void xprt_adjust_cwnd(struct rpc_task *task, int result) +void xprt_adjust_cwnd(struct rpc_xprt *xprt, struct rpc_task *task, int result) { struct rpc_rqst *req = task->tk_rqstp; - struct rpc_xprt *xprt = task->tk_xprt; unsigned long cwnd = xprt->cwnd; if (result >= 0 && cwnd <= xprt->cong) { @@ -485,13 +487,17 @@ EXPORT_SYMBOL_GPL(xprt_wake_pending_tasks); * xprt_wait_for_buffer_space - wait for transport output buffer to clear * @task: task to be put to sleep * @action: function pointer to be executed after wait + * + * Note that we only set the timer for the case of RPC_IS_SOFT(), since + * we don't in general want to force a socket disconnection due to + * an incomplete RPC call transmission. */ void xprt_wait_for_buffer_space(struct rpc_task *task, rpc_action action) { struct rpc_rqst *req = task->tk_rqstp; struct rpc_xprt *xprt = req->rq_xprt; - task->tk_timeout = req->rq_timeout; + task->tk_timeout = RPC_IS_SOFT(task) ? 
req->rq_timeout : 0; rpc_sleep_on(&xprt->pending, task, action); } EXPORT_SYMBOL_GPL(xprt_wait_for_buffer_space); @@ -695,7 +701,7 @@ out_abort: */ void xprt_connect(struct rpc_task *task) { - struct rpc_xprt *xprt = task->tk_xprt; + struct rpc_xprt *xprt = task->tk_rqstp->rq_xprt; dprintk("RPC: %5u xprt_connect xprt %p %s connected\n", task->tk_pid, xprt, (xprt_connected(xprt) ? "is" : "is not")); @@ -722,13 +728,13 @@ void xprt_connect(struct rpc_task *task) if (xprt_test_and_set_connecting(xprt)) return; xprt->stat.connect_start = jiffies; - xprt->ops->connect(task); + xprt->ops->connect(xprt, task); } } static void xprt_connect_status(struct rpc_task *task) { - struct rpc_xprt *xprt = task->tk_xprt; + struct rpc_xprt *xprt = task->tk_rqstp->rq_xprt; if (task->tk_status == 0) { xprt->stat.connect_count++; @@ -832,7 +838,7 @@ static void xprt_timer(struct rpc_task *task) spin_lock_bh(&xprt->transport_lock); if (!req->rq_reply_bytes_recvd) { if (xprt->ops->timer) - xprt->ops->timer(task); + xprt->ops->timer(xprt, task); } else task->tk_status = 0; spin_unlock_bh(&xprt->transport_lock); @@ -942,6 +948,34 @@ void xprt_transmit(struct rpc_task *task) spin_unlock_bh(&xprt->transport_lock); } +static void xprt_add_backlog(struct rpc_xprt *xprt, struct rpc_task *task) +{ + set_bit(XPRT_CONGESTED, &xprt->state); + rpc_sleep_on(&xprt->backlog, task, NULL); +} + +static void xprt_wake_up_backlog(struct rpc_xprt *xprt) +{ + if (rpc_wake_up_next(&xprt->backlog) == NULL) + clear_bit(XPRT_CONGESTED, &xprt->state); +} + +static bool xprt_throttle_congested(struct rpc_xprt *xprt, struct rpc_task *task) +{ + bool ret = false; + + if (!test_bit(XPRT_CONGESTED, &xprt->state)) + goto out; + spin_lock(&xprt->reserve_lock); + if (test_bit(XPRT_CONGESTED, &xprt->state)) { + rpc_sleep_on(&xprt->backlog, task, NULL); + ret = true; + } + spin_unlock(&xprt->reserve_lock); +out: + return ret; +} + static struct rpc_rqst *xprt_dynamic_alloc_slot(struct rpc_xprt *xprt, gfp_t gfp_flags) { struct rpc_rqst *req = ERR_PTR(-EAGAIN); @@ -986,7 +1020,7 @@ void xprt_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task) task->tk_status = -ENOMEM; break; case -EAGAIN: - rpc_sleep_on(&xprt->backlog, task, NULL); + xprt_add_backlog(xprt, task); dprintk("RPC: waiting for request slot\n"); default: task->tk_status = -EAGAIN; @@ -1022,7 +1056,7 @@ static void xprt_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *req) memset(req, 0, sizeof(*req)); /* mark unused */ list_add(&req->rq_list, &xprt->free); } - rpc_wake_up_next(&xprt->backlog); + xprt_wake_up_backlog(xprt); spin_unlock(&xprt->reserve_lock); } @@ -1086,12 +1120,39 @@ EXPORT_SYMBOL_GPL(xprt_free); * xprt_reserve - allocate an RPC request slot * @task: RPC task requesting a slot allocation * - * If no more slots are available, place the task on the transport's + * If the transport is marked as being congested, or if no more + * slots are available, place the task on the transport's * backlog queue. 
*/ void xprt_reserve(struct rpc_task *task) { - struct rpc_xprt *xprt = task->tk_xprt; + struct rpc_xprt *xprt; + + task->tk_status = 0; + if (task->tk_rqstp != NULL) + return; + + task->tk_timeout = 0; + task->tk_status = -EAGAIN; + rcu_read_lock(); + xprt = rcu_dereference(task->tk_client->cl_xprt); + if (!xprt_throttle_congested(xprt, task)) + xprt->ops->alloc_slot(xprt, task); + rcu_read_unlock(); +} + +/** + * xprt_retry_reserve - allocate an RPC request slot + * @task: RPC task requesting a slot allocation + * + * If no more slots are available, place the task on the transport's + * backlog queue. + * Note that the only difference with xprt_reserve is that we now + * ignore the value of the XPRT_CONGESTED flag. + */ +void xprt_retry_reserve(struct rpc_task *task) +{ + struct rpc_xprt *xprt; task->tk_status = 0; if (task->tk_rqstp != NULL) @@ -1099,7 +1160,10 @@ void xprt_reserve(struct rpc_task *task) task->tk_timeout = 0; task->tk_status = -EAGAIN; + rcu_read_lock(); + xprt = rcu_dereference(task->tk_client->cl_xprt); xprt->ops->alloc_slot(xprt, task); + rcu_read_unlock(); } static inline __be32 xprt_alloc_xid(struct rpc_xprt *xprt) @@ -1236,6 +1300,8 @@ found: -PTR_ERR(xprt)); goto out; } + if (args->flags & XPRT_CREATE_NO_IDLE_TIMEOUT) + xprt->idle_timeout = 0; INIT_WORK(&xprt->task_cleanup, xprt_autoclose); if (xprt_has_timer(xprt)) setup_timer(&xprt->timer, xprt_init_autodisconnect, diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index 558fbab574f0..e03725bfe2b8 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -171,7 +171,7 @@ rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target, struct rpcrdma_msg *headerp, enum rpcrdma_chunktype type) { struct rpcrdma_req *req = rpcr_to_rdmar(rqst); - struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_task->tk_xprt); + struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt); int nsegs, nchunks = 0; unsigned int pos; struct rpcrdma_mr_seg *seg = req->rl_segments; @@ -366,7 +366,7 @@ rpcrdma_inline_pullup(struct rpc_rqst *rqst, int pad) int rpcrdma_marshal_req(struct rpc_rqst *rqst) { - struct rpc_xprt *xprt = rqst->rq_task->tk_xprt; + struct rpc_xprt *xprt = rqst->rq_xprt; struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); struct rpcrdma_req *req = rpcr_to_rdmar(rqst); char *base; diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index c9aa7a35f3bf..794312f22b9b 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -51,6 +51,7 @@ #include <linux/init.h> #include <linux/slab.h> #include <linux/seq_file.h> +#include <linux/sunrpc/addr.h> #include "xprt_rdma.h" @@ -426,9 +427,8 @@ xprt_rdma_set_port(struct rpc_xprt *xprt, u16 port) } static void -xprt_rdma_connect(struct rpc_task *task) +xprt_rdma_connect(struct rpc_xprt *xprt, struct rpc_task *task) { - struct rpc_xprt *xprt = (struct rpc_xprt *)task->tk_xprt; struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); if (r_xprt->rx_ep.rep_connected != 0) { @@ -475,7 +475,7 @@ xprt_rdma_reserve_xprt(struct rpc_xprt *xprt, struct rpc_task *task) static void * xprt_rdma_allocate(struct rpc_task *task, size_t size) { - struct rpc_xprt *xprt = task->tk_xprt; + struct rpc_xprt *xprt = task->tk_rqstp->rq_xprt; struct rpcrdma_req *req, *nreq; req = rpcrdma_buffer_get(&rpcx_to_rdmax(xprt)->rx_buf); @@ -627,7 +627,7 @@ static int xprt_rdma_send_request(struct rpc_task *task) { struct rpc_rqst *rqst = task->tk_rqstp; - struct rpc_xprt *xprt = task->tk_xprt; + struct 
rpc_xprt *xprt = rqst->rq_xprt; struct rpcrdma_req *req = rpcr_to_rdmar(rqst); struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 745973b729af..93726560eaa8 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -1086,7 +1086,7 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep, case RPCRDMA_MEMWINDOWS: /* Allocate one extra request's worth, for full cycling */ for (i = (buf->rb_max_requests+1) * RPCRDMA_MAX_SEGS; i; i--) { - r->r.mw = ib_alloc_mw(ia->ri_pd); + r->r.mw = ib_alloc_mw(ia->ri_pd, IB_MW_TYPE_1); if (IS_ERR(r->r.mw)) { rc = PTR_ERR(r->r.mw); dprintk("RPC: %s: ib_alloc_mw" @@ -1673,12 +1673,12 @@ rpcrdma_register_memwin_external(struct rpcrdma_mr_seg *seg, *nsegs = 1; rpcrdma_map_one(ia, seg, writing); - param.mr = ia->ri_bind_mem; + param.bind_info.mr = ia->ri_bind_mem; param.wr_id = 0ULL; /* no send cookie */ - param.addr = seg->mr_dma; - param.length = seg->mr_len; + param.bind_info.addr = seg->mr_dma; + param.bind_info.length = seg->mr_len; param.send_flags = 0; - param.mw_access_flags = mem_priv; + param.bind_info.mw_access_flags = mem_priv; DECR_CQCOUNT(&r_xprt->rx_ep); rc = ib_bind_mw(ia->ri_id->qp, seg->mr_chunk.rl_mw->r.mw, ¶m); @@ -1690,7 +1690,7 @@ rpcrdma_register_memwin_external(struct rpcrdma_mr_seg *seg, rpcrdma_unmap_one(ia, seg); } else { seg->mr_rkey = seg->mr_chunk.rl_mw->r.mw->rkey; - seg->mr_base = param.addr; + seg->mr_base = param.bind_info.addr; seg->mr_nsegs = 1; } return rc; @@ -1706,10 +1706,10 @@ rpcrdma_deregister_memwin_external(struct rpcrdma_mr_seg *seg, int rc; BUG_ON(seg->mr_nsegs != 1); - param.mr = ia->ri_bind_mem; - param.addr = 0ULL; /* unbind */ - param.length = 0; - param.mw_access_flags = 0; + param.bind_info.mr = ia->ri_bind_mem; + param.bind_info.addr = 0ULL; /* unbind */ + param.bind_info.length = 0; + param.bind_info.mw_access_flags = 0; if (*r) { param.wr_id = (u64) (unsigned long) *r; param.send_flags = IB_SEND_SIGNALED; diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index 9a66c95b5837..cc1445dc1d1a 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -235,13 +235,13 @@ struct rpcrdma_create_data_internal { }; #define RPCRDMA_INLINE_READ_THRESHOLD(rq) \ - (rpcx_to_rdmad(rq->rq_task->tk_xprt).inline_rsize) + (rpcx_to_rdmad(rq->rq_xprt).inline_rsize) #define RPCRDMA_INLINE_WRITE_THRESHOLD(rq)\ - (rpcx_to_rdmad(rq->rq_task->tk_xprt).inline_wsize) + (rpcx_to_rdmad(rq->rq_xprt).inline_wsize) #define RPCRDMA_INLINE_PAD_VALUE(rq)\ - rpcx_to_rdmad(rq->rq_task->tk_xprt).padding + rpcx_to_rdmad(rq->rq_xprt).padding /* * Statistics for RPCRDMA diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 68b0a81c31d5..ffd50348a509 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -33,6 +33,7 @@ #include <linux/udp.h> #include <linux/tcp.h> #include <linux/sunrpc/clnt.h> +#include <linux/sunrpc/addr.h> #include <linux/sunrpc/sched.h> #include <linux/sunrpc/svcsock.h> #include <linux/sunrpc/xprtsock.h> @@ -770,7 +771,7 @@ static void xs_tcp_release_xprt(struct rpc_xprt *xprt, struct rpc_task *task) goto out_release; if (req->rq_bytes_sent == req->rq_snd_buf.len) goto out_release; - set_bit(XPRT_CLOSE_WAIT, &task->tk_xprt->state); + set_bit(XPRT_CLOSE_WAIT, &xprt->state); out_release: xprt_release_xprt(xprt, task); } @@ -848,6 +849,14 @@ static void xs_tcp_close(struct rpc_xprt *xprt) xs_tcp_shutdown(xprt); } +static void 
xs_local_destroy(struct rpc_xprt *xprt) +{ + xs_close(xprt); + xs_free_peer_addresses(xprt); + xprt_free(xprt); + module_put(THIS_MODULE); +} + /** * xs_destroy - prepare to shutdown a transport * @xprt: doomed transport @@ -861,10 +870,7 @@ static void xs_destroy(struct rpc_xprt *xprt) cancel_delayed_work_sync(&transport->connect_worker); - xs_close(xprt); - xs_free_peer_addresses(xprt); - xprt_free(xprt); - module_put(THIS_MODULE); + xs_local_destroy(xprt); } static inline struct rpc_xprt *xprt_from_sock(struct sock *sk) @@ -1005,7 +1011,7 @@ static void xs_udp_data_ready(struct sock *sk, int len) UDPX_INC_STATS_BH(sk, UDP_MIB_INDATAGRAMS); - xprt_adjust_cwnd(task, copied); + xprt_adjust_cwnd(xprt, task, copied); xprt_complete_rqst(task, copied); out_unlock: @@ -1646,9 +1652,9 @@ static void xs_udp_set_buffer_size(struct rpc_xprt *xprt, size_t sndsize, size_t * * Adjust the congestion window after a retransmit timeout has occurred. */ -static void xs_udp_timer(struct rpc_task *task) +static void xs_udp_timer(struct rpc_xprt *xprt, struct rpc_task *task) { - xprt_adjust_cwnd(task, -ETIMEDOUT); + xprt_adjust_cwnd(xprt, task, -ETIMEDOUT); } static unsigned short xs_get_random_port(void) @@ -1731,7 +1737,9 @@ static int xs_bind(struct sock_xprt *transport, struct socket *sock) */ static void xs_local_rpcbind(struct rpc_task *task) { - xprt_set_bound(task->tk_xprt); + rcu_read_lock(); + xprt_set_bound(rcu_dereference(task->tk_client->cl_xprt)); + rcu_read_unlock(); } static void xs_local_set_port(struct rpc_xprt *xprt, unsigned short port) @@ -1865,13 +1873,9 @@ static int xs_local_finish_connecting(struct rpc_xprt *xprt, * @xprt: RPC transport to connect * @transport: socket transport to connect * @create_sock: function to create a socket of the correct type - * - * Invoked by a work queue tasklet. */ -static void xs_local_setup_socket(struct work_struct *work) +static int xs_local_setup_socket(struct sock_xprt *transport) { - struct sock_xprt *transport = - container_of(work, struct sock_xprt, connect_worker.work); struct rpc_xprt *xprt = &transport->xprt; struct socket *sock; int status = -EIO; @@ -1916,6 +1920,30 @@ out: xprt_clear_connecting(xprt); xprt_wake_pending_tasks(xprt, status); current->flags &= ~PF_FSTRANS; + return status; +} + +static void xs_local_connect(struct rpc_xprt *xprt, struct rpc_task *task) +{ + struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); + int ret; + + if (RPC_IS_ASYNC(task)) { + /* + * We want the AF_LOCAL connect to be resolved in the + * filesystem namespace of the process making the rpc + * call. Thus we connect synchronously. + * + * If we want to support asynchronous AF_LOCAL calls, + * we'll need to figure out how to pass a namespace to + * connect. + */ + rpc_exit(task, -ENOTCONN); + return; + } + ret = xs_local_setup_socket(transport); + if (ret && !RPC_IS_SOFTCONN(task)) + msleep_interruptible(15000); } #ifdef CONFIG_SUNRPC_SWAP @@ -2179,10 +2207,6 @@ static void xs_tcp_setup_socket(struct work_struct *work) */ xs_tcp_force_close(xprt); break; - case -ECONNREFUSED: - case -ECONNRESET: - case -ENETUNREACH: - /* retry with existing socket, after a delay */ case 0: case -EINPROGRESS: case -EALREADY: @@ -2193,6 +2217,10 @@ static void xs_tcp_setup_socket(struct work_struct *work) /* Happens, for instance, if the user specified a link * local IPv6 address without a scope-id. 
 */ + case -ECONNREFUSED: + case -ECONNRESET: + case -ENETUNREACH: + /* retry with existing socket, after a delay */ goto out; } out_eagain: @@ -2205,6 +2233,7 @@ out: /** * xs_connect - connect a socket to a remote endpoint + * @xprt: pointer to transport structure * @task: address of RPC task that manages state of connect request * * TCP: If the remote end dropped the connection, delay reconnecting. @@ -2216,9 +2245,8 @@ out: * If a UDP socket connect fails, the delay behavior here prevents * retry floods (hard mounts). */ -static void xs_connect(struct rpc_task *task) +static void xs_connect(struct rpc_xprt *xprt, struct rpc_task *task) { - struct rpc_xprt *xprt = task->tk_xprt; struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); if (transport->sock != NULL && !RPC_IS_SOFTCONN(task)) { @@ -2453,13 +2481,13 @@ static struct rpc_xprt_ops xs_local_ops = { .alloc_slot = xprt_alloc_slot, .rpcbind = xs_local_rpcbind, .set_port = xs_local_set_port, - .connect = xs_connect, + .connect = xs_local_connect, .buf_alloc = rpc_malloc, .buf_free = rpc_free, .send_request = xs_local_send_request, .set_retrans_timeout = xprt_set_retrans_timeout_def, .close = xs_close, - .destroy = xs_destroy, + .destroy = xs_local_destroy, .print_stats = xs_local_print_stats, }; @@ -2626,9 +2654,10 @@ static struct rpc_xprt *xs_setup_local(struct xprt_create *args) goto out_err; } xprt_set_bound(xprt); - INIT_DELAYED_WORK(&transport->connect_worker, - xs_local_setup_socket); xs_format_peer_addresses(xprt, "local", RPCBIND_NETID_LOCAL); + ret = ERR_PTR(xs_local_setup_socket(transport)); + if (ret) + goto out_err; break; default: ret = ERR_PTR(-EAFNOSUPPORT); @@ -2741,9 +2770,13 @@ static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args) struct rpc_xprt *xprt; struct sock_xprt *transport; struct rpc_xprt *ret; + unsigned int max_slot_table_size = xprt_max_tcp_slot_table_entries; + + if (args->flags & XPRT_CREATE_INFINITE_SLOTS) + max_slot_table_size = RPC_MAX_SLOT_TABLE_LIMIT; xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries, - xprt_max_tcp_slot_table_entries); + max_slot_table_size); if (IS_ERR(xprt)) return xprt; transport = container_of(xprt, struct sock_xprt, xprt); diff --git a/net/tipc/Kconfig b/net/tipc/Kconfig index bc41bd31eadc..c890848f9d56 100644 --- a/net/tipc/Kconfig +++ b/net/tipc/Kconfig @@ -3,8 +3,8 @@ # menuconfig TIPC - tristate "The TIPC Protocol (EXPERIMENTAL)" - depends on INET && EXPERIMENTAL + tristate "The TIPC Protocol" + depends on INET ---help--- The Transparent Inter Process Communication (TIPC) protocol is specially designed for intra-cluster communication. This protocol @@ -31,3 +31,10 @@ config TIPC_PORTS Setting this to a smaller value saves some memory, setting it higher allows for more ports. + +config TIPC_MEDIA_IB + bool "InfiniBand media type support" + depends on TIPC && INFINIBAND_IPOIB + help + Saying Y here will enable support for running TIPC on + IP-over-InfiniBand devices. 
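The new symbol is consumed a few hunks below: net/tipc/bearer.h grows stub definitions so that net/tipc/core.c can call the InfiniBand start/stop hooks unconditionally, whether or not the media type is built in. The gating idiom, quoted from that bearer.h hunk:

#ifdef CONFIG_TIPC_MEDIA_IB
int tipc_ib_media_start(void);
void tipc_ib_media_stop(void);
#else
static inline int tipc_ib_media_start(void) { return 0; }
static inline void tipc_ib_media_stop(void) { return; }
#endif

With the stubs in place, tipc_core_start_net() simply calls tipc_ib_media_start() after tipc_eth_media_start() and unwinds through tipc_core_stop_net() if either fails.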
diff --git a/net/tipc/Makefile b/net/tipc/Makefile index 6cd55d671d3a..4df8e02d9008 100644 --- a/net/tipc/Makefile +++ b/net/tipc/Makefile @@ -9,3 +9,5 @@ tipc-y += addr.o bcast.o bearer.o config.o \ name_distr.o subscr.o name_table.o net.o \ netlink.o node.o node_subscr.o port.o ref.o \ socket.o log.o eth_media.o + +tipc-$(CONFIG_TIPC_MEDIA_IB) += ib_media.o diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index 54f89f90ac33..e5f3da507823 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -584,8 +584,7 @@ static int tipc_bcbearer_send(struct sk_buff *buf, { int bp_index; - /* - * Prepare broadcast link message for reliable transmission, + /* Prepare broadcast link message for reliable transmission, * if first time trying to send it; * preparation is skipped for broadcast link protocol messages * since they are sent in an unreliable manner and don't need it @@ -611,30 +610,43 @@ static int tipc_bcbearer_send(struct sk_buff *buf, for (bp_index = 0; bp_index < MAX_BEARERS; bp_index++) { struct tipc_bearer *p = bcbearer->bpairs[bp_index].primary; struct tipc_bearer *s = bcbearer->bpairs[bp_index].secondary; + struct tipc_bearer *b = p; + struct sk_buff *tbuf; if (!p) - break; /* no more bearers to try */ + break; /* No more bearers to try */ + + if (tipc_bearer_blocked(p)) { + if (!s || tipc_bearer_blocked(s)) + continue; /* Can't use either bearer */ + b = s; + } - tipc_nmap_diff(&bcbearer->remains, &p->nodes, &bcbearer->remains_new); + tipc_nmap_diff(&bcbearer->remains, &b->nodes, + &bcbearer->remains_new); if (bcbearer->remains_new.count == bcbearer->remains.count) - continue; /* bearer pair doesn't add anything */ + continue; /* Nothing added by bearer pair */ - if (!tipc_bearer_blocked(p)) - tipc_bearer_send(p, buf, &p->media->bcast_addr); - else if (s && !tipc_bearer_blocked(s)) - /* unable to send on primary bearer */ - tipc_bearer_send(s, buf, &s->media->bcast_addr); - else - /* unable to send on either bearer */ - continue; + if (bp_index == 0) { + /* Use original buffer for first bearer */ + tipc_bearer_send(b, buf, &b->bcast_addr); + } else { + /* Avoid concurrent buffer access */ + tbuf = pskb_copy(buf, GFP_ATOMIC); + if (!tbuf) + break; + tipc_bearer_send(b, tbuf, &b->bcast_addr); + kfree_skb(tbuf); /* Bearer keeps a clone */ + } + /* Swap bearers for next packet */ if (s) { bcbearer->bpairs[bp_index].primary = s; bcbearer->bpairs[bp_index].secondary = p; } if (bcbearer->remains_new.count == 0) - break; /* all targets reached */ + break; /* All targets reached */ bcbearer->remains = bcbearer->remains_new; } @@ -774,6 +786,7 @@ void tipc_bclink_init(void) bcl->owner = &bclink->node; bcl->max_pkt = MAX_PKT_DEFAULT_MCAST; tipc_link_set_queue_limits(bcl, BCLINK_WIN_DEFAULT); + spin_lock_init(&bcbearer->bearer.lock); bcl->b_ptr = &bcbearer->bearer; bcl->state = WORKING_WORKING; strlcpy(bcl->name, tipc_bclink_name, TIPC_MAX_LINK_NAME); diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index aa62f93a9127..cb29ef7ba2f0 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -39,7 +39,7 @@ #include "bearer.h" #include "discover.h" -#define MAX_ADDR_STR 32 +#define MAX_ADDR_STR 60 static struct tipc_media *media_list[MAX_MEDIA]; static u32 media_count; @@ -89,9 +89,6 @@ int tipc_register_media(struct tipc_media *m_ptr) if ((strlen(m_ptr->name) + 1) > TIPC_MAX_MEDIA_NAME) goto exit; - if ((m_ptr->bcast_addr.media_id != m_ptr->type_id) || - !m_ptr->bcast_addr.broadcast) - goto exit; if (m_ptr->priority > TIPC_MAX_LINK_PRI) goto exit; if ((m_ptr->tolerance < TIPC_MIN_LINK_TOL) || @@ 
-407,7 +404,7 @@ restart: INIT_LIST_HEAD(&b_ptr->links); spin_lock_init(&b_ptr->lock); - res = tipc_disc_create(b_ptr, &m_ptr->bcast_addr, disc_domain); + res = tipc_disc_create(b_ptr, &b_ptr->bcast_addr, disc_domain); if (res) { bearer_disable(b_ptr); pr_warn("Bearer <%s> rejected, discovery object creation failed\n", diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h index 39f1192d04bf..09c869adcfcf 100644 --- a/net/tipc/bearer.h +++ b/net/tipc/bearer.h @@ -56,6 +56,7 @@ * Identifiers of supported TIPC media types */ #define TIPC_MEDIA_TYPE_ETH 1 +#define TIPC_MEDIA_TYPE_IB 2 /** * struct tipc_media_addr - destination address used by TIPC bearers @@ -77,7 +78,6 @@ struct tipc_bearer; * @enable_bearer: routine which enables a bearer * @disable_bearer: routine which disables a bearer * @addr2str: routine which converts media address to string - * @str2addr: routine which converts media address from string * @addr2msg: routine which converts media address to protocol message area * @msg2addr: routine which converts media address from protocol message area * @bcast_addr: media address used in broadcasting @@ -94,10 +94,9 @@ struct tipc_media { int (*enable_bearer)(struct tipc_bearer *b_ptr); void (*disable_bearer)(struct tipc_bearer *b_ptr); int (*addr2str)(struct tipc_media_addr *a, char *str_buf, int str_size); - int (*str2addr)(struct tipc_media_addr *a, char *str_buf); int (*addr2msg)(struct tipc_media_addr *a, char *msg_area); - int (*msg2addr)(struct tipc_media_addr *a, char *msg_area); - struct tipc_media_addr bcast_addr; + int (*msg2addr)(const struct tipc_bearer *b_ptr, + struct tipc_media_addr *a, char *msg_area); u32 priority; u32 tolerance; u32 window; @@ -136,6 +135,7 @@ struct tipc_bearer { char name[TIPC_MAX_BEARER_NAME]; spinlock_t lock; struct tipc_media *media; + struct tipc_media_addr bcast_addr; u32 priority; u32 window; u32 tolerance; @@ -175,6 +175,14 @@ int tipc_disable_bearer(const char *name); int tipc_eth_media_start(void); void tipc_eth_media_stop(void); +#ifdef CONFIG_TIPC_MEDIA_IB +int tipc_ib_media_start(void); +void tipc_ib_media_stop(void); +#else +static inline int tipc_ib_media_start(void) { return 0; } +static inline void tipc_ib_media_stop(void) { return; } +#endif + int tipc_media_set_priority(const char *name, u32 new_value); int tipc_media_set_window(const char *name, u32 new_value); void tipc_media_addr_printf(char *buf, int len, struct tipc_media_addr *a); diff --git a/net/tipc/core.c b/net/tipc/core.c index fc05cecd7481..7ec2c1eb94f1 100644 --- a/net/tipc/core.c +++ b/net/tipc/core.c @@ -82,6 +82,7 @@ static void tipc_core_stop_net(void) { tipc_net_stop(); tipc_eth_media_stop(); + tipc_ib_media_stop(); } /** @@ -93,8 +94,15 @@ int tipc_core_start_net(unsigned long addr) tipc_net_start(addr); res = tipc_eth_media_start(); - if (res) - tipc_core_stop_net(); + if (res < 0) + goto err; + res = tipc_ib_media_start(); + if (res < 0) + goto err; + return res; + +err: + tipc_core_stop_net(); return res; } diff --git a/net/tipc/discover.c b/net/tipc/discover.c index 1074b9587e81..eedff58d0387 100644 --- a/net/tipc/discover.c +++ b/net/tipc/discover.c @@ -129,7 +129,7 @@ void tipc_disc_recv_msg(struct sk_buff *buf, struct tipc_bearer *b_ptr) int link_fully_up; media_addr.broadcast = 1; - b_ptr->media->msg2addr(&media_addr, msg_media_addr(msg)); + b_ptr->media->msg2addr(b_ptr, &media_addr, msg_media_addr(msg)); kfree_skb(buf); /* Ensure message from node is valid and communication is permitted */ diff --git a/net/tipc/eth_media.c b/net/tipc/eth_media.c index 
2132c1ef2951..120a676a3360 100644 --- a/net/tipc/eth_media.c +++ b/net/tipc/eth_media.c @@ -77,12 +77,13 @@ static struct notifier_block notifier = { * Media-dependent "value" field stores MAC address in first 6 bytes * and zeroes out the remaining bytes. */ -static void eth_media_addr_set(struct tipc_media_addr *a, char *mac) +static void eth_media_addr_set(const struct tipc_bearer *tb_ptr, + struct tipc_media_addr *a, char *mac) { memcpy(a->value, mac, ETH_ALEN); memset(a->value + ETH_ALEN, 0, sizeof(a->value) - ETH_ALEN); a->media_id = TIPC_MEDIA_TYPE_ETH; - a->broadcast = !memcmp(mac, eth_media_info.bcast_addr.value, ETH_ALEN); + a->broadcast = !memcmp(mac, tb_ptr->bcast_addr.value, ETH_ALEN); } /** @@ -110,6 +111,7 @@ static int send_msg(struct sk_buff *buf, struct tipc_bearer *tb_ptr, skb_reset_network_header(clone); clone->dev = dev; + clone->protocol = htons(ETH_P_TIPC); dev_hard_header(clone, dev, ETH_P_TIPC, dest->value, dev->dev_addr, clone->len); dev_queue_xmit(clone); @@ -201,9 +203,13 @@ static int enable_bearer(struct tipc_bearer *tb_ptr) /* Associate TIPC bearer with Ethernet bearer */ eb_ptr->bearer = tb_ptr; tb_ptr->usr_handle = (void *)eb_ptr; + memset(tb_ptr->bcast_addr.value, 0, sizeof(tb_ptr->bcast_addr.value)); + memcpy(tb_ptr->bcast_addr.value, dev->broadcast, ETH_ALEN); + tb_ptr->bcast_addr.media_id = TIPC_MEDIA_TYPE_ETH; + tb_ptr->bcast_addr.broadcast = 1; tb_ptr->mtu = dev->mtu; tb_ptr->blocked = 0; - eth_media_addr_set(&tb_ptr->addr, (char *)dev->dev_addr); + eth_media_addr_set(tb_ptr, &tb_ptr->addr, (char *)dev->dev_addr); return 0; } @@ -302,25 +308,6 @@ static int eth_addr2str(struct tipc_media_addr *a, char *str_buf, int str_size) } /** - * eth_str2addr - convert string to Ethernet address - */ -static int eth_str2addr(struct tipc_media_addr *a, char *str_buf) -{ - char mac[ETH_ALEN]; - int r; - - r = sscanf(str_buf, "%02x:%02x:%02x:%02x:%02x:%02x", - (u32 *)&mac[0], (u32 *)&mac[1], (u32 *)&mac[2], - (u32 *)&mac[3], (u32 *)&mac[4], (u32 *)&mac[5]); - - if (r != ETH_ALEN) - return 1; - - eth_media_addr_set(a, mac); - return 0; -} - -/** * eth_addr2msg - convert Ethernet address format to message header format */ static int eth_addr2msg(struct tipc_media_addr *a, char *msg_area) @@ -334,12 +321,13 @@ static int eth_addr2msg(struct tipc_media_addr *a, char *msg_area) /** * eth_msg2addr - convert message header address format to Ethernet format */ -static int eth_msg2addr(struct tipc_media_addr *a, char *msg_area) +static int eth_msg2addr(const struct tipc_bearer *tb_ptr, + struct tipc_media_addr *a, char *msg_area) { if (msg_area[TIPC_MEDIA_TYPE_OFFSET] != TIPC_MEDIA_TYPE_ETH) return 1; - eth_media_addr_set(a, msg_area + ETH_ADDR_OFFSET); + eth_media_addr_set(tb_ptr, a, msg_area + ETH_ADDR_OFFSET); return 0; } @@ -351,11 +339,8 @@ static struct tipc_media eth_media_info = { .send_msg = send_msg, .enable_bearer = enable_bearer, .disable_bearer = disable_bearer, .addr2str = eth_addr2str, - .str2addr = eth_str2addr, .addr2msg = eth_addr2msg, .msg2addr = eth_msg2addr, - .bcast_addr = { { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, - TIPC_MEDIA_TYPE_ETH, 1 }, .priority = TIPC_DEF_LINK_PRI, .tolerance = TIPC_DEF_LINK_TOL, .window = TIPC_DEF_LINK_WIN, diff --git a/net/tipc/ib_media.c b/net/tipc/ib_media.c new file mode 100644 index 000000000000..2a2864c25e15 --- /dev/null +++ b/net/tipc/ib_media.c @@ -0,0 +1,387 @@ +/* + * net/tipc/ib_media.c: InfiniBand bearer support for TIPC + * + * Copyright (c) 2013 Patrick McHardy <kaber@trash.net> + * + * Based on eth_media.c, which carries the following 
copyright notice: + * + * Copyright (c) 2001-2007, Ericsson AB + * Copyright (c) 2005-2008, 2011, Wind River Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include <linux/if_infiniband.h> +#include "core.h" +#include "bearer.h" + +#define MAX_IB_BEARERS MAX_BEARERS + +/** + * struct ib_bearer - InfiniBand bearer data structure + * @bearer: ptr to associated "generic" bearer structure + * @dev: ptr to associated InfiniBand network device + * @tipc_packet_type: used in binding TIPC to InfiniBand driver + * @setup: work item used when enabling bearer + * @cleanup: work item used when disabling bearer + */ + +struct ib_bearer { + struct tipc_bearer *bearer; + struct net_device *dev; + struct packet_type tipc_packet_type; + struct work_struct setup; + struct work_struct cleanup; +}; + +static struct tipc_media ib_media_info; +static struct ib_bearer ib_bearers[MAX_IB_BEARERS]; +static int ib_started; + +/** + * ib_media_addr_set - initialize InfiniBand media address structure + * + * Media-dependent "value" field stores the 20-byte InfiniBand hardware address. 
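+ * (INFINIBAND_ALEN is 20: an IPoIB hardware address packs a 4-byte field carrying the queue pair number ahead of the 16-byte port GID, so the copy below fills the whole field and, unlike the Ethernet variant, leaves no trailing bytes to zero; the BUILD_BUG_ON guards that size assumption.)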
+ */ +static void ib_media_addr_set(const struct tipc_bearer *tb_ptr, + struct tipc_media_addr *a, char *mac) +{ + BUILD_BUG_ON(sizeof(a->value) < INFINIBAND_ALEN); + memcpy(a->value, mac, INFINIBAND_ALEN); + a->media_id = TIPC_MEDIA_TYPE_IB; + a->broadcast = !memcmp(mac, tb_ptr->bcast_addr.value, INFINIBAND_ALEN); +} + +/** + * send_msg - send a TIPC message out over an InfiniBand interface + */ +static int send_msg(struct sk_buff *buf, struct tipc_bearer *tb_ptr, + struct tipc_media_addr *dest) +{ + struct sk_buff *clone; + struct net_device *dev; + int delta; + + clone = skb_clone(buf, GFP_ATOMIC); + if (!clone) + return 0; + + dev = ((struct ib_bearer *)(tb_ptr->usr_handle))->dev; + delta = dev->hard_header_len - skb_headroom(buf); + + if ((delta > 0) && + pskb_expand_head(clone, SKB_DATA_ALIGN(delta), 0, GFP_ATOMIC)) { + kfree_skb(clone); + return 0; + } + + skb_reset_network_header(clone); + clone->dev = dev; + clone->protocol = htons(ETH_P_TIPC); + dev_hard_header(clone, dev, ETH_P_TIPC, dest->value, + dev->dev_addr, clone->len); + dev_queue_xmit(clone); + return 0; +} + +/** + * recv_msg - handle incoming TIPC message from an InfiniBand interface + * + * Accept only packets explicitly sent to this node, or broadcast packets; + * ignores packets sent using InfiniBand multicast, and traffic sent to other + * nodes (which can happen if interface is running in promiscuous mode). + */ +static int recv_msg(struct sk_buff *buf, struct net_device *dev, + struct packet_type *pt, struct net_device *orig_dev) +{ + struct ib_bearer *ib_ptr = (struct ib_bearer *)pt->af_packet_priv; + + if (!net_eq(dev_net(dev), &init_net)) { + kfree_skb(buf); + return 0; + } + + if (likely(ib_ptr->bearer)) { + if (likely(buf->pkt_type <= PACKET_BROADCAST)) { + buf->next = NULL; + tipc_recv_msg(buf, ib_ptr->bearer); + return 0; + } + } + kfree_skb(buf); + return 0; +} + +/** + * setup_bearer - setup association between InfiniBand bearer and interface + */ +static void setup_bearer(struct work_struct *work) +{ + struct ib_bearer *ib_ptr = + container_of(work, struct ib_bearer, setup); + + dev_add_pack(&ib_ptr->tipc_packet_type); +} + +/** + * enable_bearer - attach TIPC bearer to an InfiniBand interface + */ +static int enable_bearer(struct tipc_bearer *tb_ptr) +{ + struct net_device *dev = NULL; + struct net_device *pdev = NULL; + struct ib_bearer *ib_ptr = &ib_bearers[0]; + struct ib_bearer *stop = &ib_bearers[MAX_IB_BEARERS]; + char *driver_name = strchr((const char *)tb_ptr->name, ':') + 1; + int pending_dev = 0; + + /* Find unused InfiniBand bearer structure */ + while (ib_ptr->dev) { + if (!ib_ptr->bearer) + pending_dev++; + if (++ib_ptr == stop) + return pending_dev ? 
-EAGAIN : -EDQUOT; + } + + /* Find device with specified name */ + read_lock(&dev_base_lock); + for_each_netdev(&init_net, pdev) { + if (!strncmp(pdev->name, driver_name, IFNAMSIZ)) { + dev = pdev; + dev_hold(dev); + break; + } + } + read_unlock(&dev_base_lock); + if (!dev) + return -ENODEV; + + /* Create InfiniBand bearer for device */ + ib_ptr->dev = dev; + ib_ptr->tipc_packet_type.type = htons(ETH_P_TIPC); + ib_ptr->tipc_packet_type.dev = dev; + ib_ptr->tipc_packet_type.func = recv_msg; + ib_ptr->tipc_packet_type.af_packet_priv = ib_ptr; + INIT_LIST_HEAD(&(ib_ptr->tipc_packet_type.list)); + INIT_WORK(&ib_ptr->setup, setup_bearer); + schedule_work(&ib_ptr->setup); + + /* Associate TIPC bearer with InfiniBand bearer */ + ib_ptr->bearer = tb_ptr; + tb_ptr->usr_handle = (void *)ib_ptr; + memset(tb_ptr->bcast_addr.value, 0, sizeof(tb_ptr->bcast_addr.value)); + memcpy(tb_ptr->bcast_addr.value, dev->broadcast, INFINIBAND_ALEN); + tb_ptr->bcast_addr.media_id = TIPC_MEDIA_TYPE_IB; + tb_ptr->bcast_addr.broadcast = 1; + tb_ptr->mtu = dev->mtu; + tb_ptr->blocked = 0; + ib_media_addr_set(tb_ptr, &tb_ptr->addr, (char *)dev->dev_addr); + return 0; +} + +/** + * cleanup_bearer - break association between InfiniBand bearer and interface + * + * This routine must be invoked from a work queue because it can sleep. + */ +static void cleanup_bearer(struct work_struct *work) +{ + struct ib_bearer *ib_ptr = + container_of(work, struct ib_bearer, cleanup); + + dev_remove_pack(&ib_ptr->tipc_packet_type); + dev_put(ib_ptr->dev); + ib_ptr->dev = NULL; +} + +/** + * disable_bearer - detach TIPC bearer from an InfiniBand interface + * + * Mark InfiniBand bearer as inactive so that incoming buffers are thrown away, + * then get worker thread to complete bearer cleanup. (Can't do cleanup + * here because cleanup code needs to sleep and caller holds spinlocks.) + */ +static void disable_bearer(struct tipc_bearer *tb_ptr) +{ + struct ib_bearer *ib_ptr = (struct ib_bearer *)tb_ptr->usr_handle; + + ib_ptr->bearer = NULL; + INIT_WORK(&ib_ptr->cleanup, cleanup_bearer); + schedule_work(&ib_ptr->cleanup); +} + +/** + * recv_notification - handle device updates from OS + * + * Change the state of the InfiniBand bearer (if any) associated with the + * specified device. 
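+ * (In the switch below, carrier, MTU, and address changes only block or reactivate the bearer, while NETDEV_UNREGISTER and NETDEV_CHANGENAME disable it outright.)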
+ */ +static int recv_notification(struct notifier_block *nb, unsigned long evt, + void *dv) +{ + struct net_device *dev = (struct net_device *)dv; + struct ib_bearer *ib_ptr = &ib_bearers[0]; + struct ib_bearer *stop = &ib_bearers[MAX_IB_BEARERS]; + + if (!net_eq(dev_net(dev), &init_net)) + return NOTIFY_DONE; + + while ((ib_ptr->dev != dev)) { + if (++ib_ptr == stop) + return NOTIFY_DONE; /* couldn't find device */ + } + if (!ib_ptr->bearer) + return NOTIFY_DONE; /* bearer had been disabled */ + + ib_ptr->bearer->mtu = dev->mtu; + + switch (evt) { + case NETDEV_CHANGE: + if (netif_carrier_ok(dev)) + tipc_continue(ib_ptr->bearer); + else + tipc_block_bearer(ib_ptr->bearer->name); + break; + case NETDEV_UP: + tipc_continue(ib_ptr->bearer); + break; + case NETDEV_DOWN: + tipc_block_bearer(ib_ptr->bearer->name); + break; + case NETDEV_CHANGEMTU: + case NETDEV_CHANGEADDR: + tipc_block_bearer(ib_ptr->bearer->name); + tipc_continue(ib_ptr->bearer); + break; + case NETDEV_UNREGISTER: + case NETDEV_CHANGENAME: + tipc_disable_bearer(ib_ptr->bearer->name); + break; + } + return NOTIFY_OK; +} + +static struct notifier_block notifier = { + .notifier_call = recv_notification, + .priority = 0, +}; + +/** + * ib_addr2str - convert InfiniBand address to string + */ +static int ib_addr2str(struct tipc_media_addr *a, char *str_buf, int str_size) +{ + if (str_size < 60) /* 60 = 19 * strlen("xx:") + strlen("xx\0") */ + return 1; + + sprintf(str_buf, "%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:" + "%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x", + a->value[0], a->value[1], a->value[2], a->value[3], + a->value[4], a->value[5], a->value[6], a->value[7], + a->value[8], a->value[9], a->value[10], a->value[11], + a->value[12], a->value[13], a->value[14], a->value[15], + a->value[16], a->value[17], a->value[18], a->value[19]); + + return 0; +} + +/** + * ib_addr2msg - convert InfiniBand address format to message header format + */ +static int ib_addr2msg(struct tipc_media_addr *a, char *msg_area) +{ + memset(msg_area, 0, TIPC_MEDIA_ADDR_SIZE); + msg_area[TIPC_MEDIA_TYPE_OFFSET] = TIPC_MEDIA_TYPE_IB; + memcpy(msg_area, a->value, INFINIBAND_ALEN); + return 0; +} + +/** + * ib_msg2addr - convert message header address format to InfiniBand format + */ +static int ib_msg2addr(const struct tipc_bearer *tb_ptr, + struct tipc_media_addr *a, char *msg_area) +{ + ib_media_addr_set(tb_ptr, a, msg_area); + return 0; +} + +/* + * InfiniBand media registration info + */ +static struct tipc_media ib_media_info = { + .send_msg = send_msg, + .enable_bearer = enable_bearer, + .disable_bearer = disable_bearer, + .addr2str = ib_addr2str, + .addr2msg = ib_addr2msg, + .msg2addr = ib_msg2addr, + .priority = TIPC_DEF_LINK_PRI, + .tolerance = TIPC_DEF_LINK_TOL, + .window = TIPC_DEF_LINK_WIN, + .type_id = TIPC_MEDIA_TYPE_IB, + .name = "ib" +}; + +/** + * tipc_ib_media_start - activate InfiniBand bearer support + * + * Register InfiniBand media type with TIPC bearer code. Also register + * with OS for notifications about device state changes. 
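+ * (The media type is registered before the notifier, and ib_started is set only once both steps have succeeded, which also stops a second start attempt from registering everything twice.)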
 + */ +int tipc_ib_media_start(void) +{ + int res; + + if (ib_started) + return -EINVAL; + + res = tipc_register_media(&ib_media_info); + if (res) + return res; + + res = register_netdevice_notifier(&notifier); + if (!res) + ib_started = 1; + return res; +} + +/** + * tipc_ib_media_stop - deactivate InfiniBand bearer support + */ +void tipc_ib_media_stop(void) +{ + if (!ib_started) + return; + + flush_scheduled_work(); + unregister_netdevice_notifier(&notifier); + ib_started = 0; +} diff --git a/net/tipc/link.c b/net/tipc/link.c index daa6080a2a0c..a80feee5197a 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -2306,8 +2306,11 @@ static int link_recv_changeover_msg(struct tipc_link **l_ptr, struct tipc_msg *tunnel_msg = buf_msg(tunnel_buf); u32 msg_typ = msg_type(tunnel_msg); u32 msg_count = msg_msgcnt(tunnel_msg); + u32 bearer_id = msg_bearer_id(tunnel_msg); - dest_link = (*l_ptr)->owner->links[msg_bearer_id(tunnel_msg)]; + if (bearer_id >= MAX_BEARERS) + goto exit; + dest_link = (*l_ptr)->owner->links[bearer_id]; if (!dest_link) goto exit; if (dest_link == *l_ptr) { @@ -2521,14 +2524,16 @@ int tipc_link_recv_fragment(struct sk_buff **pending, struct sk_buff **fb, struct tipc_msg *imsg = (struct tipc_msg *)msg_data(fragm); u32 msg_sz = msg_size(imsg); u32 fragm_sz = msg_data_sz(fragm); - u32 exp_fragm_cnt = msg_sz/fragm_sz + !!(msg_sz % fragm_sz); + u32 exp_fragm_cnt; u32 max = TIPC_MAX_USER_MSG_SIZE + NAMED_H_SIZE; + if (msg_type(imsg) == TIPC_MCAST_MSG) max = TIPC_MAX_USER_MSG_SIZE + MCAST_H_SIZE; - if (msg_size(imsg) > max) { + if (fragm_sz == 0 || msg_size(imsg) > max) { kfree_skb(fbuf); return 0; } + exp_fragm_cnt = msg_sz / fragm_sz + !!(msg_sz % fragm_sz); pbuf = tipc_buf_acquire(msg_size(imsg)); if (pbuf != NULL) { pbuf->next = *pending; diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c index 46754779fd3d..24b167914311 100644 --- a/net/tipc/name_table.c +++ b/net/tipc/name_table.c @@ -473,11 +473,10 @@ static void tipc_nameseq_subscribe(struct name_seq *nseq, static struct name_seq *nametbl_find_seq(u32 type) { struct hlist_head *seq_head; - struct hlist_node *seq_node; struct name_seq *ns; seq_head = &table.types[hash(type)]; - hlist_for_each_entry(ns, seq_node, seq_head, ns_list) { + hlist_for_each_entry(ns, seq_head, ns_list) { if (ns->type == type) return ns; } @@ -853,7 +852,6 @@ static int nametbl_list(char *buf, int len, u32 depth_info, u32 type, u32 lowbound, u32 upbound) { struct hlist_head *seq_head; - struct hlist_node *seq_node; struct name_seq *seq; int all_types; int ret = 0; @@ -873,7 +871,7 @@ static int nametbl_list(char *buf, int len, u32 depth_info, upbound = ~0; for (i = 0; i < TIPC_NAMETBL_SIZE; i++) { seq_head = &table.types[i]; - hlist_for_each_entry(seq, seq_node, seq_head, ns_list) { + hlist_for_each_entry(seq, seq_head, ns_list) { ret += nameseq_list(seq, buf + ret, len - ret, depth, seq->type, lowbound, upbound, i); @@ -889,7 +887,7 @@ static int nametbl_list(char *buf, int len, u32 depth_info, ret += nametbl_header(buf + ret, len - ret, depth); i = hash(type); seq_head = &table.types[i]; - hlist_for_each_entry(seq, seq_node, seq_head, ns_list) { + hlist_for_each_entry(seq, seq_head, ns_list) { if (seq->type == type) { ret += nameseq_list(seq, buf + ret, len - ret, depth, type, diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c index 6675914dc592..8bcd4985d0fb 100644 --- a/net/tipc/netlink.c +++ b/net/tipc/netlink.c @@ -44,7 +44,7 @@ static int handle_cmd(struct sk_buff *skb, struct genl_info *info) struct nlmsghdr *rep_nlh; struct nlmsghdr 
*req_nlh = info->nlhdr; struct tipc_genlmsghdr *req_userhdr = info->userhdr; - int hdr_space = NLMSG_SPACE(GENL_HDRLEN + TIPC_GENL_HDRLEN); + int hdr_space = nlmsg_total_size(GENL_HDRLEN + TIPC_GENL_HDRLEN); u16 cmd; if ((req_userhdr->cmd & 0xC000) && (!capable(CAP_NET_ADMIN))) @@ -53,8 +53,8 @@ static int handle_cmd(struct sk_buff *skb, struct genl_info *info) cmd = req_userhdr->cmd; rep_buf = tipc_cfg_do_cmd(req_userhdr->dest, cmd, - NLMSG_DATA(req_nlh) + GENL_HDRLEN + TIPC_GENL_HDRLEN, - NLMSG_PAYLOAD(req_nlh, GENL_HDRLEN + TIPC_GENL_HDRLEN), + nlmsg_data(req_nlh) + GENL_HDRLEN + TIPC_GENL_HDRLEN, + nlmsg_attrlen(req_nlh, GENL_HDRLEN + TIPC_GENL_HDRLEN), hdr_space); if (rep_buf) { diff --git a/net/tipc/node.c b/net/tipc/node.c index 48f39dd3eae8..6e6c434872e8 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -69,12 +69,11 @@ static unsigned int tipc_hashfn(u32 addr) struct tipc_node *tipc_node_find(u32 addr) { struct tipc_node *node; - struct hlist_node *pos; if (unlikely(!in_own_cluster_exact(addr))) return NULL; - hlist_for_each_entry(node, pos, &node_htable[tipc_hashfn(addr)], hash) { + hlist_for_each_entry(node, &node_htable[tipc_hashfn(addr)], hash) { if (node->addr == addr) return node; } diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 9b4e4833a484..515ce38e4f4c 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -43,7 +43,8 @@ #define SS_LISTENING -1 /* socket is listening */ #define SS_READY -2 /* socket is connectionless */ -#define OVERLOAD_LIMIT_BASE 10000 +#define CONN_OVERLOAD_LIMIT ((TIPC_FLOW_CONTROL_WIN * 2 + 1) * \ + SKB_TRUESIZE(TIPC_MAX_USER_MSG_SIZE)) #define CONN_TIMEOUT_DEFAULT 8000 /* default connect timeout = 8s */ struct tipc_sock { @@ -129,19 +130,6 @@ static void advance_rx_queue(struct sock *sk) } /** - * discard_rx_queue - discard all buffers in socket receive queue - * - * Caller must hold socket lock - */ -static void discard_rx_queue(struct sock *sk) -{ - struct sk_buff *buf; - - while ((buf = __skb_dequeue(&sk->sk_receive_queue))) - kfree_skb(buf); -} - -/** * reject_rx_queue - reject all buffers in socket receive queue * * Caller must hold socket lock @@ -215,7 +203,6 @@ static int tipc_create(struct net *net, struct socket *sock, int protocol, sock_init_data(sock, sk); sk->sk_backlog_rcv = backlog_rcv; - sk->sk_rcvbuf = TIPC_FLOW_CONTROL_WIN * 2 * TIPC_MAX_USER_MSG_SIZE * 2; sk->sk_data_ready = tipc_data_ready; sk->sk_write_space = tipc_write_space; tipc_sk(sk)->p = tp_ptr; @@ -292,7 +279,7 @@ static int release(struct socket *sock) res = tipc_deleteport(tport->ref); /* Discard any remaining (connection-based) messages in receive queue */ - discard_rx_queue(sk); + __skb_queue_purge(&sk->sk_receive_queue); /* Reject any messages that accumulated in backlog queue */ sock->state = SS_DISCONNECTING; @@ -516,8 +503,7 @@ static int send_msg(struct kiocb *iocb, struct socket *sock, if (unlikely((m->msg_namelen < sizeof(*dest)) || (dest->family != AF_TIPC))) return -EINVAL; - if ((total_len > TIPC_MAX_USER_MSG_SIZE) || - (m->msg_iovlen > (unsigned int)INT_MAX)) + if (total_len > TIPC_MAX_USER_MSG_SIZE) return -EMSGSIZE; if (iocb) @@ -625,8 +611,7 @@ static int send_packet(struct kiocb *iocb, struct socket *sock, if (unlikely(dest)) return send_msg(iocb, sock, m, total_len); - if ((total_len > TIPC_MAX_USER_MSG_SIZE) || - (m->msg_iovlen > (unsigned int)INT_MAX)) + if (total_len > TIPC_MAX_USER_MSG_SIZE) return -EMSGSIZE; if (iocb) @@ -711,8 +696,7 @@ static int send_stream(struct kiocb *iocb, struct socket *sock, goto exit; } - if ((total_len 
 > (unsigned int)INT_MAX) || - (m->msg_iovlen > (unsigned int)INT_MAX)) { + if (total_len > (unsigned int)INT_MAX) { res = -EMSGSIZE; goto exit; } @@ -806,6 +790,7 @@ static void set_orig_addr(struct msghdr *m, struct tipc_msg *msg) if (addr) { addr->family = AF_TIPC; addr->addrtype = TIPC_ADDR_ID; + memset(&addr->addr, 0, sizeof(addr->addr)); addr->addr.id.ref = msg_origport(msg); addr->addr.id.node = msg_orignode(msg); addr->addr.name.domain = 0; /* could leave uninitialized */ @@ -920,6 +905,9 @@ static int recv_msg(struct kiocb *iocb, struct socket *sock, goto exit; } + /* will be updated in set_orig_addr() if needed */ + m->msg_namelen = 0; + timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); restart: @@ -1029,6 +1017,9 @@ static int recv_stream(struct kiocb *iocb, struct socket *sock, goto exit; } + /* will be updated in set_orig_addr() if needed */ + m->msg_namelen = 0; + target = sock_rcvlowat(sk, flags & MSG_WAITALL, buf_len); timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); @@ -1155,34 +1146,6 @@ static void tipc_data_ready(struct sock *sk, int len) } /** - * rx_queue_full - determine if receive queue can accept another message - * @msg: message to be added to queue - * @queue_size: current size of queue - * @base: nominal maximum size of queue - * - * Returns 1 if queue is unable to accept message, 0 otherwise - */ -static int rx_queue_full(struct tipc_msg *msg, u32 queue_size, u32 base) -{ - u32 threshold; - u32 imp = msg_importance(msg); - - if (imp == TIPC_LOW_IMPORTANCE) - threshold = base; - else if (imp == TIPC_MEDIUM_IMPORTANCE) - threshold = base * 2; - else if (imp == TIPC_HIGH_IMPORTANCE) - threshold = base * 100; - else - return 0; - - if (msg_connected(msg)) - threshold *= 4; - - return queue_size >= threshold; -} - -/** * filter_connect - Handle all incoming messages for a connection-based socket * @tsock: TIPC socket * @msg: message @@ -1260,6 +1223,36 @@ static u32 filter_connect(struct tipc_sock *tsock, struct sk_buff **buf) } /** + * rcvbuf_limit - get the proper overload limit of socket receive queue + * @sk: socket + * @buf: message + * + * For all connection-oriented messages, irrespective of importance, + * the default overload value (i.e. 67MB) is set as the limit. 
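+ * (That figure follows from CONN_OVERLOAD_LIMIT above: taking the usual TIPC_FLOW_CONTROL_WIN of 512 and TIPC_MAX_USER_MSG_SIZE of 66000, (512 * 2 + 1) * SKB_TRUESIZE(66000) comes to roughly 67MB. The connectionless figures below are sk_rcvbuf << (importance + 5) evaluated for a 160KiB sk_rcvbuf, each level doubling the previous one.)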
 + * + * For all connectionless messages, by default new queue limits are + * as below: + * + * TIPC_LOW_IMPORTANCE (5MB) + * TIPC_MEDIUM_IMPORTANCE (10MB) + * TIPC_HIGH_IMPORTANCE (20MB) + * TIPC_CRITICAL_IMPORTANCE (40MB) + * + * Returns overload limit according to corresponding message importance + */ +static unsigned int rcvbuf_limit(struct sock *sk, struct sk_buff *buf) +{ + struct tipc_msg *msg = buf_msg(buf); + unsigned int limit; + + if (msg_connected(msg)) + limit = CONN_OVERLOAD_LIMIT; + else + limit = sk->sk_rcvbuf << (msg_importance(msg) + 5); + return limit; +} + +/** * filter_rcv - validate incoming message * @sk: socket * @buf: message @@ -1275,7 +1268,7 @@ static u32 filter_rcv(struct sock *sk, struct sk_buff *buf) { struct socket *sock = sk->sk_socket; struct tipc_msg *msg = buf_msg(buf); - u32 recv_q_len; + unsigned int limit = rcvbuf_limit(sk, buf); u32 res = TIPC_OK; /* Reject message if it is wrong sort of message for socket */ @@ -1292,15 +1285,13 @@ static u32 filter_rcv(struct sock *sk, struct sk_buff *buf) } /* Reject message if there isn't room to queue it */ - recv_q_len = skb_queue_len(&sk->sk_receive_queue); - if (unlikely(recv_q_len >= (OVERLOAD_LIMIT_BASE / 2))) { - if (rx_queue_full(msg, recv_q_len, OVERLOAD_LIMIT_BASE / 2)) - return TIPC_ERR_OVERLOAD; - } + if (sk_rmem_alloc_get(sk) + buf->truesize >= limit) + return TIPC_ERR_OVERLOAD; - /* Enqueue message (finally!) */ + /* Enqueue message */ TIPC_SKB_CB(buf)->handle = 0; __skb_queue_tail(&sk->sk_receive_queue, buf); + skb_set_owner_r(buf, sk); sk->sk_data_ready(sk, 0); return TIPC_OK; @@ -1349,7 +1340,7 @@ static u32 dispatch(struct tipc_port *tport, struct sk_buff *buf) if (!sock_owned_by_user(sk)) { res = filter_rcv(sk, buf); } else { - if (sk_add_backlog(sk, buf, sk->sk_rcvbuf)) + if (sk_add_backlog(sk, buf, rcvbuf_limit(sk, buf))) res = TIPC_ERR_OVERLOAD; else res = TIPC_OK; @@ -1583,6 +1574,7 @@ static int accept(struct socket *sock, struct socket *new_sock, int flags) } else { __skb_dequeue(&sk->sk_receive_queue); __skb_queue_head(&new_sk->sk_receive_queue, buf); + skb_set_owner_r(buf, new_sk); } release_sock(new_sk); @@ -1637,7 +1629,7 @@ restart: case SS_DISCONNECTING: /* Discard any unreceived messages */ - discard_rx_queue(sk); + __skb_queue_purge(&sk->sk_receive_queue); /* Wake up anyone sleeping in poll */ sk->sk_state_change(sk); diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 5b5c876c80e9..826e09938bff 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -263,9 +263,8 @@ static struct sock *__unix_find_socket_byname(struct net *net, int len, int type, unsigned int hash) { struct sock *s; - struct hlist_node *node; - sk_for_each(s, node, &unix_socket_table[hash ^ type]) { + sk_for_each(s, &unix_socket_table[hash ^ type]) { struct unix_sock *u = unix_sk(s); if (!net_eq(sock_net(s), net)) @@ -298,10 +297,9 @@ static inline struct sock *unix_find_socket_byname(struct net *net, static struct sock *unix_find_socket_byinode(struct inode *i) { struct sock *s; - struct hlist_node *node; spin_lock(&unix_table_lock); - sk_for_each(s, node, + sk_for_each(s, &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) { struct dentry *dentry = unix_sk(s)->path.dentry; @@ -384,7 +382,7 @@ static void unix_sock_destructor(struct sock *sk) #endif } -static int unix_release_sock(struct sock *sk, int embrion) +static void unix_release_sock(struct sock *sk, int embrion) { struct unix_sock *u = unix_sk(sk); struct path path; @@ -453,8 +451,6 @@ static int unix_release_sock(struct sock *sk, int 
embrion) if (unix_tot_inflight) unix_gc(); /* Garbage collect fds */ - - return 0; } static void init_peercred(struct sock *sk) @@ -701,9 +697,10 @@ static int unix_release(struct socket *sock) if (!sk) return 0; + unix_release_sock(sk, 0); sock->sk = NULL; - return unix_release_sock(sk, 0); + return 0; } static int unix_autobind(struct socket *sock) @@ -1343,7 +1340,6 @@ static void unix_destruct_scm(struct sk_buff *skb) struct scm_cookie scm; memset(&scm, 0, sizeof(scm)); scm.pid = UNIXCB(skb).pid; - scm.cred = UNIXCB(skb).cred; if (UNIXCB(skb).fp) unix_detach_fds(&scm, skb); @@ -1394,8 +1390,8 @@ static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool sen int err = 0; UNIXCB(skb).pid = get_pid(scm->pid); - if (scm->cred) - UNIXCB(skb).cred = get_cred(scm->cred); + UNIXCB(skb).uid = scm->creds.uid; + UNIXCB(skb).gid = scm->creds.gid; UNIXCB(skb).fp = NULL; if (scm->fp && send_fds) err = unix_attach_fds(scm, skb); @@ -1412,13 +1408,13 @@ static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool sen static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock, const struct sock *other) { - if (UNIXCB(skb).cred) + if (UNIXCB(skb).pid) return; if (test_bit(SOCK_PASSCRED, &sock->flags) || !other->sk_socket || test_bit(SOCK_PASSCRED, &other->sk_socket->flags)) { UNIXCB(skb).pid = get_pid(task_tgid(current)); - UNIXCB(skb).cred = get_current_cred(); + current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid); } } @@ -1822,7 +1818,7 @@ static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock, siocb->scm = &tmp_scm; memset(&tmp_scm, 0, sizeof(tmp_scm)); } - scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).cred); + scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid); unix_set_secdata(siocb->scm, skb); if (!(flags & MSG_PEEK)) { @@ -1862,10 +1858,10 @@ out: } /* - * Sleep until data has arrive. But check for races.. + * Sleep until more data has arrived. But check for races.. 
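+ * ("last" is the tail of the receive queue as the caller last saw it; waking only when skb_peek_tail() moves past it stops a reader that has already peeked beyond all queued data, via MSG_PEEK with an offset, from being woken by a queue that is non-empty yet holds nothing new.)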
*/ - -static long unix_stream_data_wait(struct sock *sk, long timeo) +static long unix_stream_data_wait(struct sock *sk, long timeo, + struct sk_buff *last) { DEFINE_WAIT(wait); @@ -1874,7 +1870,7 @@ static long unix_stream_data_wait(struct sock *sk, long timeo) for (;;) { prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); - if (!skb_queue_empty(&sk->sk_receive_queue) || + if (skb_peek_tail(&sk->sk_receive_queue) != last || sk->sk_err || (sk->sk_shutdown & RCV_SHUTDOWN) || signal_pending(current) || @@ -1893,8 +1889,6 @@ static long unix_stream_data_wait(struct sock *sk, long timeo) return timeo; } - - static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, size_t size, int flags) @@ -1939,14 +1933,12 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, goto out; } - skip = sk_peek_offset(sk, flags); - do { int chunk; - struct sk_buff *skb; + struct sk_buff *skb, *last; unix_state_lock(sk); - skb = skb_peek(&sk->sk_receive_queue); + last = skb = skb_peek(&sk->sk_receive_queue); again: if (skb == NULL) { unix_sk(sk)->recursion_level = 0; @@ -1969,7 +1961,7 @@ again: break; mutex_unlock(&u->readlock); - timeo = unix_stream_data_wait(sk, timeo); + timeo = unix_stream_data_wait(sk, timeo, last); if (signal_pending(current) || mutex_lock_interruptible(&u->readlock)) { @@ -1983,10 +1975,13 @@ again: break; } - if (skip >= skb->len) { + skip = sk_peek_offset(sk, flags); + while (skip >= skb->len) { skip -= skb->len; + last = skb; skb = skb_peek_next(skb, &sk->sk_receive_queue); - goto again; + if (!skb) + goto again; } unix_state_unlock(sk); @@ -1994,11 +1989,12 @@ again: if (check_creds) { /* Never glue messages from different writers */ if ((UNIXCB(skb).pid != siocb->scm->pid) || - (UNIXCB(skb).cred != siocb->scm->cred)) + !uid_eq(UNIXCB(skb).uid, siocb->scm->creds.uid) || + !gid_eq(UNIXCB(skb).gid, siocb->scm->creds.gid)) break; - } else { + } else if (test_bit(SOCK_PASSCRED, &sock->flags)) { /* Copy credentials */ - scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).cred); + scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid); check_creds = 1; } @@ -2199,7 +2195,9 @@ static unsigned int unix_dgram_poll(struct file *file, struct socket *sock, /* exceptional events? */ if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) - mask |= POLLERR; + mask |= POLLERR | + (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? 
POLLPRI : 0); + if (sk->sk_shutdown & RCV_SHUTDOWN) mask |= POLLRDHUP | POLLIN | POLLRDNORM; if (sk->sk_shutdown == SHUTDOWN_MASK) @@ -2402,7 +2400,7 @@ static int __net_init unix_net_init(struct net *net) goto out; #ifdef CONFIG_PROC_FS - if (!proc_net_fops_create(net, "unix", 0, &unix_seq_fops)) { + if (!proc_create("unix", 0, net->proc_net, &unix_seq_fops)) { unix_sysctl_unregister(net); goto out; } @@ -2415,7 +2413,7 @@ out: static void __net_exit unix_net_exit(struct net *net) { unix_sysctl_unregister(net); - proc_net_remove(net, "unix"); + remove_proc_entry("unix", net->proc_net); } static struct pernet_operations unix_net_ops = { @@ -2426,9 +2424,8 @@ static struct pernet_operations unix_net_ops = { static int __init af_unix_init(void) { int rc = -1; - struct sk_buff *dummy_skb; - BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof(dummy_skb->cb)); + BUILD_BUG_ON(sizeof(struct unix_skb_parms) > FIELD_SIZEOF(struct sk_buff, cb)); rc = proto_register(&unix_proto, 1); if (rc != 0) { diff --git a/net/unix/diag.c b/net/unix/diag.c index 5ac19dc1d5e4..d591091603bf 100644 --- a/net/unix/diag.c +++ b/net/unix/diag.c @@ -192,10 +192,9 @@ static int unix_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) slot < ARRAY_SIZE(unix_socket_table); s_num = 0, slot++) { struct sock *sk; - struct hlist_node *node; num = 0; - sk_for_each(sk, node, &unix_socket_table[slot]) { + sk_for_each(sk, &unix_socket_table[slot]) { if (!net_eq(sock_net(sk), net)) continue; if (num < s_num) @@ -226,9 +225,7 @@ static struct sock *unix_lookup_by_ino(int ino) spin_lock(&unix_table_lock); for (i = 0; i < ARRAY_SIZE(unix_socket_table); i++) { - struct hlist_node *node; - - sk_for_each(sk, node, &unix_socket_table[i]) + sk_for_each(sk, &unix_socket_table[i]) if (ino == sock_i_ino(sk)) { sock_hold(sk); spin_unlock(&unix_table_lock); diff --git a/net/unix/garbage.c b/net/unix/garbage.c index b6f4b994eb35..9bc73f87f64a 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -99,7 +99,7 @@ unsigned int unix_tot_inflight; struct sock *unix_get_socket(struct file *filp) { struct sock *u_sock = NULL; - struct inode *inode = filp->f_path.dentry->d_inode; + struct inode *inode = file_inode(filp); /* * Socket ? 
 @@ -185,7 +185,7 @@ static void scan_inflight(struct sock *x, void (*func)(struct unix_sock *), * have been added to the queues after * starting the garbage collection */ - if (u->gc_candidate) { + if (test_bit(UNIX_GC_CANDIDATE, &u->gc_flags)) { hit = true; func(u); } @@ -254,7 +254,7 @@ static void inc_inflight_move_tail(struct unix_sock *u) * of the list, so that it's checked even if it was already * passed over */ - if (u->gc_maybe_cycle) + if (test_bit(UNIX_GC_MAYBE_CYCLE, &u->gc_flags)) list_move_tail(&u->link, &gc_candidates); } @@ -315,8 +315,8 @@ void unix_gc(void) BUG_ON(total_refs < inflight_refs); if (total_refs == inflight_refs) { list_move_tail(&u->link, &gc_candidates); - u->gc_candidate = 1; - u->gc_maybe_cycle = 1; + __set_bit(UNIX_GC_CANDIDATE, &u->gc_flags); + __set_bit(UNIX_GC_MAYBE_CYCLE, &u->gc_flags); } } @@ -344,7 +344,7 @@ void unix_gc(void) if (atomic_long_read(&u->inflight) > 0) { list_move_tail(&u->link, &not_cycle_list); - u->gc_maybe_cycle = 0; + __clear_bit(UNIX_GC_MAYBE_CYCLE, &u->gc_flags); scan_children(&u->sk, inc_inflight_move_tail, NULL); } } @@ -356,7 +356,7 @@ void unix_gc(void) */ while (!list_empty(&not_cycle_list)) { u = list_entry(not_cycle_list.next, struct unix_sock, link); - u->gc_candidate = 0; + __clear_bit(UNIX_GC_CANDIDATE, &u->gc_flags); list_move_tail(&u->link, &gc_inflight_list); } diff --git a/net/vmw_vsock/Kconfig b/net/vmw_vsock/Kconfig new file mode 100644 index 000000000000..b5fa7e40cdcb --- /dev/null +++ b/net/vmw_vsock/Kconfig @@ -0,0 +1,28 @@ +# +# Vsock protocol +# + +config VSOCKETS + tristate "Virtual Socket protocol" + help + Virtual Socket Protocol is a socket protocol similar to TCP/IP + allowing communication between Virtual Machines and the hypervisor + or host. + + You should also select one or more hypervisor-specific transports + below. + + To compile this driver as a module, choose M here: the module + will be called vsock. If unsure, say N. + +config VMWARE_VMCI_VSOCKETS + tristate "VMware VMCI transport for Virtual Sockets" + depends on VSOCKETS && VMWARE_VMCI + help + This module implements a VMCI transport for Virtual Sockets. + + Enable this transport if your Virtual Machine runs on a VMware + hypervisor. + + To compile this driver as a module, choose M here: the module + will be called vmw_vsock_vmci_transport. If unsure, say N. diff --git a/net/vmw_vsock/Makefile b/net/vmw_vsock/Makefile new file mode 100644 index 000000000000..2ce52d70f224 --- /dev/null +++ b/net/vmw_vsock/Makefile @@ -0,0 +1,7 @@ +obj-$(CONFIG_VSOCKETS) += vsock.o +obj-$(CONFIG_VMWARE_VMCI_VSOCKETS) += vmw_vsock_vmci_transport.o + +vsock-y += af_vsock.o vsock_addr.o + +vmw_vsock_vmci_transport-y += vmci_transport.o vmci_transport_notify.o \ + vmci_transport_notify_qstate.o diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c new file mode 100644 index 000000000000..3f77f42a3b58 --- /dev/null +++ b/net/vmw_vsock/af_vsock.c @@ -0,0 +1,2014 @@ +/* + * VMware vSockets Driver + * + * Copyright (C) 2007-2013 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ */ + +/* Implementation notes: + * + * - There are two kinds of sockets: those created by user action (such as + * calling socket(2)) and those created by incoming connection request packets. + * + * - There are two "global" tables, one for bound sockets (sockets that have + * specified an address that they are responsible for) and one for connected + * sockets (sockets that have established a connection with another socket). + * These tables are "global" in that all sockets on the system are placed + * within them. - Note, though, that the bound table contains an extra entry + * for a list of unbound sockets and SOCK_DGRAM sockets will always remain in + * that list. The bound table is used solely for lookup of sockets when packets + * are received and that's not necessary for SOCK_DGRAM sockets since we create + * a datagram handle for each and need not perform a lookup. Keeping SOCK_DGRAM + * sockets out of the bound hash buckets will reduce the chance of collisions + * when looking for SOCK_STREAM sockets and prevents us from having to check the + * socket type in the hash table lookups. + * + * - Sockets created by user action will either be "client" sockets that + * initiate a connection or "server" sockets that listen for connections; we do + * not support simultaneous connects (two "client" sockets connecting). + * + * - "Server" sockets are referred to as listener sockets throughout this + * implementation because they are in the SS_LISTEN state. When a connection + * request is received (the second kind of socket mentioned above), we create a + * new socket and refer to it as a pending socket. These pending sockets are + * placed on the pending connection list of the listener socket. When future + * packets are received for the address the listener socket is bound to, we + * check if the source of the packet is from one that has an existing pending + * connection. If it does, we process the packet for the pending socket. When + * that socket reaches the connected state, it is removed from the listener + * socket's pending list and enqueued in the listener socket's accept queue. + * Callers of accept(2) will accept connected sockets from the listener socket's + * accept queue. If the socket cannot be accepted for some reason then it is + * marked rejected. Once the connection is accepted, it is owned by the user + * process and the responsibility for cleanup falls with that user process. + * + * - It is possible that these pending sockets will never reach the connected + * state; in fact, we may never receive another packet after the connection + * request. Because of this, we must schedule a cleanup function to run in the + * future, after some amount of time passes where a connection should have been + * established. This function ensures that the socket is off all lists so it + * cannot be retrieved, then drops all references to the socket so it is cleaned + * up (sock_put() -> sk_free() -> our sk_destruct implementation). Note this + * function will also cleanup rejected sockets, those that reach the connected + * state but leave it before they have been accepted. + * + * - Sockets created by user action will be cleaned up when the user process + * calls close(2), causing our release implementation to be called. Our release + * implementation will perform some cleanup then drop the last reference so our + * sk_destruct implementation is invoked. Our sk_destruct implementation will + * perform additional cleanup that's common for both types of sockets. 
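+ * + * (The timeout-driven cleanup described above is implemented by + * vsock_pending_work(), further down in this file.)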
+ * + * - A socket's reference count is what ensures that the structure won't be + * freed. Each entry in a list (such as the "global" bound and connected tables + * and the listener socket's pending list and connected queue) ensures a + * reference. When we defer work until process context and pass a socket as our + * argument, we must ensure the reference count is increased to ensure the + * socket isn't freed before the function is run; the deferred function will + * then drop the reference. + */ + +#include <linux/types.h> +#include <linux/bitops.h> +#include <linux/cred.h> +#include <linux/init.h> +#include <linux/io.h> +#include <linux/kernel.h> +#include <linux/kmod.h> +#include <linux/list.h> +#include <linux/miscdevice.h> +#include <linux/module.h> +#include <linux/mutex.h> +#include <linux/net.h> +#include <linux/poll.h> +#include <linux/skbuff.h> +#include <linux/smp.h> +#include <linux/socket.h> +#include <linux/stddef.h> +#include <linux/unistd.h> +#include <linux/wait.h> +#include <linux/workqueue.h> +#include <net/sock.h> + +#include "af_vsock.h" + +static int __vsock_bind(struct sock *sk, struct sockaddr_vm *addr); +static void vsock_sk_destruct(struct sock *sk); +static int vsock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); + +/* Protocol family. */ +static struct proto vsock_proto = { + .name = "AF_VSOCK", + .owner = THIS_MODULE, + .obj_size = sizeof(struct vsock_sock), +}; + +/* The default peer timeout indicates how long we will wait for a peer response + * to a control message. + */ +#define VSOCK_DEFAULT_CONNECT_TIMEOUT (2 * HZ) + +#define SS_LISTEN 255 + +static const struct vsock_transport *transport; +static DEFINE_MUTEX(vsock_register_mutex); + +/**** EXPORTS ****/ + +/* Get the ID of the local context. This is transport dependent. */ + +int vm_sockets_get_local_cid(void) +{ + return transport->get_local_cid(); +} +EXPORT_SYMBOL_GPL(vm_sockets_get_local_cid); + +/**** UTILS ****/ + +/* Each bound VSocket is stored in the bind hash table and each connected + * VSocket is stored in the connected hash table. + * + * Unbound sockets are all put on the same list attached to the end of the hash + * table (vsock_unbound_sockets). Bound sockets are added to the hash table in + * the bucket that their local address hashes to (vsock_bound_sockets(addr) + * represents the list that addr hashes to). + * + * Specifically, we initialize the vsock_bind_table array to a size of + * VSOCK_HASH_SIZE + 1 so that vsock_bind_table[0] through + * vsock_bind_table[VSOCK_HASH_SIZE - 1] are for bound sockets and + * vsock_bind_table[VSOCK_HASH_SIZE] is for unbound sockets. The hash function + * mods with VSOCK_HASH_SIZE - 1 to ensure this. + */ +#define VSOCK_HASH_SIZE 251 +#define MAX_PORT_RETRIES 24 + +#define VSOCK_HASH(addr) ((addr)->svm_port % (VSOCK_HASH_SIZE - 1)) +#define vsock_bound_sockets(addr) (&vsock_bind_table[VSOCK_HASH(addr)]) +#define vsock_unbound_sockets (&vsock_bind_table[VSOCK_HASH_SIZE]) + +/* XXX This can probably be implemented in a better way. 
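+ * (As defined below, the hash mixes only the peer's CID with the local port, so every connection from one peer to one local port lands in the same bucket.)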
*/ +#define VSOCK_CONN_HASH(src, dst) \ + (((src)->svm_cid ^ (dst)->svm_port) % (VSOCK_HASH_SIZE - 1)) +#define vsock_connected_sockets(src, dst) \ + (&vsock_connected_table[VSOCK_CONN_HASH(src, dst)]) +#define vsock_connected_sockets_vsk(vsk) \ + vsock_connected_sockets(&(vsk)->remote_addr, &(vsk)->local_addr) + +static struct list_head vsock_bind_table[VSOCK_HASH_SIZE + 1]; +static struct list_head vsock_connected_table[VSOCK_HASH_SIZE]; +static DEFINE_SPINLOCK(vsock_table_lock); + +static void vsock_init_tables(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(vsock_bind_table); i++) + INIT_LIST_HEAD(&vsock_bind_table[i]); + + for (i = 0; i < ARRAY_SIZE(vsock_connected_table); i++) + INIT_LIST_HEAD(&vsock_connected_table[i]); +} + +static void __vsock_insert_bound(struct list_head *list, + struct vsock_sock *vsk) +{ + sock_hold(&vsk->sk); + list_add(&vsk->bound_table, list); +} + +static void __vsock_insert_connected(struct list_head *list, + struct vsock_sock *vsk) +{ + sock_hold(&vsk->sk); + list_add(&vsk->connected_table, list); +} + +static void __vsock_remove_bound(struct vsock_sock *vsk) +{ + list_del_init(&vsk->bound_table); + sock_put(&vsk->sk); +} + +static void __vsock_remove_connected(struct vsock_sock *vsk) +{ + list_del_init(&vsk->connected_table); + sock_put(&vsk->sk); +} + +static struct sock *__vsock_find_bound_socket(struct sockaddr_vm *addr) +{ + struct vsock_sock *vsk; + + list_for_each_entry(vsk, vsock_bound_sockets(addr), bound_table) + if (addr->svm_port == vsk->local_addr.svm_port) + return sk_vsock(vsk); + + return NULL; +} + +static struct sock *__vsock_find_connected_socket(struct sockaddr_vm *src, + struct sockaddr_vm *dst) +{ + struct vsock_sock *vsk; + + list_for_each_entry(vsk, vsock_connected_sockets(src, dst), + connected_table) { + if (vsock_addr_equals_addr(src, &vsk->remote_addr) && + dst->svm_port == vsk->local_addr.svm_port) { + return sk_vsock(vsk); + } + } + + return NULL; +} + +static bool __vsock_in_bound_table(struct vsock_sock *vsk) +{ + return !list_empty(&vsk->bound_table); +} + +static bool __vsock_in_connected_table(struct vsock_sock *vsk) +{ + return !list_empty(&vsk->connected_table); +} + +static void vsock_insert_unbound(struct vsock_sock *vsk) +{ + spin_lock_bh(&vsock_table_lock); + __vsock_insert_bound(vsock_unbound_sockets, vsk); + spin_unlock_bh(&vsock_table_lock); +} + +void vsock_insert_connected(struct vsock_sock *vsk) +{ + struct list_head *list = vsock_connected_sockets( + &vsk->remote_addr, &vsk->local_addr); + + spin_lock_bh(&vsock_table_lock); + __vsock_insert_connected(list, vsk); + spin_unlock_bh(&vsock_table_lock); +} +EXPORT_SYMBOL_GPL(vsock_insert_connected); + +void vsock_remove_bound(struct vsock_sock *vsk) +{ + spin_lock_bh(&vsock_table_lock); + __vsock_remove_bound(vsk); + spin_unlock_bh(&vsock_table_lock); +} +EXPORT_SYMBOL_GPL(vsock_remove_bound); + +void vsock_remove_connected(struct vsock_sock *vsk) +{ + spin_lock_bh(&vsock_table_lock); + __vsock_remove_connected(vsk); + spin_unlock_bh(&vsock_table_lock); +} +EXPORT_SYMBOL_GPL(vsock_remove_connected); + +struct sock *vsock_find_bound_socket(struct sockaddr_vm *addr) +{ + struct sock *sk; + + spin_lock_bh(&vsock_table_lock); + sk = __vsock_find_bound_socket(addr); + if (sk) + sock_hold(sk); + + spin_unlock_bh(&vsock_table_lock); + + return sk; +} +EXPORT_SYMBOL_GPL(vsock_find_bound_socket); + +struct sock *vsock_find_connected_socket(struct sockaddr_vm *src, + struct sockaddr_vm *dst) +{ + struct sock *sk; + + spin_lock_bh(&vsock_table_lock); + sk = 
__vsock_find_connected_socket(src, dst);
+	if (sk)
+		sock_hold(sk);
+
+	spin_unlock_bh(&vsock_table_lock);
+
+	return sk;
+}
+EXPORT_SYMBOL_GPL(vsock_find_connected_socket);
+
+static bool vsock_in_bound_table(struct vsock_sock *vsk)
+{
+	bool ret;
+
+	spin_lock_bh(&vsock_table_lock);
+	ret = __vsock_in_bound_table(vsk);
+	spin_unlock_bh(&vsock_table_lock);
+
+	return ret;
+}
+
+static bool vsock_in_connected_table(struct vsock_sock *vsk)
+{
+	bool ret;
+
+	spin_lock_bh(&vsock_table_lock);
+	ret = __vsock_in_connected_table(vsk);
+	spin_unlock_bh(&vsock_table_lock);
+
+	return ret;
+}
+
+void vsock_for_each_connected_socket(void (*fn)(struct sock *sk))
+{
+	int i;
+
+	spin_lock_bh(&vsock_table_lock);
+
+	for (i = 0; i < ARRAY_SIZE(vsock_connected_table); i++) {
+		struct vsock_sock *vsk;
+		list_for_each_entry(vsk, &vsock_connected_table[i],
+				    connected_table)
+			fn(sk_vsock(vsk));
+	}
+
+	spin_unlock_bh(&vsock_table_lock);
+}
+EXPORT_SYMBOL_GPL(vsock_for_each_connected_socket);
+
+void vsock_add_pending(struct sock *listener, struct sock *pending)
+{
+	struct vsock_sock *vlistener;
+	struct vsock_sock *vpending;
+
+	vlistener = vsock_sk(listener);
+	vpending = vsock_sk(pending);
+
+	sock_hold(pending);
+	sock_hold(listener);
+	list_add_tail(&vpending->pending_links, &vlistener->pending_links);
+}
+EXPORT_SYMBOL_GPL(vsock_add_pending);
+
+void vsock_remove_pending(struct sock *listener, struct sock *pending)
+{
+	struct vsock_sock *vpending = vsock_sk(pending);
+
+	list_del_init(&vpending->pending_links);
+	sock_put(listener);
+	sock_put(pending);
+}
+EXPORT_SYMBOL_GPL(vsock_remove_pending);
+
+void vsock_enqueue_accept(struct sock *listener, struct sock *connected)
+{
+	struct vsock_sock *vlistener;
+	struct vsock_sock *vconnected;
+
+	vlistener = vsock_sk(listener);
+	vconnected = vsock_sk(connected);
+
+	sock_hold(connected);
+	sock_hold(listener);
+	list_add_tail(&vconnected->accept_queue, &vlistener->accept_queue);
+}
+EXPORT_SYMBOL_GPL(vsock_enqueue_accept);
+
+static struct sock *vsock_dequeue_accept(struct sock *listener)
+{
+	struct vsock_sock *vlistener;
+	struct vsock_sock *vconnected;
+
+	vlistener = vsock_sk(listener);
+
+	if (list_empty(&vlistener->accept_queue))
+		return NULL;
+
+	vconnected = list_entry(vlistener->accept_queue.next,
+				struct vsock_sock, accept_queue);
+
+	list_del_init(&vconnected->accept_queue);
+	sock_put(listener);
+	/* The caller will need a reference on the connected socket so we let
+	 * it call sock_put().
+	 */
+
+	return sk_vsock(vconnected);
+}
+
+static bool vsock_is_accept_queue_empty(struct sock *sk)
+{
+	struct vsock_sock *vsk = vsock_sk(sk);
+	return list_empty(&vsk->accept_queue);
+}
+
+static bool vsock_is_pending(struct sock *sk)
+{
+	struct vsock_sock *vsk = vsock_sk(sk);
+	return !list_empty(&vsk->pending_links);
+}
+
+static int vsock_send_shutdown(struct sock *sk, int mode)
+{
+	return transport->shutdown(vsock_sk(sk), mode);
+}
+
+void vsock_pending_work(struct work_struct *work)
+{
+	struct sock *sk;
+	struct sock *listener;
+	struct vsock_sock *vsk;
+	bool cleanup;
+
+	vsk = container_of(work, struct vsock_sock, dwork.work);
+	sk = sk_vsock(vsk);
+	listener = vsk->listener;
+	cleanup = true;
+
+	lock_sock(listener);
+	lock_sock(sk);
+
+	if (vsock_is_pending(sk)) {
+		vsock_remove_pending(listener, sk);
+	} else if (!vsk->rejected) {
+		/* We are not on the pending list and accept() did not reject
+		 * us, so we must have been accepted by our user process.  We
+		 * just need to drop our references to the sockets and be on
+		 * our way.
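+		 *
+		 * (Editor's aside, not part of the applied patch: the
+		 * listener bookkeeping this worker unwinds, roughly:
+		 *
+		 *	request received  ->  child on listener's pending_links
+		 *	handshake done    ->  child moved to accept_queue
+		 *	accept() rejected ->  child->rejected set, torn down
+		 *			      here)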
+ */ + cleanup = false; + goto out; + } + + listener->sk_ack_backlog--; + + /* We need to remove ourself from the global connected sockets list so + * incoming packets can't find this socket, and to reduce the reference + * count. + */ + if (vsock_in_connected_table(vsk)) + vsock_remove_connected(vsk); + + sk->sk_state = SS_FREE; + +out: + release_sock(sk); + release_sock(listener); + if (cleanup) + sock_put(sk); + + sock_put(sk); + sock_put(listener); +} +EXPORT_SYMBOL_GPL(vsock_pending_work); + +/**** SOCKET OPERATIONS ****/ + +static int __vsock_bind_stream(struct vsock_sock *vsk, + struct sockaddr_vm *addr) +{ + static u32 port = LAST_RESERVED_PORT + 1; + struct sockaddr_vm new_addr; + + vsock_addr_init(&new_addr, addr->svm_cid, addr->svm_port); + + if (addr->svm_port == VMADDR_PORT_ANY) { + bool found = false; + unsigned int i; + + for (i = 0; i < MAX_PORT_RETRIES; i++) { + if (port <= LAST_RESERVED_PORT) + port = LAST_RESERVED_PORT + 1; + + new_addr.svm_port = port++; + + if (!__vsock_find_bound_socket(&new_addr)) { + found = true; + break; + } + } + + if (!found) + return -EADDRNOTAVAIL; + } else { + /* If port is in reserved range, ensure caller + * has necessary privileges. + */ + if (addr->svm_port <= LAST_RESERVED_PORT && + !capable(CAP_NET_BIND_SERVICE)) { + return -EACCES; + } + + if (__vsock_find_bound_socket(&new_addr)) + return -EADDRINUSE; + } + + vsock_addr_init(&vsk->local_addr, new_addr.svm_cid, new_addr.svm_port); + + /* Remove stream sockets from the unbound list and add them to the hash + * table for easy lookup by its address. The unbound list is simply an + * extra entry at the end of the hash table, a trick used by AF_UNIX. + */ + __vsock_remove_bound(vsk); + __vsock_insert_bound(vsock_bound_sockets(&vsk->local_addr), vsk); + + return 0; +} + +static int __vsock_bind_dgram(struct vsock_sock *vsk, + struct sockaddr_vm *addr) +{ + return transport->dgram_bind(vsk, addr); +} + +static int __vsock_bind(struct sock *sk, struct sockaddr_vm *addr) +{ + struct vsock_sock *vsk = vsock_sk(sk); + u32 cid; + int retval; + + /* First ensure this socket isn't already bound. */ + if (vsock_addr_bound(&vsk->local_addr)) + return -EINVAL; + + /* Now bind to the provided address or select appropriate values if + * none are provided (VMADDR_CID_ANY and VMADDR_PORT_ANY). Note that + * like AF_INET prevents binding to a non-local IP address (in most + * cases), we only allow binding to the local CID. + */ + cid = transport->get_local_cid(); + if (addr->svm_cid != cid && addr->svm_cid != VMADDR_CID_ANY) + return -EADDRNOTAVAIL; + + switch (sk->sk_socket->type) { + case SOCK_STREAM: + spin_lock_bh(&vsock_table_lock); + retval = __vsock_bind_stream(vsk, addr); + spin_unlock_bh(&vsock_table_lock); + break; + + case SOCK_DGRAM: + retval = __vsock_bind_dgram(vsk, addr); + break; + + default: + retval = -EINVAL; + break; + } + + return retval; +} + +struct sock *__vsock_create(struct net *net, + struct socket *sock, + struct sock *parent, + gfp_t priority, + unsigned short type) +{ + struct sock *sk; + struct vsock_sock *psk; + struct vsock_sock *vsk; + + sk = sk_alloc(net, AF_VSOCK, priority, &vsock_proto); + if (!sk) + return NULL; + + sock_init_data(sock, sk); + + /* sk->sk_type is normally set in sock_init_data, but only if sock is + * non-NULL. We make sure that our sockets always have a type by + * setting it here if needed. 
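+	 *
+	 * Editor's illustration, not part of the applied patch: from user
+	 * space the autobind rules in __vsock_bind_stream() above reduce
+	 * to the sketch below (assuming the sockaddr_vm definitions from
+	 * the uapi header); VMADDR_PORT_ANY asks the kernel for an
+	 * ephemeral port above LAST_RESERVED_PORT (1023):
+	 *
+	 *	int fd = socket(AF_VSOCK, SOCK_STREAM, 0);
+	 *	struct sockaddr_vm addr = {
+	 *		.svm_family = AF_VSOCK,
+	 *		.svm_cid    = VMADDR_CID_ANY,
+	 *		.svm_port   = VMADDR_PORT_ANY,
+	 *	};
+	 *	bind(fd, (struct sockaddr *)&addr, sizeof(addr));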
+ */ + if (!sock) + sk->sk_type = type; + + vsk = vsock_sk(sk); + vsock_addr_init(&vsk->local_addr, VMADDR_CID_ANY, VMADDR_PORT_ANY); + vsock_addr_init(&vsk->remote_addr, VMADDR_CID_ANY, VMADDR_PORT_ANY); + + sk->sk_destruct = vsock_sk_destruct; + sk->sk_backlog_rcv = vsock_queue_rcv_skb; + sk->sk_state = 0; + sock_reset_flag(sk, SOCK_DONE); + + INIT_LIST_HEAD(&vsk->bound_table); + INIT_LIST_HEAD(&vsk->connected_table); + vsk->listener = NULL; + INIT_LIST_HEAD(&vsk->pending_links); + INIT_LIST_HEAD(&vsk->accept_queue); + vsk->rejected = false; + vsk->sent_request = false; + vsk->ignore_connecting_rst = false; + vsk->peer_shutdown = 0; + + psk = parent ? vsock_sk(parent) : NULL; + if (parent) { + vsk->trusted = psk->trusted; + vsk->owner = get_cred(psk->owner); + vsk->connect_timeout = psk->connect_timeout; + } else { + vsk->trusted = capable(CAP_NET_ADMIN); + vsk->owner = get_current_cred(); + vsk->connect_timeout = VSOCK_DEFAULT_CONNECT_TIMEOUT; + } + + if (transport->init(vsk, psk) < 0) { + sk_free(sk); + return NULL; + } + + if (sock) + vsock_insert_unbound(vsk); + + return sk; +} +EXPORT_SYMBOL_GPL(__vsock_create); + +static void __vsock_release(struct sock *sk) +{ + if (sk) { + struct sk_buff *skb; + struct sock *pending; + struct vsock_sock *vsk; + + vsk = vsock_sk(sk); + pending = NULL; /* Compiler warning. */ + + if (vsock_in_bound_table(vsk)) + vsock_remove_bound(vsk); + + if (vsock_in_connected_table(vsk)) + vsock_remove_connected(vsk); + + transport->release(vsk); + + lock_sock(sk); + sock_orphan(sk); + sk->sk_shutdown = SHUTDOWN_MASK; + + while ((skb = skb_dequeue(&sk->sk_receive_queue))) + kfree_skb(skb); + + /* Clean up any sockets that never were accepted. */ + while ((pending = vsock_dequeue_accept(sk)) != NULL) { + __vsock_release(pending); + sock_put(pending); + } + + release_sock(sk); + sock_put(sk); + } +} + +static void vsock_sk_destruct(struct sock *sk) +{ + struct vsock_sock *vsk = vsock_sk(sk); + + transport->destruct(vsk); + + /* When clearing these addresses, there's no need to set the family and + * possibly register the address family with the kernel. 
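+	 *
+	 * Editor's aside, not part of the applied patch: the deferred-work
+	 * reference pattern described at the top of this file shows up in
+	 * vsock_stream_connect()/vsock_connect_timeout() below and reduces
+	 * to:
+	 *
+	 *	sock_hold(sk);
+	 *	INIT_DELAYED_WORK(&vsk->dwork, vsock_connect_timeout);
+	 *	schedule_delayed_work(&vsk->dwork, timeout);
+	 *
+	 * with the worker dropping the reference via sock_put(sk) when done.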
+ */ + vsock_addr_init(&vsk->local_addr, VMADDR_CID_ANY, VMADDR_PORT_ANY); + vsock_addr_init(&vsk->remote_addr, VMADDR_CID_ANY, VMADDR_PORT_ANY); + + put_cred(vsk->owner); +} + +static int vsock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) +{ + int err; + + err = sock_queue_rcv_skb(sk, skb); + if (err) + kfree_skb(skb); + + return err; +} + +s64 vsock_stream_has_data(struct vsock_sock *vsk) +{ + return transport->stream_has_data(vsk); +} +EXPORT_SYMBOL_GPL(vsock_stream_has_data); + +s64 vsock_stream_has_space(struct vsock_sock *vsk) +{ + return transport->stream_has_space(vsk); +} +EXPORT_SYMBOL_GPL(vsock_stream_has_space); + +static int vsock_release(struct socket *sock) +{ + __vsock_release(sock->sk); + sock->sk = NULL; + sock->state = SS_FREE; + + return 0; +} + +static int +vsock_bind(struct socket *sock, struct sockaddr *addr, int addr_len) +{ + int err; + struct sock *sk; + struct sockaddr_vm *vm_addr; + + sk = sock->sk; + + if (vsock_addr_cast(addr, addr_len, &vm_addr) != 0) + return -EINVAL; + + lock_sock(sk); + err = __vsock_bind(sk, vm_addr); + release_sock(sk); + + return err; +} + +static int vsock_getname(struct socket *sock, + struct sockaddr *addr, int *addr_len, int peer) +{ + int err; + struct sock *sk; + struct vsock_sock *vsk; + struct sockaddr_vm *vm_addr; + + sk = sock->sk; + vsk = vsock_sk(sk); + err = 0; + + lock_sock(sk); + + if (peer) { + if (sock->state != SS_CONNECTED) { + err = -ENOTCONN; + goto out; + } + vm_addr = &vsk->remote_addr; + } else { + vm_addr = &vsk->local_addr; + } + + if (!vm_addr) { + err = -EINVAL; + goto out; + } + + /* sys_getsockname() and sys_getpeername() pass us a + * MAX_SOCK_ADDR-sized buffer and don't set addr_len. Unfortunately + * that macro is defined in socket.c instead of .h, so we hardcode its + * value here. + */ + BUILD_BUG_ON(sizeof(*vm_addr) > 128); + memcpy(addr, vm_addr, sizeof(*vm_addr)); + *addr_len = sizeof(*vm_addr); + +out: + release_sock(sk); + return err; +} + +static int vsock_shutdown(struct socket *sock, int mode) +{ + int err; + struct sock *sk; + + /* User level uses SHUT_RD (0) and SHUT_WR (1), but the kernel uses + * RCV_SHUTDOWN (1) and SEND_SHUTDOWN (2), so we must increment mode + * here like the other address families do. Note also that the + * increment makes SHUT_RDWR (2) into RCV_SHUTDOWN | SEND_SHUTDOWN (3), + * which is what we want. + */ + mode++; + + if ((mode & ~SHUTDOWN_MASK) || !mode) + return -EINVAL; + + /* If this is a STREAM socket and it is not connected then bail out + * immediately. If it is a DGRAM socket then we must first kick the + * socket so that it wakes up from any sleeping calls, for example + * recv(), and then afterwards return the error. + */ + + sk = sock->sk; + if (sock->state == SS_UNCONNECTED) { + err = -ENOTCONN; + if (sk->sk_type == SOCK_STREAM) + return err; + } else { + sock->state = SS_DISCONNECTING; + err = 0; + } + + /* Receive and send shutdowns are treated alike. 
*/ + mode = mode & (RCV_SHUTDOWN | SEND_SHUTDOWN); + if (mode) { + lock_sock(sk); + sk->sk_shutdown |= mode; + sk->sk_state_change(sk); + release_sock(sk); + + if (sk->sk_type == SOCK_STREAM) { + sock_reset_flag(sk, SOCK_DONE); + vsock_send_shutdown(sk, mode); + } + } + + return err; +} + +static unsigned int vsock_poll(struct file *file, struct socket *sock, + poll_table *wait) +{ + struct sock *sk; + unsigned int mask; + struct vsock_sock *vsk; + + sk = sock->sk; + vsk = vsock_sk(sk); + + poll_wait(file, sk_sleep(sk), wait); + mask = 0; + + if (sk->sk_err) + /* Signify that there has been an error on this socket. */ + mask |= POLLERR; + + /* INET sockets treat local write shutdown and peer write shutdown as a + * case of POLLHUP set. + */ + if ((sk->sk_shutdown == SHUTDOWN_MASK) || + ((sk->sk_shutdown & SEND_SHUTDOWN) && + (vsk->peer_shutdown & SEND_SHUTDOWN))) { + mask |= POLLHUP; + } + + if (sk->sk_shutdown & RCV_SHUTDOWN || + vsk->peer_shutdown & SEND_SHUTDOWN) { + mask |= POLLRDHUP; + } + + if (sock->type == SOCK_DGRAM) { + /* For datagram sockets we can read if there is something in + * the queue and write as long as the socket isn't shutdown for + * sending. + */ + if (!skb_queue_empty(&sk->sk_receive_queue) || + (sk->sk_shutdown & RCV_SHUTDOWN)) { + mask |= POLLIN | POLLRDNORM; + } + + if (!(sk->sk_shutdown & SEND_SHUTDOWN)) + mask |= POLLOUT | POLLWRNORM | POLLWRBAND; + + } else if (sock->type == SOCK_STREAM) { + lock_sock(sk); + + /* Listening sockets that have connections in their accept + * queue can be read. + */ + if (sk->sk_state == SS_LISTEN + && !vsock_is_accept_queue_empty(sk)) + mask |= POLLIN | POLLRDNORM; + + /* If there is something in the queue then we can read. */ + if (transport->stream_is_active(vsk) && + !(sk->sk_shutdown & RCV_SHUTDOWN)) { + bool data_ready_now = false; + int ret = transport->notify_poll_in( + vsk, 1, &data_ready_now); + if (ret < 0) { + mask |= POLLERR; + } else { + if (data_ready_now) + mask |= POLLIN | POLLRDNORM; + + } + } + + /* Sockets whose connections have been closed, reset, or + * terminated should also be considered read, and we check the + * shutdown flag for that. + */ + if (sk->sk_shutdown & RCV_SHUTDOWN || + vsk->peer_shutdown & SEND_SHUTDOWN) { + mask |= POLLIN | POLLRDNORM; + } + + /* Connected sockets that can produce data can be written. */ + if (sk->sk_state == SS_CONNECTED) { + if (!(sk->sk_shutdown & SEND_SHUTDOWN)) { + bool space_avail_now = false; + int ret = transport->notify_poll_out( + vsk, 1, &space_avail_now); + if (ret < 0) { + mask |= POLLERR; + } else { + if (space_avail_now) + /* Remove POLLWRBAND since INET + * sockets are not setting it. + */ + mask |= POLLOUT | POLLWRNORM; + + } + } + } + + /* Simulate INET socket poll behaviors, which sets + * POLLOUT|POLLWRNORM when peer is closed and nothing to read, + * but local send is not shutdown. + */ + if (sk->sk_state == SS_UNCONNECTED) { + if (!(sk->sk_shutdown & SEND_SHUTDOWN)) + mask |= POLLOUT | POLLWRNORM; + + } + + release_sock(sk); + } + + return mask; +} + +static int vsock_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock, + struct msghdr *msg, size_t len) +{ + int err; + struct sock *sk; + struct vsock_sock *vsk; + struct sockaddr_vm *remote_addr; + + if (msg->msg_flags & MSG_OOB) + return -EOPNOTSUPP; + + /* For now, MSG_DONTWAIT is always assumed... 
*/ + err = 0; + sk = sock->sk; + vsk = vsock_sk(sk); + + lock_sock(sk); + + if (!vsock_addr_bound(&vsk->local_addr)) { + struct sockaddr_vm local_addr; + + vsock_addr_init(&local_addr, VMADDR_CID_ANY, VMADDR_PORT_ANY); + err = __vsock_bind(sk, &local_addr); + if (err != 0) + goto out; + + } + + /* If the provided message contains an address, use that. Otherwise + * fall back on the socket's remote handle (if it has been connected). + */ + if (msg->msg_name && + vsock_addr_cast(msg->msg_name, msg->msg_namelen, + &remote_addr) == 0) { + /* Ensure this address is of the right type and is a valid + * destination. + */ + + if (remote_addr->svm_cid == VMADDR_CID_ANY) + remote_addr->svm_cid = transport->get_local_cid(); + + if (!vsock_addr_bound(remote_addr)) { + err = -EINVAL; + goto out; + } + } else if (sock->state == SS_CONNECTED) { + remote_addr = &vsk->remote_addr; + + if (remote_addr->svm_cid == VMADDR_CID_ANY) + remote_addr->svm_cid = transport->get_local_cid(); + + /* XXX Should connect() or this function ensure remote_addr is + * bound? + */ + if (!vsock_addr_bound(&vsk->remote_addr)) { + err = -EINVAL; + goto out; + } + } else { + err = -EINVAL; + goto out; + } + + if (!transport->dgram_allow(remote_addr->svm_cid, + remote_addr->svm_port)) { + err = -EINVAL; + goto out; + } + + err = transport->dgram_enqueue(vsk, remote_addr, msg->msg_iov, len); + +out: + release_sock(sk); + return err; +} + +static int vsock_dgram_connect(struct socket *sock, + struct sockaddr *addr, int addr_len, int flags) +{ + int err; + struct sock *sk; + struct vsock_sock *vsk; + struct sockaddr_vm *remote_addr; + + sk = sock->sk; + vsk = vsock_sk(sk); + + err = vsock_addr_cast(addr, addr_len, &remote_addr); + if (err == -EAFNOSUPPORT && remote_addr->svm_family == AF_UNSPEC) { + lock_sock(sk); + vsock_addr_init(&vsk->remote_addr, VMADDR_CID_ANY, + VMADDR_PORT_ANY); + sock->state = SS_UNCONNECTED; + release_sock(sk); + return 0; + } else if (err != 0) + return -EINVAL; + + lock_sock(sk); + + if (!vsock_addr_bound(&vsk->local_addr)) { + struct sockaddr_vm local_addr; + + vsock_addr_init(&local_addr, VMADDR_CID_ANY, VMADDR_PORT_ANY); + err = __vsock_bind(sk, &local_addr); + if (err != 0) + goto out; + + } + + if (!transport->dgram_allow(remote_addr->svm_cid, + remote_addr->svm_port)) { + err = -EINVAL; + goto out; + } + + memcpy(&vsk->remote_addr, remote_addr, sizeof(vsk->remote_addr)); + sock->state = SS_CONNECTED; + +out: + release_sock(sk); + return err; +} + +static int vsock_dgram_recvmsg(struct kiocb *kiocb, struct socket *sock, + struct msghdr *msg, size_t len, int flags) +{ + return transport->dgram_dequeue(kiocb, vsock_sk(sock->sk), msg, len, + flags); +} + +static const struct proto_ops vsock_dgram_ops = { + .family = PF_VSOCK, + .owner = THIS_MODULE, + .release = vsock_release, + .bind = vsock_bind, + .connect = vsock_dgram_connect, + .socketpair = sock_no_socketpair, + .accept = sock_no_accept, + .getname = vsock_getname, + .poll = vsock_poll, + .ioctl = sock_no_ioctl, + .listen = sock_no_listen, + .shutdown = vsock_shutdown, + .setsockopt = sock_no_setsockopt, + .getsockopt = sock_no_getsockopt, + .sendmsg = vsock_dgram_sendmsg, + .recvmsg = vsock_dgram_recvmsg, + .mmap = sock_no_mmap, + .sendpage = sock_no_sendpage, +}; + +static void vsock_connect_timeout(struct work_struct *work) +{ + struct sock *sk; + struct vsock_sock *vsk; + + vsk = container_of(work, struct vsock_sock, dwork.work); + sk = sk_vsock(vsk); + + lock_sock(sk); + if (sk->sk_state == SS_CONNECTING && + (sk->sk_shutdown != 
SHUTDOWN_MASK)) { + sk->sk_state = SS_UNCONNECTED; + sk->sk_err = ETIMEDOUT; + sk->sk_error_report(sk); + } + release_sock(sk); + + sock_put(sk); +} + +static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr, + int addr_len, int flags) +{ + int err; + struct sock *sk; + struct vsock_sock *vsk; + struct sockaddr_vm *remote_addr; + long timeout; + DEFINE_WAIT(wait); + + err = 0; + sk = sock->sk; + vsk = vsock_sk(sk); + + lock_sock(sk); + + /* XXX AF_UNSPEC should make us disconnect like AF_INET. */ + switch (sock->state) { + case SS_CONNECTED: + err = -EISCONN; + goto out; + case SS_DISCONNECTING: + err = -EINVAL; + goto out; + case SS_CONNECTING: + /* This continues on so we can move sock into the SS_CONNECTED + * state once the connection has completed (at which point err + * will be set to zero also). Otherwise, we will either wait + * for the connection or return -EALREADY should this be a + * non-blocking call. + */ + err = -EALREADY; + break; + default: + if ((sk->sk_state == SS_LISTEN) || + vsock_addr_cast(addr, addr_len, &remote_addr) != 0) { + err = -EINVAL; + goto out; + } + + /* The hypervisor and well-known contexts do not have socket + * endpoints. + */ + if (!transport->stream_allow(remote_addr->svm_cid, + remote_addr->svm_port)) { + err = -ENETUNREACH; + goto out; + } + + /* Set the remote address that we are connecting to. */ + memcpy(&vsk->remote_addr, remote_addr, + sizeof(vsk->remote_addr)); + + /* Autobind this socket to the local address if necessary. */ + if (!vsock_addr_bound(&vsk->local_addr)) { + struct sockaddr_vm local_addr; + + vsock_addr_init(&local_addr, VMADDR_CID_ANY, + VMADDR_PORT_ANY); + err = __vsock_bind(sk, &local_addr); + if (err != 0) + goto out; + + } + + sk->sk_state = SS_CONNECTING; + + err = transport->connect(vsk); + if (err < 0) + goto out; + + /* Mark sock as connecting and set the error code to in + * progress in case this is a non-blocking connect. + */ + sock->state = SS_CONNECTING; + err = -EINPROGRESS; + } + + /* The receive path will handle all communication until we are able to + * enter the connected state. Here we wait for the connection to be + * completed or a notification of an error. + */ + timeout = vsk->connect_timeout; + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); + + while (sk->sk_state != SS_CONNECTED && sk->sk_err == 0) { + if (flags & O_NONBLOCK) { + /* If we're not going to block, we schedule a timeout + * function to generate a timeout on the connection + * attempt, in case the peer doesn't respond in a + * timely manner. We hold on to the socket until the + * timeout fires. + */ + sock_hold(sk); + INIT_DELAYED_WORK(&vsk->dwork, + vsock_connect_timeout); + schedule_delayed_work(&vsk->dwork, timeout); + + /* Skip ahead to preserve error code set above. 
*/ + goto out_wait; + } + + release_sock(sk); + timeout = schedule_timeout(timeout); + lock_sock(sk); + + if (signal_pending(current)) { + err = sock_intr_errno(timeout); + goto out_wait_error; + } else if (timeout == 0) { + err = -ETIMEDOUT; + goto out_wait_error; + } + + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); + } + + if (sk->sk_err) { + err = -sk->sk_err; + goto out_wait_error; + } else + err = 0; + +out_wait: + finish_wait(sk_sleep(sk), &wait); +out: + release_sock(sk); + return err; + +out_wait_error: + sk->sk_state = SS_UNCONNECTED; + sock->state = SS_UNCONNECTED; + goto out_wait; +} + +static int vsock_accept(struct socket *sock, struct socket *newsock, int flags) +{ + struct sock *listener; + int err; + struct sock *connected; + struct vsock_sock *vconnected; + long timeout; + DEFINE_WAIT(wait); + + err = 0; + listener = sock->sk; + + lock_sock(listener); + + if (sock->type != SOCK_STREAM) { + err = -EOPNOTSUPP; + goto out; + } + + if (listener->sk_state != SS_LISTEN) { + err = -EINVAL; + goto out; + } + + /* Wait for children sockets to appear; these are the new sockets + * created upon connection establishment. + */ + timeout = sock_sndtimeo(listener, flags & O_NONBLOCK); + prepare_to_wait(sk_sleep(listener), &wait, TASK_INTERRUPTIBLE); + + while ((connected = vsock_dequeue_accept(listener)) == NULL && + listener->sk_err == 0) { + release_sock(listener); + timeout = schedule_timeout(timeout); + lock_sock(listener); + + if (signal_pending(current)) { + err = sock_intr_errno(timeout); + goto out_wait; + } else if (timeout == 0) { + err = -EAGAIN; + goto out_wait; + } + + prepare_to_wait(sk_sleep(listener), &wait, TASK_INTERRUPTIBLE); + } + + if (listener->sk_err) + err = -listener->sk_err; + + if (connected) { + listener->sk_ack_backlog--; + + lock_sock(connected); + vconnected = vsock_sk(connected); + + /* If the listener socket has received an error, then we should + * reject this socket and return. Note that we simply mark the + * socket rejected, drop our reference, and let the cleanup + * function handle the cleanup; the fact that we found it in + * the listener's accept queue guarantees that the cleanup + * function hasn't run yet. 
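+		 *
+		 * Editor's illustration, not part of the applied patch: a
+		 * minimal user-space counterpart of this accept path (port
+		 * 1234 is an arbitrary unprivileged example):
+		 *
+		 *	int srv = socket(AF_VSOCK, SOCK_STREAM, 0);
+		 *	struct sockaddr_vm a = {
+		 *		.svm_family = AF_VSOCK,
+		 *		.svm_cid    = VMADDR_CID_ANY,
+		 *		.svm_port   = 1234,
+		 *	};
+		 *	bind(srv, (struct sockaddr *)&a, sizeof(a));
+		 *	listen(srv, 8);
+		 *	int client = accept(srv, NULL, NULL);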
+ */ + if (err) { + vconnected->rejected = true; + release_sock(connected); + sock_put(connected); + goto out_wait; + } + + newsock->state = SS_CONNECTED; + sock_graft(connected, newsock); + release_sock(connected); + sock_put(connected); + } + +out_wait: + finish_wait(sk_sleep(listener), &wait); +out: + release_sock(listener); + return err; +} + +static int vsock_listen(struct socket *sock, int backlog) +{ + int err; + struct sock *sk; + struct vsock_sock *vsk; + + sk = sock->sk; + + lock_sock(sk); + + if (sock->type != SOCK_STREAM) { + err = -EOPNOTSUPP; + goto out; + } + + if (sock->state != SS_UNCONNECTED) { + err = -EINVAL; + goto out; + } + + vsk = vsock_sk(sk); + + if (!vsock_addr_bound(&vsk->local_addr)) { + err = -EINVAL; + goto out; + } + + sk->sk_max_ack_backlog = backlog; + sk->sk_state = SS_LISTEN; + + err = 0; + +out: + release_sock(sk); + return err; +} + +static int vsock_stream_setsockopt(struct socket *sock, + int level, + int optname, + char __user *optval, + unsigned int optlen) +{ + int err; + struct sock *sk; + struct vsock_sock *vsk; + u64 val; + + if (level != AF_VSOCK) + return -ENOPROTOOPT; + +#define COPY_IN(_v) \ + do { \ + if (optlen < sizeof(_v)) { \ + err = -EINVAL; \ + goto exit; \ + } \ + if (copy_from_user(&_v, optval, sizeof(_v)) != 0) { \ + err = -EFAULT; \ + goto exit; \ + } \ + } while (0) + + err = 0; + sk = sock->sk; + vsk = vsock_sk(sk); + + lock_sock(sk); + + switch (optname) { + case SO_VM_SOCKETS_BUFFER_SIZE: + COPY_IN(val); + transport->set_buffer_size(vsk, val); + break; + + case SO_VM_SOCKETS_BUFFER_MAX_SIZE: + COPY_IN(val); + transport->set_max_buffer_size(vsk, val); + break; + + case SO_VM_SOCKETS_BUFFER_MIN_SIZE: + COPY_IN(val); + transport->set_min_buffer_size(vsk, val); + break; + + case SO_VM_SOCKETS_CONNECT_TIMEOUT: { + struct timeval tv; + COPY_IN(tv); + if (tv.tv_sec >= 0 && tv.tv_usec < USEC_PER_SEC && + tv.tv_sec < (MAX_SCHEDULE_TIMEOUT / HZ - 1)) { + vsk->connect_timeout = tv.tv_sec * HZ + + DIV_ROUND_UP(tv.tv_usec, (1000000 / HZ)); + if (vsk->connect_timeout == 0) + vsk->connect_timeout = + VSOCK_DEFAULT_CONNECT_TIMEOUT; + + } else { + err = -ERANGE; + } + break; + } + + default: + err = -ENOPROTOOPT; + break; + } + +#undef COPY_IN + +exit: + release_sock(sk); + return err; +} + +static int vsock_stream_getsockopt(struct socket *sock, + int level, int optname, + char __user *optval, + int __user *optlen) +{ + int err; + int len; + struct sock *sk; + struct vsock_sock *vsk; + u64 val; + + if (level != AF_VSOCK) + return -ENOPROTOOPT; + + err = get_user(len, optlen); + if (err != 0) + return err; + +#define COPY_OUT(_v) \ + do { \ + if (len < sizeof(_v)) \ + return -EINVAL; \ + \ + len = sizeof(_v); \ + if (copy_to_user(optval, &_v, len) != 0) \ + return -EFAULT; \ + \ + } while (0) + + err = 0; + sk = sock->sk; + vsk = vsock_sk(sk); + + switch (optname) { + case SO_VM_SOCKETS_BUFFER_SIZE: + val = transport->get_buffer_size(vsk); + COPY_OUT(val); + break; + + case SO_VM_SOCKETS_BUFFER_MAX_SIZE: + val = transport->get_max_buffer_size(vsk); + COPY_OUT(val); + break; + + case SO_VM_SOCKETS_BUFFER_MIN_SIZE: + val = transport->get_min_buffer_size(vsk); + COPY_OUT(val); + break; + + case SO_VM_SOCKETS_CONNECT_TIMEOUT: { + struct timeval tv; + tv.tv_sec = vsk->connect_timeout / HZ; + tv.tv_usec = + (vsk->connect_timeout - + tv.tv_sec * HZ) * (1000000 / HZ); + COPY_OUT(tv); + break; + } + default: + return -ENOPROTOOPT; + } + + err = put_user(len, optlen); + if (err != 0) + return -EFAULT; + +#undef COPY_OUT + + return 0; +} + +static int 
vsock_stream_sendmsg(struct kiocb *kiocb, struct socket *sock, + struct msghdr *msg, size_t len) +{ + struct sock *sk; + struct vsock_sock *vsk; + ssize_t total_written; + long timeout; + int err; + struct vsock_transport_send_notify_data send_data; + + DEFINE_WAIT(wait); + + sk = sock->sk; + vsk = vsock_sk(sk); + total_written = 0; + err = 0; + + if (msg->msg_flags & MSG_OOB) + return -EOPNOTSUPP; + + lock_sock(sk); + + /* Callers should not provide a destination with stream sockets. */ + if (msg->msg_namelen) { + err = sk->sk_state == SS_CONNECTED ? -EISCONN : -EOPNOTSUPP; + goto out; + } + + /* Send data only if both sides are not shutdown in the direction. */ + if (sk->sk_shutdown & SEND_SHUTDOWN || + vsk->peer_shutdown & RCV_SHUTDOWN) { + err = -EPIPE; + goto out; + } + + if (sk->sk_state != SS_CONNECTED || + !vsock_addr_bound(&vsk->local_addr)) { + err = -ENOTCONN; + goto out; + } + + if (!vsock_addr_bound(&vsk->remote_addr)) { + err = -EDESTADDRREQ; + goto out; + } + + /* Wait for room in the produce queue to enqueue our user's data. */ + timeout = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); + + err = transport->notify_send_init(vsk, &send_data); + if (err < 0) + goto out; + + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); + + while (total_written < len) { + ssize_t written; + + while (vsock_stream_has_space(vsk) == 0 && + sk->sk_err == 0 && + !(sk->sk_shutdown & SEND_SHUTDOWN) && + !(vsk->peer_shutdown & RCV_SHUTDOWN)) { + + /* Don't wait for non-blocking sockets. */ + if (timeout == 0) { + err = -EAGAIN; + goto out_wait; + } + + err = transport->notify_send_pre_block(vsk, &send_data); + if (err < 0) + goto out_wait; + + release_sock(sk); + timeout = schedule_timeout(timeout); + lock_sock(sk); + if (signal_pending(current)) { + err = sock_intr_errno(timeout); + goto out_wait; + } else if (timeout == 0) { + err = -EAGAIN; + goto out_wait; + } + + prepare_to_wait(sk_sleep(sk), &wait, + TASK_INTERRUPTIBLE); + } + + /* These checks occur both as part of and after the loop + * conditional since we need to check before and after + * sleeping. + */ + if (sk->sk_err) { + err = -sk->sk_err; + goto out_wait; + } else if ((sk->sk_shutdown & SEND_SHUTDOWN) || + (vsk->peer_shutdown & RCV_SHUTDOWN)) { + err = -EPIPE; + goto out_wait; + } + + err = transport->notify_send_pre_enqueue(vsk, &send_data); + if (err < 0) + goto out_wait; + + /* Note that enqueue will only write as many bytes as are free + * in the produce queue, so we don't need to ensure len is + * smaller than the queue size. It is the caller's + * responsibility to check how many bytes we were able to send. 
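+		 *
+		 * Editor's illustration, not part of the applied patch: the
+		 * same rule holds one level up, so user space should loop on
+		 * short writes from send(), e.g. with O_NONBLOCK:
+		 *
+		 *	size_t off = 0;
+		 *	while (off < len) {
+		 *		ssize_t n = send(fd, buf + off, len - off, 0);
+		 *		if (n < 0)
+		 *			break;	(or retry on EINTR/EAGAIN)
+		 *		off += n;
+		 *	}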
+		 */
+
+		written = transport->stream_enqueue(
+				vsk, msg->msg_iov,
+				len - total_written);
+		if (written < 0) {
+			err = -ENOMEM;
+			goto out_wait;
+		}
+
+		total_written += written;
+
+		err = transport->notify_send_post_enqueue(
+				vsk, written, &send_data);
+		if (err < 0)
+			goto out_wait;
+
+	}
+
+out_wait:
+	if (total_written > 0)
+		err = total_written;
+	finish_wait(sk_sleep(sk), &wait);
+out:
+	release_sock(sk);
+	return err;
+}
+
+
+static int
+vsock_stream_recvmsg(struct kiocb *kiocb,
+		     struct socket *sock,
+		     struct msghdr *msg, size_t len, int flags)
+{
+	struct sock *sk;
+	struct vsock_sock *vsk;
+	int err;
+	size_t target;
+	ssize_t copied;
+	long timeout;
+	struct vsock_transport_recv_notify_data recv_data;
+
+	DEFINE_WAIT(wait);
+
+	sk = sock->sk;
+	vsk = vsock_sk(sk);
+	err = 0;
+
+	msg->msg_namelen = 0;
+
+	lock_sock(sk);
+
+	if (sk->sk_state != SS_CONNECTED) {
+		/* Recvmsg is supposed to return 0 if a peer performs an
+		 * orderly shutdown. Differentiate between that case and when a
+		 * peer has not connected or a local shutdown occurred with the
+		 * SOCK_DONE flag.
+		 */
+		if (sock_flag(sk, SOCK_DONE))
+			err = 0;
+		else
+			err = -ENOTCONN;
+
+		goto out;
+	}
+
+	if (flags & MSG_OOB) {
+		err = -EOPNOTSUPP;
+		goto out;
+	}
+
+	/* We don't check peer_shutdown flag here since peer may actually shut
+	 * down, but there can be data in the queue that a local socket can
+	 * receive.
+	 */
+	if (sk->sk_shutdown & RCV_SHUTDOWN) {
+		err = 0;
+		goto out;
+	}
+
+	/* It is valid on Linux to pass in a zero-length receive buffer. This
+	 * is not an error. We may as well bail out now.
+	 */
+	if (!len) {
+		err = 0;
+		goto out;
+	}
+
+	/* We must not copy less than target bytes into the user's buffer
+	 * before returning successfully, so we wait for the consume queue to
+	 * have that much data to consume before dequeueing. Note that this
+	 * makes it impossible to handle cases where target is greater than the
+	 * queue size.
+	 */
+	target = sock_rcvlowat(sk, flags & MSG_WAITALL, len);
+	if (target >= transport->stream_rcvhiwat(vsk)) {
+		err = -ENOMEM;
+		goto out;
+	}
+	timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
+	copied = 0;
+
+	err = transport->notify_recv_init(vsk, target, &recv_data);
+	if (err < 0)
+		goto out;
+
+	prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
+
+	while (1) {
+		s64 ready = vsock_stream_has_data(vsk);
+
+		if (ready < 0) {
+			/* Invalid queue pair content. XXX This should be
+			 * changed to a connection reset in a later change.
+			 */
+
+			err = -ENOMEM;
+			goto out_wait;
+		} else if (ready > 0) {
+			ssize_t read;
+
+			err = transport->notify_recv_pre_dequeue(
+					vsk, target, &recv_data);
+			if (err < 0)
+				break;
+
+			read = transport->stream_dequeue(
+					vsk, msg->msg_iov,
+					len - copied, flags);
+			if (read < 0) {
+				err = -ENOMEM;
+				break;
+			}
+
+			copied += read;
+
+			err = transport->notify_recv_post_dequeue(
+					vsk, target, read,
+					!(flags & MSG_PEEK), &recv_data);
+			if (err < 0)
+				goto out_wait;
+
+			if (read >= target || flags & MSG_PEEK)
+				break;
+
+			target -= read;
+		} else {
+			if (sk->sk_err != 0 || (sk->sk_shutdown & RCV_SHUTDOWN)
+			    || (vsk->peer_shutdown & SEND_SHUTDOWN)) {
+				break;
+			}
+			/* Don't wait for non-blocking sockets.
*/ + if (timeout == 0) { + err = -EAGAIN; + break; + } + + err = transport->notify_recv_pre_block( + vsk, target, &recv_data); + if (err < 0) + break; + + release_sock(sk); + timeout = schedule_timeout(timeout); + lock_sock(sk); + + if (signal_pending(current)) { + err = sock_intr_errno(timeout); + break; + } else if (timeout == 0) { + err = -EAGAIN; + break; + } + + prepare_to_wait(sk_sleep(sk), &wait, + TASK_INTERRUPTIBLE); + } + } + + if (sk->sk_err) + err = -sk->sk_err; + else if (sk->sk_shutdown & RCV_SHUTDOWN) + err = 0; + + if (copied > 0) { + /* We only do these additional bookkeeping/notification steps + * if we actually copied something out of the queue pair + * instead of just peeking ahead. + */ + + if (!(flags & MSG_PEEK)) { + /* If the other side has shutdown for sending and there + * is nothing more to read, then modify the socket + * state. + */ + if (vsk->peer_shutdown & SEND_SHUTDOWN) { + if (vsock_stream_has_data(vsk) <= 0) { + sk->sk_state = SS_UNCONNECTED; + sock_set_flag(sk, SOCK_DONE); + sk->sk_state_change(sk); + } + } + } + err = copied; + } + +out_wait: + finish_wait(sk_sleep(sk), &wait); +out: + release_sock(sk); + return err; +} + +static const struct proto_ops vsock_stream_ops = { + .family = PF_VSOCK, + .owner = THIS_MODULE, + .release = vsock_release, + .bind = vsock_bind, + .connect = vsock_stream_connect, + .socketpair = sock_no_socketpair, + .accept = vsock_accept, + .getname = vsock_getname, + .poll = vsock_poll, + .ioctl = sock_no_ioctl, + .listen = vsock_listen, + .shutdown = vsock_shutdown, + .setsockopt = vsock_stream_setsockopt, + .getsockopt = vsock_stream_getsockopt, + .sendmsg = vsock_stream_sendmsg, + .recvmsg = vsock_stream_recvmsg, + .mmap = sock_no_mmap, + .sendpage = sock_no_sendpage, +}; + +static int vsock_create(struct net *net, struct socket *sock, + int protocol, int kern) +{ + if (!sock) + return -EINVAL; + + if (protocol && protocol != PF_VSOCK) + return -EPROTONOSUPPORT; + + switch (sock->type) { + case SOCK_DGRAM: + sock->ops = &vsock_dgram_ops; + break; + case SOCK_STREAM: + sock->ops = &vsock_stream_ops; + break; + default: + return -ESOCKTNOSUPPORT; + } + + sock->state = SS_UNCONNECTED; + + return __vsock_create(net, sock, NULL, GFP_KERNEL, 0) ? 
0 : -ENOMEM; +} + +static const struct net_proto_family vsock_family_ops = { + .family = AF_VSOCK, + .create = vsock_create, + .owner = THIS_MODULE, +}; + +static long vsock_dev_do_ioctl(struct file *filp, + unsigned int cmd, void __user *ptr) +{ + u32 __user *p = ptr; + int retval = 0; + + switch (cmd) { + case IOCTL_VM_SOCKETS_GET_LOCAL_CID: + if (put_user(transport->get_local_cid(), p) != 0) + retval = -EFAULT; + break; + + default: + pr_err("Unknown ioctl %d\n", cmd); + retval = -EINVAL; + } + + return retval; +} + +static long vsock_dev_ioctl(struct file *filp, + unsigned int cmd, unsigned long arg) +{ + return vsock_dev_do_ioctl(filp, cmd, (void __user *)arg); +} + +#ifdef CONFIG_COMPAT +static long vsock_dev_compat_ioctl(struct file *filp, + unsigned int cmd, unsigned long arg) +{ + return vsock_dev_do_ioctl(filp, cmd, compat_ptr(arg)); +} +#endif + +static const struct file_operations vsock_device_ops = { + .owner = THIS_MODULE, + .unlocked_ioctl = vsock_dev_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = vsock_dev_compat_ioctl, +#endif + .open = nonseekable_open, +}; + +static struct miscdevice vsock_device = { + .name = "vsock", + .fops = &vsock_device_ops, +}; + +static int __vsock_core_init(void) +{ + int err; + + vsock_init_tables(); + + vsock_device.minor = MISC_DYNAMIC_MINOR; + err = misc_register(&vsock_device); + if (err) { + pr_err("Failed to register misc device\n"); + return -ENOENT; + } + + err = proto_register(&vsock_proto, 1); /* we want our slab */ + if (err) { + pr_err("Cannot register vsock protocol\n"); + goto err_misc_deregister; + } + + err = sock_register(&vsock_family_ops); + if (err) { + pr_err("could not register af_vsock (%d) address family: %d\n", + AF_VSOCK, err); + goto err_unregister_proto; + } + + return 0; + +err_unregister_proto: + proto_unregister(&vsock_proto); +err_misc_deregister: + misc_deregister(&vsock_device); + return err; +} + +int vsock_core_init(const struct vsock_transport *t) +{ + int retval = mutex_lock_interruptible(&vsock_register_mutex); + if (retval) + return retval; + + if (transport) { + retval = -EBUSY; + goto out; + } + + transport = t; + retval = __vsock_core_init(); + if (retval) + transport = NULL; + +out: + mutex_unlock(&vsock_register_mutex); + return retval; +} +EXPORT_SYMBOL_GPL(vsock_core_init); + +void vsock_core_exit(void) +{ + mutex_lock(&vsock_register_mutex); + + misc_deregister(&vsock_device); + sock_unregister(AF_VSOCK); + proto_unregister(&vsock_proto); + + /* We do not want the assignment below re-ordered. */ + mb(); + transport = NULL; + + mutex_unlock(&vsock_register_mutex); +} +EXPORT_SYMBOL_GPL(vsock_core_exit); + +MODULE_AUTHOR("VMware, Inc."); +MODULE_DESCRIPTION("VMware Virtual Socket Family"); +MODULE_VERSION("1.0.0.0-k"); +MODULE_LICENSE("GPL v2"); diff --git a/net/vmw_vsock/af_vsock.h b/net/vmw_vsock/af_vsock.h new file mode 100644 index 000000000000..7d64d3609ec9 --- /dev/null +++ b/net/vmw_vsock/af_vsock.h @@ -0,0 +1,175 @@ +/* + * VMware vSockets Driver + * + * Copyright (C) 2007-2013 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
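+ *
+ * Editor's aside on the misc-device interface defined in af_vsock.c
+ * above, not part of the applied patch: user space can query the local
+ * context ID without creating a socket, assuming the uapi
+ * <linux/vm_sockets.h> definitions:
+ *
+ *	int fd = open("/dev/vsock", O_RDONLY);
+ *	unsigned int cid;
+ *	if (fd >= 0 && ioctl(fd, IOCTL_VM_SOCKETS_GET_LOCAL_CID, &cid) == 0)
+ *		printf("local CID %u\n", cid);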
+ */
+
+#ifndef __AF_VSOCK_H__
+#define __AF_VSOCK_H__
+
+#include <linux/kernel.h>
+#include <linux/workqueue.h>
+#include <linux/vm_sockets.h>
+
+#include "vsock_addr.h"
+
+#define LAST_RESERVED_PORT 1023
+
+#define vsock_sk(__sk)    ((struct vsock_sock *)__sk)
+#define sk_vsock(__vsk)   (&(__vsk)->sk)
+
+struct vsock_sock {
+	/* sk must be the first member. */
+	struct sock sk;
+	struct sockaddr_vm local_addr;
+	struct sockaddr_vm remote_addr;
+	/* Links for the global tables of bound and connected sockets. */
+	struct list_head bound_table;
+	struct list_head connected_table;
+	/* Accessed without the socket lock held. This means it can never be
+	 * modified outside of socket create or destruct.
+	 */
+	bool trusted;
+	bool cached_peer_allow_dgram;	/* Dgram communication allowed to
+					 * cached peer?
+					 */
+	u32 cached_peer;  /* Context ID of last dgram destination check. */
+	const struct cred *owner;
+	/* Rest are SOCK_STREAM only. */
+	long connect_timeout;
+	/* Listening socket that this came from. */
+	struct sock *listener;
+	/* Used for pending list and accept queue during connection handshake.
+	 * The listening socket is the head for both lists. Sockets created
+	 * for connection requests are placed in the pending list until they
+	 * are connected, at which point they are put in the accept queue list
+	 * so they can be accepted in accept(). If accept() cannot accept the
+	 * connection, it is marked as rejected so the cleanup function knows
+	 * to clean up the socket.
+	 */
+	struct list_head pending_links;
+	struct list_head accept_queue;
+	bool rejected;
+	struct delayed_work dwork;
+	u32 peer_shutdown;
+	bool sent_request;
+	bool ignore_connecting_rst;
+
+	/* Private to transport. */
+	void *trans;
+};
+
+s64 vsock_stream_has_data(struct vsock_sock *vsk);
+s64 vsock_stream_has_space(struct vsock_sock *vsk);
+void vsock_pending_work(struct work_struct *work);
+struct sock *__vsock_create(struct net *net,
+			    struct socket *sock,
+			    struct sock *parent,
+			    gfp_t priority, unsigned short type);
+
+/**** TRANSPORT ****/
+
+struct vsock_transport_recv_notify_data {
+	u64 data1; /* Transport-defined. */
+	u64 data2; /* Transport-defined. */
+	bool notify_on_block;
+};
+
+struct vsock_transport_send_notify_data {
+	u64 data1; /* Transport-defined. */
+	u64 data2; /* Transport-defined. */
+};
+
+struct vsock_transport {
+	/* Initialize/tear-down socket. */
+	int (*init)(struct vsock_sock *, struct vsock_sock *);
+	void (*destruct)(struct vsock_sock *);
+	void (*release)(struct vsock_sock *);
+
+	/* Connections. */
+	int (*connect)(struct vsock_sock *);
+
+	/* DGRAM. */
+	int (*dgram_bind)(struct vsock_sock *, struct sockaddr_vm *);
+	int (*dgram_dequeue)(struct kiocb *kiocb, struct vsock_sock *vsk,
+			     struct msghdr *msg, size_t len, int flags);
+	int (*dgram_enqueue)(struct vsock_sock *, struct sockaddr_vm *,
+			     struct iovec *, size_t len);
+	bool (*dgram_allow)(u32 cid, u32 port);
+
+	/* STREAM. */
+	/* TODO: stream_bind() */
+	ssize_t (*stream_dequeue)(struct vsock_sock *, struct iovec *,
+				  size_t len, int flags);
+	ssize_t (*stream_enqueue)(struct vsock_sock *, struct iovec *,
+				  size_t len);
+	s64 (*stream_has_data)(struct vsock_sock *);
+	s64 (*stream_has_space)(struct vsock_sock *);
+	u64 (*stream_rcvhiwat)(struct vsock_sock *);
+	bool (*stream_is_active)(struct vsock_sock *);
+	bool (*stream_allow)(u32 cid, u32 port);
+
+	/* Notification.
*/ + int (*notify_poll_in)(struct vsock_sock *, size_t, bool *); + int (*notify_poll_out)(struct vsock_sock *, size_t, bool *); + int (*notify_recv_init)(struct vsock_sock *, size_t, + struct vsock_transport_recv_notify_data *); + int (*notify_recv_pre_block)(struct vsock_sock *, size_t, + struct vsock_transport_recv_notify_data *); + int (*notify_recv_pre_dequeue)(struct vsock_sock *, size_t, + struct vsock_transport_recv_notify_data *); + int (*notify_recv_post_dequeue)(struct vsock_sock *, size_t, + ssize_t, bool, struct vsock_transport_recv_notify_data *); + int (*notify_send_init)(struct vsock_sock *, + struct vsock_transport_send_notify_data *); + int (*notify_send_pre_block)(struct vsock_sock *, + struct vsock_transport_send_notify_data *); + int (*notify_send_pre_enqueue)(struct vsock_sock *, + struct vsock_transport_send_notify_data *); + int (*notify_send_post_enqueue)(struct vsock_sock *, ssize_t, + struct vsock_transport_send_notify_data *); + + /* Shutdown. */ + int (*shutdown)(struct vsock_sock *, int); + + /* Buffer sizes. */ + void (*set_buffer_size)(struct vsock_sock *, u64); + void (*set_min_buffer_size)(struct vsock_sock *, u64); + void (*set_max_buffer_size)(struct vsock_sock *, u64); + u64 (*get_buffer_size)(struct vsock_sock *); + u64 (*get_min_buffer_size)(struct vsock_sock *); + u64 (*get_max_buffer_size)(struct vsock_sock *); + + /* Addressing. */ + u32 (*get_local_cid)(void); +}; + +/**** CORE ****/ + +int vsock_core_init(const struct vsock_transport *t); +void vsock_core_exit(void); + +/**** UTILS ****/ + +void vsock_release_pending(struct sock *pending); +void vsock_add_pending(struct sock *listener, struct sock *pending); +void vsock_remove_pending(struct sock *listener, struct sock *pending); +void vsock_enqueue_accept(struct sock *listener, struct sock *connected); +void vsock_insert_connected(struct vsock_sock *vsk); +void vsock_remove_bound(struct vsock_sock *vsk); +void vsock_remove_connected(struct vsock_sock *vsk); +struct sock *vsock_find_bound_socket(struct sockaddr_vm *addr); +struct sock *vsock_find_connected_socket(struct sockaddr_vm *src, + struct sockaddr_vm *dst); +void vsock_for_each_connected_socket(void (*fn)(struct sock *sk)); + +#endif /* __AF_VSOCK_H__ */ diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c new file mode 100644 index 000000000000..daff75200e25 --- /dev/null +++ b/net/vmw_vsock/vmci_transport.c @@ -0,0 +1,2175 @@ +/* + * VMware vSockets Driver + * + * Copyright (C) 2007-2013 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
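+ *
+ * Editor's sketch, not part of the applied patch: a transport such as
+ * this file plugs into the core by filling in the struct vsock_transport
+ * operations declared in af_vsock.h above and handing it to
+ * vsock_core_init(); the names below are hypothetical:
+ *
+ *	static const struct vsock_transport my_transport = {
+ *		.init          = my_socket_init,
+ *		.destruct      = my_socket_destruct,
+ *		.release       = my_socket_release,
+ *		.connect       = my_connect,
+ *		.shutdown      = my_shutdown,
+ *		.get_local_cid = my_get_local_cid,
+ *		... plus the dgram, stream, notify and buffer-size ops ...
+ *	};
+ *
+ *	static int __init my_init(void)
+ *	{
+ *		return vsock_core_init(&my_transport);
+ *	}
+ *
+ *	static void __exit my_exit(void)
+ *	{
+ *		vsock_core_exit();
+ *	}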
+ */ + +#include <linux/types.h> +#include <linux/bitops.h> +#include <linux/cred.h> +#include <linux/init.h> +#include <linux/io.h> +#include <linux/kernel.h> +#include <linux/kmod.h> +#include <linux/list.h> +#include <linux/miscdevice.h> +#include <linux/module.h> +#include <linux/mutex.h> +#include <linux/net.h> +#include <linux/poll.h> +#include <linux/skbuff.h> +#include <linux/smp.h> +#include <linux/socket.h> +#include <linux/stddef.h> +#include <linux/unistd.h> +#include <linux/wait.h> +#include <linux/workqueue.h> +#include <net/sock.h> + +#include "af_vsock.h" +#include "vmci_transport_notify.h" + +static int vmci_transport_recv_dgram_cb(void *data, struct vmci_datagram *dg); +static int vmci_transport_recv_stream_cb(void *data, struct vmci_datagram *dg); +static void vmci_transport_peer_attach_cb(u32 sub_id, + const struct vmci_event_data *ed, + void *client_data); +static void vmci_transport_peer_detach_cb(u32 sub_id, + const struct vmci_event_data *ed, + void *client_data); +static void vmci_transport_recv_pkt_work(struct work_struct *work); +static int vmci_transport_recv_listen(struct sock *sk, + struct vmci_transport_packet *pkt); +static int vmci_transport_recv_connecting_server( + struct sock *sk, + struct sock *pending, + struct vmci_transport_packet *pkt); +static int vmci_transport_recv_connecting_client( + struct sock *sk, + struct vmci_transport_packet *pkt); +static int vmci_transport_recv_connecting_client_negotiate( + struct sock *sk, + struct vmci_transport_packet *pkt); +static int vmci_transport_recv_connecting_client_invalid( + struct sock *sk, + struct vmci_transport_packet *pkt); +static int vmci_transport_recv_connected(struct sock *sk, + struct vmci_transport_packet *pkt); +static bool vmci_transport_old_proto_override(bool *old_pkt_proto); +static u16 vmci_transport_new_proto_supported_versions(void); +static bool vmci_transport_proto_to_notify_struct(struct sock *sk, u16 *proto, + bool old_pkt_proto); + +struct vmci_transport_recv_pkt_info { + struct work_struct work; + struct sock *sk; + struct vmci_transport_packet pkt; +}; + +static struct vmci_handle vmci_transport_stream_handle = { VMCI_INVALID_ID, + VMCI_INVALID_ID }; +static u32 vmci_transport_qp_resumed_sub_id = VMCI_INVALID_ID; + +static int PROTOCOL_OVERRIDE = -1; + +#define VMCI_TRANSPORT_DEFAULT_QP_SIZE_MIN 128 +#define VMCI_TRANSPORT_DEFAULT_QP_SIZE 262144 +#define VMCI_TRANSPORT_DEFAULT_QP_SIZE_MAX 262144 + +/* The default peer timeout indicates how long we will wait for a peer response + * to a control message. + */ +#define VSOCK_DEFAULT_CONNECT_TIMEOUT (2 * HZ) + +#define SS_LISTEN 255 + +/* Helper function to convert from a VMCI error code to a VSock error code. */ + +static s32 vmci_transport_error_to_vsock_error(s32 vmci_error) +{ + int err; + + switch (vmci_error) { + case VMCI_ERROR_NO_MEM: + err = ENOMEM; + break; + case VMCI_ERROR_DUPLICATE_ENTRY: + case VMCI_ERROR_ALREADY_EXISTS: + err = EADDRINUSE; + break; + case VMCI_ERROR_NO_ACCESS: + err = EPERM; + break; + case VMCI_ERROR_NO_RESOURCES: + err = ENOBUFS; + break; + case VMCI_ERROR_INVALID_RESOURCE: + err = EHOSTUNREACH; + break; + case VMCI_ERROR_INVALID_ARGS: + default: + err = EINVAL; + } + + return err > 0 ? 
-err : err; +} + +static u32 vmci_transport_peer_rid(u32 peer_cid) +{ + if (VMADDR_CID_HYPERVISOR == peer_cid) + return VMCI_TRANSPORT_HYPERVISOR_PACKET_RID; + + return VMCI_TRANSPORT_PACKET_RID; +} + +static inline void +vmci_transport_packet_init(struct vmci_transport_packet *pkt, + struct sockaddr_vm *src, + struct sockaddr_vm *dst, + u8 type, + u64 size, + u64 mode, + struct vmci_transport_waiting_info *wait, + u16 proto, + struct vmci_handle handle) +{ + /* We register the stream control handler as an any cid handle so we + * must always send from a source address of VMADDR_CID_ANY + */ + pkt->dg.src = vmci_make_handle(VMADDR_CID_ANY, + VMCI_TRANSPORT_PACKET_RID); + pkt->dg.dst = vmci_make_handle(dst->svm_cid, + vmci_transport_peer_rid(dst->svm_cid)); + pkt->dg.payload_size = sizeof(*pkt) - sizeof(pkt->dg); + pkt->version = VMCI_TRANSPORT_PACKET_VERSION; + pkt->type = type; + pkt->src_port = src->svm_port; + pkt->dst_port = dst->svm_port; + memset(&pkt->proto, 0, sizeof(pkt->proto)); + memset(&pkt->_reserved2, 0, sizeof(pkt->_reserved2)); + + switch (pkt->type) { + case VMCI_TRANSPORT_PACKET_TYPE_INVALID: + pkt->u.size = 0; + break; + + case VMCI_TRANSPORT_PACKET_TYPE_REQUEST: + case VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE: + pkt->u.size = size; + break; + + case VMCI_TRANSPORT_PACKET_TYPE_OFFER: + case VMCI_TRANSPORT_PACKET_TYPE_ATTACH: + pkt->u.handle = handle; + break; + + case VMCI_TRANSPORT_PACKET_TYPE_WROTE: + case VMCI_TRANSPORT_PACKET_TYPE_READ: + case VMCI_TRANSPORT_PACKET_TYPE_RST: + pkt->u.size = 0; + break; + + case VMCI_TRANSPORT_PACKET_TYPE_SHUTDOWN: + pkt->u.mode = mode; + break; + + case VMCI_TRANSPORT_PACKET_TYPE_WAITING_READ: + case VMCI_TRANSPORT_PACKET_TYPE_WAITING_WRITE: + memcpy(&pkt->u.wait, wait, sizeof(pkt->u.wait)); + break; + + case VMCI_TRANSPORT_PACKET_TYPE_REQUEST2: + case VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE2: + pkt->u.size = size; + pkt->proto = proto; + break; + } +} + +static inline void +vmci_transport_packet_get_addresses(struct vmci_transport_packet *pkt, + struct sockaddr_vm *local, + struct sockaddr_vm *remote) +{ + vsock_addr_init(local, pkt->dg.dst.context, pkt->dst_port); + vsock_addr_init(remote, pkt->dg.src.context, pkt->src_port); +} + +static int +__vmci_transport_send_control_pkt(struct vmci_transport_packet *pkt, + struct sockaddr_vm *src, + struct sockaddr_vm *dst, + enum vmci_transport_packet_type type, + u64 size, + u64 mode, + struct vmci_transport_waiting_info *wait, + u16 proto, + struct vmci_handle handle, + bool convert_error) +{ + int err; + + vmci_transport_packet_init(pkt, src, dst, type, size, mode, wait, + proto, handle); + err = vmci_datagram_send(&pkt->dg); + if (convert_error && (err < 0)) + return vmci_transport_error_to_vsock_error(err); + + return err; +} + +static int +vmci_transport_reply_control_pkt_fast(struct vmci_transport_packet *pkt, + enum vmci_transport_packet_type type, + u64 size, + u64 mode, + struct vmci_transport_waiting_info *wait, + struct vmci_handle handle) +{ + struct vmci_transport_packet reply; + struct sockaddr_vm src, dst; + + if (pkt->type == VMCI_TRANSPORT_PACKET_TYPE_RST) { + return 0; + } else { + vmci_transport_packet_get_addresses(pkt, &src, &dst); + return __vmci_transport_send_control_pkt(&reply, &src, &dst, + type, + size, mode, wait, + VSOCK_PROTO_INVALID, + handle, true); + } +} + +static int +vmci_transport_send_control_pkt_bh(struct sockaddr_vm *src, + struct sockaddr_vm *dst, + enum vmci_transport_packet_type type, + u64 size, + u64 mode, + struct vmci_transport_waiting_info *wait, + 
struct vmci_handle handle) +{ + /* Note that it is safe to use a single packet across all CPUs since + * two tasklets of the same type are guaranteed to not ever run + * simultaneously. If that ever changes, or VMCI stops using tasklets, + * we can use per-cpu packets. + */ + static struct vmci_transport_packet pkt; + + return __vmci_transport_send_control_pkt(&pkt, src, dst, type, + size, mode, wait, + VSOCK_PROTO_INVALID, handle, + false); +} + +static int +vmci_transport_send_control_pkt(struct sock *sk, + enum vmci_transport_packet_type type, + u64 size, + u64 mode, + struct vmci_transport_waiting_info *wait, + u16 proto, + struct vmci_handle handle) +{ + struct vmci_transport_packet *pkt; + struct vsock_sock *vsk; + int err; + + vsk = vsock_sk(sk); + + if (!vsock_addr_bound(&vsk->local_addr)) + return -EINVAL; + + if (!vsock_addr_bound(&vsk->remote_addr)) + return -EINVAL; + + pkt = kmalloc(sizeof(*pkt), GFP_KERNEL); + if (!pkt) + return -ENOMEM; + + err = __vmci_transport_send_control_pkt(pkt, &vsk->local_addr, + &vsk->remote_addr, type, size, + mode, wait, proto, handle, + true); + kfree(pkt); + + return err; +} + +static int vmci_transport_send_reset_bh(struct sockaddr_vm *dst, + struct sockaddr_vm *src, + struct vmci_transport_packet *pkt) +{ + if (pkt->type == VMCI_TRANSPORT_PACKET_TYPE_RST) + return 0; + return vmci_transport_send_control_pkt_bh( + dst, src, + VMCI_TRANSPORT_PACKET_TYPE_RST, 0, + 0, NULL, VMCI_INVALID_HANDLE); +} + +static int vmci_transport_send_reset(struct sock *sk, + struct vmci_transport_packet *pkt) +{ + if (pkt->type == VMCI_TRANSPORT_PACKET_TYPE_RST) + return 0; + return vmci_transport_send_control_pkt(sk, + VMCI_TRANSPORT_PACKET_TYPE_RST, + 0, 0, NULL, VSOCK_PROTO_INVALID, + VMCI_INVALID_HANDLE); +} + +static int vmci_transport_send_negotiate(struct sock *sk, size_t size) +{ + return vmci_transport_send_control_pkt( + sk, + VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE, + size, 0, NULL, + VSOCK_PROTO_INVALID, + VMCI_INVALID_HANDLE); +} + +static int vmci_transport_send_negotiate2(struct sock *sk, size_t size, + u16 version) +{ + return vmci_transport_send_control_pkt( + sk, + VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE2, + size, 0, NULL, version, + VMCI_INVALID_HANDLE); +} + +static int vmci_transport_send_qp_offer(struct sock *sk, + struct vmci_handle handle) +{ + return vmci_transport_send_control_pkt( + sk, VMCI_TRANSPORT_PACKET_TYPE_OFFER, 0, + 0, NULL, + VSOCK_PROTO_INVALID, handle); +} + +static int vmci_transport_send_attach(struct sock *sk, + struct vmci_handle handle) +{ + return vmci_transport_send_control_pkt( + sk, VMCI_TRANSPORT_PACKET_TYPE_ATTACH, + 0, 0, NULL, VSOCK_PROTO_INVALID, + handle); +} + +static int vmci_transport_reply_reset(struct vmci_transport_packet *pkt) +{ + return vmci_transport_reply_control_pkt_fast( + pkt, + VMCI_TRANSPORT_PACKET_TYPE_RST, + 0, 0, NULL, + VMCI_INVALID_HANDLE); +} + +static int vmci_transport_send_invalid_bh(struct sockaddr_vm *dst, + struct sockaddr_vm *src) +{ + return vmci_transport_send_control_pkt_bh( + dst, src, + VMCI_TRANSPORT_PACKET_TYPE_INVALID, + 0, 0, NULL, VMCI_INVALID_HANDLE); +} + +int vmci_transport_send_wrote_bh(struct sockaddr_vm *dst, + struct sockaddr_vm *src) +{ + return vmci_transport_send_control_pkt_bh( + dst, src, + VMCI_TRANSPORT_PACKET_TYPE_WROTE, 0, + 0, NULL, VMCI_INVALID_HANDLE); +} + +int vmci_transport_send_read_bh(struct sockaddr_vm *dst, + struct sockaddr_vm *src) +{ + return vmci_transport_send_control_pkt_bh( + dst, src, + VMCI_TRANSPORT_PACKET_TYPE_READ, 0, + 0, NULL, 
VMCI_INVALID_HANDLE); +} + +int vmci_transport_send_wrote(struct sock *sk) +{ + return vmci_transport_send_control_pkt( + sk, VMCI_TRANSPORT_PACKET_TYPE_WROTE, 0, + 0, NULL, VSOCK_PROTO_INVALID, + VMCI_INVALID_HANDLE); +} + +int vmci_transport_send_read(struct sock *sk) +{ + return vmci_transport_send_control_pkt( + sk, VMCI_TRANSPORT_PACKET_TYPE_READ, 0, + 0, NULL, VSOCK_PROTO_INVALID, + VMCI_INVALID_HANDLE); +} + +int vmci_transport_send_waiting_write(struct sock *sk, + struct vmci_transport_waiting_info *wait) +{ + return vmci_transport_send_control_pkt( + sk, VMCI_TRANSPORT_PACKET_TYPE_WAITING_WRITE, + 0, 0, wait, VSOCK_PROTO_INVALID, + VMCI_INVALID_HANDLE); +} + +int vmci_transport_send_waiting_read(struct sock *sk, + struct vmci_transport_waiting_info *wait) +{ + return vmci_transport_send_control_pkt( + sk, VMCI_TRANSPORT_PACKET_TYPE_WAITING_READ, + 0, 0, wait, VSOCK_PROTO_INVALID, + VMCI_INVALID_HANDLE); +} + +static int vmci_transport_shutdown(struct vsock_sock *vsk, int mode) +{ + return vmci_transport_send_control_pkt( + &vsk->sk, + VMCI_TRANSPORT_PACKET_TYPE_SHUTDOWN, + 0, mode, NULL, + VSOCK_PROTO_INVALID, + VMCI_INVALID_HANDLE); +} + +static int vmci_transport_send_conn_request(struct sock *sk, size_t size) +{ + return vmci_transport_send_control_pkt(sk, + VMCI_TRANSPORT_PACKET_TYPE_REQUEST, + size, 0, NULL, + VSOCK_PROTO_INVALID, + VMCI_INVALID_HANDLE); +} + +static int vmci_transport_send_conn_request2(struct sock *sk, size_t size, + u16 version) +{ + return vmci_transport_send_control_pkt( + sk, VMCI_TRANSPORT_PACKET_TYPE_REQUEST2, + size, 0, NULL, version, + VMCI_INVALID_HANDLE); +} + +static struct sock *vmci_transport_get_pending( + struct sock *listener, + struct vmci_transport_packet *pkt) +{ + struct vsock_sock *vlistener; + struct vsock_sock *vpending; + struct sock *pending; + struct sockaddr_vm src; + + vsock_addr_init(&src, pkt->dg.src.context, pkt->src_port); + + vlistener = vsock_sk(listener); + + list_for_each_entry(vpending, &vlistener->pending_links, + pending_links) { + if (vsock_addr_equals_addr(&src, &vpending->remote_addr) && + pkt->dst_port == vpending->local_addr.svm_port) { + pending = sk_vsock(vpending); + sock_hold(pending); + goto found; + } + } + + pending = NULL; +found: + return pending; + +} + +static void vmci_transport_release_pending(struct sock *pending) +{ + sock_put(pending); +} + +/* We allow two kinds of sockets to communicate with a restricted VM: 1) + * trusted sockets 2) sockets from applications running as the same user as the + * VM (this is only true for the host side and only when using hosted products) + */ + +static bool vmci_transport_is_trusted(struct vsock_sock *vsock, u32 peer_cid) +{ + return vsock->trusted || + vmci_is_context_owner(peer_cid, vsock->owner->uid); +} + +/* We allow sending datagrams to and receiving datagrams from a restricted VM + * only if it is trusted as described in vmci_transport_is_trusted. 
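+ *
+ * Editor's summary of the checks below, not part of the applied patch:
+ *
+ *	peer is VMADDR_CID_HYPERVISOR            -> allow
+ *	socket is trusted (CAP_NET_ADMIN owner)  -> allow
+ *	owner's uid also owns the peer context   -> allow
+ *	peer context has the RESTRICTED flag     -> deny
+ *	otherwise                                -> allow
+ *
+ * The verdict is cached per socket for the last peer CID checked
+ * (cached_peer / cached_peer_allow_dgram).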
+ */ + +static bool vmci_transport_allow_dgram(struct vsock_sock *vsock, u32 peer_cid) +{ + if (VMADDR_CID_HYPERVISOR == peer_cid) + return true; + + if (vsock->cached_peer != peer_cid) { + vsock->cached_peer = peer_cid; + if (!vmci_transport_is_trusted(vsock, peer_cid) && + (vmci_context_get_priv_flags(peer_cid) & + VMCI_PRIVILEGE_FLAG_RESTRICTED)) { + vsock->cached_peer_allow_dgram = false; + } else { + vsock->cached_peer_allow_dgram = true; + } + } + + return vsock->cached_peer_allow_dgram; +} + +static int +vmci_transport_queue_pair_alloc(struct vmci_qp **qpair, + struct vmci_handle *handle, + u64 produce_size, + u64 consume_size, + u32 peer, u32 flags, bool trusted) +{ + int err = 0; + + if (trusted) { + /* Try to allocate our queue pair as trusted. This will only + * work if vsock is running in the host. + */ + + err = vmci_qpair_alloc(qpair, handle, produce_size, + consume_size, + peer, flags, + VMCI_PRIVILEGE_FLAG_TRUSTED); + if (err != VMCI_ERROR_NO_ACCESS) + goto out; + + } + + err = vmci_qpair_alloc(qpair, handle, produce_size, consume_size, + peer, flags, VMCI_NO_PRIVILEGE_FLAGS); +out: + if (err < 0) { + pr_err("Could not attach to queue pair with %d\n", + err); + err = vmci_transport_error_to_vsock_error(err); + } + + return err; +} + +static int +vmci_transport_datagram_create_hnd(u32 resource_id, + u32 flags, + vmci_datagram_recv_cb recv_cb, + void *client_data, + struct vmci_handle *out_handle) +{ + int err = 0; + + /* Try to allocate our datagram handler as trusted. This will only work + * if vsock is running in the host. + */ + + err = vmci_datagram_create_handle_priv(resource_id, flags, + VMCI_PRIVILEGE_FLAG_TRUSTED, + recv_cb, + client_data, out_handle); + + if (err == VMCI_ERROR_NO_ACCESS) + err = vmci_datagram_create_handle(resource_id, flags, + recv_cb, client_data, + out_handle); + + return err; +} + +/* This is invoked as part of a tasklet that's scheduled when the VMCI + * interrupt fires. This is run in bottom-half context and if it ever needs to + * sleep it should defer that work to a work queue. + */ + +static int vmci_transport_recv_dgram_cb(void *data, struct vmci_datagram *dg) +{ + struct sock *sk; + size_t size; + struct sk_buff *skb; + struct vsock_sock *vsk; + + sk = (struct sock *)data; + + /* This handler is privileged when this module is running on the host. + * We will get datagrams from all endpoints (even VMs that are in a + * restricted context). If we get one from a restricted context then + * the destination socket must be trusted. + * + * NOTE: We access the socket struct without holding the lock here. + * This is ok because the field we are interested in is never modified + * outside of the create and destruct socket functions. + */ + vsk = vsock_sk(sk); + if (!vmci_transport_allow_dgram(vsk, dg->src.context)) + return VMCI_ERROR_NO_ACCESS; + + size = VMCI_DG_SIZE(dg); + + /* Attach the packet to the socket's receive queue as an sk_buff. */ + skb = alloc_skb(size, GFP_ATOMIC); + if (skb) { + /* sk_receive_skb() will do a sock_put(), so hold here. 
*/ + sock_hold(sk); + skb_put(skb, size); + memcpy(skb->data, dg, size); + sk_receive_skb(sk, skb, 0); + } + + return VMCI_SUCCESS; +} + +static bool vmci_transport_stream_allow(u32 cid, u32 port) +{ + static const u32 non_socket_contexts[] = { + VMADDR_CID_RESERVED, + }; + int i; + + BUILD_BUG_ON(sizeof(cid) != sizeof(*non_socket_contexts)); + + for (i = 0; i < ARRAY_SIZE(non_socket_contexts); i++) { + if (cid == non_socket_contexts[i]) + return false; + } + + return true; +} + +/* This is invoked as part of a tasklet that's scheduled when the VMCI + * interrupt fires. This is run in bottom-half context but it defers most of + * its work to the packet handling work queue. + */ + +static int vmci_transport_recv_stream_cb(void *data, struct vmci_datagram *dg) +{ + struct sock *sk; + struct sockaddr_vm dst; + struct sockaddr_vm src; + struct vmci_transport_packet *pkt; + struct vsock_sock *vsk; + bool bh_process_pkt; + int err; + + sk = NULL; + err = VMCI_SUCCESS; + bh_process_pkt = false; + + /* Ignore incoming packets from contexts without sockets, or resources + * that aren't vsock implementations. + */ + + if (!vmci_transport_stream_allow(dg->src.context, -1) + || vmci_transport_peer_rid(dg->src.context) != dg->src.resource) + return VMCI_ERROR_NO_ACCESS; + + if (VMCI_DG_SIZE(dg) < sizeof(*pkt)) + /* Drop datagrams that do not contain full VSock packets. */ + return VMCI_ERROR_INVALID_ARGS; + + pkt = (struct vmci_transport_packet *)dg; + + /* Find the socket that should handle this packet. First we look for a + * connected socket and if there is none we look for a socket bound to + * the destination address. + */ + vsock_addr_init(&src, pkt->dg.src.context, pkt->src_port); + vsock_addr_init(&dst, pkt->dg.dst.context, pkt->dst_port); + + sk = vsock_find_connected_socket(&src, &dst); + if (!sk) { + sk = vsock_find_bound_socket(&dst); + if (!sk) { + /* We could not find a socket for this specified + * address. If this packet is a RST, we just drop it. + * If it is another packet, we send a RST. Note that + * we do not send a RST reply to RSTs so that we do not + * continually send RSTs between two endpoints. + * + * Note that since this is a reply, dst is src and src + * is dst. + */ + if (vmci_transport_send_reset_bh(&dst, &src, pkt) < 0) + pr_err("unable to send reset\n"); + + err = VMCI_ERROR_NOT_FOUND; + goto out; + } + } + + /* If the received packet type is beyond all types known to this + * implementation, reply with an invalid message. Hopefully this will + * help when implementing backwards compatibility in the future. + */ + if (pkt->type >= VMCI_TRANSPORT_PACKET_TYPE_MAX) { + vmci_transport_send_invalid_bh(&dst, &src); + err = VMCI_ERROR_INVALID_ARGS; + goto out; + } + + /* This handler is privileged when this module is running on the host. + * We will get datagram connect requests from all endpoints (even VMs + * that are in a restricted context). If we get one from a restricted + * context then the destination socket must be trusted. + * + * NOTE: We access the socket struct without holding the lock here. + * This is ok because the field we are interested in is never modified + * outside of the create and destruct socket functions. + */ + vsk = vsock_sk(sk); + if (!vmci_transport_allow_dgram(vsk, pkt->dg.src.context)) { + err = VMCI_ERROR_NO_ACCESS; + goto out; + } + + /* We do most everything in a work queue, but let's fast path the + * notification of reads and writes to help data transfer performance. 
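Stepping out of the kernel for a moment: the skb queueing in vmci_transport_recv_dgram_cb() above is exactly what completes a plain recv() on a datagram vsock. A runnable userspace sketch of that consumer (the port is an arbitrary example; the AF_VSOCK fallback define is only for older headers):

#include <stdio.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <linux/vm_sockets.h>

#ifndef AF_VSOCK
#define AF_VSOCK 40
#endif

int main(void)
{
	struct sockaddr_vm addr = {
		.svm_family = AF_VSOCK,
		.svm_cid = VMADDR_CID_ANY,
		.svm_port = 9999,	/* arbitrary example port */
	};
	char buf[256];
	int fd = socket(AF_VSOCK, SOCK_DGRAM, 0);

	if (fd < 0 || bind(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0)
		return 1;

	/* Blocks until the callback above queues an skb for us. */
	ssize_t n = recv(fd, buf, sizeof(buf), 0);
	if (n >= 0)
		printf("received %zd bytes\n", n);
	return n < 0;
}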
+ * We can only do this if there is no process context code executing + * for this socket since that may change the state. + */ + bh_lock_sock(sk); + + if (!sock_owned_by_user(sk)) { + /* The local context ID may be out of date, update it. */ + vsk->local_addr.svm_cid = dst.svm_cid; + + if (sk->sk_state == SS_CONNECTED) + vmci_trans(vsk)->notify_ops->handle_notify_pkt( + sk, pkt, true, &dst, &src, + &bh_process_pkt); + } + + bh_unlock_sock(sk); + + if (!bh_process_pkt) { + struct vmci_transport_recv_pkt_info *recv_pkt_info; + + recv_pkt_info = kmalloc(sizeof(*recv_pkt_info), GFP_ATOMIC); + if (!recv_pkt_info) { + if (vmci_transport_send_reset_bh(&dst, &src, pkt) < 0) + pr_err("unable to send reset\n"); + + err = VMCI_ERROR_NO_MEM; + goto out; + } + + recv_pkt_info->sk = sk; + memcpy(&recv_pkt_info->pkt, pkt, sizeof(recv_pkt_info->pkt)); + INIT_WORK(&recv_pkt_info->work, vmci_transport_recv_pkt_work); + + schedule_work(&recv_pkt_info->work); + /* Clear sk so that the reference count incremented by one of + * the Find functions above is not decremented below. We need + * that reference count for the packet handler we've scheduled + * to run. + */ + sk = NULL; + } + +out: + if (sk) + sock_put(sk); + + return err; +} + +static void vmci_transport_peer_attach_cb(u32 sub_id, + const struct vmci_event_data *e_data, + void *client_data) +{ + struct sock *sk = client_data; + const struct vmci_event_payload_qp *e_payload; + struct vsock_sock *vsk; + + e_payload = vmci_event_data_const_payload(e_data); + + vsk = vsock_sk(sk); + + /* We don't ask for delayed CBs when we subscribe to this event (we + * pass 0 as flags to vmci_event_subscribe()). VMCI makes no + * guarantees in that case about what context we might be running in, + * so it could be BH or process, blockable or non-blockable. So we + * need to account for all possible contexts here. + */ + local_bh_disable(); + bh_lock_sock(sk); + + /* XXX This is lame, we should provide a way to lookup sockets by + * qp_handle. + */ + if (vmci_handle_is_equal(vmci_trans(vsk)->qp_handle, + e_payload->handle)) { + /* XXX This doesn't do anything, but in the future we may want + * to set a flag here to verify the attach really did occur and + * we weren't just sent a datagram claiming it was. + */ + goto out; + } + +out: + bh_unlock_sock(sk); + local_bh_enable(); +} + +static void vmci_transport_handle_detach(struct sock *sk) +{ + struct vsock_sock *vsk; + + vsk = vsock_sk(sk); + if (!vmci_handle_is_invalid(vmci_trans(vsk)->qp_handle)) { + sock_set_flag(sk, SOCK_DONE); + + /* On a detach the peer will not be sending or receiving + * anymore. + */ + vsk->peer_shutdown = SHUTDOWN_MASK; + + /* We should not be sending anymore since the peer won't be + * there to receive, but we can still receive if there is data + * left in our consume queue. + */ + if (vsock_stream_has_data(vsk) <= 0) { + if (sk->sk_state == SS_CONNECTING) { + /* The peer may detach from a queue pair while + * we are still in the connecting state, i.e., + * if the peer VM is killed after attaching to + * a queue pair, but before we complete the + * handshake. In that case, we treat the detach + * event like a reset. 
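The bh_lock_sock()/sock_owned_by_user() pair above is the standard idiom for opportunistic bottom-half processing: touch socket state only when no process context currently owns the socket, otherwise fall through to deferral. The skeleton, with a hypothetical helper standing in for the notify call:

static void vmci_bh_fast_path(struct sock *sk)
{
	bh_lock_sock(sk);
	if (!sock_owned_by_user(sk))
		handle_in_bh(sk);	/* hypothetical; stands in for
					 * notify_ops->handle_notify_pkt()
					 */
	bh_unlock_sock(sk);

	/* Whatever was not handled here must be pushed to a work queue,
	 * as the code above does when bh_process_pkt stays false.
	 */
}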
+ */ + + sk->sk_state = SS_UNCONNECTED; + sk->sk_err = ECONNRESET; + sk->sk_error_report(sk); + return; + } + sk->sk_state = SS_UNCONNECTED; + } + sk->sk_state_change(sk); + } +} + +static void vmci_transport_peer_detach_cb(u32 sub_id, + const struct vmci_event_data *e_data, + void *client_data) +{ + struct sock *sk = client_data; + const struct vmci_event_payload_qp *e_payload; + struct vsock_sock *vsk; + + e_payload = vmci_event_data_const_payload(e_data); + vsk = vsock_sk(sk); + if (vmci_handle_is_invalid(e_payload->handle)) + return; + + /* Same rules for locking as for peer_attach_cb(). */ + local_bh_disable(); + bh_lock_sock(sk); + + /* XXX This is lame, we should provide a way to lookup sockets by + * qp_handle. + */ + if (vmci_handle_is_equal(vmci_trans(vsk)->qp_handle, + e_payload->handle)) + vmci_transport_handle_detach(sk); + + bh_unlock_sock(sk); + local_bh_enable(); +} + +static void vmci_transport_qp_resumed_cb(u32 sub_id, + const struct vmci_event_data *e_data, + void *client_data) +{ + vsock_for_each_connected_socket(vmci_transport_handle_detach); +} + +static void vmci_transport_recv_pkt_work(struct work_struct *work) +{ + struct vmci_transport_recv_pkt_info *recv_pkt_info; + struct vmci_transport_packet *pkt; + struct sock *sk; + + recv_pkt_info = + container_of(work, struct vmci_transport_recv_pkt_info, work); + sk = recv_pkt_info->sk; + pkt = &recv_pkt_info->pkt; + + lock_sock(sk); + + /* The local context ID may be out of date. */ + vsock_sk(sk)->local_addr.svm_cid = pkt->dg.dst.context; + + switch (sk->sk_state) { + case SS_LISTEN: + vmci_transport_recv_listen(sk, pkt); + break; + case SS_CONNECTING: + /* Processing of pending connections for servers goes through + * the listening socket, so see vmci_transport_recv_listen() + * for that path. + */ + vmci_transport_recv_connecting_client(sk, pkt); + break; + case SS_CONNECTED: + vmci_transport_recv_connected(sk, pkt); + break; + default: + /* Because this function does not run in the same context as + * vmci_transport_recv_stream_cb it is possible that the + * socket has closed. We need to let the other side know or it + * could be sitting in a connect and hang forever. Send a + * reset to prevent that. + */ + vmci_transport_send_reset(sk, pkt); + goto out; + } + +out: + release_sock(sk); + kfree(recv_pkt_info); + /* Release reference obtained in the stream callback when we fetched + * this socket out of the bound or connected list. + */ + sock_put(sk); +} + +static int vmci_transport_recv_listen(struct sock *sk, + struct vmci_transport_packet *pkt) +{ + struct sock *pending; + struct vsock_sock *vpending; + int err; + u64 qp_size; + bool old_request = false; + bool old_pkt_proto = false; + + err = 0; + + /* Because we are in the listen state, we could be receiving a packet + * for ourself or any previous connection requests that we received. + * If it's the latter, we try to find a socket in our list of pending + * connections and, if we do, call the appropriate handler for the + * state that that socket is in. Otherwise we try to service the + * connection request. + */ + pending = vmci_transport_get_pending(sk, pkt); + if (pending) { + lock_sock(pending); + + /* The local context ID may be out of date. 
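vmci_transport_recv_pkt_work() above follows the classic BH-to-process-context deferral shape. Reduced to a skeleton (hypothetical names; the real record also carries a copy of the packet):

struct deferred_pkt {
	struct work_struct work;
	struct sock *sk;
};

static void deferred_pkt_fn(struct work_struct *work)
{
	struct deferred_pkt *dp =
		container_of(work, struct deferred_pkt, work);

	lock_sock(dp->sk);	/* process context: sleeping is fine */
	/* ... dispatch on dp->sk->sk_state, as above ... */
	release_sock(dp->sk);

	sock_put(dp->sk);	/* drop the reference taken in BH context */
	kfree(dp);
}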
*/ + vsock_sk(pending)->local_addr.svm_cid = pkt->dg.dst.context; + + switch (pending->sk_state) { + case SS_CONNECTING: + err = vmci_transport_recv_connecting_server(sk, + pending, + pkt); + break; + default: + vmci_transport_send_reset(pending, pkt); + err = -EINVAL; + } + + if (err < 0) + vsock_remove_pending(sk, pending); + + release_sock(pending); + vmci_transport_release_pending(pending); + + return err; + } + + /* The listen state only accepts connection requests. Reply with a + * reset unless we received a reset. + */ + + if (!(pkt->type == VMCI_TRANSPORT_PACKET_TYPE_REQUEST || + pkt->type == VMCI_TRANSPORT_PACKET_TYPE_REQUEST2)) { + vmci_transport_reply_reset(pkt); + return -EINVAL; + } + + if (pkt->u.size == 0) { + vmci_transport_reply_reset(pkt); + return -EINVAL; + } + + /* If this socket can't accommodate this connection request, we send a + * reset. Otherwise we create and initialize a child socket and reply + * with a connection negotiation. + */ + if (sk->sk_ack_backlog >= sk->sk_max_ack_backlog) { + vmci_transport_reply_reset(pkt); + return -ECONNREFUSED; + } + + pending = __vsock_create(sock_net(sk), NULL, sk, GFP_KERNEL, + sk->sk_type); + if (!pending) { + vmci_transport_send_reset(sk, pkt); + return -ENOMEM; + } + + vpending = vsock_sk(pending); + + vsock_addr_init(&vpending->local_addr, pkt->dg.dst.context, + pkt->dst_port); + vsock_addr_init(&vpending->remote_addr, pkt->dg.src.context, + pkt->src_port); + + /* If the proposed size fits within our min/max, accept it. Otherwise + * propose our own size. + */ + if (pkt->u.size >= vmci_trans(vpending)->queue_pair_min_size && + pkt->u.size <= vmci_trans(vpending)->queue_pair_max_size) { + qp_size = pkt->u.size; + } else { + qp_size = vmci_trans(vpending)->queue_pair_size; + } + + /* Figure out if we are using old or new requests based on the + * overrides pkt types sent by our peer. + */ + if (vmci_transport_old_proto_override(&old_pkt_proto)) { + old_request = old_pkt_proto; + } else { + if (pkt->type == VMCI_TRANSPORT_PACKET_TYPE_REQUEST) + old_request = true; + else if (pkt->type == VMCI_TRANSPORT_PACKET_TYPE_REQUEST2) + old_request = false; + + } + + if (old_request) { + /* Handle a REQUEST (or override) */ + u16 version = VSOCK_PROTO_INVALID; + if (vmci_transport_proto_to_notify_struct( + pending, &version, true)) + err = vmci_transport_send_negotiate(pending, qp_size); + else + err = -EINVAL; + + } else { + /* Handle a REQUEST2 (or override) */ + int proto_int = pkt->proto; + int pos; + u16 active_proto_version = 0; + + /* The list of possible protocols is the intersection of all + * protocols the client supports ... plus all the protocols we + * support. + */ + proto_int &= vmci_transport_new_proto_supported_versions(); + + /* We choose the highest possible protocol version and use that + * one. 
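The version pick that follows is compact enough to restate as a standalone helper (a sketch; same fls() idiom as the code below):

static u16 pick_highest_version(u16 theirs, u16 ours)
{
	int pos = fls(theirs & ours);	/* highest set bit of the overlap */

	return pos ? (u16)(1 << (pos - 1)) : VSOCK_PROTO_INVALID;
}

With today's single VSOCK_PROTO_PKT_ON_NOTIFY bit the intersection is trivial, but the scheme extends unchanged as version bits are added.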
+ */ + pos = fls(proto_int); + if (pos) { + active_proto_version = (1 << (pos - 1)); + if (vmci_transport_proto_to_notify_struct( + pending, &active_proto_version, false)) + err = vmci_transport_send_negotiate2(pending, + qp_size, + active_proto_version); + else + err = -EINVAL; + + } else { + err = -EINVAL; + } + } + + if (err < 0) { + vmci_transport_send_reset(sk, pkt); + sock_put(pending); + err = vmci_transport_error_to_vsock_error(err); + goto out; + } + + vsock_add_pending(sk, pending); + sk->sk_ack_backlog++; + + pending->sk_state = SS_CONNECTING; + vmci_trans(vpending)->produce_size = + vmci_trans(vpending)->consume_size = qp_size; + vmci_trans(vpending)->queue_pair_size = qp_size; + + vmci_trans(vpending)->notify_ops->process_request(pending); + + /* We might never receive another message for this socket and it's not + * connected to any process, so we have to ensure it gets cleaned up + * ourself. Our delayed work function will take care of that. Note + * that we do not ever cancel this function since we have few + * guarantees about its state when calling cancel_delayed_work(). + * Instead we hold a reference on the socket for that function and make + * it capable of handling cases where it needs to do nothing but + * release that reference. + */ + vpending->listener = sk; + sock_hold(sk); + sock_hold(pending); + INIT_DELAYED_WORK(&vpending->dwork, vsock_pending_work); + schedule_delayed_work(&vpending->dwork, HZ); + +out: + return err; +} + +static int +vmci_transport_recv_connecting_server(struct sock *listener, + struct sock *pending, + struct vmci_transport_packet *pkt) +{ + struct vsock_sock *vpending; + struct vmci_handle handle; + struct vmci_qp *qpair; + bool is_local; + u32 flags; + u32 detach_sub_id; + int err; + int skerr; + + vpending = vsock_sk(pending); + detach_sub_id = VMCI_INVALID_ID; + + switch (pkt->type) { + case VMCI_TRANSPORT_PACKET_TYPE_OFFER: + if (vmci_handle_is_invalid(pkt->u.handle)) { + vmci_transport_send_reset(pending, pkt); + skerr = EPROTO; + err = -EINVAL; + goto destroy; + } + break; + default: + /* Close and cleanup the connection. */ + vmci_transport_send_reset(pending, pkt); + skerr = EPROTO; + err = pkt->type == VMCI_TRANSPORT_PACKET_TYPE_RST ? 0 : -EINVAL; + goto destroy; + } + + /* In order to complete the connection we need to attach to the offered + * queue pair and send an attach notification. We also subscribe to the + * detach event so we know when our peer goes away, and we do that + * before attaching so we don't miss an event. If all this succeeds, + * we update our state and wakeup anything waiting in accept() for a + * connection. + */ + + /* We don't care about attach since we ensure the other side has + * attached by specifying the ATTACH_ONLY flag below. + */ + err = vmci_event_subscribe(VMCI_EVENT_QP_PEER_DETACH, + vmci_transport_peer_detach_cb, + pending, &detach_sub_id); + if (err < VMCI_SUCCESS) { + vmci_transport_send_reset(pending, pkt); + err = vmci_transport_error_to_vsock_error(err); + skerr = -err; + goto destroy; + } + + vmci_trans(vpending)->detach_sub_id = detach_sub_id; + + /* Now attach to the queue pair the client created. */ + handle = pkt->u.handle; + + /* vpending->local_addr always has a context id so we do not need to + * worry about VMADDR_CID_ANY in this case. + */ + is_local = + vpending->remote_addr.svm_cid == vpending->local_addr.svm_cid; + flags = VMCI_QPFLAG_ATTACH_ONLY; + flags |= is_local ? 
VMCI_QPFLAG_LOCAL : 0; + + err = vmci_transport_queue_pair_alloc( + &qpair, + &handle, + vmci_trans(vpending)->produce_size, + vmci_trans(vpending)->consume_size, + pkt->dg.src.context, + flags, + vmci_transport_is_trusted( + vpending, + vpending->remote_addr.svm_cid)); + if (err < 0) { + vmci_transport_send_reset(pending, pkt); + skerr = -err; + goto destroy; + } + + vmci_trans(vpending)->qp_handle = handle; + vmci_trans(vpending)->qpair = qpair; + + /* When we send the attach message, we must be ready to handle incoming + * control messages on the newly connected socket. So we move the + * pending socket to the connected state before sending the attach + * message. Otherwise, an incoming packet triggered by the attach being + * received by the peer may be processed concurrently with what happens + * below after sending the attach message, and that incoming packet + * will find the listening socket instead of the (currently) pending + * socket. Note that enqueueing the socket increments the reference + * count, so even if a reset comes before the connection is accepted, + * the socket will be valid until it is removed from the queue. + * + * If we fail sending the attach below, we remove the socket from the + * connected list and move the socket to SS_UNCONNECTED before + * releasing the lock, so a pending slow path processing of an incoming + * packet will not see the socket in the connected state in that case. + */ + pending->sk_state = SS_CONNECTED; + + vsock_insert_connected(vpending); + + /* Notify our peer of our attach. */ + err = vmci_transport_send_attach(pending, handle); + if (err < 0) { + vsock_remove_connected(vpending); + pr_err("Could not send attach\n"); + vmci_transport_send_reset(pending, pkt); + err = vmci_transport_error_to_vsock_error(err); + skerr = -err; + goto destroy; + } + + /* We have a connection. Move the now connected socket from the + * listener's pending list to the accept queue so callers of accept() + * can find it. + */ + vsock_remove_pending(listener, pending); + vsock_enqueue_accept(listener, pending); + + /* Callers of accept() will be waiting on the listening socket, not + * the pending socket. + */ + listener->sk_state_change(listener); + + return 0; + +destroy: + pending->sk_err = skerr; + pending->sk_state = SS_UNCONNECTED; + /* As long as we drop our reference, all necessary cleanup will happen + * when the cleanup function drops its reference and our destruct + * implementation is called. Note that since the listen handler will + * remove pending from the pending list upon our failure, the cleanup + * function won't drop the additional reference, which is why we do it + * here. + */ + sock_put(pending); + + return err; +} + +static int +vmci_transport_recv_connecting_client(struct sock *sk, + struct vmci_transport_packet *pkt) +{ + struct vsock_sock *vsk; + int err; + int skerr; + + vsk = vsock_sk(sk); + + switch (pkt->type) { + case VMCI_TRANSPORT_PACKET_TYPE_ATTACH: + if (vmci_handle_is_invalid(pkt->u.handle) || + !vmci_handle_is_equal(pkt->u.handle, + vmci_trans(vsk)->qp_handle)) { + skerr = EPROTO; + err = -EINVAL; + goto destroy; + } + + /* Signify the socket is connected and wakeup the waiter in + * connect(). Also place the socket in the connected table for + * accounting (it can already be found since it's in the bound + * table). 
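For contrast with the kernel-side bookkeeping above, this is all a server ever sees of it: the sk_state_change() on the listener is what wakes accept(). A runnable userspace sketch (arbitrary port; the AF_VSOCK fallback define is only for older headers):

#include <sys/socket.h>
#include <linux/vm_sockets.h>

#ifndef AF_VSOCK
#define AF_VSOCK 40
#endif

int main(void)
{
	struct sockaddr_vm addr = {
		.svm_family = AF_VSOCK,
		.svm_cid = VMADDR_CID_ANY,
		.svm_port = 1234,	/* arbitrary example port */
	};
	int fd = socket(AF_VSOCK, SOCK_STREAM, 0);

	if (fd < 0 || bind(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0)
		return 1;
	if (listen(fd, 8) < 0)
		return 1;

	/* Returns once the transport has moved a pending socket to the
	 * accept queue and signalled the listener.
	 */
	int conn = accept(fd, NULL, NULL);
	return conn < 0;
}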
+ */ + sk->sk_state = SS_CONNECTED; + sk->sk_socket->state = SS_CONNECTED; + vsock_insert_connected(vsk); + sk->sk_state_change(sk); + + break; + case VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE: + case VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE2: + if (pkt->u.size == 0 + || pkt->dg.src.context != vsk->remote_addr.svm_cid + || pkt->src_port != vsk->remote_addr.svm_port + || !vmci_handle_is_invalid(vmci_trans(vsk)->qp_handle) + || vmci_trans(vsk)->qpair + || vmci_trans(vsk)->produce_size != 0 + || vmci_trans(vsk)->consume_size != 0 + || vmci_trans(vsk)->attach_sub_id != VMCI_INVALID_ID + || vmci_trans(vsk)->detach_sub_id != VMCI_INVALID_ID) { + skerr = EPROTO; + err = -EINVAL; + + goto destroy; + } + + err = vmci_transport_recv_connecting_client_negotiate(sk, pkt); + if (err) { + skerr = -err; + goto destroy; + } + + break; + case VMCI_TRANSPORT_PACKET_TYPE_INVALID: + err = vmci_transport_recv_connecting_client_invalid(sk, pkt); + if (err) { + skerr = -err; + goto destroy; + } + + break; + case VMCI_TRANSPORT_PACKET_TYPE_RST: + /* Older versions of the linux code (WS 6.5 / ESX 4.0) used to + * continue processing here after they sent an INVALID packet. + * This meant that we got a RST after the INVALID. We ignore a + * RST after an INVALID. The common code doesn't send the RST + * ... so we can hang if an old version of the common code + * fails between getting a REQUEST and sending an OFFER back. + * Not much we can do about it... except hope that it doesn't + * happen. + */ + if (vsk->ignore_connecting_rst) { + vsk->ignore_connecting_rst = false; + } else { + skerr = ECONNRESET; + err = 0; + goto destroy; + } + + break; + default: + /* Close and cleanup the connection. */ + skerr = EPROTO; + err = -EINVAL; + goto destroy; + } + + return 0; + +destroy: + vmci_transport_send_reset(sk, pkt); + + sk->sk_state = SS_UNCONNECTED; + sk->sk_err = skerr; + sk->sk_error_report(sk); + return err; +} + +static int vmci_transport_recv_connecting_client_negotiate( + struct sock *sk, + struct vmci_transport_packet *pkt) +{ + int err; + struct vsock_sock *vsk; + struct vmci_handle handle; + struct vmci_qp *qpair; + u32 attach_sub_id; + u32 detach_sub_id; + bool is_local; + u32 flags; + bool old_proto = true; + bool old_pkt_proto; + u16 version; + + vsk = vsock_sk(sk); + handle = VMCI_INVALID_HANDLE; + attach_sub_id = VMCI_INVALID_ID; + detach_sub_id = VMCI_INVALID_ID; + + /* If we have gotten here then we should be past the point where old + * linux vsock could have sent the bogus rst. + */ + vsk->sent_request = false; + vsk->ignore_connecting_rst = false; + + /* Verify that we're OK with the proposed queue pair size */ + if (pkt->u.size < vmci_trans(vsk)->queue_pair_min_size || + pkt->u.size > vmci_trans(vsk)->queue_pair_max_size) { + err = -EINVAL; + goto destroy; + } + + /* At this point we know the CID the peer is using to talk to us. */ + + if (vsk->local_addr.svm_cid == VMADDR_CID_ANY) + vsk->local_addr.svm_cid = pkt->dg.dst.context; + + /* Setup the notify ops to be the highest supported version that both + * the server and the client support. 
+ */ + + if (vmci_transport_old_proto_override(&old_pkt_proto)) { + old_proto = old_pkt_proto; + } else { + if (pkt->type == VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE) + old_proto = true; + else if (pkt->type == VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE2) + old_proto = false; + + } + + if (old_proto) + version = VSOCK_PROTO_INVALID; + else + version = pkt->proto; + + if (!vmci_transport_proto_to_notify_struct(sk, &version, old_proto)) { + err = -EINVAL; + goto destroy; + } + + /* Subscribe to attach and detach events first. + * + * XXX We attach once for each queue pair created for now so it is easy + * to find the socket (it's provided), but later we should only + * subscribe once and add a way to lookup sockets by queue pair handle. + */ + err = vmci_event_subscribe(VMCI_EVENT_QP_PEER_ATTACH, + vmci_transport_peer_attach_cb, + sk, &attach_sub_id); + if (err < VMCI_SUCCESS) { + err = vmci_transport_error_to_vsock_error(err); + goto destroy; + } + + err = vmci_event_subscribe(VMCI_EVENT_QP_PEER_DETACH, + vmci_transport_peer_detach_cb, + sk, &detach_sub_id); + if (err < VMCI_SUCCESS) { + err = vmci_transport_error_to_vsock_error(err); + goto destroy; + } + + /* Make VMCI select the handle for us. */ + handle = VMCI_INVALID_HANDLE; + is_local = vsk->remote_addr.svm_cid == vsk->local_addr.svm_cid; + flags = is_local ? VMCI_QPFLAG_LOCAL : 0; + + err = vmci_transport_queue_pair_alloc(&qpair, + &handle, + pkt->u.size, + pkt->u.size, + vsk->remote_addr.svm_cid, + flags, + vmci_transport_is_trusted( + vsk, + vsk-> + remote_addr.svm_cid)); + if (err < 0) + goto destroy; + + err = vmci_transport_send_qp_offer(sk, handle); + if (err < 0) { + err = vmci_transport_error_to_vsock_error(err); + goto destroy; + } + + vmci_trans(vsk)->qp_handle = handle; + vmci_trans(vsk)->qpair = qpair; + + vmci_trans(vsk)->produce_size = vmci_trans(vsk)->consume_size = + pkt->u.size; + + vmci_trans(vsk)->attach_sub_id = attach_sub_id; + vmci_trans(vsk)->detach_sub_id = detach_sub_id; + + vmci_trans(vsk)->notify_ops->process_negotiate(sk); + + return 0; + +destroy: + if (attach_sub_id != VMCI_INVALID_ID) + vmci_event_unsubscribe(attach_sub_id); + + if (detach_sub_id != VMCI_INVALID_ID) + vmci_event_unsubscribe(detach_sub_id); + + if (!vmci_handle_is_invalid(handle)) + vmci_qpair_detach(&qpair); + + return err; +} + +static int +vmci_transport_recv_connecting_client_invalid(struct sock *sk, + struct vmci_transport_packet *pkt) +{ + int err = 0; + struct vsock_sock *vsk = vsock_sk(sk); + + if (vsk->sent_request) { + vsk->sent_request = false; + vsk->ignore_connecting_rst = true; + + err = vmci_transport_send_conn_request( + sk, vmci_trans(vsk)->queue_pair_size); + if (err < 0) + err = vmci_transport_error_to_vsock_error(err); + else + err = 0; + + } + + return err; +} + +static int vmci_transport_recv_connected(struct sock *sk, + struct vmci_transport_packet *pkt) +{ + struct vsock_sock *vsk; + bool pkt_processed = false; + + /* In cases where we are closing the connection, it's sufficient to + * mark the state change (and maybe error) and wake up any waiting + * threads. Since this is a connected socket, it's owned by a user + * process and will be cleaned up when the failure is passed back on + * the current or next system call. Our system call implementations + * must therefore check for error and state changes on entry and when + * being awoken. 
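The whole client-side negotiation above is driven by a single blocking connect() in userspace; the REQUEST/NEGOTIATE/OFFER/ATTACH exchange happens underneath it. A runnable sketch of that caller (CID and port are example values matching the server sketch earlier):

#include <sys/socket.h>
#include <linux/vm_sockets.h>

#ifndef AF_VSOCK
#define AF_VSOCK 40
#endif

int main(void)
{
	struct sockaddr_vm addr = {
		.svm_family = AF_VSOCK,
		.svm_cid = VMADDR_CID_HOST,	/* example peer: the host */
		.svm_port = 1234,		/* must match the listener */
	};
	int fd = socket(AF_VSOCK, SOCK_STREAM, 0);

	if (fd < 0)
		return 1;

	/* Sleeps until the handshake above completes (ATTACH received)
	 * or fails (RST / negotiation error).
	 */
	return connect(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0;
}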
+ */ + switch (pkt->type) { + case VMCI_TRANSPORT_PACKET_TYPE_SHUTDOWN: + if (pkt->u.mode) { + vsk = vsock_sk(sk); + + vsk->peer_shutdown |= pkt->u.mode; + sk->sk_state_change(sk); + } + break; + + case VMCI_TRANSPORT_PACKET_TYPE_RST: + vsk = vsock_sk(sk); + /* It is possible that we sent our peer a message (e.g a + * WAITING_READ) right before we got notified that the peer had + * detached. If that happens then we can get a RST pkt back + * from our peer even though there is data available for us to + * read. In that case, don't shutdown the socket completely but + * instead allow the local client to finish reading data off + * the queuepair. Always treat a RST pkt in connected mode like + * a clean shutdown. + */ + sock_set_flag(sk, SOCK_DONE); + vsk->peer_shutdown = SHUTDOWN_MASK; + if (vsock_stream_has_data(vsk) <= 0) + sk->sk_state = SS_DISCONNECTING; + + sk->sk_state_change(sk); + break; + + default: + vsk = vsock_sk(sk); + vmci_trans(vsk)->notify_ops->handle_notify_pkt( + sk, pkt, false, NULL, NULL, + &pkt_processed); + if (!pkt_processed) + return -EINVAL; + + break; + } + + return 0; +} + +static int vmci_transport_socket_init(struct vsock_sock *vsk, + struct vsock_sock *psk) +{ + vsk->trans = kmalloc(sizeof(struct vmci_transport), GFP_KERNEL); + if (!vsk->trans) + return -ENOMEM; + + vmci_trans(vsk)->dg_handle = VMCI_INVALID_HANDLE; + vmci_trans(vsk)->qp_handle = VMCI_INVALID_HANDLE; + vmci_trans(vsk)->qpair = NULL; + vmci_trans(vsk)->produce_size = vmci_trans(vsk)->consume_size = 0; + vmci_trans(vsk)->attach_sub_id = vmci_trans(vsk)->detach_sub_id = + VMCI_INVALID_ID; + vmci_trans(vsk)->notify_ops = NULL; + if (psk) { + vmci_trans(vsk)->queue_pair_size = + vmci_trans(psk)->queue_pair_size; + vmci_trans(vsk)->queue_pair_min_size = + vmci_trans(psk)->queue_pair_min_size; + vmci_trans(vsk)->queue_pair_max_size = + vmci_trans(psk)->queue_pair_max_size; + } else { + vmci_trans(vsk)->queue_pair_size = + VMCI_TRANSPORT_DEFAULT_QP_SIZE; + vmci_trans(vsk)->queue_pair_min_size = + VMCI_TRANSPORT_DEFAULT_QP_SIZE_MIN; + vmci_trans(vsk)->queue_pair_max_size = + VMCI_TRANSPORT_DEFAULT_QP_SIZE_MAX; + } + + return 0; +} + +static void vmci_transport_destruct(struct vsock_sock *vsk) +{ + if (vmci_trans(vsk)->attach_sub_id != VMCI_INVALID_ID) { + vmci_event_unsubscribe(vmci_trans(vsk)->attach_sub_id); + vmci_trans(vsk)->attach_sub_id = VMCI_INVALID_ID; + } + + if (vmci_trans(vsk)->detach_sub_id != VMCI_INVALID_ID) { + vmci_event_unsubscribe(vmci_trans(vsk)->detach_sub_id); + vmci_trans(vsk)->detach_sub_id = VMCI_INVALID_ID; + } + + if (!vmci_handle_is_invalid(vmci_trans(vsk)->qp_handle)) { + vmci_qpair_detach(&vmci_trans(vsk)->qpair); + vmci_trans(vsk)->qp_handle = VMCI_INVALID_HANDLE; + vmci_trans(vsk)->produce_size = 0; + vmci_trans(vsk)->consume_size = 0; + } + + if (vmci_trans(vsk)->notify_ops) + vmci_trans(vsk)->notify_ops->socket_destruct(vsk); + + kfree(vsk->trans); + vsk->trans = NULL; +} + +static void vmci_transport_release(struct vsock_sock *vsk) +{ + if (!vmci_handle_is_invalid(vmci_trans(vsk)->dg_handle)) { + vmci_datagram_destroy_handle(vmci_trans(vsk)->dg_handle); + vmci_trans(vsk)->dg_handle = VMCI_INVALID_HANDLE; + } +} + +static int vmci_transport_dgram_bind(struct vsock_sock *vsk, + struct sockaddr_vm *addr) +{ + u32 port; + u32 flags; + int err; + + /* VMCI will select a resource ID for us if we provide + * VMCI_INVALID_ID. + */ + port = addr->svm_port == VMADDR_PORT_ANY ? 
+ VMCI_INVALID_ID : addr->svm_port; + + if (port <= LAST_RESERVED_PORT && !capable(CAP_NET_BIND_SERVICE)) + return -EACCES; + + flags = addr->svm_cid == VMADDR_CID_ANY ? + VMCI_FLAG_ANYCID_DG_HND : 0; + + err = vmci_transport_datagram_create_hnd(port, flags, + vmci_transport_recv_dgram_cb, + &vsk->sk, + &vmci_trans(vsk)->dg_handle); + if (err < VMCI_SUCCESS) + return vmci_transport_error_to_vsock_error(err); + vsock_addr_init(&vsk->local_addr, addr->svm_cid, + vmci_trans(vsk)->dg_handle.resource); + + return 0; +} + +static int vmci_transport_dgram_enqueue( + struct vsock_sock *vsk, + struct sockaddr_vm *remote_addr, + struct iovec *iov, + size_t len) +{ + int err; + struct vmci_datagram *dg; + + if (len > VMCI_MAX_DG_PAYLOAD_SIZE) + return -EMSGSIZE; + + if (!vmci_transport_allow_dgram(vsk, remote_addr->svm_cid)) + return -EPERM; + + /* Allocate a buffer for the user's message and our packet header. */ + dg = kmalloc(len + sizeof(*dg), GFP_KERNEL); + if (!dg) + return -ENOMEM; + + memcpy_fromiovec(VMCI_DG_PAYLOAD(dg), iov, len); + + dg->dst = vmci_make_handle(remote_addr->svm_cid, + remote_addr->svm_port); + dg->src = vmci_make_handle(vsk->local_addr.svm_cid, + vsk->local_addr.svm_port); + dg->payload_size = len; + + err = vmci_datagram_send(dg); + kfree(dg); + if (err < 0) + return vmci_transport_error_to_vsock_error(err); + + return err - sizeof(*dg); +} + +static int vmci_transport_dgram_dequeue(struct kiocb *kiocb, + struct vsock_sock *vsk, + struct msghdr *msg, size_t len, + int flags) +{ + int err; + int noblock; + struct vmci_datagram *dg; + size_t payload_len; + struct sk_buff *skb; + + noblock = flags & MSG_DONTWAIT; + + if (flags & MSG_OOB || flags & MSG_ERRQUEUE) + return -EOPNOTSUPP; + + msg->msg_namelen = 0; + + /* Retrieve the head sk_buff from the socket's receive queue. */ + err = 0; + skb = skb_recv_datagram(&vsk->sk, flags, noblock, &err); + if (err) + return err; + + if (!skb) + return -EAGAIN; + + dg = (struct vmci_datagram *)skb->data; + if (!dg) + /* err is 0, meaning we read zero bytes. */ + goto out; + + payload_len = dg->payload_size; + /* Ensure the sk_buff matches the payload size claimed in the packet. */ + if (payload_len != skb->len - sizeof(*dg)) { + err = -EINVAL; + goto out; + } + + if (payload_len > len) { + payload_len = len; + msg->msg_flags |= MSG_TRUNC; + } + + /* Place the datagram payload in the user's iovec. */ + err = skb_copy_datagram_iovec(skb, sizeof(*dg), msg->msg_iov, + payload_len); + if (err) + goto out; + + if (msg->msg_name) { + struct sockaddr_vm *vm_addr; + + /* Provide the address of the sender. */ + vm_addr = (struct sockaddr_vm *)msg->msg_name; + vsock_addr_init(vm_addr, dg->src.context, dg->src.resource); + msg->msg_namelen = sizeof(*vm_addr); + } + err = payload_len; + +out: + skb_free_datagram(&vsk->sk, skb); + return err; +} + +static bool vmci_transport_dgram_allow(u32 cid, u32 port) +{ + if (cid == VMADDR_CID_HYPERVISOR) { + /* Registrations of PBRPC Servers do not modify VMX/Hypervisor + * state and are allowed. 
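The enqueue path above (vmci_datagram header prepended, vmci_datagram_send(), EMSGSIZE cap) pairs with the earlier receiver sketch; here is the matching sender, again as a runnable userspace illustration with example values:

#include <sys/socket.h>
#include <linux/vm_sockets.h>

#ifndef AF_VSOCK
#define AF_VSOCK 40
#endif

int main(void)
{
	struct sockaddr_vm dst = {
		.svm_family = AF_VSOCK,
		.svm_cid = VMADDR_CID_HOST,	/* example peer */
		.svm_port = 9999,	/* matches the receiver sketch */
	};
	const char msg[] = "ping";
	int fd = socket(AF_VSOCK, SOCK_DGRAM, 0);

	if (fd < 0)
		return 1;

	/* Lands in vmci_transport_dgram_enqueue(), which prepends the
	 * struct vmci_datagram header before handing it to VMCI.
	 */
	return sendto(fd, msg, sizeof(msg), 0,
		      (struct sockaddr *)&dst, sizeof(dst)) < 0;
}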
+ */ + return port == VMCI_UNITY_PBRPC_REGISTER; + } + + return true; +} + +static int vmci_transport_connect(struct vsock_sock *vsk) +{ + int err; + bool old_pkt_proto = false; + struct sock *sk = &vsk->sk; + + if (vmci_transport_old_proto_override(&old_pkt_proto) && + old_pkt_proto) { + err = vmci_transport_send_conn_request( + sk, vmci_trans(vsk)->queue_pair_size); + if (err < 0) { + sk->sk_state = SS_UNCONNECTED; + return err; + } + } else { + int supported_proto_versions = + vmci_transport_new_proto_supported_versions(); + err = vmci_transport_send_conn_request2( + sk, vmci_trans(vsk)->queue_pair_size, + supported_proto_versions); + if (err < 0) { + sk->sk_state = SS_UNCONNECTED; + return err; + } + + vsk->sent_request = true; + } + + return err; +} + +static ssize_t vmci_transport_stream_dequeue( + struct vsock_sock *vsk, + struct iovec *iov, + size_t len, + int flags) +{ + if (flags & MSG_PEEK) + return vmci_qpair_peekv(vmci_trans(vsk)->qpair, iov, len, 0); + else + return vmci_qpair_dequev(vmci_trans(vsk)->qpair, iov, len, 0); +} + +static ssize_t vmci_transport_stream_enqueue( + struct vsock_sock *vsk, + struct iovec *iov, + size_t len) +{ + return vmci_qpair_enquev(vmci_trans(vsk)->qpair, iov, len, 0); +} + +static s64 vmci_transport_stream_has_data(struct vsock_sock *vsk) +{ + return vmci_qpair_consume_buf_ready(vmci_trans(vsk)->qpair); +} + +static s64 vmci_transport_stream_has_space(struct vsock_sock *vsk) +{ + return vmci_qpair_produce_free_space(vmci_trans(vsk)->qpair); +} + +static u64 vmci_transport_stream_rcvhiwat(struct vsock_sock *vsk) +{ + return vmci_trans(vsk)->consume_size; +} + +static bool vmci_transport_stream_is_active(struct vsock_sock *vsk) +{ + return !vmci_handle_is_invalid(vmci_trans(vsk)->qp_handle); +} + +static u64 vmci_transport_get_buffer_size(struct vsock_sock *vsk) +{ + return vmci_trans(vsk)->queue_pair_size; +} + +static u64 vmci_transport_get_min_buffer_size(struct vsock_sock *vsk) +{ + return vmci_trans(vsk)->queue_pair_min_size; +} + +static u64 vmci_transport_get_max_buffer_size(struct vsock_sock *vsk) +{ + return vmci_trans(vsk)->queue_pair_max_size; +} + +static void vmci_transport_set_buffer_size(struct vsock_sock *vsk, u64 val) +{ + if (val < vmci_trans(vsk)->queue_pair_min_size) + vmci_trans(vsk)->queue_pair_min_size = val; + if (val > vmci_trans(vsk)->queue_pair_max_size) + vmci_trans(vsk)->queue_pair_max_size = val; + vmci_trans(vsk)->queue_pair_size = val; +} + +static void vmci_transport_set_min_buffer_size(struct vsock_sock *vsk, + u64 val) +{ + if (val > vmci_trans(vsk)->queue_pair_size) + vmci_trans(vsk)->queue_pair_size = val; + vmci_trans(vsk)->queue_pair_min_size = val; +} + +static void vmci_transport_set_max_buffer_size(struct vsock_sock *vsk, + u64 val) +{ + if (val < vmci_trans(vsk)->queue_pair_size) + vmci_trans(vsk)->queue_pair_size = val; + vmci_trans(vsk)->queue_pair_max_size = val; +} + +static int vmci_transport_notify_poll_in( + struct vsock_sock *vsk, + size_t target, + bool *data_ready_now) +{ + return vmci_trans(vsk)->notify_ops->poll_in( + &vsk->sk, target, data_ready_now); +} + +static int vmci_transport_notify_poll_out( + struct vsock_sock *vsk, + size_t target, + bool *space_available_now) +{ + return vmci_trans(vsk)->notify_ops->poll_out( + &vsk->sk, target, space_available_now); +} + +static int vmci_transport_notify_recv_init( + struct vsock_sock *vsk, + size_t target, + struct vsock_transport_recv_notify_data *data) +{ + return vmci_trans(vsk)->notify_ops->recv_init( + &vsk->sk, target, + (struct 
vmci_transport_recv_notify_data *)data); +} + +static int vmci_transport_notify_recv_pre_block( + struct vsock_sock *vsk, + size_t target, + struct vsock_transport_recv_notify_data *data) +{ + return vmci_trans(vsk)->notify_ops->recv_pre_block( + &vsk->sk, target, + (struct vmci_transport_recv_notify_data *)data); +} + +static int vmci_transport_notify_recv_pre_dequeue( + struct vsock_sock *vsk, + size_t target, + struct vsock_transport_recv_notify_data *data) +{ + return vmci_trans(vsk)->notify_ops->recv_pre_dequeue( + &vsk->sk, target, + (struct vmci_transport_recv_notify_data *)data); +} + +static int vmci_transport_notify_recv_post_dequeue( + struct vsock_sock *vsk, + size_t target, + ssize_t copied, + bool data_read, + struct vsock_transport_recv_notify_data *data) +{ + return vmci_trans(vsk)->notify_ops->recv_post_dequeue( + &vsk->sk, target, copied, data_read, + (struct vmci_transport_recv_notify_data *)data); +} + +static int vmci_transport_notify_send_init( + struct vsock_sock *vsk, + struct vsock_transport_send_notify_data *data) +{ + return vmci_trans(vsk)->notify_ops->send_init( + &vsk->sk, + (struct vmci_transport_send_notify_data *)data); +} + +static int vmci_transport_notify_send_pre_block( + struct vsock_sock *vsk, + struct vsock_transport_send_notify_data *data) +{ + return vmci_trans(vsk)->notify_ops->send_pre_block( + &vsk->sk, + (struct vmci_transport_send_notify_data *)data); +} + +static int vmci_transport_notify_send_pre_enqueue( + struct vsock_sock *vsk, + struct vsock_transport_send_notify_data *data) +{ + return vmci_trans(vsk)->notify_ops->send_pre_enqueue( + &vsk->sk, + (struct vmci_transport_send_notify_data *)data); +} + +static int vmci_transport_notify_send_post_enqueue( + struct vsock_sock *vsk, + ssize_t written, + struct vsock_transport_send_notify_data *data) +{ + return vmci_trans(vsk)->notify_ops->send_post_enqueue( + &vsk->sk, written, + (struct vmci_transport_send_notify_data *)data); +} + +static bool vmci_transport_old_proto_override(bool *old_pkt_proto) +{ + if (PROTOCOL_OVERRIDE != -1) { + if (PROTOCOL_OVERRIDE == 0) + *old_pkt_proto = true; + else + *old_pkt_proto = false; + + pr_info("Proto override in use\n"); + return true; + } + + return false; +} + +static bool vmci_transport_proto_to_notify_struct(struct sock *sk, + u16 *proto, + bool old_pkt_proto) +{ + struct vsock_sock *vsk = vsock_sk(sk); + + if (old_pkt_proto) { + if (*proto != VSOCK_PROTO_INVALID) { + pr_err("Can't set both an old and new protocol\n"); + return false; + } + vmci_trans(vsk)->notify_ops = &vmci_transport_notify_pkt_ops; + goto exit; + } + + switch (*proto) { + case VSOCK_PROTO_PKT_ON_NOTIFY: + vmci_trans(vsk)->notify_ops = + &vmci_transport_notify_pkt_q_state_ops; + break; + default: + pr_err("Unknown notify protocol version\n"); + return false; + } + +exit: + vmci_trans(vsk)->notify_ops->socket_init(sk); + return true; +} + +static u16 vmci_transport_new_proto_supported_versions(void) +{ + if (PROTOCOL_OVERRIDE != -1) + return PROTOCOL_OVERRIDE; + + return VSOCK_PROTO_ALL_SUPPORTED; +} + +static u32 vmci_transport_get_local_cid(void) +{ + return vmci_get_context_id(); +} + +static struct vsock_transport vmci_transport = { + .init = vmci_transport_socket_init, + .destruct = vmci_transport_destruct, + .release = vmci_transport_release, + .connect = vmci_transport_connect, + .dgram_bind = vmci_transport_dgram_bind, + .dgram_dequeue = vmci_transport_dgram_dequeue, + .dgram_enqueue = vmci_transport_dgram_enqueue, + .dgram_allow = vmci_transport_dgram_allow, + 
.stream_dequeue = vmci_transport_stream_dequeue, + .stream_enqueue = vmci_transport_stream_enqueue, + .stream_has_data = vmci_transport_stream_has_data, + .stream_has_space = vmci_transport_stream_has_space, + .stream_rcvhiwat = vmci_transport_stream_rcvhiwat, + .stream_is_active = vmci_transport_stream_is_active, + .stream_allow = vmci_transport_stream_allow, + .notify_poll_in = vmci_transport_notify_poll_in, + .notify_poll_out = vmci_transport_notify_poll_out, + .notify_recv_init = vmci_transport_notify_recv_init, + .notify_recv_pre_block = vmci_transport_notify_recv_pre_block, + .notify_recv_pre_dequeue = vmci_transport_notify_recv_pre_dequeue, + .notify_recv_post_dequeue = vmci_transport_notify_recv_post_dequeue, + .notify_send_init = vmci_transport_notify_send_init, + .notify_send_pre_block = vmci_transport_notify_send_pre_block, + .notify_send_pre_enqueue = vmci_transport_notify_send_pre_enqueue, + .notify_send_post_enqueue = vmci_transport_notify_send_post_enqueue, + .shutdown = vmci_transport_shutdown, + .set_buffer_size = vmci_transport_set_buffer_size, + .set_min_buffer_size = vmci_transport_set_min_buffer_size, + .set_max_buffer_size = vmci_transport_set_max_buffer_size, + .get_buffer_size = vmci_transport_get_buffer_size, + .get_min_buffer_size = vmci_transport_get_min_buffer_size, + .get_max_buffer_size = vmci_transport_get_max_buffer_size, + .get_local_cid = vmci_transport_get_local_cid, +}; + +static int __init vmci_transport_init(void) +{ + int err; + + /* Create the datagram handle that we will use to send and receive all + * VSocket control messages for this context. + */ + err = vmci_transport_datagram_create_hnd(VMCI_TRANSPORT_PACKET_RID, + VMCI_FLAG_ANYCID_DG_HND, + vmci_transport_recv_stream_cb, + NULL, + &vmci_transport_stream_handle); + if (err < VMCI_SUCCESS) { + pr_err("Unable to create datagram handle. (%d)\n", err); + return vmci_transport_error_to_vsock_error(err); + } + + err = vmci_event_subscribe(VMCI_EVENT_QP_RESUMED, + vmci_transport_qp_resumed_cb, + NULL, &vmci_transport_qp_resumed_sub_id); + if (err < VMCI_SUCCESS) { + pr_err("Unable to subscribe to resumed event. 
(%d)\n", err); + err = vmci_transport_error_to_vsock_error(err); + vmci_transport_qp_resumed_sub_id = VMCI_INVALID_ID; + goto err_destroy_stream_handle; + } + + err = vsock_core_init(&vmci_transport); + if (err < 0) + goto err_unsubscribe; + + return 0; + +err_unsubscribe: + vmci_event_unsubscribe(vmci_transport_qp_resumed_sub_id); +err_destroy_stream_handle: + vmci_datagram_destroy_handle(vmci_transport_stream_handle); + return err; +} +module_init(vmci_transport_init); + +static void __exit vmci_transport_exit(void) +{ + if (!vmci_handle_is_invalid(vmci_transport_stream_handle)) { + if (vmci_datagram_destroy_handle( + vmci_transport_stream_handle) != VMCI_SUCCESS) + pr_err("Couldn't destroy datagram handle\n"); + vmci_transport_stream_handle = VMCI_INVALID_HANDLE; + } + + if (vmci_transport_qp_resumed_sub_id != VMCI_INVALID_ID) { + vmci_event_unsubscribe(vmci_transport_qp_resumed_sub_id); + vmci_transport_qp_resumed_sub_id = VMCI_INVALID_ID; + } + + vsock_core_exit(); +} +module_exit(vmci_transport_exit); + +MODULE_AUTHOR("VMware, Inc."); +MODULE_DESCRIPTION("VMCI transport for Virtual Sockets"); +MODULE_LICENSE("GPL v2"); +MODULE_ALIAS("vmware_vsock"); +MODULE_ALIAS_NETPROTO(PF_VSOCK); diff --git a/net/vmw_vsock/vmci_transport.h b/net/vmw_vsock/vmci_transport.h new file mode 100644 index 000000000000..fd88ea8924e4 --- /dev/null +++ b/net/vmw_vsock/vmci_transport.h @@ -0,0 +1,142 @@ +/* + * VMware vSockets Driver + * + * Copyright (C) 2013 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#ifndef _VMCI_TRANSPORT_H_ +#define _VMCI_TRANSPORT_H_ + +#include <linux/vmw_vmci_defs.h> +#include <linux/vmw_vmci_api.h> + +#include "vsock_addr.h" +#include "af_vsock.h" + +/* If the packet format changes in a release then this should change too. */ +#define VMCI_TRANSPORT_PACKET_VERSION 1 + +/* The resource ID on which control packets are sent. */ +#define VMCI_TRANSPORT_PACKET_RID 1 + +/* The resource ID on which control packets are sent to the hypervisor. */ +#define VMCI_TRANSPORT_HYPERVISOR_PACKET_RID 15 + +#define VSOCK_PROTO_INVALID 0 +#define VSOCK_PROTO_PKT_ON_NOTIFY (1 << 0) +#define VSOCK_PROTO_ALL_SUPPORTED (VSOCK_PROTO_PKT_ON_NOTIFY) + +#define vmci_trans(_vsk) ((struct vmci_transport *)((_vsk)->trans)) + +enum vmci_transport_packet_type { + VMCI_TRANSPORT_PACKET_TYPE_INVALID = 0, + VMCI_TRANSPORT_PACKET_TYPE_REQUEST, + VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE, + VMCI_TRANSPORT_PACKET_TYPE_OFFER, + VMCI_TRANSPORT_PACKET_TYPE_ATTACH, + VMCI_TRANSPORT_PACKET_TYPE_WROTE, + VMCI_TRANSPORT_PACKET_TYPE_READ, + VMCI_TRANSPORT_PACKET_TYPE_RST, + VMCI_TRANSPORT_PACKET_TYPE_SHUTDOWN, + VMCI_TRANSPORT_PACKET_TYPE_WAITING_WRITE, + VMCI_TRANSPORT_PACKET_TYPE_WAITING_READ, + VMCI_TRANSPORT_PACKET_TYPE_REQUEST2, + VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE2, + VMCI_TRANSPORT_PACKET_TYPE_MAX +}; + +struct vmci_transport_waiting_info { + u64 generation; + u64 offset; +}; + +/* Control packet type for STREAM sockets. DGRAMs have no control packets nor + * special packet header for data packets, they are just raw VMCI DGRAM + * messages. 
For STREAMs, control packets are sent over the control channel + * while data is written and read directly from queue pairs with no packet + * format. + */ +struct vmci_transport_packet { + struct vmci_datagram dg; + u8 version; + u8 type; + u16 proto; + u32 src_port; + u32 dst_port; + u32 _reserved2; + union { + u64 size; + u64 mode; + struct vmci_handle handle; + struct vmci_transport_waiting_info wait; + } u; +}; + +struct vmci_transport_notify_pkt { + u64 write_notify_window; + u64 write_notify_min_window; + bool peer_waiting_read; + bool peer_waiting_write; + bool peer_waiting_write_detected; + bool sent_waiting_read; + bool sent_waiting_write; + struct vmci_transport_waiting_info peer_waiting_read_info; + struct vmci_transport_waiting_info peer_waiting_write_info; + u64 produce_q_generation; + u64 consume_q_generation; +}; + +struct vmci_transport_notify_pkt_q_state { + u64 write_notify_window; + u64 write_notify_min_window; + bool peer_waiting_write; + bool peer_waiting_write_detected; +}; + +union vmci_transport_notify { + struct vmci_transport_notify_pkt pkt; + struct vmci_transport_notify_pkt_q_state pkt_q_state; +}; + +/* Our transport-specific data. */ +struct vmci_transport { + /* For DGRAMs. */ + struct vmci_handle dg_handle; + /* For STREAMs. */ + struct vmci_handle qp_handle; + struct vmci_qp *qpair; + u64 produce_size; + u64 consume_size; + u64 queue_pair_size; + u64 queue_pair_min_size; + u64 queue_pair_max_size; + u32 attach_sub_id; + u32 detach_sub_id; + union vmci_transport_notify notify; + struct vmci_transport_notify_ops *notify_ops; +}; + +int vmci_transport_register(void); +void vmci_transport_unregister(void); + +int vmci_transport_send_wrote_bh(struct sockaddr_vm *dst, + struct sockaddr_vm *src); +int vmci_transport_send_read_bh(struct sockaddr_vm *dst, + struct sockaddr_vm *src); +int vmci_transport_send_wrote(struct sock *sk); +int vmci_transport_send_read(struct sock *sk); +int vmci_transport_send_waiting_write(struct sock *sk, + struct vmci_transport_waiting_info *wait); +int vmci_transport_send_waiting_read(struct sock *sk, + struct vmci_transport_waiting_info *wait); + +#endif diff --git a/net/vmw_vsock/vmci_transport_notify.c b/net/vmw_vsock/vmci_transport_notify.c new file mode 100644 index 000000000000..9a730744e7bc --- /dev/null +++ b/net/vmw_vsock/vmci_transport_notify.c @@ -0,0 +1,680 @@ +/* + * VMware vSockets Driver + * + * Copyright (C) 2009-2013 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#include <linux/types.h> +#include <linux/socket.h> +#include <linux/stddef.h> +#include <net/sock.h> + +#include "vmci_transport_notify.h" + +#define PKT_FIELD(vsk, field_name) (vmci_trans(vsk)->notify.pkt.field_name) + +static bool vmci_transport_notify_waiting_write(struct vsock_sock *vsk) +{ +#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY) + bool retval; + u64 notify_limit; + + if (!PKT_FIELD(vsk, peer_waiting_write)) + return false; + +#ifdef VSOCK_OPTIMIZATION_FLOW_CONTROL + /* When the sender blocks, we take that as a sign that the sender is + * faster than the receiver. 
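Back in the header for a moment: since struct vmci_transport_packet is a wire format shared with peers, its size must never drift. A compile-time pin of the layout would look like the sketch below; the 56 is an assumption derived from 8-byte handles, 16 bytes of scalar fields and the 16-byte waiting-info union, and is not asserted anywhere in this patch.

/* Sketch only: pin the wire size so an accidental field change breaks
 * the build instead of the protocol.
 */
static inline void vmci_transport_packet_layout_check(void)
{
	BUILD_BUG_ON(sizeof(struct vmci_transport_packet) != 56);
}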
To reduce the transmit rate of the sender, + * we delay the sending of the read notification by decreasing the + * write_notify_window. The notification is delayed until the number of + * bytes used in the queue drops below the write_notify_window. + */ + + if (!PKT_FIELD(vsk, peer_waiting_write_detected)) { + PKT_FIELD(vsk, peer_waiting_write_detected) = true; + if (PKT_FIELD(vsk, write_notify_window) < PAGE_SIZE) { + PKT_FIELD(vsk, write_notify_window) = + PKT_FIELD(vsk, write_notify_min_window); + } else { + PKT_FIELD(vsk, write_notify_window) -= PAGE_SIZE; + if (PKT_FIELD(vsk, write_notify_window) < + PKT_FIELD(vsk, write_notify_min_window)) + PKT_FIELD(vsk, write_notify_window) = + PKT_FIELD(vsk, write_notify_min_window); + + } + } + notify_limit = vmci_trans(vsk)->consume_size - + PKT_FIELD(vsk, write_notify_window); +#else + notify_limit = 0; +#endif + + /* For now we ignore the wait information and just see if the free + * space exceeds the notify limit. Note that improving this function + * to be more intelligent will not require a protocol change and will + * retain compatibility between endpoints with mixed versions of this + * function. + * + * The notify_limit is used to delay notifications in the case where + * flow control is enabled. Below the test is expressed in terms of + * free space in the queue: if free_space > ConsumeSize - + * write_notify_window then notify An alternate way of expressing this + * is to rewrite the expression to use the data ready in the receive + * queue: if write_notify_window > bufferReady then notify as + * free_space == ConsumeSize - bufferReady. + */ + retval = vmci_qpair_consume_free_space(vmci_trans(vsk)->qpair) > + notify_limit; +#ifdef VSOCK_OPTIMIZATION_FLOW_CONTROL + if (retval) { + /* + * Once we notify the peer, we reset the detected flag so the + * next wait will again cause a decrease in the window size. + */ + + PKT_FIELD(vsk, peer_waiting_write_detected) = false; + } +#endif + return retval; +#else + return true; +#endif +} + +static bool vmci_transport_notify_waiting_read(struct vsock_sock *vsk) +{ +#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY) + if (!PKT_FIELD(vsk, peer_waiting_read)) + return false; + + /* For now we ignore the wait information and just see if there is any + * data for our peer to read. Note that improving this function to be + * more intelligent will not require a protocol change and will retain + * compatibility between endpoints with mixed versions of this + * function. 
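The two phrasings of the notify test in the comment above really are the same predicate; spelled out as a helper (sketch, with bufferReady written as bytes_ready):

static bool should_send_read_notify(u64 consume_size, u64 bytes_ready,
				    u64 write_notify_window)
{
	u64 free_space = consume_size - bytes_ready;

	/* Equivalent to: write_notify_window > bytes_ready */
	return free_space > consume_size - write_notify_window;
}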
+ */ + return vmci_qpair_produce_buf_ready(vmci_trans(vsk)->qpair) > 0; +#else + return true; +#endif +} + +static void +vmci_transport_handle_waiting_read(struct sock *sk, + struct vmci_transport_packet *pkt, + bool bottom_half, + struct sockaddr_vm *dst, + struct sockaddr_vm *src) +{ +#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY) + struct vsock_sock *vsk; + + vsk = vsock_sk(sk); + + PKT_FIELD(vsk, peer_waiting_read) = true; + memcpy(&PKT_FIELD(vsk, peer_waiting_read_info), &pkt->u.wait, + sizeof(PKT_FIELD(vsk, peer_waiting_read_info))); + + if (vmci_transport_notify_waiting_read(vsk)) { + bool sent; + + if (bottom_half) + sent = vmci_transport_send_wrote_bh(dst, src) > 0; + else + sent = vmci_transport_send_wrote(sk) > 0; + + if (sent) + PKT_FIELD(vsk, peer_waiting_read) = false; + } +#endif +} + +static void +vmci_transport_handle_waiting_write(struct sock *sk, + struct vmci_transport_packet *pkt, + bool bottom_half, + struct sockaddr_vm *dst, + struct sockaddr_vm *src) +{ +#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY) + struct vsock_sock *vsk; + + vsk = vsock_sk(sk); + + PKT_FIELD(vsk, peer_waiting_write) = true; + memcpy(&PKT_FIELD(vsk, peer_waiting_write_info), &pkt->u.wait, + sizeof(PKT_FIELD(vsk, peer_waiting_write_info))); + + if (vmci_transport_notify_waiting_write(vsk)) { + bool sent; + + if (bottom_half) + sent = vmci_transport_send_read_bh(dst, src) > 0; + else + sent = vmci_transport_send_read(sk) > 0; + + if (sent) + PKT_FIELD(vsk, peer_waiting_write) = false; + } +#endif +} + +static void +vmci_transport_handle_read(struct sock *sk, + struct vmci_transport_packet *pkt, + bool bottom_half, + struct sockaddr_vm *dst, struct sockaddr_vm *src) +{ +#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY) + struct vsock_sock *vsk; + + vsk = vsock_sk(sk); + PKT_FIELD(vsk, sent_waiting_write) = false; +#endif + + sk->sk_write_space(sk); +} + +static bool send_waiting_read(struct sock *sk, u64 room_needed) +{ +#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY) + struct vsock_sock *vsk; + struct vmci_transport_waiting_info waiting_info; + u64 tail; + u64 head; + u64 room_left; + bool ret; + + vsk = vsock_sk(sk); + + if (PKT_FIELD(vsk, sent_waiting_read)) + return true; + + if (PKT_FIELD(vsk, write_notify_window) < + vmci_trans(vsk)->consume_size) + PKT_FIELD(vsk, write_notify_window) = + min(PKT_FIELD(vsk, write_notify_window) + PAGE_SIZE, + vmci_trans(vsk)->consume_size); + + vmci_qpair_get_consume_indexes(vmci_trans(vsk)->qpair, &tail, &head); + room_left = vmci_trans(vsk)->consume_size - head; + if (room_needed >= room_left) { + waiting_info.offset = room_needed - room_left; + waiting_info.generation = + PKT_FIELD(vsk, consume_q_generation) + 1; + } else { + waiting_info.offset = head + room_needed; + waiting_info.generation = PKT_FIELD(vsk, consume_q_generation); + } + + ret = vmci_transport_send_waiting_read(sk, &waiting_info) > 0; + if (ret) + PKT_FIELD(vsk, sent_waiting_read) = true; + + return ret; +#else + return true; +#endif +} + +static bool send_waiting_write(struct sock *sk, u64 room_needed) +{ +#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY) + struct vsock_sock *vsk; + struct vmci_transport_waiting_info waiting_info; + u64 tail; + u64 head; + u64 room_left; + bool ret; + + vsk = vsock_sk(sk); + + if (PKT_FIELD(vsk, sent_waiting_write)) + return true; + + vmci_qpair_get_produce_indexes(vmci_trans(vsk)->qpair, &tail, &head); + room_left = vmci_trans(vsk)->produce_size - tail; + if (room_needed + 1 >= room_left) { + /* Wraps around to current generation. 
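The generation/offset arithmetic in send_waiting_read() above translates "wake me when N more bytes fit" into an absolute position in the ring, bumping the generation when the target wraps. As a standalone helper (sketch mirroring that code):

struct wait_target {	/* sketch of the (generation, offset) pair */
	u64 generation;
	u64 offset;
};

static struct wait_target read_wait_point(u64 head, u64 consume_size,
					  u64 generation, u64 room_needed)
{
	struct wait_target t;
	u64 room_left = consume_size - head;

	if (room_needed >= room_left) {
		/* Target lies past the end of the ring, so it is only
		 * reached after head wraps: next generation.
		 */
		t.offset = room_needed - room_left;
		t.generation = generation + 1;
	} else {
		t.offset = head + room_needed;
		t.generation = generation;
	}
	return t;
}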
*/ + waiting_info.offset = room_needed + 1 - room_left; + waiting_info.generation = PKT_FIELD(vsk, produce_q_generation); + } else { + waiting_info.offset = tail + room_needed + 1; + waiting_info.generation = + PKT_FIELD(vsk, produce_q_generation) - 1; + } + + ret = vmci_transport_send_waiting_write(sk, &waiting_info) > 0; + if (ret) + PKT_FIELD(vsk, sent_waiting_write) = true; + + return ret; +#else + return true; +#endif +} + +static int vmci_transport_send_read_notification(struct sock *sk) +{ + struct vsock_sock *vsk; + bool sent_read; + unsigned int retries; + int err; + + vsk = vsock_sk(sk); + sent_read = false; + retries = 0; + err = 0; + + if (vmci_transport_notify_waiting_write(vsk)) { + /* Notify the peer that we have read, retrying the send on + * failure up to our maximum value. XXX For now we just log + * the failure, but later we should schedule a work item to + * handle the resend until it succeeds. That would require + * keeping track of work items in the vsk and cleaning them up + * upon socket close. + */ + while (!(vsk->peer_shutdown & RCV_SHUTDOWN) && + !sent_read && + retries < VMCI_TRANSPORT_MAX_DGRAM_RESENDS) { + err = vmci_transport_send_read(sk); + if (err >= 0) + sent_read = true; + + retries++; + } + + if (retries >= VMCI_TRANSPORT_MAX_DGRAM_RESENDS) + pr_err("%p unable to send read notify to peer\n", sk); + else +#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY) + PKT_FIELD(vsk, peer_waiting_write) = false; +#endif + + } + return err; +} + +static void +vmci_transport_handle_wrote(struct sock *sk, + struct vmci_transport_packet *pkt, + bool bottom_half, + struct sockaddr_vm *dst, struct sockaddr_vm *src) +{ +#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY) + struct vsock_sock *vsk = vsock_sk(sk); + PKT_FIELD(vsk, sent_waiting_read) = false; +#endif + sk->sk_data_ready(sk, 0); +} + +static void vmci_transport_notify_pkt_socket_init(struct sock *sk) +{ + struct vsock_sock *vsk = vsock_sk(sk); + + PKT_FIELD(vsk, write_notify_window) = PAGE_SIZE; + PKT_FIELD(vsk, write_notify_min_window) = PAGE_SIZE; + PKT_FIELD(vsk, peer_waiting_read) = false; + PKT_FIELD(vsk, peer_waiting_write) = false; + PKT_FIELD(vsk, peer_waiting_write_detected) = false; + PKT_FIELD(vsk, sent_waiting_read) = false; + PKT_FIELD(vsk, sent_waiting_write) = false; + PKT_FIELD(vsk, produce_q_generation) = 0; + PKT_FIELD(vsk, consume_q_generation) = 0; + + memset(&PKT_FIELD(vsk, peer_waiting_read_info), 0, + sizeof(PKT_FIELD(vsk, peer_waiting_read_info))); + memset(&PKT_FIELD(vsk, peer_waiting_write_info), 0, + sizeof(PKT_FIELD(vsk, peer_waiting_write_info))); +} + +static void vmci_transport_notify_pkt_socket_destruct(struct vsock_sock *vsk) +{ +} + +static int +vmci_transport_notify_pkt_poll_in(struct sock *sk, + size_t target, bool *data_ready_now) +{ + struct vsock_sock *vsk = vsock_sk(sk); + + if (vsock_stream_has_data(vsk)) { + *data_ready_now = true; + } else { + /* We can't read right now because there is nothing in the + * queue. Ask for notifications when there is something to + * read. 
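The XXX comment above describes a deliberately simple recovery strategy: resend the best-effort datagram up to a fixed cap rather than parking it on a work item. A standalone sketch of that loop; try_send() is a made-up stub in place of vmci_transport_send_read()/vmci_transport_send_wrote():

#include <stdbool.h>
#include <stdio.h>

#define MAX_DGRAM_RESENDS 10

/* Stub transport call: fail twice, then succeed, to exercise the loop. */
static int try_send(void)
{
	static int calls;

	return ++calls < 3 ? -1 : 0;
}

static bool send_with_retries(void)
{
	bool sent = false;
	unsigned int retries = 0;

	while (!sent && retries < MAX_DGRAM_RESENDS) {
		if (try_send() >= 0)
			sent = true;
		retries++;
	}
	if (!sent)
		fprintf(stderr, "unable to send notification\n");
	return sent;
}

int main(void)
{
	return send_with_retries() ? 0 : 1;
}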
+ */ + if (sk->sk_state == SS_CONNECTED) { + if (!send_waiting_read(sk, 1)) + return -1; + + } + *data_ready_now = false; + } + + return 0; +} + +static int +vmci_transport_notify_pkt_poll_out(struct sock *sk, + size_t target, bool *space_avail_now) +{ + s64 produce_q_free_space; + struct vsock_sock *vsk = vsock_sk(sk); + + produce_q_free_space = vsock_stream_has_space(vsk); + if (produce_q_free_space > 0) { + *space_avail_now = true; + return 0; + } else if (produce_q_free_space == 0) { + /* This is a connected socket but we can't currently send data. + * Notify the peer that we are waiting if the queue is full. We + * only send a waiting write if the queue is full because + * otherwise we end up in an infinite WAITING_WRITE, READ, + * WAITING_WRITE, READ, etc. loop. Treat failing to send the + * notification as a socket error, passing that back through + * the mask. + */ + if (!send_waiting_write(sk, 1)) + return -1; + + *space_avail_now = false; + } + + return 0; +} + +static int +vmci_transport_notify_pkt_recv_init( + struct sock *sk, + size_t target, + struct vmci_transport_recv_notify_data *data) +{ + struct vsock_sock *vsk = vsock_sk(sk); + +#ifdef VSOCK_OPTIMIZATION_WAITING_NOTIFY + data->consume_head = 0; + data->produce_tail = 0; +#ifdef VSOCK_OPTIMIZATION_FLOW_CONTROL + data->notify_on_block = false; + + if (PKT_FIELD(vsk, write_notify_min_window) < target + 1) { + PKT_FIELD(vsk, write_notify_min_window) = target + 1; + if (PKT_FIELD(vsk, write_notify_window) < + PKT_FIELD(vsk, write_notify_min_window)) { + /* If the current window is smaller than the new + * minimal window size, we need to reevaluate whether + * we need to notify the sender. If the number of ready + * bytes is smaller than the new window, we need to + * send a notification to the sender before we block. + */ + + PKT_FIELD(vsk, write_notify_window) = + PKT_FIELD(vsk, write_notify_min_window); + data->notify_on_block = true; + } + } +#endif +#endif + + return 0; +} + +static int +vmci_transport_notify_pkt_recv_pre_block( + struct sock *sk, + size_t target, + struct vmci_transport_recv_notify_data *data) +{ + int err = 0; + + /* Notify our peer that we are waiting for data to read. */ + if (!send_waiting_read(sk, target)) { + err = -EHOSTUNREACH; + return err; + } +#ifdef VSOCK_OPTIMIZATION_FLOW_CONTROL + if (data->notify_on_block) { + err = vmci_transport_send_read_notification(sk); + if (err < 0) + return err; + + data->notify_on_block = false; + } +#endif + + return err; +} + +static int +vmci_transport_notify_pkt_recv_pre_dequeue( + struct sock *sk, + size_t target, + struct vmci_transport_recv_notify_data *data) +{ + struct vsock_sock *vsk = vsock_sk(sk); + + /* Now consume up to len bytes from the queue. Note that since we have + * the socket locked we should copy at least the ready bytes. + */ +#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY) + vmci_qpair_get_consume_indexes(vmci_trans(vsk)->qpair, + &data->produce_tail, + &data->consume_head); +#endif + + return 0; +} + +static int +vmci_transport_notify_pkt_recv_post_dequeue( + struct sock *sk, + size_t target, + ssize_t copied, + bool data_read, + struct vmci_transport_recv_notify_data *data) +{ + struct vsock_sock *vsk; + int err; + + vsk = vsock_sk(sk); + err = 0; + + if (data_read) { +#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY) + /* Detect a wrap-around to maintain queue generation. Note + * that this is safe since we hold the socket lock across the + * two queue pair operations.
+ */ + if (copied >= + vmci_trans(vsk)->consume_size - data->consume_head) + PKT_FIELD(vsk, consume_q_generation)++; +#endif + + err = vmci_transport_send_read_notification(sk); + if (err < 0) + return err; + + } + return err; +} + +static int +vmci_transport_notify_pkt_send_init( + struct sock *sk, + struct vmci_transport_send_notify_data *data) +{ +#ifdef VSOCK_OPTIMIZATION_WAITING_NOTIFY + data->consume_head = 0; + data->produce_tail = 0; +#endif + + return 0; +} + +static int +vmci_transport_notify_pkt_send_pre_block( + struct sock *sk, + struct vmci_transport_send_notify_data *data) +{ + /* Notify our peer that we are waiting for room to write. */ + if (!send_waiting_write(sk, 1)) + return -EHOSTUNREACH; + + return 0; +} + +static int +vmci_transport_notify_pkt_send_pre_enqueue( + struct sock *sk, + struct vmci_transport_send_notify_data *data) +{ + struct vsock_sock *vsk = vsock_sk(sk); + +#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY) + vmci_qpair_get_produce_indexes(vmci_trans(vsk)->qpair, + &data->produce_tail, + &data->consume_head); +#endif + + return 0; +} + +static int +vmci_transport_notify_pkt_send_post_enqueue( + struct sock *sk, + ssize_t written, + struct vmci_transport_send_notify_data *data) +{ + int err = 0; + struct vsock_sock *vsk; + bool sent_wrote = false; + int retries = 0; + + vsk = vsock_sk(sk); + +#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY) + /* Detect a wrap-around to maintain queue generation. Note that this + * is safe since we hold the socket lock across the two queue pair + * operations. + */ + if (written >= vmci_trans(vsk)->produce_size - data->produce_tail) + PKT_FIELD(vsk, produce_q_generation)++; + +#endif + + if (vmci_transport_notify_waiting_read(vsk)) { + /* Notify the peer that we have written, retrying the send on + * failure up to our maximum value. See the XXX comment for the + * corresponding piece of code in StreamRecvmsg() for potential + * improvements. 
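Both wrap-around checks above apply the same rule: the generation counter is bumped exactly when a transfer reaches the end of the ring, so (generation, index) pairs stay totally ordered across laps. A self-contained sketch, assuming plain integers for the qpair indexes and single-lap transfers:

#include <assert.h>
#include <stdint.h>

struct ring_pos {
	uint64_t index;		/* head or tail, in [0, size) */
	uint64_t generation;	/* bumped once per completed lap */
};

/* Mirrors "if (copied >= consume_size - head) generation++" above;
 * nbytes is assumed to be at most one full ring (size). */
static void advance(struct ring_pos *p, uint64_t size, uint64_t nbytes)
{
	if (nbytes >= size - p->index)
		p->generation++;
	p->index = (p->index + nbytes) % size;
}

int main(void)
{
	struct ring_pos head = { .index = 4000, .generation = 0 };

	advance(&head, 4096, 200);	/* crosses the end: new lap */
	assert(head.generation == 1 && head.index == 104);

	advance(&head, 4096, 100);	/* stays within the lap */
	assert(head.generation == 1 && head.index == 204);
	return 0;
}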
+ */ + while (!(vsk->peer_shutdown & RCV_SHUTDOWN) && + !sent_wrote && + retries < VMCI_TRANSPORT_MAX_DGRAM_RESENDS) { + err = vmci_transport_send_wrote(sk); + if (err >= 0) + sent_wrote = true; + + retries++; + } + + if (retries >= VMCI_TRANSPORT_MAX_DGRAM_RESENDS) { + pr_err("%p unable to send wrote notify to peer\n", sk); + return err; + } else { +#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY) + PKT_FIELD(vsk, peer_waiting_read) = false; +#endif + } + } + return err; +} + +static void +vmci_transport_notify_pkt_handle_pkt( + struct sock *sk, + struct vmci_transport_packet *pkt, + bool bottom_half, + struct sockaddr_vm *dst, + struct sockaddr_vm *src, bool *pkt_processed) +{ + bool processed = false; + + switch (pkt->type) { + case VMCI_TRANSPORT_PACKET_TYPE_WROTE: + vmci_transport_handle_wrote(sk, pkt, bottom_half, dst, src); + processed = true; + break; + case VMCI_TRANSPORT_PACKET_TYPE_READ: + vmci_transport_handle_read(sk, pkt, bottom_half, dst, src); + processed = true; + break; + case VMCI_TRANSPORT_PACKET_TYPE_WAITING_WRITE: + vmci_transport_handle_waiting_write(sk, pkt, bottom_half, + dst, src); + processed = true; + break; + + case VMCI_TRANSPORT_PACKET_TYPE_WAITING_READ: + vmci_transport_handle_waiting_read(sk, pkt, bottom_half, + dst, src); + processed = true; + break; + } + + if (pkt_processed) + *pkt_processed = processed; +} + +static void vmci_transport_notify_pkt_process_request(struct sock *sk) +{ + struct vsock_sock *vsk = vsock_sk(sk); + + PKT_FIELD(vsk, write_notify_window) = vmci_trans(vsk)->consume_size; + if (vmci_trans(vsk)->consume_size < + PKT_FIELD(vsk, write_notify_min_window)) + PKT_FIELD(vsk, write_notify_min_window) = + vmci_trans(vsk)->consume_size; +} + +static void vmci_transport_notify_pkt_process_negotiate(struct sock *sk) +{ + struct vsock_sock *vsk = vsock_sk(sk); + + PKT_FIELD(vsk, write_notify_window) = vmci_trans(vsk)->consume_size; + if (vmci_trans(vsk)->consume_size < + PKT_FIELD(vsk, write_notify_min_window)) + PKT_FIELD(vsk, write_notify_min_window) = + vmci_trans(vsk)->consume_size; +} + +/* Socket control packet based operations. */ +struct vmci_transport_notify_ops vmci_transport_notify_pkt_ops = { + vmci_transport_notify_pkt_socket_init, + vmci_transport_notify_pkt_socket_destruct, + vmci_transport_notify_pkt_poll_in, + vmci_transport_notify_pkt_poll_out, + vmci_transport_notify_pkt_handle_pkt, + vmci_transport_notify_pkt_recv_init, + vmci_transport_notify_pkt_recv_pre_block, + vmci_transport_notify_pkt_recv_pre_dequeue, + vmci_transport_notify_pkt_recv_post_dequeue, + vmci_transport_notify_pkt_send_init, + vmci_transport_notify_pkt_send_pre_block, + vmci_transport_notify_pkt_send_pre_enqueue, + vmci_transport_notify_pkt_send_post_enqueue, + vmci_transport_notify_pkt_process_request, + vmci_transport_notify_pkt_process_negotiate, +}; diff --git a/net/vmw_vsock/vmci_transport_notify.h b/net/vmw_vsock/vmci_transport_notify.h new file mode 100644 index 000000000000..7df793249b6c --- /dev/null +++ b/net/vmw_vsock/vmci_transport_notify.h @@ -0,0 +1,83 @@ +/* + * VMware vSockets Driver + * + * Copyright (C) 2009-2013 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation version 2 and no later version. 
+ * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#ifndef __VMCI_TRANSPORT_NOTIFY_H__ +#define __VMCI_TRANSPORT_NOTIFY_H__ + +#include <linux/types.h> +#include <linux/vmw_vmci_defs.h> +#include <linux/vmw_vmci_api.h> +#include <linux/vm_sockets.h> + +#include "vmci_transport.h" + +/* Comment this out to compare with old protocol. */ +#define VSOCK_OPTIMIZATION_WAITING_NOTIFY 1 +#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY) +/* Comment this out to remove flow control for "new" protocol */ +#define VSOCK_OPTIMIZATION_FLOW_CONTROL 1 +#endif + +#define VMCI_TRANSPORT_MAX_DGRAM_RESENDS 10 + +struct vmci_transport_recv_notify_data { + u64 consume_head; + u64 produce_tail; + bool notify_on_block; +}; + +struct vmci_transport_send_notify_data { + u64 consume_head; + u64 produce_tail; +}; + +/* Socket notification callbacks. */ +struct vmci_transport_notify_ops { + void (*socket_init) (struct sock *sk); + void (*socket_destruct) (struct vsock_sock *vsk); + int (*poll_in) (struct sock *sk, size_t target, + bool *data_ready_now); + int (*poll_out) (struct sock *sk, size_t target, + bool *space_avail_now); + void (*handle_notify_pkt) (struct sock *sk, + struct vmci_transport_packet *pkt, + bool bottom_half, struct sockaddr_vm *dst, + struct sockaddr_vm *src, + bool *pkt_processed); + int (*recv_init) (struct sock *sk, size_t target, + struct vmci_transport_recv_notify_data *data); + int (*recv_pre_block) (struct sock *sk, size_t target, + struct vmci_transport_recv_notify_data *data); + int (*recv_pre_dequeue) (struct sock *sk, size_t target, + struct vmci_transport_recv_notify_data *data); + int (*recv_post_dequeue) (struct sock *sk, size_t target, + ssize_t copied, bool data_read, + struct vmci_transport_recv_notify_data *data); + int (*send_init) (struct sock *sk, + struct vmci_transport_send_notify_data *data); + int (*send_pre_block) (struct sock *sk, + struct vmci_transport_send_notify_data *data); + int (*send_pre_enqueue) (struct sock *sk, + struct vmci_transport_send_notify_data *data); + int (*send_post_enqueue) (struct sock *sk, ssize_t written, + struct vmci_transport_send_notify_data *data); + void (*process_request) (struct sock *sk); + void (*process_negotiate) (struct sock *sk); +}; + +extern struct vmci_transport_notify_ops vmci_transport_notify_pkt_ops; +extern struct vmci_transport_notify_ops vmci_transport_notify_pkt_q_state_ops; + +#endif /* __VMCI_TRANSPORT_NOTIFY_H__ */ diff --git a/net/vmw_vsock/vmci_transport_notify_qstate.c b/net/vmw_vsock/vmci_transport_notify_qstate.c new file mode 100644 index 000000000000..622bd7aa1016 --- /dev/null +++ b/net/vmw_vsock/vmci_transport_notify_qstate.c @@ -0,0 +1,438 @@ +/* + * VMware vSockets Driver + * + * Copyright (C) 2009-2013 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
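The header above routes every notification decision through a table of function pointers, which is what lets the transport swap protocol variants (pkt vs. pkt_q_state) per socket. A reduced, hypothetical sketch of that dispatch pattern; the two-member table and all names here are illustrative, not the driver's:

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

struct notify_ops {
	void (*socket_init)(void *sk);
	int (*poll_in)(void *sk, size_t target, bool *data_ready_now);
};

static void pkt_socket_init(void *sk)
{
	(void)sk;	/* would reset per-socket notify state */
}

static int pkt_poll_in(void *sk, size_t target, bool *data_ready_now)
{
	(void)sk;
	(void)target;
	*data_ready_now = false;	/* would query the consume queue */
	return 0;
}

static const struct notify_ops pkt_ops = {
	.socket_init = pkt_socket_init,
	.poll_in = pkt_poll_in,
};

int main(void)
{
	/* The real code picks the table during version negotiation. */
	const struct notify_ops *ops = &pkt_ops;
	bool ready;

	ops->socket_init(NULL);
	ops->poll_in(NULL, 1, &ready);
	printf("data ready: %d\n", ready);
	return 0;
}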
+ */ + +#include <linux/types.h> +#include <linux/socket.h> +#include <linux/stddef.h> +#include <net/sock.h> + +#include "vmci_transport_notify.h" + +#define PKT_FIELD(vsk, field_name) \ + (vmci_trans(vsk)->notify.pkt_q_state.field_name) + +static bool vmci_transport_notify_waiting_write(struct vsock_sock *vsk) +{ + bool retval; + u64 notify_limit; + + if (!PKT_FIELD(vsk, peer_waiting_write)) + return false; + + /* When the sender blocks, we take that as a sign that the sender is + * faster than the receiver. To reduce the transmit rate of the sender, + * we delay the sending of the read notification by decreasing the + * write_notify_window. The notification is delayed until the number of + * bytes used in the queue drops below the write_notify_window. + */ + + if (!PKT_FIELD(vsk, peer_waiting_write_detected)) { + PKT_FIELD(vsk, peer_waiting_write_detected) = true; + if (PKT_FIELD(vsk, write_notify_window) < PAGE_SIZE) { + PKT_FIELD(vsk, write_notify_window) = + PKT_FIELD(vsk, write_notify_min_window); + } else { + PKT_FIELD(vsk, write_notify_window) -= PAGE_SIZE; + if (PKT_FIELD(vsk, write_notify_window) < + PKT_FIELD(vsk, write_notify_min_window)) + PKT_FIELD(vsk, write_notify_window) = + PKT_FIELD(vsk, write_notify_min_window); + + } + } + notify_limit = vmci_trans(vsk)->consume_size - + PKT_FIELD(vsk, write_notify_window); + + /* The notify_limit is used to delay notifications in the case where + * flow control is enabled. Below the test is expressed in terms of + * free space in the queue: if free_space > ConsumeSize - + * write_notify_window then notify. An alternate way of expressing this + * is to rewrite the expression to use the data ready in the receive + * queue: if write_notify_window > bufferReady then notify, as + * free_space == ConsumeSize - bufferReady. + */ + + retval = vmci_qpair_consume_free_space(vmci_trans(vsk)->qpair) > + notify_limit; + + if (retval) { + /* Once we notify the peer, we reset the detected flag so the + * next wait will again cause a decrease in the window size. + */ + + PKT_FIELD(vsk, peer_waiting_write_detected) = false; + } + return retval; +} + +static void +vmci_transport_handle_read(struct sock *sk, + struct vmci_transport_packet *pkt, + bool bottom_half, + struct sockaddr_vm *dst, struct sockaddr_vm *src) +{ + sk->sk_write_space(sk); +} + +static void +vmci_transport_handle_wrote(struct sock *sk, + struct vmci_transport_packet *pkt, + bool bottom_half, + struct sockaddr_vm *dst, struct sockaddr_vm *src) +{ + sk->sk_data_ready(sk, 0); +} + +static void vsock_block_update_write_window(struct sock *sk) +{ + struct vsock_sock *vsk = vsock_sk(sk); + + if (PKT_FIELD(vsk, write_notify_window) < vmci_trans(vsk)->consume_size) + PKT_FIELD(vsk, write_notify_window) = + min(PKT_FIELD(vsk, write_notify_window) + PAGE_SIZE, + vmci_trans(vsk)->consume_size); +} + +static int vmci_transport_send_read_notification(struct sock *sk) +{ + struct vsock_sock *vsk; + bool sent_read; + unsigned int retries; + int err; + + vsk = vsock_sk(sk); + sent_read = false; + retries = 0; + err = 0; + + if (vmci_transport_notify_waiting_write(vsk)) { + /* Notify the peer that we have read, retrying the send on + * failure up to our maximum value. XXX For now we just log + * the failure, but later we should schedule a work item to + * handle the resend until it succeeds. That would require + * keeping track of work items in the vsk and cleaning them up + * upon socket close.
+ */ + while (!(vsk->peer_shutdown & RCV_SHUTDOWN) && + !sent_read && + retries < VMCI_TRANSPORT_MAX_DGRAM_RESENDS) { + err = vmci_transport_send_read(sk); + if (err >= 0) + sent_read = true; + + retries++; + } + + if (retries >= VMCI_TRANSPORT_MAX_DGRAM_RESENDS && !sent_read) + pr_err("%p unable to send read notification to peer\n", + sk); + else + PKT_FIELD(vsk, peer_waiting_write) = false; + + } + return err; +} + +static void vmci_transport_notify_pkt_socket_init(struct sock *sk) +{ + struct vsock_sock *vsk = vsock_sk(sk); + + PKT_FIELD(vsk, write_notify_window) = PAGE_SIZE; + PKT_FIELD(vsk, write_notify_min_window) = PAGE_SIZE; + PKT_FIELD(vsk, peer_waiting_write) = false; + PKT_FIELD(vsk, peer_waiting_write_detected) = false; +} + +static void vmci_transport_notify_pkt_socket_destruct(struct vsock_sock *vsk) +{ + PKT_FIELD(vsk, write_notify_window) = PAGE_SIZE; + PKT_FIELD(vsk, write_notify_min_window) = PAGE_SIZE; + PKT_FIELD(vsk, peer_waiting_write) = false; + PKT_FIELD(vsk, peer_waiting_write_detected) = false; +} + +static int +vmci_transport_notify_pkt_poll_in(struct sock *sk, + size_t target, bool *data_ready_now) +{ + struct vsock_sock *vsk = vsock_sk(sk); + + if (vsock_stream_has_data(vsk)) { + *data_ready_now = true; + } else { + /* We can't read right now because there is nothing in the + * queue. Ask for notifications when there is something to + * read. + */ + if (sk->sk_state == SS_CONNECTED) + vsock_block_update_write_window(sk); + *data_ready_now = false; + } + + return 0; +} + +static int +vmci_transport_notify_pkt_poll_out(struct sock *sk, + size_t target, bool *space_avail_now) +{ + s64 produce_q_free_space; + struct vsock_sock *vsk = vsock_sk(sk); + + produce_q_free_space = vsock_stream_has_space(vsk); + if (produce_q_free_space > 0) { + *space_avail_now = true; + return 0; + } else if (produce_q_free_space == 0) { + /* This is a connected socket but we can't currently send data. + * Nothing else to do. + */ + *space_avail_now = false; + } + + return 0; +} + +static int +vmci_transport_notify_pkt_recv_init( + struct sock *sk, + size_t target, + struct vmci_transport_recv_notify_data *data) +{ + struct vsock_sock *vsk = vsock_sk(sk); + + data->consume_head = 0; + data->produce_tail = 0; + data->notify_on_block = false; + + if (PKT_FIELD(vsk, write_notify_min_window) < target + 1) { + PKT_FIELD(vsk, write_notify_min_window) = target + 1; + if (PKT_FIELD(vsk, write_notify_window) < + PKT_FIELD(vsk, write_notify_min_window)) { + /* If the current window is smaller than the new + * minimal window size, we need to reevaluate whether + * we need to notify the sender. If the number of ready + * bytes is smaller than the new window, we need to + * send a notification to the sender before we block.
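Taken together, the shrink path in vmci_transport_notify_waiting_write() and the growth in vsock_block_update_write_window() form a page-at-a-time window policy clamped to [write_notify_min_window, consume_size]. A standalone sketch of that policy, assuming a 4096-byte page purely for illustration:

#include <assert.h>
#include <stdint.h>

#define PAGE_SZ 4096ULL

struct window {
	uint64_t cur;		/* write_notify_window */
	uint64_t min;		/* write_notify_min_window */
	uint64_t consume_size;
};

/* Peer seen waiting to write: shrink to slow the sender down. */
static void window_shrink(struct window *w)
{
	if (w->cur < PAGE_SZ || w->cur - PAGE_SZ < w->min)
		w->cur = w->min;
	else
		w->cur -= PAGE_SZ;
}

/* Reader about to block: grow so the sender is notified sooner. */
static void window_grow(struct window *w)
{
	uint64_t grown = w->cur + PAGE_SZ;

	w->cur = grown < w->consume_size ? grown : w->consume_size;
}

int main(void)
{
	struct window w = { .cur = 8192, .min = 4096, .consume_size = 65536 };

	window_shrink(&w);	/* 8192 -> 4096, the floor */
	assert(w.cur == 4096);
	window_grow(&w);	/* 4096 -> 8192 */
	assert(w.cur == 8192);
	return 0;
}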
+ */ + + PKT_FIELD(vsk, write_notify_window) = + PKT_FIELD(vsk, write_notify_min_window); + data->notify_on_block = true; + } + } + + return 0; +} + +static int +vmci_transport_notify_pkt_recv_pre_block( + struct sock *sk, + size_t target, + struct vmci_transport_recv_notify_data *data) +{ + int err = 0; + + vsock_block_update_write_window(sk); + + if (data->notify_on_block) { + err = vmci_transport_send_read_notification(sk); + if (err < 0) + return err; + data->notify_on_block = false; + } + + return err; +} + +static int +vmci_transport_notify_pkt_recv_post_dequeue( + struct sock *sk, + size_t target, + ssize_t copied, + bool data_read, + struct vmci_transport_recv_notify_data *data) +{ + struct vsock_sock *vsk; + int err; + bool was_full = false; + u64 free_space; + + vsk = vsock_sk(sk); + err = 0; + + if (data_read) { + smp_mb(); + + free_space = + vmci_qpair_consume_free_space(vmci_trans(vsk)->qpair); + was_full = free_space == copied; + + if (was_full) + PKT_FIELD(vsk, peer_waiting_write) = true; + + err = vmci_transport_send_read_notification(sk); + if (err < 0) + return err; + + /* See the comment in + * vmci_transport_notify_pkt_send_post_enqueue(). + */ + sk->sk_data_ready(sk, 0); + } + + return err; +} + +static int +vmci_transport_notify_pkt_send_init( + struct sock *sk, + struct vmci_transport_send_notify_data *data) +{ + data->consume_head = 0; + data->produce_tail = 0; + + return 0; +} + +static int +vmci_transport_notify_pkt_send_post_enqueue( + struct sock *sk, + ssize_t written, + struct vmci_transport_send_notify_data *data) +{ + int err = 0; + struct vsock_sock *vsk; + bool sent_wrote = false; + bool was_empty; + int retries = 0; + + vsk = vsock_sk(sk); + + smp_mb(); + + was_empty = + vmci_qpair_produce_buf_ready(vmci_trans(vsk)->qpair) == written; + if (was_empty) { + while (!(vsk->peer_shutdown & RCV_SHUTDOWN) && + !sent_wrote && + retries < VMCI_TRANSPORT_MAX_DGRAM_RESENDS) { + err = vmci_transport_send_wrote(sk); + if (err >= 0) + sent_wrote = true; + + retries++; + } + } + + if (retries >= VMCI_TRANSPORT_MAX_DGRAM_RESENDS && !sent_wrote) { + pr_err("%p unable to send wrote notification to peer\n", + sk); + return err; + } + + return err; +} + +static void +vmci_transport_notify_pkt_handle_pkt( + struct sock *sk, + struct vmci_transport_packet *pkt, + bool bottom_half, + struct sockaddr_vm *dst, + struct sockaddr_vm *src, bool *pkt_processed) +{ + bool processed = false; + + switch (pkt->type) { + case VMCI_TRANSPORT_PACKET_TYPE_WROTE: + vmci_transport_handle_wrote(sk, pkt, bottom_half, dst, src); + processed = true; + break; + case VMCI_TRANSPORT_PACKET_TYPE_READ: + vmci_transport_handle_read(sk, pkt, bottom_half, dst, src); + processed = true; + break; + } + + if (pkt_processed) + *pkt_processed = processed; +} + +static void vmci_transport_notify_pkt_process_request(struct sock *sk) +{ + struct vsock_sock *vsk = vsock_sk(sk); + + PKT_FIELD(vsk, write_notify_window) = vmci_trans(vsk)->consume_size; + if (vmci_trans(vsk)->consume_size < + PKT_FIELD(vsk, write_notify_min_window)) + PKT_FIELD(vsk, write_notify_min_window) = + vmci_trans(vsk)->consume_size; +} + +static void vmci_transport_notify_pkt_process_negotiate(struct sock *sk) +{ + struct vsock_sock *vsk = vsock_sk(sk); + + PKT_FIELD(vsk, write_notify_window) = vmci_trans(vsk)->consume_size; + if (vmci_trans(vsk)->consume_size < + PKT_FIELD(vsk, write_notify_min_window)) + PKT_FIELD(vsk, write_notify_min_window) = + vmci_trans(vsk)->consume_size; +} + +static int +vmci_transport_notify_pkt_recv_pre_dequeue( + 
struct sock *sk, + size_t target, + struct vmci_transport_recv_notify_data *data) +{ + return 0; /* NOP for QState. */ +} + +static int +vmci_transport_notify_pkt_send_pre_block( + struct sock *sk, + struct vmci_transport_send_notify_data *data) +{ + return 0; /* NOP for QState. */ +} + +static int +vmci_transport_notify_pkt_send_pre_enqueue( + struct sock *sk, + struct vmci_transport_send_notify_data *data) +{ + return 0; /* NOP for QState. */ +} + +/* Socket always on control packet based operations. */ +struct vmci_transport_notify_ops vmci_transport_notify_pkt_q_state_ops = { + vmci_transport_notify_pkt_socket_init, + vmci_transport_notify_pkt_socket_destruct, + vmci_transport_notify_pkt_poll_in, + vmci_transport_notify_pkt_poll_out, + vmci_transport_notify_pkt_handle_pkt, + vmci_transport_notify_pkt_recv_init, + vmci_transport_notify_pkt_recv_pre_block, + vmci_transport_notify_pkt_recv_pre_dequeue, + vmci_transport_notify_pkt_recv_post_dequeue, + vmci_transport_notify_pkt_send_init, + vmci_transport_notify_pkt_send_pre_block, + vmci_transport_notify_pkt_send_pre_enqueue, + vmci_transport_notify_pkt_send_post_enqueue, + vmci_transport_notify_pkt_process_request, + vmci_transport_notify_pkt_process_negotiate, +}; diff --git a/net/vmw_vsock/vsock_addr.c b/net/vmw_vsock/vsock_addr.c new file mode 100644 index 000000000000..ec2611b4ea0e --- /dev/null +++ b/net/vmw_vsock/vsock_addr.c @@ -0,0 +1,76 @@ +/* + * VMware vSockets Driver + * + * Copyright (C) 2007-2012 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
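The q_state variant above signals only on edges: recv_post_dequeue() sends READ when free_space == copied (the queue had been full) and send_post_enqueue() sends WROTE when buf_ready == written (it had been empty), so steady-state traffic produces no control packets at all. A small sketch of the two predicates, with plain counters standing in for the qpair queries:

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

/* After enqueueing `written` bytes, buf_ready is the total readable;
 * the queue was empty beforehand iff all of it is what we just wrote. */
static bool should_send_wrote(uint64_t buf_ready, uint64_t written)
{
	return buf_ready == written;
}

/* After dequeueing `copied` bytes, free_space is the room available;
 * the queue was full beforehand iff all of it is what we just freed. */
static bool should_send_read(uint64_t free_space, uint64_t copied)
{
	return free_space == copied;
}

int main(void)
{
	assert(should_send_wrote(512, 512));	/* empty -> non-empty */
	assert(!should_send_wrote(1024, 512));	/* reader already behind */
	assert(should_send_read(256, 256));	/* full -> non-full */
	return 0;
}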
+ */ + +#include <linux/types.h> +#include <linux/socket.h> +#include <linux/stddef.h> +#include <net/sock.h> + +#include "vsock_addr.h" + +void vsock_addr_init(struct sockaddr_vm *addr, u32 cid, u32 port) +{ + memset(addr, 0, sizeof(*addr)); + addr->svm_family = AF_VSOCK; + addr->svm_cid = cid; + addr->svm_port = port; +} +EXPORT_SYMBOL_GPL(vsock_addr_init); + +int vsock_addr_validate(const struct sockaddr_vm *addr) +{ + if (!addr) + return -EFAULT; + + if (addr->svm_family != AF_VSOCK) + return -EAFNOSUPPORT; + + if (addr->svm_zero[0] != 0) + return -EINVAL; + + return 0; +} +EXPORT_SYMBOL_GPL(vsock_addr_validate); + +bool vsock_addr_bound(const struct sockaddr_vm *addr) +{ + return addr->svm_port != VMADDR_PORT_ANY; +} +EXPORT_SYMBOL_GPL(vsock_addr_bound); + +void vsock_addr_unbind(struct sockaddr_vm *addr) +{ + vsock_addr_init(addr, VMADDR_CID_ANY, VMADDR_PORT_ANY); +} +EXPORT_SYMBOL_GPL(vsock_addr_unbind); + +bool vsock_addr_equals_addr(const struct sockaddr_vm *addr, + const struct sockaddr_vm *other) +{ + return addr->svm_cid == other->svm_cid && + addr->svm_port == other->svm_port; +} +EXPORT_SYMBOL_GPL(vsock_addr_equals_addr); + +int vsock_addr_cast(const struct sockaddr *addr, + size_t len, struct sockaddr_vm **out_addr) +{ + if (len < sizeof(**out_addr)) + return -EFAULT; + + *out_addr = (struct sockaddr_vm *)addr; + return vsock_addr_validate(*out_addr); +} +EXPORT_SYMBOL_GPL(vsock_addr_cast); diff --git a/net/vmw_vsock/vsock_addr.h b/net/vmw_vsock/vsock_addr.h new file mode 100644 index 000000000000..9ccd5316eac0 --- /dev/null +++ b/net/vmw_vsock/vsock_addr.h @@ -0,0 +1,30 @@ +/* + * VMware vSockets Driver + * + * Copyright (C) 2007-2013 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#ifndef _VSOCK_ADDR_H_ +#define _VSOCK_ADDR_H_ + +#include <linux/vm_sockets.h> + +void vsock_addr_init(struct sockaddr_vm *addr, u32 cid, u32 port); +int vsock_addr_validate(const struct sockaddr_vm *addr); +bool vsock_addr_bound(const struct sockaddr_vm *addr); +void vsock_addr_unbind(struct sockaddr_vm *addr); +bool vsock_addr_equals_addr(const struct sockaddr_vm *addr, + const struct sockaddr_vm *other); +int vsock_addr_cast(const struct sockaddr *addr, size_t len, + struct sockaddr_vm **out_addr); + +#endif diff --git a/net/wanrouter/Kconfig b/net/wanrouter/Kconfig deleted file mode 100644 index a157a2e64e18..000000000000 --- a/net/wanrouter/Kconfig +++ /dev/null @@ -1,27 +0,0 @@ -# -# Configuration for WAN router -# - -config WAN_ROUTER - tristate "WAN router (DEPRECATED)" - depends on EXPERIMENTAL - ---help--- - Wide Area Networks (WANs), such as X.25, frame relay and leased - lines, are used to interconnect Local Area Networks (LANs) over vast - distances with data transfer rates significantly higher than those - achievable with commonly used asynchronous modem connections. - Usually, a quite expensive external device called a `WAN router' is - needed to connect to a WAN. - - As an alternative, WAN routing can be built into the Linux kernel. 
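For context on the vsock_addr helpers just added: vsock_addr_init() performs exactly the zero-then-fill that a userspace caller does before bind(). A hypothetical example against the <linux/vm_sockets.h> uapi (port 1234 is an arbitrary choice):

#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <linux/vm_sockets.h>

int main(void)
{
	struct sockaddr_vm addr;
	int fd;

	/* Mirror vsock_addr_init(): zero the struct, set family/CID/port. */
	memset(&addr, 0, sizeof(addr));
	addr.svm_family = AF_VSOCK;
	addr.svm_cid = VMADDR_CID_ANY;
	addr.svm_port = 1234;

	fd = socket(AF_VSOCK, SOCK_STREAM, 0);
	if (fd < 0) {
		perror("socket");
		return 1;
	}
	if (bind(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0) {
		perror("bind");
		return 1;
	}
	puts("bound to vsock port 1234");
	return 0;
}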
- With relatively inexpensive WAN interface cards available on the - market, a perfectly usable router can be built for less than half - the price of an external router. If you have one of those cards and - wish to use your Linux box as a WAN router, say Y here and also to - the WAN driver for your card, below. You will then need the - wan-tools package which is available from <ftp://ftp.sangoma.com/>. - - To compile WAN routing support as a module, choose M here: the - module will be called wanrouter. - - If unsure, say N. diff --git a/net/wanrouter/Makefile b/net/wanrouter/Makefile deleted file mode 100644 index 4da14bc48078..000000000000 --- a/net/wanrouter/Makefile +++ /dev/null @@ -1,7 +0,0 @@ -# -# Makefile for the Linux WAN router layer. -# - -obj-$(CONFIG_WAN_ROUTER) += wanrouter.o - -wanrouter-y := wanproc.o wanmain.o diff --git a/net/wanrouter/patchlevel b/net/wanrouter/patchlevel deleted file mode 100644 index c043eea7767e..000000000000 --- a/net/wanrouter/patchlevel +++ /dev/null @@ -1 +0,0 @@ -2.2.1 diff --git a/net/wanrouter/wanmain.c b/net/wanrouter/wanmain.c deleted file mode 100644 index 2ab785064b7e..000000000000 --- a/net/wanrouter/wanmain.c +++ /dev/null @@ -1,782 +0,0 @@ -/***************************************************************************** -* wanmain.c WAN Multiprotocol Router Module. Main code. -* -* This module is completely hardware-independent and provides -* the following common services for the WAN Link Drivers: -* o WAN device management (registering, unregistering) -* o Network interface management -* o Physical connection management (dial-up, incoming calls) -* o Logical connection management (switched virtual circuits) -* o Protocol encapsulation/decapsulation -* -* Author: Gideon Hack -* -* Copyright: (c) 1995-1999 Sangoma Technologies Inc. -* -* This program is free software; you can redistribute it and/or -* modify it under the terms of the GNU General Public License -* as published by the Free Software Foundation; either version -* 2 of the License, or (at your option) any later version. -* ============================================================================ -* Nov 24, 2000 Nenad Corbic Updated for 2.4.X kernels -* Nov 07, 2000 Nenad Corbic Fixed the Mulit-Port PPP for kernels 2.2.16 and -* greater. -* Aug 2, 2000 Nenad Corbic Block the Multi-Port PPP from running on -* kernels 2.2.16 or greater. The SyncPPP -* has changed. -* Jul 13, 2000 Nenad Corbic Added SyncPPP support -* Added extra debugging in device_setup(). -* Oct 01, 1999 Gideon Hack Update for s514 PCI card -* Dec 27, 1996 Gene Kozin Initial version (based on Sangoma's WANPIPE) -* Jan 16, 1997 Gene Kozin router_devlist made public -* Jan 31, 1997 Alan Cox Hacked it about a bit for 2.1 -* Jun 27, 1997 Alan Cox realigned with vendor code -* Oct 15, 1997 Farhan Thawar changed wan_encapsulate to add a pad byte of 0 -* Apr 20, 1998 Alan Cox Fixed 2.1 symbols -* May 17, 1998 K. Baranowski Fixed SNAP encapsulation in wan_encapsulate -* Dec 15, 1998 Arnaldo Melo support for firmwares of up to 128000 bytes -* check wandev->setup return value -* Dec 22, 1998 Arnaldo Melo vmalloc/vfree used in device_setup to allocate -* kernel memory and copy configuration data to -* kernel space (for big firmwares) -* Jun 02, 1999 Gideon Hack Updates for Linux 2.0.X and 2.2.X kernels. -*****************************************************************************/ - -#include <linux/stddef.h> /* offsetof(), etc. 
*/ -#include <linux/capability.h> -#include <linux/errno.h> /* return codes */ -#include <linux/kernel.h> -#include <linux/module.h> /* support for loadable modules */ -#include <linux/slab.h> /* kmalloc(), kfree() */ -#include <linux/mutex.h> -#include <linux/mm.h> -#include <linux/string.h> /* inline mem*, str* functions */ - -#include <asm/byteorder.h> /* htons(), etc. */ -#include <linux/wanrouter.h> /* WAN router API definitions */ - -#include <linux/vmalloc.h> /* vmalloc, vfree */ -#include <asm/uaccess.h> /* copy_to/from_user */ -#include <linux/init.h> /* __initfunc et al. */ - -#define DEV_TO_SLAVE(dev) (*((struct net_device **)netdev_priv(dev))) - -/* - * Function Prototypes - */ - -/* - * WAN device IOCTL handlers - */ - -static DEFINE_MUTEX(wanrouter_mutex); -static int wanrouter_device_setup(struct wan_device *wandev, - wandev_conf_t __user *u_conf); -static int wanrouter_device_stat(struct wan_device *wandev, - wandev_stat_t __user *u_stat); -static int wanrouter_device_shutdown(struct wan_device *wandev); -static int wanrouter_device_new_if(struct wan_device *wandev, - wanif_conf_t __user *u_conf); -static int wanrouter_device_del_if(struct wan_device *wandev, - char __user *u_name); - -/* - * Miscellaneous - */ - -static struct wan_device *wanrouter_find_device(char *name); -static int wanrouter_delete_interface(struct wan_device *wandev, char *name); -static void lock_adapter_irq(spinlock_t *lock, unsigned long *smp_flags) - __acquires(lock); -static void unlock_adapter_irq(spinlock_t *lock, unsigned long *smp_flags) - __releases(lock); - - - -/* - * Global Data - */ - -static char wanrouter_fullname[] = "Sangoma WANPIPE Router"; -static char wanrouter_copyright[] = "(c) 1995-2000 Sangoma Technologies Inc."; -static char wanrouter_modname[] = ROUTER_NAME; /* short module name */ -struct wan_device* wanrouter_router_devlist; /* list of registered devices */ - -/* - * Organize Unique Identifiers for encapsulation/decapsulation - */ - -#if 0 -static unsigned char wanrouter_oui_ether[] = { 0x00, 0x00, 0x00 }; -static unsigned char wanrouter_oui_802_2[] = { 0x00, 0x80, 0xC2 }; -#endif - -static int __init wanrouter_init(void) -{ - int err; - - printk(KERN_INFO "%s v%u.%u %s\n", - wanrouter_fullname, ROUTER_VERSION, ROUTER_RELEASE, - wanrouter_copyright); - - err = wanrouter_proc_init(); - if (err) - printk(KERN_INFO "%s: can't create entry in proc filesystem!\n", - wanrouter_modname); - - return err; -} - -static void __exit wanrouter_cleanup (void) -{ - wanrouter_proc_cleanup(); -} - -/* - * This is just plain dumb. We should move the bugger to drivers/net/wan, - * slap it first in directory and make it module_init(). The only reason - * for subsys_initcall() here is that net goes after drivers (why, BTW?) - */ -subsys_initcall(wanrouter_init); -module_exit(wanrouter_cleanup); - -/* - * Kernel APIs - */ - -/* - * Register WAN device. - * o verify device credentials - * o create an entry for the device in the /proc/net/router directory - * o initialize internally maintained fields of the wan_device structure - * o link device data space to a singly-linked list - * o if it's the first device, then start kernel 'thread' - * o increment module use count - * - * Return: - * 0 Ok - * < 0 error. 
- * - * Context: process - */ - - -int register_wan_device(struct wan_device *wandev) -{ - int err, namelen; - - if ((wandev == NULL) || (wandev->magic != ROUTER_MAGIC) || - (wandev->name == NULL)) - return -EINVAL; - - namelen = strlen(wandev->name); - if (!namelen || (namelen > WAN_DRVNAME_SZ)) - return -EINVAL; - - if (wanrouter_find_device(wandev->name)) - return -EEXIST; - -#ifdef WANDEBUG - printk(KERN_INFO "%s: registering WAN device %s\n", - wanrouter_modname, wandev->name); -#endif - - /* - * Register /proc directory entry - */ - err = wanrouter_proc_add(wandev); - if (err) { - printk(KERN_INFO - "%s: can't create /proc/net/router/%s entry!\n", - wanrouter_modname, wandev->name); - return err; - } - - /* - * Initialize fields of the wan_device structure maintained by the - * router and update local data. - */ - - wandev->ndev = 0; - wandev->dev = NULL; - wandev->next = wanrouter_router_devlist; - wanrouter_router_devlist = wandev; - return 0; -} - -/* - * Unregister WAN device. - * o shut down device - * o unlink device data space from the linked list - * o delete device entry in the /proc/net/router directory - * o decrement module use count - * - * Return: 0 Ok - * <0 error. - * Context: process - */ - - -int unregister_wan_device(char *name) -{ - struct wan_device *wandev, *prev; - - if (name == NULL) - return -EINVAL; - - for (wandev = wanrouter_router_devlist, prev = NULL; - wandev && strcmp(wandev->name, name); - prev = wandev, wandev = wandev->next) - ; - if (wandev == NULL) - return -ENODEV; - -#ifdef WANDEBUG - printk(KERN_INFO "%s: unregistering WAN device %s\n", - wanrouter_modname, name); -#endif - - if (wandev->state != WAN_UNCONFIGURED) - wanrouter_device_shutdown(wandev); - - if (prev) - prev->next = wandev->next; - else - wanrouter_router_devlist = wandev->next; - - wanrouter_proc_delete(wandev); - return 0; -} - -#if 0 - -/* - * Encapsulate packet. - * - * Return: encapsulation header size - * < 0 - unsupported Ethertype - * - * Notes: - * 1. This function may be called on interrupt context. - */ - - -int wanrouter_encapsulate(struct sk_buff *skb, struct net_device *dev, - unsigned short type) -{ - int hdr_len = 0; - - switch (type) { - case ETH_P_IP: /* IP datagram encapsulation */ - hdr_len += 1; - skb_push(skb, 1); - skb->data[0] = NLPID_IP; - break; - - case ETH_P_IPX: /* SNAP encapsulation */ - case ETH_P_ARP: - hdr_len += 7; - skb_push(skb, 7); - skb->data[0] = 0; - skb->data[1] = NLPID_SNAP; - skb_copy_to_linear_data_offset(skb, 2, wanrouter_oui_ether, - sizeof(wanrouter_oui_ether)); - *((unsigned short*)&skb->data[5]) = htons(type); - break; - - default: /* Unknown packet type */ - printk(KERN_INFO - "%s: unsupported Ethertype 0x%04X on interface %s!\n", - wanrouter_modname, type, dev->name); - hdr_len = -EINVAL; - } - return hdr_len; -} - - -/* - * Decapsulate packet. - * - * Return: Ethertype (in network order) - * 0 unknown encapsulation - * - * Notes: - * 1. This function may be called on interrupt context. - */ - - -__be16 wanrouter_type_trans(struct sk_buff *skb, struct net_device *dev) -{ - int cnt = skb->data[0] ? 
0 : 1; /* there may be a pad present */ - __be16 ethertype; - - switch (skb->data[cnt]) { - case NLPID_IP: /* IP datagramm */ - ethertype = htons(ETH_P_IP); - cnt += 1; - break; - - case NLPID_SNAP: /* SNAP encapsulation */ - if (memcmp(&skb->data[cnt + 1], wanrouter_oui_ether, - sizeof(wanrouter_oui_ether))){ - printk(KERN_INFO - "%s: unsupported SNAP OUI %02X-%02X-%02X " - "on interface %s!\n", wanrouter_modname, - skb->data[cnt+1], skb->data[cnt+2], - skb->data[cnt+3], dev->name); - return 0; - } - ethertype = *((__be16*)&skb->data[cnt+4]); - cnt += 6; - break; - - /* add other protocols, e.g. CLNP, ESIS, ISIS, if needed */ - - default: - printk(KERN_INFO - "%s: unsupported NLPID 0x%02X on interface %s!\n", - wanrouter_modname, skb->data[cnt], dev->name); - return 0; - } - skb->protocol = ethertype; - skb->pkt_type = PACKET_HOST; /* Physically point to point */ - skb_pull(skb, cnt); - skb_reset_mac_header(skb); - return ethertype; -} - -#endif /* 0 */ - -/* - * WAN device IOCTL. - * o find WAN device associated with this node - * o execute requested action or pass command to the device driver - */ - -long wanrouter_ioctl(struct file *file, unsigned int cmd, unsigned long arg) -{ - struct inode *inode = file->f_path.dentry->d_inode; - int err = 0; - struct proc_dir_entry *dent; - struct wan_device *wandev; - void __user *data = (void __user *)arg; - - if (!capable(CAP_NET_ADMIN)) - return -EPERM; - - if ((cmd >> 8) != ROUTER_IOCTL) - return -EINVAL; - - dent = PDE(inode); - if ((dent == NULL) || (dent->data == NULL)) - return -EINVAL; - - wandev = dent->data; - if (wandev->magic != ROUTER_MAGIC) - return -EINVAL; - - mutex_lock(&wanrouter_mutex); - switch (cmd) { - case ROUTER_SETUP: - err = wanrouter_device_setup(wandev, data); - break; - - case ROUTER_DOWN: - err = wanrouter_device_shutdown(wandev); - break; - - case ROUTER_STAT: - err = wanrouter_device_stat(wandev, data); - break; - - case ROUTER_IFNEW: - err = wanrouter_device_new_if(wandev, data); - break; - - case ROUTER_IFDEL: - err = wanrouter_device_del_if(wandev, data); - break; - - case ROUTER_IFSTAT: - break; - - default: - if ((cmd >= ROUTER_USER) && - (cmd <= ROUTER_USER_MAX) && - wandev->ioctl) - err = wandev->ioctl(wandev, cmd, arg); - else err = -EINVAL; - } - mutex_unlock(&wanrouter_mutex); - return err; -} - -/* - * WAN Driver IOCTL Handlers - */ - -/* - * Setup WAN link device. - * o verify user address space - * o allocate kernel memory and copy configuration data to kernel space - * o if configuration data includes extension, copy it to kernel space too - * o call driver's setup() entry point - */ - -static int wanrouter_device_setup(struct wan_device *wandev, - wandev_conf_t __user *u_conf) -{ - void *data = NULL; - wandev_conf_t *conf; - int err = -EINVAL; - - if (wandev->setup == NULL) { /* Nothing to do ? 
*/ - printk(KERN_INFO "%s: ERROR, No setup script: wandev->setup()\n", - wandev->name); - return 0; - } - - conf = kmalloc(sizeof(wandev_conf_t), GFP_KERNEL); - if (conf == NULL){ - printk(KERN_INFO "%s: ERROR, Failed to allocate kernel memory !\n", - wandev->name); - return -ENOBUFS; - } - - if (copy_from_user(conf, u_conf, sizeof(wandev_conf_t))) { - printk(KERN_INFO "%s: Failed to copy user config data to kernel space!\n", - wandev->name); - kfree(conf); - return -EFAULT; - } - - if (conf->magic != ROUTER_MAGIC) { - kfree(conf); - printk(KERN_INFO "%s: ERROR, Invalid MAGIC Number\n", - wandev->name); - return -EINVAL; - } - - if (conf->data_size && conf->data) { - if (conf->data_size > 128000) { - printk(KERN_INFO - "%s: ERROR, Invalid firmware data size %i !\n", - wandev->name, conf->data_size); - kfree(conf); - return -EINVAL; - } - - data = vmalloc(conf->data_size); - if (!data) { - printk(KERN_INFO - "%s: ERROR, Failed allocate kernel memory !\n", - wandev->name); - kfree(conf); - return -ENOBUFS; - } - if (!copy_from_user(data, conf->data, conf->data_size)) { - conf->data = data; - err = wandev->setup(wandev, conf); - } else { - printk(KERN_INFO - "%s: ERROR, Failed to copy from user data !\n", - wandev->name); - err = -EFAULT; - } - vfree(data); - } else { - printk(KERN_INFO - "%s: ERROR, No firmware found ! Firmware size = %i !\n", - wandev->name, conf->data_size); - } - - kfree(conf); - return err; -} - -/* - * Shutdown WAN device. - * o delete all not opened logical channels for this device - * o call driver's shutdown() entry point - */ - -static int wanrouter_device_shutdown(struct wan_device *wandev) -{ - struct net_device *dev; - int err=0; - - if (wandev->state == WAN_UNCONFIGURED) - return 0; - - printk(KERN_INFO "\n%s: Shutting Down!\n",wandev->name); - - for (dev = wandev->dev; dev;) { - err = wanrouter_delete_interface(wandev, dev->name); - if (err) - return err; - /* The above function deallocates the current dev - * structure. Therefore, we cannot use netdev_priv(dev) - * as the next element: wandev->dev points to the - * next element */ - dev = wandev->dev; - } - - if (wandev->ndev) - return -EBUSY; /* there are opened interfaces */ - - if (wandev->shutdown) - err=wandev->shutdown(wandev); - - return err; -} - -/* - * Get WAN device status & statistics. - */ - -static int wanrouter_device_stat(struct wan_device *wandev, - wandev_stat_t __user *u_stat) -{ - wandev_stat_t stat; - - memset(&stat, 0, sizeof(stat)); - - /* Ask device driver to update device statistics */ - if ((wandev->state != WAN_UNCONFIGURED) && wandev->update) - wandev->update(wandev); - - /* Fill out structure */ - stat.ndev = wandev->ndev; - stat.state = wandev->state; - - if (copy_to_user(u_stat, &stat, sizeof(stat))) - return -EFAULT; - - return 0; -} - -/* - * Create new WAN interface. 
- * o verify user address space - * o copy configuration data to kernel address space - * o allocate network interface data space - * o call driver's new_if() entry point - * o make sure there is no interface name conflict - * o register network interface - */ - -static int wanrouter_device_new_if(struct wan_device *wandev, - wanif_conf_t __user *u_conf) -{ - wanif_conf_t *cnf; - struct net_device *dev = NULL; - int err; - - if ((wandev->state == WAN_UNCONFIGURED) || (wandev->new_if == NULL)) - return -ENODEV; - - cnf = kmalloc(sizeof(wanif_conf_t), GFP_KERNEL); - if (!cnf) - return -ENOBUFS; - - err = -EFAULT; - if (copy_from_user(cnf, u_conf, sizeof(wanif_conf_t))) - goto out; - - err = -EINVAL; - if (cnf->magic != ROUTER_MAGIC) - goto out; - - if (cnf->config_id == WANCONFIG_MPPP) { - printk(KERN_INFO "%s: Wanpipe Mulit-Port PPP support has not been compiled in!\n", - wandev->name); - err = -EPROTONOSUPPORT; - goto out; - } else { - err = wandev->new_if(wandev, dev, cnf); - } - - if (!err) { - /* Register network interface. This will invoke init() - * function supplied by the driver. If device registered - * successfully, add it to the interface list. - */ - -#ifdef WANDEBUG - printk(KERN_INFO "%s: registering interface %s...\n", - wanrouter_modname, dev->name); -#endif - - err = register_netdev(dev); - if (!err) { - struct net_device *slave = NULL; - unsigned long smp_flags=0; - - lock_adapter_irq(&wandev->lock, &smp_flags); - - if (wandev->dev == NULL) { - wandev->dev = dev; - } else { - for (slave=wandev->dev; - DEV_TO_SLAVE(slave); - slave = DEV_TO_SLAVE(slave)) - DEV_TO_SLAVE(slave) = dev; - } - ++wandev->ndev; - - unlock_adapter_irq(&wandev->lock, &smp_flags); - err = 0; /* done !!! */ - goto out; - } - if (wandev->del_if) - wandev->del_if(wandev, dev); - free_netdev(dev); - } - -out: - kfree(cnf); - return err; -} - - -/* - * Delete WAN logical channel. - * o verify user address space - * o copy configuration data to kernel address space - */ - -static int wanrouter_device_del_if(struct wan_device *wandev, char __user *u_name) -{ - char name[WAN_IFNAME_SZ + 1]; - int err = 0; - - if (wandev->state == WAN_UNCONFIGURED) - return -ENODEV; - - memset(name, 0, sizeof(name)); - - if (copy_from_user(name, u_name, WAN_IFNAME_SZ)) - return -EFAULT; - - err = wanrouter_delete_interface(wandev, name); - if (err) - return err; - - /* If last interface being deleted, shutdown card - * This helps with administration at leaf nodes - * (You can tell if the person at the other end of the phone - * has an interface configured) and avoids DoS vulnerabilities - * in binary driver files - this fixes a problem with the current - * Sangoma driver going into strange states when all the network - * interfaces are deleted and the link irrecoverably disconnected. - */ - - if (!wandev->ndev && wandev->shutdown) - err = wandev->shutdown(wandev); - - return err; -} - -/* - * Miscellaneous Functions - */ - -/* - * Find WAN device by name. - * Return pointer to the WAN device data space or NULL if device not found. - */ - -static struct wan_device *wanrouter_find_device(char *name) -{ - struct wan_device *wandev; - - for (wandev = wanrouter_router_devlist; - wandev && strcmp(wandev->name, name); - wandev = wandev->next); - return wandev; -} - -/* - * Delete WAN logical channel identified by its name. 
- * o find logical channel by its name - * o call driver's del_if() entry point - * o unregister network interface - * o unlink channel data space from linked list of channels - * o release channel data space - * - * Return: 0 success - * -ENODEV channel not found. - * -EBUSY interface is open - * - * Note: If (force != 0), then device will be destroyed even if interface - * associated with it is open. It's caller's responsibility to make - * sure that opened interfaces are not removed! - */ - -static int wanrouter_delete_interface(struct wan_device *wandev, char *name) -{ - struct net_device *dev = NULL, *prev = NULL; - unsigned long smp_flags=0; - - lock_adapter_irq(&wandev->lock, &smp_flags); - dev = wandev->dev; - prev = NULL; - while (dev && strcmp(name, dev->name)) { - struct net_device **slave = netdev_priv(dev); - prev = dev; - dev = *slave; - } - unlock_adapter_irq(&wandev->lock, &smp_flags); - - if (dev == NULL) - return -ENODEV; /* interface not found */ - - if (netif_running(dev)) - return -EBUSY; /* interface in use */ - - if (wandev->del_if) - wandev->del_if(wandev, dev); - - lock_adapter_irq(&wandev->lock, &smp_flags); - if (prev) { - struct net_device **prev_slave = netdev_priv(prev); - struct net_device **slave = netdev_priv(dev); - - *prev_slave = *slave; - } else { - struct net_device **slave = netdev_priv(dev); - wandev->dev = *slave; - } - --wandev->ndev; - unlock_adapter_irq(&wandev->lock, &smp_flags); - - printk(KERN_INFO "%s: unregistering '%s'\n", wandev->name, dev->name); - - unregister_netdev(dev); - - free_netdev(dev); - - return 0; -} - -static void lock_adapter_irq(spinlock_t *lock, unsigned long *smp_flags) - __acquires(lock) -{ - spin_lock_irqsave(lock, *smp_flags); -} - - -static void unlock_adapter_irq(spinlock_t *lock, unsigned long *smp_flags) - __releases(lock) -{ - spin_unlock_irqrestore(lock, *smp_flags); -} - -EXPORT_SYMBOL(register_wan_device); -EXPORT_SYMBOL(unregister_wan_device); - -MODULE_LICENSE("GPL"); - -/* - * End - */ diff --git a/net/wanrouter/wanproc.c b/net/wanrouter/wanproc.c deleted file mode 100644 index c43612ee96bb..000000000000 --- a/net/wanrouter/wanproc.c +++ /dev/null @@ -1,380 +0,0 @@ -/***************************************************************************** -* wanproc.c WAN Router Module. /proc filesystem interface. -* -* This module is completely hardware-independent and provides -* access to the router using Linux /proc filesystem. -* -* Author: Gideon Hack -* -* Copyright: (c) 1995-1999 Sangoma Technologies Inc. -* -* This program is free software; you can redistribute it and/or -* modify it under the terms of the GNU General Public License -* as published by the Free Software Foundation; either version -* 2 of the License, or (at your option) any later version. -* ============================================================================ -* Jun 02, 1999 Gideon Hack Updates for Linux 2.2.X kernels. -* Jun 29, 1997 Alan Cox Merged with 1.0.3 vendor code -* Jan 29, 1997 Gene Kozin v1.0.1. Implemented /proc read routines -* Jan 30, 1997 Alan Cox Hacked around for 2.1 -* Dec 13, 1996 Gene Kozin Initial version (based on Sangoma's WANPIPE) -*****************************************************************************/ - -#include <linux/init.h> /* __initfunc et al. */ -#include <linux/stddef.h> /* offsetof(), etc. 
*/ -#include <linux/errno.h> /* return codes */ -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/wanrouter.h> /* WAN router API definitions */ -#include <linux/seq_file.h> -#include <linux/mutex.h> - -#include <net/net_namespace.h> -#include <asm/io.h> - -#define PROC_STATS_FORMAT "%30s: %12lu\n" - -/****** Defines and Macros **************************************************/ - -#define PROT_DECODE(prot) ((prot == WANCONFIG_FR) ? " FR" :\ - (prot == WANCONFIG_X25) ? " X25" : \ - (prot == WANCONFIG_PPP) ? " PPP" : \ - (prot == WANCONFIG_CHDLC) ? " CHDLC": \ - (prot == WANCONFIG_MPPP) ? " MPPP" : \ - " Unknown" ) - -/****** Function Prototypes *************************************************/ - -#ifdef CONFIG_PROC_FS - -/* Miscellaneous */ - -/* - * Structures for interfacing with the /proc filesystem. - * Router creates its own directory /proc/net/router with the following - * entries: - * config device configuration - * status global device statistics - * <device> entry for each WAN device - */ - -/* - * Generic /proc/net/router/<file> file and inode operations - */ - -/* - * /proc/net/router - */ - -static DEFINE_MUTEX(config_mutex); -static struct proc_dir_entry *proc_router; - -/* Strings */ - -/* - * Interface functions - */ - -/****** Proc filesystem entry points ****************************************/ - -/* - * Iterator - */ -static void *r_start(struct seq_file *m, loff_t *pos) -{ - struct wan_device *wandev; - loff_t l = *pos; - - mutex_lock(&config_mutex); - if (!l--) - return SEQ_START_TOKEN; - for (wandev = wanrouter_router_devlist; l-- && wandev; - wandev = wandev->next) - ; - return wandev; -} - -static void *r_next(struct seq_file *m, void *v, loff_t *pos) -{ - struct wan_device *wandev = v; - (*pos)++; - return (v == SEQ_START_TOKEN) ? wanrouter_router_devlist : wandev->next; -} - -static void r_stop(struct seq_file *m, void *v) -{ - mutex_unlock(&config_mutex); -} - -static int config_show(struct seq_file *m, void *v) -{ - struct wan_device *p = v; - if (v == SEQ_START_TOKEN) { - seq_puts(m, "Device name | port |IRQ|DMA| mem.addr |" - "mem.size|option1|option2|option3|option4\n"); - return 0; - } - if (!p->state) - return 0; - seq_printf(m, "%-15s|0x%-4X|%3u|%3u| 0x%-8lX |0x%-6X|%7u|%7u|%7u|%7u\n", - p->name, p->ioport, p->irq, p->dma, p->maddr, p->msize, - p->hw_opt[0], p->hw_opt[1], p->hw_opt[2], p->hw_opt[3]); - return 0; -} - -static int status_show(struct seq_file *m, void *v) -{ - struct wan_device *p = v; - if (v == SEQ_START_TOKEN) { - seq_puts(m, "Device name |protocol|station|interface|" - "clocking|baud rate| MTU |ndev|link state\n"); - return 0; - } - if (!p->state) - return 0; - seq_printf(m, "%-15s|%-8s| %-7s| %-9s|%-8s|%9u|%5u|%3u |", - p->name, - PROT_DECODE(p->config_id), - p->config_id == WANCONFIG_FR ? - (p->station ? "Node" : "CPE") : - (p->config_id == WANCONFIG_X25 ? - (p->station ? "DCE" : "DTE") : - ("N/A")), - p->interface ? "V.35" : "RS-232", - p->clocking ? 
"internal" : "external", - p->bps, - p->mtu, - p->ndev); - - switch (p->state) { - case WAN_UNCONFIGURED: - seq_printf(m, "%-12s\n", "unconfigured"); - break; - case WAN_DISCONNECTED: - seq_printf(m, "%-12s\n", "disconnected"); - break; - case WAN_CONNECTING: - seq_printf(m, "%-12s\n", "connecting"); - break; - case WAN_CONNECTED: - seq_printf(m, "%-12s\n", "connected"); - break; - default: - seq_printf(m, "%-12s\n", "invalid"); - break; - } - return 0; -} - -static const struct seq_operations config_op = { - .start = r_start, - .next = r_next, - .stop = r_stop, - .show = config_show, -}; - -static const struct seq_operations status_op = { - .start = r_start, - .next = r_next, - .stop = r_stop, - .show = status_show, -}; - -static int config_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &config_op); -} - -static int status_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &status_op); -} - -static const struct file_operations config_fops = { - .owner = THIS_MODULE, - .open = config_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - -static const struct file_operations status_fops = { - .owner = THIS_MODULE, - .open = status_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - -static int wandev_show(struct seq_file *m, void *v) -{ - struct wan_device *wandev = m->private; - - if (wandev->magic != ROUTER_MAGIC) - return 0; - - if (!wandev->state) { - seq_puts(m, "device is not configured!\n"); - return 0; - } - - /* Update device statistics */ - if (wandev->update) { - int err = wandev->update(wandev); - if (err == -EAGAIN) { - seq_puts(m, "Device is busy!\n"); - return 0; - } - if (err) { - seq_puts(m, "Device is not configured!\n"); - return 0; - } - } - - seq_printf(m, PROC_STATS_FORMAT, - "total packets received", wandev->stats.rx_packets); - seq_printf(m, PROC_STATS_FORMAT, - "total packets transmitted", wandev->stats.tx_packets); - seq_printf(m, PROC_STATS_FORMAT, - "total bytes received", wandev->stats.rx_bytes); - seq_printf(m, PROC_STATS_FORMAT, - "total bytes transmitted", wandev->stats.tx_bytes); - seq_printf(m, PROC_STATS_FORMAT, - "bad packets received", wandev->stats.rx_errors); - seq_printf(m, PROC_STATS_FORMAT, - "packet transmit problems", wandev->stats.tx_errors); - seq_printf(m, PROC_STATS_FORMAT, - "received frames dropped", wandev->stats.rx_dropped); - seq_printf(m, PROC_STATS_FORMAT, - "transmit frames dropped", wandev->stats.tx_dropped); - seq_printf(m, PROC_STATS_FORMAT, - "multicast packets received", wandev->stats.multicast); - seq_printf(m, PROC_STATS_FORMAT, - "transmit collisions", wandev->stats.collisions); - seq_printf(m, PROC_STATS_FORMAT, - "receive length errors", wandev->stats.rx_length_errors); - seq_printf(m, PROC_STATS_FORMAT, - "receiver overrun errors", wandev->stats.rx_over_errors); - seq_printf(m, PROC_STATS_FORMAT, - "CRC errors", wandev->stats.rx_crc_errors); - seq_printf(m, PROC_STATS_FORMAT, - "frame format errors (aborts)", wandev->stats.rx_frame_errors); - seq_printf(m, PROC_STATS_FORMAT, - "receiver fifo overrun", wandev->stats.rx_fifo_errors); - seq_printf(m, PROC_STATS_FORMAT, - "receiver missed packet", wandev->stats.rx_missed_errors); - seq_printf(m, PROC_STATS_FORMAT, - "aborted frames transmitted", wandev->stats.tx_aborted_errors); - return 0; -} - -static int wandev_open(struct inode *inode, struct file *file) -{ - return single_open(file, wandev_show, PDE(inode)->data); -} - -static const struct file_operations wandev_fops = { - 
.owner = THIS_MODULE, - .open = wandev_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, - .unlocked_ioctl = wanrouter_ioctl, -}; - -/* - * Initialize router proc interface. - */ - -int __init wanrouter_proc_init(void) -{ - struct proc_dir_entry *p; - proc_router = proc_mkdir(ROUTER_NAME, init_net.proc_net); - if (!proc_router) - goto fail; - - p = proc_create("config", S_IRUGO, proc_router, &config_fops); - if (!p) - goto fail_config; - p = proc_create("status", S_IRUGO, proc_router, &status_fops); - if (!p) - goto fail_stat; - return 0; -fail_stat: - remove_proc_entry("config", proc_router); -fail_config: - remove_proc_entry(ROUTER_NAME, init_net.proc_net); -fail: - return -ENOMEM; -} - -/* - * Clean up router proc interface. - */ - -void wanrouter_proc_cleanup(void) -{ - remove_proc_entry("config", proc_router); - remove_proc_entry("status", proc_router); - remove_proc_entry(ROUTER_NAME, init_net.proc_net); -} - -/* - * Add directory entry for WAN device. - */ - -int wanrouter_proc_add(struct wan_device* wandev) -{ - if (wandev->magic != ROUTER_MAGIC) - return -EINVAL; - - wandev->dent = proc_create(wandev->name, S_IRUGO, - proc_router, &wandev_fops); - if (!wandev->dent) - return -ENOMEM; - wandev->dent->data = wandev; - return 0; -} - -/* - * Delete directory entry for WAN device. - */ -int wanrouter_proc_delete(struct wan_device* wandev) -{ - if (wandev->magic != ROUTER_MAGIC) - return -EINVAL; - remove_proc_entry(wandev->name, proc_router); - return 0; -} - -#else - -/* - * No /proc - output stubs - */ - -int __init wanrouter_proc_init(void) -{ - return 0; -} - -void wanrouter_proc_cleanup(void) -{ -} - -int wanrouter_proc_add(struct wan_device *wandev) -{ - return 0; -} - -int wanrouter_proc_delete(struct wan_device *wandev) -{ - return 0; -} - -#endif - -/* - * End - */ - diff --git a/net/wireless/chan.c b/net/wireless/chan.c index a7990bb16529..fd556ac05fdb 100644 --- a/net/wireless/chan.c +++ b/net/wireless/chan.c @@ -76,6 +76,10 @@ bool cfg80211_chandef_valid(const struct cfg80211_chan_def *chandef) return false; if (!chandef->center_freq2) return false; + /* adjacent is not allowed -- that's a 160 MHz channel */ + if (chandef->center_freq1 - chandef->center_freq2 == 80 || + chandef->center_freq2 - chandef->center_freq1 == 80) + return false; break; case NL80211_CHAN_WIDTH_80: if (chandef->center_freq1 != control_freq + 30 && @@ -143,6 +147,32 @@ static void chandef_primary_freqs(const struct cfg80211_chan_def *c, } } +static int cfg80211_chandef_get_width(const struct cfg80211_chan_def *c) +{ + int width; + + switch (c->width) { + case NL80211_CHAN_WIDTH_20: + case NL80211_CHAN_WIDTH_20_NOHT: + width = 20; + break; + case NL80211_CHAN_WIDTH_40: + width = 40; + break; + case NL80211_CHAN_WIDTH_80P80: + case NL80211_CHAN_WIDTH_80: + width = 80; + break; + case NL80211_CHAN_WIDTH_160: + width = 160; + break; + default: + WARN_ON_ONCE(1); + return -1; + } + return width; +} + const struct cfg80211_chan_def * cfg80211_chandef_compatible(const struct cfg80211_chan_def *c1, const struct cfg80211_chan_def *c2) @@ -188,6 +218,93 @@ cfg80211_chandef_compatible(const struct cfg80211_chan_def *c1, } EXPORT_SYMBOL(cfg80211_chandef_compatible); +static void cfg80211_set_chans_dfs_state(struct wiphy *wiphy, u32 center_freq, + u32 bandwidth, + enum nl80211_dfs_state dfs_state) +{ + struct ieee80211_channel *c; + u32 freq; + + for (freq = center_freq - bandwidth/2 + 10; + freq <= center_freq + bandwidth/2 - 10; + freq += 20) { + c = ieee80211_get_channel(wiphy, 
freq); + if (!c || !(c->flags & IEEE80211_CHAN_RADAR)) + continue; + + c->dfs_state = dfs_state; + c->dfs_state_entered = jiffies; + } +} + +void cfg80211_set_dfs_state(struct wiphy *wiphy, + const struct cfg80211_chan_def *chandef, + enum nl80211_dfs_state dfs_state) +{ + int width; + + if (WARN_ON(!cfg80211_chandef_valid(chandef))) + return; + + width = cfg80211_chandef_get_width(chandef); + if (width < 0) + return; + + cfg80211_set_chans_dfs_state(wiphy, chandef->center_freq1, + width, dfs_state); + + if (!chandef->center_freq2) + return; + cfg80211_set_chans_dfs_state(wiphy, chandef->center_freq2, + width, dfs_state); +} + +static int cfg80211_get_chans_dfs_required(struct wiphy *wiphy, + u32 center_freq, + u32 bandwidth) +{ + struct ieee80211_channel *c; + u32 freq; + + for (freq = center_freq - bandwidth/2 + 10; + freq <= center_freq + bandwidth/2 - 10; + freq += 20) { + c = ieee80211_get_channel(wiphy, freq); + if (!c) + return -EINVAL; + + if (c->flags & IEEE80211_CHAN_RADAR) + return 1; + } + return 0; +} + + +int cfg80211_chandef_dfs_required(struct wiphy *wiphy, + const struct cfg80211_chan_def *chandef) +{ + int width; + int r; + + if (WARN_ON(!cfg80211_chandef_valid(chandef))) + return -EINVAL; + + width = cfg80211_chandef_get_width(chandef); + if (width < 0) + return -EINVAL; + + r = cfg80211_get_chans_dfs_required(wiphy, chandef->center_freq1, + width); + if (r) + return r; + + if (!chandef->center_freq2) + return 0; + + return cfg80211_get_chans_dfs_required(wiphy, chandef->center_freq2, + width); +} + static bool cfg80211_secondary_chans_ok(struct wiphy *wiphy, u32 center_freq, u32 bandwidth, u32 prohibited_flags) @@ -199,7 +316,16 @@ static bool cfg80211_secondary_chans_ok(struct wiphy *wiphy, freq <= center_freq + bandwidth/2 - 10; freq += 20) { c = ieee80211_get_channel(wiphy, freq); - if (!c || c->flags & prohibited_flags) + if (!c) + return false; + + /* check for radar flags */ + if ((prohibited_flags & c->flags & IEEE80211_CHAN_RADAR) && + (c->dfs_state != NL80211_DFS_AVAILABLE)) + return false; + + /* check for the other flags */ + if (c->flags & prohibited_flags & ~IEEE80211_CHAN_RADAR) return false; } @@ -249,6 +375,7 @@ bool cfg80211_chandef_usable(struct wiphy *wiphy, case NL80211_CHAN_WIDTH_80: if (!vht_cap->vht_supported) return false; + prohibited_flags |= IEEE80211_CHAN_NO_80MHZ; width = 80; break; case NL80211_CHAN_WIDTH_160: @@ -256,6 +383,7 @@ bool cfg80211_chandef_usable(struct wiphy *wiphy, return false; if (!(vht_cap->cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ)) return false; + prohibited_flags |= IEEE80211_CHAN_NO_160MHZ; width = 160; break; default: @@ -263,7 +391,16 @@ bool cfg80211_chandef_usable(struct wiphy *wiphy, return false; } - /* TODO: missing regulatory check on 80/160 bandwidth */ + /* + * TODO: What if there are only certain 80/160/80+80 MHz channels + * allowed by the driver, or only certain combinations? + * For 40 MHz the driver can set the NO_HT40 flags, but for + * 80/160 MHz and in particular 80+80 MHz this isn't really + * feasible and we only have NO_80MHZ/NO_160MHZ so far but + * no way to cover 80+80 MHz or more complex restrictions. + * Note that such restrictions also need to be advertised to + * userspace, for example for P2P channel selection. 
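+ * A concrete illustration of the gap: cfg80211_chandef_valid()
+ * earlier in this file already rejects 80+80 segments whose centre
+ * frequencies are only 80 MHz apart (that is really a 160 MHz
+ * channel), but a device that can only pair specific non-adjacent
+ * 80 MHz segments has no per-channel flag to express that today.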
+ */ if (width > 20) prohibited_flags |= IEEE80211_CHAN_NO_OFDM; @@ -340,7 +477,10 @@ cfg80211_get_chan_state(struct wireless_dev *wdev, break; case NL80211_IFTYPE_AP: case NL80211_IFTYPE_P2P_GO: - if (wdev->beacon_interval) { + if (wdev->cac_started) { + *chan = wdev->channel; + *chanmode = CHAN_MODE_SHARED; + } else if (wdev->beacon_interval) { *chan = wdev->channel; *chanmode = CHAN_MODE_SHARED; } diff --git a/net/wireless/core.c b/net/wireless/core.c index b677eab55b68..84c9ad7e1dca 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -57,9 +57,6 @@ struct cfg80211_registered_device *cfg80211_rdev_by_wiphy_idx(int wiphy_idx) { struct cfg80211_registered_device *result = NULL, *rdev; - if (!wiphy_idx_valid(wiphy_idx)) - return NULL; - assert_cfg80211_lock(); list_for_each_entry(rdev, &cfg80211_rdev_list, list) { @@ -74,10 +71,8 @@ struct cfg80211_registered_device *cfg80211_rdev_by_wiphy_idx(int wiphy_idx) int get_wiphy_idx(struct wiphy *wiphy) { - struct cfg80211_registered_device *rdev; - if (!wiphy) - return WIPHY_IDX_STALE; - rdev = wiphy_to_dev(wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + return rdev->wiphy_idx; } @@ -86,9 +81,6 @@ struct wiphy *wiphy_idx_to_wiphy(int wiphy_idx) { struct cfg80211_registered_device *rdev; - if (!wiphy_idx_valid(wiphy_idx)) - return NULL; - assert_cfg80211_lock(); rdev = cfg80211_rdev_by_wiphy_idx(wiphy_idx); @@ -220,6 +212,39 @@ static void cfg80211_rfkill_poll(struct rfkill *rfkill, void *data) rdev_rfkill_poll(rdev); } +void cfg80211_stop_p2p_device(struct cfg80211_registered_device *rdev, + struct wireless_dev *wdev) +{ + lockdep_assert_held(&rdev->devlist_mtx); + lockdep_assert_held(&rdev->sched_scan_mtx); + + if (WARN_ON(wdev->iftype != NL80211_IFTYPE_P2P_DEVICE)) + return; + + if (!wdev->p2p_started) + return; + + rdev_stop_p2p_device(rdev, wdev); + wdev->p2p_started = false; + + rdev->opencount--; + + if (rdev->scan_req && rdev->scan_req->wdev == wdev) { + bool busy = work_busy(&rdev->scan_done_wk); + + /* + * If the work isn't pending or running (in which case it would + * be waiting for the lock we hold) the driver didn't properly + * cancel the scan when the interface was removed. In this case + * warn and leak the scan request object to not crash later. 
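+ * The work_busy() check above encodes that distinction: a pending
+ * or running work item will complete once we drop our locks, so
+ * ___cfg80211_scan_done(rdev, !busy) below only leaks the request
+ * in the buggy-driver case.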
+ */ + WARN_ON(!busy); + + rdev->scan_req->aborted = true; + ___cfg80211_scan_done(rdev, !busy); + } +} + static int cfg80211_rfkill_set_block(void *data, bool blocked) { struct cfg80211_registered_device *rdev = data; @@ -229,7 +254,8 @@ static int cfg80211_rfkill_set_block(void *data, bool blocked) return 0; rtnl_lock(); - mutex_lock(&rdev->devlist_mtx); + + /* read-only iteration need not hold the devlist_mtx */ list_for_each_entry(wdev, &rdev->wdev_list, list) { if (wdev->netdev) { @@ -239,18 +265,18 @@ static int cfg80211_rfkill_set_block(void *data, bool blocked) /* otherwise, check iftype */ switch (wdev->iftype) { case NL80211_IFTYPE_P2P_DEVICE: - if (!wdev->p2p_started) - break; - rdev_stop_p2p_device(rdev, wdev); - wdev->p2p_started = false; - rdev->opencount--; + /* but this requires it */ + mutex_lock(&rdev->devlist_mtx); + mutex_lock(&rdev->sched_scan_mtx); + cfg80211_stop_p2p_device(rdev, wdev); + mutex_unlock(&rdev->sched_scan_mtx); + mutex_unlock(&rdev->devlist_mtx); break; default: break; } } - mutex_unlock(&rdev->devlist_mtx); rtnl_unlock(); return 0; @@ -309,7 +335,7 @@ struct wiphy *wiphy_new(const struct cfg80211_ops *ops, int sizeof_priv) rdev->wiphy_idx = wiphy_counter++; - if (unlikely(!wiphy_idx_valid(rdev->wiphy_idx))) { + if (unlikely(rdev->wiphy_idx < 0)) { wiphy_counter--; mutex_unlock(&cfg80211_mutex); /* ugh, wrapped! */ @@ -332,6 +358,8 @@ struct wiphy *wiphy_new(const struct cfg80211_ops *ops, int sizeof_priv) INIT_LIST_HEAD(&rdev->bss_list); INIT_WORK(&rdev->scan_done_wk, __cfg80211_scan_done); INIT_WORK(&rdev->sched_scan_results_wk, __cfg80211_sched_scan_results); + INIT_DELAYED_WORK(&rdev->dfs_update_channels_wk, + cfg80211_dfs_channels_update_work); #ifdef CONFIG_CFG80211_WEXT rdev->wiphy.wext = &cfg80211_wext_handler; #endif @@ -390,8 +418,11 @@ static int wiphy_verify_combinations(struct wiphy *wiphy) c = &wiphy->iface_combinations[i]; - /* Combinations with just one interface aren't real */ - if (WARN_ON(c->max_interfaces < 2)) + /* + * Combinations with just one interface aren't real, + * however we make an exception for DFS. + */ + if (WARN_ON((c->max_interfaces < 2) && !c->radar_detect_widths)) return -EINVAL; /* Need at least one channel */ @@ -406,6 +437,11 @@ static int wiphy_verify_combinations(struct wiphy *wiphy) CFG80211_MAX_NUM_DIFFERENT_CHANNELS)) return -EINVAL; + /* DFS only works on one channel. 
*/ + if (WARN_ON(c->radar_detect_widths && + (c->num_different_channels > 1))) + return -EINVAL; + if (WARN_ON(!c->n_limits)) return -EINVAL; @@ -478,6 +514,11 @@ int wiphy_register(struct wiphy *wiphy) ETH_ALEN))) return -EINVAL; + if (WARN_ON(wiphy->max_acl_mac_addrs && + (!(wiphy->flags & WIPHY_FLAG_HAVE_AP_SME) || + !rdev->ops->set_mac_acl))) + return -EINVAL; + if (wiphy->addresses) memcpy(wiphy->perm_addr, wiphy->addresses[0].addr, ETH_ALEN); @@ -690,6 +731,7 @@ void wiphy_unregister(struct wiphy *wiphy) flush_work(&rdev->scan_done_wk); cancel_work_sync(&rdev->conn_work); flush_work(&rdev->event_work); + cancel_delayed_work_sync(&rdev->dfs_update_channels_wk); if (rdev->wowlan && rdev->ops->set_wakeup) rdev_set_wakeup(rdev, false); @@ -710,7 +752,7 @@ void cfg80211_dev_free(struct cfg80211_registered_device *rdev) kfree(reg); } list_for_each_entry_safe(scan, tmp, &rdev->bss_list, list) - cfg80211_put_bss(&scan->pub); + cfg80211_put_bss(&rdev->wiphy, &scan->pub); kfree(rdev); } @@ -737,17 +779,13 @@ static void wdev_cleanup_work(struct work_struct *work) wdev = container_of(work, struct wireless_dev, cleanup_work); rdev = wiphy_to_dev(wdev->wiphy); - cfg80211_lock_rdev(rdev); + mutex_lock(&rdev->sched_scan_mtx); if (WARN_ON(rdev->scan_req && rdev->scan_req->wdev == wdev)) { rdev->scan_req->aborted = true; ___cfg80211_scan_done(rdev, true); } - cfg80211_unlock_rdev(rdev); - - mutex_lock(&rdev->sched_scan_mtx); - if (WARN_ON(rdev->sched_scan_req && rdev->sched_scan_req->dev == wdev->netdev)) { __cfg80211_stop_sched_scan(rdev, false); @@ -773,21 +811,19 @@ void cfg80211_unregister_wdev(struct wireless_dev *wdev) return; mutex_lock(&rdev->devlist_mtx); + mutex_lock(&rdev->sched_scan_mtx); list_del_rcu(&wdev->list); rdev->devlist_generation++; switch (wdev->iftype) { case NL80211_IFTYPE_P2P_DEVICE: - if (!wdev->p2p_started) - break; - rdev_stop_p2p_device(rdev, wdev); - wdev->p2p_started = false; - rdev->opencount--; + cfg80211_stop_p2p_device(rdev, wdev); break; default: WARN_ON_ONCE(1); break; } + mutex_unlock(&rdev->sched_scan_mtx); mutex_unlock(&rdev->devlist_mtx); } EXPORT_SYMBOL(cfg80211_unregister_wdev); @@ -806,6 +842,46 @@ void cfg80211_update_iface_num(struct cfg80211_registered_device *rdev, rdev->num_running_monitor_ifaces += num; } +void cfg80211_leave(struct cfg80211_registered_device *rdev, + struct wireless_dev *wdev) +{ + struct net_device *dev = wdev->netdev; + + switch (wdev->iftype) { + case NL80211_IFTYPE_ADHOC: + cfg80211_leave_ibss(rdev, dev, true); + break; + case NL80211_IFTYPE_P2P_CLIENT: + case NL80211_IFTYPE_STATION: + mutex_lock(&rdev->sched_scan_mtx); + __cfg80211_stop_sched_scan(rdev, false); + mutex_unlock(&rdev->sched_scan_mtx); + + wdev_lock(wdev); +#ifdef CONFIG_CFG80211_WEXT + kfree(wdev->wext.ie); + wdev->wext.ie = NULL; + wdev->wext.ie_len = 0; + wdev->wext.connect.auth_type = NL80211_AUTHTYPE_AUTOMATIC; +#endif + __cfg80211_disconnect(rdev, dev, + WLAN_REASON_DEAUTH_LEAVING, true); + cfg80211_mlme_down(rdev, dev); + wdev_unlock(wdev); + break; + case NL80211_IFTYPE_MESH_POINT: + cfg80211_leave_mesh(rdev, dev); + break; + case NL80211_IFTYPE_AP: + cfg80211_stop_ap(rdev, dev); + break; + default: + break; + } + + wdev->beacon_interval = 0; +} + static int cfg80211_netdev_notifier_call(struct notifier_block *nb, unsigned long state, void *ndev) @@ -874,38 +950,7 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb, dev->priv_flags |= IFF_DONT_BRIDGE; break; case NETDEV_GOING_DOWN: - switch (wdev->iftype) { - case NL80211_IFTYPE_ADHOC: - 
cfg80211_leave_ibss(rdev, dev, true); - break; - case NL80211_IFTYPE_P2P_CLIENT: - case NL80211_IFTYPE_STATION: - mutex_lock(&rdev->sched_scan_mtx); - __cfg80211_stop_sched_scan(rdev, false); - mutex_unlock(&rdev->sched_scan_mtx); - - wdev_lock(wdev); -#ifdef CONFIG_CFG80211_WEXT - kfree(wdev->wext.ie); - wdev->wext.ie = NULL; - wdev->wext.ie_len = 0; - wdev->wext.connect.auth_type = NL80211_AUTHTYPE_AUTOMATIC; -#endif - __cfg80211_disconnect(rdev, dev, - WLAN_REASON_DEAUTH_LEAVING, true); - cfg80211_mlme_down(rdev, dev); - wdev_unlock(wdev); - break; - case NL80211_IFTYPE_MESH_POINT: - cfg80211_leave_mesh(rdev, dev); - break; - case NL80211_IFTYPE_AP: - cfg80211_stop_ap(rdev, dev); - break; - default: - break; - } - wdev->beacon_interval = 0; + cfg80211_leave(rdev, wdev); break; case NETDEV_DOWN: cfg80211_update_iface_num(rdev, wdev->iftype, -1); @@ -928,6 +973,7 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb, cfg80211_update_iface_num(rdev, wdev->iftype, 1); cfg80211_lock_rdev(rdev); mutex_lock(&rdev->devlist_mtx); + mutex_lock(&rdev->sched_scan_mtx); wdev_lock(wdev); switch (wdev->iftype) { #ifdef CONFIG_CFG80211_WEXT @@ -959,6 +1005,7 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb, break; } wdev_unlock(wdev); + mutex_unlock(&rdev->sched_scan_mtx); rdev->opencount++; mutex_unlock(&rdev->devlist_mtx); cfg80211_unlock_rdev(rdev); @@ -1079,8 +1126,10 @@ static int __init cfg80211_init(void) goto out_fail_reg; cfg80211_wq = create_singlethread_workqueue("cfg80211"); - if (!cfg80211_wq) + if (!cfg80211_wq) { + err = -ENOMEM; goto out_fail_wq; + } return 0; diff --git a/net/wireless/core.h b/net/wireless/core.h index 3563097169cb..fd35dae547c4 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -8,7 +8,6 @@ #include <linux/mutex.h> #include <linux/list.h> #include <linux/netdevice.h> -#include <linux/kref.h> #include <linux/rbtree.h> #include <linux/debugfs.h> #include <linux/rfkill.h> @@ -18,6 +17,9 @@ #include <net/cfg80211.h> #include "reg.h" + +#define WIPHY_IDX_INVALID -1 + struct cfg80211_registered_device { const struct cfg80211_ops *ops; struct list_head list; @@ -84,9 +86,14 @@ struct cfg80211_registered_device { struct cfg80211_wowlan *wowlan; + struct delayed_work dfs_update_channels_wk; + + /* netlink port which started critical protocol (0 means not started) */ + u32 crit_proto_nlportid; + /* must be last because of the way we do wiphy_priv(), * and it should at least be aligned to NETDEV_ALIGN */ - struct wiphy wiphy __attribute__((__aligned__(NETDEV_ALIGN))); + struct wiphy wiphy __aligned(NETDEV_ALIGN); }; static inline @@ -96,13 +103,6 @@ struct cfg80211_registered_device *wiphy_to_dev(struct wiphy *wiphy) return container_of(wiphy, struct cfg80211_registered_device, wiphy); } -/* Note 0 is valid, hence phy0 */ -static inline -bool wiphy_idx_valid(int wiphy_idx) -{ - return wiphy_idx >= 0; -} - static inline void cfg80211_rdev_free_wowlan(struct cfg80211_registered_device *rdev) { @@ -113,6 +113,9 @@ cfg80211_rdev_free_wowlan(struct cfg80211_registered_device *rdev) for (i = 0; i < rdev->wowlan->n_patterns; i++) kfree(rdev->wowlan->patterns[i].mask); kfree(rdev->wowlan->patterns); + if (rdev->wowlan->tcp && rdev->wowlan->tcp->sock) + sock_release(rdev->wowlan->tcp->sock); + kfree(rdev->wowlan->tcp); kfree(rdev->wowlan); } @@ -126,17 +129,12 @@ static inline void assert_cfg80211_lock(void) lockdep_assert_held(&cfg80211_mutex); } -/* - * You can use this to mark a wiphy_idx as not having an associated wiphy. 
- * It guarantees cfg80211_rdev_by_wiphy_idx(wiphy_idx) will return NULL - */ -#define WIPHY_IDX_STALE -1 - struct cfg80211_internal_bss { struct list_head list; + struct list_head hidden_list; struct rb_node rbn; unsigned long ts; - struct kref ref; + unsigned long refcount; atomic_t hold; /* must be last because of priv member */ @@ -335,20 +333,15 @@ int cfg80211_mlme_auth(struct cfg80211_registered_device *rdev, int __cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev, struct net_device *dev, struct ieee80211_channel *chan, - const u8 *bssid, const u8 *prev_bssid, + const u8 *bssid, const u8 *ssid, int ssid_len, - const u8 *ie, int ie_len, bool use_mfp, - struct cfg80211_crypto_settings *crypt, - u32 assoc_flags, struct ieee80211_ht_cap *ht_capa, - struct ieee80211_ht_cap *ht_capa_mask); + struct cfg80211_assoc_request *req); int cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev, - struct net_device *dev, struct ieee80211_channel *chan, - const u8 *bssid, const u8 *prev_bssid, + struct net_device *dev, + struct ieee80211_channel *chan, + const u8 *bssid, const u8 *ssid, int ssid_len, - const u8 *ie, int ie_len, bool use_mfp, - struct cfg80211_crypto_settings *crypt, - u32 assoc_flags, struct ieee80211_ht_cap *ht_capa, - struct ieee80211_ht_cap *ht_capa_mask); + struct cfg80211_assoc_request *req); int __cfg80211_mlme_deauth(struct cfg80211_registered_device *rdev, struct net_device *dev, const u8 *bssid, const u8 *ie, int ie_len, u16 reason, @@ -380,6 +373,8 @@ int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev, bool no_cck, bool dont_wait_for_ack, u64 *cookie); void cfg80211_oper_and_ht_capa(struct ieee80211_ht_cap *ht_capa, const struct ieee80211_ht_cap *ht_capa_mask); +void cfg80211_oper_and_vht_capa(struct ieee80211_vht_cap *vht_capa, + const struct ieee80211_vht_cap *vht_capa_mask); /* SME */ int __cfg80211_connect(struct cfg80211_registered_device *rdev, @@ -435,7 +430,24 @@ int cfg80211_can_use_iftype_chan(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev, enum nl80211_iftype iftype, struct ieee80211_channel *chan, - enum cfg80211_chan_mode chanmode); + enum cfg80211_chan_mode chanmode, + u8 radar_detect); + +/** + * cfg80211_chandef_dfs_required - checks if radar detection is required + * @wiphy: the wiphy to validate against + * @chandef: the channel definition to check + * Return: 1 if radar detection is required, 0 if it is not, < 0 on error + */ +int cfg80211_chandef_dfs_required(struct wiphy *wiphy, + const struct cfg80211_chan_def *c); + +void cfg80211_set_dfs_state(struct wiphy *wiphy, + const struct cfg80211_chan_def *chandef, + enum nl80211_dfs_state dfs_state); + +void cfg80211_dfs_channels_update_work(struct work_struct *work); + static inline int cfg80211_can_change_interface(struct cfg80211_registered_device *rdev, @@ -443,7 +455,7 @@ cfg80211_can_change_interface(struct cfg80211_registered_device *rdev, enum nl80211_iftype iftype) { return cfg80211_can_use_iftype_chan(rdev, wdev, iftype, NULL, - CHAN_MODE_UNDEFINED); + CHAN_MODE_UNDEFINED, 0); } static inline int @@ -460,7 +472,17 @@ cfg80211_can_use_chan(struct cfg80211_registered_device *rdev, enum cfg80211_chan_mode chanmode) { return cfg80211_can_use_iftype_chan(rdev, wdev, wdev->iftype, - chan, chanmode); + chan, chanmode, 0); +} + +static inline unsigned int elapsed_jiffies_msecs(unsigned long start) +{ + unsigned long end = jiffies; + + if (end >= start) + return jiffies_to_msecs(end - start); + + return jiffies_to_msecs(end + (MAX_JIFFY_OFFSET - start) + 
1); } void @@ -481,6 +503,12 @@ int cfg80211_validate_beacon_int(struct cfg80211_registered_device *rdev, void cfg80211_update_iface_num(struct cfg80211_registered_device *rdev, enum nl80211_iftype iftype, int num); +void cfg80211_leave(struct cfg80211_registered_device *rdev, + struct wireless_dev *wdev); + +void cfg80211_stop_p2p_device(struct cfg80211_registered_device *rdev, + struct wireless_dev *wdev); + #define CFG80211_MAX_NUM_DIFFERENT_CHANNELS 10 #ifdef CONFIG_CFG80211_DEVELOPER_WARNINGS diff --git a/net/wireless/ethtool.c b/net/wireless/ethtool.c index 48c48ffafa1d..e37862f1b127 100644 --- a/net/wireless/ethtool.c +++ b/net/wireless/ethtool.c @@ -15,10 +15,10 @@ static void cfg80211_get_drvinfo(struct net_device *dev, strlcpy(info->version, init_utsname()->release, sizeof(info->version)); if (wdev->wiphy->fw_version[0]) - strncpy(info->fw_version, wdev->wiphy->fw_version, + strlcpy(info->fw_version, wdev->wiphy->fw_version, sizeof(info->fw_version)); else - strncpy(info->fw_version, "N/A", sizeof(info->fw_version)); + strlcpy(info->fw_version, "N/A", sizeof(info->fw_version)); strlcpy(info->bus_info, dev_name(wiphy_dev(wdev->wiphy)), sizeof(info->bus_info)); diff --git a/net/wireless/ibss.c b/net/wireless/ibss.c index 9b9551e4a6f9..d80e47194d49 100644 --- a/net/wireless/ibss.c +++ b/net/wireless/ibss.c @@ -37,7 +37,7 @@ void __cfg80211_ibss_joined(struct net_device *dev, const u8 *bssid) if (wdev->current_bss) { cfg80211_unhold_bss(wdev->current_bss); - cfg80211_put_bss(&wdev->current_bss->pub); + cfg80211_put_bss(wdev->wiphy, &wdev->current_bss->pub); } cfg80211_hold_bss(bss_from_pub(bss)); @@ -182,7 +182,7 @@ static void __cfg80211_clear_ibss(struct net_device *dev, bool nowext) if (wdev->current_bss) { cfg80211_unhold_bss(wdev->current_bss); - cfg80211_put_bss(&wdev->current_bss->pub); + cfg80211_put_bss(wdev->wiphy, &wdev->current_bss->pub); } wdev->current_bss = NULL; diff --git a/net/wireless/lib80211_crypt_ccmp.c b/net/wireless/lib80211_crypt_ccmp.c index 1526c211db66..dc0e59e53dbf 100644 --- a/net/wireless/lib80211_crypt_ccmp.c +++ b/net/wireless/lib80211_crypt_ccmp.c @@ -430,24 +430,23 @@ static int lib80211_ccmp_get_key(void *key, int len, u8 * seq, void *priv) return CCMP_TK_LEN; } -static char *lib80211_ccmp_print_stats(char *p, void *priv) +static void lib80211_ccmp_print_stats(struct seq_file *m, void *priv) { struct lib80211_ccmp_data *ccmp = priv; - p += sprintf(p, "key[%d] alg=CCMP key_set=%d " - "tx_pn=%02x%02x%02x%02x%02x%02x " - "rx_pn=%02x%02x%02x%02x%02x%02x " - "format_errors=%d replays=%d decrypt_errors=%d\n", - ccmp->key_idx, ccmp->key_set, - ccmp->tx_pn[0], ccmp->tx_pn[1], ccmp->tx_pn[2], - ccmp->tx_pn[3], ccmp->tx_pn[4], ccmp->tx_pn[5], - ccmp->rx_pn[0], ccmp->rx_pn[1], ccmp->rx_pn[2], - ccmp->rx_pn[3], ccmp->rx_pn[4], ccmp->rx_pn[5], - ccmp->dot11RSNAStatsCCMPFormatErrors, - ccmp->dot11RSNAStatsCCMPReplays, - ccmp->dot11RSNAStatsCCMPDecryptErrors); - - return p; + seq_printf(m, + "key[%d] alg=CCMP key_set=%d " + "tx_pn=%02x%02x%02x%02x%02x%02x " + "rx_pn=%02x%02x%02x%02x%02x%02x " + "format_errors=%d replays=%d decrypt_errors=%d\n", + ccmp->key_idx, ccmp->key_set, + ccmp->tx_pn[0], ccmp->tx_pn[1], ccmp->tx_pn[2], + ccmp->tx_pn[3], ccmp->tx_pn[4], ccmp->tx_pn[5], + ccmp->rx_pn[0], ccmp->rx_pn[1], ccmp->rx_pn[2], + ccmp->rx_pn[3], ccmp->rx_pn[4], ccmp->rx_pn[5], + ccmp->dot11RSNAStatsCCMPFormatErrors, + ccmp->dot11RSNAStatsCCMPReplays, + ccmp->dot11RSNAStatsCCMPDecryptErrors); } static struct lib80211_crypto_ops lib80211_crypt_ccmp = { diff --git 
a/net/wireless/lib80211_crypt_tkip.c b/net/wireless/lib80211_crypt_tkip.c index d475cfc8568f..8c90ba79e56e 100644 --- a/net/wireless/lib80211_crypt_tkip.c +++ b/net/wireless/lib80211_crypt_tkip.c @@ -703,30 +703,30 @@ static int lib80211_tkip_get_key(void *key, int len, u8 * seq, void *priv) return TKIP_KEY_LEN; } -static char *lib80211_tkip_print_stats(char *p, void *priv) +static void lib80211_tkip_print_stats(struct seq_file *m, void *priv) { struct lib80211_tkip_data *tkip = priv; - p += sprintf(p, "key[%d] alg=TKIP key_set=%d " - "tx_pn=%02x%02x%02x%02x%02x%02x " - "rx_pn=%02x%02x%02x%02x%02x%02x " - "replays=%d icv_errors=%d local_mic_failures=%d\n", - tkip->key_idx, tkip->key_set, - (tkip->tx_iv32 >> 24) & 0xff, - (tkip->tx_iv32 >> 16) & 0xff, - (tkip->tx_iv32 >> 8) & 0xff, - tkip->tx_iv32 & 0xff, - (tkip->tx_iv16 >> 8) & 0xff, - tkip->tx_iv16 & 0xff, - (tkip->rx_iv32 >> 24) & 0xff, - (tkip->rx_iv32 >> 16) & 0xff, - (tkip->rx_iv32 >> 8) & 0xff, - tkip->rx_iv32 & 0xff, - (tkip->rx_iv16 >> 8) & 0xff, - tkip->rx_iv16 & 0xff, - tkip->dot11RSNAStatsTKIPReplays, - tkip->dot11RSNAStatsTKIPICVErrors, - tkip->dot11RSNAStatsTKIPLocalMICFailures); - return p; + seq_printf(m, + "key[%d] alg=TKIP key_set=%d " + "tx_pn=%02x%02x%02x%02x%02x%02x " + "rx_pn=%02x%02x%02x%02x%02x%02x " + "replays=%d icv_errors=%d local_mic_failures=%d\n", + tkip->key_idx, tkip->key_set, + (tkip->tx_iv32 >> 24) & 0xff, + (tkip->tx_iv32 >> 16) & 0xff, + (tkip->tx_iv32 >> 8) & 0xff, + tkip->tx_iv32 & 0xff, + (tkip->tx_iv16 >> 8) & 0xff, + tkip->tx_iv16 & 0xff, + (tkip->rx_iv32 >> 24) & 0xff, + (tkip->rx_iv32 >> 16) & 0xff, + (tkip->rx_iv32 >> 8) & 0xff, + tkip->rx_iv32 & 0xff, + (tkip->rx_iv16 >> 8) & 0xff, + tkip->rx_iv16 & 0xff, + tkip->dot11RSNAStatsTKIPReplays, + tkip->dot11RSNAStatsTKIPICVErrors, + tkip->dot11RSNAStatsTKIPLocalMICFailures); } static struct lib80211_crypto_ops lib80211_crypt_tkip = { diff --git a/net/wireless/lib80211_crypt_wep.c b/net/wireless/lib80211_crypt_wep.c index c1304018fc1c..1c292e4ea7b6 100644 --- a/net/wireless/lib80211_crypt_wep.c +++ b/net/wireless/lib80211_crypt_wep.c @@ -253,11 +253,10 @@ static int lib80211_wep_get_key(void *key, int len, u8 * seq, void *priv) return wep->key_len; } -static char *lib80211_wep_print_stats(char *p, void *priv) +static void lib80211_wep_print_stats(struct seq_file *m, void *priv) { struct lib80211_wep_data *wep = priv; - p += sprintf(p, "key[%d] alg=WEP len=%d\n", wep->key_idx, wep->key_len); - return p; + seq_printf(m, "key[%d] alg=WEP len=%d\n", wep->key_idx, wep->key_len); } static struct lib80211_crypto_ops lib80211_crypt_wep = { diff --git a/net/wireless/mesh.c b/net/wireless/mesh.c index f9d6ce5cfabb..0bb93f3061a4 100644 --- a/net/wireless/mesh.c +++ b/net/wireless/mesh.c @@ -44,6 +44,10 @@ #define MESH_SYNC_NEIGHBOR_OFFSET_MAX 50 +#define MESH_DEFAULT_BEACON_INTERVAL 1000 /* in 1024 us units (=TUs) */ +#define MESH_DEFAULT_DTIM_PERIOD 2 +#define MESH_DEFAULT_AWAKE_WINDOW 10 /* in 1024 us units (=TUs) */ + const struct mesh_config default_mesh_config = { .dot11MeshRetryTimeout = MESH_RET_T, .dot11MeshConfirmTimeout = MESH_CONF_T, @@ -69,6 +73,8 @@ const struct mesh_config default_mesh_config = { .dot11MeshHWMPactivePathToRootTimeout = MESH_PATH_TO_ROOT_TIMEOUT, .dot11MeshHWMProotInterval = MESH_ROOT_INTERVAL, .dot11MeshHWMPconfirmationInterval = MESH_ROOT_CONFIRMATION_INTERVAL, + .power_mode = NL80211_MESH_POWER_ACTIVE, + .dot11MeshAwakeWindowDuration = MESH_DEFAULT_AWAKE_WINDOW, }; const struct mesh_setup default_mesh_setup = { @@ -79,6 +85,9 @@ 
const struct mesh_setup default_mesh_setup = { .ie = NULL, .ie_len = 0, .is_secure = false, + .user_mpm = false, + .beacon_interval = MESH_DEFAULT_BEACON_INTERVAL, + .dtim_period = MESH_DEFAULT_DTIM_PERIOD, }; int __cfg80211_join_mesh(struct cfg80211_registered_device *rdev, @@ -225,20 +234,6 @@ int cfg80211_set_mesh_channel(struct cfg80211_registered_device *rdev, return 0; } -void cfg80211_notify_new_peer_candidate(struct net_device *dev, - const u8 *macaddr, const u8* ie, u8 ie_len, gfp_t gfp) -{ - struct wireless_dev *wdev = dev->ieee80211_ptr; - - trace_cfg80211_notify_new_peer_candidate(dev, macaddr); - if (WARN_ON(wdev->iftype != NL80211_IFTYPE_MESH_POINT)) - return; - - nl80211_send_new_peer_candidate(wiphy_to_dev(wdev->wiphy), dev, - macaddr, ie, ie_len, gfp); -} -EXPORT_SYMBOL(cfg80211_notify_new_peer_candidate); - static int __cfg80211_leave_mesh(struct cfg80211_registered_device *rdev, struct net_device *dev) { diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index 5e8123ee63fd..0c7b7dd855f6 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -58,7 +58,7 @@ void cfg80211_send_rx_assoc(struct net_device *dev, struct cfg80211_bss *bss, */ if (status_code != WLAN_STATUS_SUCCESS && wdev->conn && cfg80211_sme_failed_reassoc(wdev)) { - cfg80211_put_bss(bss); + cfg80211_put_bss(wiphy, bss); goto out; } @@ -70,7 +70,7 @@ void cfg80211_send_rx_assoc(struct net_device *dev, struct cfg80211_bss *bss, * do not call connect_result() now because the * sme will schedule work that does it later. */ - cfg80211_put_bss(bss); + cfg80211_put_bss(wiphy, bss); goto out; } @@ -108,7 +108,7 @@ void __cfg80211_send_deauth(struct net_device *dev, if (wdev->current_bss && ether_addr_equal(wdev->current_bss->pub.bssid, bssid)) { cfg80211_unhold_bss(wdev->current_bss); - cfg80211_put_bss(&wdev->current_bss->pub); + cfg80211_put_bss(wiphy, &wdev->current_bss->pub); wdev->current_bss = NULL; was_current = true; } @@ -164,7 +164,7 @@ void __cfg80211_send_disassoc(struct net_device *dev, ether_addr_equal(wdev->current_bss->pub.bssid, bssid)) { cfg80211_sme_disassoc(dev, wdev->current_bss); cfg80211_unhold_bss(wdev->current_bss); - cfg80211_put_bss(&wdev->current_bss->pub); + cfg80211_put_bss(wiphy, &wdev->current_bss->pub); wdev->current_bss = NULL; } else WARN_ON(1); @@ -187,30 +187,6 @@ void cfg80211_send_disassoc(struct net_device *dev, const u8 *buf, size_t len) } EXPORT_SYMBOL(cfg80211_send_disassoc); -void cfg80211_send_unprot_deauth(struct net_device *dev, const u8 *buf, - size_t len) -{ - struct wireless_dev *wdev = dev->ieee80211_ptr; - struct wiphy *wiphy = wdev->wiphy; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); - - trace_cfg80211_send_unprot_deauth(dev); - nl80211_send_unprot_deauth(rdev, dev, buf, len, GFP_ATOMIC); -} -EXPORT_SYMBOL(cfg80211_send_unprot_deauth); - -void cfg80211_send_unprot_disassoc(struct net_device *dev, const u8 *buf, - size_t len) -{ - struct wireless_dev *wdev = dev->ieee80211_ptr; - struct wiphy *wiphy = wdev->wiphy; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); - - trace_cfg80211_send_unprot_disassoc(dev); - nl80211_send_unprot_disassoc(rdev, dev, buf, len, GFP_ATOMIC); -} -EXPORT_SYMBOL(cfg80211_send_unprot_disassoc); - void cfg80211_send_auth_timeout(struct net_device *dev, const u8 *addr) { struct wireless_dev *wdev = dev->ieee80211_ptr; @@ -324,7 +300,7 @@ int __cfg80211_mlme_auth(struct cfg80211_registered_device *rdev, err = rdev_auth(rdev, dev, &req); out: - cfg80211_put_bss(req.bss); + 
cfg80211_put_bss(req.bss);
+	cfg80211_put_bss(&rdev->wiphy, req.bss);
 	return err;
 }
@@ -367,27 +343,38 @@ void cfg80211_oper_and_ht_capa(struct ieee80211_ht_cap *ht_capa,
 		p1[i] &= p2[i];
 }
 
+/* Do a logical vht_capa &= vht_capa_mask. */
+void cfg80211_oper_and_vht_capa(struct ieee80211_vht_cap *vht_capa,
+				const struct ieee80211_vht_cap *vht_capa_mask)
+{
+	int i;
+	u8 *p1, *p2;
+	if (!vht_capa_mask) {
+		memset(vht_capa, 0, sizeof(*vht_capa));
+		return;
+	}
+
+	p1 = (u8 *)(vht_capa);
+	p2 = (u8 *)(vht_capa_mask);
+	for (i = 0; i < sizeof(*vht_capa); i++)
+		p1[i] &= p2[i];
+}
+
 int __cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev,
 			  struct net_device *dev,
 			  struct ieee80211_channel *chan,
-			  const u8 *bssid, const u8 *prev_bssid,
+			  const u8 *bssid,
 			  const u8 *ssid, int ssid_len,
-			  const u8 *ie, int ie_len, bool use_mfp,
-			  struct cfg80211_crypto_settings *crypt,
-			  u32 assoc_flags, struct ieee80211_ht_cap *ht_capa,
-			  struct ieee80211_ht_cap *ht_capa_mask)
+			  struct cfg80211_assoc_request *req)
 {
 	struct wireless_dev *wdev = dev->ieee80211_ptr;
-	struct cfg80211_assoc_request req;
 	int err;
 	bool was_connected = false;
 
 	ASSERT_WDEV_LOCK(wdev);
 
-	memset(&req, 0, sizeof(req));
-
-	if (wdev->current_bss && prev_bssid &&
-	    ether_addr_equal(wdev->current_bss->pub.bssid, prev_bssid)) {
+	if (wdev->current_bss && req->prev_bssid &&
+	    ether_addr_equal(wdev->current_bss->pub.bssid, req->prev_bssid)) {
 		/*
 		 * Trying to reassociate: Allow this to proceed and let the old
 		 * association to be dropped when the new one is completed.
@@ -399,40 +386,30 @@ int __cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev,
 	} else if (wdev->current_bss)
 		return -EALREADY;
 
-	req.ie = ie;
-	req.ie_len = ie_len;
-	memcpy(&req.crypto, crypt, sizeof(req.crypto));
-	req.use_mfp = use_mfp;
-	req.prev_bssid = prev_bssid;
-	req.flags = assoc_flags;
-	if (ht_capa)
-		memcpy(&req.ht_capa, ht_capa, sizeof(req.ht_capa));
-	if (ht_capa_mask)
-		memcpy(&req.ht_capa_mask, ht_capa_mask,
-		       sizeof(req.ht_capa_mask));
-	cfg80211_oper_and_ht_capa(&req.ht_capa_mask,
+	cfg80211_oper_and_ht_capa(&req->ht_capa_mask,
 				  rdev->wiphy.ht_capa_mod_mask);
+	cfg80211_oper_and_vht_capa(&req->vht_capa_mask,
+				   rdev->wiphy.vht_capa_mod_mask);
 
-	req.bss = cfg80211_get_bss(&rdev->wiphy, chan, bssid, ssid, ssid_len,
-				   WLAN_CAPABILITY_ESS, WLAN_CAPABILITY_ESS);
-	if (!req.bss) {
+	req->bss = cfg80211_get_bss(&rdev->wiphy, chan, bssid, ssid, ssid_len,
+				    WLAN_CAPABILITY_ESS, WLAN_CAPABILITY_ESS);
+	if (!req->bss) {
 		if (was_connected)
 			wdev->sme_state = CFG80211_SME_CONNECTED;
 		return -ENOENT;
 	}
 
-	err = cfg80211_can_use_chan(rdev, wdev, req.bss->channel,
-				    CHAN_MODE_SHARED);
+	err = cfg80211_can_use_chan(rdev, wdev, chan, CHAN_MODE_SHARED);
 	if (err)
 		goto out;
 
-	err = rdev_assoc(rdev, dev, &req);
+	err = rdev_assoc(rdev, dev, req);
 out:
 	if (err) {
 		if (was_connected)
 			wdev->sme_state = CFG80211_SME_CONNECTED;
-		cfg80211_put_bss(req.bss);
+		cfg80211_put_bss(&rdev->wiphy, req->bss);
 	}
 
 	return err;
@@ -441,21 +418,17 @@ out:
 int cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev,
 			struct net_device *dev,
 			struct ieee80211_channel *chan,
-			const u8 *bssid, const u8 *prev_bssid,
+			const u8 *bssid,
 			const u8 *ssid, int ssid_len,
-			const u8 *ie, int ie_len, bool use_mfp,
-			struct cfg80211_crypto_settings *crypt,
-			u32 assoc_flags, struct ieee80211_ht_cap *ht_capa,
-			struct ieee80211_ht_cap *ht_capa_mask)
+			struct cfg80211_assoc_request *req)
 {
 	struct wireless_dev *wdev = dev->ieee80211_ptr;
 	int err;
 
 	mutex_lock(&rdev->devlist_mtx);
 	wdev_lock(wdev);
-	err = __cfg80211_mlme_assoc(rdev, dev, chan, bssid,
prev_bssid, - ssid, ssid_len, ie, ie_len, use_mfp, crypt, - assoc_flags, ht_capa, ht_capa_mask); + err = __cfg80211_mlme_assoc(rdev, dev, chan, bssid, + ssid, ssid_len, req); wdev_unlock(wdev); mutex_unlock(&rdev->devlist_mtx); @@ -514,7 +487,7 @@ static int __cfg80211_mlme_disassoc(struct cfg80211_registered_device *rdev, if (wdev->sme_state != CFG80211_SME_CONNECTED) return -ENOTCONN; - if (WARN_ON(!wdev->current_bss)) + if (WARN(!wdev->current_bss, "sme_state=%d\n", wdev->sme_state)) return -ENOTCONN; memset(&req, 0, sizeof(req)); @@ -572,67 +545,11 @@ void cfg80211_mlme_down(struct cfg80211_registered_device *rdev, if (wdev->current_bss) { cfg80211_unhold_bss(wdev->current_bss); - cfg80211_put_bss(&wdev->current_bss->pub); + cfg80211_put_bss(&rdev->wiphy, &wdev->current_bss->pub); wdev->current_bss = NULL; } } -void cfg80211_ready_on_channel(struct wireless_dev *wdev, u64 cookie, - struct ieee80211_channel *chan, - unsigned int duration, gfp_t gfp) -{ - struct wiphy *wiphy = wdev->wiphy; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); - - trace_cfg80211_ready_on_channel(wdev, cookie, chan, duration); - nl80211_send_remain_on_channel(rdev, wdev, cookie, chan, duration, gfp); -} -EXPORT_SYMBOL(cfg80211_ready_on_channel); - -void cfg80211_remain_on_channel_expired(struct wireless_dev *wdev, u64 cookie, - struct ieee80211_channel *chan, - gfp_t gfp) -{ - struct wiphy *wiphy = wdev->wiphy; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); - - trace_cfg80211_ready_on_channel_expired(wdev, cookie, chan); - nl80211_send_remain_on_channel_cancel(rdev, wdev, cookie, chan, gfp); -} -EXPORT_SYMBOL(cfg80211_remain_on_channel_expired); - -void cfg80211_new_sta(struct net_device *dev, const u8 *mac_addr, - struct station_info *sinfo, gfp_t gfp) -{ - struct wiphy *wiphy = dev->ieee80211_ptr->wiphy; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); - - trace_cfg80211_new_sta(dev, mac_addr, sinfo); - nl80211_send_sta_event(rdev, dev, mac_addr, sinfo, gfp); -} -EXPORT_SYMBOL(cfg80211_new_sta); - -void cfg80211_del_sta(struct net_device *dev, const u8 *mac_addr, gfp_t gfp) -{ - struct wiphy *wiphy = dev->ieee80211_ptr->wiphy; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); - - trace_cfg80211_del_sta(dev, mac_addr); - nl80211_send_sta_del_event(rdev, dev, mac_addr, gfp); -} -EXPORT_SYMBOL(cfg80211_del_sta); - -void cfg80211_conn_failed(struct net_device *dev, const u8 *mac_addr, - enum nl80211_connect_failed_reason reason, - gfp_t gfp) -{ - struct wiphy *wiphy = dev->ieee80211_ptr->wiphy; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); - - nl80211_send_conn_failed_event(rdev, dev, mac_addr, reason, gfp); -} -EXPORT_SYMBOL(cfg80211_conn_failed); - struct cfg80211_mgmt_registration { struct list_head list; @@ -731,6 +648,11 @@ void cfg80211_mlme_unregister_socket(struct wireless_dev *wdev, u32 nlportid) spin_unlock_bh(&wdev->mgmt_registrations_lock); + if (nlportid && rdev->crit_proto_nlportid == nlportid) { + rdev->crit_proto_nlportid = 0; + rdev_crit_proto_stop(rdev, wdev); + } + if (nlportid == wdev->ap_unexpected_nlportid) wdev->ap_unexpected_nlportid = 0; } @@ -909,143 +831,122 @@ bool cfg80211_rx_mgmt(struct wireless_dev *wdev, int freq, int sig_mbm, } EXPORT_SYMBOL(cfg80211_rx_mgmt); -void cfg80211_mgmt_tx_status(struct wireless_dev *wdev, u64 cookie, - const u8 *buf, size_t len, bool ack, gfp_t gfp) -{ - struct wiphy *wiphy = wdev->wiphy; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); - - 
trace_cfg80211_mgmt_tx_status(wdev, cookie, ack); +void cfg80211_dfs_channels_update_work(struct work_struct *work) +{ + struct delayed_work *delayed_work; + struct cfg80211_registered_device *rdev; + struct cfg80211_chan_def chandef; + struct ieee80211_supported_band *sband; + struct ieee80211_channel *c; + struct wiphy *wiphy; + bool check_again = false; + unsigned long timeout, next_time = 0; + int bandid, i; + + delayed_work = container_of(work, struct delayed_work, work); + rdev = container_of(delayed_work, struct cfg80211_registered_device, + dfs_update_channels_wk); + wiphy = &rdev->wiphy; + + mutex_lock(&cfg80211_mutex); + for (bandid = 0; bandid < IEEE80211_NUM_BANDS; bandid++) { + sband = wiphy->bands[bandid]; + if (!sband) + continue; - /* Indicate TX status of the Action frame to user space */ - nl80211_send_mgmt_tx_status(rdev, wdev, cookie, buf, len, ack, gfp); -} -EXPORT_SYMBOL(cfg80211_mgmt_tx_status); + for (i = 0; i < sband->n_channels; i++) { + c = &sband->channels[i]; -void cfg80211_cqm_rssi_notify(struct net_device *dev, - enum nl80211_cqm_rssi_threshold_event rssi_event, - gfp_t gfp) -{ - struct wireless_dev *wdev = dev->ieee80211_ptr; - struct wiphy *wiphy = wdev->wiphy; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + if (c->dfs_state != NL80211_DFS_UNAVAILABLE) + continue; - trace_cfg80211_cqm_rssi_notify(dev, rssi_event); + timeout = c->dfs_state_entered + + IEEE80211_DFS_MIN_NOP_TIME_MS; - /* Indicate roaming trigger event to user space */ - nl80211_send_cqm_rssi_notify(rdev, dev, rssi_event, gfp); -} -EXPORT_SYMBOL(cfg80211_cqm_rssi_notify); + if (time_after_eq(jiffies, timeout)) { + c->dfs_state = NL80211_DFS_USABLE; + cfg80211_chandef_create(&chandef, c, + NL80211_CHAN_NO_HT); -void cfg80211_cqm_pktloss_notify(struct net_device *dev, - const u8 *peer, u32 num_packets, gfp_t gfp) -{ - struct wireless_dev *wdev = dev->ieee80211_ptr; - struct wiphy *wiphy = wdev->wiphy; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + nl80211_radar_notify(rdev, &chandef, + NL80211_RADAR_NOP_FINISHED, + NULL, GFP_ATOMIC); + continue; + } - trace_cfg80211_cqm_pktloss_notify(dev, peer, num_packets); + if (!check_again) + next_time = timeout - jiffies; + else + next_time = min(next_time, timeout - jiffies); + check_again = true; + } + } + mutex_unlock(&cfg80211_mutex); - /* Indicate roaming trigger event to user space */ - nl80211_send_cqm_pktloss_notify(rdev, dev, peer, num_packets, gfp); + /* reschedule if there are other channels waiting to be cleared again */ + if (check_again) + queue_delayed_work(cfg80211_wq, &rdev->dfs_update_channels_wk, + next_time); } -EXPORT_SYMBOL(cfg80211_cqm_pktloss_notify); -void cfg80211_cqm_txe_notify(struct net_device *dev, - const u8 *peer, u32 num_packets, - u32 rate, u32 intvl, gfp_t gfp) -{ - struct wireless_dev *wdev = dev->ieee80211_ptr; - struct wiphy *wiphy = wdev->wiphy; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); - nl80211_send_cqm_txe_notify(rdev, dev, peer, num_packets, - rate, intvl, gfp); -} -EXPORT_SYMBOL(cfg80211_cqm_txe_notify); - -void cfg80211_gtk_rekey_notify(struct net_device *dev, const u8 *bssid, - const u8 *replay_ctr, gfp_t gfp) +void cfg80211_radar_event(struct wiphy *wiphy, + struct cfg80211_chan_def *chandef, + gfp_t gfp) { - struct wireless_dev *wdev = dev->ieee80211_ptr; - struct wiphy *wiphy = wdev->wiphy; struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + unsigned long timeout; - trace_cfg80211_gtk_rekey_notify(dev, bssid); - 
nl80211_gtk_rekey_notify(rdev, dev, bssid, replay_ctr, gfp); -} -EXPORT_SYMBOL(cfg80211_gtk_rekey_notify); + trace_cfg80211_radar_event(wiphy, chandef); -void cfg80211_pmksa_candidate_notify(struct net_device *dev, int index, - const u8 *bssid, bool preauth, gfp_t gfp) -{ - struct wireless_dev *wdev = dev->ieee80211_ptr; - struct wiphy *wiphy = wdev->wiphy; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + /* only set the chandef supplied channel to unavailable, in + * case the radar is detected on only one of multiple channels + * spanned by the chandef. + */ + cfg80211_set_dfs_state(wiphy, chandef, NL80211_DFS_UNAVAILABLE); - trace_cfg80211_pmksa_candidate_notify(dev, index, bssid, preauth); - nl80211_pmksa_candidate_notify(rdev, dev, index, bssid, preauth, gfp); + timeout = msecs_to_jiffies(IEEE80211_DFS_MIN_NOP_TIME_MS); + queue_delayed_work(cfg80211_wq, &rdev->dfs_update_channels_wk, + timeout); + + nl80211_radar_notify(rdev, chandef, NL80211_RADAR_DETECTED, NULL, gfp); } -EXPORT_SYMBOL(cfg80211_pmksa_candidate_notify); +EXPORT_SYMBOL(cfg80211_radar_event); -void cfg80211_ch_switch_notify(struct net_device *dev, - struct cfg80211_chan_def *chandef) +void cfg80211_cac_event(struct net_device *netdev, + enum nl80211_radar_event event, gfp_t gfp) { - struct wireless_dev *wdev = dev->ieee80211_ptr; + struct wireless_dev *wdev = netdev->ieee80211_ptr; struct wiphy *wiphy = wdev->wiphy; struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + struct cfg80211_chan_def chandef; + unsigned long timeout; - trace_cfg80211_ch_switch_notify(dev, chandef); - - wdev_lock(wdev); - - if (WARN_ON(wdev->iftype != NL80211_IFTYPE_AP && - wdev->iftype != NL80211_IFTYPE_P2P_GO)) - goto out; + trace_cfg80211_cac_event(netdev, event); - wdev->channel = chandef->chan; - nl80211_ch_switch_notify(rdev, dev, chandef, GFP_KERNEL); -out: - wdev_unlock(wdev); - return; -} -EXPORT_SYMBOL(cfg80211_ch_switch_notify); + if (WARN_ON(!wdev->cac_started)) + return; -bool cfg80211_rx_spurious_frame(struct net_device *dev, - const u8 *addr, gfp_t gfp) -{ - struct wireless_dev *wdev = dev->ieee80211_ptr; - bool ret; + if (WARN_ON(!wdev->channel)) + return; - trace_cfg80211_rx_spurious_frame(dev, addr); + cfg80211_chandef_create(&chandef, wdev->channel, NL80211_CHAN_NO_HT); - if (WARN_ON(wdev->iftype != NL80211_IFTYPE_AP && - wdev->iftype != NL80211_IFTYPE_P2P_GO)) { - trace_cfg80211_return_bool(false); - return false; + switch (event) { + case NL80211_RADAR_CAC_FINISHED: + timeout = wdev->cac_start_time + + msecs_to_jiffies(IEEE80211_DFS_MIN_CAC_TIME_MS); + WARN_ON(!time_after_eq(jiffies, timeout)); + cfg80211_set_dfs_state(wiphy, &chandef, NL80211_DFS_AVAILABLE); + break; + case NL80211_RADAR_CAC_ABORTED: + break; + default: + WARN_ON(1); + return; } - ret = nl80211_unexpected_frame(dev, addr, gfp); - trace_cfg80211_return_bool(ret); - return ret; -} -EXPORT_SYMBOL(cfg80211_rx_spurious_frame); + wdev->cac_started = false; -bool cfg80211_rx_unexpected_4addr_frame(struct net_device *dev, - const u8 *addr, gfp_t gfp) -{ - struct wireless_dev *wdev = dev->ieee80211_ptr; - bool ret; - - trace_cfg80211_rx_unexpected_4addr_frame(dev, addr); - - if (WARN_ON(wdev->iftype != NL80211_IFTYPE_AP && - wdev->iftype != NL80211_IFTYPE_P2P_GO && - wdev->iftype != NL80211_IFTYPE_AP_VLAN)) { - trace_cfg80211_return_bool(false); - return false; - } - ret = nl80211_unexpected_4addr_frame(dev, addr, gfp); - trace_cfg80211_return_bool(ret); - return ret; + nl80211_radar_notify(rdev, &chandef, event, netdev, gfp); } 
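The CAC/radar flow above is easiest to see end to end, so here is a minimal driver-side sketch; the example_* helpers are hypothetical, and only cfg80211_cac_event() and cfg80211_radar_event() are the entry points this patch adds:

#include <net/cfg80211.h>

/* Hypothetical driver hook: no radar seen during the whole CAC. */
static void example_cac_finished(struct net_device *netdev)
{
	/*
	 * After IEEE80211_DFS_MIN_CAC_TIME_MS of silence the channel
	 * moves to NL80211_DFS_AVAILABLE and may be used for AP/GO.
	 */
	cfg80211_cac_event(netdev, NL80211_RADAR_CAC_FINISHED, GFP_KERNEL);
}

/* Hypothetical driver hook: radar pulse detected on the chandef. */
static void example_radar_detected(struct wiphy *wiphy,
				   struct cfg80211_chan_def *chandef)
{
	/*
	 * Marks the spanned 20 MHz channels NL80211_DFS_UNAVAILABLE and
	 * arms dfs_update_channels_wk to move them back to USABLE once
	 * the non-occupancy period (IEEE80211_DFS_MIN_NOP_TIME_MS) ends.
	 */
	cfg80211_radar_event(wiphy, chandef, GFP_KERNEL);
}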
-EXPORT_SYMBOL(cfg80211_rx_unexpected_4addr_frame);
+EXPORT_SYMBOL(cfg80211_cac_event);
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index f45706adaf34..afa283841e8c 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -19,6 +19,7 @@
 #include <net/genetlink.h>
 #include <net/cfg80211.h>
 #include <net/sock.h>
+#include <net/inet_connection_sock.h>
 #include "core.h"
 #include "nl80211.h"
 #include "reg.h"
@@ -365,6 +366,18 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = {
 	[NL80211_ATTR_SCAN_FLAGS] = { .type = NLA_U32 },
 	[NL80211_ATTR_P2P_CTWINDOW] = { .type = NLA_U8 },
 	[NL80211_ATTR_P2P_OPPPS] = { .type = NLA_U8 },
+	[NL80211_ATTR_ACL_POLICY] = { .type = NLA_U32 },
+	[NL80211_ATTR_MAC_ADDRS] = { .type = NLA_NESTED },
+	[NL80211_ATTR_STA_CAPABILITY] = { .type = NLA_U16 },
+	[NL80211_ATTR_STA_EXT_CAPABILITY] = { .type = NLA_BINARY, },
+	[NL80211_ATTR_SPLIT_WIPHY_DUMP] = { .type = NLA_FLAG, },
+	[NL80211_ATTR_DISABLE_VHT] = { .type = NLA_FLAG },
+	[NL80211_ATTR_VHT_CAPABILITY_MASK] = {
+		.len = NL80211_VHT_CAPABILITY_LEN,
+	},
+	[NL80211_ATTR_MDID] = { .type = NLA_U16 },
+	[NL80211_ATTR_IE_RIC] = { .type = NLA_BINARY,
+				  .len = IEEE80211_MAX_DATA_LEN },
 };
 
 /* policy for the key attributes */
@@ -397,6 +410,26 @@ nl80211_wowlan_policy[NUM_NL80211_WOWLAN_TRIG] = {
 	[NL80211_WOWLAN_TRIG_EAP_IDENT_REQUEST] = { .type = NLA_FLAG },
 	[NL80211_WOWLAN_TRIG_4WAY_HANDSHAKE] = { .type = NLA_FLAG },
 	[NL80211_WOWLAN_TRIG_RFKILL_RELEASE] = { .type = NLA_FLAG },
+	[NL80211_WOWLAN_TRIG_TCP_CONNECTION] = { .type = NLA_NESTED },
 };
+
+static const struct nla_policy
+nl80211_wowlan_tcp_policy[NUM_NL80211_WOWLAN_TCP] = {
+	[NL80211_WOWLAN_TCP_SRC_IPV4] = { .type = NLA_U32 },
+	[NL80211_WOWLAN_TCP_DST_IPV4] = { .type = NLA_U32 },
+	[NL80211_WOWLAN_TCP_DST_MAC] = { .len = ETH_ALEN },
+	[NL80211_WOWLAN_TCP_SRC_PORT] = { .type = NLA_U16 },
+	[NL80211_WOWLAN_TCP_DST_PORT] = { .type = NLA_U16 },
+	[NL80211_WOWLAN_TCP_DATA_PAYLOAD] = { .len = 1 },
+	[NL80211_WOWLAN_TCP_DATA_PAYLOAD_SEQ] = {
+		.len = sizeof(struct nl80211_wowlan_tcp_data_seq)
+	},
+	[NL80211_WOWLAN_TCP_DATA_PAYLOAD_TOKEN] = {
+		.len = sizeof(struct nl80211_wowlan_tcp_data_token)
+	},
+	[NL80211_WOWLAN_TCP_DATA_INTERVAL] = { .type = NLA_U32 },
+	[NL80211_WOWLAN_TCP_WAKE_PAYLOAD] = { .len = 1 },
+	[NL80211_WOWLAN_TCP_WAKE_MASK] = { .len = 1 },
+};
 
 /* policy for GTK rekey offload attributes */
@@ -414,62 +447,69 @@ nl80211_match_policy[NL80211_SCHED_SCAN_MATCH_ATTR_MAX + 1] = {
 	[NL80211_SCHED_SCAN_MATCH_ATTR_RSSI] = { .type = NLA_U32 },
 };
 
-/* ifidx get helper */
-static int nl80211_get_ifidx(struct netlink_callback *cb)
+static int nl80211_prepare_wdev_dump(struct sk_buff *skb,
+				     struct netlink_callback *cb,
+				     struct cfg80211_registered_device **rdev,
+				     struct wireless_dev **wdev)
 {
-	int res;
-
-	res = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize,
-			  nl80211_fam.attrbuf, nl80211_fam.maxattr,
-			  nl80211_policy);
-	if (res)
-		return res;
-
-	if (!nl80211_fam.attrbuf[NL80211_ATTR_IFINDEX])
-		return -EINVAL;
+	int err;
 
-	res = nla_get_u32(nl80211_fam.attrbuf[NL80211_ATTR_IFINDEX]);
-	if (!res)
-		return -EINVAL;
-	return res;
-}
+	rtnl_lock();
+	mutex_lock(&cfg80211_mutex);
 
-static int nl80211_prepare_netdev_dump(struct sk_buff *skb,
-				       struct netlink_callback *cb,
-				       struct cfg80211_registered_device **rdev,
-				       struct net_device **dev)
-{
-	int ifidx = cb->args[0];
-	int err;
+	if (!cb->args[0]) {
+		err = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize,
+				  nl80211_fam.attrbuf, nl80211_fam.maxattr,
nl80211_policy); + if (err) + goto out_unlock; - if (!ifidx) - ifidx = nl80211_get_ifidx(cb); - if (ifidx < 0) - return ifidx; + *wdev = __cfg80211_wdev_from_attrs(sock_net(skb->sk), + nl80211_fam.attrbuf); + if (IS_ERR(*wdev)) { + err = PTR_ERR(*wdev); + goto out_unlock; + } + *rdev = wiphy_to_dev((*wdev)->wiphy); + cb->args[0] = (*rdev)->wiphy_idx; + cb->args[1] = (*wdev)->identifier; + } else { + struct wiphy *wiphy = wiphy_idx_to_wiphy(cb->args[0]); + struct wireless_dev *tmp; - cb->args[0] = ifidx; + if (!wiphy) { + err = -ENODEV; + goto out_unlock; + } + *rdev = wiphy_to_dev(wiphy); + *wdev = NULL; - rtnl_lock(); + mutex_lock(&(*rdev)->devlist_mtx); + list_for_each_entry(tmp, &(*rdev)->wdev_list, list) { + if (tmp->identifier == cb->args[1]) { + *wdev = tmp; + break; + } + } + mutex_unlock(&(*rdev)->devlist_mtx); - *dev = __dev_get_by_index(sock_net(skb->sk), ifidx); - if (!*dev) { - err = -ENODEV; - goto out_rtnl; + if (!*wdev) { + err = -ENODEV; + goto out_unlock; + } } - *rdev = cfg80211_get_dev_from_ifindex(sock_net(skb->sk), ifidx); - if (IS_ERR(*rdev)) { - err = PTR_ERR(*rdev); - goto out_rtnl; - } + cfg80211_lock_rdev(*rdev); + mutex_unlock(&cfg80211_mutex); return 0; - out_rtnl: + out_unlock: + mutex_unlock(&cfg80211_mutex); rtnl_unlock(); return err; } -static void nl80211_finish_netdev_dump(struct cfg80211_registered_device *rdev) +static void nl80211_finish_wdev_dump(struct cfg80211_registered_device *rdev) { cfg80211_unlock_rdev(rdev); rtnl_unlock(); @@ -514,7 +554,8 @@ static inline void *nl80211hdr_put(struct sk_buff *skb, u32 portid, u32 seq, } static int nl80211_msg_put_channel(struct sk_buff *msg, - struct ieee80211_channel *chan) + struct ieee80211_channel *chan, + bool large) { if (nla_put_u32(msg, NL80211_FREQUENCY_ATTR_FREQ, chan->center_freq)) @@ -529,9 +570,37 @@ static int nl80211_msg_put_channel(struct sk_buff *msg, if ((chan->flags & IEEE80211_CHAN_NO_IBSS) && nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_IBSS)) goto nla_put_failure; - if ((chan->flags & IEEE80211_CHAN_RADAR) && - nla_put_flag(msg, NL80211_FREQUENCY_ATTR_RADAR)) - goto nla_put_failure; + if (chan->flags & IEEE80211_CHAN_RADAR) { + if (nla_put_flag(msg, NL80211_FREQUENCY_ATTR_RADAR)) + goto nla_put_failure; + if (large) { + u32 time; + + time = elapsed_jiffies_msecs(chan->dfs_state_entered); + + if (nla_put_u32(msg, NL80211_FREQUENCY_ATTR_DFS_STATE, + chan->dfs_state)) + goto nla_put_failure; + if (nla_put_u32(msg, NL80211_FREQUENCY_ATTR_DFS_TIME, + time)) + goto nla_put_failure; + } + } + + if (large) { + if ((chan->flags & IEEE80211_CHAN_NO_HT40MINUS) && + nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_HT40_MINUS)) + goto nla_put_failure; + if ((chan->flags & IEEE80211_CHAN_NO_HT40PLUS) && + nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_HT40_PLUS)) + goto nla_put_failure; + if ((chan->flags & IEEE80211_CHAN_NO_80MHZ) && + nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_80MHZ)) + goto nla_put_failure; + if ((chan->flags & IEEE80211_CHAN_NO_160MHZ) && + nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_160MHZ)) + goto nla_put_failure; + } if (nla_put_u32(msg, NL80211_FREQUENCY_ATTR_MAX_TX_POWER, DBM_TO_MBM(chan->max_power))) @@ -807,7 +876,8 @@ nla_put_failure: } static int nl80211_put_iface_combinations(struct wiphy *wiphy, - struct sk_buff *msg) + struct sk_buff *msg, + bool large) { struct nlattr *nl_combis; int i, j; @@ -856,6 +926,10 @@ static int nl80211_put_iface_combinations(struct wiphy *wiphy, nla_put_u32(msg, NL80211_IFACE_COMB_MAXNUM, c->max_interfaces)) goto nla_put_failure; + if (large && + 
nla_put_u32(msg, NL80211_IFACE_COMB_RADAR_DETECT_WIDTHS, + c->radar_detect_widths)) + goto nla_put_failure; nla_nest_end(msg, nl_combi); } @@ -867,404 +941,615 @@ nla_put_failure: return -ENOBUFS; } -static int nl80211_send_wiphy(struct sk_buff *msg, u32 portid, u32 seq, int flags, - struct cfg80211_registered_device *dev) +#ifdef CONFIG_PM +static int nl80211_send_wowlan_tcp_caps(struct cfg80211_registered_device *rdev, + struct sk_buff *msg) { - void *hdr; - struct nlattr *nl_bands, *nl_band; - struct nlattr *nl_freqs, *nl_freq; - struct nlattr *nl_rates, *nl_rate; - struct nlattr *nl_cmds; - enum ieee80211_band band; - struct ieee80211_channel *chan; - struct ieee80211_rate *rate; - int i; - const struct ieee80211_txrx_stypes *mgmt_stypes = - dev->wiphy.mgmt_stypes; + const struct wiphy_wowlan_tcp_support *tcp = rdev->wiphy.wowlan.tcp; + struct nlattr *nl_tcp; - hdr = nl80211hdr_put(msg, portid, seq, flags, NL80211_CMD_NEW_WIPHY); - if (!hdr) - return -1; + if (!tcp) + return 0; - if (nla_put_u32(msg, NL80211_ATTR_WIPHY, dev->wiphy_idx) || - nla_put_string(msg, NL80211_ATTR_WIPHY_NAME, wiphy_name(&dev->wiphy)) || - nla_put_u32(msg, NL80211_ATTR_GENERATION, - cfg80211_rdev_list_generation) || - nla_put_u8(msg, NL80211_ATTR_WIPHY_RETRY_SHORT, - dev->wiphy.retry_short) || - nla_put_u8(msg, NL80211_ATTR_WIPHY_RETRY_LONG, - dev->wiphy.retry_long) || - nla_put_u32(msg, NL80211_ATTR_WIPHY_FRAG_THRESHOLD, - dev->wiphy.frag_threshold) || - nla_put_u32(msg, NL80211_ATTR_WIPHY_RTS_THRESHOLD, - dev->wiphy.rts_threshold) || - nla_put_u8(msg, NL80211_ATTR_WIPHY_COVERAGE_CLASS, - dev->wiphy.coverage_class) || - nla_put_u8(msg, NL80211_ATTR_MAX_NUM_SCAN_SSIDS, - dev->wiphy.max_scan_ssids) || - nla_put_u8(msg, NL80211_ATTR_MAX_NUM_SCHED_SCAN_SSIDS, - dev->wiphy.max_sched_scan_ssids) || - nla_put_u16(msg, NL80211_ATTR_MAX_SCAN_IE_LEN, - dev->wiphy.max_scan_ie_len) || - nla_put_u16(msg, NL80211_ATTR_MAX_SCHED_SCAN_IE_LEN, - dev->wiphy.max_sched_scan_ie_len) || - nla_put_u8(msg, NL80211_ATTR_MAX_MATCH_SETS, - dev->wiphy.max_match_sets)) - goto nla_put_failure; + nl_tcp = nla_nest_start(msg, NL80211_WOWLAN_TRIG_TCP_CONNECTION); + if (!nl_tcp) + return -ENOBUFS; - if ((dev->wiphy.flags & WIPHY_FLAG_IBSS_RSN) && - nla_put_flag(msg, NL80211_ATTR_SUPPORT_IBSS_RSN)) - goto nla_put_failure; - if ((dev->wiphy.flags & WIPHY_FLAG_MESH_AUTH) && - nla_put_flag(msg, NL80211_ATTR_SUPPORT_MESH_AUTH)) - goto nla_put_failure; - if ((dev->wiphy.flags & WIPHY_FLAG_AP_UAPSD) && - nla_put_flag(msg, NL80211_ATTR_SUPPORT_AP_UAPSD)) - goto nla_put_failure; - if ((dev->wiphy.flags & WIPHY_FLAG_SUPPORTS_FW_ROAM) && - nla_put_flag(msg, NL80211_ATTR_ROAM_SUPPORT)) - goto nla_put_failure; - if ((dev->wiphy.flags & WIPHY_FLAG_SUPPORTS_TDLS) && - nla_put_flag(msg, NL80211_ATTR_TDLS_SUPPORT)) - goto nla_put_failure; - if ((dev->wiphy.flags & WIPHY_FLAG_TDLS_EXTERNAL_SETUP) && - nla_put_flag(msg, NL80211_ATTR_TDLS_EXTERNAL_SETUP)) - goto nla_put_failure; + if (nla_put_u32(msg, NL80211_WOWLAN_TCP_DATA_PAYLOAD, + tcp->data_payload_max)) + return -ENOBUFS; - if (nla_put(msg, NL80211_ATTR_CIPHER_SUITES, - sizeof(u32) * dev->wiphy.n_cipher_suites, - dev->wiphy.cipher_suites)) - goto nla_put_failure; + if (nla_put_u32(msg, NL80211_WOWLAN_TCP_DATA_PAYLOAD, + tcp->data_payload_max)) + return -ENOBUFS; - if (nla_put_u8(msg, NL80211_ATTR_MAX_NUM_PMKIDS, - dev->wiphy.max_num_pmkids)) - goto nla_put_failure; + if (tcp->seq && nla_put_flag(msg, NL80211_WOWLAN_TCP_DATA_PAYLOAD_SEQ)) + return -ENOBUFS; - if ((dev->wiphy.flags & 
WIPHY_FLAG_CONTROL_PORT_PROTOCOL) && - nla_put_flag(msg, NL80211_ATTR_CONTROL_PORT_ETHERTYPE)) - goto nla_put_failure; + if (tcp->tok && nla_put(msg, NL80211_WOWLAN_TCP_DATA_PAYLOAD_TOKEN, + sizeof(*tcp->tok), tcp->tok)) + return -ENOBUFS; - if (nla_put_u32(msg, NL80211_ATTR_WIPHY_ANTENNA_AVAIL_TX, - dev->wiphy.available_antennas_tx) || - nla_put_u32(msg, NL80211_ATTR_WIPHY_ANTENNA_AVAIL_RX, - dev->wiphy.available_antennas_rx)) - goto nla_put_failure; + if (nla_put_u32(msg, NL80211_WOWLAN_TCP_DATA_INTERVAL, + tcp->data_interval_max)) + return -ENOBUFS; - if ((dev->wiphy.flags & WIPHY_FLAG_AP_PROBE_RESP_OFFLOAD) && - nla_put_u32(msg, NL80211_ATTR_PROBE_RESP_OFFLOAD, - dev->wiphy.probe_resp_offload)) - goto nla_put_failure; + if (nla_put_u32(msg, NL80211_WOWLAN_TCP_WAKE_PAYLOAD, + tcp->wake_payload_max)) + return -ENOBUFS; - if ((dev->wiphy.available_antennas_tx || - dev->wiphy.available_antennas_rx) && dev->ops->get_antenna) { - u32 tx_ant = 0, rx_ant = 0; - int res; - res = rdev_get_antenna(dev, &tx_ant, &rx_ant); - if (!res) { - if (nla_put_u32(msg, NL80211_ATTR_WIPHY_ANTENNA_TX, - tx_ant) || - nla_put_u32(msg, NL80211_ATTR_WIPHY_ANTENNA_RX, - rx_ant)) - goto nla_put_failure; - } + nla_nest_end(msg, nl_tcp); + return 0; +} + +static int nl80211_send_wowlan(struct sk_buff *msg, + struct cfg80211_registered_device *dev, + bool large) +{ + struct nlattr *nl_wowlan; + + if (!dev->wiphy.wowlan.flags && !dev->wiphy.wowlan.n_patterns) + return 0; + + nl_wowlan = nla_nest_start(msg, NL80211_ATTR_WOWLAN_TRIGGERS_SUPPORTED); + if (!nl_wowlan) + return -ENOBUFS; + + if (((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_ANY) && + nla_put_flag(msg, NL80211_WOWLAN_TRIG_ANY)) || + ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_DISCONNECT) && + nla_put_flag(msg, NL80211_WOWLAN_TRIG_DISCONNECT)) || + ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_MAGIC_PKT) && + nla_put_flag(msg, NL80211_WOWLAN_TRIG_MAGIC_PKT)) || + ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_SUPPORTS_GTK_REKEY) && + nla_put_flag(msg, NL80211_WOWLAN_TRIG_GTK_REKEY_SUPPORTED)) || + ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_GTK_REKEY_FAILURE) && + nla_put_flag(msg, NL80211_WOWLAN_TRIG_GTK_REKEY_FAILURE)) || + ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_EAP_IDENTITY_REQ) && + nla_put_flag(msg, NL80211_WOWLAN_TRIG_EAP_IDENT_REQUEST)) || + ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_4WAY_HANDSHAKE) && + nla_put_flag(msg, NL80211_WOWLAN_TRIG_4WAY_HANDSHAKE)) || + ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_RFKILL_RELEASE) && + nla_put_flag(msg, NL80211_WOWLAN_TRIG_RFKILL_RELEASE))) + return -ENOBUFS; + + if (dev->wiphy.wowlan.n_patterns) { + struct nl80211_wowlan_pattern_support pat = { + .max_patterns = dev->wiphy.wowlan.n_patterns, + .min_pattern_len = dev->wiphy.wowlan.pattern_min_len, + .max_pattern_len = dev->wiphy.wowlan.pattern_max_len, + .max_pkt_offset = dev->wiphy.wowlan.max_pkt_offset, + }; + + if (nla_put(msg, NL80211_WOWLAN_TRIG_PKT_PATTERN, + sizeof(pat), &pat)) + return -ENOBUFS; } - if (nl80211_put_iftypes(msg, NL80211_ATTR_SUPPORTED_IFTYPES, - dev->wiphy.interface_modes)) - goto nla_put_failure; + if (large && nl80211_send_wowlan_tcp_caps(dev, msg)) + return -ENOBUFS; - nl_bands = nla_nest_start(msg, NL80211_ATTR_WIPHY_BANDS); - if (!nl_bands) - goto nla_put_failure; + nla_nest_end(msg, nl_wowlan); - for (band = 0; band < IEEE80211_NUM_BANDS; band++) { - if (!dev->wiphy.bands[band]) - continue; + return 0; +} +#endif - nl_band = nla_nest_start(msg, band); - if (!nl_band) - goto nla_put_failure; +static int nl80211_send_band_rateinfo(struct sk_buff *msg, + 
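The WoWLAN trigger block above maps one capability bit to one netlink flag attribute through a long chain of tests; the same mapping reads naturally as a table walk. A sketch of that form (the capability names are illustrative stand-ins, not the kernel's):

    #include <stdio.h>

    #define CAP_ANY        (1u << 0)
    #define CAP_DISCONNECT (1u << 1)
    #define CAP_MAGIC_PKT  (1u << 2)

    static const struct { unsigned bit; const char *attr; } map[] = {
        { CAP_ANY,        "TRIG_ANY" },
        { CAP_DISCONNECT, "TRIG_DISCONNECT" },
        { CAP_MAGIC_PKT,  "TRIG_MAGIC_PKT" },
    };

    int main(void)
    {
        unsigned flags = CAP_ANY | CAP_MAGIC_PKT;

        /* one flag attribute per supported capability bit */
        for (size_t i = 0; i < sizeof(map) / sizeof(map[0]); i++)
            if (flags & map[i].bit)
                printf("put flag %s\n", map[i].attr);
        return 0;
    }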
struct ieee80211_supported_band *sband) +{ + struct nlattr *nl_rates, *nl_rate; + struct ieee80211_rate *rate; + int i; - /* add HT info */ - if (dev->wiphy.bands[band]->ht_cap.ht_supported && - (nla_put(msg, NL80211_BAND_ATTR_HT_MCS_SET, - sizeof(dev->wiphy.bands[band]->ht_cap.mcs), - &dev->wiphy.bands[band]->ht_cap.mcs) || - nla_put_u16(msg, NL80211_BAND_ATTR_HT_CAPA, - dev->wiphy.bands[band]->ht_cap.cap) || - nla_put_u8(msg, NL80211_BAND_ATTR_HT_AMPDU_FACTOR, - dev->wiphy.bands[band]->ht_cap.ampdu_factor) || - nla_put_u8(msg, NL80211_BAND_ATTR_HT_AMPDU_DENSITY, - dev->wiphy.bands[band]->ht_cap.ampdu_density))) - goto nla_put_failure; + /* add HT info */ + if (sband->ht_cap.ht_supported && + (nla_put(msg, NL80211_BAND_ATTR_HT_MCS_SET, + sizeof(sband->ht_cap.mcs), + &sband->ht_cap.mcs) || + nla_put_u16(msg, NL80211_BAND_ATTR_HT_CAPA, + sband->ht_cap.cap) || + nla_put_u8(msg, NL80211_BAND_ATTR_HT_AMPDU_FACTOR, + sband->ht_cap.ampdu_factor) || + nla_put_u8(msg, NL80211_BAND_ATTR_HT_AMPDU_DENSITY, + sband->ht_cap.ampdu_density))) + return -ENOBUFS; - /* add VHT info */ - if (dev->wiphy.bands[band]->vht_cap.vht_supported && - (nla_put(msg, NL80211_BAND_ATTR_VHT_MCS_SET, - sizeof(dev->wiphy.bands[band]->vht_cap.vht_mcs), - &dev->wiphy.bands[band]->vht_cap.vht_mcs) || - nla_put_u32(msg, NL80211_BAND_ATTR_VHT_CAPA, - dev->wiphy.bands[band]->vht_cap.cap))) - goto nla_put_failure; + /* add VHT info */ + if (sband->vht_cap.vht_supported && + (nla_put(msg, NL80211_BAND_ATTR_VHT_MCS_SET, + sizeof(sband->vht_cap.vht_mcs), + &sband->vht_cap.vht_mcs) || + nla_put_u32(msg, NL80211_BAND_ATTR_VHT_CAPA, + sband->vht_cap.cap))) + return -ENOBUFS; - /* add frequencies */ - nl_freqs = nla_nest_start(msg, NL80211_BAND_ATTR_FREQS); - if (!nl_freqs) - goto nla_put_failure; + /* add bitrates */ + nl_rates = nla_nest_start(msg, NL80211_BAND_ATTR_RATES); + if (!nl_rates) + return -ENOBUFS; - for (i = 0; i < dev->wiphy.bands[band]->n_channels; i++) { - nl_freq = nla_nest_start(msg, i); - if (!nl_freq) - goto nla_put_failure; + for (i = 0; i < sband->n_bitrates; i++) { + nl_rate = nla_nest_start(msg, i); + if (!nl_rate) + return -ENOBUFS; + + rate = &sband->bitrates[i]; + if (nla_put_u32(msg, NL80211_BITRATE_ATTR_RATE, + rate->bitrate)) + return -ENOBUFS; + if ((rate->flags & IEEE80211_RATE_SHORT_PREAMBLE) && + nla_put_flag(msg, + NL80211_BITRATE_ATTR_2GHZ_SHORTPREAMBLE)) + return -ENOBUFS; - chan = &dev->wiphy.bands[band]->channels[i]; + nla_nest_end(msg, nl_rate); + } - if (nl80211_msg_put_channel(msg, chan)) - goto nla_put_failure; + nla_nest_end(msg, nl_rates); - nla_nest_end(msg, nl_freq); - } + return 0; +} - nla_nest_end(msg, nl_freqs); +static int +nl80211_send_mgmt_stypes(struct sk_buff *msg, + const struct ieee80211_txrx_stypes *mgmt_stypes) +{ + u16 stypes; + struct nlattr *nl_ftypes, *nl_ifs; + enum nl80211_iftype ift; + int i; - /* add bitrates */ - nl_rates = nla_nest_start(msg, NL80211_BAND_ATTR_RATES); - if (!nl_rates) - goto nla_put_failure; + if (!mgmt_stypes) + return 0; - for (i = 0; i < dev->wiphy.bands[band]->n_bitrates; i++) { - nl_rate = nla_nest_start(msg, i); - if (!nl_rate) - goto nla_put_failure; + nl_ifs = nla_nest_start(msg, NL80211_ATTR_TX_FRAME_TYPES); + if (!nl_ifs) + return -ENOBUFS; - rate = &dev->wiphy.bands[band]->bitrates[i]; - if (nla_put_u32(msg, NL80211_BITRATE_ATTR_RATE, - rate->bitrate)) - goto nla_put_failure; - if ((rate->flags & IEEE80211_RATE_SHORT_PREAMBLE) && - nla_put_flag(msg, - NL80211_BITRATE_ATTR_2GHZ_SHORTPREAMBLE)) - goto nla_put_failure; + for (ift = 0; ift < 
NUM_NL80211_IFTYPES; ift++) { + nl_ftypes = nla_nest_start(msg, ift); + if (!nl_ftypes) + return -ENOBUFS; + i = 0; + stypes = mgmt_stypes[ift].tx; + while (stypes) { + if ((stypes & 1) && + nla_put_u16(msg, NL80211_ATTR_FRAME_TYPE, + (i << 4) | IEEE80211_FTYPE_MGMT)) + return -ENOBUFS; + stypes >>= 1; + i++; + } + nla_nest_end(msg, nl_ftypes); + } + + nla_nest_end(msg, nl_ifs); + + nl_ifs = nla_nest_start(msg, NL80211_ATTR_RX_FRAME_TYPES); + if (!nl_ifs) + return -ENOBUFS; - nla_nest_end(msg, nl_rate); + for (ift = 0; ift < NUM_NL80211_IFTYPES; ift++) { + nl_ftypes = nla_nest_start(msg, ift); + if (!nl_ftypes) + return -ENOBUFS; + i = 0; + stypes = mgmt_stypes[ift].rx; + while (stypes) { + if ((stypes & 1) && + nla_put_u16(msg, NL80211_ATTR_FRAME_TYPE, + (i << 4) | IEEE80211_FTYPE_MGMT)) + return -ENOBUFS; + stypes >>= 1; + i++; } + nla_nest_end(msg, nl_ftypes); + } + nla_nest_end(msg, nl_ifs); + + return 0; +} + +static int nl80211_send_wiphy(struct cfg80211_registered_device *dev, + struct sk_buff *msg, u32 portid, u32 seq, + int flags, bool split, long *split_start, + long *band_start, long *chan_start) +{ + void *hdr; + struct nlattr *nl_bands, *nl_band; + struct nlattr *nl_freqs, *nl_freq; + struct nlattr *nl_cmds; + enum ieee80211_band band; + struct ieee80211_channel *chan; + int i; + const struct ieee80211_txrx_stypes *mgmt_stypes = + dev->wiphy.mgmt_stypes; + long start = 0, start_chan = 0, start_band = 0; + u32 features; - nla_nest_end(msg, nl_rates); + hdr = nl80211hdr_put(msg, portid, seq, flags, NL80211_CMD_NEW_WIPHY); + if (!hdr) + return -ENOBUFS; - nla_nest_end(msg, nl_band); + /* allow always using the variables */ + if (!split) { + split_start = &start; + band_start = &start_band; + chan_start = &start_chan; } - nla_nest_end(msg, nl_bands); - nl_cmds = nla_nest_start(msg, NL80211_ATTR_SUPPORTED_COMMANDS); - if (!nl_cmds) - goto nla_put_failure; + if (nla_put_u32(msg, NL80211_ATTR_WIPHY, dev->wiphy_idx) || + nla_put_string(msg, NL80211_ATTR_WIPHY_NAME, + wiphy_name(&dev->wiphy)) || + nla_put_u32(msg, NL80211_ATTR_GENERATION, + cfg80211_rdev_list_generation)) + goto nla_put_failure; + + switch (*split_start) { + case 0: + if (nla_put_u8(msg, NL80211_ATTR_WIPHY_RETRY_SHORT, + dev->wiphy.retry_short) || + nla_put_u8(msg, NL80211_ATTR_WIPHY_RETRY_LONG, + dev->wiphy.retry_long) || + nla_put_u32(msg, NL80211_ATTR_WIPHY_FRAG_THRESHOLD, + dev->wiphy.frag_threshold) || + nla_put_u32(msg, NL80211_ATTR_WIPHY_RTS_THRESHOLD, + dev->wiphy.rts_threshold) || + nla_put_u8(msg, NL80211_ATTR_WIPHY_COVERAGE_CLASS, + dev->wiphy.coverage_class) || + nla_put_u8(msg, NL80211_ATTR_MAX_NUM_SCAN_SSIDS, + dev->wiphy.max_scan_ssids) || + nla_put_u8(msg, NL80211_ATTR_MAX_NUM_SCHED_SCAN_SSIDS, + dev->wiphy.max_sched_scan_ssids) || + nla_put_u16(msg, NL80211_ATTR_MAX_SCAN_IE_LEN, + dev->wiphy.max_scan_ie_len) || + nla_put_u16(msg, NL80211_ATTR_MAX_SCHED_SCAN_IE_LEN, + dev->wiphy.max_sched_scan_ie_len) || + nla_put_u8(msg, NL80211_ATTR_MAX_MATCH_SETS, + dev->wiphy.max_match_sets)) + goto nla_put_failure; - i = 0; -#define CMD(op, n) \ - do { \ - if (dev->ops->op) { \ - i++; \ - if (nla_put_u32(msg, i, NL80211_CMD_ ## n)) \ - goto nla_put_failure; \ - } \ - } while (0) - - CMD(add_virtual_intf, NEW_INTERFACE); - CMD(change_virtual_intf, SET_INTERFACE); - CMD(add_key, NEW_KEY); - CMD(start_ap, START_AP); - CMD(add_station, NEW_STATION); - CMD(add_mpath, NEW_MPATH); - CMD(update_mesh_config, SET_MESH_CONFIG); - CMD(change_bss, SET_BSS); - CMD(auth, AUTHENTICATE); - CMD(assoc, ASSOCIATE); - CMD(deauth, 
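nl80211_send_mgmt_stypes() above walks a 16-bit subtype mask one bit at a time: bit i stands for management subtype i, and the value advertised to userspace is the 802.11 frame-control encoding (i << 4) | IEEE80211_FTYPE_MGMT. A standalone version of that walk:

    #include <stdint.h>
    #include <stdio.h>

    #define FTYPE_MGMT 0x00    /* management frames: type bits 00 */

    int main(void)
    {
        /* example mask: subtypes 4 (probe req) and 13 (action) */
        uint16_t stypes = (1u << 4) | (1u << 13);
        unsigned i = 0;

        /* same shape as the hunk above: shift the mask, count bits */
        while (stypes) {
            if (stypes & 1)
                printf("frame type 0x%02x\n", (i << 4) | FTYPE_MGMT);
            stypes >>= 1;
            i++;
        }
        return 0;
    }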
DEAUTHENTICATE); - CMD(disassoc, DISASSOCIATE); - CMD(join_ibss, JOIN_IBSS); - CMD(join_mesh, JOIN_MESH); - CMD(set_pmksa, SET_PMKSA); - CMD(del_pmksa, DEL_PMKSA); - CMD(flush_pmksa, FLUSH_PMKSA); - if (dev->wiphy.flags & WIPHY_FLAG_HAS_REMAIN_ON_CHANNEL) - CMD(remain_on_channel, REMAIN_ON_CHANNEL); - CMD(set_bitrate_mask, SET_TX_BITRATE_MASK); - CMD(mgmt_tx, FRAME); - CMD(mgmt_tx_cancel_wait, FRAME_WAIT_CANCEL); - if (dev->wiphy.flags & WIPHY_FLAG_NETNS_OK) { - i++; - if (nla_put_u32(msg, i, NL80211_CMD_SET_WIPHY_NETNS)) + if ((dev->wiphy.flags & WIPHY_FLAG_IBSS_RSN) && + nla_put_flag(msg, NL80211_ATTR_SUPPORT_IBSS_RSN)) goto nla_put_failure; - } - if (dev->ops->set_monitor_channel || dev->ops->start_ap || - dev->ops->join_mesh) { - i++; - if (nla_put_u32(msg, i, NL80211_CMD_SET_CHANNEL)) + if ((dev->wiphy.flags & WIPHY_FLAG_MESH_AUTH) && + nla_put_flag(msg, NL80211_ATTR_SUPPORT_MESH_AUTH)) goto nla_put_failure; - } - CMD(set_wds_peer, SET_WDS_PEER); - if (dev->wiphy.flags & WIPHY_FLAG_SUPPORTS_TDLS) { - CMD(tdls_mgmt, TDLS_MGMT); - CMD(tdls_oper, TDLS_OPER); - } - if (dev->wiphy.flags & WIPHY_FLAG_SUPPORTS_SCHED_SCAN) - CMD(sched_scan_start, START_SCHED_SCAN); - CMD(probe_client, PROBE_CLIENT); - CMD(set_noack_map, SET_NOACK_MAP); - if (dev->wiphy.flags & WIPHY_FLAG_REPORTS_OBSS) { - i++; - if (nla_put_u32(msg, i, NL80211_CMD_REGISTER_BEACONS)) + if ((dev->wiphy.flags & WIPHY_FLAG_AP_UAPSD) && + nla_put_flag(msg, NL80211_ATTR_SUPPORT_AP_UAPSD)) + goto nla_put_failure; + if ((dev->wiphy.flags & WIPHY_FLAG_SUPPORTS_FW_ROAM) && + nla_put_flag(msg, NL80211_ATTR_ROAM_SUPPORT)) + goto nla_put_failure; + if ((dev->wiphy.flags & WIPHY_FLAG_SUPPORTS_TDLS) && + nla_put_flag(msg, NL80211_ATTR_TDLS_SUPPORT)) + goto nla_put_failure; + if ((dev->wiphy.flags & WIPHY_FLAG_TDLS_EXTERNAL_SETUP) && + nla_put_flag(msg, NL80211_ATTR_TDLS_EXTERNAL_SETUP)) goto nla_put_failure; - } - CMD(start_p2p_device, START_P2P_DEVICE); - CMD(set_mcast_rate, SET_MCAST_RATE); -#ifdef CONFIG_NL80211_TESTMODE - CMD(testmode_cmd, TESTMODE); -#endif + (*split_start)++; + if (split) + break; + case 1: + if (nla_put(msg, NL80211_ATTR_CIPHER_SUITES, + sizeof(u32) * dev->wiphy.n_cipher_suites, + dev->wiphy.cipher_suites)) + goto nla_put_failure; -#undef CMD + if (nla_put_u8(msg, NL80211_ATTR_MAX_NUM_PMKIDS, + dev->wiphy.max_num_pmkids)) + goto nla_put_failure; - if (dev->ops->connect || dev->ops->auth) { - i++; - if (nla_put_u32(msg, i, NL80211_CMD_CONNECT)) + if ((dev->wiphy.flags & WIPHY_FLAG_CONTROL_PORT_PROTOCOL) && + nla_put_flag(msg, NL80211_ATTR_CONTROL_PORT_ETHERTYPE)) goto nla_put_failure; - } - if (dev->ops->disconnect || dev->ops->deauth) { - i++; - if (nla_put_u32(msg, i, NL80211_CMD_DISCONNECT)) + if (nla_put_u32(msg, NL80211_ATTR_WIPHY_ANTENNA_AVAIL_TX, + dev->wiphy.available_antennas_tx) || + nla_put_u32(msg, NL80211_ATTR_WIPHY_ANTENNA_AVAIL_RX, + dev->wiphy.available_antennas_rx)) goto nla_put_failure; - } - nla_nest_end(msg, nl_cmds); + if ((dev->wiphy.flags & WIPHY_FLAG_AP_PROBE_RESP_OFFLOAD) && + nla_put_u32(msg, NL80211_ATTR_PROBE_RESP_OFFLOAD, + dev->wiphy.probe_resp_offload)) + goto nla_put_failure; - if (dev->ops->remain_on_channel && - (dev->wiphy.flags & WIPHY_FLAG_HAS_REMAIN_ON_CHANNEL) && - nla_put_u32(msg, NL80211_ATTR_MAX_REMAIN_ON_CHANNEL_DURATION, - dev->wiphy.max_remain_on_channel_duration)) - goto nla_put_failure; + if ((dev->wiphy.available_antennas_tx || + dev->wiphy.available_antennas_rx) && + dev->ops->get_antenna) { + u32 tx_ant = 0, rx_ant = 0; + int res; + res = rdev_get_antenna(dev, &tx_ant, 
&rx_ant); + if (!res) { + if (nla_put_u32(msg, + NL80211_ATTR_WIPHY_ANTENNA_TX, + tx_ant) || + nla_put_u32(msg, + NL80211_ATTR_WIPHY_ANTENNA_RX, + rx_ant)) + goto nla_put_failure; + } + } - if ((dev->wiphy.flags & WIPHY_FLAG_OFFCHAN_TX) && - nla_put_flag(msg, NL80211_ATTR_OFFCHANNEL_TX_OK)) - goto nla_put_failure; + (*split_start)++; + if (split) + break; + case 2: + if (nl80211_put_iftypes(msg, NL80211_ATTR_SUPPORTED_IFTYPES, + dev->wiphy.interface_modes)) + goto nla_put_failure; + (*split_start)++; + if (split) + break; + case 3: + nl_bands = nla_nest_start(msg, NL80211_ATTR_WIPHY_BANDS); + if (!nl_bands) + goto nla_put_failure; - if (mgmt_stypes) { - u16 stypes; - struct nlattr *nl_ftypes, *nl_ifs; - enum nl80211_iftype ift; + for (band = *band_start; band < IEEE80211_NUM_BANDS; band++) { + struct ieee80211_supported_band *sband; - nl_ifs = nla_nest_start(msg, NL80211_ATTR_TX_FRAME_TYPES); - if (!nl_ifs) - goto nla_put_failure; + sband = dev->wiphy.bands[band]; + + if (!sband) + continue; - for (ift = 0; ift < NUM_NL80211_IFTYPES; ift++) { - nl_ftypes = nla_nest_start(msg, ift); - if (!nl_ftypes) + nl_band = nla_nest_start(msg, band); + if (!nl_band) goto nla_put_failure; - i = 0; - stypes = mgmt_stypes[ift].tx; - while (stypes) { - if ((stypes & 1) && - nla_put_u16(msg, NL80211_ATTR_FRAME_TYPE, - (i << 4) | IEEE80211_FTYPE_MGMT)) + + switch (*chan_start) { + case 0: + if (nl80211_send_band_rateinfo(msg, sband)) goto nla_put_failure; - stypes >>= 1; - i++; + (*chan_start)++; + if (split) + break; + default: + /* add frequencies */ + nl_freqs = nla_nest_start( + msg, NL80211_BAND_ATTR_FREQS); + if (!nl_freqs) + goto nla_put_failure; + + for (i = *chan_start - 1; + i < sband->n_channels; + i++) { + nl_freq = nla_nest_start(msg, i); + if (!nl_freq) + goto nla_put_failure; + + chan = &sband->channels[i]; + + if (nl80211_msg_put_channel(msg, chan, + split)) + goto nla_put_failure; + + nla_nest_end(msg, nl_freq); + if (split) + break; + } + if (i < sband->n_channels) + *chan_start = i + 2; + else + *chan_start = 0; + nla_nest_end(msg, nl_freqs); + } + + nla_nest_end(msg, nl_band); + + if (split) { + /* start again here */ + if (*chan_start) + band--; + break; } - nla_nest_end(msg, nl_ftypes); } + nla_nest_end(msg, nl_bands); - nla_nest_end(msg, nl_ifs); + if (band < IEEE80211_NUM_BANDS) + *band_start = band + 1; + else + *band_start = 0; - nl_ifs = nla_nest_start(msg, NL80211_ATTR_RX_FRAME_TYPES); - if (!nl_ifs) + /* if bands & channels are done, continue outside */ + if (*band_start == 0 && *chan_start == 0) + (*split_start)++; + if (split) + break; + case 4: + nl_cmds = nla_nest_start(msg, NL80211_ATTR_SUPPORTED_COMMANDS); + if (!nl_cmds) goto nla_put_failure; - for (ift = 0; ift < NUM_NL80211_IFTYPES; ift++) { - nl_ftypes = nla_nest_start(msg, ift); - if (!nl_ftypes) + i = 0; +#define CMD(op, n) \ + do { \ + if (dev->ops->op) { \ + i++; \ + if (nla_put_u32(msg, i, NL80211_CMD_ ## n)) \ + goto nla_put_failure; \ + } \ + } while (0) + + CMD(add_virtual_intf, NEW_INTERFACE); + CMD(change_virtual_intf, SET_INTERFACE); + CMD(add_key, NEW_KEY); + CMD(start_ap, START_AP); + CMD(add_station, NEW_STATION); + CMD(add_mpath, NEW_MPATH); + CMD(update_mesh_config, SET_MESH_CONFIG); + CMD(change_bss, SET_BSS); + CMD(auth, AUTHENTICATE); + CMD(assoc, ASSOCIATE); + CMD(deauth, DEAUTHENTICATE); + CMD(disassoc, DISASSOCIATE); + CMD(join_ibss, JOIN_IBSS); + CMD(join_mesh, JOIN_MESH); + CMD(set_pmksa, SET_PMKSA); + CMD(del_pmksa, DEL_PMKSA); + CMD(flush_pmksa, FLUSH_PMKSA); + if (dev->wiphy.flags & 
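The split dump turns nl80211_send_wiphy() into a re-entrant state machine: a switch on *split_start whose cases each emit one chunk, advance the cursor, and break only when splitting, while an unsplit caller falls through every case in a single pass. A toy rendition of that control flow (emit() and the chunk names are stand-ins):

    #include <stdbool.h>
    #include <stdio.h>

    /* stand-in for a series of nla_put()s; a real builder can fail */
    static bool emit(const char *chunk)
    {
        return printf("emit %s\n", chunk) > 0;
    }

    /* one round: resumes at *state, leaves *state at 0 when done */
    static bool send_round(long *state, bool split)
    {
        switch (*state) {
        case 0:
            if (!emit("limits"))
                return false;    /* retry this chunk next round */
            (*state)++;
            if (split)
                break;
            /* fall through */
        case 1:
            if (!emit("bands"))
                return false;
            (*state)++;
            if (split)
                break;
            /* fall through */
        case 2:
            if (!emit("commands"))
                return false;
            *state = 0;    /* done */
            break;
        }
        return true;
    }

    int main(void)
    {
        long state = 0;

        do    /* split mode: one chunk per round, like one skb each */
            send_round(&state, true);
        while (state != 0);
        return 0;
    }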
WIPHY_FLAG_HAS_REMAIN_ON_CHANNEL) + CMD(remain_on_channel, REMAIN_ON_CHANNEL); + CMD(set_bitrate_mask, SET_TX_BITRATE_MASK); + CMD(mgmt_tx, FRAME); + CMD(mgmt_tx_cancel_wait, FRAME_WAIT_CANCEL); + if (dev->wiphy.flags & WIPHY_FLAG_NETNS_OK) { + i++; + if (nla_put_u32(msg, i, NL80211_CMD_SET_WIPHY_NETNS)) goto nla_put_failure; - i = 0; - stypes = mgmt_stypes[ift].rx; - while (stypes) { - if ((stypes & 1) && - nla_put_u16(msg, NL80211_ATTR_FRAME_TYPE, - (i << 4) | IEEE80211_FTYPE_MGMT)) - goto nla_put_failure; - stypes >>= 1; - i++; - } - nla_nest_end(msg, nl_ftypes); } - nla_nest_end(msg, nl_ifs); - } + if (dev->ops->set_monitor_channel || dev->ops->start_ap || + dev->ops->join_mesh) { + i++; + if (nla_put_u32(msg, i, NL80211_CMD_SET_CHANNEL)) + goto nla_put_failure; + } + CMD(set_wds_peer, SET_WDS_PEER); + if (dev->wiphy.flags & WIPHY_FLAG_SUPPORTS_TDLS) { + CMD(tdls_mgmt, TDLS_MGMT); + CMD(tdls_oper, TDLS_OPER); + } + if (dev->wiphy.flags & WIPHY_FLAG_SUPPORTS_SCHED_SCAN) + CMD(sched_scan_start, START_SCHED_SCAN); + CMD(probe_client, PROBE_CLIENT); + CMD(set_noack_map, SET_NOACK_MAP); + if (dev->wiphy.flags & WIPHY_FLAG_REPORTS_OBSS) { + i++; + if (nla_put_u32(msg, i, NL80211_CMD_REGISTER_BEACONS)) + goto nla_put_failure; + } + CMD(start_p2p_device, START_P2P_DEVICE); + CMD(set_mcast_rate, SET_MCAST_RATE); + if (split) { + CMD(crit_proto_start, CRIT_PROTOCOL_START); + CMD(crit_proto_stop, CRIT_PROTOCOL_STOP); + } -#ifdef CONFIG_PM - if (dev->wiphy.wowlan.flags || dev->wiphy.wowlan.n_patterns) { - struct nlattr *nl_wowlan; +#ifdef CONFIG_NL80211_TESTMODE + CMD(testmode_cmd, TESTMODE); +#endif - nl_wowlan = nla_nest_start(msg, - NL80211_ATTR_WOWLAN_TRIGGERS_SUPPORTED); - if (!nl_wowlan) - goto nla_put_failure; +#undef CMD - if (((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_ANY) && - nla_put_flag(msg, NL80211_WOWLAN_TRIG_ANY)) || - ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_DISCONNECT) && - nla_put_flag(msg, NL80211_WOWLAN_TRIG_DISCONNECT)) || - ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_MAGIC_PKT) && - nla_put_flag(msg, NL80211_WOWLAN_TRIG_MAGIC_PKT)) || - ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_SUPPORTS_GTK_REKEY) && - nla_put_flag(msg, NL80211_WOWLAN_TRIG_GTK_REKEY_SUPPORTED)) || - ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_GTK_REKEY_FAILURE) && - nla_put_flag(msg, NL80211_WOWLAN_TRIG_GTK_REKEY_FAILURE)) || - ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_EAP_IDENTITY_REQ) && - nla_put_flag(msg, NL80211_WOWLAN_TRIG_EAP_IDENT_REQUEST)) || - ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_4WAY_HANDSHAKE) && - nla_put_flag(msg, NL80211_WOWLAN_TRIG_4WAY_HANDSHAKE)) || - ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_RFKILL_RELEASE) && - nla_put_flag(msg, NL80211_WOWLAN_TRIG_RFKILL_RELEASE))) - goto nla_put_failure; - if (dev->wiphy.wowlan.n_patterns) { - struct nl80211_wowlan_pattern_support pat = { - .max_patterns = dev->wiphy.wowlan.n_patterns, - .min_pattern_len = - dev->wiphy.wowlan.pattern_min_len, - .max_pattern_len = - dev->wiphy.wowlan.pattern_max_len, - }; - if (nla_put(msg, NL80211_WOWLAN_TRIG_PKT_PATTERN, - sizeof(pat), &pat)) + if (dev->ops->connect || dev->ops->auth) { + i++; + if (nla_put_u32(msg, i, NL80211_CMD_CONNECT)) goto nla_put_failure; } - nla_nest_end(msg, nl_wowlan); - } + if (dev->ops->disconnect || dev->ops->deauth) { + i++; + if (nla_put_u32(msg, i, NL80211_CMD_DISCONNECT)) + goto nla_put_failure; + } + + nla_nest_end(msg, nl_cmds); + (*split_start)++; + if (split) + break; + case 5: + if (dev->ops->remain_on_channel && + (dev->wiphy.flags & WIPHY_FLAG_HAS_REMAIN_ON_CHANNEL) && + 
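The CMD() helper above is the classic do { } while (0) macro idiom: it keeps a running attribute index and jumps to the shared error label when a put fails, so advertising an implemented op costs one line. A compilable sketch of the pattern with an invented ops structure:

    #include <stdio.h>

    struct ops {
        int (*add_key)(void);
        int (*start_ap)(void);    /* NULL: not implemented */
    };

    static int dummy(void) { return 0; }

    static int put_u32(int idx, const char *name)
    {
        return printf("attr %d = CMD_%s\n", idx, name) < 0 ? -1 : 0;
    }

    /* advertise op as command n only if the driver implements it */
    #define CMD(op, n)                          \
        do {                                    \
            if (dev->op) {                      \
                i++;                            \
                if (put_u32(i, #n))             \
                    goto failure;               \
            }                                   \
        } while (0)

    int main(void)
    {
        struct ops o = { .add_key = dummy };    /* start_ap left NULL */
        struct ops *dev = &o;
        int i = 0;

        CMD(add_key, NEW_KEY);      /* advertised */
        CMD(start_ap, START_AP);    /* silently skipped */
        return 0;

    failure:
        return 1;
    }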
nla_put_u32(msg, + NL80211_ATTR_MAX_REMAIN_ON_CHANNEL_DURATION, + dev->wiphy.max_remain_on_channel_duration)) + goto nla_put_failure; + + if ((dev->wiphy.flags & WIPHY_FLAG_OFFCHAN_TX) && + nla_put_flag(msg, NL80211_ATTR_OFFCHANNEL_TX_OK)) + goto nla_put_failure; + + if (nl80211_send_mgmt_stypes(msg, mgmt_stypes)) + goto nla_put_failure; + (*split_start)++; + if (split) + break; + case 6: +#ifdef CONFIG_PM + if (nl80211_send_wowlan(msg, dev, split)) + goto nla_put_failure; + (*split_start)++; + if (split) + break; +#else + (*split_start)++; #endif + case 7: + if (nl80211_put_iftypes(msg, NL80211_ATTR_SOFTWARE_IFTYPES, + dev->wiphy.software_iftypes)) + goto nla_put_failure; - if (nl80211_put_iftypes(msg, NL80211_ATTR_SOFTWARE_IFTYPES, - dev->wiphy.software_iftypes)) - goto nla_put_failure; + if (nl80211_put_iface_combinations(&dev->wiphy, msg, split)) + goto nla_put_failure; - if (nl80211_put_iface_combinations(&dev->wiphy, msg)) - goto nla_put_failure; + (*split_start)++; + if (split) + break; + case 8: + if ((dev->wiphy.flags & WIPHY_FLAG_HAVE_AP_SME) && + nla_put_u32(msg, NL80211_ATTR_DEVICE_AP_SME, + dev->wiphy.ap_sme_capa)) + goto nla_put_failure; - if ((dev->wiphy.flags & WIPHY_FLAG_HAVE_AP_SME) && - nla_put_u32(msg, NL80211_ATTR_DEVICE_AP_SME, - dev->wiphy.ap_sme_capa)) - goto nla_put_failure; + features = dev->wiphy.features; + /* + * We can only add the per-channel limit information if the + * dump is split, otherwise it makes it too big. Therefore + * only advertise it in that case. + */ + if (split) + features |= NL80211_FEATURE_ADVERTISE_CHAN_LIMITS; + if (nla_put_u32(msg, NL80211_ATTR_FEATURE_FLAGS, features)) + goto nla_put_failure; - if (nla_put_u32(msg, NL80211_ATTR_FEATURE_FLAGS, - dev->wiphy.features)) - goto nla_put_failure; + if (dev->wiphy.ht_capa_mod_mask && + nla_put(msg, NL80211_ATTR_HT_CAPABILITY_MASK, + sizeof(*dev->wiphy.ht_capa_mod_mask), + dev->wiphy.ht_capa_mod_mask)) + goto nla_put_failure; - if (dev->wiphy.ht_capa_mod_mask && - nla_put(msg, NL80211_ATTR_HT_CAPABILITY_MASK, - sizeof(*dev->wiphy.ht_capa_mod_mask), - dev->wiphy.ht_capa_mod_mask)) - goto nla_put_failure; + if (dev->wiphy.flags & WIPHY_FLAG_HAVE_AP_SME && + dev->wiphy.max_acl_mac_addrs && + nla_put_u32(msg, NL80211_ATTR_MAC_ACL_MAX, + dev->wiphy.max_acl_mac_addrs)) + goto nla_put_failure; + /* + * Any information below this point is only available to + * applications that can deal with it being split. This + * helps ensure that newly added capabilities don't break + * older tools by overrunning their buffers. + * + * We still increment split_start so that in the split + * case we'll continue with more data in the next round, + * but break unconditionally so unsplit data stops here. 
+ */ + (*split_start)++; + break; + case 9: + if (dev->wiphy.extended_capabilities && + (nla_put(msg, NL80211_ATTR_EXT_CAPA, + dev->wiphy.extended_capabilities_len, + dev->wiphy.extended_capabilities) || + nla_put(msg, NL80211_ATTR_EXT_CAPA_MASK, + dev->wiphy.extended_capabilities_len, + dev->wiphy.extended_capabilities_mask))) + goto nla_put_failure; + + if (dev->wiphy.vht_capa_mod_mask && + nla_put(msg, NL80211_ATTR_VHT_CAPABILITY_MASK, + sizeof(*dev->wiphy.vht_capa_mod_mask), + dev->wiphy.vht_capa_mod_mask)) + goto nla_put_failure; + + /* done */ + *split_start = 0; + break; + } return genlmsg_end(msg, hdr); nla_put_failure: @@ -1274,22 +1559,83 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 portid, u32 seq, int flag static int nl80211_dump_wiphy(struct sk_buff *skb, struct netlink_callback *cb) { - int idx = 0; + int idx = 0, ret; int start = cb->args[0]; struct cfg80211_registered_device *dev; + s64 filter_wiphy = -1; + bool split = false; + struct nlattr **tb = nl80211_fam.attrbuf; + int res; mutex_lock(&cfg80211_mutex); + res = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize, + tb, nl80211_fam.maxattr, nl80211_policy); + if (res == 0) { + split = tb[NL80211_ATTR_SPLIT_WIPHY_DUMP]; + if (tb[NL80211_ATTR_WIPHY]) + filter_wiphy = nla_get_u32(tb[NL80211_ATTR_WIPHY]); + if (tb[NL80211_ATTR_WDEV]) + filter_wiphy = nla_get_u64(tb[NL80211_ATTR_WDEV]) >> 32; + if (tb[NL80211_ATTR_IFINDEX]) { + struct net_device *netdev; + int ifidx = nla_get_u32(tb[NL80211_ATTR_IFINDEX]); + + netdev = dev_get_by_index(sock_net(skb->sk), ifidx); + if (!netdev) { + mutex_unlock(&cfg80211_mutex); + return -ENODEV; + } + if (netdev->ieee80211_ptr) { + dev = wiphy_to_dev( + netdev->ieee80211_ptr->wiphy); + filter_wiphy = dev->wiphy_idx; + } + dev_put(netdev); + } + } + list_for_each_entry(dev, &cfg80211_rdev_list, list) { if (!net_eq(wiphy_net(&dev->wiphy), sock_net(skb->sk))) continue; if (++idx <= start) continue; - if (nl80211_send_wiphy(skb, NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, NLM_F_MULTI, - dev) < 0) { - idx--; - break; - } + if (filter_wiphy != -1 && dev->wiphy_idx != filter_wiphy) + continue; + /* attempt to fit multiple wiphy data chunks into the skb */ + do { + ret = nl80211_send_wiphy(dev, skb, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + NLM_F_MULTI, + split, &cb->args[1], + &cb->args[2], + &cb->args[3]); + if (ret < 0) { + /* + * If sending the wiphy data didn't fit (ENOBUFS + * or EMSGSIZE returned), this SKB is still + * empty (so it's not too big because another + * wiphy dataset is already in the skb) and + * we've not tried to adjust the dump allocation + * yet ... then adjust the alloc size to be + * bigger, and return 1 but with the empty skb. + * This results in an empty message being RX'ed + * in userspace, but that is ignored. + * + * We can then retry with the larger buffer. 
+ */ + if ((ret == -ENOBUFS || ret == -EMSGSIZE) && + !skb->len && + cb->min_dump_alloc < 4096) { + cb->min_dump_alloc = 4096; + mutex_unlock(&cfg80211_mutex); + return 1; + } + idx--; + break; + } + } while (cb->args[1] > 0); + break; } mutex_unlock(&cfg80211_mutex); @@ -1303,11 +1649,12 @@ static int nl80211_get_wiphy(struct sk_buff *skb, struct genl_info *info) struct sk_buff *msg; struct cfg80211_registered_device *dev = info->user_ptr[0]; - msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + msg = nlmsg_new(4096, GFP_KERNEL); if (!msg) return -ENOMEM; - if (nl80211_send_wiphy(msg, info->snd_portid, info->snd_seq, 0, dev) < 0) { + if (nl80211_send_wiphy(dev, msg, info->snd_portid, info->snd_seq, 0, + false, NULL, NULL, NULL) < 0) { nlmsg_free(msg); return -ENOBUFS; } @@ -2079,6 +2426,13 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info) !(rdev->wiphy.interface_modes & (1 << type))) return -EOPNOTSUPP; + if (type == NL80211_IFTYPE_P2P_DEVICE && info->attrs[NL80211_ATTR_MAC]) { + nla_memcpy(params.macaddr, info->attrs[NL80211_ATTR_MAC], + ETH_ALEN); + if (!is_valid_ether_addr(params.macaddr)) + return -EADDRNOTAVAIL; + } + if (info->attrs[NL80211_ATTR_4ADDR]) { params.use_4addr = !!nla_get_u8(info->attrs[NL80211_ATTR_4ADDR]); err = nl80211_valid_4addr(rdev, NULL, params.use_4addr, type); @@ -2481,6 +2835,97 @@ static int nl80211_del_key(struct sk_buff *skb, struct genl_info *info) return err; } +/* This function returns an error or the number of nested attributes */ +static int validate_acl_mac_addrs(struct nlattr *nl_attr) +{ + struct nlattr *attr; + int n_entries = 0, tmp; + + nla_for_each_nested(attr, nl_attr, tmp) { + if (nla_len(attr) != ETH_ALEN) + return -EINVAL; + + n_entries++; + } + + return n_entries; +} + +/* + * This function parses ACL information and allocates memory for ACL data. + * On successful return, the calling function is responsible to free the + * ACL buffer returned by this function. 
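When even a single split chunk does not fit, the dump code above returns an intentionally empty message and raises cb->min_dump_alloc to 4096, so the next dump round starts over with a larger skb. The same grow-and-retry shape in plain C (emit() and the sizes are stand-ins):

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    /* pretend serializer: fails unless the buffer can hold the payload */
    static int emit(char *buf, size_t cap, const char *payload)
    {
        if (strlen(payload) + 1 > cap)
            return -1;    /* like -ENOBUFS or -EMSGSIZE */
        memcpy(buf, payload, strlen(payload) + 1);
        return 0;
    }

    int main(void)
    {
        size_t cap = 16;    /* default size, like NLMSG_DEFAULT_SIZE */
        const char *payload = "one oversized wiphy data chunk";
        char *buf = malloc(cap);

        if (buf && emit(buf, cap, payload) < 0) {
            /* nothing fit into the empty buffer: retry once, bigger */
            free(buf);
            cap = 4096;
            buf = malloc(cap);
            if (buf && emit(buf, cap, payload) == 0)
                printf("fit on retry: %s\n", buf);
        }
        free(buf);
        return 0;
    }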
+ */ +static struct cfg80211_acl_data *parse_acl_data(struct wiphy *wiphy, + struct genl_info *info) +{ + enum nl80211_acl_policy acl_policy; + struct nlattr *attr; + struct cfg80211_acl_data *acl; + int i = 0, n_entries, tmp; + + if (!wiphy->max_acl_mac_addrs) + return ERR_PTR(-EOPNOTSUPP); + + if (!info->attrs[NL80211_ATTR_ACL_POLICY]) + return ERR_PTR(-EINVAL); + + acl_policy = nla_get_u32(info->attrs[NL80211_ATTR_ACL_POLICY]); + if (acl_policy != NL80211_ACL_POLICY_ACCEPT_UNLESS_LISTED && + acl_policy != NL80211_ACL_POLICY_DENY_UNLESS_LISTED) + return ERR_PTR(-EINVAL); + + if (!info->attrs[NL80211_ATTR_MAC_ADDRS]) + return ERR_PTR(-EINVAL); + + n_entries = validate_acl_mac_addrs(info->attrs[NL80211_ATTR_MAC_ADDRS]); + if (n_entries < 0) + return ERR_PTR(n_entries); + + if (n_entries > wiphy->max_acl_mac_addrs) + return ERR_PTR(-ENOTSUPP); + + acl = kzalloc(sizeof(*acl) + (sizeof(struct mac_address) * n_entries), + GFP_KERNEL); + if (!acl) + return ERR_PTR(-ENOMEM); + + nla_for_each_nested(attr, info->attrs[NL80211_ATTR_MAC_ADDRS], tmp) { + memcpy(acl->mac_addrs[i].addr, nla_data(attr), ETH_ALEN); + i++; + } + + acl->n_acl_entries = n_entries; + acl->acl_policy = acl_policy; + + return acl; +} + +static int nl80211_set_mac_acl(struct sk_buff *skb, struct genl_info *info) +{ + struct cfg80211_registered_device *rdev = info->user_ptr[0]; + struct net_device *dev = info->user_ptr[1]; + struct cfg80211_acl_data *acl; + int err; + + if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP && + dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO) + return -EOPNOTSUPP; + + if (!dev->ieee80211_ptr->beacon_interval) + return -EINVAL; + + acl = parse_acl_data(&rdev->wiphy, info); + if (IS_ERR(acl)) + return PTR_ERR(acl); + + err = rdev_set_mac_acl(rdev, dev, acl); + + kfree(acl); + + return err; +} + static int nl80211_parse_beacon(struct genl_info *info, struct cfg80211_beacon_data *bcn) { @@ -2598,6 +3043,7 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_ap_settings params; int err; + u8 radar_detect_width = 0; if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP && dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO) @@ -2716,14 +3162,30 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) if (!cfg80211_reg_can_beacon(&rdev->wiphy, ¶ms.chandef)) return -EINVAL; + err = cfg80211_chandef_dfs_required(wdev->wiphy, ¶ms.chandef); + if (err < 0) + return err; + if (err) { + radar_detect_width = BIT(params.chandef.width); + params.radar_required = true; + } + mutex_lock(&rdev->devlist_mtx); - err = cfg80211_can_use_chan(rdev, wdev, params.chandef.chan, - CHAN_MODE_SHARED); + err = cfg80211_can_use_iftype_chan(rdev, wdev, wdev->iftype, + params.chandef.chan, + CHAN_MODE_SHARED, + radar_detect_width); mutex_unlock(&rdev->devlist_mtx); if (err) return err; + if (info->attrs[NL80211_ATTR_ACL_POLICY]) { + params.acl = parse_acl_data(&rdev->wiphy, info); + if (IS_ERR(params.acl)) + return PTR_ERR(params.acl); + } + err = rdev_start_ap(rdev, dev, ¶ms); if (!err) { wdev->preset_chandef = params.chandef; @@ -2732,6 +3194,9 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) wdev->ssid_len = params.ssid_len; memcpy(wdev->ssid, params.ssid, wdev->ssid_len); } + + kfree(params.acl); + return err; } @@ -2796,6 +3261,7 @@ static int parse_station_flags(struct genl_info *info, sta_flags = nla_data(nla); params->sta_flags_mask = sta_flags->mask; params->sta_flags_set = sta_flags->set; 
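parse_acl_data() above follows a two-pass shape: validate and count every entry first, then make a single allocation sized from the verified count and copy the entries in. A portable sketch of count-then-copy over raw 6-byte MAC records (the input framing is invented):

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    struct acl {
        size_t n;
        unsigned char mac[][6];    /* flexible array, sized at alloc */
    };

    static struct acl *parse_acl(const unsigned char *data, size_t len)
    {
        /* pass 1: validate the framing and count the entries */
        if (len % 6)
            return NULL;    /* one malformed entry rejects the list */
        size_t n = len / 6;

        /* pass 2: one allocation sized from the verified count */
        struct acl *acl = malloc(sizeof(*acl) + n * 6);
        if (!acl)
            return NULL;
        acl->n = n;
        memcpy(acl->mac, data, len);
        return acl;
    }

    int main(void)
    {
        const unsigned char raw[12] = { 2, 0, 0, 0, 0, 1,
                                        2, 0, 0, 0, 0, 2 };
        struct acl *acl = parse_acl(raw, sizeof(raw));

        if (acl)
            printf("%zu entries, first ends in %02x\n",
                   acl->n, acl->mac[0][5]);
        free(acl);    /* the caller owns the buffer, as in the hunk */
        return 0;
    }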
+ params->sta_flags_set &= params->sta_flags_mask; if ((params->sta_flags_mask | params->sta_flags_set) & BIT(__NL80211_STA_FLAG_INVALID)) return -EINVAL; @@ -2939,12 +3405,22 @@ static int nl80211_send_station(struct sk_buff *msg, u32 portid, u32 seq, nla_put_u32(msg, NL80211_STA_INFO_INACTIVE_TIME, sinfo->inactive_time)) goto nla_put_failure; - if ((sinfo->filled & STATION_INFO_RX_BYTES) && + if ((sinfo->filled & (STATION_INFO_RX_BYTES | + STATION_INFO_RX_BYTES64)) && nla_put_u32(msg, NL80211_STA_INFO_RX_BYTES, - sinfo->rx_bytes) + (u32)sinfo->rx_bytes)) goto nla_put_failure; - if ((sinfo->filled & STATION_INFO_TX_BYTES) && + if ((sinfo->filled & (STATION_INFO_TX_BYTES | + STATION_INFO_TX_BYTES64)) && nla_put_u32(msg, NL80211_STA_INFO_TX_BYTES, + (u32)sinfo->tx_bytes)) + goto nla_put_failure; + if ((sinfo->filled & STATION_INFO_RX_BYTES64) && + nla_put_u64(msg, NL80211_STA_INFO_RX_BYTES64, + sinfo->rx_bytes)) + goto nla_put_failure; + if ((sinfo->filled & STATION_INFO_TX_BYTES64) && + nla_put_u64(msg, NL80211_STA_INFO_TX_BYTES64, sinfo->tx_bytes)) goto nla_put_failure; if ((sinfo->filled & STATION_INFO_LLID) && @@ -3001,6 +3477,18 @@ static int nl80211_send_station(struct sk_buff *msg, u32 portid, u32 seq, nla_put_u32(msg, NL80211_STA_INFO_BEACON_LOSS, sinfo->beacon_loss_count)) goto nla_put_failure; + if ((sinfo->filled & STATION_INFO_LOCAL_PM) && + nla_put_u32(msg, NL80211_STA_INFO_LOCAL_PM, + sinfo->local_pm)) + goto nla_put_failure; + if ((sinfo->filled & STATION_INFO_PEER_PM) && + nla_put_u32(msg, NL80211_STA_INFO_PEER_PM, + sinfo->peer_pm)) + goto nla_put_failure; + if ((sinfo->filled & STATION_INFO_NONPEER_PM) && + nla_put_u32(msg, NL80211_STA_INFO_NONPEER_PM, + sinfo->nonpeer_pm)) + goto nla_put_failure; if (sinfo->filled & STATION_INFO_BSS_PARAM) { bss_param = nla_nest_start(msg, NL80211_STA_INFO_BSS_PARAM); if (!bss_param) @@ -3048,15 +3536,20 @@ static int nl80211_dump_station(struct sk_buff *skb, { struct station_info sinfo; struct cfg80211_registered_device *dev; - struct net_device *netdev; + struct wireless_dev *wdev; u8 mac_addr[ETH_ALEN]; - int sta_idx = cb->args[1]; + int sta_idx = cb->args[2]; int err; - err = nl80211_prepare_netdev_dump(skb, cb, &dev, &netdev); + err = nl80211_prepare_wdev_dump(skb, cb, &dev, &wdev); if (err) return err; + if (!wdev->netdev) { + err = -EINVAL; + goto out_err; + } + if (!dev->ops->dump_station) { err = -EOPNOTSUPP; goto out_err; @@ -3064,7 +3557,7 @@ static int nl80211_dump_station(struct sk_buff *skb, while (1) { memset(&sinfo, 0, sizeof(sinfo)); - err = rdev_dump_station(dev, netdev, sta_idx, + err = rdev_dump_station(dev, wdev->netdev, sta_idx, mac_addr, &sinfo); if (err == -ENOENT) break; @@ -3074,7 +3567,7 @@ static int nl80211_dump_station(struct sk_buff *skb, if (nl80211_send_station(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, - dev, netdev, mac_addr, + dev, wdev->netdev, mac_addr, &sinfo) < 0) goto out; @@ -3083,10 +3576,10 @@ static int nl80211_dump_station(struct sk_buff *skb, out: - cb->args[1] = sta_idx; + cb->args[2] = sta_idx; err = skb->len; out_err: - nl80211_finish_netdev_dump(dev); + nl80211_finish_wdev_dump(dev); return err; } @@ -3127,6 +3620,136 @@ static int nl80211_get_station(struct sk_buff *skb, struct genl_info *info) return genlmsg_reply(msg, info); } +int cfg80211_check_station_change(struct wiphy *wiphy, + struct station_parameters *params, + enum cfg80211_station_type statype) +{ + if (params->listen_interval != -1) + return -EINVAL; + if (params->aid) + return -EINVAL; + + /*
When you run into this, adjust the code below for the new flag */ + BUILD_BUG_ON(NL80211_STA_FLAG_MAX != 7); + + switch (statype) { + case CFG80211_STA_MESH_PEER_KERNEL: + case CFG80211_STA_MESH_PEER_USER: + /* + * No ignoring the TDLS flag here -- the userspace mesh + * code doesn't have the bug of including TDLS in the + * mask everywhere. + */ + if (params->sta_flags_mask & + ~(BIT(NL80211_STA_FLAG_AUTHENTICATED) | + BIT(NL80211_STA_FLAG_MFP) | + BIT(NL80211_STA_FLAG_AUTHORIZED))) + return -EINVAL; + break; + case CFG80211_STA_TDLS_PEER_SETUP: + case CFG80211_STA_TDLS_PEER_ACTIVE: + if (!(params->sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER))) + return -EINVAL; + /* ignore since it can't change */ + params->sta_flags_mask &= ~BIT(NL80211_STA_FLAG_TDLS_PEER); + break; + default: + /* disallow mesh-specific things */ + if (params->plink_action != NL80211_PLINK_ACTION_NO_ACTION) + return -EINVAL; + if (params->local_pm) + return -EINVAL; + if (params->sta_modify_mask & STATION_PARAM_APPLY_PLINK_STATE) + return -EINVAL; + } + + if (statype != CFG80211_STA_TDLS_PEER_SETUP && + statype != CFG80211_STA_TDLS_PEER_ACTIVE) { + /* TDLS can't be set, ... */ + if (params->sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER)) + return -EINVAL; + /* + * ... but don't bother the driver with it. This works around + * a hostapd/wpa_supplicant issue -- it always includes the + * TLDS_PEER flag in the mask even for AP mode. + */ + params->sta_flags_mask &= ~BIT(NL80211_STA_FLAG_TDLS_PEER); + } + + if (statype != CFG80211_STA_TDLS_PEER_SETUP) { + /* reject other things that can't change */ + if (params->sta_modify_mask & STATION_PARAM_APPLY_UAPSD) + return -EINVAL; + if (params->sta_modify_mask & STATION_PARAM_APPLY_CAPABILITY) + return -EINVAL; + if (params->supported_rates) + return -EINVAL; + if (params->ext_capab || params->ht_capa || params->vht_capa) + return -EINVAL; + } + + if (statype != CFG80211_STA_AP_CLIENT) { + if (params->vlan) + return -EINVAL; + } + + switch (statype) { + case CFG80211_STA_AP_MLME_CLIENT: + /* Use this only for authorizing/unauthorizing a station */ + if (!(params->sta_flags_mask & BIT(NL80211_STA_FLAG_AUTHORIZED))) + return -EOPNOTSUPP; + break; + case CFG80211_STA_AP_CLIENT: + /* accept only the listed bits */ + if (params->sta_flags_mask & + ~(BIT(NL80211_STA_FLAG_AUTHORIZED) | + BIT(NL80211_STA_FLAG_AUTHENTICATED) | + BIT(NL80211_STA_FLAG_ASSOCIATED) | + BIT(NL80211_STA_FLAG_SHORT_PREAMBLE) | + BIT(NL80211_STA_FLAG_WME) | + BIT(NL80211_STA_FLAG_MFP))) + return -EINVAL; + + /* but authenticated/associated only if driver handles it */ + if (!(wiphy->features & NL80211_FEATURE_FULL_AP_CLIENT_STATE) && + params->sta_flags_mask & + (BIT(NL80211_STA_FLAG_AUTHENTICATED) | + BIT(NL80211_STA_FLAG_ASSOCIATED))) + return -EINVAL; + break; + case CFG80211_STA_IBSS: + case CFG80211_STA_AP_STA: + /* reject any changes other than AUTHORIZED */ + if (params->sta_flags_mask & ~BIT(NL80211_STA_FLAG_AUTHORIZED)) + return -EINVAL; + break; + case CFG80211_STA_TDLS_PEER_SETUP: + /* reject any changes other than AUTHORIZED or WME */ + if (params->sta_flags_mask & ~(BIT(NL80211_STA_FLAG_AUTHORIZED) | + BIT(NL80211_STA_FLAG_WME))) + return -EINVAL; + /* force (at least) rates when authorizing */ + if (params->sta_flags_set & BIT(NL80211_STA_FLAG_AUTHORIZED) && + !params->supported_rates) + return -EINVAL; + break; + case CFG80211_STA_TDLS_PEER_ACTIVE: + /* reject any changes */ + return -EINVAL; + case CFG80211_STA_MESH_PEER_KERNEL: + if (params->sta_modify_mask & STATION_PARAM_APPLY_PLINK_STATE) 
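cfg80211_check_station_change() above boils down to an allow-mask per station state: any flag bit outside the mask for the current type is rejected with -EINVAL. A condensed table form of that check, using a small invented subset of the types and bits:

    #include <stdio.h>

    enum {
        FLAG_AUTHORIZED = 1u << 0,
        FLAG_WME        = 1u << 1,
        FLAG_MFP        = 1u << 2,
    };
    enum statype { STA_AP_STA, STA_IBSS, STA_TDLS_SETUP, STA_NUM };

    /* which flag bits userspace may touch, per station state */
    static const unsigned allowed[STA_NUM] = {
        [STA_AP_STA]     = FLAG_AUTHORIZED,
        [STA_IBSS]       = FLAG_AUTHORIZED,
        [STA_TDLS_SETUP] = FLAG_AUTHORIZED | FLAG_WME,
    };

    static int check_change(enum statype t, unsigned mask)
    {
        return (mask & ~allowed[t]) ? -22 /* -EINVAL */ : 0;
    }

    int main(void)
    {
        printf("%d\n", check_change(STA_AP_STA, FLAG_AUTHORIZED)); /* 0 */
        printf("%d\n", check_change(STA_IBSS, FLAG_MFP));        /* -22 */
        return 0;
    }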
+ return -EINVAL; + break; + case CFG80211_STA_MESH_PEER_USER: + if (params->plink_action != NL80211_PLINK_ACTION_NO_ACTION) + return -EINVAL; + break; + } + + return 0; +} +EXPORT_SYMBOL(cfg80211_check_station_change); + /* * Get vlan interface making sure it is running and on the right wiphy. */ @@ -3149,6 +3772,13 @@ static struct net_device *get_vlan(struct genl_info *info, goto error; } + if (v->ieee80211_ptr->iftype != NL80211_IFTYPE_AP_VLAN && + v->ieee80211_ptr->iftype != NL80211_IFTYPE_AP && + v->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO) { + ret = -EINVAL; + goto error; + } + if (!netif_running(v)) { ret = -ENETDOWN; goto error; @@ -3160,18 +3790,74 @@ static struct net_device *get_vlan(struct genl_info *info, return ERR_PTR(ret); } +static struct nla_policy +nl80211_sta_wme_policy[NL80211_STA_WME_MAX + 1] __read_mostly = { + [NL80211_STA_WME_UAPSD_QUEUES] = { .type = NLA_U8 }, + [NL80211_STA_WME_MAX_SP] = { .type = NLA_U8 }, +}; + +static int nl80211_parse_sta_wme(struct genl_info *info, + struct station_parameters *params) +{ + struct nlattr *tb[NL80211_STA_WME_MAX + 1]; + struct nlattr *nla; + int err; + + /* parse WME attributes if present */ + if (!info->attrs[NL80211_ATTR_STA_WME]) + return 0; + + nla = info->attrs[NL80211_ATTR_STA_WME]; + err = nla_parse_nested(tb, NL80211_STA_WME_MAX, nla, + nl80211_sta_wme_policy); + if (err) + return err; + + if (tb[NL80211_STA_WME_UAPSD_QUEUES]) + params->uapsd_queues = nla_get_u8( + tb[NL80211_STA_WME_UAPSD_QUEUES]); + if (params->uapsd_queues & ~IEEE80211_WMM_IE_STA_QOSINFO_AC_MASK) + return -EINVAL; + + if (tb[NL80211_STA_WME_MAX_SP]) + params->max_sp = nla_get_u8(tb[NL80211_STA_WME_MAX_SP]); + + if (params->max_sp & ~IEEE80211_WMM_IE_STA_QOSINFO_SP_MASK) + return -EINVAL; + + params->sta_modify_mask |= STATION_PARAM_APPLY_UAPSD; + + return 0; +} + +static int nl80211_set_station_tdls(struct genl_info *info, + struct station_parameters *params) +{ + /* Dummy STA entry gets updated once the peer capabilities are known */ + if (info->attrs[NL80211_ATTR_HT_CAPABILITY]) + params->ht_capa = + nla_data(info->attrs[NL80211_ATTR_HT_CAPABILITY]); + if (info->attrs[NL80211_ATTR_VHT_CAPABILITY]) + params->vht_capa = + nla_data(info->attrs[NL80211_ATTR_VHT_CAPABILITY]); + + return nl80211_parse_sta_wme(info, params); +} + static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; - int err; struct net_device *dev = info->user_ptr[1]; struct station_parameters params; - u8 *mac_addr = NULL; + u8 *mac_addr; + int err; memset(¶ms, 0, sizeof(params)); params.listen_interval = -1; - params.plink_state = -1; + + if (!rdev->ops->change_station) + return -EOPNOTSUPP; if (info->attrs[NL80211_ATTR_STA_AID]) return -EINVAL; @@ -3188,119 +3874,84 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info) nla_len(info->attrs[NL80211_ATTR_STA_SUPPORTED_RATES]); } - if (info->attrs[NL80211_ATTR_STA_LISTEN_INTERVAL]) - params.listen_interval = - nla_get_u16(info->attrs[NL80211_ATTR_STA_LISTEN_INTERVAL]); + if (info->attrs[NL80211_ATTR_STA_CAPABILITY]) { + params.capability = + nla_get_u16(info->attrs[NL80211_ATTR_STA_CAPABILITY]); + params.sta_modify_mask |= STATION_PARAM_APPLY_CAPABILITY; + } - if (info->attrs[NL80211_ATTR_HT_CAPABILITY]) - params.ht_capa = - nla_data(info->attrs[NL80211_ATTR_HT_CAPABILITY]); + if (info->attrs[NL80211_ATTR_STA_EXT_CAPABILITY]) { + params.ext_capab = + nla_data(info->attrs[NL80211_ATTR_STA_EXT_CAPABILITY]); + 
params.ext_capab_len = + nla_len(info->attrs[NL80211_ATTR_STA_EXT_CAPABILITY]); + } - if (!rdev->ops->change_station) - return -EOPNOTSUPP; + if (info->attrs[NL80211_ATTR_STA_LISTEN_INTERVAL]) + return -EINVAL; if (parse_station_flags(info, dev->ieee80211_ptr->iftype, ¶ms)) return -EINVAL; - if (info->attrs[NL80211_ATTR_STA_PLINK_ACTION]) + if (info->attrs[NL80211_ATTR_STA_PLINK_ACTION]) { params.plink_action = - nla_get_u8(info->attrs[NL80211_ATTR_STA_PLINK_ACTION]); + nla_get_u8(info->attrs[NL80211_ATTR_STA_PLINK_ACTION]); + if (params.plink_action >= NUM_NL80211_PLINK_ACTIONS) + return -EINVAL; + } - if (info->attrs[NL80211_ATTR_STA_PLINK_STATE]) + if (info->attrs[NL80211_ATTR_STA_PLINK_STATE]) { params.plink_state = - nla_get_u8(info->attrs[NL80211_ATTR_STA_PLINK_STATE]); - - switch (dev->ieee80211_ptr->iftype) { - case NL80211_IFTYPE_AP: - case NL80211_IFTYPE_AP_VLAN: - case NL80211_IFTYPE_P2P_GO: - /* disallow mesh-specific things */ - if (params.plink_action) + nla_get_u8(info->attrs[NL80211_ATTR_STA_PLINK_STATE]); + if (params.plink_state >= NUM_NL80211_PLINK_STATES) return -EINVAL; + params.sta_modify_mask |= STATION_PARAM_APPLY_PLINK_STATE; + } - /* TDLS can't be set, ... */ - if (params.sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER)) - return -EINVAL; - /* - * ... but don't bother the driver with it. This works around - * a hostapd/wpa_supplicant issue -- it always includes the - * TLDS_PEER flag in the mask even for AP mode. - */ - params.sta_flags_mask &= ~BIT(NL80211_STA_FLAG_TDLS_PEER); + if (info->attrs[NL80211_ATTR_LOCAL_MESH_POWER_MODE]) { + enum nl80211_mesh_power_mode pm = nla_get_u32( + info->attrs[NL80211_ATTR_LOCAL_MESH_POWER_MODE]); - /* accept only the listed bits */ - if (params.sta_flags_mask & - ~(BIT(NL80211_STA_FLAG_AUTHORIZED) | - BIT(NL80211_STA_FLAG_SHORT_PREAMBLE) | - BIT(NL80211_STA_FLAG_WME) | - BIT(NL80211_STA_FLAG_MFP))) + if (pm <= NL80211_MESH_POWER_UNKNOWN || + pm > NL80211_MESH_POWER_MAX) return -EINVAL; - /* must be last in here for error handling */ - params.vlan = get_vlan(info, rdev); - if (IS_ERR(params.vlan)) - return PTR_ERR(params.vlan); - break; + params.local_pm = pm; + } + + /* Include parameters for TDLS peer (will check later) */ + err = nl80211_set_station_tdls(info, ¶ms); + if (err) + return err; + + params.vlan = get_vlan(info, rdev); + if (IS_ERR(params.vlan)) + return PTR_ERR(params.vlan); + + switch (dev->ieee80211_ptr->iftype) { + case NL80211_IFTYPE_AP: + case NL80211_IFTYPE_AP_VLAN: + case NL80211_IFTYPE_P2P_GO: case NL80211_IFTYPE_P2P_CLIENT: case NL80211_IFTYPE_STATION: - /* - * Don't allow userspace to change the TDLS_PEER flag, - * but silently ignore attempts to change it since we - * don't have state here to verify that it doesn't try - * to change the flag. - */ - params.sta_flags_mask &= ~BIT(NL80211_STA_FLAG_TDLS_PEER); - /* fall through */ case NL80211_IFTYPE_ADHOC: - /* disallow things sta doesn't support */ - if (params.plink_action) - return -EINVAL; - if (params.ht_capa) - return -EINVAL; - if (params.listen_interval >= 0) - return -EINVAL; - /* reject any changes other than AUTHORIZED */ - if (params.sta_flags_mask & ~BIT(NL80211_STA_FLAG_AUTHORIZED)) - return -EINVAL; - break; case NL80211_IFTYPE_MESH_POINT: - /* disallow things mesh doesn't support */ - if (params.vlan) - return -EINVAL; - if (params.ht_capa) - return -EINVAL; - if (params.listen_interval >= 0) - return -EINVAL; - /* - * No special handling for TDLS here -- the userspace - * mesh code doesn't have this bug. 
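The set_station path now range-checks attributes at the moment they are read: plink_action must stay below NUM_NL80211_PLINK_ACTIONS, plink_state below NUM_NL80211_PLINK_STATES, and the mesh power mode strictly between UNKNOWN and MAX. A small sketch of that read-then-validate discipline with shortened names:

    #include <stdio.h>

    enum power_mode { PM_UNKNOWN, PM_ACTIVE, PM_LIGHT_SLEEP, PM_DEEP_SLEEP };
    #define PM_MAX PM_DEEP_SLEEP

    /* reject bad values at parse time, before any driver sees them */
    static int parse_power_mode(unsigned v, enum power_mode *out)
    {
        if (v == PM_UNKNOWN || v > PM_MAX)
            return -22;    /* -EINVAL */
        *out = (enum power_mode)v;
        return 0;
    }

    int main(void)
    {
        enum power_mode pm;

        printf("%d\n", parse_power_mode(2, &pm));    /* accepted: 0 */
        printf("%d\n", parse_power_mode(9, &pm));    /* rejected: -22 */
        return 0;
    }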
- */ - if (params.sta_flags_mask & - ~(BIT(NL80211_STA_FLAG_AUTHENTICATED) | - BIT(NL80211_STA_FLAG_MFP) | - BIT(NL80211_STA_FLAG_AUTHORIZED))) - return -EINVAL; break; default: - return -EOPNOTSUPP; + err = -EOPNOTSUPP; + goto out_put_vlan; } - /* be aware of params.vlan when changing code here */ - + /* driver will call cfg80211_check_station_change() */ err = rdev_change_station(rdev, dev, mac_addr, ¶ms); + out_put_vlan: if (params.vlan) dev_put(params.vlan); return err; } -static struct nla_policy -nl80211_sta_wme_policy[NL80211_STA_WME_MAX + 1] __read_mostly = { - [NL80211_STA_WME_UAPSD_QUEUES] = { .type = NLA_U8 }, - [NL80211_STA_WME_MAX_SP] = { .type = NLA_U8 }, -}; - static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; @@ -3311,6 +3962,9 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info) memset(¶ms, 0, sizeof(params)); + if (!rdev->ops->add_station) + return -EOPNOTSUPP; + if (!info->attrs[NL80211_ATTR_MAC]) return -EINVAL; @@ -3335,6 +3989,19 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info) if (!params.aid || params.aid > IEEE80211_MAX_AID) return -EINVAL; + if (info->attrs[NL80211_ATTR_STA_CAPABILITY]) { + params.capability = + nla_get_u16(info->attrs[NL80211_ATTR_STA_CAPABILITY]); + params.sta_modify_mask |= STATION_PARAM_APPLY_CAPABILITY; + } + + if (info->attrs[NL80211_ATTR_STA_EXT_CAPABILITY]) { + params.ext_capab = + nla_data(info->attrs[NL80211_ATTR_STA_EXT_CAPABILITY]); + params.ext_capab_len = + nla_len(info->attrs[NL80211_ATTR_STA_EXT_CAPABILITY]); + } + if (info->attrs[NL80211_ATTR_HT_CAPABILITY]) params.ht_capa = nla_data(info->attrs[NL80211_ATTR_HT_CAPABILITY]); @@ -3343,67 +4010,72 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info) params.vht_capa = nla_data(info->attrs[NL80211_ATTR_VHT_CAPABILITY]); - if (info->attrs[NL80211_ATTR_STA_PLINK_ACTION]) + if (info->attrs[NL80211_ATTR_STA_PLINK_ACTION]) { params.plink_action = - nla_get_u8(info->attrs[NL80211_ATTR_STA_PLINK_ACTION]); + nla_get_u8(info->attrs[NL80211_ATTR_STA_PLINK_ACTION]); + if (params.plink_action >= NUM_NL80211_PLINK_ACTIONS) + return -EINVAL; + } - if (!rdev->ops->add_station) - return -EOPNOTSUPP; + err = nl80211_parse_sta_wme(info, ¶ms); + if (err) + return err; if (parse_station_flags(info, dev->ieee80211_ptr->iftype, ¶ms)) return -EINVAL; + /* When you run into this, adjust the code below for the new flag */ + BUILD_BUG_ON(NL80211_STA_FLAG_MAX != 7); + switch (dev->ieee80211_ptr->iftype) { case NL80211_IFTYPE_AP: case NL80211_IFTYPE_AP_VLAN: case NL80211_IFTYPE_P2P_GO: - /* parse WME attributes if sta is WME capable */ - if ((rdev->wiphy.flags & WIPHY_FLAG_AP_UAPSD) && - (params.sta_flags_set & BIT(NL80211_STA_FLAG_WME)) && - info->attrs[NL80211_ATTR_STA_WME]) { - struct nlattr *tb[NL80211_STA_WME_MAX + 1]; - struct nlattr *nla; - - nla = info->attrs[NL80211_ATTR_STA_WME]; - err = nla_parse_nested(tb, NL80211_STA_WME_MAX, nla, - nl80211_sta_wme_policy); - if (err) - return err; - - if (tb[NL80211_STA_WME_UAPSD_QUEUES]) - params.uapsd_queues = - nla_get_u8(tb[NL80211_STA_WME_UAPSD_QUEUES]); - if (params.uapsd_queues & - ~IEEE80211_WMM_IE_STA_QOSINFO_AC_MASK) - return -EINVAL; - - if (tb[NL80211_STA_WME_MAX_SP]) - params.max_sp = - nla_get_u8(tb[NL80211_STA_WME_MAX_SP]); + /* ignore WME attributes if iface/sta is not capable */ + if (!(rdev->wiphy.flags & WIPHY_FLAG_AP_UAPSD) || + !(params.sta_flags_set & 
BIT(NL80211_STA_FLAG_WME))) + params.sta_modify_mask &= ~STATION_PARAM_APPLY_UAPSD; - if (params.max_sp & - ~IEEE80211_WMM_IE_STA_QOSINFO_SP_MASK) - return -EINVAL; - - params.sta_modify_mask |= STATION_PARAM_APPLY_UAPSD; - } /* TDLS peers cannot be added */ if (params.sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER)) return -EINVAL; /* but don't bother the driver with it */ params.sta_flags_mask &= ~BIT(NL80211_STA_FLAG_TDLS_PEER); + /* allow authenticated/associated only if driver handles it */ + if (!(rdev->wiphy.features & + NL80211_FEATURE_FULL_AP_CLIENT_STATE) && + params.sta_flags_mask & + (BIT(NL80211_STA_FLAG_AUTHENTICATED) | + BIT(NL80211_STA_FLAG_ASSOCIATED))) + return -EINVAL; + /* must be last in here for error handling */ params.vlan = get_vlan(info, rdev); if (IS_ERR(params.vlan)) return PTR_ERR(params.vlan); break; case NL80211_IFTYPE_MESH_POINT: + /* ignore uAPSD data */ + params.sta_modify_mask &= ~STATION_PARAM_APPLY_UAPSD; + + /* associated is disallowed */ + if (params.sta_flags_mask & BIT(NL80211_STA_FLAG_ASSOCIATED)) + return -EINVAL; /* TDLS peers cannot be added */ if (params.sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER)) return -EINVAL; break; case NL80211_IFTYPE_STATION: + case NL80211_IFTYPE_P2P_CLIENT: + /* ignore uAPSD data */ + params.sta_modify_mask &= ~STATION_PARAM_APPLY_UAPSD; + + /* these are disallowed */ + if (params.sta_flags_mask & + (BIT(NL80211_STA_FLAG_ASSOCIATED) | + BIT(NL80211_STA_FLAG_AUTHENTICATED))) + return -EINVAL; /* Only TDLS peers can be added */ if (!(params.sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER))) return -EINVAL; @@ -3413,6 +4085,11 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info) /* ... with external setup is supported */ if (!(rdev->wiphy.flags & WIPHY_FLAG_TDLS_EXTERNAL_SETUP)) return -EOPNOTSUPP; + /* + * Older wpa_supplicant versions always mark the TDLS peer + * as authorized, but it shouldn't yet be. 
+ */ + params.sta_flags_mask &= ~BIT(NL80211_STA_FLAG_AUTHORIZED); break; default: return -EOPNOTSUPP; @@ -3506,13 +4183,13 @@ static int nl80211_dump_mpath(struct sk_buff *skb, { struct mpath_info pinfo; struct cfg80211_registered_device *dev; - struct net_device *netdev; + struct wireless_dev *wdev; u8 dst[ETH_ALEN]; u8 next_hop[ETH_ALEN]; - int path_idx = cb->args[1]; + int path_idx = cb->args[2]; int err; - err = nl80211_prepare_netdev_dump(skb, cb, &dev, &netdev); + err = nl80211_prepare_wdev_dump(skb, cb, &dev, &wdev); if (err) return err; @@ -3521,14 +4198,14 @@ static int nl80211_dump_mpath(struct sk_buff *skb, goto out_err; } - if (netdev->ieee80211_ptr->iftype != NL80211_IFTYPE_MESH_POINT) { + if (wdev->iftype != NL80211_IFTYPE_MESH_POINT) { err = -EOPNOTSUPP; goto out_err; } while (1) { - err = rdev_dump_mpath(dev, netdev, path_idx, dst, next_hop, - &pinfo); + err = rdev_dump_mpath(dev, wdev->netdev, path_idx, dst, + next_hop, &pinfo); if (err == -ENOENT) break; if (err) @@ -3536,7 +4213,7 @@ static int nl80211_dump_mpath(struct sk_buff *skb, if (nl80211_send_mpath(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, - netdev, dst, next_hop, + wdev->netdev, dst, next_hop, &pinfo) < 0) goto out; @@ -3545,10 +4222,10 @@ static int nl80211_dump_mpath(struct sk_buff *skb, out: - cb->args[1] = path_idx; + cb->args[2] = path_idx; err = skb->len; out_err: - nl80211_finish_netdev_dump(dev); + nl80211_finish_wdev_dump(dev); return err; } @@ -3787,12 +4464,8 @@ static int nl80211_req_set_reg(struct sk_buff *skb, struct genl_info *info) * window between nl80211_init() and regulatory_init(), if that is * even possible. */ - mutex_lock(&cfg80211_mutex); - if (unlikely(!cfg80211_regdomain)) { - mutex_unlock(&cfg80211_mutex); + if (unlikely(!rcu_access_pointer(cfg80211_regdomain))) return -EINPROGRESS; - } - mutex_unlock(&cfg80211_mutex); if (!info->attrs[NL80211_ATTR_REG_ALPHA2]) return -EINVAL; @@ -3908,7 +4581,11 @@ static int nl80211_get_mesh_config(struct sk_buff *skb, nla_put_u16(msg, NL80211_MESHCONF_HWMP_ROOT_INTERVAL, cur_params.dot11MeshHWMProotInterval) || nla_put_u16(msg, NL80211_MESHCONF_HWMP_CONFIRMATION_INTERVAL, - cur_params.dot11MeshHWMPconfirmationInterval)) + cur_params.dot11MeshHWMPconfirmationInterval) || + nla_put_u32(msg, NL80211_MESHCONF_POWER_MODE, + cur_params.power_mode) || + nla_put_u16(msg, NL80211_MESHCONF_AWAKE_WINDOW, + cur_params.dot11MeshAwakeWindowDuration)) goto nla_put_failure; nla_nest_end(msg, pinfoattr); genlmsg_end(msg, hdr); @@ -3947,6 +4624,8 @@ static const struct nla_policy nl80211_meshconf_params_policy[NL80211_MESHCONF_A [NL80211_MESHCONF_HWMP_PATH_TO_ROOT_TIMEOUT] = { .type = NLA_U32 }, [NL80211_MESHCONF_HWMP_ROOT_INTERVAL] = { .type = NLA_U16 }, [NL80211_MESHCONF_HWMP_CONFIRMATION_INTERVAL] = { .type = NLA_U16 }, + [NL80211_MESHCONF_POWER_MODE] = { .type = NLA_U32 }, + [NL80211_MESHCONF_AWAKE_WINDOW] = { .type = NLA_U16 }, }; static const struct nla_policy @@ -3955,6 +4634,7 @@ static const struct nla_policy [NL80211_MESH_SETUP_ENABLE_VENDOR_PATH_SEL] = { .type = NLA_U8 }, [NL80211_MESH_SETUP_ENABLE_VENDOR_METRIC] = { .type = NLA_U8 }, [NL80211_MESH_SETUP_USERSPACE_AUTH] = { .type = NLA_FLAG }, + [NL80211_MESH_SETUP_USERSPACE_MPM] = { .type = NLA_FLAG }, [NL80211_MESH_SETUP_IE] = { .type = NLA_BINARY, .len = IEEE80211_MAX_DATA_LEN }, [NL80211_MESH_SETUP_USERSPACE_AMPE] = { .type = NLA_FLAG }, @@ -3967,13 +4647,15 @@ static int nl80211_parse_mesh_config(struct genl_info *info, struct nlattr *tb[NL80211_MESHCONF_ATTR_MAX + 1]; u32 
mask = 0; -#define FILL_IN_MESH_PARAM_IF_SET(table, cfg, param, mask, attr_num, nla_fn) \ -do {\ - if (table[attr_num]) {\ - cfg->param = nla_fn(table[attr_num]); \ - mask |= (1 << (attr_num - 1)); \ - } \ -} while (0);\ +#define FILL_IN_MESH_PARAM_IF_SET(tb, cfg, param, min, max, mask, attr, fn) \ +do { \ + if (tb[attr]) { \ + if (fn(tb[attr]) < min || fn(tb[attr]) > max) \ + return -EINVAL; \ + cfg->param = fn(tb[attr]); \ + mask |= (1 << (attr - 1)); \ + } \ +} while (0) if (!info->attrs[NL80211_ATTR_MESH_CONFIG]) @@ -3988,83 +4670,98 @@ do {\ BUILD_BUG_ON(NL80211_MESHCONF_ATTR_MAX > 32); /* Fill in the params struct */ - FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshRetryTimeout, + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshRetryTimeout, 1, 255, mask, NL80211_MESHCONF_RETRY_TIMEOUT, nla_get_u16); - FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshConfirmTimeout, + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshConfirmTimeout, 1, 255, mask, NL80211_MESHCONF_CONFIRM_TIMEOUT, nla_get_u16); - FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHoldingTimeout, + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHoldingTimeout, 1, 255, mask, NL80211_MESHCONF_HOLDING_TIMEOUT, nla_get_u16); - FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshMaxPeerLinks, + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshMaxPeerLinks, 0, 255, mask, NL80211_MESHCONF_MAX_PEER_LINKS, nla_get_u16); - FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshMaxRetries, + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshMaxRetries, 0, 16, mask, NL80211_MESHCONF_MAX_RETRIES, nla_get_u8); - FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshTTL, + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshTTL, 1, 255, mask, NL80211_MESHCONF_TTL, nla_get_u8); - FILL_IN_MESH_PARAM_IF_SET(tb, cfg, element_ttl, + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, element_ttl, 1, 255, mask, NL80211_MESHCONF_ELEMENT_TTL, nla_get_u8); - FILL_IN_MESH_PARAM_IF_SET(tb, cfg, auto_open_plinks, + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, auto_open_plinks, 0, 1, mask, NL80211_MESHCONF_AUTO_OPEN_PLINKS, nla_get_u8); - FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshNbrOffsetMaxNeighbor, mask, + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshNbrOffsetMaxNeighbor, + 1, 255, mask, NL80211_MESHCONF_SYNC_OFFSET_MAX_NEIGHBOR, nla_get_u32); - FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPmaxPREQretries, + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPmaxPREQretries, 0, 255, mask, NL80211_MESHCONF_HWMP_MAX_PREQ_RETRIES, nla_get_u8); - FILL_IN_MESH_PARAM_IF_SET(tb, cfg, path_refresh_time, + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, path_refresh_time, 1, 65535, mask, NL80211_MESHCONF_PATH_REFRESH_TIME, nla_get_u32); - FILL_IN_MESH_PARAM_IF_SET(tb, cfg, min_discovery_timeout, + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, min_discovery_timeout, 1, 65535, mask, NL80211_MESHCONF_MIN_DISCOVERY_TIMEOUT, nla_get_u16); - FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPactivePathTimeout, mask, + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPactivePathTimeout, + 1, 65535, mask, NL80211_MESHCONF_HWMP_ACTIVE_PATH_TIMEOUT, nla_get_u32); FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPpreqMinInterval, - mask, NL80211_MESHCONF_HWMP_PREQ_MIN_INTERVAL, + 1, 65535, mask, + NL80211_MESHCONF_HWMP_PREQ_MIN_INTERVAL, nla_get_u16); FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPperrMinInterval, - mask, NL80211_MESHCONF_HWMP_PERR_MIN_INTERVAL, + 1, 65535, mask, + NL80211_MESHCONF_HWMP_PERR_MIN_INTERVAL, nla_get_u16); FILL_IN_MESH_PARAM_IF_SET(tb, cfg, - dot11MeshHWMPnetDiameterTraversalTime, mask, + dot11MeshHWMPnetDiameterTraversalTime, + 1, 65535, mask, 
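The reworked FILL_IN_MESH_PARAM_IF_SET macro gains min and max parameters, so every mesh option is bounds-checked at the same spot it is copied, and the change mask still only records attributes that were actually present. A standalone rendition of the idea (the attribute table and field names are stand-ins):

    #include <stdio.h>

    struct cfg { unsigned retry_timeout; unsigned max_retries; };

    #define ATTR_RETRY_TIMEOUT 1
    #define ATTR_MAX_RETRIES   2

    /* toy attribute table: present[a] says whether val[a] was sent */
    static int present[8];
    static unsigned val[8];

    #define FILL_IF_SET(cfg, field, min, max, mask, attr)          \
        do {                                                       \
            if (present[attr]) {                                   \
                if (val[attr] < (min) || val[attr] > (max))        \
                    return -22;    /* -EINVAL */                   \
                (cfg)->field = val[attr];                          \
                (mask) |= 1u << ((attr) - 1);                      \
            }                                                      \
        } while (0)

    static int parse(struct cfg *cfg, unsigned *mask)
    {
        FILL_IF_SET(cfg, retry_timeout, 1, 255, *mask, ATTR_RETRY_TIMEOUT);
        FILL_IF_SET(cfg, max_retries, 0, 16, *mask, ATTR_MAX_RETRIES);
        return 0;
    }

    int main(void)
    {
        struct cfg cfg = {0};
        unsigned mask = 0;

        present[ATTR_MAX_RETRIES] = 1;
        val[ATTR_MAX_RETRIES] = 99;    /* above max: whole parse fails */
        printf("%d\n", parse(&cfg, &mask));    /* prints -22 */
        return 0;
    }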
NL80211_MESHCONF_HWMP_NET_DIAM_TRVS_TIME, nla_get_u16); - FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPRootMode, mask, - NL80211_MESHCONF_HWMP_ROOTMODE, nla_get_u8); - FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPRannInterval, mask, - NL80211_MESHCONF_HWMP_RANN_INTERVAL, + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPRootMode, 0, 4, + mask, NL80211_MESHCONF_HWMP_ROOTMODE, + nla_get_u8); + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPRannInterval, 1, 65535, + mask, NL80211_MESHCONF_HWMP_RANN_INTERVAL, nla_get_u16); FILL_IN_MESH_PARAM_IF_SET(tb, cfg, - dot11MeshGateAnnouncementProtocol, mask, - NL80211_MESHCONF_GATE_ANNOUNCEMENTS, + dot11MeshGateAnnouncementProtocol, 0, 1, + mask, NL80211_MESHCONF_GATE_ANNOUNCEMENTS, nla_get_u8); - FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshForwarding, + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshForwarding, 0, 1, mask, NL80211_MESHCONF_FORWARDING, nla_get_u8); - FILL_IN_MESH_PARAM_IF_SET(tb, cfg, rssi_threshold, + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, rssi_threshold, 1, 255, mask, NL80211_MESHCONF_RSSI_THRESHOLD, nla_get_u32); - FILL_IN_MESH_PARAM_IF_SET(tb, cfg, ht_opmode, + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, ht_opmode, 0, 16, mask, NL80211_MESHCONF_HT_OPMODE, nla_get_u16); FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPactivePathToRootTimeout, - mask, + 1, 65535, mask, NL80211_MESHCONF_HWMP_PATH_TO_ROOT_TIMEOUT, nla_get_u32); - FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMProotInterval, + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMProotInterval, 1, 65535, mask, NL80211_MESHCONF_HWMP_ROOT_INTERVAL, nla_get_u16); FILL_IN_MESH_PARAM_IF_SET(tb, cfg, - dot11MeshHWMPconfirmationInterval, mask, + dot11MeshHWMPconfirmationInterval, + 1, 65535, mask, NL80211_MESHCONF_HWMP_CONFIRMATION_INTERVAL, nla_get_u16); + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, power_mode, + NL80211_MESH_POWER_ACTIVE, + NL80211_MESH_POWER_MAX, + mask, NL80211_MESHCONF_POWER_MODE, + nla_get_u32); + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshAwakeWindowDuration, + 0, 65535, mask, + NL80211_MESHCONF_AWAKE_WINDOW, nla_get_u16); if (mask_out) *mask_out = mask; @@ -4076,6 +4773,7 @@ do {\ static int nl80211_parse_mesh_setup(struct genl_info *info, struct mesh_setup *setup) { + struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct nlattr *tb[NL80211_MESH_SETUP_ATTR_MAX + 1]; if (!info->attrs[NL80211_ATTR_MESH_SETUP]) @@ -4112,8 +4810,14 @@ static int nl80211_parse_mesh_setup(struct genl_info *info, setup->ie = nla_data(ieattr); setup->ie_len = nla_len(ieattr); } + if (tb[NL80211_MESH_SETUP_USERSPACE_MPM] && + !(rdev->wiphy.features & NL80211_FEATURE_USERSPACE_MPM)) + return -EINVAL; + setup->user_mpm = nla_get_flag(tb[NL80211_MESH_SETUP_USERSPACE_MPM]); setup->is_authenticated = nla_get_flag(tb[NL80211_MESH_SETUP_USERSPACE_AUTH]); setup->is_secure = nla_get_flag(tb[NL80211_MESH_SETUP_USERSPACE_AMPE]); + if (setup->is_secure) + setup->user_mpm = true; return 0; } @@ -4152,6 +4856,7 @@ static int nl80211_update_mesh_config(struct sk_buff *skb, static int nl80211_get_reg(struct sk_buff *skb, struct genl_info *info) { + const struct ieee80211_regdomain *regdom; struct sk_buff *msg; void *hdr = NULL; struct nlattr *nl_reg_rules; @@ -4174,35 +4879,36 @@ static int nl80211_get_reg(struct sk_buff *skb, struct genl_info *info) if (!hdr) goto put_failure; - if (nla_put_string(msg, NL80211_ATTR_REG_ALPHA2, - cfg80211_regdomain->alpha2) || - (cfg80211_regdomain->dfs_region && - nla_put_u8(msg, NL80211_ATTR_DFS_REGION, - cfg80211_regdomain->dfs_region))) - goto nla_put_failure; - if 
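
For orientation, a reader's sketch of what a single invocation of the reworked FILL_IN_MESH_PARAM_IF_SET macro above expands to. The locals (tb, cfg, mask) are those of nl80211_parse_mesh_config from the hunk; the expansion is written out by hand here for illustration and is not part of the patch:

	if (tb[NL80211_MESHCONF_RETRY_TIMEOUT]) {
		/* new in this revision of the macro: range-check the value */
		if (nla_get_u16(tb[NL80211_MESHCONF_RETRY_TIMEOUT]) < 1 ||
		    nla_get_u16(tb[NL80211_MESHCONF_RETRY_TIMEOUT]) > 255)
			return -EINVAL;
		cfg->dot11MeshRetryTimeout =
			nla_get_u16(tb[NL80211_MESHCONF_RETRY_TIMEOUT]);
		/* record which attribute was supplied by userspace */
		mask |= (1 << (NL80211_MESHCONF_RETRY_TIMEOUT - 1));
	}
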
(reg_last_request_cell_base() && nla_put_u32(msg, NL80211_ATTR_USER_REG_HINT_TYPE, NL80211_USER_REG_HINT_CELL_BASE)) goto nla_put_failure; + rcu_read_lock(); + regdom = rcu_dereference(cfg80211_regdomain); + + if (nla_put_string(msg, NL80211_ATTR_REG_ALPHA2, regdom->alpha2) || + (regdom->dfs_region && + nla_put_u8(msg, NL80211_ATTR_DFS_REGION, regdom->dfs_region))) + goto nla_put_failure_rcu; + nl_reg_rules = nla_nest_start(msg, NL80211_ATTR_REG_RULES); if (!nl_reg_rules) - goto nla_put_failure; + goto nla_put_failure_rcu; - for (i = 0; i < cfg80211_regdomain->n_reg_rules; i++) { + for (i = 0; i < regdom->n_reg_rules; i++) { struct nlattr *nl_reg_rule; const struct ieee80211_reg_rule *reg_rule; const struct ieee80211_freq_range *freq_range; const struct ieee80211_power_rule *power_rule; - reg_rule = &cfg80211_regdomain->reg_rules[i]; + reg_rule = &regdom->reg_rules[i]; freq_range = &reg_rule->freq_range; power_rule = &reg_rule->power_rule; nl_reg_rule = nla_nest_start(msg, i); if (!nl_reg_rule) - goto nla_put_failure; + goto nla_put_failure_rcu; if (nla_put_u32(msg, NL80211_ATTR_REG_RULE_FLAGS, reg_rule->flags) || @@ -4216,10 +4922,11 @@ static int nl80211_get_reg(struct sk_buff *skb, struct genl_info *info) power_rule->max_antenna_gain) || nla_put_u32(msg, NL80211_ATTR_POWER_RULE_MAX_EIRP, power_rule->max_eirp)) - goto nla_put_failure; + goto nla_put_failure_rcu; nla_nest_end(msg, nl_reg_rule); } + rcu_read_unlock(); nla_nest_end(msg, nl_reg_rules); @@ -4227,6 +4934,8 @@ static int nl80211_get_reg(struct sk_buff *skb, struct genl_info *info) err = genlmsg_reply(msg, info); goto out; +nla_put_failure_rcu: + rcu_read_unlock(); nla_put_failure: genlmsg_cancel(msg, hdr); put_failure: @@ -4259,27 +4968,18 @@ static int nl80211_set_reg(struct sk_buff *skb, struct genl_info *info) dfs_region = nla_get_u8(info->attrs[NL80211_ATTR_DFS_REGION]); nla_for_each_nested(nl_reg_rule, info->attrs[NL80211_ATTR_REG_RULES], - rem_reg_rules) { + rem_reg_rules) { num_rules++; if (num_rules > NL80211_MAX_SUPP_REG_RULES) return -EINVAL; } - mutex_lock(&cfg80211_mutex); - - if (!reg_is_valid_request(alpha2)) { - r = -EINVAL; - goto bad_reg; - } - size_of_regd = sizeof(struct ieee80211_regdomain) + - (num_rules * sizeof(struct ieee80211_reg_rule)); + num_rules * sizeof(struct ieee80211_reg_rule); rd = kzalloc(size_of_regd, GFP_KERNEL); - if (!rd) { - r = -ENOMEM; - goto bad_reg; - } + if (!rd) + return -ENOMEM; rd->n_reg_rules = num_rules; rd->alpha2[0] = alpha2[0]; @@ -4293,10 +4993,10 @@ static int nl80211_set_reg(struct sk_buff *skb, struct genl_info *info) rd->dfs_region = dfs_region; nla_for_each_nested(nl_reg_rule, info->attrs[NL80211_ATTR_REG_RULES], - rem_reg_rules) { + rem_reg_rules) { nla_parse(tb, NL80211_REG_RULE_ATTR_MAX, - nla_data(nl_reg_rule), nla_len(nl_reg_rule), - reg_rule_policy); + nla_data(nl_reg_rule), nla_len(nl_reg_rule), + reg_rule_policy); r = parse_reg_rule(tb, &rd->reg_rules[rule_idx]); if (r) goto bad_reg; @@ -4309,16 +5009,14 @@ static int nl80211_set_reg(struct sk_buff *skb, struct genl_info *info) } } - BUG_ON(rule_idx != num_rules); + mutex_lock(&cfg80211_mutex); r = set_regdom(rd); - + /* set_regdom took ownership */ + rd = NULL; mutex_unlock(&cfg80211_mutex); - return r; - bad_reg: - mutex_unlock(&cfg80211_mutex); kfree(rd); return r; } @@ -4366,14 +5064,19 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) if (!rdev->ops->scan) return -EOPNOTSUPP; - if (rdev->scan_req) - return -EBUSY; + mutex_lock(&rdev->sched_scan_mtx); + if (rdev->scan_req) { + err =
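
The nl80211_get_reg hunk above is the RCU read side of the conversion; a hedged sketch of the matching update side, i.e. roughly what set_regdom is assumed to do now that cfg80211_regdomain is an __rcu pointer (illustrative only; serialization of writers by cfg80211_mutex is assumed, as in the surrounding code):

	static void example_publish_regdom(const struct ieee80211_regdomain *rd)
	{
		const struct ieee80211_regdomain *old;

		/* writers are serialized by cfg80211_mutex in this sketch */
		old = rcu_dereference_protected(cfg80211_regdomain,
						lockdep_is_held(&cfg80211_mutex));
		rcu_assign_pointer(cfg80211_regdomain, rd);
		/* readers inside rcu_read_lock() may still be using "old" */
		if (old)
			kfree_rcu((struct ieee80211_regdomain *)old, rcu_head);
	}
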
-EBUSY; + goto unlock; + } if (info->attrs[NL80211_ATTR_SCAN_FREQUENCIES]) { n_channels = validate_scan_freqs( info->attrs[NL80211_ATTR_SCAN_FREQUENCIES]); - if (!n_channels) - return -EINVAL; + if (!n_channels) { + err = -EINVAL; + goto unlock; + } } else { enum ieee80211_band band; n_channels = 0; @@ -4387,23 +5090,29 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) nla_for_each_nested(attr, info->attrs[NL80211_ATTR_SCAN_SSIDS], tmp) n_ssids++; - if (n_ssids > wiphy->max_scan_ssids) - return -EINVAL; + if (n_ssids > wiphy->max_scan_ssids) { + err = -EINVAL; + goto unlock; + } if (info->attrs[NL80211_ATTR_IE]) ie_len = nla_len(info->attrs[NL80211_ATTR_IE]); else ie_len = 0; - if (ie_len > wiphy->max_scan_ie_len) - return -EINVAL; + if (ie_len > wiphy->max_scan_ie_len) { + err = -EINVAL; + goto unlock; + } request = kzalloc(sizeof(*request) + sizeof(*request->ssids) * n_ssids + sizeof(*request->channels) * n_channels + ie_len, GFP_KERNEL); - if (!request) - return -ENOMEM; + if (!request) { + err = -ENOMEM; + goto unlock; + } if (n_ssids) request->ssids = (void *)&request->channels[n_channels]; @@ -4540,6 +5249,8 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) kfree(request); } + unlock: + mutex_unlock(&rdev->sched_scan_mtx); return err; } @@ -4801,6 +5512,54 @@ static int nl80211_stop_sched_scan(struct sk_buff *skb, return err; } +static int nl80211_start_radar_detection(struct sk_buff *skb, + struct genl_info *info) +{ + struct cfg80211_registered_device *rdev = info->user_ptr[0]; + struct net_device *dev = info->user_ptr[1]; + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct cfg80211_chan_def chandef; + int err; + + err = nl80211_parse_chandef(rdev, info, &chandef); + if (err) + return err; + + if (wdev->cac_started) + return -EBUSY; + + err = cfg80211_chandef_dfs_required(wdev->wiphy, &chandef); + if (err < 0) + return err; + + if (err == 0) + return -EINVAL; + + if (chandef.chan->dfs_state != NL80211_DFS_USABLE) + return -EINVAL; + + if (!rdev->ops->start_radar_detection) + return -EOPNOTSUPP; + + mutex_lock(&rdev->devlist_mtx); + err = cfg80211_can_use_iftype_chan(rdev, wdev, wdev->iftype, + chandef.chan, CHAN_MODE_SHARED, + BIT(chandef.width)); + if (err) + goto err_locked; + + err = rdev->ops->start_radar_detection(&rdev->wiphy, dev, &chandef); + if (!err) { + wdev->channel = chandef.chan; + wdev->cac_started = true; + wdev->cac_start_time = jiffies; + } +err_locked: + mutex_unlock(&rdev->devlist_mtx); + + return err; +} + static int nl80211_send_bss(struct sk_buff *msg, struct netlink_callback *cb, u32 seq, int flags, struct cfg80211_registered_device *rdev, @@ -4811,6 +5570,7 @@ static int nl80211_send_bss(struct sk_buff *msg, struct netlink_callback *cb, const struct cfg80211_bss_ies *ies; void *hdr; struct nlattr *bss; + bool tsf = false; ASSERT_WDEV_LOCK(wdev); @@ -4821,9 +5581,13 @@ static int nl80211_send_bss(struct sk_buff *msg, struct netlink_callback *cb, genl_dump_check_consistent(cb, hdr, &nl80211_fam); - if (nla_put_u32(msg, NL80211_ATTR_GENERATION, rdev->bss_generation) || + if (nla_put_u32(msg, NL80211_ATTR_GENERATION, rdev->bss_generation)) + goto nla_put_failure; + if (wdev->netdev && nla_put_u32(msg, NL80211_ATTR_IFINDEX, wdev->netdev->ifindex)) goto nla_put_failure; + if (nla_put_u64(msg, NL80211_ATTR_WDEV, wdev_id(wdev))) + goto nla_put_failure; bss = nla_nest_start(msg, NL80211_ATTR_BSS); if (!bss) @@ -4834,22 +5598,24 @@ static int nl80211_send_bss(struct sk_buff *msg, struct 
netlink_callback *cb, rcu_read_lock(); ies = rcu_dereference(res->ies); - if (ies && ies->len && nla_put(msg, NL80211_BSS_INFORMATION_ELEMENTS, - ies->len, ies->data)) { - rcu_read_unlock(); - goto nla_put_failure; + if (ies) { + if (nla_put_u64(msg, NL80211_BSS_TSF, ies->tsf)) + goto fail_unlock_rcu; + tsf = true; + if (ies->len && nla_put(msg, NL80211_BSS_INFORMATION_ELEMENTS, + ies->len, ies->data)) + goto fail_unlock_rcu; } ies = rcu_dereference(res->beacon_ies); - if (ies && ies->len && nla_put(msg, NL80211_BSS_BEACON_IES, - ies->len, ies->data)) { - rcu_read_unlock(); - goto nla_put_failure; + if (ies) { + if (!tsf && nla_put_u64(msg, NL80211_BSS_TSF, ies->tsf)) + goto fail_unlock_rcu; + if (ies->len && nla_put(msg, NL80211_BSS_BEACON_IES, + ies->len, ies->data)) + goto fail_unlock_rcu; } rcu_read_unlock(); - if (res->tsf && - nla_put_u64(msg, NL80211_BSS_TSF, res->tsf)) - goto nla_put_failure; if (res->beacon_interval && nla_put_u16(msg, NL80211_BSS_BEACON_INTERVAL, res->beacon_interval)) goto nla_put_failure; @@ -4894,27 +5660,25 @@ static int nl80211_send_bss(struct sk_buff *msg, struct netlink_callback *cb, return genlmsg_end(msg, hdr); + fail_unlock_rcu: + rcu_read_unlock(); nla_put_failure: genlmsg_cancel(msg, hdr); return -EMSGSIZE; } -static int nl80211_dump_scan(struct sk_buff *skb, - struct netlink_callback *cb) +static int nl80211_dump_scan(struct sk_buff *skb, struct netlink_callback *cb) { struct cfg80211_registered_device *rdev; - struct net_device *dev; struct cfg80211_internal_bss *scan; struct wireless_dev *wdev; - int start = cb->args[1], idx = 0; + int start = cb->args[2], idx = 0; int err; - err = nl80211_prepare_netdev_dump(skb, cb, &rdev, &dev); + err = nl80211_prepare_wdev_dump(skb, cb, &rdev, &wdev); if (err) return err; - wdev = dev->ieee80211_ptr; - wdev_lock(wdev); spin_lock_bh(&rdev->bss_lock); cfg80211_bss_expire(rdev); @@ -4935,8 +5699,8 @@ static int nl80211_dump_scan(struct sk_buff *skb, spin_unlock_bh(&rdev->bss_lock); wdev_unlock(wdev); - cb->args[1] = idx; - nl80211_finish_netdev_dump(rdev); + cb->args[2] = idx; + nl80211_finish_wdev_dump(rdev); return skb->len; } @@ -5005,14 +5769,19 @@ static int nl80211_dump_survey(struct sk_buff *skb, { struct survey_info survey; struct cfg80211_registered_device *dev; - struct net_device *netdev; - int survey_idx = cb->args[1]; + struct wireless_dev *wdev; + int survey_idx = cb->args[2]; int res; - res = nl80211_prepare_netdev_dump(skb, cb, &dev, &netdev); + res = nl80211_prepare_wdev_dump(skb, cb, &dev, &wdev); if (res) return res; + if (!wdev->netdev) { + res = -EINVAL; + goto out_err; + } + if (!dev->ops->dump_survey) { res = -EOPNOTSUPP; goto out_err; @@ -5021,7 +5790,7 @@ static int nl80211_dump_survey(struct sk_buff *skb, while (1) { struct ieee80211_channel *chan; - res = rdev_dump_survey(dev, netdev, survey_idx, &survey); + res = rdev_dump_survey(dev, wdev->netdev, survey_idx, &survey); if (res == -ENOENT) break; if (res) @@ -5043,17 +5812,16 @@ static int nl80211_dump_survey(struct sk_buff *skb, if (nl80211_send_survey(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, - netdev, - &survey) < 0) + wdev->netdev, &survey) < 0) goto out; survey_idx++; } out: - cb->args[1] = survey_idx; + cb->args[2] = survey_idx; res = skb->len; out_err: - nl80211_finish_netdev_dump(dev); + nl80211_finish_wdev_dump(dev); return res; } @@ -5261,14 +6029,10 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct 
net_device *dev = info->user_ptr[1]; - struct cfg80211_crypto_settings crypto; struct ieee80211_channel *chan; - const u8 *bssid, *ssid, *ie = NULL, *prev_bssid = NULL; - int err, ssid_len, ie_len = 0; - bool use_mfp = false; - u32 flags = 0; - struct ieee80211_ht_cap *ht_capa = NULL; - struct ieee80211_ht_cap *ht_capa_mask = NULL; + struct cfg80211_assoc_request req = {}; + const u8 *bssid, *ssid; + int err, ssid_len = 0; if (!is_valid_ie_attr(info->attrs[NL80211_ATTR_IE])) return -EINVAL; @@ -5296,41 +6060,58 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info) ssid_len = nla_len(info->attrs[NL80211_ATTR_SSID]); if (info->attrs[NL80211_ATTR_IE]) { - ie = nla_data(info->attrs[NL80211_ATTR_IE]); - ie_len = nla_len(info->attrs[NL80211_ATTR_IE]); + req.ie = nla_data(info->attrs[NL80211_ATTR_IE]); + req.ie_len = nla_len(info->attrs[NL80211_ATTR_IE]); } if (info->attrs[NL80211_ATTR_USE_MFP]) { enum nl80211_mfp mfp = nla_get_u32(info->attrs[NL80211_ATTR_USE_MFP]); if (mfp == NL80211_MFP_REQUIRED) - use_mfp = true; + req.use_mfp = true; else if (mfp != NL80211_MFP_NO) return -EINVAL; } if (info->attrs[NL80211_ATTR_PREV_BSSID]) - prev_bssid = nla_data(info->attrs[NL80211_ATTR_PREV_BSSID]); + req.prev_bssid = nla_data(info->attrs[NL80211_ATTR_PREV_BSSID]); if (nla_get_flag(info->attrs[NL80211_ATTR_DISABLE_HT])) - flags |= ASSOC_REQ_DISABLE_HT; + req.flags |= ASSOC_REQ_DISABLE_HT; if (info->attrs[NL80211_ATTR_HT_CAPABILITY_MASK]) - ht_capa_mask = - nla_data(info->attrs[NL80211_ATTR_HT_CAPABILITY_MASK]); + memcpy(&req.ht_capa_mask, + nla_data(info->attrs[NL80211_ATTR_HT_CAPABILITY_MASK]), + sizeof(req.ht_capa_mask)); if (info->attrs[NL80211_ATTR_HT_CAPABILITY]) { - if (!ht_capa_mask) + if (!info->attrs[NL80211_ATTR_HT_CAPABILITY_MASK]) return -EINVAL; - ht_capa = nla_data(info->attrs[NL80211_ATTR_HT_CAPABILITY]); + memcpy(&req.ht_capa, + nla_data(info->attrs[NL80211_ATTR_HT_CAPABILITY]), + sizeof(req.ht_capa)); } - err = nl80211_crypto_settings(rdev, info, &crypto, 1); + if (nla_get_flag(info->attrs[NL80211_ATTR_DISABLE_VHT])) + req.flags |= ASSOC_REQ_DISABLE_VHT; + + if (info->attrs[NL80211_ATTR_VHT_CAPABILITY_MASK]) + memcpy(&req.vht_capa_mask, + nla_data(info->attrs[NL80211_ATTR_VHT_CAPABILITY_MASK]), + sizeof(req.vht_capa_mask)); + + if (info->attrs[NL80211_ATTR_VHT_CAPABILITY]) { + if (!info->attrs[NL80211_ATTR_VHT_CAPABILITY_MASK]) + return -EINVAL; + memcpy(&req.vht_capa, + nla_data(info->attrs[NL80211_ATTR_VHT_CAPABILITY]), + sizeof(req.vht_capa)); + } + + err = nl80211_crypto_settings(rdev, info, &req.crypto, 1); if (!err) - err = cfg80211_mlme_assoc(rdev, dev, chan, bssid, prev_bssid, - ssid, ssid_len, ie, ie_len, use_mfp, - &crypto, flags, ht_capa, - ht_capa_mask); + err = cfg80211_mlme_assoc(rdev, dev, chan, bssid, + ssid, ssid_len, &req); return err; } @@ -5867,6 +6648,15 @@ static int nl80211_connect(struct sk_buff *skb, struct genl_info *info) connect.ie_len = nla_len(info->attrs[NL80211_ATTR_IE]); } + if (info->attrs[NL80211_ATTR_USE_MFP]) { + connect.mfp = nla_get_u32(info->attrs[NL80211_ATTR_USE_MFP]); + if (connect.mfp != NL80211_MFP_REQUIRED && + connect.mfp != NL80211_MFP_NO) + return -EINVAL; + } else { + connect.mfp = NL80211_MFP_NO; + } + if (info->attrs[NL80211_ATTR_WIPHY_FREQ]) { connect.channel = ieee80211_get_channel(wiphy, @@ -5901,6 +6691,24 @@ static int nl80211_connect(struct sk_buff *skb, struct genl_info *info) sizeof(connect.ht_capa)); } + if (nla_get_flag(info->attrs[NL80211_ATTR_DISABLE_VHT])) + connect.flags |= ASSOC_REQ_DISABLE_VHT; + + 
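
Both the associate and connect paths now apply the same rule for HT and VHT overrides: a capability attribute is only accepted together with its mask, and both are copied into fixed-size struct fields instead of being kept as pointers into the netlink message. A small hypothetical helper capturing the pattern (attribute lengths are assumed to be enforced by the nla policy, as in the real code):

	static int example_copy_override(const struct nlattr *capa,
					 const struct nlattr *mask,
					 void *capa_out, void *mask_out,
					 size_t len)
	{
		if (mask)
			memcpy(mask_out, nla_data(mask), len);
		if (capa) {
			if (!mask)	/* override without a mask is rejected */
				return -EINVAL;
			memcpy(capa_out, nla_data(capa), len);
		}
		return 0;
	}
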
if (info->attrs[NL80211_ATTR_VHT_CAPABILITY_MASK]) + memcpy(&connect.vht_capa_mask, + nla_data(info->attrs[NL80211_ATTR_VHT_CAPABILITY_MASK]), + sizeof(connect.vht_capa_mask)); + + if (info->attrs[NL80211_ATTR_VHT_CAPABILITY]) { + if (!info->attrs[NL80211_ATTR_VHT_CAPABILITY_MASK]) { + kfree(connkeys); + return -EINVAL; + } + memcpy(&connect.vht_capa, + nla_data(info->attrs[NL80211_ATTR_VHT_CAPABILITY]), + sizeof(connect.vht_capa)); + } + err = cfg80211_connect(rdev, dev, &connect, connkeys); if (err) kfree(connkeys); @@ -6652,6 +7460,21 @@ static int nl80211_join_mesh(struct sk_buff *skb, struct genl_info *info) nla_get_u32(info->attrs[NL80211_ATTR_MCAST_RATE]))) return -EINVAL; + if (info->attrs[NL80211_ATTR_BEACON_INTERVAL]) { + setup.beacon_interval = + nla_get_u32(info->attrs[NL80211_ATTR_BEACON_INTERVAL]); + if (setup.beacon_interval < 10 || + setup.beacon_interval > 10000) + return -EINVAL; + } + + if (info->attrs[NL80211_ATTR_DTIM_PERIOD]) { + setup.dtim_period = + nla_get_u32(info->attrs[NL80211_ATTR_DTIM_PERIOD]); + if (setup.dtim_period < 1 || setup.dtim_period > 100) + return -EINVAL; + } + if (info->attrs[NL80211_ATTR_MESH_SETUP]) { /* parse additional setup parameters if given */ err = nl80211_parse_mesh_setup(info, &setup); @@ -6659,6 +7482,9 @@ static int nl80211_join_mesh(struct sk_buff *skb, struct genl_info *info) return err; } + if (setup.user_mpm) + cfg.auto_open_plinks = false; + if (info->attrs[NL80211_ATTR_WIPHY_FREQ]) { err = nl80211_parse_chandef(rdev, info, &setup.chandef); if (err) @@ -6680,16 +7506,100 @@ static int nl80211_leave_mesh(struct sk_buff *skb, struct genl_info *info) } #ifdef CONFIG_PM +static int nl80211_send_wowlan_patterns(struct sk_buff *msg, + struct cfg80211_registered_device *rdev) +{ + struct nlattr *nl_pats, *nl_pat; + int i, pat_len; + + if (!rdev->wowlan->n_patterns) + return 0; + + nl_pats = nla_nest_start(msg, NL80211_WOWLAN_TRIG_PKT_PATTERN); + if (!nl_pats) + return -ENOBUFS; + + for (i = 0; i < rdev->wowlan->n_patterns; i++) { + nl_pat = nla_nest_start(msg, i + 1); + if (!nl_pat) + return -ENOBUFS; + pat_len = rdev->wowlan->patterns[i].pattern_len; + if (nla_put(msg, NL80211_WOWLAN_PKTPAT_MASK, + DIV_ROUND_UP(pat_len, 8), + rdev->wowlan->patterns[i].mask) || + nla_put(msg, NL80211_WOWLAN_PKTPAT_PATTERN, + pat_len, rdev->wowlan->patterns[i].pattern) || + nla_put_u32(msg, NL80211_WOWLAN_PKTPAT_OFFSET, + rdev->wowlan->patterns[i].pkt_offset)) + return -ENOBUFS; + nla_nest_end(msg, nl_pat); + } + nla_nest_end(msg, nl_pats); + + return 0; +} + +static int nl80211_send_wowlan_tcp(struct sk_buff *msg, + struct cfg80211_wowlan_tcp *tcp) +{ + struct nlattr *nl_tcp; + + if (!tcp) + return 0; + + nl_tcp = nla_nest_start(msg, NL80211_WOWLAN_TRIG_TCP_CONNECTION); + if (!nl_tcp) + return -ENOBUFS; + + if (nla_put_be32(msg, NL80211_WOWLAN_TCP_SRC_IPV4, tcp->src) || + nla_put_be32(msg, NL80211_WOWLAN_TCP_DST_IPV4, tcp->dst) || + nla_put(msg, NL80211_WOWLAN_TCP_DST_MAC, ETH_ALEN, tcp->dst_mac) || + nla_put_u16(msg, NL80211_WOWLAN_TCP_SRC_PORT, tcp->src_port) || + nla_put_u16(msg, NL80211_WOWLAN_TCP_DST_PORT, tcp->dst_port) || + nla_put(msg, NL80211_WOWLAN_TCP_DATA_PAYLOAD, + tcp->payload_len, tcp->payload) || + nla_put_u32(msg, NL80211_WOWLAN_TCP_DATA_INTERVAL, + tcp->data_interval) || + nla_put(msg, NL80211_WOWLAN_TCP_WAKE_PAYLOAD, + tcp->wake_len, tcp->wake_data) || + nla_put(msg, NL80211_WOWLAN_TCP_WAKE_MASK, + DIV_ROUND_UP(tcp->wake_len, 8), tcp->wake_mask)) + return -ENOBUFS; + + if (tcp->payload_seq.len && + nla_put(msg, 
NL80211_WOWLAN_TCP_DATA_PAYLOAD_SEQ, + sizeof(tcp->payload_seq), &tcp->payload_seq)) + return -ENOBUFS; + + if (tcp->payload_tok.len && + nla_put(msg, NL80211_WOWLAN_TCP_DATA_PAYLOAD_TOKEN, + sizeof(tcp->payload_tok) + tcp->tokens_size, + &tcp->payload_tok)) + return -ENOBUFS; + + return 0; +} + static int nl80211_get_wowlan(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct sk_buff *msg; void *hdr; + u32 size = NLMSG_DEFAULT_SIZE; - if (!rdev->wiphy.wowlan.flags && !rdev->wiphy.wowlan.n_patterns) + if (!rdev->wiphy.wowlan.flags && !rdev->wiphy.wowlan.n_patterns && + !rdev->wiphy.wowlan.tcp) return -EOPNOTSUPP; - msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (rdev->wowlan && rdev->wowlan->tcp) { + /* adjust size to have room for all the data */ + size += rdev->wowlan->tcp->tokens_size + + rdev->wowlan->tcp->payload_len + + rdev->wowlan->tcp->wake_len + + rdev->wowlan->tcp->wake_len / 8; + } + + msg = nlmsg_new(size, GFP_KERNEL); if (!msg) return -ENOMEM; @@ -6720,31 +7630,12 @@ static int nl80211_get_wowlan(struct sk_buff *skb, struct genl_info *info) (rdev->wowlan->rfkill_release && nla_put_flag(msg, NL80211_WOWLAN_TRIG_RFKILL_RELEASE))) goto nla_put_failure; - if (rdev->wowlan->n_patterns) { - struct nlattr *nl_pats, *nl_pat; - int i, pat_len; - nl_pats = nla_nest_start(msg, - NL80211_WOWLAN_TRIG_PKT_PATTERN); - if (!nl_pats) - goto nla_put_failure; + if (nl80211_send_wowlan_patterns(msg, rdev)) + goto nla_put_failure; - for (i = 0; i < rdev->wowlan->n_patterns; i++) { - nl_pat = nla_nest_start(msg, i + 1); - if (!nl_pat) - goto nla_put_failure; - pat_len = rdev->wowlan->patterns[i].pattern_len; - if (nla_put(msg, NL80211_WOWLAN_PKTPAT_MASK, - DIV_ROUND_UP(pat_len, 8), - rdev->wowlan->patterns[i].mask) || - nla_put(msg, NL80211_WOWLAN_PKTPAT_PATTERN, - pat_len, - rdev->wowlan->patterns[i].pattern)) - goto nla_put_failure; - nla_nest_end(msg, nl_pat); - } - nla_nest_end(msg, nl_pats); - } + if (nl80211_send_wowlan_tcp(msg, rdev->wowlan->tcp)) + goto nla_put_failure; nla_nest_end(msg, nl_wowlan); } @@ -6757,6 +7648,151 @@ nla_put_failure: return -ENOBUFS; } +static int nl80211_parse_wowlan_tcp(struct cfg80211_registered_device *rdev, + struct nlattr *attr, + struct cfg80211_wowlan *trig) +{ + struct nlattr *tb[NUM_NL80211_WOWLAN_TCP]; + struct cfg80211_wowlan_tcp *cfg; + struct nl80211_wowlan_tcp_data_token *tok = NULL; + struct nl80211_wowlan_tcp_data_seq *seq = NULL; + u32 size; + u32 data_size, wake_size, tokens_size = 0, wake_mask_size; + int err, port; + + if (!rdev->wiphy.wowlan.tcp) + return -EINVAL; + + err = nla_parse(tb, MAX_NL80211_WOWLAN_TCP, + nla_data(attr), nla_len(attr), + nl80211_wowlan_tcp_policy); + if (err) + return err; + + if (!tb[NL80211_WOWLAN_TCP_SRC_IPV4] || + !tb[NL80211_WOWLAN_TCP_DST_IPV4] || + !tb[NL80211_WOWLAN_TCP_DST_MAC] || + !tb[NL80211_WOWLAN_TCP_DST_PORT] || + !tb[NL80211_WOWLAN_TCP_DATA_PAYLOAD] || + !tb[NL80211_WOWLAN_TCP_DATA_INTERVAL] || + !tb[NL80211_WOWLAN_TCP_WAKE_PAYLOAD] || + !tb[NL80211_WOWLAN_TCP_WAKE_MASK]) + return -EINVAL; + + data_size = nla_len(tb[NL80211_WOWLAN_TCP_DATA_PAYLOAD]); + if (data_size > rdev->wiphy.wowlan.tcp->data_payload_max) + return -EINVAL; + + if (nla_get_u32(tb[NL80211_WOWLAN_TCP_DATA_INTERVAL]) > + rdev->wiphy.wowlan.tcp->data_interval_max || + nla_get_u32(tb[NL80211_WOWLAN_TCP_DATA_INTERVAL]) == 0) + return -EINVAL; + + wake_size = nla_len(tb[NL80211_WOWLAN_TCP_WAKE_PAYLOAD]); + if (wake_size > rdev->wiphy.wowlan.tcp->wake_payload_max) + return 
-EINVAL; + + wake_mask_size = nla_len(tb[NL80211_WOWLAN_TCP_WAKE_MASK]); + if (wake_mask_size != DIV_ROUND_UP(wake_size, 8)) + return -EINVAL; + + if (tb[NL80211_WOWLAN_TCP_DATA_PAYLOAD_TOKEN]) { + u32 tokln = nla_len(tb[NL80211_WOWLAN_TCP_DATA_PAYLOAD_TOKEN]); + + tok = nla_data(tb[NL80211_WOWLAN_TCP_DATA_PAYLOAD_TOKEN]); + tokens_size = tokln - sizeof(*tok); + + if (!tok->len || tokens_size % tok->len) + return -EINVAL; + if (!rdev->wiphy.wowlan.tcp->tok) + return -EINVAL; + if (tok->len > rdev->wiphy.wowlan.tcp->tok->max_len) + return -EINVAL; + if (tok->len < rdev->wiphy.wowlan.tcp->tok->min_len) + return -EINVAL; + if (tokens_size > rdev->wiphy.wowlan.tcp->tok->bufsize) + return -EINVAL; + if (tok->offset + tok->len > data_size) + return -EINVAL; + } + + if (tb[NL80211_WOWLAN_TCP_DATA_PAYLOAD_SEQ]) { + seq = nla_data(tb[NL80211_WOWLAN_TCP_DATA_PAYLOAD_SEQ]); + if (!rdev->wiphy.wowlan.tcp->seq) + return -EINVAL; + if (seq->len == 0 || seq->len > 4) + return -EINVAL; + if (seq->len + seq->offset > data_size) + return -EINVAL; + } + + size = sizeof(*cfg); + size += data_size; + size += wake_size + wake_mask_size; + size += tokens_size; + + cfg = kzalloc(size, GFP_KERNEL); + if (!cfg) + return -ENOMEM; + cfg->src = nla_get_be32(tb[NL80211_WOWLAN_TCP_SRC_IPV4]); + cfg->dst = nla_get_be32(tb[NL80211_WOWLAN_TCP_DST_IPV4]); + memcpy(cfg->dst_mac, nla_data(tb[NL80211_WOWLAN_TCP_DST_MAC]), + ETH_ALEN); + if (tb[NL80211_WOWLAN_TCP_SRC_PORT]) + port = nla_get_u16(tb[NL80211_WOWLAN_TCP_SRC_PORT]); + else + port = 0; +#ifdef CONFIG_INET + /* allocate a socket and port for it and use it */ + err = __sock_create(wiphy_net(&rdev->wiphy), PF_INET, SOCK_STREAM, + IPPROTO_TCP, &cfg->sock, 1); + if (err) { + kfree(cfg); + return err; + } + if (inet_csk_get_port(cfg->sock->sk, port)) { + sock_release(cfg->sock); + kfree(cfg); + return -EADDRINUSE; + } + cfg->src_port = inet_sk(cfg->sock->sk)->inet_num; +#else + if (!port) { + kfree(cfg); + return -EINVAL; + } + cfg->src_port = port; +#endif + + cfg->dst_port = nla_get_u16(tb[NL80211_WOWLAN_TCP_DST_PORT]); + cfg->payload_len = data_size; + cfg->payload = (u8 *)cfg + sizeof(*cfg) + tokens_size; + memcpy((void *)cfg->payload, + nla_data(tb[NL80211_WOWLAN_TCP_DATA_PAYLOAD]), + data_size); + if (seq) + cfg->payload_seq = *seq; + cfg->data_interval = nla_get_u32(tb[NL80211_WOWLAN_TCP_DATA_INTERVAL]); + cfg->wake_len = wake_size; + cfg->wake_data = (u8 *)cfg + sizeof(*cfg) + tokens_size + data_size; + memcpy((void *)cfg->wake_data, + nla_data(tb[NL80211_WOWLAN_TCP_WAKE_PAYLOAD]), + wake_size); + cfg->wake_mask = (u8 *)cfg + sizeof(*cfg) + tokens_size + + data_size + wake_size; + memcpy((void *)cfg->wake_mask, + nla_data(tb[NL80211_WOWLAN_TCP_WAKE_MASK]), + wake_mask_size); + if (tok) { + cfg->tokens_size = tokens_size; + memcpy(&cfg->payload_tok, tok, sizeof(*tok) + tokens_size); + } + + trig->tcp = cfg; + + return 0; +} + static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; @@ -6767,7 +7803,8 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info) int err, i; bool prev_enabled = rdev->wowlan; - if (!rdev->wiphy.wowlan.flags && !rdev->wiphy.wowlan.n_patterns) + if (!rdev->wiphy.wowlan.flags && !rdev->wiphy.wowlan.n_patterns && + !rdev->wiphy.wowlan.tcp) return -EOPNOTSUPP; if (!info->attrs[NL80211_ATTR_WOWLAN_TRIGGERS]) { @@ -6831,7 +7868,7 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info) if (tb[NL80211_WOWLAN_TRIG_PKT_PATTERN]) 
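
The TCP wakeup configuration parsed above lives in one kzalloc'd buffer, with the variable-length pieces appended after the struct; the pointer arithmetic in the hunk corresponds to this layout (a reader's diagram, not text from the patch):

	/*
	 *  (u8 *)cfg -> +----------------------------+
	 *               | struct cfg80211_wowlan_tcp |  payload_tok is the
	 *               |                            |  last member
	 *               +----------------------------+  + sizeof(*cfg)
	 *               | token stream               |  tokens_size bytes,
	 *               |                            |  reached via payload_tok
	 *               +----------------------------+  -> cfg->payload
	 *               | data payload               |  data_size bytes
	 *               +----------------------------+  -> cfg->wake_data
	 *               | wake payload               |  wake_size bytes
	 *               +----------------------------+  -> cfg->wake_mask
	 *               | wake mask                  |  DIV_ROUND_UP(wake_size, 8)
	 *               +----------------------------+
	 */
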
{ struct nlattr *pat; int n_patterns = 0; - int rem, pat_len, mask_len; + int rem, pat_len, mask_len, pkt_offset; struct nlattr *pat_tb[NUM_NL80211_WOWLAN_PKTPAT]; nla_for_each_nested(pat, tb[NL80211_WOWLAN_TRIG_PKT_PATTERN], @@ -6866,6 +7903,15 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info) pat_len < wowlan->pattern_min_len) goto error; + if (!pat_tb[NL80211_WOWLAN_PKTPAT_OFFSET]) + pkt_offset = 0; + else + pkt_offset = nla_get_u32( + pat_tb[NL80211_WOWLAN_PKTPAT_OFFSET]); + if (pkt_offset > wowlan->max_pkt_offset) + goto error; + new_triggers.patterns[i].pkt_offset = pkt_offset; + new_triggers.patterns[i].mask = kmalloc(mask_len + pat_len, GFP_KERNEL); if (!new_triggers.patterns[i].mask) { @@ -6885,6 +7931,14 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info) } } + if (tb[NL80211_WOWLAN_TRIG_TCP_CONNECTION]) { + err = nl80211_parse_wowlan_tcp( + rdev, tb[NL80211_WOWLAN_TRIG_TCP_CONNECTION], + &new_triggers); + if (err) + goto error; + } + ntrig = kmemdup(&new_triggers, sizeof(new_triggers), GFP_KERNEL); if (!ntrig) { err = -ENOMEM; @@ -6902,6 +7956,9 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info) for (i = 0; i < new_triggers.n_patterns; i++) kfree(new_triggers.patterns[i].mask); kfree(new_triggers.patterns); + if (new_triggers.tcp && new_triggers.tcp->sock) + sock_release(new_triggers.tcp->sock); + kfree(new_triggers.tcp); return err; } #endif @@ -7106,21 +8163,118 @@ static int nl80211_stop_p2p_device(struct sk_buff *skb, struct genl_info *info) if (!rdev->ops->stop_p2p_device) return -EOPNOTSUPP; - if (!wdev->p2p_started) - return 0; - - rdev_stop_p2p_device(rdev, wdev); - wdev->p2p_started = false; - mutex_lock(&rdev->devlist_mtx); - rdev->opencount--; + mutex_lock(&rdev->sched_scan_mtx); + cfg80211_stop_p2p_device(rdev, wdev); + mutex_unlock(&rdev->sched_scan_mtx); mutex_unlock(&rdev->devlist_mtx); - if (WARN_ON(rdev->scan_req && rdev->scan_req->wdev == wdev)) { - rdev->scan_req->aborted = true; - ___cfg80211_scan_done(rdev, true); - } + return 0; +} + +static int nl80211_get_protocol_features(struct sk_buff *skb, + struct genl_info *info) +{ + void *hdr; + struct sk_buff *msg; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0, + NL80211_CMD_GET_PROTOCOL_FEATURES); + if (!hdr) + goto nla_put_failure; + + if (nla_put_u32(msg, NL80211_ATTR_PROTOCOL_FEATURES, + NL80211_PROTOCOL_FEATURE_SPLIT_WIPHY_DUMP)) + goto nla_put_failure; + + genlmsg_end(msg, hdr); + return genlmsg_reply(msg, info); + + nla_put_failure: + kfree_skb(msg); + return -ENOBUFS; +} + +static int nl80211_update_ft_ies(struct sk_buff *skb, struct genl_info *info) +{ + struct cfg80211_registered_device *rdev = info->user_ptr[0]; + struct cfg80211_update_ft_ies_params ft_params; + struct net_device *dev = info->user_ptr[1]; + if (!rdev->ops->update_ft_ies) + return -EOPNOTSUPP; + + if (!info->attrs[NL80211_ATTR_MDID] || + !is_valid_ie_attr(info->attrs[NL80211_ATTR_IE])) + return -EINVAL; + + memset(&ft_params, 0, sizeof(ft_params)); + ft_params.md = nla_get_u16(info->attrs[NL80211_ATTR_MDID]); + ft_params.ie = nla_data(info->attrs[NL80211_ATTR_IE]); + ft_params.ie_len = nla_len(info->attrs[NL80211_ATTR_IE]); + + return rdev_update_ft_ies(rdev, dev, &ft_params); +} + +static int nl80211_crit_protocol_start(struct sk_buff *skb, + struct genl_info *info) +{ + struct cfg80211_registered_device *rdev = info->user_ptr[0]; + struct wireless_dev 
*wdev = info->user_ptr[1]; + enum nl80211_crit_proto_id proto = NL80211_CRIT_PROTO_UNSPEC; + u16 duration; + int ret; + + if (!rdev->ops->crit_proto_start) + return -EOPNOTSUPP; + + if (WARN_ON(!rdev->ops->crit_proto_stop)) + return -EINVAL; + + if (rdev->crit_proto_nlportid) + return -EBUSY; + + /* determine protocol if provided */ + if (info->attrs[NL80211_ATTR_CRIT_PROT_ID]) + proto = nla_get_u16(info->attrs[NL80211_ATTR_CRIT_PROT_ID]); + + if (proto >= NUM_NL80211_CRIT_PROTO) + return -EINVAL; + + /* timeout must be provided */ + if (!info->attrs[NL80211_ATTR_MAX_CRIT_PROT_DURATION]) + return -EINVAL; + + duration = + nla_get_u16(info->attrs[NL80211_ATTR_MAX_CRIT_PROT_DURATION]); + + if (duration > NL80211_CRIT_PROTO_MAX_DURATION) + return -ERANGE; + + ret = rdev_crit_proto_start(rdev, wdev, proto, duration); + if (!ret) + rdev->crit_proto_nlportid = info->snd_portid; + + return ret; +} + +static int nl80211_crit_protocol_stop(struct sk_buff *skb, + struct genl_info *info) +{ + struct cfg80211_registered_device *rdev = info->user_ptr[0]; + struct wireless_dev *wdev = info->user_ptr[1]; + + if (!rdev->ops->crit_proto_stop) + return -EOPNOTSUPP; + + if (rdev->crit_proto_nlportid) { + rdev->crit_proto_nlportid = 0; + rdev_crit_proto_stop(rdev, wdev); + } return 0; } @@ -7784,6 +8938,51 @@ static struct genl_ops nl80211_ops[] = { .internal_flags = NL80211_FLAG_NEED_NETDEV | NL80211_FLAG_NEED_RTNL, }, + { + .cmd = NL80211_CMD_SET_MAC_ACL, + .doit = nl80211_set_mac_acl, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = NL80211_FLAG_NEED_NETDEV | + NL80211_FLAG_NEED_RTNL, + }, + { + .cmd = NL80211_CMD_RADAR_DETECT, + .doit = nl80211_start_radar_detection, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | + NL80211_FLAG_NEED_RTNL, + }, + { + .cmd = NL80211_CMD_GET_PROTOCOL_FEATURES, + .doit = nl80211_get_protocol_features, + .policy = nl80211_policy, + }, + { + .cmd = NL80211_CMD_UPDATE_FT_IES, + .doit = nl80211_update_ft_ies, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | + NL80211_FLAG_NEED_RTNL, + }, + { + .cmd = NL80211_CMD_CRIT_PROTOCOL_START, + .doit = nl80211_crit_protocol_start, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = NL80211_FLAG_NEED_WDEV_UP | + NL80211_FLAG_NEED_RTNL, + }, + { + .cmd = NL80211_CMD_CRIT_PROTOCOL_STOP, + .doit = nl80211_crit_protocol_stop, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = NL80211_FLAG_NEED_WDEV_UP | + NL80211_FLAG_NEED_RTNL, + } }; static struct genl_multicast_group nl80211_mlme_mcgrp = { @@ -7811,7 +9010,8 @@ void nl80211_notify_dev_rename(struct cfg80211_registered_device *rdev) if (!msg) return; - if (nl80211_send_wiphy(msg, 0, 0, 0, rdev) < 0) { + if (nl80211_send_wiphy(rdev, msg, 0, 0, 0, + false, NULL, NULL, NULL) < 0) { nlmsg_free(msg); return; } @@ -7827,7 +9027,7 @@ static int nl80211_add_scan_req(struct sk_buff *msg, struct nlattr *nest; int i; - ASSERT_RDEV_LOCK(rdev); + lockdep_assert_held(&rdev->sched_scan_mtx); if (WARN_ON(!req)) return 0; @@ -8051,7 +9251,7 @@ void nl80211_send_reg_change_event(struct regulatory_request *request) goto nla_put_failure; } - if (wiphy_idx_valid(request->wiphy_idx) && + if (request->wiphy_idx != WIPHY_IDX_INVALID && nla_put_u32(msg, NL80211_ATTR_WIPHY, request->wiphy_idx)) goto nla_put_failure; @@ -8135,21 +9335,31 @@ void nl80211_send_disassoc(struct cfg80211_registered_device *rdev, 
NL80211_CMD_DISASSOCIATE, gfp); } -void nl80211_send_unprot_deauth(struct cfg80211_registered_device *rdev, - struct net_device *netdev, const u8 *buf, - size_t len, gfp_t gfp) +void cfg80211_send_unprot_deauth(struct net_device *dev, const u8 *buf, + size_t len) { - nl80211_send_mlme_event(rdev, netdev, buf, len, - NL80211_CMD_UNPROT_DEAUTHENTICATE, gfp); + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct wiphy *wiphy = wdev->wiphy; + struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + + trace_cfg80211_send_unprot_deauth(dev); + nl80211_send_mlme_event(rdev, dev, buf, len, + NL80211_CMD_UNPROT_DEAUTHENTICATE, GFP_ATOMIC); } +EXPORT_SYMBOL(cfg80211_send_unprot_deauth); -void nl80211_send_unprot_disassoc(struct cfg80211_registered_device *rdev, - struct net_device *netdev, const u8 *buf, - size_t len, gfp_t gfp) +void cfg80211_send_unprot_disassoc(struct net_device *dev, const u8 *buf, + size_t len) { - nl80211_send_mlme_event(rdev, netdev, buf, len, - NL80211_CMD_UNPROT_DISASSOCIATE, gfp); + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct wiphy *wiphy = wdev->wiphy; + struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + + trace_cfg80211_send_unprot_disassoc(dev); + nl80211_send_mlme_event(rdev, dev, buf, len, + NL80211_CMD_UNPROT_DISASSOCIATE, GFP_ATOMIC); } +EXPORT_SYMBOL(cfg80211_send_unprot_disassoc); static void nl80211_send_mlme_timeout(struct cfg80211_registered_device *rdev, struct net_device *netdev, int cmd, @@ -8352,14 +9562,19 @@ void nl80211_send_ibss_bssid(struct cfg80211_registered_device *rdev, nlmsg_free(msg); } -void nl80211_send_new_peer_candidate(struct cfg80211_registered_device *rdev, - struct net_device *netdev, - const u8 *macaddr, const u8* ie, u8 ie_len, - gfp_t gfp) +void cfg80211_notify_new_peer_candidate(struct net_device *dev, const u8 *addr, + const u8* ie, u8 ie_len, gfp_t gfp) { + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); struct sk_buff *msg; void *hdr; + if (WARN_ON(wdev->iftype != NL80211_IFTYPE_MESH_POINT)) + return; + + trace_cfg80211_notify_new_peer_candidate(dev, addr); + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp); if (!msg) return; @@ -8371,8 +9586,8 @@ void nl80211_send_new_peer_candidate(struct cfg80211_registered_device *rdev, } if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || - nla_put_u32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex) || - nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, macaddr) || + nla_put_u32(msg, NL80211_ATTR_IFINDEX, dev->ifindex) || + nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, addr) || (ie_len && ie && nla_put(msg, NL80211_ATTR_IE, ie_len , ie))) goto nla_put_failure; @@ -8387,6 +9602,7 @@ void nl80211_send_new_peer_candidate(struct cfg80211_registered_device *rdev, genlmsg_cancel(msg, hdr); nlmsg_free(msg); } +EXPORT_SYMBOL(cfg80211_notify_new_peer_candidate); void nl80211_michael_mic_failure(struct cfg80211_registered_device *rdev, struct net_device *netdev, const u8 *addr, @@ -8455,7 +9671,7 @@ void nl80211_send_beacon_hint_event(struct wiphy *wiphy, nl_freq = nla_nest_start(msg, NL80211_ATTR_FREQ_BEFORE); if (!nl_freq) goto nla_put_failure; - if (nl80211_msg_put_channel(msg, channel_before)) + if (nl80211_msg_put_channel(msg, channel_before, false)) goto nla_put_failure; nla_nest_end(msg, nl_freq); @@ -8463,7 +9679,7 @@ void nl80211_send_beacon_hint_event(struct wiphy *wiphy, nl_freq = nla_nest_start(msg, NL80211_ATTR_FREQ_AFTER); if (!nl_freq) goto nla_put_failure; - if (nl80211_msg_put_channel(msg, 
channel_after)) + if (nl80211_msg_put_channel(msg, channel_after, false)) goto nla_put_failure; nla_nest_end(msg, nl_freq); @@ -8525,31 +9741,42 @@ static void nl80211_send_remain_on_chan_event( nlmsg_free(msg); } -void nl80211_send_remain_on_channel(struct cfg80211_registered_device *rdev, - struct wireless_dev *wdev, u64 cookie, - struct ieee80211_channel *chan, - unsigned int duration, gfp_t gfp) +void cfg80211_ready_on_channel(struct wireless_dev *wdev, u64 cookie, + struct ieee80211_channel *chan, + unsigned int duration, gfp_t gfp) { + struct wiphy *wiphy = wdev->wiphy; + struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + + trace_cfg80211_ready_on_channel(wdev, cookie, chan, duration); nl80211_send_remain_on_chan_event(NL80211_CMD_REMAIN_ON_CHANNEL, rdev, wdev, cookie, chan, duration, gfp); } +EXPORT_SYMBOL(cfg80211_ready_on_channel); -void nl80211_send_remain_on_channel_cancel( - struct cfg80211_registered_device *rdev, - struct wireless_dev *wdev, - u64 cookie, struct ieee80211_channel *chan, gfp_t gfp) +void cfg80211_remain_on_channel_expired(struct wireless_dev *wdev, u64 cookie, + struct ieee80211_channel *chan, + gfp_t gfp) { + struct wiphy *wiphy = wdev->wiphy; + struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + + trace_cfg80211_ready_on_channel_expired(wdev, cookie, chan); nl80211_send_remain_on_chan_event(NL80211_CMD_CANCEL_REMAIN_ON_CHANNEL, rdev, wdev, cookie, chan, 0, gfp); } +EXPORT_SYMBOL(cfg80211_remain_on_channel_expired); -void nl80211_send_sta_event(struct cfg80211_registered_device *rdev, - struct net_device *dev, const u8 *mac_addr, - struct station_info *sinfo, gfp_t gfp) +void cfg80211_new_sta(struct net_device *dev, const u8 *mac_addr, + struct station_info *sinfo, gfp_t gfp) { + struct wiphy *wiphy = dev->ieee80211_ptr->wiphy; + struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); struct sk_buff *msg; + trace_cfg80211_new_sta(dev, mac_addr, sinfo); + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp); if (!msg) return; @@ -8563,14 +9790,17 @@ void nl80211_send_sta_event(struct cfg80211_registered_device *rdev, genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0, nl80211_mlme_mcgrp.id, gfp); } +EXPORT_SYMBOL(cfg80211_new_sta); -void nl80211_send_sta_del_event(struct cfg80211_registered_device *rdev, - struct net_device *dev, const u8 *mac_addr, - gfp_t gfp) +void cfg80211_del_sta(struct net_device *dev, const u8 *mac_addr, gfp_t gfp) { + struct wiphy *wiphy = dev->ieee80211_ptr->wiphy; + struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); struct sk_buff *msg; void *hdr; + trace_cfg80211_del_sta(dev, mac_addr); + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp); if (!msg) return; @@ -8595,12 +9825,14 @@ void nl80211_send_sta_del_event(struct cfg80211_registered_device *rdev, genlmsg_cancel(msg, hdr); nlmsg_free(msg); } +EXPORT_SYMBOL(cfg80211_del_sta); -void nl80211_send_conn_failed_event(struct cfg80211_registered_device *rdev, - struct net_device *dev, const u8 *mac_addr, - enum nl80211_connect_failed_reason reason, - gfp_t gfp) +void cfg80211_conn_failed(struct net_device *dev, const u8 *mac_addr, + enum nl80211_connect_failed_reason reason, + gfp_t gfp) { + struct wiphy *wiphy = dev->ieee80211_ptr->wiphy; + struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); struct sk_buff *msg; void *hdr; @@ -8629,6 +9861,7 @@ void nl80211_send_conn_failed_event(struct cfg80211_registered_device *rdev, genlmsg_cancel(msg, hdr); nlmsg_free(msg); } +EXPORT_SYMBOL(cfg80211_conn_failed); static bool 
__nl80211_unexpected_frame(struct net_device *dev, u8 cmd, const u8 *addr, gfp_t gfp) @@ -8673,19 +9906,47 @@ static bool __nl80211_unexpected_frame(struct net_device *dev, u8 cmd, return true; } -bool nl80211_unexpected_frame(struct net_device *dev, const u8 *addr, gfp_t gfp) +bool cfg80211_rx_spurious_frame(struct net_device *dev, + const u8 *addr, gfp_t gfp) { - return __nl80211_unexpected_frame(dev, NL80211_CMD_UNEXPECTED_FRAME, - addr, gfp); + struct wireless_dev *wdev = dev->ieee80211_ptr; + bool ret; + + trace_cfg80211_rx_spurious_frame(dev, addr); + + if (WARN_ON(wdev->iftype != NL80211_IFTYPE_AP && + wdev->iftype != NL80211_IFTYPE_P2P_GO)) { + trace_cfg80211_return_bool(false); + return false; + } + ret = __nl80211_unexpected_frame(dev, NL80211_CMD_UNEXPECTED_FRAME, + addr, gfp); + trace_cfg80211_return_bool(ret); + return ret; } +EXPORT_SYMBOL(cfg80211_rx_spurious_frame); -bool nl80211_unexpected_4addr_frame(struct net_device *dev, - const u8 *addr, gfp_t gfp) +bool cfg80211_rx_unexpected_4addr_frame(struct net_device *dev, + const u8 *addr, gfp_t gfp) { - return __nl80211_unexpected_frame(dev, - NL80211_CMD_UNEXPECTED_4ADDR_FRAME, - addr, gfp); + struct wireless_dev *wdev = dev->ieee80211_ptr; + bool ret; + + trace_cfg80211_rx_unexpected_4addr_frame(dev, addr); + + if (WARN_ON(wdev->iftype != NL80211_IFTYPE_AP && + wdev->iftype != NL80211_IFTYPE_P2P_GO && + wdev->iftype != NL80211_IFTYPE_AP_VLAN)) { + trace_cfg80211_return_bool(false); + return false; + } + ret = __nl80211_unexpected_frame(dev, + NL80211_CMD_UNEXPECTED_4ADDR_FRAME, + addr, gfp); + trace_cfg80211_return_bool(ret); + return ret; } +EXPORT_SYMBOL(cfg80211_rx_unexpected_4addr_frame); int nl80211_send_mgmt(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev, u32 nlportid, @@ -8725,15 +9986,17 @@ int nl80211_send_mgmt(struct cfg80211_registered_device *rdev, return -ENOBUFS; } -void nl80211_send_mgmt_tx_status(struct cfg80211_registered_device *rdev, - struct wireless_dev *wdev, u64 cookie, - const u8 *buf, size_t len, bool ack, - gfp_t gfp) +void cfg80211_mgmt_tx_status(struct wireless_dev *wdev, u64 cookie, + const u8 *buf, size_t len, bool ack, gfp_t gfp) { + struct wiphy *wiphy = wdev->wiphy; + struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); struct net_device *netdev = wdev->netdev; struct sk_buff *msg; void *hdr; + trace_cfg80211_mgmt_tx_status(wdev, cookie, ack); + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp); if (!msg) return; @@ -8761,17 +10024,21 @@ void nl80211_send_mgmt_tx_status(struct cfg80211_registered_device *rdev, genlmsg_cancel(msg, hdr); nlmsg_free(msg); } +EXPORT_SYMBOL(cfg80211_mgmt_tx_status); -void -nl80211_send_cqm_rssi_notify(struct cfg80211_registered_device *rdev, - struct net_device *netdev, - enum nl80211_cqm_rssi_threshold_event rssi_event, - gfp_t gfp) +void cfg80211_cqm_rssi_notify(struct net_device *dev, + enum nl80211_cqm_rssi_threshold_event rssi_event, + gfp_t gfp) { + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct wiphy *wiphy = wdev->wiphy; + struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); struct sk_buff *msg; struct nlattr *pinfoattr; void *hdr; + trace_cfg80211_cqm_rssi_notify(dev, rssi_event); + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp); if (!msg) return; @@ -8783,7 +10050,7 @@ nl80211_send_cqm_rssi_notify(struct cfg80211_registered_device *rdev, } if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || - nla_put_u32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex)) + nla_put_u32(msg, NL80211_ATTR_IFINDEX, dev->ifindex)) goto 
nla_put_failure; pinfoattr = nla_nest_start(msg, NL80211_ATTR_CQM); @@ -8806,10 +10073,11 @@ nl80211_send_cqm_rssi_notify(struct cfg80211_registered_device *rdev, genlmsg_cancel(msg, hdr); nlmsg_free(msg); } +EXPORT_SYMBOL(cfg80211_cqm_rssi_notify); -void nl80211_gtk_rekey_notify(struct cfg80211_registered_device *rdev, - struct net_device *netdev, const u8 *bssid, - const u8 *replay_ctr, gfp_t gfp) +static void nl80211_gtk_rekey_notify(struct cfg80211_registered_device *rdev, + struct net_device *netdev, const u8 *bssid, + const u8 *replay_ctr, gfp_t gfp) { struct sk_buff *msg; struct nlattr *rekey_attr; @@ -8851,9 +10119,22 @@ void nl80211_gtk_rekey_notify(struct cfg80211_registered_device *rdev, nlmsg_free(msg); } -void nl80211_pmksa_candidate_notify(struct cfg80211_registered_device *rdev, - struct net_device *netdev, int index, - const u8 *bssid, bool preauth, gfp_t gfp) +void cfg80211_gtk_rekey_notify(struct net_device *dev, const u8 *bssid, + const u8 *replay_ctr, gfp_t gfp) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct wiphy *wiphy = wdev->wiphy; + struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + + trace_cfg80211_gtk_rekey_notify(dev, bssid); + nl80211_gtk_rekey_notify(rdev, dev, bssid, replay_ctr, gfp); +} +EXPORT_SYMBOL(cfg80211_gtk_rekey_notify); + +static void +nl80211_pmksa_candidate_notify(struct cfg80211_registered_device *rdev, + struct net_device *netdev, int index, + const u8 *bssid, bool preauth, gfp_t gfp) { struct sk_buff *msg; struct nlattr *attr; @@ -8896,9 +10177,22 @@ void nl80211_pmksa_candidate_notify(struct cfg80211_registered_device *rdev, nlmsg_free(msg); } -void nl80211_ch_switch_notify(struct cfg80211_registered_device *rdev, - struct net_device *netdev, - struct cfg80211_chan_def *chandef, gfp_t gfp) +void cfg80211_pmksa_candidate_notify(struct net_device *dev, int index, + const u8 *bssid, bool preauth, gfp_t gfp) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct wiphy *wiphy = wdev->wiphy; + struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + + trace_cfg80211_pmksa_candidate_notify(dev, index, bssid, preauth); + nl80211_pmksa_candidate_notify(rdev, dev, index, bssid, preauth, gfp); +} +EXPORT_SYMBOL(cfg80211_pmksa_candidate_notify); + +static void nl80211_ch_switch_notify(struct cfg80211_registered_device *rdev, + struct net_device *netdev, + struct cfg80211_chan_def *chandef, + gfp_t gfp) { struct sk_buff *msg; void *hdr; @@ -8930,11 +10224,36 @@ void nl80211_ch_switch_notify(struct cfg80211_registered_device *rdev, nlmsg_free(msg); } -void -nl80211_send_cqm_txe_notify(struct cfg80211_registered_device *rdev, - struct net_device *netdev, const u8 *peer, - u32 num_packets, u32 rate, u32 intvl, gfp_t gfp) +void cfg80211_ch_switch_notify(struct net_device *dev, + struct cfg80211_chan_def *chandef) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct wiphy *wiphy = wdev->wiphy; + struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + + trace_cfg80211_ch_switch_notify(dev, chandef); + + wdev_lock(wdev); + + if (WARN_ON(wdev->iftype != NL80211_IFTYPE_AP && + wdev->iftype != NL80211_IFTYPE_P2P_GO)) + goto out; + + wdev->channel = chandef->chan; + nl80211_ch_switch_notify(rdev, dev, chandef, GFP_KERNEL); +out: + wdev_unlock(wdev); + return; +} +EXPORT_SYMBOL(cfg80211_ch_switch_notify); + +void cfg80211_cqm_txe_notify(struct net_device *dev, + const u8 *peer, u32 num_packets, + u32 rate, u32 intvl, gfp_t gfp) { + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct wiphy *wiphy = 
wdev->wiphy; + struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); struct sk_buff *msg; struct nlattr *pinfoattr; void *hdr; @@ -8950,7 +10269,7 @@ nl80211_send_cqm_txe_notify(struct cfg80211_registered_device *rdev, } if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || - nla_put_u32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex) || + nla_put_u32(msg, NL80211_ATTR_IFINDEX, dev->ifindex) || nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, peer)) goto nla_put_failure; @@ -8979,16 +10298,71 @@ nl80211_send_cqm_txe_notify(struct cfg80211_registered_device *rdev, genlmsg_cancel(msg, hdr); nlmsg_free(msg); } +EXPORT_SYMBOL(cfg80211_cqm_txe_notify); void -nl80211_send_cqm_pktloss_notify(struct cfg80211_registered_device *rdev, - struct net_device *netdev, const u8 *peer, - u32 num_packets, gfp_t gfp) +nl80211_radar_notify(struct cfg80211_registered_device *rdev, + struct cfg80211_chan_def *chandef, + enum nl80211_radar_event event, + struct net_device *netdev, gfp_t gfp) { struct sk_buff *msg; + void *hdr; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp); + if (!msg) + return; + + hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_RADAR_DETECT); + if (!hdr) { + nlmsg_free(msg); + return; + } + + if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx)) + goto nla_put_failure; + + /* NOP and radar events don't need a netdev parameter */ + if (netdev) { + struct wireless_dev *wdev = netdev->ieee80211_ptr; + + if (nla_put_u32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex) || + nla_put_u64(msg, NL80211_ATTR_WDEV, wdev_id(wdev))) + goto nla_put_failure; + } + + if (nla_put_u32(msg, NL80211_ATTR_RADAR_EVENT, event)) + goto nla_put_failure; + + if (nl80211_send_chandef(msg, chandef)) + goto nla_put_failure; + + if (genlmsg_end(msg, hdr) < 0) { + nlmsg_free(msg); + return; + } + + genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0, + nl80211_mlme_mcgrp.id, gfp); + return; + + nla_put_failure: + genlmsg_cancel(msg, hdr); + nlmsg_free(msg); +} + +void cfg80211_cqm_pktloss_notify(struct net_device *dev, + const u8 *peer, u32 num_packets, gfp_t gfp) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct wiphy *wiphy = wdev->wiphy; + struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + struct sk_buff *msg; struct nlattr *pinfoattr; void *hdr; + trace_cfg80211_cqm_pktloss_notify(dev, peer, num_packets); + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp); if (!msg) return; @@ -9000,7 +10374,7 @@ nl80211_send_cqm_pktloss_notify(struct cfg80211_registered_device *rdev, } if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || - nla_put_u32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex) || + nla_put_u32(msg, NL80211_ATTR_IFINDEX, dev->ifindex) || nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, peer)) goto nla_put_failure; @@ -9023,6 +10397,7 @@ nl80211_send_cqm_pktloss_notify(struct cfg80211_registered_device *rdev, genlmsg_cancel(msg, hdr); nlmsg_free(msg); } +EXPORT_SYMBOL(cfg80211_cqm_pktloss_notify); void cfg80211_probe_status(struct net_device *dev, const u8 *addr, u64 cookie, bool acked, gfp_t gfp) @@ -9115,6 +10490,114 @@ void cfg80211_report_obss_beacon(struct wiphy *wiphy, } EXPORT_SYMBOL(cfg80211_report_obss_beacon); +#ifdef CONFIG_PM +void cfg80211_report_wowlan_wakeup(struct wireless_dev *wdev, + struct cfg80211_wowlan_wakeup *wakeup, + gfp_t gfp) +{ + struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct sk_buff *msg; + void *hdr; + int err, size = 200; + + trace_cfg80211_report_wowlan_wakeup(wdev->wiphy, wdev, wakeup); + + if (wakeup) + size += 
wakeup->packet_present_len; + + msg = nlmsg_new(size, gfp); + if (!msg) + return; + + hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_SET_WOWLAN); + if (!hdr) + goto free_msg; + + if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || + nla_put_u64(msg, NL80211_ATTR_WDEV, wdev_id(wdev))) + goto free_msg; + + if (wdev->netdev && nla_put_u32(msg, NL80211_ATTR_IFINDEX, + wdev->netdev->ifindex)) + goto free_msg; + + if (wakeup) { + struct nlattr *reasons; + + reasons = nla_nest_start(msg, NL80211_ATTR_WOWLAN_TRIGGERS); + + if (wakeup->disconnect && + nla_put_flag(msg, NL80211_WOWLAN_TRIG_DISCONNECT)) + goto free_msg; + if (wakeup->magic_pkt && + nla_put_flag(msg, NL80211_WOWLAN_TRIG_MAGIC_PKT)) + goto free_msg; + if (wakeup->gtk_rekey_failure && + nla_put_flag(msg, NL80211_WOWLAN_TRIG_GTK_REKEY_FAILURE)) + goto free_msg; + if (wakeup->eap_identity_req && + nla_put_flag(msg, NL80211_WOWLAN_TRIG_EAP_IDENT_REQUEST)) + goto free_msg; + if (wakeup->four_way_handshake && + nla_put_flag(msg, NL80211_WOWLAN_TRIG_4WAY_HANDSHAKE)) + goto free_msg; + if (wakeup->rfkill_release && + nla_put_flag(msg, NL80211_WOWLAN_TRIG_RFKILL_RELEASE)) + goto free_msg; + + if (wakeup->pattern_idx >= 0 && + nla_put_u32(msg, NL80211_WOWLAN_TRIG_PKT_PATTERN, + wakeup->pattern_idx)) + goto free_msg; + + if (wakeup->tcp_match) + nla_put_flag(msg, NL80211_WOWLAN_TRIG_WAKEUP_TCP_MATCH); + + if (wakeup->tcp_connlost) + nla_put_flag(msg, + NL80211_WOWLAN_TRIG_WAKEUP_TCP_CONNLOST); + + if (wakeup->tcp_nomoretokens) + nla_put_flag(msg, + NL80211_WOWLAN_TRIG_WAKEUP_TCP_NOMORETOKENS); + + if (wakeup->packet) { + u32 pkt_attr = NL80211_WOWLAN_TRIG_WAKEUP_PKT_80211; + u32 len_attr = NL80211_WOWLAN_TRIG_WAKEUP_PKT_80211_LEN; + + if (!wakeup->packet_80211) { + pkt_attr = + NL80211_WOWLAN_TRIG_WAKEUP_PKT_8023; + len_attr = + NL80211_WOWLAN_TRIG_WAKEUP_PKT_8023_LEN; + } + + if (wakeup->packet_len && + nla_put_u32(msg, len_attr, wakeup->packet_len)) + goto free_msg; + + if (nla_put(msg, pkt_attr, wakeup->packet_present_len, + wakeup->packet)) + goto free_msg; + } + + nla_nest_end(msg, reasons); + } + + err = genlmsg_end(msg, hdr); + if (err < 0) + goto free_msg; + + genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0, + nl80211_mlme_mcgrp.id, gfp); + return; + + free_msg: + nlmsg_free(msg); +} +EXPORT_SYMBOL(cfg80211_report_wowlan_wakeup); +#endif + void cfg80211_tdls_oper_request(struct net_device *dev, const u8 *peer, enum nl80211_tdls_operation oper, u16 reason_code, gfp_t gfp) @@ -9201,6 +10684,89 @@ static struct notifier_block nl80211_netlink_notifier = { .notifier_call = nl80211_netlink_notify, }; +void cfg80211_ft_event(struct net_device *netdev, + struct cfg80211_ft_event_params *ft_event) +{ + struct wiphy *wiphy = netdev->ieee80211_ptr->wiphy; + struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + struct sk_buff *msg; + void *hdr; + int err; + + trace_cfg80211_ft_event(wiphy, netdev, ft_event); + + if (!ft_event->target_ap) + return; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return; + + hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_FT_EVENT); + if (!hdr) { + nlmsg_free(msg); + return; + } + + nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx); + nla_put_u32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex); + nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, ft_event->target_ap); + if (ft_event->ies) + nla_put(msg, NL80211_ATTR_IE, ft_event->ies_len, ft_event->ies); + if (ft_event->ric_ies) + nla_put(msg, NL80211_ATTR_IE_RIC, ft_event->ric_ies_len, + ft_event->ric_ies); + + err = 
genlmsg_end(msg, hdr); + if (err < 0) { + nlmsg_free(msg); + return; + } + + genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0, + nl80211_mlme_mcgrp.id, GFP_KERNEL); +} +EXPORT_SYMBOL(cfg80211_ft_event); + +void cfg80211_crit_proto_stopped(struct wireless_dev *wdev, gfp_t gfp) +{ + struct cfg80211_registered_device *rdev; + struct sk_buff *msg; + void *hdr; + u32 nlportid; + + rdev = wiphy_to_dev(wdev->wiphy); + if (!rdev->crit_proto_nlportid) + return; + + nlportid = rdev->crit_proto_nlportid; + rdev->crit_proto_nlportid = 0; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp); + if (!msg) + return; + + hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_CRIT_PROTOCOL_STOP); + if (!hdr) + goto nla_put_failure; + + if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || + nla_put_u64(msg, NL80211_ATTR_WDEV, wdev_id(wdev))) + goto nla_put_failure; + + genlmsg_end(msg, hdr); + + genlmsg_unicast(wiphy_net(&rdev->wiphy), msg, nlportid); + return; + + nla_put_failure: + if (hdr) + genlmsg_cancel(msg, hdr); + nlmsg_free(msg); + +} +EXPORT_SYMBOL(cfg80211_crit_proto_stopped); + /* initialisation/exit functions */ int nl80211_init(void) diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h index 2acba8477e9d..a4073e808c13 100644 --- a/net/wireless/nl80211.h +++ b/net/wireless/nl80211.h @@ -29,12 +29,6 @@ void nl80211_send_deauth(struct cfg80211_registered_device *rdev, void nl80211_send_disassoc(struct cfg80211_registered_device *rdev, struct net_device *netdev, const u8 *buf, size_t len, gfp_t gfp); -void nl80211_send_unprot_deauth(struct cfg80211_registered_device *rdev, - struct net_device *netdev, - const u8 *buf, size_t len, gfp_t gfp); -void nl80211_send_unprot_disassoc(struct cfg80211_registered_device *rdev, - struct net_device *netdev, - const u8 *buf, size_t len, gfp_t gfp); void nl80211_send_auth_timeout(struct cfg80211_registered_device *rdev, struct net_device *netdev, const u8 *addr, gfp_t gfp); @@ -54,10 +48,6 @@ void nl80211_send_disconnected(struct cfg80211_registered_device *rdev, struct net_device *netdev, u16 reason, const u8 *ie, size_t ie_len, bool from_ap); -void nl80211_send_new_peer_candidate(struct cfg80211_registered_device *rdev, - struct net_device *netdev, - const u8 *macaddr, const u8* ie, u8 ie_len, - gfp_t gfp); void nl80211_michael_mic_failure(struct cfg80211_registered_device *rdev, struct net_device *netdev, const u8 *addr, @@ -73,66 +63,15 @@ void nl80211_send_ibss_bssid(struct cfg80211_registered_device *rdev, struct net_device *netdev, const u8 *bssid, gfp_t gfp); -void nl80211_send_remain_on_channel(struct cfg80211_registered_device *rdev, - struct wireless_dev *wdev, u64 cookie, - struct ieee80211_channel *chan, - unsigned int duration, gfp_t gfp); -void nl80211_send_remain_on_channel_cancel( - struct cfg80211_registered_device *rdev, - struct wireless_dev *wdev, - u64 cookie, struct ieee80211_channel *chan, gfp_t gfp); - -void nl80211_send_sta_event(struct cfg80211_registered_device *rdev, - struct net_device *dev, const u8 *mac_addr, - struct station_info *sinfo, gfp_t gfp); -void nl80211_send_sta_del_event(struct cfg80211_registered_device *rdev, - struct net_device *dev, const u8 *mac_addr, - gfp_t gfp); - -void nl80211_send_conn_failed_event(struct cfg80211_registered_device *rdev, - struct net_device *dev, const u8 *mac_addr, - enum nl80211_connect_failed_reason reason, - gfp_t gfp); - int nl80211_send_mgmt(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev, u32 nlpid, int freq, int sig_dbm, const u8 *buf, size_t len, gfp_t gfp); 
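
The prototype removals in this header are the other half of the conversion seen throughout nl80211.c above: each internal nl80211_send_*() event helper grew an exported cfg80211_*() entry point that derives the registered device from the net_device or wireless_dev, fires a tracepoint, and then does the netlink send, so drivers no longer need the rdev-based internals. The common shape, sketched with generic names that are not from the patch:

	void cfg80211_example_event(struct net_device *dev, gfp_t gfp)
	{
		struct wireless_dev *wdev = dev->ieee80211_ptr;
		struct cfg80211_registered_device *rdev =
			wiphy_to_dev(wdev->wiphy);

		trace_cfg80211_example_event(dev);	/* hypothetical tracepoint */
		nl80211_send_example_event(rdev, dev, gfp);	/* internal sender */
	}
	EXPORT_SYMBOL(cfg80211_example_event);
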
-void nl80211_send_mgmt_tx_status(struct cfg80211_registered_device *rdev, - struct wireless_dev *wdev, u64 cookie, - const u8 *buf, size_t len, bool ack, - gfp_t gfp); - -void -nl80211_send_cqm_rssi_notify(struct cfg80211_registered_device *rdev, - struct net_device *netdev, - enum nl80211_cqm_rssi_threshold_event rssi_event, - gfp_t gfp); -void -nl80211_send_cqm_pktloss_notify(struct cfg80211_registered_device *rdev, - struct net_device *netdev, const u8 *peer, - u32 num_packets, gfp_t gfp); void -nl80211_send_cqm_txe_notify(struct cfg80211_registered_device *rdev, - struct net_device *netdev, const u8 *peer, - u32 num_packets, u32 rate, u32 intvl, gfp_t gfp); - -void nl80211_gtk_rekey_notify(struct cfg80211_registered_device *rdev, - struct net_device *netdev, const u8 *bssid, - const u8 *replay_ctr, gfp_t gfp); - -void nl80211_pmksa_candidate_notify(struct cfg80211_registered_device *rdev, - struct net_device *netdev, int index, - const u8 *bssid, bool preauth, gfp_t gfp); - -void nl80211_ch_switch_notify(struct cfg80211_registered_device *rdev, - struct net_device *dev, - struct cfg80211_chan_def *chandef, gfp_t gfp); - -bool nl80211_unexpected_frame(struct net_device *dev, - const u8 *addr, gfp_t gfp); -bool nl80211_unexpected_4addr_frame(struct net_device *dev, - const u8 *addr, gfp_t gfp); +nl80211_radar_notify(struct cfg80211_registered_device *rdev, + struct cfg80211_chan_def *chandef, + enum nl80211_radar_event event, + struct net_device *netdev, gfp_t gfp); #endif /* __NET_WIRELESS_NL80211_H */ diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h index 6c0c8191f837..9f15f0ac824d 100644 --- a/net/wireless/rdev-ops.h +++ b/net/wireless/rdev-ops.h @@ -6,11 +6,12 @@ #include "core.h" #include "trace.h" -static inline int rdev_suspend(struct cfg80211_registered_device *rdev) +static inline int rdev_suspend(struct cfg80211_registered_device *rdev, + struct cfg80211_wowlan *wowlan) { int ret; - trace_rdev_suspend(&rdev->wiphy, rdev->wowlan); - ret = rdev->ops->suspend(&rdev->wiphy, rdev->wowlan); + trace_rdev_suspend(&rdev->wiphy, wowlan); + ret = rdev->ops->suspend(&rdev->wiphy, wowlan); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } @@ -874,5 +875,52 @@ static inline void rdev_stop_p2p_device(struct cfg80211_registered_device *rdev, trace_rdev_stop_p2p_device(&rdev->wiphy, wdev); rdev->ops->stop_p2p_device(&rdev->wiphy, wdev); trace_rdev_return_void(&rdev->wiphy); -} +} + +static inline int rdev_set_mac_acl(struct cfg80211_registered_device *rdev, + struct net_device *dev, + struct cfg80211_acl_data *params) +{ + int ret; + + trace_rdev_set_mac_acl(&rdev->wiphy, dev, params); + ret = rdev->ops->set_mac_acl(&rdev->wiphy, dev, params); + trace_rdev_return_int(&rdev->wiphy, ret); + return ret; +} + +static inline int rdev_update_ft_ies(struct cfg80211_registered_device *rdev, + struct net_device *dev, + struct cfg80211_update_ft_ies_params *ftie) +{ + int ret; + + trace_rdev_update_ft_ies(&rdev->wiphy, dev, ftie); + ret = rdev->ops->update_ft_ies(&rdev->wiphy, dev, ftie); + trace_rdev_return_int(&rdev->wiphy, ret); + return ret; +} + +static inline int rdev_crit_proto_start(struct cfg80211_registered_device *rdev, + struct wireless_dev *wdev, + enum nl80211_crit_proto_id protocol, + u16 duration) +{ + int ret; + + trace_rdev_crit_proto_start(&rdev->wiphy, wdev, protocol, duration); + ret = rdev->ops->crit_proto_start(&rdev->wiphy, wdev, + protocol, duration); + trace_rdev_return_int(&rdev->wiphy, ret); + return ret; +} + +static inline void 
rdev_crit_proto_stop(struct cfg80211_registered_device *rdev, + struct wireless_dev *wdev) +{ + trace_rdev_crit_proto_stop(&rdev->wiphy, wdev); + rdev->ops->crit_proto_stop(&rdev->wiphy, wdev); + trace_rdev_return_void(&rdev->wiphy); +} + #endif /* __CFG80211_RDEV_OPS */ diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 82c4fc7c994c..cc35fbaa4578 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -48,7 +48,6 @@ #include <linux/export.h> #include <linux/slab.h> #include <linux/list.h> -#include <linux/random.h> #include <linux/ctype.h> #include <linux/nl80211.h> #include <linux/platform_device.h> @@ -66,6 +65,13 @@ #define REG_DBG_PRINT(args...) #endif +enum reg_request_treatment { + REG_REQ_OK, + REG_REQ_IGNORE, + REG_REQ_INTERSECT, + REG_REQ_ALREADY_SET, +}; + static struct regulatory_request core_request_world = { .initiator = NL80211_REGDOM_SET_BY_CORE, .alpha2[0] = '0', @@ -76,7 +82,8 @@ static struct regulatory_request core_request_world = { }; /* Receipt of information from last regulatory request */ -static struct regulatory_request *last_request = &core_request_world; +static struct regulatory_request __rcu *last_request = + (void __rcu *)&core_request_world; /* To trigger userspace events */ static struct platform_device *reg_pdev; @@ -88,16 +95,16 @@ static struct device_type reg_device_type = { /* * Central wireless core regulatory domains, we only need two, * the current one and a world regulatory domain in case we have no - * information to give us an alpha2 + * information to give us an alpha2. */ -const struct ieee80211_regdomain *cfg80211_regdomain; +const struct ieee80211_regdomain __rcu *cfg80211_regdomain; /* * Protects static reg.c components: - * - cfg80211_world_regdom - * - cfg80211_regdom - * - last_request - * - reg_num_devs_support_basehint + * - cfg80211_regdomain (if not used with RCU) + * - cfg80211_world_regdom + * - last_request (if not used with RCU) + * - reg_num_devs_support_basehint */ static DEFINE_MUTEX(reg_mutex); @@ -112,6 +119,31 @@ static inline void assert_reg_lock(void) lockdep_assert_held(&reg_mutex); } +static const struct ieee80211_regdomain *get_cfg80211_regdom(void) +{ + return rcu_dereference_protected(cfg80211_regdomain, + lockdep_is_held(&reg_mutex)); +} + +static const struct ieee80211_regdomain *get_wiphy_regdom(struct wiphy *wiphy) +{ + return rcu_dereference_protected(wiphy->regd, + lockdep_is_held(&reg_mutex)); +} + +static void rcu_free_regdom(const struct ieee80211_regdomain *r) +{ + if (!r) + return; + kfree_rcu((struct ieee80211_regdomain *)r, rcu_head); +} + +static struct regulatory_request *get_last_request(void) +{ + return rcu_dereference_check(last_request, + lockdep_is_held(&reg_mutex)); +} + /* Used to queue up regulatory hints */ static LIST_HEAD(reg_requests_list); static spinlock_t reg_requests_lock; @@ -152,14 +184,14 @@ static const struct ieee80211_regdomain world_regdom = { NL80211_RRF_NO_IBSS | NL80211_RRF_NO_OFDM), /* IEEE 802.11a, channel 36..48 */ - REG_RULE(5180-10, 5240+10, 40, 6, 20, + REG_RULE(5180-10, 5240+10, 80, 6, 20, NL80211_RRF_PASSIVE_SCAN | NL80211_RRF_NO_IBSS), - /* NB: 5260 MHz - 5700 MHz requies DFS */ + /* NB: 5260 MHz - 5700 MHz requires DFS */ /* IEEE 802.11a, channel 149..165 */ - REG_RULE(5745-10, 5825+10, 40, 6, 20, + REG_RULE(5745-10, 5825+10, 80, 6, 20, NL80211_RRF_PASSIVE_SCAN | NL80211_RRF_NO_IBSS), @@ -177,28 +209,37 @@ static char user_alpha2[2]; module_param(ieee80211_regdom, charp, 0444); MODULE_PARM_DESC(ieee80211_regdom, "IEEE 802.11 regulatory domain code"); -static void
reset_regdomains(bool full_reset) +static void reset_regdomains(bool full_reset, + const struct ieee80211_regdomain *new_regdom) { + const struct ieee80211_regdomain *r; + struct regulatory_request *lr; + + assert_reg_lock(); + + r = get_cfg80211_regdom(); + /* avoid freeing static information or freeing something twice */ - if (cfg80211_regdomain == cfg80211_world_regdom) - cfg80211_regdomain = NULL; + if (r == cfg80211_world_regdom) + r = NULL; if (cfg80211_world_regdom == &world_regdom) cfg80211_world_regdom = NULL; - if (cfg80211_regdomain == &world_regdom) - cfg80211_regdomain = NULL; + if (r == &world_regdom) + r = NULL; - kfree(cfg80211_regdomain); - kfree(cfg80211_world_regdom); + rcu_free_regdom(r); + rcu_free_regdom(cfg80211_world_regdom); cfg80211_world_regdom = &world_regdom; - cfg80211_regdomain = NULL; + rcu_assign_pointer(cfg80211_regdomain, new_regdom); if (!full_reset) return; - if (last_request != &core_request_world) - kfree(last_request); - last_request = &core_request_world; + lr = get_last_request(); + if (lr != &core_request_world && lr) + kfree_rcu(lr, rcu_head); + rcu_assign_pointer(last_request, &core_request_world); } /* @@ -207,30 +248,29 @@ static void reset_regdomains(bool full_reset) */ static void update_world_regdomain(const struct ieee80211_regdomain *rd) { - BUG_ON(!last_request); + struct regulatory_request *lr; - reset_regdomains(false); + lr = get_last_request(); + + WARN_ON(!lr); + + reset_regdomains(false, rd); cfg80211_world_regdom = rd; - cfg80211_regdomain = rd; } bool is_world_regdom(const char *alpha2) { if (!alpha2) return false; - if (alpha2[0] == '0' && alpha2[1] == '0') - return true; - return false; + return alpha2[0] == '0' && alpha2[1] == '0'; } static bool is_alpha2_set(const char *alpha2) { if (!alpha2) return false; - if (alpha2[0] != 0 && alpha2[1] != 0) - return true; - return false; + return alpha2[0] && alpha2[1]; } static bool is_unknown_alpha2(const char *alpha2) @@ -241,9 +281,7 @@ static bool is_unknown_alpha2(const char *alpha2) * Special case where regulatory domain was built by driver * but a specific alpha2 cannot be determined */ - if (alpha2[0] == '9' && alpha2[1] == '9') - return true; - return false; + return alpha2[0] == '9' && alpha2[1] == '9'; } static bool is_intersected_alpha2(const char *alpha2) @@ -255,39 +293,30 @@ static bool is_intersected_alpha2(const char *alpha2) * result of an intersection between two regulatory domain * structures */ - if (alpha2[0] == '9' && alpha2[1] == '8') - return true; - return false; + return alpha2[0] == '9' && alpha2[1] == '8'; } static bool is_an_alpha2(const char *alpha2) { if (!alpha2) return false; - if (isalpha(alpha2[0]) && isalpha(alpha2[1])) - return true; - return false; + return isalpha(alpha2[0]) && isalpha(alpha2[1]); } static bool alpha2_equal(const char *alpha2_x, const char *alpha2_y) { if (!alpha2_x || !alpha2_y) return false; - if (alpha2_x[0] == alpha2_y[0] && - alpha2_x[1] == alpha2_y[1]) - return true; - return false; + return alpha2_x[0] == alpha2_y[0] && alpha2_x[1] == alpha2_y[1]; } static bool regdom_changes(const char *alpha2) { - assert_cfg80211_lock(); + const struct ieee80211_regdomain *r = get_cfg80211_regdom(); - if (!cfg80211_regdomain) + if (!r) return true; - if (alpha2_equal(cfg80211_regdomain->alpha2, alpha2)) - return false; - return true; + return !alpha2_equal(r->alpha2, alpha2); } /* @@ -301,38 +330,36 @@ static bool is_user_regdom_saved(void) return false; /* This would indicate a mistake on the design */ - if 
(WARN((!is_world_regdom(user_alpha2) && - !is_an_alpha2(user_alpha2)), + if (WARN(!is_world_regdom(user_alpha2) && !is_an_alpha2(user_alpha2), "Unexpected user alpha2: %c%c\n", - user_alpha2[0], - user_alpha2[1])) + user_alpha2[0], user_alpha2[1])) return false; return true; } -static int reg_copy_regd(const struct ieee80211_regdomain **dst_regd, - const struct ieee80211_regdomain *src_regd) +static const struct ieee80211_regdomain * +reg_copy_regd(const struct ieee80211_regdomain *src_regd) { struct ieee80211_regdomain *regd; - int size_of_regd = 0; + int size_of_regd; unsigned int i; - size_of_regd = sizeof(struct ieee80211_regdomain) + - ((src_regd->n_reg_rules + 1) * sizeof(struct ieee80211_reg_rule)); + size_of_regd = + sizeof(struct ieee80211_regdomain) + + src_regd->n_reg_rules * sizeof(struct ieee80211_reg_rule); regd = kzalloc(size_of_regd, GFP_KERNEL); if (!regd) - return -ENOMEM; + return ERR_PTR(-ENOMEM); memcpy(regd, src_regd, sizeof(struct ieee80211_regdomain)); for (i = 0; i < src_regd->n_reg_rules; i++) memcpy(&regd->reg_rules[i], &src_regd->reg_rules[i], - sizeof(struct ieee80211_reg_rule)); + sizeof(struct ieee80211_reg_rule)); - *dst_regd = regd; - return 0; + return regd; } #ifdef CONFIG_CFG80211_INTERNAL_REGDB @@ -347,9 +374,8 @@ static DEFINE_MUTEX(reg_regdb_search_mutex); static void reg_regdb_search(struct work_struct *work) { struct reg_regdb_search_request *request; - const struct ieee80211_regdomain *curdom, *regdom; - int i, r; - bool set_reg = false; + const struct ieee80211_regdomain *curdom, *regdom = NULL; + int i; mutex_lock(&cfg80211_mutex); @@ -360,14 +386,11 @@ static void reg_regdb_search(struct work_struct *work) list); list_del(&request->list); - for (i=0; i<reg_regdb_size; i++) { + for (i = 0; i < reg_regdb_size; i++) { curdom = reg_regdb[i]; - if (!memcmp(request->alpha2, curdom->alpha2, 2)) { - r = reg_copy_regd(&regdom, curdom); - if (r) - break; - set_reg = true; + if (alpha2_equal(request->alpha2, curdom->alpha2)) { + regdom = reg_copy_regd(curdom); break; } } @@ -376,7 +399,7 @@ static void reg_regdb_search(struct work_struct *work) } mutex_unlock(&reg_regdb_search_mutex); - if (set_reg) + if (!IS_ERR_OR_NULL(regdom)) set_regdom(regdom); mutex_unlock(&cfg80211_mutex); @@ -434,15 +457,14 @@ static int call_crda(const char *alpha2) return kobject_uevent(&reg_pdev->dev.kobj, KOBJ_CHANGE); } -/* Used by nl80211 before kmalloc'ing our regulatory domain */ -bool reg_is_valid_request(const char *alpha2) +static bool reg_is_valid_request(const char *alpha2) { - assert_cfg80211_lock(); + struct regulatory_request *lr = get_last_request(); - if (!last_request) + if (!lr || lr->processed) return false; - return alpha2_equal(last_request->alpha2, alpha2); + return alpha2_equal(lr->alpha2, alpha2); } /* Sanity check on a regulatory rule */ @@ -460,7 +482,7 @@ static bool is_valid_reg_rule(const struct ieee80211_reg_rule *rule) freq_diff = freq_range->end_freq_khz - freq_range->start_freq_khz; if (freq_range->end_freq_khz <= freq_range->start_freq_khz || - freq_range->max_bandwidth_khz > freq_diff) + freq_range->max_bandwidth_khz > freq_diff) return false; return true; @@ -487,8 +509,7 @@ static bool is_valid_rd(const struct ieee80211_regdomain *rd) } static bool reg_does_bw_fit(const struct ieee80211_freq_range *freq_range, - u32 center_freq_khz, - u32 bw_khz) + u32 center_freq_khz, u32 bw_khz) { u32 start_freq_khz, end_freq_khz; @@ -518,7 +539,7 @@ static bool reg_does_bw_fit(const struct ieee80211_freq_range *freq_range, * regulatory rule support for other "bands".
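reg_does_bw_fit() above reduces to plain interval containment; a distilled restatement under the same semantics (the helper name is invented for illustration):

/* a channel of width bw_khz centred on center_freq_khz fits a rule iff
 * the whole span lies inside the rule's frequency range */
static bool bw_fits_sketch(const struct ieee80211_freq_range *fr,
			   u32 center_freq_khz, u32 bw_khz)
{
	u32 start_freq_khz = center_freq_khz - bw_khz / 2;
	u32 end_freq_khz = center_freq_khz + bw_khz / 2;

	return start_freq_khz >= fr->start_freq_khz &&
	       end_freq_khz <= fr->end_freq_khz;
}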
**/ static bool freq_in_rule_band(const struct ieee80211_freq_range *freq_range, - u32 freq_khz) + u32 freq_khz) { #define ONE_GHZ_IN_KHZ 1000000 /* @@ -540,10 +561,9 @@ static bool freq_in_rule_band(const struct ieee80211_freq_range *freq_range, * Helper for regdom_intersect(), this does the real * mathematical intersection fun */ -static int reg_rules_intersect( - const struct ieee80211_reg_rule *rule1, - const struct ieee80211_reg_rule *rule2, - struct ieee80211_reg_rule *intersected_rule) +static int reg_rules_intersect(const struct ieee80211_reg_rule *rule1, + const struct ieee80211_reg_rule *rule2, + struct ieee80211_reg_rule *intersected_rule) { const struct ieee80211_freq_range *freq_range1, *freq_range2; struct ieee80211_freq_range *freq_range; @@ -560,11 +580,11 @@ static int reg_rules_intersect( power_rule = &intersected_rule->power_rule; freq_range->start_freq_khz = max(freq_range1->start_freq_khz, - freq_range2->start_freq_khz); + freq_range2->start_freq_khz); freq_range->end_freq_khz = min(freq_range1->end_freq_khz, - freq_range2->end_freq_khz); + freq_range2->end_freq_khz); freq_range->max_bandwidth_khz = min(freq_range1->max_bandwidth_khz, - freq_range2->max_bandwidth_khz); + freq_range2->max_bandwidth_khz); freq_diff = freq_range->end_freq_khz - freq_range->start_freq_khz; if (freq_range->max_bandwidth_khz > freq_diff) @@ -575,7 +595,7 @@ static int reg_rules_intersect( power_rule->max_antenna_gain = min(power_rule1->max_antenna_gain, power_rule2->max_antenna_gain); - intersected_rule->flags = (rule1->flags | rule2->flags); + intersected_rule->flags = rule1->flags | rule2->flags; if (!is_valid_reg_rule(intersected_rule)) return -EINVAL; @@ -596,9 +616,9 @@ static int reg_rules_intersect( * resulting intersection of rules between rd1 and rd2. We will * kzalloc() this structure for you. 
*/ -static struct ieee80211_regdomain *regdom_intersect( - const struct ieee80211_regdomain *rd1, - const struct ieee80211_regdomain *rd2) +static struct ieee80211_regdomain * +regdom_intersect(const struct ieee80211_regdomain *rd1, + const struct ieee80211_regdomain *rd2) { int r, size_of_regd; unsigned int x, y; @@ -607,12 +627,7 @@ static struct ieee80211_regdomain *regdom_intersect( struct ieee80211_reg_rule *intersected_rule; struct ieee80211_regdomain *rd; /* This is just a dummy holder to help us count */ - struct ieee80211_reg_rule irule; - - /* Uses the stack temporarily for counter arithmetic */ - intersected_rule = &irule; - - memset(intersected_rule, 0, sizeof(struct ieee80211_reg_rule)); + struct ieee80211_reg_rule dummy_rule; if (!rd1 || !rd2) return NULL; @@ -629,11 +644,8 @@ static struct ieee80211_regdomain *regdom_intersect( rule1 = &rd1->reg_rules[x]; for (y = 0; y < rd2->n_reg_rules; y++) { rule2 = &rd2->reg_rules[y]; - if (!reg_rules_intersect(rule1, rule2, - intersected_rule)) + if (!reg_rules_intersect(rule1, rule2, &dummy_rule)) num_rules++; - memset(intersected_rule, 0, - sizeof(struct ieee80211_reg_rule)); } } @@ -641,15 +653,15 @@ static struct ieee80211_regdomain *regdom_intersect( return NULL; size_of_regd = sizeof(struct ieee80211_regdomain) + - ((num_rules + 1) * sizeof(struct ieee80211_reg_rule)); + num_rules * sizeof(struct ieee80211_reg_rule); rd = kzalloc(size_of_regd, GFP_KERNEL); if (!rd) return NULL; - for (x = 0; x < rd1->n_reg_rules; x++) { + for (x = 0; x < rd1->n_reg_rules && rule_idx < num_rules; x++) { rule1 = &rd1->reg_rules[x]; - for (y = 0; y < rd2->n_reg_rules; y++) { + for (y = 0; y < rd2->n_reg_rules && rule_idx < num_rules; y++) { rule2 = &rd2->reg_rules[y]; /* * This time around instead of using the stack lets @@ -657,8 +669,7 @@ static struct ieee80211_regdomain *regdom_intersect( * a memcpy() */ intersected_rule = &rd->reg_rules[rule_idx]; - r = reg_rules_intersect(rule1, rule2, - intersected_rule); + r = reg_rules_intersect(rule1, rule2, intersected_rule); /* * No need to memset here the intersected rule here as * we're not using the stack anymore @@ -699,34 +710,16 @@ static u32 map_regdom_flags(u32 rd_flags) return channel_flags; } -static int freq_reg_info_regd(struct wiphy *wiphy, - u32 center_freq, - u32 desired_bw_khz, - const struct ieee80211_reg_rule **reg_rule, - const struct ieee80211_regdomain *custom_regd) +static const struct ieee80211_reg_rule * +freq_reg_info_regd(struct wiphy *wiphy, u32 center_freq, + const struct ieee80211_regdomain *regd) { int i; bool band_rule_found = false; - const struct ieee80211_regdomain *regd; bool bw_fits = false; - if (!desired_bw_khz) - desired_bw_khz = MHZ_TO_KHZ(20); - - regd = custom_regd ? 
custom_regd : cfg80211_regdomain; - - /* - * Follow the driver's regulatory domain, if present, unless a country - * IE has been processed or a user wants to help complaince further - */ - if (!custom_regd && - last_request->initiator != NL80211_REGDOM_SET_BY_COUNTRY_IE && - last_request->initiator != NL80211_REGDOM_SET_BY_USER && - wiphy->regd) - regd = wiphy->regd; - if (!regd) - return -EINVAL; + return ERR_PTR(-EINVAL); for (i = 0; i < regd->n_reg_rules; i++) { const struct ieee80211_reg_rule *rr; @@ -743,33 +736,36 @@ static int freq_reg_info_regd(struct wiphy *wiphy, if (!band_rule_found) band_rule_found = freq_in_rule_band(fr, center_freq); - bw_fits = reg_does_bw_fit(fr, - center_freq, - desired_bw_khz); + bw_fits = reg_does_bw_fit(fr, center_freq, MHZ_TO_KHZ(20)); - if (band_rule_found && bw_fits) { - *reg_rule = rr; - return 0; - } + if (band_rule_found && bw_fits) + return rr; } if (!band_rule_found) - return -ERANGE; + return ERR_PTR(-ERANGE); - return -EINVAL; + return ERR_PTR(-EINVAL); } -int freq_reg_info(struct wiphy *wiphy, - u32 center_freq, - u32 desired_bw_khz, - const struct ieee80211_reg_rule **reg_rule) +const struct ieee80211_reg_rule *freq_reg_info(struct wiphy *wiphy, + u32 center_freq) { - assert_cfg80211_lock(); - return freq_reg_info_regd(wiphy, - center_freq, - desired_bw_khz, - reg_rule, - NULL); + const struct ieee80211_regdomain *regd; + struct regulatory_request *lr = get_last_request(); + + /* + * Follow the driver's regulatory domain, if present, unless a country + * IE has been processed or a user wants to help compliance further + */ + if (lr->initiator != NL80211_REGDOM_SET_BY_COUNTRY_IE && + lr->initiator != NL80211_REGDOM_SET_BY_USER && + wiphy->regd) + regd = get_wiphy_regdom(wiphy); + else + regd = get_cfg80211_regdom(); + + return freq_reg_info_regd(wiphy, center_freq, regd); } EXPORT_SYMBOL(freq_reg_info); @@ -792,7 +788,6 @@ static const char *reg_initiator_name(enum nl80211_reg_initiator initiator) } static void chan_reg_rule_print_dbg(struct ieee80211_channel *chan, - u32 desired_bw_khz, const struct ieee80211_reg_rule *reg_rule) { const struct ieee80211_power_rule *power_rule; @@ -807,21 +802,16 @@ static void chan_reg_rule_print_dbg(struct ieee80211_channel *chan, else snprintf(max_antenna_gain, 32, "%d", power_rule->max_antenna_gain); - REG_DBG_PRINT("Updating information on frequency %d MHz " - "for a %d MHz width channel with regulatory rule:\n", - chan->center_freq, - KHZ_TO_MHZ(desired_bw_khz)); + REG_DBG_PRINT("Updating information on frequency %d MHz with regulatory rule:\n", + chan->center_freq); REG_DBG_PRINT("%d KHz - %d KHz @ %d KHz), (%s mBi, %d mBm)\n", - freq_range->start_freq_khz, - freq_range->end_freq_khz, - freq_range->max_bandwidth_khz, - max_antenna_gain, + freq_range->start_freq_khz, freq_range->end_freq_khz, + freq_range->max_bandwidth_khz, max_antenna_gain, power_rule->max_eirp); } #else static void chan_reg_rule_print_dbg(struct ieee80211_channel *chan, - u32 desired_bw_khz, const struct ieee80211_reg_rule *reg_rule) { return; @@ -831,43 +821,25 @@ static void chan_reg_rule_print_dbg(struct ieee80211_channel *chan, /* * Note that right now we assume the desired channel bandwidth * is always 20 MHz for each individual channel (HT40 uses 20 MHz - * per channel, the primary and the extension channel). To support - * smaller custom bandwidths such as 5 MHz or 10 MHz we'll need a - * new ieee80211_channel.target_bw and re run the regulatory check - * on the wiphy with the target_bw specified.
Then we can simply use - that below for the desired_bw_khz below. + * per channel, the primary and the extension channel). */ static void handle_channel(struct wiphy *wiphy, enum nl80211_reg_initiator initiator, - enum ieee80211_band band, - unsigned int chan_idx) + struct ieee80211_channel *chan) { - int r; u32 flags, bw_flags = 0; - u32 desired_bw_khz = MHZ_TO_KHZ(20); const struct ieee80211_reg_rule *reg_rule = NULL; const struct ieee80211_power_rule *power_rule = NULL; const struct ieee80211_freq_range *freq_range = NULL; - struct ieee80211_supported_band *sband; - struct ieee80211_channel *chan; struct wiphy *request_wiphy = NULL; + struct regulatory_request *lr = get_last_request(); - assert_cfg80211_lock(); - - request_wiphy = wiphy_idx_to_wiphy(last_request->wiphy_idx); - - sband = wiphy->bands[band]; - BUG_ON(chan_idx >= sband->n_channels); - chan = &sband->channels[chan_idx]; + request_wiphy = wiphy_idx_to_wiphy(lr->wiphy_idx); flags = chan->orig_flags; - r = freq_reg_info(wiphy, - MHZ_TO_KHZ(chan->center_freq), - desired_bw_khz, - &reg_rule); - - if (r) { + reg_rule = freq_reg_info(wiphy, MHZ_TO_KHZ(chan->center_freq)); + if (IS_ERR(reg_rule)) { /* * We will disable all channels that do not match our * received regulatory rule unless the hint is coming @@ -879,23 +851,27 @@ static void handle_channel(struct wiphy *wiphy, * while 5 GHz is still supported. */ if (initiator == NL80211_REGDOM_SET_BY_COUNTRY_IE && - r == -ERANGE) + PTR_ERR(reg_rule) == -ERANGE) return; REG_DBG_PRINT("Disabling freq %d MHz\n", chan->center_freq); - chan->flags = IEEE80211_CHAN_DISABLED; + chan->flags |= IEEE80211_CHAN_DISABLED; return; } - chan_reg_rule_print_dbg(chan, desired_bw_khz, reg_rule); + chan_reg_rule_print_dbg(chan, reg_rule); power_rule = &reg_rule->power_rule; freq_range = &reg_rule->freq_range; if (freq_range->max_bandwidth_khz < MHZ_TO_KHZ(40)) bw_flags = IEEE80211_CHAN_NO_HT40; + if (freq_range->max_bandwidth_khz < MHZ_TO_KHZ(80)) + bw_flags |= IEEE80211_CHAN_NO_80MHZ; + if (freq_range->max_bandwidth_khz < MHZ_TO_KHZ(160)) + bw_flags |= IEEE80211_CHAN_NO_160MHZ; - if (last_request->initiator == NL80211_REGDOM_SET_BY_DRIVER && + if (lr->initiator == NL80211_REGDOM_SET_BY_DRIVER && request_wiphy && request_wiphy == wiphy && request_wiphy->flags & WIPHY_FLAG_STRICT_REGULATORY) { /* @@ -912,10 +888,14 @@ static void handle_channel(struct wiphy *wiphy, return; } + chan->dfs_state = NL80211_DFS_USABLE; + chan->dfs_state_entered = jiffies; + chan->beacon_found = false; chan->flags = flags | bw_flags | map_regdom_flags(reg_rule->flags); - chan->max_antenna_gain = min(chan->orig_mag, - (int) MBI_TO_DBI(power_rule->max_antenna_gain)); + chan->max_antenna_gain = + min_t(int, chan->orig_mag, + MBI_TO_DBI(power_rule->max_antenna_gain)); chan->max_reg_power = (int) MBM_TO_DBM(power_rule->max_eirp); if (chan->orig_mpwr) { /* @@ -935,68 +915,65 @@ static void handle_channel(struct wiphy *wiphy, } static void handle_band(struct wiphy *wiphy, - enum ieee80211_band band, - enum nl80211_reg_initiator initiator) + enum nl80211_reg_initiator initiator, + struct ieee80211_supported_band *sband) { unsigned int i; - struct ieee80211_supported_band *sband; - BUG_ON(!wiphy->bands[band]); - sband = wiphy->bands[band]; + if (!sband) + return; for (i = 0; i < sband->n_channels; i++) - handle_channel(wiphy, initiator, band, i); + handle_channel(wiphy, initiator, &sband->channels[i]); } static bool reg_request_cell_base(struct regulatory_request *request) { if (request->initiator != NL80211_REGDOM_SET_BY_USER) return false;
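The three cumulative bandwidth checks introduced above generalize to a small mapping from a rule's maximum bandwidth to forbidden channel types; a distilled sketch (the helper name is invented, the flag and macro names are the ones used in this hunk):

static u32 reg_rule_to_bw_flags(const struct ieee80211_freq_range *freq_range)
{
	u32 bw_flags = 0;

	/* a rule narrower than N MHz forbids every channel type of width >= N */
	if (freq_range->max_bandwidth_khz < MHZ_TO_KHZ(40))
		bw_flags |= IEEE80211_CHAN_NO_HT40;
	if (freq_range->max_bandwidth_khz < MHZ_TO_KHZ(80))
		bw_flags |= IEEE80211_CHAN_NO_80MHZ;
	if (freq_range->max_bandwidth_khz < MHZ_TO_KHZ(160))
		bw_flags |= IEEE80211_CHAN_NO_160MHZ;

	return bw_flags;	/* e.g. a 40 MHz rule yields NO_80MHZ | NO_160MHZ */
}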
- if (request->user_reg_hint_type != NL80211_USER_REG_HINT_CELL_BASE) - return false; - return true; + return request->user_reg_hint_type == NL80211_USER_REG_HINT_CELL_BASE; } bool reg_last_request_cell_base(void) { bool val; - assert_cfg80211_lock(); mutex_lock(&reg_mutex); - val = reg_request_cell_base(last_request); + val = reg_request_cell_base(get_last_request()); mutex_unlock(&reg_mutex); + return val; } #ifdef CONFIG_CFG80211_CERTIFICATION_ONUS - /* Core specific check */ -static int reg_ignore_cell_hint(struct regulatory_request *pending_request) +static enum reg_request_treatment +reg_ignore_cell_hint(struct regulatory_request *pending_request) { + struct regulatory_request *lr = get_last_request(); + if (!reg_num_devs_support_basehint) - return -EOPNOTSUPP; + return REG_REQ_IGNORE; - if (reg_request_cell_base(last_request)) { - if (!regdom_changes(pending_request->alpha2)) - return -EALREADY; - return 0; - } - return 0; + if (reg_request_cell_base(lr) && + !regdom_changes(pending_request->alpha2)) + return REG_REQ_ALREADY_SET; + + return REG_REQ_OK; } /* Device specific check */ static bool reg_dev_ignore_cell_hint(struct wiphy *wiphy) { - if (!(wiphy->features & NL80211_FEATURE_CELL_BASE_REG_HINTS)) - return true; - return false; + return !(wiphy->features & NL80211_FEATURE_CELL_BASE_REG_HINTS); } #else static int reg_ignore_cell_hint(struct regulatory_request *pending_request) { - return -EOPNOTSUPP; + return REG_REQ_IGNORE; } -static int reg_dev_ignore_cell_hint(struct wiphy *wiphy) + +static bool reg_dev_ignore_cell_hint(struct wiphy *wiphy) { return true; } @@ -1006,18 +983,17 @@ static int reg_dev_ignore_cell_hint(struct wiphy *wiphy) static bool ignore_reg_update(struct wiphy *wiphy, enum nl80211_reg_initiator initiator) { - if (!last_request) { - REG_DBG_PRINT("Ignoring regulatory request %s since " - "last_request is not set\n", + struct regulatory_request *lr = get_last_request(); + + if (!lr) { + REG_DBG_PRINT("Ignoring regulatory request %s since last_request is not set\n", reg_initiator_name(initiator)); return true; } if (initiator == NL80211_REGDOM_SET_BY_CORE && wiphy->flags & WIPHY_FLAG_CUSTOM_REGULATORY) { - REG_DBG_PRINT("Ignoring regulatory request %s " - "since the driver uses its own custom " - "regulatory domain\n", + REG_DBG_PRINT("Ignoring regulatory request %s since the driver uses its own custom regulatory domain\n", reg_initiator_name(initiator)); return true; } @@ -1028,22 +1004,35 @@ static bool ignore_reg_update(struct wiphy *wiphy, */ if (wiphy->flags & WIPHY_FLAG_STRICT_REGULATORY && !wiphy->regd && initiator != NL80211_REGDOM_SET_BY_COUNTRY_IE && - !is_world_regdom(last_request->alpha2)) { - REG_DBG_PRINT("Ignoring regulatory request %s " - "since the driver requires its own regulatory " - "domain to be set first\n", + !is_world_regdom(lr->alpha2)) { - REG_DBG_PRINT("Ignoring regulatory request %s since the driver requires its own regulatory domain to be set first\n", reg_initiator_name(initiator)); return true; } - if (reg_request_cell_base(last_request)) + if (reg_request_cell_base(lr)) return reg_dev_ignore_cell_hint(wiphy); return false; } -static void handle_reg_beacon(struct wiphy *wiphy, - unsigned int chan_idx, +static bool reg_is_world_roaming(struct wiphy *wiphy) +{ + const struct ieee80211_regdomain *cr = get_cfg80211_regdom(); + const struct ieee80211_regdomain *wr = get_wiphy_regdom(wiphy); + struct regulatory_request *lr = get_last_request(); + + if (is_world_regdom(cr->alpha2) || (wr && is_world_regdom(wr->alpha2))) + return true; + + if
(lr && lr->initiator != NL80211_REGDOM_SET_BY_COUNTRY_IE && + wiphy->flags & WIPHY_FLAG_CUSTOM_REGULATORY) + return true; + + return false; +} + +static void handle_reg_beacon(struct wiphy *wiphy, unsigned int chan_idx, struct reg_beacon *reg_beacon) { struct ieee80211_supported_band *sband; struct ieee80211_channel *chan; bool channel_changed = false; struct ieee80211_channel chan_before; - assert_cfg80211_lock(); - sband = wiphy->bands[reg_beacon->chan.band]; chan = &sband->channels[chan_idx]; @@ -1064,6 +1051,9 @@ static void handle_reg_beacon(struct wiphy *wiphy, chan->beacon_found = true; + if (!reg_is_world_roaming(wiphy)) + return; + if (wiphy->flags & WIPHY_FLAG_DISABLE_BEACON_HINTS) return; @@ -1094,8 +1084,6 @@ static void wiphy_update_new_beacon(struct wiphy *wiphy, unsigned int i; struct ieee80211_supported_band *sband; - assert_cfg80211_lock(); - if (!wiphy->bands[reg_beacon->chan.band]) return; @@ -1114,11 +1102,6 @@ static void wiphy_update_beacon_reg(struct wiphy *wiphy) struct ieee80211_supported_band *sband; struct reg_beacon *reg_beacon; - assert_cfg80211_lock(); - - if (list_empty(&reg_beacon_list)) - return; - list_for_each_entry(reg_beacon, &reg_beacon_list, list) { if (!wiphy->bands[reg_beacon->chan.band]) continue; @@ -1128,18 +1111,6 @@ static void wiphy_update_beacon_reg(struct wiphy *wiphy) } } -static bool reg_is_world_roaming(struct wiphy *wiphy) -{ - if (is_world_regdom(cfg80211_regdomain->alpha2) || - (wiphy->regd && is_world_regdom(wiphy->regd->alpha2))) - return true; - if (last_request && - last_request->initiator != NL80211_REGDOM_SET_BY_COUNTRY_IE && - wiphy->flags & WIPHY_FLAG_CUSTOM_REGULATORY) - return true; - return false; -} - /* Reap the advantages of previously found beacons */ static void reg_process_beacons(struct wiphy *wiphy) { @@ -1149,39 +1120,29 @@ static void reg_process_beacons(struct wiphy *wiphy) */ if (!last_request) return; - if (!reg_is_world_roaming(wiphy)) - return; wiphy_update_beacon_reg(wiphy); } -static bool is_ht40_not_allowed(struct ieee80211_channel *chan) +static bool is_ht40_allowed(struct ieee80211_channel *chan) { if (!chan) - return true; + return false; if (chan->flags & IEEE80211_CHAN_DISABLED) - return true; + return false; /* This would happen when regulatory rules disallow HT40 completely */ - if (IEEE80211_CHAN_NO_HT40 == (chan->flags & (IEEE80211_CHAN_NO_HT40))) - return true; - return false; + if ((chan->flags & IEEE80211_CHAN_NO_HT40) == IEEE80211_CHAN_NO_HT40) - return false; + return true; } static void reg_process_ht_flags_channel(struct wiphy *wiphy, - enum ieee80211_band band, - unsigned int chan_idx) + struct ieee80211_channel *channel) { - struct ieee80211_supported_band *sband; - struct ieee80211_channel *channel; + struct ieee80211_supported_band *sband = wiphy->bands[channel->band]; struct ieee80211_channel *channel_before = NULL, *channel_after = NULL; unsigned int i; - assert_cfg80211_lock(); - - sband = wiphy->bands[band]; - BUG_ON(chan_idx >= sband->n_channels); - channel = &sband->channels[chan_idx]; - - if (is_ht40_not_allowed(channel)) { + if (!is_ht40_allowed(channel)) { channel->flags |= IEEE80211_CHAN_NO_HT40; return; } @@ -1192,6 +1153,7 @@ static void reg_process_ht_flags_channel(struct wiphy *wiphy, */ for (i = 0; i < sband->n_channels; i++) { struct ieee80211_channel *c = &sband->channels[i]; + if (c->center_freq == (channel->center_freq - 20)) channel_before = c; if (c->center_freq == (channel->center_freq + 20)) @@ -1203,28 +1165,27 @@ static void
reg_process_ht_flags_channel(struct wiphy *wiphy, * if that ever changes we also need to change the below logic * to include that as well. */ - if (is_ht40_not_allowed(channel_before)) + if (!is_ht40_allowed(channel_before)) channel->flags |= IEEE80211_CHAN_NO_HT40MINUS; else channel->flags &= ~IEEE80211_CHAN_NO_HT40MINUS; - if (is_ht40_not_allowed(channel_after)) + if (!is_ht40_allowed(channel_after)) channel->flags |= IEEE80211_CHAN_NO_HT40PLUS; else channel->flags &= ~IEEE80211_CHAN_NO_HT40PLUS; } static void reg_process_ht_flags_band(struct wiphy *wiphy, - enum ieee80211_band band) + struct ieee80211_supported_band *sband) { unsigned int i; - struct ieee80211_supported_band *sband; - BUG_ON(!wiphy->bands[band]); - sband = wiphy->bands[band]; + if (!sband) + return; for (i = 0; i < sband->n_channels; i++) - reg_process_ht_flags_channel(wiphy, band, i); + reg_process_ht_flags_channel(wiphy, &sband->channels[i]); } static void reg_process_ht_flags(struct wiphy *wiphy) @@ -1234,34 +1195,29 @@ static void reg_process_ht_flags(struct wiphy *wiphy) if (!wiphy) return; - for (band = 0; band < IEEE80211_NUM_BANDS; band++) { - if (wiphy->bands[band]) - reg_process_ht_flags_band(wiphy, band); - } - + for (band = 0; band < IEEE80211_NUM_BANDS; band++) + reg_process_ht_flags_band(wiphy, wiphy->bands[band]); } static void wiphy_update_regulatory(struct wiphy *wiphy, enum nl80211_reg_initiator initiator) { enum ieee80211_band band; - - assert_reg_lock(); + struct regulatory_request *lr = get_last_request(); if (ignore_reg_update(wiphy, initiator)) return; - last_request->dfs_region = cfg80211_regdomain->dfs_region; + lr->dfs_region = get_cfg80211_regdom()->dfs_region; - for (band = 0; band < IEEE80211_NUM_BANDS; band++) { - if (wiphy->bands[band]) - handle_band(wiphy, band, initiator); - } + for (band = 0; band < IEEE80211_NUM_BANDS; band++) + handle_band(wiphy, initiator, wiphy->bands[band]); reg_process_beacons(wiphy); reg_process_ht_flags(wiphy); + if (wiphy->reg_notifier) - wiphy->reg_notifier(wiphy, last_request); + wiphy->reg_notifier(wiphy, lr); } static void update_all_wiphy_regulatory(enum nl80211_reg_initiator initiator) @@ -1269,6 +1225,8 @@ static void update_all_wiphy_regulatory(enum nl80211_reg_initiator initiator) struct cfg80211_registered_device *rdev; struct wiphy *wiphy; + assert_cfg80211_lock(); + list_for_each_entry(rdev, &cfg80211_rdev_list, list) { wiphy = &rdev->wiphy; wiphy_update_regulatory(wiphy, initiator); @@ -1280,53 +1238,40 @@ static void update_all_wiphy_regulatory(enum nl80211_reg_initiator initiator) if (initiator == NL80211_REGDOM_SET_BY_CORE && wiphy->flags & WIPHY_FLAG_CUSTOM_REGULATORY && wiphy->reg_notifier) - wiphy->reg_notifier(wiphy, last_request); + wiphy->reg_notifier(wiphy, get_last_request()); } } static void handle_channel_custom(struct wiphy *wiphy, - enum ieee80211_band band, - unsigned int chan_idx, + struct ieee80211_channel *chan, const struct ieee80211_regdomain *regd) { - int r; - u32 desired_bw_khz = MHZ_TO_KHZ(20); u32 bw_flags = 0; const struct ieee80211_reg_rule *reg_rule = NULL; const struct ieee80211_power_rule *power_rule = NULL; const struct ieee80211_freq_range *freq_range = NULL; - struct ieee80211_supported_band *sband; - struct ieee80211_channel *chan; - - assert_reg_lock(); - - sband = wiphy->bands[band]; - BUG_ON(chan_idx >= sband->n_channels); - chan = &sband->channels[chan_idx]; - r = freq_reg_info_regd(wiphy, - MHZ_TO_KHZ(chan->center_freq), - desired_bw_khz, - &reg_rule, - regd); + reg_rule = freq_reg_info_regd(wiphy,
MHZ_TO_KHZ(chan->center_freq), + regd); - if (r) { - REG_DBG_PRINT("Disabling freq %d MHz as custom " - "regd has no rule that fits a %d MHz " - "wide channel\n", - chan->center_freq, - KHZ_TO_MHZ(desired_bw_khz)); + if (IS_ERR(reg_rule)) { + REG_DBG_PRINT("Disabling freq %d MHz as custom regd has no rule that fits it\n", + chan->center_freq); chan->flags = IEEE80211_CHAN_DISABLED; return; } - chan_reg_rule_print_dbg(chan, desired_bw_khz, reg_rule); + chan_reg_rule_print_dbg(chan, reg_rule); power_rule = &reg_rule->power_rule; freq_range = &reg_rule->freq_range; if (freq_range->max_bandwidth_khz < MHZ_TO_KHZ(40)) bw_flags = IEEE80211_CHAN_NO_HT40; + if (freq_range->max_bandwidth_khz < MHZ_TO_KHZ(80)) + bw_flags |= IEEE80211_CHAN_NO_80MHZ; + if (freq_range->max_bandwidth_khz < MHZ_TO_KHZ(160)) + bw_flags |= IEEE80211_CHAN_NO_160MHZ; chan->flags |= map_regdom_flags(reg_rule->flags) | bw_flags; chan->max_antenna_gain = (int) MBI_TO_DBI(power_rule->max_antenna_gain); @@ -1334,17 +1279,17 @@ static void handle_channel_custom(struct wiphy *wiphy, (int) MBM_TO_DBM(power_rule->max_eirp); } -static void handle_band_custom(struct wiphy *wiphy, enum ieee80211_band band, +static void handle_band_custom(struct wiphy *wiphy, + struct ieee80211_supported_band *sband, const struct ieee80211_regdomain *regd) { unsigned int i; - struct ieee80211_supported_band *sband; - BUG_ON(!wiphy->bands[band]); - sband = wiphy->bands[band]; + if (!sband) + return; for (i = 0; i < sband->n_channels; i++) - handle_channel_custom(wiphy, band, i, regd); + handle_channel_custom(wiphy, &sband->channels[i], regd); } /* Used by drivers prior to wiphy registration */ @@ -1354,60 +1299,50 @@ void wiphy_apply_custom_regulatory(struct wiphy *wiphy, enum ieee80211_band band; unsigned int bands_set = 0; - mutex_lock(&reg_mutex); for (band = 0; band < IEEE80211_NUM_BANDS; band++) { if (!wiphy->bands[band]) continue; - handle_band_custom(wiphy, band, regd); + handle_band_custom(wiphy, wiphy->bands[band], regd); bands_set++; } - mutex_unlock(&reg_mutex); /* * no point in calling this if it won't have any effect - * on your device's supportd bands. + * on your device's supported bands. */ WARN_ON(!bands_set); } EXPORT_SYMBOL(wiphy_apply_custom_regulatory); -/* - * Return value which can be used by ignore_request() to indicate - * it has been determined we should intersect two regulatory domains - */ -#define REG_INTERSECT 1 - /* This has the logic which determines when a new request * should be ignored.
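For context, wiphy_apply_custom_regulatory() above is meant to be fed a static table built with REG_RULE(), before wiphy_register(). A minimal sketch of the driver side; the rule values are illustrative only:

static const struct ieee80211_regdomain sketch_regdom = {
	.n_reg_rules = 2,
	.alpha2 = "99",	/* regdomain built by the driver itself */
	.reg_rules = {
		/* IEEE 802.11b/g, channels 1..11 */
		REG_RULE(2412 - 10, 2462 + 10, 40, 6, 20, 0),
		/* IEEE 802.11a, channels 36..48, passive scan */
		REG_RULE(5180 - 10, 5240 + 10, 40, 6, 20,
			 NL80211_RRF_PASSIVE_SCAN),
	},
};

	/* in the driver, before wiphy_register() */
	wiphy->flags |= WIPHY_FLAG_CUSTOM_REGULATORY;
	wiphy_apply_custom_regulatory(wiphy, &sketch_regdom);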
*/ -static int ignore_request(struct wiphy *wiphy, +static enum reg_request_treatment +get_reg_request_treatment(struct wiphy *wiphy, struct regulatory_request *pending_request) { struct wiphy *last_wiphy = NULL; - - assert_cfg80211_lock(); + struct regulatory_request *lr = get_last_request(); /* All initial requests are respected */ - if (!last_request) - return 0; + if (!lr) + return REG_REQ_OK; switch (pending_request->initiator) { case NL80211_REGDOM_SET_BY_CORE: - return 0; + return REG_REQ_OK; case NL80211_REGDOM_SET_BY_COUNTRY_IE: - - if (reg_request_cell_base(last_request)) { + if (reg_request_cell_base(lr)) { /* Trust a Cell base station over the AP's country IE */ if (regdom_changes(pending_request->alpha2)) - return -EOPNOTSUPP; - return -EALREADY; + return REG_REQ_IGNORE; + return REG_REQ_ALREADY_SET; } - last_wiphy = wiphy_idx_to_wiphy(last_request->wiphy_idx); + last_wiphy = wiphy_idx_to_wiphy(lr->wiphy_idx); if (unlikely(!is_an_alpha2(pending_request->alpha2))) return -EINVAL; - if (last_request->initiator == - NL80211_REGDOM_SET_BY_COUNTRY_IE) { + if (lr->initiator == NL80211_REGDOM_SET_BY_COUNTRY_IE) { if (last_wiphy != wiphy) { /* * Two cards with two APs claiming different @@ -1416,23 +1351,23 @@ static int ignore_request(struct wiphy *wiphy, * to be correct. Reject second one for now. */ if (regdom_changes(pending_request->alpha2)) - return -EOPNOTSUPP; - return -EALREADY; + return REG_REQ_IGNORE; + return REG_REQ_ALREADY_SET; } /* * Two consecutive Country IE hints on the same wiphy. * This should be picked up early by the driver/stack */ if (WARN_ON(regdom_changes(pending_request->alpha2))) - return 0; - return -EALREADY; + return REG_REQ_OK; + return REG_REQ_ALREADY_SET; } return 0; case NL80211_REGDOM_SET_BY_DRIVER: - if (last_request->initiator == NL80211_REGDOM_SET_BY_CORE) { + if (lr->initiator == NL80211_REGDOM_SET_BY_CORE) { if (regdom_changes(pending_request->alpha2)) - return 0; - return -EALREADY; + return REG_REQ_OK; + return REG_REQ_ALREADY_SET; } /* @@ -1440,59 +1375,59 @@ static int ignore_request(struct wiphy *wiphy, * back in or if you add a new device for which the previously * loaded card also agrees on the regulatory domain. 
*/ - if (last_request->initiator == NL80211_REGDOM_SET_BY_DRIVER && + if (lr->initiator == NL80211_REGDOM_SET_BY_DRIVER && !regdom_changes(pending_request->alpha2)) - return -EALREADY; + return REG_REQ_ALREADY_SET; - return REG_INTERSECT; + return REG_REQ_INTERSECT; case NL80211_REGDOM_SET_BY_USER: if (reg_request_cell_base(pending_request)) return reg_ignore_cell_hint(pending_request); - if (reg_request_cell_base(last_request)) - return -EOPNOTSUPP; + if (reg_request_cell_base(lr)) + return REG_REQ_IGNORE; - if (last_request->initiator == NL80211_REGDOM_SET_BY_COUNTRY_IE) - return REG_INTERSECT; + if (lr->initiator == NL80211_REGDOM_SET_BY_COUNTRY_IE) + return REG_REQ_INTERSECT; /* * If the user knows better the user should set the regdom * to their country before the IE is picked up */ - if (last_request->initiator == NL80211_REGDOM_SET_BY_USER && - last_request->intersect) - return -EOPNOTSUPP; + if (lr->initiator == NL80211_REGDOM_SET_BY_USER && + lr->intersect) + return REG_REQ_IGNORE; /* * Process user requests only after previous user/driver/core * requests have been processed */ - if (last_request->initiator == NL80211_REGDOM_SET_BY_CORE || - last_request->initiator == NL80211_REGDOM_SET_BY_DRIVER || - last_request->initiator == NL80211_REGDOM_SET_BY_USER) { - if (regdom_changes(last_request->alpha2)) - return -EAGAIN; - } + if ((lr->initiator == NL80211_REGDOM_SET_BY_CORE || + lr->initiator == NL80211_REGDOM_SET_BY_DRIVER || + lr->initiator == NL80211_REGDOM_SET_BY_USER) && + regdom_changes(lr->alpha2)) + return REG_REQ_IGNORE; if (!regdom_changes(pending_request->alpha2)) - return -EALREADY; + return REG_REQ_ALREADY_SET; - return 0; + return REG_REQ_OK; } - return -EINVAL; + return REG_REQ_IGNORE; } static void reg_set_request_processed(void) { bool need_more_processing = false; + struct regulatory_request *lr = get_last_request(); - last_request->processed = true; + lr->processed = true; spin_lock(&reg_requests_lock); if (!list_empty(&reg_requests_list)) need_more_processing = true; spin_unlock(&reg_requests_lock); - if (last_request->initiator == NL80211_REGDOM_SET_BY_USER) + if (lr->initiator == NL80211_REGDOM_SET_BY_USER) cancel_delayed_work(&reg_timeout); if (need_more_processing) @@ -1508,116 +1443,122 @@ static void reg_set_request_processed(void) * The Wireless subsystem can use this function to hint to the wireless core * what it believes should be the current regulatory domain. * - * Returns zero if all went fine, %-EALREADY if a regulatory domain had - * already been set or other standard error codes. + * Returns one of the different reg request treatment values.
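Several helpers in this file (reg_copy_regd(), freq_reg_info(), freq_reg_info_regd()) now return either a valid pointer or an errno encoded with ERR_PTR() instead of an int plus an out-parameter. The consuming side is always the same shape, as in __regulatory_hint() just below; a minimal sketch:

	const struct ieee80211_regdomain *regd;

	regd = reg_copy_regd(get_cfg80211_regdom());
	if (IS_ERR(regd))
		return PTR_ERR(regd);	/* e.g. -ENOMEM from kzalloc() */
	rcu_assign_pointer(wiphy->regd, regd);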
* - * Caller must hold &cfg80211_mutex and &reg_mutex + * Caller must hold &reg_mutex */ -static int __regulatory_hint(struct wiphy *wiphy, - struct regulatory_request *pending_request) +static enum reg_request_treatment +__regulatory_hint(struct wiphy *wiphy, + struct regulatory_request *pending_request) { + const struct ieee80211_regdomain *regd; bool intersect = false; - int r = 0; - - assert_cfg80211_lock(); + enum reg_request_treatment treatment; + struct regulatory_request *lr; - r = ignore_request(wiphy, pending_request); + treatment = get_reg_request_treatment(wiphy, pending_request); - if (r == REG_INTERSECT) { + switch (treatment) { + case REG_REQ_INTERSECT: if (pending_request->initiator == NL80211_REGDOM_SET_BY_DRIVER) { - r = reg_copy_regd(&wiphy->regd, cfg80211_regdomain); - if (r) { + regd = reg_copy_regd(get_cfg80211_regdom()); + if (IS_ERR(regd)) { kfree(pending_request); - return r; + return PTR_ERR(regd); } + rcu_assign_pointer(wiphy->regd, regd); } intersect = true; - } else if (r) { + break; + case REG_REQ_OK: + break; + default: /* * If the regulatory domain being requested by the * driver has already been set just copy it to the * wiphy */ - if (r == -EALREADY && - pending_request->initiator == - NL80211_REGDOM_SET_BY_DRIVER) { - r = reg_copy_regd(&wiphy->regd, cfg80211_regdomain); - if (r) { + if (treatment == REG_REQ_ALREADY_SET && + pending_request->initiator == NL80211_REGDOM_SET_BY_DRIVER) { + regd = reg_copy_regd(get_cfg80211_regdom()); + if (IS_ERR(regd)) { kfree(pending_request); - return r; + return REG_REQ_IGNORE; } - r = -EALREADY; + treatment = REG_REQ_ALREADY_SET; + rcu_assign_pointer(wiphy->regd, regd); goto new_request; } kfree(pending_request); - return r; + return treatment; } new_request: - if (last_request != &core_request_world) - kfree(last_request); + lr = get_last_request(); + if (lr != &core_request_world && lr) + kfree_rcu(lr, rcu_head); - last_request = pending_request; - last_request->intersect = intersect; + pending_request->intersect = intersect; + pending_request->processed = false; + rcu_assign_pointer(last_request, pending_request); + lr = pending_request; pending_request = NULL; - if (last_request->initiator == NL80211_REGDOM_SET_BY_USER) { - user_alpha2[0] = last_request->alpha2[0]; - user_alpha2[1] = last_request->alpha2[1]; + if (lr->initiator == NL80211_REGDOM_SET_BY_USER) { + user_alpha2[0] = lr->alpha2[0]; + user_alpha2[1] = lr->alpha2[1]; } - /* When r == REG_INTERSECT we do need to call CRDA */ - if (r < 0) { + /* When treatment is REG_REQ_INTERSECT we do need to call CRDA */ + if (treatment != REG_REQ_OK && treatment != REG_REQ_INTERSECT) { /* * Since CRDA will not be called in this case as we already * have applied the requested regulatory domain before we just * inform userspace we have processed the request */ - if (r == -EALREADY) { - nl80211_send_reg_change_event(last_request); + if (treatment == REG_REQ_ALREADY_SET) { + nl80211_send_reg_change_event(lr); reg_set_request_processed(); } - return r; + return treatment; } - return call_crda(last_request->alpha2); + if (call_crda(lr->alpha2)) + return REG_REQ_IGNORE; + return REG_REQ_OK; } /* This processes *all* regulatory hints */ static void reg_process_hint(struct regulatory_request *reg_request, enum nl80211_reg_initiator reg_initiator) { - int r = 0; struct wiphy *wiphy = NULL; - BUG_ON(!reg_request->alpha2); + if (WARN_ON(!reg_request->alpha2)) + return; - if (wiphy_idx_valid(reg_request->wiphy_idx)) + if (reg_request->wiphy_idx != WIPHY_IDX_INVALID) wiphy =
wiphy_idx_to_wiphy(reg_request->wiphy_idx); - if (reg_initiator == NL80211_REGDOM_SET_BY_DRIVER && - !wiphy) { + if (reg_initiator == NL80211_REGDOM_SET_BY_DRIVER && !wiphy) { kfree(reg_request); return; } - r = __regulatory_hint(wiphy, reg_request); - /* This is required so that the orig_* parameters are saved */ - if (r == -EALREADY && wiphy && - wiphy->flags & WIPHY_FLAG_STRICT_REGULATORY) { - wiphy_update_regulatory(wiphy, reg_initiator); - return; + switch (__regulatory_hint(wiphy, reg_request)) { + case REG_REQ_ALREADY_SET: + /* This is required so that the orig_* parameters are saved */ + if (wiphy && wiphy->flags & WIPHY_FLAG_STRICT_REGULATORY) + wiphy_update_regulatory(wiphy, reg_initiator); + break; + default: + if (reg_initiator == NL80211_REGDOM_SET_BY_USER) + schedule_delayed_work(&reg_timeout, + msecs_to_jiffies(3142)); + break; } - - /* - * We only time out user hints, given that they should be the only - * source of bogus requests. - */ - if (r != -EALREADY && - reg_initiator == NL80211_REGDOM_SET_BY_USER) - schedule_delayed_work(&reg_timeout, msecs_to_jiffies(3142)); } /* @@ -1627,15 +1568,15 @@ static void reg_process_hint(struct regulatory_request *reg_request, */ static void reg_process_pending_hints(void) { - struct regulatory_request *reg_request; + struct regulatory_request *reg_request, *lr; mutex_lock(&cfg80211_mutex); mutex_lock(&reg_mutex); + lr = get_last_request(); /* When last_request->processed becomes true this will be rescheduled */ - if (last_request && !last_request->processed) { - REG_DBG_PRINT("Pending regulatory request, waiting " - "for it to be processed...\n"); + if (lr && !lr->processed) { + REG_DBG_PRINT("Pending regulatory request, waiting for it to be processed...\n"); goto out; } @@ -1666,23 +1607,14 @@ static void reg_process_pending_beacon_hints(void) struct cfg80211_registered_device *rdev; struct reg_beacon *pending_beacon, *tmp; - /* - * No need to hold the reg_mutex here as we just touch wiphys - * and do not read or access regulatory variables.
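The last_request handoff in __regulatory_hint() above is the usual RCU publish/retire idiom: readers go through rcu_dereference()-style accessors (get_last_request()), while the single writer, holding reg_mutex, swaps the pointer and lets kfree_rcu() defer the free past a grace period. In isolation (new_request stands for the incoming hint):

	struct regulatory_request *old;

	/* writer side, reg_mutex held */
	old = get_last_request();
	rcu_assign_pointer(last_request, new_request);	/* publish */
	if (old && old != &core_request_world)
		kfree_rcu(old, rcu_head);	/* retire after a grace period */

(The code above retires before publishing; that ordering is also safe because kfree_rcu() only frees once all pre-existing readers are done.)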
- */ mutex_lock(&cfg80211_mutex); + mutex_lock(&reg_mutex); /* This goes through the _pending_ beacon list */ spin_lock_bh(&reg_pending_beacons_lock); - if (list_empty(&reg_pending_beacons)) { - spin_unlock_bh(&reg_pending_beacons_lock); - goto out; - } - list_for_each_entry_safe(pending_beacon, tmp, &reg_pending_beacons, list) { - list_del_init(&pending_beacon->list); /* Applies the beacon hint to current wiphys */ @@ -1694,7 +1626,7 @@ static void reg_process_pending_beacon_hints(void) } spin_unlock_bh(&reg_pending_beacons_lock); -out: + mutex_unlock(&reg_mutex); mutex_unlock(&cfg80211_mutex); } @@ -1706,10 +1638,8 @@ static void reg_todo(struct work_struct *work) static void queue_regulatory_request(struct regulatory_request *request) { - if (isalpha(request->alpha2[0])) - request->alpha2[0] = toupper(request->alpha2[0]); - if (isalpha(request->alpha2[1])) - request->alpha2[1] = toupper(request->alpha2[1]); + request->alpha2[0] = toupper(request->alpha2[0]); + request->alpha2[1] = toupper(request->alpha2[1]); spin_lock(&reg_requests_lock); list_add_tail(&request->list, &reg_requests_list); @@ -1726,8 +1656,7 @@ static int regulatory_hint_core(const char *alpha2) { struct regulatory_request *request; - request = kzalloc(sizeof(struct regulatory_request), - GFP_KERNEL); + request = kzalloc(sizeof(struct regulatory_request), GFP_KERNEL); if (!request) return -ENOMEM; @@ -1746,13 +1675,14 @@ int regulatory_hint_user(const char *alpha2, { struct regulatory_request *request; - BUG_ON(!alpha2); + if (WARN_ON(!alpha2)) + return -EINVAL; request = kzalloc(sizeof(struct regulatory_request), GFP_KERNEL); if (!request) return -ENOMEM; - request->wiphy_idx = WIPHY_IDX_STALE; + request->wiphy_idx = WIPHY_IDX_INVALID; request->alpha2[0] = alpha2[0]; request->alpha2[1] = alpha2[1]; request->initiator = NL80211_REGDOM_SET_BY_USER; @@ -1768,8 +1698,8 @@ int regulatory_hint(struct wiphy *wiphy, const char *alpha2) { struct regulatory_request *request; - BUG_ON(!alpha2); - BUG_ON(!wiphy); + if (WARN_ON(!alpha2 || !wiphy)) + return -EINVAL; request = kzalloc(sizeof(struct regulatory_request), GFP_KERNEL); if (!request) @@ -1777,9 +1707,6 @@ int regulatory_hint(struct wiphy *wiphy, const char *alpha2) request->wiphy_idx = get_wiphy_idx(wiphy); - /* Must have registered wiphy first */ - BUG_ON(!wiphy_idx_valid(request->wiphy_idx)); - request->alpha2[0] = alpha2[0]; request->alpha2[1] = alpha2[1]; request->initiator = NL80211_REGDOM_SET_BY_DRIVER; @@ -1794,18 +1721,17 @@ EXPORT_SYMBOL(regulatory_hint); * We hold wdev_lock() here so we cannot hold cfg80211_mutex() and * therefore cannot iterate over the rdev list here. */ -void regulatory_hint_11d(struct wiphy *wiphy, - enum ieee80211_band band, - const u8 *country_ie, - u8 country_ie_len) +void regulatory_hint_11d(struct wiphy *wiphy, enum ieee80211_band band, + const u8 *country_ie, u8 country_ie_len) { char alpha2[2]; enum environment_cap env = ENVIRON_ANY; - struct regulatory_request *request; + struct regulatory_request *request, *lr; mutex_lock(&reg_mutex); + lr = get_last_request(); - if (unlikely(!last_request)) + if (unlikely(!lr)) goto out; /* IE len must be evenly divisible by 2 */ @@ -1828,9 +1754,8 @@ void regulatory_hint_11d(struct wiphy *wiphy, * We leave conflict resolution to the workqueue, where we can hold * cfg80211_mutex.
*/ - if (likely(last_request->initiator == - NL80211_REGDOM_SET_BY_COUNTRY_IE && - wiphy_idx_valid(last_request->wiphy_idx))) + if (lr->initiator == NL80211_REGDOM_SET_BY_COUNTRY_IE && + lr->wiphy_idx != WIPHY_IDX_INVALID) goto out; request = kzalloc(sizeof(struct regulatory_request), GFP_KERNEL); @@ -1843,12 +1768,7 @@ void regulatory_hint_11d(struct wiphy *wiphy, request->initiator = NL80211_REGDOM_SET_BY_COUNTRY_IE; request->country_ie_env = env; - mutex_unlock(&reg_mutex); - queue_regulatory_request(request); - - return; - out: mutex_unlock(&reg_mutex); } @@ -1863,8 +1783,7 @@ static void restore_alpha2(char *alpha2, bool reset_user) if (is_user_regdom_saved()) { /* Unless we're asked to ignore it and reset it */ if (reset_user) { - REG_DBG_PRINT("Restoring regulatory settings " - "including user preference\n"); + REG_DBG_PRINT("Restoring regulatory settings including user preference\n"); user_alpha2[0] = '9'; user_alpha2[1] = '7'; @@ -1874,26 +1793,20 @@ static void restore_alpha2(char *alpha2, bool reset_user) * back as they were for a full restore. */ if (!is_world_regdom(ieee80211_regdom)) { - REG_DBG_PRINT("Keeping preference on " - "module parameter ieee80211_regdom: %c%c\n", - ieee80211_regdom[0], - ieee80211_regdom[1]); + REG_DBG_PRINT("Keeping preference on module parameter ieee80211_regdom: %c%c\n", + ieee80211_regdom[0], ieee80211_regdom[1]); alpha2[0] = ieee80211_regdom[0]; alpha2[1] = ieee80211_regdom[1]; } } else { - REG_DBG_PRINT("Restoring regulatory settings " - "while preserving user preference for: %c%c\n", - user_alpha2[0], - user_alpha2[1]); + REG_DBG_PRINT("Restoring regulatory settings while preserving user preference for: %c%c\n", + user_alpha2[0], user_alpha2[1]); alpha2[0] = user_alpha2[0]; alpha2[1] = user_alpha2[1]; } } else if (!is_world_regdom(ieee80211_regdom)) { - REG_DBG_PRINT("Keeping preference on " - "module parameter ieee80211_regdom: %c%c\n", - ieee80211_regdom[0], - ieee80211_regdom[1]); + REG_DBG_PRINT("Keeping preference on module parameter ieee80211_regdom: %c%c\n", + ieee80211_regdom[0], ieee80211_regdom[1]); alpha2[0] = ieee80211_regdom[0]; alpha2[1] = ieee80211_regdom[1]; } else @@ -1948,7 +1861,7 @@ static void restore_regulatory_settings(bool reset_user) mutex_lock(&cfg80211_mutex); mutex_lock(&reg_mutex); - reset_regdomains(true); + reset_regdomains(true, &world_regdom); restore_alpha2(alpha2, reset_user); /* @@ -1958,49 +1871,35 @@ static void restore_regulatory_settings(bool reset_user) * settings.
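regulatory_hint(), defined above, is the driver-facing entry point for country hints; typical use after reading a country code from firmware or EEPROM (the alpha2 value is illustrative):

	/* driver probe path, after wiphy_register() */
	char alpha2[2] = { 'U', 'S' };	/* e.g. read from EEPROM */
	int err = regulatory_hint(wiphy, alpha2);
	if (err)
		wiphy_dbg(wiphy, "regulatory hint failed: %d\n", err);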
*/ spin_lock(&reg_requests_lock); - if (!list_empty(&reg_requests_list)) { - list_for_each_entry_safe(reg_request, tmp, - &reg_requests_list, list) { - if (reg_request->initiator != - NL80211_REGDOM_SET_BY_USER) - continue; - list_move_tail(&reg_request->list, &tmp_reg_req_list); - } + list_for_each_entry_safe(reg_request, tmp, &reg_requests_list, list) { + if (reg_request->initiator != NL80211_REGDOM_SET_BY_USER) + continue; + list_move_tail(&reg_request->list, &tmp_reg_req_list); } spin_unlock(&reg_requests_lock); /* Clear beacon hints */ spin_lock_bh(&reg_pending_beacons_lock); - if (!list_empty(&reg_pending_beacons)) { - list_for_each_entry_safe(reg_beacon, btmp, - &reg_pending_beacons, list) { - list_del(&reg_beacon->list); - kfree(reg_beacon); - } + list_for_each_entry_safe(reg_beacon, btmp, &reg_pending_beacons, list) { + list_del(&reg_beacon->list); + kfree(reg_beacon); } spin_unlock_bh(&reg_pending_beacons_lock); - if (!list_empty(&reg_beacon_list)) { - list_for_each_entry_safe(reg_beacon, btmp, - &reg_beacon_list, list) { - list_del(&reg_beacon->list); - kfree(reg_beacon); - } + list_for_each_entry_safe(reg_beacon, btmp, &reg_beacon_list, list) { + list_del(&reg_beacon->list); + kfree(reg_beacon); } /* First restore to the basic regulatory settings */ - cfg80211_regdomain = cfg80211_world_regdom; - world_alpha2[0] = cfg80211_regdomain->alpha2[0]; - world_alpha2[1] = cfg80211_regdomain->alpha2[1]; + world_alpha2[0] = cfg80211_world_regdom->alpha2[0]; + world_alpha2[1] = cfg80211_world_regdom->alpha2[1]; list_for_each_entry(rdev, &cfg80211_rdev_list, list) { if (rdev->wiphy.flags & WIPHY_FLAG_CUSTOM_REGULATORY) restore_custom_reg_settings(&rdev->wiphy); } - mutex_unlock(&reg_mutex); - mutex_unlock(&cfg80211_mutex); - regulatory_hint_core(world_alpha2); /* @@ -2011,20 +1910,8 @@ static void restore_regulatory_settings(bool reset_user) if (is_an_alpha2(alpha2)) regulatory_hint_user(user_alpha2, NL80211_USER_REG_HINT_USER); - if (list_empty(&tmp_reg_req_list)) - return; - - mutex_lock(&cfg80211_mutex); - mutex_lock(&reg_mutex); - spin_lock(&reg_requests_lock); - list_for_each_entry_safe(reg_request, tmp, &tmp_reg_req_list, list) { - REG_DBG_PRINT("Adding request for country %c%c back " - "into the queue\n", - reg_request->alpha2[0], - reg_request->alpha2[1]); - list_move_tail(&reg_request->list, &reg_requests_list); - } + list_splice_tail_init(&tmp_reg_req_list, &reg_requests_list); spin_unlock(&reg_requests_lock); mutex_unlock(&reg_mutex); @@ -2037,8 +1924,7 @@ static void restore_regulatory_settings(bool reset_user) void regulatory_hint_disconnect(void) { - REG_DBG_PRINT("All devices are disconnected, going to " - "restore regulatory settings\n"); + REG_DBG_PRINT("All devices are disconnected, going to restore regulatory settings\n"); restore_regulatory_settings(false); } @@ -2051,31 +1937,48 @@ static bool freq_is_chan_12_13_14(u16 freq) return false; } +static bool pending_reg_beacon(struct ieee80211_channel *beacon_chan) +{ + struct reg_beacon *pending_beacon; + + list_for_each_entry(pending_beacon, &reg_pending_beacons, list) + if (beacon_chan->center_freq == + pending_beacon->chan.center_freq) + return true; + return false; +} + int regulatory_hint_found_beacon(struct wiphy *wiphy, struct ieee80211_channel *beacon_chan, gfp_t gfp) { struct reg_beacon *reg_beacon; + bool processing; - if (likely((beacon_chan->beacon_found || - (beacon_chan->flags & IEEE80211_CHAN_RADAR) || + if (beacon_chan->beacon_found || + beacon_chan->flags & IEEE80211_CHAN_RADAR || (beacon_chan->band == IEEE80211_BAND_2GHZ && -
!freq_is_chan_12_13_14(beacon_chan->center_freq))) + return 0; + + spin_lock_bh(&reg_pending_beacons_lock); + processing = pending_reg_beacon(beacon_chan); + spin_unlock_bh(&reg_pending_beacons_lock); + + if (processing) return 0; reg_beacon = kzalloc(sizeof(struct reg_beacon), gfp); if (!reg_beacon) return -ENOMEM; - REG_DBG_PRINT("Found new beacon on " - "frequency: %d MHz (Ch %d) on %s\n", + REG_DBG_PRINT("Found new beacon on frequency: %d MHz (Ch %d) on %s\n", beacon_chan->center_freq, ieee80211_frequency_to_channel(beacon_chan->center_freq), wiphy_name(wiphy)); memcpy(&reg_beacon->chan, beacon_chan, - sizeof(struct ieee80211_channel)); - + sizeof(struct ieee80211_channel)); /* * Since we can be called from BH or non-BH context @@ -2155,21 +2058,19 @@ static void print_dfs_region(u8 dfs_region) pr_info(" DFS Master region JP"); break; default: - pr_info(" DFS Master region Uknown"); + pr_info(" DFS Master region Unknown"); break; } } static void print_regdomain(const struct ieee80211_regdomain *rd) { + struct regulatory_request *lr = get_last_request(); if (is_intersected_alpha2(rd->alpha2)) { - - if (last_request->initiator == - NL80211_REGDOM_SET_BY_COUNTRY_IE) { + if (lr->initiator == NL80211_REGDOM_SET_BY_COUNTRY_IE) { struct cfg80211_registered_device *rdev; - rdev = cfg80211_rdev_by_wiphy_idx( - last_request->wiphy_idx); + rdev = cfg80211_rdev_by_wiphy_idx(lr->wiphy_idx); if (rdev) { pr_info("Current regulatory domain updated by AP to: %c%c\n", rdev->country_ie_alpha2[0], @@ -2178,22 +2079,21 @@ static void print_regdomain(const struct ieee80211_regdomain *rd) pr_info("Current regulatory domain intersected:\n"); } else pr_info("Current regulatory domain intersected:\n"); - } else if (is_world_regdom(rd->alpha2)) + } else if (is_world_regdom(rd->alpha2)) { pr_info("World regulatory domain updated:\n"); - else { + } else { if (is_unknown_alpha2(rd->alpha2)) pr_info("Regulatory domain changed to driver built-in settings (unknown country)\n"); else { - if (reg_request_cell_base(last_request)) - pr_info("Regulatory domain changed " - "to country: %c%c by Cell Station\n", + if (reg_request_cell_base(lr)) + pr_info("Regulatory domain changed to country: %c%c by Cell Station\n", rd->alpha2[0], rd->alpha2[1]); else - pr_info("Regulatory domain changed " - "to country: %c%c\n", + pr_info("Regulatory domain changed to country: %c%c\n", rd->alpha2[0], rd->alpha2[1]); } } + print_dfs_region(rd->dfs_region); print_rd_rules(rd); } @@ -2207,22 +2107,23 @@ static void print_regdomain_info(const struct ieee80211_regdomain *rd) /* Takes ownership of rd only if it doesn't fail */ static int __set_regdom(const struct ieee80211_regdomain *rd) { + const struct ieee80211_regdomain *regd; const struct ieee80211_regdomain *intersected_rd = NULL; struct wiphy *request_wiphy; + struct regulatory_request *lr = get_last_request(); + /* Some basic sanity checks first */ + if (!reg_is_valid_request(rd->alpha2)) + return -EINVAL; + if (is_world_regdom(rd->alpha2)) { - if (WARN_ON(!reg_is_valid_request(rd->alpha2))) - return -EINVAL; update_world_regdomain(rd); return 0; } if (!is_alpha2_set(rd->alpha2) && !is_an_alpha2(rd->alpha2) && - !is_unknown_alpha2(rd->alpha2)) - return -EINVAL; - - if (!last_request) + !is_unknown_alpha2(rd->alpha2)) return -EINVAL; /* @@ -2230,7 +2131,7 @@ static int __set_regdom(const struct ieee80211_regdomain *rd) * rd is non static (it means CRDA was present and was used last) * and the pending request came in from a country IE */ - if (last_request->initiator !=
NL80211_REGDOM_SET_BY_COUNTRY_IE) { + if (lr->initiator != NL80211_REGDOM_SET_BY_COUNTRY_IE) { /* * If someone else asked us to change the rd let's only bother * checking if the alpha2 changes if CRDA was already called @@ -2246,29 +2147,23 @@ static int __set_regdom(const struct ieee80211_regdomain *rd) * internal EEPROM data */ - if (WARN_ON(!reg_is_valid_request(rd->alpha2))) - return -EINVAL; - if (!is_valid_rd(rd)) { pr_err("Invalid regulatory domain detected:\n"); print_regdomain_info(rd); return -EINVAL; } - request_wiphy = wiphy_idx_to_wiphy(last_request->wiphy_idx); + request_wiphy = wiphy_idx_to_wiphy(lr->wiphy_idx); if (!request_wiphy && - (last_request->initiator == NL80211_REGDOM_SET_BY_DRIVER || - last_request->initiator == NL80211_REGDOM_SET_BY_COUNTRY_IE)) { + (lr->initiator == NL80211_REGDOM_SET_BY_DRIVER || + lr->initiator == NL80211_REGDOM_SET_BY_COUNTRY_IE)) { schedule_delayed_work(&reg_timeout, 0); return -ENODEV; } - if (!last_request->intersect) { - int r; - - if (last_request->initiator != NL80211_REGDOM_SET_BY_DRIVER) { - reset_regdomains(false); - cfg80211_regdomain = rd; + if (!lr->intersect) { + if (lr->initiator != NL80211_REGDOM_SET_BY_DRIVER) { + reset_regdomains(false, rd); return 0; } @@ -2284,20 +2179,19 @@ static int __set_regdom(const struct ieee80211_regdomain *rd) if (request_wiphy->regd) return -EALREADY; - r = reg_copy_regd(&request_wiphy->regd, rd); - if (r) - return r; + regd = reg_copy_regd(rd); + if (IS_ERR(regd)) + return PTR_ERR(regd); - reset_regdomains(false); - cfg80211_regdomain = rd; + rcu_assign_pointer(request_wiphy->regd, regd); + reset_regdomains(false, rd); return 0; } /* Intersection requires a bit more work */ - if (last_request->initiator != NL80211_REGDOM_SET_BY_COUNTRY_IE) { - - intersected_rd = regdom_intersect(rd, cfg80211_regdomain); + if (lr->initiator != NL80211_REGDOM_SET_BY_COUNTRY_IE) { + intersected_rd = regdom_intersect(rd, get_cfg80211_regdom()); if (!intersected_rd) return -EINVAL; @@ -2306,15 +2200,19 @@ static int __set_regdom(const struct ieee80211_regdomain *rd) * However if a driver requested this specific regulatory * domain we keep it for its private use */ - if (last_request->initiator == NL80211_REGDOM_SET_BY_DRIVER) - request_wiphy->regd = rd; - else + if (lr->initiator == NL80211_REGDOM_SET_BY_DRIVER) { + const struct ieee80211_regdomain *tmp; + + tmp = get_wiphy_regdom(request_wiphy); + rcu_assign_pointer(request_wiphy->regd, rd); + rcu_free_regdom(tmp); + } else { kfree(rd); + } rd = NULL; - reset_regdomains(false); - cfg80211_regdomain = intersected_rd; + reset_regdomains(false, intersected_rd); return 0; } @@ -2326,15 +2224,15 @@ static int __set_regdom(const struct ieee80211_regdomain *rd) /* * Use this call to set the current regulatory domain. Conflicts with * multiple drivers can be ironed out later. Caller must've already - * kmalloc'd the rd structure. Caller must hold cfg80211_mutex
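The reg_copy_regd() change in the hunk above is a common kernel API cleanup: instead of returning an int and filling an out-parameter, the function now returns the new allocation directly and encodes failure in the pointer via ERR_PTR(), which callers unpack with IS_ERR()/PTR_ERR(). A minimal sketch of the convention (hypothetical regdom_example type, not the cfg80211 struct):

    #include <linux/err.h>
    #include <linux/slab.h>

    struct regdom_example { char alpha2[2]; };

    static struct regdom_example *
    regdom_example_copy(const struct regdom_example *src)
    {
        struct regdom_example *d = kmemdup(src, sizeof(*src), GFP_KERNEL);

        /* encode -ENOMEM in the returned pointer, no out-param needed */
        return d ? d : ERR_PTR(-ENOMEM);
    }

A caller then tests the result exactly once: d = regdom_example_copy(src); if (IS_ERR(d)) return PTR_ERR(d);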
*/ int set_regdom(const struct ieee80211_regdomain *rd) { + struct regulatory_request *lr; int r; - assert_cfg80211_lock(); - mutex_lock(&reg_mutex); + lr = get_last_request(); /* Note that this doesn't update the wiphys, this is done below */ r = __set_regdom(rd); @@ -2343,23 +2241,25 @@ int set_regdom(const struct ieee80211_regdomain *rd) reg_set_request_processed(); kfree(rd); - mutex_unlock(&reg_mutex); - return r; + goto out; } /* This would make this whole thing pointless */ - if (!last_request->intersect) - BUG_ON(rd != cfg80211_regdomain); + if (WARN_ON(!lr->intersect && rd != get_cfg80211_regdom())) { + r = -EINVAL; + goto out; + } /* update all wiphys now with the new established regulatory domain */ - update_all_wiphy_regulatory(last_request->initiator); + update_all_wiphy_regulatory(lr->initiator); - print_regdomain(cfg80211_regdomain); + print_regdomain(get_cfg80211_regdom()); - nl80211_send_reg_change_event(last_request); + nl80211_send_reg_change_event(lr); reg_set_request_processed(); + out: mutex_unlock(&reg_mutex); return r; @@ -2367,20 +2267,26 @@ int set_regdom(const struct ieee80211_regdomain *rd) int reg_device_uevent(struct device *dev, struct kobj_uevent_env *env) { - if (last_request && !last_request->processed) { - if (add_uevent_var(env, "COUNTRY=%c%c", - last_request->alpha2[0], - last_request->alpha2[1])) - return -ENOMEM; + struct regulatory_request *lr; + u8 alpha2[2]; + bool add = false; + + rcu_read_lock(); + lr = get_last_request(); + if (lr && !lr->processed) { + memcpy(alpha2, lr->alpha2, 2); + add = true; } + rcu_read_unlock(); + if (add) + return add_uevent_var(env, "COUNTRY=%c%c", + alpha2[0], alpha2[1]); return 0; } void wiphy_regulatory_register(struct wiphy *wiphy) { - assert_cfg80211_lock(); - mutex_lock(&reg_mutex); if (!reg_dev_ignore_cell_hint(wiphy)) @@ -2395,32 +2301,32 @@ void wiphy_regulatory_register(struct wiphy *wiphy) void wiphy_regulatory_deregister(struct wiphy *wiphy) { struct wiphy *request_wiphy = NULL; - - assert_cfg80211_lock(); + struct regulatory_request *lr; mutex_lock(&reg_mutex); + lr = get_last_request(); if (!reg_dev_ignore_cell_hint(wiphy)) reg_num_devs_support_basehint--; - kfree(wiphy->regd); + rcu_free_regdom(get_wiphy_regdom(wiphy)); + rcu_assign_pointer(wiphy->regd, NULL); - if (last_request) - request_wiphy = wiphy_idx_to_wiphy(last_request->wiphy_idx); + if (lr) + request_wiphy = wiphy_idx_to_wiphy(lr->wiphy_idx); if (!request_wiphy || request_wiphy != wiphy) goto out; - last_request->wiphy_idx = WIPHY_IDX_STALE; - last_request->country_ie_env = ENVIRON_ANY; + lr->wiphy_idx = WIPHY_IDX_INVALID; + lr->country_ie_env = ENVIRON_ANY; out: mutex_unlock(&reg_mutex); } static void reg_timeout_work(struct work_struct *work) { - REG_DBG_PRINT("Timeout while waiting for CRDA to reply, " - "restoring regulatory settings\n"); + REG_DBG_PRINT("Timeout while waiting for CRDA to reply, restoring regulatory settings\n"); restore_regulatory_settings(true); } @@ -2439,13 +2345,13 @@ int __init regulatory_init(void) reg_regdb_size_check(); - cfg80211_regdomain = cfg80211_world_regdom; + rcu_assign_pointer(cfg80211_regdomain, cfg80211_world_regdom); user_alpha2[0] = '9'; user_alpha2[1] = '7'; /* We always try to get an update for the static regdomain */ - err = regulatory_hint_core(cfg80211_regdomain->alpha2); + err = regulatory_hint_core(cfg80211_world_regdom->alpha2); if (err) { if (err == -ENOMEM) return err; @@ -2457,10 +2363,6 @@ int __init regulatory_init(void) * errors as non-fatal.
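The reworked reg_device_uevent() above shows the read side of the new RCU scheme: enter rcu_read_lock(), copy out the two bytes actually needed, leave the section, and only then call add_uevent_var(), which may sleep. A standalone sketch of that snapshot pattern (illustrative names, not reg.c's symbols):

    #include <linux/rcupdate.h>
    #include <linux/string.h>
    #include <linux/types.h>

    struct country_hint {
        char alpha2[2];
        bool processed;
    };

    static struct country_hint __rcu *last_hint;

    static bool snapshot_alpha2(char out[2])
    {
        struct country_hint *h;
        bool valid = false;

        rcu_read_lock();
        h = rcu_dereference(last_hint);
        if (h && !h->processed) {
            memcpy(out, h->alpha2, 2);  /* copy the data, don't keep the pointer */
            valid = true;
        }
        rcu_read_unlock();
        return valid;  /* safe to sleep once the read-side section is over */
    }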
*/ pr_err("kobject_uevent_env() was unable to call CRDA during init\n"); -#ifdef CONFIG_CFG80211_REG_DEBUG - /* We want to find out exactly why when debugging */ - WARN_ON(err); -#endif } /* @@ -2474,7 +2376,7 @@ int __init regulatory_init(void) return 0; } -void /* __init_or_exit */ regulatory_exit(void) +void regulatory_exit(void) { struct regulatory_request *reg_request, *tmp; struct reg_beacon *reg_beacon, *btmp; @@ -2482,43 +2384,27 @@ void /* __init_or_exit */ regulatory_exit(void) cancel_work_sync(®_work); cancel_delayed_work_sync(®_timeout); - mutex_lock(&cfg80211_mutex); + /* Lock to suppress warnings */ mutex_lock(®_mutex); - - reset_regdomains(true); + reset_regdomains(true, NULL); + mutex_unlock(®_mutex); dev_set_uevent_suppress(®_pdev->dev, true); platform_device_unregister(reg_pdev); - spin_lock_bh(®_pending_beacons_lock); - if (!list_empty(®_pending_beacons)) { - list_for_each_entry_safe(reg_beacon, btmp, - ®_pending_beacons, list) { - list_del(®_beacon->list); - kfree(reg_beacon); - } + list_for_each_entry_safe(reg_beacon, btmp, ®_pending_beacons, list) { + list_del(®_beacon->list); + kfree(reg_beacon); } - spin_unlock_bh(®_pending_beacons_lock); - if (!list_empty(®_beacon_list)) { - list_for_each_entry_safe(reg_beacon, btmp, - ®_beacon_list, list) { - list_del(®_beacon->list); - kfree(reg_beacon); - } + list_for_each_entry_safe(reg_beacon, btmp, ®_beacon_list, list) { + list_del(®_beacon->list); + kfree(reg_beacon); } - spin_lock(®_requests_lock); - if (!list_empty(®_requests_list)) { - list_for_each_entry_safe(reg_request, tmp, - ®_requests_list, list) { - list_del(®_request->list); - kfree(reg_request); - } + list_for_each_entry_safe(reg_request, tmp, ®_requests_list, list) { + list_del(®_request->list); + kfree(reg_request); } - spin_unlock(®_requests_lock); - - mutex_unlock(®_mutex); - mutex_unlock(&cfg80211_mutex); } diff --git a/net/wireless/reg.h b/net/wireless/reg.h index 4c0a32ffd530..af2d5f8a5d82 100644 --- a/net/wireless/reg.h +++ b/net/wireless/reg.h @@ -16,10 +16,9 @@ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ -extern const struct ieee80211_regdomain *cfg80211_regdomain; +extern const struct ieee80211_regdomain __rcu *cfg80211_regdomain; bool is_world_regdom(const char *alpha2); -bool reg_is_valid_request(const char *alpha2); bool reg_supported_dfs_region(u8 dfs_region); int regulatory_hint_user(const char *alpha2, @@ -55,8 +54,8 @@ bool reg_last_request_cell_base(void); * set the wiphy->disable_beacon_hints to true. */ int regulatory_hint_found_beacon(struct wiphy *wiphy, - struct ieee80211_channel *beacon_chan, - gfp_t gfp); + struct ieee80211_channel *beacon_chan, + gfp_t gfp); /** * regulatory_hint_11d - hints a country IE as a regulatory domain diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 45f1618c8e23..fd99ea495b7e 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -19,55 +19,142 @@ #include "wext-compat.h" #include "rdev-ops.h" +/** + * DOC: BSS tree/list structure + * + * At the top level, the BSS list is kept in both a list in each + * registered device (@bss_list) as well as an RB-tree for faster + * lookup. In the RB-tree, entries can be looked up using their + * channel, MESHID, MESHCONF (for MBSSes) or channel, BSSID, SSID + * for other BSSes. + * + * Due to the possibility of hidden SSIDs, there's a second level + * structure, the "hidden_list" and "hidden_beacon_bss" pointer. 
+ * The hidden_list connects all BSSes belonging to a single AP + * that has a hidden SSID, and connects beacon and probe response + * entries. For a probe response entry for a hidden SSID, the + * hidden_beacon_bss pointer points to the BSS struct holding the + * beacon's information. + * + * Reference counting is done for all these references except for + * the hidden_list, so that a beacon BSS struct that is otherwise + * not referenced has one reference for being on the bss_list and + * one for each probe response entry that points to it using the + * hidden_beacon_bss pointer. When a BSS struct that has such a + * pointer is get/put, the refcount update is also propagated to + * the referenced struct; this ensures that it cannot get removed + * while somebody is using the probe response version. + * + * Note that the hidden_beacon_bss pointer never changes, due to + * the reference counting. Therefore, no locking is needed for + * it. + * + * Also note that the hidden_beacon_bss pointer is only relevant + * if the driver uses something other than the IEs, e.g. private + * data stored in the BSS struct, since the beacon IEs are + * also linked into the probe response struct. + */ + #define IEEE80211_SCAN_RESULT_EXPIRE (30 * HZ) -static void bss_release(struct kref *ref) +static void bss_free(struct cfg80211_internal_bss *bss) { struct cfg80211_bss_ies *ies; - struct cfg80211_internal_bss *bss; - - bss = container_of(ref, struct cfg80211_internal_bss, ref); if (WARN_ON(atomic_read(&bss->hold))) return; - if (bss->pub.free_priv) - bss->pub.free_priv(&bss->pub); - ies = (void *)rcu_access_pointer(bss->pub.beacon_ies); - if (ies) + if (ies && !bss->pub.hidden_beacon_bss) kfree_rcu(ies, rcu_head); ies = (void *)rcu_access_pointer(bss->pub.proberesp_ies); if (ies) kfree_rcu(ies, rcu_head); + /* + * This happens when the module is removed; it doesn't + * really matter any more save for completeness + */ + if (!list_empty(&bss->hidden_list)) + list_del(&bss->hidden_list); + kfree(bss); } -/* must hold dev->bss_lock!
*/ -static void __cfg80211_unlink_bss(struct cfg80211_registered_device *dev, +static inline void bss_ref_get(struct cfg80211_registered_device *dev, + struct cfg80211_internal_bss *bss) +{ + lockdep_assert_held(&dev->bss_lock); + + bss->refcount++; + if (bss->pub.hidden_beacon_bss) { + bss = container_of(bss->pub.hidden_beacon_bss, + struct cfg80211_internal_bss, + pub); + bss->refcount++; + } +} + +static inline void bss_ref_put(struct cfg80211_registered_device *dev, + struct cfg80211_internal_bss *bss) +{ + lockdep_assert_held(&dev->bss_lock); + + if (bss->pub.hidden_beacon_bss) { + struct cfg80211_internal_bss *hbss; + hbss = container_of(bss->pub.hidden_beacon_bss, + struct cfg80211_internal_bss, + pub); + hbss->refcount--; + if (hbss->refcount == 0) + bss_free(hbss); + } + bss->refcount--; + if (bss->refcount == 0) + bss_free(bss); +} + +static bool __cfg80211_unlink_bss(struct cfg80211_registered_device *dev, struct cfg80211_internal_bss *bss) { + lockdep_assert_held(&dev->bss_lock); + + if (!list_empty(&bss->hidden_list)) { + /* + * don't remove the beacon entry if it has + * probe responses associated with it + */ + if (!bss->pub.hidden_beacon_bss) + return false; + /* + * if it's a probe response entry break its + * link to the other entries in the group + */ + list_del_init(&bss->hidden_list); + } + list_del_init(&bss->list); rb_erase(&bss->rbn, &dev->bss_tree); - kref_put(&bss->ref, bss_release); + bss_ref_put(dev, bss); + return true; } -/* must hold dev->bss_lock! */ static void __cfg80211_bss_expire(struct cfg80211_registered_device *dev, unsigned long expire_time) { struct cfg80211_internal_bss *bss, *tmp; bool expired = false; + lockdep_assert_held(&dev->bss_lock); + list_for_each_entry_safe(bss, tmp, &dev->bss_list, list) { if (atomic_read(&bss->hold)) continue; if (!time_after(expire_time, bss->ts)) continue; - __cfg80211_unlink_bss(dev, bss); - expired = true; + if (__cfg80211_unlink_bss(dev, bss)) + expired = true; } if (expired) @@ -82,7 +169,7 @@ void ___cfg80211_scan_done(struct cfg80211_registered_device *rdev, bool leak) union iwreq_data wrqu; #endif - ASSERT_RDEV_LOCK(rdev); + lockdep_assert_held(&rdev->sched_scan_mtx); request = rdev->scan_req; @@ -143,9 +230,9 @@ void __cfg80211_scan_done(struct work_struct *wk) rdev = container_of(wk, struct cfg80211_registered_device, scan_done_wk); - cfg80211_lock_rdev(rdev); + mutex_lock(&rdev->sched_scan_mtx); ___cfg80211_scan_done(rdev, false); - cfg80211_unlock_rdev(rdev); + mutex_unlock(&rdev->sched_scan_mtx); } void cfg80211_scan_done(struct cfg80211_scan_request *request, bool aborted) @@ -234,15 +321,16 @@ int __cfg80211_stop_sched_scan(struct cfg80211_registered_device *rdev, return 0; } -/* must hold dev->bss_lock! 
*/ void cfg80211_bss_age(struct cfg80211_registered_device *dev, unsigned long age_secs) { struct cfg80211_internal_bss *bss; unsigned long age_jiffies = msecs_to_jiffies(age_secs * MSEC_PER_SEC); + spin_lock_bh(&dev->bss_lock); list_for_each_entry(bss, &dev->bss_list, list) bss->ts -= age_jiffies; + spin_unlock_bh(&dev->bss_lock); } void cfg80211_bss_expire(struct cfg80211_registered_device *dev) @@ -277,40 +365,24 @@ const u8 *cfg80211_find_vendor_ie(unsigned int oui, u8 oui_type, if (!pos) return NULL; - if (end - pos < sizeof(*ie)) - return NULL; - ie = (struct ieee80211_vendor_ie *)pos; + + /* make sure we can access ie->len */ + BUILD_BUG_ON(offsetof(struct ieee80211_vendor_ie, len) != 1); + + if (ie->len < sizeof(*ie)) + goto cont; + ie_oui = ie->oui[0] << 16 | ie->oui[1] << 8 | ie->oui[2]; if (ie_oui == oui && ie->oui_type == oui_type) return pos; - +cont: pos += 2 + ie->len; } return NULL; } EXPORT_SYMBOL(cfg80211_find_vendor_ie); -static int cmp_ies(u8 num, const u8 *ies1, int len1, const u8 *ies2, int len2) -{ - const u8 *ie1 = cfg80211_find_ie(num, ies1, len1); - const u8 *ie2 = cfg80211_find_ie(num, ies2, len2); - - /* equal if both missing */ - if (!ie1 && !ie2) - return 0; - /* sort missing IE before (left of) present IE */ - if (!ie1) - return -1; - if (!ie2) - return 1; - - /* sort by length first, then by contents */ - if (ie1[1] != ie2[1]) - return ie2[1] - ie1[1]; - return memcmp(ie1 + 2, ie2 + 2, ie1[1]); -} - static bool is_bss(struct cfg80211_bss *a, const u8 *bssid, const u8 *ssid, size_t ssid_len) { @@ -334,109 +406,30 @@ static bool is_bss(struct cfg80211_bss *a, const u8 *bssid, return memcmp(ssidie + 2, ssid, ssid_len) == 0; } -static bool is_mesh_bss(struct cfg80211_bss *a) -{ - const struct cfg80211_bss_ies *ies; - const u8 *ie; - - if (!WLAN_CAPABILITY_IS_STA_BSS(a->capability)) - return false; - - ies = rcu_access_pointer(a->ies); - if (!ies) - return false; - - ie = cfg80211_find_ie(WLAN_EID_MESH_ID, ies->data, ies->len); - if (!ie) - return false; - - ie = cfg80211_find_ie(WLAN_EID_MESH_CONFIG, ies->data, ies->len); - if (!ie) - return false; - - return true; -} - -static bool is_mesh(struct cfg80211_bss *a, - const u8 *meshid, size_t meshidlen, - const u8 *meshcfg) -{ - const struct cfg80211_bss_ies *ies; - const u8 *ie; - - if (!WLAN_CAPABILITY_IS_STA_BSS(a->capability)) - return false; - - ies = rcu_access_pointer(a->ies); - if (!ies) - return false; - - ie = cfg80211_find_ie(WLAN_EID_MESH_ID, ies->data, ies->len); - if (!ie) - return false; - if (ie[1] != meshidlen) - return false; - if (memcmp(ie + 2, meshid, meshidlen)) - return false; - - ie = cfg80211_find_ie(WLAN_EID_MESH_CONFIG, ies->data, ies->len); - if (!ie) - return false; - if (ie[1] != sizeof(struct ieee80211_meshconf_ie)) - return false; - - /* - * Ignore mesh capability (last two bytes of the IE) when - * comparing since that may differ between stations taking - * part in the same mesh. 
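Stepping back to the cfg80211_find_vendor_ie() change above: the rewritten loop checks each element's own length field (so a too-short vendor element is skipped with the cont: label rather than aborting the scan). The generic walk such helpers are built on is the usual { id, len, data[len] } TLV scan; a self-contained sketch of that walk (not cfg80211_find_ie() itself):

    #include <linux/types.h>

    /* Find element @id in an IEEE 802.11 IE buffer; NULL if absent or truncated. */
    static const u8 *find_element(u8 id, const u8 *buf, size_t len)
    {
        const u8 *pos = buf, *end = buf + len;

        while (end - pos >= 2) {
            u8 elen = pos[1];

            if (elen > end - pos - 2)
                return NULL;        /* claimed length overruns the buffer */
            if (pos[0] == id)
                return pos;
            pos += 2 + elen;        /* header (2 bytes) + payload */
        }
        return NULL;
    }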
- */ - return memcmp(ie + 2, meshcfg, - sizeof(struct ieee80211_meshconf_ie) - 2) == 0; -} +/** + * enum bss_compare_mode - BSS compare mode + * @BSS_CMP_REGULAR: regular compare mode (for insertion and normal find) + * @BSS_CMP_HIDE_ZLEN: find hidden SSID with zero-length mode + * @BSS_CMP_HIDE_NUL: find hidden SSID with NUL-ed out mode + */ +enum bss_compare_mode { + BSS_CMP_REGULAR, + BSS_CMP_HIDE_ZLEN, + BSS_CMP_HIDE_NUL, +}; -static int cmp_bss_core(struct cfg80211_bss *a, struct cfg80211_bss *b) +static int cmp_bss(struct cfg80211_bss *a, + struct cfg80211_bss *b, + enum bss_compare_mode mode) { const struct cfg80211_bss_ies *a_ies, *b_ies; - int r; + const u8 *ie1 = NULL; + const u8 *ie2 = NULL; + int i, r; if (a->channel != b->channel) return b->channel->center_freq - a->channel->center_freq; - if (is_mesh_bss(a) && is_mesh_bss(b)) { - a_ies = rcu_access_pointer(a->ies); - if (!a_ies) - return -1; - b_ies = rcu_access_pointer(b->ies); - if (!b_ies) - return 1; - - r = cmp_ies(WLAN_EID_MESH_ID, - a_ies->data, a_ies->len, - b_ies->data, b_ies->len); - if (r) - return r; - return cmp_ies(WLAN_EID_MESH_CONFIG, - a_ies->data, a_ies->len, - b_ies->data, b_ies->len); - } - - /* - * we can't use compare_ether_addr here since we need a < > operator. - * The binary return value of compare_ether_addr isn't enough - */ - return memcmp(a->bssid, b->bssid, sizeof(a->bssid)); -} - -static int cmp_bss(struct cfg80211_bss *a, - struct cfg80211_bss *b) -{ - const struct cfg80211_bss_ies *a_ies, *b_ies; - int r; - - r = cmp_bss_core(a, b); - if (r) - return r; - a_ies = rcu_access_pointer(a->ies); if (!a_ies) return -1; @@ -444,42 +437,51 @@ static int cmp_bss(struct cfg80211_bss *a, if (!b_ies) return 1; - return cmp_ies(WLAN_EID_SSID, - a_ies->data, a_ies->len, - b_ies->data, b_ies->len); -} - -static int cmp_hidden_bss(struct cfg80211_bss *a, struct cfg80211_bss *b) -{ - const struct cfg80211_bss_ies *a_ies, *b_ies; - const u8 *ie1; - const u8 *ie2; - int i; - int r; + if (WLAN_CAPABILITY_IS_STA_BSS(a->capability)) + ie1 = cfg80211_find_ie(WLAN_EID_MESH_ID, + a_ies->data, a_ies->len); + if (WLAN_CAPABILITY_IS_STA_BSS(b->capability)) + ie2 = cfg80211_find_ie(WLAN_EID_MESH_ID, + b_ies->data, b_ies->len); + if (ie1 && ie2) { + int mesh_id_cmp; + + if (ie1[1] == ie2[1]) + mesh_id_cmp = memcmp(ie1 + 2, ie2 + 2, ie1[1]); + else + mesh_id_cmp = ie2[1] - ie1[1]; + + ie1 = cfg80211_find_ie(WLAN_EID_MESH_CONFIG, + a_ies->data, a_ies->len); + ie2 = cfg80211_find_ie(WLAN_EID_MESH_CONFIG, + b_ies->data, b_ies->len); + if (ie1 && ie2) { + if (mesh_id_cmp) + return mesh_id_cmp; + if (ie1[1] != ie2[1]) + return ie2[1] - ie1[1]; + return memcmp(ie1 + 2, ie2 + 2, ie1[1]); + } + } - r = cmp_bss_core(a, b); + /* + * we can't use compare_ether_addr here since we need a < > operator. + * The binary return value of compare_ether_addr isn't enough + */ + r = memcmp(a->bssid, b->bssid, sizeof(a->bssid)); if (r) return r; - a_ies = rcu_access_pointer(a->ies); - if (!a_ies) - return -1; - b_ies = rcu_access_pointer(b->ies); - if (!b_ies) - return 1; - ie1 = cfg80211_find_ie(WLAN_EID_SSID, a_ies->data, a_ies->len); ie2 = cfg80211_find_ie(WLAN_EID_SSID, b_ies->data, b_ies->len); + if (!ie1 && !ie2) + return 0; + /* - * Key comparator must use same algorithm in any rb-tree - * search function (order is important), otherwise ordering - * of items in the tree is broken and search gives incorrect - * results. This code uses same order as cmp_ies() does. 
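The removed comment above states a general rbtree rule worth keeping: insertion and lookup must impose the same total order, or the tree silently degrades into one that can no longer find its own entries. That is why this patch collapses cmp_bss() and cmp_hidden_bss() into a single comparator parameterized by enum bss_compare_mode. In isolation, the shared-comparator pattern looks roughly like this (generic example, not scan.c's code):

    #include <linux/rbtree.h>

    struct example_node {
        struct rb_node rbn;
        int key;
    };

    /* The one ordering function, used by both insert and lookup. */
    static int example_cmp(int key, const struct example_node *n)
    {
        return (key > n->key) - (key < n->key);  /* -1, 0 or 1, no overflow */
    }

    static struct example_node *example_find(struct rb_root *root, int key)
    {
        struct rb_node *n = root->rb_node;

        while (n) {
            struct example_node *e =
                rb_entry(n, struct example_node, rbn);
            int r = example_cmp(key, e);

            if (r == 0)
                return e;
            n = r < 0 ? n->rb_left : n->rb_right;
        }
        return NULL;
    }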
- * - * Note that due to the differring behaviour with hidden SSIDs - * this function only works when "b" is the tree element and - * "a" is the key we're looking for. + * Note that with "hide_ssid", the function returns a match if + * the already-present BSS ("b") is a hidden SSID beacon for + * the new BSS ("a"). */ /* sort missing IE before (left of) present IE */ @@ -488,24 +490,36 @@ static int cmp_hidden_bss(struct cfg80211_bss *a, struct cfg80211_bss *b) if (!ie2) return 1; - /* zero-size SSID is used as an indication of the hidden bss */ - if (!ie2[1]) + switch (mode) { + case BSS_CMP_HIDE_ZLEN: + /* + * In ZLEN mode we assume the BSS entry we're + * looking for has a zero-length SSID. So if + * the one we're looking at right now has that, + * return 0. Otherwise, return the difference + * in length, but since we're looking for the + * 0-length it's really equivalent to returning + * the length of the one we're looking at. + * + * No content comparison is needed as we assume + * the content length is zero. + */ + return ie2[1]; + case BSS_CMP_REGULAR: + default: + /* sort by length first, then by contents */ + if (ie1[1] != ie2[1]) + return ie2[1] - ie1[1]; + return memcmp(ie1 + 2, ie2 + 2, ie1[1]); + case BSS_CMP_HIDE_NUL: + if (ie1[1] != ie2[1]) + return ie2[1] - ie1[1]; + /* this is equivalent to memcmp(zeroes, ie2 + 2, len) */ + for (i = 0; i < ie2[1]; i++) + if (ie2[i + 2]) + return -1; return 0; - - /* sort by length first, then by contents */ - if (ie1[1] != ie2[1]) - return ie2[1] - ie1[1]; - - /* - * zeroed SSID ie is another indication of a hidden bss; - * if it isn't zeroed just return the regular sort value - * to find the next candidate - */ - for (i = 0; i < ie2[1]; i++) - if (ie2[i + 2]) - return memcmp(ie1 + 2, ie2 + 2, ie1[1]); - - return 0; + } } struct cfg80211_bss *cfg80211_get_bss(struct wiphy *wiphy, @@ -534,7 +548,7 @@ struct cfg80211_bss *cfg80211_get_bss(struct wiphy *wiphy, continue; if (is_bss(&bss->pub, bssid, ssid, ssid_len)) { res = bss; - kref_get(&res->ref); + bss_ref_get(dev, res); break; } } @@ -547,34 +561,6 @@ struct cfg80211_bss *cfg80211_get_bss(struct wiphy *wiphy, } EXPORT_SYMBOL(cfg80211_get_bss); -struct cfg80211_bss *cfg80211_get_mesh(struct wiphy *wiphy, - struct ieee80211_channel *channel, - const u8 *meshid, size_t meshidlen, - const u8 *meshcfg) -{ - struct cfg80211_registered_device *dev = wiphy_to_dev(wiphy); - struct cfg80211_internal_bss *bss, *res = NULL; - - spin_lock_bh(&dev->bss_lock); - - list_for_each_entry(bss, &dev->bss_list, list) { - if (channel && bss->pub.channel != channel) - continue; - if (is_mesh(&bss->pub, meshid, meshidlen, meshcfg)) { - res = bss; - kref_get(&res->ref); - break; - } - } - - spin_unlock_bh(&dev->bss_lock); - if (!res) - return NULL; - return &res->pub; -} -EXPORT_SYMBOL(cfg80211_get_mesh); - - static void rb_insert_bss(struct cfg80211_registered_device *dev, struct cfg80211_internal_bss *bss) { @@ -587,7 +573,7 @@ static void rb_insert_bss(struct cfg80211_registered_device *dev, parent = *p; tbss = rb_entry(parent, struct cfg80211_internal_bss, rbn); - cmp = cmp_bss(&bss->pub, &tbss->pub); + cmp = cmp_bss(&bss->pub, &tbss->pub, BSS_CMP_REGULAR); if (WARN_ON(!cmp)) { /* will sort of leak this BSS */ @@ -606,7 +592,8 @@ static void rb_insert_bss(struct cfg80211_registered_device *dev, static struct cfg80211_internal_bss * rb_find_bss(struct cfg80211_registered_device *dev, - struct cfg80211_internal_bss *res) + struct cfg80211_internal_bss *res, + enum bss_compare_mode mode) { struct rb_node *n = 
dev->bss_tree.rb_node; struct cfg80211_internal_bss *bss; @@ -614,7 +601,7 @@ rb_find_bss(struct cfg80211_registered_device *dev, while (n) { bss = rb_entry(n, struct cfg80211_internal_bss, rbn); - r = cmp_bss(&res->pub, &bss->pub); + r = cmp_bss(&res->pub, &bss->pub, mode); if (r == 0) return bss; @@ -627,46 +614,67 @@ rb_find_bss(struct cfg80211_registered_device *dev, return NULL; } -static struct cfg80211_internal_bss * -rb_find_hidden_bss(struct cfg80211_registered_device *dev, - struct cfg80211_internal_bss *res) +static bool cfg80211_combine_bsses(struct cfg80211_registered_device *dev, + struct cfg80211_internal_bss *new) { - struct rb_node *n = dev->bss_tree.rb_node; + const struct cfg80211_bss_ies *ies; struct cfg80211_internal_bss *bss; - int r; + const u8 *ie; + int i, ssidlen; + u8 fold = 0; - while (n) { - bss = rb_entry(n, struct cfg80211_internal_bss, rbn); - r = cmp_hidden_bss(&res->pub, &bss->pub); + ies = rcu_access_pointer(new->pub.beacon_ies); + if (WARN_ON(!ies)) + return false; - if (r == 0) - return bss; - else if (r < 0) - n = n->rb_left; - else - n = n->rb_right; + ie = cfg80211_find_ie(WLAN_EID_SSID, ies->data, ies->len); + if (!ie) { + /* nothing to do */ + return true; } - return NULL; -} + ssidlen = ie[1]; + for (i = 0; i < ssidlen; i++) + fold |= ie[2 + i]; -static void -copy_hidden_ies(struct cfg80211_internal_bss *res, - struct cfg80211_internal_bss *hidden) -{ - const struct cfg80211_bss_ies *ies; + if (fold) { + /* not a hidden SSID */ + return true; + } - if (rcu_access_pointer(res->pub.beacon_ies)) - return; + /* This is the bad part ... */ - ies = rcu_access_pointer(hidden->pub.beacon_ies); - if (WARN_ON(!ies)) - return; + list_for_each_entry(bss, &dev->bss_list, list) { + if (!ether_addr_equal(bss->pub.bssid, new->pub.bssid)) + continue; + if (bss->pub.channel != new->pub.channel) + continue; + if (rcu_access_pointer(bss->pub.beacon_ies)) + continue; + ies = rcu_access_pointer(bss->pub.ies); + if (!ies) + continue; + ie = cfg80211_find_ie(WLAN_EID_SSID, ies->data, ies->len); + if (!ie) + continue; + if (ssidlen && ie[1] != ssidlen) + continue; + /* that would be odd ... 
*/ + if (bss->pub.beacon_ies) + continue; + if (WARN_ON_ONCE(bss->pub.hidden_beacon_bss)) + continue; + if (WARN_ON_ONCE(!list_empty(&bss->hidden_list))) + list_del(&bss->hidden_list); + /* combine them */ + list_add(&bss->hidden_list, &new->hidden_list); + bss->pub.hidden_beacon_bss = &new->pub; + new->refcount += bss->refcount; + rcu_assign_pointer(bss->pub.beacon_ies, + new->pub.beacon_ies); + } - ies = kmemdup(ies, sizeof(*ies) + ies->len, GFP_ATOMIC); - if (unlikely(!ies)) - return; - rcu_assign_pointer(res->pub.beacon_ies, ies); + return true; } static struct cfg80211_internal_bss * @@ -687,15 +695,9 @@ cfg80211_bss_update(struct cfg80211_registered_device *dev, return NULL; } - found = rb_find_bss(dev, tmp); + found = rb_find_bss(dev, tmp, BSS_CMP_REGULAR); if (found) { - found->pub.beacon_interval = tmp->pub.beacon_interval; - found->pub.tsf = tmp->pub.tsf; - found->pub.signal = tmp->pub.signal; - found->pub.capability = tmp->pub.capability; - found->ts = tmp->ts; - /* Update IEs */ if (rcu_access_pointer(tmp->pub.proberesp_ies)) { const struct cfg80211_bss_ies *old; @@ -711,41 +713,65 @@ cfg80211_bss_update(struct cfg80211_registered_device *dev, kfree_rcu((struct cfg80211_bss_ies *)old, rcu_head); } else if (rcu_access_pointer(tmp->pub.beacon_ies)) { - const struct cfg80211_bss_ies *old, *ies; + const struct cfg80211_bss_ies *old; + struct cfg80211_internal_bss *bss; + + if (found->pub.hidden_beacon_bss && + !list_empty(&found->hidden_list)) { + const struct cfg80211_bss_ies *f; + + /* + * The found BSS struct is one of the probe + * response members of a group, but we're + * receiving a beacon (beacon_ies in the tmp + * bss is used). This can only mean that the + * AP changed its beacon from not having an + * SSID to showing it, which is confusing so + * drop this information. + */ + + f = rcu_access_pointer(tmp->pub.beacon_ies); + kfree_rcu((struct cfg80211_bss_ies *)f, + rcu_head); + goto drop; + } old = rcu_access_pointer(found->pub.beacon_ies); - ies = rcu_access_pointer(found->pub.ies); rcu_assign_pointer(found->pub.beacon_ies, tmp->pub.beacon_ies); /* Override IEs if they were from a beacon before */ - if (old == ies) + if (old == rcu_access_pointer(found->pub.ies)) rcu_assign_pointer(found->pub.ies, tmp->pub.beacon_ies); + /* Assign beacon IEs to all sub entries */ + list_for_each_entry(bss, &found->hidden_list, + hidden_list) { + const struct cfg80211_bss_ies *ies; + + ies = rcu_access_pointer(bss->pub.beacon_ies); + WARN_ON(ies != old); + + rcu_assign_pointer(bss->pub.beacon_ies, + tmp->pub.beacon_ies); + } + if (old) kfree_rcu((struct cfg80211_bss_ies *)old, rcu_head); } + + found->pub.beacon_interval = tmp->pub.beacon_interval; + found->pub.signal = tmp->pub.signal; + found->pub.capability = tmp->pub.capability; + found->ts = tmp->ts; } else { struct cfg80211_internal_bss *new; struct cfg80211_internal_bss *hidden; struct cfg80211_bss_ies *ies; - /* First check if the beacon is a probe response from - * a hidden bss. If so, copy beacon ies (with nullified - * ssid) into the probe response bss entry (with real ssid). - * It is required basically for PSM implementation - * (probe responses do not contain tim ie) */ - - /* TODO: The code is not trying to update existing probe - * response bss entries when beacon ies are - * getting changed. 
*/ - hidden = rb_find_hidden_bss(dev, tmp); - if (hidden) - copy_hidden_ies(tmp, hidden); - /* * create a copy -- the "res" variable that is passed in * is allocated on the stack since it's not needed in the @@ -760,21 +786,51 @@ cfg80211_bss_update(struct cfg80211_registered_device *dev, ies = (void *)rcu_dereference(tmp->pub.proberesp_ies); if (ies) kfree_rcu(ies, rcu_head); - spin_unlock_bh(&dev->bss_lock); - return NULL; + goto drop; } memcpy(new, tmp, sizeof(*new)); - kref_init(&new->ref); + new->refcount = 1; + INIT_LIST_HEAD(&new->hidden_list); + + if (rcu_access_pointer(tmp->pub.proberesp_ies)) { + hidden = rb_find_bss(dev, tmp, BSS_CMP_HIDE_ZLEN); + if (!hidden) + hidden = rb_find_bss(dev, tmp, + BSS_CMP_HIDE_NUL); + if (hidden) { + new->pub.hidden_beacon_bss = &hidden->pub; + list_add(&new->hidden_list, + &hidden->hidden_list); + hidden->refcount++; + rcu_assign_pointer(new->pub.beacon_ies, + hidden->pub.beacon_ies); + } + } else { + /* + * Ok so we found a beacon, and don't have an entry. If + * it's a beacon with hidden SSID, we might be in for an + * expensive search for any probe responses that should + * be grouped with this beacon for updates ... + */ + if (!cfg80211_combine_bsses(dev, new)) { + kfree(new); + goto drop; + } + } + list_add_tail(&new->list, &dev->bss_list); rb_insert_bss(dev, new); found = new; } dev->bss_generation++; + bss_ref_get(dev, found); spin_unlock_bh(&dev->bss_lock); - kref_get(&found->ref); return found; + drop: + spin_unlock_bh(&dev->bss_lock); + return NULL; } static struct ieee80211_channel * @@ -833,7 +889,6 @@ cfg80211_inform_bss(struct wiphy *wiphy, memcpy(tmp.pub.bssid, bssid, ETH_ALEN); tmp.pub.channel = channel; tmp.pub.signal = signal; - tmp.pub.tsf = tsf; tmp.pub.beacon_interval = beacon_interval; tmp.pub.capability = capability; /* @@ -841,16 +896,14 @@ cfg80211_inform_bss(struct wiphy *wiphy, * Response frame, we need to pick one of the options and only use it * with the driver that does not provide the full Beacon/Probe Response * frame. Use Beacon frame pointer to avoid indicating that this should - * override the iies pointer should we have received an earlier + * override the IEs pointer should we have received an earlier * indication of Probe Response data. - * - * The initial buffer for the IEs is allocated with the BSS entry and - * is located after the private area. 
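The update paths above all follow the same discipline when they replace an IE buffer: publish the new buffer with rcu_assign_pointer() while holding bss_lock, then hand the old one to kfree_rcu() so that concurrent readers still inside rcu_read_lock() sections can finish with the old copy. Reduced to its core (illustrative struct, not cfg80211_bss_ies):

    #include <linux/rcupdate.h>
    #include <linux/slab.h>
    #include <linux/types.h>

    struct ies_example {
        struct rcu_head rcu_head;
        size_t len;
        u8 data[];
    };

    static struct ies_example __rcu *current_ies;

    /* Writer side; callers serialize updates with a lock. */
    static void publish_ies(struct ies_example *new_ies)
    {
        struct ies_example *old;

        /* "1" stands in for a real lockdep_is_held() expression */
        old = rcu_dereference_protected(current_ies, 1);
        rcu_assign_pointer(current_ies, new_ies);
        if (old)
            kfree_rcu(old, rcu_head);  /* freed after a grace period */
    }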
*/ ies = kmalloc(sizeof(*ies) + ielen, gfp); if (!ies) return NULL; ies->len = ielen; + ies->tsf = tsf; memcpy(ies->data, ie, ielen); rcu_assign_pointer(tmp.pub.beacon_ies, ies); @@ -907,6 +960,7 @@ cfg80211_inform_bss_frame(struct wiphy *wiphy, if (!ies) return NULL; ies->len = ielen; + ies->tsf = le64_to_cpu(mgmt->u.probe_resp.timestamp); memcpy(ies->data, mgmt->u.probe_resp.variable, ielen); if (ieee80211_is_probe_resp(mgmt->frame_control)) @@ -918,7 +972,6 @@ cfg80211_inform_bss_frame(struct wiphy *wiphy, memcpy(tmp.pub.bssid, mgmt->bssid, ETH_ALEN); tmp.pub.channel = channel; tmp.pub.signal = signal; - tmp.pub.tsf = le64_to_cpu(mgmt->u.probe_resp.timestamp); tmp.pub.beacon_interval = le16_to_cpu(mgmt->u.probe_resp.beacon_int); tmp.pub.capability = le16_to_cpu(mgmt->u.probe_resp.capab_info); @@ -935,27 +988,35 @@ cfg80211_inform_bss_frame(struct wiphy *wiphy, } EXPORT_SYMBOL(cfg80211_inform_bss_frame); -void cfg80211_ref_bss(struct cfg80211_bss *pub) +void cfg80211_ref_bss(struct wiphy *wiphy, struct cfg80211_bss *pub) { + struct cfg80211_registered_device *dev = wiphy_to_dev(wiphy); struct cfg80211_internal_bss *bss; if (!pub) return; bss = container_of(pub, struct cfg80211_internal_bss, pub); - kref_get(&bss->ref); + + spin_lock_bh(&dev->bss_lock); + bss_ref_get(dev, bss); + spin_unlock_bh(&dev->bss_lock); } EXPORT_SYMBOL(cfg80211_ref_bss); -void cfg80211_put_bss(struct cfg80211_bss *pub) +void cfg80211_put_bss(struct wiphy *wiphy, struct cfg80211_bss *pub) { + struct cfg80211_registered_device *dev = wiphy_to_dev(wiphy); struct cfg80211_internal_bss *bss; if (!pub) return; bss = container_of(pub, struct cfg80211_internal_bss, pub); - kref_put(&bss->ref, bss_release); + + spin_lock_bh(&dev->bss_lock); + bss_ref_put(dev, bss); + spin_unlock_bh(&dev->bss_lock); } EXPORT_SYMBOL(cfg80211_put_bss); @@ -971,8 +1032,8 @@ void cfg80211_unlink_bss(struct wiphy *wiphy, struct cfg80211_bss *pub) spin_lock_bh(&dev->bss_lock); if (!list_empty(&bss->list)) { - __cfg80211_unlink_bss(dev, bss); - dev->bss_generation++; + if (__cfg80211_unlink_bss(dev, bss)) + dev->bss_generation++; } spin_unlock_bh(&dev->bss_lock); } @@ -1001,6 +1062,7 @@ int cfg80211_wext_siwscan(struct net_device *dev, if (IS_ERR(rdev)) return PTR_ERR(rdev); + mutex_lock(&rdev->sched_scan_mtx); if (rdev->scan_req) { err = -EBUSY; goto out; @@ -1107,6 +1169,7 @@ int cfg80211_wext_siwscan(struct net_device *dev, dev_hold(dev); } out: + mutex_unlock(&rdev->sched_scan_mtx); kfree(creq); cfg80211_unlock_rdev(rdev); return err; @@ -1155,16 +1218,6 @@ static void ieee80211_scan_add_ies(struct iw_request_info *info, } } -static inline unsigned int elapsed_jiffies_msecs(unsigned long start) -{ - unsigned long end = jiffies; - - if (end >= start) - return jiffies_to_msecs(end - start); - - return jiffies_to_msecs(end + (MAX_JIFFY_OFFSET - start) + 1); -} - static char * ieee80211_bss(struct wiphy *wiphy, struct iw_request_info *info, struct cfg80211_internal_bss *bss, char *current_ev, @@ -1241,15 +1294,10 @@ ieee80211_bss(struct wiphy *wiphy, struct iw_request_info *info, rcu_read_lock(); ies = rcu_dereference(bss->pub.ies); - if (ies) { - rem = ies->len; - ie = ies->data; - } else { - rem = 0; - ie = NULL; - } + rem = ies->len; + ie = ies->data; - while (ies && rem >= 2) { + while (rem >= 2) { /* invalid data */ if (ie[1] > rem - 2) break; @@ -1362,7 +1410,7 @@ ieee80211_bss(struct wiphy *wiphy, struct iw_request_info *info, if (buf) { memset(&iwe, 0, sizeof(iwe)); iwe.cmd = IWEVCUSTOM; - sprintf(buf, "tsf=%016llx", (unsigned long 
long)(bss->pub.tsf)); + sprintf(buf, "tsf=%016llx", (unsigned long long)(ies->tsf)); iwe.u.data.length = strlen(buf); current_ev = iwe_stream_add_point(info, current_ev, end_buf, &iwe, buf); diff --git a/net/wireless/sme.c b/net/wireless/sme.c index f2431e41a373..a9dc5c736df0 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -85,6 +85,7 @@ static int cfg80211_conn_scan(struct wireless_dev *wdev) ASSERT_RTNL(); ASSERT_RDEV_LOCK(rdev); ASSERT_WDEV_LOCK(wdev); + lockdep_assert_held(&rdev->sched_scan_mtx); if (rdev->scan_req) return -EBUSY; @@ -159,7 +160,7 @@ static int cfg80211_conn_do_work(struct wireless_dev *wdev) { struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); struct cfg80211_connect_params *params; - const u8 *prev_bssid = NULL; + struct cfg80211_assoc_request req = {}; int err; ASSERT_WDEV_LOCK(wdev); @@ -186,15 +187,20 @@ static int cfg80211_conn_do_work(struct wireless_dev *wdev) BUG_ON(!rdev->ops->assoc); wdev->conn->state = CFG80211_CONN_ASSOCIATING; if (wdev->conn->prev_bssid_valid) - prev_bssid = wdev->conn->prev_bssid; - err = __cfg80211_mlme_assoc(rdev, wdev->netdev, - params->channel, params->bssid, - prev_bssid, - params->ssid, params->ssid_len, - params->ie, params->ie_len, - false, ¶ms->crypto, - params->flags, ¶ms->ht_capa, - ¶ms->ht_capa_mask); + req.prev_bssid = wdev->conn->prev_bssid; + req.ie = params->ie; + req.ie_len = params->ie_len; + req.use_mfp = params->mfp != NL80211_MFP_NO; + req.crypto = params->crypto; + req.flags = params->flags; + req.ht_capa = params->ht_capa; + req.ht_capa_mask = params->ht_capa_mask; + req.vht_capa = params->vht_capa; + req.vht_capa_mask = params->vht_capa_mask; + + err = __cfg80211_mlme_assoc(rdev, wdev->netdev, params->channel, + params->bssid, params->ssid, + params->ssid_len, &req); if (err) __cfg80211_mlme_deauth(rdev, wdev->netdev, params->bssid, NULL, 0, @@ -222,6 +228,7 @@ void cfg80211_conn_work(struct work_struct *work) rtnl_lock(); cfg80211_lock_rdev(rdev); mutex_lock(&rdev->devlist_mtx); + mutex_lock(&rdev->sched_scan_mtx); list_for_each_entry(wdev, &rdev->wdev_list, list) { wdev_lock(wdev); @@ -229,7 +236,7 @@ void cfg80211_conn_work(struct work_struct *work) wdev_unlock(wdev); continue; } - if (wdev->sme_state != CFG80211_SME_CONNECTING) { + if (wdev->sme_state != CFG80211_SME_CONNECTING || !wdev->conn) { wdev_unlock(wdev); continue; } @@ -246,6 +253,7 @@ void cfg80211_conn_work(struct work_struct *work) wdev_unlock(wdev); } + mutex_unlock(&rdev->sched_scan_mtx); mutex_unlock(&rdev->devlist_mtx); cfg80211_unlock_rdev(rdev); rtnl_unlock(); @@ -300,7 +308,7 @@ static void __cfg80211_sme_scan_done(struct net_device *dev) bss = cfg80211_get_conn_bss(wdev); if (bss) { - cfg80211_put_bss(bss); + cfg80211_put_bss(&rdev->wiphy, bss); } else { /* not found */ if (wdev->conn->state == CFG80211_CONN_SCAN_AGAIN) @@ -319,11 +327,9 @@ void cfg80211_sme_scan_done(struct net_device *dev) { struct wireless_dev *wdev = dev->ieee80211_ptr; - mutex_lock(&wiphy_to_dev(wdev->wiphy)->devlist_mtx); wdev_lock(wdev); __cfg80211_sme_scan_done(dev); wdev_unlock(wdev); - mutex_unlock(&wiphy_to_dev(wdev->wiphy)->devlist_mtx); } void cfg80211_sme_rx_auth(struct net_device *dev, @@ -463,7 +469,7 @@ void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid, if (wdev->current_bss) { cfg80211_unhold_bss(wdev->current_bss); - cfg80211_put_bss(&wdev->current_bss->pub); + cfg80211_put_bss(wdev->wiphy, &wdev->current_bss->pub); wdev->current_bss = NULL; } @@ -479,7 +485,7 @@ void __cfg80211_connect_result(struct 
net_device *dev, const u8 *bssid, kfree(wdev->connect_keys); wdev->connect_keys = NULL; wdev->ssid_len = 0; - cfg80211_put_bss(bss); + cfg80211_put_bss(wdev->wiphy, bss); return; } @@ -519,10 +525,8 @@ void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid, * - country_ie + 2, the start of the country ie data, and * - and country_ie[1] which is the IE length */ - regulatory_hint_11d(wdev->wiphy, - bss->channel->band, - country_ie + 2, - country_ie[1]); + regulatory_hint_11d(wdev->wiphy, bss->channel->band, + country_ie + 2, country_ie[1]); kfree(country_ie); } @@ -587,7 +591,7 @@ void __cfg80211_roamed(struct wireless_dev *wdev, } cfg80211_unhold_bss(wdev->current_bss); - cfg80211_put_bss(&wdev->current_bss->pub); + cfg80211_put_bss(wdev->wiphy, &wdev->current_bss->pub); wdev->current_bss = NULL; cfg80211_hold_bss(bss_from_pub(bss)); @@ -622,7 +626,7 @@ void __cfg80211_roamed(struct wireless_dev *wdev, return; out: - cfg80211_put_bss(bss); + cfg80211_put_bss(wdev->wiphy, bss); } void cfg80211_roamed(struct net_device *dev, @@ -664,7 +668,7 @@ void cfg80211_roamed_bss(struct net_device *dev, ev = kzalloc(sizeof(*ev) + req_ie_len + resp_ie_len, gfp); if (!ev) { - cfg80211_put_bss(bss); + cfg80211_put_bss(wdev->wiphy, bss); return; } @@ -705,7 +709,7 @@ void __cfg80211_disconnected(struct net_device *dev, const u8 *ie, if (wdev->current_bss) { cfg80211_unhold_bss(wdev->current_bss); - cfg80211_put_bss(&wdev->current_bss->pub); + cfg80211_put_bss(wdev->wiphy, &wdev->current_bss->pub); } wdev->current_bss = NULL; @@ -876,7 +880,7 @@ int __cfg80211_connect(struct cfg80211_registered_device *rdev, if (bss) { wdev->conn->state = CFG80211_CONN_AUTHENTICATE_NEXT; err = cfg80211_conn_do_work(wdev); - cfg80211_put_bss(bss); + cfg80211_put_bss(wdev->wiphy, bss); } else { /* otherwise we'll need to scan for the AP first */ err = cfg80211_conn_scan(wdev); @@ -925,9 +929,12 @@ int cfg80211_connect(struct cfg80211_registered_device *rdev, int err; mutex_lock(&rdev->devlist_mtx); + /* might request scan - scan_mtx -> wdev_mtx dependency */ + mutex_lock(&rdev->sched_scan_mtx); wdev_lock(dev->ieee80211_ptr); err = __cfg80211_connect(rdev, dev, connect, connkeys, NULL); wdev_unlock(dev->ieee80211_ptr); + mutex_unlock(&rdev->sched_scan_mtx); mutex_unlock(&rdev->devlist_mtx); return err; diff --git a/net/wireless/sysfs.c b/net/wireless/sysfs.c index 1f6f01e2dc4c..8f28b9f798d8 100644 --- a/net/wireless/sysfs.c +++ b/net/wireless/sysfs.c @@ -83,6 +83,14 @@ static int wiphy_uevent(struct device *dev, struct kobj_uevent_env *env) return 0; } +static void cfg80211_leave_all(struct cfg80211_registered_device *rdev) +{ + struct wireless_dev *wdev; + + list_for_each_entry(wdev, &rdev->wdev_list, list) + cfg80211_leave(rdev, wdev); +} + static int wiphy_suspend(struct device *dev, pm_message_t state) { struct cfg80211_registered_device *rdev = dev_to_rdev(dev); @@ -90,12 +98,19 @@ static int wiphy_suspend(struct device *dev, pm_message_t state) rdev->suspend_at = get_seconds(); - if (rdev->ops->suspend) { - rtnl_lock(); - if (rdev->wiphy.registered) - ret = rdev_suspend(rdev); - rtnl_unlock(); + rtnl_lock(); + if (rdev->wiphy.registered) { + if (!rdev->wowlan) + cfg80211_leave_all(rdev); + if (rdev->ops->suspend) + ret = rdev_suspend(rdev, rdev->wowlan); + if (ret == 1) { + /* Driver refused to configure wowlan */ + cfg80211_leave_all(rdev); + ret = rdev_suspend(rdev, NULL); + } } + rtnl_unlock(); return ret; } @@ -106,9 +121,7 @@ static int wiphy_resume(struct device *dev) int ret = 0; /* Age scan results
with time spent in suspend */ - spin_lock_bh(&rdev->bss_lock); cfg80211_bss_age(rdev, get_seconds() - rdev->suspend_at); - spin_unlock_bh(&rdev->bss_lock); if (rdev->ops->resume) { rtnl_lock(); diff --git a/net/wireless/trace.h b/net/wireless/trace.h index 2134576f426e..ecd4fcec3c94 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -27,7 +27,8 @@ #define WIPHY_PR_ARG __entry->wiphy_name #define WDEV_ENTRY __field(u32, id) -#define WDEV_ASSIGN (__entry->id) = (wdev ? wdev->identifier : 0) +#define WDEV_ASSIGN (__entry->id) = (!IS_ERR_OR_NULL(wdev) \ + ? wdev->identifier : 0) #define WDEV_PR_FMT "wdev(%u)" #define WDEV_PR_ARG (__entry->id) @@ -1767,6 +1768,79 @@ DEFINE_EVENT(wiphy_wdev_evt, rdev_stop_p2p_device, TP_ARGS(wiphy, wdev) ); +TRACE_EVENT(rdev_set_mac_acl, + TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, + struct cfg80211_acl_data *params), + TP_ARGS(wiphy, netdev, params), + TP_STRUCT__entry( + WIPHY_ENTRY + NETDEV_ENTRY + __field(u32, acl_policy) + ), + TP_fast_assign( + WIPHY_ASSIGN; + NETDEV_ASSIGN; + __entry->acl_policy = params->acl_policy; + ), + TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", acl policy: %d", + WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->acl_policy) +); + +TRACE_EVENT(rdev_update_ft_ies, + TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, + struct cfg80211_update_ft_ies_params *ftie), + TP_ARGS(wiphy, netdev, ftie), + TP_STRUCT__entry( + WIPHY_ENTRY + NETDEV_ENTRY + __field(u16, md) + __dynamic_array(u8, ie, ftie->ie_len) + ), + TP_fast_assign( + WIPHY_ASSIGN; + NETDEV_ASSIGN; + __entry->md = ftie->md; + memcpy(__get_dynamic_array(ie), ftie->ie, ftie->ie_len); + ), + TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", md: 0x%x", + WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->md) +); + +TRACE_EVENT(rdev_crit_proto_start, + TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev, + enum nl80211_crit_proto_id protocol, u16 duration), + TP_ARGS(wiphy, wdev, protocol, duration), + TP_STRUCT__entry( + WIPHY_ENTRY + WDEV_ENTRY + __field(u16, proto) + __field(u16, duration) + ), + TP_fast_assign( + WIPHY_ASSIGN; + WDEV_ASSIGN; + __entry->proto = protocol; + __entry->duration = duration; + ), + TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT ", proto=%x, duration=%u", + WIPHY_PR_ARG, WDEV_PR_ARG, __entry->proto, __entry->duration) +); + +TRACE_EVENT(rdev_crit_proto_stop, + TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev), + TP_ARGS(wiphy, wdev), + TP_STRUCT__entry( + WIPHY_ENTRY + WDEV_ENTRY + ), + TP_fast_assign( + WIPHY_ASSIGN; + WDEV_ASSIGN; + ), + TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT, + WIPHY_PR_ARG, WDEV_PR_ARG) +); + /************************************************************* * cfg80211 exported functions traces * *************************************************************/ @@ -2033,6 +2107,21 @@ TRACE_EVENT(cfg80211_reg_can_beacon, WIPHY_PR_ARG, CHAN_DEF_PR_ARG) ); +TRACE_EVENT(cfg80211_chandef_dfs_required, + TP_PROTO(struct wiphy *wiphy, struct cfg80211_chan_def *chandef), + TP_ARGS(wiphy, chandef), + TP_STRUCT__entry( + WIPHY_ENTRY + CHAN_DEF_ENTRY + ), + TP_fast_assign( + WIPHY_ASSIGN; + CHAN_DEF_ASSIGN(chandef); + ), + TP_printk(WIPHY_PR_FMT ", " CHAN_DEF_PR_FMT, + WIPHY_PR_ARG, CHAN_DEF_PR_ARG) +); + TRACE_EVENT(cfg80211_ch_switch_notify, TP_PROTO(struct net_device *netdev, struct cfg80211_chan_def *chandef), @@ -2049,6 +2138,36 @@ TRACE_EVENT(cfg80211_ch_switch_notify, NETDEV_PR_ARG, CHAN_DEF_PR_ARG) ); +TRACE_EVENT(cfg80211_radar_event, + TP_PROTO(struct wiphy *wiphy, struct cfg80211_chan_def *chandef), + TP_ARGS(wiphy, chandef), + 
TP_STRUCT__entry( + WIPHY_ENTRY + CHAN_DEF_ENTRY + ), + TP_fast_assign( + WIPHY_ASSIGN; + CHAN_DEF_ASSIGN(chandef); + ), + TP_printk(WIPHY_PR_FMT ", " CHAN_DEF_PR_FMT, + WIPHY_PR_ARG, CHAN_DEF_PR_ARG) +); + +TRACE_EVENT(cfg80211_cac_event, + TP_PROTO(struct net_device *netdev, enum nl80211_radar_event evt), + TP_ARGS(netdev, evt), + TP_STRUCT__entry( + NETDEV_ENTRY + __field(enum nl80211_radar_event, evt) + ), + TP_fast_assign( + NETDEV_ASSIGN; + __entry->evt = evt; + ), + TP_printk(NETDEV_PR_FMT ", event: %d", + NETDEV_PR_ARG, __entry->evt) +); + DECLARE_EVENT_CLASS(cfg80211_rx_evt, TP_PROTO(struct net_device *netdev, const u8 *addr), TP_ARGS(netdev, addr), @@ -2315,6 +2434,67 @@ TRACE_EVENT(cfg80211_return_u32, TP_printk("ret: %u", __entry->ret) ); +TRACE_EVENT(cfg80211_report_wowlan_wakeup, + TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev, + struct cfg80211_wowlan_wakeup *wakeup), + TP_ARGS(wiphy, wdev, wakeup), + TP_STRUCT__entry( + WIPHY_ENTRY + WDEV_ENTRY + __field(bool, disconnect) + __field(bool, magic_pkt) + __field(bool, gtk_rekey_failure) + __field(bool, eap_identity_req) + __field(bool, four_way_handshake) + __field(bool, rfkill_release) + __field(s32, pattern_idx) + __field(u32, packet_len) + __dynamic_array(u8, packet, wakeup->packet_present_len) + ), + TP_fast_assign( + WIPHY_ASSIGN; + WDEV_ASSIGN; + __entry->disconnect = wakeup->disconnect; + __entry->magic_pkt = wakeup->magic_pkt; + __entry->gtk_rekey_failure = wakeup->gtk_rekey_failure; + __entry->eap_identity_req = wakeup->eap_identity_req; + __entry->four_way_handshake = wakeup->four_way_handshake; + __entry->rfkill_release = wakeup->rfkill_release; + __entry->pattern_idx = wakeup->pattern_idx; + __entry->packet_len = wakeup->packet_len; + if (wakeup->packet && wakeup->packet_present_len) + memcpy(__get_dynamic_array(packet), wakeup->packet, + wakeup->packet_present_len); + ), + TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT, WIPHY_PR_ARG, WDEV_PR_ARG) +); + +TRACE_EVENT(cfg80211_ft_event, + TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, + struct cfg80211_ft_event_params *ft_event), + TP_ARGS(wiphy, netdev, ft_event), + TP_STRUCT__entry( + WIPHY_ENTRY + NETDEV_ENTRY + __dynamic_array(u8, ies, ft_event->ies_len) + MAC_ENTRY(target_ap) + __dynamic_array(u8, ric_ies, ft_event->ric_ies_len) + ), + TP_fast_assign( + WIPHY_ASSIGN; + NETDEV_ASSIGN; + if (ft_event->ies) + memcpy(__get_dynamic_array(ies), ft_event->ies, + ft_event->ies_len); + MAC_ASSIGN(target_ap, ft_event->target_ap); + if (ft_event->ric_ies) + memcpy(__get_dynamic_array(ric_ies), ft_event->ric_ies, + ft_event->ric_ies_len); + ), + TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", target_ap: " MAC_PR_FMT, + WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(target_ap)) +); + #endif /* !__RDEV_OPS_TRACE || TRACE_HEADER_MULTI_READ */ #undef TRACE_INCLUDE_PATH diff --git a/net/wireless/util.c b/net/wireless/util.c index 16d76a807c2f..f5ad4d94ba88 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -511,7 +511,7 @@ int ieee80211_data_from_8023(struct sk_buff *skb, const u8 *addr, encaps_data = bridge_tunnel_header; encaps_len = sizeof(bridge_tunnel_header); skip_header_bytes -= 2; - } else if (ethertype > 0x600) { + } else if (ethertype >= ETH_P_802_3_MIN) { encaps_data = rfc1042_header; encaps_len = sizeof(rfc1042_header); skip_header_bytes -= 2; @@ -1155,6 +1155,26 @@ int cfg80211_get_p2p_attr(const u8 *ies, unsigned int len, } EXPORT_SYMBOL(cfg80211_get_p2p_attr); +bool ieee80211_operating_class_to_band(u8 operating_class, + enum ieee80211_band *band) +{ 
+ switch (operating_class) { + case 112: + case 115 ... 127: + *band = IEEE80211_BAND_5GHZ; + return true; + case 81: + case 82: + case 83: + case 84: + *band = IEEE80211_BAND_2GHZ; + return true; + } + + return false; +} +EXPORT_SYMBOL(ieee80211_operating_class_to_band); + int cfg80211_validate_beacon_int(struct cfg80211_registered_device *rdev, u32 beacon_int) { @@ -1184,7 +1204,8 @@ int cfg80211_can_use_iftype_chan(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev, enum nl80211_iftype iftype, struct ieee80211_channel *chan, - enum cfg80211_chan_mode chanmode) + enum cfg80211_chan_mode chanmode, + u8 radar_detect) { struct wireless_dev *wdev_iter; u32 used_iftypes = BIT(iftype); @@ -1195,14 +1216,46 @@ int cfg80211_can_use_iftype_chan(struct cfg80211_registered_device *rdev, enum cfg80211_chan_mode chmode; int num_different_channels = 0; int total = 1; + bool radar_required; int i, j; ASSERT_RTNL(); lockdep_assert_held(&rdev->devlist_mtx); + if (WARN_ON(hweight32(radar_detect) > 1)) + return -EINVAL; + + switch (iftype) { + case NL80211_IFTYPE_ADHOC: + case NL80211_IFTYPE_AP: + case NL80211_IFTYPE_AP_VLAN: + case NL80211_IFTYPE_MESH_POINT: + case NL80211_IFTYPE_P2P_GO: + case NL80211_IFTYPE_WDS: + radar_required = !!(chan && + (chan->flags & IEEE80211_CHAN_RADAR)); + break; + case NL80211_IFTYPE_P2P_CLIENT: + case NL80211_IFTYPE_STATION: + case NL80211_IFTYPE_P2P_DEVICE: + case NL80211_IFTYPE_MONITOR: + radar_required = false; + break; + case NUM_NL80211_IFTYPES: + case NL80211_IFTYPE_UNSPECIFIED: + default: + return -EINVAL; + } + + if (radar_required && !radar_detect) + return -EINVAL; + /* Always allow software iftypes */ - if (rdev->wiphy.software_iftypes & BIT(iftype)) + if (rdev->wiphy.software_iftypes & BIT(iftype)) { + if (radar_detect) + return -EINVAL; return 0; + } memset(num, 0, sizeof(num)); memset(used_channels, 0, sizeof(used_channels)); @@ -1225,12 +1278,12 @@ int cfg80211_can_use_iftype_chan(struct cfg80211_registered_device *rdev, list_for_each_entry(wdev_iter, &rdev->wdev_list, list) { if (wdev_iter == wdev) continue; - if (wdev_iter->netdev) { - if (!netif_running(wdev_iter->netdev)) - continue; - } else if (wdev_iter->iftype == NL80211_IFTYPE_P2P_DEVICE) { + if (wdev_iter->iftype == NL80211_IFTYPE_P2P_DEVICE) { if (!wdev_iter->p2p_started) continue; + } else if (wdev_iter->netdev) { + if (!netif_running(wdev_iter->netdev)) + continue; } else { WARN_ON(1); } @@ -1275,7 +1328,7 @@ int cfg80211_can_use_iftype_chan(struct cfg80211_registered_device *rdev, used_iftypes |= BIT(wdev_iter->iftype); } - if (total == 1) + if (total == 1 && !radar_detect) return 0; for (i = 0; i < rdev->wiphy.n_iface_combinations; i++) { @@ -1308,6 +1361,9 @@ int cfg80211_can_use_iftype_chan(struct cfg80211_registered_device *rdev, } } + if (radar_detect && !(c->radar_detect_widths & radar_detect)) + goto cont; + /* * Finally check that all iftypes that we're currently * using are actually part of this combination. 
If they diff --git a/net/wireless/wext-proc.c b/net/wireless/wext-proc.c index 8bafa31fa9f8..e98a01c1034f 100644 --- a/net/wireless/wext-proc.c +++ b/net/wireless/wext-proc.c @@ -143,7 +143,8 @@ static const struct file_operations wireless_seq_fops = { int __net_init wext_proc_init(struct net *net) { /* Create /proc/net/wireless entry */ - if (!proc_net_fops_create(net, "wireless", S_IRUGO, &wireless_seq_fops)) + if (!proc_create("wireless", S_IRUGO, net->proc_net, + &wireless_seq_fops)) return -ENOMEM; return 0; @@ -151,5 +152,5 @@ int __net_init wext_proc_init(struct net *net) void __net_exit wext_proc_exit(struct net *net) { - proc_net_remove(net, "wireless"); + remove_proc_entry("wireless", net->proc_net); } diff --git a/net/wireless/wext-sme.c b/net/wireless/wext-sme.c index fb9622f6d99c..e79cb5c0655a 100644 --- a/net/wireless/wext-sme.c +++ b/net/wireless/wext-sme.c @@ -89,6 +89,7 @@ int cfg80211_mgd_wext_siwfreq(struct net_device *dev, cfg80211_lock_rdev(rdev); mutex_lock(&rdev->devlist_mtx); + mutex_lock(&rdev->sched_scan_mtx); wdev_lock(wdev); if (wdev->sme_state != CFG80211_SME_IDLE) { @@ -135,6 +136,7 @@ int cfg80211_mgd_wext_siwfreq(struct net_device *dev, err = cfg80211_mgd_wext_connect(rdev, wdev); out: wdev_unlock(wdev); + mutex_unlock(&rdev->sched_scan_mtx); mutex_unlock(&rdev->devlist_mtx); cfg80211_unlock_rdev(rdev); return err; @@ -190,6 +192,7 @@ int cfg80211_mgd_wext_siwessid(struct net_device *dev, cfg80211_lock_rdev(rdev); mutex_lock(&rdev->devlist_mtx); + mutex_lock(&rdev->sched_scan_mtx); wdev_lock(wdev); err = 0; @@ -223,6 +226,7 @@ int cfg80211_mgd_wext_siwessid(struct net_device *dev, err = cfg80211_mgd_wext_connect(rdev, wdev); out: wdev_unlock(wdev); + mutex_unlock(&rdev->sched_scan_mtx); mutex_unlock(&rdev->devlist_mtx); cfg80211_unlock_rdev(rdev); return err; @@ -285,6 +289,7 @@ int cfg80211_mgd_wext_siwap(struct net_device *dev, cfg80211_lock_rdev(rdev); mutex_lock(&rdev->devlist_mtx); + mutex_lock(&rdev->sched_scan_mtx); wdev_lock(wdev); if (wdev->sme_state != CFG80211_SME_IDLE) { @@ -313,6 +318,7 @@ int cfg80211_mgd_wext_siwap(struct net_device *dev, err = cfg80211_mgd_wext_connect(rdev, wdev); out: wdev_unlock(wdev); + mutex_unlock(&rdev->sched_scan_mtx); mutex_unlock(&rdev->devlist_mtx); cfg80211_unlock_rdev(rdev); return err; diff --git a/net/x25/Kconfig b/net/x25/Kconfig index e6759c9660bb..c959312c45e3 100644 --- a/net/x25/Kconfig +++ b/net/x25/Kconfig @@ -3,8 +3,7 @@ # config X25 - tristate "CCITT X.25 Packet Layer (EXPERIMENTAL)" - depends on EXPERIMENTAL + tristate "CCITT X.25 Packet Layer" ---help--- X.25 is a set of standardized network protocols, similar in scope to frame relay; the one physical line from your box to the X.25 network diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index a306bc66000e..37ca9694aabe 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -208,11 +208,10 @@ static void x25_remove_socket(struct sock *sk) static void x25_kill_by_device(struct net_device *dev) { struct sock *s; - struct hlist_node *node; write_lock_bh(&x25_list_lock); - sk_for_each(s, node, &x25_list) + sk_for_each(s, &x25_list) if (x25_sk(s)->neighbour && x25_sk(s)->neighbour->dev == dev) x25_disconnect(s, ENETUNREACH, 0, 0); @@ -280,12 +279,11 @@ static struct sock *x25_find_listener(struct x25_address *addr, { struct sock *s; struct sock *next_best; - struct hlist_node *node; read_lock_bh(&x25_list_lock); next_best = NULL; - sk_for_each(s, node, &x25_list) + sk_for_each(s, &x25_list) if ((!strcmp(addr->x25_addr, x25_sk(s)->source_addr.x25_addr) || 
!strcmp(addr->x25_addr, @@ -323,9 +321,8 @@ found: static struct sock *__x25_find_socket(unsigned int lci, struct x25_neigh *nb) { struct sock *s; - struct hlist_node *node; - sk_for_each(s, node, &x25_list) + sk_for_each(s, &x25_list) if (x25_sk(s)->lci == lci && x25_sk(s)->neighbour == nb) { sock_hold(s); goto found; @@ -1782,11 +1779,10 @@ static struct notifier_block x25_dev_notifier = { void x25_kill_by_neigh(struct x25_neigh *nb) { struct sock *s; - struct hlist_node *node; write_lock_bh(&x25_list_lock); - sk_for_each(s, node, &x25_list) + sk_for_each(s, &x25_list) if (x25_sk(s)->neighbour == nb) x25_disconnect(s, ENETUNREACH, 0, 0); diff --git a/net/x25/x25_proc.c b/net/x25/x25_proc.c index 2ffde4631ae2..0917f047f2cf 100644 --- a/net/x25/x25_proc.c +++ b/net/x25/x25_proc.c @@ -187,7 +187,6 @@ static int x25_seq_forward_open(struct inode *inode, struct file *file) } static const struct file_operations x25_seq_socket_fops = { - .owner = THIS_MODULE, .open = x25_seq_socket_open, .read = seq_read, .llseek = seq_lseek, @@ -195,7 +194,6 @@ static const struct file_operations x25_seq_socket_fops = { }; static const struct file_operations x25_seq_route_fops = { - .owner = THIS_MODULE, .open = x25_seq_route_open, .read = seq_read, .llseek = seq_lseek, @@ -203,55 +201,38 @@ static const struct file_operations x25_seq_route_fops = { }; static const struct file_operations x25_seq_forward_fops = { - .owner = THIS_MODULE, .open = x25_seq_forward_open, .read = seq_read, .llseek = seq_lseek, .release = seq_release, }; -static struct proc_dir_entry *x25_proc_dir; - int __init x25_proc_init(void) { - struct proc_dir_entry *p; - int rc = -ENOMEM; + if (!proc_mkdir("x25", init_net.proc_net)) + return -ENOMEM; - x25_proc_dir = proc_mkdir("x25", init_net.proc_net); - if (!x25_proc_dir) + if (!proc_create("x25/route", S_IRUGO, init_net.proc_net, + &x25_seq_route_fops)) goto out; - p = proc_create("route", S_IRUGO, x25_proc_dir, &x25_seq_route_fops); - if (!p) - goto out_route; - - p = proc_create("socket", S_IRUGO, x25_proc_dir, &x25_seq_socket_fops); - if (!p) - goto out_socket; + if (!proc_create("x25/socket", S_IRUGO, init_net.proc_net, + &x25_seq_socket_fops)) + goto out; - p = proc_create("forward", S_IRUGO, x25_proc_dir, - &x25_seq_forward_fops); - if (!p) - goto out_forward; - rc = 0; + if (!proc_create("x25/forward", S_IRUGO, init_net.proc_net, + &x25_seq_forward_fops)) + goto out; + return 0; out: - return rc; -out_forward: - remove_proc_entry("socket", x25_proc_dir); -out_socket: - remove_proc_entry("route", x25_proc_dir); -out_route: - remove_proc_entry("x25", init_net.proc_net); - goto out; + remove_proc_subtree("x25", init_net.proc_net); + return -ENOMEM; } void __exit x25_proc_exit(void) { - remove_proc_entry("forward", x25_proc_dir); - remove_proc_entry("route", x25_proc_dir); - remove_proc_entry("socket", x25_proc_dir); - remove_proc_entry("x25", init_net.proc_net); + remove_proc_subtree("x25", init_net.proc_net); } #else /* CONFIG_PROC_FS */ diff --git a/net/xfrm/Kconfig b/net/xfrm/Kconfig index ce90b8d92365..bda1a13628a8 100644 --- a/net/xfrm/Kconfig +++ b/net/xfrm/Kconfig @@ -21,8 +21,8 @@ config XFRM_USER If unsure, say Y. config XFRM_SUB_POLICY - bool "Transformation sub policy support (EXPERIMENTAL)" - depends on XFRM && EXPERIMENTAL + bool "Transformation sub policy support" + depends on XFRM ---help--- Support sub policy for developers. By using sub policy with main one, two policies can be applied to the same packet at once. 
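
The x25_proc_init() rewrite above leans on two procfs helpers: proc_create() resolves slash-separated names such as "x25/route" relative to the given parent, and remove_proc_subtree() removes a directory together with everything inside it, so the old ladder of unwind labels collapses into a single error path. A minimal sketch of the same pattern for a hypothetical "foo" directory (foo_seq_fops is assumed to exist elsewhere):

#include <linux/proc_fs.h>
#include <linux/stat.h>
#include <net/net_namespace.h>

extern const struct file_operations foo_seq_fops;

static int __init foo_proc_init(void)
{
	if (!proc_mkdir("foo", init_net.proc_net))
		return -ENOMEM;

	/* A name containing '/' is created inside the subdirectory. */
	if (!proc_create("foo/stats", S_IRUGO, init_net.proc_net,
			 &foo_seq_fops))
		goto out;
	return 0;

out:
	/* One call tears down the directory and all of its entries. */
	remove_proc_subtree("foo", init_net.proc_net);
	return -ENOMEM;
}
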
@@ -31,8 +31,8 @@ config XFRM_SUB_POLICY If unsure, say N. config XFRM_MIGRATE - bool "Transformation migrate database (EXPERIMENTAL)" - depends on XFRM && EXPERIMENTAL + bool "Transformation migrate database" + depends on XFRM ---help--- A feature to update locator(s) of a given IPsec security association dynamically. This feature is required, for @@ -42,8 +42,8 @@ config XFRM_MIGRATE If unsure, say N. config XFRM_STATISTICS - bool "Transformation statistics (EXPERIMENTAL)" - depends on INET && XFRM && PROC_FS && EXPERIMENTAL + bool "Transformation statistics" + depends on INET && XFRM && PROC_FS ---help--- This statistics is not a SNMP/MIB specification but shows statistics about transformation error (or almost error) factor @@ -68,8 +68,8 @@ config NET_KEY Say Y unless you know what you are doing. config NET_KEY_MIGRATE - bool "PF_KEY MIGRATE (EXPERIMENTAL)" - depends on NET_KEY && EXPERIMENTAL + bool "PF_KEY MIGRATE" + depends on NET_KEY select XFRM_MIGRATE ---help--- Add a PF_KEY MIGRATE message to PF_KEYv2 socket family. diff --git a/net/xfrm/xfrm_algo.c b/net/xfrm/xfrm_algo.c index 4ce2d93162c1..ab4ef72f0b1d 100644 --- a/net/xfrm/xfrm_algo.c +++ b/net/xfrm/xfrm_algo.c @@ -35,6 +35,8 @@ static struct xfrm_algo_desc aead_list[] = { } }, + .pfkey_supported = 1, + .desc = { .sadb_alg_id = SADB_X_EALG_AES_GCM_ICV8, .sadb_alg_ivlen = 8, @@ -51,6 +53,8 @@ static struct xfrm_algo_desc aead_list[] = { } }, + .pfkey_supported = 1, + .desc = { .sadb_alg_id = SADB_X_EALG_AES_GCM_ICV12, .sadb_alg_ivlen = 8, @@ -67,6 +71,8 @@ static struct xfrm_algo_desc aead_list[] = { } }, + .pfkey_supported = 1, + .desc = { .sadb_alg_id = SADB_X_EALG_AES_GCM_ICV16, .sadb_alg_ivlen = 8, @@ -83,6 +89,8 @@ static struct xfrm_algo_desc aead_list[] = { } }, + .pfkey_supported = 1, + .desc = { .sadb_alg_id = SADB_X_EALG_AES_CCM_ICV8, .sadb_alg_ivlen = 8, @@ -99,6 +107,8 @@ static struct xfrm_algo_desc aead_list[] = { } }, + .pfkey_supported = 1, + .desc = { .sadb_alg_id = SADB_X_EALG_AES_CCM_ICV12, .sadb_alg_ivlen = 8, @@ -115,6 +125,8 @@ static struct xfrm_algo_desc aead_list[] = { } }, + .pfkey_supported = 1, + .desc = { .sadb_alg_id = SADB_X_EALG_AES_CCM_ICV16, .sadb_alg_ivlen = 8, @@ -131,6 +143,8 @@ static struct xfrm_algo_desc aead_list[] = { } }, + .pfkey_supported = 1, + .desc = { .sadb_alg_id = SADB_X_EALG_NULL_AES_GMAC, .sadb_alg_ivlen = 8, @@ -151,6 +165,8 @@ static struct xfrm_algo_desc aalg_list[] = { } }, + .pfkey_supported = 1, + .desc = { .sadb_alg_id = SADB_X_AALG_NULL, .sadb_alg_ivlen = 0, @@ -169,6 +185,8 @@ static struct xfrm_algo_desc aalg_list[] = { } }, + .pfkey_supported = 1, + .desc = { .sadb_alg_id = SADB_AALG_MD5HMAC, .sadb_alg_ivlen = 0, @@ -187,6 +205,8 @@ static struct xfrm_algo_desc aalg_list[] = { } }, + .pfkey_supported = 1, + .desc = { .sadb_alg_id = SADB_AALG_SHA1HMAC, .sadb_alg_ivlen = 0, @@ -205,6 +225,8 @@ static struct xfrm_algo_desc aalg_list[] = { } }, + .pfkey_supported = 1, + .desc = { .sadb_alg_id = SADB_X_AALG_SHA2_256HMAC, .sadb_alg_ivlen = 0, @@ -222,6 +244,8 @@ static struct xfrm_algo_desc aalg_list[] = { } }, + .pfkey_supported = 1, + .desc = { .sadb_alg_id = SADB_X_AALG_SHA2_384HMAC, .sadb_alg_ivlen = 0, @@ -239,6 +263,8 @@ static struct xfrm_algo_desc aalg_list[] = { } }, + .pfkey_supported = 1, + .desc = { .sadb_alg_id = SADB_X_AALG_SHA2_512HMAC, .sadb_alg_ivlen = 0, @@ -257,6 +283,8 @@ static struct xfrm_algo_desc aalg_list[] = { } }, + .pfkey_supported = 1, + .desc = { .sadb_alg_id = SADB_X_AALG_RIPEMD160HMAC, .sadb_alg_ivlen = 0, @@ -274,6 +302,8 @@ static 
struct xfrm_algo_desc aalg_list[] = { } }, + .pfkey_supported = 1, + .desc = { .sadb_alg_id = SADB_X_AALG_AES_XCBC_MAC, .sadb_alg_ivlen = 0, @@ -281,6 +311,19 @@ static struct xfrm_algo_desc aalg_list[] = { .sadb_alg_maxbits = 128 } }, +{ + /* rfc4494 */ + .name = "cmac(aes)", + + .uinfo = { + .auth = { + .icv_truncbits = 96, + .icv_fullbits = 128, + } + }, + + .pfkey_supported = 0, +}, }; static struct xfrm_algo_desc ealg_list[] = { @@ -295,6 +338,8 @@ static struct xfrm_algo_desc ealg_list[] = { } }, + .pfkey_supported = 1, + .desc = { .sadb_alg_id = SADB_EALG_NULL, .sadb_alg_ivlen = 0, @@ -313,6 +358,8 @@ static struct xfrm_algo_desc ealg_list[] = { } }, + .pfkey_supported = 1, + .desc = { .sadb_alg_id = SADB_EALG_DESCBC, .sadb_alg_ivlen = 8, @@ -331,6 +378,8 @@ static struct xfrm_algo_desc ealg_list[] = { } }, + .pfkey_supported = 1, + .desc = { .sadb_alg_id = SADB_EALG_3DESCBC, .sadb_alg_ivlen = 8, @@ -349,6 +398,8 @@ static struct xfrm_algo_desc ealg_list[] = { } }, + .pfkey_supported = 1, + .desc = { .sadb_alg_id = SADB_X_EALG_CASTCBC, .sadb_alg_ivlen = 8, @@ -367,6 +418,8 @@ static struct xfrm_algo_desc ealg_list[] = { } }, + .pfkey_supported = 1, + .desc = { .sadb_alg_id = SADB_X_EALG_BLOWFISHCBC, .sadb_alg_ivlen = 8, @@ -385,6 +438,8 @@ static struct xfrm_algo_desc ealg_list[] = { } }, + .pfkey_supported = 1, + .desc = { .sadb_alg_id = SADB_X_EALG_AESCBC, .sadb_alg_ivlen = 8, @@ -403,6 +458,8 @@ static struct xfrm_algo_desc ealg_list[] = { } }, + .pfkey_supported = 1, + .desc = { .sadb_alg_id = SADB_X_EALG_SERPENTCBC, .sadb_alg_ivlen = 8, @@ -421,6 +478,8 @@ static struct xfrm_algo_desc ealg_list[] = { } }, + .pfkey_supported = 1, + .desc = { .sadb_alg_id = SADB_X_EALG_CAMELLIACBC, .sadb_alg_ivlen = 8, @@ -439,6 +498,8 @@ static struct xfrm_algo_desc ealg_list[] = { } }, + .pfkey_supported = 1, + .desc = { .sadb_alg_id = SADB_X_EALG_TWOFISHCBC, .sadb_alg_ivlen = 8, @@ -456,6 +517,8 @@ static struct xfrm_algo_desc ealg_list[] = { } }, + .pfkey_supported = 1, + .desc = { .sadb_alg_id = SADB_X_EALG_AESCTR, .sadb_alg_ivlen = 8, @@ -473,6 +536,7 @@ static struct xfrm_algo_desc calg_list[] = { .threshold = 90, } }, + .pfkey_supported = 1, .desc = { .sadb_alg_id = SADB_X_CALG_DEFLATE } }, { @@ -482,6 +546,7 @@ static struct xfrm_algo_desc calg_list[] = { .threshold = 90, } }, + .pfkey_supported = 1, .desc = { .sadb_alg_id = SADB_X_CALG_LZS } }, { @@ -491,6 +556,7 @@ static struct xfrm_algo_desc calg_list[] = { .threshold = 50, } }, + .pfkey_supported = 1, .desc = { .sadb_alg_id = SADB_X_CALG_LZJH } }, }; @@ -700,8 +766,7 @@ void xfrm_probe_algs(void) } for (i = 0; i < ealg_entries(); i++) { - status = crypto_has_blkcipher(ealg_list[i].name, 0, - CRYPTO_ALG_ASYNC); + status = crypto_has_ablkcipher(ealg_list[i].name, 0, 0); if (ealg_list[i].available != status) ealg_list[i].available = status; } @@ -715,27 +780,27 @@ void xfrm_probe_algs(void) } EXPORT_SYMBOL_GPL(xfrm_probe_algs); -int xfrm_count_auth_supported(void) +int xfrm_count_pfkey_auth_supported(void) { int i, n; for (i = 0, n = 0; i < aalg_entries(); i++) - if (aalg_list[i].available) + if (aalg_list[i].available && aalg_list[i].pfkey_supported) n++; return n; } -EXPORT_SYMBOL_GPL(xfrm_count_auth_supported); +EXPORT_SYMBOL_GPL(xfrm_count_pfkey_auth_supported); -int xfrm_count_enc_supported(void) +int xfrm_count_pfkey_enc_supported(void) { int i, n; for (i = 0, n = 0; i < ealg_entries(); i++) - if (ealg_list[i].available) + if (ealg_list[i].available && ealg_list[i].pfkey_supported) n++; return n; } 
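
The new pfkey_supported field decouples the xfrm algorithm tables from the PF_KEYv2 interface: an entry such as cmac(aes) above has no SADB identifier, so it is flagged 0 and stays reachable only through netlink, and the renamed xfrm_count_pfkey_*_supported() helpers count just the entries PF_KEY is allowed to report. A toy model of that filter (simplified stand-in structs, arbitrary sample data):

#include <stdio.h>

struct algo {
	const char *name;
	int available;
	int pfkey_supported;
};

static const struct algo algos[] = {
	{ "hmac(sha1)", 1, 1 },
	{ "cmac(aes)",  1, 0 },	/* netlink-only, invisible to PF_KEY */
};

int main(void)
{
	unsigned int i, n = 0;

	for (i = 0; i < sizeof(algos) / sizeof(algos[0]); i++)
		if (algos[i].available && algos[i].pfkey_supported)
			n++;
	printf("PF_KEY-visible algorithms: %u\n", n);	/* prints 1 */
	return 0;
}
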
-EXPORT_SYMBOL_GPL(xfrm_count_enc_supported); +EXPORT_SYMBOL_GPL(xfrm_count_pfkey_enc_supported); #if defined(CONFIG_INET_ESP) || defined(CONFIG_INET_ESP_MODULE) || defined(CONFIG_INET6_ESP) || defined(CONFIG_INET6_ESP_MODULE) diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index 95a338c89f99..bcfda8921b5b 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -61,6 +61,12 @@ static int xfrm_output_one(struct sk_buff *skb, int err) } spin_lock_bh(&x->lock); + + if (unlikely(x->km.state != XFRM_STATE_VALID)) { + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATEINVALID); + goto error; + } + err = xfrm_state_check_expire(x); if (err) { XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATEEXPIRED); diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 07c585756d2a..23cea0f74336 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -35,6 +35,10 @@ #include "xfrm_hash.h" +#define XFRM_QUEUE_TMO_MIN ((unsigned)(HZ/10)) +#define XFRM_QUEUE_TMO_MAX ((unsigned)(60*HZ)) +#define XFRM_MAX_QUEUE_LEN 100 + DEFINE_MUTEX(xfrm_cfg_mutex); EXPORT_SYMBOL(xfrm_cfg_mutex); @@ -51,7 +55,7 @@ static struct kmem_cache *xfrm_dst_cache __read_mostly; static void xfrm_init_pmtu(struct dst_entry *dst); static int stale_bundle(struct dst_entry *dst); static int xfrm_bundle_ok(struct xfrm_dst *xdst); - +static void xfrm_policy_queue_process(unsigned long arg); static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol, int dir); @@ -287,8 +291,11 @@ struct xfrm_policy *xfrm_policy_alloc(struct net *net, gfp_t gfp) INIT_HLIST_NODE(&policy->byidx); rwlock_init(&policy->lock); atomic_set(&policy->refcnt, 1); + skb_queue_head_init(&policy->polq.hold_queue); setup_timer(&policy->timer, xfrm_policy_timer, (unsigned long)policy); + setup_timer(&policy->polq.hold_timer, xfrm_policy_queue_process, + (unsigned long)policy); policy->flo.ops = &xfrm_policy_fc_ops; } return policy; @@ -309,6 +316,16 @@ void xfrm_policy_destroy(struct xfrm_policy *policy) } EXPORT_SYMBOL(xfrm_policy_destroy); +static void xfrm_queue_purge(struct sk_buff_head *list) +{ + struct sk_buff *skb; + + while ((skb = skb_dequeue(list)) != NULL) { + dev_put(skb->dev); + kfree_skb(skb); + } +} + /* Rule must be locked. Release descentant resources, announce * entry dead. The rule must be unlinked from lists to the moment. 
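
The three XFRM_QUEUE_* constants introduced above bound the new policy hold queue: while no SA has been resolved yet, packets wait on polq.hold_queue (at most XFRM_MAX_QUEUE_LEN of them), and the hold timer starts at HZ/10 and doubles after every unsuccessful retry until it reaches 60*HZ, at which point the queue is purged (see xfrm_policy_queue_process() further down). A small demonstration of that backoff schedule (userspace C, HZ assumed to be 100 for illustration):

#include <stdio.h>

#define HZ 100
#define XFRM_QUEUE_TMO_MIN ((unsigned)(HZ / 10))
#define XFRM_QUEUE_TMO_MAX ((unsigned)(60 * HZ))

int main(void)
{
	unsigned int timeout = XFRM_QUEUE_TMO_MIN;

	/* Doubling, as in "pq->timeout = pq->timeout << 1". */
	while (timeout < XFRM_QUEUE_TMO_MAX) {
		printf("retry in %u jiffies\n", timeout);
		timeout <<= 1;
	}
	printf("%u >= %u: purge queue\n", timeout, XFRM_QUEUE_TMO_MAX);
	return 0;
}
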
*/ @@ -319,6 +336,9 @@ static void xfrm_policy_kill(struct xfrm_policy *policy) atomic_inc(&policy->genid); + del_timer(&policy->polq.hold_timer); + xfrm_queue_purge(&policy->polq.hold_queue); + if (del_timer(&policy->timer)) xfrm_pol_put(policy); @@ -359,27 +379,27 @@ static void xfrm_dst_hash_transfer(struct hlist_head *list, struct hlist_head *ndsttable, unsigned int nhashmask) { - struct hlist_node *entry, *tmp, *entry0 = NULL; + struct hlist_node *tmp, *entry0 = NULL; struct xfrm_policy *pol; unsigned int h0 = 0; redo: - hlist_for_each_entry_safe(pol, entry, tmp, list, bydst) { + hlist_for_each_entry_safe(pol, tmp, list, bydst) { unsigned int h; h = __addr_hash(&pol->selector.daddr, &pol->selector.saddr, pol->family, nhashmask); if (!entry0) { - hlist_del(entry); + hlist_del(&pol->bydst); hlist_add_head(&pol->bydst, ndsttable+h); h0 = h; } else { if (h != h0) continue; - hlist_del(entry); + hlist_del(&pol->bydst); hlist_add_after(entry0, &pol->bydst); } - entry0 = entry; + entry0 = &pol->bydst; } if (!hlist_empty(list)) { entry0 = NULL; @@ -391,10 +411,10 @@ static void xfrm_idx_hash_transfer(struct hlist_head *list, struct hlist_head *nidxtable, unsigned int nhashmask) { - struct hlist_node *entry, *tmp; + struct hlist_node *tmp; struct xfrm_policy *pol; - hlist_for_each_entry_safe(pol, entry, tmp, list, byidx) { + hlist_for_each_entry_safe(pol, tmp, list, byidx) { unsigned int h; h = __idx_hash(pol->index, nhashmask); @@ -524,7 +544,6 @@ static u32 xfrm_gen_index(struct net *net, int dir) static u32 idx_generator; for (;;) { - struct hlist_node *entry; struct hlist_head *list; struct xfrm_policy *p; u32 idx; @@ -536,7 +555,7 @@ static u32 xfrm_gen_index(struct net *net, int dir) idx = 8; list = net->xfrm.policy_byidx + idx_hash(net, idx); found = 0; - hlist_for_each_entry(p, entry, list, byidx) { + hlist_for_each_entry(p, list, byidx) { if (p->index == idx) { found = 1; break; @@ -562,23 +581,62 @@ static inline int selector_cmp(struct xfrm_selector *s1, struct xfrm_selector *s return 0; } +static void xfrm_policy_requeue(struct xfrm_policy *old, + struct xfrm_policy *new) +{ + struct xfrm_policy_queue *pq = &old->polq; + struct sk_buff_head list; + + __skb_queue_head_init(&list); + + spin_lock_bh(&pq->hold_queue.lock); + skb_queue_splice_init(&pq->hold_queue, &list); + del_timer(&pq->hold_timer); + spin_unlock_bh(&pq->hold_queue.lock); + + if (skb_queue_empty(&list)) + return; + + pq = &new->polq; + + spin_lock_bh(&pq->hold_queue.lock); + skb_queue_splice(&list, &pq->hold_queue); + pq->timeout = XFRM_QUEUE_TMO_MIN; + mod_timer(&pq->hold_timer, jiffies); + spin_unlock_bh(&pq->hold_queue.lock); +} + +static bool xfrm_policy_mark_match(struct xfrm_policy *policy, + struct xfrm_policy *pol) +{ + u32 mark = policy->mark.v & policy->mark.m; + + if (policy->mark.v == pol->mark.v && policy->mark.m == pol->mark.m) + return true; + + if ((mark & pol->mark.m) == pol->mark.v && + policy->priority == pol->priority) + return true; + + return false; +} + int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) { struct net *net = xp_net(policy); struct xfrm_policy *pol; struct xfrm_policy *delpol; struct hlist_head *chain; - struct hlist_node *entry, *newpos; - u32 mark = policy->mark.v & policy->mark.m; + struct hlist_node *newpos; write_lock_bh(&xfrm_policy_lock); chain = policy_hash_bysel(net, &policy->selector, policy->family, dir); delpol = NULL; newpos = NULL; - hlist_for_each_entry(pol, entry, chain, bydst) { + hlist_for_each_entry(pol, chain, bydst) { if (pol->type == 
policy->type && !selector_cmp(&pol->selector, &policy->selector) && - (mark & pol->mark.m) == pol->mark.v && + xfrm_policy_mark_match(policy, pol) && xfrm_sec_ctx_match(pol->security, policy->security) && !WARN_ON(delpol)) { if (excl) { @@ -603,8 +661,10 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) net->xfrm.policy_count[dir]++; atomic_inc(&flow_cache_genid); rt_genid_bump(net); - if (delpol) + if (delpol) { + xfrm_policy_requeue(delpol, policy); __xfrm_policy_unlink(delpol, dir); + } policy->index = delpol ? delpol->index : xfrm_gen_index(net, dir); hlist_add_head(&policy->byidx, net->xfrm.policy_byidx+idx_hash(net, policy->index)); policy->curlft.add_time = get_seconds(); @@ -630,13 +690,12 @@ struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u8 type, { struct xfrm_policy *pol, *ret; struct hlist_head *chain; - struct hlist_node *entry; *err = 0; write_lock_bh(&xfrm_policy_lock); chain = policy_hash_bysel(net, sel, sel->family, dir); ret = NULL; - hlist_for_each_entry(pol, entry, chain, bydst) { + hlist_for_each_entry(pol, chain, bydst) { if (pol->type == type && (mark & pol->mark.m) == pol->mark.v && !selector_cmp(sel, &pol->selector) && @@ -668,7 +727,6 @@ struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u8 type, { struct xfrm_policy *pol, *ret; struct hlist_head *chain; - struct hlist_node *entry; *err = -ENOENT; if (xfrm_policy_id2dir(id) != dir) @@ -678,7 +736,7 @@ struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u8 type, write_lock_bh(&xfrm_policy_lock); chain = net->xfrm.policy_byidx + idx_hash(net, id); ret = NULL; - hlist_for_each_entry(pol, entry, chain, byidx) { + hlist_for_each_entry(pol, chain, byidx) { if (pol->type == type && pol->index == id && (mark & pol->mark.m) == pol->mark.v) { xfrm_pol_hold(pol); @@ -711,10 +769,9 @@ xfrm_policy_flush_secctx_check(struct net *net, u8 type, struct xfrm_audit *audi for (dir = 0; dir < XFRM_POLICY_MAX; dir++) { struct xfrm_policy *pol; - struct hlist_node *entry; int i; - hlist_for_each_entry(pol, entry, + hlist_for_each_entry(pol, &net->xfrm.policy_inexact[dir], bydst) { if (pol->type != type) continue; @@ -728,7 +785,7 @@ xfrm_policy_flush_secctx_check(struct net *net, u8 type, struct xfrm_audit *audi } } for (i = net->xfrm.policy_bydst[dir].hmask; i >= 0; i--) { - hlist_for_each_entry(pol, entry, + hlist_for_each_entry(pol, net->xfrm.policy_bydst[dir].table + i, bydst) { if (pol->type != type) @@ -767,11 +824,10 @@ int xfrm_policy_flush(struct net *net, u8 type, struct xfrm_audit *audit_info) for (dir = 0; dir < XFRM_POLICY_MAX; dir++) { struct xfrm_policy *pol; - struct hlist_node *entry; int i; again1: - hlist_for_each_entry(pol, entry, + hlist_for_each_entry(pol, &net->xfrm.policy_inexact[dir], bydst) { if (pol->type != type) continue; @@ -791,7 +847,7 @@ int xfrm_policy_flush(struct net *net, u8 type, struct xfrm_audit *audit_info) for (i = net->xfrm.policy_bydst[dir].hmask; i >= 0; i--) { again2: - hlist_for_each_entry(pol, entry, + hlist_for_each_entry(pol, net->xfrm.policy_bydst[dir].table + i, bydst) { if (pol->type != type) @@ -919,7 +975,6 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type, int err; struct xfrm_policy *pol, *ret; const xfrm_address_t *daddr, *saddr; - struct hlist_node *entry; struct hlist_head *chain; u32 priority = ~0U; @@ -931,7 +986,7 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type, read_lock_bh(&xfrm_policy_lock); chain = policy_hash_direct(net, daddr, saddr, 
family, dir); ret = NULL; - hlist_for_each_entry(pol, entry, chain, bydst) { + hlist_for_each_entry(pol, chain, bydst) { err = xfrm_policy_match(pol, fl, type, family, dir); if (err) { if (err == -ESRCH) @@ -947,7 +1002,7 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type, } } chain = &net->xfrm.policy_inexact[dir]; - hlist_for_each_entry(pol, entry, chain, bydst) { + hlist_for_each_entry(pol, chain, bydst) { err = xfrm_policy_match(pol, fl, type, family, dir); if (err) { if (err == -ESRCH) @@ -982,6 +1037,24 @@ __xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir return xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN, fl, family, dir); } +static int flow_to_policy_dir(int dir) +{ + if (XFRM_POLICY_IN == FLOW_DIR_IN && + XFRM_POLICY_OUT == FLOW_DIR_OUT && + XFRM_POLICY_FWD == FLOW_DIR_FWD) + return dir; + + switch (dir) { + default: + case FLOW_DIR_IN: + return XFRM_POLICY_IN; + case FLOW_DIR_OUT: + return XFRM_POLICY_OUT; + case FLOW_DIR_FWD: + return XFRM_POLICY_FWD; + } +} + static struct flow_cache_object * xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir, struct flow_cache_object *old_obj, void *ctx) @@ -991,7 +1064,7 @@ xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family, if (old_obj) xfrm_pol_put(container_of(old_obj, struct xfrm_policy, flo)); - pol = __xfrm_policy_lookup(net, fl, family, dir); + pol = __xfrm_policy_lookup(net, fl, family, flow_to_policy_dir(dir)); if (IS_ERR_OR_NULL(pol)) return ERR_CAST(pol); @@ -1115,11 +1188,15 @@ int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol) pol->index = xfrm_gen_index(net, XFRM_POLICY_MAX+dir); __xfrm_policy_link(pol, XFRM_POLICY_MAX+dir); } - if (old_pol) + if (old_pol) { + if (pol) + xfrm_policy_requeue(old_pol, pol); + /* Unlinking succeeds always. This is the only function * allowed to delete or replace socket policy. */ __xfrm_policy_unlink(old_pol, XFRM_POLICY_MAX+dir); + } write_unlock_bh(&xfrm_policy_lock); if (old_pol) { @@ -1310,6 +1387,8 @@ static struct flow_cache_object *xfrm_bundle_flo_get(struct flow_cache_object *f * It means we need to try again resolving. 
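
flow_to_policy_dir(), added above, guards against the FLOW_DIR_* flow-cache constants and the XFRM_POLICY_* direction constants ever drifting apart: when they are numerically identical the mapping is the identity and the compiler can fold the whole function into a plain return, otherwise each direction is translated explicitly. A reduced, runnable model of the idiom (both enums here are hypothetical stand-ins):

#include <stdio.h>

enum flow_dir   { FLOW_DIR_IN, FLOW_DIR_OUT, FLOW_DIR_FWD };
enum policy_dir { XFRM_POLICY_IN, XFRM_POLICY_OUT, XFRM_POLICY_FWD };

static int flow_to_policy_dir(int dir)
{
	/* All compile-time constants: when the enums line up, the
	 * branch below is taken unconditionally. */
	if (XFRM_POLICY_IN == FLOW_DIR_IN &&
	    XFRM_POLICY_OUT == FLOW_DIR_OUT &&
	    XFRM_POLICY_FWD == FLOW_DIR_FWD)
		return dir;

	switch (dir) {
	default:
	case FLOW_DIR_IN:
		return XFRM_POLICY_IN;
	case FLOW_DIR_OUT:
		return XFRM_POLICY_OUT;
	case FLOW_DIR_FWD:
		return XFRM_POLICY_FWD;
	}
}

int main(void)
{
	printf("%d\n", flow_to_policy_dir(FLOW_DIR_FWD));	/* prints 2 */
	return 0;
}
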
*/ if (xdst->num_xfrms > 0) return NULL; + } else if (dst->flags & DST_XFRM_QUEUE) { + return NULL; } else { /* Real bundle */ if (stale_bundle(dst)) @@ -1673,6 +1752,171 @@ xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols, return xdst; } +static void xfrm_policy_queue_process(unsigned long arg) +{ + int err = 0; + struct sk_buff *skb; + struct sock *sk; + struct dst_entry *dst; + struct net_device *dev; + struct xfrm_policy *pol = (struct xfrm_policy *)arg; + struct xfrm_policy_queue *pq = &pol->polq; + struct flowi fl; + struct sk_buff_head list; + + spin_lock(&pq->hold_queue.lock); + skb = skb_peek(&pq->hold_queue); + dst = skb_dst(skb); + sk = skb->sk; + xfrm_decode_session(skb, &fl, dst->ops->family); + spin_unlock(&pq->hold_queue.lock); + + dst_hold(dst->path); + dst = xfrm_lookup(xp_net(pol), dst->path, &fl, + sk, 0); + if (IS_ERR(dst)) + goto purge_queue; + + if (dst->flags & DST_XFRM_QUEUE) { + dst_release(dst); + + if (pq->timeout >= XFRM_QUEUE_TMO_MAX) + goto purge_queue; + + pq->timeout = pq->timeout << 1; + mod_timer(&pq->hold_timer, jiffies + pq->timeout); + return; + } + + dst_release(dst); + + __skb_queue_head_init(&list); + + spin_lock(&pq->hold_queue.lock); + pq->timeout = 0; + skb_queue_splice_init(&pq->hold_queue, &list); + spin_unlock(&pq->hold_queue.lock); + + while (!skb_queue_empty(&list)) { + skb = __skb_dequeue(&list); + + xfrm_decode_session(skb, &fl, skb_dst(skb)->ops->family); + dst_hold(skb_dst(skb)->path); + dst = xfrm_lookup(xp_net(pol), skb_dst(skb)->path, + &fl, skb->sk, 0); + if (IS_ERR(dst)) { + dev_put(skb->dev); + kfree_skb(skb); + continue; + } + + nf_reset(skb); + skb_dst_drop(skb); + skb_dst_set(skb, dst); + + dev = skb->dev; + err = dst_output(skb); + dev_put(dev); + } + + return; + +purge_queue: + pq->timeout = 0; + xfrm_queue_purge(&pq->hold_queue); +} + +static int xdst_queue_output(struct sk_buff *skb) +{ + unsigned long sched_next; + struct dst_entry *dst = skb_dst(skb); + struct xfrm_dst *xdst = (struct xfrm_dst *) dst; + struct xfrm_policy_queue *pq = &xdst->pols[0]->polq; + + if (pq->hold_queue.qlen > XFRM_MAX_QUEUE_LEN) { + kfree_skb(skb); + return -EAGAIN; + } + + skb_dst_force(skb); + dev_hold(skb->dev); + + spin_lock_bh(&pq->hold_queue.lock); + + if (!pq->timeout) + pq->timeout = XFRM_QUEUE_TMO_MIN; + + sched_next = jiffies + pq->timeout; + + if (del_timer(&pq->hold_timer)) { + if (time_before(pq->hold_timer.expires, sched_next)) + sched_next = pq->hold_timer.expires; + } + + __skb_queue_tail(&pq->hold_queue, skb); + mod_timer(&pq->hold_timer, sched_next); + + spin_unlock_bh(&pq->hold_queue.lock); + + return 0; +} + +static struct xfrm_dst *xfrm_create_dummy_bundle(struct net *net, + struct dst_entry *dst, + const struct flowi *fl, + int num_xfrms, + u16 family) +{ + int err; + struct net_device *dev; + struct dst_entry *dst1; + struct xfrm_dst *xdst; + + xdst = xfrm_alloc_dst(net, family); + if (IS_ERR(xdst)) + return xdst; + + if (net->xfrm.sysctl_larval_drop || num_xfrms <= 0 || + (fl->flowi_flags & FLOWI_FLAG_CAN_SLEEP)) + return xdst; + + dst1 = &xdst->u.dst; + dst_hold(dst); + xdst->route = dst; + + dst_copy_metrics(dst1, dst); + + dst1->obsolete = DST_OBSOLETE_FORCE_CHK; + dst1->flags |= DST_HOST | DST_XFRM_QUEUE; + dst1->lastuse = jiffies; + + dst1->input = dst_discard; + dst1->output = xdst_queue_output; + + dst_hold(dst); + dst1->child = dst; + dst1->path = dst; + + xfrm_init_path((struct xfrm_dst *)dst1, dst, 0); + + err = -ENODEV; + dev = dst->dev; + if (!dev) + goto free_dst; + + err = 
xfrm_fill_dst(xdst, dev, fl); + if (err) + goto free_dst; + +out: + return xdst; + +free_dst: + dst_release(dst1); + xdst = ERR_PTR(err); + goto out; +} + static struct flow_cache_object * xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir, struct flow_cache_object *oldflo, void *ctx) @@ -1706,7 +1950,8 @@ xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir, * previous cache entry */ if (xdst == NULL) { num_pols = 1; - pols[0] = __xfrm_policy_lookup(net, fl, family, dir); + pols[0] = __xfrm_policy_lookup(net, fl, family, + flow_to_policy_dir(dir)); err = xfrm_expand_policies(fl, family, pols, &num_pols, &num_xfrms); if (err < 0) @@ -1751,7 +1996,7 @@ make_dummy_bundle: /* We found policies, but there's no bundles to instantiate: * either because the policy blocks, has no transformations or * we could not build template (no xfrm_states).*/ - xdst = xfrm_alloc_dst(net, family); + xdst = xfrm_create_dummy_bundle(net, dst_orig, fl, num_xfrms, family); if (IS_ERR(xdst)) { xfrm_pols_put(pols, num_pols); return ERR_CAST(xdst); @@ -2359,6 +2604,9 @@ static int xfrm_bundle_ok(struct xfrm_dst *first) (dst->dev && !netif_running(dst->dev))) return 0; + if (dst->flags & DST_XFRM_QUEUE) + return 1; + last = NULL; do { @@ -2786,10 +3034,10 @@ static bool xfrm_migrate_selector_match(const struct xfrm_selector *sel_cmp, { if (sel_cmp->proto == IPSEC_ULPROTO_ANY) { if (sel_tgt->family == sel_cmp->family && - xfrm_addr_cmp(&sel_tgt->daddr, &sel_cmp->daddr, - sel_cmp->family) == 0 && - xfrm_addr_cmp(&sel_tgt->saddr, &sel_cmp->saddr, - sel_cmp->family) == 0 && + xfrm_addr_equal(&sel_tgt->daddr, &sel_cmp->daddr, + sel_cmp->family) && + xfrm_addr_equal(&sel_tgt->saddr, &sel_cmp->saddr, + sel_cmp->family) && sel_tgt->prefixlen_d == sel_cmp->prefixlen_d && sel_tgt->prefixlen_s == sel_cmp->prefixlen_s) { return true; @@ -2806,13 +3054,12 @@ static struct xfrm_policy * xfrm_migrate_policy_find(const struct xfrm_selector u8 dir, u8 type) { struct xfrm_policy *pol, *ret = NULL; - struct hlist_node *entry; struct hlist_head *chain; u32 priority = ~0U; read_lock_bh(&xfrm_policy_lock); chain = policy_hash_direct(&init_net, &sel->daddr, &sel->saddr, sel->family, dir); - hlist_for_each_entry(pol, entry, chain, bydst) { + hlist_for_each_entry(pol, chain, bydst) { if (xfrm_migrate_selector_match(sel, &pol->selector) && pol->type == type) { ret = pol; @@ -2821,7 +3068,7 @@ static struct xfrm_policy * xfrm_migrate_policy_find(const struct xfrm_selector } } chain = &init_net.xfrm.policy_inexact[dir]; - hlist_for_each_entry(pol, entry, chain, bydst) { + hlist_for_each_entry(pol, chain, bydst) { if (xfrm_migrate_selector_match(sel, &pol->selector) && pol->type == type && pol->priority < priority) { @@ -2847,10 +3094,10 @@ static int migrate_tmpl_match(const struct xfrm_migrate *m, const struct xfrm_tm switch (t->mode) { case XFRM_MODE_TUNNEL: case XFRM_MODE_BEET: - if (xfrm_addr_cmp(&t->id.daddr, &m->old_daddr, - m->old_family) == 0 && - xfrm_addr_cmp(&t->saddr, &m->old_saddr, - m->old_family) == 0) { + if (xfrm_addr_equal(&t->id.daddr, &m->old_daddr, + m->old_family) && + xfrm_addr_equal(&t->saddr, &m->old_saddr, + m->old_family)) { match = 1; } break; @@ -2916,10 +3163,10 @@ static int xfrm_migrate_check(const struct xfrm_migrate *m, int num_migrate) return -EINVAL; for (i = 0; i < num_migrate; i++) { - if ((xfrm_addr_cmp(&m[i].old_daddr, &m[i].new_daddr, - m[i].old_family) == 0) && - (xfrm_addr_cmp(&m[i].old_saddr, &m[i].new_saddr, - m[i].old_family) == 0)) + if 
(xfrm_addr_equal(&m[i].old_daddr, &m[i].new_daddr, + m[i].old_family) && + xfrm_addr_equal(&m[i].old_saddr, &m[i].new_saddr, + m[i].old_family)) return -EINVAL; if (xfrm_addr_any(&m[i].new_daddr, m[i].new_family) || xfrm_addr_any(&m[i].new_saddr, m[i].new_family)) diff --git a/net/xfrm/xfrm_proc.c b/net/xfrm/xfrm_proc.c index d0a1af8ed584..c721b0d9ab8b 100644 --- a/net/xfrm/xfrm_proc.c +++ b/net/xfrm/xfrm_proc.c @@ -43,6 +43,7 @@ static const struct snmp_mib xfrm_mib_list[] = { SNMP_MIB_ITEM("XfrmOutPolDead", LINUX_MIB_XFRMOUTPOLDEAD), SNMP_MIB_ITEM("XfrmOutPolError", LINUX_MIB_XFRMOUTPOLERROR), SNMP_MIB_ITEM("XfrmFwdHdrError", LINUX_MIB_XFRMFWDHDRERROR), + SNMP_MIB_ITEM("XfrmOutStateInvalid", LINUX_MIB_XFRMOUTSTATEINVALID), SNMP_MIB_SENTINEL }; @@ -73,13 +74,13 @@ static const struct file_operations xfrm_statistics_seq_fops = { int __net_init xfrm_proc_init(struct net *net) { - if (!proc_net_fops_create(net, "xfrm_stat", S_IRUGO, - &xfrm_statistics_seq_fops)) + if (!proc_create("xfrm_stat", S_IRUGO, net->proc_net, + &xfrm_statistics_seq_fops)) return -ENOMEM; return 0; } void xfrm_proc_fini(struct net *net) { - proc_net_remove(net, "xfrm_stat"); + remove_proc_entry("xfrm_stat", net->proc_net); } diff --git a/net/xfrm/xfrm_replay.c b/net/xfrm/xfrm_replay.c index 35754cc8a9e5..8dafe6d3c6e4 100644 --- a/net/xfrm/xfrm_replay.c +++ b/net/xfrm/xfrm_replay.c @@ -334,6 +334,70 @@ static void xfrm_replay_notify_bmp(struct xfrm_state *x, int event) x->xflags &= ~XFRM_TIME_DEFER; } +static void xfrm_replay_notify_esn(struct xfrm_state *x, int event) +{ + u32 seq_diff, oseq_diff; + struct km_event c; + struct xfrm_replay_state_esn *replay_esn = x->replay_esn; + struct xfrm_replay_state_esn *preplay_esn = x->preplay_esn; + + /* we send notify messages in case + * 1. we updated one of the sequence numbers, and the seqno difference + * is at least x->replay_maxdiff, in this case we also update the + * timeout of our timer function + * 2. if x->replay_maxage has elapsed since last update, + * and there were changes + * + * The state structure must be locked!
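
The seq_diff/oseq_diff computation in the function body below has to cope with 32-bit wraparound of extended sequence numbers: when the stored seq_hi halves match, plain subtraction is enough; when seq_hi has advanced, ~old + new + 1 gives the distance across the 2^32 boundary (arithmetically the same as new - old modulo 2^32, written so the carry is explicit). A quick check of that identity (arbitrary test values):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint32_t old_seq = 0xfffffff0u;	/* just before the wrap */
	uint32_t new_seq = 0x00000010u;	/* just after the wrap */

	uint32_t diff = ~old_seq + new_seq + 1;

	printf("distance across wrap: %u\n", diff);		/* prints 32 */
	printf("new - old          : %u\n", new_seq - old_seq);	/* also 32 */
	return 0;
}
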
+ */ + + switch (event) { + case XFRM_REPLAY_UPDATE: + if (!x->replay_maxdiff) + break; + + if (replay_esn->seq_hi == preplay_esn->seq_hi) + seq_diff = replay_esn->seq - preplay_esn->seq; + else + seq_diff = ~preplay_esn->seq + replay_esn->seq + 1; + + if (replay_esn->oseq_hi == preplay_esn->oseq_hi) + oseq_diff = replay_esn->oseq - preplay_esn->oseq; + else + oseq_diff = ~preplay_esn->oseq + replay_esn->oseq + 1; + + if (seq_diff < x->replay_maxdiff && + oseq_diff < x->replay_maxdiff) { + + if (x->xflags & XFRM_TIME_DEFER) + event = XFRM_REPLAY_TIMEOUT; + else + return; + } + + break; + + case XFRM_REPLAY_TIMEOUT: + if (memcmp(x->replay_esn, x->preplay_esn, + xfrm_replay_state_esn_len(replay_esn)) == 0) { + x->xflags |= XFRM_TIME_DEFER; + return; + } + + break; + } + + memcpy(x->preplay_esn, x->replay_esn, + xfrm_replay_state_esn_len(replay_esn)); + c.event = XFRM_MSG_NEWAE; + c.data.aevent = event; + km_state_notify(x, &c); + + if (x->replay_maxage && + !mod_timer(&x->rtimer, jiffies + x->replay_maxage)) + x->xflags &= ~XFRM_TIME_DEFER; +} + static int xfrm_replay_overflow_esn(struct xfrm_state *x, struct sk_buff *skb) { int err = 0; @@ -510,7 +574,7 @@ static struct xfrm_replay xfrm_replay_esn = { .advance = xfrm_replay_advance_esn, .check = xfrm_replay_check_esn, .recheck = xfrm_replay_recheck_esn, - .notify = xfrm_replay_notify_bmp, + .notify = xfrm_replay_notify_esn, .overflow = xfrm_replay_overflow_esn, }; diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 3459692092ec..78f66fa92449 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -72,10 +72,10 @@ static void xfrm_hash_transfer(struct hlist_head *list, struct hlist_head *nspitable, unsigned int nhashmask) { - struct hlist_node *entry, *tmp; + struct hlist_node *tmp; struct xfrm_state *x; - hlist_for_each_entry_safe(x, entry, tmp, list, bydst) { + hlist_for_each_entry_safe(x, tmp, list, bydst) { unsigned int h; h = __xfrm_dst_hash(&x->id.daddr, &x->props.saddr, @@ -158,8 +158,8 @@ out_unlock: mutex_unlock(&hash_resize_mutex); } -static DEFINE_RWLOCK(xfrm_state_afinfo_lock); -static struct xfrm_state_afinfo *xfrm_state_afinfo[NPROTO]; +static DEFINE_SPINLOCK(xfrm_state_afinfo_lock); +static struct xfrm_state_afinfo __rcu *xfrm_state_afinfo[NPROTO]; static DEFINE_SPINLOCK(xfrm_state_gc_lock); @@ -168,58 +168,45 @@ int __xfrm_state_delete(struct xfrm_state *x); int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol); void km_state_expired(struct xfrm_state *x, int hard, u32 portid); -static struct xfrm_state_afinfo *xfrm_state_lock_afinfo(unsigned int family) -{ - struct xfrm_state_afinfo *afinfo; - if (unlikely(family >= NPROTO)) - return NULL; - write_lock_bh(&xfrm_state_afinfo_lock); - afinfo = xfrm_state_afinfo[family]; - if (unlikely(!afinfo)) - write_unlock_bh(&xfrm_state_afinfo_lock); - return afinfo; -} - -static void xfrm_state_unlock_afinfo(struct xfrm_state_afinfo *afinfo) - __releases(xfrm_state_afinfo_lock) -{ - write_unlock_bh(&xfrm_state_afinfo_lock); -} - +static DEFINE_SPINLOCK(xfrm_type_lock); int xfrm_register_type(const struct xfrm_type *type, unsigned short family) { - struct xfrm_state_afinfo *afinfo = xfrm_state_lock_afinfo(family); + struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family); const struct xfrm_type **typemap; int err = 0; if (unlikely(afinfo == NULL)) return -EAFNOSUPPORT; typemap = afinfo->type_map; + spin_lock_bh(&xfrm_type_lock); if (likely(typemap[type->proto] == NULL)) typemap[type->proto] = type; else err = -EEXIST; - 
xfrm_state_unlock_afinfo(afinfo); + spin_unlock_bh(&xfrm_type_lock); + xfrm_state_put_afinfo(afinfo); return err; } EXPORT_SYMBOL(xfrm_register_type); int xfrm_unregister_type(const struct xfrm_type *type, unsigned short family) { - struct xfrm_state_afinfo *afinfo = xfrm_state_lock_afinfo(family); + struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family); const struct xfrm_type **typemap; int err = 0; if (unlikely(afinfo == NULL)) return -EAFNOSUPPORT; typemap = afinfo->type_map; + spin_lock_bh(&xfrm_type_lock); if (unlikely(typemap[type->proto] != type)) err = -ENOENT; else typemap[type->proto] = NULL; - xfrm_state_unlock_afinfo(afinfo); + spin_unlock_bh(&xfrm_type_lock); + xfrm_state_put_afinfo(afinfo); return err; } EXPORT_SYMBOL(xfrm_unregister_type); @@ -256,6 +243,7 @@ static void xfrm_put_type(const struct xfrm_type *type) module_put(type->owner); } +static DEFINE_SPINLOCK(xfrm_mode_lock); int xfrm_register_mode(struct xfrm_mode *mode, int family) { struct xfrm_state_afinfo *afinfo; @@ -265,12 +253,13 @@ int xfrm_register_mode(struct xfrm_mode *mode, int family) if (unlikely(mode->encap >= XFRM_MODE_MAX)) return -EINVAL; - afinfo = xfrm_state_lock_afinfo(family); + afinfo = xfrm_state_get_afinfo(family); if (unlikely(afinfo == NULL)) return -EAFNOSUPPORT; err = -EEXIST; modemap = afinfo->mode_map; + spin_lock_bh(&xfrm_mode_lock); if (modemap[mode->encap]) goto out; @@ -283,7 +272,8 @@ int xfrm_register_mode(struct xfrm_mode *mode, int family) err = 0; out: - xfrm_state_unlock_afinfo(afinfo); + spin_unlock_bh(&xfrm_mode_lock); + xfrm_state_put_afinfo(afinfo); return err; } EXPORT_SYMBOL(xfrm_register_mode); @@ -297,19 +287,21 @@ int xfrm_unregister_mode(struct xfrm_mode *mode, int family) if (unlikely(mode->encap >= XFRM_MODE_MAX)) return -EINVAL; - afinfo = xfrm_state_lock_afinfo(family); + afinfo = xfrm_state_get_afinfo(family); if (unlikely(afinfo == NULL)) return -EAFNOSUPPORT; err = -ENOENT; modemap = afinfo->mode_map; + spin_lock_bh(&xfrm_mode_lock); if (likely(modemap[mode->encap] == mode)) { modemap[mode->encap] = NULL; module_put(mode->afinfo->owner); err = 0; } - xfrm_state_unlock_afinfo(afinfo); + spin_unlock_bh(&xfrm_mode_lock); + xfrm_state_put_afinfo(afinfo); return err; } EXPORT_SYMBOL(xfrm_unregister_mode); @@ -376,14 +368,14 @@ static void xfrm_state_gc_task(struct work_struct *work) { struct net *net = container_of(work, struct net, xfrm.state_gc_work); struct xfrm_state *x; - struct hlist_node *entry, *tmp; + struct hlist_node *tmp; struct hlist_head gc_list; spin_lock_bh(&xfrm_state_gc_lock); hlist_move_list(&net->xfrm.state_gc_list, &gc_list); spin_unlock_bh(&xfrm_state_gc_lock); - hlist_for_each_entry_safe(x, entry, tmp, &gc_list, gclist) + hlist_for_each_entry_safe(x, tmp, &gc_list, gclist) xfrm_state_gc_destroy(x); wake_up(&net->xfrm.km_waitq); @@ -585,10 +577,9 @@ xfrm_state_flush_secctx_check(struct net *net, u8 proto, struct xfrm_audit *audi int i, err = 0; for (i = 0; i <= net->xfrm.state_hmask; i++) { - struct hlist_node *entry; struct xfrm_state *x; - hlist_for_each_entry(x, entry, net->xfrm.state_bydst+i, bydst) { + hlist_for_each_entry(x, net->xfrm.state_bydst+i, bydst) { if (xfrm_id_proto_match(x->id.proto, proto) && (err = security_xfrm_state_delete(x)) != 0) { xfrm_audit_state_delete(x, 0, @@ -621,10 +612,9 @@ int xfrm_state_flush(struct net *net, u8 proto, struct xfrm_audit *audit_info) err = -ESRCH; for (i = 0; i <= net->xfrm.state_hmask; i++) { - struct hlist_node *entry; struct xfrm_state *x; restart: - hlist_for_each_entry(x, entry, 
net->xfrm.state_bydst+i, bydst) { + hlist_for_each_entry(x, net->xfrm.state_bydst+i, bydst) { if (!xfrm_state_kern(x) && xfrm_id_proto_match(x->id.proto, proto)) { xfrm_state_hold(x); @@ -693,13 +683,12 @@ static struct xfrm_state *__xfrm_state_lookup(struct net *net, u32 mark, { unsigned int h = xfrm_spi_hash(net, daddr, spi, proto, family); struct xfrm_state *x; - struct hlist_node *entry; - hlist_for_each_entry(x, entry, net->xfrm.state_byspi+h, byspi) { + hlist_for_each_entry(x, net->xfrm.state_byspi+h, byspi) { if (x->props.family != family || x->id.spi != spi || x->id.proto != proto || - xfrm_addr_cmp(&x->id.daddr, daddr, family)) + !xfrm_addr_equal(&x->id.daddr, daddr, family)) continue; if ((mark & x->mark.m) != x->mark.v) @@ -718,13 +707,12 @@ static struct xfrm_state *__xfrm_state_lookup_byaddr(struct net *net, u32 mark, { unsigned int h = xfrm_src_hash(net, daddr, saddr, family); struct xfrm_state *x; - struct hlist_node *entry; - hlist_for_each_entry(x, entry, net->xfrm.state_bysrc+h, bysrc) { + hlist_for_each_entry(x, net->xfrm.state_bysrc+h, bysrc) { if (x->props.family != family || x->id.proto != proto || - xfrm_addr_cmp(&x->id.daddr, daddr, family) || - xfrm_addr_cmp(&x->props.saddr, saddr, family)) + !xfrm_addr_equal(&x->id.daddr, daddr, family) || + !xfrm_addr_equal(&x->props.saddr, saddr, family)) continue; if ((mark & x->mark.m) != x->mark.v) @@ -806,7 +794,6 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr, static xfrm_address_t saddr_wildcard = { }; struct net *net = xp_net(pol); unsigned int h, h_wildcard; - struct hlist_node *entry; struct xfrm_state *x, *x0, *to_put; int acquire_in_progress = 0; int error = 0; @@ -818,7 +805,7 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr, spin_lock_bh(&xfrm_state_lock); h = xfrm_dst_hash(net, daddr, saddr, tmpl->reqid, encap_family); - hlist_for_each_entry(x, entry, net->xfrm.state_bydst+h, bydst) { + hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) { if (x->props.family == encap_family && x->props.reqid == tmpl->reqid && (mark & x->mark.m) == x->mark.v && @@ -834,7 +821,7 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr, goto found; h_wildcard = xfrm_dst_hash(net, daddr, &saddr_wildcard, tmpl->reqid, encap_family); - hlist_for_each_entry(x, entry, net->xfrm.state_bydst+h_wildcard, bydst) { + hlist_for_each_entry(x, net->xfrm.state_bydst+h_wildcard, bydst) { if (x->props.family == encap_family && x->props.reqid == tmpl->reqid && (mark & x->mark.m) == x->mark.v && @@ -914,11 +901,10 @@ xfrm_stateonly_find(struct net *net, u32 mark, { unsigned int h; struct xfrm_state *rx = NULL, *x = NULL; - struct hlist_node *entry; spin_lock(&xfrm_state_lock); h = xfrm_dst_hash(net, daddr, saddr, reqid, family); - hlist_for_each_entry(x, entry, net->xfrm.state_bydst+h, bydst) { + hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) { if (x->props.family == family && x->props.reqid == reqid && (mark & x->mark.m) == x->mark.v && @@ -980,17 +966,16 @@ static void __xfrm_state_bump_genids(struct xfrm_state *xnew) unsigned short family = xnew->props.family; u32 reqid = xnew->props.reqid; struct xfrm_state *x; - struct hlist_node *entry; unsigned int h; u32 mark = xnew->mark.v & xnew->mark.m; h = xfrm_dst_hash(net, &xnew->id.daddr, &xnew->props.saddr, reqid, family); - hlist_for_each_entry(x, entry, net->xfrm.state_bydst+h, bydst) { + hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) { if (x->props.family == family && x->props.reqid == reqid && (mark & 
x->mark.m) == x->mark.v && - !xfrm_addr_cmp(&x->id.daddr, &xnew->id.daddr, family) && - !xfrm_addr_cmp(&x->props.saddr, &xnew->props.saddr, family)) + xfrm_addr_equal(&x->id.daddr, &xnew->id.daddr, family) && + xfrm_addr_equal(&x->props.saddr, &xnew->props.saddr, family)) x->genid++; } } @@ -1012,11 +997,10 @@ static struct xfrm_state *__find_acq_core(struct net *net, struct xfrm_mark *m, const xfrm_address_t *saddr, int create) { unsigned int h = xfrm_dst_hash(net, daddr, saddr, reqid, family); - struct hlist_node *entry; struct xfrm_state *x; u32 mark = m->v & m->m; - hlist_for_each_entry(x, entry, net->xfrm.state_bydst+h, bydst) { + hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) { if (x->props.reqid != reqid || x->props.mode != mode || x->props.family != family || @@ -1024,8 +1008,8 @@ static struct xfrm_state *__find_acq_core(struct net *net, struct xfrm_mark *m, x->id.spi != 0 || x->id.proto != proto || (mark & x->mark.m) != x->mark.v || - xfrm_addr_cmp(&x->id.daddr, daddr, family) || - xfrm_addr_cmp(&x->props.saddr, saddr, family)) + !xfrm_addr_equal(&x->id.daddr, daddr, family) || + !xfrm_addr_equal(&x->props.saddr, saddr, family)) continue; xfrm_state_hold(x); @@ -1108,7 +1092,7 @@ int xfrm_state_add(struct xfrm_state *x) if (use_spi && x->km.seq) { x1 = __xfrm_find_acq_byseq(net, mark, x->km.seq); if (x1 && ((x1->id.proto != x->id.proto) || - xfrm_addr_cmp(&x1->id.daddr, &x->id.daddr, family))) { + !xfrm_addr_equal(&x1->id.daddr, &x->id.daddr, family))) { to_put = x1; x1 = NULL; } @@ -1203,6 +1187,7 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig, int *errp) goto error; x->props.flags = orig->props.flags; + x->props.extra_flags = orig->props.extra_flags; x->curlft.add_time = orig->curlft.add_time; x->km.state = orig->km.state; @@ -1223,21 +1208,20 @@ struct xfrm_state * xfrm_migrate_state_find(struct xfrm_migrate *m) { unsigned int h; struct xfrm_state *x; - struct hlist_node *entry; if (m->reqid) { h = xfrm_dst_hash(&init_net, &m->old_daddr, &m->old_saddr, m->reqid, m->old_family); - hlist_for_each_entry(x, entry, init_net.xfrm.state_bydst+h, bydst) { + hlist_for_each_entry(x, init_net.xfrm.state_bydst+h, bydst) { if (x->props.mode != m->mode || x->id.proto != m->proto) continue; if (m->reqid && x->props.reqid != m->reqid) continue; - if (xfrm_addr_cmp(&x->id.daddr, &m->old_daddr, - m->old_family) || - xfrm_addr_cmp(&x->props.saddr, &m->old_saddr, - m->old_family)) + if (!xfrm_addr_equal(&x->id.daddr, &m->old_daddr, + m->old_family) || + !xfrm_addr_equal(&x->props.saddr, &m->old_saddr, + m->old_family)) continue; xfrm_state_hold(x); return x; @@ -1245,14 +1229,14 @@ struct xfrm_state * xfrm_migrate_state_find(struct xfrm_migrate *m) } else { h = xfrm_src_hash(&init_net, &m->old_daddr, &m->old_saddr, m->old_family); - hlist_for_each_entry(x, entry, init_net.xfrm.state_bysrc+h, bysrc) { + hlist_for_each_entry(x, init_net.xfrm.state_bysrc+h, bysrc) { if (x->props.mode != m->mode || x->id.proto != m->proto) continue; - if (xfrm_addr_cmp(&x->id.daddr, &m->old_daddr, - m->old_family) || - xfrm_addr_cmp(&x->props.saddr, &m->old_saddr, - m->old_family)) + if (!xfrm_addr_equal(&x->id.daddr, &m->old_daddr, + m->old_family) || + !xfrm_addr_equal(&x->props.saddr, &m->old_saddr, + m->old_family)) continue; xfrm_state_hold(x); return x; @@ -1277,7 +1261,7 @@ struct xfrm_state * xfrm_state_migrate(struct xfrm_state *x, memcpy(&xc->props.saddr, &m->new_saddr, sizeof(xc->props.saddr)); /* add state */ - if (!xfrm_addr_cmp(&x->id.daddr, &m->new_daddr, m->new_family)) 
{ + if (xfrm_addr_equal(&x->id.daddr, &m->new_daddr, m->new_family)) { /* a care is needed when the destination address of the state is to be updated as it is a part of triplet */ xfrm_state_insert(xc); @@ -1370,9 +1354,6 @@ int xfrm_state_check_expire(struct xfrm_state *x) if (!x->curlft.use_time) x->curlft.use_time = get_seconds(); - if (x->km.state != XFRM_STATE_VALID) - return -EINVAL; - if (x->curlft.bytes >= x->lft.hard_byte_limit || x->curlft.packets >= x->lft.hard_packet_limit) { x->km.state = XFRM_STATE_EXPIRED; @@ -1477,10 +1458,9 @@ static struct xfrm_state *__xfrm_find_acq_byseq(struct net *net, u32 mark, u32 s int i; for (i = 0; i <= net->xfrm.state_hmask; i++) { - struct hlist_node *entry; struct xfrm_state *x; - hlist_for_each_entry(x, entry, net->xfrm.state_bydst+i, bydst) { + hlist_for_each_entry(x, net->xfrm.state_bydst+i, bydst) { if (x->km.seq == seq && (mark & x->mark.m) == x->mark.v && x->km.state == XFRM_STATE_ACQ) { @@ -1648,27 +1628,26 @@ static void xfrm_replay_timer_handler(unsigned long data) } static LIST_HEAD(xfrm_km_list); -static DEFINE_RWLOCK(xfrm_km_lock); void km_policy_notify(struct xfrm_policy *xp, int dir, const struct km_event *c) { struct xfrm_mgr *km; - read_lock(&xfrm_km_lock); - list_for_each_entry(km, &xfrm_km_list, list) + rcu_read_lock(); + list_for_each_entry_rcu(km, &xfrm_km_list, list) if (km->notify_policy) km->notify_policy(xp, dir, c); - read_unlock(&xfrm_km_lock); + rcu_read_unlock(); } void km_state_notify(struct xfrm_state *x, const struct km_event *c) { struct xfrm_mgr *km; - read_lock(&xfrm_km_lock); - list_for_each_entry(km, &xfrm_km_list, list) + rcu_read_lock(); + list_for_each_entry_rcu(km, &xfrm_km_list, list) if (km->notify) km->notify(x, c); - read_unlock(&xfrm_km_lock); + rcu_read_unlock(); } EXPORT_SYMBOL(km_policy_notify); @@ -1698,13 +1677,13 @@ int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol) int err = -EINVAL, acqret; struct xfrm_mgr *km; - read_lock(&xfrm_km_lock); - list_for_each_entry(km, &xfrm_km_list, list) { + rcu_read_lock(); + list_for_each_entry_rcu(km, &xfrm_km_list, list) { acqret = km->acquire(x, t, pol); if (!acqret) err = acqret; } - read_unlock(&xfrm_km_lock); + rcu_read_unlock(); return err; } EXPORT_SYMBOL(km_query); @@ -1714,14 +1693,14 @@ int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, __be16 sport) int err = -EINVAL; struct xfrm_mgr *km; - read_lock(&xfrm_km_lock); - list_for_each_entry(km, &xfrm_km_list, list) { + rcu_read_lock(); + list_for_each_entry_rcu(km, &xfrm_km_list, list) { if (km->new_mapping) err = km->new_mapping(x, ipaddr, sport); if (!err) break; } - read_unlock(&xfrm_km_lock); + rcu_read_unlock(); return err; } EXPORT_SYMBOL(km_new_mapping); @@ -1750,15 +1729,15 @@ int km_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, int ret; struct xfrm_mgr *km; - read_lock(&xfrm_km_lock); - list_for_each_entry(km, &xfrm_km_list, list) { + rcu_read_lock(); + list_for_each_entry_rcu(km, &xfrm_km_list, list) { if (km->migrate) { ret = km->migrate(sel, dir, type, m, num_migrate, k); if (!ret) err = ret; } } - read_unlock(&xfrm_km_lock); + rcu_read_unlock(); return err; } EXPORT_SYMBOL(km_migrate); @@ -1770,15 +1749,15 @@ int km_report(struct net *net, u8 proto, struct xfrm_selector *sel, xfrm_address int ret; struct xfrm_mgr *km; - read_lock(&xfrm_km_lock); - list_for_each_entry(km, &xfrm_km_list, list) { + rcu_read_lock(); + list_for_each_entry_rcu(km, &xfrm_km_list, list) { if (km->report) { ret = km->report(net, proto, sel, addr); if (!ret) 
err = ret; } } - read_unlock(&xfrm_km_lock); + rcu_read_unlock(); return err; } EXPORT_SYMBOL(km_report); @@ -1802,14 +1781,14 @@ int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen goto out; err = -EINVAL; - read_lock(&xfrm_km_lock); - list_for_each_entry(km, &xfrm_km_list, list) { + rcu_read_lock(); + list_for_each_entry_rcu(km, &xfrm_km_list, list) { pol = km->compile_policy(sk, optname, data, optlen, &err); if (err >= 0) break; } - read_unlock(&xfrm_km_lock); + rcu_read_unlock(); if (err >= 0) { xfrm_sk_policy_insert(sk, err, pol); @@ -1823,20 +1802,23 @@ out: } EXPORT_SYMBOL(xfrm_user_policy); +static DEFINE_SPINLOCK(xfrm_km_lock); + int xfrm_register_km(struct xfrm_mgr *km) { - write_lock_bh(&xfrm_km_lock); - list_add_tail(&km->list, &xfrm_km_list); - write_unlock_bh(&xfrm_km_lock); + spin_lock_bh(&xfrm_km_lock); + list_add_tail_rcu(&km->list, &xfrm_km_list); + spin_unlock_bh(&xfrm_km_lock); return 0; } EXPORT_SYMBOL(xfrm_register_km); int xfrm_unregister_km(struct xfrm_mgr *km) { - write_lock_bh(&xfrm_km_lock); - list_del(&km->list); - write_unlock_bh(&xfrm_km_lock); + spin_lock_bh(&xfrm_km_lock); + list_del_rcu(&km->list); + spin_unlock_bh(&xfrm_km_lock); + synchronize_rcu(); return 0; } EXPORT_SYMBOL(xfrm_unregister_km); @@ -1848,12 +1830,12 @@ int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo) return -EINVAL; if (unlikely(afinfo->family >= NPROTO)) return -EAFNOSUPPORT; - write_lock_bh(&xfrm_state_afinfo_lock); + spin_lock_bh(&xfrm_state_afinfo_lock); if (unlikely(xfrm_state_afinfo[afinfo->family] != NULL)) err = -ENOBUFS; else - xfrm_state_afinfo[afinfo->family] = afinfo; - write_unlock_bh(&xfrm_state_afinfo_lock); + rcu_assign_pointer(xfrm_state_afinfo[afinfo->family], afinfo); + spin_unlock_bh(&xfrm_state_afinfo_lock); return err; } EXPORT_SYMBOL(xfrm_state_register_afinfo); @@ -1865,14 +1847,15 @@ int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo) return -EINVAL; if (unlikely(afinfo->family >= NPROTO)) return -EAFNOSUPPORT; - write_lock_bh(&xfrm_state_afinfo_lock); + spin_lock_bh(&xfrm_state_afinfo_lock); if (likely(xfrm_state_afinfo[afinfo->family] != NULL)) { if (unlikely(xfrm_state_afinfo[afinfo->family] != afinfo)) err = -EINVAL; else - xfrm_state_afinfo[afinfo->family] = NULL; + RCU_INIT_POINTER(xfrm_state_afinfo[afinfo->family], NULL); } - write_unlock_bh(&xfrm_state_afinfo_lock); + spin_unlock_bh(&xfrm_state_afinfo_lock); + synchronize_rcu(); return err; } EXPORT_SYMBOL(xfrm_state_unregister_afinfo); @@ -1882,17 +1865,16 @@ static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned int family) struct xfrm_state_afinfo *afinfo; if (unlikely(family >= NPROTO)) return NULL; - read_lock(&xfrm_state_afinfo_lock); - afinfo = xfrm_state_afinfo[family]; + rcu_read_lock(); + afinfo = rcu_dereference(xfrm_state_afinfo[family]); if (unlikely(!afinfo)) - read_unlock(&xfrm_state_afinfo_lock); + rcu_read_unlock(); return afinfo; } static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo) - __releases(xfrm_state_afinfo_lock) { - read_unlock(&xfrm_state_afinfo_lock); + rcu_read_unlock(); } /* Temporarily located here until net/xfrm/xfrm_tunnel.c is created */ diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index eb872b2e366e..aa778748c565 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -515,6 +515,9 @@ static struct xfrm_state *xfrm_state_construct(struct net *net, copy_from_user_state(x, p); + if (attrs[XFRMA_SA_EXTRA_FLAGS]) + x->props.extra_flags = 
nla_get_u32(attrs[XFRMA_SA_EXTRA_FLAGS]); + if ((err = attach_aead(&x->aead, &x->props.ealgo, attrs[XFRMA_ALG_AEAD]))) goto error; @@ -779,6 +782,13 @@ static int copy_to_user_state_extra(struct xfrm_state *x, copy_to_user_state(x, p); + if (x->props.extra_flags) { + ret = nla_put_u32(skb, XFRMA_SA_EXTRA_FLAGS, + x->props.extra_flags); + if (ret) + goto out; + } + if (x->coaddr) { ret = nla_put(skb, XFRMA_COADDR, sizeof(*x->coaddr), x->coaddr); if (ret) @@ -1112,7 +1122,7 @@ static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh, mark = xfrm_mark_get(attrs, &m); if (p->info.seq) { x = xfrm_find_acq_byseq(net, mark, p->info.seq); - if (x && xfrm_addr_cmp(&x->id.daddr, daddr, family)) { + if (x && !xfrm_addr_equal(&x->id.daddr, daddr, family)) { xfrm_state_put(x); x = NULL; } @@ -2302,9 +2312,10 @@ static const struct nla_policy xfrma_policy[XFRMA_MAX+1] = { [XFRMA_MARK] = { .len = sizeof(struct xfrm_mark) }, [XFRMA_TFCPAD] = { .type = NLA_U32 }, [XFRMA_REPLAY_ESN_VAL] = { .len = sizeof(struct xfrm_replay_state_esn) }, + [XFRMA_SA_EXTRA_FLAGS] = { .type = NLA_U32 }, }; -static struct xfrm_link { +static const struct xfrm_link { int (*doit)(struct sk_buff *, struct nlmsghdr *, struct nlattr **); int (*dump)(struct sk_buff *, struct netlink_callback *); int (*done)(struct netlink_callback *); @@ -2338,7 +2349,7 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); struct nlattr *attrs[XFRMA_MAX+1]; - struct xfrm_link *link; + const struct xfrm_link *link; int type, err; type = nlh->nlmsg_type; @@ -2495,6 +2506,8 @@ static inline size_t xfrm_sa_len(struct xfrm_state *x) x->security->ctx_len); if (x->coaddr) l += nla_total_size(sizeof(*x->coaddr)); + if (x->props.extra_flags) + l += nla_total_size(sizeof(x->props.extra_flags)); /* Must count x->lastused as it may become non-zero behind our back. */ l += nla_total_size(sizeof(u64));
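
Taken together, the xfrm_user.c hunks wire the new optional XFRMA_SA_EXTRA_FLAGS u32 attribute end to end: the nla_policy entry validates the length of incoming attributes, xfrm_state_construct() reads the value with nla_get_u32(), copy_to_user_state_extra() emits it with nla_put_u32() only when it is nonzero, and xfrm_sa_len() reserves matching space so dump buffers are sized correctly. A condensed sketch of that round-trip pattern for a hypothetical attribute MYA_FLAGS (kernel netlink API, surrounding message handling omitted):

#include <net/netlink.h>

enum { MYA_UNSPEC, MYA_FLAGS, __MYA_MAX };
#define MYA_MAX (__MYA_MAX - 1)

static const struct nla_policy my_policy[MYA_MAX + 1] = {
	[MYA_FLAGS] = { .type = NLA_U32 },	/* length checked on receive */
};

struct my_state {
	u32 extra_flags;
};

static void my_construct(struct my_state *x, struct nlattr **attrs)
{
	if (attrs[MYA_FLAGS])
		x->extra_flags = nla_get_u32(attrs[MYA_FLAGS]);
}

static int my_fill(struct sk_buff *skb, const struct my_state *x)
{
	/* Optional attribute: only emitted when it carries information. */
	if (x->extra_flags)
		return nla_put_u32(skb, MYA_FLAGS, x->extra_flags);
	return 0;
}

static size_t my_msg_size(const struct my_state *x)
{
	/* The size estimate must mirror the emit condition above. */
	return x->extra_flags ? nla_total_size(sizeof(x->extra_flags)) : 0;
}
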