46 files changed, 1869 insertions, 585 deletions
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index f49bd7897e95..e1846b97ee69 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -186,7 +186,8 @@ static struct rt6_info *addrconf_get_prefix_route(const struct in6_addr *pfx,
 
 static void addrconf_dad_start(struct inet6_ifaddr *ifp);
 static void addrconf_dad_work(struct work_struct *w);
-static void addrconf_dad_completed(struct inet6_ifaddr *ifp, bool bump_id);
+static void addrconf_dad_completed(struct inet6_ifaddr *ifp, bool bump_id,
+				   bool send_na);
 static void addrconf_dad_run(struct inet6_dev *idev);
 static void addrconf_rs_timer(struct timer_list *t);
 static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifa);
@@ -3438,6 +3439,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
 		} else if (event == NETDEV_CHANGE) {
 			if (!addrconf_link_ready(dev)) {
 				/* device is still not ready. */
+				rt6_sync_down_dev(dev, event);
 				break;
 			}
 
@@ -3449,6 +3451,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
 					 * multicast snooping switches
 					 */
 					ipv6_mc_up(idev);
+					rt6_sync_up(dev, RTNH_F_LINKDOWN);
 					break;
 				}
 				idev->if_flags |= IF_READY;
@@ -3484,6 +3487,9 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
 			if (run_pending)
 				addrconf_dad_run(idev);
 
+			/* Device has an address by now */
+			rt6_sync_up(dev, RTNH_F_DEAD);
+
 			/*
 			 * If the MTU changed during the interface down,
 			 * when the interface up, the changed MTU must be
@@ -3577,6 +3583,7 @@ static bool addr_is_local(const struct in6_addr *addr)
 
 static int addrconf_ifdown(struct net_device *dev, int how)
 {
+	unsigned long event = how ? NETDEV_UNREGISTER : NETDEV_DOWN;
 	struct net *net = dev_net(dev);
 	struct inet6_dev *idev;
 	struct inet6_ifaddr *ifa, *tmp;
@@ -3586,8 +3593,7 @@ static int addrconf_ifdown(struct net_device *dev, int how)
 
 	ASSERT_RTNL();
 
-	rt6_ifdown(net, dev);
-	neigh_ifdown(&nd_tbl, dev);
+	rt6_disable_ip(dev, event);
 
 	idev = __in6_dev_get(dev);
 	if (!idev)
@@ -3833,12 +3839,17 @@ static void addrconf_dad_begin(struct inet6_ifaddr *ifp)
 	     idev->cnf.accept_dad < 1) ||
 	    !(ifp->flags&IFA_F_TENTATIVE) ||
 	    ifp->flags & IFA_F_NODAD) {
+		bool send_na = false;
+
+		if (ifp->flags & IFA_F_TENTATIVE &&
+		    !(ifp->flags & IFA_F_OPTIMISTIC))
+			send_na = true;
 		bump_id = ifp->flags & IFA_F_TENTATIVE;
 		ifp->flags &= ~(IFA_F_TENTATIVE|IFA_F_OPTIMISTIC|IFA_F_DADFAILED);
 		spin_unlock(&ifp->lock);
 		read_unlock_bh(&idev->lock);
 
-		addrconf_dad_completed(ifp, bump_id);
+		addrconf_dad_completed(ifp, bump_id, send_na);
 		return;
 	}
 
@@ -3967,16 +3978,21 @@ static void addrconf_dad_work(struct work_struct *w)
 	}
 
 	if (ifp->dad_probes == 0) {
+		bool send_na = false;
+
 		/*
 		 * DAD was successful
 		 */
 
+		if (ifp->flags & IFA_F_TENTATIVE &&
+		    !(ifp->flags & IFA_F_OPTIMISTIC))
+			send_na = true;
 		bump_id = ifp->flags & IFA_F_TENTATIVE;
 		ifp->flags &= ~(IFA_F_TENTATIVE|IFA_F_OPTIMISTIC|IFA_F_DADFAILED);
 		spin_unlock(&ifp->lock);
 		write_unlock_bh(&idev->lock);
 
-		addrconf_dad_completed(ifp, bump_id);
+		addrconf_dad_completed(ifp, bump_id, send_na);
 
 		goto out;
 	}
@@ -4014,7 +4030,8 @@ static bool ipv6_lonely_lladdr(struct inet6_ifaddr *ifp)
 	return true;
 }
 
-static void addrconf_dad_completed(struct inet6_ifaddr *ifp, bool bump_id)
+static void addrconf_dad_completed(struct inet6_ifaddr *ifp, bool bump_id,
+				   bool send_na)
 {
 	struct net_device *dev = ifp->idev->dev;
 	struct in6_addr lladdr;
@@ -4046,6 +4063,16 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp, bool bump_id)
 	if (send_mld)
 		ipv6_mc_dad_complete(ifp->idev);
 
+	/* send unsolicited NA if enabled */
+	if (send_na &&
+	    (ifp->idev->cnf.ndisc_notify ||
+	     dev_net(dev)->ipv6.devconf_all->ndisc_notify)) {
+		ndisc_send_na(dev, &in6addr_linklocal_allnodes, &ifp->addr,
+			      /*router=*/ !!ifp->idev->cnf.forwarding,
+			      /*solicited=*/ false, /*override=*/ true,
+			      /*inc_opt=*/ true);
+	}
+
 	if (send_rs) {
 		/*
 		 *	If a host as already performed a random delay
@@ -4209,7 +4236,6 @@ static int if6_seq_open(struct inode *inode, struct file *file)
 }
 
 static const struct file_operations if6_fops = {
-	.owner		= THIS_MODULE,
 	.open		= if6_seq_open,
 	.read		= seq_read,
 	.llseek		= seq_lseek,
@@ -4352,9 +4378,11 @@ restart:
 						spin_lock(&ifpub->lock);
 						ifpub->regen_count = 0;
 						spin_unlock(&ifpub->lock);
+						rcu_read_unlock_bh();
 						ipv6_create_tempaddr(ifpub, ifp, true);
 						in6_ifa_put(ifpub);
 						in6_ifa_put(ifp);
+						rcu_read_lock_bh();
 						goto restart;
 					}
 				} else if (time_before(ifp->tstamp + ifp->prefered_lft * HZ - regen_advance * HZ, next))
@@ -6595,27 +6623,45 @@ int __init addrconf_init(void)
 
 	rtnl_af_register(&inet6_ops);
 
-	err = __rtnl_register(PF_INET6, RTM_GETLINK, NULL, inet6_dump_ifinfo,
-			      0);
+	err = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETLINK,
+				   NULL, inet6_dump_ifinfo, 0);
 	if (err < 0)
 		goto errout;
 
-	/* Only the first call to __rtnl_register can fail */
-	__rtnl_register(PF_INET6, RTM_NEWADDR, inet6_rtm_newaddr, NULL, 0);
-	__rtnl_register(PF_INET6, RTM_DELADDR, inet6_rtm_deladdr, NULL, 0);
-	__rtnl_register(PF_INET6, RTM_GETADDR, inet6_rtm_getaddr,
-			inet6_dump_ifaddr, RTNL_FLAG_DOIT_UNLOCKED);
-	__rtnl_register(PF_INET6, RTM_GETMULTICAST, NULL,
-			inet6_dump_ifmcaddr, 0);
-	__rtnl_register(PF_INET6, RTM_GETANYCAST, NULL,
-			inet6_dump_ifacaddr, 0);
-	__rtnl_register(PF_INET6, RTM_GETNETCONF, inet6_netconf_get_devconf,
-			inet6_netconf_dump_devconf, RTNL_FLAG_DOIT_UNLOCKED);
-
-	ipv6_addr_label_rtnl_register();
+	err = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_NEWADDR,
+				   inet6_rtm_newaddr, NULL, 0);
+	if (err < 0)
+		goto errout;
+	err = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_DELADDR,
+				   inet6_rtm_deladdr, NULL, 0);
+	if (err < 0)
+		goto errout;
+	err = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETADDR,
+				   inet6_rtm_getaddr, inet6_dump_ifaddr,
+				   RTNL_FLAG_DOIT_UNLOCKED);
+	if (err < 0)
+		goto errout;
+	err = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETMULTICAST,
+				   NULL, inet6_dump_ifmcaddr, 0);
+	if (err < 0)
+		goto errout;
+	err = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETANYCAST,
+				   NULL, inet6_dump_ifacaddr, 0);
+	if (err < 0)
+		goto errout;
+	err = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETNETCONF,
+				   inet6_netconf_get_devconf,
+				   inet6_netconf_dump_devconf,
+				   RTNL_FLAG_DOIT_UNLOCKED);
+	if (err < 0)
+		goto errout;
+	err = ipv6_addr_label_rtnl_register();
+	if (err < 0)
+		goto errout;
 
 	return 0;
 errout:
+	rtnl_unregister_all(PF_INET6);
 	rtnl_af_unregister(&inet6_ops);
 	unregister_netdevice_notifier(&ipv6_dev_notf);
 errlo:
diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c
index 00e1f8ee08f8..1d6ced37ad71 100644
--- a/net/ipv6/addrlabel.c
+++ b/net/ipv6/addrlabel.c
@@ -547,13 +547,32 @@ static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr *nlh,
 	return err;
 }
 
-void __init ipv6_addr_label_rtnl_register(void)
+/*
+ * Register the PF_INET6 rtnetlink handlers for address labels:
+ * RTM_NEWADDRLABEL and RTM_DELADDRLABEL (both handled by
+ * ip6addrlbl_newdel) and RTM_GETADDRLABEL (ip6addrlbl_get together
+ * with ip6addrlbl_dump).
+ *
+ * Returns the first negative rtnl_register_module() result, otherwise
+ * the result of the last registration.  Handlers registered before a
+ * failure are not unregistered here.
+ */
+int __init ipv6_addr_label_rtnl_register(void)
 {
-	__rtnl_register(PF_INET6, RTM_NEWADDRLABEL, ip6addrlbl_newdel,
-			NULL, RTNL_FLAG_DOIT_UNLOCKED);
-	__rtnl_register(PF_INET6, RTM_DELADDRLABEL, ip6addrlbl_newdel,
-			NULL, RTNL_FLAG_DOIT_UNLOCKED);
-	__rtnl_register(PF_INET6, RTM_GETADDRLABEL, ip6addrlbl_get,
-			ip6addrlbl_dump, RTNL_FLAG_DOIT_UNLOCKED);
-}
+	int ret;
 
+	ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_NEWADDRLABEL,
+				   ip6addrlbl_newdel,
+				   NULL, RTNL_FLAG_DOIT_UNLOCKED);
+	if (ret < 0)
+		return ret;
+	ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_DELADDRLABEL,
+				   ip6addrlbl_newdel,
+				   NULL, RTNL_FLAG_DOIT_UNLOCKED);
+	if (ret < 0)
+		return ret;
+	ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETADDRLABEL,
+				   ip6addrlbl_get,
+				   ip6addrlbl_dump, RTNL_FLAG_DOIT_UNLOCKED);
+	return ret;
+}
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index c9441ca45399..416917719a6f 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -284,6 +284,7 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 	struct net *net = sock_net(sk);
 	__be32 v4addr = 0;
 	unsigned short snum;
+	bool saved_ipv6only;
 	int addr_type = 0;
 	int err = 0;
 
@@ -389,19 +390,21 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 	if (!(addr_type & IPV6_ADDR_MULTICAST))
 		np->saddr = addr->sin6_addr;
 
+	saved_ipv6only = sk->sk_ipv6only;
+	if (addr_type != IPV6_ADDR_ANY && addr_type != IPV6_ADDR_MAPPED)
+		sk->sk_ipv6only = 1;
+
 	/* Make sure we are allowed to bind here. */
 	if ((snum || !inet->bind_address_no_port) &&
 	    sk->sk_prot->get_port(sk, snum)) {
+		sk->sk_ipv6only = saved_ipv6only;
 		inet_reset_saddr(sk);
 		err = -EADDRINUSE;
 		goto out;
 	}
 
-	if (addr_type != IPV6_ADDR_ANY) {
+	if (addr_type != IPV6_ADDR_ANY)
 		sk->sk_userlocks |= SOCK_BINDADDR_LOCK;
-		if (addr_type != IPV6_ADDR_MAPPED)
-			sk->sk_ipv6only = 1;
-	}
 	if (snum)
 		sk->sk_userlocks |= SOCK_BINDPORT_LOCK;
 	inet->inet_sport = htons(inet->inet_num);
diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
index 0bbab8a4b5d8..8e085cc05aeb 100644
--- a/net/ipv6/anycast.c
+++ b/net/ipv6/anycast.c
@@ -533,7 +533,6 @@ static int ac6_seq_open(struct inode *inode, struct file *file)
 }
 
 static const struct file_operations ac6_seq_fops = {
-	.owner		=	THIS_MODULE,
 	.open		=	ac6_seq_open,
 	.read		=	seq_read,
 	.llseek		=	seq_lseek,
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index a1f918713006..fbf08ce3f5ab 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -221,8 +221,7 @@ ipv4_connected:
 	if (__ipv6_addr_needs_scope_id(addr_type)) {
 		if (addr_len >= sizeof(struct sockaddr_in6) &&
 		    usin->sin6_scope_id) {
-			if (sk->sk_bound_dev_if &&
-			    sk->sk_bound_dev_if != usin->sin6_scope_id) {
+			if (!sk_dev_equal_l3scope(sk, usin->sin6_scope_id)) {
 				err = -EINVAL;
 				goto out;
 			}
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 1a7f00cd4803..97513f35bcc5 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -141,14 +141,44 @@ static void esp_ssg_unref(struct xfrm_state *x, void *tmp)
+/*
+ * Finish ESP output for the skb stored in base->data: drop the references
+ * via esp_ssg_unref() and free ESP_SKB_CB(skb)->tmp, then either resume the
+ * device xmit path (XFRM_DEV_RESUME set in the offload flags) or continue
+ * with xfrm_output_resume().  Presumably installed as the async crypto
+ * completion callback -- confirm at the call site.
+ */
 static void esp_output_done(struct crypto_async_request *base, int err)
 {
 	struct sk_buff *skb = base->data;
+	struct xfrm_offload *xo = xfrm_offload(skb);
 	void *tmp;
-	struct dst_entry *dst = skb_dst(skb);
-	struct xfrm_state *x = dst->xfrm;
+	struct xfrm_state *x;
+
+	/* On the resume path the state is the last entry of the secpath;
+	 * otherwise it comes from the skb's dst.
+	 */
+	if (xo && (xo->flags & XFRM_DEV_RESUME))
+		x = skb->sp->xvec[skb->sp->len - 1];
+	else
+		x = skb_dst(skb)->xfrm;
 
 	tmp = ESP_SKB_CB(skb)->tmp;
 	esp_ssg_unref(x, tmp);
 	kfree(tmp);
-	xfrm_output_resume(skb, err);
+
+	if (xo && (xo->flags & XFRM_DEV_RESUME)) {
+		if (err) {
+			/* Count the failure and drop the packet. */
+			XFRM_INC_STATS(xs_net(x), LINUX_MIB_XFRMOUTSTATEPROTOERROR);
+			kfree_skb(skb);
+			return;
+		}
+
+		/* Move skb->data back to the MAC header before resuming. */
+		skb_push(skb, skb->data - skb_mac_header(skb));
+		secpath_reset(skb);
+		xfrm_dev_resume(skb);
+	} else {
+		xfrm_output_resume(skb, err);
+	}
 }
 
 /* Move ESP header back into place. */
@@ -734,17 +752,13 @@ static int esp_init_aead(struct xfrm_state *x)
 	char aead_name[CRYPTO_MAX_ALG_NAME];
 	struct crypto_aead *aead;
 	int err;
-	u32 mask = 0;
 
 	err = -ENAMETOOLONG;
 	if (snprintf(aead_name, CRYPTO_MAX_ALG_NAME, "%s(%s)",
 		     x->geniv, x->aead->alg_name) >= CRYPTO_MAX_ALG_NAME)
 		goto error;
 
-	if (x->xso.offload_handle)
-		mask |= CRYPTO_ALG_ASYNC;
-
-	aead = crypto_alloc_aead(aead_name, 0, mask);
+	aead = crypto_alloc_aead(aead_name, 0, 0);
 	err = PTR_ERR(aead);
 	if (IS_ERR(aead))
 		goto error;
@@ -774,7 +788,6 @@ static int esp_init_authenc(struct xfrm_state *x)
 	char authenc_name[CRYPTO_MAX_ALG_NAME];
 	unsigned int keylen;
 	int err;
-	u32 mask = 0;
 
 	err = -EINVAL;
 	if (!x->ealg)
@@ -800,10 +813,7 @@ static int esp_init_authenc(struct xfrm_state *x)
 			goto error;
 	}
 
-	if (x->xso.offload_handle)
-		mask |= CRYPTO_ALG_ASYNC;
-
-	aead = crypto_alloc_aead(authenc_name, 0, mask);
+	aead = crypto_alloc_aead(authenc_name, 0, 0);
 	err = PTR_ERR(aead);
 	if (IS_ERR(aead))
 		goto error;
diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c
index f52c314d4c97..3fd1ec775dc2 100644
--- a/net/ipv6/esp6_offload.c
+++ b/net/ipv6/esp6_offload.c
@@ -136,78 +136,39 @@ static void esp6_gso_encap(struct xfrm_state *x, struct sk_buff *skb)
 static struct sk_buff *esp6_gso_segment(struct sk_buff *skb,
 				        netdev_features_t features)
 {
-	__u32 seq;
-	int err = 0;
-	struct sk_buff *skb2;
 	struct xfrm_state *x;
 	struct ip_esp_hdr *esph;
 	struct crypto_aead *aead;
-	struct sk_buff *segs = ERR_PTR(-EINVAL);
 	netdev_features_t esp_features = features;
 	struct xfrm_offload *xo = xfrm_offload(skb);
 
 	if (!xo)
-		goto out;
+		return ERR_PTR(-EINVAL);
 
 	if (!(skb_shinfo(skb)->gso_type & SKB_GSO_ESP))
-		goto out;
-
-	seq = xo->seq.low;
+		return ERR_PTR(-EINVAL);
 
 	x = skb->sp->xvec[skb->sp->len - 1];
 	aead = x->data;
 	esph = ip_esp_hdr(skb);
 
 	if (esph->spi != x->id.spi)
-		goto out;
+		return ERR_PTR(-EINVAL);
 
 	if (!pskb_may_pull(skb, sizeof(*esph) + crypto_aead_ivsize(aead)))
-		goto out;
+		return ERR_PTR(-EINVAL);
 
 	__skb_pull(skb, sizeof(*esph) + crypto_aead_ivsize(aead));
 
 	skb->encap_hdr_csum = 1;
 
-	if (!(features & NETIF_F_HW_ESP))
+	if (!(features & NETIF_F_HW_ESP) || !x->xso.offload_handle ||
+	    (x->xso.dev != skb->dev))
 		esp_features = features & ~(NETIF_F_SG | NETIF_F_CSUM_MASK);
 
-	segs = x->outer_mode->gso_segment(x, skb, esp_features);
-	if (IS_ERR_OR_NULL(segs))
-		goto out;
-
-	__skb_pull(skb, skb->data - skb_mac_header(skb));
-
-	skb2 = segs;
-	do {
-		struct sk_buff *nskb = skb2->next;
-
-		xo = xfrm_offload(skb2);
-		xo->flags |= XFRM_GSO_SEGMENT;
-		xo->seq.low = seq;
-		xo->seq.hi = xfrm_replay_seqhi(x, seq);
-
-		if(!(features & NETIF_F_HW_ESP))
-			xo->flags |= CRYPTO_FALLBACK;
-
-		x->outer_mode->xmit(x, skb2);
-
-		err = x->type_offload->xmit(x, skb2, esp_features);
-		if (err) {
-			kfree_skb_list(segs);
-			return ERR_PTR(err);
-		}
+	xo->flags |= XFRM_GSO_SEGMENT;
 
-		if (!skb_is_gso(skb2))
-			seq++;
-		else
-			seq += skb_shinfo(skb2)->gso_segs;
-
-		skb_push(skb2, skb2->mac_len);
-		skb2 = nskb;
-	} while (skb2);
-
-out:
-	return segs;
+	return x->outer_mode->gso_segment(x, skb, esp_features);
 }
 
 static int esp6_input_tail(struct xfrm_state *x, struct sk_buff *skb)
@@ -226,6 +187,7 @@ static int esp6_input_tail(struct xfrm_state *x, struct sk_buff *skb)
 
 static int esp6_xmit(struct xfrm_state *x, struct sk_buff *skb,  netdev_features_t features)
 {
+	int len;
 	int err;
 	int alen;
 	int blksize;
@@ -234,6 +196,7 @@ static int esp6_xmit(struct xfrm_state *x, struct sk_buff *skb,  netdev_features
 	struct crypto_aead *aead;
 	struct esp_info esp;
 	bool hw_offload = true;
+	__u32 seq;
 
 	esp.inplace = true;
 
@@ -269,28 +232,33 @@ static int esp6_xmit(struct xfrm_state *x, struct sk_buff *skb,  netdev_features
 			return esp.nfrags;
 	}
 
+	seq = xo->seq.low;
+
 	esph = ip_esp_hdr(skb);
 	esph->spi = x->id.spi;
 
 	skb_push(skb, -skb_network_offset(skb));
 
 	if (xo->flags & XFRM_GSO_SEGMENT) {
-		esph->seq_no = htonl(xo->seq.low);
-	} else {
-		int len;
-
-		len = skb->len - sizeof(struct ipv6hdr);
-		if (len > IPV6_MAXPLEN)
-			len = 0;
+		esph->seq_no = htonl(seq);
 
-		ipv6_hdr(skb)->payload_len = htons(len);
+		if (!skb_is_gso(skb))
+			xo->seq.low++;
+		else
+			xo->seq.low += skb_shinfo(skb)->gso_segs;
 	}
 
+	esp.seqno = cpu_to_be64(xo->seq.low + ((u64)xo->seq.hi << 32));
+
+	len = skb->len - sizeof(struct ipv6hdr);
+	if (len > IPV6_MAXPLEN)
+		len = 0;
+
+	ipv6_hdr(skb)->payload_len = htons(len);
+
 	if (hw_offload)
 		return 0;
 
-	esp.seqno = cpu_to_be64(xo->seq.low + ((u64)xo->seq.hi << 32));
-
 	err = esp6_output_tail(x, skb, &esp);
 	if (err)
 		return err;
diff --git a/net/ipv6/ila/ila_xlat.c b/net/ipv6/ila/ila_xlat.c
index 6eb5e68f112a..44c39c5f0638 100644
--- a/net/ipv6/ila/ila_xlat.c
+++ b/net/ipv6/ila/ila_xlat.c
@@ -512,9 +512,7 @@ static int ila_nl_dump(struct sk_buff *skb, struct netlink_callback *cb)
 	struct ila_map *ila;
 	int ret;
 
-	ret = rhashtable_walk_start(rhiter);
-	if (ret && ret != -EAGAIN)
-		goto done;
+	rhashtable_walk_start(rhiter);
 
 	for (;;) {
 		ila = rhashtable_walk_next(rhiter);
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index b01858f5deb1..2febe26de6a1 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -125,6 +125,46 @@ static inline int compute_score(struct sock *sk, struct net *net,
 }
 
 /* called with rcu_read_lock() */
+/*
+ * Scan a single lhash2 bucket and return the socket with the highest
+ * compute_score(), or NULL when no socket scores above zero.  When a new
+ * best candidate has sk_reuseport set, reuseport_select_sock() is tried
+ * first and its choice, if any, is returned immediately.
+ */
+static struct sock *inet6_lhash2_lookup(struct net *net,
+		struct inet_listen_hashbucket *ilb2,
+		struct sk_buff *skb, int doff,
+		const struct in6_addr *saddr,
+		const __be16 sport, const struct in6_addr *daddr,
+		const unsigned short hnum, const int dif, const int sdif)
+{
+	bool exact_dif = inet6_exact_dif_match(net, skb);
+	struct inet_connection_sock *icsk;
+	struct sock *sk, *result = NULL;
+	int score, hiscore = 0;
+	u32 phash = 0;
+
+	inet_lhash2_for_each_icsk_rcu(icsk, &ilb2->head) {
+		sk = (struct sock *)icsk;
+		score = compute_score(sk, net, hnum, daddr, dif, sdif,
+				      exact_dif);
+		if (score > hiscore) {
+			if (sk->sk_reuseport) {
+				phash = inet6_ehashfn(net, daddr, hnum,
+						      saddr, sport);
+				result = reuseport_select_sock(sk, phash,
+							       skb, doff);
+				if (result)
+					return result;
+			}
+			result = sk;
+			hiscore = score;
+		}
+	}
+
+	return result;
+}
+
 struct sock *inet6_lookup_listener(struct net *net,
 		struct inet_hashinfo *hashinfo,
 		struct sk_buff *skb, int doff,
@@ -134,31 +168,56 @@ struct sock *inet6_lookup_listener(struct net *net,
 {
 	unsigned int hash = inet_lhashfn(net, hnum);
 	struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash];
-	int score, hiscore = 0, matches = 0, reuseport = 0;
 	bool exact_dif = inet6_exact_dif_match(net, skb);
+	struct inet_listen_hashbucket *ilb2;
 	struct sock *sk, *result = NULL;
+	int score, hiscore = 0;
+	unsigned int hash2;
 	u32 phash = 0;
 
+	if (ilb->count <= 10 || !hashinfo->lhash2)
+		goto port_lookup;
+
+	/* Too many sk in the ilb bucket (which is hashed by port alone).
+	 * Try lhash2 (which is hashed by port and addr) instead.
+	 */
+
+	hash2 = ipv6_portaddr_hash(net, daddr, hnum);
+	ilb2 = inet_lhash2_bucket(hashinfo, hash2);
+	if (ilb2->count > ilb->count)
+		goto port_lookup;
+
+	result = inet6_lhash2_lookup(net, ilb2, skb, doff,
+				     saddr, sport, daddr, hnum,
+				     dif, sdif);
+	if (result)
+		return result;
+
+	/* Lookup lhash2 with in6addr_any */
+
+	hash2 = ipv6_portaddr_hash(net, &in6addr_any, hnum);
+	ilb2 = inet_lhash2_bucket(hashinfo, hash2);
+	if (ilb2->count > ilb->count)
+		goto port_lookup;
+
+	return inet6_lhash2_lookup(net, ilb2, skb, doff,
+				   saddr, sport, daddr, hnum,
+				   dif, sdif);
+
+port_lookup:
 	sk_for_each(sk, &ilb->head) {
 		score = compute_score(sk, net, hnum, daddr, dif, sdif, exact_dif);
 		if (score > hiscore) {
-			reuseport = sk->sk_reuseport;
-			if (reuseport) {
+			if (sk->sk_reuseport) {
 				phash = inet6_ehashfn(net, daddr, hnum,
 						      saddr, sport);
 				result = reuseport_select_sock(sk, phash,
 							       skb, doff);
 				if (result)
 					return result;
-				matches = 1;
 			}
 			result = sk;
 			hiscore = score;
-		} else if (score == hiscore && reuseport) {
-			matches++;
-			if (reciprocal_scale(phash, matches) == 0)
-				result = sk;
-			phash = next_pseudo_random32(phash);
 		}
 	}
 	return result;
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 217683d40f12..92b8d8c75eed 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -107,16 +107,21 @@ enum {
 
+/*
+ * Give the fib6 node that @rt is linked to (if any) a new serial number.
+ * No locking is done here: the lockdep_is_held() annotation expects the
+ * caller to already hold rt->rt6i_table->tb6_lock.
+ */
 void fib6_update_sernum(struct rt6_info *rt)
 {
-	struct fib6_table *table = rt->rt6i_table;
 	struct net *net = dev_net(rt->dst.dev);
 	struct fib6_node *fn;
 
-	spin_lock_bh(&table->tb6_lock);
 	fn = rcu_dereference_protected(rt->rt6i_node,
-			lockdep_is_held(&table->tb6_lock));
+			lockdep_is_held(&rt->rt6i_table->tb6_lock));
 	if (fn)
 		fn->fn_sernum = fib6_new_sernum(net);
-	spin_unlock_bh(&table->tb6_lock);
 }
 
 /*
@@ -804,12 +801,6 @@ insert_above:
 	return ln;
 }
 
-static bool rt6_qualify_for_ecmp(struct rt6_info *rt)
-{
-	return (rt->rt6i_flags & (RTF_GATEWAY|RTF_ADDRCONF|RTF_DYNAMIC)) ==
-	       RTF_GATEWAY;
-}
-
 static void fib6_copy_metrics(u32 *mp, const struct mx6_config *mxc)
 {
 	int i;
@@ -898,7 +889,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
 	ins = &fn->leaf;
 
 	for (iter = leaf; iter;
-	     iter = rcu_dereference_protected(iter->dst.rt6_next,
+	     iter = rcu_dereference_protected(iter->rt6_next,
 				lockdep_is_held(&rt->rt6i_table->tb6_lock))) {
 		/*
 		 *	Search for duplicates
@@ -955,7 +946,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
 			break;
 
 next_iter:
-		ins = &iter->dst.rt6_next;
+		ins = &iter->rt6_next;
 	}
 
 	if (fallback_ins && !found) {
@@ -984,7 +975,7 @@ next_iter:
 					      &sibling->rt6i_siblings);
 				break;
 			}
-			sibling = rcu_dereference_protected(sibling->dst.rt6_next,
+			sibling = rcu_dereference_protected(sibling->rt6_next,
 				    lockdep_is_held(&rt->rt6i_table->tb6_lock));
 		}
 		/* For each sibling in the list, increment the counter of
@@ -999,6 +990,7 @@ next_iter:
 			rt6i_nsiblings++;
 		}
 		BUG_ON(rt6i_nsiblings != rt->rt6i_nsiblings);
+		rt6_multipath_rebalance(temp_sibling);
 	}
 
 	/*
@@ -1014,7 +1006,7 @@ add:
 		if (err)
 			return err;
 
-		rcu_assign_pointer(rt->dst.rt6_next, iter);
+		rcu_assign_pointer(rt->rt6_next, iter);
 		atomic_inc(&rt->rt6i_ref);
 		rcu_assign_pointer(rt->rt6i_node, fn);
 		rcu_assign_pointer(*ins, rt);
@@ -1045,7 +1037,7 @@ add:
 
 		atomic_inc(&rt->rt6i_ref);
 		rcu_assign_pointer(rt->rt6i_node, fn);
-		rt->dst.rt6_next = iter->dst.rt6_next;
+		rt->rt6_next = iter->rt6_next;
 		rcu_assign_pointer(*ins, rt);
 		call_fib6_entry_notifiers(info->nl_net, FIB_EVENT_ENTRY_REPLACE,
 					  rt, extack);
@@ -1064,14 +1056,14 @@ add:
 
 		if (nsiblings) {
 			/* Replacing an ECMP route, remove all siblings */
-			ins = &rt->dst.rt6_next;
+			ins = &rt->rt6_next;
 			iter = rcu_dereference_protected(*ins,
 				    lockdep_is_held(&rt->rt6i_table->tb6_lock));
 			while (iter) {
 				if (iter->rt6i_metric > rt->rt6i_metric)
 					break;
 				if (rt6_qualify_for_ecmp(iter)) {
-					*ins = iter->dst.rt6_next;
+					*ins = iter->rt6_next;
 					iter->rt6i_node = NULL;
 					fib6_purge_rt(iter, fn, info->nl_net);
 					if (rcu_access_pointer(fn->rr_ptr) == iter)
@@ -1080,7 +1072,7 @@ add:
 					nsiblings--;
 					info->nl_net->ipv6.rt6_stats->fib_rt_entries--;
 				} else {
-					ins = &iter->dst.rt6_next;
+					ins = &iter->rt6_next;
 				}
 				iter = rcu_dereference_protected(*ins,
 					lockdep_is_held(&rt->rt6i_table->tb6_lock));
@@ -1107,8 +1099,8 @@ void fib6_force_start_gc(struct net *net)
 			  jiffies + net->ipv6.sysctl.ip6_rt_gc_interval);
 }
 
-static void fib6_update_sernum_upto_root(struct rt6_info *rt,
-					 int sernum)
+static void __fib6_update_sernum_upto_root(struct rt6_info *rt,
+					   int sernum)
 {
 	struct fib6_node *fn = rcu_dereference_protected(rt->rt6i_node,
 				lockdep_is_held(&rt->rt6i_table->tb6_lock));
@@ -1122,6 +1114,15 @@ static void fib6_update_sernum_upto_root(struct rt6_info *rt,
 	}
 }
 
+/*
+ * Non-static wrapper: obtain a new serial number for @net with
+ * fib6_new_sernum() and pass it to __fib6_update_sernum_upto_root() for @rt.
+ */
+void fib6_update_sernum_upto_root(struct net *net, struct rt6_info *rt)
+{
+	__fib6_update_sernum_upto_root(rt, fib6_new_sernum(net));
+}
+
 /*
  *	Add routing information to the routing tree.
  *	<destination addr>/<source addr>
@@ -1241,7 +1238,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
 
 	err = fib6_add_rt2node(fn, rt, info, mxc, extack);
 	if (!err) {
-		fib6_update_sernum_upto_root(rt, sernum);
+		__fib6_update_sernum_upto_root(rt, sernum);
 		fib6_start_gc(info->nl_net, rt);
 	}
 
@@ -1670,7 +1667,7 @@ static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn,
 	WARN_ON_ONCE(rt->rt6i_flags & RTF_CACHE);
 
 	/* Unlink it */
-	*rtp = rt->dst.rt6_next;
+	*rtp = rt->rt6_next;
 	rt->rt6i_node = NULL;
 	net->ipv6.rt6_stats->fib_rt_entries--;
 	net->ipv6.rt6_stats->fib_discarded_routes++;
@@ -1691,6 +1688,7 @@ static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn,
 			sibling->rt6i_nsiblings--;
 		rt->rt6i_nsiblings = 0;
 		list_del_init(&rt->rt6i_siblings);
+		rt6_multipath_rebalance(next_sibling);
 	}
 
 	/* Adjust walkers */
@@ -1698,7 +1696,7 @@ static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn,
 	FOR_WALKERS(net, w) {
 		if (w->state == FWS_C && w->leaf == rt) {
 			RT6_TRACE("walker %p adjusted by delroute\n", w);
-			w->leaf = rcu_dereference_protected(rt->dst.rt6_next,
+			w->leaf = rcu_dereference_protected(rt->rt6_next,
 					    lockdep_is_held(&table->tb6_lock));
 			if (!w->leaf)
 				w->state = FWS_U;
@@ -1762,7 +1760,7 @@ int fib6_del(struct rt6_info *rt, struct nl_info *info)
 			fib6_del_route(table, fn, rtp, info);
 			return 0;
 		}
-		rtp_next = &cur->dst.rt6_next;
+		rtp_next = &cur->rt6_next;
 	}
 	return -ENOENT;
 }
@@ -1918,7 +1916,7 @@ static int fib6_clean_node(struct fib6_walker *w)
 
 	for_each_fib6_walker_rt(w) {
 		res = c->func(rt, c->arg);
-		if (res < 0) {
+		if (res == -1) {
 			w->leaf = rt;
 			res = fib6_del(rt, &info);
 			if (res) {
@@ -1931,6 +1929,12 @@ static int fib6_clean_node(struct fib6_walker *w)
 				continue;
 			}
 			return 0;
+		} else if (res == -2) {
+			if (WARN_ON(!rt->rt6i_nsiblings))
+				continue;
+			rt = list_last_entry(&rt->rt6i_siblings,
+					     struct rt6_info, rt6i_siblings);
+			continue;
 		}
 		WARN_ON(res != 0);
 	}
@@ -1942,7 +1946,8 @@ static int fib6_clean_node(struct fib6_walker *w)
  *	Convenient frontend to tree walker.
  *
  *	func is called on each route.
- *		It may return -1 -> delete this route.
+ *		It may return -2 -> skip multipath route.
+ *			      -1 -> delete this route.
  *		              0  -> continue walking
  */
 
@@ -2134,7 +2139,6 @@ static void fib6_net_exit(struct net *net)
 {
 	unsigned int i;
 
-	rt6_ifdown(net, NULL);
 	del_timer_sync(&net->ipv6.ip6_fib_timer);
 
 	for (i = 0; i < FIB6_TABLE_HASHSZ; i++) {
@@ -2173,8 +2177,8 @@ int __init fib6_init(void)
 	if (ret)
 		goto out_kmem_cache_create;
 
-	ret = __rtnl_register(PF_INET6, RTM_GETROUTE, NULL, inet6_dump_fib,
-			      0);
+	ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETROUTE, NULL,
+				   inet6_dump_fib, 0);
 	if (ret)
 		goto out_unregister_subsys;
 
@@ -2239,7 +2243,7 @@ static int ipv6_route_yield(struct fib6_walker *w)
 
 	do {
 		iter->w.leaf = rcu_dereference_protected(
-				iter->w.leaf->dst.rt6_next,
+				iter->w.leaf->rt6_next,
 				lockdep_is_held(&iter->tbl->tb6_lock));
 		iter->skip--;
 		if (!iter->skip && iter->w.leaf)
@@ -2305,7 +2309,7 @@ static void *ipv6_route_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 	if (!v)
 		goto iter_table;
 
-	n = rcu_dereference_bh(((struct rt6_info *)v)->dst.rt6_next);
+	n = rcu_dereference_bh(((struct rt6_info *)v)->rt6_next);
 	if (n) {
 		++*pos;
 		return n;
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index 7f59c8fabeeb..3dab664ff503 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -836,7 +836,6 @@ static int ip6fl_seq_release(struct inode *inode, struct file *file)
 }
 
 static const struct file_operations ip6fl_seq_fops = {
-	.owner		=	THIS_MODULE,
 	.open		=	ip6fl_seq_open,
 	.read		=	seq_read,
 	.llseek		=	seq_lseek,
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 873549228ccb..05f070e123e4 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -55,6 +55,8 @@
 #include <net/ip6_route.h>
 #include <net/ip6_tunnel.h>
 #include <net/gre.h>
+#include <net/erspan.h>
+#include <net/dst_metadata.h>
 
 
 static bool log_ecn_error = true;
@@ -68,11 +70,13 @@ static unsigned int ip6gre_net_id __read_mostly;
 struct ip6gre_net {
 	struct ip6_tnl __rcu *tunnels[4][IP6_GRE_HASH_SIZE];
 
+	struct ip6_tnl __rcu *collect_md_tun;
 	struct net_device *fb_tunnel_dev;
 };
 
 static struct rtnl_link_ops ip6gre_link_ops __read_mostly;
 static struct rtnl_link_ops ip6gre_tap_ops __read_mostly;
+static struct rtnl_link_ops ip6erspan_tap_ops __read_mostly;
 static int ip6gre_tunnel_init(struct net_device *dev);
 static void ip6gre_tunnel_setup(struct net_device *dev);
 static void ip6gre_tunnel_link(struct ip6gre_net *ign, struct ip6_tnl *t);
@@ -121,7 +125,12 @@ static struct ip6_tnl *ip6gre_tunnel_lookup(struct net_device *dev,
 	unsigned int h1 = HASH_KEY(key);
 	struct ip6_tnl *t, *cand = NULL;
 	struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
-	int dev_type = (gre_proto == htons(ETH_P_TEB)) ?
+	/* gre_rcv() hands both ETH_P_ERSPAN and ETH_P_ERSPAN2 to the erspan
+	 * receive path, so both must select Ethernet-type tunnel devices.
+	 */
+	int dev_type = (gre_proto == htons(ETH_P_TEB) ||
+			gre_proto == htons(ETH_P_ERSPAN) ||
+			gre_proto == htons(ETH_P_ERSPAN2)) ?
 		       ARPHRD_ETHER : ARPHRD_IP6GRE;
 	int score, cand_score = 4;
 
@@ -226,6 +231,10 @@ static struct ip6_tnl *ip6gre_tunnel_lookup(struct net_device *dev,
 	if (cand)
 		return cand;
 
+	t = rcu_dereference(ign->collect_md_tun);
+	if (t && t->dev->flags & IFF_UP)
+		return t;
+
 	dev = ign->fb_tunnel_dev;
 	if (dev->flags & IFF_UP)
 		return netdev_priv(dev);
@@ -261,6 +270,9 @@ static void ip6gre_tunnel_link(struct ip6gre_net *ign, struct ip6_tnl *t)
 {
 	struct ip6_tnl __rcu **tp = ip6gre_bucket(ign, t);
 
+	if (t->parms.collect_md)
+		rcu_assign_pointer(ign->collect_md_tun, t);
+
 	rcu_assign_pointer(t->next, rtnl_dereference(*tp));
 	rcu_assign_pointer(*tp, t);
 }
@@ -270,6 +282,9 @@ static void ip6gre_tunnel_unlink(struct ip6gre_net *ign, struct ip6_tnl *t)
 	struct ip6_tnl __rcu **tp;
 	struct ip6_tnl *iter;
 
+	if (t->parms.collect_md)
+		rcu_assign_pointer(ign->collect_md_tun, NULL);
+
 	for (tp = ip6gre_bucket(ign, t);
 	     (iter = rtnl_dereference(*tp)) != NULL;
 	     tp = &iter->next) {
@@ -461,7 +476,101 @@ static int ip6gre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi)
 				      &ipv6h->saddr, &ipv6h->daddr, tpi->key,
 				      tpi->proto);
 	if (tunnel) {
-		ip6_tnl_rcv(tunnel, skb, tpi, NULL, log_ecn_error);
+		if (tunnel->parms.collect_md) {
+			struct metadata_dst *tun_dst;
+			__be64 tun_id;
+			__be16 flags;
+
+			flags = tpi->flags;
+			tun_id = key32_to_tunnel_id(tpi->key);
+
+			tun_dst = ipv6_tun_rx_dst(skb, flags, tun_id, 0);
+			if (!tun_dst)
+				return PACKET_REJECT;
+
+			ip6_tnl_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
+		} else {
+			ip6_tnl_rcv(tunnel, skb, tpi, NULL, log_ecn_error);
+		}
+
+		return PACKET_RCVD;
+	}
+
+	return PACKET_REJECT;
+}
+
+static int ip6erspan_rcv(struct sk_buff *skb, int gre_hdr_len,
+			 struct tnl_ptk_info *tpi)
+{
+	struct erspan_base_hdr *ershdr;
+	struct erspan_metadata *pkt_md;
+	const struct ipv6hdr *ipv6h;
+	struct ip6_tnl *tunnel;
+	u8 ver;
+
+	if (unlikely(!pskb_may_pull(skb, sizeof(*ershdr))))
+		return PACKET_REJECT;
+
+	ipv6h = ipv6_hdr(skb);
+	ershdr = (struct erspan_base_hdr *)skb->data;
+	ver = ershdr->ver;
+	tpi->key = cpu_to_be32(get_session_id(ershdr));
+
+	tunnel = ip6gre_tunnel_lookup(skb->dev,
+				      &ipv6h->saddr, &ipv6h->daddr, tpi->key,
+				      tpi->proto);
+	if (tunnel) {
+		int len = erspan_hdr_len(ver);
+
+		if (unlikely(!pskb_may_pull(skb, len)))
+			return PACKET_REJECT;
+
+		ershdr = (struct erspan_base_hdr *)skb->data;
+		pkt_md = (struct erspan_metadata *)(ershdr + 1);
+
+		if (__iptunnel_pull_header(skb, len,
+					   htons(ETH_P_TEB),
+					   false, false) < 0)
+			return PACKET_REJECT;
+
+		if (tunnel->parms.collect_md) {
+			struct metadata_dst *tun_dst;
+			struct ip_tunnel_info *info;
+			struct erspan_metadata *md;
+			__be64 tun_id;
+			__be16 flags;
+
+			tpi->flags |= TUNNEL_KEY;
+			flags = tpi->flags;
+			tun_id = key32_to_tunnel_id(tpi->key);
+
+			tun_dst = ipv6_tun_rx_dst(skb, flags, tun_id,
+						  sizeof(*md));
+			if (!tun_dst)
+				return PACKET_REJECT;
+
+			info = &tun_dst->u.tun_info;
+			md = ip_tunnel_info_opts(info);
+
+			memcpy(md, pkt_md, sizeof(*md));
+			md->version = ver;
+			info->key.tun_flags |= TUNNEL_ERSPAN_OPT;
+			info->options_len = sizeof(*md);
+
+			ip6_tnl_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
+
+		} else {
+			tunnel->parms.erspan_ver = ver;
+
+			if (ver == 1) {
+				tunnel->parms.index = ntohl(pkt_md->u.index);
+			} else {
+				tunnel->parms.dir = pkt_md->u.md2.dir;
+				tunnel->parms.hwid = get_hwid(&pkt_md->u.md2);
+			}
+
+			ip6_tnl_rcv(tunnel, skb, tpi, NULL, log_ecn_error);
+		}
 
 		return PACKET_RCVD;
 	}
@@ -482,9 +591,17 @@ static int gre_rcv(struct sk_buff *skb)
 	if (iptunnel_pull_header(skb, hdr_len, tpi.proto, false))
 		goto drop;
 
+	if (unlikely(tpi.proto == htons(ETH_P_ERSPAN) ||
+		     tpi.proto == htons(ETH_P_ERSPAN2))) {
+		if (ip6erspan_rcv(skb, hdr_len, &tpi) == PACKET_RCVD)
+			return 0;
+		goto out;
+	}
+
 	if (ip6gre_rcv(skb, &tpi) == PACKET_RCVD)
 		return 0;
 
+out:
 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0);
 drop:
 	kfree_skb(skb);
@@ -497,6 +614,78 @@ static int gre_handle_offloads(struct sk_buff *skb, bool csum)
 					csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE);
 }
 
+/* Derive @fl6, @dsfield and @encap_limit for an IPv4 inner packet. */
+static void prepare_ip6gre_xmit_ipv4(struct sk_buff *skb,
+				     struct net_device *dev,
+				     struct flowi6 *fl6, __u8 *dsfield,
+				     int *encap_limit)
+{
+	struct ip6_tnl *t = netdev_priv(dev);
+	const struct __ip6_tnl_parm *p = &t->parms;
+
+	/* *encap_limit is left untouched when the limit is to be ignored. */
+	if (!(p->flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
+		*encap_limit = p->encap_limit;
+
+	/* Start from the flow kept in the tunnel, then set uid and mark. */
+	memcpy(fl6, &t->fl.u.ip6, sizeof(*fl6));
+	fl6->flowi6_uid = sock_net_uid(dev_net(dev), NULL);
+	fl6->flowi6_mark = (p->flags & IP6_TNL_F_USE_ORIG_FWMARK) ?
+			   skb->mark : p->fwmark;
+
+	/* Traffic class: from the inner IPv4 header or the tunnel flowinfo. */
+	if (p->flags & IP6_TNL_F_USE_ORIG_TCLASS)
+		*dsfield = ipv4_get_dsfield(ip_hdr(skb));
+	else
+		*dsfield = ip6_tclass(p->flowinfo);
+}
+
+/* Derive @fl6, @dsfield and @encap_limit for an IPv6 inner packet.
+ * Returns 0, or -1 after sending an ICMPv6 error for a zero encap limit.
+ */
+static int prepare_ip6gre_xmit_ipv6(struct sk_buff *skb,
+				    struct net_device *dev,
+				    struct flowi6 *fl6, __u8 *dsfield,
+				    int *encap_limit)
+{
+	struct ip6_tnl *t = netdev_priv(dev);
+	struct ipv6hdr *ipv6h;
+	__u16 offset;
+
+	offset = ip6_tnl_parse_tlv_enc_lim(skb, skb_network_header(skb));
+	/* ip6_tnl_parse_tlv_enc_lim() might have reallocated skb->head */
+	ipv6h = ipv6_hdr(skb);
+	if (offset > 0) {
+		struct ipv6_tlv_tnl_enc_lim *tel;
+
+		tel = (struct ipv6_tlv_tnl_enc_lim *)&skb_network_header(skb)[offset];
+		if (tel->encap_limit == 0) {
+			icmpv6_send(skb, ICMPV6_PARAMPROB,
+				    ICMPV6_HDR_FIELD, offset + 2);
+			return -1;
+		}
+		*encap_limit = tel->encap_limit - 1;
+	} else if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) {
+		*encap_limit = t->parms.encap_limit;
+	}
+
+	memcpy(fl6, &t->fl.u.ip6, sizeof(*fl6));
+	if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
+		*dsfield = ipv6_get_dsfield(ipv6h);
+	else
+		*dsfield = ip6_tclass(t->parms.flowinfo);
+
+	if (t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL)
+		fl6->flowlabel |= ip6_flowlabel(ipv6h);
+	if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
+		fl6->flowi6_mark = skb->mark;
+	else
+		fl6->flowi6_mark = t->parms.fwmark;
+	fl6->flowi6_uid = sock_net_uid(dev_net(dev), NULL);
+
+	return 0;
+}
+
 static netdev_tx_t __gre6_xmit(struct sk_buff *skb,
 			       struct net_device *dev, __u8 dsfield,
 			       struct flowi6 *fl6, int encap_limit,
@@ -518,8 +707,38 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb,
 
 	/* Push GRE header. */
 	protocol = (dev->type == ARPHRD_ETHER) ? htons(ETH_P_TEB) : proto;
-	gre_build_header(skb, tunnel->tun_hlen, tunnel->parms.o_flags,
-			 protocol, tunnel->parms.o_key, htonl(tunnel->o_seqno));
+
+	if (tunnel->parms.collect_md) {
+		struct ip_tunnel_info *tun_info;
+		const struct ip_tunnel_key *key;
+		__be16 flags;
+
+		tun_info = skb_tunnel_info(skb);
+		if (unlikely(!tun_info ||
+			     !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
+			     ip_tunnel_info_af(tun_info) != AF_INET6))
+			return -EINVAL;
+
+		key = &tun_info->key;
+		memset(fl6, 0, sizeof(*fl6));
+		fl6->flowi6_proto = IPPROTO_GRE;
+		fl6->daddr = key->u.ipv6.dst;
+		fl6->flowlabel = key->label;
+		fl6->flowi6_uid = sock_net_uid(dev_net(dev), NULL);
+
+		dsfield = key->tos;
+		flags = key->tun_flags & (TUNNEL_CSUM | TUNNEL_KEY);
+		tunnel->tun_hlen = gre_calc_hlen(flags);
+
+		gre_build_header(skb, tunnel->tun_hlen,
+				 flags, protocol,
+				 tunnel_id_to_key32(tun_info->key.tun_id), 0);
+
+	} else {
+		gre_build_header(skb, tunnel->tun_hlen, tunnel->parms.o_flags,
+				 protocol, tunnel->parms.o_key,
+				 htonl(tunnel->o_seqno));
+	}
 
 	return ip6_tnl_xmit(skb, dev, dsfield, fl6, encap_limit, pmtu,
 			    NEXTHDR_GRE);
@@ -528,30 +747,17 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb,
 static inline int ip6gre_xmit_ipv4(struct sk_buff *skb, struct net_device *dev)
 {
 	struct ip6_tnl *t = netdev_priv(dev);
-	const struct iphdr  *iph = ip_hdr(skb);
 	int encap_limit = -1;
 	struct flowi6 fl6;
-	__u8 dsfield;
+	__u8 dsfield = 0;
 	__u32 mtu;
 	int err;
 
 	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
 
-	if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
-		encap_limit = t->parms.encap_limit;
-
-	memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
-
-	if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
-		dsfield = ipv4_get_dsfield(iph);
-	else
-		dsfield = ip6_tclass(t->parms.flowinfo);
-	if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
-		fl6.flowi6_mark = skb->mark;
-	else
-		fl6.flowi6_mark = t->parms.fwmark;
-
-	fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL);
+	if (!t->parms.collect_md)
+		prepare_ip6gre_xmit_ipv4(skb, dev, &fl6,
+					 &dsfield, &encap_limit);
 
 	err = gre_handle_offloads(skb, !!(t->parms.o_flags & TUNNEL_CSUM));
 	if (err)
@@ -575,46 +781,17 @@ static inline int ip6gre_xmit_ipv6(struct sk_buff *skb, struct net_device *dev)
 	struct ip6_tnl *t = netdev_priv(dev);
 	struct ipv6hdr *ipv6h = ipv6_hdr(skb);
 	int encap_limit = -1;
-	__u16 offset;
 	struct flowi6 fl6;
-	__u8 dsfield;
+	__u8 dsfield = 0;
 	__u32 mtu;
 	int err;
 
 	if (ipv6_addr_equal(&t->parms.raddr, &ipv6h->saddr))
 		return -1;
 
-	offset = ip6_tnl_parse_tlv_enc_lim(skb, skb_network_header(skb));
-	/* ip6_tnl_parse_tlv_enc_lim() might have reallocated skb->head */
-	ipv6h = ipv6_hdr(skb);
-
-	if (offset > 0) {
-		struct ipv6_tlv_tnl_enc_lim *tel;
-		tel = (struct ipv6_tlv_tnl_enc_lim *)&skb_network_header(skb)[offset];
-		if (tel->encap_limit == 0) {
-			icmpv6_send(skb, ICMPV6_PARAMPROB,
-				    ICMPV6_HDR_FIELD, offset + 2);
-			return -1;
-		}
-		encap_limit = tel->encap_limit - 1;
-	} else if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
-		encap_limit = t->parms.encap_limit;
-
-	memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
-
-	if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
-		dsfield = ipv6_get_dsfield(ipv6h);
-	else
-		dsfield = ip6_tclass(t->parms.flowinfo);
-
-	if (t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL)
-		fl6.flowlabel |= ip6_flowlabel(ipv6h);
-	if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
-		fl6.flowi6_mark = skb->mark;
-	else
-		fl6.flowi6_mark = t->parms.fwmark;
-
-	fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL);
+	if (!t->parms.collect_md &&
+	    prepare_ip6gre_xmit_ipv6(skb, dev, &fl6, &dsfield, &encap_limit))
+		return -1;
 
 	if (gre_handle_offloads(skb, !!(t->parms.o_flags & TUNNEL_CSUM)))
 		return -1;
@@ -661,7 +838,8 @@ static int ip6gre_xmit_other(struct sk_buff *skb, struct net_device *dev)
 	if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
 		encap_limit = t->parms.encap_limit;
 
-	memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
+	if (!t->parms.collect_md)
+		memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
 
 	err = gre_handle_offloads(skb, !!(t->parms.o_flags & TUNNEL_CSUM));
 	if (err)
@@ -706,6 +884,137 @@ tx_err:
 	return NETDEV_TX_OK;
 }
 
+/* ndo_start_xmit for ip6erspan: push ERSPAN + GRE headers and transmit via
+ * ip6_tnl_xmit(). Always returns NETDEV_TX_OK; failed skbs are freed here.
+ */
+static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb,
+					 struct net_device *dev)
+{
+	struct ip6_tnl *t = netdev_priv(dev);
+	struct dst_entry *dst = skb_dst(skb);
+	bool truncate = false;
+	int encap_limit = -1;
+	__u8 dsfield = 0;
+	struct flowi6 fl6;
+	__u32 mtu;
+	int err;
+
+	if (!ip6_tnl_xmit_ctl(t, &t->parms.laddr, &t->parms.raddr))
+		goto tx_err;
+
+	if (gre_handle_offloads(skb, false))
+		goto tx_err;
+
+	if (skb->len > dev->mtu + dev->hard_header_len) {
+		if (pskb_trim(skb, dev->mtu + dev->hard_header_len))
+			goto tx_err;
+		truncate = true;
+	}
+
+	t->parms.o_flags &= ~TUNNEL_KEY;
+	IPCB(skb)->flags = 0;
+
+	/* For collect_md mode, derive fl6 from the tunnel key,
+	 * for native mode, call prepare_ip6gre_xmit_{ipv4,ipv6}.
+	 */
+	if (t->parms.collect_md) {
+		struct ip_tunnel_info *tun_info;
+		const struct ip_tunnel_key *key;
+		struct erspan_metadata *md;
+		__be32 tun_id;
+
+		tun_info = skb_tunnel_info(skb);
+		if (unlikely(!tun_info ||
+			     !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
+			     ip_tunnel_info_af(tun_info) != AF_INET6))
+			goto tx_err;
+
+		key = &tun_info->key;
+		memset(&fl6, 0, sizeof(fl6));
+		fl6.flowi6_proto = IPPROTO_GRE;
+		fl6.daddr = key->u.ipv6.dst;
+		fl6.flowlabel = key->label;
+		fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL);
+
+		dsfield = key->tos;
+		/* md is dereferenced below, so the options must hold one. */
+		if (tun_info->options_len < sizeof(*md))
+			goto tx_err;
+		md = ip_tunnel_info_opts(tun_info);
+		tun_id = tunnel_id_to_key32(key->tun_id);
+		if (md->version == 1) {
+			erspan_build_header(skb, ntohl(tun_id),
+					    ntohl(md->u.index), truncate,
+					    false);
+		} else if (md->version == 2) {
+			erspan_build_header_v2(skb, ntohl(tun_id),
+					       md->u.md2.dir,
+					       get_hwid(&md->u.md2),
+					       truncate, false);
+		} else {
+			goto tx_err;
+		}
+	} else {
+		switch (skb->protocol) {
+		case htons(ETH_P_IP):
+			memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
+			prepare_ip6gre_xmit_ipv4(skb, dev, &fl6,
+						 &dsfield, &encap_limit);
+			break;
+		case htons(ETH_P_IPV6):
+			/* skb->head may have moved since entry; read it now */
+			if (ipv6_addr_equal(&t->parms.raddr,
+					    &ipv6_hdr(skb)->saddr))
+				goto tx_err;
+			if (prepare_ip6gre_xmit_ipv6(skb, dev, &fl6,
+						     &dsfield, &encap_limit))
+				goto tx_err;
+			break;
+		default:
+			memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
+			break;
+		}
+
+		if (t->parms.erspan_ver == 1)
+			erspan_build_header(skb, ntohl(t->parms.o_key),
+					    t->parms.index, truncate, false);
+		else
+			erspan_build_header_v2(skb, ntohl(t->parms.o_key),
+					       t->parms.dir, t->parms.hwid,
+					       truncate, false);
+		fl6.daddr = t->parms.raddr;
+	}
+
+	/* Push GRE header. */
+	gre_build_header(skb, 8, TUNNEL_SEQ,
+			 htons(ETH_P_ERSPAN), 0, htonl(t->o_seqno++));
+
+	/* TooBig packet may have updated dst->dev's mtu */
+	if (!t->parms.collect_md && dst && dst_mtu(dst) > dst->dev->mtu)
+		dst->ops->update_pmtu(dst, NULL, skb, dst->dev->mtu);
+
+	err = ip6_tnl_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu,
+			   NEXTHDR_GRE);
+	if (err != 0) {
+		/* XXX: send ICMP error even if DF is not set. */
+		if (err == -EMSGSIZE) {
+			if (skb->protocol == htons(ETH_P_IP))
+				icmp_send(skb, ICMP_DEST_UNREACH,
+					  ICMP_FRAG_NEEDED, htonl(mtu));
+			else
+				icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
+		}
+		goto tx_err;
+	}
+	return NETDEV_TX_OK;
+
+tx_err:
+	t->dev->stats.tx_errors++;
+	t->dev->stats.tx_dropped++;
+	kfree_skb(skb);
+	return NETDEV_TX_OK;
+}
+
 static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu)
 {
 	struct net_device *dev = t->dev;
@@ -1079,6 +1388,10 @@ static int ip6gre_tunnel_init_common(struct net_device *dev)
 	if (!(tunnel->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
 		dev->mtu -= 8;
 
+	if (tunnel->parms.collect_md) {
+		dev->features |= NETIF_F_NETNS_LOCAL;
+		netif_keep_dst(dev);
+	}
 	ip6gre_tnl_init_features(dev);
 
 	return 0;
@@ -1095,6 +1408,9 @@ static int ip6gre_tunnel_init(struct net_device *dev)
 
 	tunnel = netdev_priv(dev);
 
+	if (tunnel->parms.collect_md)
+		return 0;
+
 	memcpy(dev->dev_addr, &tunnel->parms.laddr, sizeof(struct in6_addr));
 	memcpy(dev->broadcast, &tunnel->parms.raddr, sizeof(struct in6_addr));
 
@@ -1117,7 +1433,6 @@ static void ip6gre_fb_tunnel_init(struct net_device *dev)
 	dev_hold(dev);
 }
 
-
 static struct inet6_protocol ip6gre_protocol __read_mostly = {
 	.handler     = gre_rcv,
 	.err_handler = ip6gre_err,
@@ -1132,7 +1447,8 @@ static void ip6gre_destroy_tunnels(struct net *net, struct list_head *head)
 
 	for_each_netdev_safe(net, dev, aux)
 		if (dev->rtnl_link_ops == &ip6gre_link_ops ||
-		    dev->rtnl_link_ops == &ip6gre_tap_ops)
+		    dev->rtnl_link_ops == &ip6gre_tap_ops ||
+		    dev->rtnl_link_ops == &ip6erspan_tap_ops)
 			unregister_netdevice_queue(dev, head);
 
 	for (prio = 0; prio < 4; prio++) {
@@ -1254,6 +1570,70 @@ out:
 	return ip6gre_tunnel_validate(tb, data, extack);
 }
 
+/* rtnl ->validate hook for ip6erspan: run ip6gre_tap_validate(), then return
+ * -EINVAL for bad GRE flags or ERSPAN fields that exceed their masks.
+ */
+static int ip6erspan_tap_validate(struct nlattr *tb[], struct nlattr *data[],
+				  struct netlink_ext_ack *extack)
+{
+	__be16 gre_flags = 0;
+	int err, ver = 0;
+
+	if (!data)
+		return 0;
+
+	err = ip6gre_tap_validate(tb, data, extack);
+	if (err)
+		return err;
+
+	/* ERSPAN should only have GRE sequence and key flag */
+	if (data[IFLA_GRE_OFLAGS])
+		gre_flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
+	if (data[IFLA_GRE_IFLAGS])
+		gre_flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
+	if (gre_flags != (GRE_SEQ | GRE_KEY) &&
+	    !data[IFLA_GRE_COLLECT_METADATA])
+		return -EINVAL;
+
+	/* The ERSPAN session ID is only 10 bits wide. Since the 32-bit
+	 * key field is reused as the ID, check its range.
+	 */
+	if (data[IFLA_GRE_IKEY] &&
+	    (ntohl(nla_get_be32(data[IFLA_GRE_IKEY])) & ~ID_MASK))
+		return -EINVAL;
+	if (data[IFLA_GRE_OKEY] &&
+	    (ntohl(nla_get_be32(data[IFLA_GRE_OKEY])) & ~ID_MASK))
+		return -EINVAL;
+
+	if (data[IFLA_GRE_ERSPAN_VER]) {
+		ver = nla_get_u8(data[IFLA_GRE_ERSPAN_VER]);
+		if (ver < 1 || ver > 2)
+			return -EINVAL;
+	}
+
+	/* ver stays 0 when no version attribute was supplied. */
+	switch (ver) {
+	case 1:
+		if (data[IFLA_GRE_ERSPAN_INDEX] &&
+		    (nla_get_u32(data[IFLA_GRE_ERSPAN_INDEX]) & ~INDEX_MASK))
+			return -EINVAL;
+		break;
+	case 2:
+		if (data[IFLA_GRE_ERSPAN_DIR] &&
+		    (nla_get_u8(data[IFLA_GRE_ERSPAN_DIR]) &
+		     ~(DIR_MASK >> DIR_OFFSET)))
+			return -EINVAL;
+		if (data[IFLA_GRE_ERSPAN_HWID] &&
+		    (nla_get_u16(data[IFLA_GRE_ERSPAN_HWID]) &
+		     ~(HWID_MASK >> HWID_OFFSET)))
+			return -EINVAL;
+		break;
+	default:
+		break;
+	}
+
+	return 0;
+}
 
 static void ip6gre_netlink_parms(struct nlattr *data[],
 				struct __ip6_tnl_parm *parms)
@@ -1300,6 +1680,22 @@ static void ip6gre_netlink_parms(struct nlattr *data[],
 
 	if (data[IFLA_GRE_FWMARK])
 		parms->fwmark = nla_get_u32(data[IFLA_GRE_FWMARK]);
+
+	if (data[IFLA_GRE_COLLECT_METADATA])
+		parms->collect_md = true;
+
+	if (data[IFLA_GRE_ERSPAN_VER])
+		parms->erspan_ver = nla_get_u8(data[IFLA_GRE_ERSPAN_VER]);
+
+	if (parms->erspan_ver == 1) {
+		if (data[IFLA_GRE_ERSPAN_INDEX])
+			parms->index = nla_get_u32(data[IFLA_GRE_ERSPAN_INDEX]);
+	} else if (parms->erspan_ver == 2) {
+		if (data[IFLA_GRE_ERSPAN_DIR])
+			parms->dir = nla_get_u8(data[IFLA_GRE_ERSPAN_DIR]);
+		if (data[IFLA_GRE_ERSPAN_HWID])
+			parms->hwid = nla_get_u16(data[IFLA_GRE_ERSPAN_HWID]);
+	}
 }
 
 static int ip6gre_tap_init(struct net_device *dev)
@@ -1326,6 +1722,59 @@ static const struct net_device_ops ip6gre_tap_netdev_ops = {
 	.ndo_get_iflink = ip6_tnl_get_iflink,
 };
 
+/* ndo_init for ip6erspan: allocate per-cpu stats and the dst cache, then set
+ * header lengths and MTU. Returns 0, -ENOMEM or the dst_cache_init() error.
+ */
+static int ip6erspan_tap_init(struct net_device *dev)
+{
+	struct ip6_tnl *tunnel = netdev_priv(dev);
+	int t_hlen, ret;
+
+	tunnel->dev = dev;
+	tunnel->net = dev_net(dev);
+	strcpy(tunnel->parms.name, dev->name);
+
+	dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
+	if (!dev->tstats)
+		return -ENOMEM;
+
+	ret = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL);
+	if (ret)
+		goto err_free_stats;
+
+	tunnel->tun_hlen = 8;
+	tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen +
+		       erspan_hdr_len(tunnel->parms.erspan_ver);
+	t_hlen = tunnel->hlen + sizeof(struct ipv6hdr);
+
+	dev->hard_header_len = LL_MAX_HEADER + t_hlen;
+	dev->mtu = ETH_DATA_LEN - t_hlen;
+	if (dev->type == ARPHRD_ETHER)
+		dev->mtu -= ETH_HLEN;
+	if (!(tunnel->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
+		dev->mtu -= 8;
+
+	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
+	ip6gre_tnl_link_config(tunnel, 1);
+	return 0;
+
+err_free_stats:
+	free_percpu(dev->tstats);
+	dev->tstats = NULL;
+	return ret;
+}
+
+static const struct net_device_ops ip6erspan_netdev_ops = {
+	.ndo_init =		ip6erspan_tap_init,	/* ERSPAN hlen + MTU */
+	.ndo_uninit =		ip6gre_tunnel_uninit,
+	.ndo_start_xmit =	ip6erspan_tunnel_xmit,	/* ERSPAN + GRE encap */
+	.ndo_set_mac_address =	eth_mac_addr,
+	.ndo_validate_addr =	eth_validate_addr,
+	.ndo_change_mtu =	ip6_tnl_change_mtu,
+	.ndo_get_stats64 =	ip_tunnel_get_stats64,
+	.ndo_get_iflink =	ip6_tnl_get_iflink,
+};
+
 static void ip6gre_tap_setup(struct net_device *dev)
 {
 
@@ -1396,8 +1845,13 @@ static int ip6gre_newlink(struct net *src_net, struct net_device *dev,
 
 	ip6gre_netlink_parms(data, &nt->parms);
 
-	if (ip6gre_tunnel_find(net, &nt->parms, dev->type))
-		return -EEXIST;
+	if (nt->parms.collect_md) {
+		if (rtnl_dereference(ign->collect_md_tun))
+			return -EEXIST;
+	} else {
+		if (ip6gre_tunnel_find(net, &nt->parms, dev->type))
+			return -EEXIST;
+	}
 
 	if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
 		eth_hw_addr_random(dev);
@@ -1500,8 +1954,12 @@ static size_t ip6gre_get_size(const struct net_device *dev)
 		nla_total_size(2) +
 		/* IFLA_GRE_ENCAP_DPORT */
 		nla_total_size(2) +
+		/* IFLA_GRE_COLLECT_METADATA */
+		nla_total_size(0) +
 		/* IFLA_GRE_FWMARK */
 		nla_total_size(4) +
+		/* IFLA_GRE_ERSPAN_INDEX */
+		nla_total_size(4) +
 		0;
 }
 
@@ -1523,7 +1981,8 @@ static int ip6gre_fill_info(struct sk_buff *skb, const struct net_device *dev)
 	    nla_put_u8(skb, IFLA_GRE_ENCAP_LIMIT, p->encap_limit) ||
 	    nla_put_be32(skb, IFLA_GRE_FLOWINFO, p->flowinfo) ||
 	    nla_put_u32(skb, IFLA_GRE_FLAGS, p->flags) ||
-	    nla_put_u32(skb, IFLA_GRE_FWMARK, p->fwmark))
+	    nla_put_u32(skb, IFLA_GRE_FWMARK, p->fwmark) ||
+	    nla_put_u32(skb, IFLA_GRE_ERSPAN_INDEX, p->index))
 		goto nla_put_failure;
 
 	if (nla_put_u16(skb, IFLA_GRE_ENCAP_TYPE,
@@ -1536,6 +1995,24 @@ static int ip6gre_fill_info(struct sk_buff *skb, const struct net_device *dev)
 			t->encap.flags))
 		goto nla_put_failure;
 
+	if (p->collect_md) {
+		if (nla_put_flag(skb, IFLA_GRE_COLLECT_METADATA))
+			goto nla_put_failure;
+	}
+
+	if (nla_put_u8(skb, IFLA_GRE_ERSPAN_VER, p->erspan_ver))
+		goto nla_put_failure;
+
+	if (p->erspan_ver == 1) {
+		if (nla_put_u32(skb, IFLA_GRE_ERSPAN_INDEX, p->index))
+			goto nla_put_failure;
+	} else if (p->erspan_ver == 2) {
+		if (nla_put_u8(skb, IFLA_GRE_ERSPAN_DIR, p->dir))
+			goto nla_put_failure;
+		if (nla_put_u16(skb, IFLA_GRE_ERSPAN_HWID, p->hwid))
+			goto nla_put_failure;
+	}
+
 	return 0;
 
 nla_put_failure:
@@ -1558,9 +2035,28 @@ static const struct nla_policy ip6gre_policy[IFLA_GRE_MAX + 1] = {
 	[IFLA_GRE_ENCAP_FLAGS]  = { .type = NLA_U16 },
 	[IFLA_GRE_ENCAP_SPORT]  = { .type = NLA_U16 },
 	[IFLA_GRE_ENCAP_DPORT]  = { .type = NLA_U16 },
+	[IFLA_GRE_COLLECT_METADATA] = { .type = NLA_FLAG },
 	[IFLA_GRE_FWMARK]       = { .type = NLA_U32 },
+	[IFLA_GRE_ERSPAN_INDEX] = { .type = NLA_U32 },
+	[IFLA_GRE_ERSPAN_VER]	= { .type = NLA_U8 },
+	[IFLA_GRE_ERSPAN_DIR]	= { .type = NLA_U8 },
+	[IFLA_GRE_ERSPAN_HWID]	= { .type = NLA_U16 },
 };
 
+/* rtnl ->setup hook for ip6erspan: an Ethernet device using the ERSPAN ops. */
+static void ip6erspan_tap_setup(struct net_device *dev)
+{
+	ether_setup(dev);
+	dev->priv_flags &= ~IFF_TX_SKB_SHARING;
+	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
+	dev->features |= NETIF_F_NETNS_LOCAL;
+
+	dev->priv_destructor = ip6gre_dev_free;
+	dev->needs_free_netdev = true;
+	dev->netdev_ops = &ip6erspan_netdev_ops;
+	netif_keep_dst(dev);
+}
+
 static struct rtnl_link_ops ip6gre_link_ops __read_mostly = {
 	.kind		= "ip6gre",
 	.maxtype	= IFLA_GRE_MAX,
@@ -1590,6 +2086,20 @@ static struct rtnl_link_ops ip6gre_tap_ops __read_mostly = {
 	.get_link_net	= ip6_tnl_get_link_net,
 };
 
+static struct rtnl_link_ops ip6erspan_tap_ops __read_mostly = {
+	.kind		= "ip6erspan",	/* also the module's rtnl-link alias */
+	.maxtype	= IFLA_GRE_MAX,
+	.policy		= ip6gre_policy,
+	.priv_size	= sizeof(struct ip6_tnl),
+	.setup		= ip6erspan_tap_setup,	/* Ethernet-type device */
+	.validate	= ip6erspan_tap_validate,	/* ERSPAN ranges */
+	.newlink	= ip6gre_newlink,
+	.changelink	= ip6gre_changelink,
+	.get_size	= ip6gre_get_size,
+	.fill_info	= ip6gre_fill_info,
+	.get_link_net	= ip6_tnl_get_link_net,
+};
+
 /*
  *	And now the modules code and kernel interface.
  */
@@ -1618,9 +2128,15 @@ static int __init ip6gre_init(void)
 	if (err < 0)
 		goto tap_ops_failed;
 
+	err = rtnl_link_register(&ip6erspan_tap_ops);
+	if (err < 0)
+		goto erspan_link_failed;
+
 out:
 	return err;
 
+erspan_link_failed:
+	rtnl_link_unregister(&ip6gre_tap_ops);
 tap_ops_failed:
 	rtnl_link_unregister(&ip6gre_link_ops);
 rtnl_link_failed:
@@ -1634,6 +2150,7 @@ static void __exit ip6gre_fini(void)
 {
 	rtnl_link_unregister(&ip6gre_tap_ops);
 	rtnl_link_unregister(&ip6gre_link_ops);
+	rtnl_link_unregister(&ip6erspan_tap_ops);
 	inet6_del_protocol(&ip6gre_protocol, IPPROTO_GRE);
 	unregister_pernet_device(&ip6gre_net_ops);
 }
@@ -1645,4 +2162,5 @@ MODULE_AUTHOR("D. Kozlov (xeb@mail.ru)");
 MODULE_DESCRIPTION("GRE over IPv6 tunneling device");
 MODULE_ALIAS_RTNL_LINK("ip6gre");
 MODULE_ALIAS_RTNL_LINK("ip6gretap");
+MODULE_ALIAS_RTNL_LINK("ip6erspan");
 MODULE_ALIAS_NETDEV("ip6gre0");
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 3763dc01e374..997c7f19ad62 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -138,6 +138,14 @@ static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *s
 		return ret;
 	}
 
+#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
+	/* Policy lookup after SNAT yielded a new policy */
+	if (skb_dst(skb)->xfrm) {
+		IPCB(skb)->flags |= IPSKB_REROUTED;
+		return dst_output(net, sk, skb);
+	}
+#endif
+
 	if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
 	    dst_allfrag(skb_dst(skb)) ||
 	    (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
@@ -370,7 +378,7 @@ static inline int ip6_forward_finish(struct net *net, struct sock *sk,
 	return dst_output(net, sk, skb);
 }
 
-static unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
+unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
 {
 	unsigned int mtu;
 	struct inet6_dev *idev;
@@ -390,6 +398,7 @@ static unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
 
 	return mtu;
 }
+EXPORT_SYMBOL_GPL(ip6_dst_mtu_forward);
 
 static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
 {
@@ -1209,7 +1218,7 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
 		      READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst);
 	else
 		mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
-		      READ_ONCE(rt->dst.dev->mtu) : dst_mtu(rt->dst.path);
+			READ_ONCE(rt->dst.dev->mtu) : dst_mtu(xfrm_dst_path(&rt->dst));
 	if (np->frag_size < mtu) {
 		if (np->frag_size)
 			mtu = np->frag_size;
@@ -1217,7 +1226,7 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
 	if (mtu < IPV6_MIN_MTU)
 		return -EINVAL;
 	cork->base.fragsize = mtu;
-	if (dst_allfrag(rt->dst.path))
+	if (dst_allfrag(xfrm_dst_path(&rt->dst)))
 		cork->base.flags |= IPCORK_ALLFRAG;
 	cork->base.length = 0;
 
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 1ee5584c3555..4b15fe928278 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -860,7 +860,7 @@ int ip6_tnl_rcv(struct ip6_tnl *t, struct sk_buff *skb,
 		struct metadata_dst *tun_dst,
 		bool log_ecn_err)
 {
-	return __ip6_tnl_rcv(t, skb, tpi, NULL, ip6ip6_dscp_ecn_decapsulate,
+	return __ip6_tnl_rcv(t, skb, tpi, tun_dst, ip6ip6_dscp_ecn_decapsulate,
 			     log_ecn_err);
 }
 EXPORT_SYMBOL(ip6_tnl_rcv);
@@ -978,6 +978,9 @@ int ip6_tnl_xmit_ctl(struct ip6_tnl *t,
 	int ret = 0;
 	struct net *net = t->net;
 
+	if (t->parms.collect_md)
+		return 1;
+
 	if ((p->flags & IP6_TNL_F_CAP_XMIT) ||
 	    ((p->flags & IP6_TNL_F_CAP_PER_PACKET) &&
 	     (ip6_tnl_get_cap(t, laddr, raddr) & IP6_TNL_F_CAP_XMIT))) {
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index 8c184f84f353..fa3ae1cb50d3 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -626,6 +626,7 @@ static void vti6_link_config(struct ip6_tnl *t)
 {
 	struct net_device *dev = t->dev;
 	struct __ip6_tnl_parm *p = &t->parms;
+	struct net_device *tdev = NULL;
 
 	memcpy(dev->dev_addr, &p->laddr, sizeof(struct in6_addr));
 	memcpy(dev->broadcast, &p->raddr, sizeof(struct in6_addr));
@@ -638,6 +639,25 @@ static void vti6_link_config(struct ip6_tnl *t)
 		dev->flags |= IFF_POINTOPOINT;
 	else
 		dev->flags &= ~IFF_POINTOPOINT;
+
+	if (p->flags & IP6_TNL_F_CAP_XMIT) {
+		int strict = (ipv6_addr_type(&p->raddr) &
+			      (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL));
+		struct rt6_info *rt = rt6_lookup(t->net,
+						 &p->raddr, &p->laddr,
+						 p->link, strict);
+
+		if (rt)
+			tdev = rt->dst.dev;
+		ip6_rt_put(rt);
+	}
+
+	if (!tdev && p->link)
+		tdev = __dev_get_by_index(t->net, p->link);
+
+	if (tdev)
+		dev->mtu = max_t(int, tdev->mtu - dev->hard_header_len,
+				 IPV6_MIN_MTU);
 }
 
 /**
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index a2e1a864eb46..9f6cace9c817 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -477,7 +477,6 @@ static int ip6mr_vif_open(struct inode *inode, struct file *file)
 }
 
 static const struct file_operations ip6mr_vif_fops = {
-	.owner	 = THIS_MODULE,
 	.open    = ip6mr_vif_open,
 	.read    = seq_read,
 	.llseek  = seq_lseek,
@@ -495,6 +494,7 @@ static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
 		return ERR_PTR(-ENOENT);
 
 	it->mrt = mrt;
+	it->cache = NULL;
 	return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
 		: SEQ_START_TOKEN;
 }
@@ -609,7 +609,6 @@ static int ipmr_mfc_open(struct inode *inode, struct file *file)
 }
 
 static const struct file_operations ip6mr_mfc_fops = {
-	.owner	 = THIS_MODULE,
 	.open    = ipmr_mfc_open,
 	.read    = seq_read,
 	.llseek  = seq_lseek,
@@ -1425,10 +1424,13 @@ int __init ip6_mr_init(void)
 		goto add_proto_fail;
 	}
 #endif
-	rtnl_register(RTNL_FAMILY_IP6MR, RTM_GETROUTE, NULL,
-		      ip6mr_rtm_dumproute, 0);
-	return 0;
+	err = rtnl_register_module(THIS_MODULE, RTNL_FAMILY_IP6MR, RTM_GETROUTE,
+				   NULL, ip6mr_rtm_dumproute, 0);
+	if (err == 0)
+		return 0;
+
 #ifdef CONFIG_IPV6_PIMSM_V2
+	inet6_del_protocol(&pim6_protocol, IPPROTO_PIM);
 add_proto_fail:
 	unregister_netdevice_notifier(&ip6_mr_notifier);
 #endif
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 844642682b83..6a5d0e39bb87 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -1655,8 +1655,6 @@ static void mld_sendpack(struct sk_buff *skb)
 	if (err)
 		goto err_out;
 
-	payload_len = skb->len;
-
 	err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
 		      net, net->ipv6.igmp_sk, skb, NULL, skb->dev,
 		      dst_output);
@@ -2758,7 +2756,6 @@ static int igmp6_mc_seq_open(struct inode *inode, struct file *file)
 }
 
 static const struct file_operations igmp6_mc_seq_fops = {
-	.owner		=	THIS_MODULE,
 	.open		=	igmp6_mc_seq_open,
 	.read		=	seq_read,
 	.llseek		=	seq_lseek,
@@ -2913,7 +2910,6 @@ static int igmp6_mcf_seq_open(struct inode *inode, struct file *file)
 }
 
 static const struct file_operations igmp6_mcf_seq_fops = {
-	.owner		=	THIS_MODULE,
 	.open		=	igmp6_mcf_seq_open,
 	.read		=	seq_read,
 	.llseek		=	seq_lseek,
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index b3cea200c85e..f61a5b613b52 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -566,6 +566,11 @@ static void ndisc_send_unsol_na(struct net_device *dev)
 
 	read_lock_bh(&idev->lock);
 	list_for_each_entry(ifa, &idev->addr_list, if_list) {
+		/* skip tentative addresses until dad completes */
+		if (ifa->flags & IFA_F_TENTATIVE &&
+		    !(ifa->flags & IFA_F_OPTIMISTIC))
+			continue;
+
 		ndisc_send_na(dev, &in6addr_linklocal_allnodes, &ifa->addr,
 			      /*router=*/ !!idev->cnf.forwarding,
 			      /*solicited=*/ false, /*override=*/ true,
diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
index 39970e212ad5..d95ceca7ff8f 100644
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -68,32 +68,7 @@ int ip6_route_me_harder(struct net *net, struct sk_buff *skb)
 }
 EXPORT_SYMBOL(ip6_route_me_harder);
 
-/*
- * Extra routing may needed on local out, as the QUEUE target never
- * returns control to the table.
- */
-
-struct ip6_rt_info {
-	struct in6_addr daddr;
-	struct in6_addr saddr;
-	u_int32_t mark;
-};
-
-static void nf_ip6_saveroute(const struct sk_buff *skb,
-			     struct nf_queue_entry *entry)
-{
-	struct ip6_rt_info *rt_info = nf_queue_entry_reroute(entry);
-
-	if (entry->state.hook == NF_INET_LOCAL_OUT) {
-		const struct ipv6hdr *iph = ipv6_hdr(skb);
-
-		rt_info->daddr = iph->daddr;
-		rt_info->saddr = iph->saddr;
-		rt_info->mark = skb->mark;
-	}
-}
-
-static int nf_ip6_reroute(struct net *net, struct sk_buff *skb,
+static int nf_ip6_reroute(struct sk_buff *skb,
 			  const struct nf_queue_entry *entry)
 {
 	struct ip6_rt_info *rt_info = nf_queue_entry_reroute(entry);
@@ -103,7 +78,7 @@ static int nf_ip6_reroute(struct net *net, struct sk_buff *skb,
 		if (!ipv6_addr_equal(&iph->daddr, &rt_info->daddr) ||
 		    !ipv6_addr_equal(&iph->saddr, &rt_info->saddr) ||
 		    skb->mark != rt_info->mark)
-			return ip6_route_me_harder(net, skb);
+			return ip6_route_me_harder(entry->state.net, skb);
 	}
 	return 0;
 }
@@ -190,25 +165,19 @@ static __sum16 nf_ip6_checksum_partial(struct sk_buff *skb, unsigned int hook,
 };
 
 static const struct nf_ipv6_ops ipv6ops = {
-	.chk_addr	= ipv6_chk_addr,
-	.route_input    = ip6_route_input,
-	.fragment	= ip6_fragment
-};
-
-static const struct nf_afinfo nf_ip6_afinfo = {
-	.family			= AF_INET6,
+	.chk_addr		= ipv6_chk_addr,
+	.route_input    	= ip6_route_input,
+	.fragment		= ip6_fragment,
 	.checksum		= nf_ip6_checksum,
 	.checksum_partial	= nf_ip6_checksum_partial,
 	.route			= nf_ip6_route,
-	.saveroute		= nf_ip6_saveroute,
 	.reroute		= nf_ip6_reroute,
-	.route_key_size		= sizeof(struct ip6_rt_info),
 };
 
 int __init ipv6_netfilter_init(void)
 {
 	RCU_INIT_POINTER(nf_ipv6_ops, &ipv6ops);
-	return nf_register_afinfo(&nf_ip6_afinfo);
+	return 0;
 }
 
 /* This can be called from inet6_init() on errors, so it cannot
@@ -217,5 +186,4 @@ int __init ipv6_netfilter_init(void)
 void ipv6_netfilter_fini(void)
 {
 	RCU_INIT_POINTER(nf_ipv6_ops, NULL);
-	nf_unregister_afinfo(&nf_ip6_afinfo);
 }
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index 6acb2eecd986..4a634b7a2c80 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -71,6 +71,15 @@ config NFT_FIB_IPV6
 endif # NF_TABLES_IPV6
 endif # NF_TABLES
 
+config NF_FLOW_TABLE_IPV6
+	tristate "Netfilter flow table IPv6 module"
+	depends on NF_CONNTRACK && NF_TABLES
+	select NF_FLOW_TABLE
+	help
+	  This option adds the flow table IPv6 support.
+
+	  To compile it as a module, choose M here.
+
 config NF_DUP_IPV6
 	tristate "Netfilter IPv6 packet duplication to alternate destination"
 	depends on !NF_CONNTRACK || NF_CONNTRACK
@@ -232,6 +241,15 @@ config IP6_NF_MATCH_RT
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
+config IP6_NF_MATCH_SRH
+	tristate '"srh" Segment Routing header match support'
+	depends on NETFILTER_ADVANCED
+	help
+	  srh matching allows you to match packets based on the segment
+	  routing header of the packet.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
 # The targets
 config IP6_NF_TARGET_HL
 	tristate '"HL" hoplimit target support'
diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile
index c6ee0cdd0ba9..d984057b8395 100644
--- a/net/ipv6/netfilter/Makefile
+++ b/net/ipv6/netfilter/Makefile
@@ -45,6 +45,9 @@ obj-$(CONFIG_NFT_REDIR_IPV6) += nft_redir_ipv6.o
 obj-$(CONFIG_NFT_DUP_IPV6) += nft_dup_ipv6.o
 obj-$(CONFIG_NFT_FIB_IPV6) += nft_fib_ipv6.o
 
+# flow table support
+obj-$(CONFIG_NF_FLOW_TABLE_IPV6) += nf_flow_table_ipv6.o
+
 # matches
 obj-$(CONFIG_IP6_NF_MATCH_AH) += ip6t_ah.o
 obj-$(CONFIG_IP6_NF_MATCH_EUI64) += ip6t_eui64.o
@@ -54,6 +57,7 @@ obj-$(CONFIG_IP6_NF_MATCH_MH) += ip6t_mh.o
 obj-$(CONFIG_IP6_NF_MATCH_OPTS) += ip6t_hbh.o
 obj-$(CONFIG_IP6_NF_MATCH_RPFILTER) += ip6t_rpfilter.o
 obj-$(CONFIG_IP6_NF_MATCH_RT) += ip6t_rt.o
+obj-$(CONFIG_IP6_NF_MATCH_SRH) += ip6t_srh.o
 
 # targets
 obj-$(CONFIG_IP6_NF_TARGET_MASQUERADE) += ip6t_MASQUERADE.o
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 66a8c69a3db4..af4c917e0836 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -986,9 +986,8 @@ static int get_info(struct net *net, void __user *user,
 	if (compat)
 		xt_compat_lock(AF_INET6);
 #endif
-	t = try_then_request_module(xt_find_table_lock(net, AF_INET6, name),
-				    "ip6table_%s", name);
-	if (t) {
+	t = xt_request_find_table_lock(net, AF_INET6, name);
+	if (!IS_ERR(t)) {
 		struct ip6t_getinfo info;
 		const struct xt_table_info *private = t->private;
 #ifdef CONFIG_COMPAT
@@ -1018,7 +1017,7 @@ static int get_info(struct net *net, void __user *user,
 		xt_table_unlock(t);
 		module_put(t->me);
 	} else
-		ret = -ENOENT;
+		ret = PTR_ERR(t);
 #ifdef CONFIG_COMPAT
 	if (compat)
 		xt_compat_unlock(AF_INET6);
@@ -1044,7 +1043,7 @@ get_entries(struct net *net, struct ip6t_get_entries __user *uptr,
 	get.name[sizeof(get.name) - 1] = '\0';
 
 	t = xt_find_table_lock(net, AF_INET6, get.name);
-	if (t) {
+	if (!IS_ERR(t)) {
 		struct xt_table_info *private = t->private;
 		if (get.size == private->size)
 			ret = copy_entries_to_user(private->size,
@@ -1055,7 +1054,7 @@ get_entries(struct net *net, struct ip6t_get_entries __user *uptr,
 		module_put(t->me);
 		xt_table_unlock(t);
 	} else
-		ret = -ENOENT;
+		ret = PTR_ERR(t);
 
 	return ret;
 }
@@ -1078,10 +1077,9 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
 		goto out;
 	}
 
-	t = try_then_request_module(xt_find_table_lock(net, AF_INET6, name),
-				    "ip6table_%s", name);
-	if (!t) {
-		ret = -ENOENT;
+	t = xt_request_find_table_lock(net, AF_INET6, name);
+	if (IS_ERR(t)) {
+		ret = PTR_ERR(t);
 		goto free_newinfo_counters_untrans;
 	}
 
@@ -1194,8 +1192,8 @@ do_add_counters(struct net *net, const void __user *user, unsigned int len,
 	if (IS_ERR(paddc))
 		return PTR_ERR(paddc);
 	t = xt_find_table_lock(net, AF_INET6, tmp.name);
-	if (!t) {
-		ret = -ENOENT;
+	if (IS_ERR(t)) {
+		ret = PTR_ERR(t);
 		goto free;
 	}
 
@@ -1631,7 +1629,7 @@ compat_get_entries(struct net *net, struct compat_ip6t_get_entries __user *uptr,
 
 	xt_compat_lock(AF_INET6);
 	t = xt_find_table_lock(net, AF_INET6, get.name);
-	if (t) {
+	if (!IS_ERR(t)) {
 		const struct xt_table_info *private = t->private;
 		struct xt_table_info info;
 		ret = compat_table_info(private, &info);
@@ -1645,7 +1643,7 @@ compat_get_entries(struct net *net, struct compat_ip6t_get_entries __user *uptr,
 		module_put(t->me);
 		xt_table_unlock(t);
 	} else
-		ret = -ENOENT;
+		ret = PTR_ERR(t);
 
 	xt_compat_unlock(AF_INET6);
 	return ret;
@@ -1949,7 +1947,6 @@ static int __init ip6_tables_init(void)
 	if (ret < 0)
 		goto err5;
 
-	pr_info("(C) 2000-2006 Netfilter Core Team\n");
 	return 0;
 
 err5:
diff --git a/net/ipv6/netfilter/ip6t_srh.c b/net/ipv6/netfilter/ip6t_srh.c
new file mode 100644
index 000000000000..9642164107ce
--- /dev/null
+++ b/net/ipv6/netfilter/ip6t_srh.c
@@ -0,0 +1,184 @@
+/* Kernel module to match Segment Routing Header (SRH) parameters. */
+
+/* Author:
+ * Ahmed Abdelsalam <amsalam20@gmail.com>
+ *
+ *  This program is free software; you can redistribute it and/or
+ *	modify it under the terms of the GNU General Public License
+ *	as published by the Free Software Foundation; either version 2
+ *	of the License, or (at your option) any later version.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/ipv6.h>
+#include <linux/types.h>
+#include <net/ipv6.h>
+#include <net/seg6.h>
+
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter_ipv6/ip6t_srh.h>
+#include <linux/netfilter_ipv6/ip6_tables.h>
+
+/* Test a struct->mt_invflags and a boolean for inequality */
+#define NF_SRH_INVF(ptr, flag, boolean)	\
+	((boolean) ^ !!((ptr)->mt_invflags & (flag)))
+
+/*
+ * Match callback for the "srh" match.
+ *
+ * Returns true when the packet carries a routing header of type
+ * IPV6_SRCRT_TYPE_4 whose fields pass every comparison selected in
+ * ->mt_flags; a comparison is negated when its bit is set in ->mt_invflags.
+ * Packets with no routing header, with a routing header extending past the
+ * end of the packet, or with segments_left greater than first_segment never
+ * match.
+ */
+static bool srh_mt6(const struct sk_buff *skb, struct xt_action_param *par)
+{
+	const struct ip6t_srh *srhinfo = par->matchinfo;
+	struct ipv6_sr_hdr *srh;
+	struct ipv6_sr_hdr _srh;
+	int hdrlen, srhoff = 0;
+
+	if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0)
+		return false;
+	srh = skb_header_pointer(skb, srhoff, sizeof(_srh), &_srh);
+	if (!srh)
+		return false;
+
+	/* The complete header, not only its fixed part, must fit the packet. */
+	hdrlen = ipv6_optlen(srh);
+	if (skb->len - srhoff < hdrlen)
+		return false;
+
+	if (srh->type != IPV6_SRCRT_TYPE_4)
+		return false;
+
+	/* Reject headers whose segments_left exceeds first_segment. */
+	if (srh->segments_left > srh->first_segment)
+		return false;
+
+	/* Next Header matching */
+	if (srhinfo->mt_flags & IP6T_SRH_NEXTHDR)
+		if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_NEXTHDR,
+				!(srh->nexthdr == srhinfo->next_hdr)))
+			return false;
+
+	/* Header Extension Length matching */
+	if (srhinfo->mt_flags & IP6T_SRH_LEN_EQ)
+		if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_LEN_EQ,
+				!(srh->hdrlen == srhinfo->hdr_len)))
+			return false;
+
+	if (srhinfo->mt_flags & IP6T_SRH_LEN_GT)
+		if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_LEN_GT,
+				!(srh->hdrlen > srhinfo->hdr_len)))
+			return false;
+
+	if (srhinfo->mt_flags & IP6T_SRH_LEN_LT)
+		if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_LEN_LT,
+				!(srh->hdrlen < srhinfo->hdr_len)))
+			return false;
+
+	/* Segments Left matching */
+	if (srhinfo->mt_flags & IP6T_SRH_SEGS_EQ)
+		if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_SEGS_EQ,
+				!(srh->segments_left == srhinfo->segs_left)))
+			return false;
+
+	if (srhinfo->mt_flags & IP6T_SRH_SEGS_GT)
+		if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_SEGS_GT,
+				!(srh->segments_left > srhinfo->segs_left)))
+			return false;
+
+	if (srhinfo->mt_flags & IP6T_SRH_SEGS_LT)
+		if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_SEGS_LT,
+				!(srh->segments_left < srhinfo->segs_left)))
+			return false;
+
+	/*
+	 * Last Entry matching
+	 * Last_Entry field was introduced in revision 6 of the SRH draft.
+	 * It was called First_Segment in the previous revision
+	 */
+	if (srhinfo->mt_flags & IP6T_SRH_LAST_EQ)
+		if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_LAST_EQ,
+				!(srh->first_segment == srhinfo->last_entry)))
+			return false;
+
+	if (srhinfo->mt_flags & IP6T_SRH_LAST_GT)
+		if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_LAST_GT,
+				!(srh->first_segment > srhinfo->last_entry)))
+			return false;
+
+	if (srhinfo->mt_flags & IP6T_SRH_LAST_LT)
+		if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_LAST_LT,
+				!(srh->first_segment < srhinfo->last_entry)))
+			return false;
+
+	/*
+	 * Tag matching
+	 * Tag field was introduced in revision 6 of the SRH draft.
+	 */
+	if (srhinfo->mt_flags & IP6T_SRH_TAG)
+		if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_TAG,
+				!(srh->tag == srhinfo->tag)))
+			return false;
+	return true;
+}
+
+/*
+ * Rule validation callback: refuse rules that set match or inversion flag
+ * bits outside IP6T_SRH_MASK / IP6T_SRH_INV_MASK.
+ *
+ * Returns 0 when the rule is acceptable and -EINVAL otherwise.  The
+ * diagnostics are rate-limited so that repeatedly submitting bad rules
+ * cannot flood the kernel log.
+ */
+static int srh_mt6_check(const struct xt_mtchk_param *par)
+{
+	const struct ip6t_srh *srhinfo = par->matchinfo;
+
+	if (srhinfo->mt_flags & ~IP6T_SRH_MASK) {
+		pr_err_ratelimited("unknown srh match flags  %X\n",
+				   srhinfo->mt_flags);
+		return -EINVAL;
+	}
+
+	if (srhinfo->mt_invflags & ~IP6T_SRH_INV_MASK) {
+		pr_err_ratelimited("unknown srh invflags %X\n",
+				   srhinfo->mt_invflags);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/* Registration record for the IPv6 "srh" match. */
+static struct xt_match srh_mt6_reg __read_mostly = {
+	.name		= "srh",
+	.family		= NFPROTO_IPV6,
+	.match		= srh_mt6,
+	.matchsize	= sizeof(struct ip6t_srh),
+	.checkentry	= srh_mt6_check,
+	.me		= THIS_MODULE,
+};
+
+static int __init srh_mt6_init(void)
+{
+	return xt_register_match(&srh_mt6_reg);
+}
+
+static void __exit srh_mt6_exit(void)
+{
+	xt_unregister_match(&srh_mt6_reg);
+}
+
+module_init(srh_mt6_init);
+module_exit(srh_mt6_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Xtables: IPv6 Segment Routing Header match");
+MODULE_AUTHOR("Ahmed Abdelsalam <amsalam20@gmail.com>");
diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c
index 2b1a9dcdbcb3..b0524b18c4fb 100644
--- a/net/ipv6/netfilter/ip6table_mangle.c
+++ b/net/ipv6/netfilter/ip6table_mangle.c
@@ -42,14 +42,6 @@ ip6t_mangle_out(struct sk_buff *skb, const struct nf_hook_state *state)
 	u_int8_t hop_limit;
 	u_int32_t flowlabel, mark;
 	int err;
-#if 0
-	/* root is playing with raw sockets. */
-	if (skb->len < sizeof(struct iphdr) ||
-	    ip_hdrlen(skb) < sizeof(struct iphdr)) {
-		net_warn_ratelimited("ip6t_hook: happy cracking\n");
-		return NF_ACCEPT;
-	}
-#endif
 
 	/* save source/dest address, mark, hoplimit, flowlabel, priority,  */
 	memcpy(&saddr, &ipv6_hdr(skb)->saddr, sizeof(saddr));
diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c
index 991512576c8c..47306e45a80a 100644
--- a/net/ipv6/netfilter/ip6table_nat.c
+++ b/net/ipv6/netfilter/ip6table_nat.c
@@ -74,6 +74,7 @@ static const struct nf_hook_ops nf_nat_ipv6_ops[] = {
 	{
 		.hook		= ip6table_nat_in,
 		.pf		= NFPROTO_IPV6,
+		.nat_hook	= true,
 		.hooknum	= NF_INET_PRE_ROUTING,
 		.priority	= NF_IP6_PRI_NAT_DST,
 	},
@@ -81,6 +82,7 @@ static const struct nf_hook_ops nf_nat_ipv6_ops[] = {
 	{
 		.hook		= ip6table_nat_out,
 		.pf		= NFPROTO_IPV6,
+		.nat_hook	= true,
 		.hooknum	= NF_INET_POST_ROUTING,
 		.priority	= NF_IP6_PRI_NAT_SRC,
 	},
@@ -88,12 +90,14 @@ static const struct nf_hook_ops nf_nat_ipv6_ops[] = {
 	{
 		.hook		= ip6table_nat_local_fn,
 		.pf		= NFPROTO_IPV6,
+		.nat_hook	= true,
 		.hooknum	= NF_INET_LOCAL_OUT,
 		.priority	= NF_IP6_PRI_NAT_DST,
 	},
 	/* After packet filtering, change source */
 	{
 		.hook		= ip6table_nat_fn,
+		.nat_hook	= true,
 		.pf		= NFPROTO_IPV6,
 		.hooknum	= NF_INET_LOCAL_IN,
 		.priority	= NF_IP6_PRI_NAT_SRC,
diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c
index d4bc56443dc1..710fa0806c37 100644
--- a/net/ipv6/netfilter/ip6table_raw.c
+++ b/net/ipv6/netfilter/ip6table_raw.c
@@ -3,6 +3,7 @@
  *
  * Copyright (C) 2003 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
  */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include <linux/module.h>
 #include <linux/netfilter_ipv6/ip6_tables.h>
 #include <linux/slab.h>
@@ -11,6 +12,10 @@
 
 static int __net_init ip6table_raw_table_init(struct net *net);
 
+static bool raw_before_defrag __read_mostly;
+MODULE_PARM_DESC(raw_before_defrag, "Enable raw table before defrag");
+module_param(raw_before_defrag, bool, 0000);
+
 static const struct xt_table packet_raw = {
 	.name = "raw",
 	.valid_hooks = RAW_VALID_HOOKS,
@@ -20,6 +25,15 @@ static const struct xt_table packet_raw = {
 	.table_init = ip6table_raw_table_init,
 };
 
+static const struct xt_table packet_raw_before_defrag = {
+	.name = "raw",
+	.valid_hooks = RAW_VALID_HOOKS,
+	.me = THIS_MODULE,
+	.af = NFPROTO_IPV6,
+	.priority = NF_IP6_PRI_RAW_BEFORE_DEFRAG,
+	.table_init = ip6table_raw_table_init,
+};
+
 /* The work comes in here from netfilter.c. */
 static unsigned int
 ip6table_raw_hook(void *priv, struct sk_buff *skb,
@@ -33,15 +47,19 @@ static struct nf_hook_ops *rawtable_ops __read_mostly;
 static int __net_init ip6table_raw_table_init(struct net *net)
 {
 	struct ip6t_replace *repl;
+	const struct xt_table *table = &packet_raw;
 	int ret;
 
+	if (raw_before_defrag)
+		table = &packet_raw_before_defrag;
+
 	if (net->ipv6.ip6table_raw)
 		return 0;
 
-	repl = ip6t_alloc_initial_table(&packet_raw);
+	repl = ip6t_alloc_initial_table(table);
 	if (repl == NULL)
 		return -ENOMEM;
-	ret = ip6t_register_table(net, &packet_raw, repl, rawtable_ops,
+	ret = ip6t_register_table(net, table, repl, rawtable_ops,
 				  &net->ipv6.ip6table_raw);
 	kfree(repl);
 	return ret;
@@ -62,9 +80,16 @@ static struct pernet_operations ip6table_raw_net_ops = {
 static int __init ip6table_raw_init(void)
 {
 	int ret;
+	const struct xt_table *table = &packet_raw;
+
+	if (raw_before_defrag) {
+		table = &packet_raw_before_defrag;
+
+		pr_info("Enabling raw table before defrag\n");
+	}
 
 	/* Register hooks */
-	rawtable_ops = xt_hook_ops_alloc(&packet_raw, ip6table_raw_hook);
+	rawtable_ops = xt_hook_ops_alloc(table, ip6table_raw_hook);
 	if (IS_ERR(rawtable_ops))
 		return PTR_ERR(rawtable_ops);
 
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index 3b80a38f62b8..11a313fd9273 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -176,11 +176,6 @@ static unsigned int ipv6_conntrack_local(void *priv,
 					 struct sk_buff *skb,
 					 const struct nf_hook_state *state)
 {
-	/* root is playing with raw sockets. */
-	if (skb->len < sizeof(struct ipv6hdr)) {
-		net_notice_ratelimited("ipv6_conntrack_local: packet too short\n");
-		return NF_ACCEPT;
-	}
 	return nf_conntrack_in(state->net, PF_INET6, state->hook, skb);
 }
 
@@ -368,7 +363,7 @@ static struct nf_sockopt_ops so_getorigdst6 = {
 	.owner		= THIS_MODULE,
 };
 
-static struct nf_conntrack_l4proto *builtin_l4proto6[] = {
+static const struct nf_conntrack_l4proto * const builtin_l4proto6[] = {
 	&nf_conntrack_l4proto_tcp6,
 	&nf_conntrack_l4proto_udp6,
 	&nf_conntrack_l4proto_icmpv6,
diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
index 3ac0d826afc4..2548e2c8aedd 100644
--- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
@@ -27,7 +27,7 @@
 #include <net/netfilter/ipv6/nf_conntrack_icmpv6.h>
 #include <net/netfilter/nf_log.h>
 
-static unsigned int nf_ct_icmpv6_timeout __read_mostly = 30*HZ;
+static const unsigned int nf_ct_icmpv6_timeout = 30*HZ;
 
 static inline struct nf_icmp_net *icmpv6_pernet(struct net *net)
 {
@@ -352,7 +352,7 @@ static struct nf_proto_net *icmpv6_get_net_proto(struct net *net)
 	return &net->ct.nf_ct_proto.icmpv6.pn;
 }
 
-struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6 __read_mostly =
+const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6 =
 {
 	.l3proto		= PF_INET6,
 	.l4proto		= IPPROTO_ICMPV6,
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 977d8900cfd1..ce53dcfda88a 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -231,7 +231,7 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb,
 
 	if ((unsigned int)end > IPV6_MAXPLEN) {
 		pr_debug("offset is too large.\n");
-		return -1;
+		return -EINVAL;
 	}
 
 	ecn = ip6_frag_ecn(ipv6_hdr(skb));
@@ -264,7 +264,7 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb,
 			 * this case. -DaveM
 			 */
 			pr_debug("end of fragment not rounded to 8 bytes.\n");
-			return -1;
+			return -EPROTO;
 		}
 		if (end > fq->q.len) {
 			/* Some bits beyond end -> corruption. */
@@ -358,7 +358,7 @@ found:
 discard_fq:
 	inet_frag_kill(&fq->q, &nf_frags);
 err:
-	return -1;
+	return -EINVAL;
 }
 
 /*
@@ -567,6 +567,7 @@ find_prev_fhdr(struct sk_buff *skb, u8 *prevhdrp, int *prevhoff, int *fhoff)
 
 int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user)
 {
+	u16 savethdr = skb->transport_header;
 	struct net_device *dev = skb->dev;
 	int fhoff, nhoff, ret;
 	struct frag_hdr *fhdr;
@@ -600,8 +601,12 @@ int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user)
 
 	spin_lock_bh(&fq->q.lock);
 
-	if (nf_ct_frag6_queue(fq, skb, fhdr, nhoff) < 0) {
-		ret = -EINVAL;
+	ret = nf_ct_frag6_queue(fq, skb, fhdr, nhoff);
+	if (ret < 0) {
+		if (ret == -EPROTO) {
+			skb->transport_header = savethdr;
+			ret = 0;
+		}
 		goto out_unlock;
 	}
 
diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
index b326da59257f..c87b48359e8f 100644
--- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
+++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
@@ -63,6 +63,9 @@ static unsigned int ipv6_defrag(void *priv,
 	/* Previously seen (loopback)?	*/
 	if (skb_nfct(skb) && !nf_ct_is_template((struct nf_conn *)skb_nfct(skb)))
 		return NF_ACCEPT;
+
+	if (skb->_nfct == IP_CT_UNTRACKED)
+		return NF_ACCEPT;
 #endif
 
 	err = nf_ct_frag6_gather(state->net, skb,
diff --git a/net/ipv6/netfilter/nf_flow_table_ipv6.c b/net/ipv6/netfilter/nf_flow_table_ipv6.c
new file mode 100644
index 000000000000..fff21602875a
--- /dev/null
+++ b/net/ipv6/netfilter/nf_flow_table_ipv6.c
@@ -0,0 +1,364 @@
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netfilter.h>
+#include <linux/rhashtable.h>
+#include <linux/ipv6.h>
+#include <linux/netdevice.h>
+#include <net/ipv6.h>
+#include <net/ip6_route.h>
+#include <net/neighbour.h>
+#include <net/netfilter/nf_flow_table.h>
+#include <net/netfilter/nf_tables.h>
+/* For layer 4 checksum field offset. */
+#include <linux/tcp.h>
+#include <linux/udp.h>
+
+/*
+ * Adjust the TCP checksum of the segment at network-header offset @thoff
+ * after an IPv6 address changed from @addr to @new_addr.
+ *
+ * Returns 0 on success, -1 if the TCP header cannot be pulled or made
+ * writable.
+ */
+static int nf_flow_nat_ipv6_tcp(struct sk_buff *skb, unsigned int thoff,
+				struct in6_addr *addr,
+				struct in6_addr *new_addr)
+{
+	struct tcphdr *tcph;
+
+	if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) ||
+	    skb_try_make_writable(skb, thoff + sizeof(*tcph)))
+		return -1;
+
+	tcph = (void *)(skb_network_header(skb) + thoff);
+	inet_proto_csum_replace16(&tcph->check, skb, addr->s6_addr32,
+				  new_addr->s6_addr32, true);
+
+	return 0;
+}
+
+/*
+ * UDP counterpart of nf_flow_nat_ipv6_tcp().  The checksum is only adjusted
+ * when the field is non-zero or the skb is CHECKSUM_PARTIAL; an adjusted
+ * value of zero is stored as CSUM_MANGLED_0.
+ *
+ * Returns 0 on success, -1 if the UDP header cannot be pulled or made
+ * writable.
+ */
+static int nf_flow_nat_ipv6_udp(struct sk_buff *skb, unsigned int thoff,
+				struct in6_addr *addr,
+				struct in6_addr *new_addr)
+{
+	struct udphdr *udph;
+
+	if (!pskb_may_pull(skb, thoff + sizeof(*udph)) ||
+	    skb_try_make_writable(skb, thoff + sizeof(*udph)))
+		return -1;
+
+	udph = (void *)(skb_network_header(skb) + thoff);
+	if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
+		inet_proto_csum_replace16(&udph->check, skb, addr->s6_addr32,
+					  new_addr->s6_addr32, true);
+		if (!udph->check)
+			udph->check = CSUM_MANGLED_0;
+	}
+
+	return 0;
+}
+
+/*
+ * Fix up the layer 4 checksum for the protocol named by @ip6h->nexthdr.
+ * Only TCP and UDP are handled; any other value is a no-op.
+ *
+ * Returns 0 on success and -1 on failure.  The failure value has to be
+ * negative because the callers test for "< 0": NF_DROP is defined as 0 and
+ * returning it would make a failure look like success.
+ */
+static int nf_flow_nat_ipv6_l4proto(struct sk_buff *skb, struct ipv6hdr *ip6h,
+				    unsigned int thoff, struct in6_addr *addr,
+				    struct in6_addr *new_addr)
+{
+	switch (ip6h->nexthdr) {
+	case IPPROTO_TCP:
+		if (nf_flow_nat_ipv6_tcp(skb, thoff, addr, new_addr) < 0)
+			return -1;
+		break;
+	case IPPROTO_UDP:
+		if (nf_flow_nat_ipv6_udp(skb, thoff, addr, new_addr) < 0)
+			return -1;
+		break;
+	}
+
+	return 0;
+}
+
+/*
+ * Source NAT of the IPv6 addresses: in the original direction the source
+ * address is replaced by the reply tuple's destination, in the reply
+ * direction the destination address is replaced by the original tuple's
+ * source.  The layer 4 checksum is fixed up afterwards.
+ *
+ * @ip6h must point at the current network header of @skb.
+ *
+ * Returns 0 on success, -1 for an unknown @dir or a failed checksum fix-up.
+ */
+static int nf_flow_snat_ipv6(const struct flow_offload *flow,
+			     struct sk_buff *skb, struct ipv6hdr *ip6h,
+			     unsigned int thoff,
+			     enum flow_offload_tuple_dir dir)
+{
+	struct in6_addr addr, new_addr;
+
+	switch (dir) {
+	case FLOW_OFFLOAD_DIR_ORIGINAL:
+		addr = ip6h->saddr;
+		new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v6;
+		ip6h->saddr = new_addr;
+		break;
+	case FLOW_OFFLOAD_DIR_REPLY:
+		addr = ip6h->daddr;
+		new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6;
+		ip6h->daddr = new_addr;
+		break;
+	default:
+		return -1;
+	}
+
+	return nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
+}
+
+/*
+ * Destination NAT of the IPv6 addresses: in the original direction the
+ * destination address is replaced by the reply tuple's source, in the reply
+ * direction the source address is replaced by the original tuple's
+ * destination.  The layer 4 checksum is fixed up afterwards.
+ *
+ * @ip6h must point at the current network header of @skb.
+ *
+ * Returns 0 on success, -1 for an unknown @dir or a failed checksum fix-up.
+ */
+static int nf_flow_dnat_ipv6(const struct flow_offload *flow,
+			     struct sk_buff *skb, struct ipv6hdr *ip6h,
+			     unsigned int thoff,
+			     enum flow_offload_tuple_dir dir)
+{
+	struct in6_addr addr, new_addr;
+
+	switch (dir) {
+	case FLOW_OFFLOAD_DIR_ORIGINAL:
+		addr = ip6h->daddr;
+		new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v6;
+		ip6h->daddr = new_addr;
+		break;
+	case FLOW_OFFLOAD_DIR_REPLY:
+		addr = ip6h->saddr;
+		new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6;
+		ip6h->saddr = new_addr;
+		break;
+	default:
+		return -1;
+	}
+
+	return nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
+}
+
+/*
+ * Apply the SNAT and/or DNAT rewrites selected in @flow->flags to @skb,
+ * ports first and addresses second.
+ *
+ * The checksum helpers above pull and un-share packet data, which can move
+ * the headers (the port helpers are defined elsewhere and presumably do the
+ * same).  The network header is therefore looked up again for every address
+ * rewrite rather than cached across those calls.
+ *
+ * Returns 0 on success, -1 if any step fails.
+ */
+static int nf_flow_nat_ipv6(const struct flow_offload *flow,
+			    struct sk_buff *skb,
+			    enum flow_offload_tuple_dir dir)
+{
+	unsigned int thoff = sizeof(struct ipv6hdr);
+	/* nexthdr is not modified by NAT here, so read it once up front. */
+	u8 nexthdr = ipv6_hdr(skb)->nexthdr;
+
+	if (flow->flags & FLOW_OFFLOAD_SNAT &&
+	    (nf_flow_snat_port(flow, skb, thoff, nexthdr, dir) < 0 ||
+	     nf_flow_snat_ipv6(flow, skb, ipv6_hdr(skb), thoff, dir) < 0))
+		return -1;
+	if (flow->flags & FLOW_OFFLOAD_DNAT &&
+	    (nf_flow_dnat_port(flow, skb, thoff, nexthdr, dir) < 0 ||
+	     nf_flow_dnat_ipv6(flow, skb, ipv6_hdr(skb), thoff, dir) < 0))
+		return -1;
+
+	return 0;
+}
+
+/*
+ * Build the flow table lookup key for @skb in @tuple: IPv6 addresses, layer 4
+ * ports, protocol numbers and the ifindex of @dev.
+ *
+ * Returns 0 on success, -1 if the headers cannot be pulled or the next header
+ * is neither TCP nor UDP.
+ */
+static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev,
+			      struct flow_offload_tuple *tuple)
+{
+	struct flow_ports *ports;
+	struct ipv6hdr *ip6h;
+	unsigned int thoff;
+
+	if (!pskb_may_pull(skb, sizeof(*ip6h)))
+		return -1;
+
+	ip6h = ipv6_hdr(skb);
+
+	if (ip6h->nexthdr != IPPROTO_TCP &&
+	    ip6h->nexthdr != IPPROTO_UDP)
+		return -1;
+
+	thoff = sizeof(*ip6h);
+	if (!pskb_may_pull(skb, thoff + sizeof(*ports)))
+		return -1;
+
+	/* The pull above may have moved the header; look it up again. */
+	ip6h = ipv6_hdr(skb);
+	ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
+
+	tuple->src_v6		= ip6h->saddr;
+	tuple->dst_v6		= ip6h->daddr;
+	tuple->src_port		= ports->source;
+	tuple->dst_port		= ports->dest;
+	tuple->l3proto		= AF_INET6;
+	tuple->l4proto		= ip6h->nexthdr;
+	tuple->iifidx		= dev->ifindex;
+
+	return 0;
+}
+
+/*
+ * Based on ip_exceeds_mtu(): @skb exceeds @mtu when it is longer than @mtu,
+ * unless it is a GSO packet for which skb_gso_validate_mtu() succeeds.
+ */
+static bool __nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
+{
+	if (skb->len <= mtu)
+		return false;
+
+	if (skb_is_gso(skb) && skb_gso_validate_mtu(skb, mtu))
+		return false;
+
+	return true;
+}
+
+/* Check @skb against the forwarding MTU of route @rt. */
+static bool nf_flow_exceeds_mtu(struct sk_buff *skb, const struct rt6_info *rt)
+{
+	u32 mtu;
+
+	mtu = ip6_dst_mtu_forward(&rt->dst);
+	if (__nf_flow_exceeds_mtu(skb, mtu))
+		return true;
+
+	return false;
+}
+
+/*
+ * Netfilter hook implementing the IPv6 flow table fast path.
+ *
+ * NF_ACCEPT is returned, leaving the packet to the rest of the stack, when
+ * it is not IPv6 TCP/UDP, matches no flow table entry, has no usable output
+ * device, exceeds the route MTU or has a hop limit of 1 or less.  Otherwise
+ * NAT is applied if the flow asks for it, the hop limit is decremented and
+ * the packet is handed to neigh_xmit() on the output device, in which case
+ * NF_STOLEN is returned.  NF_DROP is returned when the header cannot be made
+ * writable or NAT fails.
+ */
+unsigned int
+nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
+			  const struct nf_hook_state *state)
+{
+	struct flow_offload_tuple_rhash *tuplehash;
+	struct nf_flowtable *flow_table = priv;
+	struct flow_offload_tuple tuple = {};
+	enum flow_offload_tuple_dir dir;
+	struct flow_offload *flow;
+	struct net_device *outdev;
+	struct in6_addr *nexthop;
+	struct ipv6hdr *ip6h;
+	struct rt6_info *rt;
+
+	if (skb->protocol != htons(ETH_P_IPV6))
+		return NF_ACCEPT;
+
+	if (nf_flow_tuple_ipv6(skb, state->in, &tuple) < 0)
+		return NF_ACCEPT;
+
+	tuplehash = flow_offload_lookup(flow_table, &tuple);
+	if (tuplehash == NULL)
+		return NF_ACCEPT;
+
+	outdev = dev_get_by_index_rcu(state->net, tuplehash->tuple.oifidx);
+	if (!outdev)
+		return NF_ACCEPT;
+
+	dir = tuplehash->tuple.dir;
+	flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
+
+	rt = (struct rt6_info *)flow->tuplehash[dir].tuple.dst_cache;
+	if (unlikely(nf_flow_exceeds_mtu(skb, rt)))
+		return NF_ACCEPT;
+
+	/*
+	 * A packet whose hop limit is 0 or 1 must not be forwarded; leave it
+	 * to the regular path so hop limit expiry is handled there.
+	 */
+	if (ipv6_hdr(skb)->hop_limit <= 1)
+		return NF_ACCEPT;
+
+	if (skb_try_make_writable(skb, sizeof(*ip6h)))
+		return NF_DROP;
+
+	if (flow->flags & (FLOW_OFFLOAD_SNAT | FLOW_OFFLOAD_DNAT) &&
+	    nf_flow_nat_ipv6(flow, skb, dir) < 0)
+		return NF_DROP;
+
+	flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT;
+	ip6h = ipv6_hdr(skb);
+	ip6h->hop_limit--;
+
+	skb->dev = outdev;
+	nexthop = rt6_nexthop(rt, &flow->tuplehash[!dir].tuple.src_v6);
+	neigh_xmit(NEIGH_ND_TABLE, outdev, nexthop, skb);
+
+	return NF_STOLEN;
+}
+EXPORT_SYMBOL_GPL(nf_flow_offload_ipv6_hook);
+
+/* Flow table type registered with nf_tables for NFPROTO_IPV6. */
+static struct nf_flowtable_type flowtable_ipv6 = {
+	.family		= NFPROTO_IPV6,
+	.params		= &nf_flow_offload_rhash_params,
+	.gc		= nf_flow_offload_work_gc,
+	.hook		= nf_flow_offload_ipv6_hook,
+	.owner		= THIS_MODULE,
+};
+
+static int __init nf_flow_ipv6_module_init(void)
+{
+	nft_register_flowtable_type(&flowtable_ipv6);
+
+	return 0;
+}
+
+static void __exit nf_flow_ipv6_module_exit(void)
+{
+	nft_unregister_flowtable_type(&flowtable_ipv6);
+}
+
+module_init(nf_flow_ipv6_module_init);
+module_exit(nf_flow_ipv6_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
+MODULE_ALIAS_NF_FLOWTABLE(AF_INET6);
diff --git a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
index 1d2fb9267d6f..bed57ee65f7b 100644
--- a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
@@ -369,10 +369,6 @@ nf_nat_ipv6_out(void *priv, struct sk_buff *skb,
 #endif
 	unsigned int ret;
 
-	/* root is playing with raw sockets. */
-	if (skb->len < sizeof(struct ipv6hdr))
-		return NF_ACCEPT;
-
 	ret = nf_nat_ipv6_fn(priv, skb, state, do_chain);
 #ifdef CONFIG_XFRM
 	if (ret != NF_DROP && ret != NF_STOLEN &&
@@ -408,10 +404,6 @@ nf_nat_ipv6_local_fn(void *priv, struct sk_buff *skb,
 	unsigned int ret;
 	int err;
 
-	/* root is playing with raw sockets. */
-	if (skb->len < sizeof(struct ipv6hdr))
-		return NF_ACCEPT;
-
 	ret = nf_nat_ipv6_fn(priv, skb, state, do_chain);
 	if (ret != NF_DROP && ret != NF_STOLEN &&
 	    (ct = nf_ct_get(skb, &ctinfo)) != NULL) {
diff --git a/net/ipv6/netfilter/nf_tables_ipv6.c b/net/ipv6/netfilter/nf_tables_ipv6.c
index d6e4ba5de916..17e03589331c 100644
--- a/net/ipv6/netfilter/nf_tables_ipv6.c
+++ b/net/ipv6/netfilter/nf_tables_ipv6.c
@@ -22,68 +22,12 @@ static unsigned int nft_do_chain_ipv6(void *priv,
 {
 	struct nft_pktinfo pkt;
 
-	nft_set_pktinfo_ipv6(&pkt, skb, state);
+	nft_set_pktinfo(&pkt, skb, state);
+	nft_set_pktinfo_ipv6(&pkt, skb);
 
 	return nft_do_chain(&pkt, priv);
 }
 
-static unsigned int nft_ipv6_output(void *priv,
-				    struct sk_buff *skb,
-				    const struct nf_hook_state *state)
-{
-	if (unlikely(skb->len < sizeof(struct ipv6hdr))) {
-		if (net_ratelimit())
-			pr_info("nf_tables_ipv6: ignoring short SOCK_RAW "
-				"packet\n");
-		return NF_ACCEPT;
-	}
-
-	return nft_do_chain_ipv6(priv, skb, state);
-}
-
-struct nft_af_info nft_af_ipv6 __read_mostly = {
-	.family		= NFPROTO_IPV6,
-	.nhooks		= NF_INET_NUMHOOKS,
-	.owner		= THIS_MODULE,
-	.nops		= 1,
-	.hooks		= {
-		[NF_INET_LOCAL_IN]	= nft_do_chain_ipv6,
-		[NF_INET_LOCAL_OUT]	= nft_ipv6_output,
-		[NF_INET_FORWARD]	= nft_do_chain_ipv6,
-		[NF_INET_PRE_ROUTING]	= nft_do_chain_ipv6,
-		[NF_INET_POST_ROUTING]	= nft_do_chain_ipv6,
-	},
-};
-EXPORT_SYMBOL_GPL(nft_af_ipv6);
-
-static int nf_tables_ipv6_init_net(struct net *net)
-{
-	net->nft.ipv6 = kmalloc(sizeof(struct nft_af_info), GFP_KERNEL);
-	if (net->nft.ipv6 == NULL)
-		return -ENOMEM;
-
-	memcpy(net->nft.ipv6, &nft_af_ipv6, sizeof(nft_af_ipv6));
-
-	if (nft_register_afinfo(net, net->nft.ipv6) < 0)
-		goto err;
-
-	return 0;
-err:
-	kfree(net->nft.ipv6);
-	return -ENOMEM;
-}
-
-static void nf_tables_ipv6_exit_net(struct net *net)
-{
-	nft_unregister_afinfo(net, net->nft.ipv6);
-	kfree(net->nft.ipv6);
-}
-
-static struct pernet_operations nf_tables_ipv6_net_ops = {
-	.init	= nf_tables_ipv6_init_net,
-	.exit	= nf_tables_ipv6_exit_net,
-};
-
 static const struct nf_chain_type filter_ipv6 = {
 	.name		= "filter",
 	.type		= NFT_CHAIN_T_DEFAULT,
@@ -94,26 +38,22 @@ static const struct nf_chain_type filter_ipv6 = {
 			  (1 << NF_INET_FORWARD) |
 			  (1 << NF_INET_PRE_ROUTING) |
 			  (1 << NF_INET_POST_ROUTING),
+	.hooks		= {
+		[NF_INET_LOCAL_IN]	= nft_do_chain_ipv6,
+		[NF_INET_LOCAL_OUT]	= nft_do_chain_ipv6,
+		[NF_INET_FORWARD]	= nft_do_chain_ipv6,
+		[NF_INET_PRE_ROUTING]	= nft_do_chain_ipv6,
+		[NF_INET_POST_ROUTING]	= nft_do_chain_ipv6,
+	},
 };
 
 static int __init nf_tables_ipv6_init(void)
 {
-	int ret;
-
-	ret = nft_register_chain_type(&filter_ipv6);
-	if (ret < 0)
-		return ret;
-
-	ret = register_pernet_subsys(&nf_tables_ipv6_net_ops);
-	if (ret < 0)
-		nft_unregister_chain_type(&filter_ipv6);
-
-	return ret;
+	return nft_register_chain_type(&filter_ipv6);
 }
 
 static void __exit nf_tables_ipv6_exit(void)
 {
-	unregister_pernet_subsys(&nf_tables_ipv6_net_ops);
 	nft_unregister_chain_type(&filter_ipv6);
 }
 
@@ -122,4 +62,4 @@ module_exit(nf_tables_ipv6_exit);
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
-MODULE_ALIAS_NFT_FAMILY(AF_INET6);
+MODULE_ALIAS_NFT_CHAIN(AF_INET6, "filter");
diff --git a/net/ipv6/netfilter/nft_chain_nat_ipv6.c b/net/ipv6/netfilter/nft_chain_nat_ipv6.c
index 443cd306c0b0..73fe2bd13fcf 100644
--- a/net/ipv6/netfilter/nft_chain_nat_ipv6.c
+++ b/net/ipv6/netfilter/nft_chain_nat_ipv6.c
@@ -31,7 +31,8 @@ static unsigned int nft_nat_do_chain(void *priv,
 {
 	struct nft_pktinfo pkt;
 
-	nft_set_pktinfo_ipv6(&pkt, skb, state);
+	nft_set_pktinfo(&pkt, skb, state);
+	nft_set_pktinfo_ipv6(&pkt, skb);
 
 	return nft_do_chain(&pkt, priv);
 }
diff --git a/net/ipv6/netfilter/nft_chain_route_ipv6.c b/net/ipv6/netfilter/nft_chain_route_ipv6.c
index f2727475895e..11d3c3b9aa18 100644
--- a/net/ipv6/netfilter/nft_chain_route_ipv6.c
+++ b/net/ipv6/netfilter/nft_chain_route_ipv6.c
@@ -33,7 +33,8 @@ static unsigned int nf_route_table_hook(void *priv,
 	u32 mark, flowlabel;
 	int err;
 
-	nft_set_pktinfo_ipv6(&pkt, skb, state);
+	nft_set_pktinfo(&pkt, skb, state);
+	nft_set_pktinfo_ipv6(&pkt, skb);
 
 	/* save source/dest address, mark, hoplimit, flowlabel, priority */
 	memcpy(&saddr, &ipv6_hdr(skb)->saddr, sizeof(saddr));
diff --git a/net/ipv6/netfilter/nft_fib_ipv6.c b/net/ipv6/netfilter/nft_fib_ipv6.c
index 54b5899543ef..cc5174c7254c 100644
--- a/net/ipv6/netfilter/nft_fib_ipv6.c
+++ b/net/ipv6/netfilter/nft_fib_ipv6.c
@@ -60,7 +60,6 @@ static u32 __nft_fib6_eval_type(const struct nft_fib *priv,
 {
 	const struct net_device *dev = NULL;
 	const struct nf_ipv6_ops *v6ops;
-	const struct nf_afinfo *afinfo;
 	int route_err, addrtype;
 	struct rt6_info *rt;
 	struct flowi6 fl6 = {
@@ -69,8 +68,8 @@ static u32 __nft_fib6_eval_type(const struct nft_fib *priv,
 	};
 	u32 ret = 0;
 
-	afinfo = nf_get_afinfo(NFPROTO_IPV6);
-	if (!afinfo)
+	v6ops = nf_get_ipv6_ops();
+	if (!v6ops)
 		return RTN_UNREACHABLE;
 
 	if (priv->flags & NFTA_FIB_F_IIF)
@@ -80,12 +79,11 @@ static u32 __nft_fib6_eval_type(const struct nft_fib *priv,
 
 	nft_fib6_flowi_init(&fl6, priv, pkt, dev, iph);
 
-	v6ops = nf_get_ipv6_ops();
-	if (dev && v6ops && v6ops->chk_addr(nft_net(pkt), &fl6.daddr, dev, true))
+	if (dev && v6ops->chk_addr(nft_net(pkt), &fl6.daddr, dev, true))
 		ret = RTN_LOCAL;
 
-	route_err = afinfo->route(nft_net(pkt), (struct dst_entry **)&rt,
-				  flowi6_to_flowi(&fl6), false);
+	route_err = v6ops->route(nft_net(pkt), (struct dst_entry **)&rt,
+				 flowi6_to_flowi(&fl6), false);
 	if (route_err)
 		goto err;
 
diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index e88bcb8ff0fd..b67814242f78 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -58,7 +58,6 @@ static int sockstat6_seq_open(struct inode *inode, struct file *file)
 }
 
 static const struct file_operations sockstat6_seq_fops = {
-	.owner	 = THIS_MODULE,
 	.open	 = sockstat6_seq_open,
 	.read	 = seq_read,
 	.llseek	 = seq_lseek,
@@ -248,7 +247,6 @@ static int snmp6_seq_open(struct inode *inode, struct file *file)
 }
 
 static const struct file_operations snmp6_seq_fops = {
-	.owner	 = THIS_MODULE,
 	.open	 = snmp6_seq_open,
 	.read	 = seq_read,
 	.llseek	 = seq_lseek,
@@ -274,7 +272,6 @@ static int snmp6_dev_seq_open(struct inode *inode, struct file *file)
 }
 
 static const struct file_operations snmp6_dev_seq_fops = {
-	.owner	 = THIS_MODULE,
 	.open	 = snmp6_dev_seq_open,
 	.read	 = seq_read,
 	.llseek	 = seq_lseek,
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 761a473a07c5..ddda7eb3c623 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -1308,7 +1308,6 @@ static int raw6_seq_open(struct inode *inode, struct file *file)
 }
 
 static const struct file_operations raw6_seq_fops = {
-	.owner =	THIS_MODULE,
 	.open =		raw6_seq_open,
 	.read =		seq_read,
 	.llseek =	seq_lseek,
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 0458b761f3c5..fb2d251c0500 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -186,7 +186,7 @@ static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
 
 static u32 *rt6_pcpu_cow_metrics(struct rt6_info *rt)
 {
-	return dst_metrics_write_ptr(rt->dst.from);
+	return dst_metrics_write_ptr(&rt->from->dst);
 }
 
 static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
@@ -391,7 +391,7 @@ static void ip6_dst_destroy(struct dst_entry *dst)
 {
 	struct rt6_info *rt = (struct rt6_info *)dst;
 	struct rt6_exception_bucket *bucket;
-	struct dst_entry *from = dst->from;
+	struct rt6_info *from = rt->from;
 	struct inet6_dev *idev;
 
 	dst_destroy_metrics_generic(dst);
@@ -409,8 +409,8 @@ static void ip6_dst_destroy(struct dst_entry *dst)
 		kfree(bucket);
 	}
 
-	dst->from = NULL;
-	dst_release(from);
+	rt->from = NULL;
+	dst_release(&from->dst);
 }
 
 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
@@ -443,9 +443,9 @@ static bool rt6_check_expired(const struct rt6_info *rt)
 	if (rt->rt6i_flags & RTF_EXPIRES) {
 		if (time_after(jiffies, rt->dst.expires))
 			return true;
-	} else if (rt->dst.from) {
+	} else if (rt->from) {
 		return rt->dst.obsolete != DST_OBSOLETE_FORCE_CHK ||
-		       rt6_check_expired((struct rt6_info *)rt->dst.from);
+			rt6_check_expired(rt->from);
 	}
 	return false;
 }
@@ -455,7 +455,6 @@ static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
 					     int strict)
 {
 	struct rt6_info *sibling, *next_sibling;
-	int route_choosen;
 
 	/* We might have already computed the hash for ICMPv6 errors. In such
 	 * case it will always be non-zero. Otherwise now is the time to do it.
@@ -463,26 +462,19 @@ static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
 	if (!fl6->mp_hash)
 		fl6->mp_hash = rt6_multipath_hash(fl6, NULL);
 
-	route_choosen = fl6->mp_hash % (match->rt6i_nsiblings + 1);
-	/* Don't change the route, if route_choosen == 0
-	 * (siblings does not include ourself)
-	 */
-	if (route_choosen)
-		list_for_each_entry_safe(sibling, next_sibling,
-				&match->rt6i_siblings, rt6i_siblings) {
-			route_choosen--;
-			if (route_choosen == 0) {
-				struct inet6_dev *idev = sibling->rt6i_idev;
-
-				if (!netif_carrier_ok(sibling->dst.dev) &&
-				    idev->cnf.ignore_routes_with_linkdown)
-					break;
-				if (rt6_score_route(sibling, oif, strict) < 0)
-					break;
-				match = sibling;
-				break;
-			}
-		}
+	if (fl6->mp_hash <= atomic_read(&match->rt6i_nh_upper_bound))
+		return match;
+
+	list_for_each_entry_safe(sibling, next_sibling, &match->rt6i_siblings,
+				 rt6i_siblings) {
+		if (fl6->mp_hash > atomic_read(&sibling->rt6i_nh_upper_bound))
+			continue;
+		if (rt6_score_route(sibling, oif, strict) < 0)
+			break;
+		match = sibling;
+		break;
+	}
+
 	return match;
 }
 
@@ -499,12 +491,15 @@ static inline struct rt6_info *rt6_device_match(struct net *net,
 	struct rt6_info *local = NULL;
 	struct rt6_info *sprt;
 
-	if (!oif && ipv6_addr_any(saddr))
-		goto out;
+	if (!oif && ipv6_addr_any(saddr) && !(rt->rt6i_nh_flags & RTNH_F_DEAD))
+		return rt;
 
-	for (sprt = rt; sprt; sprt = rcu_dereference(sprt->dst.rt6_next)) {
+	for (sprt = rt; sprt; sprt = rcu_dereference(sprt->rt6_next)) {
 		struct net_device *dev = sprt->dst.dev;
 
+		if (sprt->rt6i_nh_flags & RTNH_F_DEAD)
+			continue;
+
 		if (oif) {
 			if (dev->ifindex == oif)
 				return sprt;
@@ -533,8 +528,8 @@ static inline struct rt6_info *rt6_device_match(struct net *net,
 		if (flags & RT6_LOOKUP_F_IFACE)
 			return net->ipv6.ip6_null_entry;
 	}
-out:
-	return rt;
+
+	return rt->rt6i_nh_flags & RTNH_F_DEAD ? net->ipv6.ip6_null_entry : rt;
 }
 
 #ifdef CONFIG_IPV6_ROUTER_PREF
@@ -679,10 +674,12 @@ static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
 	int m;
 	bool match_do_rr = false;
 	struct inet6_dev *idev = rt->rt6i_idev;
-	struct net_device *dev = rt->dst.dev;
 
-	if (dev && !netif_carrier_ok(dev) &&
-	    idev->cnf.ignore_routes_with_linkdown &&
+	if (rt->rt6i_nh_flags & RTNH_F_DEAD)
+		goto out;
+
+	if (idev->cnf.ignore_routes_with_linkdown &&
+	    rt->rt6i_nh_flags & RTNH_F_LINKDOWN &&
 	    !(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE))
 		goto out;
 
@@ -721,7 +718,7 @@ static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
 
 	match = NULL;
 	cont = NULL;
-	for (rt = rr_head; rt; rt = rcu_dereference(rt->dst.rt6_next)) {
+	for (rt = rr_head; rt; rt = rcu_dereference(rt->rt6_next)) {
 		if (rt->rt6i_metric != metric) {
 			cont = rt;
 			break;
@@ -731,7 +728,7 @@ static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
 	}
 
 	for (rt = leaf; rt && rt != rr_head;
-	     rt = rcu_dereference(rt->dst.rt6_next)) {
+	     rt = rcu_dereference(rt->rt6_next)) {
 		if (rt->rt6i_metric != metric) {
 			cont = rt;
 			break;
@@ -743,7 +740,7 @@ static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
 	if (match || !cont)
 		return match;
 
-	for (rt = cont; rt; rt = rcu_dereference(rt->dst.rt6_next))
+	for (rt = cont; rt; rt = rcu_dereference(rt->rt6_next))
 		match = find_match(rt, oif, strict, &mpri, match, do_rr);
 
 	return match;
@@ -781,7 +778,7 @@ static struct rt6_info *rt6_select(struct net *net, struct fib6_node *fn,
 			     &do_rr);
 
 	if (do_rr) {
-		struct rt6_info *next = rcu_dereference(rt0->dst.rt6_next);
+		struct rt6_info *next = rcu_dereference(rt0->rt6_next);
 
 		/* no entries matched; do round-robin */
 		if (!next || next->rt6i_metric != rt0->rt6i_metric)
@@ -1054,7 +1051,7 @@ static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
 	 */
 
 	if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
-		ort = (struct rt6_info *)ort->dst.from;
+		ort = ort->from;
 
 	rcu_read_lock();
 	dev = ip6_rt_get_dev_rcu(ort);
@@ -1274,7 +1271,7 @@ static int rt6_insert_exception(struct rt6_info *nrt,
 
 	/* ort can't be a cache or pcpu route */
 	if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
-		ort = (struct rt6_info *)ort->dst.from;
+		ort = ort->from;
 	WARN_ON_ONCE(ort->rt6i_flags & (RTF_CACHE | RTF_PCPU));
 
 	spin_lock_bh(&rt6_exception_lock);
@@ -1346,7 +1343,9 @@ out:
 
 	/* Update fn->fn_sernum to invalidate all cached dst */
 	if (!err) {
+		spin_lock_bh(&ort->rt6i_table->tb6_lock);
 		fib6_update_sernum(ort);
+		spin_unlock_bh(&ort->rt6i_table->tb6_lock);
 		fib6_force_start_gc(net);
 	}
 
@@ -1415,8 +1414,8 @@ static struct rt6_info *rt6_find_cached_rt(struct rt6_info *rt,
 /* Remove the passed in cached rt from the hash table that contains it */
 int rt6_remove_exception_rt(struct rt6_info *rt)
 {
-	struct rt6_info *from = (struct rt6_info *)rt->dst.from;
 	struct rt6_exception_bucket *bucket;
+	struct rt6_info *from = rt->from;
 	struct in6_addr *src_key = NULL;
 	struct rt6_exception *rt6_ex;
 	int err;
@@ -1460,8 +1459,8 @@ int rt6_remove_exception_rt(struct rt6_info *rt)
  */
 static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
 {
-	struct rt6_info *from = (struct rt6_info *)rt->dst.from;
 	struct rt6_exception_bucket *bucket;
+	struct rt6_info *from = rt->from;
 	struct in6_addr *src_key = NULL;
 	struct rt6_exception *rt6_ex;
 
@@ -1586,12 +1585,19 @@ static void rt6_age_examine_exception(struct rt6_exception_bucket *bucket,
 	 * EXPIRES exceptions - e.g. pmtu-generated ones are pruned when
 	 * expired, independently from their aging, as per RFC 8201 section 4
 	 */
-	if (!(rt->rt6i_flags & RTF_EXPIRES) &&
-	    time_after_eq(now, rt->dst.lastuse + gc_args->timeout)) {
-		RT6_TRACE("aging clone %p\n", rt);
+	if (!(rt->rt6i_flags & RTF_EXPIRES)) {
+		if (time_after_eq(now, rt->dst.lastuse + gc_args->timeout)) {
+			RT6_TRACE("aging clone %p\n", rt);
+			rt6_remove_exception(bucket, rt6_ex);
+			return;
+		}
+	} else if (time_after(jiffies, rt->dst.expires)) {
+		RT6_TRACE("purging expired route %p\n", rt);
 		rt6_remove_exception(bucket, rt6_ex);
 		return;
-	} else if (rt->rt6i_flags & RTF_GATEWAY) {
+	}
+
+	if (rt->rt6i_flags & RTF_GATEWAY) {
 		struct neighbour *neigh;
 		__u8 neigh_flags = 0;
 
@@ -1606,11 +1612,8 @@ static void rt6_age_examine_exception(struct rt6_exception_bucket *bucket,
 			rt6_remove_exception(bucket, rt6_ex);
 			return;
 		}
-	} else if (__rt6_check_expired(rt)) {
-		RT6_TRACE("purging expired route %p\n", rt);
-		rt6_remove_exception(bucket, rt6_ex);
-		return;
 	}
+
 	gc_args->more++;
 }
 
@@ -1824,10 +1827,10 @@ u32 rt6_multipath_hash(const struct flowi6 *fl6, const struct sk_buff *skb)
 
 	if (skb) {
 		ip6_multipath_l3_keys(skb, &hash_keys);
-		return flow_hash_from_keys(&hash_keys);
+		return flow_hash_from_keys(&hash_keys) >> 1;
 	}
 
-	return get_hash_from_flowi6(fl6);
+	return get_hash_from_flowi6(fl6) >> 1;
 }
 
 void ip6_route_input(struct sk_buff *skb)
@@ -1929,9 +1932,9 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori
 
 static void rt6_dst_from_metrics_check(struct rt6_info *rt)
 {
-	if (rt->dst.from &&
-	    dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(rt->dst.from))
-		dst_init_metrics(&rt->dst, dst_metrics_ptr(rt->dst.from), true);
+	if (rt->from &&
+	    dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(&rt->from->dst))
+		dst_init_metrics(&rt->dst, dst_metrics_ptr(&rt->from->dst), true);
 }
 
 static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie)
@@ -1951,7 +1954,7 @@ static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie)
 {
 	if (!__rt6_check_expired(rt) &&
 	    rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
-	    rt6_check((struct rt6_info *)(rt->dst.from), cookie))
+	    rt6_check(rt->from, cookie))
 		return &rt->dst;
 	else
 		return NULL;
@@ -1971,7 +1974,7 @@ static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
 	rt6_dst_from_metrics_check(rt);
 
 	if (rt->rt6i_flags & RTF_PCPU ||
-	    (unlikely(!list_empty(&rt->rt6i_uncached)) && rt->dst.from))
+	    (unlikely(!list_empty(&rt->rt6i_uncached)) && rt->from))
 		return rt6_dst_from_check(rt, cookie);
 	else
 		return rt6_check(rt, cookie);
@@ -2154,6 +2157,8 @@ static struct rt6_info *__ip6_route_redirect(struct net *net,
 	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
 restart:
 	for_each_fib6_node_rt_rcu(fn) {
+		if (rt->rt6i_nh_flags & RTNH_F_DEAD)
+			continue;
 		if (rt6_check_expired(rt))
 			continue;
 		if (rt->dst.error)
@@ -2344,7 +2349,7 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
 	rt->rt6i_idev     = idev;
 	dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
 
-	/* Add this dst into uncached_list so that rt6_ifdown() can
+	/* Add this dst into uncached_list so that rt6_disable_ip() can
 	 * do proper release of the net_device
 	 */
 	rt6_uncached_list_add(rt);
@@ -2439,7 +2444,8 @@ static int ip6_convert_metrics(struct mx6_config *mxc,
 
 static struct rt6_info *ip6_nh_lookup_table(struct net *net,
 					    struct fib6_config *cfg,
-					    const struct in6_addr *gw_addr)
+					    const struct in6_addr *gw_addr,
+					    u32 tbid, int flags)
 {
 	struct flowi6 fl6 = {
 		.flowi6_oif = cfg->fc_ifindex,
@@ -2448,15 +2454,15 @@ static struct rt6_info *ip6_nh_lookup_table(struct net *net,
 	};
 	struct fib6_table *table;
 	struct rt6_info *rt;
-	int flags = RT6_LOOKUP_F_IFACE | RT6_LOOKUP_F_IGNORE_LINKSTATE;
 
-	table = fib6_get_table(net, cfg->fc_table);
+	table = fib6_get_table(net, tbid);
 	if (!table)
 		return NULL;
 
 	if (!ipv6_addr_any(&cfg->fc_prefsrc))
 		flags |= RT6_LOOKUP_F_HAS_SADDR;
 
+	flags |= RT6_LOOKUP_F_IGNORE_LINKSTATE;
 	rt = ip6_pol_route(net, table, cfg->fc_ifindex, &fl6, flags);
 
 	/* if table lookup failed, fall back to full lookup */
@@ -2468,6 +2474,82 @@ static struct rt6_info *ip6_nh_lookup_table(struct net *net,
 	return rt;
 }
 
+static int ip6_route_check_nh_onlink(struct net *net,
+				     struct fib6_config *cfg,
+				     struct net_device *dev,
+				     struct netlink_ext_ack *extack)
+{
+	u32 tbid = l3mdev_fib_table(dev) ? : RT_TABLE_LOCAL;
+	const struct in6_addr *gw_addr = &cfg->fc_gateway;
+	u32 flags = RTF_LOCAL | RTF_ANYCAST | RTF_REJECT;
+	struct rt6_info *grt;
+	int err;
+
+	err = 0;
+	grt = ip6_nh_lookup_table(net, cfg, gw_addr, tbid, 0);
+	if (grt) {
+		if (grt->rt6i_flags & flags || dev != grt->dst.dev) {
+			NL_SET_ERR_MSG(extack, "Nexthop has invalid gateway");
+			err = -EINVAL;
+		}
+
+		ip6_rt_put(grt);
+	}
+
+	return err;
+}
+
+static int ip6_route_check_nh(struct net *net,
+			      struct fib6_config *cfg,
+			      struct net_device **_dev,
+			      struct inet6_dev **idev)
+{
+	const struct in6_addr *gw_addr = &cfg->fc_gateway;
+	struct net_device *dev = _dev ? *_dev : NULL;
+	struct rt6_info *grt = NULL;
+	int err = -EHOSTUNREACH;
+
+	if (cfg->fc_table) {
+		int flags = RT6_LOOKUP_F_IFACE;
+
+		grt = ip6_nh_lookup_table(net, cfg, gw_addr,
+					  cfg->fc_table, flags);
+		if (grt) {
+			if (grt->rt6i_flags & RTF_GATEWAY ||
+			    (dev && dev != grt->dst.dev)) {
+				ip6_rt_put(grt);
+				grt = NULL;
+			}
+		}
+	}
+
+	if (!grt)
+		grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
+
+	if (!grt)
+		goto out;
+
+	if (dev) {
+		if (dev != grt->dst.dev) {
+			ip6_rt_put(grt);
+			goto out;
+		}
+	} else {
+		*_dev = dev = grt->dst.dev;
+		*idev = grt->rt6i_idev;
+		dev_hold(dev);
+		in6_dev_hold(grt->rt6i_idev);
+	}
+
+	if (!(grt->rt6i_flags & RTF_GATEWAY))
+		err = 0;
+
+	ip6_rt_put(grt);
+
+out:
+	return err;
+}
+
 static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
 					      struct netlink_ext_ack *extack)
 {
@@ -2519,6 +2601,21 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
 	if (cfg->fc_metric == 0)
 		cfg->fc_metric = IP6_RT_PRIO_USER;
 
+	if (cfg->fc_flags & RTNH_F_ONLINK) {
+		if (!dev) {
+			NL_SET_ERR_MSG(extack,
+				       "Nexthop device required for onlink");
+			err = -ENODEV;
+			goto out;
+		}
+
+		if (!(dev->flags & IFF_UP)) {
+			NL_SET_ERR_MSG(extack, "Nexthop device is not up");
+			err = -ENETDOWN;
+			goto out;
+		}
+	}
+
 	err = -ENOBUFS;
 	if (cfg->fc_nlinfo.nlh &&
 	    !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
@@ -2593,6 +2690,7 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
 #endif
 
 	rt->rt6i_metric = cfg->fc_metric;
+	rt->rt6i_nh_weight = 1;
 
 	/* We cannot add true routes via loopback here,
 	   they would result in kernel looping; promote them to reject routes
@@ -2662,8 +2760,6 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
 		rt->rt6i_gateway = *gw_addr;
 
 		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
-			struct rt6_info *grt = NULL;
-
 			/* IPv6 strictly inhibits using not link-local
 			   addresses as nexthop address.
 			   Otherwise, router will not able to send redirects.
@@ -2680,40 +2776,12 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
 				goto out;
 			}
 
-			if (cfg->fc_table) {
-				grt = ip6_nh_lookup_table(net, cfg, gw_addr);
-
-				if (grt) {
-					if (grt->rt6i_flags & RTF_GATEWAY ||
-					    (dev && dev != grt->dst.dev)) {
-						ip6_rt_put(grt);
-						grt = NULL;
-					}
-				}
-			}
-
-			if (!grt)
-				grt = rt6_lookup(net, gw_addr, NULL,
-						 cfg->fc_ifindex, 1);
-
-			err = -EHOSTUNREACH;
-			if (!grt)
-				goto out;
-			if (dev) {
-				if (dev != grt->dst.dev) {
-					ip6_rt_put(grt);
-					goto out;
-				}
+			if (cfg->fc_flags & RTNH_F_ONLINK) {
+				err = ip6_route_check_nh_onlink(net, cfg, dev,
+								extack);
 			} else {
-				dev = grt->dst.dev;
-				idev = grt->rt6i_idev;
-				dev_hold(dev);
-				in6_dev_hold(grt->rt6i_idev);
+				err = ip6_route_check_nh(net, cfg, &dev, &idev);
 			}
-			if (!(grt->rt6i_flags & RTF_GATEWAY))
-				err = 0;
-			ip6_rt_put(grt);
-
 			if (err)
 				goto out;
 		}
@@ -2732,6 +2800,12 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
 	if (!dev)
 		goto out;
 
+	if (!(dev->flags & IFF_UP)) {
+		NL_SET_ERR_MSG(extack, "Nexthop device is not up");
+		err = -ENETDOWN;
+		goto out;
+	}
+
 	if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
 		if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
 			NL_SET_ERR_MSG(extack, "Invalid source address");
@@ -2746,6 +2820,10 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
 	rt->rt6i_flags = cfg->fc_flags;
 
 install_route:
+	if (!(rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST)) &&
+	    !netif_carrier_ok(dev))
+		rt->rt6i_nh_flags |= RTNH_F_LINKDOWN;
+	rt->rt6i_nh_flags |= (cfg->fc_flags & RTNH_F_ONLINK);
 	rt->dst.dev = dev;
 	rt->rt6i_idev = idev;
 	rt->rt6i_table = table;
@@ -3056,11 +3134,11 @@ out:
 
 static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
 {
-	BUG_ON(from->dst.from);
+	BUG_ON(from->from);
 
 	rt->rt6i_flags &= ~RTF_EXPIRES;
 	dst_hold(&from->dst);
-	rt->dst.from = &from->dst;
+	rt->from = from;
 	dst_init_metrics(&rt->dst, dst_metrics_ptr(&from->dst), true);
 }
 
@@ -3459,37 +3537,249 @@ void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
 	fib6_clean_all(net, fib6_clean_tohost, gateway);
 }
 
-struct arg_dev_net {
-	struct net_device *dev;
-	struct net *net;
+struct arg_netdev_event {
+	const struct net_device *dev;
+	union {
+		unsigned int nh_flags;
+		unsigned long event;
+	};
 };
 
+static struct rt6_info *rt6_multipath_first_sibling(const struct rt6_info *rt)
+{
+	struct rt6_info *iter;
+	struct fib6_node *fn;
+
+	fn = rcu_dereference_protected(rt->rt6i_node,
+			lockdep_is_held(&rt->rt6i_table->tb6_lock));
+	iter = rcu_dereference_protected(fn->leaf,
+			lockdep_is_held(&rt->rt6i_table->tb6_lock));
+	while (iter) {
+		if (iter->rt6i_metric == rt->rt6i_metric &&
+		    rt6_qualify_for_ecmp(iter))
+			return iter;
+		iter = rcu_dereference_protected(iter->rt6_next,
+				lockdep_is_held(&rt->rt6i_table->tb6_lock));
+	}
+
+	return NULL;
+}
+
+static bool rt6_is_dead(const struct rt6_info *rt)
+{
+	if (rt->rt6i_nh_flags & RTNH_F_DEAD ||
+	    (rt->rt6i_nh_flags & RTNH_F_LINKDOWN &&
+	     rt->rt6i_idev->cnf.ignore_routes_with_linkdown))
+		return true;
+
+	return false;
+}
+
+static int rt6_multipath_total_weight(const struct rt6_info *rt)
+{
+	struct rt6_info *iter;
+	int total = 0;
+
+	if (!rt6_is_dead(rt))
+		total += rt->rt6i_nh_weight;
+
+	list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings) {
+		if (!rt6_is_dead(iter))
+			total += iter->rt6i_nh_weight;
+	}
+
+	return total;
+}
+
+static void rt6_upper_bound_set(struct rt6_info *rt, int *weight, int total)
+{
+	int upper_bound = -1;
+
+	if (!rt6_is_dead(rt)) {
+		*weight += rt->rt6i_nh_weight;
+		upper_bound = DIV_ROUND_CLOSEST_ULL((u64) (*weight) << 31,
+						    total) - 1;
+	}
+	atomic_set(&rt->rt6i_nh_upper_bound, upper_bound);
+}
+
+static void rt6_multipath_upper_bound_set(struct rt6_info *rt, int total)
+{
+	struct rt6_info *iter;
+	int weight = 0;
+
+	rt6_upper_bound_set(rt, &weight, total);
+
+	list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings)
+		rt6_upper_bound_set(iter, &weight, total);
+}
+
+void rt6_multipath_rebalance(struct rt6_info *rt)
+{
+	struct rt6_info *first;
+	int total;
+
+	/* In case the entire multipath route was marked for flushing,
+	 * then there is no need to rebalance upon the removal of every
+	 * sibling route.
+	 */
+	if (!rt->rt6i_nsiblings || rt->should_flush)
+		return;
+
+	/* During lookup routes are evaluated in order, so we need to
+	 * make sure upper bounds are assigned from the first sibling
+	 * onwards.
+	 */
+	first = rt6_multipath_first_sibling(rt);
+	if (WARN_ON_ONCE(!first))
+		return;
+
+	total = rt6_multipath_total_weight(first);
+	rt6_multipath_upper_bound_set(first, total);
+}
+
+static int fib6_ifup(struct rt6_info *rt, void *p_arg)
+{
+	const struct arg_netdev_event *arg = p_arg;
+	const struct net *net = dev_net(arg->dev);
+
+	if (rt != net->ipv6.ip6_null_entry && rt->dst.dev == arg->dev) {
+		rt->rt6i_nh_flags &= ~arg->nh_flags;
+		fib6_update_sernum_upto_root(dev_net(rt->dst.dev), rt);
+		rt6_multipath_rebalance(rt);
+	}
+
+	return 0;
+}
+
+void rt6_sync_up(struct net_device *dev, unsigned int nh_flags)
+{
+	struct arg_netdev_event arg = {
+		.dev = dev,
+		{
+			.nh_flags = nh_flags,
+		},
+	};
+
+	if (nh_flags & RTNH_F_DEAD && netif_carrier_ok(dev))
+		arg.nh_flags |= RTNH_F_LINKDOWN;
+
+	fib6_clean_all(dev_net(dev), fib6_ifup, &arg);
+}
+
+static bool rt6_multipath_uses_dev(const struct rt6_info *rt,
+				   const struct net_device *dev)
+{
+	struct rt6_info *iter;
+
+	if (rt->dst.dev == dev)
+		return true;
+	list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings)
+		if (iter->dst.dev == dev)
+			return true;
+
+	return false;
+}
+
+static void rt6_multipath_flush(struct rt6_info *rt)
+{
+	struct rt6_info *iter;
+
+	rt->should_flush = 1;
+	list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings)
+		iter->should_flush = 1;
+}
+
+static unsigned int rt6_multipath_dead_count(const struct rt6_info *rt,
+					     const struct net_device *down_dev)
+{
+	struct rt6_info *iter;
+	unsigned int dead = 0;
+
+	if (rt->dst.dev == down_dev || rt->rt6i_nh_flags & RTNH_F_DEAD)
+		dead++;
+	list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings)
+		if (iter->dst.dev == down_dev ||
+		    iter->rt6i_nh_flags & RTNH_F_DEAD)
+			dead++;
+
+	return dead;
+}
+
+static void rt6_multipath_nh_flags_set(struct rt6_info *rt,
+				       const struct net_device *dev,
+				       unsigned int nh_flags)
+{
+	struct rt6_info *iter;
+
+	if (rt->dst.dev == dev)
+		rt->rt6i_nh_flags |= nh_flags;
+	list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings)
+		if (iter->dst.dev == dev)
+			iter->rt6i_nh_flags |= nh_flags;
+}
+
 /* called with write lock held for table with rt */
-static int fib6_ifdown(struct rt6_info *rt, void *arg)
+static int fib6_ifdown(struct rt6_info *rt, void *p_arg)
 {
-	const struct arg_dev_net *adn = arg;
-	const struct net_device *dev = adn->dev;
+	const struct arg_netdev_event *arg = p_arg;
+	const struct net_device *dev = arg->dev;
+	const struct net *net = dev_net(dev);
 
-	if ((rt->dst.dev == dev || !dev) &&
-	    rt != adn->net->ipv6.ip6_null_entry &&
-	    (rt->rt6i_nsiblings == 0 ||
-	     (dev && netdev_unregistering(dev)) ||
-	     !rt->rt6i_idev->cnf.ignore_routes_with_linkdown))
-		return -1;
+	if (rt == net->ipv6.ip6_null_entry)
+		return 0;
+
+	switch (arg->event) {
+	case NETDEV_UNREGISTER:
+		return rt->dst.dev == dev ? -1 : 0;
+	case NETDEV_DOWN:
+		if (rt->should_flush)
+			return -1;
+		if (!rt->rt6i_nsiblings)
+			return rt->dst.dev == dev ? -1 : 0;
+		if (rt6_multipath_uses_dev(rt, dev)) {
+			unsigned int count;
+
+			count = rt6_multipath_dead_count(rt, dev);
+			if (rt->rt6i_nsiblings + 1 == count) {
+				rt6_multipath_flush(rt);
+				return -1;
+			}
+			rt6_multipath_nh_flags_set(rt, dev, RTNH_F_DEAD |
+						   RTNH_F_LINKDOWN);
+			fib6_update_sernum(rt);
+			rt6_multipath_rebalance(rt);
+		}
+		return -2;
+	case NETDEV_CHANGE:
+		if (rt->dst.dev != dev ||
+		    rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST))
+			break;
+		rt->rt6i_nh_flags |= RTNH_F_LINKDOWN;
+		rt6_multipath_rebalance(rt);
+		break;
+	}
 
 	return 0;
 }
 
-void rt6_ifdown(struct net *net, struct net_device *dev)
+void rt6_sync_down_dev(struct net_device *dev, unsigned long event)
 {
-	struct arg_dev_net adn = {
+	struct arg_netdev_event arg = {
 		.dev = dev,
-		.net = net,
+		{
+			.event = event,
+		},
 	};
 
-	fib6_clean_all(net, fib6_ifdown, &adn);
-	if (dev)
-		rt6_uncached_list_flush_dev(net, dev);
+	fib6_clean_all(dev_net(dev), fib6_ifdown, &arg);
+}
+
+void rt6_disable_ip(struct net_device *dev, unsigned long event)
+{
+	rt6_sync_down_dev(dev, event);
+	rt6_uncached_list_flush_dev(dev_net(dev), dev);
+	neigh_ifdown(&nd_tbl, dev);
 }
 
 struct rt6_mtu_change_arg {
@@ -3603,6 +3893,8 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
 	if (rtm->rtm_flags & RTM_F_CLONED)
 		cfg->fc_flags |= RTF_CACHE;
 
+	cfg->fc_flags |= (rtm->rtm_flags & RTNH_F_ONLINK);
+
 	cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
 	cfg->fc_nlinfo.nlh = nlh;
 	cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
@@ -3812,6 +4104,8 @@ static int ip6_route_multipath_add(struct fib6_config *cfg,
 			goto cleanup;
 		}
 
+		rt->rt6i_nh_weight = rtnh->rtnh_hops + 1;
+
 		err = ip6_route_info_append(&rt6_nh_list, rt, &r_cfg);
 		if (err) {
 			dst_release_immediate(&rt->dst);
@@ -3992,7 +4286,10 @@ static size_t rt6_nlmsg_size(struct rt6_info *rt)
 static int rt6_nexthop_info(struct sk_buff *skb, struct rt6_info *rt,
 			    unsigned int *flags, bool skip_oif)
 {
-	if (!netif_running(rt->dst.dev) || !netif_carrier_ok(rt->dst.dev)) {
+	if (rt->rt6i_nh_flags & RTNH_F_DEAD)
+		*flags |= RTNH_F_DEAD;
+
+	if (rt->rt6i_nh_flags & RTNH_F_LINKDOWN) {
 		*flags |= RTNH_F_LINKDOWN;
 		if (rt->rt6i_idev->cnf.ignore_routes_with_linkdown)
 			*flags |= RTNH_F_DEAD;
@@ -4003,6 +4300,7 @@ static int rt6_nexthop_info(struct sk_buff *skb, struct rt6_info *rt,
 			goto nla_put_failure;
 	}
 
+	*flags |= (rt->rt6i_nh_flags & RTNH_F_ONLINK);
 	if (rt->rt6i_nh_flags & RTNH_F_OFFLOAD)
 		*flags |= RTNH_F_OFFLOAD;
 
@@ -4031,7 +4329,7 @@ static int rt6_add_nexthop(struct sk_buff *skb, struct rt6_info *rt)
 	if (!rtnh)
 		goto nla_put_failure;
 
-	rtnh->rtnh_hops = 0;
+	rtnh->rtnh_hops = rt->rt6i_nh_weight - 1;
 	rtnh->rtnh_ifindex = rt->dst.dev ? rt->dst.dev->ifindex : 0;
 
 	if (rt6_nexthop_info(skb, rt, &flags, true) < 0)
@@ -4321,9 +4619,8 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
 		goto errout;
 	}
 
-	if (fibmatch && rt->dst.from) {
-		struct rt6_info *ort = container_of(rt->dst.from,
-						    struct rt6_info, dst);
+	if (fibmatch && rt->from) {
+		struct rt6_info *ort = rt->from;
 
 		dst_hold(&ort->dst);
 		ip6_rt_put(rt);
@@ -4427,7 +4724,6 @@ static int ip6_route_dev_notify(struct notifier_block *this,
 #ifdef CONFIG_PROC_FS
 
 static const struct file_operations ipv6_route_proc_fops = {
-	.owner		= THIS_MODULE,
 	.open		= ipv6_route_open,
 	.read		= seq_read,
 	.llseek		= seq_lseek,
@@ -4455,7 +4751,6 @@ static int rt6_stats_seq_open(struct inode *inode, struct file *file)
 }
 
 static const struct file_operations rt6_stats_seq_fops = {
-	.owner	 = THIS_MODULE,
 	.open	 = rt6_stats_seq_open,
 	.read	 = seq_read,
 	.llseek	 = seq_lseek,
@@ -4600,8 +4895,6 @@ static int __net_init ip6_route_net_init(struct net *net)
 					   GFP_KERNEL);
 	if (!net->ipv6.ip6_null_entry)
 		goto out_ip6_dst_entries;
-	net->ipv6.ip6_null_entry->dst.path =
-		(struct dst_entry *)net->ipv6.ip6_null_entry;
 	net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
 	dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
 			 ip6_template_metrics, true);
@@ -4613,8 +4906,6 @@ static int __net_init ip6_route_net_init(struct net *net)
 					       GFP_KERNEL);
 	if (!net->ipv6.ip6_prohibit_entry)
 		goto out_ip6_null_entry;
-	net->ipv6.ip6_prohibit_entry->dst.path =
-		(struct dst_entry *)net->ipv6.ip6_prohibit_entry;
 	net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
 	dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
 			 ip6_template_metrics, true);
@@ -4624,8 +4915,6 @@ static int __net_init ip6_route_net_init(struct net *net)
 					       GFP_KERNEL);
 	if (!net->ipv6.ip6_blk_hole_entry)
 		goto out_ip6_prohibit_entry;
-	net->ipv6.ip6_blk_hole_entry->dst.path =
-		(struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
 	net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
 	dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
 			 ip6_template_metrics, true);
@@ -4782,11 +5071,20 @@ int __init ip6_route_init(void)
 	if (ret)
 		goto fib6_rules_init;
 
-	ret = -ENOBUFS;
-	if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, 0) ||
-	    __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, 0) ||
-	    __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL,
-			    RTNL_FLAG_DOIT_UNLOCKED))
+	ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_NEWROUTE,
+				   inet6_rtm_newroute, NULL, 0);
+	if (ret < 0)
+		goto out_register_late_subsys;
+
+	ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_DELROUTE,
+				   inet6_rtm_delroute, NULL, 0);
+	if (ret < 0)
+		goto out_register_late_subsys;
+
+	ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETROUTE,
+				   inet6_rtm_getroute, NULL,
+				   RTNL_FLAG_DOIT_UNLOCKED);
+	if (ret < 0)
 		goto out_register_late_subsys;
 
 	ret = register_netdevice_notifier(&ip6_route_dev_notifier);
@@ -4804,6 +5102,7 @@ out:
 	return ret;
 
 out_register_late_subsys:
+	rtnl_unregister_all(PF_INET6);
 	unregister_pernet_subsys(&ip6_route_net_late_ops);
 fib6_rules_init:
 	fib6_rules_cleanup();
diff --git a/net/ipv6/seg6.c b/net/ipv6/seg6.c
index c81407770956..7f5621d09571 100644
--- a/net/ipv6/seg6.c
+++ b/net/ipv6/seg6.c
@@ -306,9 +306,7 @@ static int seg6_genl_dumphmac(struct sk_buff *skb, struct netlink_callback *cb)
 	struct seg6_hmac_info *hinfo;
 	int ret;
 
-	ret = rhashtable_walk_start(iter);
-	if (ret && ret != -EAGAIN)
-		goto done;
+	rhashtable_walk_start(iter);
 
 	for (;;) {
 		hinfo = rhashtable_walk_next(iter);
diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c
index 825b8e01f947..ba3767ef5e93 100644
--- a/net/ipv6/seg6_local.c
+++ b/net/ipv6/seg6_local.c
@@ -501,7 +501,7 @@ static struct seg6_action_desc *__get_action_desc(int action)
 	struct seg6_action_desc *desc;
 	int i, count;
 
-	count = sizeof(seg6_action_table) / sizeof(struct seg6_action_desc);
+	count = ARRAY_SIZE(seg6_action_table);
 	for (i = 0; i < count; i++) {
 		desc = &seg6_action_table[i];
 		if (desc->action == action)
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 7178476b3d2f..a1ab29e2ab3b 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -176,8 +176,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 			/* If interface is set while binding, indices
 			 * must coincide.
 			 */
-			if (sk->sk_bound_dev_if &&
-			    sk->sk_bound_dev_if != usin->sin6_scope_id)
+			if (!sk_dev_equal_l3scope(sk, usin->sin6_scope_id))
 				return -EINVAL;
 
 			sk->sk_bound_dev_if = usin->sin6_scope_id;
@@ -1795,7 +1794,7 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
 		timer_expires = jiffies;
 	}
 
-	state = sk_state_load(sp);
+	state = inet_sk_state_load(sp);
 	if (state == TCP_LISTEN)
 		rx_queue = sp->sk_ack_backlog;
 	else
@@ -1884,7 +1883,6 @@ out:
 }
 
 static const struct file_operations tcp6_afinfo_seq_fops = {
-	.owner   = THIS_MODULE,
 	.open    = tcp_seq_open,
 	.read    = seq_read,
 	.llseek  = seq_lseek,
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 3f30fa313bf2..52e3ea0e6f50 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -89,28 +89,12 @@ static u32 udp6_ehashfn(const struct net *net,
 			       udp_ipv6_hash_secret + net_hash_mix(net));
 }
 
-static u32 udp6_portaddr_hash(const struct net *net,
-			      const struct in6_addr *addr6,
-			      unsigned int port)
-{
-	unsigned int hash, mix = net_hash_mix(net);
-
-	if (ipv6_addr_any(addr6))
-		hash = jhash_1word(0, mix);
-	else if (ipv6_addr_v4mapped(addr6))
-		hash = jhash_1word((__force u32)addr6->s6_addr32[3], mix);
-	else
-		hash = jhash2((__force u32 *)addr6->s6_addr32, 4, mix);
-
-	return hash ^ port;
-}
-
 int udp_v6_get_port(struct sock *sk, unsigned short snum)
 {
 	unsigned int hash2_nulladdr =
-		udp6_portaddr_hash(sock_net(sk), &in6addr_any, snum);
+		ipv6_portaddr_hash(sock_net(sk), &in6addr_any, snum);
 	unsigned int hash2_partial =
-		udp6_portaddr_hash(sock_net(sk), &sk->sk_v6_rcv_saddr, 0);
+		ipv6_portaddr_hash(sock_net(sk), &sk->sk_v6_rcv_saddr, 0);
 
 	/* precompute partial secondary hash */
 	udp_sk(sk)->udp_portaddr_hash = hash2_partial;
@@ -119,7 +103,7 @@ int udp_v6_get_port(struct sock *sk, unsigned short snum)
 
 static void udp_v6_rehash(struct sock *sk)
 {
-	u16 new_hash = udp6_portaddr_hash(sock_net(sk),
+	u16 new_hash = ipv6_portaddr_hash(sock_net(sk),
 					  &sk->sk_v6_rcv_saddr,
 					  inet_sk(sk)->inet_num);
 
@@ -184,7 +168,7 @@ static struct sock *udp6_lib_lookup2(struct net *net,
 		struct udp_hslot *hslot2, struct sk_buff *skb)
 {
 	struct sock *sk, *result;
-	int score, badness, matches = 0, reuseport = 0;
+	int score, badness;
 	u32 hash = 0;
 
 	result = NULL;
@@ -193,8 +177,7 @@ static struct sock *udp6_lib_lookup2(struct net *net,
 		score = compute_score(sk, net, saddr, sport,
 				      daddr, hnum, dif, sdif, exact_dif);
 		if (score > badness) {
-			reuseport = sk->sk_reuseport;
-			if (reuseport) {
+			if (sk->sk_reuseport) {
 				hash = udp6_ehashfn(net, daddr, hnum,
 						    saddr, sport);
 
@@ -202,15 +185,9 @@ static struct sock *udp6_lib_lookup2(struct net *net,
 							sizeof(struct udphdr));
 				if (result)
 					return result;
-				matches = 1;
 			}
 			result = sk;
 			badness = score;
-		} else if (score == badness && reuseport) {
-			matches++;
-			if (reciprocal_scale(hash, matches) == 0)
-				result = sk;
-			hash = next_pseudo_random32(hash);
 		}
 	}
 	return result;
@@ -228,11 +205,11 @@ struct sock *__udp6_lib_lookup(struct net *net,
 	unsigned int hash2, slot2, slot = udp_hashfn(net, hnum, udptable->mask);
 	struct udp_hslot *hslot2, *hslot = &udptable->hash[slot];
 	bool exact_dif = udp6_lib_exact_dif_match(net, skb);
-	int score, badness, matches = 0, reuseport = 0;
+	int score, badness;
 	u32 hash = 0;
 
 	if (hslot->count > 10) {
-		hash2 = udp6_portaddr_hash(net, daddr, hnum);
+		hash2 = ipv6_portaddr_hash(net, daddr, hnum);
 		slot2 = hash2 & udptable->mask;
 		hslot2 = &udptable->hash2[slot2];
 		if (hslot->count < hslot2->count)
@@ -243,7 +220,7 @@ struct sock *__udp6_lib_lookup(struct net *net,
 					  hslot2, skb);
 		if (!result) {
 			unsigned int old_slot2 = slot2;
-			hash2 = udp6_portaddr_hash(net, &in6addr_any, hnum);
+			hash2 = ipv6_portaddr_hash(net, &in6addr_any, hnum);
 			slot2 = hash2 & udptable->mask;
 			/* avoid searching the same slot again. */
 			if (unlikely(slot2 == old_slot2))
@@ -267,23 +244,16 @@ begin:
 		score = compute_score(sk, net, saddr, sport, daddr, hnum, dif,
 				      sdif, exact_dif);
 		if (score > badness) {
-			reuseport = sk->sk_reuseport;
-			if (reuseport) {
+			if (sk->sk_reuseport) {
 				hash = udp6_ehashfn(net, daddr, hnum,
 						    saddr, sport);
 				result = reuseport_select_sock(sk, hash, skb,
 							sizeof(struct udphdr));
 				if (result)
 					return result;
-				matches = 1;
 			}
 			result = sk;
 			badness = score;
-		} else if (score == badness && reuseport) {
-			matches++;
-			if (reciprocal_scale(hash, matches) == 0)
-				result = sk;
-			hash = next_pseudo_random32(hash);
 		}
 	}
 	return result;
@@ -719,9 +689,9 @@ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
 	struct sk_buff *nskb;
 
 	if (use_hash2) {
-		hash2_any = udp6_portaddr_hash(net, &in6addr_any, hnum) &
+		hash2_any = ipv6_portaddr_hash(net, &in6addr_any, hnum) &
 			    udptable->mask;
-		hash2 = udp6_portaddr_hash(net, daddr, hnum) & udptable->mask;
+		hash2 = ipv6_portaddr_hash(net, daddr, hnum) & udptable->mask;
 start_lookup:
 		hslot = &udptable->hash2[hash2];
 		offset = offsetof(typeof(*sk), __sk_common.skc_portaddr_node);
@@ -909,7 +879,7 @@ static struct sock *__udp6_lib_demux_lookup(struct net *net,
 			int dif, int sdif)
 {
 	unsigned short hnum = ntohs(loc_port);
-	unsigned int hash2 = udp6_portaddr_hash(net, loc_addr, hnum);
+	unsigned int hash2 = ipv6_portaddr_hash(net, loc_addr, hnum);
 	unsigned int slot2 = hash2 & udp_table.mask;
 	struct udp_hslot *hslot2 = &udp_table.hash2[slot2];
 	const __portpair ports = INET_COMBINED_PORTS(rmt_port, hnum);
@@ -1509,7 +1479,6 @@ int udp6_seq_show(struct seq_file *seq, void *v)
 }
 
 static const struct file_operations udp6_afinfo_seq_fops = {
-	.owner    = THIS_MODULE,
 	.open     = udp_seq_open,
 	.read     = seq_read,
 	.llseek   = seq_lseek,
diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c
index 2784cc363f2b..14ae32bb1f3d 100644
--- a/net/ipv6/udplite.c
+++ b/net/ipv6/udplite.c
@@ -94,7 +94,6 @@ void udplitev6_exit(void)
 #ifdef CONFIG_PROC_FS
 
 static const struct file_operations udplite6_afinfo_seq_fops = {
-	.owner    = THIS_MODULE,
 	.open     = udp_seq_open,
 	.read     = seq_read,
 	.llseek   = seq_lseek,
diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c
index dc93002ff9d1..bb935a3b7fea 100644
--- a/net/ipv6/xfrm6_mode_tunnel.c
+++ b/net/ipv6/xfrm6_mode_tunnel.c
@@ -59,7 +59,7 @@ static int xfrm6_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
 	if (x->props.flags & XFRM_STATE_NOECN)
 		dsfield &= ~INET_ECN_MASK;
 	ipv6_change_dsfield(top_iph, 0, dsfield);
-	top_iph->hop_limit = ip6_dst_hoplimit(dst->child);
+	top_iph->hop_limit = ip6_dst_hoplimit(xfrm_dst_child(dst));
 	top_iph->saddr = *(struct in6_addr *)&x->props.saddr;
 	top_iph->daddr = *(struct in6_addr *)&x->id.daddr;
 	return 0;
@@ -106,17 +106,14 @@ static struct sk_buff *xfrm6_mode_tunnel_gso_segment(struct xfrm_state *x,
 {
 	__skb_push(skb, skb->mac_len);
 	return skb_mac_gso_segment(skb, features);
-
 }
 
 static void xfrm6_mode_tunnel_xmit(struct xfrm_state *x, struct sk_buff *skb)
 {
 	struct xfrm_offload *xo = xfrm_offload(skb);
 
-	if (xo->flags & XFRM_GSO_SEGMENT) {
-		skb->network_header = skb->network_header - x->props.header_len;
+	if (xo->flags & XFRM_GSO_SEGMENT)
 		skb->transport_header = skb->network_header + sizeof(struct ipv6hdr);
-	}
 
 	skb_reset_mac_len(skb);
 	pskb_pull(skb, skb->mac_len + x->props.header_len);
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 885ade234a49..09fb44ee3b45 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -265,7 +265,7 @@ static void xfrm6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
 			in6_dev_put(xdst->u.rt6.rt6i_idev);
 			xdst->u.rt6.rt6i_idev = loopback_idev;
 			in6_dev_hold(loopback_idev);
-			xdst = (struct xfrm_dst *)xdst->u.dst.child;
+			xdst = (struct xfrm_dst *)xfrm_dst_child(&xdst->u.dst);
 		} while (xdst->u.dst.xfrm);
 
 		__in6_dev_put(loopback_idev);