From a725356b6659469d182d662f22d770d83d3bc7b5 Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Tue, 18 Sep 2018 16:34:34 +0300
Subject: vfs: swap names of {do,vfs}_clone_file_range()

Commit 031a072a0b8a ("vfs: call vfs_clone_file_range() under freeze
protection") created a wrapper do_clone_file_range() around
vfs_clone_file_range() moving the freeze protection to former, so
overlayfs could call the latter.

The more common vfs practice is to call do_xxx helpers from vfs_xxx
helpers, where freeze protecction is taken in the vfs_xxx helper, so
this anomality could be a source of confusion.

It seems that commit 8ede205541ff ("ovl: add reflink/copyfile/dedup
support") may have fallen a victim to this confusion -
ovl_clone_file_range() calls the vfs_clone_file_range() helper in the
hope of getting freeze protection on upper fs, but in fact results in
overlayfs allowing to bypass upper fs freeze protection.

Swap the names of the two helpers to conform to common vfs practice
and call the correct helpers from overlayfs and nfsd.

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 include/linux/fs.h | 17 +++--------------
 1 file changed, 3 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 6c0b4a1c22ff..897eae8faee1 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1828,8 +1828,10 @@ extern ssize_t vfs_copy_file_range(struct file *, loff_t , struct file *,
 extern int vfs_clone_file_prep_inodes(struct inode *inode_in, loff_t pos_in,
 				      struct inode *inode_out, loff_t pos_out,
 				      u64 *len, bool is_dedupe);
+extern int do_clone_file_range(struct file *file_in, loff_t pos_in,
+			       struct file *file_out, loff_t pos_out, u64 len);
 extern int vfs_clone_file_range(struct file *file_in, loff_t pos_in,
-		struct file *file_out, loff_t pos_out, u64 len);
+				struct file *file_out, loff_t pos_out, u64 len);
 extern int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff,
 					 struct inode *dest, loff_t destoff,
 					 loff_t len, bool *is_same);
@@ -2773,19 +2775,6 @@ static inline void file_end_write(struct file *file)
 	__sb_end_write(file_inode(file)->i_sb, SB_FREEZE_WRITE);
 }
 
-static inline int do_clone_file_range(struct file *file_in, loff_t pos_in,
-				      struct file *file_out, loff_t pos_out,
-				      u64 len)
-{
-	int ret;
-
-	file_start_write(file_out);
-	ret = vfs_clone_file_range(file_in, pos_in, file_out, pos_out, len);
-	file_end_write(file_out);
-
-	return ret;
-}
-
 /*
  * get_write_access() gets write permission for a file.
  * put_write_access() releases this write permission.
-- 
cgit v1.2.3


From 6194114324139dc16f3251c67ed853bd6d4ae056 Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Mon, 24 Sep 2018 21:58:59 +0200
Subject: net: core: add member wol_enabled to struct net_device

Add flag wol_enabled to struct net_device indicating whether
Wake-on-LAN is enabled. As first user phy_suspend() will use it to
decide whether PHY can be suspended or not.

Fixes: f1e911d5d0df ("r8169: add basic phylib support")
Fixes: e8cfd9d6c772 ("net: phy: call state machine synchronously in phy_stop")
Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index ca5ab98053c8..c7861e4b402c 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1730,6 +1730,8 @@ enum netdev_priv_flags {
  *			switch driver and used to set the phys state of the
  *			switch port.
  *
+ *	@wol_enabled:	Wake-on-LAN is enabled
+ *
  *	FIXME: cleanup struct net_device such that network protocol info
  *	moves out.
  */
@@ -2014,6 +2016,7 @@ struct net_device {
 	struct lock_class_key	*qdisc_tx_busylock;
 	struct lock_class_key	*qdisc_running_key;
 	bool			proto_down;
+	unsigned		wol_enabled:1;
 };
 #define to_net_dev(d) container_of(d, struct net_device, dev)
 
-- 
cgit v1.2.3


From 421c119f558761556afca6a62ad183bc2d8659e0 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Mon, 24 Sep 2018 14:10:04 +0200
Subject: netfilter: avoid erronous array bounds warning

Unfortunately some versions of gcc emit following warning:
  $ make net/xfrm/xfrm_output.o
  linux/compiler.h:252:20: warning: array subscript is above array bounds [-Warray-bounds]
  hook_head = rcu_dereference(net->nf.hooks_arp[hook]);
                            ^~~~~~~~~~~~~~~~~~~~~
xfrm_output_resume passes skb_dst(skb)->ops->family as its 'pf' arg so compiler
can't know that we'll never access hooks_arp[].
(NFPROTO_IPV4 or NFPROTO_IPV6 are only possible cases).

Avoid this by adding an explicit WARN_ON_ONCE() check.

This patch has no effect if the family is a compile-time constant as gcc
will remove the switch() construct entirely.

Reported-by: David Ahern <dsahern@gmail.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Reviewed-by: David Ahern <dsahern@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
index 07efffd0c759..bbe99d2b28b4 100644
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -215,6 +215,8 @@ static inline int nf_hook(u_int8_t pf, unsigned int hook, struct net *net,
 		break;
 	case NFPROTO_ARP:
 #ifdef CONFIG_NETFILTER_FAMILY_ARP
+		if (WARN_ON_ONCE(hook >= ARRAY_SIZE(net->nf.hooks_arp)))
+			break;
 		hook_head = rcu_dereference(net->nf.hooks_arp[hook]);
 #endif
 		break;
-- 
cgit v1.2.3


From 492ecf6d6598e7c3b0b1372653ed21da50ddfb09 Mon Sep 17 00:00:00 2001
From: Alan Tull <atull@kernel.org>
Date: Wed, 12 Sep 2018 09:43:25 -0500
Subject: docs: fpga: document fpga manager flags

Add flags #defines to kerneldoc documentation in a
useful place.

Signed-off-by: Alan Tull <atull@kernel.org>
Acked-by: Moritz Fischer <mdf@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/fpga/fpga-mgr.h | 20 ++++++++++++++------
 1 file changed, 14 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/fpga/fpga-mgr.h b/include/linux/fpga/fpga-mgr.h
index 8942e61f0028..8ab5df769923 100644
--- a/include/linux/fpga/fpga-mgr.h
+++ b/include/linux/fpga/fpga-mgr.h
@@ -53,12 +53,20 @@ enum fpga_mgr_states {
 	FPGA_MGR_STATE_OPERATING,
 };
 
-/*
- * FPGA Manager flags
- * FPGA_MGR_PARTIAL_RECONFIG: do partial reconfiguration if supported
- * FPGA_MGR_EXTERNAL_CONFIG: FPGA has been configured prior to Linux booting
- * FPGA_MGR_BITSTREAM_LSB_FIRST: SPI bitstream bit order is LSB first
- * FPGA_MGR_COMPRESSED_BITSTREAM: FPGA bitstream is compressed
+/**
+ * DOC: FPGA Manager flags
+ *
+ * Flags used in the &fpga_image_info->flags field
+ *
+ * %FPGA_MGR_PARTIAL_RECONFIG: do partial reconfiguration if supported
+ *
+ * %FPGA_MGR_EXTERNAL_CONFIG: FPGA has been configured prior to Linux booting
+ *
+ * %FPGA_MGR_ENCRYPTED_BITSTREAM: indicates bitstream is encrypted
+ *
+ * %FPGA_MGR_BITSTREAM_LSB_FIRST: SPI bitstream bit order is LSB first
+ *
+ * %FPGA_MGR_COMPRESSED_BITSTREAM: FPGA bitstream is compressed
  */
 #define FPGA_MGR_PARTIAL_RECONFIG	BIT(0)
 #define FPGA_MGR_EXTERNAL_CONFIG	BIT(1)
-- 
cgit v1.2.3


From 4d8fcf216c90bc25e34ae2200aa8985ee3158898 Mon Sep 17 00:00:00 2001
From: Alaa Hleihel <alaa@mellanox.com>
Date: Wed, 5 Sep 2018 11:43:23 +0300
Subject: net/mlx5e: Avoid unbounded peer devices when unpairing TC hairpin
 rules

If the peer device was already unbound, then do not attempt to modify
it's resources, otherwise we will crash on dereferencing non-existing
device.

Fixes: 5c65c564c962 ("net/mlx5e: Support offloading TC NIC hairpin flows")
Signed-off-by: Alaa Hleihel <alaa@mellanox.com>
Reviewed-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 include/linux/mlx5/transobj.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mlx5/transobj.h b/include/linux/mlx5/transobj.h
index 83a33a1873a6..7f5ca2cd3a32 100644
--- a/include/linux/mlx5/transobj.h
+++ b/include/linux/mlx5/transobj.h
@@ -90,6 +90,8 @@ struct mlx5_hairpin {
 
 	u32 *rqn;
 	u32 *sqn;
+
+	bool peer_gone;
 };
 
 struct mlx5_hairpin *
-- 
cgit v1.2.3


From efaffc5e40aeced0bcb497ed7a0a5b8c14abfcdf Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@techsingularity.net>
Date: Mon, 1 Oct 2018 11:05:24 +0100
Subject: mm, sched/numa: Remove rate-limiting of automatic NUMA balancing
 migration

Rate limiting of page migrations due to automatic NUMA balancing was
introduced to mitigate the worst-case scenario of migrating at high
frequency due to false sharing or slowly ping-ponging between nodes.
Since then, a lot of effort was spent on correctly identifying these
pages and avoiding unnecessary migrations and the safety net may no longer
be required.

Jirka Hladky reported a regression in 4.17 due to a scheduler patch that
avoids spreading STREAM tasks wide prematurely. However, once the task
was properly placed, it delayed migrating the memory due to rate limiting.
Increasing the limit fixed the problem for him.

Currently, the limit is hard-coded and does not account for the real
capabilities of the hardware. Even if an estimate was attempted, it would
not properly account for the number of memory controllers and it could
not account for the amount of bandwidth used for normal accesses. Rather
than fudging, this patch simply eliminates the rate limiting.

However, Jirka reports that a STREAM configuration using multiple
processes achieved similar performance to 4.16. In local tests, this patch
improved performance of STREAM relative to the baseline but it is somewhat
machine-dependent. Most workloads show little or not performance difference
implying that there is not a heavily reliance on the throttling mechanism
and it is safe to remove.

STREAM on 2-socket machine
                         4.19.0-rc5             4.19.0-rc5
                         numab-v1r1       noratelimit-v1r1
MB/sec copy     43298.52 (   0.00%)    44673.38 (   3.18%)
MB/sec scale    30115.06 (   0.00%)    31293.06 (   3.91%)
MB/sec add      32825.12 (   0.00%)    34883.62 (   6.27%)
MB/sec triad    32549.52 (   0.00%)    34906.60 (   7.24%

Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
Reviewed-by: Rik van Riel <riel@surriel.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Jirka Hladky <jhladky@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Linux-MM <linux-mm@kvack.org>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/20181001100525.29789-2-mgorman@techsingularity.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/mmzone.h | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 1e22d96734e0..3f4c0b167333 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -671,12 +671,6 @@ typedef struct pglist_data {
 #ifdef CONFIG_NUMA_BALANCING
 	/* Lock serializing the migrate rate limiting window */
 	spinlock_t numabalancing_migrate_lock;
-
-	/* Rate limiting time interval */
-	unsigned long numabalancing_migrate_next_window;
-
-	/* Number of pages migrated during the rate limiting time interval */
-	unsigned long numabalancing_migrate_nr_pages;
 #endif
 	/*
 	 * This is a per-node reserve of pages that are not available
-- 
cgit v1.2.3


From 10653022456dc77b398777fd8e95126c77954b49 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Thu, 30 Aug 2018 14:54:03 +0200
Subject: Revert "serial: sh-sci: Remove SCIx_RZ_SCIFA_REGTYPE"

This reverts commit 7acece71a517cad83a0842a94d94c13f271b680c.

Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Acked-by: Chris Brandt <chris.brandt@renesas.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/serial_sci.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/serial_sci.h b/include/linux/serial_sci.h
index c0e795d95477..1c89611e0e06 100644
--- a/include/linux/serial_sci.h
+++ b/include/linux/serial_sci.h
@@ -36,6 +36,7 @@ enum {
 	SCIx_SH4_SCIF_FIFODATA_REGTYPE,
 	SCIx_SH7705_SCIF_REGTYPE,
 	SCIx_HSCIF_REGTYPE,
+	SCIx_RZ_SCIFA_REGTYPE,
 
 	SCIx_NR_REGTYPES,
 };
-- 
cgit v1.2.3


From 9d2f67e43b73e8af7438be219b66a5de0cfa8bd9 Mon Sep 17 00:00:00 2001
From: Jianfeng Tan <jianfeng.tan@linux.alibaba.com>
Date: Sat, 29 Sep 2018 15:41:27 +0000
Subject: net/packet: fix packet drop as of virtio gso

When we use raw socket as the vhost backend, a packet from virito with
gso offloading information, cannot be sent out in later validaton at
xmit path, as we did not set correct skb->protocol which is further used
for looking up the gso function.

To fix this, we set this field according to virito hdr information.

Fixes: e858fae2b0b8f4 ("virtio_net: use common code for virtio_net_hdr and skb GSO conversion")
Signed-off-by: Jianfeng Tan <jianfeng.tan@linux.alibaba.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/virtio_net.h | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h
index 9397628a1967..cb462f9ab7dd 100644
--- a/include/linux/virtio_net.h
+++ b/include/linux/virtio_net.h
@@ -5,6 +5,24 @@
 #include <linux/if_vlan.h>
 #include <uapi/linux/virtio_net.h>
 
+static inline int virtio_net_hdr_set_proto(struct sk_buff *skb,
+					   const struct virtio_net_hdr *hdr)
+{
+	switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
+	case VIRTIO_NET_HDR_GSO_TCPV4:
+	case VIRTIO_NET_HDR_GSO_UDP:
+		skb->protocol = cpu_to_be16(ETH_P_IP);
+		break;
+	case VIRTIO_NET_HDR_GSO_TCPV6:
+		skb->protocol = cpu_to_be16(ETH_P_IPV6);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
 static inline int virtio_net_hdr_to_skb(struct sk_buff *skb,
 					const struct virtio_net_hdr *hdr,
 					bool little_endian)
-- 
cgit v1.2.3


From 017b1660df89f5fb4bfe66c34e35f7d2031100c7 Mon Sep 17 00:00:00 2001
From: Mike Kravetz <mike.kravetz@oracle.com>
Date: Fri, 5 Oct 2018 15:51:29 -0700
Subject: mm: migration: fix migration of huge PMD shared pages

The page migration code employs try_to_unmap() to try and unmap the source
page.  This is accomplished by using rmap_walk to find all vmas where the
page is mapped.  This search stops when page mapcount is zero.  For shared
PMD huge pages, the page map count is always 1 no matter the number of
mappings.  Shared mappings are tracked via the reference count of the PMD
page.  Therefore, try_to_unmap stops prematurely and does not completely
unmap all mappings of the source page.

This problem can result is data corruption as writes to the original
source page can happen after contents of the page are copied to the target
page.  Hence, data is lost.

This problem was originally seen as DB corruption of shared global areas
after a huge page was soft offlined due to ECC memory errors.  DB
developers noticed they could reproduce the issue by (hotplug) offlining
memory used to back huge pages.  A simple testcase can reproduce the
problem by creating a shared PMD mapping (note that this must be at least
PUD_SIZE in size and PUD_SIZE aligned (1GB on x86)), and using
migrate_pages() to migrate process pages between nodes while continually
writing to the huge pages being migrated.

To fix, have the try_to_unmap_one routine check for huge PMD sharing by
calling huge_pmd_unshare for hugetlbfs huge pages.  If it is a shared
mapping it will be 'unshared' which removes the page table entry and drops
the reference on the PMD page.  After this, flush caches and TLB.

mmu notifiers are called before locking page tables, but we can not be
sure of PMD sharing until page tables are locked.  Therefore, check for
the possibility of PMD sharing before locking so that notifiers can
prepare for the worst possible case.

Link: http://lkml.kernel.org/r/20180823205917.16297-2-mike.kravetz@oracle.com
[mike.kravetz@oracle.com: make _range_in_vma() a static inline]
  Link: http://lkml.kernel.org/r/6063f215-a5c8-2f0c-465a-2c515ddc952d@oracle.com
Fixes: 39dde65c9940 ("shared page table for hugetlb page")
Signed-off-by: Mike Kravetz <mike.kravetz@oracle.com>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Reviewed-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: Jerome Glisse <jglisse@redhat.com>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/hugetlb.h | 14 ++++++++++++++
 include/linux/mm.h      |  6 ++++++
 2 files changed, 20 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 6b68e345f0ca..087fd5f48c91 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -140,6 +140,8 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
 pte_t *huge_pte_offset(struct mm_struct *mm,
 		       unsigned long addr, unsigned long sz);
 int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep);
+void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma,
+				unsigned long *start, unsigned long *end);
 struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address,
 			      int write);
 struct page *follow_huge_pd(struct vm_area_struct *vma,
@@ -170,6 +172,18 @@ static inline unsigned long hugetlb_total_pages(void)
 	return 0;
 }
 
+static inline int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr,
+					pte_t *ptep)
+{
+	return 0;
+}
+
+static inline void adjust_range_if_pmd_sharing_possible(
+				struct vm_area_struct *vma,
+				unsigned long *start, unsigned long *end)
+{
+}
+
 #define follow_hugetlb_page(m,v,p,vs,a,b,i,w,n)	({ BUG(); 0; })
 #define follow_huge_addr(mm, addr, write)	ERR_PTR(-EINVAL)
 #define copy_hugetlb_page_range(src, dst, vma)	({ BUG(); 0; })
diff --git a/include/linux/mm.h b/include/linux/mm.h
index a61ebe8ad4ca..0416a7204be3 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2455,6 +2455,12 @@ static inline struct vm_area_struct *find_exact_vma(struct mm_struct *mm,
 	return vma;
 }
 
+static inline bool range_in_vma(struct vm_area_struct *vma,
+				unsigned long start, unsigned long end)
+{
+	return (vma && vma->vm_start <= start && end <= vma->vm_end);
+}
+
 #ifdef CONFIG_MMU
 pgprot_t vm_get_page_prot(unsigned long vm_flags);
 void vma_set_page_prot(struct vm_area_struct *vma);
-- 
cgit v1.2.3