From 860dd4424f344400b491b212ee4acb3a358ba9d9 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 21 Nov 2017 14:23:37 +0100
Subject: scsi: dma-mapping: always provide dma_get_cache_alignment

Provide the dummy version of dma_get_cache_alignment that always returns
1 even if CONFIG_HAS_DMA is not set, so that drivers and subsystems can
use it without ifdefs.

Cc: stable@vger.kernel.org
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 include/linux/dma-mapping.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index eee1499db396..29cfd18360be 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -710,7 +710,6 @@ static inline void *dma_zalloc_coherent(struct device *dev, size_t size,
 	return ret;
 }
 
-#ifdef CONFIG_HAS_DMA
 static inline int dma_get_cache_alignment(void)
 {
 #ifdef ARCH_DMA_MINALIGN
@@ -718,7 +717,6 @@ static inline int dma_get_cache_alignment(void)
 #endif
 	return 1;
 }
-#endif
 
 /* flags for the coherent memory api */
 #define DMA_MEMORY_EXCLUSIVE		0x01
-- 
cgit v1.2.3


From 20b7035c66bacc909ae3ffe92c1a1ea7db99fe4f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jan=20H=2E=20Sch=C3=B6nherr?= <jschoenh@amazon.de>
Date: Fri, 24 Nov 2017 22:39:01 +0100
Subject: KVM: Let KVM_SET_SIGNAL_MASK work as advertised
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

KVM API says for the signal mask you set via KVM_SET_SIGNAL_MASK, that
"any unblocked signal received [...] will cause KVM_RUN to return with
-EINTR" and that "the signal will only be delivered if not blocked by
the original signal mask".

This, however, is only true, when the calling task has a signal handler
registered for a signal. If not, signal evaluation is short-circuited for
SIG_IGN and SIG_DFL, and the signal is either ignored without KVM_RUN
returning or the whole process is terminated.

Make KVM_SET_SIGNAL_MASK behave as advertised by utilizing logic similar
to that in do_sigtimedwait() to avoid short-circuiting of signals.

Signed-off-by: Jan H. SchÃ¶nherr <jschoenh@amazon.de>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/linux/kvm_host.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 2e754b7c282c..893d6d606cd0 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -715,6 +715,9 @@ int kvm_vcpu_write_guest(struct kvm_vcpu *vcpu, gpa_t gpa, const void *data,
 			 unsigned long len);
 void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, gfn_t gfn);
 
+void kvm_sigset_activate(struct kvm_vcpu *vcpu);
+void kvm_sigset_deactivate(struct kvm_vcpu *vcpu);
+
 void kvm_vcpu_block(struct kvm_vcpu *vcpu);
 void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu);
 void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu);
-- 
cgit v1.2.3


From 1751e8a6cb935e555fcdbcb9ab4f0446e322ca3e Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Mon, 27 Nov 2017 13:05:09 -0800
Subject: Rename superblock flags (MS_xyz -> SB_xyz)

This is a pure automated search-and-replace of the internal kernel
superblock flags.

The s_flags are now called SB_*, with the names and the values for the
moment mirroring the MS_* flags that they're equivalent to.

Note how the MS_xyz flags are the ones passed to the mount system call,
while the SB_xyz flags are what we then use in sb->s_flags.

The script to do this was:

    # places to look in; re security/*: it generally should *not* be
    # touched (that stuff parses mount(2) arguments directly), but
    # there are two places where we really deal with superblock flags.
    FILES="drivers/mtd drivers/staging/lustre fs ipc mm \
            include/linux/fs.h include/uapi/linux/bfs_fs.h \
            security/apparmor/apparmorfs.c security/apparmor/include/lib.h"
    # the list of MS_... constants
    SYMS="RDONLY NOSUID NODEV NOEXEC SYNCHRONOUS REMOUNT MANDLOCK \
          DIRSYNC NOATIME NODIRATIME BIND MOVE REC VERBOSE SILENT \
          POSIXACL UNBINDABLE PRIVATE SLAVE SHARED RELATIME KERNMOUNT \
          I_VERSION STRICTATIME LAZYTIME SUBMOUNT NOREMOTELOCK NOSEC BORN \
          ACTIVE NOUSER"

    SED_PROG=
    for i in $SYMS; do SED_PROG="$SED_PROG -e s/MS_$i/SB_$i/g"; done

    # we want files that contain at least one of MS_...,
    # with fs/namespace.c and fs/pnode.c excluded.
    L=$(for i in $SYMS; do git grep -w -l MS_$i $FILES; done| sort|uniq|grep -v '^fs/namespace.c'|grep -v '^fs/pnode.c')

    for f in $L; do sed -i $f $SED_PROG; done

Requested-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/fs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 2995a271ec46..bbd92da0946e 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1872,7 +1872,7 @@ struct super_operations {
  */
 #define __IS_FLG(inode, flg)	((inode)->i_sb->s_flags & (flg))
 
-static inline bool sb_rdonly(const struct super_block *sb) { return sb->s_flags & MS_RDONLY; }
+static inline bool sb_rdonly(const struct super_block *sb) { return sb->s_flags & SB_RDONLY; }
 #define IS_RDONLY(inode)	sb_rdonly((inode)->i_sb)
 #define IS_SYNC(inode)		(__IS_FLG(inode, SB_SYNCHRONOUS) || \
 					((inode)->i_flags & S_SYNC))
-- 
cgit v1.2.3


From d34971a65b72619508f49cd237283e92f1c329d5 Mon Sep 17 00:00:00 2001
From: Bhumika Goyal <bhumirks@gmail.com>
Date: Tue, 17 Oct 2017 18:14:23 +0200
Subject: sunrpc: make the function arg as const

Make the struct cache_detail *tmpl argument of the function
cache_create_net as const as it is only getting passed to kmemup having
the argument as const void *.
Add const to the prototype too.

Signed-off-by: Bhumika Goyal <bhumirks@gmail.com>
Reviewed-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 include/linux/sunrpc/cache.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h
index 270bad0e1bed..40d2822f0e2f 100644
--- a/include/linux/sunrpc/cache.h
+++ b/include/linux/sunrpc/cache.h
@@ -213,7 +213,7 @@ extern void __init cache_initialize(void);
 extern int cache_register_net(struct cache_detail *cd, struct net *net);
 extern void cache_unregister_net(struct cache_detail *cd, struct net *net);
 
-extern struct cache_detail *cache_create_net(struct cache_detail *tmpl, struct net *net);
+extern struct cache_detail *cache_create_net(const struct cache_detail *tmpl, struct net *net);
 extern void cache_destroy_net(struct cache_detail *cd, struct net *net);
 
 extern void sunrpc_init_cache_detail(struct cache_detail *cd);
-- 
cgit v1.2.3


From f50caa9b517a2542ae9769fc17ad84110ae07c8b Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 14 Nov 2017 12:40:31 +0100
Subject: debugfs: fix debugfs_real_fops() build error

Some drivers use debugfs_real_fops() even when CONFIG_DEBUG_FS is disabled,
which now leads to a build error:

In file included from include/linux/list.h:9:0,
                 from include/linux/wait.h:7,
                 from include/linux/wait_bit.h:8,
                 from include/linux/fs.h:6,
                 from drivers/net/wireless/broadcom/b43legacy/debugfs.c:26:
drivers/net/wireless/broadcom/b43legacy/debugfs.c: In function 'b43legacy_debugfs_read':
drivers/net/wireless/broadcom/b43legacy/debugfs.c:224:23: error: implicit declaration of function 'debugfs_real_fops'; did you mean 'debugfs_create_bool'? [-Werror=implicit-function-declaration]

My first impulse was to add another 'static inline' dummy function
returning NULL for it, which would work fine. However, most callers
feed the pointer into container_of(), so it seems a little dangerous
here. Since all the callers are inside of a read/write file operation
that gets eliminated in this configuration, so having an 'extern'
declaration seems better here. If it ever gets used in a dangerous
way, that will now result in a link error.

Fixes: 7c8d469877b1 ("debugfs: add support for more elaborate ->d_fsdata")
Cc: Jakub Kicinski <jakub.kicinski@netronome.com>
Cc: Simon Horman <simon.horman@netronome.com>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/debugfs.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h
index f36ecc2a5712..3b0ba54cc4d5 100644
--- a/include/linux/debugfs.h
+++ b/include/linux/debugfs.h
@@ -216,6 +216,8 @@ static inline void debugfs_remove(struct dentry *dentry)
 static inline void debugfs_remove_recursive(struct dentry *dentry)
 { }
 
+const struct file_operations *debugfs_real_fops(const struct file *filp);
+
 static inline int debugfs_file_get(struct dentry *dentry)
 {
 	return 0;
-- 
cgit v1.2.3


From fd00cf81a9a84776ba58e56bd042c726dcf75cf3 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Fri, 3 Nov 2017 15:30:53 +0100
Subject: serdev: fix receive_buf return value when no callback

The receive_buf callback is supposed to return the number of bytes
processed and should specifically not return a negative errno.

Due to missing sanity checks in the serdev tty-port controller, a driver
not providing a receive_buf callback could cause the flush_to_ldisc()
worker to spin in a tight loop when the tty buffer pointers are
incremented with -EINVAL (-22).

The missing sanity checks have now been added to the tty-port
controller, but let's fix up the serdev-controller helper as well.

Signed-off-by: Johan Hovold <johan@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/serdev.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/serdev.h b/include/linux/serdev.h
index e69402d4a8ae..d609e6dc5bad 100644
--- a/include/linux/serdev.h
+++ b/include/linux/serdev.h
@@ -184,7 +184,7 @@ static inline int serdev_controller_receive_buf(struct serdev_controller *ctrl,
 	struct serdev_device *serdev = ctrl->serdev;
 
 	if (!serdev || !serdev->ops->receive_buf)
-		return -EINVAL;
+		return 0;
 
 	return serdev->ops->receive_buf(serdev, data, count);
 }
-- 
cgit v1.2.3


From 7fa32e5ec28b1609abc0b797b58267f725fc3964 Mon Sep 17 00:00:00 2001
From: "K. Y. Srinivasan" <kys@microsoft.com>
Date: Tue, 14 Nov 2017 06:53:33 -0700
Subject: Drivers: hv: vmbus: Fix a rescind issue

The current rescind processing code will not correctly handle
the case where the host immediately rescinds a channel that has
been offerred. In this case, we could be blocked in the open call and
since the channel is rescinded, the host will not respond and we could
be blocked forever in the vmbus open call.i Fix this problem.

Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
Cc: stable@vger.kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/hyperv.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index f3e97c5f94c9..6c9336626592 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -708,6 +708,7 @@ struct vmbus_channel {
 	u8 monitor_bit;
 
 	bool rescind; /* got rescind msg */
+	struct completion rescind_event;
 
 	u32 ringbuffer_gpadlhandle;
 
-- 
cgit v1.2.3


From 668533dc0764b30c9dd2baf3ca800156f688326b Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Wed, 29 Nov 2017 10:30:13 -0800
Subject: kallsyms: take advantage of the new '%px' format

The conditional kallsym hex printing used a special fixed-width '%lx'
output (KALLSYM_FMT) in preparation for the hashing of %p, but that
series ended up adding a %px specifier to help with the conversions.

Use it, and avoid the "print pointer as an unsigned long" code.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kallsyms.h | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kallsyms.h b/include/linux/kallsyms.h
index 708f337d780b..bd118a6c60cb 100644
--- a/include/linux/kallsyms.h
+++ b/include/linux/kallsyms.h
@@ -14,12 +14,6 @@
 #define KSYM_SYMBOL_LEN (sizeof("%s+%#lx/%#lx [%s]") + (KSYM_NAME_LEN - 1) + \
 			 2*(BITS_PER_LONG*3/10) + (MODULE_NAME_LEN - 1) + 1)
 
-#ifndef CONFIG_64BIT
-# define KALLSYM_FMT "%08lx"
-#else
-# define KALLSYM_FMT "%016lx"
-#endif
-
 struct module;
 
 #ifdef CONFIG_KALLSYMS
-- 
cgit v1.2.3


From 1501899a898dfb5477c55534bdfd734c046da06d Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Wed, 29 Nov 2017 16:10:06 -0800
Subject: mm: fix device-dax pud write-faults triggered by get_user_pages()

Currently only get_user_pages_fast() can safely handle the writable gup
case due to its use of pud_access_permitted() to check whether the pud
entry is writable.  In the gup slow path pud_write() is used instead of
pud_access_permitted() and to date it has been unimplemented, just calls
BUG_ON().

    kernel BUG at ./include/linux/hugetlb.h:244!
    [..]
    RIP: 0010:follow_devmap_pud+0x482/0x490
    [..]
    Call Trace:
     follow_page_mask+0x28c/0x6e0
     __get_user_pages+0xe4/0x6c0
     get_user_pages_unlocked+0x130/0x1b0
     get_user_pages_fast+0x89/0xb0
     iov_iter_get_pages_alloc+0x114/0x4a0
     nfs_direct_read_schedule_iovec+0xd2/0x350
     ? nfs_start_io_direct+0x63/0x70
     nfs_file_direct_read+0x1e0/0x250
     nfs_file_read+0x90/0xc0

For now this just implements a simple check for the _PAGE_RW bit similar
to pmd_write.  However, this implies that the gup-slow-path check is
missing the extra checks that the gup-fast-path performs with
pud_access_permitted.  Later patches will align all checks to use the
'access_permitted' helper if the architecture provides it.

Note that the generic 'access_permitted' helper fallback is the simple
_PAGE_RW check on architectures that do not define the
'access_permitted' helper(s).

[dan.j.williams@intel.com: fix powerpc compile error]
  Link: http://lkml.kernel.org/r/151129126165.37405.16031785266675461397.stgit@dwillia2-desk3.amr.corp.intel.com
Link: http://lkml.kernel.org/r/151043109938.2842.14834662818213616199.stgit@dwillia2-desk3.amr.corp.intel.com
Fixes: a00cc7d9dd93 ("mm, x86: add support for PUD-sized transparent hugepages")
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Acked-by: Thomas Gleixner <tglx@linutronix.de>	[x86]
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/hugetlb.h | 8 --------
 1 file changed, 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index fbf5b31d47ee..82a25880714a 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -239,14 +239,6 @@ static inline int pgd_write(pgd_t pgd)
 }
 #endif
 
-#ifndef pud_write
-static inline int pud_write(pud_t pud)
-{
-	BUG();
-	return 0;
-}
-#endif
-
 #define HUGETLB_ANON_FILE "anon_hugepage"
 
 enum {
-- 
cgit v1.2.3


From 31383c6865a578834dd953d9dbc88e6b19fe3997 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Wed, 29 Nov 2017 16:10:28 -0800
Subject: mm, hugetlbfs: introduce ->split() to vm_operations_struct

Patch series "device-dax: fix unaligned munmap handling"

When device-dax is operating in huge-page mode we want it to behave like
hugetlbfs and fail attempts to split vmas into unaligned ranges.  It
would be messy to teach the munmap path about device-dax alignment
constraints in the same (hstate) way that hugetlbfs communicates this
constraint.  Instead, these patches introduce a new ->split() vm
operation.

This patch (of 2):

The device-dax interface has similar constraints as hugetlbfs in that it
requires the munmap path to unmap in huge page aligned units.  Rather
than add more custom vma handling code in __split_vma() introduce a new
vm operation to perform this vma specific check.

Link: http://lkml.kernel.org/r/151130418135.4029.6783191281930729710.stgit@dwillia2-desk3.amr.corp.intel.com
Fixes: dee410792419 ("/dev/dax, core: file operations and dax-mmap")
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Cc: Jeff Moyer <jmoyer@redhat.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index ee073146aaa7..b3b6a7e313e9 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -377,6 +377,7 @@ enum page_entry_size {
 struct vm_operations_struct {
 	void (*open)(struct vm_area_struct * area);
 	void (*close)(struct vm_area_struct * area);
+	int (*split)(struct vm_area_struct * area, unsigned long addr);
 	int (*mremap)(struct vm_area_struct * area);
 	int (*fault)(struct vm_fault *vmf);
 	int (*huge_fault)(struct vm_fault *vmf, enum page_entry_size pe_size);
-- 
cgit v1.2.3


From 2bb6d2837083de722bfdc369cb0d76ce188dd9b4 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Wed, 29 Nov 2017 16:10:35 -0800
Subject: mm: introduce get_user_pages_longterm

Patch series "introduce get_user_pages_longterm()", v2.

Here is a new get_user_pages api for cases where a driver intends to
keep an elevated page count indefinitely.  This is distinct from usages
like iov_iter_get_pages where the elevated page counts are transient.
The iov_iter_get_pages cases immediately turn around and submit the
pages to a device driver which will put_page when the i/o operation
completes (under kernel control).

In the longterm case userspace is responsible for dropping the page
reference at some undefined point in the future.  This is untenable for
filesystem-dax case where the filesystem is in control of the lifetime
of the block / page and needs reasonable limits on how long it can wait
for pages in a mapping to become idle.

Fixing filesystems to actually wait for dax pages to be idle before
blocks from a truncate/hole-punch operation are repurposed is saved for
a later patch series.

Also, allowing longterm registration of dax mappings is a future patch
series that introduces a "map with lease" semantic where the kernel can
revoke a lease and force userspace to drop its page references.

I have also tagged these for -stable to purposely break cases that might
assume that longterm memory registrations for filesystem-dax mappings
were supported by the kernel.  The behavior regression this policy
change implies is one of the reasons we maintain the "dax enabled.
Warning: EXPERIMENTAL, use at your own risk" notification when mounting
a filesystem in dax mode.

It is worth noting the device-dax interface does not suffer the same
constraints since it does not support file space management operations
like hole-punch.

This patch (of 4):

Until there is a solution to the dma-to-dax vs truncate problem it is
not safe to allow long standing memory registrations against
filesytem-dax vmas.  Device-dax vmas do not have this problem and are
explicitly allowed.

This is temporary until a "memory registration with layout-lease"
mechanism can be implemented for the affected sub-systems (RDMA and
V4L2).

[akpm@linux-foundation.org: use kcalloc()]
Link: http://lkml.kernel.org/r/151068939435.7446.13560129395419350737.stgit@dwillia2-desk3.amr.corp.intel.com
Fixes: 3565fce3a659 ("mm, x86: get_user_pages() for dax mappings")
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Suggested-by: Christoph Hellwig <hch@lst.de>
Cc: Doug Ledford <dledford@redhat.com>
Cc: Hal Rosenstock <hal.rosenstock@gmail.com>
Cc: Inki Dae <inki.dae@samsung.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Jason Gunthorpe <jgg@mellanox.com>
Cc: Jeff Moyer <jmoyer@redhat.com>
Cc: Joonyoung Shim <jy0922.shim@samsung.com>
Cc: Kyungmin Park <kyungmin.park@samsung.com>
Cc: Mauro Carvalho Chehab <mchehab@kernel.org>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Ross Zwisler <ross.zwisler@linux.intel.com>
Cc: Sean Hefty <sean.hefty@intel.com>
Cc: Seung-Woo Kim <sw0312.kim@samsung.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/fs.h | 14 ++++++++++++++
 include/linux/mm.h | 13 +++++++++++++
 2 files changed, 27 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index bbd92da0946e..9dc498d16cc1 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -3194,6 +3194,20 @@ static inline bool vma_is_dax(struct vm_area_struct *vma)
 	return vma->vm_file && IS_DAX(vma->vm_file->f_mapping->host);
 }
 
+static inline bool vma_is_fsdax(struct vm_area_struct *vma)
+{
+	struct inode *inode;
+
+	if (!vma->vm_file)
+		return false;
+	if (!vma_is_dax(vma))
+		return false;
+	inode = file_inode(vma->vm_file);
+	if (inode->i_mode == S_IFCHR)
+		return false; /* device-dax */
+	return true;
+}
+
 static inline int iocb_flags(struct file *file)
 {
 	int res = 0;
diff --git a/include/linux/mm.h b/include/linux/mm.h
index b3b6a7e313e9..ea818ff739cd 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1380,6 +1380,19 @@ long get_user_pages_locked(unsigned long start, unsigned long nr_pages,
 		    unsigned int gup_flags, struct page **pages, int *locked);
 long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages,
 		    struct page **pages, unsigned int gup_flags);
+#ifdef CONFIG_FS_DAX
+long get_user_pages_longterm(unsigned long start, unsigned long nr_pages,
+			    unsigned int gup_flags, struct page **pages,
+			    struct vm_area_struct **vmas);
+#else
+static inline long get_user_pages_longterm(unsigned long start,
+		unsigned long nr_pages, unsigned int gup_flags,
+		struct page **pages, struct vm_area_struct **vmas)
+{
+	return get_user_pages(start, nr_pages, gup_flags, pages, vmas);
+}
+#endif /* CONFIG_FS_DAX */
+
 int get_user_pages_fast(unsigned long start, int nr_pages, int write,
 			struct page **pages);
 
-- 
cgit v1.2.3


From 40a899ed16486455f964e46d1af31fd4fded21c1 Mon Sep 17 00:00:00 2001
From: Zi Yan <zi.yan@cs.rutgers.edu>
Date: Wed, 29 Nov 2017 16:11:12 -0800
Subject: mm: migrate: fix an incorrect call of prep_transhuge_page()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In https://lkml.org/lkml/2017/11/20/411, Andrea reported that during
memory hotplug/hot remove prep_transhuge_page() is called incorrectly on
non-THP pages for migration, when THP is on but THP migration is not
enabled.  This leads to a bad state of target pages for migration.

By inspecting the code, if called on a non-THP, prep_transhuge_page()
will

 1) change the value of the mapping of (page + 2), since it is used for
    THP deferred list;

 2) change the lru value of (page + 1), since it is used for THP's dtor.

Both can lead to data corruption of these two pages.

Andrea said:
 "Pragmatically and from the point of view of the memory_hotplug subsys,
  the effect is a kernel crash when pages are being migrated during a
  memory hot remove offline and migration target pages are found in a
  bad state"

This patch fixes it by only calling prep_transhuge_page() when we are
certain that the target page is THP.

Link: http://lkml.kernel.org/r/20171121021855.50525-1-zi.yan@sent.com
Fixes: 8135d8926c08 ("mm: memory_hotplug: memory hotremove supports thp migration")
Signed-off-by: Zi Yan <zi.yan@cs.rutgers.edu>
Reported-by: Andrea Reale <ar@linux.vnet.ibm.com>
Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: "Jérôme Glisse" <jglisse@redhat.com>
Cc: <stable@vger.kernel.org>	[4.14]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/migrate.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index 895ec0c4942e..a2246cf670ba 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -54,7 +54,7 @@ static inline struct page *new_page_nodemask(struct page *page,
 	new_page = __alloc_pages_nodemask(gfp_mask, order,
 				preferred_nid, nodemask);
 
-	if (new_page && PageTransHuge(page))
+	if (new_page && PageTransHuge(new_page))
 		prep_transhuge_page(new_page);
 
 	return new_page;
-- 
cgit v1.2.3


From 5d38f049cee1e1c4a7ac55aa79d37d01ddcc3860 Mon Sep 17 00:00:00 2001
From: Ian Kent <raven@themaw.net>
Date: Wed, 29 Nov 2017 16:11:26 -0800
Subject: autofs: revert "autofs: fix AT_NO_AUTOMOUNT not being honored"

Commit 42f461482178 ("autofs: fix AT_NO_AUTOMOUNT not being honored")
allowed the fstatat(2) system call to properly honor the AT_NO_AUTOMOUNT
flag but introduced a semantic change.

In order to honor AT_NO_AUTOMOUNT a semantic change was made to the
negative dentry case for stat family system calls in follow_automount().

This changed the unconditional triggering of an automount in this case
to no longer be done and an error returned instead.

This has caused more problems than I expected so reverting the change is
needed.

In a discussion with Neil Brown it was concluded that the automount(8)
daemon can implement this change without kernel modifications.  So that
will be done instead and the autofs module documentation updated with a
description of the problem and what needs to be done by module users for
this specific case.

Link: http://lkml.kernel.org/r/151174730120.6162.3848002191530283984.stgit@pluto.themaw.net
Fixes: 42f4614821 ("autofs: fix AT_NO_AUTOMOUNT not being honored")
Signed-off-by: Ian Kent <raven@themaw.net>
Cc: Neil Brown <neilb@suse.com>
Cc: Al Viro <viro@ZenIV.linux.org.uk>
Cc: David Howells <dhowells@redhat.com>
Cc: Colin Walters <walters@redhat.com>
Cc: Ondrej Holy <oholy@redhat.com>
Cc: <stable@vger.kernel.org>	[4.11+]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/fs.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 9dc498d16cc1..511fbaabf624 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -3088,7 +3088,8 @@ static inline int vfs_lstat(const char __user *name, struct kstat *stat)
 static inline int vfs_fstatat(int dfd, const char __user *filename,
 			      struct kstat *stat, int flags)
 {
-	return vfs_statx(dfd, filename, flags, stat, STATX_BASIC_STATS);
+	return vfs_statx(dfd, filename, flags | AT_NO_AUTOMOUNT,
+			 stat, STATX_BASIC_STATS);
 }
 static inline int vfs_fstat(int fd, struct kstat *stat)
 {
-- 
cgit v1.2.3


From f8821f96ae97260d68228fe53f81848b2ede44d7 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Thu, 30 Nov 2017 14:33:56 +0100
Subject: skbuff: Grammar s/are can/can/, s/change/changes/

Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index bc486ef23f20..a38c80e9f91e 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1406,8 +1406,7 @@ static inline struct sk_buff *skb_get(struct sk_buff *skb)
 }
 
 /*
- * If users == 1, we are the only owner and are can avoid redundant
- * atomic change.
+ * If users == 1, we are the only owner and can avoid redundant atomic changes.
  */
 
 /**
-- 
cgit v1.2.3


From 4db2b604c05afc3d2678fe01d3136c015df313ec Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 22 Nov 2017 11:47:28 +0100
Subject: move libgcc.h to include/linux

Introducing a new include/lib directory just for this file totally
messes up tab completion for include/linux, which is highly annoying.

Move it to include/linux where we have headers for all kinds of other
lib/ code as well.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Palmer Dabbelt <palmer@sifive.com>
---
 include/linux/libgcc.h | 43 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 43 insertions(+)
 create mode 100644 include/linux/libgcc.h

(limited to 'include/linux')

diff --git a/include/linux/libgcc.h b/include/linux/libgcc.h
new file mode 100644
index 000000000000..32e1e0f4b2d0
--- /dev/null
+++ b/include/linux/libgcc.h
@@ -0,0 +1,43 @@
+/*
+ * include/lib/libgcc.h
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see the file COPYING, or write
+ * to the Free Software Foundation, Inc.
+ */
+
+#ifndef __LIB_LIBGCC_H
+#define __LIB_LIBGCC_H
+
+#include <asm/byteorder.h>
+
+typedef int word_type __attribute__ ((mode (__word__)));
+
+#ifdef __BIG_ENDIAN
+struct DWstruct {
+	int high, low;
+};
+#elif defined(__LITTLE_ENDIAN)
+struct DWstruct {
+	int low, high;
+};
+#else
+#error I feel sick.
+#endif
+
+typedef union {
+	struct DWstruct s;
+	long long ll;
+} DWunion;
+
+#endif /* __ASM_LIBGCC_H */
-- 
cgit v1.2.3


From 6d745ee8b5e81f3a33791e3c854fbbfd6f3e585e Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 6 Sep 2017 14:56:50 +0200
Subject: iio: stm32: fix adc/trigger link error

The ADC driver can trigger on either the timer or the lptim
trigger, but it only uses a Kconfig 'select' statement
to ensure that the first of the two is present. When the lptim
trigger is enabled as a loadable module, and the adc driver
is built-in, we now get a link error:

drivers/iio/adc/stm32-adc.o: In function `stm32_adc_get_trig_extsel':
stm32-adc.c:(.text+0x4e0): undefined reference to `is_stm32_lptim_trigger'

We could use a second 'select' statement and always have both
trigger drivers enabled when the adc driver is, but it seems that
the lptimer trigger was intentionally left optional, so it seems
better to keep it that way.

This adds a hack to use 'IS_REACHABLE()' rather than 'IS_ENABLED()',
which avoids the link error, but instead leads to the lptimer trigger
not being used in the broken configuration. I've added a runtime
warning for this case to help users figure out what they did wrong
if this should ever be done by accident.

Fixes: f0b638a7f6db ("iio: adc: stm32: add support for lptimer triggers")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Cc: <Stable@vger.kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 include/linux/iio/timer/stm32-lptim-trigger.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/iio/timer/stm32-lptim-trigger.h b/include/linux/iio/timer/stm32-lptim-trigger.h
index 34d59bfdce2d..464458d20b16 100644
--- a/include/linux/iio/timer/stm32-lptim-trigger.h
+++ b/include/linux/iio/timer/stm32-lptim-trigger.h
@@ -16,11 +16,14 @@
 #define LPTIM2_OUT	"lptim2_out"
 #define LPTIM3_OUT	"lptim3_out"
 
-#if IS_ENABLED(CONFIG_IIO_STM32_LPTIMER_TRIGGER)
+#if IS_REACHABLE(CONFIG_IIO_STM32_LPTIMER_TRIGGER)
 bool is_stm32_lptim_trigger(struct iio_trigger *trig);
 #else
 static inline bool is_stm32_lptim_trigger(struct iio_trigger *trig)
 {
+#if IS_ENABLED(CONFIG_IIO_STM32_LPTIMER_TRIGGER)
+	pr_warn_once("stm32 lptim_trigger not linked in\n");
+#endif
 	return false;
 }
 #endif
-- 
cgit v1.2.3


From 4ce413d1840b25b101be3c0559161db8891f3360 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Fri, 1 Dec 2017 15:29:39 +0000
Subject: irqdesc: Use bool return type instead of int

The irq_balancing_disabled and irq_is_percpu{,_devid} functions are
clearly intended to return bool like the functions in
kernel/irq/settings.h, but actually return an int containing a masked
value of desc->status_use_accessors. This can lead to subtle breakage
if, for example, the return value is subsequently truncated when
assigned to a narrower type.

As Linus points out:

| In particular, what can (and _has_ happened) is that people end up
| using these functions that return true or false, and they assign the
| result to something like a bitfield (or a char) or whatever.
|
| And the code looks *obviously* correct, when you have things like
|
|      dev->percpu = irq_is_percpu_devid(dev->irq);
|
| and that "percpu" thing is just one status bit among many. It may even
| *work*, because maybe that "percpu" flag ends up not being all that
| important, or it just happens to never be set on the particular
| hardware that people end up testing.
|
| But while it looks obviously correct, and might even work, it's really
| fundamentally broken. Because that "true or false" function didn't
| actually return 0/1, it returned 0 or 0x20000.
|
| And 0x20000 may not fit in a bitmask or a "char" or whatever.

Fix the problem by consistently using bool as the return type for these
functions.

Reported-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: marc.zyngier@arm.com
Link: https://lkml.kernel.org/r/1512142179-24616-1-git-send-email-will.deacon@arm.com
---
 include/linux/irqdesc.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h
index dd418955962b..39fb3700f7a9 100644
--- a/include/linux/irqdesc.h
+++ b/include/linux/irqdesc.h
@@ -230,7 +230,7 @@ irq_set_chip_handler_name_locked(struct irq_data *data, struct irq_chip *chip,
 	data->chip = chip;
 }
 
-static inline int irq_balancing_disabled(unsigned int irq)
+static inline bool irq_balancing_disabled(unsigned int irq)
 {
 	struct irq_desc *desc;
 
@@ -238,7 +238,7 @@ static inline int irq_balancing_disabled(unsigned int irq)
 	return desc->status_use_accessors & IRQ_NO_BALANCING_MASK;
 }
 
-static inline int irq_is_percpu(unsigned int irq)
+static inline bool irq_is_percpu(unsigned int irq)
 {
 	struct irq_desc *desc;
 
@@ -246,7 +246,7 @@ static inline int irq_is_percpu(unsigned int irq)
 	return desc->status_use_accessors & IRQ_PER_CPU;
 }
 
-static inline int irq_is_percpu_devid(unsigned int irq)
+static inline bool irq_is_percpu_devid(unsigned int irq)
 {
 	struct irq_desc *desc;
 
-- 
cgit v1.2.3


From c895f6f703ad7dd2f99e751d9884b0aa5d0eea25 Mon Sep 17 00:00:00 2001
From: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Date: Mon, 4 Dec 2017 10:56:44 +0100
Subject: bpf: correct broken uapi for BPF_PROG_TYPE_PERF_EVENT program type

Commit 0515e5999a466dfe ("bpf: introduce BPF_PROG_TYPE_PERF_EVENT
program type") introduced the bpf_perf_event_data structure which
exports the pt_regs structure.  This is OK for multiple architectures
but fail for s390 and arm64 which do not export pt_regs.  Programs
using them, for example, the bpf selftest fail to compile on these
architectures.

For s390, exporting the pt_regs is not an option because s390 wants
to allow changes to it.  For arm64, there is a user_pt_regs structure
that covers parts of the pt_regs structure for use by user space.

To solve the broken uapi for s390 and arm64, introduce an abstract
type for pt_regs and add an asm/bpf_perf_event.h file that concretes
the type.  An asm-generic header file covers the architectures that
export pt_regs today.

The arch-specific enablement for s390 and arm64 follows in separate
commits.

Reported-by: Thomas Richter <tmricht@linux.vnet.ibm.com>
Fixes: 0515e5999a466dfe ("bpf: introduce BPF_PROG_TYPE_PERF_EVENT program type")
Signed-off-by: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Reviewed-and-tested-by: Thomas Richter <tmricht@linux.vnet.ibm.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/linux/perf_event.h | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 2c9c87d8a0c1..7546822a1d74 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -15,6 +15,7 @@
 #define _LINUX_PERF_EVENT_H
 
 #include <uapi/linux/perf_event.h>
+#include <uapi/linux/bpf_perf_event.h>
 
 /*
  * Kernel-internal data types and definitions:
@@ -787,7 +788,7 @@ struct perf_output_handle {
 };
 
 struct bpf_perf_event_data_kern {
-	struct pt_regs *regs;
+	bpf_user_pt_regs_t *regs;
 	struct perf_sample_data *data;
 	struct perf_event *event;
 };
@@ -1177,6 +1178,9 @@ extern void perf_bp_event(struct perf_event *event, void *data);
 		(user_mode(regs) ? PERF_RECORD_MISC_USER : PERF_RECORD_MISC_KERNEL)
 # define perf_instruction_pointer(regs)	instruction_pointer(regs)
 #endif
+#ifndef perf_arch_bpf_user_pt_regs
+# define perf_arch_bpf_user_pt_regs(regs) regs
+#endif
 
 static inline bool has_branch_stack(struct perf_event *event)
 {
-- 
cgit v1.2.3


From f775b13eedee2f7f3c6fdd4e90fb79090ce5d339 Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@redhat.com>
Date: Tue, 14 Nov 2017 16:54:23 -0500
Subject: x86,kvm: move qemu/guest FPU switching out to vcpu_run
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Currently, every time a VCPU is scheduled out, the host kernel will
first save the guest FPU/xstate context, then load the qemu userspace
FPU context, only to then immediately save the qemu userspace FPU
context back to memory. When scheduling in a VCPU, the same extraneous
FPU loads and saves are done.

This could be avoided by moving from a model where the guest FPU is
loaded and stored with preemption disabled, to a model where the
qemu userspace FPU is swapped out for the guest FPU context for
the duration of the KVM_RUN ioctl.

This is done under the VCPU mutex, which is also taken when other
tasks inspect the VCPU FPU context, so the code should already be
safe for this change. That should come as no surprise, given that
s390 already has this optimization.

This can fix a bug where KVM calls get_user_pages while owning the
FPU, and the file system ends up requesting the FPU again:

    [258270.527947]  __warn+0xcb/0xf0
    [258270.527948]  warn_slowpath_null+0x1d/0x20
    [258270.527951]  kernel_fpu_disable+0x3f/0x50
    [258270.527953]  __kernel_fpu_begin+0x49/0x100
    [258270.527955]  kernel_fpu_begin+0xe/0x10
    [258270.527958]  crc32c_pcl_intel_update+0x84/0xb0
    [258270.527961]  crypto_shash_update+0x3f/0x110
    [258270.527968]  crc32c+0x63/0x8a [libcrc32c]
    [258270.527975]  dm_bm_checksum+0x1b/0x20 [dm_persistent_data]
    [258270.527978]  node_prepare_for_write+0x44/0x70 [dm_persistent_data]
    [258270.527985]  dm_block_manager_write_callback+0x41/0x50 [dm_persistent_data]
    [258270.527988]  submit_io+0x170/0x1b0 [dm_bufio]
    [258270.527992]  __write_dirty_buffer+0x89/0x90 [dm_bufio]
    [258270.527994]  __make_buffer_clean+0x4f/0x80 [dm_bufio]
    [258270.527996]  __try_evict_buffer+0x42/0x60 [dm_bufio]
    [258270.527998]  dm_bufio_shrink_scan+0xc0/0x130 [dm_bufio]
    [258270.528002]  shrink_slab.part.40+0x1f5/0x420
    [258270.528004]  shrink_node+0x22c/0x320
    [258270.528006]  do_try_to_free_pages+0xf5/0x330
    [258270.528008]  try_to_free_pages+0xe9/0x190
    [258270.528009]  __alloc_pages_slowpath+0x40f/0xba0
    [258270.528011]  __alloc_pages_nodemask+0x209/0x260
    [258270.528014]  alloc_pages_vma+0x1f1/0x250
    [258270.528017]  do_huge_pmd_anonymous_page+0x123/0x660
    [258270.528021]  handle_mm_fault+0xfd3/0x1330
    [258270.528025]  __get_user_pages+0x113/0x640
    [258270.528027]  get_user_pages+0x4f/0x60
    [258270.528063]  __gfn_to_pfn_memslot+0x120/0x3f0 [kvm]
    [258270.528108]  try_async_pf+0x66/0x230 [kvm]
    [258270.528135]  tdp_page_fault+0x130/0x280 [kvm]
    [258270.528149]  kvm_mmu_page_fault+0x60/0x120 [kvm]
    [258270.528158]  handle_ept_violation+0x91/0x170 [kvm_intel]
    [258270.528162]  vmx_handle_exit+0x1ca/0x1400 [kvm_intel]

No performance changes were detected in quick ping-pong tests on
my 4 socket system, which is expected since an FPU+xstate load is
on the order of 0.1us, while ping-ponging between CPUs is on the
order of 20us, and somewhat noisy.

Cc: stable@vger.kernel.org
Signed-off-by: Rik van Riel <riel@redhat.com>
Suggested-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
[Fixed a bug where reset_vcpu called put_fpu without preceding load_fpu,
 which happened inside from KVM_CREATE_VCPU ioctl. - Radim]
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
---
 include/linux/kvm_host.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 893d6d606cd0..6bdd4b9f6611 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -232,7 +232,7 @@ struct kvm_vcpu {
 	struct mutex mutex;
 	struct kvm_run *run;
 
-	int guest_fpu_loaded, guest_xcr0_loaded;
+	int guest_xcr0_loaded;
 	struct swait_queue_head wq;
 	struct pid __rcu *pid;
 	int sigset_active;
-- 
cgit v1.2.3


From d7efc6c11b277d9d80b99b1334a78bfe7d7edf10 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 5 Dec 2017 12:45:56 -0800
Subject: net: remove hlist_nulls_add_tail_rcu()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Alexander Potapenko reported use of uninitialized memory [1]

This happens when inserting a request socket into TCP ehash,
in __sk_nulls_add_node_rcu(), since sk_reuseport is not initialized.

Bug was added by commit d894ba18d4e4 ("soreuseport: fix ordering for
mixed v4/v6 sockets")

Note that d296ba60d8e2 ("soreuseport: Resolve merge conflict for v4/v6
ordering fix") missed the opportunity to get rid of
hlist_nulls_add_tail_rcu() :

Both UDP sockets and TCP/DCCP listeners no longer use
__sk_nulls_add_node_rcu() for their hash insertion.

Since all other sockets have unique 4-tuple, the reuseport status
has no special meaning, so we can always use hlist_nulls_add_head_rcu()
for them and save few cycles/instructions.

[1]

==================================================================
BUG: KMSAN: use of uninitialized memory in inet_ehash_insert+0xd40/0x1050
CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.13.0+ #3288
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011
Call Trace:
 <IRQ>
 __dump_stack lib/dump_stack.c:16
 dump_stack+0x185/0x1d0 lib/dump_stack.c:52
 kmsan_report+0x13f/0x1c0 mm/kmsan/kmsan.c:1016
 __msan_warning_32+0x69/0xb0 mm/kmsan/kmsan_instr.c:766
 __sk_nulls_add_node_rcu ./include/net/sock.h:684
 inet_ehash_insert+0xd40/0x1050 net/ipv4/inet_hashtables.c:413
 reqsk_queue_hash_req net/ipv4/inet_connection_sock.c:754
 inet_csk_reqsk_queue_hash_add+0x1cc/0x300 net/ipv4/inet_connection_sock.c:765
 tcp_conn_request+0x31e7/0x36f0 net/ipv4/tcp_input.c:6414
 tcp_v4_conn_request+0x16d/0x220 net/ipv4/tcp_ipv4.c:1314
 tcp_rcv_state_process+0x42a/0x7210 net/ipv4/tcp_input.c:5917
 tcp_v4_do_rcv+0xa6a/0xcd0 net/ipv4/tcp_ipv4.c:1483
 tcp_v4_rcv+0x3de0/0x4ab0 net/ipv4/tcp_ipv4.c:1763
 ip_local_deliver_finish+0x6bb/0xcb0 net/ipv4/ip_input.c:216
 NF_HOOK ./include/linux/netfilter.h:248
 ip_local_deliver+0x3fa/0x480 net/ipv4/ip_input.c:257
 dst_input ./include/net/dst.h:477
 ip_rcv_finish+0x6fb/0x1540 net/ipv4/ip_input.c:397
 NF_HOOK ./include/linux/netfilter.h:248
 ip_rcv+0x10f6/0x15c0 net/ipv4/ip_input.c:488
 __netif_receive_skb_core+0x36f6/0x3f60 net/core/dev.c:4298
 __netif_receive_skb net/core/dev.c:4336
 netif_receive_skb_internal+0x63c/0x19c0 net/core/dev.c:4497
 napi_skb_finish net/core/dev.c:4858
 napi_gro_receive+0x629/0xa50 net/core/dev.c:4889
 e1000_receive_skb drivers/net/ethernet/intel/e1000/e1000_main.c:4018
 e1000_clean_rx_irq+0x1492/0x1d30
drivers/net/ethernet/intel/e1000/e1000_main.c:4474
 e1000_clean+0x43aa/0x5970 drivers/net/ethernet/intel/e1000/e1000_main.c:3819
 napi_poll net/core/dev.c:5500
 net_rx_action+0x73c/0x1820 net/core/dev.c:5566
 __do_softirq+0x4b4/0x8dd kernel/softirq.c:284
 invoke_softirq kernel/softirq.c:364
 irq_exit+0x203/0x240 kernel/softirq.c:405
 exiting_irq+0xe/0x10 ./arch/x86/include/asm/apic.h:638
 do_IRQ+0x15e/0x1a0 arch/x86/kernel/irq.c:263
 common_interrupt+0x86/0x86

Fixes: d894ba18d4e4 ("soreuseport: fix ordering for mixed v4/v6 sockets")
Fixes: d296ba60d8e2 ("soreuseport: Resolve merge conflict for v4/v6 ordering fix")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: Alexander Potapenko <glider@google.com>
Acked-by: Craig Gallek <kraig@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/rculist_nulls.h | 38 --------------------------------------
 1 file changed, 38 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rculist_nulls.h b/include/linux/rculist_nulls.h
index a328e8181e49..e4b257ff881b 100644
--- a/include/linux/rculist_nulls.h
+++ b/include/linux/rculist_nulls.h
@@ -100,44 +100,6 @@ static inline void hlist_nulls_add_head_rcu(struct hlist_nulls_node *n,
 		first->pprev = &n->next;
 }
 
-/**
- * hlist_nulls_add_tail_rcu
- * @n: the element to add to the hash list.
- * @h: the list to add to.
- *
- * Description:
- * Adds the specified element to the end of the specified hlist_nulls,
- * while permitting racing traversals.  NOTE: tail insertion requires
- * list traversal.
- *
- * The caller must take whatever precautions are necessary
- * (such as holding appropriate locks) to avoid racing
- * with another list-mutation primitive, such as hlist_nulls_add_head_rcu()
- * or hlist_nulls_del_rcu(), running on this same list.
- * However, it is perfectly legal to run concurrently with
- * the _rcu list-traversal primitives, such as
- * hlist_nulls_for_each_entry_rcu(), used to prevent memory-consistency
- * problems on Alpha CPUs.  Regardless of the type of CPU, the
- * list-traversal primitive must be guarded by rcu_read_lock().
- */
-static inline void hlist_nulls_add_tail_rcu(struct hlist_nulls_node *n,
-					struct hlist_nulls_head *h)
-{
-	struct hlist_nulls_node *i, *last = NULL;
-
-	for (i = hlist_nulls_first_rcu(h); !is_a_nulls(i);
-	     i = hlist_nulls_next_rcu(i))
-		last = i;
-
-	if (last) {
-		n->next = last->next;
-		n->pprev = &last->next;
-		rcu_assign_pointer(hlist_nulls_next_rcu(last), n);
-	} else {
-		hlist_nulls_add_head_rcu(n, h);
-	}
-}
-
 /**
  * hlist_nulls_for_each_entry_rcu - iterate over rcu list of given type
  * @tpos:	the type * to use as a loop cursor.
-- 
cgit v1.2.3


From af97a77bc01ce49a466f9d4c0125479e2e2230b6 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Wed, 6 Dec 2017 09:50:08 +0000
Subject: efi: Move some sysfs files to be read-only by root

Thanks to the scripts/leaking_addresses.pl script, it was found that
some EFI values should not be readable by non-root users.

So make them root-only, and to do that, add a __ATTR_RO_MODE() macro to
make this easier, and use it in other places at the same time.

Reported-by: Linus Torvalds <torvalds@linux-foundation.org>
Tested-by: Dave Young <dyoung@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Matt Fleming <matt@codeblueprint.co.uk>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-efi@vger.kernel.org
Cc: stable <stable@vger.kernel.org>
Link: http://lkml.kernel.org/r/20171206095010.24170-2-ard.biesheuvel@linaro.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sysfs.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h
index e32dfe098e82..40839c02d28c 100644
--- a/include/linux/sysfs.h
+++ b/include/linux/sysfs.h
@@ -117,6 +117,12 @@ struct attribute_group {
 	.show	= _name##_show,						\
 }
 
+#define __ATTR_RO_MODE(_name, _mode) {					\
+	.attr	= { .name = __stringify(_name),				\
+		    .mode = VERIFY_OCTAL_PERMISSIONS(_mode) },		\
+	.show	= _name##_show,						\
+}
+
 #define __ATTR_WO(_name) {						\
 	.attr	= { .name = __stringify(_name), .mode = S_IWUSR },	\
 	.store	= _name##_store,					\
-- 
cgit v1.2.3


From a4abd7a80addb4a9547f7dfc7812566b60ec505c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= <bjorn@mork.no>
Date: Wed, 6 Dec 2017 20:21:24 +0100
Subject: usbnet: fix alignment for frames with no ethernet header
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The qmi_wwan minidriver support a 'raw-ip' mode where frames are
received without any ethernet header. This causes alignment issues
because the skbs allocated by usbnet are "IP aligned".

Fix by allowing minidrivers to disable the additional alignment
offset. This is implemented using a per-device flag, since the same
minidriver also supports 'ethernet' mode.

Fixes: 32f7adf633b9 ("net: qmi_wwan: support "raw IP" mode")
Reported-and-tested-by: Jay Foster <jay@systech.com>
Signed-off-by: Bjørn Mork <bjorn@mork.no>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/usb/usbnet.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h
index a69877734c4e..e2ec3582e549 100644
--- a/include/linux/usb/usbnet.h
+++ b/include/linux/usb/usbnet.h
@@ -82,6 +82,7 @@ struct usbnet {
 #		define EVENT_RX_KILL	10
 #		define EVENT_LINK_CHANGE	11
 #		define EVENT_SET_RX_MODE	12
+#		define EVENT_NO_IP_ALIGN	13
 };
 
 static inline struct usb_driver *driver_of(struct usb_interface *intf)
-- 
cgit v1.2.3


From d4761754b4fb2ef8d9a1e9d121c4bec84e1fe292 Mon Sep 17 00:00:00 2001
From: Yousuk Seung <ysseung@google.com>
Date: Thu, 7 Dec 2017 13:41:34 -0800
Subject: tcp: invalidate rate samples during SACK reneging

Mark tcp_sock during a SACK reneging event and invalidate rate samples
while marked. Such rate samples may overestimate bw by including packets
that were SACKed before reneging.

< ack 6001 win 10000 sack 7001:38001
< ack 7001 win 0 sack 8001:38001 // Reneg detected
> seq 7001:8001 // RTO, SACK cleared.
< ack 38001 win 10000

In above example the rate sample taken after the last ack will count
7001-38001 as delivered while the actual delivery rate likely could
be much lower i.e. 7001-8001.

This patch adds a new field tcp_sock.sack_reneg and marks it when we
declare SACK reneging and entering TCP_CA_Loss, and unmarks it after
the last rate sample was taken before moving back to TCP_CA_Open. This
patch also invalidates rate samples taken while tcp_sock.is_sack_reneg
is set.

Fixes: b9f64820fb22 ("tcp: track data delivery rate for a TCP connection")
Signed-off-by: Yousuk Seung <ysseung@google.com>
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: Yuchung Cheng <ycheng@google.com>
Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Acked-by: Priyaranjan Jha <priyarjha@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/tcp.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index df5d97a85e1a..ca4a6361389b 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -224,7 +224,8 @@ struct tcp_sock {
 		rate_app_limited:1,  /* rate_{delivered,interval_us} limited? */
 		fastopen_connect:1, /* FASTOPEN_CONNECT sockopt */
 		fastopen_no_cookie:1, /* Allow send/recv SYN+data without a cookie */
-		unused:3;
+		is_sack_reneg:1,    /* in recovery from loss with SACK reneg? */
+		unused:2;
 	u8	nonagle     : 4,/* Disable Nagle algorithm?             */
 		thin_lto    : 1,/* Use linear timeouts for thin streams */
 		unused1	    : 1,
-- 
cgit v1.2.3


From f335195adf043168ee69d78ea72ac3e30f0c57ce Mon Sep 17 00:00:00 2001
From: Michal Hocko <mhocko@suse.com>
Date: Wed, 6 Dec 2017 11:27:57 +0100
Subject: kmemcheck: rip it out for real

Commit 4675ff05de2d ("kmemcheck: rip it out") has removed the code but
for some reason SPDX header stayed in place.  This looks like a rebase
mistake in the mmotm tree or the merge mistake.  Let's drop those
leftovers as well.

Signed-off-by: Michal Hocko <mhocko@suse.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kmemcheck.h | 1 -
 1 file changed, 1 deletion(-)
 delete mode 100644 include/linux/kmemcheck.h

(limited to 'include/linux')

diff --git a/include/linux/kmemcheck.h b/include/linux/kmemcheck.h
deleted file mode 100644
index ea32a7d3cf1b..000000000000
--- a/include/linux/kmemcheck.h
+++ /dev/null
@@ -1 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-- 
cgit v1.2.3