From 4cef191d05871fbc8bb3b812880e1997e855a3b9 Mon Sep 17 00:00:00 2001
From: Jaedon Shin <jaedon.shin@gmail.com>
Date: Fri, 25 Mar 2016 12:46:54 +0900
Subject: net: phy: bcm7xxx: Add entries for Broadcom BCM7346 and BCM7362

Add PHY entries for the Broadcom BCM7346 and BCM7362 chips, these are
40nm generation Ethernet PHY.

Fixes: 815717d1473e ("net: phy: bcm7xxx: Remove wildcard entries")
Signed-off-by: Jaedon Shin <jaedon.shin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/brcmphy.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h
index f0ba9c2ec639..e3354b74286c 100644
--- a/include/linux/brcmphy.h
+++ b/include/linux/brcmphy.h
@@ -24,6 +24,8 @@
 #define PHY_ID_BCM7250			0xae025280
 #define PHY_ID_BCM7364			0xae025260
 #define PHY_ID_BCM7366			0x600d8490
+#define PHY_ID_BCM7346			0x600d8650
+#define PHY_ID_BCM7362			0x600d84b0
 #define PHY_ID_BCM7425			0x600d86b0
 #define PHY_ID_BCM7429			0x600d8730
 #define PHY_ID_BCM7435			0x600d8750
-- 
cgit v1.2.3


From 96c22a3293512ba684e73a981196430f524689da Mon Sep 17 00:00:00 2001
From: Octavian Purdila <octavian.purdila@intel.com>
Date: Wed, 23 Mar 2016 14:14:48 +0200
Subject: configfs: fix CONFIGFS_BIN_ATTR_[RW]O definitions

The type should be struct configfs_bin_attribute and not struct
configfs_attribute.

Signed-off-by: Octavian Purdila <octavian.purdila@intel.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 include/linux/configfs.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/configfs.h b/include/linux/configfs.h
index 485fe5519448..d9d6a9d77489 100644
--- a/include/linux/configfs.h
+++ b/include/linux/configfs.h
@@ -188,7 +188,7 @@ static struct configfs_bin_attribute _pfx##attr_##_name = {	\
 }
 
 #define CONFIGFS_BIN_ATTR_RO(_pfx, _name, _priv, _maxsz)	\
-static struct configfs_attribute _pfx##attr_##_name = {		\
+static struct configfs_bin_attribute _pfx##attr_##_name = {	\
 	.cb_attr = {						\
 		.ca_name	= __stringify(_name),		\
 		.ca_mode	= S_IRUGO,			\
@@ -200,7 +200,7 @@ static struct configfs_attribute _pfx##attr_##_name = {		\
 }
 
 #define CONFIGFS_BIN_ATTR_WO(_pfx, _name, _priv, _maxsz)	\
-static struct configfs_attribute _pfx##attr_##_name = {		\
+static struct configfs_bin_attribute _pfx##attr_##_name = {	\
 	.cb_attr = {						\
 		.ca_name	= __stringify(_name),		\
 		.ca_mode	= S_IWUSR,			\
-- 
cgit v1.2.3


From d101a125954eae1d397adda94ca6319485a50493 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <miklos@szeredi.hu>
Date: Sat, 26 Mar 2016 16:14:37 -0400
Subject: fs: add file_dentry()

This series fixes bugs in nfs and ext4 due to 4bacc9c9234c ("overlayfs:
Make f_path always point to the overlay and f_inode to the underlay").

Regular files opened on overlayfs will result in the file being opened on
the underlying filesystem, while f_path points to the overlayfs
mount/dentry.

This confuses filesystems which get the dentry from struct file and assume
it's theirs.

Add a new helper, file_dentry() [*], to get the filesystem's own dentry
from the file.  This checks file->f_path.dentry->d_flags against
DCACHE_OP_REAL, and returns file->f_path.dentry if DCACHE_OP_REAL is not
set (this is the common, non-overlayfs case).

In the uncommon case it will call into overlayfs's ->d_real() to get the
underlying dentry, matching file_inode(file).

The reason we need to check against the inode is that if the file is copied
up while being open, d_real() would return the upper dentry, while the open
file comes from the lower dentry.

[*] If possible, it's better simply to use file_inode() instead.

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Tested-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
Reviewed-by: Trond Myklebust <trond.myklebust@primarydata.com>
Cc: <stable@vger.kernel.org> # v4.2
Cc: David Howells <dhowells@redhat.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Daniel Axtens <dja@axtens.net>
---
 include/linux/dcache.h | 10 ++++++++++
 include/linux/fs.h     | 10 ++++++++++
 2 files changed, 20 insertions(+)

(limited to 'include')

diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 7cb043d8f4e8..4bb4de8d95ea 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -161,6 +161,7 @@ struct dentry_operations {
 	struct vfsmount *(*d_automount)(struct path *);
 	int (*d_manage)(struct dentry *, bool);
 	struct inode *(*d_select_inode)(struct dentry *, unsigned);
+	struct dentry *(*d_real)(struct dentry *, struct inode *);
 } ____cacheline_aligned;
 
 /*
@@ -229,6 +230,7 @@ struct dentry_operations {
 #define DCACHE_OP_SELECT_INODE		0x02000000 /* Unioned entry: dcache op selects inode */
 
 #define DCACHE_ENCRYPTED_WITH_KEY	0x04000000 /* dir is encrypted with a valid key */
+#define DCACHE_OP_REAL			0x08000000
 
 extern seqlock_t rename_lock;
 
@@ -555,4 +557,12 @@ static inline struct dentry *d_backing_dentry(struct dentry *upper)
 	return upper;
 }
 
+static inline struct dentry *d_real(struct dentry *dentry)
+{
+	if (unlikely(dentry->d_flags & DCACHE_OP_REAL))
+		return dentry->d_op->d_real(dentry, NULL);
+	else
+		return dentry;
+}
+
 #endif	/* __LINUX_DCACHE_H */
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 35d99266ca9a..b2ed2311b021 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1241,6 +1241,16 @@ static inline struct inode *file_inode(const struct file *f)
 	return f->f_inode;
 }
 
+static inline struct dentry *file_dentry(const struct file *file)
+{
+	struct dentry *dentry = file->f_path.dentry;
+
+	if (unlikely(dentry->d_flags & DCACHE_OP_REAL))
+		return dentry->d_op->d_real(dentry, file_inode(file));
+	else
+		return dentry;
+}
+
 static inline int locks_lock_file_wait(struct file *filp, struct file_lock *fl)
 {
 	return locks_lock_inode_wait(file_inode(filp), fl);
-- 
cgit v1.2.3


From 596cf3fe5854fe2b1703b0466ed6bf9cfb83c91e Mon Sep 17 00:00:00 2001
From: Vishwanath Pai <vpai@akamai.com>
Date: Wed, 16 Mar 2016 21:49:00 +0100
Subject: netfilter: ipset: fix race condition in ipset save, swap and delete

This fix adds a new reference counter (ref_netlink) for the struct ip_set.
The other reference counter (ref) can be swapped out by ip_set_swap and we
need a separate counter to keep track of references for netlink events
like dump. Using the same ref counter for dump causes a race condition
which can be demonstrated by the following script:

ipset create hash_ip1 hash:ip family inet hashsize 1024 maxelem 500000 \
counters
ipset create hash_ip2 hash:ip family inet hashsize 300000 maxelem 500000 \
counters
ipset create hash_ip3 hash:ip family inet hashsize 1024 maxelem 500000 \
counters

ipset save &

ipset swap hash_ip3 hash_ip2
ipset destroy hash_ip3 /* will crash the machine */

Swap will exchange the values of ref so destroy will see ref = 0 instead of
ref = 1. With this fix in place swap will not succeed because ipset save
still has ref_netlink on the set (ip_set_swap doesn't swap ref_netlink).

Both delete and swap will error out if ref_netlink != 0 on the set.

Note: The changes to *_head functions is because previously we would
increment ref whenever we called these functions, we don't do that
anymore.

Reviewed-by: Joshua Hunt <johunt@akamai.com>
Signed-off-by: Vishwanath Pai <vpai@akamai.com>
Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/ipset/ip_set.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h
index 0e1f433cc4b7..f48b8a664b0f 100644
--- a/include/linux/netfilter/ipset/ip_set.h
+++ b/include/linux/netfilter/ipset/ip_set.h
@@ -234,6 +234,10 @@ struct ip_set {
 	spinlock_t lock;
 	/* References to the set */
 	u32 ref;
+	/* References to the set for netlink events like dump,
+	 * ref can be swapped out by ip_set_swap
+	 */
+	u32 ref_netlink;
 	/* The core set type */
 	struct ip_set_type *type;
 	/* The type variant doing the real job */
-- 
cgit v1.2.3


From fc0c2028135c7f75fce36b90e44efb8003a9173b Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Tue, 8 Mar 2016 10:30:19 -0800
Subject: x86, pmem: use memcpy_mcsafe() for memcpy_from_pmem()

Update the definition of memcpy_from_pmem() to return 0 or a negative
error code.  Implement x86/arch_memcpy_from_pmem() with memcpy_mcsafe().

Cc: Borislav Petkov <bp@alien8.de>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Acked-by: Ingo Molnar <mingo@kernel.org>
Reviewed-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 include/linux/pmem.h | 22 ++++++++++++++++------
 1 file changed, 16 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/pmem.h b/include/linux/pmem.h
index 3ec5309e29f3..ac6d872ce067 100644
--- a/include/linux/pmem.h
+++ b/include/linux/pmem.h
@@ -42,6 +42,13 @@ static inline void arch_memcpy_to_pmem(void __pmem *dst, const void *src,
 	BUG();
 }
 
+static inline int arch_memcpy_from_pmem(void *dst, const void __pmem *src,
+		size_t n)
+{
+	BUG();
+	return -EFAULT;
+}
+
 static inline size_t arch_copy_from_iter_pmem(void __pmem *addr, size_t bytes,
 		struct iov_iter *i)
 {
@@ -66,14 +73,17 @@ static inline void arch_invalidate_pmem(void __pmem *addr, size_t size)
 #endif
 
 /*
- * Architectures that define ARCH_HAS_PMEM_API must provide
- * implementations for arch_memcpy_to_pmem(), arch_wmb_pmem(),
- * arch_copy_from_iter_pmem(), arch_clear_pmem(), arch_wb_cache_pmem()
- * and arch_has_wmb_pmem().
+ * memcpy_from_pmem - read from persistent memory with error handling
+ * @dst: destination buffer
+ * @src: source buffer
+ * @size: transfer length
+ *
+ * Returns 0 on success negative error code on failure.
  */
-static inline void memcpy_from_pmem(void *dst, void __pmem const *src, size_t size)
+static inline int memcpy_from_pmem(void *dst, void __pmem const *src,
+		size_t size)
 {
-	memcpy(dst, (void __force const *) src, size);
+	return arch_memcpy_from_pmem(dst, src, size);
 }
 
 static inline bool arch_has_pmem_api(void)
-- 
cgit v1.2.3


From 5acba71e18833b9d06686b3751598bfa263a3ac3 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Thu, 24 Mar 2016 15:37:59 +0100
Subject: locking/atomic: Introduce atomic_fetch_or()

This is deemed to replace the type generic fetch_or() which brings a lot
of issues such as macro induced block variable aliasing and sloppy types.
Not to mention fetch_or() doesn't refer to any namespace, adding even
more confusion.

So lets provide an atomic_t version. Current and next users of fetch_or()
are thus encouraged to use atomic_t.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/1458830281-4255-2-git-send-email-fweisbec@gmail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/atomic.h | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

(limited to 'include')

diff --git a/include/linux/atomic.h b/include/linux/atomic.h
index df4f369254c0..3d64c0852164 100644
--- a/include/linux/atomic.h
+++ b/include/linux/atomic.h
@@ -558,6 +558,27 @@ static inline int atomic_dec_if_positive(atomic_t *v)
 }
 #endif
 
+/**
+ * atomic_fetch_or - perform *p |= mask and return old value of *p
+ * @p: pointer to atomic_t
+ * @mask: mask to OR on the atomic_t
+ */
+#ifndef atomic_fetch_or
+static inline int atomic_fetch_or(atomic_t *p, int mask)
+{
+	int old, val = atomic_read(p);
+
+	for (;;) {
+		old = atomic_cmpxchg(p, val, val | mask);
+		if (old == val)
+			break;
+		val = old;
+	}
+
+	return old;
+}
+#endif
+
 /**
  * fetch_or - perform *ptr |= mask and return old value of *ptr
  * @ptr: pointer to value
-- 
cgit v1.2.3


From f009a7a767e792d5ab0b46c08d46236ea5271dd9 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Thu, 24 Mar 2016 15:38:00 +0100
Subject: timers/nohz: Convert tick dependency mask to atomic_t

The tick dependency mask was intially unsigned long because this is the
type on which clear_bit() operates on and fetch_or() accepts it.

But now that we have atomic_fetch_or(), we can instead use
atomic_andnot() to clear the bit. This consolidates the type of our
tick dependency mask, reduce its size on structures and benefit from
possible architecture optimizations on atomic_t operations.

Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/1458830281-4255-3-git-send-email-fweisbec@gmail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 60bba7e032dc..52c4847b05e2 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -720,7 +720,7 @@ struct signal_struct {
 	struct task_cputime cputime_expires;
 
 #ifdef CONFIG_NO_HZ_FULL
-	unsigned long tick_dep_mask;
+	atomic_t tick_dep_mask;
 #endif
 
 	struct list_head cpu_timers[3];
@@ -1549,7 +1549,7 @@ struct task_struct {
 #endif
 
 #ifdef CONFIG_NO_HZ_FULL
-	unsigned long tick_dep_mask;
+	atomic_t tick_dep_mask;
 #endif
 	unsigned long nvcsw, nivcsw; /* context switch counts */
 	u64 start_time;		/* monotonic time in nsec */
-- 
cgit v1.2.3


From 5529578a27288d11d4d15635c258c6dde0f0fb10 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Thu, 24 Mar 2016 15:38:01 +0100
Subject: locking/atomic, sched: Unexport fetch_or()

This patch functionally reverts:

  5fd7a09cfb8c ("atomic: Export fetch_or()")

During the merge Linus observed that the generic version of fetch_or()
was messy:

  " This makes the ugly "fetch_or()" macro that the scheduler used
    internally a new generic helper, and does a bad job at it. "

  e23604edac2a Merge branch 'timers-nohz-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Now that we have introduced atomic_fetch_or(), fetch_or() is only used
by the scheduler in order to deal with thread_info flags which type
can vary across architectures.

Lets confine fetch_or() back to the scheduler so that we encourage
future users to use the more robust and well typed atomic_t version
instead.

While at it, fetch_or() gets robustified, pasting improvements from a
previous patch by Ingo Molnar that avoids needless expression
re-evaluations in the loop.

Reported-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/1458830281-4255-4-git-send-email-fweisbec@gmail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/atomic.h | 21 ---------------------
 1 file changed, 21 deletions(-)

(limited to 'include')

diff --git a/include/linux/atomic.h b/include/linux/atomic.h
index 3d64c0852164..506c3531832e 100644
--- a/include/linux/atomic.h
+++ b/include/linux/atomic.h
@@ -579,27 +579,6 @@ static inline int atomic_fetch_or(atomic_t *p, int mask)
 }
 #endif
 
-/**
- * fetch_or - perform *ptr |= mask and return old value of *ptr
- * @ptr: pointer to value
- * @mask: mask to OR on the value
- *
- * cmpxchg based fetch_or, macro so it works for different integer types
- */
-#ifndef fetch_or
-#define fetch_or(ptr, mask)						\
-({	typeof(*(ptr)) __old, __val = *(ptr);				\
-	for (;;) {							\
-		__old = cmpxchg((ptr), __val, __val | (mask));		\
-		if (__old == __val)					\
-			break;						\
-		__val = __old;						\
-	}								\
-	__old;								\
-})
-#endif
-
-
 #ifdef CONFIG_GENERIC_ATOMIC64
 #include <asm-generic/atomic64.h>
 #endif
-- 
cgit v1.2.3


From 743bc4b069517d3bae69349a1a23511eaaae5ede Mon Sep 17 00:00:00 2001
From: John Youn <johnyoun@synopsys.com>
Date: Mon, 28 Mar 2016 16:12:21 -0700
Subject: usb: ch9: Fix SSP Device Cap wFunctionalitySupport type

The wFunctionalitySupport field should be __le16.

Signed-off-by: John Youn <johnyoun@synopsys.com>
Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
---
 include/uapi/linux/usb/ch9.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/uapi/linux/usb/ch9.h b/include/uapi/linux/usb/ch9.h
index 06d6c6228a7a..d5ce71607972 100644
--- a/include/uapi/linux/usb/ch9.h
+++ b/include/uapi/linux/usb/ch9.h
@@ -899,7 +899,7 @@ struct usb_ssp_cap_descriptor {
 	__le32 bmAttributes;
 #define USB_SSP_SUBLINK_SPEED_ATTRIBS	(0x1f << 0) /* sublink speed entries */
 #define USB_SSP_SUBLINK_SPEED_IDS	(0xf << 5)  /* speed ID entries */
-	__u16  wFunctionalitySupport;
+	__le16  wFunctionalitySupport;
 #define USB_SSP_MIN_SUBLINK_SPEED_ATTRIBUTE_ID	(0xf)
 #define USB_SSP_MIN_RX_LANE_COUNT		(0xf << 8)
 #define USB_SSP_MIN_TX_LANE_COUNT		(0xf << 12)
-- 
cgit v1.2.3


From 283d757378371e8044d873e661b1dccee46c5887 Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <dvlasenk@redhat.com>
Date: Wed, 30 Mar 2016 00:14:57 +0200
Subject: uapi/linux/stddef.h: Provide __always_inline to userspace headers

Josh Boyer reported that my recent change to uapi/linux/swab.h broke the Qemu build:

  bc27fb68aaad ("include/uapi/linux/byteorder, swab: force inlining of some byteswap operations")

Unfortunately, UAPI headers don't include compiler.h so fixing it there is not enough,
add an __always_inline definition to uapi/linux/stddef.h instead.

Testcase: "make headers_install" and try to compile this:

	#include <linux/swab.h>
	void main() {}

Reported-by: Josh Boyer <jwboyer@fedoraproject.org>
Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: David Rientjes <rientjes@google.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Thomas Graf <tgraf@suug.ch>
Cc: linux-kernel@vger.kernel.org
Link: http://lkml.kernel.org/r/1459289697-12875-1-git-send-email-dvlasenk@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/uapi/linux/stddef.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/stddef.h b/include/uapi/linux/stddef.h
index aa9f10428743..621fa8ac4425 100644
--- a/include/uapi/linux/stddef.h
+++ b/include/uapi/linux/stddef.h
@@ -1 +1,5 @@
 #include <linux/compiler.h>
+
+#ifndef __always_inline
+#define __always_inline inline
+#endif
-- 
cgit v1.2.3


From c0e760c9c66ebd8a5a1ede81868677f4df993dfb Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Wed, 30 Mar 2016 00:02:00 +0200
Subject: bpf: make padding in bpf_tunnel_key explicit

Make the 2 byte padding in struct bpf_tunnel_key between tunnel_ttl
and tunnel_label members explicit. No issue has been observed, and
gcc/llvm does padding for the old struct already, where tunnel_label
was not yet present, so the current code works, but since it's part
of uapi, make sure we don't introduce holes in structs.

Therefore, add tunnel_ext that we can use generically in future
(f.e. to flag OAM messages for backends, etc). Also add the offset
to the compat tests to be sure should some compilers not padd the
tail of the old version of bpf_tunnel_key.

Fixes: 4018ab1875e0 ("bpf: support flow label for bpf_skb_{set, get}_tunnel_key")
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/bpf.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 924f537183fd..23917bb47bf3 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -375,6 +375,7 @@ struct bpf_tunnel_key {
 	};
 	__u8 tunnel_tos;
 	__u8 tunnel_ttl;
+	__u16 tunnel_ext;
 	__u32 tunnel_label;
 };
 
-- 
cgit v1.2.3


From ce7043fd903eb9722a885b98b53fffe2cedfb047 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 29 Mar 2016 13:03:33 +0200
Subject: target: remove ->fabric_cleanup_nodeacl

Instead we can clean up the list of default ACLs in core code.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 include/target/target_core_fabric.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/target/target_core_fabric.h b/include/target/target_core_fabric.h
index 685a51aa98cc..5a9dd1892b70 100644
--- a/include/target/target_core_fabric.h
+++ b/include/target/target_core_fabric.h
@@ -87,7 +87,6 @@ struct target_core_fabric_ops {
 				struct config_group *, const char *);
 	void (*fabric_drop_np)(struct se_tpg_np *);
 	int (*fabric_init_nodeacl)(struct se_node_acl *, const char *);
-	void (*fabric_cleanup_nodeacl)(struct se_node_acl *);
 
 	struct configfs_attribute **tfc_discovery_attrs;
 	struct configfs_attribute **tfc_wwn_attrs;
-- 
cgit v1.2.3


From 839559e10601ed4459c802cbfb69747bf1844078 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 29 Mar 2016 13:03:35 +0200
Subject: target: add a new add_wwn_groups fabrics method

We need to have the WWN fully initialized before addig default groups to it,
so add a new method to add these groups after the WWN has been initialized.
Also remove the default groups in the core while we're at it.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 include/target/target_core_fabric.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/target/target_core_fabric.h b/include/target/target_core_fabric.h
index 5a9dd1892b70..8ff6d40a294f 100644
--- a/include/target/target_core_fabric.h
+++ b/include/target/target_core_fabric.h
@@ -76,6 +76,7 @@ struct target_core_fabric_ops {
 	struct se_wwn *(*fabric_make_wwn)(struct target_fabric_configfs *,
 				struct config_group *, const char *);
 	void (*fabric_drop_wwn)(struct se_wwn *);
+	void (*add_wwn_groups)(struct se_wwn *);
 	struct se_portal_group *(*fabric_make_tpg)(struct se_wwn *,
 				struct config_group *, const char *);
 	void (*fabric_drop_tpg)(struct se_portal_group *);
-- 
cgit v1.2.3


From 90195c3651800f9a7c14956f90c2b4eb0bc8f1fb Mon Sep 17 00:00:00 2001
From: Philipp Zabel <p.zabel@pengutronix.de>
Date: Tue, 23 Feb 2016 10:22:50 +0100
Subject: gpu: ipu-cpmem: modify ipu_cpmem_set_yuv_planar_full for better
 control

Let ipu_cpmem_set_yuv_planar_full take a DRM_FORMAT instead of a
V4L2_PIXFMT and allow better control over U/V stride, U offset and
V offset settings in the CPMEM.

Signed-off-by: Philipp Zabel <p.zabel@pengutronix.de>
---
 include/video/imx-ipu-v3.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/video/imx-ipu-v3.h b/include/video/imx-ipu-v3.h
index eeba75395f7d..aa921052e7a6 100644
--- a/include/video/imx-ipu-v3.h
+++ b/include/video/imx-ipu-v3.h
@@ -194,8 +194,9 @@ int ipu_cpmem_set_format_rgb(struct ipuv3_channel *ch,
 int ipu_cpmem_set_format_passthrough(struct ipuv3_channel *ch, int width);
 void ipu_cpmem_set_yuv_interleaved(struct ipuv3_channel *ch, u32 pixel_format);
 void ipu_cpmem_set_yuv_planar_full(struct ipuv3_channel *ch,
-				   u32 pixel_format, int stride,
-				   int u_offset, int v_offset);
+				   unsigned int uv_stride,
+				   unsigned int u_offset,
+				   unsigned int v_offset);
 void ipu_cpmem_set_yuv_planar(struct ipuv3_channel *ch,
 			      u32 pixel_format, int stride, int height);
 int ipu_cpmem_set_fmt(struct ipuv3_channel *ch, u32 drm_fourcc);
-- 
cgit v1.2.3


From 2bbe32f7398c9b38916983b5823e11d6aaa10be2 Mon Sep 17 00:00:00 2001
From: Liu Ying <gnuiyl@gmail.com>
Date: Mon, 14 Mar 2016 16:10:09 +0800
Subject: gpu: ipu-v3: ipu-dmfc: Make function ipu_dmfc_init_channel() return
 void

Since the function ipu_dmfc_init_channel() always returns zero, we may
change the return type to void to simplify the code.

Signed-off-by: Liu Ying <gnuiyl@gmail.com>
Signed-off-by: Philipp Zabel <p.zabel@pengutronix.de>
---
 include/video/imx-ipu-v3.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/video/imx-ipu-v3.h b/include/video/imx-ipu-v3.h
index aa921052e7a6..8555d3728b52 100644
--- a/include/video/imx-ipu-v3.h
+++ b/include/video/imx-ipu-v3.h
@@ -237,7 +237,7 @@ void ipu_dmfc_disable_channel(struct dmfc_channel *dmfc);
 int ipu_dmfc_alloc_bandwidth(struct dmfc_channel *dmfc,
 		unsigned long bandwidth_mbs, int burstsize);
 void ipu_dmfc_free_bandwidth(struct dmfc_channel *dmfc);
-int ipu_dmfc_init_channel(struct dmfc_channel *dmfc, int width);
+void ipu_dmfc_init_channel(struct dmfc_channel *dmfc, int width);
 struct dmfc_channel *ipu_dmfc_get(struct ipu_soc *ipu, int ipuv3_channel);
 void ipu_dmfc_put(struct dmfc_channel *dmfc);
 
-- 
cgit v1.2.3


From 27630c206bb00a252d21576d92f57bdcc3ab9455 Mon Sep 17 00:00:00 2001
From: Liu Ying <gnuiyl@gmail.com>
Date: Mon, 14 Mar 2016 16:10:10 +0800
Subject: gpu: ipu-v3: ipu-dmfc: Rename ipu_dmfc_init_channel to
 ipu_dmfc_config_wait4eot

The function name 'ipu_dmfc_config_wait4eot' matches the implementation of
the function better than 'ipu_dmfc_init_channel', since it only touches the
wait4eot bits.

Signed-off-by: Liu Ying <gnuiyl@gmail.com>
Signed-off-by: Philipp Zabel <p.zabel@pengutronix.de>
---
 include/video/imx-ipu-v3.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/video/imx-ipu-v3.h b/include/video/imx-ipu-v3.h
index 8555d3728b52..ad66589f2ae6 100644
--- a/include/video/imx-ipu-v3.h
+++ b/include/video/imx-ipu-v3.h
@@ -237,7 +237,7 @@ void ipu_dmfc_disable_channel(struct dmfc_channel *dmfc);
 int ipu_dmfc_alloc_bandwidth(struct dmfc_channel *dmfc,
 		unsigned long bandwidth_mbs, int burstsize);
 void ipu_dmfc_free_bandwidth(struct dmfc_channel *dmfc);
-void ipu_dmfc_init_channel(struct dmfc_channel *dmfc, int width);
+void ipu_dmfc_config_wait4eot(struct dmfc_channel *dmfc, int width);
 struct dmfc_channel *ipu_dmfc_get(struct ipu_soc *ipu, int ipuv3_channel);
 void ipu_dmfc_put(struct dmfc_channel *dmfc);
 
-- 
cgit v1.2.3


From 5a5abb1fa3b05dd6aa821525832644c1e7d2905f Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Thu, 31 Mar 2016 02:13:18 +0200
Subject: tun, bpf: fix suspicious RCU usage in tun_{attach, detach}_filter

Sasha Levin reported a suspicious rcu_dereference_protected() warning
found while fuzzing with trinity that is similar to this one:

  [   52.765684] net/core/filter.c:2262 suspicious rcu_dereference_protected() usage!
  [   52.765688] other info that might help us debug this:
  [   52.765695] rcu_scheduler_active = 1, debug_locks = 1
  [   52.765701] 1 lock held by a.out/1525:
  [   52.765704]  #0:  (rtnl_mutex){+.+.+.}, at: [<ffffffff816a64b7>] rtnl_lock+0x17/0x20
  [   52.765721] stack backtrace:
  [   52.765728] CPU: 1 PID: 1525 Comm: a.out Not tainted 4.5.0+ #264
  [...]
  [   52.765768] Call Trace:
  [   52.765775]  [<ffffffff813e488d>] dump_stack+0x85/0xc8
  [   52.765784]  [<ffffffff810f2fa5>] lockdep_rcu_suspicious+0xd5/0x110
  [   52.765792]  [<ffffffff816afdc2>] sk_detach_filter+0x82/0x90
  [   52.765801]  [<ffffffffa0883425>] tun_detach_filter+0x35/0x90 [tun]
  [   52.765810]  [<ffffffffa0884ed4>] __tun_chr_ioctl+0x354/0x1130 [tun]
  [   52.765818]  [<ffffffff8136fed0>] ? selinux_file_ioctl+0x130/0x210
  [   52.765827]  [<ffffffffa0885ce3>] tun_chr_ioctl+0x13/0x20 [tun]
  [   52.765834]  [<ffffffff81260ea6>] do_vfs_ioctl+0x96/0x690
  [   52.765843]  [<ffffffff81364af3>] ? security_file_ioctl+0x43/0x60
  [   52.765850]  [<ffffffff81261519>] SyS_ioctl+0x79/0x90
  [   52.765858]  [<ffffffff81003ba2>] do_syscall_64+0x62/0x140
  [   52.765866]  [<ffffffff817d563f>] entry_SYSCALL64_slow_path+0x25/0x25

Same can be triggered with PROVE_RCU (+ PROVE_RCU_REPEATEDLY) enabled
from tun_attach_filter() when user space calls ioctl(tun_fd, TUN{ATTACH,
DETACH}FILTER, ...) for adding/removing a BPF filter on tap devices.

Since the fix in f91ff5b9ff52 ("net: sk_{detach|attach}_filter() rcu
fixes") sk_attach_filter()/sk_detach_filter() now dereferences the
filter with rcu_dereference_protected(), checking whether socket lock
is held in control path.

Since its introduction in 994051625981 ("tun: socket filter support"),
tap filters are managed under RTNL lock from __tun_chr_ioctl(). Thus the
sock_owned_by_user(sk) doesn't apply in this specific case and therefore
triggers the false positive.

Extend the BPF API with __sk_attach_filter()/__sk_detach_filter() pair
that is used by tap filters and pass in lockdep_rtnl_is_held() for the
rcu_dereference_protected() checks instead.

Reported-by: Sasha Levin <sasha.levin@oracle.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/filter.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/linux/filter.h b/include/linux/filter.h
index 43aa1f8855c7..a51a5361695f 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -465,10 +465,14 @@ int bpf_prog_create_from_user(struct bpf_prog **pfp, struct sock_fprog *fprog,
 void bpf_prog_destroy(struct bpf_prog *fp);
 
 int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk);
+int __sk_attach_filter(struct sock_fprog *fprog, struct sock *sk,
+		       bool locked);
 int sk_attach_bpf(u32 ufd, struct sock *sk);
 int sk_reuseport_attach_filter(struct sock_fprog *fprog, struct sock *sk);
 int sk_reuseport_attach_bpf(u32 ufd, struct sock *sk);
 int sk_detach_filter(struct sock *sk);
+int __sk_detach_filter(struct sock *sk, bool locked);
+
 int sk_get_filter(struct sock *sk, struct sock_filter __user *filter,
 		  unsigned int len);
 
-- 
cgit v1.2.3


From d7e944c8ddc0983640a9a32868fb217485d12ca2 Mon Sep 17 00:00:00 2001
From: Giuseppe CAVALLARO <peppe.cavallaro@st.com>
Date: Fri, 1 Apr 2016 09:07:15 +0200
Subject: Revert "stmmac: Fix 'eth0: No PHY found' regression"
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This reverts commit 88f8b1bb41c6208f81b6a480244533ded7b59493.
due to problems on GeekBox and Banana Pi M1 board when
connected to a real transceiver instead of a switch via
fixed-link.

Signed-off-by: Giuseppe Cavallaro <peppe.cavallaro@st.com>
Cc: Gabriel Fernandez <gabriel.fernandez@linaro.org>
Cc: Andreas Färber <afaerber@suse.de>
Cc: Frank Schäfer <fschaefer.oss@googlemail.com>
Cc: Dinh Nguyen <dinh.linux@gmail.com>
Cc: David S. Miller <davem@davemloft.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/stmmac.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h
index 4bcf5a61aada..6e53fa8942a4 100644
--- a/include/linux/stmmac.h
+++ b/include/linux/stmmac.h
@@ -114,7 +114,6 @@ struct plat_stmmacenet_data {
 	int interface;
 	struct stmmac_mdio_bus_data *mdio_bus_data;
 	struct device_node *phy_node;
-	struct device_node *mdio_node;
 	struct stmmac_dma_cfg *dma_cfg;
 	int clk_csr;
 	int has_gmac;
-- 
cgit v1.2.3


From a7657f128c279ae5796ab2ca7d04a7819f4259f0 Mon Sep 17 00:00:00 2001
From: Giuseppe CAVALLARO <peppe.cavallaro@st.com>
Date: Fri, 1 Apr 2016 09:07:16 +0200
Subject: stmmac: fix MDIO settings
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Initially the phy_bus_name was added to manipulate the
driver name but it was recently just used to manage the
fixed-link and then to take some decision at run-time.
So the patch uses the is_pseudo_fixed_link and removes
the phy_bus_name variable not necessary anymore.

The driver can manage the mdio registration by using phy-handle,
dwmac-mdio and own parameter e.g. snps,phy-addr.
This patch takes care about all these possible configurations
and fixes the mdio registration in case of there is a real
transceiver or a switch (that needs to be managed by using
fixed-link).

Signed-off-by: Giuseppe Cavallaro <peppe.cavallaro@st.com>
Reviewed-by: Andreas Färber <afaerber@suse.de>
Tested-by: Frank Schäfer <fschaefer.oss@googlemail.com>
Cc: Gabriel Fernandez <gabriel.fernandez@linaro.org>
Cc: Dinh Nguyen <dinh.linux@gmail.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Phil Reid <preid@electromag.com.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/stmmac.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h
index 6e53fa8942a4..e6bc30a42a74 100644
--- a/include/linux/stmmac.h
+++ b/include/linux/stmmac.h
@@ -108,12 +108,12 @@ struct stmmac_axi {
 };
 
 struct plat_stmmacenet_data {
-	char *phy_bus_name;
 	int bus_id;
 	int phy_addr;
 	int interface;
 	struct stmmac_mdio_bus_data *mdio_bus_data;
 	struct device_node *phy_node;
+	struct device_node *mdio_node;
 	struct stmmac_dma_cfg *dma_cfg;
 	int clk_csr;
 	int has_gmac;
-- 
cgit v1.2.3


From 969e8d7e47f93ef693028667480558de8f70523f Mon Sep 17 00:00:00 2001
From: Chen Gang <chengang@emindsoft.com.cn>
Date: Fri, 1 Apr 2016 14:31:17 -0700
Subject: include/linux/huge_mm.h: return NULL instead of false for
 pmd_trans_huge_lock()

The return value of pmd_trans_huge_lock() is a pointer, not a boolean
value, so use NULL instead of false as the return value.

Signed-off-by: Chen Gang <gang.chen.5i5j@gmail.com>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/huge_mm.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 79b0ef6aaa14..7008623e24b1 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -127,7 +127,7 @@ static inline spinlock_t *pmd_trans_huge_lock(pmd_t *pmd,
 	if (pmd_trans_huge(*pmd) || pmd_devmap(*pmd))
 		return __pmd_trans_huge_lock(pmd, vma);
 	else
-		return false;
+		return NULL;
 }
 static inline int hpage_nr_pages(struct page *page)
 {
-- 
cgit v1.2.3


From bbe3de2560280c0420d36a192e69f2772e893cf4 Mon Sep 17 00:00:00 2001
From: Lucas Stach <l.stach@pengutronix.de>
Date: Fri, 1 Apr 2016 14:31:32 -0700
Subject: mm/page_isolation: fix tracepoint to mirror check function behavior

Page isolation has not failed if the fin pfn extends beyond the end pfn
and test_pages_isolated checks this correctly.  Fix the tracepoint to
report the same result as the actual check function.

Signed-off-by: Lucas Stach <l.stach@pengutronix.de>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Joonsoo Kim <js1304@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/trace/events/page_isolation.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/trace/events/page_isolation.h b/include/trace/events/page_isolation.h
index 6fb644029c80..8738a78e6bf4 100644
--- a/include/trace/events/page_isolation.h
+++ b/include/trace/events/page_isolation.h
@@ -29,7 +29,7 @@ TRACE_EVENT(test_pages_isolated,
 
 	TP_printk("start_pfn=0x%lx end_pfn=0x%lx fin_pfn=0x%lx ret=%s",
 		__entry->start_pfn, __entry->end_pfn, __entry->fin_pfn,
-		__entry->end_pfn == __entry->fin_pfn ? "success" : "fail")
+		__entry->end_pfn <= __entry->fin_pfn ? "success" : "fail")
 );
 
 #endif /* _TRACE_PAGE_ISOLATION_H */
-- 
cgit v1.2.3


From 0f5dcf8de974db5970d48d12a202997baa2846a1 Mon Sep 17 00:00:00 2001
From: Mark Fasheh <mfasheh@suse.de>
Date: Tue, 29 Mar 2016 17:19:55 -0700
Subject: btrfs: Add qgroup tracing

This patch adds tracepoints to the qgroup code on both the reporting side
(insert_dirty_extents) and the accounting side. Taken together it allows us
to see what qgroup operations have happened, and what their result was.

Signed-off-by: Mark Fasheh <mfasheh@suse.de>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 include/trace/events/btrfs.h | 89 +++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 88 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h
index d866f21efbbf..467a4d205ff6 100644
--- a/include/trace/events/btrfs.h
+++ b/include/trace/events/btrfs.h
@@ -23,7 +23,7 @@ struct map_lookup;
 struct extent_buffer;
 struct btrfs_work;
 struct __btrfs_workqueue;
-struct btrfs_qgroup_operation;
+struct btrfs_qgroup_extent_record;
 
 #define show_ref_type(type)						\
 	__print_symbolic(type,						\
@@ -1231,6 +1231,93 @@ DEFINE_EVENT(btrfs__qgroup_delayed_ref, btrfs_qgroup_free_delayed_ref,
 
 	TP_ARGS(ref_root, reserved)
 );
+
+DECLARE_EVENT_CLASS(btrfs_qgroup_extent,
+	TP_PROTO(struct btrfs_qgroup_extent_record *rec),
+
+	TP_ARGS(rec),
+
+	TP_STRUCT__entry(
+		__field(	u64,  bytenr		)
+		__field(	u64,  num_bytes		)
+	),
+
+	TP_fast_assign(
+		__entry->bytenr		= rec->bytenr,
+		__entry->num_bytes	= rec->num_bytes;
+	),
+
+	TP_printk("bytenr = %llu, num_bytes = %llu",
+		  (unsigned long long)__entry->bytenr,
+		  (unsigned long long)__entry->num_bytes)
+);
+
+DEFINE_EVENT(btrfs_qgroup_extent, btrfs_qgroup_account_extents,
+
+	TP_PROTO(struct btrfs_qgroup_extent_record *rec),
+
+	TP_ARGS(rec)
+);
+
+DEFINE_EVENT(btrfs_qgroup_extent, btrfs_qgroup_insert_dirty_extent,
+
+	TP_PROTO(struct btrfs_qgroup_extent_record *rec),
+
+	TP_ARGS(rec)
+);
+
+TRACE_EVENT(btrfs_qgroup_account_extent,
+
+	TP_PROTO(u64 bytenr, u64 num_bytes, u64 nr_old_roots, u64 nr_new_roots),
+
+	TP_ARGS(bytenr, num_bytes, nr_old_roots, nr_new_roots),
+
+	TP_STRUCT__entry(
+		__field(	u64,  bytenr			)
+		__field(	u64,  num_bytes			)
+		__field(	u64,  nr_old_roots		)
+		__field(	u64,  nr_new_roots		)
+	),
+
+	TP_fast_assign(
+		__entry->bytenr		= bytenr;
+		__entry->num_bytes	= num_bytes;
+		__entry->nr_old_roots	= nr_old_roots;
+		__entry->nr_new_roots	= nr_new_roots;
+	),
+
+	TP_printk("bytenr = %llu, num_bytes = %llu, nr_old_roots = %llu, "
+		  "nr_new_roots = %llu",
+		  __entry->bytenr,
+		  __entry->num_bytes,
+		  __entry->nr_old_roots,
+		  __entry->nr_new_roots)
+);
+
+TRACE_EVENT(qgroup_update_counters,
+
+	TP_PROTO(u64 qgid, u64 cur_old_count, u64 cur_new_count),
+
+	TP_ARGS(qgid, cur_old_count, cur_new_count),
+
+	TP_STRUCT__entry(
+		__field(	u64,  qgid			)
+		__field(	u64,  cur_old_count		)
+		__field(	u64,  cur_new_count		)
+	),
+
+	TP_fast_assign(
+		__entry->qgid		= qgid;
+		__entry->cur_old_count	= cur_old_count;
+		__entry->cur_new_count	= cur_new_count;
+	),
+
+	TP_printk("qgid = %llu, cur_old_count = %llu, cur_new_count = %llu",
+		  __entry->qgid,
+		  __entry->cur_old_count,
+		  __entry->cur_new_count)
+);
+
 #endif /* _TRACE_BTRFS_H */
 
 /* This part must be outside protection */
-- 
cgit v1.2.3


From 09cbfeaf1a5a67bfb3201e0c83c810cecb2efa5a Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Date: Fri, 1 Apr 2016 15:29:47 +0300
Subject: mm, fs: get rid of PAGE_CACHE_* and page_cache_{get,release} macros

PAGE_CACHE_{SIZE,SHIFT,MASK,ALIGN} macros were introduced *long* time
ago with promise that one day it will be possible to implement page
cache with bigger chunks than PAGE_SIZE.

This promise never materialized.  And unlikely will.

We have many places where PAGE_CACHE_SIZE assumed to be equal to
PAGE_SIZE.  And it's constant source of confusion on whether
PAGE_CACHE_* or PAGE_* constant should be used in a particular case,
especially on the border between fs and mm.

Global switching to PAGE_CACHE_SIZE != PAGE_SIZE would cause to much
breakage to be doable.

Let's stop pretending that pages in page cache are special.  They are
not.

The changes are pretty straight-forward:

 - <foo> << (PAGE_CACHE_SHIFT - PAGE_SHIFT) -> <foo>;

 - <foo> >> (PAGE_CACHE_SHIFT - PAGE_SHIFT) -> <foo>;

 - PAGE_CACHE_{SIZE,SHIFT,MASK,ALIGN} -> PAGE_{SIZE,SHIFT,MASK,ALIGN};

 - page_cache_get() -> get_page();

 - page_cache_release() -> put_page();

This patch contains automated changes generated with coccinelle using
script below.  For some reason, coccinelle doesn't patch header files.
I've called spatch for them manually.

The only adjustment after coccinelle is revert of changes to
PAGE_CAHCE_ALIGN definition: we are going to drop it later.

There are few places in the code where coccinelle didn't reach.  I'll
fix them manually in a separate patch.  Comments and documentation also
will be addressed with the separate patch.

virtual patch

@@
expression E;
@@
- E << (PAGE_CACHE_SHIFT - PAGE_SHIFT)
+ E

@@
expression E;
@@
- E >> (PAGE_CACHE_SHIFT - PAGE_SHIFT)
+ E

@@
@@
- PAGE_CACHE_SHIFT
+ PAGE_SHIFT

@@
@@
- PAGE_CACHE_SIZE
+ PAGE_SIZE

@@
@@
- PAGE_CACHE_MASK
+ PAGE_MASK

@@
expression E;
@@
- PAGE_CACHE_ALIGN(E)
+ PAGE_ALIGN(E)

@@
expression E;
@@
- page_cache_get(E)
+ get_page(E)

@@
expression E;
@@
- page_cache_release(E)
+ put_page(E)

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/bio.h          |  2 +-
 include/linux/blkdev.h       |  2 +-
 include/linux/buffer_head.h  |  4 ++--
 include/linux/ceph/libceph.h |  4 ++--
 include/linux/f2fs_fs.h      |  4 ++--
 include/linux/fs.h           |  4 ++--
 include/linux/nfs_page.h     |  2 +-
 include/linux/pagemap.h      | 14 +++++++-------
 include/linux/swap.h         |  2 +-
 9 files changed, 19 insertions(+), 19 deletions(-)

(limited to 'include')

diff --git a/include/linux/bio.h b/include/linux/bio.h
index 88bc64f00bb5..6b7481f62218 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -41,7 +41,7 @@
 #endif
 
 #define BIO_MAX_PAGES		256
-#define BIO_MAX_SIZE		(BIO_MAX_PAGES << PAGE_CACHE_SHIFT)
+#define BIO_MAX_SIZE		(BIO_MAX_PAGES << PAGE_SHIFT)
 #define BIO_MAX_SECTORS		(BIO_MAX_SIZE >> 9)
 
 /*
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 7e5d7e018bea..669e419d6234 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1372,7 +1372,7 @@ unsigned char *read_dev_sector(struct block_device *, sector_t, Sector *);
 
 static inline void put_dev_sector(Sector p)
 {
-	page_cache_release(p.v);
+	put_page(p.v);
 }
 
 static inline bool __bvec_gap_to_prev(struct request_queue *q,
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index c67f052cc5e5..d48daa3f6f20 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -43,7 +43,7 @@ enum bh_state_bits {
 			 */
 };
 
-#define MAX_BUF_PER_PAGE (PAGE_CACHE_SIZE / 512)
+#define MAX_BUF_PER_PAGE (PAGE_SIZE / 512)
 
 struct page;
 struct buffer_head;
@@ -263,7 +263,7 @@ void buffer_init(void);
 static inline void attach_page_buffers(struct page *page,
 		struct buffer_head *head)
 {
-	page_cache_get(page);
+	get_page(page);
 	SetPagePrivate(page);
 	set_page_private(page, (unsigned long)head);
 }
diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h
index e7975e4681e1..db92a8d4926e 100644
--- a/include/linux/ceph/libceph.h
+++ b/include/linux/ceph/libceph.h
@@ -176,8 +176,8 @@ extern void ceph_put_snap_context(struct ceph_snap_context *sc);
  */
 static inline int calc_pages_for(u64 off, u64 len)
 {
-	return ((off+len+PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT) -
-		(off >> PAGE_CACHE_SHIFT);
+	return ((off+len+PAGE_SIZE-1) >> PAGE_SHIFT) -
+		(off >> PAGE_SHIFT);
 }
 
 extern struct kmem_cache *ceph_inode_cachep;
diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h
index 9eb215a155e0..b90e9bdbd1dd 100644
--- a/include/linux/f2fs_fs.h
+++ b/include/linux/f2fs_fs.h
@@ -262,7 +262,7 @@ struct f2fs_node {
 /*
  * For NAT entries
  */
-#define NAT_ENTRY_PER_BLOCK (PAGE_CACHE_SIZE / sizeof(struct f2fs_nat_entry))
+#define NAT_ENTRY_PER_BLOCK (PAGE_SIZE / sizeof(struct f2fs_nat_entry))
 
 struct f2fs_nat_entry {
 	__u8 version;		/* latest version of cached nat entry */
@@ -282,7 +282,7 @@ struct f2fs_nat_block {
  * Not allow to change this.
  */
 #define SIT_VBLOCK_MAP_SIZE 64
-#define SIT_ENTRY_PER_BLOCK (PAGE_CACHE_SIZE / sizeof(struct f2fs_sit_entry))
+#define SIT_ENTRY_PER_BLOCK (PAGE_SIZE / sizeof(struct f2fs_sit_entry))
 
 /*
  * Note that f2fs_sit_entry->vblocks has the following bit-field information.
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 14a97194b34b..304991a80e23 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -929,7 +929,7 @@ static inline struct file *get_file(struct file *f)
 /* Page cache limit. The filesystems should put that into their s_maxbytes 
    limits, otherwise bad things can happen in VM. */ 
 #if BITS_PER_LONG==32
-#define MAX_LFS_FILESIZE	(((loff_t)PAGE_CACHE_SIZE << (BITS_PER_LONG-1))-1) 
+#define MAX_LFS_FILESIZE	(((loff_t)PAGE_SIZE << (BITS_PER_LONG-1))-1)
 #elif BITS_PER_LONG==64
 #define MAX_LFS_FILESIZE 	((loff_t)0x7fffffffffffffffLL)
 #endif
@@ -2067,7 +2067,7 @@ extern int generic_update_time(struct inode *, struct timespec *, int);
 /* /sys/fs */
 extern struct kobject *fs_kobj;
 
-#define MAX_RW_COUNT (INT_MAX & PAGE_CACHE_MASK)
+#define MAX_RW_COUNT (INT_MAX & PAGE_MASK)
 
 #ifdef CONFIG_MANDATORY_FILE_LOCKING
 extern int locks_mandatory_locked(struct file *);
diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
index f2f650f136ee..efada239205e 100644
--- a/include/linux/nfs_page.h
+++ b/include/linux/nfs_page.h
@@ -184,7 +184,7 @@ nfs_list_entry(struct list_head *head)
 static inline
 loff_t req_offset(struct nfs_page *req)
 {
-	return (((loff_t)req->wb_index) << PAGE_CACHE_SHIFT) + req->wb_offset;
+	return (((loff_t)req->wb_index) << PAGE_SHIFT) + req->wb_offset;
 }
 
 #endif /* _LINUX_NFS_PAGE_H */
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 1ebd65c91422..f396ccb900cc 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -390,13 +390,13 @@ static inline pgoff_t page_to_pgoff(struct page *page)
 		return page->index << compound_order(page);
 
 	if (likely(!PageTransTail(page)))
-		return page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
+		return page->index;
 
 	/*
 	 *  We don't initialize ->index for tail pages: calculate based on
 	 *  head page
 	 */
-	pgoff = compound_head(page)->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
+	pgoff = compound_head(page)->index;
 	pgoff += page - compound_head(page);
 	return pgoff;
 }
@@ -406,12 +406,12 @@ static inline pgoff_t page_to_pgoff(struct page *page)
  */
 static inline loff_t page_offset(struct page *page)
 {
-	return ((loff_t)page->index) << PAGE_CACHE_SHIFT;
+	return ((loff_t)page->index) << PAGE_SHIFT;
 }
 
 static inline loff_t page_file_offset(struct page *page)
 {
-	return ((loff_t)page_file_index(page)) << PAGE_CACHE_SHIFT;
+	return ((loff_t)page_file_index(page)) << PAGE_SHIFT;
 }
 
 extern pgoff_t linear_hugepage_index(struct vm_area_struct *vma,
@@ -425,7 +425,7 @@ static inline pgoff_t linear_page_index(struct vm_area_struct *vma,
 		return linear_hugepage_index(vma, address);
 	pgoff = (address - vma->vm_start) >> PAGE_SHIFT;
 	pgoff += vma->vm_pgoff;
-	return pgoff >> (PAGE_CACHE_SHIFT - PAGE_SHIFT);
+	return pgoff;
 }
 
 extern void __lock_page(struct page *page);
@@ -671,8 +671,8 @@ static inline int add_to_page_cache(struct page *page,
 
 static inline unsigned long dir_pages(struct inode *inode)
 {
-	return (unsigned long)(inode->i_size + PAGE_CACHE_SIZE - 1) >>
-			       PAGE_CACHE_SHIFT;
+	return (unsigned long)(inode->i_size + PAGE_SIZE - 1) >>
+			       PAGE_SHIFT;
 }
 
 #endif /* _LINUX_PAGEMAP_H */
diff --git a/include/linux/swap.h b/include/linux/swap.h
index d18b65c53dbb..3d980ea1c946 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -435,7 +435,7 @@ struct backing_dev_info;
 /* only sparc can not include linux/pagemap.h in this file
  * so leave page_cache_release and release_pages undeclared... */
 #define free_page_and_swap_cache(page) \
-	page_cache_release(page)
+	put_page(page)
 #define free_pages_and_swap_cache(pages, nr) \
 	release_pages((pages), (nr), false);
 
-- 
cgit v1.2.3


From ea1754a084760e68886f5b725c8eaada9cc57155 Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Date: Fri, 1 Apr 2016 15:29:48 +0300
Subject: mm, fs: remove remaining PAGE_CACHE_* and page_cache_{get,release}
 usage

Mostly direct substitution with occasional adjustment or removing
outdated comments.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/backing-dev-defs.h | 2 +-
 include/linux/mm.h               | 2 +-
 include/linux/mm_types.h         | 2 +-
 include/linux/nfs_page.h         | 4 ++--
 include/linux/nilfs2_fs.h        | 4 ++--
 include/linux/pagemap.h          | 3 +--
 include/linux/sunrpc/svc.h       | 2 +-
 include/linux/swap.h             | 2 +-
 8 files changed, 10 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h
index 1b4d69f68c33..3f103076d0bf 100644
--- a/include/linux/backing-dev-defs.h
+++ b/include/linux/backing-dev-defs.h
@@ -135,7 +135,7 @@ struct bdi_writeback {
 
 struct backing_dev_info {
 	struct list_head bdi_list;
-	unsigned long ra_pages;	/* max readahead in PAGE_CACHE_SIZE units */
+	unsigned long ra_pages;	/* max readahead in PAGE_SIZE units */
 	unsigned int capabilities; /* Device capabilities */
 	congested_fn *congested_fn; /* Function pointer if device is md/dm */
 	void *congested_data;	/* Pointer to aux data for congested func */
diff --git a/include/linux/mm.h b/include/linux/mm.h
index ed6407d1b7b5..ffcff53e3b2b 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -623,7 +623,7 @@ void do_set_pte(struct vm_area_struct *vma, unsigned long address,
  *
  * A page may belong to an inode's memory mapping. In this case, page->mapping
  * is the pointer to the inode, and page->index is the file offset of the page,
- * in units of PAGE_CACHE_SIZE.
+ * in units of PAGE_SIZE.
  *
  * If pagecache pages are not associated with an inode, they are said to be
  * anonymous pages. These may become associated with the swapcache, and in that
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 944b2b37313b..c2d75b4fa86c 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -341,7 +341,7 @@ struct vm_area_struct {
 
 	/* Information about our backing store: */
 	unsigned long vm_pgoff;		/* Offset (within vm_file) in PAGE_SIZE
-					   units, *not* PAGE_CACHE_SIZE */
+					   units */
 	struct file * vm_file;		/* File we map to (can be NULL). */
 	void * vm_private_data;		/* was vm_pte (shared mem) */
 
diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
index efada239205e..957049f72290 100644
--- a/include/linux/nfs_page.h
+++ b/include/linux/nfs_page.h
@@ -41,8 +41,8 @@ struct nfs_page {
 	struct page		*wb_page;	/* page to read in/write out */
 	struct nfs_open_context	*wb_context;	/* File state context info */
 	struct nfs_lock_context	*wb_lock_context;	/* lock context info */
-	pgoff_t			wb_index;	/* Offset >> PAGE_CACHE_SHIFT */
-	unsigned int		wb_offset,	/* Offset & ~PAGE_CACHE_MASK */
+	pgoff_t			wb_index;	/* Offset >> PAGE_SHIFT */
+	unsigned int		wb_offset,	/* Offset & ~PAGE_MASK */
 				wb_pgbase,	/* Start of page data */
 				wb_bytes;	/* Length of request */
 	struct kref		wb_kref;	/* reference count */
diff --git a/include/linux/nilfs2_fs.h b/include/linux/nilfs2_fs.h
index 9abb763e4b86..e9fcf90b270d 100644
--- a/include/linux/nilfs2_fs.h
+++ b/include/linux/nilfs2_fs.h
@@ -331,7 +331,7 @@ static inline unsigned nilfs_rec_len_from_disk(__le16 dlen)
 {
 	unsigned len = le16_to_cpu(dlen);
 
-#if !defined(__KERNEL__) || (PAGE_CACHE_SIZE >= 65536)
+#if !defined(__KERNEL__) || (PAGE_SIZE >= 65536)
 	if (len == NILFS_MAX_REC_LEN)
 		return 1 << 16;
 #endif
@@ -340,7 +340,7 @@ static inline unsigned nilfs_rec_len_from_disk(__le16 dlen)
 
 static inline __le16 nilfs_rec_len_to_disk(unsigned len)
 {
-#if !defined(__KERNEL__) || (PAGE_CACHE_SIZE >= 65536)
+#if !defined(__KERNEL__) || (PAGE_SIZE >= 65536)
 	if (len == (1 << 16))
 		return cpu_to_le16(NILFS_MAX_REC_LEN);
 	else if (len > (1 << 16))
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index f396ccb900cc..b3fc0370c14f 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -535,8 +535,7 @@ extern void add_page_wait_queue(struct page *page, wait_queue_t *waiter);
 /*
  * Fault a userspace page into pagetables.  Return non-zero on a fault.
  *
- * This assumes that two userspace pages are always sufficient.  That's
- * not true if PAGE_CACHE_SIZE > PAGE_SIZE.
+ * This assumes that two userspace pages are always sufficient.
  */
 static inline int fault_in_pages_writeable(char __user *uaddr, int size)
 {
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index cc0fc712bb82..7ca44fb5b675 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -129,7 +129,7 @@ static inline void svc_get(struct svc_serv *serv)
  *
  * These happen to all be powers of 2, which is not strictly
  * necessary but helps enforce the real limitation, which is
- * that they should be multiples of PAGE_CACHE_SIZE.
+ * that they should be multiples of PAGE_SIZE.
  *
  * For UDP transports, a block plus NFS,RPC, and UDP headers
  * has to fit into the IP datagram limit of 64K.  The largest
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 3d980ea1c946..2b83359c19ca 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -433,7 +433,7 @@ struct backing_dev_info;
 #define si_swapinfo(val) \
 	do { (val)->freeswap = (val)->totalswap = 0; } while (0)
 /* only sparc can not include linux/pagemap.h in this file
- * so leave page_cache_release and release_pages undeclared... */
+ * so leave put_page and release_pages undeclared... */
 #define free_page_and_swap_cache(page) \
 	put_page(page)
 #define free_pages_and_swap_cache(pages, nr) \
-- 
cgit v1.2.3


From 1fa64f198b9f8d6ec0f7aec7c18dc94684391140 Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Date: Fri, 1 Apr 2016 15:29:49 +0300
Subject: mm: drop PAGE_CACHE_* and page_cache_{get,release} definition

All users gone.  We can remove these macros.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/pagemap.h | 15 ---------------
 1 file changed, 15 deletions(-)

(limited to 'include')

diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index b3fc0370c14f..7e1ab155c67c 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -86,21 +86,6 @@ static inline void mapping_set_gfp_mask(struct address_space *m, gfp_t mask)
 				(__force unsigned long)mask;
 }
 
-/*
- * The page cache can be done in larger chunks than
- * one page, because it allows for more efficient
- * throughput (it can then be mapped into user
- * space in smaller chunks for same flexibility).
- *
- * Or rather, it _will_ be done in larger chunks.
- */
-#define PAGE_CACHE_SHIFT	PAGE_SHIFT
-#define PAGE_CACHE_SIZE		PAGE_SIZE
-#define PAGE_CACHE_MASK		PAGE_MASK
-#define PAGE_CACHE_ALIGN(addr)	(((addr)+PAGE_CACHE_SIZE-1)&PAGE_CACHE_MASK)
-
-#define page_cache_get(page)		get_page(page)
-#define page_cache_release(page)	put_page(page)
 void release_pages(struct page **pages, int nr, bool cold);
 
 /*
-- 
cgit v1.2.3


From 749b48faaf64d1081d7216068ff3da92c230bad0 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Fri, 1 Apr 2016 10:12:49 -0400
Subject: drm/ttm: use phys_addr_t for ttm_bus_placement
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fixes ttm on platforms like PPC460 where the CPU
is in 32-bit mode, but the physical addresses are
>32 bits.

Extracted from a patch by Hans Verkuil.

Tested-by: Julian Margetson <runaway@candw.ms>
Acked-by: Thomas Hellstrom <thellstrom@vmware.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Cc: Thomas Hellstrom <thellstrom@vmware.com>
Cc: Julian Margetson <runaway@candw.ms>
Cc: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 include/drm/ttm/ttm_bo_api.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h
index afae2316bd43..055a08ddac02 100644
--- a/include/drm/ttm/ttm_bo_api.h
+++ b/include/drm/ttm/ttm_bo_api.h
@@ -92,7 +92,7 @@ struct ttm_placement {
  */
 struct ttm_bus_placement {
 	void		*addr;
-	unsigned long	base;
+	phys_addr_t	base;
 	unsigned long	size;
 	unsigned long	offset;
 	bool		is_iomem;
-- 
cgit v1.2.3


From 84ea3a18c06c7ae94b76f763c462bf1e0ce4dc79 Mon Sep 17 00:00:00 2001
From: Luis de Bethencourt <luisbg@osg.samsung.com>
Date: Fri, 18 Mar 2016 16:09:29 +0000
Subject: mac80211: add doc for RX_FLAG_DUP_VALIDATED flag

Add documentation for the flag for duplication check.

Fixes the following warning when running make htmldocs:
warning: Enum value 'RX_FLAG_DUP_VALIDATED' not described in enum 'mac80211_rx_flags'

Signed-off-by: Luis de Bethencourt <luisbg@osg.samsung.com>
[fix description]
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/mac80211.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 0c09da34b67a..e385eb3076a1 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -1001,6 +1001,8 @@ ieee80211_tx_info_clear_status(struct ieee80211_tx_info *info)
  *	flag indicates that the PN was verified for replay protection.
  *	Note that this flag is also currently only supported when a frame
  *	is also decrypted (ie. @RX_FLAG_DECRYPTED must be set)
+ * @RX_FLAG_DUP_VALIDATED: The driver should set this flag if it did
+ *	de-duplication by itself.
  * @RX_FLAG_FAILED_FCS_CRC: Set this flag if the FCS check failed on
  *	the frame.
  * @RX_FLAG_FAILED_PLCP_CRC: Set this flag if the PCLP check failed on
-- 
cgit v1.2.3


From 5ddfe0858ea7848c5d4efe3f4319e7543522e0ee Mon Sep 17 00:00:00 2001
From: Hannes Reinecke <hare@suse.de>
Date: Fri, 1 Apr 2016 08:57:36 +0200
Subject: scsi: Do not attach VPD to devices that don't support it

The patch "scsi: rescan VPD attributes" introduced a regression in which
devices that don't support VPD were being scanned for VPD attributes
anyway.  This could cause issues for some devices and should be avoided
so the check for scsi_level has been moved out of scsi_add_lun and into
scsi_attach_vpd so that all callers will not scan VPD for devices that
don't support it.

[mkp: Merge fix]

Fixes: 09e2b0b14690 ("scsi: rescan VPD attributes")
Cc: <stable@vger.kernel.org> #v4.5+
Suggested-by: Alexander Duyck <aduyck@mirantis.com>
Signed-off-by: Hannes Reinecke <hare@suse.com>
Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 include/scsi/scsi_device.h | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

(limited to 'include')

diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h
index c067019ed12a..74d79bde7075 100644
--- a/include/scsi/scsi_device.h
+++ b/include/scsi/scsi_device.h
@@ -516,6 +516,31 @@ static inline int scsi_device_tpgs(struct scsi_device *sdev)
 	return sdev->inquiry ? (sdev->inquiry[5] >> 4) & 0x3 : 0;
 }
 
+/**
+ * scsi_device_supports_vpd - test if a device supports VPD pages
+ * @sdev: the &struct scsi_device to test
+ *
+ * If the 'try_vpd_pages' flag is set it takes precedence.
+ * Otherwise we will assume VPD pages are supported if the
+ * SCSI level is at least SPC-3 and 'skip_vpd_pages' is not set.
+ */
+static inline int scsi_device_supports_vpd(struct scsi_device *sdev)
+{
+	/* Attempt VPD inquiry if the device blacklist explicitly calls
+	 * for it.
+	 */
+	if (sdev->try_vpd_pages)
+		return 1;
+	/*
+	 * Although VPD inquiries can go to SCSI-2 type devices,
+	 * some USB ones crash on receiving them, and the pages
+	 * we currently ask for are for SPC-3 and beyond
+	 */
+	if (sdev->scsi_level > SCSI_SPC_2 && !sdev->skip_vpd_pages)
+		return 1;
+	return 0;
+}
+
 #define MODULE_ALIAS_SCSI_DEVICE(type) \
 	MODULE_ALIAS("scsi:t-" __stringify(type) "*")
 #define SCSI_DEVICE_MODALIAS_FMT "scsi:t-0x%02x"
-- 
cgit v1.2.3


From b70bb984489363aadd5ccc94d919629d9e264d36 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 15 Mar 2016 22:37:17 +0100
Subject: iommu: provide of_xlate pointer unconditionally

iommu drivers that support the standard DT bindings use a of_xlate
callback pointer, but that is only part of struct iommu_ops when
CONFIG_OF_IOMMU is enabled, leading to build errors in randconfig
builds when that is not provided:

drivers/iommu/mtk_iommu.c:497:2: error: unknown field 'of_xlate' specified in initializer
  .of_xlate = mtk_iommu_of_xlate,
  ^
drivers/iommu/mtk_iommu.c:497:14: error: initialization from incompatible pointer type [-Werror=incompatible-pointer-types]
  .of_xlate = mtk_iommu_of_xlate,
              ^~~~~~~~~~~~~~~~~~
drivers/iommu/mtk_iommu.c:497:14: note: (near initialization for 'mtk_iommu_ops.domain_get_attr')

We can work around it by adding more #ifdefs in each driver, but
it seems nicer to just allow setting the pointer even if it is
unused. This makes the driver code look nicer, and it gives better
compile-time coverage when test building on other architectures.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Fixes: 0df4fabe208d ("iommu/mediatek: Add mt8173 IOMMU driver")
Reviewed-by: Robin Murphy <robin.murphy@arm.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 include/linux/iommu.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index a5c539fa5d2b..ef7a6ecd8584 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -195,9 +195,7 @@ struct iommu_ops {
 	/* Get the number of windows per domain */
 	u32 (*domain_get_windows)(struct iommu_domain *domain);
 
-#ifdef CONFIG_OF_IOMMU
 	int (*of_xlate)(struct device *dev, struct of_phandle_args *args);
-#endif
 
 	unsigned long pgsize_bitmap;
 	void *priv;
-- 
cgit v1.2.3


From 95272c29378ee7dc15f43fa2758cb28a5913a06d Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Thu, 31 Mar 2016 09:38:51 +0200
Subject: compiler-gcc: disable -ftracer for __noclone functions

-ftracer can duplicate asm blocks causing compilation to fail in
noclone functions.  For example, KVM declares a global variable
in an asm like

    asm("2: ... \n
         .pushsection data \n
         .global vmx_return \n
         vmx_return: .long 2b");

and -ftracer causes a double declaration.

Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Michal Marek <mmarek@suse.cz>
Cc: stable@vger.kernel.org
Cc: kvm@vger.kernel.org
Reported-by: Linda Walsh <lkml@tlinx.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/linux/compiler-gcc.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index 22ab246feed3..eeae401a2412 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -199,7 +199,7 @@
 #define unreachable() __builtin_unreachable()
 
 /* Mark a function definition as prohibited from being cloned. */
-#define __noclone	__attribute__((__noclone__))
+#define __noclone	__attribute__((__noclone__, __optimize__("no-tracer")))
 
 #endif /* GCC_VERSION >= 40500 */
 
-- 
cgit v1.2.3


From eb8e97715f29a1240cdf67b0df725be27433259f Mon Sep 17 00:00:00 2001
From: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Date: Fri, 1 Apr 2016 14:30:32 -0300
Subject: sctp: use list_* in sctp_list_dequeue

Use list_* helpers in sctp_list_dequeue, more readable.

Signed-off-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/sctp.h | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
index 65521cfdcade..03fb33efcae2 100644
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h
@@ -386,11 +386,9 @@ static inline struct list_head *sctp_list_dequeue(struct list_head *list)
 {
 	struct list_head *result = NULL;
 
-	if (list->next != list) {
+	if (!list_empty(list)) {
 		result = list->next;
-		list->next = result->next;
-		list->next->prev = list;
-		INIT_LIST_HEAD(result);
+		list_del_init(result);
 	}
 	return result;
 }
-- 
cgit v1.2.3


From 60901df3aed230d4565dca003f11b6a95fbf30d9 Mon Sep 17 00:00:00 2001
From: Ross Lagerwall <ross.lagerwall@citrix.com>
Date: Thu, 17 Mar 2016 16:51:59 +0000
Subject: xen: Fix page <-> pfn conversion on 32 bit systems

Commit 1084b1988d22dc165c9dbbc2b0e057f9248ac4db (xen: Add Xen specific
page definition) caused a regression in 4.4.

The xen functions to convert between pages and pfns fail due to an
overflow on systems where a physical address may not fit in an
unsigned long (e.g. x86 32 bit PAE systems). Rework the conversion to
avoid overflow. This should also result in simpler object code.

This bug manifested itself as disk corruption with Linux 4.4 when
using blkfront in a Xen HVM x86 32 bit guest with more than 4 GiB of
memory.

Signed-off-by: Ross Lagerwall <ross.lagerwall@citrix.com>
Cc: <stable@vger.kernel.org> # 4.4+
Signed-off-by: David Vrabel <david.vrabel@citrix.com>
---
 include/xen/page.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/xen/page.h b/include/xen/page.h
index 96294ac93755..9dc46cb8a0fd 100644
--- a/include/xen/page.h
+++ b/include/xen/page.h
@@ -15,9 +15,9 @@
  */
 
 #define xen_pfn_to_page(xen_pfn)	\
-	((pfn_to_page(((unsigned long)(xen_pfn) << XEN_PAGE_SHIFT) >> PAGE_SHIFT)))
+	(pfn_to_page((unsigned long)(xen_pfn) >> (PAGE_SHIFT - XEN_PAGE_SHIFT)))
 #define page_to_xen_pfn(page)		\
-	(((page_to_pfn(page)) << PAGE_SHIFT) >> XEN_PAGE_SHIFT)
+	((page_to_pfn(page)) << (PAGE_SHIFT - XEN_PAGE_SHIFT))
 
 #define XEN_PFN_PER_PAGE	(PAGE_SIZE / XEN_PAGE_SIZE)
 
-- 
cgit v1.2.3


From 18fcf49f87f4b280d3cd695fc77766004b223af5 Mon Sep 17 00:00:00 2001
From: WANG Cong <xiyou.wangcong@gmail.com>
Date: Mon, 4 Apr 2016 10:32:48 -0700
Subject: net_sched: fix a memory leak in tc action

Fixes: ddf97ccdd7cb ("net_sched: add network namespace support for tc actions")
Reported-by: Dmitry Vyukov <dvyukov@google.com>
Tested-by: Dmitry Vyukov <dvyukov@google.com>
Cc: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/act_api.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/net/act_api.h b/include/net/act_api.h
index 2a19fe111c78..03e322b30218 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -135,6 +135,7 @@ void tcf_hashinfo_destroy(const struct tc_action_ops *ops,
 static inline void tc_action_net_exit(struct tc_action_net *tn)
 {
 	tcf_hashinfo_destroy(tn->ops, tn->hinfo);
+	kfree(tn->hinfo);
 }
 
 int tcf_generic_walker(struct tc_action_net *tn, struct sk_buff *skb,
-- 
cgit v1.2.3


From c12d2da56d0e07d230968ee2305aaa86b93a6832 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Mon, 4 Apr 2016 10:24:58 +0200
Subject: mm/gup: Remove the macro overload API migration helpers from the
 get_user*() APIs

The pkeys changes brought about a truly hideous set of macros in:

  cde70140fed8 ("mm/gup: Overload get_user_pages() functions")

... which macros are (ab-)using the fact that __VA_ARGS__ can be used
to shift parameter positions in macro arguments without breaking the
build and so can be used to call separate C functions depending on
the number of arguments of the macro.

This allowed easy migration of these 3 GUP APIs, as both these variants
worked at the C level:

  old:
	ret = get_user_pages(current, current->mm, address, 1, 1, 0, &page, NULL);

  new:
	ret = get_user_pages(address, 1, 1, 0, &page, NULL);

... while we also generated a (functionally harmless but noticeable) build
time warning if the old API was used. As there are over 300 uses of these
APIs, this trick eased the migration of the API and avoided excessive
migration pain in linux-next.

Now, with its work done, get rid of all of that complication and ugliness:

    3 files changed, 16 insertions(+), 140 deletions(-)

... where the linecount of the migration hack was further inflated by the
fact that there are NOMMU variants of these GUP APIs as well.

Much of the conversion was done in linux-next over the past couple of months,
and Linus recently removed all remaining old API uses from the upstream tree
in the following upstrea commit:

  cb107161df3c ("Convert straggling drivers to new six-argument get_user_pages()")

There was one more old-API usage in mm/gup.c, in the CONFIG_HAVE_GENERIC_RCU_GUP
code path that ARM, ARM64 and PowerPC uses.

After this commit any old API usage will break the build.

[ Also fixed a PowerPC/HAVE_GENERIC_RCU_GUP warning reported by Stephen Rothwell. ]

Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Dave Hansen <dave@sr71.net>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Cc: linux-mm@kvack.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/mm.h | 64 +++---------------------------------------------------
 1 file changed, 3 insertions(+), 61 deletions(-)

(limited to 'include')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index ed6407d1b7b5..d6508a025a31 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1250,78 +1250,20 @@ long get_user_pages_remote(struct task_struct *tsk, struct mm_struct *mm,
 			    unsigned long start, unsigned long nr_pages,
 			    int write, int force, struct page **pages,
 			    struct vm_area_struct **vmas);
-long get_user_pages6(unsigned long start, unsigned long nr_pages,
+long get_user_pages(unsigned long start, unsigned long nr_pages,
 			    int write, int force, struct page **pages,
 			    struct vm_area_struct **vmas);
-long get_user_pages_locked6(unsigned long start, unsigned long nr_pages,
+long get_user_pages_locked(unsigned long start, unsigned long nr_pages,
 		    int write, int force, struct page **pages, int *locked);
 long __get_user_pages_unlocked(struct task_struct *tsk, struct mm_struct *mm,
 			       unsigned long start, unsigned long nr_pages,
 			       int write, int force, struct page **pages,
 			       unsigned int gup_flags);
-long get_user_pages_unlocked5(unsigned long start, unsigned long nr_pages,
+long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages,
 		    int write, int force, struct page **pages);
 int get_user_pages_fast(unsigned long start, int nr_pages, int write,
 			struct page **pages);
 
-/* suppress warnings from use in EXPORT_SYMBOL() */
-#ifndef __DISABLE_GUP_DEPRECATED
-#define __gup_deprecated __deprecated
-#else
-#define __gup_deprecated
-#endif
-/*
- * These macros provide backward-compatibility with the old
- * get_user_pages() variants which took tsk/mm.  These
- * functions/macros provide both compile-time __deprecated so we
- * can catch old-style use and not break the build.  The actual
- * functions also have WARN_ON()s to let us know at runtime if
- * the get_user_pages() should have been the "remote" variant.
- *
- * These are hideous, but temporary.
- *
- * If you run into one of these __deprecated warnings, look
- * at how you are calling get_user_pages().  If you are calling
- * it with current/current->mm as the first two arguments,
- * simply remove those arguments.  The behavior will be the same
- * as it is now.  If you are calling it on another task, use
- * get_user_pages_remote() instead.
- *
- * Any questions?  Ask Dave Hansen <dave@sr71.net>
- */
-long
-__gup_deprecated
-get_user_pages8(struct task_struct *tsk, struct mm_struct *mm,
-		unsigned long start, unsigned long nr_pages,
-		int write, int force, struct page **pages,
-		struct vm_area_struct **vmas);
-#define GUP_MACRO(_1, _2, _3, _4, _5, _6, _7, _8, get_user_pages, ...)	\
-	get_user_pages
-#define get_user_pages(...) GUP_MACRO(__VA_ARGS__,	\
-		get_user_pages8, x,			\
-		get_user_pages6, x, x, x, x, x)(__VA_ARGS__)
-
-__gup_deprecated
-long get_user_pages_locked8(struct task_struct *tsk, struct mm_struct *mm,
-		unsigned long start, unsigned long nr_pages,
-		int write, int force, struct page **pages,
-		int *locked);
-#define GUPL_MACRO(_1, _2, _3, _4, _5, _6, _7, _8, get_user_pages_locked, ...)	\
-	get_user_pages_locked
-#define get_user_pages_locked(...) GUPL_MACRO(__VA_ARGS__,	\
-		get_user_pages_locked8,	x,			\
-		get_user_pages_locked6, x, x, x, x)(__VA_ARGS__)
-
-__gup_deprecated
-long get_user_pages_unlocked7(struct task_struct *tsk, struct mm_struct *mm,
-		unsigned long start, unsigned long nr_pages,
-		int write, int force, struct page **pages);
-#define GUPU_MACRO(_1, _2, _3, _4, _5, _6, _7, get_user_pages_unlocked, ...)	\
-	get_user_pages_unlocked
-#define get_user_pages_unlocked(...) GUPU_MACRO(__VA_ARGS__,	\
-		get_user_pages_unlocked7, x,			\
-		get_user_pages_unlocked5, x, x, x, x)(__VA_ARGS__)
-
 /* Container for pinned pfns / pages */
 struct frame_vector {
 	unsigned int nr_allocated;	/* Number of frames we have space for */
-- 
cgit v1.2.3


From c00bbcf8628969e103d4a7b351a53746f1025576 Mon Sep 17 00:00:00 2001
From: Stefan Hajnoczi <stefanha@redhat.com>
Date: Tue, 29 Mar 2016 16:43:45 +0100
Subject: virtio: add VIRTIO_CONFIG_S_NEEDS_RESET device status bit

The VIRTIO 1.0 specification added the DEVICE_NEEDS_RESET device status
bit in "VIRTIO-98: Add DEVICE_NEEDS_RESET".  This patch defines the
device status bit in the uapi header file so that both the kernel and
userspace applications can use it.

The bit is currently unused by the virtio guest drivers and vhost.
According to the spec "a good implementation will try to recover by
issuing a reset".  This is not attempted here because it requires
auditing the virtio drivers to ensure there are no resource leaks or
crashes if the device needs to be reset mid-operation.

See "2.1 Device Status Field" in the VIRTIO 1.0 specification for
details.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 include/uapi/linux/virtio_config.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/virtio_config.h b/include/uapi/linux/virtio_config.h
index c18264df9504..4cb65bbfa654 100644
--- a/include/uapi/linux/virtio_config.h
+++ b/include/uapi/linux/virtio_config.h
@@ -40,6 +40,8 @@
 #define VIRTIO_CONFIG_S_DRIVER_OK	4
 /* Driver has finished configuring features */
 #define VIRTIO_CONFIG_S_FEATURES_OK	8
+/* Device entered invalid state, driver must reset it */
+#define VIRTIO_CONFIG_S_NEEDS_RESET	0x40
 /* We've given up on this device. */
 #define VIRTIO_CONFIG_S_FAILED		0x80
 
-- 
cgit v1.2.3


From a0ca153f98db8cf25298565a09e11fe9d82846ad Mon Sep 17 00:00:00 2001
From: Alexander Duyck <aduyck@mirantis.com>
Date: Tue, 5 Apr 2016 09:13:39 -0700
Subject: GRE: Disable segmentation offloads w/ CSUM and we are encapsulated
 via FOU

This patch fixes an issue I found in which we were dropping frames if we
had enabled checksums on GRE headers that were encapsulated by either FOU
or GUE.  Without this patch I was barely able to get 1 Gb/s of throughput.
With this patch applied I am now at least getting around 6 Gb/s.

The issue is due to the fact that with FOU or GUE applied we do not provide
a transport offset pointing to the GRE header, nor do we offload it in
software as the GRE header is completely skipped by GSO and treated like a
VXLAN or GENEVE type header.  As such we need to prevent the stack from
generating it and also prevent GRE from generating it via any interface we
create.

Fixes: c3483384ee511 ("gro: Allow tunnel stacking in the case of FOU/GUE")
Signed-off-by: Alexander Duyck <aduyck@mirantis.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index cb0d5d09c2e4..8395308a2445 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2120,7 +2120,10 @@ struct napi_gro_cb {
 	/* Used in foo-over-udp, set in udp[46]_gro_receive */
 	u8	is_ipv6:1;
 
-	/* 7 bit hole */
+	/* Used in GRE, set in fou/gue_gro_receive */
+	u8	is_fou:1;
+
+	/* 6 bit hole */
 
 	/* used to support CHECKSUM_COMPLETE for tunneling protocols */
 	__wsum	csum;
-- 
cgit v1.2.3


From ba6f5e33bdbb9ed2014b778fbbaecf20060ca989 Mon Sep 17 00:00:00 2001
From: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Date: Wed, 6 Apr 2016 15:15:19 -0300
Subject: sctp: avoid refreshing heartbeat timer too often

Currently on high rate SCTP streams the heartbeat timer refresh can
consume quite a lot of resources as timer updates are costly and it
contains a random factor, which a) is also costly and b) invalidates
mod_timer() optimization for not editing a timer to the same value.
It may even cause the timer to be slightly advanced, for no good reason.

As suggested by David Laight this patch now removes this timer update
from hot path by leaving the timer on and re-evaluating upon its
expiration if the heartbeat is still needed or not, similarly to what is
done for TCP. If it's not needed anymore the timer is re-scheduled to
the new timeout, considering the time already elapsed.

For this, we now record the last tx timestamp per transport, updated in
the same spots as hb timer was restarted on tx. Also split up
sctp_transport_reset_timers into sctp_transport_reset_t3_rtx and
sctp_transport_reset_hb_timer, so we can re-arm T3 without re-arming the
heartbeat one.

On loopback with MTU of 65535 and data chunks with 1636, so that we
have a considerable amount of chunks without stressing system calls,
netperf -t SCTP_STREAM -l 30, perf looked like this before:

Samples: 103K of event 'cpu-clock', Event count (approx.): 25833000000
  Overhead  Command  Shared Object      Symbol
+    6,15%  netperf  [kernel.vmlinux]   [k] copy_user_enhanced_fast_string
-    5,43%  netperf  [kernel.vmlinux]   [k] _raw_write_unlock_irqrestore
   - _raw_write_unlock_irqrestore
      - 96,54% _raw_spin_unlock_irqrestore
         - 36,14% mod_timer
            + 97,24% sctp_transport_reset_timers
            + 2,76% sctp_do_sm
         + 33,65% __wake_up_sync_key
         + 28,77% sctp_ulpq_tail_event
         + 1,40% del_timer
      - 1,84% mod_timer
         + 99,03% sctp_transport_reset_timers
         + 0,97% sctp_do_sm
      + 1,50% sctp_ulpq_tail_event

And after this patch, now with netperf -l 60:

Samples: 230K of event 'cpu-clock', Event count (approx.): 57707250000
  Overhead  Command  Shared Object      Symbol
+    5,65%  netperf  [kernel.vmlinux]   [k] memcpy_erms
+    5,59%  netperf  [kernel.vmlinux]   [k] copy_user_enhanced_fast_string
-    5,05%  netperf  [kernel.vmlinux]   [k] _raw_spin_unlock_irqrestore
   - _raw_spin_unlock_irqrestore
      + 49,89% __wake_up_sync_key
      + 45,68% sctp_ulpq_tail_event
      - 2,85% mod_timer
         + 76,51% sctp_transport_reset_t3_rtx
         + 23,49% sctp_do_sm
      + 1,55% del_timer
+    2,50%  netperf  [sctp]             [k] sctp_datamsg_from_user
+    2,26%  netperf  [sctp]             [k] sctp_sendmsg

Throughput-wise, from 6800mbps without the patch to 7050mbps with it,
~3.7%.

Signed-off-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/structs.h | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index 6df1ce7a411c..5a404c354f4c 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -847,6 +847,11 @@ struct sctp_transport {
 	 */
 	ktime_t last_time_heard;
 
+	/* When was the last time that we sent a chunk using this
+	 * transport? We use this to check for idle transports
+	 */
+	unsigned long last_time_sent;
+
 	/* Last time(in jiffies) when cwnd is reduced due to the congestion
 	 * indication based on ECNE chunk.
 	 */
@@ -952,7 +957,8 @@ void sctp_transport_route(struct sctp_transport *, union sctp_addr *,
 			  struct sctp_sock *);
 void sctp_transport_pmtu(struct sctp_transport *, struct sock *sk);
 void sctp_transport_free(struct sctp_transport *);
-void sctp_transport_reset_timers(struct sctp_transport *);
+void sctp_transport_reset_t3_rtx(struct sctp_transport *);
+void sctp_transport_reset_hb_timer(struct sctp_transport *);
 int sctp_transport_hold(struct sctp_transport *);
 void sctp_transport_put(struct sctp_transport *);
 void sctp_transport_update_rto(struct sctp_transport *, __u32);
-- 
cgit v1.2.3


From 9ab179d83b4e31ea277a123492e419067c2f129a Mon Sep 17 00:00:00 2001
From: David Ahern <dsa@cumulusnetworks.com>
Date: Thu, 7 Apr 2016 11:10:06 -0700
Subject: net: vrf: Fix dst reference counting

Vivek reported a kernel exception deleting a VRF with an active
connection through it. The root cause is that the socket has a cached
reference to a dst that is destroyed. Converting the dst_destroy to
dst_release and letting proper reference counting kick in does not
work as the dst has a reference to the device which needs to be released
as well.

I talked to Hannes about this at netdev and he pointed out the ipv4 and
ipv6 dst handling has dst_ifdown for just this scenario. Rather than
continuing with the reinvented dst wheel in VRF just remove it and
leverage the ipv4 and ipv6 versions.

Fixes: 193125dbd8eb2 ("net: Introduce VRF device driver")
Fixes: 35402e3136634 ("net: Add IPv6 support to VRF device")

Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip6_route.h | 3 +++
 include/net/route.h     | 3 +++
 2 files changed, 6 insertions(+)

(limited to 'include')

diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index 295d291269e2..54c779416eec 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -101,6 +101,9 @@ void fib6_force_start_gc(struct net *net);
 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
 				    const struct in6_addr *addr, bool anycast);
 
+struct rt6_info *ip6_dst_alloc(struct net *net, struct net_device *dev,
+			       int flags);
+
 /*
  *	support functions for ND
  *
diff --git a/include/net/route.h b/include/net/route.h
index 9b0a523bb428..6de665bf1750 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -209,6 +209,9 @@ unsigned int inet_addr_type_dev_table(struct net *net,
 void ip_rt_multicast_event(struct in_device *);
 int ip_rt_ioctl(struct net *, unsigned int cmd, void __user *arg);
 void ip_rt_get_source(u8 *src, struct sk_buff *skb, struct rtable *rt);
+struct rtable *rt_dst_alloc(struct net_device *dev,
+			     unsigned int flags, u16 type,
+			     bool nopolicy, bool noxfrm, bool will_cache);
 
 struct in_ifaddr;
 void fib_add_ifaddr(struct in_ifaddr *);
-- 
cgit v1.2.3


From 1ecf689013b598918cd9a261a078e64571b60f90 Mon Sep 17 00:00:00 2001
From: stephen hemminger <stephen@networkplumber.org>
Date: Mon, 11 Apr 2016 13:39:08 -0700
Subject: devlink: add missing install of header

The new devlink.h in uapi was not being installed by
make headers_install

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/Kbuild | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild
index b71fd0b5cbad..813ffb2e22c9 100644
--- a/include/uapi/linux/Kbuild
+++ b/include/uapi/linux/Kbuild
@@ -96,6 +96,7 @@ header-y += cyclades.h
 header-y += cycx_cfm.h
 header-y += dcbnl.h
 header-y += dccp.h
+header-y += devlink.h
 header-y += dlmconstants.h
 header-y += dlm_device.h
 header-y += dlm.h
-- 
cgit v1.2.3


From b32e4482aadfd1322357f46d4ed8a990603664d9 Mon Sep 17 00:00:00 2001
From: Jaegeuk Kim <jaegeuk@kernel.org>
Date: Mon, 11 Apr 2016 15:51:57 -0700
Subject: fscrypto: don't let data integrity writebacks fail with ENOMEM

This patch fixes the issue introduced by the ext4 crypto fix in a same manner.
For F2FS, however, we flush the pending IOs and wait for a while to acquire free
memory.

Fixes: c9af28fdd4492 ("ext4 crypto: don't let data integrity writebacks fail with ENOMEM")
Cc: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 include/linux/fscrypto.h | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/fscrypto.h b/include/linux/fscrypto.h
index cd91f75de49b..6027f6bbb061 100644
--- a/include/linux/fscrypto.h
+++ b/include/linux/fscrypto.h
@@ -263,9 +263,9 @@ static inline void fscrypt_set_d_op(struct dentry *dentry)
 extern struct kmem_cache *fscrypt_info_cachep;
 int fscrypt_initialize(void);
 
-extern struct fscrypt_ctx *fscrypt_get_ctx(struct inode *);
+extern struct fscrypt_ctx *fscrypt_get_ctx(struct inode *, gfp_t);
 extern void fscrypt_release_ctx(struct fscrypt_ctx *);
-extern struct page *fscrypt_encrypt_page(struct inode *, struct page *);
+extern struct page *fscrypt_encrypt_page(struct inode *, struct page *, gfp_t);
 extern int fscrypt_decrypt_page(struct page *);
 extern void fscrypt_decrypt_bio_pages(struct fscrypt_ctx *, struct bio *);
 extern void fscrypt_pullback_bio_page(struct page **, bool);
@@ -299,7 +299,8 @@ extern int fscrypt_fname_usr_to_disk(struct inode *, const struct qstr *,
 #endif
 
 /* crypto.c */
-static inline struct fscrypt_ctx *fscrypt_notsupp_get_ctx(struct inode *i)
+static inline struct fscrypt_ctx *fscrypt_notsupp_get_ctx(struct inode *i,
+							gfp_t f)
 {
 	return ERR_PTR(-EOPNOTSUPP);
 }
@@ -310,7 +311,7 @@ static inline void fscrypt_notsupp_release_ctx(struct fscrypt_ctx *c)
 }
 
 static inline struct page *fscrypt_notsupp_encrypt_page(struct inode *i,
-						struct page *p)
+						struct page *p, gfp_t f)
 {
 	return ERR_PTR(-EOPNOTSUPP);
 }
-- 
cgit v1.2.3


From 1363074667a6b7d0507527742ccd7bbed5e3ceaa Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Tue, 12 Apr 2016 12:27:09 +0200
Subject: USB: uas: Add a new NO_REPORT_LUNS quirk

Add a new NO_REPORT_LUNS quirk and set it for Seagate drives with
an usb-id of: 0bc2:331a, as these will fail to respond to a
REPORT_LUNS command.

Cc: stable@vger.kernel.org
Reported-and-tested-by: David Webb <djw@noc.ac.uk>
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Acked-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/usb_usual.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/usb_usual.h b/include/linux/usb_usual.h
index 7f5f78bd15ad..245f57dbbb61 100644
--- a/include/linux/usb_usual.h
+++ b/include/linux/usb_usual.h
@@ -79,6 +79,8 @@
 		/* Cannot handle MI_REPORT_SUPPORTED_OPERATION_CODES */	\
 	US_FLAG(MAX_SECTORS_240,	0x08000000)		\
 		/* Sets max_sectors to 240 */			\
+	US_FLAG(NO_REPORT_LUNS,	0x10000000)			\
+		/* Cannot handle REPORT_LUNS */			\
 
 #define US_FLAG(name, value)	US_FL_##name = value ,
 enum { US_DO_ALL_FLAGS };
-- 
cgit v1.2.3


From e7e0c3e26587749b62d17b9dd0532874186c77f7 Mon Sep 17 00:00:00 2001
From: Sakari Ailus <sakari.ailus@linux.intel.com>
Date: Sun, 3 Apr 2016 16:15:00 -0300
Subject: [media] videobuf2-core: Check user space planes array in dqbuf

The number of planes in videobuf2 is specific to a buffer. In order to
verify that the planes array provided by the user is long enough, a new
vb2_buf_op is required.

Call __verify_planes_array() when the dequeued buffer is known. Return an
error to the caller if there was one, otherwise remove the buffer from the
done list.

Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Acked-by: Hans Verkuil <hans.verkuil@cisco.com>
Cc: stable@vger.kernel.org # for v4.4 and later
Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
---
 include/media/videobuf2-core.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/media/videobuf2-core.h b/include/media/videobuf2-core.h
index 8a0f55b6c2ba..5342ff4d748f 100644
--- a/include/media/videobuf2-core.h
+++ b/include/media/videobuf2-core.h
@@ -375,6 +375,9 @@ struct vb2_ops {
 /**
  * struct vb2_ops - driver-specific callbacks
  *
+ * @verify_planes_array: Verify that a given user space structure contains
+ *			enough planes for the buffer. This is called
+ *			for each dequeued buffer.
  * @fill_user_buffer:	given a vb2_buffer fill in the userspace structure.
  *			For V4L2 this is a struct v4l2_buffer.
  * @fill_vb2_buffer:	given a userspace structure, fill in the vb2_buffer.
@@ -384,6 +387,7 @@ struct vb2_ops {
  *			the vb2_buffer struct.
  */
 struct vb2_buf_ops {
+	int (*verify_planes_array)(struct vb2_buffer *vb, const void *pb);
 	void (*fill_user_buffer)(struct vb2_buffer *vb, void *pb);
 	int (*fill_vb2_buffer)(struct vb2_buffer *vb, const void *pb,
 				struct vb2_plane *planes);
-- 
cgit v1.2.3


From cba2e47abcbd80e3f46f460899290402f98090ec Mon Sep 17 00:00:00 2001
From: Toshi Kani <toshi.kani@hpe.com>
Date: Tue, 12 Apr 2016 18:10:52 -0600
Subject: pmem: fix BUG() error in pmem.h:48 on X86_32

After 'commit fc0c2028135c ("x86, pmem: use memcpy_mcsafe()
for memcpy_from_pmem()")', probing a PMEM device hits the BUG()
error below on X86_32 kernel.

 kernel BUG at include/linux/pmem.h:48!

memcpy_from_pmem() calls arch_memcpy_from_pmem(), which is
unimplemented since CONFIG_ARCH_HAS_PMEM_API is undefined on
X86_32.

Fix the BUG() error by adding default_memcpy_from_pmem().

Acked-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Toshi Kani <toshi.kani@hpe.com>
Signed-off-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Ross Zwisler <ross.zwisler@linux.intel.com>
---
 include/linux/pmem.h | 22 ++++++++++++++++------
 1 file changed, 16 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/pmem.h b/include/linux/pmem.h
index ac6d872ce067..57d146fe44dd 100644
--- a/include/linux/pmem.h
+++ b/include/linux/pmem.h
@@ -72,6 +72,18 @@ static inline void arch_invalidate_pmem(void __pmem *addr, size_t size)
 }
 #endif
 
+static inline bool arch_has_pmem_api(void)
+{
+	return IS_ENABLED(CONFIG_ARCH_HAS_PMEM_API);
+}
+
+static inline int default_memcpy_from_pmem(void *dst, void __pmem const *src,
+		size_t size)
+{
+	memcpy(dst, (void __force *) src, size);
+	return 0;
+}
+
 /*
  * memcpy_from_pmem - read from persistent memory with error handling
  * @dst: destination buffer
@@ -83,12 +95,10 @@ static inline void arch_invalidate_pmem(void __pmem *addr, size_t size)
 static inline int memcpy_from_pmem(void *dst, void __pmem const *src,
 		size_t size)
 {
-	return arch_memcpy_from_pmem(dst, src, size);
-}
-
-static inline bool arch_has_pmem_api(void)
-{
-	return IS_ENABLED(CONFIG_ARCH_HAS_PMEM_API);
+	if (arch_has_pmem_api())
+		return arch_memcpy_from_pmem(dst, src, size);
+	else
+		return default_memcpy_from_pmem(dst, src, size);
 }
 
 /**
-- 
cgit v1.2.3


From 34dbbcdbf63360661ff7bda6c5f52f99ac515f92 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Thu, 14 Apr 2016 11:22:00 -0700
Subject: Make file credentials available to the seqfile interfaces

A lot of seqfile users seem to be using things like %pK that uses the
credentials of the current process, but that is actually completely
wrong for filesystem interfaces.

The unix semantics for permission checking files is to check permissions
at _open_ time, not at read or write time, and that is not just a small
detail: passing off stdin/stdout/stderr to a suid application and making
the actual IO happen in privileged context is a classic exploit
technique.

So if we want to be able to look at permissions at read time, we need to
use the file open credentials, not the current ones.  Normal file
accesses can just use "f_cred" (or any of the helper functions that do
that, like file_ns_capable()), but the seqfile interfaces do not have
any such options.

It turns out that seq_file _does_ save away the user_ns information of
the file, though.  Since user_ns is just part of the full credential
information, replace that special case with saving off the cred pointer
instead, and suddenly seq_file has all the permission information it
needs.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/seq_file.h | 13 ++++---------
 1 file changed, 4 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h
index dde00defbaa5..f3d45dd42695 100644
--- a/include/linux/seq_file.h
+++ b/include/linux/seq_file.h
@@ -7,13 +7,10 @@
 #include <linux/mutex.h>
 #include <linux/cpumask.h>
 #include <linux/nodemask.h>
+#include <linux/fs.h>
+#include <linux/cred.h>
 
 struct seq_operations;
-struct file;
-struct path;
-struct inode;
-struct dentry;
-struct user_namespace;
 
 struct seq_file {
 	char *buf;
@@ -27,9 +24,7 @@ struct seq_file {
 	struct mutex lock;
 	const struct seq_operations *op;
 	int poll_event;
-#ifdef CONFIG_USER_NS
-	struct user_namespace *user_ns;
-#endif
+	const struct file *file;
 	void *private;
 };
 
@@ -147,7 +142,7 @@ int seq_release_private(struct inode *, struct file *);
 static inline struct user_namespace *seq_user_ns(struct seq_file *seq)
 {
 #ifdef CONFIG_USER_NS
-	return seq->user_ns;
+	return seq->file->f_cred->user_ns;
 #else
 	extern struct user_namespace init_user_ns;
 	return &init_user_ns;
-- 
cgit v1.2.3


From 33c162a980fe03498fcecb917f618ad7e7c55e61 Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Mon, 11 Apr 2016 15:29:36 -0700
Subject: ipv6: datagram: Update dst cache of a connected datagram sk during
 pmtu update

There is a case in connected UDP socket such that
getsockopt(IPV6_MTU) will return a stale MTU value. The reproducible
sequence could be the following:
1. Create a connected UDP socket
2. Send some datagrams out
3. Receive a ICMPV6_PKT_TOOBIG
4. No new outgoing datagrams to trigger the sk_dst_check()
   logic to update the sk->sk_dst_cache.
5. getsockopt(IPV6_MTU) returns the mtu from the invalid
   sk->sk_dst_cache instead of the newly created RTF_CACHE clone.

This patch updates the sk->sk_dst_cache for a connected datagram sk
during pmtu-update code path.

Note that the sk->sk_v6_daddr is used to do the route lookup
instead of skb->data (i.e. iph).  It is because a UDP socket can become
connected after sending out some datagrams in un-connected state.  or
It can be connected multiple times to different destinations.  Hence,
iph may not be related to where sk is currently connected to.

It is done under '!sock_owned_by_user(sk)' condition because
the user may make another ip6_datagram_connect()  (i.e changing
the sk->sk_v6_daddr) while dst lookup is happening in the pmtu-update
code path.

For the sock_owned_by_user(sk) == true case, the next patch will
introduce a release_cb() which will update the sk->sk_dst_cache.

Test:

Server (Connected UDP Socket):
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Route Details:
[root@arch-fb-vm1 ~]# ip -6 r show | egrep '2fac'
2fac::/64 dev eth0  proto kernel  metric 256  pref medium
2fac:face::/64 via 2fac::face dev eth0  metric 1024  pref medium

A simple python code to create a connected UDP socket:

import socket
import errno

HOST = '2fac::1'
PORT = 8080

s = socket.socket(socket.AF_INET6, socket.SOCK_DGRAM)
s.bind((HOST, PORT))
s.connect(('2fac:face::face', 53))
print("connected")
while True:
    try:
	data = s.recv(1024)
    except socket.error as se:
	if se.errno == errno.EMSGSIZE:
		pmtu = s.getsockopt(41, 24)
		print("PMTU:%d" % pmtu)
		break
s.close()

Python program output after getting a ICMPV6_PKT_TOOBIG:
[root@arch-fb-vm1 ~]# python2 ~/devshare/kernel/tasks/fib6/udp-connect-53-8080.py
connected
PMTU:1300

Cache routes after recieving TOOBIG:
[root@arch-fb-vm1 ~]# ip -6 r show table cache
2fac:face::face via 2fac::face dev eth0  metric 0
    cache  expires 463sec mtu 1300 pref medium

Client (Send the ICMPV6_PKT_TOOBIG):
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
scapy is used to generate the TOOBIG message.  Here is the scapy script I have
used:

>>> p=Ether(src='da:75:4d:36:ac:32', dst='52:54:00:12:34:66', type=0x86dd)/IPv6(src='2fac::face', dst='2fac::1')/ICMPv6PacketTooBig(mtu=1300)/IPv6(src='2fac::
1',dst='2fac:face::face', nh='UDP')/UDP(sport=8080,dport=53)
>>> sendp(p, iface='qemubr0')

Fixes: 45e4fd26683c ("ipv6: Only create RTF_CACHE routes after encountering pmtu exception")
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Reported-by: Wei Wang <weiwan@google.com>
Cc: Cong Wang <xiyou.wangcong@gmail.com>
Cc: Eric Dumazet <edumazet@google.com>
Cc: Wei Wang <weiwan@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ipv6.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index d0aeb97aec5d..fd02e90b1289 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -959,6 +959,7 @@ int compat_ipv6_getsockopt(struct sock *sk, int level, int optname,
 int ip6_datagram_connect(struct sock *sk, struct sockaddr *addr, int addr_len);
 int ip6_datagram_connect_v6_only(struct sock *sk, struct sockaddr *addr,
 				 int addr_len);
+int ip6_datagram_dst_update(struct sock *sk, bool fix_sk_saddr);
 
 int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len,
 		    int *addr_len);
-- 
cgit v1.2.3


From e646b657f6983017783914a951039e323120dc55 Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Mon, 11 Apr 2016 15:29:37 -0700
Subject: ipv6: udp: Do a route lookup and update during release_cb

This patch adds a release_cb for UDPv6.  It does a route lookup
and updates sk->sk_dst_cache if it is needed.  It picks up the
left-over job from ip6_sk_update_pmtu() if the sk was owned
by user during the pmtu update.

It takes a rcu_read_lock to protect the __sk_dst_get() operations
because another thread may do ip6_dst_store() without taking the
sk lock (e.g. sendmsg).

Fixes: 45e4fd26683c ("ipv6: Only create RTF_CACHE routes after encountering pmtu exception")
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Reported-by: Wei Wang <weiwan@google.com>
Cc: Cong Wang <xiyou.wangcong@gmail.com>
Cc: Eric Dumazet <edumazet@google.com>
Cc: Wei Wang <weiwan@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ipv6.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index fd02e90b1289..1be050ada8c5 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -960,6 +960,7 @@ int ip6_datagram_connect(struct sock *sk, struct sockaddr *addr, int addr_len);
 int ip6_datagram_connect_v6_only(struct sock *sk, struct sockaddr *addr,
 				 int addr_len);
 int ip6_datagram_dst_update(struct sock *sk, bool fix_sk_saddr);
+void ip6_datagram_release_cb(struct sock *sk);
 
 int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len,
 		    int *addr_len);
-- 
cgit v1.2.3


From d894ba18d4e449b3a7f6eb491f16c9e02933736e Mon Sep 17 00:00:00 2001
From: Craig Gallek <kraig@google.com>
Date: Tue, 12 Apr 2016 13:11:25 -0400
Subject: soreuseport: fix ordering for mixed v4/v6 sockets
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

With the SO_REUSEPORT socket option, it is possible to create sockets
in the AF_INET and AF_INET6 domains which are bound to the same IPv4 address.
This is only possible with SO_REUSEPORT and when not using IPV6_V6ONLY on
the AF_INET6 sockets.

Prior to the commits referenced below, an incoming IPv4 packet would
always be routed to a socket of type AF_INET when this mixed-mode was used.
After those changes, the same packet would be routed to the most recently
bound socket (if this happened to be an AF_INET6 socket, it would
have an IPv4 mapped IPv6 address).

The change in behavior occurred because the recent SO_REUSEPORT optimizations
short-circuit the socket scoring logic as soon as they find a match.  They
did not take into account the scoring logic that favors AF_INET sockets
over AF_INET6 sockets in the event of a tie.

To fix this problem, this patch changes the insertion order of AF_INET
and AF_INET6 addresses in the TCP and UDP socket lists when the sockets
have SO_REUSEPORT set.  AF_INET sockets will be inserted at the head of the
list and AF_INET6 sockets with SO_REUSEPORT set will always be inserted at
the tail of the list.  This will force AF_INET sockets to always be
considered first.

Fixes: e32ea7e74727 ("soreuseport: fast reuseport UDP socket selection")
Fixes: 125e80b88687 ("soreuseport: fast reuseport TCP socket selection")

Reported-by: Maciej Żenczykowski <maze@google.com>
Signed-off-by: Craig Gallek <kraig@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/rculist_nulls.h | 39 +++++++++++++++++++++++++++++++++++++++
 include/net/sock.h            |  6 +++++-
 2 files changed, 44 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/rculist_nulls.h b/include/linux/rculist_nulls.h
index 1c33dd7da4a7..4ae95f7e8597 100644
--- a/include/linux/rculist_nulls.h
+++ b/include/linux/rculist_nulls.h
@@ -98,6 +98,45 @@ static inline void hlist_nulls_add_head_rcu(struct hlist_nulls_node *n,
 	if (!is_a_nulls(first))
 		first->pprev = &n->next;
 }
+
+/**
+ * hlist_nulls_add_tail_rcu
+ * @n: the element to add to the hash list.
+ * @h: the list to add to.
+ *
+ * Description:
+ * Adds the specified element to the end of the specified hlist_nulls,
+ * while permitting racing traversals.  NOTE: tail insertion requires
+ * list traversal.
+ *
+ * The caller must take whatever precautions are necessary
+ * (such as holding appropriate locks) to avoid racing
+ * with another list-mutation primitive, such as hlist_nulls_add_head_rcu()
+ * or hlist_nulls_del_rcu(), running on this same list.
+ * However, it is perfectly legal to run concurrently with
+ * the _rcu list-traversal primitives, such as
+ * hlist_nulls_for_each_entry_rcu(), used to prevent memory-consistency
+ * problems on Alpha CPUs.  Regardless of the type of CPU, the
+ * list-traversal primitive must be guarded by rcu_read_lock().
+ */
+static inline void hlist_nulls_add_tail_rcu(struct hlist_nulls_node *n,
+					struct hlist_nulls_head *h)
+{
+	struct hlist_nulls_node *i, *last = NULL;
+
+	for (i = hlist_nulls_first_rcu(h); !is_a_nulls(i);
+	     i = hlist_nulls_next_rcu(i))
+		last = i;
+
+	if (last) {
+		n->next = last->next;
+		n->pprev = &last->next;
+		rcu_assign_pointer(hlist_nulls_next_rcu(last), n);
+	} else {
+		hlist_nulls_add_head_rcu(n, h);
+	}
+}
+
 /**
  * hlist_nulls_for_each_entry_rcu - iterate over rcu list of given type
  * @tpos:	the type * to use as a loop cursor.
diff --git a/include/net/sock.h b/include/net/sock.h
index 255d3e03727b..121ffc115c4f 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -630,7 +630,11 @@ static inline void sk_add_node_rcu(struct sock *sk, struct hlist_head *list)
 
 static inline void __sk_nulls_add_node_rcu(struct sock *sk, struct hlist_nulls_head *list)
 {
-	hlist_nulls_add_head_rcu(&sk->sk_nulls_node, list);
+	if (IS_ENABLED(CONFIG_IPV6) && sk->sk_reuseport &&
+	    sk->sk_family == AF_INET6)
+		hlist_nulls_add_tail_rcu(&sk->sk_nulls_node, list);
+	else
+		hlist_nulls_add_head_rcu(&sk->sk_nulls_node, list);
 }
 
 static inline void sk_nulls_add_node_rcu(struct sock *sk, struct hlist_nulls_head *list)
-- 
cgit v1.2.3


From cb92148b58a49455f3a7204eba3aee09a8b7683c Mon Sep 17 00:00:00 2001
From: Hariprasad Shenai <hariprasad@chelsio.com>
Date: Fri, 15 Apr 2016 13:00:11 -0500
Subject: PCI: Add pci_set_vpd_size() to set VPD size

After 104daa71b396 ("PCI: Determine actual VPD size on first access"), the
PCI core computes the valid VPD size by parsing the VPD starting at offset
0x0.  We don't attempt to read past that valid size because that causes
some devices to crash.

However, some devices do have data past that valid size.  For example,
Chelsio adapters contain two VPD structures, and the driver needs both of
them.

Add pci_set_vpd_size().  If a driver knows it is safe to read past the end
of the VPD data structure at offset 0, it can use pci_set_vpd_size() to
allow access to as much data as it needs.

[bhelgaas: changelog, split patches, rename to pci_set_vpd_size() and
return int (not ssize_t)]
Fixes: 104daa71b396 ("PCI: Determine actual VPD size on first access")
Tested-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: Casey Leedom <leedom@chelsio.com>
Signed-off-by: Hariprasad Shenai <hariprasad@chelsio.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 include/linux/pci.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/pci.h b/include/linux/pci.h
index 004b8133417d..932ec74909c6 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1111,6 +1111,7 @@ void pci_unlock_rescan_remove(void);
 /* Vital product data routines */
 ssize_t pci_read_vpd(struct pci_dev *dev, loff_t pos, size_t count, void *buf);
 ssize_t pci_write_vpd(struct pci_dev *dev, loff_t pos, size_t count, const void *buf);
+int pci_set_vpd_size(struct pci_dev *dev, size_t len);
 
 /* Helper functions for low-level code (drivers/pci/setup-[bus,res].c) */
 resource_size_t pcibios_retrieve_fw_addr(struct pci_dev *dev, int idx);
-- 
cgit v1.2.3


From 67245ff332064c01b760afa7a384ccda024bfd24 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sat, 16 Apr 2016 15:16:07 -0700
Subject: devpts: clean up interface to pty drivers

This gets rid of the horrible notion of having that

    struct inode *ptmx_inode

be the linchpin of the interface between the pty code and devpts.

By de-emphasizing the ptmx inode, a lot of things actually get cleaner,
and we will have a much saner way forward.  In particular, this will
allow us to associate with any particular devpts instance at open-time,
and not be artificially tied to one particular ptmx inode.

The patch itself is actually fairly straightforward, and apart from some
locking and return path cleanups it's pretty mechanical:

 - the interfaces that devpts exposes all take "struct pts_fs_info *"
   instead of "struct inode *ptmx_inode" now.

   NOTE! The "struct pts_fs_info" thing is a completely opaque structure
   as far as the pty driver is concerned: it's still declared entirely
   internally to devpts. So the pty code can't actually access it in any
   way, just pass it as a "cookie" to the devpts code.

 - the "look up the pts fs info" is now a single clear operation, that
   also does the reference count increment on the pts superblock.

   So "devpts_add/del_ref()" is gone, and replaced by a "lookup and get
   ref" operation (devpts_get_ref(inode)), along with a "put ref" op
   (devpts_put_ref()).

 - the pty master "tty->driver_data" field now contains the pts_fs_info,
   not the ptmx inode.

 - because we don't care about the ptmx inode any more as some kind of
   base index, the ref counting can now drop the inode games - it just
   gets the ref on the superblock.

 - the pts_fs_info now has a back-pointer to the super_block. That's so
   that we can easily look up the information we actually need. Although
   quite often, the pts fs info was actually all we wanted, and not having
   to look it up based on some magical inode makes things more
   straightforward.

In particular, now that "devpts_get_ref(inode)" operation should really
be the *only* place we need to look up what devpts instance we're
associated with, and we do it exactly once, at ptmx_open() time.

The other side of this is that one ptmx node could now be associated
with multiple different devpts instances - you could have a single
/dev/ptmx node, and then have multiple mount namespaces with their own
instances of devpts mounted on /dev/pts/.  And that's all perfectly sane
in a model where we just look up the pts instance at open time.

This will eventually allow us to get rid of our odd single-vs-multiple
pts instance model, but this patch in itself changes no semantics, only
an internal binding model.

Cc: Eric Biederman <ebiederm@xmission.com>
Cc: Peter Anvin <hpa@zytor.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Peter Hurley <peter@hurleysoftware.com>
Cc: Serge Hallyn <serge.hallyn@ubuntu.com>
Cc: Willy Tarreau <w@1wt.eu>
Cc: Aurelien Jarno <aurelien@aurel32.net>
Cc: Alan Cox <gnomes@lxorguk.ukuu.org.uk>
Cc: Jann Horn <jann@thejh.net>
Cc: Greg KH <greg@kroah.com>
Cc: Jiri Slaby <jslaby@suse.com>
Cc: Florian Weimer <fw@deneb.enyo.de>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/devpts_fs.h | 34 ++++++++++------------------------
 1 file changed, 10 insertions(+), 24 deletions(-)

(limited to 'include')

diff --git a/include/linux/devpts_fs.h b/include/linux/devpts_fs.h
index e0ee0b3000b2..358a4db72a27 100644
--- a/include/linux/devpts_fs.h
+++ b/include/linux/devpts_fs.h
@@ -15,38 +15,24 @@
 
 #include <linux/errno.h>
 
+struct pts_fs_info;
+
 #ifdef CONFIG_UNIX98_PTYS
 
-int devpts_new_index(struct inode *ptmx_inode);
-void devpts_kill_index(struct inode *ptmx_inode, int idx);
-void devpts_add_ref(struct inode *ptmx_inode);
-void devpts_del_ref(struct inode *ptmx_inode);
+/* Look up a pts fs info and get a ref to it */
+struct pts_fs_info *devpts_get_ref(struct inode *, struct file *);
+void devpts_put_ref(struct pts_fs_info *);
+
+int devpts_new_index(struct pts_fs_info *);
+void devpts_kill_index(struct pts_fs_info *, int);
+
 /* mknod in devpts */
-struct inode *devpts_pty_new(struct inode *ptmx_inode, dev_t device, int index,
-		void *priv);
+struct inode *devpts_pty_new(struct pts_fs_info *, dev_t, int, void *);
 /* get private structure */
 void *devpts_get_priv(struct inode *pts_inode);
 /* unlink */
 void devpts_pty_kill(struct inode *inode);
 
-#else
-
-/* Dummy stubs in the no-pty case */
-static inline int devpts_new_index(struct inode *ptmx_inode) { return -EINVAL; }
-static inline void devpts_kill_index(struct inode *ptmx_inode, int idx) { }
-static inline void devpts_add_ref(struct inode *ptmx_inode) { }
-static inline void devpts_del_ref(struct inode *ptmx_inode) { }
-static inline struct inode *devpts_pty_new(struct inode *ptmx_inode,
-		dev_t device, int index, void *priv)
-{
-	return ERR_PTR(-EINVAL);
-}
-static inline void *devpts_get_priv(struct inode *pts_inode)
-{
-	return NULL;
-}
-static inline void devpts_pty_kill(struct inode *inode) { }
-
 #endif
 
 
-- 
cgit v1.2.3


From 2309236c13feae619572324efd3e910e66ef6bd0 Mon Sep 17 00:00:00 2001
From: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Date: Mon, 18 Apr 2016 14:37:10 +0300
Subject: cls_cgroup: get sk_classid only from full sockets

skb->sk could point to timewait or request socket which has no sk_classid.
Detected as "BUG: KASAN: slab-out-of-bounds in cls_cgroup_classify".

Signed-off-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/cls_cgroup.h | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/net/cls_cgroup.h b/include/net/cls_cgroup.h
index c0a92e2c286d..74c9693d4941 100644
--- a/include/net/cls_cgroup.h
+++ b/include/net/cls_cgroup.h
@@ -17,6 +17,7 @@
 #include <linux/hardirq.h>
 #include <linux/rcupdate.h>
 #include <net/sock.h>
+#include <net/inet_sock.h>
 
 #ifdef CONFIG_CGROUP_NET_CLASSID
 struct cgroup_cls_state {
@@ -63,11 +64,13 @@ static inline u32 task_get_classid(const struct sk_buff *skb)
 	 * softirqs always disables bh.
 	 */
 	if (in_serving_softirq()) {
+		struct sock *sk = skb_to_full_sk(skb);
+
 		/* If there is an sock_cgroup_classid we'll use that. */
-		if (!skb->sk)
+		if (!sk || !sk_fullsock(sk))
 			return 0;
 
-		classid = sock_cgroup_classid(&skb->sk->sk_cgrp_data);
+		classid = sock_cgroup_classid(&sk->sk_cgrp_data);
 	}
 
 	return classid;
-- 
cgit v1.2.3


From 1d0fd42fa31d18ba0a3e0dd008c9e93e1cebe451 Mon Sep 17 00:00:00 2001
From: Wei Ni <wni@nvidia.com>
Date: Thu, 3 Mar 2016 17:33:46 +0800
Subject: thermal: consistently use int for trip temp

The commit 17e8351a7739 consistently use int for temperature,
however it missed a few in trip temperature and thermal_core.

In current codes, the trip->temperature used "unsigned long"
and zone->temperature used"int", if the temperature is negative
value, it will get wrong result when compare temperature with
trip temperature.

This patch can fix it.

Signed-off-by: Wei Ni <wni@nvidia.com>
Signed-off-by: Eduardo Valentin <edubezval@gmail.com>
---
 include/linux/thermal.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/thermal.h b/include/linux/thermal.h
index a55d0523f75d..1b8a5a7876ce 100644
--- a/include/linux/thermal.h
+++ b/include/linux/thermal.h
@@ -352,8 +352,8 @@ struct thermal_zone_of_device_ops {
 
 struct thermal_trip {
 	struct device_node *np;
-	unsigned long int temperature;
-	unsigned long int hysteresis;
+	int temperature;
+	int hysteresis;
 	enum thermal_trip_type type;
 };
 
-- 
cgit v1.2.3


From fba7cd681b6155e2d93e7862fcd6f970336b83c3 Mon Sep 17 00:00:00 2001
From: Romain Perier <romain.perier@free-electrons.com>
Date: Thu, 14 Apr 2016 15:36:03 +0200
Subject: asm-generic/futex: Re-enable preemption in
 futex_atomic_cmpxchg_inatomic()

The recent decoupling of pagefault disable and preempt disable added an
explicit preempt_disable/enable() pair to the futex_atomic_cmpxchg_inatomic()
implementation in asm-generic/futex.h. But it forgot to add preempt_enable()
calls to the error handling code pathes, which results in a preemption count
imbalance.

This is observable on boot when the test for atomic_cmpxchg() is calling
futex_atomic_cmpxchg_inatomic() on a NULL pointer.

Add the missing preempt_enable() calls to the error handling code pathes.

[ tglx: Massaged changelog ]

Fixes: d9b9ff8c1889 ("sched/preempt, futex: Disable preemption in UP futex_atomic_cmpxchg_inatomic() explicitly")
Signed-off-by: Romain Perier <romain.perier@free-electrons.com>
Cc: linux-arch@vger.kernel.org
Cc: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: stable@vger.kernel.org
Link: http://lkml.kernel.org/r/1460640963-690-1-git-send-email-romain.perier@free-electrons.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-generic/futex.h | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/asm-generic/futex.h b/include/asm-generic/futex.h
index e56272c919b5..bf2d34c9d804 100644
--- a/include/asm-generic/futex.h
+++ b/include/asm-generic/futex.h
@@ -108,11 +108,15 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
 	u32 val;
 
 	preempt_disable();
-	if (unlikely(get_user(val, uaddr) != 0))
+	if (unlikely(get_user(val, uaddr) != 0)) {
+		preempt_enable();
 		return -EFAULT;
+	}
 
-	if (val == oldval && unlikely(put_user(newval, uaddr) != 0))
+	if (val == oldval && unlikely(put_user(newval, uaddr) != 0)) {
+		preempt_enable();
 		return -EFAULT;
+	}
 
 	*uval = val;
 	preempt_enable();
-- 
cgit v1.2.3


From 3194ed497939c6448005542e3ca4fa2386968fa0 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Thu, 21 Apr 2016 17:49:11 +0200
Subject: ALSA: hda - Fix possible race on regmap bypass flip

HD-audio driver uses regmap cache bypass feature for reading a raw
value without the cache.  But this is racy since both the cached and
the uncached reads may occur concurrently.  The former is done via the
normal control API access while the latter comes from the proc file
read.

Even though the regmap itself has the protection against the
concurrent accesses, the flag set/reset is done without the
protection, so it may lead to inconsistent state of bypass flag that
doesn't match with the current read and occasionally result in a
kernel WARNING like:
  WARNING: CPU: 3 PID: 2731 at drivers/base/regmap/regcache.c:499 regcache_cache_only+0x78/0x93

One way to work around such a problem is to wrap with a mutex.  But in
this case, the solution is simpler: for the uncached read, we just
skip the regmap and directly calls its accessor.  The verb execution
there is protected by itself, so basically it's safe to call
individually.

Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=116171
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 include/sound/hda_regmap.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/sound/hda_regmap.h b/include/sound/hda_regmap.h
index 2767c55a641e..ca64f0f50b45 100644
--- a/include/sound/hda_regmap.h
+++ b/include/sound/hda_regmap.h
@@ -17,6 +17,8 @@ int snd_hdac_regmap_add_vendor_verb(struct hdac_device *codec,
 				    unsigned int verb);
 int snd_hdac_regmap_read_raw(struct hdac_device *codec, unsigned int reg,
 			     unsigned int *val);
+int snd_hdac_regmap_read_raw_uncached(struct hdac_device *codec,
+				      unsigned int reg, unsigned int *val);
 int snd_hdac_regmap_write_raw(struct hdac_device *codec, unsigned int reg,
 			      unsigned int val);
 int snd_hdac_regmap_update_raw(struct hdac_device *codec, unsigned int reg,
-- 
cgit v1.2.3


From cfea5a688eb37bcd1081255df9f9f777f4e61999 Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Tue, 19 Apr 2016 22:39:29 -0700
Subject: tcp: Merge tx_flags and tskey in tcp_shifted_skb

After receiving sacks, tcp_shifted_skb() will collapse
skbs if possible.  tx_flags and tskey also have to be
merged.

This patch reuses the tcp_skb_collapse_tstamp() to handle
them.

BPF Output Before:
~~~~~
<no-output-due-to-missing-tstamp-event>

BPF Output After:
~~~~~
<...>-2024  [007] d.s.    88.644374: : ee_data:14599

Packetdrill Script:
~~~~~
+0 `sysctl -q -w net.ipv4.tcp_min_tso_segs=10`
+0 `sysctl -q -w net.ipv4.tcp_no_metrics_save=1`
+0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+0 bind(3, ..., ...) = 0
+0 listen(3, 1) = 0

0.100 < S 0:0(0) win 32792 <mss 1460,sackOK,nop,nop,nop,wscale 7>
0.100 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 7>
0.200 < . 1:1(0) ack 1 win 257
0.200 accept(3, ..., ...) = 4
+0 setsockopt(4, SOL_TCP, TCP_NODELAY, [1], 4) = 0

0.200 write(4, ..., 1460) = 1460
+0 setsockopt(4, SOL_SOCKET, 37, [2688], 4) = 0
0.200 write(4, ..., 13140) = 13140

0.200 > P. 1:1461(1460) ack 1
0.200 > . 1461:8761(7300) ack 1
0.200 > P. 8761:14601(5840) ack 1

0.300 < . 1:1(0) ack 1 win 257 <sack 1461:14601,nop,nop>
0.300 > P. 1:1461(1460) ack 1
0.400 < . 1:1(0) ack 14601 win 257

0.400 close(4) = 0
0.400 > F. 14601:14601(0) ack 1
0.500 < F. 1:1(0) ack 14602 win 257
0.500 > . 14602:14602(0) ack 2

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Cc: Eric Dumazet <edumazet@google.com>
Cc: Neal Cardwell <ncardwell@google.com>
Cc: Soheil Hassas Yeganeh <soheil@google.com>
Cc: Willem de Bruijn <willemb@google.com>
Cc: Yuchung Cheng <ycheng@google.com>
Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
Tested-by: Soheil Hassas Yeganeh <soheil@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index b91370f61be6..6db10228113f 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -552,6 +552,8 @@ void tcp_send_ack(struct sock *sk);
 void tcp_send_delayed_ack(struct sock *sk);
 void tcp_send_loss_probe(struct sock *sk);
 bool tcp_schedule_loss_probe(struct sock *sk);
+void tcp_skb_collapse_tstamp(struct sk_buff *skb,
+			     const struct sk_buff *next_skb);
 
 /* tcp_input.c */
 void tcp_resume_early_retransmit(struct sock *sk);
-- 
cgit v1.2.3


From 4bfd2e6e53435a214888fd35e230157a38ffc6a0 Mon Sep 17 00:00:00 2001
From: Daniel Jurgens <danielj@mellanox.com>
Date: Wed, 20 Apr 2016 16:01:16 +0300
Subject: net/mlx4_core: Avoid repeated calls to pci enable/disable

Maintain the PCI status and provide wrappers for enabling and disabling
the PCI device.  Performing the actions more than once without doing
its opposite results in warning logs.

This occurred when EEH hotplugged the device causing a warning for
disabling an already disabled device.

Fixes: 2ba5fbd62b25 ('net/mlx4_core: Handle AER flow properly')
Signed-off-by: Daniel Jurgens <danielj@mellanox.com>
Signed-off-by: Yishai Hadas <yishaih@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mlx4/device.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include')

diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 8541a913f6a3..d1f904c8b2cb 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -828,6 +828,11 @@ struct mlx4_vf_dev {
 	u8			n_ports;
 };
 
+enum mlx4_pci_status {
+	MLX4_PCI_STATUS_DISABLED,
+	MLX4_PCI_STATUS_ENABLED,
+};
+
 struct mlx4_dev_persistent {
 	struct pci_dev	       *pdev;
 	struct mlx4_dev	       *dev;
@@ -841,6 +846,8 @@ struct mlx4_dev_persistent {
 	u8		state;
 	struct mutex	interface_state_mutex; /* protect SW state */
 	u8	interface_state;
+	struct mutex		pci_status_mutex; /* sync pci state */
+	enum mlx4_pci_status	pci_status;
 };
 
 struct mlx4_dev {
-- 
cgit v1.2.3


From 221004c66a58949a0f25c937a6789c0839feb530 Mon Sep 17 00:00:00 2001
From: Huacai Chen <chenhc@lemote.com>
Date: Tue, 19 Apr 2016 19:19:11 +0800
Subject: drm: Loongson-3 doesn't fully support wc memory

Signed-off-by: Huacai Chen <chenhc@lemote.com>
Cc: stable@vger.kernel.org
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 include/drm/drm_cache.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/drm/drm_cache.h b/include/drm/drm_cache.h
index 461a0558bca4..cebecff536a3 100644
--- a/include/drm/drm_cache.h
+++ b/include/drm/drm_cache.h
@@ -39,6 +39,8 @@ static inline bool drm_arch_can_wc_memory(void)
 {
 #if defined(CONFIG_PPC) && !defined(CONFIG_NOT_COHERENT_CACHE)
 	return false;
+#elif defined(CONFIG_MIPS) && defined(CONFIG_CPU_LOONGSON3)
+	return false;
 #else
 	return true;
 #endif
-- 
cgit v1.2.3


From 75dd602a5198a6e5f75534db52b6e6fbaabb33d1 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 30 Mar 2016 11:36:59 +0200
Subject: lockdep: Fix lock_chain::base size

lock_chain::base is used to store an index into the chain_hlocks[]
array, however that array contains more elements than can be indexed
using the u16.

Change the lock_chain structure to use a bitfield to encode the data
it needs and add BUILD_BUG_ON() assertions to check the fields are
wide enough.

Also, for DEBUG_LOCKDEP, assert that we don't run out of elements of
that array; as that would wreck the collision detectoring.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Alfredo Alvarez Fernandez <alfredoalvarezfernandez@gmail.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Sedat Dilek <sedat.dilek@gmail.com>
Cc: Theodore Ts'o <tytso@mit.edu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/20160330093659.GS3408@twins.programming.kicks-ass.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/lockdep.h | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index d026b190c530..d10ef06971b5 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -196,9 +196,11 @@ struct lock_list {
  * We record lock dependency chains, so that we can cache them:
  */
 struct lock_chain {
-	u8				irq_context;
-	u8				depth;
-	u16				base;
+	/* see BUILD_BUG_ON()s in lookup_chain_cache() */
+	unsigned int			irq_context :  2,
+					depth       :  6,
+					base	    : 24;
+	/* 4 byte hole */
 	struct hlist_node		entry;
 	u64				chain_key;
 };
-- 
cgit v1.2.3


From 987aedb5d6f6e10c5203c6d0aab9a60ec22c7e93 Mon Sep 17 00:00:00 2001
From: Andre Przywara <andre.przywara@arm.com>
Date: Mon, 11 Apr 2016 10:17:46 +0100
Subject: generic syscalls: wire up preadv2 and pwritev2 syscalls

These new syscalls are implemented as generic code, so enable them for
architectures like arm64 which use the generic syscall table.

Signed-off-by: Andre Przywara <andre.przywara@arm.com>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 include/uapi/asm-generic/unistd.h | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
index 2622b33fb2ec..6e0f5f01734c 100644
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -717,9 +717,13 @@ __SYSCALL(__NR_membarrier, sys_membarrier)
 __SYSCALL(__NR_mlock2, sys_mlock2)
 #define __NR_copy_file_range 285
 __SYSCALL(__NR_copy_file_range, sys_copy_file_range)
+#define __NR_preadv2 286
+__SYSCALL(__NR_preadv2, sys_preadv2)
+#define __NR_pwritev2 287
+__SYSCALL(__NR_pwritev2, sys_pwritev2)
 
 #undef __NR_syscalls
-#define __NR_syscalls 286
+#define __NR_syscalls 288
 
 /*
  * All syscalls below here should go away really,
-- 
cgit v1.2.3


From 7ceb2afbd6aee4643056b47156baad6841db8e78 Mon Sep 17 00:00:00 2001
From: Elad Raz <eladr@mellanox.com>
Date: Thu, 21 Apr 2016 12:52:43 +0200
Subject: switchdev: Adding complete operation to deferred switchdev ops

When using switchdev deferred operation (SWITCHDEV_F_DEFER), the operation
is executed in different context and the application doesn't have any way
to get the operation real status.

Adding a completion callback fixes that. This patch adds fields to
switchdev_attr and switchdev_obj "complete_priv" field which is used by
the "complete" callback.

Application can set a complete function which will be called once the
operation executed.

Signed-off-by: Elad Raz <eladr@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Reviewed-by: Ido Schimmel <idosch@mellanox.com>
Acked-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/switchdev.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/net/switchdev.h b/include/net/switchdev.h
index d451122e8404..51d77b2ce2b2 100644
--- a/include/net/switchdev.h
+++ b/include/net/switchdev.h
@@ -54,6 +54,8 @@ struct switchdev_attr {
 	struct net_device *orig_dev;
 	enum switchdev_attr_id id;
 	u32 flags;
+	void *complete_priv;
+	void (*complete)(struct net_device *dev, int err, void *priv);
 	union {
 		struct netdev_phys_item_id ppid;	/* PORT_PARENT_ID */
 		u8 stp_state;				/* PORT_STP_STATE */
@@ -75,6 +77,8 @@ struct switchdev_obj {
 	struct net_device *orig_dev;
 	enum switchdev_obj_id id;
 	u32 flags;
+	void *complete_priv;
+	void (*complete)(struct net_device *dev, int err, void *priv);
 };
 
 /* SWITCHDEV_OBJ_ID_PORT_VLAN */
-- 
cgit v1.2.3


From 748164802c1bd2c52937d20782b07d8c68dd9a4f Mon Sep 17 00:00:00 2001
From: Sabrina Dubroca <sd@queasysnail.net>
Date: Fri, 22 Apr 2016 11:28:08 +0200
Subject: macsec: add missing macsec prefix in uapi

I accidentally forgot some MACSEC_ prefixes in if_macsec.h.

Fixes: dece8d2b78d1 ("uapi: add MACsec bits")
Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/if_macsec.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/if_macsec.h b/include/uapi/linux/if_macsec.h
index 26b0d1e3e3e7..4c58d9917aa4 100644
--- a/include/uapi/linux/if_macsec.h
+++ b/include/uapi/linux/if_macsec.h
@@ -19,8 +19,8 @@
 
 #define MACSEC_MAX_KEY_LEN 128
 
-#define DEFAULT_CIPHER_ID   0x0080020001000001ULL
-#define DEFAULT_CIPHER_ALT  0x0080C20001000001ULL
+#define MACSEC_DEFAULT_CIPHER_ID   0x0080020001000001ULL
+#define MACSEC_DEFAULT_CIPHER_ALT  0x0080C20001000001ULL
 
 #define MACSEC_MIN_ICV_LEN 8
 #define MACSEC_MAX_ICV_LEN 32
-- 
cgit v1.2.3


From 046339eaab26804f52f6604877f5674f70815b26 Mon Sep 17 00:00:00 2001
From: Saeed Mahameed <saeedm@mellanox.com>
Date: Fri, 22 Apr 2016 00:33:03 +0300
Subject: net/mlx5e: Device's mtu field is u16 and not int

For set/query MTU port firmware commands the MTU field
is 16 bits, here I changed all the "int mtu" parameters
of the functions wrapping those firmware commands to be u16.

Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mlx5/port.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/mlx5/port.h b/include/linux/mlx5/port.h
index a1d145abd4eb..b30250ab7604 100644
--- a/include/linux/mlx5/port.h
+++ b/include/linux/mlx5/port.h
@@ -54,9 +54,9 @@ int mlx5_set_port_admin_status(struct mlx5_core_dev *dev,
 int mlx5_query_port_admin_status(struct mlx5_core_dev *dev,
 				 enum mlx5_port_status *status);
 
-int mlx5_set_port_mtu(struct mlx5_core_dev *dev, int mtu, u8 port);
-void mlx5_query_port_max_mtu(struct mlx5_core_dev *dev, int *max_mtu, u8 port);
-void mlx5_query_port_oper_mtu(struct mlx5_core_dev *dev, int *oper_mtu,
+int mlx5_set_port_mtu(struct mlx5_core_dev *dev, u16 mtu, u8 port);
+void mlx5_query_port_max_mtu(struct mlx5_core_dev *dev, u16 *max_mtu, u8 port);
+void mlx5_query_port_oper_mtu(struct mlx5_core_dev *dev, u16 *oper_mtu,
 			      u8 port);
 
 int mlx5_query_port_vl_hw_cap(struct mlx5_core_dev *dev,
-- 
cgit v1.2.3


From cd255efff9baadd654d6160e52d17ae7c568c9d3 Mon Sep 17 00:00:00 2001
From: Saeed Mahameed <saeedm@mellanox.com>
Date: Fri, 22 Apr 2016 00:33:05 +0300
Subject: net/mlx5e: Use vport MTU rather than physical port MTU

Set and report vport MTU rather than physical MTU,
Driver will set both vport and physical port mtu and will
rely on the query of vport mtu.

SRIOV VFs have to report their MTU to their vport manager (PF),
and this will allow them to work with any MTU they need
without failing the request.

Also for some cases where the PF is not a port owner, PF can
work with MTU less than the physical port mtu if set physical
port mtu didn't take effect.

Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mlx5/vport.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h
index bd93e6323603..301da4a5e6bf 100644
--- a/include/linux/mlx5/vport.h
+++ b/include/linux/mlx5/vport.h
@@ -45,6 +45,8 @@ int mlx5_query_nic_vport_mac_address(struct mlx5_core_dev *mdev,
 				     u16 vport, u8 *addr);
 int mlx5_modify_nic_vport_mac_address(struct mlx5_core_dev *dev,
 				      u16 vport, u8 *addr);
+int mlx5_query_nic_vport_mtu(struct mlx5_core_dev *mdev, u16 *mtu);
+int mlx5_modify_nic_vport_mtu(struct mlx5_core_dev *mdev, u16 mtu);
 int mlx5_query_nic_vport_system_image_guid(struct mlx5_core_dev *mdev,
 					   u64 *system_image_guid);
 int mlx5_query_nic_vport_node_guid(struct mlx5_core_dev *mdev, u64 *node_guid);
-- 
cgit v1.2.3


From 5fc7197d3a256d9c5de3134870304b24892a4908 Mon Sep 17 00:00:00 2001
From: Majd Dibbiny <majd@mellanox.com>
Date: Fri, 22 Apr 2016 00:33:07 +0300
Subject: net/mlx5: Add pci shutdown callback

This patch introduces kexec support for mlx5.
When switching kernels, kexec() calls shutdown, which unloads
the driver and cleans its resources.

In addition, remove unregister netdev from shutdown flow. This will
allow a clean shutdown, even if some netdev clients did not release their
reference from this netdev. Releasing The HW resources only is enough as
the kernel is shutting down

Signed-off-by: Majd Dibbiny <majd@mellanox.com>
Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: Haggai Abramovsky <hagaya@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mlx5/driver.h | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index dcd5ac8d3b14..369c837d40f5 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -519,8 +519,9 @@ enum mlx5_device_state {
 };
 
 enum mlx5_interface_state {
-	MLX5_INTERFACE_STATE_DOWN,
-	MLX5_INTERFACE_STATE_UP,
+	MLX5_INTERFACE_STATE_DOWN = BIT(0),
+	MLX5_INTERFACE_STATE_UP = BIT(1),
+	MLX5_INTERFACE_STATE_SHUTDOWN = BIT(2),
 };
 
 enum mlx5_pci_status {
@@ -544,7 +545,7 @@ struct mlx5_core_dev {
 	enum mlx5_device_state	state;
 	/* sync interface state */
 	struct mutex		intf_state_mutex;
-	enum mlx5_interface_state interface_state;
+	unsigned long		intf_state;
 	void			(*event) (struct mlx5_core_dev *dev,
 					  enum mlx5_dev_event event,
 					  unsigned long param);
-- 
cgit v1.2.3


From 3020ca711871fdaf0c15c8bab677a6bc302e28fe Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hverkuil@xs4all.nl>
Date: Fri, 22 Apr 2016 04:00:50 -0300
Subject: [media] v4l2-dv-timings.h: fix polarity for 4k formats

The VSync polarity was negative instead of positive for the 4k CEA formats.
I probably copy-and-pasted these from the DMT 4k format, which does have a
negative VSync polarity.

Signed-off-by: Hans Verkuil <hans.verkuil@cisco.com>
Reported-by: Martin Bugge <marbugge@cisco.com>
Cc: <stable@vger.kernel.org>      # for v4.1 and up
Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
---
 include/uapi/linux/v4l2-dv-timings.h | 30 ++++++++++++++++++++----------
 1 file changed, 20 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/v4l2-dv-timings.h b/include/uapi/linux/v4l2-dv-timings.h
index c039f1d68a09..086168e18ca8 100644
--- a/include/uapi/linux/v4l2-dv-timings.h
+++ b/include/uapi/linux/v4l2-dv-timings.h
@@ -183,7 +183,8 @@
 
 #define V4L2_DV_BT_CEA_3840X2160P24 { \
 	.type = V4L2_DV_BT_656_1120, \
-	V4L2_INIT_BT_TIMINGS(3840, 2160, 0, V4L2_DV_HSYNC_POS_POL, \
+	V4L2_INIT_BT_TIMINGS(3840, 2160, 0, \
+		V4L2_DV_HSYNC_POS_POL | V4L2_DV_VSYNC_POS_POL, \
 		297000000, 1276, 88, 296, 8, 10, 72, 0, 0, 0, \
 		V4L2_DV_BT_STD_CEA861, \
 		V4L2_DV_FL_CAN_REDUCE_FPS | V4L2_DV_FL_IS_CE_VIDEO) \
@@ -191,14 +192,16 @@
 
 #define V4L2_DV_BT_CEA_3840X2160P25 { \
 	.type = V4L2_DV_BT_656_1120, \
-	V4L2_INIT_BT_TIMINGS(3840, 2160, 0, V4L2_DV_HSYNC_POS_POL, \
+	V4L2_INIT_BT_TIMINGS(3840, 2160, 0, \
+		V4L2_DV_HSYNC_POS_POL | V4L2_DV_VSYNC_POS_POL, \
 		297000000, 1056, 88, 296, 8, 10, 72, 0, 0, 0, \
 		V4L2_DV_BT_STD_CEA861, V4L2_DV_FL_IS_CE_VIDEO) \
 }
 
 #define V4L2_DV_BT_CEA_3840X2160P30 { \
 	.type = V4L2_DV_BT_656_1120, \
-	V4L2_INIT_BT_TIMINGS(3840, 2160, 0, V4L2_DV_HSYNC_POS_POL, \
+	V4L2_INIT_BT_TIMINGS(3840, 2160, 0, \
+		V4L2_DV_HSYNC_POS_POL | V4L2_DV_VSYNC_POS_POL, \
 		297000000, 176, 88, 296, 8, 10, 72, 0, 0, 0, \
 		V4L2_DV_BT_STD_CEA861, \
 		V4L2_DV_FL_CAN_REDUCE_FPS | V4L2_DV_FL_IS_CE_VIDEO) \
@@ -206,14 +209,16 @@
 
 #define V4L2_DV_BT_CEA_3840X2160P50 { \
 	.type = V4L2_DV_BT_656_1120, \
-	V4L2_INIT_BT_TIMINGS(3840, 2160, 0, V4L2_DV_HSYNC_POS_POL, \
+	V4L2_INIT_BT_TIMINGS(3840, 2160, 0, \
+		V4L2_DV_HSYNC_POS_POL | V4L2_DV_VSYNC_POS_POL, \
 		594000000, 1056, 88, 296, 8, 10, 72, 0, 0, 0, \
 		V4L2_DV_BT_STD_CEA861, V4L2_DV_FL_IS_CE_VIDEO) \
 }
 
 #define V4L2_DV_BT_CEA_3840X2160P60 { \
 	.type = V4L2_DV_BT_656_1120, \
-	V4L2_INIT_BT_TIMINGS(3840, 2160, 0, V4L2_DV_HSYNC_POS_POL, \
+	V4L2_INIT_BT_TIMINGS(3840, 2160, 0, \
+		V4L2_DV_HSYNC_POS_POL | V4L2_DV_VSYNC_POS_POL, \
 		594000000, 176, 88, 296, 8, 10, 72, 0, 0, 0, \
 		V4L2_DV_BT_STD_CEA861, \
 		V4L2_DV_FL_CAN_REDUCE_FPS | V4L2_DV_FL_IS_CE_VIDEO) \
@@ -221,7 +226,8 @@
 
 #define V4L2_DV_BT_CEA_4096X2160P24 { \
 	.type = V4L2_DV_BT_656_1120, \
-	V4L2_INIT_BT_TIMINGS(4096, 2160, 0, V4L2_DV_HSYNC_POS_POL, \
+	V4L2_INIT_BT_TIMINGS(4096, 2160, 0, \
+		V4L2_DV_HSYNC_POS_POL | V4L2_DV_VSYNC_POS_POL, \
 		297000000, 1020, 88, 296, 8, 10, 72, 0, 0, 0, \
 		V4L2_DV_BT_STD_CEA861, \
 		V4L2_DV_FL_CAN_REDUCE_FPS | V4L2_DV_FL_IS_CE_VIDEO) \
@@ -229,14 +235,16 @@
 
 #define V4L2_DV_BT_CEA_4096X2160P25 { \
 	.type = V4L2_DV_BT_656_1120, \
-	V4L2_INIT_BT_TIMINGS(4096, 2160, 0, V4L2_DV_HSYNC_POS_POL, \
+	V4L2_INIT_BT_TIMINGS(4096, 2160, 0, \
+		V4L2_DV_HSYNC_POS_POL | V4L2_DV_VSYNC_POS_POL, \
 		297000000, 968, 88, 128, 8, 10, 72, 0, 0, 0, \
 		V4L2_DV_BT_STD_CEA861, V4L2_DV_FL_IS_CE_VIDEO) \
 }
 
 #define V4L2_DV_BT_CEA_4096X2160P30 { \
 	.type = V4L2_DV_BT_656_1120, \
-	V4L2_INIT_BT_TIMINGS(4096, 2160, 0, V4L2_DV_HSYNC_POS_POL, \
+	V4L2_INIT_BT_TIMINGS(4096, 2160, 0, \
+		V4L2_DV_HSYNC_POS_POL | V4L2_DV_VSYNC_POS_POL, \
 		297000000, 88, 88, 128, 8, 10, 72, 0, 0, 0, \
 		V4L2_DV_BT_STD_CEA861, \
 		V4L2_DV_FL_CAN_REDUCE_FPS | V4L2_DV_FL_IS_CE_VIDEO) \
@@ -244,14 +252,16 @@
 
 #define V4L2_DV_BT_CEA_4096X2160P50 { \
 	.type = V4L2_DV_BT_656_1120, \
-	V4L2_INIT_BT_TIMINGS(4096, 2160, 0, V4L2_DV_HSYNC_POS_POL, \
+	V4L2_INIT_BT_TIMINGS(4096, 2160, 0, \
+		V4L2_DV_HSYNC_POS_POL | V4L2_DV_VSYNC_POS_POL, \
 		594000000, 968, 88, 128, 8, 10, 72, 0, 0, 0, \
 		V4L2_DV_BT_STD_CEA861, V4L2_DV_FL_IS_CE_VIDEO) \
 }
 
 #define V4L2_DV_BT_CEA_4096X2160P60 { \
 	.type = V4L2_DV_BT_656_1120, \
-	V4L2_INIT_BT_TIMINGS(4096, 2160, 0, V4L2_DV_HSYNC_POS_POL, \
+	V4L2_INIT_BT_TIMINGS(4096, 2160, 0, \
+		V4L2_DV_HSYNC_POS_POL | V4L2_DV_VSYNC_POS_POL, \
 		594000000, 88, 88, 128, 8, 10, 72, 0, 0, 0, \
 		V4L2_DV_BT_STD_CEA861, \
 		V4L2_DV_FL_CAN_REDUCE_FPS | V4L2_DV_FL_IS_CE_VIDEO) \
-- 
cgit v1.2.3


From b93876845c5e30a92964eeb088d9d2e024118022 Mon Sep 17 00:00:00 2001
From: Ricardo Ribalda <ricardo.ribalda@gmail.com>
Date: Mon, 25 Apr 2016 06:04:45 -0300
Subject: [media] media: vb2: Fix regression on poll() for RW mode

When using a device is read/write mode, vb2 does not handle properly the
first select/poll operation.

The reason for this, is that when this code has been refactored, some of
the operations have changed their order, and now fileio emulator is not
started.

The reintroduced check to the core is enabled by a quirk flag, that
avoids this check by other subsystems like DVB.

Fixes: 49d8ab9feaf2 ("media] media: videobuf2: Separate vb2_poll()")

Reported-by: Dimitrios Katsaros <patcherwork@gmail.com>
Cc: Junghak Sung <jh1009.sung@samsung.com>
Cc: <stable@vger.kernel.org>      # for v4.5 and up
Signed-off-by: Ricardo Ribalda Delgado <ricardo.ribalda@gmail.com>
Signed-off-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
---
 include/media/videobuf2-core.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/media/videobuf2-core.h b/include/media/videobuf2-core.h
index 5342ff4d748f..88e3ab496e8f 100644
--- a/include/media/videobuf2-core.h
+++ b/include/media/videobuf2-core.h
@@ -404,6 +404,9 @@ struct vb2_buf_ops {
  * @fileio_read_once:		report EOF after reading the first buffer
  * @fileio_write_immediately:	queue buffer after each write() call
  * @allow_zero_bytesused:	allow bytesused == 0 to be passed to the driver
+ * @quirk_poll_must_check_waiting_for_buffers: Return POLLERR at poll when QBUF
+ *              has not been called. This is a vb1 idiom that has been adopted
+ *              also by vb2.
  * @lock:	pointer to a mutex that protects the vb2_queue struct. The
  *		driver can set this to a mutex to let the v4l2 core serialize
  *		the queuing ioctls. If the driver wants to handle locking
@@ -467,6 +470,7 @@ struct vb2_queue {
 	unsigned			fileio_read_once:1;
 	unsigned			fileio_write_immediately:1;
 	unsigned			allow_zero_bytesused:1;
+	unsigned		   quirk_poll_must_check_waiting_for_buffers:1;
 
 	struct mutex			*lock;
 	void				*owner;
-- 
cgit v1.2.3


From 6c1ea260f89709e0021d2c59f8fd2a104b5b1123 Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Mon, 11 Apr 2016 19:34:49 +0200
Subject: libceph: make authorizer destruction independent of ceph_auth_client

Starting the kernel client with cephx disabled and then enabling cephx
and restarting userspace daemons can result in a crash:

    [262671.478162] BUG: unable to handle kernel paging request at ffffebe000000000
    [262671.531460] IP: [<ffffffff811cd04a>] kfree+0x5a/0x130
    [262671.584334] PGD 0
    [262671.635847] Oops: 0000 [#1] SMP
    [262672.055841] CPU: 22 PID: 2961272 Comm: kworker/22:2 Not tainted 4.2.0-34-generic #39~14.04.1-Ubuntu
    [262672.162338] Hardware name: Dell Inc. PowerEdge R720/068CDY, BIOS 2.4.3 07/09/2014
    [262672.268937] Workqueue: ceph-msgr con_work [libceph]
    [262672.322290] task: ffff88081c2d0dc0 ti: ffff880149ae8000 task.ti: ffff880149ae8000
    [262672.428330] RIP: 0010:[<ffffffff811cd04a>]  [<ffffffff811cd04a>] kfree+0x5a/0x130
    [262672.535880] RSP: 0018:ffff880149aeba58  EFLAGS: 00010286
    [262672.589486] RAX: 000001e000000000 RBX: 0000000000000012 RCX: ffff8807e7461018
    [262672.695980] RDX: 000077ff80000000 RSI: ffff88081af2be04 RDI: 0000000000000012
    [262672.803668] RBP: ffff880149aeba78 R08: 0000000000000000 R09: 0000000000000000
    [262672.912299] R10: ffffebe000000000 R11: ffff880819a60e78 R12: ffff8800aec8df40
    [262673.021769] R13: ffffffffc035f70f R14: ffff8807e5b138e0 R15: ffff880da9785840
    [262673.131722] FS:  0000000000000000(0000) GS:ffff88081fac0000(0000) knlGS:0000000000000000
    [262673.245377] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
    [262673.303281] CR2: ffffebe000000000 CR3: 0000000001c0d000 CR4: 00000000001406e0
    [262673.417556] Stack:
    [262673.472943]  ffff880149aeba88 ffff88081af2be04 ffff8800aec8df40 ffff88081af2be04
    [262673.583767]  ffff880149aeba98 ffffffffc035f70f ffff880149aebac8 ffff8800aec8df00
    [262673.694546]  ffff880149aebac8 ffffffffc035c89e ffff8807e5b138e0 ffff8805b047f800
    [262673.805230] Call Trace:
    [262673.859116]  [<ffffffffc035f70f>] ceph_x_destroy_authorizer+0x1f/0x50 [libceph]
    [262673.968705]  [<ffffffffc035c89e>] ceph_auth_destroy_authorizer+0x3e/0x60 [libceph]
    [262674.078852]  [<ffffffffc0352805>] put_osd+0x45/0x80 [libceph]
    [262674.134249]  [<ffffffffc035290e>] remove_osd+0xae/0x140 [libceph]
    [262674.189124]  [<ffffffffc0352aa3>] __reset_osd+0x103/0x150 [libceph]
    [262674.243749]  [<ffffffffc0354703>] kick_requests+0x223/0x460 [libceph]
    [262674.297485]  [<ffffffffc03559e2>] ceph_osdc_handle_map+0x282/0x5e0 [libceph]
    [262674.350813]  [<ffffffffc035022e>] dispatch+0x4e/0x720 [libceph]
    [262674.403312]  [<ffffffffc034bd91>] try_read+0x3d1/0x1090 [libceph]
    [262674.454712]  [<ffffffff810ab7c2>] ? dequeue_entity+0x152/0x690
    [262674.505096]  [<ffffffffc034cb1b>] con_work+0xcb/0x1300 [libceph]
    [262674.555104]  [<ffffffff8108fb3e>] process_one_work+0x14e/0x3d0
    [262674.604072]  [<ffffffff810901ea>] worker_thread+0x11a/0x470
    [262674.652187]  [<ffffffff810900d0>] ? rescuer_thread+0x310/0x310
    [262674.699022]  [<ffffffff810957a2>] kthread+0xd2/0xf0
    [262674.744494]  [<ffffffff810956d0>] ? kthread_create_on_node+0x1c0/0x1c0
    [262674.789543]  [<ffffffff817bd81f>] ret_from_fork+0x3f/0x70
    [262674.834094]  [<ffffffff810956d0>] ? kthread_create_on_node+0x1c0/0x1c0

What happens is the following:

    (1) new MON session is established
    (2) old "none" ac is destroyed
    (3) new "cephx" ac is constructed
    ...
    (4) old OSD session (w/ "none" authorizer) is put
          ceph_auth_destroy_authorizer(ac, osd->o_auth.authorizer)

osd->o_auth.authorizer in the "none" case is just a bare pointer into
ac, which contains a single static copy for all services.  By the time
we get to (4), "none" ac, freed in (2), is long gone.  On top of that,
a new vtable installed in (3) points us at ceph_x_destroy_authorizer(),
so we end up trying to destroy a "none" authorizer with a "cephx"
destructor operating on invalid memory!

To fix this, decouple authorizer destruction from ac and do away with
a single static "none" authorizer by making a copy for each OSD or MDS
session.  Authorizers themselves are independent of ac and so there is
no reason for destroy_authorizer() to be an ac op.  Make it an op on
the authorizer itself by turning ceph_authorizer into a real struct.

Fixes: http://tracker.ceph.com/issues/15447

Reported-by: Alan Zhang <alan.zhang@linux.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Reviewed-by: Sage Weil <sage@redhat.com>
---
 include/linux/ceph/auth.h       | 10 +++++-----
 include/linux/ceph/osd_client.h |  1 -
 2 files changed, 5 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/ceph/auth.h b/include/linux/ceph/auth.h
index 260d78b587c4..1563265d2097 100644
--- a/include/linux/ceph/auth.h
+++ b/include/linux/ceph/auth.h
@@ -12,9 +12,12 @@
  */
 
 struct ceph_auth_client;
-struct ceph_authorizer;
 struct ceph_msg;
 
+struct ceph_authorizer {
+	void (*destroy)(struct ceph_authorizer *);
+};
+
 struct ceph_auth_handshake {
 	struct ceph_authorizer *authorizer;
 	void *authorizer_buf;
@@ -62,8 +65,6 @@ struct ceph_auth_client_ops {
 				 struct ceph_auth_handshake *auth);
 	int (*verify_authorizer_reply)(struct ceph_auth_client *ac,
 				       struct ceph_authorizer *a, size_t len);
-	void (*destroy_authorizer)(struct ceph_auth_client *ac,
-				   struct ceph_authorizer *a);
 	void (*invalidate_authorizer)(struct ceph_auth_client *ac,
 				      int peer_type);
 
@@ -112,8 +113,7 @@ extern int ceph_auth_is_authenticated(struct ceph_auth_client *ac);
 extern int ceph_auth_create_authorizer(struct ceph_auth_client *ac,
 				       int peer_type,
 				       struct ceph_auth_handshake *auth);
-extern void ceph_auth_destroy_authorizer(struct ceph_auth_client *ac,
-					 struct ceph_authorizer *a);
+void ceph_auth_destroy_authorizer(struct ceph_authorizer *a);
 extern int ceph_auth_update_authorizer(struct ceph_auth_client *ac,
 				       int peer_type,
 				       struct ceph_auth_handshake *a);
diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
index 4343df806710..cbf460927c42 100644
--- a/include/linux/ceph/osd_client.h
+++ b/include/linux/ceph/osd_client.h
@@ -16,7 +16,6 @@ struct ceph_msg;
 struct ceph_snap_context;
 struct ceph_osd_request;
 struct ceph_osd_client;
-struct ceph_authorizer;
 
 /*
  * completion callback for async writepages
-- 
cgit v1.2.3


From 841645b5f2dfceac69b78fcd0c9050868d41ea61 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Mon, 25 Apr 2016 15:33:55 -0400
Subject: ipv6: Revert optional address flusing on ifdown.

This reverts the following three commits:

70af921db6f8835f4b11c65731116560adb00c14
799977d9aafbf0ca0b9c39b04cbfb16db71302c9
f1705ec197e705b79ea40fe7a2cc5acfa1d3bfac

The feature was ill conceived, has terrible semantics, and has added
nothing but regressions to the already fragile ipv6 stack.

Fixes: f1705ec197e7 ("net: ipv6: Make address flushing on ifdown optional")
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ipv6.h      | 1 -
 include/uapi/linux/ipv6.h | 1 -
 2 files changed, 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index 7edc14fb66b6..4b2267e1b7c3 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -62,7 +62,6 @@ struct ipv6_devconf {
 		struct in6_addr secret;
 	} stable_secret;
 	__s32		use_oif_addrs_only;
-	__s32		keep_addr_on_down;
 	void		*sysctl;
 };
 
diff --git a/include/uapi/linux/ipv6.h b/include/uapi/linux/ipv6.h
index 395876060f50..ec117b65d5a5 100644
--- a/include/uapi/linux/ipv6.h
+++ b/include/uapi/linux/ipv6.h
@@ -176,7 +176,6 @@ enum {
 	DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN,
 	DEVCONF_DROP_UNICAST_IN_L2_MULTICAST,
 	DEVCONF_DROP_UNSOLICITED_NA,
-	DEVCONF_KEEP_ADDR_ON_DOWN,
 	DEVCONF_MAX
 };
 
-- 
cgit v1.2.3


From 5cf1cacb49aee39c3e02ae87068fc3c6430659b0 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Thu, 21 Apr 2016 19:06:48 -0400
Subject: cgroup, cpuset: replace cpuset_post_attach_flush() with
 cgroup_subsys->post_attach callback

Since e93ad19d0564 ("cpuset: make mm migration asynchronous"), cpuset
kicks off asynchronous NUMA node migration if necessary during task
migration and flushes it from cpuset_post_attach_flush() which is
called at the end of __cgroup_procs_write().  This is to avoid
performing migration with cgroup_threadgroup_rwsem write-locked which
can lead to deadlock through dependency on kworker creation.

memcg has a similar issue with charge moving, so let's convert it to
an official callback rather than the current one-off cpuset specific
function.  This patch adds cgroup_subsys->post_attach callback and
makes cpuset register cpuset_post_attach_flush() as its ->post_attach.

The conversion is mostly one-to-one except that the new callback is
called under cgroup_mutex.  This is to guarantee that no other
migration operations are started before ->post_attach callbacks are
finished.  cgroup_mutex is one of the outermost mutex in the system
and has never been and shouldn't be a problem.  We can add specialized
synchronization around __cgroup_procs_write() but I don't think
there's any noticeable benefit.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Li Zefan <lizefan@huawei.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: <stable@vger.kernel.org> # 4.4+ prerequisite for the next patch
---
 include/linux/cgroup-defs.h | 1 +
 include/linux/cpuset.h      | 6 ------
 2 files changed, 1 insertion(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index 3e39ae5bc799..5b17de62c962 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -444,6 +444,7 @@ struct cgroup_subsys {
 	int (*can_attach)(struct cgroup_taskset *tset);
 	void (*cancel_attach)(struct cgroup_taskset *tset);
 	void (*attach)(struct cgroup_taskset *tset);
+	void (*post_attach)(void);
 	int (*can_fork)(struct task_struct *task);
 	void (*cancel_fork)(struct task_struct *task);
 	void (*fork)(struct task_struct *task);
diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h
index fea160ee5803..85a868ccb493 100644
--- a/include/linux/cpuset.h
+++ b/include/linux/cpuset.h
@@ -137,8 +137,6 @@ static inline void set_mems_allowed(nodemask_t nodemask)
 	task_unlock(current);
 }
 
-extern void cpuset_post_attach_flush(void);
-
 #else /* !CONFIG_CPUSETS */
 
 static inline bool cpusets_enabled(void) { return false; }
@@ -245,10 +243,6 @@ static inline bool read_mems_allowed_retry(unsigned int seq)
 	return false;
 }
 
-static inline void cpuset_post_attach_flush(void)
-{
-}
-
 #endif /* !CONFIG_CPUSETS */
 
 #endif /* _LINUX_CPUSET_H */
-- 
cgit v1.2.3


From bb03ed216370cb021f377f923471e56d1de3ff5d Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Thu, 21 Apr 2016 16:39:17 +0200
Subject: ALSA: hda - Update BCLK also at hotplug for i915 HSW/BDW

The recent bug report suggests that BCLK setup for i915 HSW/BDW needs
to be updated at each HDMI hotplug, not only at initialization and
resume.  That is, we need to update HSW_EM4 and HSW_EM5 registers at
ELD notification, too.  Otherwise the HDMI audio may be out of sync
and played in a wrong pitch.

However, the HDA codec driver has no access to the controller
registers, and currently the code managing these registers is in
hda_intel.c, i.e. local to the controller driver.  For allowing the
explicit BCLK update from the codec driver, as in this patch, the
former haswell_set_bclk() in hda_intel.c is moved to hdac_i915.c and
exposed as snd_hdac_i915_set_bclk().  This is called from both the HDA
controller driver and intel_pin_eld_notify() in HDMI codec driver.

Along with this change, snd_hdac_get_display_clk() gets dropped as
it's no longer used.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=91410
Cc: <stable@vger.kernel.org> # v4.5+
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 include/sound/hda_i915.h | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/sound/hda_i915.h b/include/sound/hda_i915.h
index fa341fcb5829..f5842bcd9c94 100644
--- a/include/sound/hda_i915.h
+++ b/include/sound/hda_i915.h
@@ -9,7 +9,7 @@
 #ifdef CONFIG_SND_HDA_I915
 int snd_hdac_set_codec_wakeup(struct hdac_bus *bus, bool enable);
 int snd_hdac_display_power(struct hdac_bus *bus, bool enable);
-int snd_hdac_get_display_clk(struct hdac_bus *bus);
+void snd_hdac_i915_set_bclk(struct hdac_bus *bus);
 int snd_hdac_sync_audio_rate(struct hdac_bus *bus, hda_nid_t nid, int rate);
 int snd_hdac_acomp_get_eld(struct hdac_bus *bus, hda_nid_t nid,
 			   bool *audio_enabled, char *buffer, int max_bytes);
@@ -25,9 +25,8 @@ static inline int snd_hdac_display_power(struct hdac_bus *bus, bool enable)
 {
 	return 0;
 }
-static inline int snd_hdac_get_display_clk(struct hdac_bus *bus)
+static inline void snd_hdac_i915_set_bclk(struct hdac_bus *bus)
 {
-	return 0;
 }
 static inline int snd_hdac_sync_audio_rate(struct hdac_bus *bus, hda_nid_t nid,
 					   int rate)
-- 
cgit v1.2.3


From 6a923934c33c750a595868af6bef5f1a1fa90054 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 26 Apr 2016 11:47:41 -0400
Subject: Revert "ipv6: Revert optional address flusing on ifdown."

This reverts commit 841645b5f2dfceac69b78fcd0c9050868d41ea61.

Ok, this puts the feature back.  I've decided to apply David A.'s
bug fix and run with that rather than make everyone wait another
whole release for this feature.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ipv6.h      | 1 +
 include/uapi/linux/ipv6.h | 1 +
 2 files changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index 4b2267e1b7c3..7edc14fb66b6 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -62,6 +62,7 @@ struct ipv6_devconf {
 		struct in6_addr secret;
 	} stable_secret;
 	__s32		use_oif_addrs_only;
+	__s32		keep_addr_on_down;
 	void		*sysctl;
 };
 
diff --git a/include/uapi/linux/ipv6.h b/include/uapi/linux/ipv6.h
index ec117b65d5a5..395876060f50 100644
--- a/include/uapi/linux/ipv6.h
+++ b/include/uapi/linux/ipv6.h
@@ -176,6 +176,7 @@ enum {
 	DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN,
 	DEVCONF_DROP_UNICAST_IN_L2_MULTICAST,
 	DEVCONF_DROP_UNSOLICITED_NA,
+	DEVCONF_KEEP_ADDR_ON_DOWN,
 	DEVCONF_MAX
 };
 
-- 
cgit v1.2.3


From 8ead9dd54716d1e05e129959f702fcc1786f82b4 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Mon, 25 Apr 2016 20:04:08 -0700
Subject: devpts: more pty driver interface cleanups

This is more prep-work for the upcoming pty changes.  Still just code
cleanup with no actual semantic changes.

This removes a bunch pointless complexity by just having the slave pty
side remember the dentry associated with the devpts slave rather than
the inode.  That allows us to remove all the "look up the dentry" code
for when we want to remove it again.

Together with moving the tty pointer from "inode->i_private" to
"dentry->d_fsdata" and getting rid of pointless inode locking, this
removes about 30 lines of code.  Not only is the end result smaller,
it's simpler and easier to understand.

The old code, for example, depended on the d_find_alias() to not just
find the dentry, but also to check that it is still hashed, which in
turn validated the tty pointer in the inode.

That is a _very_ roundabout way to say "invalidate the cached tty
pointer when the dentry is removed".

The new code just does

	dentry->d_fsdata = NULL;

in devpts_pty_kill() instead, invalidating the tty pointer rather more
directly and obviously.  Don't do something complex and subtle when the
obvious straightforward approach will do.

The rest of the patch (ie apart from code deletion and the above tty
pointer clearing) is just switching the calling convention to pass the
dentry or file pointer around instead of the inode.

Cc: Eric Biederman <ebiederm@xmission.com>
Cc: Peter Anvin <hpa@zytor.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Peter Hurley <peter@hurleysoftware.com>
Cc: Serge Hallyn <serge.hallyn@ubuntu.com>
Cc: Willy Tarreau <w@1wt.eu>
Cc: Aurelien Jarno <aurelien@aurel32.net>
Cc: Alan Cox <gnomes@lxorguk.ukuu.org.uk>
Cc: Jann Horn <jann@thejh.net>
Cc: Greg KH <greg@kroah.com>
Cc: Jiri Slaby <jslaby@suse.com>
Cc: Florian Weimer <fw@deneb.enyo.de>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/devpts_fs.h  | 6 +++---
 include/linux/tty_driver.h | 4 ++--
 2 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/devpts_fs.h b/include/linux/devpts_fs.h
index 358a4db72a27..5871f292b596 100644
--- a/include/linux/devpts_fs.h
+++ b/include/linux/devpts_fs.h
@@ -27,11 +27,11 @@ int devpts_new_index(struct pts_fs_info *);
 void devpts_kill_index(struct pts_fs_info *, int);
 
 /* mknod in devpts */
-struct inode *devpts_pty_new(struct pts_fs_info *, dev_t, int, void *);
+struct dentry *devpts_pty_new(struct pts_fs_info *, int, void *);
 /* get private structure */
-void *devpts_get_priv(struct inode *pts_inode);
+void *devpts_get_priv(struct dentry *);
 /* unlink */
-void devpts_pty_kill(struct inode *inode);
+void devpts_pty_kill(struct dentry *);
 
 #endif
 
diff --git a/include/linux/tty_driver.h b/include/linux/tty_driver.h
index 161052477f77..b742b5e47cc2 100644
--- a/include/linux/tty_driver.h
+++ b/include/linux/tty_driver.h
@@ -7,7 +7,7 @@
  * defined; unless noted otherwise, they are optional, and can be
  * filled in with a null pointer.
  *
- * struct tty_struct * (*lookup)(struct tty_driver *self, int idx)
+ * struct tty_struct * (*lookup)(struct tty_driver *self, struct file *, int idx)
  *
  *	Return the tty device corresponding to idx, NULL if there is not
  *	one currently in use and an ERR_PTR value on error. Called under
@@ -250,7 +250,7 @@ struct serial_icounter_struct;
 
 struct tty_operations {
 	struct tty_struct * (*lookup)(struct tty_driver *driver,
-			struct inode *inode, int idx);
+			struct file *filp, int idx);
 	int  (*install)(struct tty_driver *driver, struct tty_struct *tty);
 	void (*remove)(struct tty_driver *driver, struct tty_struct *tty);
 	int  (*open)(struct tty_struct * tty, struct file * filp);
-- 
cgit v1.2.3


From 0224a4a30b57385a60065aa598181868881d8fc6 Mon Sep 17 00:00:00 2001
From: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Date: Wed, 27 Apr 2016 14:04:20 +0300
Subject: device property: Avoid potential dereferences of invalid pointers

Since fwnode may hold ERR_PTR(-ENODEV) or it may be NULL,
the fwnode type checks is_of_node(), is_acpi_node() and is
is_pset_node() need to consider it. Using IS_ERR_OR_NULL()
to check it.

Fixes: 0d67e0fa1664 (device property: fix for a case of use-after-free)
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
[ rjw: Subject & changelog ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/acpi/acpi_bus.h | 4 ++--
 include/linux/of.h      | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h
index 14362a84c78e..3a932501d690 100644
--- a/include/acpi/acpi_bus.h
+++ b/include/acpi/acpi_bus.h
@@ -394,13 +394,13 @@ struct acpi_data_node {
 
 static inline bool is_acpi_node(struct fwnode_handle *fwnode)
 {
-	return fwnode && (fwnode->type == FWNODE_ACPI
+	return !IS_ERR_OR_NULL(fwnode) && (fwnode->type == FWNODE_ACPI
 		|| fwnode->type == FWNODE_ACPI_DATA);
 }
 
 static inline bool is_acpi_device_node(struct fwnode_handle *fwnode)
 {
-	return fwnode && fwnode->type == FWNODE_ACPI;
+	return !IS_ERR_OR_NULL(fwnode) && fwnode->type == FWNODE_ACPI;
 }
 
 static inline struct acpi_device *to_acpi_device_node(struct fwnode_handle *fwnode)
diff --git a/include/linux/of.h b/include/linux/of.h
index 7fcb681baadf..31758036787c 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -133,7 +133,7 @@ void of_core_init(void);
 
 static inline bool is_of_node(struct fwnode_handle *fwnode)
 {
-	return fwnode && fwnode->type == FWNODE_OF;
+	return !IS_ERR_OR_NULL(fwnode) && fwnode->type == FWNODE_OF;
 }
 
 static inline struct device_node *to_of_node(struct fwnode_handle *fwnode)
-- 
cgit v1.2.3


From 986ef95ecdd3eb6fa29433e68faa94c7624083be Mon Sep 17 00:00:00 2001
From: Sagi Grimberg <sagi@grimberg.me>
Date: Thu, 31 Mar 2016 19:03:25 +0300
Subject: IB/mlx5: Expose correct max_sge_rd limit

mlx5 devices (Connect-IB, ConnectX-4, ConnectX-4-LX) has a limitation
where rdma read work queue entries cannot exceed 512 bytes.
A rdma_read wqe needs to fit in 512 bytes:
- wqe control segment (16 bytes)
- rdma segment (16 bytes)
- scatter elements (16 bytes each)

So max_sge_rd should be: (512 - 16 - 16) / 16 = 30.

Cc: linux-stable@vger.kernel.org
Reported-by: Christoph Hellwig <hch@lst.de>
Tested-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Sagi Grimberg <sagig@grimberg.me>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 include/linux/mlx5/device.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'include')

diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index 8156e3c9239c..b3575f392492 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -392,6 +392,17 @@ enum {
 	MLX5_CAP_OFF_CMDIF_CSUM		= 46,
 };
 
+enum {
+	/*
+	 * Max wqe size for rdma read is 512 bytes, so this
+	 * limits our max_sge_rd as the wqe needs to fit:
+	 * - ctrl segment (16 bytes)
+	 * - rdma segment (16 bytes)
+	 * - scatter elements (16 bytes each)
+	 */
+	MLX5_MAX_SGE_RD	= (512 - 16 - 16) / 16
+};
+
 struct mlx5_inbox_hdr {
 	__be16		opcode;
 	u8		rsvd[4];
-- 
cgit v1.2.3


From e6bd18f57aad1a2d1ef40e646d03ed0f2515c9e3 Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
Date: Sun, 10 Apr 2016 19:13:13 -0600
Subject: IB/security: Restrict use of the write() interface

The drivers/infiniband stack uses write() as a replacement for
bi-directional ioctl().  This is not safe. There are ways to
trigger write calls that result in the return structure that
is normally written to user space being shunted off to user
specified kernel memory instead.

For the immediate repair, detect and deny suspicious accesses to
the write API.

For long term, update the user space libraries and the kernel API
to something that doesn't present the same security vulnerabilities
(likely a structured ioctl() interface).

The impacted uAPI interfaces are generally only available if
hardware from drivers/infiniband is installed in the system.

Reported-by: Jann Horn <jann@thejh.net>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
[ Expanded check to all known write() entry points ]
Cc: stable@vger.kernel.org
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 include/rdma/ib.h | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

(limited to 'include')

diff --git a/include/rdma/ib.h b/include/rdma/ib.h
index cf8f9e700e48..a6b93706b0fc 100644
--- a/include/rdma/ib.h
+++ b/include/rdma/ib.h
@@ -34,6 +34,7 @@
 #define _RDMA_IB_H
 
 #include <linux/types.h>
+#include <linux/sched.h>
 
 struct ib_addr {
 	union {
@@ -86,4 +87,19 @@ struct sockaddr_ib {
 	__u64			sib_scope_id;
 };
 
+/*
+ * The IB interfaces that use write() as bi-directional ioctl() are
+ * fundamentally unsafe, since there are lots of ways to trigger "write()"
+ * calls from various contexts with elevated privileges. That includes the
+ * traditional suid executable error message writes, but also various kernel
+ * interfaces that can write to file descriptors.
+ *
+ * This function provides protection for the legacy API by restricting the
+ * calling context.
+ */
+static inline bool ib_safe_file_access(struct file *filp)
+{
+	return filp->f_cred == current_cred() && segment_eq(get_fs(), USER_DS);
+}
+
 #endif /* _RDMA_IB_H */
-- 
cgit v1.2.3


From 7b7483409f09c15f30ac43242ead1ab3061e1f59 Mon Sep 17 00:00:00 2001
From: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Date: Mon, 25 Apr 2016 15:13:17 -0300
Subject: net: fix net_gso_ok for new GSO types.

Fix casting in net_gso_ok. Otherwise the shift on
gso_type << NETIF_F_GSO_SHIFT may hit the 32th bit and make it look like
a INT_MIN, which is then promoted from signed to uint64 which is
0xffffffff80000000, resulting in wrong behavior when it is and'ed with
the feature itself, as in:

This test app:
#include <stdio.h>
#include <stdint.h>

int main(int argc, char **argv)
{
	uint64_t feature1;
	uint64_t feature2;
	int gso_type = 1 << 15;

	feature1 = gso_type << 16;
	feature2 = (uint64_t)gso_type << 16;
	printf("%lx %lx\n", feature1, feature2);

	return 0;
}

Gives:
ffffffff80000000 80000000

So that this:
   return (features & feature) == feature;
Actually works on more bits than expected and invalid ones.

Fix is to promote it earlier.

Issue noted while rebasing SCTP GSO patch but posting separetely as
someone else may experience this meanwhile.

Signed-off-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 8395308a2445..b3c46b019ac1 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -4004,7 +4004,7 @@ netdev_features_t netif_skb_features(struct sk_buff *skb);
 
 static inline bool net_gso_ok(netdev_features_t features, int gso_type)
 {
-	netdev_features_t feature = gso_type << NETIF_F_GSO_SHIFT;
+	netdev_features_t feature = (netdev_features_t)gso_type << NETIF_F_GSO_SHIFT;
 
 	/* check flags correspondence */
 	BUILD_BUG_ON(SKB_GSO_TCPV4   != (NETIF_F_TSO >> NETIF_F_GSO_SHIFT));
-- 
cgit v1.2.3


From 92117d8443bc5afacc8d5ba82e541946310f106e Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@fb.com>
Date: Wed, 27 Apr 2016 18:56:20 -0700
Subject: bpf: fix refcnt overflow

On a system with >32Gbyte of phyiscal memory and infinite RLIMIT_MEMLOCK,
the malicious application may overflow 32-bit bpf program refcnt.
It's also possible to overflow map refcnt on 1Tb system.
Impose 32k hard limit which means that the same bpf program or
map cannot be shared by more than 32k processes.

Fixes: 1be7f75d1668 ("bpf: enable non-root eBPF programs")
Reported-by: Jann Horn <jannh@google.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/bpf.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 21ee41b92e8a..f1d5c5acc8dd 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -171,12 +171,13 @@ void bpf_register_prog_type(struct bpf_prog_type_list *tl);
 void bpf_register_map_type(struct bpf_map_type_list *tl);
 
 struct bpf_prog *bpf_prog_get(u32 ufd);
+struct bpf_prog *bpf_prog_inc(struct bpf_prog *prog);
 void bpf_prog_put(struct bpf_prog *prog);
 void bpf_prog_put_rcu(struct bpf_prog *prog);
 
 struct bpf_map *bpf_map_get_with_uref(u32 ufd);
 struct bpf_map *__bpf_map_get(struct fd f);
-void bpf_map_inc(struct bpf_map *map, bool uref);
+struct bpf_map *bpf_map_inc(struct bpf_map *map, bool uref);
 void bpf_map_put_with_uref(struct bpf_map *map);
 void bpf_map_put(struct bpf_map *map);
 int bpf_map_precharge_memlock(u32 pages);
-- 
cgit v1.2.3


From 66ee95d16a7f1b7b4f1dd74a2d81c6e19dc29a14 Mon Sep 17 00:00:00 2001
From: Steve Capper <steve.capper@arm.com>
Date: Thu, 28 Apr 2016 16:18:24 -0700
Subject: mm: exclude HugeTLB pages from THP page_mapped() logic

HugeTLB pages cannot be split, so we use the compound_mapcount to track
rmaps.

Currently page_mapped() will check the compound_mapcount, but will also
go through the constituent pages of a THP compound page and query the
individual _mapcount's too.

Unfortunately, page_mapped() does not distinguish between HugeTLB and
THP compound pages and assumes that a compound page always needs to have
HPAGE_PMD_NR pages querying.

For most cases when dealing with HugeTLB this is just inefficient, but
for scenarios where the HugeTLB page size is less than the pmd block
size (e.g.  when using contiguous bit on ARM) this can lead to crashes.

This patch adjusts the page_mapped function such that we skip the
unnecessary THP reference checks for HugeTLB pages.

Fixes: e1534ae95004 ("mm: differentiate page_mapped() from page_mapcount() for compound pages")
Signed-off-by: Steve Capper <steve.capper@arm.com>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index a55e5be0894f..79b6c18d0a38 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1031,6 +1031,8 @@ static inline bool page_mapped(struct page *page)
 	page = compound_head(page);
 	if (atomic_read(compound_mapcount_ptr(page)) >= 0)
 		return true;
+	if (PageHuge(page))
+		return false;
 	for (i = 0; i < hpage_nr_pages(page); i++) {
 		if (atomic_read(&page[i]._mapcount) >= 0)
 			return true;
-- 
cgit v1.2.3


From aa88b68c3b1dce8bc3fd54c8a7372a777ff265cd Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Date: Thu, 28 Apr 2016 16:18:27 -0700
Subject: thp: keep huge zero page pinned until tlb flush

Andrea has found[1] a race condition on MMU-gather based TLB flush vs
split_huge_page() or shrinker which frees huge zero under us (patch 1/2
and 2/2 respectively).

With new THP refcounting, we don't need patch 1/2: mmu_gather keeps the
page pinned until flush is complete and the pin prevents the page from
being split under us.

We still need patch 2/2.  This is simplified version of Andrea's patch.
We don't need fancy encoding.

[1] http://lkml.kernel.org/r/1447938052-22165-1-git-send-email-aarcange@redhat.com

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Reported-by: Andrea Arcangeli <aarcange@redhat.com>
Reviewed-by: Andrea Arcangeli <aarcange@redhat.com>
Cc: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Hugh Dickins <hughd@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/huge_mm.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include')

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 7008623e24b1..d7b9e5346fba 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -152,6 +152,7 @@ static inline bool is_huge_zero_pmd(pmd_t pmd)
 }
 
 struct page *get_huge_zero_page(void);
+void put_huge_zero_page(void);
 
 #else /* CONFIG_TRANSPARENT_HUGEPAGE */
 #define HPAGE_PMD_SHIFT ({ BUILD_BUG(); 0; })
@@ -208,6 +209,10 @@ static inline bool is_huge_zero_page(struct page *page)
 	return false;
 }
 
+static inline void put_huge_zero_page(void)
+{
+	BUILD_BUG();
+}
 
 static inline struct page *follow_devmap_pmd(struct vm_area_struct *vma,
 		unsigned long addr, pmd_t *pmd, int flags)
-- 
cgit v1.2.3


From 28093f9f34cedeaea0f481c58446d9dac6dd620f Mon Sep 17 00:00:00 2001
From: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Date: Thu, 28 Apr 2016 16:18:35 -0700
Subject: numa: fix /proc/<pid>/numa_maps for THP

In gather_pte_stats() a THP pmd is cast into a pte, which is wrong
because the layouts may differ depending on the architecture.  On s390
this will lead to inaccurate numa_maps accounting in /proc because of
misguided pte_present() and pte_dirty() checks on the fake pte.

On other architectures pte_present() and pte_dirty() may work by chance,
but there may be an issue with direct-access (dax) mappings w/o
underlying struct pages when HAVE_PTE_SPECIAL is set and THP is
available.  In vm_normal_page() the fake pte will be checked with
pte_special() and because there is no "special" bit in a pmd, this will
always return false and the VM_PFNMAP | VM_MIXEDMAP checking will be
skipped.  On dax mappings w/o struct pages, an invalid struct page
pointer would then be returned that can crash the kernel.

This patch fixes the numa_maps THP handling by introducing new "_pmd"
variants of the can_gather_numa_stats() and vm_normal_page() functions.

Signed-off-by: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: "Kirill A . Shutemov" <kirill.shutemov@linux.intel.com>
Cc: Konstantin Khlebnikov <koct9i@gmail.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Jerome Marchand <jmarchan@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Michael Holzheu <holzheu@linux.vnet.ibm.com>
Cc: <stable@vger.kernel.org>	[4.3+]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 79b6c18d0a38..864d7221de84 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1140,6 +1140,8 @@ struct zap_details {
 
 struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
 		pte_t pte);
+struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr,
+				pmd_t pmd);
 
 int zap_vma_ptes(struct vm_area_struct *vma, unsigned long address,
 		unsigned long size);
-- 
cgit v1.2.3


From 2c94b53738549d81dc7464a32117d1f5112c64d3 Mon Sep 17 00:00:00 2001
From: Tim Bingham <tbingham@akamai.com>
Date: Fri, 29 Apr 2016 13:30:23 -0400
Subject: net: Implement net_dbg_ratelimited() for CONFIG_DYNAMIC_DEBUG case

Prior to commit d92cff89a0c8 ("net_dbg_ratelimited: turn into no-op
when !DEBUG") the implementation of net_dbg_ratelimited() was buggy
for both the DEBUG and CONFIG_DYNAMIC_DEBUG cases.

The bug was that net_ratelimit() was being called and, despite
returning true, nothing was being printed to the console. This
resulted in messages like the following -

"net_ratelimit: %d callbacks suppressed"

with no other output nearby.

After commit d92cff89a0c8 ("net_dbg_ratelimited: turn into no-op when
!DEBUG") the bug is fixed for the DEBUG case. However, there's no
output at all for CONFIG_DYNAMIC_DEBUG case.

This patch restores debug output (if enabled) for the
CONFIG_DYNAMIC_DEBUG case.

Add a definition of net_dbg_ratelimited() for the CONFIG_DYNAMIC_DEBUG
case. The implementation takes care to check that dynamic debugging is
enabled before calling net_ratelimit().

Fixes: d92cff89a0c8 ("net_dbg_ratelimited: turn into no-op when !DEBUG")
Signed-off-by: Tim Bingham <tbingham@akamai.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/net.h | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/net.h b/include/linux/net.h
index 49175e4ced11..f840d77c6c31 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -246,7 +246,15 @@ do {								\
 	net_ratelimited_function(pr_warn, fmt, ##__VA_ARGS__)
 #define net_info_ratelimited(fmt, ...)				\
 	net_ratelimited_function(pr_info, fmt, ##__VA_ARGS__)
-#if defined(DEBUG)
+#if defined(CONFIG_DYNAMIC_DEBUG)
+#define net_dbg_ratelimited(fmt, ...)					\
+do {									\
+	DEFINE_DYNAMIC_DEBUG_METADATA(descriptor, fmt);			\
+	if (unlikely(descriptor.flags & _DPRINTK_FLAGS_PRINT) &&	\
+	    net_ratelimit())						\
+		__dynamic_pr_debug(&descriptor, fmt, ##__VA_ARGS__);	\
+} while (0)
+#elif defined(DEBUG)
 #define net_dbg_ratelimited(fmt, ...)				\
 	net_ratelimited_function(pr_debug, fmt, ##__VA_ARGS__)
 #else
-- 
cgit v1.2.3


From 689de1d6ca95b3b5bd8ee446863bf81a4883ea25 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Mon, 2 May 2016 12:46:42 -0700
Subject: Minimal fix-up of bad hashing behavior of hash_64()

This is a fairly minimal fixup to the horribly bad behavior of hash_64()
with certain input patterns.

In particular, because the multiplicative value used for the 64-bit hash
was intentionally bit-sparse (so that the multiply could be done with
shifts and adds on architectures without hardware multipliers), some
bits did not get spread out very much.  In particular, certain fairly
common bit ranges in the input (roughly bits 12-20: commonly with the
most information in them when you hash things like byte offsets in files
or memory that have block factors that mean that the low bits are often
zero) would not necessarily show up much in the result.

There's a bigger patch-series brewing to fix up things more completely,
but this is the fairly minimal fix for the 64-bit hashing problem.  It
simply picks a much better constant multiplier, spreading the bits out a
lot better.

NOTE! For 32-bit architectures, the bad old hash_64() remains the same
for now, since 64-bit multiplies are expensive.  The bigger hashing
cleanup will replace the 32-bit case with something better.

The new constants were picked by George Spelvin who wrote that bigger
cleanup series.  I just picked out the constants and part of the comment
from that series.

Cc: stable@vger.kernel.org
Cc: George Spelvin <linux@horizon.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/hash.h | 20 ++++++++++++++++++--
 1 file changed, 18 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/hash.h b/include/linux/hash.h
index 1afde47e1528..79c52fa81cac 100644
--- a/include/linux/hash.h
+++ b/include/linux/hash.h
@@ -32,12 +32,28 @@
 #error Wordsize not 32 or 64
 #endif
 
+/*
+ * The above primes are actively bad for hashing, since they are
+ * too sparse. The 32-bit one is mostly ok, the 64-bit one causes
+ * real problems. Besides, the "prime" part is pointless for the
+ * multiplicative hash.
+ *
+ * Although a random odd number will do, it turns out that the golden
+ * ratio phi = (sqrt(5)-1)/2, or its negative, has particularly nice
+ * properties.
+ *
+ * These are the negative, (1 - phi) = (phi^2) = (3 - sqrt(5))/2.
+ * (See Knuth vol 3, section 6.4, exercise 9.)
+ */
+#define GOLDEN_RATIO_32 0x61C88647
+#define GOLDEN_RATIO_64 0x61C8864680B583EBull
+
 static __always_inline u64 hash_64(u64 val, unsigned int bits)
 {
 	u64 hash = val;
 
-#if defined(CONFIG_ARCH_HAS_FAST_MULTIPLIER) && BITS_PER_LONG == 64
-	hash = hash * GOLDEN_RATIO_PRIME_64;
+#if BITS_PER_LONG == 64
+	hash = hash * GOLDEN_RATIO_64;
 #else
 	/*  Sigh, gcc can't optimise this alone like it does for 32 bits. */
 	u64 n = hash;
-- 
cgit v1.2.3


From af67eb9e7e1ab37880459f83153d34b3c42b0075 Mon Sep 17 00:00:00 2001
From: Alexander Duyck <aduyck@mirantis.com>
Date: Mon, 2 May 2016 09:25:16 -0700
Subject: vxlan: Add checksum check to the features check function

We need to perform an additional check on the inner headers to determine if
we can offload the checksum for them.  Previously this check didn't occur
so we would generate an invalid frame in the case of an IPv6 header
encapsulated inside of an IPv4 tunnel.  To fix this I added a secondary
check to vxlan_features_check so that we can verify that we can offload the
inner checksum.

Signed-off-by: Alexander Duyck <aduyck@mirantis.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_ether.h | 5 +++++
 include/net/vxlan.h      | 4 +++-
 2 files changed, 8 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/if_ether.h b/include/linux/if_ether.h
index d5569734f672..548fd535fd02 100644
--- a/include/linux/if_ether.h
+++ b/include/linux/if_ether.h
@@ -28,6 +28,11 @@ static inline struct ethhdr *eth_hdr(const struct sk_buff *skb)
 	return (struct ethhdr *)skb_mac_header(skb);
 }
 
+static inline struct ethhdr *inner_eth_hdr(const struct sk_buff *skb)
+{
+	return (struct ethhdr *)skb_inner_mac_header(skb);
+}
+
 int eth_header_parse(const struct sk_buff *skb, unsigned char *haddr);
 
 extern ssize_t sysfs_format_mac(char *buf, const unsigned char *addr, int len);
diff --git a/include/net/vxlan.h b/include/net/vxlan.h
index 73ed2e951c02..35437c779da8 100644
--- a/include/net/vxlan.h
+++ b/include/net/vxlan.h
@@ -252,7 +252,9 @@ static inline netdev_features_t vxlan_features_check(struct sk_buff *skb,
 	    (skb->inner_protocol_type != ENCAP_TYPE_ETHER ||
 	     skb->inner_protocol != htons(ETH_P_TEB) ||
 	     (skb_inner_mac_header(skb) - skb_transport_header(skb) !=
-	      sizeof(struct udphdr) + sizeof(struct vxlanhdr))))
+	      sizeof(struct udphdr) + sizeof(struct vxlanhdr)) ||
+	     (skb->ip_summed != CHECKSUM_NONE &&
+	      !can_checksum_protocol(features, inner_eth_hdr(skb)->h_proto))))
 		return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
 
 	return features;
-- 
cgit v1.2.3


From 1f93e9f2318b598e6775a1fc9701604993c512b1 Mon Sep 17 00:00:00 2001
From: Yury Norov <ynorov@caviumnetworks.com>
Date: Mon, 2 May 2016 19:12:47 +0300
Subject: asm-generic: use compat version for preadv2 and pwritev2

Compat architectures that does not use generic unistd (mips, s390),
declare compat version in their syscall tables for preadv2 and
pwritev2. Generic unistd syscall table should do it as well.

[arnd: this initially slipped through the review and an
 incorrect patch got merged. arch/tile/ is the only architecture
 that could be affected for their 32-bit compat mode, every
 other architecture we support today is fine.]

Signed-off-by: Yury Norov <ynorov@caviumnetworks.com>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 include/uapi/asm-generic/unistd.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
index 6e0f5f01734c..c51afb71bfab 100644
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -718,9 +718,9 @@ __SYSCALL(__NR_mlock2, sys_mlock2)
 #define __NR_copy_file_range 285
 __SYSCALL(__NR_copy_file_range, sys_copy_file_range)
 #define __NR_preadv2 286
-__SYSCALL(__NR_preadv2, sys_preadv2)
+__SC_COMP(__NR_preadv2, sys_preadv2, compat_sys_preadv2)
 #define __NR_pwritev2 287
-__SYSCALL(__NR_pwritev2, sys_pwritev2)
+__SC_COMP(__NR_pwritev2, sys_pwritev2, compat_sys_pwritev2)
 
 #undef __NR_syscalls
 #define __NR_syscalls 288
-- 
cgit v1.2.3


From 4550c4e157ca3da929593bb6c64080a59141af35 Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@cmpxchg.org>
Date: Thu, 5 May 2016 16:22:03 -0700
Subject: mm: memcontrol: let v2 cgroups follow changes in system swappiness

Cgroup2 currently doesn't have a per-cgroup swappiness setting.  We
might want to add one later - that's a different discussion - but until
we do, the cgroups should always follow the system setting.  Otherwise
it will be unchangeably set to whatever the ancestor inherited from the
system setting at the time of cgroup creation.

Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Michal Hocko <mhocko@suse.com>
Acked-by: Vladimir Davydov <vdavydov@virtuozzo.com>
Cc: <stable@vger.kernel.org>	[4.5]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/swap.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/linux/swap.h b/include/linux/swap.h
index 2b83359c19ca..0a4cd4703f40 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -533,6 +533,10 @@ static inline swp_entry_t get_swap_page(void)
 #ifdef CONFIG_MEMCG
 static inline int mem_cgroup_swappiness(struct mem_cgroup *memcg)
 {
+	/* Cgroup2 doesn't have per-cgroup swappiness */
+	if (cgroup_subsys_on_dfl(memory_cgrp_subsys))
+		return vm_swappiness;
+
 	/* root ? */
 	if (mem_cgroup_disabled() || !memcg->css.parent)
 		return vm_swappiness;
-- 
cgit v1.2.3


From 4e1016dac1ccce6d8a960775526cdc3a5baa690b Mon Sep 17 00:00:00 2001
From: Alexandre Bounine <alexandre.bounine@idt.com>
Date: Thu, 5 May 2016 16:22:06 -0700
Subject: rapidio/mport_cdev: fix uapi type definitions

Fix problems in uapi definitions reported by Gabriel Laskar: (see
https://lkml.org/lkml/2016/4/5/205 for details)

 - move public header file rio_mport_cdev.h to include/uapi/linux directory
 - change types in data structures passed as IOCTL parameters
 - improve parameter checking in some IOCTL service routines

Signed-off-by: Alexandre Bounine <alexandre.bounine@idt.com>
Reported-by: Gabriel Laskar <gabriel@lse.epita.fr>
Tested-by: Barry Wood <barry.wood@idt.com>
Cc: Gabriel Laskar <gabriel@lse.epita.fr>
Cc: Matt Porter <mporter@kernel.crashing.org>
Cc: Aurelien Jacquiot <a-jacquiot@ti.com>
Cc: Andre van Herk <andre.van.herk@prodrive-technologies.com>
Cc: Barry Wood <barry.wood@idt.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/rio_mport_cdev.h      | 271 -----------------------------------
 include/uapi/linux/rio_mport_cdev.h | 277 ++++++++++++++++++++++++++++++++++++
 2 files changed, 277 insertions(+), 271 deletions(-)
 delete mode 100644 include/linux/rio_mport_cdev.h
 create mode 100644 include/uapi/linux/rio_mport_cdev.h

(limited to 'include')

diff --git a/include/linux/rio_mport_cdev.h b/include/linux/rio_mport_cdev.h
deleted file mode 100644
index b65d19df76d2..000000000000
--- a/include/linux/rio_mport_cdev.h
+++ /dev/null
@@ -1,271 +0,0 @@
-/*
- * Copyright (c) 2015-2016, Integrated Device Technology Inc.
- * Copyright (c) 2015, Prodrive Technologies
- * Copyright (c) 2015, Texas Instruments Incorporated
- * Copyright (c) 2015, RapidIO Trade Association
- * All rights reserved.
- *
- * This software is available to you under a choice of one of two licenses.
- * You may choose to be licensed under the terms of the GNU General Public
- * License(GPL) Version 2, or the BSD-3 Clause license below:
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials provided with the distribution.
- *
- * 3. Neither the name of the copyright holder nor the names of its contributors
- * may be used to endorse or promote products derived from this software without
- * specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
- * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
- * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
- * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
- * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
- * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
- * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef _RIO_MPORT_CDEV_H_
-#define _RIO_MPORT_CDEV_H_
-
-#ifndef __user
-#define __user
-#endif
-
-struct rio_mport_maint_io {
-	uint32_t rioid;		/* destID of remote device */
-	uint32_t hopcount;	/* hopcount to remote device */
-	uint32_t offset;	/* offset in register space */
-	size_t length;		/* length in bytes */
-	void __user *buffer;	/* data buffer */
-};
-
-/*
- * Definitions for RapidIO data transfers:
- * - memory mapped (MAPPED)
- * - packet generation from memory (TRANSFER)
- */
-#define RIO_TRANSFER_MODE_MAPPED	(1 << 0)
-#define RIO_TRANSFER_MODE_TRANSFER	(1 << 1)
-#define RIO_CAP_DBL_SEND		(1 << 2)
-#define RIO_CAP_DBL_RECV		(1 << 3)
-#define RIO_CAP_PW_SEND			(1 << 4)
-#define RIO_CAP_PW_RECV			(1 << 5)
-#define RIO_CAP_MAP_OUTB		(1 << 6)
-#define RIO_CAP_MAP_INB			(1 << 7)
-
-struct rio_mport_properties {
-	uint16_t hdid;
-	uint8_t id;			/* Physical port ID */
-	uint8_t  index;
-	uint32_t flags;
-	uint32_t sys_size;		/* Default addressing size */
-	uint8_t  port_ok;
-	uint8_t  link_speed;
-	uint8_t  link_width;
-	uint32_t dma_max_sge;
-	uint32_t dma_max_size;
-	uint32_t dma_align;
-	uint32_t transfer_mode;		/* Default transfer mode */
-	uint32_t cap_sys_size;		/* Capable system sizes */
-	uint32_t cap_addr_size;		/* Capable addressing sizes */
-	uint32_t cap_transfer_mode;	/* Capable transfer modes */
-	uint32_t cap_mport;		/* Mport capabilities */
-};
-
-/*
- * Definitions for RapidIO events;
- * - incoming port-writes
- * - incoming doorbells
- */
-#define RIO_DOORBELL	(1 << 0)
-#define RIO_PORTWRITE	(1 << 1)
-
-struct rio_doorbell {
-	uint32_t rioid;
-	uint16_t payload;
-};
-
-struct rio_doorbell_filter {
-	uint32_t rioid;			/* 0xffffffff to match all ids */
-	uint16_t low;
-	uint16_t high;
-};
-
-
-struct rio_portwrite {
-	uint32_t payload[16];
-};
-
-struct rio_pw_filter {
-	uint32_t mask;
-	uint32_t low;
-	uint32_t high;
-};
-
-/* RapidIO base address for inbound requests set to value defined below
- * indicates that no specific RIO-to-local address translation is requested
- * and driver should use direct (one-to-one) address mapping.
-*/
-#define RIO_MAP_ANY_ADDR	(uint64_t)(~((uint64_t) 0))
-
-struct rio_mmap {
-	uint32_t rioid;
-	uint64_t rio_addr;
-	uint64_t length;
-	uint64_t handle;
-	void *address;
-};
-
-struct rio_dma_mem {
-	uint64_t length;		/* length of DMA memory */
-	uint64_t dma_handle;		/* handle associated with this memory */
-	void *buffer;			/* pointer to this memory */
-};
-
-
-struct rio_event {
-	unsigned int header;	/* event type RIO_DOORBELL or RIO_PORTWRITE */
-	union {
-		struct rio_doorbell doorbell;	/* header for RIO_DOORBELL */
-		struct rio_portwrite portwrite; /* header for RIO_PORTWRITE */
-	} u;
-};
-
-enum rio_transfer_sync {
-	RIO_TRANSFER_SYNC,	/* synchronous transfer */
-	RIO_TRANSFER_ASYNC,	/* asynchronous transfer */
-	RIO_TRANSFER_FAF,	/* fire-and-forget transfer */
-};
-
-enum rio_transfer_dir {
-	RIO_TRANSFER_DIR_READ,	/* Read operation */
-	RIO_TRANSFER_DIR_WRITE,	/* Write operation */
-};
-
-/*
- * RapidIO data exchange transactions are lists of individual transfers. Each
- * transfer exchanges data between two RapidIO devices by remote direct memory
- * access and has its own completion code.
- *
- * The RapidIO specification defines four types of data exchange requests:
- * NREAD, NWRITE, SWRITE and NWRITE_R. The RapidIO DMA channel interface allows
- * to specify the required type of write operation or combination of them when
- * only the last data packet requires response.
- *
- * NREAD:    read up to 256 bytes from remote device memory into local memory
- * NWRITE:   write up to 256 bytes from local memory to remote device memory
- *           without confirmation
- * SWRITE:   as NWRITE, but all addresses and payloads must be 64-bit aligned
- * NWRITE_R: as NWRITE, but expect acknowledgment from remote device.
- *
- * The default exchange is chosen from NREAD and any of the WRITE modes as the
- * driver sees fit. For write requests the user can explicitly choose between
- * any of the write modes for each transaction.
- */
-enum rio_exchange {
-	RIO_EXCHANGE_DEFAULT,	/* Default method */
-	RIO_EXCHANGE_NWRITE,	/* All packets using NWRITE */
-	RIO_EXCHANGE_SWRITE,	/* All packets using SWRITE */
-	RIO_EXCHANGE_NWRITE_R,	/* Last packet NWRITE_R, others NWRITE */
-	RIO_EXCHANGE_SWRITE_R,	/* Last packet NWRITE_R, others SWRITE */
-	RIO_EXCHANGE_NWRITE_R_ALL, /* All packets using NWRITE_R */
-};
-
-struct rio_transfer_io {
-	uint32_t rioid;			/* Target destID */
-	uint64_t rio_addr;		/* Address in target's RIO mem space */
-	enum rio_exchange method;	/* Data exchange method */
-	void __user *loc_addr;
-	uint64_t handle;
-	uint64_t offset;		/* Offset in buffer */
-	uint64_t length;		/* Length in bytes */
-	uint32_t completion_code;	/* Completion code for this transfer */
-};
-
-struct rio_transaction {
-	uint32_t transfer_mode;		/* Data transfer mode */
-	enum rio_transfer_sync sync;	/* Synchronization method */
-	enum rio_transfer_dir dir;	/* Transfer direction */
-	size_t count;			/* Number of transfers */
-	struct rio_transfer_io __user *block;	/* Array of <count> transfers */
-};
-
-struct rio_async_tx_wait {
-	uint32_t token;		/* DMA transaction ID token */
-	uint32_t timeout;	/* Wait timeout in msec, if 0 use default TO */
-};
-
-#define RIO_MAX_DEVNAME_SZ	20
-
-struct rio_rdev_info {
-	uint32_t destid;
-	uint8_t hopcount;
-	uint32_t comptag;
-	char name[RIO_MAX_DEVNAME_SZ + 1];
-};
-
-/* Driver IOCTL codes */
-#define RIO_MPORT_DRV_MAGIC           'm'
-
-#define RIO_MPORT_MAINT_HDID_SET	\
-	_IOW(RIO_MPORT_DRV_MAGIC, 1, uint16_t)
-#define RIO_MPORT_MAINT_COMPTAG_SET	\
-	_IOW(RIO_MPORT_DRV_MAGIC, 2, uint32_t)
-#define RIO_MPORT_MAINT_PORT_IDX_GET	\
-	_IOR(RIO_MPORT_DRV_MAGIC, 3, uint32_t)
-#define RIO_MPORT_GET_PROPERTIES \
-	_IOR(RIO_MPORT_DRV_MAGIC, 4, struct rio_mport_properties)
-#define RIO_MPORT_MAINT_READ_LOCAL \
-	_IOR(RIO_MPORT_DRV_MAGIC, 5, struct rio_mport_maint_io)
-#define RIO_MPORT_MAINT_WRITE_LOCAL \
-	_IOW(RIO_MPORT_DRV_MAGIC, 6, struct rio_mport_maint_io)
-#define RIO_MPORT_MAINT_READ_REMOTE \
-	_IOR(RIO_MPORT_DRV_MAGIC, 7, struct rio_mport_maint_io)
-#define RIO_MPORT_MAINT_WRITE_REMOTE \
-	_IOW(RIO_MPORT_DRV_MAGIC, 8, struct rio_mport_maint_io)
-#define RIO_ENABLE_DOORBELL_RANGE	\
-	_IOW(RIO_MPORT_DRV_MAGIC, 9, struct rio_doorbell_filter)
-#define RIO_DISABLE_DOORBELL_RANGE	\
-	_IOW(RIO_MPORT_DRV_MAGIC, 10, struct rio_doorbell_filter)
-#define RIO_ENABLE_PORTWRITE_RANGE	\
-	_IOW(RIO_MPORT_DRV_MAGIC, 11, struct rio_pw_filter)
-#define RIO_DISABLE_PORTWRITE_RANGE	\
-	_IOW(RIO_MPORT_DRV_MAGIC, 12, struct rio_pw_filter)
-#define RIO_SET_EVENT_MASK		\
-	_IOW(RIO_MPORT_DRV_MAGIC, 13, unsigned int)
-#define RIO_GET_EVENT_MASK		\
-	_IOR(RIO_MPORT_DRV_MAGIC, 14, unsigned int)
-#define RIO_MAP_OUTBOUND \
-	_IOWR(RIO_MPORT_DRV_MAGIC, 15, struct rio_mmap)
-#define RIO_UNMAP_OUTBOUND \
-	_IOW(RIO_MPORT_DRV_MAGIC, 16, struct rio_mmap)
-#define RIO_MAP_INBOUND \
-	_IOWR(RIO_MPORT_DRV_MAGIC, 17, struct rio_mmap)
-#define RIO_UNMAP_INBOUND \
-	_IOW(RIO_MPORT_DRV_MAGIC, 18, uint64_t)
-#define RIO_ALLOC_DMA \
-	_IOWR(RIO_MPORT_DRV_MAGIC, 19, struct rio_dma_mem)
-#define RIO_FREE_DMA \
-	_IOW(RIO_MPORT_DRV_MAGIC, 20, uint64_t)
-#define RIO_TRANSFER \
-	_IOWR(RIO_MPORT_DRV_MAGIC, 21, struct rio_transaction)
-#define RIO_WAIT_FOR_ASYNC \
-	_IOW(RIO_MPORT_DRV_MAGIC, 22, struct rio_async_tx_wait)
-#define RIO_DEV_ADD \
-	_IOW(RIO_MPORT_DRV_MAGIC, 23, struct rio_rdev_info)
-#define RIO_DEV_DEL \
-	_IOW(RIO_MPORT_DRV_MAGIC, 24, struct rio_rdev_info)
-
-#endif /* _RIO_MPORT_CDEV_H_ */
diff --git a/include/uapi/linux/rio_mport_cdev.h b/include/uapi/linux/rio_mport_cdev.h
new file mode 100644
index 000000000000..5796bf1d06ad
--- /dev/null
+++ b/include/uapi/linux/rio_mport_cdev.h
@@ -0,0 +1,277 @@
+/*
+ * Copyright (c) 2015-2016, Integrated Device Technology Inc.
+ * Copyright (c) 2015, Prodrive Technologies
+ * Copyright (c) 2015, Texas Instruments Incorporated
+ * Copyright (c) 2015, RapidIO Trade Association
+ * All rights reserved.
+ *
+ * This software is available to you under a choice of one of two licenses.
+ * You may choose to be licensed under the terms of the GNU General Public
+ * License(GPL) Version 2, or the BSD-3 Clause license below:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its contributors
+ * may be used to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RIO_MPORT_CDEV_H_
+#define _RIO_MPORT_CDEV_H_
+
+#include <linux/ioctl.h>
+#include <linux/types.h>
+
+struct rio_mport_maint_io {
+	__u16 rioid;		/* destID of remote device */
+	__u8  hopcount;		/* hopcount to remote device */
+	__u8  pad0[5];
+	__u32 offset;		/* offset in register space */
+	__u32 length;		/* length in bytes */
+	__u64 buffer;		/* pointer to data buffer */
+};
+
+/*
+ * Definitions for RapidIO data transfers:
+ * - memory mapped (MAPPED)
+ * - packet generation from memory (TRANSFER)
+ */
+#define RIO_TRANSFER_MODE_MAPPED	(1 << 0)
+#define RIO_TRANSFER_MODE_TRANSFER	(1 << 1)
+#define RIO_CAP_DBL_SEND		(1 << 2)
+#define RIO_CAP_DBL_RECV		(1 << 3)
+#define RIO_CAP_PW_SEND			(1 << 4)
+#define RIO_CAP_PW_RECV			(1 << 5)
+#define RIO_CAP_MAP_OUTB		(1 << 6)
+#define RIO_CAP_MAP_INB			(1 << 7)
+
+struct rio_mport_properties {
+	__u16 hdid;
+	__u8  id;			/* Physical port ID */
+	__u8  index;
+	__u32 flags;
+	__u32 sys_size;		/* Default addressing size */
+	__u8  port_ok;
+	__u8  link_speed;
+	__u8  link_width;
+	__u8  pad0;
+	__u32 dma_max_sge;
+	__u32 dma_max_size;
+	__u32 dma_align;
+	__u32 transfer_mode;		/* Default transfer mode */
+	__u32 cap_sys_size;		/* Capable system sizes */
+	__u32 cap_addr_size;		/* Capable addressing sizes */
+	__u32 cap_transfer_mode;	/* Capable transfer modes */
+	__u32 cap_mport;		/* Mport capabilities */
+};
+
+/*
+ * Definitions for RapidIO events;
+ * - incoming port-writes
+ * - incoming doorbells
+ */
+#define RIO_DOORBELL	(1 << 0)
+#define RIO_PORTWRITE	(1 << 1)
+
+struct rio_doorbell {
+	__u16 rioid;
+	__u16 payload;
+};
+
+struct rio_doorbell_filter {
+	__u16 rioid;	/* Use RIO_INVALID_DESTID to match all ids */
+	__u16 low;
+	__u16 high;
+	__u16 pad0;
+};
+
+
+struct rio_portwrite {
+	__u32 payload[16];
+};
+
+struct rio_pw_filter {
+	__u32 mask;
+	__u32 low;
+	__u32 high;
+	__u32 pad0;
+};
+
+/* RapidIO base address for inbound requests set to value defined below
+ * indicates that no specific RIO-to-local address translation is requested
+ * and driver should use direct (one-to-one) address mapping.
+*/
+#define RIO_MAP_ANY_ADDR	(__u64)(~((__u64) 0))
+
+struct rio_mmap {
+	__u16 rioid;
+	__u16 pad0[3];
+	__u64 rio_addr;
+	__u64 length;
+	__u64 handle;
+	__u64 address;
+};
+
+struct rio_dma_mem {
+	__u64 length;		/* length of DMA memory */
+	__u64 dma_handle;	/* handle associated with this memory */
+	__u64 address;
+};
+
+struct rio_event {
+	__u32 header;	/* event type RIO_DOORBELL or RIO_PORTWRITE */
+	union {
+		struct rio_doorbell doorbell;	/* header for RIO_DOORBELL */
+		struct rio_portwrite portwrite; /* header for RIO_PORTWRITE */
+	} u;
+	__u32 pad0;
+};
+
+enum rio_transfer_sync {
+	RIO_TRANSFER_SYNC,	/* synchronous transfer */
+	RIO_TRANSFER_ASYNC,	/* asynchronous transfer */
+	RIO_TRANSFER_FAF,	/* fire-and-forget transfer */
+};
+
+enum rio_transfer_dir {
+	RIO_TRANSFER_DIR_READ,	/* Read operation */
+	RIO_TRANSFER_DIR_WRITE,	/* Write operation */
+};
+
+/*
+ * RapidIO data exchange transactions are lists of individual transfers. Each
+ * transfer exchanges data between two RapidIO devices by remote direct memory
+ * access and has its own completion code.
+ *
+ * The RapidIO specification defines four types of data exchange requests:
+ * NREAD, NWRITE, SWRITE and NWRITE_R. The RapidIO DMA channel interface allows
+ * to specify the required type of write operation or combination of them when
+ * only the last data packet requires response.
+ *
+ * NREAD:    read up to 256 bytes from remote device memory into local memory
+ * NWRITE:   write up to 256 bytes from local memory to remote device memory
+ *           without confirmation
+ * SWRITE:   as NWRITE, but all addresses and payloads must be 64-bit aligned
+ * NWRITE_R: as NWRITE, but expect acknowledgment from remote device.
+ *
+ * The default exchange is chosen from NREAD and any of the WRITE modes as the
+ * driver sees fit. For write requests the user can explicitly choose between
+ * any of the write modes for each transaction.
+ */
+enum rio_exchange {
+	RIO_EXCHANGE_DEFAULT,	/* Default method */
+	RIO_EXCHANGE_NWRITE,	/* All packets using NWRITE */
+	RIO_EXCHANGE_SWRITE,	/* All packets using SWRITE */
+	RIO_EXCHANGE_NWRITE_R,	/* Last packet NWRITE_R, others NWRITE */
+	RIO_EXCHANGE_SWRITE_R,	/* Last packet NWRITE_R, others SWRITE */
+	RIO_EXCHANGE_NWRITE_R_ALL, /* All packets using NWRITE_R */
+};
+
+struct rio_transfer_io {
+	__u64 rio_addr;	/* Address in target's RIO mem space */
+	__u64 loc_addr;
+	__u64 handle;
+	__u64 offset;	/* Offset in buffer */
+	__u64 length;	/* Length in bytes */
+	__u16 rioid;	/* Target destID */
+	__u16 method;	/* Data exchange method, one of rio_exchange enum */
+	__u32 completion_code;	/* Completion code for this transfer */
+};
+
+struct rio_transaction {
+	__u64 block;	/* Pointer to array of <count> transfers */
+	__u32 count;	/* Number of transfers */
+	__u32 transfer_mode;	/* Data transfer mode */
+	__u16 sync;	/* Synch method, one of rio_transfer_sync enum */
+	__u16 dir;	/* Transfer direction, one of rio_transfer_dir enum */
+	__u32 pad0;
+};
+
+struct rio_async_tx_wait {
+	__u32 token;	/* DMA transaction ID token */
+	__u32 timeout;	/* Wait timeout in msec, if 0 use default TO */
+};
+
+#define RIO_MAX_DEVNAME_SZ	20
+
+struct rio_rdev_info {
+	__u16 destid;
+	__u8 hopcount;
+	__u8 pad0;
+	__u32 comptag;
+	char name[RIO_MAX_DEVNAME_SZ + 1];
+};
+
+/* Driver IOCTL codes */
+#define RIO_MPORT_DRV_MAGIC           'm'
+
+#define RIO_MPORT_MAINT_HDID_SET	\
+	_IOW(RIO_MPORT_DRV_MAGIC, 1, __u16)
+#define RIO_MPORT_MAINT_COMPTAG_SET	\
+	_IOW(RIO_MPORT_DRV_MAGIC, 2, __u32)
+#define RIO_MPORT_MAINT_PORT_IDX_GET	\
+	_IOR(RIO_MPORT_DRV_MAGIC, 3, __u32)
+#define RIO_MPORT_GET_PROPERTIES \
+	_IOR(RIO_MPORT_DRV_MAGIC, 4, struct rio_mport_properties)
+#define RIO_MPORT_MAINT_READ_LOCAL \
+	_IOR(RIO_MPORT_DRV_MAGIC, 5, struct rio_mport_maint_io)
+#define RIO_MPORT_MAINT_WRITE_LOCAL \
+	_IOW(RIO_MPORT_DRV_MAGIC, 6, struct rio_mport_maint_io)
+#define RIO_MPORT_MAINT_READ_REMOTE \
+	_IOR(RIO_MPORT_DRV_MAGIC, 7, struct rio_mport_maint_io)
+#define RIO_MPORT_MAINT_WRITE_REMOTE \
+	_IOW(RIO_MPORT_DRV_MAGIC, 8, struct rio_mport_maint_io)
+#define RIO_ENABLE_DOORBELL_RANGE	\
+	_IOW(RIO_MPORT_DRV_MAGIC, 9, struct rio_doorbell_filter)
+#define RIO_DISABLE_DOORBELL_RANGE	\
+	_IOW(RIO_MPORT_DRV_MAGIC, 10, struct rio_doorbell_filter)
+#define RIO_ENABLE_PORTWRITE_RANGE	\
+	_IOW(RIO_MPORT_DRV_MAGIC, 11, struct rio_pw_filter)
+#define RIO_DISABLE_PORTWRITE_RANGE	\
+	_IOW(RIO_MPORT_DRV_MAGIC, 12, struct rio_pw_filter)
+#define RIO_SET_EVENT_MASK		\
+	_IOW(RIO_MPORT_DRV_MAGIC, 13, __u32)
+#define RIO_GET_EVENT_MASK		\
+	_IOR(RIO_MPORT_DRV_MAGIC, 14, __u32)
+#define RIO_MAP_OUTBOUND \
+	_IOWR(RIO_MPORT_DRV_MAGIC, 15, struct rio_mmap)
+#define RIO_UNMAP_OUTBOUND \
+	_IOW(RIO_MPORT_DRV_MAGIC, 16, struct rio_mmap)
+#define RIO_MAP_INBOUND \
+	_IOWR(RIO_MPORT_DRV_MAGIC, 17, struct rio_mmap)
+#define RIO_UNMAP_INBOUND \
+	_IOW(RIO_MPORT_DRV_MAGIC, 18, __u64)
+#define RIO_ALLOC_DMA \
+	_IOWR(RIO_MPORT_DRV_MAGIC, 19, struct rio_dma_mem)
+#define RIO_FREE_DMA \
+	_IOW(RIO_MPORT_DRV_MAGIC, 20, __u64)
+#define RIO_TRANSFER \
+	_IOWR(RIO_MPORT_DRV_MAGIC, 21, struct rio_transaction)
+#define RIO_WAIT_FOR_ASYNC \
+	_IOW(RIO_MPORT_DRV_MAGIC, 22, struct rio_async_tx_wait)
+#define RIO_DEV_ADD \
+	_IOW(RIO_MPORT_DRV_MAGIC, 23, struct rio_rdev_info)
+#define RIO_DEV_DEL \
+	_IOW(RIO_MPORT_DRV_MAGIC, 24, struct rio_rdev_info)
+
+#endif /* _RIO_MPORT_CDEV_H_ */
-- 
cgit v1.2.3


From 127393fbe597dd85863a9bdccaa11007e7d4948f Mon Sep 17 00:00:00 2001
From: Andrea Arcangeli <aarcange@redhat.com>
Date: Thu, 5 May 2016 16:22:20 -0700
Subject: mm: thp: kvm: fix memory corruption in KVM with THP enabled

After the THP refcounting change, obtaining a compound pages from
get_user_pages() no longer allows us to assume the entire compound page
is immediately mappable from a secondary MMU.

A secondary MMU doesn't want to call get_user_pages() more than once for
each compound page, in order to know if it can map the whole compound
page.  So a secondary MMU needs to know from a single get_user_pages()
invocation when it can map immediately the entire compound page to avoid
a flood of unnecessary secondary MMU faults and spurious
atomic_inc()/atomic_dec() (pages don't have to be pinned by MMU notifier
users).

Ideally instead of the page->_mapcount < 1 check, get_user_pages()
should return the granularity of the "page" mapping in the "mm" passed
to get_user_pages().  However it's non trivial change to pass the "pmd"
status belonging to the "mm" walked by get_user_pages up the stack (up
to the caller of get_user_pages).  So the fix just checks if there is
not a single pte mapping on the page returned by get_user_pages, and in
turn if the caller can assume that the whole compound page is mapped in
the current "mm" (in a pmd_trans_huge()).  In such case the entire
compound page is safe to map into the secondary MMU without additional
get_user_pages() calls on the surrounding tail/head pages.  In addition
of being faster, not having to run other get_user_pages() calls also
reduces the memory footprint of the secondary MMU fault in case the pmd
split happened as result of memory pressure.

Without this fix after a MADV_DONTNEED (like invoked by QEMU during
postcopy live migration or balloning) or after generic swapping (with a
failure in split_huge_page() that would only result in pmd splitting and
not a physical page split), KVM would map the whole compound page into
the shadow pagetables, despite regular faults or userfaults (like
UFFDIO_COPY) may map regular pages into the primary MMU as result of the
pte faults, leading to the guest mode and userland mode going out of
sync and not working on the same memory at all times.

Any other secondary MMU notifier manager (KVM is just one of the many
MMU notifier users) will need the same information if it doesn't want to
run a flood of get_user_pages_fast and it can support multiple
granularity in the secondary MMU mappings, so I think it is justified to
be exposed not just to KVM.

The other option would be to move transparent_hugepage_adjust to
mm/huge_memory.c but that currently has all kind of KVM data structures
in it, so it's definitely not a cut-and-paste work, so I couldn't do a
fix as cleaner as this one for 4.6.

Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Cc: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Cc: "Kirill A. Shutemov" <kirill@shutemov.name>
Cc: "Li, Liang Z" <liang.z.li@intel.com>
Cc: Amit Shah <amit.shah@redhat.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/page-flags.h | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

(limited to 'include')

diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index f4ed4f1b0c77..6b052aa7b5b7 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -516,6 +516,27 @@ static inline int PageTransCompound(struct page *page)
 	return PageCompound(page);
 }
 
+/*
+ * PageTransCompoundMap is the same as PageTransCompound, but it also
+ * guarantees the primary MMU has the entire compound page mapped
+ * through pmd_trans_huge, which in turn guarantees the secondary MMUs
+ * can also map the entire compound page. This allows the secondary
+ * MMUs to call get_user_pages() only once for each compound page and
+ * to immediately map the entire compound page with a single secondary
+ * MMU fault. If there will be a pmd split later, the secondary MMUs
+ * will get an update through the MMU notifier invalidation through
+ * split_huge_pmd().
+ *
+ * Unlike PageTransCompound, this is safe to be called only while
+ * split_huge_pmd() cannot run from under us, like if protected by the
+ * MMU notifier, otherwise it may result in page->_mapcount < 0 false
+ * positives.
+ */
+static inline int PageTransCompoundMap(struct page *page)
+{
+	return PageTransCompound(page) && atomic_read(&page->_mapcount) < 0;
+}
+
 /*
  * PageTransTail returns true for both transparent huge pages
  * and hugetlbfs pages, so it should only be called when it's known
@@ -559,6 +580,7 @@ static inline int TestClearPageDoubleMap(struct page *page)
 #else
 TESTPAGEFLAG_FALSE(TransHuge)
 TESTPAGEFLAG_FALSE(TransCompound)
+TESTPAGEFLAG_FALSE(TransCompoundMap)
 TESTPAGEFLAG_FALSE(TransTail)
 TESTPAGEFLAG_FALSE(DoubleMap)
 	TESTSETFLAG_FALSE(DoubleMap)
-- 
cgit v1.2.3


From 7322dd755e7dd34bc5359aa27abeed1687e0f628 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Thu, 5 May 2016 16:22:39 -0700
Subject: byteswap: try to avoid __builtin_constant_p gcc bug

This is another attempt to avoid a regression in wwn_to_u64() after that
started using get_unaligned_be64(), which in turn ran into a bug on
gcc-4.9 through 6.1.

The regression got introduced due to the combination of two separate
workarounds (commits e3bde9568d99: "include/linux/unaligned: force
inlining of byteswap operations" and ef3fb2422ffe: "scsi: fc: use
get/put_unaligned64 for wwn access") that each try to sidestep distinct
problems with gcc behavior (code growth and increased stack usage).

Unfortunately after both have been applied, a more serious gcc bug has
been uncovered, leading to incorrect object code that discards part of a
function and causes undefined behavior.

As part of this problem is how __builtin_constant_p gets evaluated on an
argument passed by reference into an inline function, this avoids the
use of __builtin_constant_p() for all architectures that set
CONFIG_ARCH_USE_BUILTIN_BSWAP.  Most architectures do not set
ARCH_SUPPORTS_OPTIMIZED_INLINING, which means they probably do not
suffer from the problem in the qla2xxx driver, but they might still run
into it elsewhere.

Both of the original workarounds were only merged in the 4.6 kernel, and
the bug that is fixed by this patch should only appear if both are
there, so we probably don't need to backport the fix.  On the other
hand, it works by simplifying the code path and should not have any
negative effects.

[arnd@arndb.de: fix older gcc warnings]
  (http://lkml.kernel.org/r/12243652.bxSxEgjgfk@wuerfel)
Link: https://lkml.org/lkml/headers/2016/4/12/1103
Link: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=66122
Link: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=70232
Link: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=70646
Fixes: e3bde9568d99 ("include/linux/unaligned: force inlining of byteswap operations")
Fixes: ef3fb2422ffe ("scsi: fc: use get/put_unaligned64 for wwn access")
Link: http://lkml.kernel.org/r/1780465.XdtPJpi8Tt@wuerfel
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com>
Tested-by: Josh Poimboeuf <jpoimboe@redhat.com> # on gcc-5.3
Tested-by: Quinn Tran <quinn.tran@qlogic.com>
Cc: Martin Jambor <mjambor@suse.cz>
Cc: "Martin K. Petersen" <martin.petersen@oracle.com>
Cc: James Bottomley <James.Bottomley@hansenpartnership.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: Thomas Graf <tgraf@suug.ch>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Himanshu Madhani <himanshu.madhani@qlogic.com>
Cc: Jan Hubicka <hubicka@ucw.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/uapi/linux/swab.h | 24 +++++++++++++++---------
 1 file changed, 15 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/swab.h b/include/uapi/linux/swab.h
index 3f10e5317b46..8f3a8f606fd9 100644
--- a/include/uapi/linux/swab.h
+++ b/include/uapi/linux/swab.h
@@ -45,9 +45,7 @@
 
 static inline __attribute_const__ __u16 __fswab16(__u16 val)
 {
-#ifdef __HAVE_BUILTIN_BSWAP16__
-	return __builtin_bswap16(val);
-#elif defined (__arch_swab16)
+#if defined (__arch_swab16)
 	return __arch_swab16(val);
 #else
 	return ___constant_swab16(val);
@@ -56,9 +54,7 @@ static inline __attribute_const__ __u16 __fswab16(__u16 val)
 
 static inline __attribute_const__ __u32 __fswab32(__u32 val)
 {
-#ifdef __HAVE_BUILTIN_BSWAP32__
-	return __builtin_bswap32(val);
-#elif defined(__arch_swab32)
+#if defined(__arch_swab32)
 	return __arch_swab32(val);
 #else
 	return ___constant_swab32(val);
@@ -67,9 +63,7 @@ static inline __attribute_const__ __u32 __fswab32(__u32 val)
 
 static inline __attribute_const__ __u64 __fswab64(__u64 val)
 {
-#ifdef __HAVE_BUILTIN_BSWAP64__
-	return __builtin_bswap64(val);
-#elif defined (__arch_swab64)
+#if defined (__arch_swab64)
 	return __arch_swab64(val);
 #elif defined(__SWAB_64_THRU_32__)
 	__u32 h = val >> 32;
@@ -102,28 +96,40 @@ static inline __attribute_const__ __u32 __fswahb32(__u32 val)
  * __swab16 - return a byteswapped 16-bit value
  * @x: value to byteswap
  */
+#ifdef __HAVE_BUILTIN_BSWAP16__
+#define __swab16(x) (__u16)__builtin_bswap16((__u16)(x))
+#else
 #define __swab16(x)				\
 	(__builtin_constant_p((__u16)(x)) ?	\
 	___constant_swab16(x) :			\
 	__fswab16(x))
+#endif
 
 /**
  * __swab32 - return a byteswapped 32-bit value
  * @x: value to byteswap
  */
+#ifdef __HAVE_BUILTIN_BSWAP32__
+#define __swab32(x) (__u32)__builtin_bswap32((__u32)(x))
+#else
 #define __swab32(x)				\
 	(__builtin_constant_p((__u32)(x)) ?	\
 	___constant_swab32(x) :			\
 	__fswab32(x))
+#endif
 
 /**
  * __swab64 - return a byteswapped 64-bit value
  * @x: value to byteswap
  */
+#ifdef __HAVE_BUILTIN_BSWAP64__
+#define __swab64(x) (__u64)__builtin_bswap64((__u64)(x))
+#else
 #define __swab64(x)				\
 	(__builtin_constant_p((__u64)(x)) ?	\
 	___constant_swab64(x) :			\
 	__fswab64(x))
+#endif
 
 /**
  * __swahw32 - return a word-swapped 32-bit value
-- 
cgit v1.2.3


From 54d5ca871e72f2bb172ec9323497f01cd5091ec7 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@redhat.com>
Date: Wed, 11 May 2016 01:16:37 +0200
Subject: vfs: add vfs_select_inode() helper

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
Cc: <stable@vger.kernel.org> # v4.2+
---
 include/linux/dcache.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'include')

diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 4bb4de8d95ea..7e9422cb5989 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -565,4 +565,16 @@ static inline struct dentry *d_real(struct dentry *dentry)
 		return dentry;
 }
 
+static inline struct inode *vfs_select_inode(struct dentry *dentry,
+					     unsigned open_flags)
+{
+	struct inode *inode = d_inode(dentry);
+
+	if (inode && unlikely(dentry->d_flags & DCACHE_OP_SELECT_INODE))
+		inode = dentry->d_op->d_select_inode(dentry, open_flags);
+
+	return inode;
+}
+
+
 #endif	/* __LINUX_DCACHE_H */
-- 
cgit v1.2.3


From 3c9fe8cdff1b889a059a30d22f130372f2b3885f Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@redhat.com>
Date: Wed, 11 May 2016 01:16:37 +0200
Subject: vfs: add lookup_hash() helper

Overlayfs needs lookup without inode_permission() and already has the name
hash (in form of dentry->d_name on overlayfs dentry).  It also doesn't
support filesystems with d_op->d_hash() so basically it only needs
the actual hashed lookup from lookup_one_len_unlocked()

So add a new helper that does unlocked lookup of a hashed name.

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 include/linux/namei.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/namei.h b/include/linux/namei.h
index 77d01700daf7..ec5ec2818a28 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -79,6 +79,8 @@ extern int kern_path_mountpoint(int, const char *, struct path *, unsigned int);
 
 extern struct dentry *lookup_one_len(const char *, struct dentry *, int);
 extern struct dentry *lookup_one_len_unlocked(const char *, struct dentry *, int);
+struct qstr;
+extern struct dentry *lookup_hash(const struct qstr *, struct dentry *);
 
 extern int follow_down_one(struct path *);
 extern int follow_down(struct path *);
-- 
cgit v1.2.3