From 439cd39ea136d2c026805264d58a91f36b6b64ca Mon Sep 17 00:00:00 2001
From: Stefano Brivio <sbrivio@redhat.com>
Date: Sat, 14 Jul 2018 21:59:43 +0200
Subject: netfilter: ipset: list:set: Decrease refcount synchronously on
 deletion and replace

Commit 45040978c899 ("netfilter: ipset: Fix set:list type crash
when flush/dump set in parallel") postponed decreasing set
reference counters to the RCU callback.

An 'ipset del' command can terminate before the RCU grace period
is elapsed, and if sets are listed before then, the reference
counter shown in userspace will be wrong:

 # ipset create h hash:ip; ipset create l list:set; ipset add l
 # ipset del l h; ipset list h
 Name: h
 Type: hash:ip
 Revision: 4
 Header: family inet hashsize 1024 maxelem 65536
 Size in memory: 88
 References: 1
 Number of entries: 0
 Members:
 # sleep 1; ipset list h
 Name: h
 Type: hash:ip
 Revision: 4
 Header: family inet hashsize 1024 maxelem 65536
 Size in memory: 88
 References: 0
 Number of entries: 0
 Members:

Fix this by making the reference count update synchronous again.

As a result, when sets are listed, ip_set_name_byindex() might
now fetch a set whose reference count is already zero. Instead
of relying on the reference count to protect against concurrent
set renaming, grab ip_set_ref_lock as reader and copy the name,
while holding the same lock in ip_set_rename() as writer
instead.

Reported-by: Li Shuang <shuali@redhat.com>
Fixes: 45040978c899 ("netfilter: ipset: Fix set:list type crash when flush/dump set in parallel")
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/ipset/ip_set.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h
index 34fc80f3eb90..1d100efe74ec 100644
--- a/include/linux/netfilter/ipset/ip_set.h
+++ b/include/linux/netfilter/ipset/ip_set.h
@@ -314,7 +314,7 @@ enum {
 extern ip_set_id_t ip_set_get_byname(struct net *net,
 				     const char *name, struct ip_set **set);
 extern void ip_set_put_byindex(struct net *net, ip_set_id_t index);
-extern const char *ip_set_name_byindex(struct net *net, ip_set_id_t index);
+extern void ip_set_name_byindex(struct net *net, ip_set_id_t index, char *name);
 extern ip_set_id_t ip_set_nfnl_get_byindex(struct net *net, ip_set_id_t index);
 extern void ip_set_nfnl_put(struct net *net, ip_set_id_t index);
 
-- 
cgit v1.2.3


From 6d212db11947ae5464e4717536ed9faf61c01e86 Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Mon, 15 Oct 2018 10:30:23 +0200
Subject: mm: add mm_pxd_folded checks to pgtable_bytes accounting functions

The common mm code calls mm_dec_nr_pmds() and mm_dec_nr_puds()
in free_pgtables() if the address range spans a full pud or pmd.
If mm_dec_nr_puds/mm_dec_nr_pmds are non-empty due to configuration
settings they blindly subtract the size of the pmd or pud table from
pgtable_bytes even if the pud or pmd page table layer is folded.

Add explicit mm_[pmd|pud]_folded checks to the four pgtable_bytes
accounting functions mm_inc_nr_puds, mm_inc_nr_pmds, mm_dec_nr_puds
and mm_dec_nr_pmds. As the check for folded page tables can be
overwritten by the architecture, this allows to keep a correct
pgtable_bytes value for platforms that use a dynamic number of
page table levels.

Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 include/linux/mm.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index daa2b8f1e9a8..a3701e91bb57 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1742,11 +1742,15 @@ int __pud_alloc(struct mm_struct *mm, p4d_t *p4d, unsigned long address);
 
 static inline void mm_inc_nr_puds(struct mm_struct *mm)
 {
+	if (mm_pud_folded(mm))
+		return;
 	atomic_long_add(PTRS_PER_PUD * sizeof(pud_t), &mm->pgtables_bytes);
 }
 
 static inline void mm_dec_nr_puds(struct mm_struct *mm)
 {
+	if (mm_pud_folded(mm))
+		return;
 	atomic_long_sub(PTRS_PER_PUD * sizeof(pud_t), &mm->pgtables_bytes);
 }
 #endif
@@ -1766,11 +1770,15 @@ int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address);
 
 static inline void mm_inc_nr_pmds(struct mm_struct *mm)
 {
+	if (mm_pmd_folded(mm))
+		return;
 	atomic_long_add(PTRS_PER_PMD * sizeof(pmd_t), &mm->pgtables_bytes);
 }
 
 static inline void mm_dec_nr_pmds(struct mm_struct *mm)
 {
+	if (mm_pmd_folded(mm))
+		return;
 	atomic_long_sub(PTRS_PER_PMD * sizeof(pmd_t), &mm->pgtables_bytes);
 }
 #endif
-- 
cgit v1.2.3


From 17b8b74c0f8dbf9b9e3301f9ca5b65dd1c079951 Mon Sep 17 00:00:00 2001
From: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Date: Fri, 19 Oct 2018 19:35:19 +0200
Subject: netfilter: ipset: Correct rcu_dereference() call in
 ip_set_put_comment()

The function is called when rcu_read_lock() is held and not
when rcu_read_lock_bh() is held.

Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/ipset/ip_set_comment.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/ipset/ip_set_comment.h b/include/linux/netfilter/ipset/ip_set_comment.h
index 8e2bab1e8e90..70877f8de7e9 100644
--- a/include/linux/netfilter/ipset/ip_set_comment.h
+++ b/include/linux/netfilter/ipset/ip_set_comment.h
@@ -43,11 +43,11 @@ ip_set_init_comment(struct ip_set *set, struct ip_set_comment *comment,
 	rcu_assign_pointer(comment->c, c);
 }
 
-/* Used only when dumping a set, protected by rcu_read_lock_bh() */
+/* Used only when dumping a set, protected by rcu_read_lock() */
 static inline int
 ip_set_put_comment(struct sk_buff *skb, const struct ip_set_comment *comment)
 {
-	struct ip_set_comment_rcu *c = rcu_dereference_bh(comment->c);
+	struct ip_set_comment_rcu *c = rcu_dereference(comment->c);
 
 	if (!c)
 		return 0;
-- 
cgit v1.2.3


From 3e59020abf0f88182730527ee5b862e786eb485a Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 31 Oct 2018 08:39:12 -0700
Subject: net: bql: add __netdev_tx_sent_queue()

When qdisc_run() tries to use BQL budget to bulk-dequeue a batch
of packets, GSO can later transform this list in another list
of skbs, and each skb is sent to device ndo_start_xmit(),
one at a time, with skb->xmit_more being set to one but
for last skb.

Problem is that very often, BQL limit is hit in the middle of
the packet train, forcing dev_hard_start_xmit() to stop the
bulk send and requeue the end of the list.

BQL role is to avoid head of line blocking, making sure
a qdisc can deliver high priority packets before low priority ones.

But there is no way requeued packets can be bypassed by fresh
packets in the qdisc.

Aborting the bulk send increases TX softirqs, and hot cache
lines (after skb_segment()) are wasted.

Note that for TSO packets, we never split a packet in the middle
because of BQL limit being hit.

Drivers should be able to update BQL counters without
flipping/caring about BQL status, if the current skb
has xmit_more set.

Upper layers are ultimately responsible to stop sending another
packet train when BQL limit is hit.

Code template in a driver might look like the following :

	send_doorbell = __netdev_tx_sent_queue(tx_queue, nr_bytes, skb->xmit_more);

Note that __netdev_tx_sent_queue() use is not mandatory,
since following patch will change dev_hard_start_xmit()
to not care about BQL status.

But it is highly recommended so that xmit_more full benefits
can be reached (less doorbells sent, and less atomic operations as well)

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index dc1d9ed33b31..857f8abf7b91 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3190,6 +3190,26 @@ static inline void netdev_tx_sent_queue(struct netdev_queue *dev_queue,
 #endif
 }
 
+/* Variant of netdev_tx_sent_queue() for drivers that are aware
+ * that they should not test BQL status themselves.
+ * We do want to change __QUEUE_STATE_STACK_XOFF only for the last
+ * skb of a batch.
+ * Returns true if the doorbell must be used to kick the NIC.
+ */
+static inline bool __netdev_tx_sent_queue(struct netdev_queue *dev_queue,
+					  unsigned int bytes,
+					  bool xmit_more)
+{
+	if (xmit_more) {
+#ifdef CONFIG_BQL
+		dql_queued(&dev_queue->dql, bytes);
+#endif
+		return netif_tx_queue_stopped(dev_queue);
+	}
+	netdev_tx_sent_queue(dev_queue, bytes);
+	return true;
+}
+
 /**
  * 	netdev_sent_queue - report the number of bytes queued to hardware
  * 	@dev: network device
-- 
cgit v1.2.3


From 35b69a420bfb56b7b74cb635ea903db05e357bec Mon Sep 17 00:00:00 2001
From: Michael Kelley <mikelley@microsoft.com>
Date: Sun, 4 Nov 2018 03:48:54 +0000
Subject: clockevents/drivers/i8253: Add support for PIT shutdown quirk

Add support for platforms where pit_shutdown() doesn't work because of a
quirk in the PIT emulation. On these platforms setting the counter register
to zero causes the PIT to start running again, negating the shutdown.

Provide a global variable that controls whether the counter register is
zero'ed, which platform specific code can override.

Signed-off-by: Michael Kelley <mikelley@microsoft.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: "gregkh@linuxfoundation.org" <gregkh@linuxfoundation.org>
Cc: "devel@linuxdriverproject.org" <devel@linuxdriverproject.org>
Cc: "daniel.lezcano@linaro.org" <daniel.lezcano@linaro.org>
Cc: "virtualization@lists.linux-foundation.org" <virtualization@lists.linux-foundation.org>
Cc: "jgross@suse.com" <jgross@suse.com>
Cc: "akataria@vmware.com" <akataria@vmware.com>
Cc: "olaf@aepfle.de" <olaf@aepfle.de>
Cc: "apw@canonical.com" <apw@canonical.com>
Cc: vkuznets <vkuznets@redhat.com>
Cc: "jasowang@redhat.com" <jasowang@redhat.com>
Cc: "marcelo.cerri@canonical.com" <marcelo.cerri@canonical.com>
Cc: KY Srinivasan <kys@microsoft.com>
Cc: stable@vger.kernel.org
Link: https://lkml.kernel.org/r/1541303219-11142-2-git-send-email-mikelley@microsoft.com
---
 include/linux/i8253.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/i8253.h b/include/linux/i8253.h
index e6bb36a97519..8336b2f6f834 100644
--- a/include/linux/i8253.h
+++ b/include/linux/i8253.h
@@ -21,6 +21,7 @@
 #define PIT_LATCH	((PIT_TICK_RATE + HZ/2) / HZ)
 
 extern raw_spinlock_t i8253_lock;
+extern bool i8253_clear_counter_on_shutdown;
 extern struct clock_event_device i8253_clockevent;
 extern void clockevent_i8253_init(bool oneshot);
 
-- 
cgit v1.2.3


From d098093ba06eb032057d1aca1c2e45889e099d00 Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@bootlin.com>
Date: Sun, 28 Oct 2018 12:29:55 +0100
Subject: mtd: nand: Fix nanddev_neraseblocks()

nanddev_neraseblocks() currently returns the number pages per LUN
instead of the total number of eraseblocks.

Fixes: 9c3736a3de21 ("mtd: nand: Add core infrastructure to deal with NAND devices")
Cc: <stable@vger.kernel.org>
Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
Reviewed-by: Miquel Raynal <miquel.raynal@bootlin.com>
---
 include/linux/mtd/nand.h | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h
index abe975c87b90..78b86dea2f29 100644
--- a/include/linux/mtd/nand.h
+++ b/include/linux/mtd/nand.h
@@ -324,9 +324,8 @@ static inline unsigned int nanddev_ntargets(const struct nand_device *nand)
  */
 static inline unsigned int nanddev_neraseblocks(const struct nand_device *nand)
 {
-	return (u64)nand->memorg.luns_per_target *
-	       nand->memorg.eraseblocks_per_lun *
-	       nand->memorg.pages_per_eraseblock;
+	return nand->memorg.ntargets * nand->memorg.luns_per_target *
+	       nand->memorg.eraseblocks_per_lun;
 }
 
 /**
-- 
cgit v1.2.3


From 163c8d54a997153ee1a1e07fcac087492ad85b37 Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Mon, 5 Nov 2018 07:36:28 +0100
Subject: compiler: remove __no_sanitize_address_or_inline again

The __no_sanitize_address_or_inline and __no_kasan_or_inline defines
are almost identical. The only difference is that __no_kasan_or_inline
does not have the 'notrace' attribute.

To be able to replace __no_sanitize_address_or_inline with the older
definition, add 'notrace' to __no_kasan_or_inline and change to two
users of __no_sanitize_address_or_inline in the s390 code.

The 'notrace' option is necessary for e.g. the __load_psw_mask function
in arch/s390/include/asm/processor.h. Without the option it is possible
to trace __load_psw_mask which leads to kernel stack overflow.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Pointed-out-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
Acked-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/compiler-gcc.h | 12 ------------
 include/linux/compiler.h     |  2 +-
 2 files changed, 1 insertion(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index c0f5db3a9621..2010493e1040 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -143,18 +143,6 @@
 #define KASAN_ABI_VERSION 3
 #endif
 
-/*
- * Because __no_sanitize_address conflicts with inlining:
- *   https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67368
- * we do one or the other. 
- */
-#ifdef CONFIG_KASAN
-#define __no_sanitize_address_or_inline					\
-	__no_sanitize_address __maybe_unused notrace
-#else
-#define __no_sanitize_address_or_inline inline
-#endif
-
 #if GCC_VERSION >= 50100
 #define COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW 1
 #endif
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 18c80cfa4fc4..06396c1cf127 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -189,7 +189,7 @@ void __read_once_size(const volatile void *p, void *res, int size)
  * 	https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67368
  * '__maybe_unused' allows us to avoid defined-but-not-used warnings.
  */
-# define __no_kasan_or_inline __no_sanitize_address __maybe_unused
+# define __no_kasan_or_inline __no_sanitize_address notrace __maybe_unused
 #else
 # define __no_kasan_or_inline __always_inline
 #endif
-- 
cgit v1.2.3


From 4c0608f4a0e76dfb82d3accd20081f4bf47ed143 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <willy@infradead.org>
Date: Tue, 30 Oct 2018 09:45:55 -0400
Subject: XArray: Regularise xa_reserve

The xa_reserve() function was a little unusual in that it attempted to
be callable for all kinds of locking scenarios.  Make it look like the
other APIs with __xa_reserve, xa_reserve_bh and xa_reserve_irq variants.

Signed-off-by: Matthew Wilcox <willy@infradead.org>
---
 include/linux/xarray.h | 80 +++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 79 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/xarray.h b/include/linux/xarray.h
index d9514928ddac..c2cb0426c60c 100644
--- a/include/linux/xarray.h
+++ b/include/linux/xarray.h
@@ -291,7 +291,6 @@ void *xa_load(struct xarray *, unsigned long index);
 void *xa_store(struct xarray *, unsigned long index, void *entry, gfp_t);
 void *xa_cmpxchg(struct xarray *, unsigned long index,
 			void *old, void *entry, gfp_t);
-int xa_reserve(struct xarray *, unsigned long index, gfp_t);
 void *xa_store_range(struct xarray *, unsigned long first, unsigned long last,
 			void *entry, gfp_t);
 bool xa_get_mark(struct xarray *, unsigned long index, xa_mark_t);
@@ -455,6 +454,7 @@ void *__xa_store(struct xarray *, unsigned long index, void *entry, gfp_t);
 void *__xa_cmpxchg(struct xarray *, unsigned long index, void *old,
 		void *entry, gfp_t);
 int __xa_alloc(struct xarray *, u32 *id, u32 max, void *entry, gfp_t);
+int __xa_reserve(struct xarray *, unsigned long index, gfp_t);
 void __xa_set_mark(struct xarray *, unsigned long index, xa_mark_t);
 void __xa_clear_mark(struct xarray *, unsigned long index, xa_mark_t);
 
@@ -621,6 +621,84 @@ static inline int xa_alloc_irq(struct xarray *xa, u32 *id, u32 max, void *entry,
 	return err;
 }
 
+/**
+ * xa_reserve() - Reserve this index in the XArray.
+ * @xa: XArray.
+ * @index: Index into array.
+ * @gfp: Memory allocation flags.
+ *
+ * Ensures there is somewhere to store an entry at @index in the array.
+ * If there is already something stored at @index, this function does
+ * nothing.  If there was nothing there, the entry is marked as reserved.
+ * Loading from a reserved entry returns a %NULL pointer.
+ *
+ * If you do not use the entry that you have reserved, call xa_release()
+ * or xa_erase() to free any unnecessary memory.
+ *
+ * Context: Any context.  Takes and releases the xa_lock.
+ * May sleep if the @gfp flags permit.
+ * Return: 0 if the reservation succeeded or -ENOMEM if it failed.
+ */
+static inline
+int xa_reserve(struct xarray *xa, unsigned long index, gfp_t gfp)
+{
+	int ret;
+
+	xa_lock(xa);
+	ret = __xa_reserve(xa, index, gfp);
+	xa_unlock(xa);
+
+	return ret;
+}
+
+/**
+ * xa_reserve_bh() - Reserve this index in the XArray.
+ * @xa: XArray.
+ * @index: Index into array.
+ * @gfp: Memory allocation flags.
+ *
+ * A softirq-disabling version of xa_reserve().
+ *
+ * Context: Any context.  Takes and releases the xa_lock while
+ * disabling softirqs.
+ * Return: 0 if the reservation succeeded or -ENOMEM if it failed.
+ */
+static inline
+int xa_reserve_bh(struct xarray *xa, unsigned long index, gfp_t gfp)
+{
+	int ret;
+
+	xa_lock_bh(xa);
+	ret = __xa_reserve(xa, index, gfp);
+	xa_unlock_bh(xa);
+
+	return ret;
+}
+
+/**
+ * xa_reserve_irq() - Reserve this index in the XArray.
+ * @xa: XArray.
+ * @index: Index into array.
+ * @gfp: Memory allocation flags.
+ *
+ * An interrupt-disabling version of xa_reserve().
+ *
+ * Context: Process context.  Takes and releases the xa_lock while
+ * disabling interrupts.
+ * Return: 0 if the reservation succeeded or -ENOMEM if it failed.
+ */
+static inline
+int xa_reserve_irq(struct xarray *xa, unsigned long index, gfp_t gfp)
+{
+	int ret;
+
+	xa_lock_irq(xa);
+	ret = __xa_reserve(xa, index, gfp);
+	xa_unlock_irq(xa);
+
+	return ret;
+}
+
 /* Everything below here is the Advanced API.  Proceed with caution. */
 
 /*
-- 
cgit v1.2.3


From c5beb07e7a06b24f4f27304f6282b5dbd929543b Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <willy@infradead.org>
Date: Wed, 31 Oct 2018 14:39:28 -0400
Subject: XArray: Unify xa_cmpxchg and __xa_cmpxchg

xa_cmpxchg() was one of the largest functions in the xarray
implementation.  By turning it into a wrapper and having the callers
take the lock (like several other functions), we save 160 bytes on a
tinyconfig build and reduce the duplication in xarray.c.

Signed-off-by: Matthew Wilcox <willy@infradead.org>
---
 include/linux/xarray.h | 113 ++++++++++++++++++++++++++++++-------------------
 1 file changed, 69 insertions(+), 44 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/xarray.h b/include/linux/xarray.h
index c2cb0426c60c..8e59d4fbd55e 100644
--- a/include/linux/xarray.h
+++ b/include/linux/xarray.h
@@ -289,8 +289,6 @@ struct xarray {
 void xa_init_flags(struct xarray *, gfp_t flags);
 void *xa_load(struct xarray *, unsigned long index);
 void *xa_store(struct xarray *, unsigned long index, void *entry, gfp_t);
-void *xa_cmpxchg(struct xarray *, unsigned long index,
-			void *old, void *entry, gfp_t);
 void *xa_store_range(struct xarray *, unsigned long first, unsigned long last,
 			void *entry, gfp_t);
 bool xa_get_mark(struct xarray *, unsigned long index, xa_mark_t);
@@ -359,48 +357,6 @@ static inline void *xa_erase(struct xarray *xa, unsigned long index)
 	return xa_store(xa, index, NULL, 0);
 }
 
-/**
- * xa_insert() - Store this entry in the XArray unless another entry is
- *			already present.
- * @xa: XArray.
- * @index: Index into array.
- * @entry: New entry.
- * @gfp: Memory allocation flags.
- *
- * If you would rather see the existing entry in the array, use xa_cmpxchg().
- * This function is for users who don't care what the entry is, only that
- * one is present.
- *
- * Context: Process context.  Takes and releases the xa_lock.
- *	    May sleep if the @gfp flags permit.
- * Return: 0 if the store succeeded.  -EEXIST if another entry was present.
- * -ENOMEM if memory could not be allocated.
- */
-static inline int xa_insert(struct xarray *xa, unsigned long index,
-		void *entry, gfp_t gfp)
-{
-	void *curr = xa_cmpxchg(xa, index, NULL, entry, gfp);
-	if (!curr)
-		return 0;
-	if (xa_is_err(curr))
-		return xa_err(curr);
-	return -EEXIST;
-}
-
-/**
- * xa_release() - Release a reserved entry.
- * @xa: XArray.
- * @index: Index of entry.
- *
- * After calling xa_reserve(), you can call this function to release the
- * reservation.  If the entry at @index has been stored to, this function
- * will do nothing.
- */
-static inline void xa_release(struct xarray *xa, unsigned long index)
-{
-	xa_cmpxchg(xa, index, NULL, NULL, 0);
-}
-
 /**
  * xa_for_each() - Iterate over a portion of an XArray.
  * @xa: XArray.
@@ -534,6 +490,61 @@ static inline void *xa_erase_irq(struct xarray *xa, unsigned long index)
 	return entry;
 }
 
+/**
+ * xa_cmpxchg() - Conditionally replace an entry in the XArray.
+ * @xa: XArray.
+ * @index: Index into array.
+ * @old: Old value to test against.
+ * @entry: New value to place in array.
+ * @gfp: Memory allocation flags.
+ *
+ * If the entry at @index is the same as @old, replace it with @entry.
+ * If the return value is equal to @old, then the exchange was successful.
+ *
+ * Context: Any context.  Takes and releases the xa_lock.  May sleep
+ * if the @gfp flags permit.
+ * Return: The old value at this index or xa_err() if an error happened.
+ */
+static inline void *xa_cmpxchg(struct xarray *xa, unsigned long index,
+			void *old, void *entry, gfp_t gfp)
+{
+	void *curr;
+
+	xa_lock(xa);
+	curr = __xa_cmpxchg(xa, index, old, entry, gfp);
+	xa_unlock(xa);
+
+	return curr;
+}
+
+/**
+ * xa_insert() - Store this entry in the XArray unless another entry is
+ *			already present.
+ * @xa: XArray.
+ * @index: Index into array.
+ * @entry: New entry.
+ * @gfp: Memory allocation flags.
+ *
+ * If you would rather see the existing entry in the array, use xa_cmpxchg().
+ * This function is for users who don't care what the entry is, only that
+ * one is present.
+ *
+ * Context: Process context.  Takes and releases the xa_lock.
+ *	    May sleep if the @gfp flags permit.
+ * Return: 0 if the store succeeded.  -EEXIST if another entry was present.
+ * -ENOMEM if memory could not be allocated.
+ */
+static inline int xa_insert(struct xarray *xa, unsigned long index,
+		void *entry, gfp_t gfp)
+{
+	void *curr = xa_cmpxchg(xa, index, NULL, entry, gfp);
+	if (!curr)
+		return 0;
+	if (xa_is_err(curr))
+		return xa_err(curr);
+	return -EEXIST;
+}
+
 /**
  * xa_alloc() - Find somewhere to store this entry in the XArray.
  * @xa: XArray.
@@ -699,6 +710,20 @@ int xa_reserve_irq(struct xarray *xa, unsigned long index, gfp_t gfp)
 	return ret;
 }
 
+/**
+ * xa_release() - Release a reserved entry.
+ * @xa: XArray.
+ * @index: Index of entry.
+ *
+ * After calling xa_reserve(), you can call this function to release the
+ * reservation.  If the entry at @index has been stored to, this function
+ * will do nothing.
+ */
+static inline void xa_release(struct xarray *xa, unsigned long index)
+{
+	xa_cmpxchg(xa, index, NULL, NULL, 0);
+}
+
 /* Everything below here is the Advanced API.  Proceed with caution. */
 
 /*
-- 
cgit v1.2.3


From 9c16bb88905456a9b1299338041f05fa7699971b Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <willy@infradead.org>
Date: Mon, 5 Nov 2018 15:48:49 -0500
Subject: XArray: Turn xa_erase into an exported function

Make xa_erase() take the spinlock and then call __xa_erase(), but make
it out of line since it's such a common function.

Signed-off-by: Matthew Wilcox <willy@infradead.org>
---
 include/linux/xarray.h | 18 +-----------------
 1 file changed, 1 insertion(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/xarray.h b/include/linux/xarray.h
index 8e59d4fbd55e..4c839c17a99b 100644
--- a/include/linux/xarray.h
+++ b/include/linux/xarray.h
@@ -289,6 +289,7 @@ struct xarray {
 void xa_init_flags(struct xarray *, gfp_t flags);
 void *xa_load(struct xarray *, unsigned long index);
 void *xa_store(struct xarray *, unsigned long index, void *entry, gfp_t);
+void *xa_erase(struct xarray *, unsigned long index);
 void *xa_store_range(struct xarray *, unsigned long first, unsigned long last,
 			void *entry, gfp_t);
 bool xa_get_mark(struct xarray *, unsigned long index, xa_mark_t);
@@ -340,23 +341,6 @@ static inline bool xa_marked(const struct xarray *xa, xa_mark_t mark)
 	return xa->xa_flags & XA_FLAGS_MARK(mark);
 }
 
-/**
- * xa_erase() - Erase this entry from the XArray.
- * @xa: XArray.
- * @index: Index of entry.
- *
- * This function is the equivalent of calling xa_store() with %NULL as
- * the third argument.  The XArray does not need to allocate memory, so
- * the user does not need to provide GFP flags.
- *
- * Context: Process context.  Takes and releases the xa_lock.
- * Return: The entry which used to be at this index.
- */
-static inline void *xa_erase(struct xarray *xa, unsigned long index)
-{
-	return xa_store(xa, index, NULL, 0);
-}
-
 /**
  * xa_for_each() - Iterate over a portion of an XArray.
  * @xa: XArray.
-- 
cgit v1.2.3


From 84e5acb76dacb8ebd648a86a53907ce0dd616534 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <willy@infradead.org>
Date: Fri, 26 Oct 2018 14:41:29 -0400
Subject: XArray: Add xa_store_bh() and xa_store_irq()

These convenience wrappers disable interrupts while taking the spinlock.
A number of drivers would otherwise have to open-code these functions.

Signed-off-by: Matthew Wilcox <willy@infradead.org>
---
 include/linux/xarray.h | 52 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 52 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/xarray.h b/include/linux/xarray.h
index 4c839c17a99b..52d9732e4ec4 100644
--- a/include/linux/xarray.h
+++ b/include/linux/xarray.h
@@ -426,6 +426,58 @@ static inline int __xa_insert(struct xarray *xa, unsigned long index,
 	return -EEXIST;
 }
 
+/**
+ * xa_store_bh() - Store this entry in the XArray.
+ * @xa: XArray.
+ * @index: Index into array.
+ * @entry: New entry.
+ * @gfp: Memory allocation flags.
+ *
+ * This function is like calling xa_store() except it disables softirqs
+ * while holding the array lock.
+ *
+ * Context: Any context.  Takes and releases the xa_lock while
+ * disabling softirqs.
+ * Return: The entry which used to be at this index.
+ */
+static inline void *xa_store_bh(struct xarray *xa, unsigned long index,
+		void *entry, gfp_t gfp)
+{
+	void *curr;
+
+	xa_lock_bh(xa);
+	curr = __xa_store(xa, index, entry, gfp);
+	xa_unlock_bh(xa);
+
+	return curr;
+}
+
+/**
+ * xa_store_irq() - Erase this entry from the XArray.
+ * @xa: XArray.
+ * @index: Index into array.
+ * @entry: New entry.
+ * @gfp: Memory allocation flags.
+ *
+ * This function is like calling xa_store() except it disables interrupts
+ * while holding the array lock.
+ *
+ * Context: Process context.  Takes and releases the xa_lock while
+ * disabling interrupts.
+ * Return: The entry which used to be at this index.
+ */
+static inline void *xa_store_irq(struct xarray *xa, unsigned long index,
+		void *entry, gfp_t gfp)
+{
+	void *curr;
+
+	xa_lock_irq(xa);
+	curr = __xa_store(xa, index, entry, gfp);
+	xa_unlock_irq(xa);
+
+	return curr;
+}
+
 /**
  * xa_erase_bh() - Erase this entry from the XArray.
  * @xa: XArray.
-- 
cgit v1.2.3


From 804dfaf01bcc9daa4298c608ba9018abf616ec48 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <willy@infradead.org>
Date: Mon, 5 Nov 2018 16:37:15 -0500
Subject: XArray: Fix Documentation

Minor fixes.

Signed-off-by: Matthew Wilcox <willy@infradead.org>
---
 include/linux/xarray.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/xarray.h b/include/linux/xarray.h
index 52d9732e4ec4..564892e19f8c 100644
--- a/include/linux/xarray.h
+++ b/include/linux/xarray.h
@@ -487,7 +487,7 @@ static inline void *xa_store_irq(struct xarray *xa, unsigned long index,
  * the third argument.  The XArray does not need to allocate memory, so
  * the user does not need to provide GFP flags.
  *
- * Context: Process context.  Takes and releases the xa_lock while
+ * Context: Any context.  Takes and releases the xa_lock while
  * disabling softirqs.
  * Return: The entry which used to be at this index.
  */
@@ -622,7 +622,7 @@ static inline int xa_alloc(struct xarray *xa, u32 *id, u32 max, void *entry,
  * Updates the @id pointer with the index, then stores the entry at that
  * index.  A concurrent lookup will not see an uninitialised @id.
  *
- * Context: Process context.  Takes and releases the xa_lock while
+ * Context: Any context.  Takes and releases the xa_lock while
  * disabling softirqs.  May sleep if the @gfp flags permit.
  * Return: 0 on success, -ENOMEM if memory allocation fails or -ENOSPC if
  * there is no more space in the XArray.
-- 
cgit v1.2.3


From aa9b760cec2385ad408bb2e346c7f6dc1be69a79 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Sun, 4 Nov 2018 11:32:47 +0100
Subject: HID: fix up .raw_event() documentation

The documentation for the .raw_event() callback says that if the
driver return 1, there will be no further processing of the event,
but this is not true, the actual code in hid-core.c looks like this:

  if (hdrv && hdrv->raw_event && hid_match_report(hid, report)) {
           ret = hdrv->raw_event(hid, report, data, size);
           if (ret < 0)
                   goto unlock;
   }

   ret = hid_report_raw_event(hid, type, data, size, interrupt);

The only return value that has any effect on the processing is
a negative error.

Correct this as it seems to confuse people: I found bogus code in
the Razer out-of-tree driver attempting to return 1 here.

Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 include/linux/hid.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/hid.h b/include/linux/hid.h
index 2827b87590d8..387c70df6f29 100644
--- a/include/linux/hid.h
+++ b/include/linux/hid.h
@@ -722,8 +722,8 @@ struct hid_usage_id {
  * input will not be passed to raw_event unless hid_device_io_start is
  * called.
  *
- * raw_event and event should return 0 on no action performed, 1 when no
- * further processing should be done and negative on error
+ * raw_event and event should return negative on error, any other value will
+ * pass the event on to .event() typically return 0 for success.
  *
  * input_mapping shall return a negative value to completely ignore this usage
  * (e.g. doubled or invalid usage), zero to continue with parsing of this
-- 
cgit v1.2.3


From 8bd66d147c88bd441178c7b4c774ae5a185f19b8 Mon Sep 17 00:00:00 2001
From: "ndesaulniers@google.com" <ndesaulniers@google.com>
Date: Wed, 31 Oct 2018 12:39:01 -0700
Subject: include/linux/compiler*.h: define asm_volatile_goto

asm_volatile_goto should also be defined for other compilers that support
asm goto.

Fixes commit 815f0ddb346c ("include/linux/compiler*.h: make compiler-*.h
mutually exclusive").

Signed-off-by: Nick Desaulniers <ndesaulniers@google.com>
Signed-off-by: Miguel Ojeda <miguel.ojeda.sandonis@gmail.com>
---
 include/linux/compiler_types.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h
index 3439d7d0249a..4a3f9c09c92d 100644
--- a/include/linux/compiler_types.h
+++ b/include/linux/compiler_types.h
@@ -130,6 +130,10 @@ struct ftrace_likely_data {
 # define randomized_struct_fields_end
 #endif
 
+#ifndef asm_volatile_goto
+#define asm_volatile_goto(x...) asm goto(x)
+#endif
+
 /* Are two types/vars the same type (ignoring qualifiers)? */
 #define __same_type(a, b) __builtin_types_compatible_p(typeof(a), typeof(b))
 
-- 
cgit v1.2.3


From 98ee3fc7ef8395f8b7a379e6608aee91efc66d48 Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@bootlin.com>
Date: Tue, 6 Nov 2018 17:25:37 +0100
Subject: mtd: nand: Fix nanddev_pos_next_page() kernel-doc header

Function name is wrong in the kernel-doc header.

Fixes: 9c3736a3de21 ("mtd: nand: Add core infrastructure to deal with NAND devices")
Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
Reviewed-by: Miquel Raynal <miquel.raynal@bootlin.com>
---
 include/linux/mtd/nand.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h
index 78b86dea2f29..7f53ece2c039 100644
--- a/include/linux/mtd/nand.h
+++ b/include/linux/mtd/nand.h
@@ -568,7 +568,7 @@ static inline void nanddev_pos_next_eraseblock(struct nand_device *nand,
 }
 
 /**
- * nanddev_pos_next_eraseblock() - Move a position to the next page
+ * nanddev_pos_next_page() - Move a position to the next page
  * @nand: NAND device
  * @pos: the position to update
  *
-- 
cgit v1.2.3


From 81bd415c91eb966118d773dddf254aebf3022411 Mon Sep 17 00:00:00 2001
From: Mathieu Malaterre <malat@debian.org>
Date: Wed, 6 Jun 2018 21:42:32 +0200
Subject: watchdog/core: Add missing prototypes for weak functions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The split out of the hard lockup detector exposed two new weak functions,
but no prototypes for them, which triggers the build warning:

  kernel/watchdog.c:109:12: warning: no previous prototype for ‘watchdog_nmi_enable’ [-Wmissing-prototypes]
  kernel/watchdog.c:115:13: warning: no previous prototype for ‘watchdog_nmi_disable’ [-Wmissing-prototypes]

Add the prototypes.

Fixes: 73ce0511c436 ("kernel/watchdog.c: move hardlockup detector to separate file")
Signed-off-by: Mathieu Malaterre <malat@debian.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Babu Moger <babu.moger@oracle.com>
Cc: stable@vger.kernel.org
Link: https://lkml.kernel.org/r/20180606194232.17653-1-malat@debian.org
---
 include/linux/nmi.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/nmi.h b/include/linux/nmi.h
index 08f9247e9827..9003e29cde46 100644
--- a/include/linux/nmi.h
+++ b/include/linux/nmi.h
@@ -119,6 +119,8 @@ static inline int hardlockup_detector_perf_init(void) { return 0; }
 void watchdog_nmi_stop(void);
 void watchdog_nmi_start(void);
 int watchdog_nmi_probe(void);
+int watchdog_nmi_enable(unsigned int cpu);
+void watchdog_nmi_disable(unsigned int cpu);
 
 /**
  * touch_nmi_watchdog - restart NMI watchdog timeout.
-- 
cgit v1.2.3


From 99b77fef3c6c69bb7664f1ac97a69d5b17968dae Mon Sep 17 00:00:00 2001
From: Yishai Hadas <yishaih@mellanox.com>
Date: Wed, 31 Oct 2018 12:20:28 +0200
Subject: net/mlx5: Fix XRC SRQ umem valid bits

Adapt XRC SRQ to the latest HW specification with fixed definition
around umem valid bits. The previous definition relied on a bit which
was taken for other purposes in legacy FW.

Fixes: bd37197554eb ("net/mlx5: Update mlx5_ifc with DEVX UID bits")
Signed-off-by: Yishai Hadas <yishaih@mellanox.com>
Reviewed-by: Artemy Kovalyov <artemyko@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 include/linux/mlx5/mlx5_ifc.h | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index dbff9ff28f2c..34e17e6f8942 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -2473,14 +2473,15 @@ struct mlx5_ifc_xrc_srqc_bits {
 
 	u8         wq_signature[0x1];
 	u8         cont_srq[0x1];
-	u8         dbr_umem_valid[0x1];
+	u8         reserved_at_22[0x1];
 	u8         rlky[0x1];
 	u8         basic_cyclic_rcv_wqe[0x1];
 	u8         log_rq_stride[0x3];
 	u8         xrcd[0x18];
 
 	u8         page_offset[0x6];
-	u8         reserved_at_46[0x2];
+	u8         reserved_at_46[0x1];
+	u8         dbr_umem_valid[0x1];
 	u8         cqn[0x18];
 
 	u8         reserved_at_60[0x20];
@@ -6689,9 +6690,12 @@ struct mlx5_ifc_create_xrc_srq_in_bits {
 
 	struct mlx5_ifc_xrc_srqc_bits xrc_srq_context_entry;
 
-	u8         reserved_at_280[0x40];
+	u8         reserved_at_280[0x60];
+
 	u8         xrc_srq_umem_valid[0x1];
-	u8         reserved_at_2c1[0x5bf];
+	u8         reserved_at_2e1[0x1f];
+
+	u8         reserved_at_300[0x580];
 
 	u8         pas[0][0x40];
 };
-- 
cgit v1.2.3


From 781f0766cc41a9dd2e5d118ef4b1d5d89430257b Mon Sep 17 00:00:00 2001
From: Kai-Heng Feng <kai.heng.feng@canonical.com>
Date: Fri, 19 Oct 2018 16:14:50 +0800
Subject: USB: Wait for extra delay time after USB_PORT_FEAT_RESET for quirky
 hub

Devices connected under Terminus Technology Inc. Hub (1a40:0101) may
fail to work after the system resumes from suspend:
[  206.063325] usb 3-2.4: reset full-speed USB device number 4 using xhci_hcd
[  206.143691] usb 3-2.4: device descriptor read/64, error -32
[  206.351671] usb 3-2.4: device descriptor read/64, error -32

Info for this hub:
T:  Bus=03 Lev=01 Prnt=01 Port=01 Cnt=01 Dev#=  2 Spd=480 MxCh= 4
D:  Ver= 2.00 Cls=09(hub  ) Sub=00 Prot=01 MxPS=64 #Cfgs=  1
P:  Vendor=1a40 ProdID=0101 Rev=01.11
S:  Product=USB 2.0 Hub
C:  #Ifs= 1 Cfg#= 1 Atr=e0 MxPwr=100mA
I:  If#= 0 Alt= 0 #EPs= 1 Cls=09(hub  ) Sub=00 Prot=00 Driver=hub

Some expirements indicate that the USB devices connected to the hub are
innocent, it's the hub itself is to blame. The hub needs extra delay
time after it resets its port.

Hence wait for extra delay, if the device is connected to this quirky
hub.

Signed-off-by: Kai-Heng Feng <kai.heng.feng@canonical.com>
Cc: stable <stable@vger.kernel.org>
Acked-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/usb/quirks.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/usb/quirks.h b/include/linux/usb/quirks.h
index b7a99ce56bc9..a1be64c9940f 100644
--- a/include/linux/usb/quirks.h
+++ b/include/linux/usb/quirks.h
@@ -66,4 +66,7 @@
 /* Device needs a pause after every control message. */
 #define USB_QUIRK_DELAY_CTRL_MSG		BIT(13)
 
+/* Hub needs extra delay after resetting its port. */
+#define USB_QUIRK_HUB_SLOW_RESET		BIT(14)
+
 #endif /* __LINUX_USB_QUIRKS_H */
-- 
cgit v1.2.3


From 24efee412c75843755da0ddd7bbc2db2ce9129f5 Mon Sep 17 00:00:00 2001
From: Miguel Ojeda <miguel.ojeda.sandonis@gmail.com>
Date: Tue, 6 Nov 2018 22:52:11 +0100
Subject: Compiler Attributes: improve explanation of header

Explain better what "optional" attributes are, and avoid calling
them so to avoid confusion. Simply retain "Optional" as a word
to look for in the comments.

Moreover, add a couple sentences to explain a bit more the intention
and the documentation links.

Signed-off-by: Miguel Ojeda <miguel.ojeda.sandonis@gmail.com>
---
 include/linux/compiler_attributes.h | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/compiler_attributes.h b/include/linux/compiler_attributes.h
index 6b28c1b7310c..f8c400ba1929 100644
--- a/include/linux/compiler_attributes.h
+++ b/include/linux/compiler_attributes.h
@@ -4,22 +4,26 @@
 
 /*
  * The attributes in this file are unconditionally defined and they directly
- * map to compiler attribute(s) -- except those that are optional.
+ * map to compiler attribute(s), unless one of the compilers does not support
+ * the attribute. In that case, __has_attribute is used to check for support
+ * and the reason is stated in its comment ("Optional: ...").
  *
  * Any other "attributes" (i.e. those that depend on a configuration option,
  * on a compiler, on an architecture, on plugins, on other attributes...)
  * should be defined elsewhere (e.g. compiler_types.h or compiler-*.h).
+ * The intention is to keep this file as simple as possible, as well as
+ * compiler- and version-agnostic (e.g. avoiding GCC_VERSION checks).
  *
  * This file is meant to be sorted (by actual attribute name,
  * not by #define identifier). Use the __attribute__((__name__)) syntax
  * (i.e. with underscores) to avoid future collisions with other macros.
- * If an attribute is optional, state the reason in the comment.
+ * Provide links to the documentation of each supported compiler, if it exists.
  */
 
 /*
- * To check for optional attributes, we use __has_attribute, which is supported
- * on gcc >= 5, clang >= 2.9 and icc >= 17. In the meantime, to support
- * 4.6 <= gcc < 5, we implement __has_attribute by hand.
+ * __has_attribute is supported on gcc >= 5, clang >= 2.9 and icc >= 17.
+ * In the meantime, to support 4.6 <= gcc < 5, we implement __has_attribute
+ * by hand.
  *
  * sparse does not support __has_attribute (yet) and defines __GNUC_MINOR__
  * depending on the compiler used to build it; however, these attributes have
-- 
cgit v1.2.3


From 23c625ce3065e40c933a4239efb9b11f1194a343 Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Thu, 8 Nov 2018 14:55:21 +0100
Subject: libceph: assume argonaut on the server side

No one is running pre-argonaut.  In addition one of the argonaut
features (NOSRCADDR) has been required since day one (and a half,
2.6.34 vs 2.6.35) of the kernel client.

Allow for the possibility of reusing these feature bits later.

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Reviewed-by: Sage Weil <sage@redhat.com>
---
 include/linux/ceph/ceph_features.h | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ceph/ceph_features.h b/include/linux/ceph/ceph_features.h
index 6b92b3395fa9..65a38c4a02a1 100644
--- a/include/linux/ceph/ceph_features.h
+++ b/include/linux/ceph/ceph_features.h
@@ -213,12 +213,6 @@ DEFINE_CEPH_FEATURE_DEPRECATED(63, 1, RESERVED_BROKEN, LUMINOUS) // client-facin
 	 CEPH_FEATURE_NEW_OSDOPREPLY_ENCODING |	\
 	 CEPH_FEATURE_CEPHX_V2)
 
-#define CEPH_FEATURES_REQUIRED_DEFAULT   \
-	(CEPH_FEATURE_NOSRCADDR |	 \
-	 CEPH_FEATURE_SUBSCRIBE2 |	 \
-	 CEPH_FEATURE_RECONNECT_SEQ |	 \
-	 CEPH_FEATURE_PGID64 |		 \
-	 CEPH_FEATURE_PGPOOL3 |		 \
-	 CEPH_FEATURE_OSDENC)
+#define CEPH_FEATURES_REQUIRED_DEFAULT	0
 
 #endif
-- 
cgit v1.2.3


From a4310fa2f24687888ce80fdb0e88583561a23700 Mon Sep 17 00:00:00 2001
From: Marc Kleine-Budde <mkl@pengutronix.de>
Date: Wed, 31 Oct 2018 10:37:46 +0100
Subject: can: dev: can_get_echo_skb(): factor out non sending code to
 __can_get_echo_skb()

This patch factors out all non sending parts of can_get_echo_skb() into
a seperate function __can_get_echo_skb(), so that it can be re-used in
an upcoming patch.

Cc: linux-stable <stable@vger.kernel.org>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 include/linux/can/dev.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h
index a83e1f632eb7..f01623aef2f7 100644
--- a/include/linux/can/dev.h
+++ b/include/linux/can/dev.h
@@ -169,6 +169,7 @@ void can_change_state(struct net_device *dev, struct can_frame *cf,
 
 void can_put_echo_skb(struct sk_buff *skb, struct net_device *dev,
 		      unsigned int idx);
+struct sk_buff *__can_get_echo_skb(struct net_device *dev, unsigned int idx, u8 *len_ptr);
 unsigned int can_get_echo_skb(struct net_device *dev, unsigned int idx);
 void can_free_echo_skb(struct net_device *dev, unsigned int idx);
 
-- 
cgit v1.2.3


From 55059f2b7f868cd43b3ad30e28e18347e1b46ace Mon Sep 17 00:00:00 2001
From: Oleksij Rempel <o.rempel@pengutronix.de>
Date: Tue, 18 Sep 2018 11:40:38 +0200
Subject: can: rx-offload: introduce can_rx_offload_get_echo_skb() and
 can_rx_offload_queue_sorted() functions

Current CAN framework can't guarantee proper/chronological order
of RX and TX-ECHO messages. To make this possible, drivers should use
this functions instead of can_get_echo_skb().

Signed-off-by: Oleksij Rempel <o.rempel@pengutronix.de>
Cc: linux-stable <stable@vger.kernel.org>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 include/linux/can/rx-offload.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/can/rx-offload.h b/include/linux/can/rx-offload.h
index cb31683bbe15..01a7c9e5d8d8 100644
--- a/include/linux/can/rx-offload.h
+++ b/include/linux/can/rx-offload.h
@@ -41,6 +41,10 @@ int can_rx_offload_add_timestamp(struct net_device *dev, struct can_rx_offload *
 int can_rx_offload_add_fifo(struct net_device *dev, struct can_rx_offload *offload, unsigned int weight);
 int can_rx_offload_irq_offload_timestamp(struct can_rx_offload *offload, u64 reg);
 int can_rx_offload_irq_offload_fifo(struct can_rx_offload *offload);
+int can_rx_offload_queue_sorted(struct can_rx_offload *offload,
+				struct sk_buff *skb, u32 timestamp);
+unsigned int can_rx_offload_get_echo_skb(struct can_rx_offload *offload,
+					 unsigned int idx, u32 timestamp);
 int can_rx_offload_irq_queue_err_skb(struct can_rx_offload *offload, struct sk_buff *skb);
 void can_rx_offload_reset(struct can_rx_offload *offload);
 void can_rx_offload_del(struct can_rx_offload *offload);
-- 
cgit v1.2.3


From 4530ec36bb1e0d24f41c33229694adacda3d5d89 Mon Sep 17 00:00:00 2001
From: Oleksij Rempel <o.rempel@pengutronix.de>
Date: Tue, 18 Sep 2018 11:40:40 +0200
Subject: can: rx-offload: rename can_rx_offload_irq_queue_err_skb() to
 can_rx_offload_queue_tail()

This function has nothing todo with error.

Signed-off-by: Oleksij Rempel <o.rempel@pengutronix.de>
Cc: linux-stable <stable@vger.kernel.org>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 include/linux/can/rx-offload.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/can/rx-offload.h b/include/linux/can/rx-offload.h
index 01a7c9e5d8d8..8268811a697e 100644
--- a/include/linux/can/rx-offload.h
+++ b/include/linux/can/rx-offload.h
@@ -45,7 +45,8 @@ int can_rx_offload_queue_sorted(struct can_rx_offload *offload,
 				struct sk_buff *skb, u32 timestamp);
 unsigned int can_rx_offload_get_echo_skb(struct can_rx_offload *offload,
 					 unsigned int idx, u32 timestamp);
-int can_rx_offload_irq_queue_err_skb(struct can_rx_offload *offload, struct sk_buff *skb);
+int can_rx_offload_queue_tail(struct can_rx_offload *offload,
+			      struct sk_buff *skb);
 void can_rx_offload_reset(struct can_rx_offload *offload);
 void can_rx_offload_del(struct can_rx_offload *offload);
 void can_rx_offload_enable(struct can_rx_offload *offload);
-- 
cgit v1.2.3


From d19f9130b814d33c03118493c17454f7d90075d1 Mon Sep 17 00:00:00 2001
From: Elvira Khabirova <lineprinter@altlinux.org>
Date: Sat, 10 Nov 2018 04:22:09 +0100
Subject: x86/ptrace: Fix documentation for tracehook_report_syscall_entry()

tracehook_report_syscall_entry() is called not only
if %TIF_SYSCALL_TRACE is set, but also if %TIF_SYSCALL_EMU is set,
as appears from x86's entry code.

Signed-off-by: Elvira Khabirova <lineprinter@altlinux.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: ldv@altlinux.org
Cc: oleg@redhat.com
Cc: rostedt@goodmis.org
Link: http://lkml.kernel.org/r/20181110042209.26333972@akathisia
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/tracehook.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h
index 40b0b4c1bf7b..df20f8bdbfa3 100644
--- a/include/linux/tracehook.h
+++ b/include/linux/tracehook.h
@@ -83,8 +83,8 @@ static inline int ptrace_report_syscall(struct pt_regs *regs)
  * tracehook_report_syscall_entry - task is about to attempt a system call
  * @regs:		user register state of current task
  *
- * This will be called if %TIF_SYSCALL_TRACE has been set, when the
- * current task has just entered the kernel for a system call.
+ * This will be called if %TIF_SYSCALL_TRACE or %TIF_SYSCALL_EMU have been set,
+ * when the current task has just entered the kernel for a system call.
  * Full user register state is available here.  Changing the values
  * in @regs can affect the system call number and arguments to be tried.
  * It is safe to block here, preventing the system call from beginning.
-- 
cgit v1.2.3


From eff896288872d687d9662000ec9ae11b6d61766f Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Wed, 14 Nov 2018 09:55:43 -0800
Subject: efi/arm: Defer persistent reservations until after paging_init()

The new memory EFI reservation feature we introduced to allow memory
reservations to persist across kexec may trigger an unbounded number
of calls to memblock_reserve(). The memblock subsystem can deal with
this fine, but not before memblock resizing is enabled, which we can
only do after paging_init(), when the memory we reallocate the array
into is actually mapped.

So break out the memreserve table processing into a separate routine
and call it after paging_init() on arm64. On ARM, because of limited
reviewing bandwidth of the maintainer, we cannot currently fix this,
so instead, disable the EFI persistent memreserve entirely on ARM so
we can fix it later.

Tested-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-efi@vger.kernel.org
Link: http://lkml.kernel.org/r/20181114175544.12860-5-ard.biesheuvel@linaro.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/efi.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/efi.h b/include/linux/efi.h
index 845174e113ce..100ce4a4aff6 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -1167,6 +1167,8 @@ static inline bool efi_enabled(int feature)
 extern void efi_reboot(enum reboot_mode reboot_mode, const char *__unused);
 
 extern bool efi_is_table_address(unsigned long phys_addr);
+
+extern int efi_apply_persistent_mem_reservations(void);
 #else
 static inline bool efi_enabled(int feature)
 {
@@ -1185,6 +1187,11 @@ static inline bool efi_is_table_address(unsigned long phys_addr)
 {
 	return false;
 }
+
+static inline int efi_apply_persistent_mem_reservations(void)
+{
+	return 0;
+}
 #endif
 
 extern int efi_status_to_err(efi_status_t status);
-- 
cgit v1.2.3


From 0145b50566e7de5637e80ecba96c7f0e6fff1aad Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Wed, 31 Oct 2018 15:20:05 +0100
Subject: iio/hid-sensors: Fix IIO_CHAN_INFO_RAW returning wrong values for
 signed numbers

Before this commit sensor_hub_input_attr_get_raw_value() failed to take
the signedness of 16 and 8 bit values into account, returning e.g.
65436 instead of -100 for the z-axis reading of an accelerometer.

This commit adds a new is_signed parameter to the function and makes all
callers pass the appropriate value for this.

While at it, this commit also fixes up some neighboring lines where
statements were needlessly split over 2 lines to improve readability.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Acked-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Acked-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Cc: <Stable@vger.kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 include/linux/hid-sensor-hub.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/hid-sensor-hub.h b/include/linux/hid-sensor-hub.h
index 331dc377c275..dc12f5c4b076 100644
--- a/include/linux/hid-sensor-hub.h
+++ b/include/linux/hid-sensor-hub.h
@@ -177,6 +177,7 @@ int sensor_hub_input_get_attribute_info(struct hid_sensor_hub_device *hsdev,
 * @attr_usage_id:	Attribute usage id as per spec
 * @report_id:	Report id to look for
 * @flag:      Synchronous or asynchronous read
+* @is_signed:   If true then fields < 32 bits will be sign-extended
 *
 * Issues a synchronous or asynchronous read request for an input attribute.
 * Returns data upto 32 bits.
@@ -190,7 +191,8 @@ enum sensor_hub_read_flags {
 int sensor_hub_input_attr_get_raw_value(struct hid_sensor_hub_device *hsdev,
  					u32 usage_id,
  					u32 attr_usage_id, u32 report_id,
- 					enum sensor_hub_read_flags flag
+					enum sensor_hub_read_flags flag,
+					bool is_signed
 );
 
 /**
-- 
cgit v1.2.3


From b34087157dd76e8d96e5e52808134a791ac61e57 Mon Sep 17 00:00:00 2001
From: Robin Murphy <robin.murphy@arm.com>
Date: Wed, 21 Nov 2018 16:00:50 +0000
Subject: dma-direct: Make DIRECT_MAPPING_ERROR viable for SWIOTLB

With the overflow buffer removed, we no longer have a unique address
which is guaranteed not to be a valid DMA target to use as an error
token. The DIRECT_MAPPING_ERROR value of 0 tries to at least represent
an unlikely DMA target, but unfortunately there are already SWIOTLB
users with DMA-able memory at physical address 0 which now gets falsely
treated as a mapping failure and leads to all manner of misbehaviour.

The best we can do to mitigate that is flip DIRECT_MAPPING_ERROR to the
other commonly-used error value of all-bits-set, since the last single
byte of memory is by far the least-likely-valid DMA target.

Fixes: dff8d6c1ed58 ("swiotlb: remove the overflow buffer")
Reported-by: John Stultz <john.stultz@linaro.org>
Tested-by: John Stultz <john.stultz@linaro.org>
Acked-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Signed-off-by: Robin Murphy <robin.murphy@arm.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 include/linux/dma-direct.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/dma-direct.h b/include/linux/dma-direct.h
index bd73e7a91410..9e66bfe369aa 100644
--- a/include/linux/dma-direct.h
+++ b/include/linux/dma-direct.h
@@ -5,7 +5,7 @@
 #include <linux/dma-mapping.h>
 #include <linux/mem_encrypt.h>
 
-#define DIRECT_MAPPING_ERROR		0
+#define DIRECT_MAPPING_ERROR		(~(dma_addr_t)0)
 
 #ifdef CONFIG_ARCH_HAS_PHYS_TO_DMA
 #include <asm/dma-direct.h>
-- 
cgit v1.2.3


From 86de5921a3d5dd246df661e09bdd0a6131b39ae3 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 20 Nov 2018 05:53:59 -0800
Subject: tcp: defer SACK compression after DupThresh

Jean-Louis reported a TCP regression and bisected to recent SACK
compression.

After a loss episode (receiver not able to keep up and dropping
packets because its backlog is full), linux TCP stack is sending
a single SACK (DUPACK).

Sender waits a full RTO timer before recovering losses.

While RFC 6675 says in section 5, "Algorithm Details",

   (2) If DupAcks < DupThresh but IsLost (HighACK + 1) returns true --
       indicating at least three segments have arrived above the current
       cumulative acknowledgment point, which is taken to indicate loss
       -- go to step (4).
...
   (4) Invoke fast retransmit and enter loss recovery as follows:

there are old TCP stacks not implementing this strategy, and
still counting the dupacks before starting fast retransmit.

While these stacks probably perform poorly when receivers implement
LRO/GRO, we should be a little more gentle to them.

This patch makes sure we do not enable SACK compression unless
3 dupacks have been sent since last rcv_nxt update.

Ideally we should even rearm the timer to send one or two
more DUPACK if no more packets are coming, but that will
be work aiming for linux-4.21.

Many thanks to Jean-Louis for bisecting the issue, providing
packet captures and testing this patch.

Fixes: 5d9f4262b7ea ("tcp: add SACK compression")
Reported-by: Jean-Louis Dupond <jean-louis@dupond.be>
Tested-by: Jean-Louis Dupond <jean-louis@dupond.be>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/tcp.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 8ed77bb4ed86..a9b0280687d5 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -196,6 +196,7 @@ struct tcp_sock {
 	u32	rcv_tstamp;	/* timestamp of last received ACK (for keepalives) */
 	u32	lsndtime;	/* timestamp of last sent data packet (for restart window) */
 	u32	last_oow_ack_time;  /* timestamp of last out-of-window ACK */
+	u32	compressed_ack_rcv_nxt;
 
 	u32	tsoffset;	/* timestamp offset */
 
-- 
cgit v1.2.3


From f1539a0c2545d7bd82e451bd1464f2a820f55de4 Mon Sep 17 00:00:00 2001
From: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Date: Wed, 21 Nov 2018 16:27:11 +0100
Subject: Revert "HID: input: Create a utility class for counting scroll
 events"

This reverts commit 1ff2e1a44e02d4bdbb9be67c7d9acc240a67141f.

It turns out the current API is not that compatible with
some Microsoft mice, so better start again from scratch.

Signed-off-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Acked-by: Harry Cutts <hcutts@chromium.org>
Acked-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Acked-by: Jiri Kosina <jkosina@suse.cz>
---
 include/linux/hid.h | 28 ----------------------------
 1 file changed, 28 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/hid.h b/include/linux/hid.h
index 387c70df6f29..a355d61940f2 100644
--- a/include/linux/hid.h
+++ b/include/linux/hid.h
@@ -1139,34 +1139,6 @@ static inline u32 hid_report_len(struct hid_report *report)
 int hid_report_raw_event(struct hid_device *hid, int type, u8 *data, u32 size,
 		int interrupt);
 
-
-/**
- * struct hid_scroll_counter - Utility class for processing high-resolution
- *                             scroll events.
- * @dev: the input device for which events should be reported.
- * @microns_per_hi_res_unit: the amount moved by the user's finger for each
- *                           high-resolution unit reported by the mouse, in
- *                           microns.
- * @resolution_multiplier: the wheel's resolution in high-resolution mode as a
- *                         multiple of its lower resolution. For example, if
- *                         moving the wheel by one "notch" would result in a
- *                         value of 1 in low-resolution mode but 8 in
- *                         high-resolution, the multiplier is 8.
- * @remainder: counts the number of high-resolution units moved since the last
- *             low-resolution event (REL_WHEEL or REL_HWHEEL) was sent. Should
- *             only be used by class methods.
- */
-struct hid_scroll_counter {
-	struct input_dev *dev;
-	int microns_per_hi_res_unit;
-	int resolution_multiplier;
-
-	int remainder;
-};
-
-void hid_scroll_counter_handle_scroll(struct hid_scroll_counter *counter,
-				      int hi_res_value);
-
 /* HID quirks API */
 unsigned long hid_lookup_quirk(const struct hid_device *hdev);
 int hid_quirks_init(char **quirks_param, __u16 bus, int count);
-- 
cgit v1.2.3


From 0211dda68a4f6531923a2f72d8e8959207f59fba Mon Sep 17 00:00:00 2001
From: Tal Gilboa <talgi@mellanox.com>
Date: Wed, 21 Nov 2018 16:28:23 +0200
Subject: net/dim: Update DIM start sample after each DIM iteration

On every iteration of net_dim, the algorithm may choose to
check for the system state by comparing current data sample
with previous data sample. After each of these comparison,
regardless of the action taken, the sample used as baseline
is needed to be updated.

This patch fixes a bug that causes DIM to take wrong decisions,
due to never updating the baseline sample for comparison between
iterations. This way, DIM always compares current sample with
zeros.

Although this is a functional fix, it also improves and stabilizes
performance as the algorithm works properly now.

Performance:
Tested single UDP TX stream with pktgen:
samples/pktgen/pktgen_sample03_burst_single_flow.sh -i p4p2 -d 1.1.1.1
-m 24:8a:07:88:26:8b -f 3 -b 128

ConnectX-5 100GbE packet rate improved from 15-19Mpps to 19-20Mpps.
Also, toggling between profiles is less frequent with the fix.

Fixes: 8115b750dbcb ("net/dim: use struct net_dim_sample as arg to net_dim")
Signed-off-by: Tal Gilboa <talgi@mellanox.com>
Reviewed-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/net_dim.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/net_dim.h b/include/linux/net_dim.h
index c79e859408e6..fd458389f7d1 100644
--- a/include/linux/net_dim.h
+++ b/include/linux/net_dim.h
@@ -406,6 +406,8 @@ static inline void net_dim(struct net_dim *dim,
 		}
 		/* fall through */
 	case NET_DIM_START_MEASURE:
+		net_dim_sample(end_sample.event_ctr, end_sample.pkt_ctr, end_sample.byte_ctr,
+			       &dim->start_sample);
 		dim->state = NET_DIM_MEASURE_IN_PROGRESS;
 		break;
 	case NET_DIM_APPLY_NEW_PROFILE:
-- 
cgit v1.2.3


From 5cd8d46ea1562be80063f53c7c6a5f40224de623 Mon Sep 17 00:00:00 2001
From: Willem de Bruijn <willemb@google.com>
Date: Tue, 20 Nov 2018 13:00:18 -0500
Subject: packet: copy user buffers before orphan or clone

tpacket_snd sends packets with user pages linked into skb frags. It
notifies that pages can be reused when the skb is released by setting
skb->destructor to tpacket_destruct_skb.

This can cause data corruption if the skb is orphaned (e.g., on
transmit through veth) or cloned (e.g., on mirror to another psock).

Create a kernel-private copy of data in these cases, same as tun/tap
zerocopy transmission. Reuse that infrastructure: mark the skb as
SKBTX_ZEROCOPY_FRAG, which will trigger copy in skb_orphan_frags(_rx).

Unlike other zerocopy packets, do not set shinfo destructor_arg to
struct ubuf_info. tpacket_destruct_skb already uses that ptr to notify
when the original skb is released and a timestamp is recorded. Do not
change this timestamp behavior. The ubuf_info->callback is not needed
anyway, as no zerocopy notification is expected.

Mark destructor_arg as not-a-uarg by setting the lower bit to 1. The
resulting value is not a valid ubuf_info pointer, nor a valid
tpacket_snd frame address. Add skb_zcopy_.._nouarg helpers for this.

The fix relies on features introduced in commit 52267790ef52 ("sock:
add MSG_ZEROCOPY"), so can be backported as is only to 4.14.

Tested with from `./in_netns.sh ./txring_overwrite` from
http://github.com/wdebruij/kerneltools/tests

Fixes: 69e3c75f4d54 ("net: TX_RING and packet mmap")
Reported-by: Anand H. Krishnan <anandhkrishnan@gmail.com>
Signed-off-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 0ba687454267..0d1b2c3f127b 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1326,6 +1326,22 @@ static inline void skb_zcopy_set(struct sk_buff *skb, struct ubuf_info *uarg)
 	}
 }
 
+static inline void skb_zcopy_set_nouarg(struct sk_buff *skb, void *val)
+{
+	skb_shinfo(skb)->destructor_arg = (void *)((uintptr_t) val | 0x1UL);
+	skb_shinfo(skb)->tx_flags |= SKBTX_ZEROCOPY_FRAG;
+}
+
+static inline bool skb_zcopy_is_nouarg(struct sk_buff *skb)
+{
+	return (uintptr_t) skb_shinfo(skb)->destructor_arg & 0x1UL;
+}
+
+static inline void *skb_zcopy_get_nouarg(struct sk_buff *skb)
+{
+	return (void *)((uintptr_t) skb_shinfo(skb)->destructor_arg & ~0x1UL);
+}
+
 /* Release a reference on a zerocopy structure */
 static inline void skb_zcopy_clear(struct sk_buff *skb, bool zerocopy)
 {
@@ -1335,7 +1351,7 @@ static inline void skb_zcopy_clear(struct sk_buff *skb, bool zerocopy)
 		if (uarg->callback == sock_zerocopy_callback) {
 			uarg->zerocopy = uarg->zerocopy && zerocopy;
 			sock_zerocopy_put(uarg);
-		} else {
+		} else if (!skb_zcopy_is_nouarg(skb)) {
 			uarg->callback(uarg, zerocopy);
 		}
 
-- 
cgit v1.2.3


From 89259088c1b7fecb43e8e245dc931909132a4e03 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Sat, 17 Nov 2018 11:32:29 +0100
Subject: netfilter: nfnetlink_cttimeout: fetch timeouts for udplite and gre,
 too

syzbot was able to trigger the WARN in cttimeout_default_get() by
passing UDPLITE as l4protocol.  Alias UDPLITE to UDP, both use
same timeout values.

Furthermore, also fetch GRE timeouts.  GRE is a bit more complicated,
as it still can be a module and its netns_proto_gre struct layout isn't
visible outside of the gre module. Can't move timeouts around, it
appears conntrack sysctl unregister assumes net_generic() returns
nf_proto_net, so we get crash. Expose layout of netns_proto_gre instead.

A followup nf-next patch could make gre tracker be built-in as well
if needed, its not that large.

Last, make the WARN() mention the missing protocol value in case
anything else is missing.

Reported-by: syzbot+2fae8fa157dd92618cae@syzkaller.appspotmail.com
Fixes: 8866df9264a3 ("netfilter: nfnetlink_cttimeout: pass default timeout policy to obj_to_nlattr")
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/nf_conntrack_proto_gre.h | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/nf_conntrack_proto_gre.h b/include/linux/netfilter/nf_conntrack_proto_gre.h
index b8d95564bd53..14edb795ab43 100644
--- a/include/linux/netfilter/nf_conntrack_proto_gre.h
+++ b/include/linux/netfilter/nf_conntrack_proto_gre.h
@@ -21,6 +21,19 @@ struct nf_ct_gre_keymap {
 	struct nf_conntrack_tuple tuple;
 };
 
+enum grep_conntrack {
+	GRE_CT_UNREPLIED,
+	GRE_CT_REPLIED,
+	GRE_CT_MAX
+};
+
+struct netns_proto_gre {
+	struct nf_proto_net	nf;
+	rwlock_t		keymap_lock;
+	struct list_head	keymap_list;
+	unsigned int		gre_timeouts[GRE_CT_MAX];
+};
+
 /* add new tuple->key_reply pair to keymap */
 int nf_ct_gre_keymap_add(struct nf_conn *ct, enum ip_conntrack_dir dir,
 			 struct nf_conntrack_tuple *t);
-- 
cgit v1.2.3


From 786a9ab1330169f2602238822b4df5d5c4c98f6c Mon Sep 17 00:00:00 2001
From: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Date: Wed, 21 Nov 2018 10:35:17 +0100
Subject: gpio: davinci: restore a way to manually specify the GPIO base

Commit 587f7a694f01 ("gpio: davinci: Use dev name for label and
automatic base selection") broke the network support in legacy boot
mode for da850-evm since we can no longer request the MDIO clock GPIO.

Other boards may be broken too, which I haven't tested.

The problem is in the fact that most board files still use the legacy
GPIO API where lines are requested by numbers rather than descriptors.

While this should be fixed eventually, in order to unbreak the board
for now - provide a way to manually specify the GPIO base in platform
data.

Fixes: 587f7a694f01 ("gpio: davinci: Use dev name for label and automatic base selection")
Cc: stable@vger.kernel.org
Signed-off-by: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Sekhar Nori <nsekhar@ti.com>
---
 include/linux/platform_data/gpio-davinci.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/platform_data/gpio-davinci.h b/include/linux/platform_data/gpio-davinci.h
index f92a47e18034..a93841bfb9f7 100644
--- a/include/linux/platform_data/gpio-davinci.h
+++ b/include/linux/platform_data/gpio-davinci.h
@@ -17,6 +17,8 @@
 #define __DAVINCI_GPIO_PLATFORM_H
 
 struct davinci_gpio_platform_data {
+	bool	no_auto_base;
+	u32	base;
 	u32	ngpio;
 	u32	gpio_unbanked;
 };
-- 
cgit v1.2.3


From 8114865ff82e200b383e46821c25cb0625b842b5 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Sun, 18 Nov 2018 17:10:15 -0500
Subject: function_graph: Create function_graph_enter() to consolidate
 architecture code

Currently all the architectures do basically the same thing in preparing the
function graph tracer on entry to a function. This code can be pulled into a
generic location and then this will allow the function graph tracer to be
fixed, as well as extended.

Create a new function graph helper function_graph_enter() that will call the
hook function (ftrace_graph_entry) and the shadow stack operation
(ftrace_push_return_trace), and remove the need of the architecture code to
manage the shadow stack.

This is needed to prepare for a fix of a design bug on how the curr_ret_stack
is used.

Cc: stable@kernel.org
Fixes: 03274a3ffb449 ("tracing/fgraph: Adjust fgraph depth before calling trace return callback")
Reviewed-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 include/linux/ftrace.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index a397907e8d72..5717e8f81c59 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -779,6 +779,9 @@ extern void return_to_handler(void);
 extern int
 ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth,
 			 unsigned long frame_pointer, unsigned long *retp);
+extern int
+function_graph_enter(unsigned long ret, unsigned long func,
+		     unsigned long frame_pointer, unsigned long *retp);
 
 unsigned long ftrace_graph_ret_addr(struct task_struct *task, int *idx,
 				    unsigned long ret, unsigned long *retp);
-- 
cgit v1.2.3


From e2c95a61656d29ceaac97b6a975c8a1f26e26f15 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Mon, 26 Nov 2018 14:05:38 +0100
Subject: bpf, ppc64: generalize fetching subprog into bpf_jit_get_func_addr

Make fetching of the BPF call address from ppc64 JIT generic. ppc64
was using a slightly different variant rather than through the insns'
imm field encoding as the target address would not fit into that space.
Therefore, the target subprog number was encoded into the insns' offset
and fetched through fp->aux->func[off]->bpf_func instead. Given there
are other JITs with this issue and the mechanism of fetching the address
is JIT-generic, move it into the core as a helper instead. On the JIT
side, we get information on whether the retrieved address is a fixed
one, that is, not changing through JIT passes, or a dynamic one. For
the former, JITs can optimize their imm emission because this doesn't
change jump offsets throughout JIT process.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: Sandipan Das <sandipan@linux.ibm.com>
Tested-by: Sandipan Das <sandipan@linux.ibm.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/filter.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/filter.h b/include/linux/filter.h
index de629b706d1d..448dcc448f1f 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -866,6 +866,10 @@ void bpf_jit_binary_free(struct bpf_binary_header *hdr);
 
 void bpf_jit_free(struct bpf_prog *fp);
 
+int bpf_jit_get_func_addr(const struct bpf_prog *prog,
+			  const struct bpf_insn *insn, bool extra_pass,
+			  u64 *func_addr, bool *func_addr_fixed);
+
 struct bpf_prog *bpf_jit_blind_constants(struct bpf_prog *fp);
 void bpf_jit_prog_release_other(struct bpf_prog *fp, struct bpf_prog *fp_other);
 
-- 
cgit v1.2.3


From d125f3f866df88da5a85df00291f88f0baa89f7c Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Mon, 19 Nov 2018 07:40:39 -0500
Subject: function_graph: Make ftrace_push_return_trace() static

As all architectures now call function_graph_enter() to do the entry work,
no architecture should ever call ftrace_push_return_trace(). Make it static.

This is needed to prepare for a fix of a design bug on how the curr_ret_stack
is used.

Cc: stable@kernel.org
Fixes: 03274a3ffb449 ("tracing/fgraph: Adjust fgraph depth before calling trace return callback")
Reviewed-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 include/linux/ftrace.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 5717e8f81c59..dd16e8218db3 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -776,9 +776,6 @@ struct ftrace_ret_stack {
  */
 extern void return_to_handler(void);
 
-extern int
-ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth,
-			 unsigned long frame_pointer, unsigned long *retp);
 extern int
 function_graph_enter(unsigned long ret, unsigned long func,
 		     unsigned long frame_pointer, unsigned long *retp);
-- 
cgit v1.2.3


From 39eb456dacb543de90d3bc6a8e0ac5cf51ac475e Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Mon, 19 Nov 2018 08:07:12 -0500
Subject: function_graph: Use new curr_ret_depth to manage depth instead of
 curr_ret_stack

Currently, the depth of the ret_stack is determined by curr_ret_stack index.
The issue is that there's a race between setting of the curr_ret_stack and
calling of the callback attached to the return of the function.

Commit 03274a3ffb44 ("tracing/fgraph: Adjust fgraph depth before calling
trace return callback") moved the calling of the callback to after the
setting of the curr_ret_stack, even stating that it was safe to do so, when
in fact, it was the reason there was a barrier() there (yes, I should have
commented that barrier()).

Not only does the curr_ret_stack keep track of the current call graph depth,
it also keeps the ret_stack content from being overwritten by new data.

The function profiler, uses the "subtime" variable of ret_stack structure
and by moving the curr_ret_stack, it allows for interrupts to use the same
structure it was using, corrupting the data, and breaking the profiler.

To fix this, there needs to be two variables to handle the call stack depth
and the pointer to where the ret_stack is being used, as they need to change
at two different locations.

Cc: stable@kernel.org
Fixes: 03274a3ffb449 ("tracing/fgraph: Adjust fgraph depth before calling trace return callback")
Reviewed-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 include/linux/sched.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index a51c13c2b1a0..d6183a55e8eb 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1116,6 +1116,7 @@ struct task_struct {
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 	/* Index of current stored address in ret_stack: */
 	int				curr_ret_stack;
+	int				curr_ret_depth;
 
 	/* Stack of return addresses for return function tracing: */
 	struct ftrace_ret_stack		*ret_stack;
-- 
cgit v1.2.3


From 321a874a7ef85655e93b3206d0f36b4a6097f948 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sun, 25 Nov 2018 19:33:38 +0100
Subject: sched/smt: Expose sched_smt_present static key

Make the scheduler's 'sched_smt_present' static key globaly available, so
it can be used in the x86 speculation control code.

Provide a query function and a stub for the CONFIG_SMP=n case.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Ingo Molnar <mingo@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Jiri Kosina <jkosina@suse.cz>
Cc: Tom Lendacky <thomas.lendacky@amd.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: David Woodhouse <dwmw@amazon.co.uk>
Cc: Tim Chen <tim.c.chen@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Casey Schaufler <casey.schaufler@intel.com>
Cc: Asit Mallick <asit.k.mallick@intel.com>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: Jon Masters <jcm@redhat.com>
Cc: Waiman Long <longman9394@gmail.com>
Cc: Greg KH <gregkh@linuxfoundation.org>
Cc: Dave Stewart <david.c.stewart@intel.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: stable@vger.kernel.org
Link: https://lkml.kernel.org/r/20181125185004.430168326@linutronix.de
---
 include/linux/sched/smt.h | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)
 create mode 100644 include/linux/sched/smt.h

(limited to 'include/linux')

diff --git a/include/linux/sched/smt.h b/include/linux/sched/smt.h
new file mode 100644
index 000000000000..c9e0be514110
--- /dev/null
+++ b/include/linux/sched/smt.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_SCHED_SMT_H
+#define _LINUX_SCHED_SMT_H
+
+#include <linux/static_key.h>
+
+#ifdef CONFIG_SCHED_SMT
+extern struct static_key_false sched_smt_present;
+
+static __always_inline bool sched_smt_active(void)
+{
+	return static_branch_likely(&sched_smt_present);
+}
+#else
+static inline bool sched_smt_active(void) { return false; }
+#endif
+
+#endif
-- 
cgit v1.2.3


From a74cfffb03b73d41e08f84c2e5c87dec0ce3db9f Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sun, 25 Nov 2018 19:33:39 +0100
Subject: x86/speculation: Rework SMT state change

arch_smt_update() is only called when the sysfs SMT control knob is
changed. This means that when SMT is enabled in the sysfs control knob the
system is considered to have SMT active even if all siblings are offline.

To allow finegrained control of the speculation mitigations, the actual SMT
state is more interesting than the fact that siblings could be enabled.

Rework the code, so arch_smt_update() is invoked from each individual CPU
hotplug function, and simplify the update function while at it.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Ingo Molnar <mingo@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Jiri Kosina <jkosina@suse.cz>
Cc: Tom Lendacky <thomas.lendacky@amd.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: David Woodhouse <dwmw@amazon.co.uk>
Cc: Tim Chen <tim.c.chen@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Casey Schaufler <casey.schaufler@intel.com>
Cc: Asit Mallick <asit.k.mallick@intel.com>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: Jon Masters <jcm@redhat.com>
Cc: Waiman Long <longman9394@gmail.com>
Cc: Greg KH <gregkh@linuxfoundation.org>
Cc: Dave Stewart <david.c.stewart@intel.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: stable@vger.kernel.org
Link: https://lkml.kernel.org/r/20181125185004.521974984@linutronix.de
---
 include/linux/sched/smt.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sched/smt.h b/include/linux/sched/smt.h
index c9e0be514110..59d3736c454c 100644
--- a/include/linux/sched/smt.h
+++ b/include/linux/sched/smt.h
@@ -15,4 +15,6 @@ static __always_inline bool sched_smt_active(void)
 static inline bool sched_smt_active(void) { return false; }
 #endif
 
+void arch_smt_update(void);
+
 #endif
-- 
cgit v1.2.3


From 46f7ecb1e7359f183f5bbd1e08b90e10e52164f9 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sun, 25 Nov 2018 19:33:50 +0100
Subject: ptrace: Remove unused ptrace_may_access_sched() and MODE_IBRS

The IBPB control code in x86 removed the usage. Remove the functionality
which was introduced for this.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Ingo Molnar <mingo@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Jiri Kosina <jkosina@suse.cz>
Cc: Tom Lendacky <thomas.lendacky@amd.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: David Woodhouse <dwmw@amazon.co.uk>
Cc: Tim Chen <tim.c.chen@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Casey Schaufler <casey.schaufler@intel.com>
Cc: Asit Mallick <asit.k.mallick@intel.com>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: Jon Masters <jcm@redhat.com>
Cc: Waiman Long <longman9394@gmail.com>
Cc: Greg KH <gregkh@linuxfoundation.org>
Cc: Dave Stewart <david.c.stewart@intel.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: stable@vger.kernel.org
Link: https://lkml.kernel.org/r/20181125185005.559149393@linutronix.de
---
 include/linux/ptrace.h | 17 -----------------
 1 file changed, 17 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h
index 6c2ffed907f5..de20ede2c5c8 100644
--- a/include/linux/ptrace.h
+++ b/include/linux/ptrace.h
@@ -64,15 +64,12 @@ extern void exit_ptrace(struct task_struct *tracer, struct list_head *dead);
 #define PTRACE_MODE_NOAUDIT	0x04
 #define PTRACE_MODE_FSCREDS	0x08
 #define PTRACE_MODE_REALCREDS	0x10
-#define PTRACE_MODE_SCHED	0x20
-#define PTRACE_MODE_IBPB	0x40
 
 /* shorthands for READ/ATTACH and FSCREDS/REALCREDS combinations */
 #define PTRACE_MODE_READ_FSCREDS (PTRACE_MODE_READ | PTRACE_MODE_FSCREDS)
 #define PTRACE_MODE_READ_REALCREDS (PTRACE_MODE_READ | PTRACE_MODE_REALCREDS)
 #define PTRACE_MODE_ATTACH_FSCREDS (PTRACE_MODE_ATTACH | PTRACE_MODE_FSCREDS)
 #define PTRACE_MODE_ATTACH_REALCREDS (PTRACE_MODE_ATTACH | PTRACE_MODE_REALCREDS)
-#define PTRACE_MODE_SPEC_IBPB (PTRACE_MODE_ATTACH_REALCREDS | PTRACE_MODE_IBPB)
 
 /**
  * ptrace_may_access - check whether the caller is permitted to access
@@ -90,20 +87,6 @@ extern void exit_ptrace(struct task_struct *tracer, struct list_head *dead);
  */
 extern bool ptrace_may_access(struct task_struct *task, unsigned int mode);
 
-/**
- * ptrace_may_access - check whether the caller is permitted to access
- * a target task.
- * @task: target task
- * @mode: selects type of access and caller credentials
- *
- * Returns true on success, false on denial.
- *
- * Similar to ptrace_may_access(). Only to be called from context switch
- * code. Does not call into audit and the regular LSM hooks due to locking
- * constraints.
- */
-extern bool ptrace_may_access_sched(struct task_struct *task, unsigned int mode);
-
 static inline int ptrace_reparented(struct task_struct *child)
 {
 	return !same_thread_group(child->real_parent, child->parent);
-- 
cgit v1.2.3


From 9137bb27e60e554dab694eafa4cca241fa3a694f Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sun, 25 Nov 2018 19:33:53 +0100
Subject: x86/speculation: Add prctl() control for indirect branch speculation

Add the PR_SPEC_INDIRECT_BRANCH option for the PR_GET_SPECULATION_CTRL and
PR_SET_SPECULATION_CTRL prctls to allow fine grained per task control of
indirect branch speculation via STIBP and IBPB.

Invocations:
 Check indirect branch speculation status with
 - prctl(PR_GET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH, 0, 0, 0);

 Enable indirect branch speculation with
 - prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH, PR_SPEC_ENABLE, 0, 0);

 Disable indirect branch speculation with
 - prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH, PR_SPEC_DISABLE, 0, 0);

 Force disable indirect branch speculation with
 - prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH, PR_SPEC_FORCE_DISABLE, 0, 0);

See Documentation/userspace-api/spec_ctrl.rst.

Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Ingo Molnar <mingo@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Jiri Kosina <jkosina@suse.cz>
Cc: Tom Lendacky <thomas.lendacky@amd.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: David Woodhouse <dwmw@amazon.co.uk>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Casey Schaufler <casey.schaufler@intel.com>
Cc: Asit Mallick <asit.k.mallick@intel.com>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: Jon Masters <jcm@redhat.com>
Cc: Waiman Long <longman9394@gmail.com>
Cc: Greg KH <gregkh@linuxfoundation.org>
Cc: Dave Stewart <david.c.stewart@intel.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: stable@vger.kernel.org
Link: https://lkml.kernel.org/r/20181125185005.866780996@linutronix.de
---
 include/linux/sched.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index a51c13c2b1a0..d607db5fcc6a 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1453,6 +1453,8 @@ static inline bool is_percpu_thread(void)
 #define PFA_SPREAD_SLAB			2	/* Spread some slab caches over cpuset */
 #define PFA_SPEC_SSB_DISABLE		3	/* Speculative Store Bypass disabled */
 #define PFA_SPEC_SSB_FORCE_DISABLE	4	/* Speculative Store Bypass force disabled*/
+#define PFA_SPEC_IB_DISABLE		5	/* Indirect branch speculation restricted */
+#define PFA_SPEC_IB_FORCE_DISABLE	6	/* Indirect branch speculation permanently restricted */
 
 #define TASK_PFA_TEST(name, func)					\
 	static inline bool task_##func(struct task_struct *p)		\
@@ -1484,6 +1486,13 @@ TASK_PFA_CLEAR(SPEC_SSB_DISABLE, spec_ssb_disable)
 TASK_PFA_TEST(SPEC_SSB_FORCE_DISABLE, spec_ssb_force_disable)
 TASK_PFA_SET(SPEC_SSB_FORCE_DISABLE, spec_ssb_force_disable)
 
+TASK_PFA_TEST(SPEC_IB_DISABLE, spec_ib_disable)
+TASK_PFA_SET(SPEC_IB_DISABLE, spec_ib_disable)
+TASK_PFA_CLEAR(SPEC_IB_DISABLE, spec_ib_disable)
+
+TASK_PFA_TEST(SPEC_IB_FORCE_DISABLE, spec_ib_force_disable)
+TASK_PFA_SET(SPEC_IB_FORCE_DISABLE, spec_ib_force_disable)
+
 static inline void
 current_restore_flags(unsigned long orig_flags, unsigned long flags)
 {
-- 
cgit v1.2.3


From 3f2b7b9035107d6096ea438ea3d97dcf0481b6d2 Mon Sep 17 00:00:00 2001
From: "kiran.modukuri" <kiran.modukuri@gmail.com>
Date: Mon, 26 Nov 2018 15:41:48 +0000
Subject: fscache: Fix race in fscache_op_complete() due to split atomic_sub &
 read

The code in fscache_retrieval_complete is using atomic_sub followed by an
atomic_read:

        atomic_sub(n_pages, &op->n_pages);
        if (atomic_read(&op->n_pages) <= 0)
                fscache_op_complete(&op->op, true);

This causes two threads doing a decrement of n_pages to race with each
other seeing the op->refcount 0 at same time - and they end up calling
fscache_op_complete() in both the threads leading to an assertion failure.

Fix this by using atomic_sub_return_relaxed() instead of two calls.  Note
that I'm using 'relaxed' rather than, say, 'release' as there aren't
multiple variables that appear to need ordering across the release.

The oops looks something like:

FS-Cache: Assertion failed
FS-Cache: 0 > 0 is false
...
kernel BUG at /usr/src/linux-4.4.0/fs/fscache/operation.c:449!
...
Workqueue: fscache_operation fscache_op_work_func [fscache]
...
RIP: 0010:[<ffffffffc037eacd>] fscache_op_complete+0x10d/0x180 [fscache]
...
Call Trace:
 [<ffffffffc1464cf9>] cachefiles_read_copier+0x3a9/0x410 [cachefiles]
 [<ffffffffc037e272>] fscache_op_work_func+0x22/0x50 [fscache]
 [<ffffffff81096da0>] process_one_work+0x150/0x3f0
 [<ffffffff8109751a>] worker_thread+0x11a/0x470
 [<ffffffff81808e59>] ? __schedule+0x359/0x980
 [<ffffffff81097400>] ? rescuer_thread+0x310/0x310
 [<ffffffff8109cdd6>] kthread+0xd6/0xf0
 [<ffffffff8109cd00>] ? kthread_park+0x60/0x60
 [<ffffffff8180d0cf>] ret_from_fork+0x3f/0x70
 [<ffffffff8109cd00>] ? kthread_park+0x60/0x60

This seen this in 4.4.x kernels and the same bug affects fscache in latest
upstreams kernels.

Fixes: 1bb4b7f98f36 ("FS-Cache: The retrieval remaining-pages counter needs to be atomic_t")
Signed-off-by: Kiran Kumar Modukuri <kiran.modukuri@gmail.com>
Signed-off-by: David Howells <dhowells@redhat.com>
---
 include/linux/fscache-cache.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h
index 34cf0fdd7dc7..610815e3f1aa 100644
--- a/include/linux/fscache-cache.h
+++ b/include/linux/fscache-cache.h
@@ -196,8 +196,7 @@ static inline void fscache_enqueue_retrieval(struct fscache_retrieval *op)
 static inline void fscache_retrieval_complete(struct fscache_retrieval *op,
 					      int n_pages)
 {
-	atomic_sub(n_pages, &op->n_pages);
-	if (atomic_read(&op->n_pages) <= 0)
+	if (atomic_sub_return_relaxed(n_pages, &op->n_pages) <= 0)
 		fscache_op_complete(&op->op, false);
 }
 
-- 
cgit v1.2.3


From 89d328f637b9904b6d4c9af73c8a608b8dd4d6f8 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Thu, 1 Nov 2018 16:17:22 -0700
Subject: pstore/ram: Correctly calculate usable PRZ bytes

The actual number of bytes stored in a PRZ is smaller than the
bytes requested by platform data, since there is a header on each
PRZ. Additionally, if ECC is enabled, there are trailing bytes used
as well. Normally this mismatch doesn't matter since PRZs are circular
buffers and the leading "overflow" bytes are just thrown away. However, in
the case of a compressed record, this rather badly corrupts the results.

This corruption was visible with "ramoops.mem_size=204800 ramoops.ecc=1".
Any stored crashes would not be uncompressable (producing a pstorefs
"dmesg-*.enc.z" file), and triggering errors at boot:

  [    2.790759] pstore: crypto_comp_decompress failed, ret = -22!

Backporting this depends on commit 70ad35db3321 ("pstore: Convert console
write to use ->write_buf")

Reported-by: Joel Fernandes <joel@joelfernandes.org>
Fixes: b0aad7a99c1d ("pstore: Add compression support to pstore")
Signed-off-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Joel Fernandes (Google) <joel@joelfernandes.org>
---
 include/linux/pstore.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/pstore.h b/include/linux/pstore.h
index a15bc4d48752..30fcec375a3a 100644
--- a/include/linux/pstore.h
+++ b/include/linux/pstore.h
@@ -90,7 +90,10 @@ struct pstore_record {
  *
  * @buf_lock:	spinlock to serialize access to @buf
  * @buf:	preallocated crash dump buffer
- * @bufsize:	size of @buf available for crash dump writes
+ * @bufsize:	size of @buf available for crash dump bytes (must match
+ *		smallest number of bytes available for writing to a
+ *		backend entry, since compressed bytes don't take kindly
+ *		to being truncated)
  *
  * @read_mutex:	serializes @open, @read, @close, and @erase callbacks
  * @flags:	bitfield of frontends the backend can accept writes for
-- 
cgit v1.2.3


From 0c7a52e4d4b5c4d35b31f3c3ad32af814f1bf491 Mon Sep 17 00:00:00 2001
From: Zenghui Yu <yuzenghui@huawei.com>
Date: Wed, 28 Nov 2018 03:35:23 +0000
Subject: tracepoint: Use __idx instead of idx in DO_TRACE macro to make it
 unique

After enabling KVM event tracing, almost all of trace_kvm_exit()'s
printk shows

	"kvm_exit: IRQ: ..."

even if the actual exception_type is NOT IRQ.  More specifically,
trace_kvm_exit() is defined in virt/kvm/arm/trace.h by TRACE_EVENT.

This slight problem may have existed after commit e6753f23d961
("tracepoint: Make rcuidle tracepoint callers use SRCU"). There are
two variables in trace_kvm_exit() and __DO_TRACE() which have the
same name, *idx*. Thus the actual value of *idx* will be overwritten
when tracing. Fix it by adding a simple prefix.

Cc: Joel Fernandes <joel@joelfernandes.org>
Cc: Wang Haibin <wanghaibin.wang@huawei.com>
Cc: linux-trace-devel@vger.kernel.org
Cc: stable@vger.kernel.org
Fixes: e6753f23d961 ("tracepoint: Make rcuidle tracepoint callers use SRCU")
Reviewed-by: Joel Fernandes (Google) <joel@joelfernandes.org>
Signed-off-by: Zenghui Yu <yuzenghui@huawei.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 include/linux/tracepoint.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index 538ba1a58f5b..e9de8ad0bad7 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -166,7 +166,7 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p)
 		struct tracepoint_func *it_func_ptr;			\
 		void *it_func;						\
 		void *__data;						\
-		int __maybe_unused idx = 0;				\
+		int __maybe_unused __idx = 0;				\
 									\
 		if (!(cond))						\
 			return;						\
@@ -182,7 +182,7 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p)
 		 * doesn't work from the idle path.			\
 		 */							\
 		if (rcuidle) {						\
-			idx = srcu_read_lock_notrace(&tracepoint_srcu);	\
+			__idx = srcu_read_lock_notrace(&tracepoint_srcu);\
 			rcu_irq_enter_irqson();				\
 		}							\
 									\
@@ -198,7 +198,7 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p)
 									\
 		if (rcuidle) {						\
 			rcu_irq_exit_irqson();				\
-			srcu_read_unlock_notrace(&tracepoint_srcu, idx);\
+			srcu_read_unlock_notrace(&tracepoint_srcu, __idx);\
 		}							\
 									\
 		preempt_enable_notrace();				\
-- 
cgit v1.2.3


From e0c274472d5d27f277af722e017525e0b33784cd Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@cmpxchg.org>
Date: Fri, 30 Nov 2018 14:09:58 -0800
Subject: psi: make disabling/enabling easier for vendor kernels

Mel Gorman reports a hackbench regression with psi that would prohibit
shipping the suse kernel with it default-enabled, but he'd still like
users to be able to opt in at little to no cost to others.

With the current combination of CONFIG_PSI and the psi_disabled bool set
from the commandline, this is a challenge.  Do the following things to
make it easier:

1. Add a config option CONFIG_PSI_DEFAULT_DISABLED that allows distros
   to enable CONFIG_PSI in their kernel but leave the feature disabled
   unless a user requests it at boot-time.

   To avoid double negatives, rename psi_disabled= to psi=.

2. Make psi_disabled a static branch to eliminate any branch costs
   when the feature is disabled.

In terms of numbers before and after this patch, Mel says:

: The following is a comparision using CONFIG_PSI=n as a baseline against
: your patch and a vanilla kernel
:
:                          4.20.0-rc4             4.20.0-rc4             4.20.0-rc4
:                 kconfigdisable-v1r1                vanilla        psidisable-v1r1
: Amean     1       1.3100 (   0.00%)      1.3923 (  -6.28%)      1.3427 (  -2.49%)
: Amean     3       3.8860 (   0.00%)      4.1230 *  -6.10%*      3.8860 (  -0.00%)
: Amean     5       6.8847 (   0.00%)      8.0390 * -16.77%*      6.7727 (   1.63%)
: Amean     7       9.9310 (   0.00%)     10.8367 *  -9.12%*      9.9910 (  -0.60%)
: Amean     12     16.6577 (   0.00%)     18.2363 *  -9.48%*     17.1083 (  -2.71%)
: Amean     18     26.5133 (   0.00%)     27.8833 *  -5.17%*     25.7663 (   2.82%)
: Amean     24     34.3003 (   0.00%)     34.6830 (  -1.12%)     32.0450 (   6.58%)
: Amean     30     40.0063 (   0.00%)     40.5800 (  -1.43%)     41.5087 (  -3.76%)
: Amean     32     40.1407 (   0.00%)     41.2273 (  -2.71%)     39.9417 (   0.50%)
:
: It's showing that the vanilla kernel takes a hit (as the bisection
: indicated it would) and that disabling PSI by default is reasonably
: close in terms of performance for this particular workload on this
: particular machine so;

Link: http://lkml.kernel.org/r/20181127165329.GA29728@cmpxchg.org
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Tested-by: Mel Gorman <mgorman@techsingularity.net>
Reported-by: Mel Gorman <mgorman@techsingularity.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/psi.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/psi.h b/include/linux/psi.h
index 8e0725aac0aa..7006008d5b72 100644
--- a/include/linux/psi.h
+++ b/include/linux/psi.h
@@ -1,6 +1,7 @@
 #ifndef _LINUX_PSI_H
 #define _LINUX_PSI_H
 
+#include <linux/jump_label.h>
 #include <linux/psi_types.h>
 #include <linux/sched.h>
 
@@ -9,7 +10,7 @@ struct css_set;
 
 #ifdef CONFIG_PSI
 
-extern bool psi_disabled;
+extern struct static_key_false psi_disabled;
 
 void psi_init(void);
 
-- 
cgit v1.2.3