From 2e210bbb7429cdcf1a1a3ad00c1bf98bd9bf2452 Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Tue, 3 Apr 2018 10:52:20 -0700
Subject: HID: input: fix battery level reporting on BT mice

The commit 581c4484769e ("HID: input: map digitizer battery usage")
assumed that devices having input (qas opposed to feature) report for
battery strength would report the data on their own, without the need to
be polled by the kernel; unfortunately it is not so. Many wireless mice
do not send unsolicited reports with battery strength data and have to
be polled explicitly. As a complication, stylus devices on digitizers
are not normally connected to the base and thus can not be polled - the
base can only determine battery strength in the stylus when it is in
proximity.

To solve this issue, we add a special flag that tells the kernel
to avoid polling the device (and expect unsolicited reports) and set it
when report field with physical usage of digitizer stylus (HID_DG_STYLUS).
Unless this flag is set, and we have not seen the unsolicited reports,
the kernel will attempt to poll the device when userspace attempts to
read "capacity" and "state" attributes of power_supply object
corresponding to the devices battery.

Fixes: 581c4484769e ("HID: input: map digitizer battery usage")
Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=198095
Cc: stable@vger.kernel.org
Reported-and-tested-by: Martin van Es <martin@mrvanes.com>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 include/linux/hid.h | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/hid.h b/include/linux/hid.h
index 8da3e1f48195..26240a22978a 100644
--- a/include/linux/hid.h
+++ b/include/linux/hid.h
@@ -516,6 +516,12 @@ enum hid_type {
 	HID_TYPE_USBNONE
 };
 
+enum hid_battery_status {
+	HID_BATTERY_UNKNOWN = 0,
+	HID_BATTERY_QUERIED,		/* Kernel explicitly queried battery strength */
+	HID_BATTERY_REPORTED,		/* Device sent unsolicited battery strength report */
+};
+
 struct hid_driver;
 struct hid_ll_driver;
 
@@ -558,7 +564,8 @@ struct hid_device {							/* device report descriptor */
 	__s32 battery_max;
 	__s32 battery_report_type;
 	__s32 battery_report_id;
-	bool battery_reported;
+	enum hid_battery_status battery_status;
+	bool battery_avoid_query;
 #endif
 
 	unsigned int status;						/* see STAT flags above */
-- 
cgit v1.2.3


From 3a38bb98d9abdc3856f26b5ed4332803065cd7cf Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Tue, 10 Apr 2018 09:37:32 -0700
Subject: bpf/tracing: fix a deadlock in perf_event_detach_bpf_prog

syzbot reported a possible deadlock in perf_event_detach_bpf_prog.
The error details:
  ======================================================
  WARNING: possible circular locking dependency detected
  4.16.0-rc7+ #3 Not tainted
  ------------------------------------------------------
  syz-executor7/24531 is trying to acquire lock:
   (bpf_event_mutex){+.+.}, at: [<000000008a849b07>] perf_event_detach_bpf_prog+0x92/0x3d0 kernel/trace/bpf_trace.c:854

  but task is already holding lock:
   (&mm->mmap_sem){++++}, at: [<0000000038768f87>] vm_mmap_pgoff+0x198/0x280 mm/util.c:353

  which lock already depends on the new lock.

  the existing dependency chain (in reverse order) is:

  -> #1 (&mm->mmap_sem){++++}:
       __might_fault+0x13a/0x1d0 mm/memory.c:4571
       _copy_to_user+0x2c/0xc0 lib/usercopy.c:25
       copy_to_user include/linux/uaccess.h:155 [inline]
       bpf_prog_array_copy_info+0xf2/0x1c0 kernel/bpf/core.c:1694
       perf_event_query_prog_array+0x1c7/0x2c0 kernel/trace/bpf_trace.c:891
       _perf_ioctl kernel/events/core.c:4750 [inline]
       perf_ioctl+0x3e1/0x1480 kernel/events/core.c:4770
       vfs_ioctl fs/ioctl.c:46 [inline]
       do_vfs_ioctl+0x1b1/0x1520 fs/ioctl.c:686
       SYSC_ioctl fs/ioctl.c:701 [inline]
       SyS_ioctl+0x8f/0xc0 fs/ioctl.c:692
       do_syscall_64+0x281/0x940 arch/x86/entry/common.c:287
       entry_SYSCALL_64_after_hwframe+0x42/0xb7

  -> #0 (bpf_event_mutex){+.+.}:
       lock_acquire+0x1d5/0x580 kernel/locking/lockdep.c:3920
       __mutex_lock_common kernel/locking/mutex.c:756 [inline]
       __mutex_lock+0x16f/0x1a80 kernel/locking/mutex.c:893
       mutex_lock_nested+0x16/0x20 kernel/locking/mutex.c:908
       perf_event_detach_bpf_prog+0x92/0x3d0 kernel/trace/bpf_trace.c:854
       perf_event_free_bpf_prog kernel/events/core.c:8147 [inline]
       _free_event+0xbdb/0x10f0 kernel/events/core.c:4116
       put_event+0x24/0x30 kernel/events/core.c:4204
       perf_mmap_close+0x60d/0x1010 kernel/events/core.c:5172
       remove_vma+0xb4/0x1b0 mm/mmap.c:172
       remove_vma_list mm/mmap.c:2490 [inline]
       do_munmap+0x82a/0xdf0 mm/mmap.c:2731
       mmap_region+0x59e/0x15a0 mm/mmap.c:1646
       do_mmap+0x6c0/0xe00 mm/mmap.c:1483
       do_mmap_pgoff include/linux/mm.h:2223 [inline]
       vm_mmap_pgoff+0x1de/0x280 mm/util.c:355
       SYSC_mmap_pgoff mm/mmap.c:1533 [inline]
       SyS_mmap_pgoff+0x462/0x5f0 mm/mmap.c:1491
       SYSC_mmap arch/x86/kernel/sys_x86_64.c:100 [inline]
       SyS_mmap+0x16/0x20 arch/x86/kernel/sys_x86_64.c:91
       do_syscall_64+0x281/0x940 arch/x86/entry/common.c:287
       entry_SYSCALL_64_after_hwframe+0x42/0xb7

  other info that might help us debug this:

   Possible unsafe locking scenario:

         CPU0                    CPU1
         ----                    ----
    lock(&mm->mmap_sem);
                                 lock(bpf_event_mutex);
                                 lock(&mm->mmap_sem);
    lock(bpf_event_mutex);

   *** DEADLOCK ***
  ======================================================

The bug is introduced by Commit f371b304f12e ("bpf/tracing: allow
user space to query prog array on the same tp") where copy_to_user,
which requires mm->mmap_sem, is called inside bpf_event_mutex lock.
At the same time, during perf_event file descriptor close,
mm->mmap_sem is held first and then subsequent
perf_event_detach_bpf_prog needs bpf_event_mutex lock.
Such a senario caused a deadlock.

As suggested by Daniel, moving copy_to_user out of the
bpf_event_mutex lock should fix the problem.

Fixes: f371b304f12e ("bpf/tracing: allow user space to query prog array on the same tp")
Reported-by: syzbot+dc5ca0e4c9bfafaf2bae@syzkaller.appspotmail.com
Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/linux/bpf.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 95a7abd0ee92..486e65e3db26 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -339,8 +339,8 @@ int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *progs,
 void bpf_prog_array_delete_safe(struct bpf_prog_array __rcu *progs,
 				struct bpf_prog *old_prog);
 int bpf_prog_array_copy_info(struct bpf_prog_array __rcu *array,
-			     __u32 __user *prog_ids, u32 request_cnt,
-			     __u32 __user *prog_cnt);
+			     u32 *prog_ids, u32 request_cnt,
+			     u32 *prog_cnt);
 int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array,
 			struct bpf_prog *exclude_prog,
 			struct bpf_prog *include_prog,
-- 
cgit v1.2.3


From 1c2734b31d72316e3faaad88c0c9c46fa92a4b20 Mon Sep 17 00:00:00 2001
From: Raghuram Chary J <raghuramchary.jallipalli@microchip.com>
Date: Wed, 11 Apr 2018 20:36:36 +0530
Subject: lan78xx: PHY DSP registers initialization to address EEE link drop
 issues with long cables

The patch is to configure DSP registers of PHY device
to handle Gbe-EEE failures with >40m cable length.

Fixes: 55d7de9de6c3 ("Microchip's LAN7800 family USB 2/3 to 10/100/1000 Ethernet device driver")
Signed-off-by: Raghuram Chary J <raghuramchary.jallipalli@microchip.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/microchipphy.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/microchipphy.h b/include/linux/microchipphy.h
index eb492d47f717..8f9c90379732 100644
--- a/include/linux/microchipphy.h
+++ b/include/linux/microchipphy.h
@@ -70,4 +70,12 @@
 #define	LAN88XX_MMD3_CHIP_ID			(32877)
 #define	LAN88XX_MMD3_CHIP_REV			(32878)
 
+/* DSP registers */
+#define PHY_ARDENNES_MMD_DEV_3_PHY_CFG		(0x806A)
+#define PHY_ARDENNES_MMD_DEV_3_PHY_CFG_ZD_DLY_EN_	(0x2000)
+#define LAN88XX_EXT_PAGE_ACCESS_TR		(0x52B5)
+#define LAN88XX_EXT_PAGE_TR_CR			16
+#define LAN88XX_EXT_PAGE_TR_LOW_DATA		17
+#define LAN88XX_EXT_PAGE_TR_HIGH_DATA		18
+
 #endif /* _MICROCHIPPHY_H */
-- 
cgit v1.2.3


From 8e984f8667ff4225092af734eef28a3d7bae8626 Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Thu, 5 Apr 2018 16:18:04 +0300
Subject: fsnotify: fix typo in a comment about mark->g_list

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 include/linux/fsnotify_backend.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index 9f1edb92c97e..e0c95c9f1e29 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -248,7 +248,7 @@ struct fsnotify_mark {
 	/* Group this mark is for. Set on mark creation, stable until last ref
 	 * is dropped */
 	struct fsnotify_group *group;
-	/* List of marks by group->i_fsnotify_marks. Also reused for queueing
+	/* List of marks by group->marks_list. Also reused for queueing
 	 * mark into destroy_list when it's waiting for the end of SRCU period
 	 * before it can be freed. [group->mark_mutex] */
 	struct list_head g_list;
-- 
cgit v1.2.3


From 8e04944f0ea8b838399049bdcda920ab36ae3b04 Mon Sep 17 00:00:00 2001
From: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Date: Wed, 4 Apr 2018 19:53:07 +0900
Subject: mm,vmscan: Allow preallocating memory for register_shrinker().

syzbot is catching so many bugs triggered by commit 9ee332d99e4d5a97
("sget(): handle failures of register_shrinker()"). That commit expected
that calling kill_sb() from deactivate_locked_super() without successful
fill_super() is safe, but the reality was different; some callers assign
attributes which are needed for kill_sb() after sget() succeeds.

For example, [1] is a report where sb->s_mode (which seems to be either
FMODE_READ | FMODE_EXCL | FMODE_WRITE or FMODE_READ | FMODE_EXCL) is not
assigned unless sget() succeeds. But it does not worth complicate sget()
so that register_shrinker() failure path can safely call
kill_block_super() via kill_sb(). Making alloc_super() fail if memory
allocation for register_shrinker() failed is much simpler. Let's avoid
calling deactivate_locked_super() from sget_userns() by preallocating
memory for the shrinker and making register_shrinker() in sget_userns()
never fail.

[1] https://syzkaller.appspot.com/bug?id=588996a25a2587be2e3a54e8646728fb9cae44e7

Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Reported-by: syzbot <syzbot+5a170e19c963a2e0df79@syzkaller.appspotmail.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Michal Hocko <mhocko@suse.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/shrinker.h | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/shrinker.h b/include/linux/shrinker.h
index 388ff2936a87..6794490f25b2 100644
--- a/include/linux/shrinker.h
+++ b/include/linux/shrinker.h
@@ -75,6 +75,9 @@ struct shrinker {
 #define SHRINKER_NUMA_AWARE	(1 << 0)
 #define SHRINKER_MEMCG_AWARE	(1 << 1)
 
-extern int register_shrinker(struct shrinker *);
-extern void unregister_shrinker(struct shrinker *);
+extern int prealloc_shrinker(struct shrinker *shrinker);
+extern void register_shrinker_prepared(struct shrinker *shrinker);
+extern int register_shrinker(struct shrinker *shrinker);
+extern void unregister_shrinker(struct shrinker *shrinker);
+extern void free_prealloced_shrinker(struct shrinker *shrinker);
 #endif
-- 
cgit v1.2.3


From 4ad69b80e886a845f56ce0a3d10211208693d92b Mon Sep 17 00:00:00 2001
From: Jerome Brunet <jbrunet@baylibre.com>
Date: Mon, 9 Apr 2018 15:59:20 +0200
Subject: clk: honor CLK_MUX_ROUND_CLOSEST in generic clk mux

CLK_MUX_ROUND_CLOSEST is part of the clk_mux documentation but clk_mux
directly calls __clk_mux_determine_rate(), which overrides the flag.
As result, if clk_mux is instantiated with CLK_MUX_ROUND_CLOSEST, the
flag will be ignored and the clock rounded down.

To solve this, this patch expose clk_mux_determine_rate_flags() in the
clk-provider API and uses it in the determine_rate() callback of clk_mux.

Fixes: 15a02c1f6dd7 ("clk: Add __clk_mux_determine_rate_closest")
Signed-off-by: Jerome Brunet <jbrunet@baylibre.com>
Signed-off-by: Stephen Boyd <sboyd@kernel.org>
---
 include/linux/clk-provider.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h
index 210a890008f9..1d25e149c1c5 100644
--- a/include/linux/clk-provider.h
+++ b/include/linux/clk-provider.h
@@ -765,6 +765,9 @@ int __clk_mux_determine_rate(struct clk_hw *hw,
 int __clk_determine_rate(struct clk_hw *core, struct clk_rate_request *req);
 int __clk_mux_determine_rate_closest(struct clk_hw *hw,
 				     struct clk_rate_request *req);
+int clk_mux_determine_rate_flags(struct clk_hw *hw,
+				 struct clk_rate_request *req,
+				 unsigned long flags);
 void clk_hw_reparent(struct clk_hw *hw, struct clk_hw *new_parent);
 void clk_hw_set_rate_range(struct clk_hw *hw, unsigned long min_rate,
 			   unsigned long max_rate);
-- 
cgit v1.2.3


From 5968a70d7af5f2abbd9d9f9c8e86da51f0a6b16d Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Mon, 16 Apr 2018 12:32:55 -0700
Subject: textsearch: fix kernel-doc warnings and add kernel-api section

Make lib/textsearch.c usable as kernel-doc.
Add textsearch() function family to kernel-api documentation.
Fix kernel-doc warnings in <linux/textsearch.h>:
  ../include/linux/textsearch.h:65: warning: Incorrect use of kernel-doc format:
	* get_next_block - fetch next block of data
  ../include/linux/textsearch.h:82: warning: Incorrect use of kernel-doc format:
	* finish - finalize/clean a series of get_next_block() calls

Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/textsearch.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/textsearch.h b/include/linux/textsearch.h
index 0494db3fd9e8..13770cfe33ad 100644
--- a/include/linux/textsearch.h
+++ b/include/linux/textsearch.h
@@ -62,7 +62,7 @@ struct ts_config
 	int 			flags;
 
 	/**
-	 * get_next_block - fetch next block of data
+	 * @get_next_block: fetch next block of data
 	 * @consumed: number of bytes consumed by the caller
 	 * @dst: destination buffer
 	 * @conf: search configuration
@@ -79,7 +79,7 @@ struct ts_config
 						  struct ts_state *state);
 
 	/**
-	 * finish - finalize/clean a series of get_next_block() calls
+	 * @finish: finalize/clean a series of get_next_block() calls
 	 * @conf: search configuration
 	 * @state: search state
 	 *
-- 
cgit v1.2.3


From e91c2518a5d22a07642f35d85f39001ad379dae4 Mon Sep 17 00:00:00 2001
From: Petr Mladek <pmladek@suse.com>
Date: Mon, 16 Apr 2018 13:36:46 +0200
Subject: livepatch: Initialize shadow variables safely by a custom callback

The existing API allows to pass a sample data to initialize the shadow
data. It works well when the data are position independent. But it fails
miserably when we need to set a pointer to the shadow structure itself.

Unfortunately, we might need to initialize the pointer surprisingly
often because of struct list_head. It is even worse because the list
might be hidden in other common structures, for example, struct mutex,
struct wait_queue_head.

For example, this was needed to fix races in ALSA sequencer. It required
to add mutex into struct snd_seq_client. See commit b3defb791b26ea06
("ALSA: seq: Make ioctls race-free") and commit d15d662e89fc667b9
("ALSA: seq: Fix racy pool initializations")

This patch makes the API more safe. A custom constructor function and data
are passed to klp_shadow_*alloc() functions instead of the sample data.

Note that ctor_data are no longer a template for shadow->data. It might
point to any data that might be necessary when the constructor is called.

Also note that the constructor is called under klp_shadow_lock. It is
an internal spin_lock that synchronizes alloc() vs. get() operations,
see klp_shadow_get_or_alloc(). On one hand, this adds a risk of ABBA
deadlocks. On the other hand, it allows to do some operations safely.
For example, we could add the new structure into an existing list.
This must be done only once when the structure is allocated.

Reported-by: Nicolai Stange <nstange@suse.de>
Signed-off-by: Petr Mladek <pmladek@suse.com>
Acked-by: Josh Poimboeuf <jpoimboe@redhat.com>
Acked-by: Miroslav Benes <mbenes@suse.cz>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 include/linux/livepatch.h | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/livepatch.h b/include/linux/livepatch.h
index 4754f01c1abb..7e084321b146 100644
--- a/include/linux/livepatch.h
+++ b/include/linux/livepatch.h
@@ -186,11 +186,17 @@ static inline bool klp_have_reliable_stack(void)
 	       IS_ENABLED(CONFIG_HAVE_RELIABLE_STACKTRACE);
 }
 
+typedef int (*klp_shadow_ctor_t)(void *obj,
+				 void *shadow_data,
+				 void *ctor_data);
+
 void *klp_shadow_get(void *obj, unsigned long id);
-void *klp_shadow_alloc(void *obj, unsigned long id, void *data,
-		       size_t size, gfp_t gfp_flags);
-void *klp_shadow_get_or_alloc(void *obj, unsigned long id, void *data,
-			      size_t size, gfp_t gfp_flags);
+void *klp_shadow_alloc(void *obj, unsigned long id,
+		       size_t size, gfp_t gfp_flags,
+		       klp_shadow_ctor_t ctor, void *ctor_data);
+void *klp_shadow_get_or_alloc(void *obj, unsigned long id,
+			      size_t size, gfp_t gfp_flags,
+			      klp_shadow_ctor_t ctor, void *ctor_data);
 void klp_shadow_free(void *obj, unsigned long id);
 void klp_shadow_free_all(unsigned long id);
 
-- 
cgit v1.2.3


From 3b2c77d000fe9f7d02e9e726e00dccf9f92b256f Mon Sep 17 00:00:00 2001
From: Petr Mladek <pmladek@suse.com>
Date: Mon, 16 Apr 2018 13:36:47 +0200
Subject: livepatch: Allow to call a custom callback when freeing shadow
 variables

We might need to do some actions before the shadow variable is freed.
For example, we might need to remove it from a list or free some data
that it points to.

This is already possible now. The user can get the shadow variable
by klp_shadow_get(), do the necessary actions, and then call
klp_shadow_free().

This patch allows to do it a more elegant way. The user could implement
the needed actions in a callback that is passed to klp_shadow_free()
as a parameter. The callback usually does reverse operations to
the constructor callback that can be called by klp_shadow_*alloc().

It is especially useful for klp_shadow_free_all(). There we need to do
these extra actions for each found shadow variable with the given ID.

Note that the memory used by the shadow variable itself is still released
later by rcu callback. It is needed to protect internal structures that
keep all shadow variables. But the destructor is called immediately.
The shadow variable must not be access anyway after klp_shadow_free()
is called. The user is responsible to protect this any suitable way.

Be aware that the destructor is called under klp_shadow_lock. It is
the same as for the contructor in klp_shadow_alloc().

Signed-off-by: Petr Mladek <pmladek@suse.com>
Acked-by: Josh Poimboeuf <jpoimboe@redhat.com>
Acked-by: Miroslav Benes <mbenes@suse.cz>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 include/linux/livepatch.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/livepatch.h b/include/linux/livepatch.h
index 7e084321b146..aec44b1d9582 100644
--- a/include/linux/livepatch.h
+++ b/include/linux/livepatch.h
@@ -189,6 +189,7 @@ static inline bool klp_have_reliable_stack(void)
 typedef int (*klp_shadow_ctor_t)(void *obj,
 				 void *shadow_data,
 				 void *ctor_data);
+typedef void (*klp_shadow_dtor_t)(void *obj, void *shadow_data);
 
 void *klp_shadow_get(void *obj, unsigned long id);
 void *klp_shadow_alloc(void *obj, unsigned long id,
@@ -197,8 +198,8 @@ void *klp_shadow_alloc(void *obj, unsigned long id,
 void *klp_shadow_get_or_alloc(void *obj, unsigned long id,
 			      size_t size, gfp_t gfp_flags,
 			      klp_shadow_ctor_t ctor, void *ctor_data);
-void klp_shadow_free(void *obj, unsigned long id);
-void klp_shadow_free_all(unsigned long id);
+void klp_shadow_free(void *obj, unsigned long id, klp_shadow_dtor_t dtor);
+void klp_shadow_free_all(unsigned long id, klp_shadow_dtor_t dtor);
 
 #else /* !CONFIG_LIVEPATCH */
 
-- 
cgit v1.2.3


From f0ae6a0321222864ed8675a924cc8ee2cb042c31 Mon Sep 17 00:00:00 2001
From: "Liu, Changcheng" <changcheng.liu@intel.com>
Date: Thu, 12 Apr 2018 15:57:01 +0800
Subject: timers: Remove stale struct tvec_base forward declaration

struct tvec_base is a leftover of the original timer wheel implementation
and not longer used. Remove the forward declaration.

Signed-off-by: Liu Changcheng <changcheng.liu@intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: akpm@linux-foundation.org
Link: https://lkml.kernel.org/r/20180412075701.GA38952@sofia
---
 include/linux/timer.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/timer.h b/include/linux/timer.h
index 2448f9cc48a3..7b066fd38248 100644
--- a/include/linux/timer.h
+++ b/include/linux/timer.h
@@ -8,8 +8,6 @@
 #include <linux/debugobjects.h>
 #include <linux/stringify.h>
 
-struct tvec_base;
-
 struct timer_list {
 	/*
 	 * All fields that change during normal runtime grouped to the
-- 
cgit v1.2.3


From e142aa09ed88be98395dde7acb96fb2263566b68 Mon Sep 17 00:00:00 2001
From: Baolin Wang <baolin.wang@linaro.org>
Date: Fri, 13 Apr 2018 13:27:58 +0800
Subject: timekeeping: Remove __current_kernel_time()

The __current_kernel_time() function based on 'struct timespec' is no
longer recommended for new code, and the only user of this function has
been replaced by commit 6909e29fdefb ("kdb: use __ktime_get_real_seconds
instead of __current_kernel_time").

Remove the obsolete interface.

Signed-off-by: Baolin Wang <baolin.wang@linaro.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: arnd@arndb.de
Cc: sboyd@kernel.org
Cc: broonie@kernel.org
Cc: john.stultz@linaro.org
Link: https://lkml.kernel.org/r/1a9dbea7ee2cda7efe9ed330874075cf17fdbff6.1523596316.git.baolin.wang@linaro.org
---
 include/linux/timekeeping32.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/timekeeping32.h b/include/linux/timekeeping32.h
index af4114d5dc17..3616b4becb59 100644
--- a/include/linux/timekeeping32.h
+++ b/include/linux/timekeeping32.h
@@ -9,9 +9,6 @@
 extern void do_gettimeofday(struct timeval *tv);
 unsigned long get_seconds(void);
 
-/* does not take xtime_lock */
-struct timespec __current_kernel_time(void);
-
 static inline struct timespec current_kernel_time(void)
 {
 	struct timespec64 now = current_kernel_time64();
-- 
cgit v1.2.3


From 7ce2367254e84753bceb07327aaf5c953cfce117 Mon Sep 17 00:00:00 2001
From: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
Date: Tue, 17 Apr 2018 18:46:14 +0900
Subject: vlan: Fix reading memory beyond skb->tail in skb_vlan_tagged_multi

Syzkaller spotted an old bug which leads to reading skb beyond tail by 4
bytes on vlan tagged packets.
This is caused because skb_vlan_tagged_multi() did not check
skb_headlen.

BUG: KMSAN: uninit-value in eth_type_vlan include/linux/if_vlan.h:283 [inline]
BUG: KMSAN: uninit-value in skb_vlan_tagged_multi include/linux/if_vlan.h:656 [inline]
BUG: KMSAN: uninit-value in vlan_features_check include/linux/if_vlan.h:672 [inline]
BUG: KMSAN: uninit-value in dflt_features_check net/core/dev.c:2949 [inline]
BUG: KMSAN: uninit-value in netif_skb_features+0xd1b/0xdc0 net/core/dev.c:3009
CPU: 1 PID: 3582 Comm: syzkaller435149 Not tainted 4.16.0+ #82
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
Call Trace:
  __dump_stack lib/dump_stack.c:17 [inline]
  dump_stack+0x185/0x1d0 lib/dump_stack.c:53
  kmsan_report+0x142/0x240 mm/kmsan/kmsan.c:1067
  __msan_warning_32+0x6c/0xb0 mm/kmsan/kmsan_instr.c:676
  eth_type_vlan include/linux/if_vlan.h:283 [inline]
  skb_vlan_tagged_multi include/linux/if_vlan.h:656 [inline]
  vlan_features_check include/linux/if_vlan.h:672 [inline]
  dflt_features_check net/core/dev.c:2949 [inline]
  netif_skb_features+0xd1b/0xdc0 net/core/dev.c:3009
  validate_xmit_skb+0x89/0x1320 net/core/dev.c:3084
  __dev_queue_xmit+0x1cb2/0x2b60 net/core/dev.c:3549
  dev_queue_xmit+0x4b/0x60 net/core/dev.c:3590
  packet_snd net/packet/af_packet.c:2944 [inline]
  packet_sendmsg+0x7c57/0x8a10 net/packet/af_packet.c:2969
  sock_sendmsg_nosec net/socket.c:630 [inline]
  sock_sendmsg net/socket.c:640 [inline]
  sock_write_iter+0x3b9/0x470 net/socket.c:909
  do_iter_readv_writev+0x7bb/0x970 include/linux/fs.h:1776
  do_iter_write+0x30d/0xd40 fs/read_write.c:932
  vfs_writev fs/read_write.c:977 [inline]
  do_writev+0x3c9/0x830 fs/read_write.c:1012
  SYSC_writev+0x9b/0xb0 fs/read_write.c:1085
  SyS_writev+0x56/0x80 fs/read_write.c:1082
  do_syscall_64+0x309/0x430 arch/x86/entry/common.c:287
  entry_SYSCALL_64_after_hwframe+0x3d/0xa2
RIP: 0033:0x43ffa9
RSP: 002b:00007fff2cff3948 EFLAGS: 00000217 ORIG_RAX: 0000000000000014
RAX: ffffffffffffffda RBX: 00000000004002c8 RCX: 000000000043ffa9
RDX: 0000000000000001 RSI: 0000000020000080 RDI: 0000000000000003
RBP: 00000000006cb018 R08: 0000000000000000 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000217 R12: 00000000004018d0
R13: 0000000000401960 R14: 0000000000000000 R15: 0000000000000000

Uninit was created at:
  kmsan_save_stack_with_flags mm/kmsan/kmsan.c:278 [inline]
  kmsan_internal_poison_shadow+0xb8/0x1b0 mm/kmsan/kmsan.c:188
  kmsan_kmalloc+0x94/0x100 mm/kmsan/kmsan.c:314
  kmsan_slab_alloc+0x11/0x20 mm/kmsan/kmsan.c:321
  slab_post_alloc_hook mm/slab.h:445 [inline]
  slab_alloc_node mm/slub.c:2737 [inline]
  __kmalloc_node_track_caller+0xaed/0x11c0 mm/slub.c:4369
  __kmalloc_reserve net/core/skbuff.c:138 [inline]
  __alloc_skb+0x2cf/0x9f0 net/core/skbuff.c:206
  alloc_skb include/linux/skbuff.h:984 [inline]
  alloc_skb_with_frags+0x1d4/0xb20 net/core/skbuff.c:5234
  sock_alloc_send_pskb+0xb56/0x1190 net/core/sock.c:2085
  packet_alloc_skb net/packet/af_packet.c:2803 [inline]
  packet_snd net/packet/af_packet.c:2894 [inline]
  packet_sendmsg+0x6444/0x8a10 net/packet/af_packet.c:2969
  sock_sendmsg_nosec net/socket.c:630 [inline]
  sock_sendmsg net/socket.c:640 [inline]
  sock_write_iter+0x3b9/0x470 net/socket.c:909
  do_iter_readv_writev+0x7bb/0x970 include/linux/fs.h:1776
  do_iter_write+0x30d/0xd40 fs/read_write.c:932
  vfs_writev fs/read_write.c:977 [inline]
  do_writev+0x3c9/0x830 fs/read_write.c:1012
  SYSC_writev+0x9b/0xb0 fs/read_write.c:1085
  SyS_writev+0x56/0x80 fs/read_write.c:1082
  do_syscall_64+0x309/0x430 arch/x86/entry/common.c:287
  entry_SYSCALL_64_after_hwframe+0x3d/0xa2

Fixes: 58e998c6d239 ("offloading: Force software GSO for multiple vlan tags.")
Reported-and-tested-by: syzbot+0bbe42c764feafa82c5a@syzkaller.appspotmail.com
Signed-off-by: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_vlan.h | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h
index d11f41d5269f..78a5a90b4267 100644
--- a/include/linux/if_vlan.h
+++ b/include/linux/if_vlan.h
@@ -663,7 +663,7 @@ static inline bool skb_vlan_tagged(const struct sk_buff *skb)
  * Returns true if the skb is tagged with multiple vlan headers, regardless
  * of whether it is hardware accelerated or not.
  */
-static inline bool skb_vlan_tagged_multi(const struct sk_buff *skb)
+static inline bool skb_vlan_tagged_multi(struct sk_buff *skb)
 {
 	__be16 protocol = skb->protocol;
 
@@ -673,6 +673,9 @@ static inline bool skb_vlan_tagged_multi(const struct sk_buff *skb)
 		if (likely(!eth_type_vlan(protocol)))
 			return false;
 
+		if (unlikely(!pskb_may_pull(skb, VLAN_ETH_HLEN)))
+			return false;
+
 		veh = (struct vlan_ethhdr *)skb->data;
 		protocol = veh->h_vlan_encapsulated_proto;
 	}
@@ -690,7 +693,7 @@ static inline bool skb_vlan_tagged_multi(const struct sk_buff *skb)
  *
  * Returns features without unsafe ones if the skb has multiple tags.
  */
-static inline netdev_features_t vlan_features_check(const struct sk_buff *skb,
+static inline netdev_features_t vlan_features_check(struct sk_buff *skb,
 						    netdev_features_t features)
 {
 	if (skb_vlan_tagged_multi(skb)) {
-- 
cgit v1.2.3


From 0ce9144471de9ee09306ca0127e7cd27521ccc3f Mon Sep 17 00:00:00 2001
From: Dave Chinner <dchinner@redhat.com>
Date: Wed, 18 Apr 2018 14:08:27 +1000
Subject: block: add blk_queue_fua() helper function

So we can check FUA support status from the iomap direct IO code.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blkdev.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 9af3e0f430bc..c362aadfe036 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -737,6 +737,7 @@ bool blk_queue_flag_test_and_clear(unsigned int flag, struct request_queue *q);
 #define blk_queue_quiesced(q)	test_bit(QUEUE_FLAG_QUIESCED, &(q)->queue_flags)
 #define blk_queue_preempt_only(q)				\
 	test_bit(QUEUE_FLAG_PREEMPT_ONLY, &(q)->queue_flags)
+#define blk_queue_fua(q)	test_bit(QUEUE_FLAG_FUA, &(q)->queue_flags)
 
 extern int blk_set_preempt_only(struct request_queue *q);
 extern void blk_clear_preempt_only(struct request_queue *q);
-- 
cgit v1.2.3


From 5692fceebeb7f11c07e2a990f7711a01ae437de2 Mon Sep 17 00:00:00 2001
From: Dave Gerlach <d-gerlach@ti.com>
Date: Wed, 11 Apr 2018 16:15:43 -0500
Subject: ARM: OMAP2+: Fix build when using split object directories

The sleep33xx and sleep43xx files should not depend on a header file
generated in drivers/memory. Remove this dependency and instead allow
both drivers/memory and arch/arm/mach-omap2 to generate all macros
needed in headers local to their own paths.

This fixes an issue where the build fail will when using O= to set a
split object directory and arch/arm/mach-omap2 is built before
drivers/memory with the following error:

.../drivers/memory/emif-asm-offsets.c:1:0: fatal error: can't open
drivers/memory/emif-asm-offsets.s for writing: No such file or directory
compilation terminated.

Fixes: 41d9d44d7258 ("ARM: OMAP2+: pm33xx-core: Add platform code needed for PM")
Reviewed-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Signed-off-by: Dave Gerlach <d-gerlach@ti.com>
Acked-by: Santosh Shilimkar <ssantosh@kernel.org>
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 include/linux/ti-emif-sram.h | 75 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 75 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/ti-emif-sram.h b/include/linux/ti-emif-sram.h
index 45bc6b376492..53604b087f2c 100644
--- a/include/linux/ti-emif-sram.h
+++ b/include/linux/ti-emif-sram.h
@@ -60,6 +60,81 @@ struct ti_emif_pm_functions {
 	u32 abort_sr;
 } __packed __aligned(8);
 
+static inline void ti_emif_asm_offsets(void)
+{
+	DEFINE(EMIF_SDCFG_VAL_OFFSET,
+	       offsetof(struct emif_regs_amx3, emif_sdcfg_val));
+	DEFINE(EMIF_TIMING1_VAL_OFFSET,
+	       offsetof(struct emif_regs_amx3, emif_timing1_val));
+	DEFINE(EMIF_TIMING2_VAL_OFFSET,
+	       offsetof(struct emif_regs_amx3, emif_timing2_val));
+	DEFINE(EMIF_TIMING3_VAL_OFFSET,
+	       offsetof(struct emif_regs_amx3, emif_timing3_val));
+	DEFINE(EMIF_REF_CTRL_VAL_OFFSET,
+	       offsetof(struct emif_regs_amx3, emif_ref_ctrl_val));
+	DEFINE(EMIF_ZQCFG_VAL_OFFSET,
+	       offsetof(struct emif_regs_amx3, emif_zqcfg_val));
+	DEFINE(EMIF_PMCR_VAL_OFFSET,
+	       offsetof(struct emif_regs_amx3, emif_pmcr_val));
+	DEFINE(EMIF_PMCR_SHDW_VAL_OFFSET,
+	       offsetof(struct emif_regs_amx3, emif_pmcr_shdw_val));
+	DEFINE(EMIF_RD_WR_LEVEL_RAMP_CTRL_OFFSET,
+	       offsetof(struct emif_regs_amx3, emif_rd_wr_level_ramp_ctrl));
+	DEFINE(EMIF_RD_WR_EXEC_THRESH_OFFSET,
+	       offsetof(struct emif_regs_amx3, emif_rd_wr_exec_thresh));
+	DEFINE(EMIF_COS_CONFIG_OFFSET,
+	       offsetof(struct emif_regs_amx3, emif_cos_config));
+	DEFINE(EMIF_PRIORITY_TO_COS_MAPPING_OFFSET,
+	       offsetof(struct emif_regs_amx3, emif_priority_to_cos_mapping));
+	DEFINE(EMIF_CONNECT_ID_SERV_1_MAP_OFFSET,
+	       offsetof(struct emif_regs_amx3, emif_connect_id_serv_1_map));
+	DEFINE(EMIF_CONNECT_ID_SERV_2_MAP_OFFSET,
+	       offsetof(struct emif_regs_amx3, emif_connect_id_serv_2_map));
+	DEFINE(EMIF_OCP_CONFIG_VAL_OFFSET,
+	       offsetof(struct emif_regs_amx3, emif_ocp_config_val));
+	DEFINE(EMIF_LPDDR2_NVM_TIM_OFFSET,
+	       offsetof(struct emif_regs_amx3, emif_lpddr2_nvm_tim));
+	DEFINE(EMIF_LPDDR2_NVM_TIM_SHDW_OFFSET,
+	       offsetof(struct emif_regs_amx3, emif_lpddr2_nvm_tim_shdw));
+	DEFINE(EMIF_DLL_CALIB_CTRL_VAL_OFFSET,
+	       offsetof(struct emif_regs_amx3, emif_dll_calib_ctrl_val));
+	DEFINE(EMIF_DLL_CALIB_CTRL_VAL_SHDW_OFFSET,
+	       offsetof(struct emif_regs_amx3, emif_dll_calib_ctrl_val_shdw));
+	DEFINE(EMIF_DDR_PHY_CTLR_1_OFFSET,
+	       offsetof(struct emif_regs_amx3, emif_ddr_phy_ctlr_1));
+	DEFINE(EMIF_EXT_PHY_CTRL_VALS_OFFSET,
+	       offsetof(struct emif_regs_amx3, emif_ext_phy_ctrl_vals));
+	DEFINE(EMIF_REGS_AMX3_SIZE, sizeof(struct emif_regs_amx3));
+
+	BLANK();
+
+	DEFINE(EMIF_PM_BASE_ADDR_VIRT_OFFSET,
+	       offsetof(struct ti_emif_pm_data, ti_emif_base_addr_virt));
+	DEFINE(EMIF_PM_BASE_ADDR_PHYS_OFFSET,
+	       offsetof(struct ti_emif_pm_data, ti_emif_base_addr_phys));
+	DEFINE(EMIF_PM_CONFIG_OFFSET,
+	       offsetof(struct ti_emif_pm_data, ti_emif_sram_config));
+	DEFINE(EMIF_PM_REGS_VIRT_OFFSET,
+	       offsetof(struct ti_emif_pm_data, regs_virt));
+	DEFINE(EMIF_PM_REGS_PHYS_OFFSET,
+	       offsetof(struct ti_emif_pm_data, regs_phys));
+	DEFINE(EMIF_PM_DATA_SIZE, sizeof(struct ti_emif_pm_data));
+
+	BLANK();
+
+	DEFINE(EMIF_PM_SAVE_CONTEXT_OFFSET,
+	       offsetof(struct ti_emif_pm_functions, save_context));
+	DEFINE(EMIF_PM_RESTORE_CONTEXT_OFFSET,
+	       offsetof(struct ti_emif_pm_functions, restore_context));
+	DEFINE(EMIF_PM_ENTER_SR_OFFSET,
+	       offsetof(struct ti_emif_pm_functions, enter_sr));
+	DEFINE(EMIF_PM_EXIT_SR_OFFSET,
+	       offsetof(struct ti_emif_pm_functions, exit_sr));
+	DEFINE(EMIF_PM_ABORT_SR_OFFSET,
+	       offsetof(struct ti_emif_pm_functions, abort_sr));
+	DEFINE(EMIF_PM_FUNCTIONS_SIZE, sizeof(struct ti_emif_pm_functions));
+}
+
 struct gen_pool;
 
 int ti_emif_copy_pm_function_table(struct gen_pool *sram_pool, void *dst);
-- 
cgit v1.2.3


From ccce20fc7968d546fb1e8e147bf5cdc8afc4278a Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bart.vanassche@wdc.com>
Date: Mon, 16 Apr 2018 18:04:41 -0700
Subject: scsi: sd_zbc: Avoid that resetting a zone fails sporadically

Since SCSI scanning occurs asynchronously, since sd_revalidate_disk() is
called from sd_probe_async() and since sd_revalidate_disk() calls
sd_zbc_read_zones() it can happen that sd_zbc_read_zones() is called
concurrently with blkdev_report_zones() and/or blkdev_reset_zones().  That can
cause these functions to fail with -EIO because sd_zbc_read_zones() e.g. sets
q->nr_zones to zero before restoring it to the actual value, even if no drive
characteristics have changed.  Avoid that this can happen by making the
following changes:

- Protect the code that updates zone information with blk_queue_enter()
  and blk_queue_exit().
- Modify sd_zbc_setup_seq_zones_bitmap() and sd_zbc_setup() such that
  these functions do not modify struct scsi_disk before all zone
  information has been obtained.

Note: since commit 055f6e18e08f ("block: Make q_usage_counter also track
legacy requests"; kernel v4.15) the request queue freezing mechanism also
affects legacy request queues.

Fixes: 89d947561077 ("sd: Implement support for ZBC devices")
Signed-off-by: Bart Van Assche <bart.vanassche@wdc.com>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Damien Le Moal <damien.lemoal@wdc.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Hannes Reinecke <hare@suse.com>
Cc: stable@vger.kernel.org # v4.16
Reviewed-by: Damien Le Moal <damien.lemoal@wdc.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 include/linux/blkdev.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 9af3e0f430bc..21e21f273a21 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -605,6 +605,11 @@ struct request_queue {
 	 * initialized by the low level device driver (e.g. scsi/sd.c).
 	 * Stacking drivers (device mappers) may or may not initialize
 	 * these fields.
+	 *
+	 * Reads of this information must be protected with blk_queue_enter() /
+	 * blk_queue_exit(). Modifying this information is only allowed while
+	 * no requests are being processed. See also blk_mq_freeze_queue() and
+	 * blk_mq_unfreeze_queue().
 	 */
 	unsigned int		nr_zones;
 	unsigned long		*seq_zones_bitmap;
-- 
cgit v1.2.3


From ed4564babeeee4fb19fe4ec0beabe29754e380f9 Mon Sep 17 00:00:00 2001
From: Arend van Spriel <aspriel@gmail.com>
Date: Sun, 8 Apr 2018 23:57:07 +0200
Subject: drivers: change struct device_driver::coredump() return type to void

Upon submitting a patch for mwifiex [1] it was discussed whether this
callback function could fail. To keep things simple there is no need
for the error code so the driver can do the task synchronous or not
without worries. Currently the device driver core already ignores the
return value so changing it to void.

[1] https://patchwork.kernel.org/patch/10231933/

Signed-off-by: Arend van Spriel <aspriel@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/device.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/device.h b/include/linux/device.h
index 0059b99e1f25..477956990f5e 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -256,7 +256,9 @@ enum probe_type {
  *		automatically.
  * @pm:		Power management operations of the device which matched
  *		this driver.
- * @coredump:	Called through sysfs to initiate a device coredump.
+ * @coredump:	Called when sysfs entry is written to. The device driver
+ *		is expected to call the dev_coredump API resulting in a
+ *		uevent.
  * @p:		Driver core's private data, no one other than the driver
  *		core can touch this.
  *
@@ -288,7 +290,7 @@ struct device_driver {
 	const struct attribute_group **groups;
 
 	const struct dev_pm_ops *pm;
-	int (*coredump) (struct device *dev);
+	void (*coredump) (struct device *dev);
 
 	struct driver_private *p;
 };
-- 
cgit v1.2.3


From 8a9fd8323087e794f1d3cd4850b393ced048bc73 Mon Sep 17 00:00:00 2001
From: Mathieu Poirier <mathieu.poirier@linaro.org>
Date: Wed, 18 Apr 2018 16:05:18 -0600
Subject: coresight: Move to SPDX identifier

Move CoreSight headers to the SPDX identifier.

Signed-off-by: Mathieu Poirier <mathieu.poirier@linaro.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: linux-arm-kernel@lists.infradead.org
Link: http://lkml.kernel.org/r/1524089118-27595-1-git-send-email-mathieu.poirier@linaro.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 include/linux/coresight-pmu.h | 13 +------------
 1 file changed, 1 insertion(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/coresight-pmu.h b/include/linux/coresight-pmu.h
index edfeaba95429..a1a959ba24ff 100644
--- a/include/linux/coresight-pmu.h
+++ b/include/linux/coresight-pmu.h
@@ -1,18 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Copyright(C) 2015 Linaro Limited. All rights reserved.
  * Author: Mathieu Poirier <mathieu.poirier@linaro.org>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published by
- * the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
 #ifndef _LINUX_CORESIGHT_PMU_H
-- 
cgit v1.2.3


From d90a10e2444ba5a351fa695917258ff4c5709fa5 Mon Sep 17 00:00:00 2001
From: Robert Kolchmeyer <rkolchmeyer@google.com>
Date: Thu, 19 Apr 2018 10:44:33 -0700
Subject: fsnotify: Fix fsnotify_mark_connector race

fsnotify() acquires a reference to a fsnotify_mark_connector through
the SRCU-protected pointer to_tell->i_fsnotify_marks. However, it
appears that no precautions are taken in fsnotify_put_mark() to
ensure that fsnotify() drops its reference to this
fsnotify_mark_connector before assigning a value to its 'destroy_next'
field. This can result in fsnotify_put_mark() assigning a value
to a connector's 'destroy_next' field right before fsnotify() tries to
traverse the linked list referenced by the connector's 'list' field.
Since these two fields are members of the same union, this behavior
results in a kernel panic.

This issue is resolved by moving the connector's 'destroy_next' field
into the object pointer union. This should work since the object pointer
access is protected by both a spinlock and the value of the 'flags'
field, and the 'flags' field is cleared while holding the spinlock in
fsnotify_put_mark() before 'destroy_next' is updated. It shouldn't be
possible for another thread to accidentally read from the object pointer
after the 'destroy_next' field is updated.

The offending behavior here is extremely unlikely; since
fsnotify_put_mark() removes references to a connector (specifically,
it ensures that the connector is unreachable from the inode it was
formerly attached to) before updating its 'destroy_next' field, a
sizeable chunk of code in fsnotify_put_mark() has to execute in the
short window between when fsnotify() acquires the connector reference
and saves the value of its 'list' field. On the HEAD kernel, I've only
been able to reproduce this by inserting a udelay(1) in fsnotify().
However, I've been able to reproduce this issue without inserting a
udelay(1) anywhere on older unmodified release kernels, so I believe
it's worth fixing at HEAD.

References: https://bugzilla.kernel.org/show_bug.cgi?id=199437
Fixes: 08991e83b7286635167bab40927665a90fb00d81
CC: stable@vger.kernel.org
Signed-off-by: Robert Kolchmeyer <rkolchmeyer@google.com>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 include/linux/fsnotify_backend.h | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index 9f1edb92c97e..a3d13d874fd1 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -217,12 +217,10 @@ struct fsnotify_mark_connector {
 	union {	/* Object pointer [lock] */
 		struct inode *inode;
 		struct vfsmount *mnt;
-	};
-	union {
-		struct hlist_head list;
 		/* Used listing heads to free after srcu period expires */
 		struct fsnotify_mark_connector *destroy_next;
 	};
+	struct hlist_head list;
 };
 
 /*
-- 
cgit v1.2.3


From e01e80634ecdde1dd113ac43b3adad21b47f3957 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Fri, 20 Apr 2018 14:55:31 -0700
Subject: fork: unconditionally clear stack on fork

One of the classes of kernel stack content leaks[1] is exposing the
contents of prior heap or stack contents when a new process stack is
allocated.  Normally, those stacks are not zeroed, and the old contents
remain in place.  In the face of stack content exposure flaws, those
contents can leak to userspace.

Fixing this will make the kernel no longer vulnerable to these flaws, as
the stack will be wiped each time a stack is assigned to a new process.
There's not a meaningful change in runtime performance; it almost looks
like it provides a benefit.

Performing back-to-back kernel builds before:
	Run times: 157.86 157.09 158.90 160.94 160.80
	Mean: 159.12
	Std Dev: 1.54

and after:
	Run times: 159.31 157.34 156.71 158.15 160.81
	Mean: 158.46
	Std Dev: 1.46

Instead of making this a build or runtime config, Andy Lutomirski
recommended this just be enabled by default.

[1] A noisy search for many kinds of stack content leaks can be seen here:
https://cve.mitre.org/cgi-bin/cvekey.cgi?keyword=linux+kernel+stack+leak

I did some more with perf and cycle counts on running 100,000 execs of
/bin/true.

before:
Cycles: 218858861551 218853036130 214727610969 227656844122 224980542841
Mean:  221015379122.60
Std Dev: 4662486552.47

after:
Cycles: 213868945060 213119275204 211820169456 224426673259 225489986348
Mean:  217745009865.40
Std Dev: 5935559279.99

It continues to look like it's faster, though the deviation is rather
wide, but I'm not sure what I could do that would be less noisy.  I'm
open to ideas!

Link: http://lkml.kernel.org/r/20180221021659.GA37073@beast
Signed-off-by: Kees Cook <keescook@chromium.org>
Acked-by: Michal Hocko <mhocko@suse.com>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Laura Abbott <labbott@redhat.com>
Cc: Rasmus Villemoes <rasmus.villemoes@prevas.dk>
Cc: Mel Gorman <mgorman@techsingularity.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/thread_info.h | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h
index 34f053a150a9..cf2862bd134a 100644
--- a/include/linux/thread_info.h
+++ b/include/linux/thread_info.h
@@ -43,11 +43,7 @@ enum {
 #define THREAD_ALIGN	THREAD_SIZE
 #endif
 
-#if IS_ENABLED(CONFIG_DEBUG_STACK_USAGE) || IS_ENABLED(CONFIG_DEBUG_KMEMLEAK)
-# define THREADINFO_GFP		(GFP_KERNEL_ACCOUNT | __GFP_ZERO)
-#else
-# define THREADINFO_GFP		(GFP_KERNEL_ACCOUNT)
-#endif
+#define THREADINFO_GFP		(GFP_KERNEL_ACCOUNT | __GFP_ZERO)
 
 /*
  * flag set/clear/test wrappers
-- 
cgit v1.2.3


From 2e898e4c0a3897ccd434adac5abb8330194f527b Mon Sep 17 00:00:00 2001
From: Greg Thelen <gthelen@google.com>
Date: Fri, 20 Apr 2018 14:55:42 -0700
Subject: writeback: safer lock nesting

lock_page_memcg()/unlock_page_memcg() use spin_lock_irqsave/restore() if
the page's memcg is undergoing move accounting, which occurs when a
process leaves its memcg for a new one that has
memory.move_charge_at_immigrate set.

unlocked_inode_to_wb_begin,end() use spin_lock_irq/spin_unlock_irq() if
the given inode is switching writeback domains.  Switches occur when
enough writes are issued from a new domain.

This existing pattern is thus suspicious:
    lock_page_memcg(page);
    unlocked_inode_to_wb_begin(inode, &locked);
    ...
    unlocked_inode_to_wb_end(inode, locked);
    unlock_page_memcg(page);

If both inode switch and process memcg migration are both in-flight then
unlocked_inode_to_wb_end() will unconditionally enable interrupts while
still holding the lock_page_memcg() irq spinlock.  This suggests the
possibility of deadlock if an interrupt occurs before unlock_page_memcg().

    truncate
    __cancel_dirty_page
    lock_page_memcg
    unlocked_inode_to_wb_begin
    unlocked_inode_to_wb_end
    <interrupts mistakenly enabled>
                                    <interrupt>
                                    end_page_writeback
                                    test_clear_page_writeback
                                    lock_page_memcg
                                    <deadlock>
    unlock_page_memcg

Due to configuration limitations this deadlock is not currently possible
because we don't mix cgroup writeback (a cgroupv2 feature) and
memory.move_charge_at_immigrate (a cgroupv1 feature).

If the kernel is hacked to always claim inode switching and memcg
moving_account, then this script triggers lockup in less than a minute:

  cd /mnt/cgroup/memory
  mkdir a b
  echo 1 > a/memory.move_charge_at_immigrate
  echo 1 > b/memory.move_charge_at_immigrate
  (
    echo $BASHPID > a/cgroup.procs
    while true; do
      dd if=/dev/zero of=/mnt/big bs=1M count=256
    done
  ) &
  while true; do
    sync
  done &
  sleep 1h &
  SLEEP=$!
  while true; do
    echo $SLEEP > a/cgroup.procs
    echo $SLEEP > b/cgroup.procs
  done

The deadlock does not seem possible, so it's debatable if there's any
reason to modify the kernel.  I suggest we should to prevent future
surprises.  And Wang Long said "this deadlock occurs three times in our
environment", so there's more reason to apply this, even to stable.
Stable 4.4 has minor conflicts applying this patch.  For a clean 4.4 patch
see "[PATCH for-4.4] writeback: safer lock nesting"
https://lkml.org/lkml/2018/4/11/146

Wang Long said "this deadlock occurs three times in our environment"

[gthelen@google.com: v4]
  Link: http://lkml.kernel.org/r/20180411084653.254724-1-gthelen@google.com
[akpm@linux-foundation.org: comment tweaks, struct initialization simplification]
Change-Id: Ibb773e8045852978f6207074491d262f1b3fb613
Link: http://lkml.kernel.org/r/20180410005908.167976-1-gthelen@google.com
Fixes: 682aa8e1a6a1 ("writeback: implement unlocked_inode_to_wb transaction and use it for stat updates")
Signed-off-by: Greg Thelen <gthelen@google.com>
Reported-by: Wang Long <wanglong19@meituan.com>
Acked-by: Wang Long <wanglong19@meituan.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Tejun Heo <tj@kernel.org>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: <stable@vger.kernel.org>	[v4.2+]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/backing-dev-defs.h |  5 +++++
 include/linux/backing-dev.h      | 30 ++++++++++++++++--------------
 2 files changed, 21 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h
index bfe86b54f6c1..0bd432a4d7bd 100644
--- a/include/linux/backing-dev-defs.h
+++ b/include/linux/backing-dev-defs.h
@@ -223,6 +223,11 @@ static inline void set_bdi_congested(struct backing_dev_info *bdi, int sync)
 	set_wb_congested(bdi->wb.congested, sync);
 }
 
+struct wb_lock_cookie {
+	bool locked;
+	unsigned long flags;
+};
+
 #ifdef CONFIG_CGROUP_WRITEBACK
 
 /**
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index f6be4b0b6c18..72ca0f3d39f3 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -347,7 +347,7 @@ static inline struct bdi_writeback *inode_to_wb(const struct inode *inode)
 /**
  * unlocked_inode_to_wb_begin - begin unlocked inode wb access transaction
  * @inode: target inode
- * @lockedp: temp bool output param, to be passed to the end function
+ * @cookie: output param, to be passed to the end function
  *
  * The caller wants to access the wb associated with @inode but isn't
  * holding inode->i_lock, the i_pages lock or wb->list_lock.  This
@@ -355,12 +355,12 @@ static inline struct bdi_writeback *inode_to_wb(const struct inode *inode)
  * association doesn't change until the transaction is finished with
  * unlocked_inode_to_wb_end().
  *
- * The caller must call unlocked_inode_to_wb_end() with *@lockdep
- * afterwards and can't sleep during transaction.  IRQ may or may not be
- * disabled on return.
+ * The caller must call unlocked_inode_to_wb_end() with *@cookie afterwards and
+ * can't sleep during the transaction.  IRQs may or may not be disabled on
+ * return.
  */
 static inline struct bdi_writeback *
-unlocked_inode_to_wb_begin(struct inode *inode, bool *lockedp)
+unlocked_inode_to_wb_begin(struct inode *inode, struct wb_lock_cookie *cookie)
 {
 	rcu_read_lock();
 
@@ -368,10 +368,10 @@ unlocked_inode_to_wb_begin(struct inode *inode, bool *lockedp)
 	 * Paired with store_release in inode_switch_wb_work_fn() and
 	 * ensures that we see the new wb if we see cleared I_WB_SWITCH.
 	 */
-	*lockedp = smp_load_acquire(&inode->i_state) & I_WB_SWITCH;
+	cookie->locked = smp_load_acquire(&inode->i_state) & I_WB_SWITCH;
 
-	if (unlikely(*lockedp))
-		xa_lock_irq(&inode->i_mapping->i_pages);
+	if (unlikely(cookie->locked))
+		xa_lock_irqsave(&inode->i_mapping->i_pages, cookie->flags);
 
 	/*
 	 * Protected by either !I_WB_SWITCH + rcu_read_lock() or the i_pages
@@ -383,12 +383,13 @@ unlocked_inode_to_wb_begin(struct inode *inode, bool *lockedp)
 /**
  * unlocked_inode_to_wb_end - end inode wb access transaction
  * @inode: target inode
- * @locked: *@lockedp from unlocked_inode_to_wb_begin()
+ * @cookie: @cookie from unlocked_inode_to_wb_begin()
  */
-static inline void unlocked_inode_to_wb_end(struct inode *inode, bool locked)
+static inline void unlocked_inode_to_wb_end(struct inode *inode,
+					    struct wb_lock_cookie *cookie)
 {
-	if (unlikely(locked))
-		xa_unlock_irq(&inode->i_mapping->i_pages);
+	if (unlikely(cookie->locked))
+		xa_unlock_irqrestore(&inode->i_mapping->i_pages, cookie->flags);
 
 	rcu_read_unlock();
 }
@@ -435,12 +436,13 @@ static inline struct bdi_writeback *inode_to_wb(struct inode *inode)
 }
 
 static inline struct bdi_writeback *
-unlocked_inode_to_wb_begin(struct inode *inode, bool *lockedp)
+unlocked_inode_to_wb_begin(struct inode *inode, struct wb_lock_cookie *cookie)
 {
 	return inode_to_wb(inode);
 }
 
-static inline void unlocked_inode_to_wb_end(struct inode *inode, bool locked)
+static inline void unlocked_inode_to_wb_end(struct inode *inode,
+					    struct wb_lock_cookie *cookie)
 {
 }
 
-- 
cgit v1.2.3


From 12c8f25a016dff69ee284aa3338bebfd2cfcba33 Mon Sep 17 00:00:00 2001
From: Andrey Konovalov <andreyknvl@google.com>
Date: Fri, 20 Apr 2018 14:55:52 -0700
Subject: kasan: add no_sanitize attribute for clang builds

KASAN uses the __no_sanitize_address macro to disable instrumentation of
particular functions.  Right now it's defined only for GCC build, which
causes false positives when clang is used.

This patch adds a definition for clang.

Note, that clang's revision 329612 or higher is required.

[andreyknvl@google.com: remove redundant #ifdef CONFIG_KASAN check]
  Link: http://lkml.kernel.org/r/c79aa31a2a2790f6131ed607c58b0dd45dd62a6c.1523967959.git.andreyknvl@google.com
Link: http://lkml.kernel.org/r/4ad725cc903f8534f8c8a60f0daade5e3d674f8d.1523554166.git.andreyknvl@google.com
Signed-off-by: Andrey Konovalov <andreyknvl@google.com>
Acked-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
Cc: Alexander Potapenko <glider@google.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: David Woodhouse <dwmw@amazon.co.uk>
Cc: Andrey Konovalov <andreyknvl@google.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Paul Lawrence <paullawrence@google.com>
Cc: Sandipan Das <sandipan@linux.vnet.ibm.com>
Cc: Kees Cook <keescook@chromium.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/compiler-clang.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h
index ceb96ecab96e..7d98e263e048 100644
--- a/include/linux/compiler-clang.h
+++ b/include/linux/compiler-clang.h
@@ -25,6 +25,9 @@
 #define __SANITIZE_ADDRESS__
 #endif
 
+#undef __no_sanitize_address
+#define __no_sanitize_address __attribute__((no_sanitize("address")))
+
 /* Clang doesn't have a way to turn it off per-function, yet. */
 #ifdef __noretpoline
 #undef __noretpoline
-- 
cgit v1.2.3


From dd709e72cb934eefd44de8d9969097173fbf45dc Mon Sep 17 00:00:00 2001
From: Daniel Kurtz <djkurtz@chromium.org>
Date: Fri, 6 Apr 2018 17:21:53 -0600
Subject: earlycon: Use a pointer table to fix __earlycon_table stride

Commit 99492c39f39f ("earlycon: Fix __earlycon_table stride") tried to fix
__earlycon_table stride by forcing the earlycon_id struct alignment to 32
and asking the linker to 32-byte align the __earlycon_table symbol.  This
fix was based on commit 07fca0e57fca92 ("tracing: Properly align linker
defined symbols") which tried a similar fix for the tracing subsystem.

However, this fix doesn't quite work because there is no guarantee that
gcc will place structures packed into an array format.  In fact, gcc 4.9
chooses to 64-byte align these structs by inserting additional padding
between the entries because it has no clue that they are supposed to be in
an array.  If we are unlucky, the linker will assign symbol
"__earlycon_table" to a 32-byte aligned address which does not correspond
to the 64-byte aligned contents of section "__earlycon_table".

To address this same problem, the fix to the tracing system was
subsequently re-implemented using a more robust table of pointers approach
by commits:
 3d56e331b653 ("tracing: Replace syscall_meta_data struct array with pointer array")
 654986462939 ("tracepoints: Fix section alignment using pointer array")
 e4a9ea5ee7c8 ("tracing: Replace trace_event struct array with pointer array")

Let's use this same "array of pointers to structs" approach for
EARLYCON_TABLE.

Fixes: 99492c39f39f ("earlycon: Fix __earlycon_table stride")
Signed-off-by: Daniel Kurtz <djkurtz@chromium.org>
Suggested-by: Aaron Durbin <adurbin@chromium.org>
Reviewed-by: Rob Herring <robh@kernel.org>
Tested-by: Guenter Roeck <groeck@chromium.org>
Reviewed-by: Guenter Roeck <groeck@chromium.org>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/serial_core.h | 21 ++++++++++++++-------
 1 file changed, 14 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h
index 1d356105f25a..b4c9fda9d833 100644
--- a/include/linux/serial_core.h
+++ b/include/linux/serial_core.h
@@ -351,10 +351,10 @@ struct earlycon_id {
 	char	name[16];
 	char	compatible[128];
 	int	(*setup)(struct earlycon_device *, const char *options);
-} __aligned(32);
+};
 
-extern const struct earlycon_id __earlycon_table[];
-extern const struct earlycon_id __earlycon_table_end[];
+extern const struct earlycon_id *__earlycon_table[];
+extern const struct earlycon_id *__earlycon_table_end[];
 
 #if defined(CONFIG_SERIAL_EARLYCON) && !defined(MODULE)
 #define EARLYCON_USED_OR_UNUSED	__used
@@ -362,12 +362,19 @@ extern const struct earlycon_id __earlycon_table_end[];
 #define EARLYCON_USED_OR_UNUSED	__maybe_unused
 #endif
 
-#define OF_EARLYCON_DECLARE(_name, compat, fn)				\
-	static const struct earlycon_id __UNIQUE_ID(__earlycon_##_name)	\
-	     EARLYCON_USED_OR_UNUSED __section(__earlycon_table)	\
+#define _OF_EARLYCON_DECLARE(_name, compat, fn, unique_id)		\
+	static const struct earlycon_id unique_id			\
+	     EARLYCON_USED_OR_UNUSED __initconst			\
 		= { .name = __stringify(_name),				\
 		    .compatible = compat,				\
-		    .setup = fn  }
+		    .setup = fn  };					\
+	static const struct earlycon_id EARLYCON_USED_OR_UNUSED		\
+		__section(__earlycon_table)				\
+		* const __PASTE(__p, unique_id) = &unique_id
+
+#define OF_EARLYCON_DECLARE(_name, compat, fn)				\
+	_OF_EARLYCON_DECLARE(_name, compat, fn,				\
+			     __UNIQUE_ID(__earlycon_##_name))
 
 #define EARLYCON_DECLARE(_name, fn)	OF_EARLYCON_DECLARE(_name, "", fn)
 
-- 
cgit v1.2.3


From 903f9db10f18f735e62ba447147b6c434b6af003 Mon Sep 17 00:00:00 2001
From: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Date: Thu, 5 Apr 2018 19:40:16 +0900
Subject: tty: Don't call panic() at tty_ldisc_init()

syzbot is reporting kernel panic [1] triggered by memory allocation failure
at tty_ldisc_get() from tty_ldisc_init(). But since both tty_ldisc_get()
and caller of tty_ldisc_init() can cleanly handle errors, tty_ldisc_init()
does not need to call panic() when tty_ldisc_get() failed.

[1] https://syzkaller.appspot.com/bug?id=883431818e036ae6a9981156a64b821110f39187

Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Reported-by: syzbot <syzkaller@googlegroups.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Jiri Slaby <jslaby@suse.com>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/tty.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/tty.h b/include/linux/tty.h
index 47f8af22f216..1dd587ba6d88 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -701,7 +701,7 @@ extern int tty_unregister_ldisc(int disc);
 extern int tty_set_ldisc(struct tty_struct *tty, int disc);
 extern int tty_ldisc_setup(struct tty_struct *tty, struct tty_struct *o_tty);
 extern void tty_ldisc_release(struct tty_struct *tty);
-extern void tty_ldisc_init(struct tty_struct *tty);
+extern int __must_check tty_ldisc_init(struct tty_struct *tty);
 extern void tty_ldisc_deinit(struct tty_struct *tty);
 extern int tty_ldisc_receive_buf(struct tty_ldisc *ld, const unsigned char *p,
 				 char *f, int count);
-- 
cgit v1.2.3


From 02cfde67df1f440c7c3c7038cc97992afb81804f Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Wed, 18 Apr 2018 15:24:47 +0200
Subject: virt: vbox: Move declarations of vboxguest private functions to
 private header

Move the declarations of functions from vboxguest_utils.c which are only
meant for vboxguest internal use from include/linux/vbox_utils.h to
drivers/virt/vboxguest/vboxguest_core.h.

Cc: stable@vger.kernel.org
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/vbox_utils.h | 23 -----------------------
 1 file changed, 23 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/vbox_utils.h b/include/linux/vbox_utils.h
index c71def6b310f..a240ed2a0372 100644
--- a/include/linux/vbox_utils.h
+++ b/include/linux/vbox_utils.h
@@ -24,24 +24,6 @@ __printf(1, 2) void vbg_debug(const char *fmt, ...);
 #define vbg_debug pr_debug
 #endif
 
-/**
- * Allocate memory for generic request and initialize the request header.
- *
- * Return: the allocated memory
- * @len:		Size of memory block required for the request.
- * @req_type:		The generic request type.
- */
-void *vbg_req_alloc(size_t len, enum vmmdev_request_type req_type);
-
-/**
- * Perform a generic request.
- *
- * Return: VBox status code
- * @gdev:		The Guest extension device.
- * @req:		Pointer to the request structure.
- */
-int vbg_req_perform(struct vbg_dev *gdev, void *req);
-
 int vbg_hgcm_connect(struct vbg_dev *gdev,
 		     struct vmmdev_hgcm_service_location *loc,
 		     u32 *client_id, int *vbox_status);
@@ -52,11 +34,6 @@ int vbg_hgcm_call(struct vbg_dev *gdev, u32 client_id, u32 function,
 		  u32 timeout_ms, struct vmmdev_hgcm_function_parameter *parms,
 		  u32 parm_count, int *vbox_status);
 
-int vbg_hgcm_call32(
-	struct vbg_dev *gdev, u32 client_id, u32 function, u32 timeout_ms,
-	struct vmmdev_hgcm_function_parameter32 *parm32, u32 parm_count,
-	int *vbox_status);
-
 /**
  * Convert a VirtualBox status code to a standard Linux kernel return value.
  * Return: 0 or negative errno value.
-- 
cgit v1.2.3


From 6899b32b5b2dee358936b82b8363b716607a138f Mon Sep 17 00:00:00 2001
From: Roman Gushchin <guro@fb.com>
Date: Mon, 23 Apr 2018 18:09:21 +0100
Subject: bpf: disable and restore preemption in __BPF_PROG_RUN_ARRAY

Running bpf programs requires disabled preemption,
however at least some* of the BPF_PROG_RUN_ARRAY users
do not follow this rule.

To fix this bug, and also to make it not happen in the future,
let's add explicit preemption disabling/re-enabling
to the __BPF_PROG_RUN_ARRAY code.

* for example:
 [   17.624472] RIP: 0010:__cgroup_bpf_run_filter_sk+0x1c4/0x1d0
 ...
 [   17.640890]  inet6_create+0x3eb/0x520
 [   17.641405]  __sock_create+0x242/0x340
 [   17.641939]  __sys_socket+0x57/0xe0
 [   17.642370]  ? trace_hardirqs_off_thunk+0x1a/0x1c
 [   17.642944]  SyS_socket+0xa/0x10
 [   17.643357]  do_syscall_64+0x79/0x220
 [   17.643879]  entry_SYSCALL_64_after_hwframe+0x42/0xb7

Signed-off-by: Roman Gushchin <guro@fb.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/linux/bpf.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 486e65e3db26..dc586cc64bc2 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -351,6 +351,7 @@ int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array,
 		struct bpf_prog **_prog, *__prog;	\
 		struct bpf_prog_array *_array;		\
 		u32 _ret = 1;				\
+		preempt_disable();			\
 		rcu_read_lock();			\
 		_array = rcu_dereference(array);	\
 		if (unlikely(check_non_null && !_array))\
@@ -362,6 +363,7 @@ int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array,
 		}					\
 _out:							\
 		rcu_read_unlock();			\
+		preempt_enable_no_resched();		\
 		_ret;					\
 	 })
 
-- 
cgit v1.2.3


From ba6b8de423f8d0dee48d6030288ed81c03ddf9f0 Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Mon, 23 Apr 2018 15:39:23 -0700
Subject: bpf: sockmap, map_release does not hold refcnt for pinned maps

Relying on map_release hook to decrement the reference counts when a
map is removed only works if the map is not being pinned. In the
pinned case the ref is decremented immediately and the BPF programs
released. After this BPF programs may not be in-use which is not
what the user would expect.

This patch moves the release logic into bpf_map_put_uref() and brings
sockmap in-line with how a similar case is handled in prog array maps.

Fixes: 3d9e952697de ("bpf: sockmap, fix leaking maps with attached but not detached progs")
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/linux/bpf.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index dc586cc64bc2..469b20e1dd7e 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -31,6 +31,7 @@ struct bpf_map_ops {
 	void (*map_release)(struct bpf_map *map, struct file *map_file);
 	void (*map_free)(struct bpf_map *map);
 	int (*map_get_next_key)(struct bpf_map *map, void *key, void *next_key);
+	void (*map_release_uref)(struct bpf_map *map);
 
 	/* funcs callable from userspace and from eBPF programs */
 	void *(*map_lookup_elem)(struct bpf_map *map, void *key);
@@ -436,7 +437,6 @@ int bpf_stackmap_copy(struct bpf_map *map, void *key, void *value);
 int bpf_fd_array_map_update_elem(struct bpf_map *map, struct file *map_file,
 				 void *key, void *value, u64 map_flags);
 int bpf_fd_array_map_lookup_elem(struct bpf_map *map, void *key, u32 *value);
-void bpf_fd_array_map_clear(struct bpf_map *map);
 int bpf_fd_htab_map_update_elem(struct bpf_map *map, struct file *map_file,
 				void *key, void *value, u64 map_flags);
 int bpf_fd_htab_map_lookup_elem(struct bpf_map *map, void *key, u32 *value);
-- 
cgit v1.2.3


From 6510bbc88e3258631831ade49033537081950605 Mon Sep 17 00:00:00 2001
From: Joakim Tjernlund <joakim.tjernlund@transmode.se>
Date: Thu, 1 Mar 2018 14:39:39 +0100
Subject: mtd: cfi: cmdset_0001: Do not allow read/write to suspend erase
 block.

Currently it is possible to read and/or write to suspend EB's.
Writing /dev/mtdX or /dev/mtdblockX from several processes may
break the flash state machine.

Signed-off-by: Joakim Tjernlund <joakim.tjernlund@infinera.com>
Cc: <stable@vger.kernel.org>
Reviewed-by: Richard Weinberger <richard@nod.at>
Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
---
 include/linux/mtd/flashchip.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/mtd/flashchip.h b/include/linux/mtd/flashchip.h
index b63fa457febd..3529683f691e 100644
--- a/include/linux/mtd/flashchip.h
+++ b/include/linux/mtd/flashchip.h
@@ -85,6 +85,7 @@ struct flchip {
 	unsigned int write_suspended:1;
 	unsigned int erase_suspended:1;
 	unsigned long in_progress_block_addr;
+	unsigned long in_progress_block_mask;
 
 	struct mutex mutex;
 	wait_queue_head_t wq; /* Wait on here when we're waiting for the chip
-- 
cgit v1.2.3


From d805c5209350ae725e3a1ee0204ba27d9e75ce3e Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Mon, 23 Apr 2018 15:51:38 -0700
Subject: net: ethtool: Add missing kernel doc for FEC parameters

While adding support for ethtool::get_fecparam and set_fecparam, kernel
doc for these functions was missed, add those.

Fixes: 1a5f3da20bd9 ("net: ethtool: add support for forward error correction modes")
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Acked-by: Roopa Prabhu <roopa@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ethtool.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index ebe41811ed34..b32cd2062f18 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -310,6 +310,8 @@ bool ethtool_convert_link_mode_to_legacy_u32(u32 *legacy_u32,
  *	fields should be ignored (use %__ETHTOOL_LINK_MODE_MASK_NBITS
  *	instead of the latter), any change to them will be overwritten
  *	by kernel. Returns a negative error code or zero.
+ * @get_fecparam: Get the network device Forward Error Correction parameters.
+ * @set_fecparam: Set the network device Forward Error Correction parameters.
  *
  * All operations are optional (i.e. the function pointer may be set
  * to %NULL) and callers must take this into account.  Callers must
-- 
cgit v1.2.3


From fe644072dfee069d97a66ea9a80f4bc461499e6a Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Fri, 20 Apr 2018 10:29:51 +0200
Subject: block: mq: Add some minor doc for core structs

As it came up in discussion on the mailing list that the semantic
meaning of 'blk_mq_ctx' and 'blk_mq_hw_ctx' isn't completely
obvious to everyone, let's add some minimal kerneldoc for a
starter.

Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blk-mq.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index e3986f4b3461..ebc34a5686dc 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -9,6 +9,9 @@
 struct blk_mq_tags;
 struct blk_flush_queue;
 
+/**
+ * struct blk_mq_hw_ctx - State for a hardware queue facing the hardware block device
+ */
 struct blk_mq_hw_ctx {
 	struct {
 		spinlock_t		lock;
-- 
cgit v1.2.3


From 24a7e4d20783c0514850f24a5c41ede46ab058f0 Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Fri, 20 Apr 2018 20:22:40 +0300
Subject: virtio: add ability to iterate over vqs

For cleanup it's helpful to be able to simply scan all vqs and discard
all data. Add an iterator to do that.

Cc: stable@vger.kernel.org
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 include/linux/virtio.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index 988c7355bc22..fa1b5da2804e 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -157,6 +157,9 @@ int virtio_device_freeze(struct virtio_device *dev);
 int virtio_device_restore(struct virtio_device *dev);
 #endif
 
+#define virtio_device_for_each_vq(vdev, vq) \
+	list_for_each_entry(vq, &vdev->vqs, list)
+
 /**
  * virtio_driver - operations for a virtio I/O driver
  * @driver: underlying device driver (populate name and owner).
-- 
cgit v1.2.3


From fcd58037f28bf70eb17157a51fbf94d466634a7d Mon Sep 17 00:00:00 2001
From: Arnaud Pouliquen <arnaud.pouliquen@st.com>
Date: Tue, 10 Apr 2018 10:57:25 +0200
Subject: remoteproc: fix crashed parameter logic on stop call

Fix rproc_add_subdev parameter name and inverse the crashed logic.

Fixes: 880f5b388252 ("remoteproc: Pass type of shutdown to subdev remove")
Reviewed-by: Alex Elder <elder@linaro.org>
Signed-off-by: Arnaud Pouliquen <arnaud.pouliquen@st.com>
Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
---
 include/linux/remoteproc.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h
index d09a9c7af109..dfdaede9139e 100644
--- a/include/linux/remoteproc.h
+++ b/include/linux/remoteproc.h
@@ -569,7 +569,7 @@ static inline struct rproc *vdev_to_rproc(struct virtio_device *vdev)
 void rproc_add_subdev(struct rproc *rproc,
 		      struct rproc_subdev *subdev,
 		      int (*probe)(struct rproc_subdev *subdev),
-		      void (*remove)(struct rproc_subdev *subdev, bool graceful));
+		      void (*remove)(struct rproc_subdev *subdev, bool crashed));
 
 void rproc_remove_subdev(struct rproc *rproc, struct rproc_subdev *subdev);
 
-- 
cgit v1.2.3


From a3ed0e4393d6885b4af7ce84b437dc696490a530 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 25 Apr 2018 15:33:38 +0200
Subject: Revert: Unify CLOCK_MONOTONIC and CLOCK_BOOTTIME

Revert commits

92af4dcb4e1c ("tracing: Unify the "boot" and "mono" tracing clocks")
127bfa5f4342 ("hrtimer: Unify MONOTONIC and BOOTTIME clock behavior")
7250a4047aa6 ("posix-timers: Unify MONOTONIC and BOOTTIME clock behavior")
d6c7270e913d ("timekeeping: Remove boot time specific code")
f2d6fdbfd238 ("Input: Evdev - unify MONOTONIC and BOOTTIME clock behavior")
d6ed449afdb3 ("timekeeping: Make the MONOTONIC clock behave like the BOOTTIME clock")
72199320d49d ("timekeeping: Add the new CLOCK_MONOTONIC_ACTIVE clock")

As stated in the pull request for the unification of CLOCK_MONOTONIC and
CLOCK_BOOTTIME, it was clear that we might have to revert the change.

As reported by several folks systemd and other applications rely on the
documented behaviour of CLOCK_MONOTONIC on Linux and break with the above
changes. After resume daemons time out and other timeout related issues are
observed. Rafael compiled this list:

* systemd kills daemons on resume, after >WatchdogSec seconds
  of suspending (Genki Sky).  [Verified that that's because systemd uses
  CLOCK_MONOTONIC and expects it to not include the suspend time.]

* systemd-journald misbehaves after resume:
  systemd-journald[7266]: File /var/log/journal/016627c3c4784cd4812d4b7e96a34226/system.journal
corrupted or uncleanly shut down, renaming and replacing.
  (Mike Galbraith).

* NetworkManager reports "networking disabled" and networking is broken
  after resume 50% of the time (Pavel).  [May be because of systemd.]

* MATE desktop dims the display and starts the screensaver right after
  system resume (Pavel).

* Full system hang during resume (me).  [May be due to systemd or NM or both.]

That happens on debian and open suse systems.

It's sad, that these problems were neither catched in -next nor by those
folks who expressed interest in this change.

Reported-by: Rafael J. Wysocki <rjw@rjwysocki.net>
Reported-by: Genki Sky <sky@genki.is>,
Reported-by: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Cc: John Stultz <john.stultz@linaro.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Kevin Easton <kevin@guarana.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mark Salyzyn <salyzyn@android.com>
Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Petr Mladek <pmladek@suse.com>
Cc: Prarit Bhargava <prarit@redhat.com>
Cc: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/hrtimer.h             |  2 ++
 include/linux/timekeeper_internal.h |  2 --
 include/linux/timekeeping.h         | 37 +++++++++++++++++++++++++------------
 3 files changed, 27 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index a2656c3ebe81..3892e9c8b2de 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -161,9 +161,11 @@ struct hrtimer_clock_base {
 enum  hrtimer_base_type {
 	HRTIMER_BASE_MONOTONIC,
 	HRTIMER_BASE_REALTIME,
+	HRTIMER_BASE_BOOTTIME,
 	HRTIMER_BASE_TAI,
 	HRTIMER_BASE_MONOTONIC_SOFT,
 	HRTIMER_BASE_REALTIME_SOFT,
+	HRTIMER_BASE_BOOTTIME_SOFT,
 	HRTIMER_BASE_TAI_SOFT,
 	HRTIMER_MAX_CLOCK_BASES,
 };
diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h
index 4b3dca173e89..7acb953298a7 100644
--- a/include/linux/timekeeper_internal.h
+++ b/include/linux/timekeeper_internal.h
@@ -52,7 +52,6 @@ struct tk_read_base {
  * @offs_real:		Offset clock monotonic -> clock realtime
  * @offs_boot:		Offset clock monotonic -> clock boottime
  * @offs_tai:		Offset clock monotonic -> clock tai
- * @time_suspended:	Accumulated suspend time
  * @tai_offset:		The current UTC to TAI offset in seconds
  * @clock_was_set_seq:	The sequence number of clock was set events
  * @cs_was_changed_seq:	The sequence number of clocksource change events
@@ -95,7 +94,6 @@ struct timekeeper {
 	ktime_t			offs_real;
 	ktime_t			offs_boot;
 	ktime_t			offs_tai;
-	ktime_t			time_suspended;
 	s32			tai_offset;
 	unsigned int		clock_was_set_seq;
 	u8			cs_was_changed_seq;
diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h
index 9737fbec7019..588a0e4b1ab9 100644
--- a/include/linux/timekeeping.h
+++ b/include/linux/timekeeping.h
@@ -33,25 +33,20 @@ extern void ktime_get_ts64(struct timespec64 *ts);
 extern time64_t ktime_get_seconds(void);
 extern time64_t __ktime_get_real_seconds(void);
 extern time64_t ktime_get_real_seconds(void);
-extern void ktime_get_active_ts64(struct timespec64 *ts);
 
 extern int __getnstimeofday64(struct timespec64 *tv);
 extern void getnstimeofday64(struct timespec64 *tv);
 extern void getboottime64(struct timespec64 *ts);
 
-#define ktime_get_real_ts64(ts)		getnstimeofday64(ts)
-
-/* Clock BOOTTIME compatibility wrappers */
-static inline void get_monotonic_boottime64(struct timespec64 *ts)
-{
-	ktime_get_ts64(ts);
-}
+#define ktime_get_real_ts64(ts)	getnstimeofday64(ts)
 
 /*
  * ktime_t based interfaces
  */
+
 enum tk_offsets {
 	TK_OFFS_REAL,
+	TK_OFFS_BOOT,
 	TK_OFFS_TAI,
 	TK_OFFS_MAX,
 };
@@ -62,10 +57,6 @@ extern ktime_t ktime_mono_to_any(ktime_t tmono, enum tk_offsets offs);
 extern ktime_t ktime_get_raw(void);
 extern u32 ktime_get_resolution_ns(void);
 
-/* Clock BOOTTIME compatibility wrappers */
-static inline ktime_t ktime_get_boottime(void) { return ktime_get(); }
-static inline u64 ktime_get_boot_ns(void) { return ktime_get(); }
-
 /**
  * ktime_get_real - get the real (wall-) time in ktime_t format
  */
@@ -74,6 +65,17 @@ static inline ktime_t ktime_get_real(void)
 	return ktime_get_with_offset(TK_OFFS_REAL);
 }
 
+/**
+ * ktime_get_boottime - Returns monotonic time since boot in ktime_t format
+ *
+ * This is similar to CLOCK_MONTONIC/ktime_get, but also includes the
+ * time spent in suspend.
+ */
+static inline ktime_t ktime_get_boottime(void)
+{
+	return ktime_get_with_offset(TK_OFFS_BOOT);
+}
+
 /**
  * ktime_get_clocktai - Returns the TAI time of day in ktime_t format
  */
@@ -100,6 +102,11 @@ static inline u64 ktime_get_real_ns(void)
 	return ktime_to_ns(ktime_get_real());
 }
 
+static inline u64 ktime_get_boot_ns(void)
+{
+	return ktime_to_ns(ktime_get_boottime());
+}
+
 static inline u64 ktime_get_tai_ns(void)
 {
 	return ktime_to_ns(ktime_get_clocktai());
@@ -112,11 +119,17 @@ static inline u64 ktime_get_raw_ns(void)
 
 extern u64 ktime_get_mono_fast_ns(void);
 extern u64 ktime_get_raw_fast_ns(void);
+extern u64 ktime_get_boot_fast_ns(void);
 extern u64 ktime_get_real_fast_ns(void);
 
 /*
  * timespec64 interfaces utilizing the ktime based ones
  */
+static inline void get_monotonic_boottime64(struct timespec64 *ts)
+{
+	*ts = ktime_to_timespec64(ktime_get_boottime());
+}
+
 static inline void timekeeping_clocktai64(struct timespec64 *ts)
 {
 	*ts = ktime_to_timespec64(ktime_get_clocktai());
-- 
cgit v1.2.3


From bf0ddaba65ddbb2715af97041da8e7a45b2d8628 Mon Sep 17 00:00:00 2001
From: Omar Sandoval <osandov@fb.com>
Date: Thu, 26 Apr 2018 00:21:59 -0700
Subject: blk-mq: fix sysfs inflight counter

When the blk-mq inflight implementation was added, /proc/diskstats was
converted to use it, but /sys/block/$dev/inflight was not. Fix it by
adding another helper to count in-flight requests by data direction.

Fixes: f299b7c7a9de ("blk-mq: provide internal in-flight variant")
Signed-off-by: Omar Sandoval <osandov@fb.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/genhd.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index c826b0b5232a..6cb8a5789668 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -368,7 +368,9 @@ static inline void free_part_stats(struct hd_struct *part)
 	part_stat_add(cpu, gendiskp, field, -subnd)
 
 void part_in_flight(struct request_queue *q, struct hd_struct *part,
-			unsigned int inflight[2]);
+		    unsigned int inflight[2]);
+void part_in_flight_rw(struct request_queue *q, struct hd_struct *part,
+		       unsigned int inflight[2]);
 void part_dec_in_flight(struct request_queue *q, struct hd_struct *part,
 			int rw);
 void part_inc_in_flight(struct request_queue *q, struct hd_struct *part,
-- 
cgit v1.2.3


From 6082d9c9c94a408d7409b5f2e4e42ac9e8b16d0d Mon Sep 17 00:00:00 2001
From: Israel Rukshin <israelr@mellanox.com>
Date: Thu, 12 Apr 2018 09:49:11 +0000
Subject: net/mlx5: Fix mlx5_get_vector_affinity function

Adding the vector offset when calling to mlx5_vector2eqn() is wrong.
This is because mlx5_vector2eqn() checks if EQ index is equal to vector number
and the fact that the internal completion vectors that mlx5 allocates
don't get an EQ index.

The second problem here is that using effective_affinity_mask gives the same
CPU for different vectors.
This leads to unmapped queues when calling it from blk_mq_rdma_map_queues().
This doesn't happen when using affinity_hint mask.

Fixes: 2572cf57d75a ("mlx5: fix mlx5_get_vector_affinity to start from completion vector 0")
Fixes: 05e0cc84e00c ("net/mlx5: Fix get vector affinity helper function")
Signed-off-by: Israel Rukshin <israelr@mellanox.com>
Reviewed-by: Max Gurtovoy <maxg@mellanox.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
---
 include/linux/mlx5/driver.h | 12 +++---------
 1 file changed, 3 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 767d193c269a..2a156c5dfadd 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -1284,25 +1284,19 @@ enum {
 };
 
 static inline const struct cpumask *
-mlx5_get_vector_affinity(struct mlx5_core_dev *dev, int vector)
+mlx5_get_vector_affinity_hint(struct mlx5_core_dev *dev, int vector)
 {
-	const struct cpumask *mask;
 	struct irq_desc *desc;
 	unsigned int irq;
 	int eqn;
 	int err;
 
-	err = mlx5_vector2eqn(dev, MLX5_EQ_VEC_COMP_BASE + vector, &eqn, &irq);
+	err = mlx5_vector2eqn(dev, vector, &eqn, &irq);
 	if (err)
 		return NULL;
 
 	desc = irq_to_desc(irq);
-#ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK
-	mask = irq_data_get_effective_affinity_mask(&desc->irq_data);
-#else
-	mask = desc->irq_common_data.affinity;
-#endif
-	return mask;
+	return desc->affinity_hint;
 }
 
 #endif /* MLX5_DRIVER_H */
-- 
cgit v1.2.3


From ed769520727edbf526e0f510e6c501fc6ba3824a Mon Sep 17 00:00:00 2001
From: Joel Pepper <joel.pepper@rwth-aachen.de>
Date: Thu, 26 Apr 2018 20:26:08 +0200
Subject: usb: gadget: composite Allow for larger configuration descriptors

The composite framework allows us to create gadgets composed from many
different functions, which need to fit into a single configuration
descriptor.

Some functions (like uvc) can produce configuration descriptors upwards
of 2500 bytes on their own.

This patch increases the limit from 1024 bytes to 4096.

Signed-off-by: Joel Pepper <joel.pepper@rwth-aachen.de>
Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
---
 include/linux/usb/composite.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/usb/composite.h b/include/linux/usb/composite.h
index 4b6b9283fa7b..8675e145ea8b 100644
--- a/include/linux/usb/composite.h
+++ b/include/linux/usb/composite.h
@@ -52,7 +52,7 @@
 #define USB_GADGET_DELAYED_STATUS       0x7fff	/* Impossibly large value */
 
 /* big enough to hold our biggest descriptor */
-#define USB_COMP_EP0_BUFSIZ	1024
+#define USB_COMP_EP0_BUFSIZ	4096
 
 /* OS feature descriptor length <= 4kB */
 #define USB_COMP_EP0_OS_DESC_BUFSIZ	4096
-- 
cgit v1.2.3


From 19b9ad67310ed2f685062a00aec602bec33835f0 Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Mon, 5 Feb 2018 19:32:18 +0200
Subject: <linux/stringhash.h>: fix end_name_hash() for 64bit long

The comment claims that this helper will try not to loose bits, but for
64bit long it looses the high bits before hashing 64bit long into 32bit
int.  Use the helper hash_long() to do the right thing for 64bit long.
For 32bit long, there is no change.

All the callers of end_name_hash() either assign the result to
qstr->hash, which is u32 or return the result as an int value (e.g.
full_name_hash()).  Change the helper return type to int to conform to
its users.

[ It took me a while to apply this, because my initial reaction to it
  was - incorrectly - that it could make for slower code.

  After having looked more at it, I take back all my complaints about
  the patch, Amir was right and I was mis-reading things or just being
  stupid.

  I also don't worry too much about the possible performance impact of
  this on 64-bit, since most architectures that actually care about
  performance end up not using this very much (the dcache code is the
  most performance-critical, but the word-at-a-time case uses its own
  hashing anyway).

  So this ends up being mostly used for filesystems that do their own
  degraded hashing (usually because they want a case-insensitive
  comparison function).

  A _tiny_ worry remains, in that not everybody uses DCACHE_WORD_ACCESS,
  and then this potentially makes things more expensive on 64-bit
  architectures with slow or lacking multipliers even for the normal
  case.

  That said, realistically the only such architecture I can think of is
  PA-RISC. Nobody really cares about performance on that, it's more of a
  "look ma, I've got warts^W an odd machine" platform.

  So the patch is fine, and all my initial worries were just misplaced
  from not looking at this properly.   - Linus ]

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/stringhash.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/stringhash.h b/include/linux/stringhash.h
index e8f0f852968f..c0c5c5b73dc0 100644
--- a/include/linux/stringhash.h
+++ b/include/linux/stringhash.h
@@ -50,9 +50,9 @@ partial_name_hash(unsigned long c, unsigned long prevhash)
  * losing bits).  This also has the property (wanted by the dcache)
  * that the msbits make a good hash table index.
  */
-static inline unsigned long end_name_hash(unsigned long hash)
+static inline unsigned int end_name_hash(unsigned long hash)
 {
-	return __hash_32((unsigned int)hash);
+	return hash_long(hash, 32);
 }
 
 /*
-- 
cgit v1.2.3


From 85f1abe0019fcb3ea10df7029056cf42702283a8 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Tue, 1 May 2018 18:14:45 +0200
Subject: kthread, sched/wait: Fix kthread_parkme() completion issue

Even with the wait-loop fixed, there is a further issue with
kthread_parkme(). Upon hotplug, when we do takedown_cpu(),
smpboot_park_threads() can return before all those threads are in fact
blocked, due to the placement of the complete() in __kthread_parkme().

When that happens, sched_cpu_dying() -> migrate_tasks() can end up
migrating such a still runnable task onto another CPU.

Normally the task will have hit schedule() and gone to sleep by the
time we do kthread_unpark(), which will then do __kthread_bind() to
re-bind the task to the correct CPU.

However, when we loose the initial TASK_PARKED store to the concurrent
wakeup issue described previously, do the complete(), get migrated, it
is possible to either:

 - observe kthread_unpark()'s clearing of SHOULD_PARK and terminate
   the park and set TASK_RUNNING, or

 - __kthread_bind()'s wait_task_inactive() to observe the competing
   TASK_RUNNING store.

Either way the WARN() in __kthread_bind() will trigger and fail to
correctly set the CPU affinity.

Fix this by only issuing the complete() when the kthread has scheduled
out. This does away with all the icky 'still running' nonsense.

The alternative is to promote TASK_PARKED to a special state, this
guarantees wait_task_inactive() cannot observe a 'stale' TASK_RUNNING
and we'll end up doing the right thing, but this preserves the whole
icky business of potentially migating the still runnable thing.

Reported-by: Gaurav Kohli <gkohli@codeaurora.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/kthread.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/kthread.h b/include/linux/kthread.h
index c1961761311d..2803264c512f 100644
--- a/include/linux/kthread.h
+++ b/include/linux/kthread.h
@@ -62,6 +62,7 @@ void *kthread_probe_data(struct task_struct *k);
 int kthread_park(struct task_struct *k);
 void kthread_unpark(struct task_struct *k);
 void kthread_parkme(void);
+void kthread_park_complete(struct task_struct *k);
 
 int kthreadd(void *unused);
 extern struct task_struct *kthreadd_task;
-- 
cgit v1.2.3


From 8236b0ae31c837d2b3a2565c5f8d77f637e824cc Mon Sep 17 00:00:00 2001
From: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Date: Wed, 2 May 2018 07:07:55 +0900
Subject: bdi: wake up concurrent wb_shutdown() callers.

syzbot is reporting hung tasks at wait_on_bit(WB_shutting_down) in
wb_shutdown() [1]. This seems to be because commit 5318ce7d46866e1d ("bdi:
Shutdown writeback on all cgwbs in cgwb_bdi_destroy()") forgot to call
wake_up_bit(WB_shutting_down) after clear_bit(WB_shutting_down).

Introduce a helper function clear_and_wake_up_bit() and use it, in order
to avoid similar errors in future.

[1] https://syzkaller.appspot.com/bug?id=b297474817af98d5796bc544e1bb806fc3da0e5e

Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Reported-by: syzbot <syzbot+c0cf869505e03bdf1a24@syzkaller.appspotmail.com>
Fixes: 5318ce7d46866e1d ("bdi: Shutdown writeback on all cgwbs in cgwb_bdi_destroy()")
Cc: Tejun Heo <tj@kernel.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/wait_bit.h | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/wait_bit.h b/include/linux/wait_bit.h
index 9318b2166439..2b0072fa5e92 100644
--- a/include/linux/wait_bit.h
+++ b/include/linux/wait_bit.h
@@ -305,4 +305,21 @@ do {									\
 	__ret;								\
 })
 
+/**
+ * clear_and_wake_up_bit - clear a bit and wake up anyone waiting on that bit
+ *
+ * @bit: the bit of the word being waited on
+ * @word: the word being waited on, a kernel virtual address
+ *
+ * You can use this helper if bitflags are manipulated atomically rather than
+ * non-atomically under a lock.
+ */
+static inline void clear_and_wake_up_bit(int bit, void *word)
+{
+	clear_bit_unlock(bit, word);
+	/* See wake_up_bit() for which memory barrier you need to use. */
+	smp_mb__after_atomic();
+	wake_up_bit(word, bit);
+}
+
 #endif /* _LINUX_WAIT_BIT_H */
-- 
cgit v1.2.3


From b5bf9a90bbebffba888c9144c5a8a10317b04064 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Mon, 30 Apr 2018 14:51:01 +0200
Subject: sched/core: Introduce set_special_state()

Gaurav reported a perceived problem with TASK_PARKED, which turned out
to be a broken wait-loop pattern in __kthread_parkme(), but the
reported issue can (and does) in fact happen for states that do not do
condition based sleeps.

When the 'current->state = TASK_RUNNING' store of a previous
(concurrent) try_to_wake_up() collides with the setting of a 'special'
sleep state, we can loose the sleep state.

Normal condition based wait-loops are immune to this problem, but for
sleep states that are not condition based are subject to this problem.

There already is a fix for TASK_DEAD. Abstract that and also apply it
to TASK_STOPPED and TASK_TRACED, both of which are also without
condition based wait-loop.

Reported-by: Gaurav Kohli <gkohli@codeaurora.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h        | 50 +++++++++++++++++++++++++++++++++++++++-----
 include/linux/sched/signal.h |  2 +-
 2 files changed, 46 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index b3d697f3b573..c2413703f45d 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -112,17 +112,36 @@ struct task_group;
 
 #ifdef CONFIG_DEBUG_ATOMIC_SLEEP
 
+/*
+ * Special states are those that do not use the normal wait-loop pattern. See
+ * the comment with set_special_state().
+ */
+#define is_special_task_state(state)				\
+	((state) & (__TASK_STOPPED | __TASK_TRACED | TASK_DEAD))
+
 #define __set_current_state(state_value)			\
 	do {							\
+		WARN_ON_ONCE(is_special_task_state(state_value));\
 		current->task_state_change = _THIS_IP_;		\
 		current->state = (state_value);			\
 	} while (0)
+
 #define set_current_state(state_value)				\
 	do {							\
+		WARN_ON_ONCE(is_special_task_state(state_value));\
 		current->task_state_change = _THIS_IP_;		\
 		smp_store_mb(current->state, (state_value));	\
 	} while (0)
 
+#define set_special_state(state_value)					\
+	do {								\
+		unsigned long flags; /* may shadow */			\
+		WARN_ON_ONCE(!is_special_task_state(state_value));	\
+		raw_spin_lock_irqsave(&current->pi_lock, flags);	\
+		current->task_state_change = _THIS_IP_;			\
+		current->state = (state_value);				\
+		raw_spin_unlock_irqrestore(&current->pi_lock, flags);	\
+	} while (0)
 #else
 /*
  * set_current_state() includes a barrier so that the write of current->state
@@ -144,8 +163,8 @@ struct task_group;
  *
  * The above is typically ordered against the wakeup, which does:
  *
- *	need_sleep = false;
- *	wake_up_state(p, TASK_UNINTERRUPTIBLE);
+ *   need_sleep = false;
+ *   wake_up_state(p, TASK_UNINTERRUPTIBLE);
  *
  * Where wake_up_state() (and all other wakeup primitives) imply enough
  * barriers to order the store of the variable against wakeup.
@@ -154,12 +173,33 @@ struct task_group;
  * once it observes the TASK_UNINTERRUPTIBLE store the waking CPU can issue a
  * TASK_RUNNING store which can collide with __set_current_state(TASK_RUNNING).
  *
- * This is obviously fine, since they both store the exact same value.
+ * However, with slightly different timing the wakeup TASK_RUNNING store can
+ * also collide with the TASK_UNINTERRUPTIBLE store. Loosing that store is not
+ * a problem either because that will result in one extra go around the loop
+ * and our @cond test will save the day.
  *
  * Also see the comments of try_to_wake_up().
  */
-#define __set_current_state(state_value) do { current->state = (state_value); } while (0)
-#define set_current_state(state_value)	 smp_store_mb(current->state, (state_value))
+#define __set_current_state(state_value)				\
+	current->state = (state_value)
+
+#define set_current_state(state_value)					\
+	smp_store_mb(current->state, (state_value))
+
+/*
+ * set_special_state() should be used for those states when the blocking task
+ * can not use the regular condition based wait-loop. In that case we must
+ * serialize against wakeups such that any possible in-flight TASK_RUNNING stores
+ * will not collide with our state change.
+ */
+#define set_special_state(state_value)					\
+	do {								\
+		unsigned long flags; /* may shadow */			\
+		raw_spin_lock_irqsave(&current->pi_lock, flags);	\
+		current->state = (state_value);				\
+		raw_spin_unlock_irqrestore(&current->pi_lock, flags);	\
+	} while (0)
+
 #endif
 
 /* Task command name length: */
diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
index a7ce74c74e49..113d1ad1ced7 100644
--- a/include/linux/sched/signal.h
+++ b/include/linux/sched/signal.h
@@ -280,7 +280,7 @@ static inline void kernel_signal_stop(void)
 {
 	spin_lock_irq(&current->sighand->siglock);
 	if (current->jobctl & JOBCTL_STOP_DEQUEUED)
-		__set_current_state(TASK_STOPPED);
+		set_special_state(TASK_STOPPED);
 	spin_unlock_irq(&current->sighand->siglock);
 
 	schedule();
-- 
cgit v1.2.3


From 23b8392201e0681b76630c4cea68e1a2e1821ec6 Mon Sep 17 00:00:00 2001
From: Bhadram Varka <vbhadram@nvidia.com>
Date: Wed, 2 May 2018 20:43:58 +0530
Subject: net: phy: broadcom: add support for BCM89610 PHY

It adds support for BCM89610 (Single-Port 10/100/1000BASE-T)
transceiver which is used in P3310 Tegra186 platform.

Signed-off-by: Bhadram Varka <vbhadram@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/brcmphy.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h
index d3339dd48b1a..b324e01ccf2d 100644
--- a/include/linux/brcmphy.h
+++ b/include/linux/brcmphy.h
@@ -25,6 +25,7 @@
 #define PHY_ID_BCM54612E		0x03625e60
 #define PHY_ID_BCM54616S		0x03625d10
 #define PHY_ID_BCM57780			0x03625d90
+#define PHY_ID_BCM89610			0x03625cd0
 
 #define PHY_ID_BCM7250			0xae025280
 #define PHY_ID_BCM7260			0xae025190
-- 
cgit v1.2.3


From 0010f7052d6cb71c4b120238e28cd3fa413913d1 Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Fri, 4 May 2018 16:57:30 +0200
Subject: libceph: add osd_req_op_extent_osd_data_bvecs()

... and store num_bvecs for client code's convenience.

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Reviewed-by: Jeff Layton <jlayton@redhat.com>
Reviewed-by: "Yan, Zheng" <zyan@redhat.com>
---
 include/linux/ceph/osd_client.h | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
index 528ccc943cee..96bb32285989 100644
--- a/include/linux/ceph/osd_client.h
+++ b/include/linux/ceph/osd_client.h
@@ -77,7 +77,10 @@ struct ceph_osd_data {
 			u32			bio_length;
 		};
 #endif /* CONFIG_BLOCK */
-		struct ceph_bvec_iter	bvec_pos;
+		struct {
+			struct ceph_bvec_iter	bvec_pos;
+			u32			num_bvecs;
+		};
 	};
 };
 
@@ -412,6 +415,10 @@ void osd_req_op_extent_osd_data_bio(struct ceph_osd_request *osd_req,
 				    struct ceph_bio_iter *bio_pos,
 				    u32 bio_length);
 #endif /* CONFIG_BLOCK */
+void osd_req_op_extent_osd_data_bvecs(struct ceph_osd_request *osd_req,
+				      unsigned int which,
+				      struct bio_vec *bvecs, u32 num_bvecs,
+				      u32 bytes);
 void osd_req_op_extent_osd_data_bvec_pos(struct ceph_osd_request *osd_req,
 					 unsigned int which,
 					 struct ceph_bvec_iter *bvec_pos);
@@ -426,7 +433,8 @@ extern void osd_req_op_cls_request_data_pages(struct ceph_osd_request *,
 					bool own_pages);
 void osd_req_op_cls_request_data_bvecs(struct ceph_osd_request *osd_req,
 				       unsigned int which,
-				       struct bio_vec *bvecs, u32 bytes);
+				       struct bio_vec *bvecs, u32 num_bvecs,
+				       u32 bytes);
 extern void osd_req_op_cls_response_data_pages(struct ceph_osd_request *,
 					unsigned int which,
 					struct page **pages, u64 length,
-- 
cgit v1.2.3


From ddc9cfb79c1096a0855839631c091aa7e9602052 Mon Sep 17 00:00:00 2001
From: Wanpeng Li <wanpengli@tencent.com>
Date: Thu, 26 Apr 2018 17:55:03 -0700
Subject: KVM: Extend MAX_IRQ_ROUTES to 4096 for all archs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Our virtual machines make use of device assignment by configuring
12 NVMe disks for high I/O performance. Each NVMe device has 129
MSI-X Table entries:
Capabilities: [50] MSI-X: Enable+ Count=129 Masked-Vector table: BAR=0 offset=00002000
The windows virtual machines fail to boot since they will map the number of
MSI-table entries that the NVMe hardware reported to the bus to msi routing
table, this will exceed the 1024. This patch extends MAX_IRQ_ROUTES to 4096
for all archs, in the future this might be extended again if needed.

Reviewed-by: Cornelia Huck <cohuck@redhat.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Radim KrÄmÃ¡Å™ <rkrcmar@redhat.com>
Cc: Cornelia Huck <cohuck@redhat.com>
Cc: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Wanpeng Li <wanpengli@tencent.com>
Signed-off-by: Tonny Lu <tonnylu@tencent.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/linux/kvm_host.h | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 6930c63126c7..6d6e79c59e68 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1045,13 +1045,7 @@ static inline int mmu_notifier_retry(struct kvm *kvm, unsigned long mmu_seq)
 
 #ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
 
-#ifdef CONFIG_S390
-#define KVM_MAX_IRQ_ROUTES 4096 //FIXME: we can have more than that...
-#elif defined(CONFIG_ARM64)
-#define KVM_MAX_IRQ_ROUTES 4096
-#else
-#define KVM_MAX_IRQ_ROUTES 1024
-#endif
+#define KVM_MAX_IRQ_ROUTES 4096 /* might need extension/rework in the future */
 
 bool kvm_arch_can_set_irq_routing(struct kvm *kvm);
 int kvm_set_irq_routing(struct kvm *kvm,
-- 
cgit v1.2.3


From 27ae357fa82be5ab73b2ef8d39dcb8ca2563483a Mon Sep 17 00:00:00 2001
From: David Rientjes <rientjes@google.com>
Date: Fri, 11 May 2018 16:02:04 -0700
Subject: mm, oom: fix concurrent munlock and oom reaper unmap, v3

Since exit_mmap() is done without the protection of mm->mmap_sem, it is
possible for the oom reaper to concurrently operate on an mm until
MMF_OOM_SKIP is set.

This allows munlock_vma_pages_all() to concurrently run while the oom
reaper is operating on a vma.  Since munlock_vma_pages_range() depends
on clearing VM_LOCKED from vm_flags before actually doing the munlock to
determine if any other vmas are locking the same memory, the check for
VM_LOCKED in the oom reaper is racy.

This is especially noticeable on architectures such as powerpc where
clearing a huge pmd requires serialize_against_pte_lookup().  If the pmd
is zapped by the oom reaper during follow_page_mask() after the check
for pmd_none() is bypassed, this ends up deferencing a NULL ptl or a
kernel oops.

Fix this by manually freeing all possible memory from the mm before
doing the munlock and then setting MMF_OOM_SKIP.  The oom reaper can not
run on the mm anymore so the munlock is safe to do in exit_mmap().  It
also matches the logic that the oom reaper currently uses for
determining when to set MMF_OOM_SKIP itself, so there's no new risk of
excessive oom killing.

This issue fixes CVE-2018-1000200.

Link: http://lkml.kernel.org/r/alpine.DEB.2.21.1804241526320.238665@chino.kir.corp.google.com
Fixes: 212925802454 ("mm: oom: let oom_reap_task and exit_mmap run concurrently")
Signed-off-by: David Rientjes <rientjes@google.com>
Suggested-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: <stable@vger.kernel.org>	[4.14+]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/oom.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/oom.h b/include/linux/oom.h
index 5bad038ac012..6adac113e96d 100644
--- a/include/linux/oom.h
+++ b/include/linux/oom.h
@@ -95,6 +95,8 @@ static inline int check_stable_address_space(struct mm_struct *mm)
 	return 0;
 }
 
+void __oom_reap_task_mm(struct mm_struct *mm);
+
 extern unsigned long oom_badness(struct task_struct *p,
 		struct mem_cgroup *memcg, const nodemask_t *nodemask,
 		unsigned long totalpages);
-- 
cgit v1.2.3


From 2075b16e32c26e4031b9fd3cbe26c54676a8fcb5 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Fri, 11 May 2018 16:02:14 -0700
Subject: rbtree: include rcu.h

Since commit c1adf20052d8 ("Introduce rb_replace_node_rcu()")
rbtree_augmented.h uses RCU related data structures but does not include
the header file.  It works as long as it gets somehow included before
that and fails otherwise.

Link: http://lkml.kernel.org/r/20180504103159.19938-1-bigeasy@linutronix.de
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Cc: David Howells <dhowells@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/rbtree_augmented.h | 1 +
 include/linux/rbtree_latch.h     | 1 +
 2 files changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/rbtree_augmented.h b/include/linux/rbtree_augmented.h
index 6bfd2b581f75..af8a61be2d8d 100644
--- a/include/linux/rbtree_augmented.h
+++ b/include/linux/rbtree_augmented.h
@@ -26,6 +26,7 @@
 
 #include <linux/compiler.h>
 #include <linux/rbtree.h>
+#include <linux/rcupdate.h>
 
 /*
  * Please note - only struct rb_augment_callbacks and the prototypes for
diff --git a/include/linux/rbtree_latch.h b/include/linux/rbtree_latch.h
index ece43e882b56..7d012faa509a 100644
--- a/include/linux/rbtree_latch.h
+++ b/include/linux/rbtree_latch.h
@@ -35,6 +35,7 @@
 
 #include <linux/rbtree.h>
 #include <linux/seqlock.h>
+#include <linux/rcupdate.h>
 
 struct latch_tree_node {
 	struct rb_node node[2];
-- 
cgit v1.2.3


From ea739a287f4f16d6250bea779a1026ead79695f2 Mon Sep 17 00:00:00 2001
From: Ben Hutchings <ben@decadent.org.uk>
Date: Thu, 10 May 2018 19:20:54 +0100
Subject: mtd: Fix comparison in map_word_andequal()

Commit 9e343e87d2c4 ("mtd: cfi: convert inline functions to macros")
changed map_word_andequal() into a macro, but also changed the right
hand side of the comparison from val3 to val2.  Change it back to use
val3 on the right hand side.

Thankfully this did not cause a regression because all callers
currently pass the same argument for val2 and val3.

Fixes: 9e343e87d2c4 ("mtd: cfi: convert inline functions to macros")
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
---
 include/linux/mtd/map.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mtd/map.h b/include/linux/mtd/map.h
index b5b43f94f311..01b990e4b228 100644
--- a/include/linux/mtd/map.h
+++ b/include/linux/mtd/map.h
@@ -312,7 +312,7 @@ void map_destroy(struct mtd_info *mtd);
 ({									\
 	int i, ret = 1;							\
 	for (i = 0; i < map_words(map); i++) {				\
-		if (((val1).x[i] & (val2).x[i]) != (val2).x[i]) {	\
+		if (((val1).x[i] & (val2).x[i]) != (val3).x[i]) {	\
 			ret = 0;					\
 			break;						\
 		}							\
-- 
cgit v1.2.3


From 9f825e74d761c13b0cfaa5f65344d64ff970e252 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Mon, 14 May 2018 12:49:37 +0200
Subject: mtd: rawnand: Fix return type of __DIVIDE() when called with 32-bit

The __DIVIDE() macro checks whether it is called with a 32-bit or 64-bit
dividend, to select the appropriate divide-and-round-up routine.
As the check uses the ternary operator, the result will always be
promoted to a type that can hold both results, i.e. unsigned long long.

When using this result in a division on a 32-bit system, this may lead
to link errors like:

    ERROR: "__udivdi3" [drivers/mtd/nand/raw/nand.ko] undefined!

Fix this by casting the result of the division to the type of the
dividend.

Fixes: 8878b126df769831 ("mtd: nand: add ->exec_op() implementation")
Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
---
 include/linux/mtd/rawnand.h | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h
index 5dad59b31244..17c919436f48 100644
--- a/include/linux/mtd/rawnand.h
+++ b/include/linux/mtd/rawnand.h
@@ -867,12 +867,18 @@ struct nand_op_instr {
  * tBERS (during an erase) which all of them are u64 values that cannot be
  * divided by usual kernel macros and must be handled with the special
  * DIV_ROUND_UP_ULL() macro.
+ *
+ * Cast to type of dividend is needed here to guarantee that the result won't
+ * be an unsigned long long when the dividend is an unsigned long (or smaller),
+ * which is what the compiler does when it sees ternary operator with 2
+ * different return types (picks the largest type to make sure there's no
+ * loss).
  */
-#define __DIVIDE(dividend, divisor) ({					\
-	sizeof(dividend) == sizeof(u32) ?				\
-		DIV_ROUND_UP(dividend, divisor) :			\
-		DIV_ROUND_UP_ULL(dividend, divisor);			\
-		})
+#define __DIVIDE(dividend, divisor) ({						\
+	(__typeof__(dividend))(sizeof(dividend) <= sizeof(unsigned long) ?	\
+			       DIV_ROUND_UP(dividend, divisor) :		\
+			       DIV_ROUND_UP_ULL(dividend, divisor)); 		\
+	})
 #define PSEC_TO_NSEC(x) __DIVIDE(x, 1000)
 #define PSEC_TO_MSEC(x) __DIVIDE(x, 1000000000)
 
-- 
cgit v1.2.3


From 7f7ccc2ccc2e70c6054685f5e3522efa81556830 Mon Sep 17 00:00:00 2001
From: Willy Tarreau <w@1wt.eu>
Date: Fri, 11 May 2018 08:11:44 +0200
Subject: proc: do not access cmdline nor environ from file-backed areas

proc_pid_cmdline_read() and environ_read() directly access the target
process' VM to retrieve the command line and environment. If this
process remaps these areas onto a file via mmap(), the requesting
process may experience various issues such as extra delays if the
underlying device is slow to respond.

Let's simply refuse to access file-backed areas in these functions.
For this we add a new FOLL_ANON gup flag that is passed to all calls
to access_remote_vm(). The code already takes care of such failures
(including unmapped areas). Accesses via /proc/pid/mem were not
changed though.

This was assigned CVE-2018-1120.

Note for stable backports: the patch may apply to kernels prior to 4.11
but silently miss one location; it must be checked that no call to
access_remote_vm() keeps zero as the last argument.

Reported-by: Qualys Security Advisory <qsa@qualys.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: stable@vger.kernel.org
Signed-off-by: Willy Tarreau <w@1wt.eu>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 1ac1f06a4be6..c080af584ddd 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2493,6 +2493,7 @@ static inline struct page *follow_page(struct vm_area_struct *vma,
 #define FOLL_MLOCK	0x1000	/* lock present pages */
 #define FOLL_REMOTE	0x2000	/* we are working on non-current tsk/mm */
 #define FOLL_COW	0x4000	/* internal GUP flag */
+#define FOLL_ANON	0x8000	/* don't do file mappings */
 
 static inline int vm_fault_to_errno(int vm_fault, int foll_flags)
 {
-- 
cgit v1.2.3


From d97baf9470b0668904aa96865abe7db4000dc3ba Mon Sep 17 00:00:00 2001
From: Souptick Joarder <jrdr.linux@gmail.com>
Date: Fri, 18 May 2018 16:08:47 -0700
Subject: include/linux/mm.h: add new inline function vmf_error()

Many places in drivers/ file systems, error was handled in a common way
like below:

	ret = (ret == -ENOMEM) ? VM_FAULT_OOM : VM_FAULT_SIGBUS;

vmf_error() will replace this and return vm_fault_t type err.

A lot of drivers and filesystems currently have a rather complex mapping
of errno-to-VM_FAULT code.  We have been able to eliminate a lot of it
by just returning VM_FAULT codes directly from functions which are
called exclusively from the fault handling path.

Some functions can be called both from the fault handler and other
context which are expecting an errno, so they have to continue to return
an errno.  Some users still need to choose different behaviour for
different errnos, but vmf_error() captures the essential error
translation that's common to all users, and those that need to handle
additional errors can handle them first.

Link: http://lkml.kernel.org/r/20180510174826.GA14268@jordon-HP-15-Notebook-PC
Signed-off-by: Souptick Joarder <jrdr.linux@gmail.com>
Reviewed-by: Matthew Wilcox <mawilcox@microsoft.com>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index c080af584ddd..c6fa9a255dbf 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2466,6 +2466,13 @@ static inline vm_fault_t vmf_insert_pfn(struct vm_area_struct *vma,
 	return VM_FAULT_NOPAGE;
 }
 
+static inline vm_fault_t vmf_error(int err)
+{
+	if (err == -ENOMEM)
+		return VM_FAULT_OOM;
+	return VM_FAULT_SIGBUS;
+}
+
 struct page *follow_page_mask(struct vm_area_struct *vma,
 			      unsigned long address, unsigned int foll_flags,
 			      unsigned int *page_mask);
-- 
cgit v1.2.3