From b994d8f0773cf3b01129c094d00050710f2c422b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jonathan=20Neusch=C3=A4fer?= <j.neuschaefer@gmx.net>
Date: Sat, 8 Oct 2022 17:14:59 +0200
Subject: spi: spi-mem: Fix typo (of -> or)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In this instance, "or" makes more sense than "of", so I guess that "or"
was intended and "of" was a typo.

Signed-off-by: Jonathan Neuschäfer <j.neuschaefer@gmx.net>
Link: https://lore.kernel.org/r/20221008151459.1421406-1-j.neuschaefer@gmx.net
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/linux/spi/spi-mem.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/spi/spi-mem.h b/include/linux/spi/spi-mem.h
index 2ba044d0d5e5..8e984d75f5b6 100644
--- a/include/linux/spi/spi-mem.h
+++ b/include/linux/spi/spi-mem.h
@@ -225,7 +225,7 @@ static inline void *spi_mem_get_drvdata(struct spi_mem *mem)
 /**
  * struct spi_controller_mem_ops - SPI memory operations
  * @adjust_op_size: shrink the data xfer of an operation to match controller's
- *		    limitations (can be alignment of max RX/TX size
+ *		    limitations (can be alignment or max RX/TX size
  *		    limitations)
  * @supports_op: check if an operation is supported by the controller
  * @exec_op: execute a SPI memory operation
-- 
cgit v1.2.3


From a6d1ce5951185ee91bbe6909fe2758f3625561b0 Mon Sep 17 00:00:00 2001
From: Yosry Ahmed <yosryahmed@google.com>
Date: Tue, 11 Oct 2022 00:33:58 +0000
Subject: cgroup: add cgroup_v1v2_get_from_[fd/file]()

Add cgroup_v1v2_get_from_fd() and cgroup_v1v2_get_from_file() that
support both cgroup1 and cgroup2.

Signed-off-by: Yosry Ahmed <yosryahmed@google.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/cgroup.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 398f0bce7c21..a88de5bdeaa9 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -106,6 +106,7 @@ struct cgroup_subsys_state *css_tryget_online_from_dir(struct dentry *dentry,
 
 struct cgroup *cgroup_get_from_path(const char *path);
 struct cgroup *cgroup_get_from_fd(int fd);
+struct cgroup *cgroup_v1v2_get_from_fd(int fd);
 
 int cgroup_attach_task_all(struct task_struct *from, struct task_struct *);
 int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from);
-- 
cgit v1.2.3


From a2550d3ce53c68f54042bc5e468c4d07491ffe0e Mon Sep 17 00:00:00 2001
From: Christian Marangi <ansuelsmth@gmail.com>
Date: Wed, 12 Oct 2022 19:18:36 +0200
Subject: net: dsa: qca8k: fix inband mgmt for big-endian systems

The header and the data of the skb for the inband mgmt requires
to be in little-endian. This is problematic for big-endian system
as the mgmt header is written in the cpu byte order.

Fix this by converting each value for the mgmt header and data to
little-endian, and convert to cpu byte order the mgmt header and
data sent by the switch.

Fixes: 5950c7c0a68c ("net: dsa: qca8k: add support for mgmt read/write in Ethernet packet")
Tested-by: Pawel Dembicki <paweldembicki@gmail.com>
Tested-by: Lech Perczak <lech.perczak@gmail.com>
Signed-off-by: Christian Marangi <ansuelsmth@gmail.com>
Reviewed-by: Lech Perczak <lech.perczak@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/dsa/tag_qca.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/dsa/tag_qca.h b/include/linux/dsa/tag_qca.h
index 50be7cbd93a5..0e176da1e43f 100644
--- a/include/linux/dsa/tag_qca.h
+++ b/include/linux/dsa/tag_qca.h
@@ -61,9 +61,9 @@ struct sk_buff;
 
 /* Special struct emulating a Ethernet header */
 struct qca_mgmt_ethhdr {
-	u32 command;		/* command bit 31:0 */
-	u32 seq;		/* seq 63:32 */
-	u32 mdio_data;		/* first 4byte mdio */
+	__le32 command;		/* command bit 31:0 */
+	__le32 seq;		/* seq 63:32 */
+	__le32 mdio_data;		/* first 4byte mdio */
 	__be16 hdr;		/* qca hdr */
 } __packed;
 
-- 
cgit v1.2.3


From 0d4636f7d72df3179b20a2d32b647881917a5e2a Mon Sep 17 00:00:00 2001
From: Christian Marangi <ansuelsmth@gmail.com>
Date: Wed, 12 Oct 2022 19:18:37 +0200
Subject: net: dsa: qca8k: fix ethtool autocast mib for big-endian systems

The switch sends autocast mib in little-endian. This is problematic for
big-endian system as the values needs to be converted.

Fix this by converting each mib value to cpu byte order.

Fixes: 5c957c7ca78c ("net: dsa: qca8k: add support for mib autocast in Ethernet packet")
Tested-by: Pawel Dembicki <paweldembicki@gmail.com>
Tested-by: Lech Perczak <lech.perczak@gmail.com>
Signed-off-by: Christian Marangi <ansuelsmth@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/dsa/tag_qca.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/dsa/tag_qca.h b/include/linux/dsa/tag_qca.h
index 0e176da1e43f..b1b5720d89a5 100644
--- a/include/linux/dsa/tag_qca.h
+++ b/include/linux/dsa/tag_qca.h
@@ -73,7 +73,7 @@ enum mdio_cmd {
 };
 
 struct mib_ethhdr {
-	u32 data[3];		/* first 3 mib counter */
+	__le32 data[3];		/* first 3 mib counter */
 	__be16 hdr;		/* qca hdr */
 } __packed;
 
-- 
cgit v1.2.3


From fc8695eb11f07d936a4a9dbd15d7797986bc8b89 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Fri, 14 Oct 2022 09:07:46 -0700
Subject: Revert "net: fix cpu_max_bits_warn() usage in
 netif_attrmask_next{,_and}"

This reverts commit 854701ba4c39afae2362ba19a580c461cb183e4f.

We have more violations around, which leads to:

  WARNING: CPU: 2 PID: 1 at include/linux/cpumask.h:110 __netif_set_xps_queue+0x14e/0x770

Let's back this out and retry with a larger clean up in -next.

Fixes: 854701ba4c39 ("net: fix cpu_max_bits_warn() usage in netif_attrmask_next{,_and}")
Link: https://lore.kernel.org/all/20221014030459.3272206-2-guoren@kernel.org/
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index a36edb0ec199..eddf8ee270e7 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3663,8 +3663,9 @@ static inline bool netif_attr_test_online(unsigned long j,
 static inline unsigned int netif_attrmask_next(int n, const unsigned long *srcp,
 					       unsigned int nr_bits)
 {
-	/* n is a prior cpu */
-	cpu_max_bits_warn(n + 1, nr_bits);
+	/* -1 is a legal arg here. */
+	if (n != -1)
+		cpu_max_bits_warn(n, nr_bits);
 
 	if (srcp)
 		return find_next_bit(srcp, nr_bits, n + 1);
@@ -3685,8 +3686,9 @@ static inline int netif_attrmask_next_and(int n, const unsigned long *src1p,
 					  const unsigned long *src2p,
 					  unsigned int nr_bits)
 {
-	/* n is a prior cpu */
-	cpu_max_bits_warn(n + 1, nr_bits);
+	/* -1 is a legal arg here. */
+	if (n != -1)
+		cpu_max_bits_warn(n, nr_bits);
 
 	if (src1p && src2p)
 		return find_next_and_bit(src1p, src2p, nr_bits, n + 1);
-- 
cgit v1.2.3


From 96de900ae78e7dbedc937fd91bafe2934579c65a Mon Sep 17 00:00:00 2001
From: Shenwei Wang <shenwei.wang@nxp.com>
Date: Fri, 14 Oct 2022 09:47:28 -0500
Subject: net: phylink: add mac_managed_pm in phylink_config structure

The recent commit

'commit 744d23c71af3 ("net: phy: Warn about incorrect
mdio_bus_phy_resume() state")'

requires the MAC driver explicitly tell the phy driver who is
managing the PM, otherwise you will see warning during resume
stage.

Add a boolean property in the phylink_config structure so that
the MAC driver can use it to tell the PHY driver if it wants to
manage the PM.

Fixes: 744d23c71af3 ("net: phy: Warn about incorrect mdio_bus_phy_resume() state")
Signed-off-by: Shenwei Wang <shenwei.wang@nxp.com>
Acked-by: Florian Fainelli <f.fainelli@gmail.com>
Reviewed-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/phylink.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/phylink.h b/include/linux/phylink.h
index 664dd409feb9..3f01ac8017e0 100644
--- a/include/linux/phylink.h
+++ b/include/linux/phylink.h
@@ -122,6 +122,7 @@ enum phylink_op_type {
  *	(See commit 7cceb599d15d ("net: phylink: avoid mac_config calls")
  * @poll_fixed_state: if true, starts link_poll,
  *		      if MAC link is at %MLO_AN_FIXED mode.
+ * @mac_managed_pm: if true, indicate the MAC driver is responsible for PHY PM.
  * @ovr_an_inband: if true, override PCS to MLO_AN_INBAND
  * @get_fixed_state: callback to execute to determine the fixed link state,
  *		     if MAC link is at %MLO_AN_FIXED mode.
@@ -134,6 +135,7 @@ struct phylink_config {
 	enum phylink_op_type type;
 	bool legacy_pre_march2020;
 	bool poll_fixed_state;
+	bool mac_managed_pm;
 	bool ovr_an_inband;
 	void (*get_fixed_state)(struct phylink_config *config,
 				struct phylink_link_state *state);
-- 
cgit v1.2.3


From 25b72d530e7aa185955196b63f53c38f751f1632 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 12 Oct 2022 12:18:54 -0700
Subject: fbdev: MIPS supports iomem addresses

Add MIPS to fb_* helpers list for iomem addresses. This silences Sparse
warnings about lacking __iomem address space casts:

drivers/video/fbdev/pvr2fb.c:800:9: sparse: sparse: incorrect type in argument 1 (different address spaces)
drivers/video/fbdev/pvr2fb.c:800:9: sparse:     expected void const *
drivers/video/fbdev/pvr2fb.c:800:9: sparse:     got char [noderef] __iomem *screen_base

Reported-by: kernel test robot <lkp@intel.com>
Link: https://lore.kernel.org/lkml/202210100209.tR2Iqbqk-lkp@intel.com/
Cc: Helge Deller <deller@gmx.de>
Cc: linux-fbdev@vger.kernel.org
Cc: dri-devel@lists.freedesktop.org
Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Helge Deller <deller@gmx.de>
---
 include/linux/fb.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/fb.h b/include/linux/fb.h
index 0aff76bcbb00..bcb8658f5b64 100644
--- a/include/linux/fb.h
+++ b/include/linux/fb.h
@@ -555,7 +555,7 @@ static inline struct apertures_struct *alloc_apertures(unsigned int max_num) {
 
 #elif defined(__i386__) || defined(__alpha__) || defined(__x86_64__) ||	\
 	defined(__hppa__) || defined(__sh__) || defined(__powerpc__) ||	\
-	defined(__arm__) || defined(__aarch64__)
+	defined(__arm__) || defined(__aarch64__) || defined(__mips__)
 
 #define fb_readb __raw_readb
 #define fb_readw __raw_readw
-- 
cgit v1.2.3


From 472a1482325b3a285e0bcf82c0b0edc689b7e8cd Mon Sep 17 00:00:00 2001
From: William Breathitt Gray <william.gray@linaro.org>
Date: Sun, 2 Oct 2022 08:04:19 -0400
Subject: counter: Reduce DEFINE_COUNTER_ARRAY_POLARITY() to defining
 counter_array

A spare warning was reported for drivers/counter/ti-ecap-capture.c::

    sparse warnings: (new ones prefixed by >>)
    >> drivers/counter/ti-ecap-capture.c:380:8: sparse: sparse: symbol 'ecap_cnt_pol_array' was not declared. Should it be static?

    vim +/ecap_cnt_pol_array +380 drivers/counter/ti-ecap-capture.c

       379
     > 380	static DEFINE_COUNTER_ARRAY_POLARITY(ecap_cnt_pol_array, ecap_cnt_pol_avail, ECAP_NB_CEVT);
       381

The first argument to the DEFINE_COUNTER_ARRAY_POLARITY() macro is a
token serving as the symbol name in the definition of a new
struct counter_array structure. However, this macro actually expands to
two statements::

    #define DEFINE_COUNTER_ARRAY_POLARITY(_name, _enums, _length) \
            DEFINE_COUNTER_AVAILABLE(_name##_available, _enums); \
            struct counter_array _name = { \
                    .type = COUNTER_COMP_SIGNAL_POLARITY, \
                    .avail = &(_name##_available), \
                    .length = (_length), \
            }

Because of this, the "static" on line 380 only applies to the first
statement. This patch splits out the DEFINE_COUNTER_AVAILABLE() line
and leaves DEFINE_COUNTER_ARRAY_POLARITY() as a simple structure
definition to avoid issues like this.

Reported-by: kernel test robot <lkp@intel.com>
Link: https://lore.kernel.org/all/202210020619.NQbyomII-lkp@intel.com/
Cc: Julien Panis <jpanis@baylibre.com>
Signed-off-by: William Breathitt Gray <william.gray@linaro.org>
---
 include/linux/counter.h | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/counter.h b/include/linux/counter.h
index c41fa602ed28..b63746637de2 100644
--- a/include/linux/counter.h
+++ b/include/linux/counter.h
@@ -542,11 +542,10 @@ struct counter_array {
 #define DEFINE_COUNTER_ARRAY_CAPTURE(_name, _length) \
 	DEFINE_COUNTER_ARRAY_U64(_name, _length)
 
-#define DEFINE_COUNTER_ARRAY_POLARITY(_name, _enums, _length) \
-	DEFINE_COUNTER_AVAILABLE(_name##_available, _enums); \
+#define DEFINE_COUNTER_ARRAY_POLARITY(_name, _available, _length) \
 	struct counter_array _name = { \
 		.type = COUNTER_COMP_SIGNAL_POLARITY, \
-		.avail = &(_name##_available), \
+		.avail = &(_available), \
 		.length = (_length), \
 	}
 
-- 
cgit v1.2.3


From ca6c21327c6af02b7eec31ce4b9a740a18c6c13f Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 6 Oct 2022 15:00:39 +0200
Subject: perf: Fix missing SIGTRAPs

Marco reported:

Due to the implementation of how SIGTRAP are delivered if
perf_event_attr::sigtrap is set, we've noticed 3 issues:

  1. Missing SIGTRAP due to a race with event_sched_out() (more
     details below).

  2. Hardware PMU events being disabled due to returning 1 from
     perf_event_overflow(). The only way to re-enable the event is
     for user space to first "properly" disable the event and then
     re-enable it.

  3. The inability to automatically disable an event after a
     specified number of overflows via PERF_EVENT_IOC_REFRESH.

The worst of the 3 issues is problem (1), which occurs when a
pending_disable is "consumed" by a racing event_sched_out(), observed
as follows:

		CPU0			|	CPU1
	--------------------------------+---------------------------
	__perf_event_overflow()		|
	 perf_event_disable_inatomic()	|
	  pending_disable = CPU0	| ...
					| _perf_event_enable()
					|  event_function_call()
					|   task_function_call()
					|    /* sends IPI to CPU0 */
	<IPI>				| ...
	 __perf_event_enable()		+---------------------------
	  ctx_resched()
	   task_ctx_sched_out()
	    ctx_sched_out()
	     group_sched_out()
	      event_sched_out()
	       pending_disable = -1
	</IPI>
	<IRQ-work>
	 perf_pending_event()
	  perf_pending_event_disable()
	   /* Fails to send SIGTRAP because no pending_disable! */
	</IRQ-work>

In the above case, not only is that particular SIGTRAP missed, but also
all future SIGTRAPs because 'event_limit' is not reset back to 1.

To fix, rework pending delivery of SIGTRAP via IRQ-work by introduction
of a separate 'pending_sigtrap', no longer using 'event_limit' and
'pending_disable' for its delivery.

Additionally; and different to Marco's proposed patch:

 - recognise that pending_disable effectively duplicates oncpu for
   the case where it is set. As such, change the irq_work handler to
   use ->oncpu to target the event and use pending_* as boolean toggles.

 - observe that SIGTRAP targets the ctx->task, so the context switch
   optimization that carries contexts between tasks is invalid. If
   the irq_work were delayed enough to hit after a context switch the
   SIGTRAP would be delivered to the wrong task.

 - observe that if the event gets scheduled out
   (rotation/migration/context-switch/...) the irq-work would be
   insufficient to deliver the SIGTRAP when the event gets scheduled
   back in (the irq-work might still be pending on the old CPU).

   Therefore have event_sched_out() convert the pending sigtrap into a
   task_work which will deliver the signal at return_to_user.

Fixes: 97ba62b27867 ("perf: Add support for SIGTRAP on perf events")
Reported-by: Dmitry Vyukov <dvyukov@google.com>
Debugged-by: Dmitry Vyukov <dvyukov@google.com>
Reported-by: Marco Elver <elver@google.com>
Debugged-by: Marco Elver <elver@google.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Marco Elver <elver@google.com>
Tested-by: Marco Elver <elver@google.com>
---
 include/linux/perf_event.h | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 853f64b6c8c2..0031f7b4d9ab 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -756,11 +756,14 @@ struct perf_event {
 	struct fasync_struct		*fasync;
 
 	/* delayed work for NMIs and such */
-	int				pending_wakeup;
-	int				pending_kill;
-	int				pending_disable;
+	unsigned int			pending_wakeup;
+	unsigned int			pending_kill;
+	unsigned int			pending_disable;
+	unsigned int			pending_sigtrap;
 	unsigned long			pending_addr;	/* SIGTRAP */
-	struct irq_work			pending;
+	struct irq_work			pending_irq;
+	struct callback_head		pending_task;
+	unsigned int			pending_work;
 
 	atomic_t			event_limit;
 
@@ -877,6 +880,14 @@ struct perf_event_context {
 #endif
 	void				*task_ctx_data; /* pmu specific data */
 	struct rcu_head			rcu_head;
+
+	/*
+	 * Sum (event->pending_sigtrap + event->pending_work)
+	 *
+	 * The SIGTRAP is targeted at ctx->task, as such it won't do changing
+	 * that until the signal is delivered.
+	 */
+	local_t				nr_pending;
 };
 
 /*
-- 
cgit v1.2.3


From ccd30a476f8e864732de220bd50e6f372f5ebcab Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Tue, 11 Oct 2022 14:38:38 -0700
Subject: fscrypt: fix keyring memory leak on mount failure

Commit d7e7b9af104c ("fscrypt: stop using keyrings subsystem for
fscrypt_master_key") moved the keyring destruction from __put_super() to
generic_shutdown_super() so that the filesystem's block device(s) are
still available.  Unfortunately, this causes a memory leak in the case
where a mount is attempted with the test_dummy_encryption mount option,
but the mount fails after the option has already been processed.

To fix this, attempt the keyring destruction in both places.

Reported-by: syzbot+104c2a89561289cec13e@syzkaller.appspotmail.com
Fixes: d7e7b9af104c ("fscrypt: stop using keyrings subsystem for fscrypt_master_key")
Signed-off-by: Eric Biggers <ebiggers@google.com>
Reviewed-by: Christian Brauner (Microsoft) <brauner@kernel.org>
Link: https://lore.kernel.org/r/20221011213838.209879-1-ebiggers@kernel.org
---
 include/linux/fscrypt.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h
index cad78b569c7e..4f5f8a651213 100644
--- a/include/linux/fscrypt.h
+++ b/include/linux/fscrypt.h
@@ -307,7 +307,7 @@ fscrypt_free_dummy_policy(struct fscrypt_dummy_policy *dummy_policy)
 }
 
 /* keyring.c */
-void fscrypt_sb_delete(struct super_block *sb);
+void fscrypt_destroy_keyring(struct super_block *sb);
 int fscrypt_ioctl_add_key(struct file *filp, void __user *arg);
 int fscrypt_add_test_dummy_key(struct super_block *sb,
 			       const struct fscrypt_dummy_policy *dummy_policy);
@@ -521,7 +521,7 @@ fscrypt_free_dummy_policy(struct fscrypt_dummy_policy *dummy_policy)
 }
 
 /* keyring.c */
-static inline void fscrypt_sb_delete(struct super_block *sb)
+static inline void fscrypt_destroy_keyring(struct super_block *sb)
 {
 }
 
-- 
cgit v1.2.3


From dbe69b29988465b011f198f2797b1c2b6980b50e Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Tue, 18 Oct 2022 09:59:34 +0200
Subject: bpf: Fix dispatcher patchable function entry to 5 bytes nop

The patchable_function_entry(5) might output 5 single nop
instructions (depends on toolchain), which will clash with
bpf_arch_text_poke check for 5 bytes nop instruction.

Adding early init call for dispatcher that checks and change
the patchable entry into expected 5 nop instruction if needed.

There's no need to take text_mutex, because we are using it
in early init call which is called at pre-smp time.

Fixes: ceea991a019c ("bpf: Move bpf_dispatcher function out of ftrace locations")
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20221018075934.574415-1-jolsa@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/bpf.h | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 9e7d46d16032..0566705c1d4e 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -27,6 +27,7 @@
 #include <linux/bpfptr.h>
 #include <linux/btf.h>
 #include <linux/rcupdate_trace.h>
+#include <linux/init.h>
 
 struct bpf_verifier_env;
 struct bpf_verifier_log;
@@ -970,6 +971,8 @@ struct bpf_trampoline *bpf_trampoline_get(u64 key,
 					  struct bpf_attach_target_info *tgt_info);
 void bpf_trampoline_put(struct bpf_trampoline *tr);
 int arch_prepare_bpf_dispatcher(void *image, void *buf, s64 *funcs, int num_funcs);
+int __init bpf_arch_init_dispatcher_early(void *ip);
+
 #define BPF_DISPATCHER_INIT(_name) {				\
 	.mutex = __MUTEX_INITIALIZER(_name.mutex),		\
 	.func = &_name##_func,					\
@@ -983,6 +986,13 @@ int arch_prepare_bpf_dispatcher(void *image, void *buf, s64 *funcs, int num_func
 	},							\
 }
 
+#define BPF_DISPATCHER_INIT_CALL(_name)					\
+	static int __init _name##_init(void)				\
+	{								\
+		return bpf_arch_init_dispatcher_early(_name##_func);	\
+	}								\
+	early_initcall(_name##_init)
+
 #ifdef CONFIG_X86_64
 #define BPF_DISPATCHER_ATTRIBUTES __attribute__((patchable_function_entry(5)))
 #else
@@ -1000,7 +1010,9 @@ int arch_prepare_bpf_dispatcher(void *image, void *buf, s64 *funcs, int num_func
 	}								\
 	EXPORT_SYMBOL(bpf_dispatcher_##name##_func);			\
 	struct bpf_dispatcher bpf_dispatcher_##name =			\
-		BPF_DISPATCHER_INIT(bpf_dispatcher_##name);
+		BPF_DISPATCHER_INIT(bpf_dispatcher_##name);		\
+	BPF_DISPATCHER_INIT_CALL(bpf_dispatcher_##name);
+
 #define DECLARE_BPF_DISPATCHER(name)					\
 	unsigned int bpf_dispatcher_##name##_func(			\
 		const void *ctx,					\
-- 
cgit v1.2.3


From 0251d0107cfb0bb5ab2d3f97710487b9522db020 Mon Sep 17 00:00:00 2001
From: Lu Baolu <baolu.lu@linux.intel.com>
Date: Wed, 19 Oct 2022 08:44:44 +0800
Subject: iommu: Add gfp parameter to iommu_alloc_resv_region

Add gfp parameter to iommu_alloc_resv_region() for the callers to specify
the memory allocation behavior. Thus iommu_alloc_resv_region() could also
be available in critical contexts.

Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Tested-by: Alex Williamson <alex.williamson@redhat.com>
Link: https://lore.kernel.org/r/20220927053109.4053662-2-baolu.lu@linux.intel.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 include/linux/iommu.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index a325532aeab5..3c9da1f8979e 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -455,7 +455,7 @@ extern void iommu_set_default_translated(bool cmd_line);
 extern bool iommu_default_passthrough(void);
 extern struct iommu_resv_region *
 iommu_alloc_resv_region(phys_addr_t start, size_t length, int prot,
-			enum iommu_resv_type type);
+			enum iommu_resv_type type, gfp_t gfp);
 extern int iommu_get_group_resv_regions(struct iommu_group *group,
 					struct list_head *head);
 
-- 
cgit v1.2.3


From 8a254d90a77580244ec57e82bca7eb65656cc167 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Wed, 19 Oct 2022 23:29:58 +0200
Subject: efi: efivars: Fix variable writes without query_variable_store()

Commit bbc6d2c6ef22 ("efi: vars: Switch to new wrapper layer")
refactored the efivars layer so that the 'business logic' related to
which UEFI variables affect the boot flow in which way could be moved
out of it, and into the efivarfs driver.

This inadvertently broke setting variables on firmware implementations
that lack the QueryVariableInfo() boot service, because we no longer
tolerate a EFI_UNSUPPORTED result from check_var_size() when calling
efivar_entry_set_get_size(), which now ends up calling check_var_size()
a second time inadvertently.

If QueryVariableInfo() is missing, we support writes of up to 64k -
let's move that logic into check_var_size(), and drop the redundant
call.

Cc: <stable@vger.kernel.org> # v6.0
Fixes: bbc6d2c6ef22 ("efi: vars: Switch to new wrapper layer")
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
 include/linux/efi.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/efi.h b/include/linux/efi.h
index da3974bf05d3..80f3c1c7827d 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -1085,9 +1085,6 @@ efi_status_t efivar_set_variable_locked(efi_char16_t *name, efi_guid_t *vendor,
 efi_status_t efivar_set_variable(efi_char16_t *name, efi_guid_t *vendor,
 				 u32 attr, unsigned long data_size, void *data);
 
-efi_status_t check_var_size(u32 attributes, unsigned long size);
-efi_status_t check_var_size_nonblocking(u32 attributes, unsigned long size);
-
 #if IS_ENABLED(CONFIG_EFI_CAPSULE_LOADER)
 extern bool efi_capsule_pending(int *reset_type);
 
-- 
cgit v1.2.3


From ed51862f2f57cbce6fed2d4278cfe70a490899fd Mon Sep 17 00:00:00 2001
From: Alexander Graf <graf@amazon.com>
Date: Mon, 17 Oct 2022 20:45:39 +0200
Subject: kvm: Add support for arch compat vm ioctls

We will introduce the first architecture specific compat vm ioctl in the
next patch. Add all necessary boilerplate to allow architectures to
override compat vm ioctls when necessary.

Signed-off-by: Alexander Graf <graf@amazon.com>
Message-Id: <20221017184541.2658-2-graf@amazon.com>
Cc: stable@vger.kernel.org
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/linux/kvm_host.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 32f259fa5801..00c3448ba7f8 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1390,6 +1390,8 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
 			    struct kvm_enable_cap *cap);
 long kvm_arch_vm_ioctl(struct file *filp,
 		       unsigned int ioctl, unsigned long arg);
+long kvm_arch_vm_compat_ioctl(struct file *filp, unsigned int ioctl,
+			      unsigned long arg);
 
 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu);
 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu);
-- 
cgit v1.2.3


From e993ffe3da4bcddea0536b03be1031bf35cd8d85 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Fri, 21 Oct 2022 11:16:39 +0100
Subject: net: flag sockets supporting msghdr originated zerocopy

We need an efficient way in io_uring to check whether a socket supports
zerocopy with msghdr provided ubuf_info. Add a new flag into the struct
socket flags fields.

Cc: <stable@vger.kernel.org> # 6.0
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Acked-by: Jakub Kicinski <kuba@kernel.org>
Link: https://lore.kernel.org/r/3dafafab822b1c66308bb58a0ac738b1e3f53f74.1666346426.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/net.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/net.h b/include/linux/net.h
index 711c3593c3b8..18d942bbdf6e 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -41,6 +41,7 @@ struct net;
 #define SOCK_NOSPACE		2
 #define SOCK_PASSCRED		3
 #define SOCK_PASSSEC		4
+#define SOCK_SUPPORT_ZC		5
 
 #ifndef ARCH_HAS_SOCKET_TYPES
 /**
-- 
cgit v1.2.3


From 52826d3b2d1d8e1180a84bef7d72596d6a024a38 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sun, 23 Oct 2022 12:01:01 -0700
Subject: kernel/utsname_sysctl.c: Fix hostname polling

Commit bfca3dd3d068 ("kernel/utsname_sysctl.c: print kernel arch") added
a new entry to the uts_kern_table[] array, but didn't update the
UTS_PROC_xyz enumerators of older entries, breaking anything that used
them.

Which is admittedly not many cases: it's really just the two uses of
uts_proc_notify() in kernel/sys.c.  But apparently journald-systemd
actually uses this to detect hostname changes.

Reported-by: Torsten Hilbrich <torsten.hilbrich@secunet.com>
Fixes: bfca3dd3d068 ("kernel/utsname_sysctl.c: print kernel arch")
Link: https://lore.kernel.org/lkml/0c2b92a6-0f25-9538-178f-eee3b06da23f@secunet.com/
Link: https://linux-regtracking.leemhuis.info/regzbot/regression/0c2b92a6-0f25-9538-178f-eee3b06da23f@secunet.com/
Cc: Petr Vorel <pvorel@suse.cz>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/utsname.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/utsname.h b/include/linux/utsname.h
index 2b1737c9b244..bf7613ba412b 100644
--- a/include/linux/utsname.h
+++ b/include/linux/utsname.h
@@ -10,6 +10,7 @@
 #include <uapi/linux/utsname.h>
 
 enum uts_proc {
+	UTS_PROC_ARCH,
 	UTS_PROC_OSTYPE,
 	UTS_PROC_OSRELEASE,
 	UTS_PROC_VERSION,
-- 
cgit v1.2.3


From 161a438d730dade2ba2b1bf8785f0759aba4ca5f Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Thu, 20 Oct 2022 10:39:08 +0200
Subject: efi: random: reduce seed size to 32 bytes

We no longer need at least 64 bytes of random seed to permit the early
crng init to complete. The RNG is now based on Blake2s, so reduce the
EFI seed size to the Blake2s hash size, which is sufficient for our
purposes.

While at it, drop the READ_ONCE(), which was supposed to prevent size
from being evaluated after seed was unmapped. However, this cannot
actually happen, so READ_ONCE() is unnecessary here.

Cc: <stable@vger.kernel.org> # v4.14+
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Reviewed-by: Jason A. Donenfeld <Jason@zx2c4.com>
Acked-by: Ilias Apalodimas <ilias.apalodimas@linaro.org>
---
 include/linux/efi.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/efi.h b/include/linux/efi.h
index 80f3c1c7827d..929d559ad41d 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -1222,7 +1222,7 @@ efi_status_t efi_random_get_seed(void);
 	arch_efi_call_virt_teardown();					\
 })
 
-#define EFI_RANDOM_SEED_SIZE		64U
+#define EFI_RANDOM_SEED_SIZE		32U // BLAKE2S_HASH_SIZE
 
 struct linux_efi_random_seed {
 	u32	size;
-- 
cgit v1.2.3


From 31970608a6d3796c3adbfbfd379fa3092de65c5d Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Mon, 26 Sep 2022 12:45:32 -0700
Subject: overflow: Fix kern-doc markup for functions

Fix the kern-doc markings for several of the overflow helpers and move
their location into the core kernel API documentation, where it belongs
(it's not driver-specific).

Cc: Jonathan Corbet <corbet@lwn.net>
Cc: linux-doc@vger.kernel.org
Cc: linux-hardening@vger.kernel.org
Reviewed-by: Akira Yokosawa <akiyks@gmail.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 include/linux/overflow.h | 38 ++++++++++++++++----------------------
 1 file changed, 16 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/overflow.h b/include/linux/overflow.h
index 19dfdd74835e..1d3be1a2204c 100644
--- a/include/linux/overflow.h
+++ b/include/linux/overflow.h
@@ -51,8 +51,8 @@ static inline bool __must_check __must_check_overflow(bool overflow)
 	return unlikely(overflow);
 }
 
-/** check_add_overflow() - Calculate addition with overflow checking
- *
+/**
+ * check_add_overflow() - Calculate addition with overflow checking
  * @a: first addend
  * @b: second addend
  * @d: pointer to store sum
@@ -66,8 +66,8 @@ static inline bool __must_check __must_check_overflow(bool overflow)
 #define check_add_overflow(a, b, d)	\
 	__must_check_overflow(__builtin_add_overflow(a, b, d))
 
-/** check_sub_overflow() - Calculate subtraction with overflow checking
- *
+/**
+ * check_sub_overflow() - Calculate subtraction with overflow checking
  * @a: minuend; value to subtract from
  * @b: subtrahend; value to subtract from @a
  * @d: pointer to store difference
@@ -81,8 +81,8 @@ static inline bool __must_check __must_check_overflow(bool overflow)
 #define check_sub_overflow(a, b, d)	\
 	__must_check_overflow(__builtin_sub_overflow(a, b, d))
 
-/** check_mul_overflow() - Calculate multiplication with overflow checking
- *
+/**
+ * check_mul_overflow() - Calculate multiplication with overflow checking
  * @a: first factor
  * @b: second factor
  * @d: pointer to store product
@@ -96,23 +96,24 @@ static inline bool __must_check __must_check_overflow(bool overflow)
 #define check_mul_overflow(a, b, d)	\
 	__must_check_overflow(__builtin_mul_overflow(a, b, d))
 
-/** check_shl_overflow() - Calculate a left-shifted value and check overflow
- *
+/**
+ * check_shl_overflow() - Calculate a left-shifted value and check overflow
  * @a: Value to be shifted
  * @s: How many bits left to shift
  * @d: Pointer to where to store the result
  *
  * Computes *@d = (@a << @s)
  *
- * Returns true if '*d' cannot hold the result or when 'a << s' doesn't
+ * Returns true if '*@d' cannot hold the result or when '@a << @s' doesn't
  * make sense. Example conditions:
- * - 'a << s' causes bits to be lost when stored in *d.
- * - 's' is garbage (e.g. negative) or so large that the result of
- *   'a << s' is guaranteed to be 0.
- * - 'a' is negative.
- * - 'a << s' sets the sign bit, if any, in '*d'.
  *
- * '*d' will hold the results of the attempted shift, but is not
+ * - '@a << @s' causes bits to be lost when stored in *@d.
+ * - '@s' is garbage (e.g. negative) or so large that the result of
+ *   '@a << @s' is guaranteed to be 0.
+ * - '@a' is negative.
+ * - '@a << @s' sets the sign bit, if any, in '*@d'.
+ *
+ * '*@d' will hold the results of the attempted shift, but is not
  * considered "safe for use" if true is returned.
  */
 #define check_shl_overflow(a, s, d) __must_check_overflow(({		\
@@ -129,7 +130,6 @@ static inline bool __must_check __must_check_overflow(bool overflow)
 
 /**
  * size_mul() - Calculate size_t multiplication with saturation at SIZE_MAX
- *
  * @factor1: first factor
  * @factor2: second factor
  *
@@ -149,7 +149,6 @@ static inline size_t __must_check size_mul(size_t factor1, size_t factor2)
 
 /**
  * size_add() - Calculate size_t addition with saturation at SIZE_MAX
- *
  * @addend1: first addend
  * @addend2: second addend
  *
@@ -169,7 +168,6 @@ static inline size_t __must_check size_add(size_t addend1, size_t addend2)
 
 /**
  * size_sub() - Calculate size_t subtraction with saturation at SIZE_MAX
- *
  * @minuend: value to subtract from
  * @subtrahend: value to subtract from @minuend
  *
@@ -192,7 +190,6 @@ static inline size_t __must_check size_sub(size_t minuend, size_t subtrahend)
 
 /**
  * array_size() - Calculate size of 2-dimensional array.
- *
  * @a: dimension one
  * @b: dimension two
  *
@@ -205,7 +202,6 @@ static inline size_t __must_check size_sub(size_t minuend, size_t subtrahend)
 
 /**
  * array3_size() - Calculate size of 3-dimensional array.
- *
  * @a: dimension one
  * @b: dimension two
  * @c: dimension three
@@ -220,7 +216,6 @@ static inline size_t __must_check size_sub(size_t minuend, size_t subtrahend)
 /**
  * flex_array_size() - Calculate size of a flexible array member
  *                     within an enclosing structure.
- *
  * @p: Pointer to the structure.
  * @member: Name of the flexible array member.
  * @count: Number of elements in the array.
@@ -237,7 +232,6 @@ static inline size_t __must_check size_sub(size_t minuend, size_t subtrahend)
 
 /**
  * struct_size() - Calculate size of structure with trailing flexible array.
- *
  * @p: Pointer to the structure.
  * @member: Name of the array member.
  * @count: Number of elements in the array.
-- 
cgit v1.2.3


From 52491a38b2c2411f3f0229dc6ad610349c704a41 Mon Sep 17 00:00:00 2001
From: Michal Luczaj <mhal@rbox.co>
Date: Thu, 13 Oct 2022 21:12:19 +0000
Subject: KVM: Initialize gfn_to_pfn_cache locks in dedicated helper

Move the gfn_to_pfn_cache lock initialization to another helper and
call the new helper during VM/vCPU creation.  There are race
conditions possible due to kvm_gfn_to_pfn_cache_init()'s
ability to re-initialize the cache's locks.

For example: a race between ioctl(KVM_XEN_HVM_EVTCHN_SEND) and
kvm_gfn_to_pfn_cache_init() leads to a corrupted shinfo gpc lock.

                (thread 1)                |           (thread 2)
                                          |
 kvm_xen_set_evtchn_fast                  |
  read_lock_irqsave(&gpc->lock, ...)      |
                                          | kvm_gfn_to_pfn_cache_init
                                          |  rwlock_init(&gpc->lock)
  read_unlock_irqrestore(&gpc->lock, ...) |

Rename "cache_init" and "cache_destroy" to activate+deactivate to
avoid implying that the cache really is destroyed/freed.

Note, there more races in the newly named kvm_gpc_activate() that will
be addressed separately.

Fixes: 982ed0de4753 ("KVM: Reinstate gfn_to_pfn_cache with invalidation support")
Cc: stable@vger.kernel.org
Suggested-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Michal Luczaj <mhal@rbox.co>
[sean: call out that this is a bug fix]
Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20221013211234.1318131-2-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/linux/kvm_host.h | 24 +++++++++++++++++-------
 1 file changed, 17 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 00c3448ba7f8..18592bdf4c1b 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1240,8 +1240,18 @@ int kvm_vcpu_write_guest(struct kvm_vcpu *vcpu, gpa_t gpa, const void *data,
 void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, gfn_t gfn);
 
 /**
- * kvm_gfn_to_pfn_cache_init - prepare a cached kernel mapping and HPA for a
- *                             given guest physical address.
+ * kvm_gpc_init - initialize gfn_to_pfn_cache.
+ *
+ * @gpc:	   struct gfn_to_pfn_cache object.
+ *
+ * This sets up a gfn_to_pfn_cache by initializing locks.  Note, the cache must
+ * be zero-allocated (or zeroed by the caller before init).
+ */
+void kvm_gpc_init(struct gfn_to_pfn_cache *gpc);
+
+/**
+ * kvm_gpc_activate - prepare a cached kernel mapping and HPA for a given guest
+ *                    physical address.
  *
  * @kvm:	   pointer to kvm instance.
  * @gpc:	   struct gfn_to_pfn_cache object.
@@ -1265,9 +1275,9 @@ void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, gfn_t gfn);
  * kvm_gfn_to_pfn_cache_check() to ensure that the cache is valid before
  * accessing the target page.
  */
-int kvm_gfn_to_pfn_cache_init(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
-			      struct kvm_vcpu *vcpu, enum pfn_cache_usage usage,
-			      gpa_t gpa, unsigned long len);
+int kvm_gpc_activate(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
+		     struct kvm_vcpu *vcpu, enum pfn_cache_usage usage,
+		     gpa_t gpa, unsigned long len);
 
 /**
  * kvm_gfn_to_pfn_cache_check - check validity of a gfn_to_pfn_cache.
@@ -1324,7 +1334,7 @@ int kvm_gfn_to_pfn_cache_refresh(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
 void kvm_gfn_to_pfn_cache_unmap(struct kvm *kvm, struct gfn_to_pfn_cache *gpc);
 
 /**
- * kvm_gfn_to_pfn_cache_destroy - destroy and unlink a gfn_to_pfn_cache.
+ * kvm_gpc_deactivate - deactivate and unlink a gfn_to_pfn_cache.
  *
  * @kvm:	   pointer to kvm instance.
  * @gpc:	   struct gfn_to_pfn_cache object.
@@ -1332,7 +1342,7 @@ void kvm_gfn_to_pfn_cache_unmap(struct kvm *kvm, struct gfn_to_pfn_cache *gpc);
  * This removes a cache from the @kvm's list to be processed on MMU notifier
  * invocation.
  */
-void kvm_gfn_to_pfn_cache_destroy(struct kvm *kvm, struct gfn_to_pfn_cache *gpc);
+void kvm_gpc_deactivate(struct kvm *kvm, struct gfn_to_pfn_cache *gpc);
 
 void kvm_sigset_activate(struct kvm_vcpu *vcpu);
 void kvm_sigset_deactivate(struct kvm_vcpu *vcpu);
-- 
cgit v1.2.3


From 2d87d455ead2cbdee7e60463cddc5bff3f98c912 Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@redhat.com>
Date: Thu, 27 Oct 2022 16:57:09 +0800
Subject: blk-mq: don't add non-pt request with ->end_io to batch

dm-rq implements ->end_io callback for request issued to underlying queue,
and it isn't passthrough request.

Commit ab3e1d3bbab9 ("block: allow end_io based requests in the completion
batch handling") doesn't clear rq->bio and rq->__data_len for request
with ->end_io in blk_mq_end_request_batch(), and this way is actually
dangerous, but so far it is only for nvme passthrough request.

dm-rq needs to clean up remained bios in case of partial completion,
and req->bio is required, then use-after-free is triggered, so the
underlying clone request can't be completed in blk_mq_end_request_batch.

Fix panic by not adding such request into batch list, and the issue
can be triggered simply by exposing nvme pci to dm-mpath simply.

Fixes: ab3e1d3bbab9 ("block: allow end_io based requests in the completion batch handling")
Cc: dm-devel@redhat.com
Cc: Mike Snitzer <snitzer@kernel.org>
Reported-by: Changhui Zhong <czhong@redhat.com>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Link: https://lore.kernel.org/r/20221027085709.513175-1-ming.lei@redhat.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blk-mq.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index ba18e9bdb799..d6119c5d1069 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -853,7 +853,8 @@ static inline bool blk_mq_add_to_batch(struct request *req,
 				       struct io_comp_batch *iob, int ioerror,
 				       void (*complete)(struct io_comp_batch *))
 {
-	if (!iob || (req->rq_flags & RQF_ELV) || ioerror)
+	if (!iob || (req->rq_flags & RQF_ELV) || ioerror ||
+			(req->end_io && !blk_rq_is_passthrough(req)))
 		return false;
 
 	if (!iob->complete)
-- 
cgit v1.2.3


From bacd22df95147ed673bec4692ab2d4d585935241 Mon Sep 17 00:00:00 2001
From: Tariq Toukan <tariqt@nvidia.com>
Date: Wed, 26 Oct 2022 14:51:45 +0100
Subject: net/mlx5: Fix possible use-after-free in async command interface

mlx5_cmd_cleanup_async_ctx should return only after all its callback
handlers were completed. Before this patch, the below race between
mlx5_cmd_cleanup_async_ctx and mlx5_cmd_exec_cb_handler was possible and
lead to a use-after-free:

1. mlx5_cmd_cleanup_async_ctx is called while num_inflight is 2 (i.e.
   elevated by 1, a single inflight callback).
2. mlx5_cmd_cleanup_async_ctx decreases num_inflight to 1.
3. mlx5_cmd_exec_cb_handler is called, decreases num_inflight to 0 and
   is about to call wake_up().
4. mlx5_cmd_cleanup_async_ctx calls wait_event, which returns
   immediately as the condition (num_inflight == 0) holds.
5. mlx5_cmd_cleanup_async_ctx returns.
6. The caller of mlx5_cmd_cleanup_async_ctx frees the mlx5_async_ctx
   object.
7. mlx5_cmd_exec_cb_handler goes on and calls wake_up() on the freed
   object.

Fix it by syncing using a completion object. Mark it completed when
num_inflight reaches 0.

Trace:

BUG: KASAN: use-after-free in do_raw_spin_lock+0x23d/0x270
Read of size 4 at addr ffff888139cd12f4 by task swapper/5/0

CPU: 5 PID: 0 Comm: swapper/5 Not tainted 6.0.0-rc3_for_upstream_debug_2022_08_30_13_10 #1
Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014
Call Trace:
 <IRQ>
 dump_stack_lvl+0x57/0x7d
 print_report.cold+0x2d5/0x684
 ? do_raw_spin_lock+0x23d/0x270
 kasan_report+0xb1/0x1a0
 ? do_raw_spin_lock+0x23d/0x270
 do_raw_spin_lock+0x23d/0x270
 ? rwlock_bug.part.0+0x90/0x90
 ? __delete_object+0xb8/0x100
 ? lock_downgrade+0x6e0/0x6e0
 _raw_spin_lock_irqsave+0x43/0x60
 ? __wake_up_common_lock+0xb9/0x140
 __wake_up_common_lock+0xb9/0x140
 ? __wake_up_common+0x650/0x650
 ? destroy_tis_callback+0x53/0x70 [mlx5_core]
 ? kasan_set_track+0x21/0x30
 ? destroy_tis_callback+0x53/0x70 [mlx5_core]
 ? kfree+0x1ba/0x520
 ? do_raw_spin_unlock+0x54/0x220
 mlx5_cmd_exec_cb_handler+0x136/0x1a0 [mlx5_core]
 ? mlx5_cmd_cleanup_async_ctx+0x220/0x220 [mlx5_core]
 ? mlx5_cmd_cleanup_async_ctx+0x220/0x220 [mlx5_core]
 mlx5_cmd_comp_handler+0x65a/0x12b0 [mlx5_core]
 ? dump_command+0xcc0/0xcc0 [mlx5_core]
 ? lockdep_hardirqs_on_prepare+0x400/0x400
 ? cmd_comp_notifier+0x7e/0xb0 [mlx5_core]
 cmd_comp_notifier+0x7e/0xb0 [mlx5_core]
 atomic_notifier_call_chain+0xd7/0x1d0
 mlx5_eq_async_int+0x3ce/0xa20 [mlx5_core]
 atomic_notifier_call_chain+0xd7/0x1d0
 ? irq_release+0x140/0x140 [mlx5_core]
 irq_int_handler+0x19/0x30 [mlx5_core]
 __handle_irq_event_percpu+0x1f2/0x620
 handle_irq_event+0xb2/0x1d0
 handle_edge_irq+0x21e/0xb00
 __common_interrupt+0x79/0x1a0
 common_interrupt+0x78/0xa0
 </IRQ>
 <TASK>
 asm_common_interrupt+0x22/0x40
RIP: 0010:default_idle+0x42/0x60
Code: c1 83 e0 07 48 c1 e9 03 83 c0 03 0f b6 14 11 38 d0 7c 04 84 d2 75 14 8b 05 eb 47 22 02 85 c0 7e 07 0f 00 2d e0 9f 48 00 fb f4 <c3> 48 c7 c7 80 08 7f 85 e8 d1 d3 3e fe eb de 66 66 2e 0f 1f 84 00
RSP: 0018:ffff888100dbfdf0 EFLAGS: 00000242
RAX: 0000000000000001 RBX: ffffffff84ecbd48 RCX: 1ffffffff0afe110
RDX: 0000000000000004 RSI: 0000000000000000 RDI: ffffffff835cc9bc
RBP: 0000000000000005 R08: 0000000000000001 R09: ffff88881dec4ac3
R10: ffffed1103bd8958 R11: 0000017d0ca571c9 R12: 0000000000000005
R13: ffffffff84f024e0 R14: 0000000000000000 R15: dffffc0000000000
 ? default_idle_call+0xcc/0x450
 default_idle_call+0xec/0x450
 do_idle+0x394/0x450
 ? arch_cpu_idle_exit+0x40/0x40
 ? do_idle+0x17/0x450
 cpu_startup_entry+0x19/0x20
 start_secondary+0x221/0x2b0
 ? set_cpu_sibling_map+0x2070/0x2070
 secondary_startup_64_no_verify+0xcd/0xdb
 </TASK>

Allocated by task 49502:
 kasan_save_stack+0x1e/0x40
 __kasan_kmalloc+0x81/0xa0
 kvmalloc_node+0x48/0xe0
 mlx5e_bulk_async_init+0x35/0x110 [mlx5_core]
 mlx5e_tls_priv_tx_list_cleanup+0x84/0x3e0 [mlx5_core]
 mlx5e_ktls_cleanup_tx+0x38f/0x760 [mlx5_core]
 mlx5e_cleanup_nic_tx+0xa7/0x100 [mlx5_core]
 mlx5e_detach_netdev+0x1ca/0x2b0 [mlx5_core]
 mlx5e_suspend+0xdb/0x140 [mlx5_core]
 mlx5e_remove+0x89/0x190 [mlx5_core]
 auxiliary_bus_remove+0x52/0x70
 device_release_driver_internal+0x40f/0x650
 driver_detach+0xc1/0x180
 bus_remove_driver+0x125/0x2f0
 auxiliary_driver_unregister+0x16/0x50
 mlx5e_cleanup+0x26/0x30 [mlx5_core]
 cleanup+0xc/0x4e [mlx5_core]
 __x64_sys_delete_module+0x2b5/0x450
 do_syscall_64+0x3d/0x90
 entry_SYSCALL_64_after_hwframe+0x46/0xb0

Freed by task 49502:
 kasan_save_stack+0x1e/0x40
 kasan_set_track+0x21/0x30
 kasan_set_free_info+0x20/0x30
 ____kasan_slab_free+0x11d/0x1b0
 kfree+0x1ba/0x520
 mlx5e_tls_priv_tx_list_cleanup+0x2e7/0x3e0 [mlx5_core]
 mlx5e_ktls_cleanup_tx+0x38f/0x760 [mlx5_core]
 mlx5e_cleanup_nic_tx+0xa7/0x100 [mlx5_core]
 mlx5e_detach_netdev+0x1ca/0x2b0 [mlx5_core]
 mlx5e_suspend+0xdb/0x140 [mlx5_core]
 mlx5e_remove+0x89/0x190 [mlx5_core]
 auxiliary_bus_remove+0x52/0x70
 device_release_driver_internal+0x40f/0x650
 driver_detach+0xc1/0x180
 bus_remove_driver+0x125/0x2f0
 auxiliary_driver_unregister+0x16/0x50
 mlx5e_cleanup+0x26/0x30 [mlx5_core]
 cleanup+0xc/0x4e [mlx5_core]
 __x64_sys_delete_module+0x2b5/0x450
 do_syscall_64+0x3d/0x90
 entry_SYSCALL_64_after_hwframe+0x46/0xb0

Fixes: e355477ed9e4 ("net/mlx5: Make mlx5_cmd_exec_cb() a safe API")
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Reviewed-by: Moshe Shemesh <moshe@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
Link: https://lore.kernel.org/r/20221026135153.154807-8-saeed@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/mlx5/driver.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index a12929bc31b2..af2ceb4160bc 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -970,7 +970,7 @@ void mlx5_cmd_allowed_opcode(struct mlx5_core_dev *dev, u16 opcode);
 struct mlx5_async_ctx {
 	struct mlx5_core_dev *dev;
 	atomic_t num_inflight;
-	struct wait_queue_head wait;
+	struct completion inflight_done;
 };
 
 struct mlx5_async_work;
-- 
cgit v1.2.3


From 67eae54bc227b30dedcce9db68b063ba1adb7838 Mon Sep 17 00:00:00 2001
From: Peter Xu <peterx@redhat.com>
Date: Mon, 24 Oct 2022 15:33:35 -0400
Subject: mm/uffd: fix vma check on userfault for wp

We used to have a report that pte-marker code can be reached even when
uffd-wp is not compiled in for file memories, here:

https://lore.kernel.org/all/YzeR+R6b4bwBlBHh@x1n/T/#u

I just got time to revisit this and found that the root cause is we simply
messed up with the vma check, so that for !PTE_MARKER_UFFD_WP system, we
will allow UFFDIO_REGISTER of MINOR & WP upon shmem as the check was
wrong:

    if (vm_flags & VM_UFFD_MINOR)
        return is_vm_hugetlb_page(vma) || vma_is_shmem(vma);

Where we'll allow anything to pass on shmem as long as minor mode is
requested.

Axel did it right when introducing minor mode but I messed it up in
b1f9e876862d when moving code around.  Fix it.

Link: https://lkml.kernel.org/r/20221024193336.1233616-1-peterx@redhat.com
Link: https://lkml.kernel.org/r/20221024193336.1233616-2-peterx@redhat.com
Fixes: b1f9e876862d ("mm/uffd: enable write protection for shmem & hugetlbfs")
Signed-off-by: Peter Xu <peterx@redhat.com>
Cc: Axel Rasmussen <axelrasmussen@google.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Nadav Amit <nadav.amit@gmail.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/userfaultfd_k.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h
index f07e6998bb68..9df0b9a762cc 100644
--- a/include/linux/userfaultfd_k.h
+++ b/include/linux/userfaultfd_k.h
@@ -146,9 +146,9 @@ static inline bool userfaultfd_armed(struct vm_area_struct *vma)
 static inline bool vma_can_userfault(struct vm_area_struct *vma,
 				     unsigned long vm_flags)
 {
-	if (vm_flags & VM_UFFD_MINOR)
-		return is_vm_hugetlb_page(vma) || vma_is_shmem(vma);
-
+	if ((vm_flags & VM_UFFD_MINOR) &&
+	    (!is_vm_hugetlb_page(vma) && !vma_is_shmem(vma)))
+		return false;
 #ifndef CONFIG_PTE_MARKER_UFFD_WP
 	/*
 	 * If user requested uffd-wp but not enabled pte markers for
-- 
cgit v1.2.3


From 78a498c3a227f2ac773a8234b2ce092a4403f2c3 Mon Sep 17 00:00:00 2001
From: Alexander Potapenko <glider@google.com>
Date: Mon, 24 Oct 2022 23:21:44 +0200
Subject: x86: fortify: kmsan: fix KMSAN fortify builds

Ensure that KMSAN builds replace memset/memcpy/memmove calls with the
respective __msan_XXX functions, and that none of the macros are redefined
twice.  This should allow building kernel with both CONFIG_KMSAN and
CONFIG_FORTIFY_SOURCE.

Link: https://lkml.kernel.org/r/20221024212144.2852069-5-glider@google.com
Link: https://github.com/google/kmsan/issues/89
Signed-off-by: Alexander Potapenko <glider@google.com>
Reported-by: Tamas K Lengyel <tamas.lengyel@zentific.com>
Cc: Nathan Chancellor <nathan@kernel.org>
Cc: Nick Desaulniers <ndesaulniers@google.com>
Cc: Kees Cook <keescook@chromium.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/fortify-string.h | 17 +++++++++++++++--
 include/linux/kmsan_string.h   | 21 +++++++++++++++++++++
 2 files changed, 36 insertions(+), 2 deletions(-)
 create mode 100644 include/linux/kmsan_string.h

(limited to 'include/linux')

diff --git a/include/linux/fortify-string.h b/include/linux/fortify-string.h
index 4029fe368a4f..18a31b125f9d 100644
--- a/include/linux/fortify-string.h
+++ b/include/linux/fortify-string.h
@@ -43,11 +43,24 @@ extern __kernel_size_t __underlying_strlen(const char *p) __RENAME(strlen);
 extern char *__underlying_strncat(char *p, const char *q, __kernel_size_t count) __RENAME(strncat);
 extern char *__underlying_strncpy(char *p, const char *q, __kernel_size_t size) __RENAME(strncpy);
 #else
-#define __underlying_memchr	__builtin_memchr
-#define __underlying_memcmp	__builtin_memcmp
+
+#if defined(__SANITIZE_MEMORY__)
+/*
+ * For KMSAN builds all memcpy/memset/memmove calls should be replaced by the
+ * corresponding __msan_XXX functions.
+ */
+#include <linux/kmsan_string.h>
+#define __underlying_memcpy	__msan_memcpy
+#define __underlying_memmove	__msan_memmove
+#define __underlying_memset	__msan_memset
+#else
 #define __underlying_memcpy	__builtin_memcpy
 #define __underlying_memmove	__builtin_memmove
 #define __underlying_memset	__builtin_memset
+#endif
+
+#define __underlying_memchr	__builtin_memchr
+#define __underlying_memcmp	__builtin_memcmp
 #define __underlying_strcat	__builtin_strcat
 #define __underlying_strcpy	__builtin_strcpy
 #define __underlying_strlen	__builtin_strlen
diff --git a/include/linux/kmsan_string.h b/include/linux/kmsan_string.h
new file mode 100644
index 000000000000..7287da6f52ef
--- /dev/null
+++ b/include/linux/kmsan_string.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * KMSAN string functions API used in other headers.
+ *
+ * Copyright (C) 2022 Google LLC
+ * Author: Alexander Potapenko <glider@google.com>
+ *
+ */
+#ifndef _LINUX_KMSAN_STRING_H
+#define _LINUX_KMSAN_STRING_H
+
+/*
+ * KMSAN overrides the default memcpy/memset/memmove implementations in the
+ * kernel, which requires having __msan_XXX function prototypes in several other
+ * headers. Keep them in one place instead of open-coding.
+ */
+void *__msan_memcpy(void *dst, const void *src, size_t size);
+void *__msan_memset(void *s, int c, size_t n);
+void *__msan_memmove(void *dest, const void *src, size_t len);
+
+#endif /* _LINUX_KMSAN_STRING_H */
-- 
cgit v1.2.3


From 6f7630b1b5bc672b54c1285ee6aba752b446672c Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Fri, 28 Oct 2022 15:32:07 -0700
Subject: fortify: Capture __bos() results in const temp vars

In two recent run-time memcpy() bound checking bug reports (NFS[1] and
JFS[2]), the _detection_ was working correctly (in the sense that the
requested copy size was larger than the destination field size), but
the _warning text_ was showing the destination field size as SIZE_MAX
("unknown size"). This should be impossible, since the detection function
will explicitly give up if the destination field size is unknown. For
example, the JFS warning was:

  memcpy: detected field-spanning write (size 132) of single field "ip->i_link" at fs/jfs/namei.c:950 (size 18446744073709551615)

Other cases of this warning (e.g.[3]) have reported correctly,
and the reproducer only happens under GCC (at least 10.2 and 12.1),
so this currently appears to be a GCC bug. Explicitly capturing the
__builtin_object_size() results in const temporary variables fixes the
report. For example, the JFS reproducer now correctly reports the field
size (128):

  memcpy: detected field-spanning write (size 132) of single field "ip->i_link" at fs/jfs/namei.c:950 (size 128)

Examination of the .text delta (which is otherwise identical), shows
the literal value used in the report changing:

-     mov    $0xffffffffffffffff,%rcx
+     mov    $0x80,%ecx

[1] https://lore.kernel.org/lkml/Y0zEzZwhOxTDcBTB@codemonkey.org.uk/
[2] https://syzkaller.appspot.com/bug?id=23d613df5259b977dac1696bec77f61a85890e3d
[3] https://lore.kernel.org/all/202210110948.26b43120-yujie.liu@intel.com/

Cc: "Dr. David Alan Gilbert" <linux@treblig.org>
Cc: llvm@lists.linux.dev
Cc: linux-hardening@vger.kernel.org
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 include/linux/fortify-string.h | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/fortify-string.h b/include/linux/fortify-string.h
index 4029fe368a4f..0f00a551939a 100644
--- a/include/linux/fortify-string.h
+++ b/include/linux/fortify-string.h
@@ -441,13 +441,18 @@ __FORTIFY_INLINE bool fortify_memcpy_chk(__kernel_size_t size,
 
 #define __fortify_memcpy_chk(p, q, size, p_size, q_size,		\
 			     p_size_field, q_size_field, op) ({		\
-	size_t __fortify_size = (size_t)(size);				\
-	WARN_ONCE(fortify_memcpy_chk(__fortify_size, p_size, q_size,	\
-				     p_size_field, q_size_field, #op),	\
+	const size_t __fortify_size = (size_t)(size);			\
+	const size_t __p_size = (p_size);				\
+	const size_t __q_size = (q_size);				\
+	const size_t __p_size_field = (p_size_field);			\
+	const size_t __q_size_field = (q_size_field);			\
+	WARN_ONCE(fortify_memcpy_chk(__fortify_size, __p_size,		\
+				     __q_size, __p_size_field,		\
+				     __q_size_field, #op),		\
 		  #op ": detected field-spanning write (size %zu) of single %s (size %zu)\n", \
 		  __fortify_size,					\
 		  "field \"" #p "\" at " __FILE__ ":" __stringify(__LINE__), \
-		  p_size_field);					\
+		  __p_size_field);					\
 	__underlying_##op(p, q, __fortify_size);			\
 })
 
-- 
cgit v1.2.3


From 8bbabb3fddcd0f858be69ed5abc9b470a239d6f2 Mon Sep 17 00:00:00 2001
From: Cong Wang <cong.wang@bytedance.com>
Date: Tue, 1 Nov 2022 21:34:17 -0700
Subject: bpf, sock_map: Move cancel_work_sync() out of sock lock

Stanislav reported a lockdep warning, which is caused by the
cancel_work_sync() called inside sock_map_close(), as analyzed
below by Jakub:

psock->work.func = sk_psock_backlog()
  ACQUIRE psock->work_mutex
    sk_psock_handle_skb()
      skb_send_sock()
        __skb_send_sock()
          sendpage_unlocked()
            kernel_sendpage()
              sock->ops->sendpage = inet_sendpage()
                sk->sk_prot->sendpage = tcp_sendpage()
                  ACQUIRE sk->sk_lock
                    tcp_sendpage_locked()
                  RELEASE sk->sk_lock
  RELEASE psock->work_mutex

sock_map_close()
  ACQUIRE sk->sk_lock
  sk_psock_stop()
    sk_psock_clear_state(psock, SK_PSOCK_TX_ENABLED)
    cancel_work_sync()
      __cancel_work_timer()
        __flush_work()
          // wait for psock->work to finish
  RELEASE sk->sk_lock

We can move the cancel_work_sync() out of the sock lock protection,
but still before saved_close() was called.

Fixes: 799aa7f98d53 ("skmsg: Avoid lock_sock() in sk_psock_backlog()")
Reported-by: Stanislav Fomichev <sdf@google.com>
Signed-off-by: Cong Wang <cong.wang@bytedance.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Tested-by: Jakub Sitnicki <jakub@cloudflare.com>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Acked-by: Jakub Sitnicki <jakub@cloudflare.com>
Link: https://lore.kernel.org/bpf/20221102043417.279409-1-xiyou.wangcong@gmail.com
---
 include/linux/skmsg.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h
index 48f4b645193b..70d6cb94e580 100644
--- a/include/linux/skmsg.h
+++ b/include/linux/skmsg.h
@@ -376,7 +376,7 @@ static inline void sk_psock_report_error(struct sk_psock *psock, int err)
 }
 
 struct sk_psock *sk_psock_init(struct sock *sk, int node);
-void sk_psock_stop(struct sk_psock *psock, bool wait);
+void sk_psock_stop(struct sk_psock *psock);
 
 #if IS_ENABLED(CONFIG_BPF_STREAM_PARSER)
 int sk_psock_init_strp(struct sock *sk, struct sk_psock *psock);
-- 
cgit v1.2.3


From eb4940d4adf590590a9d0c47e38d2799c2ff9670 Mon Sep 17 00:00:00 2001
From: Vlastimil Babka <vbabka@suse.cz>
Date: Fri, 4 Nov 2022 13:57:11 +0100
Subject: mm/slab: remove !CONFIG_TRACING variants of kmalloc_[node_]trace()

For !CONFIG_TRACING kernels, the kmalloc() implementation tries (in cases where
the allocation size is build-time constant) to save a function call, by
inlining kmalloc_trace() to a kmem_cache_alloc() call.

However since commit 6edf2576a6cc ("mm/slub: enable debugging memory wasting of
kmalloc") this path now fails to pass the original request size to be
eventually recorded (for kmalloc caches with debugging enabled).

We could adjust the code to call __kmem_cache_alloc_node() as the
CONFIG_TRACING variant, but that would as a result inline a call with 5
parameters, bloating the kmalloc() call sites. The cost of extra function
call (to kmalloc_trace()) seems like a lesser evil.

It also appears that the !CONFIG_TRACING variant is incompatible with upcoming
hardening efforts [1] so it's easier if we just remove it now. Kernels with no
tracing are rare these days and the benefit is dubious anyway.

[1] https://lore.kernel.org/linux-mm/20221101222520.never.109-kees@kernel.org/T/#m20ecf14390e406247bde0ea9cce368f469c539ed

Link: https://lore.kernel.org/all/097d8fba-bd10-a312-24a3-a4068c4f424c@suse.cz/
Suggested-by: Hyeonggon Yoo <42.hyeyoo@gmail.com>
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
---
 include/linux/slab.h | 23 -----------------------
 1 file changed, 23 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/slab.h b/include/linux/slab.h
index 90877fcde70b..45efc6c553b8 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -470,35 +470,12 @@ void *__kmalloc_node(size_t size, gfp_t flags, int node) __assume_kmalloc_alignm
 void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t flags, int node) __assume_slab_alignment
 									 __malloc;
 
-#ifdef CONFIG_TRACING
 void *kmalloc_trace(struct kmem_cache *s, gfp_t flags, size_t size)
 		    __assume_kmalloc_alignment __alloc_size(3);
 
 void *kmalloc_node_trace(struct kmem_cache *s, gfp_t gfpflags,
 			 int node, size_t size) __assume_kmalloc_alignment
 						__alloc_size(4);
-#else /* CONFIG_TRACING */
-/* Save a function call when CONFIG_TRACING=n */
-static __always_inline __alloc_size(3)
-void *kmalloc_trace(struct kmem_cache *s, gfp_t flags, size_t size)
-{
-	void *ret = kmem_cache_alloc(s, flags);
-
-	ret = kasan_kmalloc(s, ret, size, flags);
-	return ret;
-}
-
-static __always_inline __alloc_size(4)
-void *kmalloc_node_trace(struct kmem_cache *s, gfp_t gfpflags,
-			 int node, size_t size)
-{
-	void *ret = kmem_cache_alloc_node(s, gfpflags, node);
-
-	ret = kasan_kmalloc(s, ret, size, gfpflags);
-	return ret;
-}
-#endif /* CONFIG_TRACING */
-
 void *kmalloc_large(size_t size, gfp_t flags) __assume_page_alignment
 					      __alloc_size(1);
 
-- 
cgit v1.2.3


From 18acb7fac22ff7b36c7ea5a76b12996e7b7dbaba Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 3 Nov 2022 13:00:13 +0100
Subject: bpf: Revert ("Fix dispatcher patchable function entry to 5 bytes
 nop")
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Because __attribute__((patchable_function_entry)) is only available
since GCC-8 this solution fails to build on the minimum required GCC
version.

Undo these changes so we might try again -- without cluttering up the
patches with too many changes.

This is an almost complete revert of:

  dbe69b299884 ("bpf: Fix dispatcher patchable function entry to 5 bytes nop")
  ceea991a019c ("bpf: Move bpf_dispatcher function out of ftrace locations")

(notably the arch/x86/Kconfig hunk is kept).

Reported-by: David Laight <David.Laight@aculab.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Tested-by: Björn Töpel <bjorn@kernel.org>
Tested-by: Jiri Olsa <jolsa@kernel.org>
Acked-by: Björn Töpel <bjorn@kernel.org>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Link: https://lkml.kernel.org/r/439d8dc735bb4858875377df67f1b29a@AcuMS.aculab.com
Link: https://lore.kernel.org/bpf/20221103120647.728830733@infradead.org
---
 include/linux/bpf.h | 21 +--------------------
 1 file changed, 1 insertion(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 0566705c1d4e..5cd95716b441 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -27,7 +27,6 @@
 #include <linux/bpfptr.h>
 #include <linux/btf.h>
 #include <linux/rcupdate_trace.h>
-#include <linux/init.h>
 
 struct bpf_verifier_env;
 struct bpf_verifier_log;
@@ -971,8 +970,6 @@ struct bpf_trampoline *bpf_trampoline_get(u64 key,
 					  struct bpf_attach_target_info *tgt_info);
 void bpf_trampoline_put(struct bpf_trampoline *tr);
 int arch_prepare_bpf_dispatcher(void *image, void *buf, s64 *funcs, int num_funcs);
-int __init bpf_arch_init_dispatcher_early(void *ip);
-
 #define BPF_DISPATCHER_INIT(_name) {				\
 	.mutex = __MUTEX_INITIALIZER(_name.mutex),		\
 	.func = &_name##_func,					\
@@ -986,21 +983,7 @@ int __init bpf_arch_init_dispatcher_early(void *ip);
 	},							\
 }
 
-#define BPF_DISPATCHER_INIT_CALL(_name)					\
-	static int __init _name##_init(void)				\
-	{								\
-		return bpf_arch_init_dispatcher_early(_name##_func);	\
-	}								\
-	early_initcall(_name##_init)
-
-#ifdef CONFIG_X86_64
-#define BPF_DISPATCHER_ATTRIBUTES __attribute__((patchable_function_entry(5)))
-#else
-#define BPF_DISPATCHER_ATTRIBUTES
-#endif
-
 #define DEFINE_BPF_DISPATCHER(name)					\
-	notrace BPF_DISPATCHER_ATTRIBUTES				\
 	noinline __nocfi unsigned int bpf_dispatcher_##name##_func(	\
 		const void *ctx,					\
 		const struct bpf_insn *insnsi,				\
@@ -1010,9 +993,7 @@ int __init bpf_arch_init_dispatcher_early(void *ip);
 	}								\
 	EXPORT_SYMBOL(bpf_dispatcher_##name##_func);			\
 	struct bpf_dispatcher bpf_dispatcher_##name =			\
-		BPF_DISPATCHER_INIT(bpf_dispatcher_##name);		\
-	BPF_DISPATCHER_INIT_CALL(bpf_dispatcher_##name);
-
+		BPF_DISPATCHER_INIT(bpf_dispatcher_##name);
 #define DECLARE_BPF_DISPATCHER(name)					\
 	unsigned int bpf_dispatcher_##name##_func(			\
 		const void *ctx,					\
-- 
cgit v1.2.3


From c86df29d11dfba27c0a1f5039cd6fe387fbf4239 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 3 Nov 2022 13:00:14 +0100
Subject: bpf: Convert BPF_DISPATCHER to use static_call() (not ftrace)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The dispatcher function is currently abusing the ftrace __fentry__
call location for its own purposes -- this obviously gives trouble
when the dispatcher and ftrace are both in use.

A previous solution tried using __attribute__((patchable_function_entry()))
which works, except it is GCC-8+ only, breaking the build on the
earlier still supported compilers. Instead use static_call() -- which
has its own annotations and does not conflict with ftrace -- to
rewrite the dispatch function.

By using: return static_call()(ctx, insni, bpf_func) you get a perfect
forwarding tail call as function body (iow a single jmp instruction).
By having the default static_call() target be bpf_dispatcher_nop_func()
it retains the default behaviour (an indirect call to the argument
function). Only once a dispatcher program is attached is the target
rewritten to directly call the JIT'ed image.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Tested-by: Björn Töpel <bjorn@kernel.org>
Tested-by: Jiri Olsa <jolsa@kernel.org>
Acked-by: Björn Töpel <bjorn@kernel.org>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Link: https://lkml.kernel.org/r/Y1/oBlK0yFk5c/Im@hirez.programming.kicks-ass.net
Link: https://lore.kernel.org/bpf/20221103120647.796772565@infradead.org
---
 include/linux/bpf.h | 39 ++++++++++++++++++++++++++++++++++++++-
 1 file changed, 38 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 5cd95716b441..74c6f449d81e 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -27,6 +27,7 @@
 #include <linux/bpfptr.h>
 #include <linux/btf.h>
 #include <linux/rcupdate_trace.h>
+#include <linux/static_call.h>
 
 struct bpf_verifier_env;
 struct bpf_verifier_log;
@@ -953,6 +954,10 @@ struct bpf_dispatcher {
 	void *rw_image;
 	u32 image_off;
 	struct bpf_ksym ksym;
+#ifdef CONFIG_HAVE_STATIC_CALL
+	struct static_call_key *sc_key;
+	void *sc_tramp;
+#endif
 };
 
 static __always_inline __nocfi unsigned int bpf_dispatcher_nop_func(
@@ -970,6 +975,34 @@ struct bpf_trampoline *bpf_trampoline_get(u64 key,
 					  struct bpf_attach_target_info *tgt_info);
 void bpf_trampoline_put(struct bpf_trampoline *tr);
 int arch_prepare_bpf_dispatcher(void *image, void *buf, s64 *funcs, int num_funcs);
+
+/*
+ * When the architecture supports STATIC_CALL replace the bpf_dispatcher_fn
+ * indirection with a direct call to the bpf program. If the architecture does
+ * not have STATIC_CALL, avoid a double-indirection.
+ */
+#ifdef CONFIG_HAVE_STATIC_CALL
+
+#define __BPF_DISPATCHER_SC_INIT(_name)				\
+	.sc_key = &STATIC_CALL_KEY(_name),			\
+	.sc_tramp = STATIC_CALL_TRAMP_ADDR(_name),
+
+#define __BPF_DISPATCHER_SC(name)				\
+	DEFINE_STATIC_CALL(bpf_dispatcher_##name##_call, bpf_dispatcher_nop_func)
+
+#define __BPF_DISPATCHER_CALL(name)				\
+	static_call(bpf_dispatcher_##name##_call)(ctx, insnsi, bpf_func)
+
+#define __BPF_DISPATCHER_UPDATE(_d, _new)			\
+	__static_call_update((_d)->sc_key, (_d)->sc_tramp, (_new))
+
+#else
+#define __BPF_DISPATCHER_SC_INIT(name)
+#define __BPF_DISPATCHER_SC(name)
+#define __BPF_DISPATCHER_CALL(name)		bpf_func(ctx, insnsi)
+#define __BPF_DISPATCHER_UPDATE(_d, _new)
+#endif
+
 #define BPF_DISPATCHER_INIT(_name) {				\
 	.mutex = __MUTEX_INITIALIZER(_name.mutex),		\
 	.func = &_name##_func,					\
@@ -981,25 +1014,29 @@ int arch_prepare_bpf_dispatcher(void *image, void *buf, s64 *funcs, int num_func
 		.name  = #_name,				\
 		.lnode = LIST_HEAD_INIT(_name.ksym.lnode),	\
 	},							\
+	__BPF_DISPATCHER_SC_INIT(_name##_call)			\
 }
 
 #define DEFINE_BPF_DISPATCHER(name)					\
+	__BPF_DISPATCHER_SC(name);					\
 	noinline __nocfi unsigned int bpf_dispatcher_##name##_func(	\
 		const void *ctx,					\
 		const struct bpf_insn *insnsi,				\
 		bpf_func_t bpf_func)					\
 	{								\
-		return bpf_func(ctx, insnsi);				\
+		return __BPF_DISPATCHER_CALL(name);			\
 	}								\
 	EXPORT_SYMBOL(bpf_dispatcher_##name##_func);			\
 	struct bpf_dispatcher bpf_dispatcher_##name =			\
 		BPF_DISPATCHER_INIT(bpf_dispatcher_##name);
+
 #define DECLARE_BPF_DISPATCHER(name)					\
 	unsigned int bpf_dispatcher_##name##_func(			\
 		const void *ctx,					\
 		const struct bpf_insn *insnsi,				\
 		bpf_func_t bpf_func);					\
 	extern struct bpf_dispatcher bpf_dispatcher_##name;
+
 #define BPF_DISPATCHER_FUNC(name) bpf_dispatcher_##name##_func
 #define BPF_DISPATCHER_PTR(name) (&bpf_dispatcher_##name)
 void bpf_dispatcher_change_prog(struct bpf_dispatcher *d, struct bpf_prog *from,
-- 
cgit v1.2.3


From ae64438be1923e3c1102d90fd41db7afcfaf54cc Mon Sep 17 00:00:00 2001
From: Oliver Hartkopp <socketcan@hartkopp.net>
Date: Wed, 2 Nov 2022 10:54:31 +0100
Subject: can: dev: fix skb drop check

In commit a6d190f8c767 ("can: skb: drop tx skb if in listen only
mode") the priv->ctrlmode element is read even on virtual CAN
interfaces that do not create the struct can_priv at startup. This
out-of-bounds read may lead to CAN frame drops for virtual CAN
interfaces like vcan and vxcan.

This patch mainly reverts the original commit and adds a new helper
for CAN interface drivers that provide the required information in
struct can_priv.

Fixes: a6d190f8c767 ("can: skb: drop tx skb if in listen only mode")
Reported-by: Dariusz Stojaczyk <Dariusz.Stojaczyk@opensynergy.com>
Cc: Vincent Mailhol <mailhol.vincent@wanadoo.fr>
Cc: Max Staudt <max@enpas.org>
Signed-off-by: Oliver Hartkopp <socketcan@hartkopp.net>
Acked-by: Vincent Mailhol <mailhol.vincent@wanadoo.fr>
Link: https://lore.kernel.org/all/20221102095431.36831-1-socketcan@hartkopp.net
Cc: stable@vger.kernel.org # 6.0.x
[mkl: patch pch_can, too]
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 include/linux/can/dev.h | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h
index 58f5431a5559..982ba245eb41 100644
--- a/include/linux/can/dev.h
+++ b/include/linux/can/dev.h
@@ -152,6 +152,22 @@ static inline bool can_is_canxl_dev_mtu(unsigned int mtu)
 	return (mtu >= CANXL_MIN_MTU && mtu <= CANXL_MAX_MTU);
 }
 
+/* drop skb if it does not contain a valid CAN frame for sending */
+static inline bool can_dev_dropped_skb(struct net_device *dev, struct sk_buff *skb)
+{
+	struct can_priv *priv = netdev_priv(dev);
+
+	if (priv->ctrlmode & CAN_CTRLMODE_LISTENONLY) {
+		netdev_info_once(dev,
+				 "interface in listen only mode, dropping skb\n");
+		kfree_skb(skb);
+		dev->stats.tx_dropped++;
+		return true;
+	}
+
+	return can_dropped_invalid_skb(dev, skb);
+}
+
 void can_setup(struct net_device *dev);
 
 struct net_device *alloc_candev_mqs(int sizeof_priv, unsigned int echo_skb_max,
-- 
cgit v1.2.3


From 120b116208a0877227fc82e3f0df81e7a3ed4ab1 Mon Sep 17 00:00:00 2001
From: Liam Howlett <liam.howlett@oracle.com>
Date: Fri, 28 Oct 2022 18:04:30 +0000
Subject: maple_tree: reorganize testing to restore module testing

Along the development cycle, the testing code support for module/in-kernel
compiles was removed.  Restore this functionality by moving any internal
API tests to the userspace side, as well as threading tests.  Fix the
lockdep issues and add a way to reduce memory usage so the tests can
complete with KASAN + memleak detection.  Make the tests work on 32 bit
hosts where possible and detect 32 bit hosts in the radix test suite.

[akpm@linux-foundation.org: fix module export]
[akpm@linux-foundation.org: fix it some more]
[liam.howlett@oracle.com: fix compile warnings on 32bit build in check_find()]
  Link: https://lkml.kernel.org/r/20221107203816.1260327-1-Liam.Howlett@oracle.com
Link: https://lkml.kernel.org/r/20221028180415.3074673-1-Liam.Howlett@oracle.com
Signed-off-by: Liam R. Howlett <Liam.Howlett@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/maple_tree.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/maple_tree.h b/include/linux/maple_tree.h
index 2effab72add1..e594db58a0f1 100644
--- a/include/linux/maple_tree.h
+++ b/include/linux/maple_tree.h
@@ -638,6 +638,12 @@ static inline void mt_set_in_rcu(struct maple_tree *mt)
 	}
 }
 
+static inline unsigned int mt_height(const struct maple_tree *mt)
+
+{
+	return (mt->ma_flags & MT_FLAGS_HEIGHT_MASK) >> MT_FLAGS_HEIGHT_OFFSET;
+}
+
 void *mt_find(struct maple_tree *mt, unsigned long *index, unsigned long max);
 void *mt_find_after(struct maple_tree *mt, unsigned long *index,
 		    unsigned long max);
@@ -664,6 +670,7 @@ extern atomic_t maple_tree_tests_passed;
 
 void mt_dump(const struct maple_tree *mt);
 void mt_validate(struct maple_tree *mt);
+void mt_cache_shrink(void);
 #define MT_BUG_ON(__tree, __x) do {					\
 	atomic_inc(&maple_tree_tests_run);				\
 	if (__x) {							\
-- 
cgit v1.2.3


From 5cd189e410debedda416fecfc12f4716b5829845 Mon Sep 17 00:00:00 2001
From: Anthony DeRossi <ajderossi@gmail.com>
Date: Wed, 9 Nov 2022 17:40:26 -0800
Subject: vfio: Export the device set open count

The open count of a device set is the sum of the open counts of all
devices in the set. Drivers can use this value to determine whether
shared resources are in use without tracking them manually or accessing
the private open_count in vfio_device.

Signed-off-by: Anthony DeRossi <ajderossi@gmail.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Reviewed-by: Yi Liu <yi.l.liu@intel.com>
Link: https://lore.kernel.org/r/20221110014027.28780-3-ajderossi@gmail.com
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 include/linux/vfio.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/vfio.h b/include/linux/vfio.h
index e7cebeb875dd..fdd393f70b19 100644
--- a/include/linux/vfio.h
+++ b/include/linux/vfio.h
@@ -189,6 +189,7 @@ int vfio_register_emulated_iommu_dev(struct vfio_device *device);
 void vfio_unregister_group_dev(struct vfio_device *device);
 
 int vfio_assign_device_set(struct vfio_device *device, void *set_id);
+unsigned int vfio_device_set_open_count(struct vfio_device_set *dev_set);
 
 int vfio_mig_get_next_state(struct vfio_device *device,
 			    enum vfio_device_mig_state cur_fsm,
-- 
cgit v1.2.3


From 550b33cfd445296868a478e8413ffb2e963eed32 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Thu, 10 Nov 2022 10:36:20 +0100
Subject: arm64: efi: Force the use of SetVirtualAddressMap() on Altra machines

Ampere Altra machines are reported to misbehave when the SetTime() EFI
runtime service is called after ExitBootServices() but before calling
SetVirtualAddressMap(). Given that the latter is horrid, pointless and
explicitly documented as optional by the EFI spec, we no longer invoke
it at boot if the configured size of the VA space guarantees that the
EFI runtime memory regions can remain mapped 1:1 like they are at boot
time.

On Ampere Altra machines, this results in SetTime() calls issued by the
rtc-efi driver triggering synchronous exceptions during boot.  We can
now recover from those without bringing down the system entirely, due to
commit 23715a26c8d81291 ("arm64: efi: Recover from synchronous
exceptions occurring in firmware"). However, it would be better to avoid
the issue entirely, given that the firmware appears to remain in a funny
state after this.

So attempt to identify these machines based on the 'family' field in the
type #1 SMBIOS record, and call SetVirtualAddressMap() unconditionally
in that case.

Tested-by: Alexandru Elisei <alexandru.elisei@gmail.com>
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
 include/linux/efi.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/efi.h b/include/linux/efi.h
index 929d559ad41d..7603fc58c47c 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -389,6 +389,7 @@ void efi_native_runtime_setup(void);
 #define EFI_LOAD_FILE2_PROTOCOL_GUID		EFI_GUID(0x4006c0c1, 0xfcb3, 0x403e,  0x99, 0x6d, 0x4a, 0x6c, 0x87, 0x24, 0xe0, 0x6d)
 #define EFI_RT_PROPERTIES_TABLE_GUID		EFI_GUID(0xeb66918a, 0x7eef, 0x402a,  0x84, 0x2e, 0x93, 0x1d, 0x21, 0xc3, 0x8a, 0xe9)
 #define EFI_DXE_SERVICES_TABLE_GUID		EFI_GUID(0x05ad34ba, 0x6f02, 0x4214,  0x95, 0x2e, 0x4d, 0xa0, 0x39, 0x8e, 0x2b, 0xb9)
+#define EFI_SMBIOS_PROTOCOL_GUID		EFI_GUID(0x03583ff6, 0xcb36, 0x4940,  0x94, 0x7e, 0xb9, 0xb3, 0x9f, 0x4a, 0xfa, 0xf7)
 
 #define EFI_IMAGE_SECURITY_DATABASE_GUID	EFI_GUID(0xd719b2cb, 0x3d3a, 0x4596,  0xa3, 0xbc, 0xda, 0xd0, 0x0e, 0x67, 0x65, 0x6f)
 #define EFI_SHIM_LOCK_GUID			EFI_GUID(0x605dab50, 0xe046, 0x4300,  0xab, 0xb6, 0x3d, 0xd8, 0x10, 0xdd, 0x8b, 0x23)
-- 
cgit v1.2.3


From 1f6e04a1c7b85da3b765ca9f46029e5d1826d839 Mon Sep 17 00:00:00 2001
From: Xu Kuohai <xukuohai@huawei.com>
Date: Fri, 11 Nov 2022 07:56:20 -0500
Subject: bpf: Fix offset calculation error in __copy_map_value and
 zero_map_value

Function __copy_map_value and zero_map_value miscalculated copy offset,
resulting in possible copy of unwanted data to user or kernel.

Fix it.

Fixes: cc48755808c6 ("bpf: Add zero_map_value to zero map value with special fields")
Fixes: 4d7d7f69f4b1 ("bpf: Adapt copy_map_value for multiple offset case")
Signed-off-by: Xu Kuohai <xukuohai@huawei.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/bpf/20221111125620.754855-1-xukuohai@huaweicloud.com
---
 include/linux/bpf.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 74c6f449d81e..c1bd1bd10506 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -315,7 +315,7 @@ static inline void __copy_map_value(struct bpf_map *map, void *dst, void *src, b
 		u32 next_off = map->off_arr->field_off[i];
 
 		memcpy(dst + curr_off, src + curr_off, next_off - curr_off);
-		curr_off += map->off_arr->field_sz[i];
+		curr_off = next_off + map->off_arr->field_sz[i];
 	}
 	memcpy(dst + curr_off, src + curr_off, map->value_size - curr_off);
 }
@@ -344,7 +344,7 @@ static inline void zero_map_value(struct bpf_map *map, void *dst)
 		u32 next_off = map->off_arr->field_off[i];
 
 		memset(dst + curr_off, 0, next_off - curr_off);
-		curr_off += map->off_arr->field_sz[i];
+		curr_off = next_off + map->off_arr->field_sz[i];
 	}
 	memset(dst + curr_off, 0, map->value_size - curr_off);
 }
-- 
cgit v1.2.3


From 42fb0a1e84ff525ebe560e2baf9451ab69127e2b Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
Date: Thu, 20 Oct 2022 23:14:27 -0400
Subject: tracing/ring-buffer: Have polling block on watermark

Currently the way polling works on the ring buffer is broken. It will
return immediately if there's any data in the ring buffer whereas a read
will block until the watermark (defined by the tracefs buffer_percent file)
is hit.

That is, a select() or poll() will return as if there's data available,
but then the following read will block. This is broken for the way
select()s and poll()s are supposed to work.

Have the polling on the ring buffer also block the same way reads and
splice does on the ring buffer.

Link: https://lkml.kernel.org/r/20221020231427.41be3f26@gandalf.local.home

Cc: Linux Trace Kernel <linux-trace-kernel@vger.kernel.org>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Primiano Tucci <primiano@google.com>
Cc: stable@vger.kernel.org
Fixes: 1e0d6714aceb7 ("ring-buffer: Do not wake up a splice waiter when page is not full")
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 include/linux/ring_buffer.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h
index 2504df9a0453..3c7d295746f6 100644
--- a/include/linux/ring_buffer.h
+++ b/include/linux/ring_buffer.h
@@ -100,7 +100,7 @@ __ring_buffer_alloc(unsigned long size, unsigned flags, struct lock_class_key *k
 
 int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full);
 __poll_t ring_buffer_poll_wait(struct trace_buffer *buffer, int cpu,
-			  struct file *filp, poll_table *poll_table);
+			  struct file *filp, poll_table *poll_table, int full);
 void ring_buffer_wake_waiters(struct trace_buffer *buffer, int cpu);
 
 #define RING_BUFFER_ALL_CPUS -1
-- 
cgit v1.2.3


From c964d62f5cab7b43dd0534f22a96eab386c6ec5d Mon Sep 17 00:00:00 2001
From: Keith Busch <kbusch@kernel.org>
Date: Thu, 10 Nov 2022 10:44:57 -0800
Subject: block: make dma_alignment a stacking queue_limit

Device mappers had always been getting the default 511 dma mask, but
the underlying device might have a larger alignment requirement. Since
this value is used to determine alloweable direct-io alignment, this
needs to be a stackable limit.

Signed-off-by: Keith Busch <kbusch@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20221110184501.2451620-2-kbusch@meta.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blkdev.h | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 50e358a19d98..9898e34b2c2d 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -311,6 +311,13 @@ struct queue_limits {
 	unsigned char		discard_misaligned;
 	unsigned char		raid_partial_stripes_expensive;
 	enum blk_zoned_model	zoned;
+
+	/*
+	 * Drivers that set dma_alignment to less than 511 must be prepared to
+	 * handle individual bvec's that are not a multiple of a SECTOR_SIZE
+	 * due to possible offsets.
+	 */
+	unsigned int		dma_alignment;
 };
 
 typedef int (*report_zones_cb)(struct blk_zone *zone, unsigned int idx,
@@ -456,12 +463,6 @@ struct request_queue {
 	unsigned long		nr_requests;	/* Max # of requests */
 
 	unsigned int		dma_pad_mask;
-	/*
-	 * Drivers that set dma_alignment to less than 511 must be prepared to
-	 * handle individual bvec's that are not a multiple of a SECTOR_SIZE
-	 * due to possible offsets.
-	 */
-	unsigned int		dma_alignment;
 
 #ifdef CONFIG_BLK_INLINE_ENCRYPTION
 	struct blk_crypto_profile *crypto_profile;
@@ -1324,7 +1325,7 @@ static inline sector_t bdev_zone_sectors(struct block_device *bdev)
 
 static inline int queue_dma_alignment(const struct request_queue *q)
 {
-	return q ? q->dma_alignment : 511;
+	return q ? q->limits.dma_alignment : 511;
 }
 
 static inline unsigned int bdev_dma_alignment(struct block_device *bdev)
-- 
cgit v1.2.3


From b3228254bb6e91e57f920227f72a1a7d81925d81 Mon Sep 17 00:00:00 2001
From: Keith Busch <kbusch@kernel.org>
Date: Thu, 10 Nov 2022 10:44:59 -0800
Subject: block: make blk_set_default_limits() private

There are no external users of this function.

Signed-off-by: Keith Busch <kbusch@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20221110184501.2451620-4-kbusch@meta.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blkdev.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 9898e34b2c2d..891f8cbcd043 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -945,7 +945,6 @@ extern void blk_queue_io_min(struct request_queue *q, unsigned int min);
 extern void blk_limits_io_opt(struct queue_limits *limits, unsigned int opt);
 extern void blk_queue_io_opt(struct request_queue *q, unsigned int opt);
 extern void blk_set_queue_depth(struct request_queue *q, unsigned int depth);
-extern void blk_set_default_limits(struct queue_limits *lim);
 extern void blk_set_stacking_limits(struct queue_limits *lim);
 extern int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
 			    sector_t offset);
-- 
cgit v1.2.3


From bedf06833b1f63c2627bd5634602e05592129d7a Mon Sep 17 00:00:00 2001
From: Aashish Sharma <shraash@google.com>
Date: Mon, 7 Nov 2022 21:35:56 +0530
Subject: tracing: Fix warning on variable 'struct trace_array'

Move the declaration of 'struct trace_array' out of #ifdef
CONFIG_TRACING block, to fix the following warning when CONFIG_TRACING
is not set:

>> include/linux/trace.h:63:45: warning: 'struct trace_array' declared
inside parameter list will not be visible outside of this definition or
declaration

Link: https://lkml.kernel.org/r/20221107160556.2139463-1-shraash@google.com

Fixes: 1a77dd1c2bb5 ("scsi: tracing: Fix compile error in trace_array calls when TRACING is disabled")
Cc: "Martin K. Petersen" <martin.petersen@oracle.com>
Cc: Arun Easi <aeasi@marvell.com>
Acked-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Reviewed-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Aashish Sharma <shraash@google.com>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 include/linux/trace.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/trace.h b/include/linux/trace.h
index b5e16e438448..80ffda871749 100644
--- a/include/linux/trace.h
+++ b/include/linux/trace.h
@@ -26,13 +26,13 @@ struct trace_export {
 	int flags;
 };
 
+struct trace_array;
+
 #ifdef CONFIG_TRACING
 
 int register_ftrace_export(struct trace_export *export);
 int unregister_ftrace_export(struct trace_export *export);
 
-struct trace_array;
-
 void trace_printk_init_buffers(void);
 __printf(3, 4)
 int trace_array_printk(struct trace_array *tr, unsigned long ip,
-- 
cgit v1.2.3


From 91482864768a874c4290ef93b84a78f4f1dac51b Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Thu, 17 Nov 2022 18:40:16 +0000
Subject: io_uring: fix multishot accept request leaks

Having REQ_F_POLLED set doesn't guarantee that the request is
executed as a multishot from the polling path. Fortunately for us, if
the code thinks it's multishot issue when it's not, it can only ask to
skip completion so leaking the request. Use issue_flags to mark
multipoll issues.

Cc: stable@vger.kernel.org
Fixes: 390ed29b5e425 ("io_uring: add IORING_ACCEPT_MULTISHOT for accept")
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/7700ac57653f2823e30b34dc74da68678c0c5f13.1668710222.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/io_uring.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/io_uring.h b/include/linux/io_uring.h
index 43bc8a2edccf..0ded9e271523 100644
--- a/include/linux/io_uring.h
+++ b/include/linux/io_uring.h
@@ -16,6 +16,9 @@ enum io_uring_cmd_flags {
 	IO_URING_F_SQE128		= 4,
 	IO_URING_F_CQE32		= 8,
 	IO_URING_F_IOPOLL		= 16,
+
+	/* the request is executed from poll, it should not be freed */
+	IO_URING_F_MULTISHOT		= 32,
 };
 
 struct io_uring_cmd {
-- 
cgit v1.2.3


From bf0d29fb51ff5e6c13097dbfed7b99e0e35b4a15 Mon Sep 17 00:00:00 2001
From: Eddie James <eajames@linux.ibm.com>
Date: Wed, 2 Nov 2022 15:51:44 -0500
Subject: regmap: Add FSI bus support

Add regmap support for the FSI bus.

Signed-off-by: Eddie James <eajames@linux.ibm.com>
Link: https://lore.kernel.org/r/20221102205148.1334459-2-eajames@linux.ibm.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/linux/regmap.h | 37 +++++++++++++++++++++++++++++++++++++
 1 file changed, 37 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/regmap.h b/include/linux/regmap.h
index ca3434dca3a0..e477112fb1c7 100644
--- a/include/linux/regmap.h
+++ b/include/linux/regmap.h
@@ -24,6 +24,7 @@ struct module;
 struct clk;
 struct device;
 struct device_node;
+struct fsi_device;
 struct i2c_client;
 struct i3c_device;
 struct irq_domain;
@@ -628,6 +629,10 @@ struct regmap *__regmap_init_spi_avmm(struct spi_device *spi,
 				      const struct regmap_config *config,
 				      struct lock_class_key *lock_key,
 				      const char *lock_name);
+struct regmap *__regmap_init_fsi(struct fsi_device *fsi_dev,
+				 const struct regmap_config *config,
+				 struct lock_class_key *lock_key,
+				 const char *lock_name);
 
 struct regmap *__devm_regmap_init(struct device *dev,
 				  const struct regmap_bus *bus,
@@ -693,6 +698,11 @@ struct regmap *__devm_regmap_init_spi_avmm(struct spi_device *spi,
 					   const struct regmap_config *config,
 					   struct lock_class_key *lock_key,
 					   const char *lock_name);
+struct regmap *__devm_regmap_init_fsi(struct fsi_device *fsi_dev,
+				      const struct regmap_config *config,
+				      struct lock_class_key *lock_key,
+				      const char *lock_name);
+
 /*
  * Wrapper for regmap_init macros to include a unique lockdep key and name
  * for each call. No-op if CONFIG_LOCKDEP is not set.
@@ -919,6 +929,19 @@ bool regmap_ac97_default_volatile(struct device *dev, unsigned int reg);
 	__regmap_lockdep_wrapper(__regmap_init_spi_avmm, #config,		\
 				 spi, config)
 
+/**
+ * regmap_init_fsi() - Initialise register map
+ *
+ * @fsi_dev: Device that will be interacted with
+ * @config: Configuration for register map
+ *
+ * The return value will be an ERR_PTR() on error or a valid pointer to
+ * a struct regmap.
+ */
+#define regmap_init_fsi(fsi_dev, config)				\
+	__regmap_lockdep_wrapper(__regmap_init_fsi, #config, fsi_dev,	\
+				 config)
+
 /**
  * devm_regmap_init() - Initialise managed register map
  *
@@ -1148,6 +1171,20 @@ bool regmap_ac97_default_volatile(struct device *dev, unsigned int reg);
 	__regmap_lockdep_wrapper(__devm_regmap_init_spi_avmm, #config,	\
 				 spi, config)
 
+/**
+ * devm_regmap_init_fsi() - Initialise managed register map
+ *
+ * @fsi_dev: Device that will be interacted with
+ * @config: Configuration for register map
+ *
+ * The return value will be an ERR_PTR() on error or a valid pointer
+ * to a struct regmap.  The regmap will be automatically freed by the
+ * device management code.
+ */
+#define devm_regmap_init_fsi(fsi_dev, config)				\
+	__regmap_lockdep_wrapper(__devm_regmap_init_fsi, #config,	\
+				 fsi_dev, config)
+
 int regmap_mmio_attach_clk(struct regmap *map, struct clk *clk);
 void regmap_mmio_detach_clk(struct regmap *map);
 void regmap_exit(struct regmap *map);
-- 
cgit v1.2.3