From 350afa8a1101f62ce31bc4ed6f69cf4b90ec4fa2 Mon Sep 17 00:00:00 2001
From: Ravi Bangoria <ravi.bangoria@amd.com>
Date: Thu, 8 Aug 2024 06:29:34 +0000
Subject: x86/split_lock: Move Split and Bus lock code to a dedicated file

Bus Lock Detect functionality on AMD platforms works identical to Intel.

Move split_lock and bus_lock specific code from intel.c to a dedicated
file so that it can be compiled and supported on non-Intel platforms.

Also, introduce CONFIG_X86_BUS_LOCK_DETECT, make it dependent on
CONFIG_CPU_SUP_INTEL and add compilation dependency of the new bus_lock.c
file on CONFIG_X86_BUS_LOCK_DETECT.

Signed-off-by: Ravi Bangoria <ravi.bangoria@amd.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Tom Lendacky <thomas.lendacky@amd.com>
Link: https://lore.kernel.org/all/20240808062937.1149-2-ravi.bangoria@amd.com
---
 include/linux/sched.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index f8d150343d42..d606b2a1de99 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -976,7 +976,7 @@ struct task_struct {
 #ifdef CONFIG_ARCH_HAS_CPU_PASID
 	unsigned			pasid_activated:1;
 #endif
-#ifdef	CONFIG_CPU_SUP_INTEL
+#ifdef CONFIG_X86_BUS_LOCK_DETECT
 	unsigned			reported_split_lock:1;
 #endif
 #ifdef CONFIG_TASK_DELAY_ACCT
-- 
cgit v1.2.3


From 57d298bdb46bc240498675a7f887fc71089da2c0 Mon Sep 17 00:00:00 2001
From: Mikko Perttunen <mperttunen@nvidia.com>
Date: Thu, 25 Apr 2024 08:02:36 +0300
Subject: gpu: host1x: Add MLOCK recovery for rest of engines

Add class IDs / MLOCKs for MLOCK recovery for rest of engines
present on Tegra234.

Signed-off-by: Mikko Perttunen <mperttunen@nvidia.com>
Signed-off-by: Thierry Reding <treding@nvidia.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240425050238.2943404-4-cyndis@kapsi.fi
---
 include/linux/host1x.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include')

diff --git a/include/linux/host1x.h b/include/linux/host1x.h
index 9c8119ed13a4..5a7a81e5f9bd 100644
--- a/include/linux/host1x.h
+++ b/include/linux/host1x.h
@@ -14,12 +14,17 @@
 
 enum host1x_class {
 	HOST1X_CLASS_HOST1X = 0x1,
+	HOST1X_CLASS_NVJPG1 = 0x7,
+	HOST1X_CLASS_NVENC = 0x21,
+	HOST1X_CLASS_NVENC1 = 0x22,
 	HOST1X_CLASS_GR2D = 0x51,
 	HOST1X_CLASS_GR2D_SB = 0x52,
 	HOST1X_CLASS_VIC = 0x5D,
 	HOST1X_CLASS_GR3D = 0x60,
+	HOST1X_CLASS_NVJPG = 0xC0,
 	HOST1X_CLASS_NVDEC = 0xF0,
 	HOST1X_CLASS_NVDEC1 = 0xF5,
+	HOST1X_CLASS_OFA = 0xF8,
 };
 
 struct host1x;
-- 
cgit v1.2.3


From 3d70eb8e60c6d009c988c8e696cb68c77ff59628 Mon Sep 17 00:00:00 2001
From: Kunwu Chan <chentao@kylinos.cn>
Date: Fri, 23 Aug 2024 16:07:24 +0800
Subject: gpu: host1x: Make host1x_context_device_bus_type constant

Since commit d492cc2573a0 ("driver core: device.h: make struct
bus_type a const *"), the driver core can properly handle constant
struct bus_type, move the host1x_context_device_bus_type variable
to be a constant structure as well, placing it into read-only memory
which can not be modified at runtime.

Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Suggested-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Kunwu Chan <chentao@kylinos.cn>
Signed-off-by: Thierry Reding <treding@nvidia.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240823080724.148423-1-kunwu.chan@linux.dev
---
 include/linux/host1x_context_bus.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/host1x_context_bus.h b/include/linux/host1x_context_bus.h
index 72462737a6db..c928cb432680 100644
--- a/include/linux/host1x_context_bus.h
+++ b/include/linux/host1x_context_bus.h
@@ -9,7 +9,7 @@
 #include <linux/device.h>
 
 #ifdef CONFIG_TEGRA_HOST1X_CONTEXT_BUS
-extern struct bus_type host1x_context_device_bus_type;
+extern const struct bus_type host1x_context_device_bus_type;
 #endif
 
 #endif
-- 
cgit v1.2.3


From 9c75b16cabc69adbbfdc9d219df87c9173f0da0a Mon Sep 17 00:00:00 2001
From: Mary Guillemard <mary.guillemard@collabora.com>
Date: Mon, 19 Aug 2024 10:02:22 +0200
Subject: drm/panfrost: Add SYSTEM_TIMESTAMP and SYSTEM_TIMESTAMP_FREQUENCY
 parameters

Expose system timestamp and frequency supported by the GPU.

Mali uses an external timer as GPU system time. On ARM, this is wired to
the generic arch timer so we wire cntfrq_el0 as device frequency.

This new uAPI will be used in Mesa to implement timestamp queries and
VK_KHR_calibrated_timestamps.

v2:
- Rewrote to use GPU timestamp register
- Add missing include for arch_timer_get_cntfrq
- Rework commit message

v3:
- Move panfrost_cycle_counter_get and panfrost_cycle_counter_put to
  panfrost_ioctl_query_timestamp
- Handle possible overflow in panfrost_timestamp_read

Signed-off-by: Mary Guillemard <mary.guillemard@collabora.com>
Tested-by: Heiko Stuebner <heiko@sntech.de>
Reviewed-by: Steven Price <steven.price@arm.com>
Signed-off-by: Steven Price <steven.price@arm.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240819080224.24914-2-mary.guillemard@collabora.com
---
 include/uapi/drm/panfrost_drm.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/uapi/drm/panfrost_drm.h b/include/uapi/drm/panfrost_drm.h
index 9f231d40a146..52b050e2b660 100644
--- a/include/uapi/drm/panfrost_drm.h
+++ b/include/uapi/drm/panfrost_drm.h
@@ -172,6 +172,8 @@ enum drm_panfrost_param {
 	DRM_PANFROST_PARAM_NR_CORE_GROUPS,
 	DRM_PANFROST_PARAM_THREAD_TLS_ALLOC,
 	DRM_PANFROST_PARAM_AFBC_FEATURES,
+	DRM_PANFROST_PARAM_SYSTEM_TIMESTAMP,
+	DRM_PANFROST_PARAM_SYSTEM_TIMESTAMP_FREQUENCY,
 };
 
 struct drm_panfrost_get_param {
-- 
cgit v1.2.3


From 3a8d97611b564b5b25f68c90b543056fc9ae0bec Mon Sep 17 00:00:00 2001
From: Mary Guillemard <mary.guillemard@collabora.com>
Date: Mon, 19 Aug 2024 10:02:23 +0200
Subject: drm/panfrost: Add cycle counter job requirement

Extend the uAPI with a new job requirement flag for cycle
counters. This requirement is used by userland to indicate that a job
requires cycle counters or system timestamp to be propagated. (for use
with write value timestamp jobs)

We cannot enable cycle counters unconditionally as this would result in
an increase of GPU power consumption. As a result, they should be left
off unless required by the application.

If a job requires cycle counters or system timestamps propagation, we
must enable cycle counting before issuing a job and disable it right
after the job completes.

Since this extends the uAPI and because userland needs a way to advertise
features like VK_KHR_shader_clock conditionally, we bumps the driver
minor version.

v2:
- Rework commit message
- Squash uAPI changes and implementation in this commit
- Simplify changes based on Steven Price comments

v3:
- Add Steven Price r-b
- Fix a codestyle issue

Signed-off-by: Mary Guillemard <mary.guillemard@collabora.com>
Reviewed-by: Steven Price <steven.price@arm.com>
Tested-by: Heiko Stuebner <heiko@sntech.de>
Signed-off-by: Steven Price <steven.price@arm.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240819080224.24914-3-mary.guillemard@collabora.com
---
 include/uapi/drm/panfrost_drm.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/uapi/drm/panfrost_drm.h b/include/uapi/drm/panfrost_drm.h
index 52b050e2b660..568724be6628 100644
--- a/include/uapi/drm/panfrost_drm.h
+++ b/include/uapi/drm/panfrost_drm.h
@@ -40,6 +40,7 @@ extern "C" {
 #define DRM_IOCTL_PANFROST_PERFCNT_DUMP		DRM_IOW(DRM_COMMAND_BASE + DRM_PANFROST_PERFCNT_DUMP, struct drm_panfrost_perfcnt_dump)
 
 #define PANFROST_JD_REQ_FS (1 << 0)
+#define PANFROST_JD_REQ_CYCLE_COUNT (1 << 1)
 /**
  * struct drm_panfrost_submit - ioctl argument for submitting commands to the 3D
  * engine.
-- 
cgit v1.2.3


From cc3e8a216d6b817c509e1e1a3700055d178e04f8 Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Date: Sun, 2 Jun 2024 15:04:49 +0300
Subject: drm/imx: add internal bridge handling display-timings DT node

i.MX DRM DT bindings allow using either a proper panel / bridge graph to
provide information about connected panels, or just a display-timings DT
node, describing just the timings and the flags. Add helper bridge
driver supporting the latter usecase. It will be used by both LDB and
parallel-display drivers.

Reviewed-by: Philipp Zabel <p.zabel@pengutronix.de>
Tested-by: Chris Healy <cphealy@gmail.com>
Tested-by: Philipp Zabel <p.zabel@pengutronix.de> # on imx6q-nitrogen6x
Link: https://patchwork.freedesktop.org/patch/msgid/20240602-drm-imx-cleanup-v3-9-e549e2a43100@linaro.org
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
---
 include/drm/bridge/imx.h | 13 +++++++++++++
 1 file changed, 13 insertions(+)
 create mode 100644 include/drm/bridge/imx.h

(limited to 'include')

diff --git a/include/drm/bridge/imx.h b/include/drm/bridge/imx.h
new file mode 100644
index 000000000000..e14f429a9ca2
--- /dev/null
+++ b/include/drm/bridge/imx.h
@@ -0,0 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright (C) 2012 Sascha Hauer, Pengutronix
+ */
+
+#ifndef DRM_IMX_BRIDGE_H
+#define DRM_IMX_BRIDGE_H
+
+struct drm_bridge *devm_imx_drm_legacy_bridge(struct device *dev,
+					      struct device_node *np,
+					      int type);
+
+#endif
-- 
cgit v1.2.3


From a778028cc575deeb5224cc798de6e03d37331bca Mon Sep 17 00:00:00 2001
From: Mary Guillemard <mary.guillemard@collabora.com>
Date: Fri, 30 Aug 2024 10:03:50 +0200
Subject: drm/panthor: Add DEV_QUERY_TIMESTAMP_INFO dev query

Expose timestamp information supported by the GPU with a new device
query.

Mali uses an external timer as GPU system time. On ARM, this is wired to
the generic arch timer so we wire cntfrq_el0 as device frequency.

This new uAPI will be used in Mesa to implement timestamp queries and
VK_KHR_calibrated_timestamps.

Since this extends the uAPI and because userland needs a way to advertise
those features conditionally, this also bumps the driver minor version.

v2:
- Rewrote to use GPU timestamp register
- Added timestamp_offset to drm_panthor_timestamp_info
- Add missing include for arch_timer_get_cntfrq
- Rework commit message

v3:
- Add panthor_gpu_read_64bit_counter
- Change panthor_gpu_read_timestamp to use
  panthor_gpu_read_64bit_counter

v4:
- Fix multiple typos in uAPI documentation
- Mention behavior when the timestamp frequency is unknown
- Use u64 instead of unsigned long long
  for panthor_gpu_read_timestamp
- Apply r-b from Mihail

Signed-off-by: Mary Guillemard <mary.guillemard@collabora.com>
Reviewed-by: Mihail Atanassov <mihail.atanassov@arm.com>
Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com>
Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240830080349.24736-2-mary.guillemard@collabora.com
---
 include/uapi/drm/panthor_drm.h | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

(limited to 'include')

diff --git a/include/uapi/drm/panthor_drm.h b/include/uapi/drm/panthor_drm.h
index 926b1deb1116..95e3a984c69a 100644
--- a/include/uapi/drm/panthor_drm.h
+++ b/include/uapi/drm/panthor_drm.h
@@ -260,6 +260,9 @@ enum drm_panthor_dev_query_type {
 
 	/** @DRM_PANTHOR_DEV_QUERY_CSIF_INFO: Query command-stream interface information. */
 	DRM_PANTHOR_DEV_QUERY_CSIF_INFO,
+
+	/** @DRM_PANTHOR_DEV_QUERY_TIMESTAMP_INFO: Query timestamp information. */
+	DRM_PANTHOR_DEV_QUERY_TIMESTAMP_INFO,
 };
 
 /**
@@ -377,6 +380,25 @@ struct drm_panthor_csif_info {
 	__u32 pad;
 };
 
+/**
+ * struct drm_panthor_timestamp_info - Timestamp information
+ *
+ * Structure grouping all queryable information relating to the GPU timestamp.
+ */
+struct drm_panthor_timestamp_info {
+	/**
+	 * @timestamp_frequency: The frequency of the timestamp timer or 0 if
+	 * unknown.
+	 */
+	__u64 timestamp_frequency;
+
+	/** @current_timestamp: The current timestamp. */
+	__u64 current_timestamp;
+
+	/** @timestamp_offset: The offset of the timestamp timer. */
+	__u64 timestamp_offset;
+};
+
 /**
  * struct drm_panthor_dev_query - Arguments passed to DRM_PANTHOR_IOCTL_DEV_QUERY
  */
-- 
cgit v1.2.3


From fc9cb46bdca8747aedd86ce304caaddac6df07fd Mon Sep 17 00:00:00 2001
From: Jani Nikula <jani.nikula@intel.com>
Date: Wed, 4 Sep 2024 12:51:32 +0300
Subject: drm/i915/pciids: use designated initializers in INTEL_VGA_DEVICE()

With IGT no longer using INTEL_VGA_DEVICE(), we can make it kernel
specific and use designated initializers. Ditto for
INTEL_QUANTA_VGA_DEVICE(). Remove the superfluous comments while at it.

Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/ce15f8f2a6b672155f9728c8e6a5f49d33fafd24.1725443418.git.jani.nikula@intel.com
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 include/drm/intel/i915_pciids.h | 35 ++++++++++++++---------------------
 1 file changed, 14 insertions(+), 21 deletions(-)

(limited to 'include')

diff --git a/include/drm/intel/i915_pciids.h b/include/drm/intel/i915_pciids.h
index 2bf03ebfcf73..6a78df5687c5 100644
--- a/include/drm/intel/i915_pciids.h
+++ b/include/drm/intel/i915_pciids.h
@@ -25,27 +25,20 @@
 #ifndef _I915_PCIIDS_H
 #define _I915_PCIIDS_H
 
-/*
- * A pci_device_id struct {
- *	__u32 vendor, device;
- *      __u32 subvendor, subdevice;
- *	__u32 class, class_mask;
- *	kernel_ulong_t driver_data;
- * };
- * Don't use C99 here because "class" is reserved and we want to
- * give userspace flexibility.
- */
-#define INTEL_VGA_DEVICE(id, info) { \
-	0x8086,	id, \
-	~0, ~0, \
-	0x030000, 0xff0000, \
-	(unsigned long) info }
-
-#define INTEL_QUANTA_VGA_DEVICE(info) { \
-	0x8086,	0x16a, \
-	0x152d,	0x8990, \
-	0x030000, 0xff0000, \
-	(unsigned long) info }
+#ifdef __KERNEL__
+#define INTEL_VGA_DEVICE(_id, _info) { \
+	PCI_DEVICE(PCI_VENDOR_ID_INTEL, (_id)), \
+	.class = PCI_BASE_CLASS_DISPLAY << 16, .class_mask = 0xff << 16, \
+	.driver_data = (kernel_ulong_t)(_info), \
+}
+
+#define INTEL_QUANTA_VGA_DEVICE(_info) { \
+	.vendor = PCI_VENDOR_ID_INTEL, .device = 0x16a, \
+	.subvendor = 0x152d, .subdevice = 0x8990, \
+	.class = PCI_BASE_CLASS_DISPLAY << 16, .class_mask = 0xff << 16, \
+	.driver_data = (kernel_ulong_t)(_info), \
+}
+#endif
 
 #define INTEL_I810_IDS(MACRO__, ...) \
 	MACRO__(0x7121, ## __VA_ARGS__), /* I810 */ \
-- 
cgit v1.2.3


From a37c68dd80f9951bb48aa44094fce130197ce3a4 Mon Sep 17 00:00:00 2001
From: Jani Nikula <jani.nikula@intel.com>
Date: Wed, 4 Sep 2024 12:51:33 +0300
Subject: drm/i915/pciids: separate ARL and MTL PCI IDs

Avoid including PCI IDs for one platform to the PCI IDs of another. It's
more clear to deal with them completely separately at the PCI ID macro
level.

Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/b70af19ea017a76af4678d0a4ee8332253ee1f3b.1725443418.git.jani.nikula@intel.com
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 include/drm/intel/i915_pciids.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/drm/intel/i915_pciids.h b/include/drm/intel/i915_pciids.h
index 6a78df5687c5..cbb12fdbcb7f 100644
--- a/include/drm/intel/i915_pciids.h
+++ b/include/drm/intel/i915_pciids.h
@@ -764,15 +764,15 @@
 	INTEL_ATS_M150_IDS(MACRO__, ## __VA_ARGS__), \
 	INTEL_ATS_M75_IDS(MACRO__, ## __VA_ARGS__)
 
-/* MTL */
+/* ARL */
 #define INTEL_ARL_IDS(MACRO__, ...) \
 	MACRO__(0x7D41, ## __VA_ARGS__), \
 	MACRO__(0x7D51, ## __VA_ARGS__), \
 	MACRO__(0x7D67, ## __VA_ARGS__), \
 	MACRO__(0x7DD1, ## __VA_ARGS__)
 
+/* MTL */
 #define INTEL_MTL_IDS(MACRO__, ...) \
-	INTEL_ARL_IDS(MACRO__, ## __VA_ARGS__), \
 	MACRO__(0x7D40, ## __VA_ARGS__), \
 	MACRO__(0x7D45, ## __VA_ARGS__), \
 	MACRO__(0x7D55, ## __VA_ARGS__), \
-- 
cgit v1.2.3


From ae304b054520fec0a5ad5dec103a37abb53fef0e Mon Sep 17 00:00:00 2001
From: Jani Nikula <jani.nikula@intel.com>
Date: Wed, 4 Sep 2024 12:46:47 +0300
Subject: drm/xe/pciids: add some missing ADL-N PCI IDs

Similar to commit 425b463859ed ("drm/i915: Update ADL-N PCI IDs").

Reviewed-by: Sai Teja Pottumuttu <sai.teja.pottumuttu@intel.com>
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/47d543393e4026588401a03c4e3ce12ce29780e3.1725443121.git.jani.nikula@intel.com
---
 include/drm/intel/xe_pciids.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/drm/intel/xe_pciids.h b/include/drm/intel/xe_pciids.h
index 644872a35c35..4aab1bc2327e 100644
--- a/include/drm/intel/xe_pciids.h
+++ b/include/drm/intel/xe_pciids.h
@@ -97,7 +97,9 @@
 #define XE_ADLN_IDS(MACRO__, ...)		\
 	MACRO__(0x46D0, ## __VA_ARGS__),	\
 	MACRO__(0x46D1, ## __VA_ARGS__),	\
-	MACRO__(0x46D2, ## __VA_ARGS__)
+	MACRO__(0x46D2, ## __VA_ARGS__),	\
+	MACRO__(0x46D3, ## __VA_ARGS__),	\
+	MACRO__(0x46D4, ## __VA_ARGS__)
 
 /* RPL-S */
 #define XE_RPLS_IDS(MACRO__, ...)		\
-- 
cgit v1.2.3


From d454902a690db47f1880f963514bbf0fc7a129a8 Mon Sep 17 00:00:00 2001
From: Jani Nikula <jani.nikula@intel.com>
Date: Wed, 4 Sep 2024 12:46:48 +0300
Subject: drm/xe/pciids: separate RPL-U and RPL-P PCI IDs

Avoid including PCI IDs for one platform to the PCI IDs of another. It's
more clear to deal with them completely separately at the PCI ID macro
level.

Reviewed-by: Sai Teja Pottumuttu <sai.teja.pottumuttu@intel.com>
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/4868d36fbfa8c38ea2d490bca82cf6370b8d65dd.1725443121.git.jani.nikula@intel.com
---
 include/drm/intel/xe_pciids.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/drm/intel/xe_pciids.h b/include/drm/intel/xe_pciids.h
index 4aab1bc2327e..7a9a7d0a89ca 100644
--- a/include/drm/intel/xe_pciids.h
+++ b/include/drm/intel/xe_pciids.h
@@ -122,7 +122,6 @@
 
 /* RPL-P */
 #define XE_RPLP_IDS(MACRO__, ...)		\
-	XE_RPLU_IDS(MACRO__, ## __VA_ARGS__),	\
 	MACRO__(0xA720, ## __VA_ARGS__),	\
 	MACRO__(0xA7A0, ## __VA_ARGS__),	\
 	MACRO__(0xA7A8, ## __VA_ARGS__),	\
-- 
cgit v1.2.3


From cdb56a63f7eef34e89b045fc8bcae8d326bbdb19 Mon Sep 17 00:00:00 2001
From: Jani Nikula <jani.nikula@intel.com>
Date: Wed, 4 Sep 2024 12:46:49 +0300
Subject: drm/xe/pciids: separate ARL and MTL PCI IDs

Avoid including PCI IDs for one platform to the PCI IDs of another. It's
more clear to deal with them completely separately at the PCI ID macro
level.

Reviewed-by: Shekhar Chauhan <shekhar.chauhan@intel.com>
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/a30cb0da7694a8eccceba66d676ac59aa0e96176.1725443121.git.jani.nikula@intel.com
---
 include/drm/intel/xe_pciids.h | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/drm/intel/xe_pciids.h b/include/drm/intel/xe_pciids.h
index 7a9a7d0a89ca..79001afa7d27 100644
--- a/include/drm/intel/xe_pciids.h
+++ b/include/drm/intel/xe_pciids.h
@@ -176,16 +176,19 @@
 	XE_ATS_M150_IDS(MACRO__, ## __VA_ARGS__),\
 	XE_ATS_M75_IDS(MACRO__, ## __VA_ARGS__)
 
-/* MTL / ARL */
+/* ARL */
+#define XE_ARL_IDS(MACRO__, ...)		\
+	MACRO__(0x7D41, ## __VA_ARGS__),	\
+	MACRO__(0x7D51, ## __VA_ARGS__),        \
+	MACRO__(0x7D67, ## __VA_ARGS__),	\
+	MACRO__(0x7DD1, ## __VA_ARGS__)
+
+/* MTL */
 #define XE_MTL_IDS(MACRO__, ...)		\
 	MACRO__(0x7D40, ## __VA_ARGS__),	\
-	MACRO__(0x7D41, ## __VA_ARGS__),	\
 	MACRO__(0x7D45, ## __VA_ARGS__),	\
-	MACRO__(0x7D51, ## __VA_ARGS__),        \
 	MACRO__(0x7D55, ## __VA_ARGS__),	\
 	MACRO__(0x7D60, ## __VA_ARGS__),	\
-	MACRO__(0x7D67, ## __VA_ARGS__),	\
-	MACRO__(0x7DD1, ## __VA_ARGS__),        \
 	MACRO__(0x7DD5, ## __VA_ARGS__)
 
 #define XE_LNL_IDS(MACRO__, ...) \
-- 
cgit v1.2.3


From 498ba74654bec380974d87da04361c5edea07181 Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Mon, 2 Sep 2024 12:53:48 +0200
Subject: drm/gem-vram: Remove support for simple display pipelines

There are no more drivers that use GEM VRAM helpers with a simple
display pipeline. Remove the respective code.

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Acked-by: Gerd Hoffmann <kraxel@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240902105546.792625-11-tzimmermann@suse.de
---
 include/drm/drm_gem_vram_helper.h | 13 -------------
 1 file changed, 13 deletions(-)

(limited to 'include')

diff --git a/include/drm/drm_gem_vram_helper.h b/include/drm/drm_gem_vram_helper.h
index 9a73f786f4ad..00830b49a3ff 100644
--- a/include/drm/drm_gem_vram_helper.h
+++ b/include/drm/drm_gem_vram_helper.h
@@ -17,7 +17,6 @@
 struct drm_mode_create_dumb;
 struct drm_plane;
 struct drm_plane_state;
-struct drm_simple_display_pipe;
 struct filp;
 struct vm_area_struct;
 
@@ -137,18 +136,6 @@ drm_gem_vram_plane_helper_cleanup_fb(struct drm_plane *plane,
 	.prepare_fb = drm_gem_vram_plane_helper_prepare_fb, \
 	.cleanup_fb = drm_gem_vram_plane_helper_cleanup_fb
 
-/*
- * Helpers for struct drm_simple_display_pipe_funcs
- */
-
-int drm_gem_vram_simple_display_pipe_prepare_fb(
-	struct drm_simple_display_pipe *pipe,
-	struct drm_plane_state *new_state);
-
-void drm_gem_vram_simple_display_pipe_cleanup_fb(
-	struct drm_simple_display_pipe *pipe,
-	struct drm_plane_state *old_state);
-
 /**
  * define DRM_GEM_VRAM_DRIVER - default callback functions for
  *				&struct drm_driver
-- 
cgit v1.2.3


From b2ef808786d93df36585cee42cfb973fc41636eb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Christian=20K=C3=B6nig?= <ckoenig.leichtzumerken@gmail.com>
Date: Mon, 26 Aug 2024 14:25:38 +0200
Subject: drm/sched: add optional errno to drm_sched_start()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The current implementation of drm_sched_start uses a hardcoded
-ECANCELED to dispose of a job when the parent/hw fence is NULL.
This results in drm_sched_job_done being called with -ECANCELED for
each job with a NULL parent in the pending list, making it difficult
to distinguish between recovery methods, whether a queue reset or a
full GPU reset was used.

To improve this, we first try a soft recovery for timeout jobs and
use the error code -ENODATA. If soft recovery fails, we proceed with
a queue reset, where the error code remains -ENODATA for the job.
Finally, for a full GPU reset, we use error codes -ECANCELED or
-ETIME. This patch adds an error code parameter to drm_sched_start,
allowing us to differentiate between queue reset and GPU reset
failures. This enables user mode and test applications to validate
the expected correctness of the requested operation. After a
successful queue reset, the only way to continue normal operation is
to call drm_sched_job_done with the specific error code -ENODATA.

v1: Initial implementation by Jesse utilized amdgpu_device_lock_reset_domain
    and amdgpu_device_unlock_reset_domain to allow user mode to track
    the queue reset status and distinguish between queue reset and
    GPU reset.
v2: Christian suggested using the error codes -ENODATA for queue reset
    and -ECANCELED or -ETIME for GPU reset, returned to
    amdgpu_cs_wait_ioctl.
v3: To meet the requirements, we introduce a new function
    drm_sched_start_ex with an additional parameter to set
    dma_fence_set_error, allowing us to handle the specific error
    codes appropriately and dispose of bad jobs with the selected
    error code depending on whether it was a queue reset or GPU reset.
v4: Alex suggested using a new name, drm_sched_start_with_recovery_error,
    which more accurately describes the function's purpose.
    Additionally, it was recommended to add documentation details
    about the new method.
v5: Fixed declaration of new function drm_sched_start_with_recovery_error.(Alex)
v6 (chk): rebase on upstream changes, cleanup the commit message,
          drop the new function again and update all callers,
          apply the errno also to scheduler fences with hw fences
v7 (chk): rebased

Signed-off-by: Jesse Zhang <Jesse.Zhang@amd.com>
Signed-off-by: Vitaly Prosyak <vitaly.prosyak@amd.com>
Signed-off-by: Christian König <christian.koenig@amd.com>
Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240826122541.85663-1-christian.koenig@amd.com
---
 include/drm/gpu_scheduler.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
index fe8edb917360..a8d19b10f9b8 100644
--- a/include/drm/gpu_scheduler.h
+++ b/include/drm/gpu_scheduler.h
@@ -579,7 +579,7 @@ bool drm_sched_wqueue_ready(struct drm_gpu_scheduler *sched);
 void drm_sched_wqueue_stop(struct drm_gpu_scheduler *sched);
 void drm_sched_wqueue_start(struct drm_gpu_scheduler *sched);
 void drm_sched_stop(struct drm_gpu_scheduler *sched, struct drm_sched_job *bad);
-void drm_sched_start(struct drm_gpu_scheduler *sched);
+void drm_sched_start(struct drm_gpu_scheduler *sched, int errno);
 void drm_sched_resubmit_jobs(struct drm_gpu_scheduler *sched);
 void drm_sched_increase_karma(struct drm_sched_job *bad);
 void drm_sched_reset_karma(struct drm_sched_job *bad);
-- 
cgit v1.2.3


From a401bd1264b400f96a4cf61ed3fc144008e97a4e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Christian=20K=C3=B6nig?= <ckoenig.leichtzumerken@gmail.com>
Date: Mon, 26 Aug 2024 14:25:39 +0200
Subject: dma-buf: give examples of error codes to use
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The dma_fence_set_error() function allows to set an error code on a
dma_fence object before it is signaled.

Document some of the potential error codes drivers should use and
especially what they mean.

Signed-off-by: Christian König <christian.koenig@amd.com>
Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240826122541.85663-2-christian.koenig@amd.com
---
 include/linux/dma-fence.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include')

diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h
index e06bad467f55..e7ad819962e3 100644
--- a/include/linux/dma-fence.h
+++ b/include/linux/dma-fence.h
@@ -574,6 +574,12 @@ int dma_fence_get_status(struct dma_fence *fence);
  * rather than success. This must be set before signaling (so that the value
  * is visible before any waiters on the signal callback are woken). This
  * helper exists to help catching erroneous setting of #dma_fence.error.
+ *
+ * Examples of error codes which drivers should use:
+ *
+ * * %-ENODATA	 This operation produced no data, no other operation affected.
+ * * %-ECANCELED All operations from the same context have been canceled.
+ * * %-ETIME	 Operation caused a timeout and potentially device reset.
  */
 static inline void dma_fence_set_error(struct dma_fence *fence,
 				       int error)
-- 
cgit v1.2.3


From 8cf9a01edc216b16b5839eb793ac544d2c97ce97 Mon Sep 17 00:00:00 2001
From: Benjamin Coddington <bcodding@redhat.com>
Date: Wed, 11 Sep 2024 15:42:57 -0400
Subject: fs: Introduce FOP_ASYNC_LOCK

Some lock managers (NLM, kNFSD) fastidiously avoid blocking their
kernel threads while servicing blocking locks.  If a filesystem supports
asynchronous lock requests those lock managers can use notifications to
quickly inform clients they have acquired a file lock.

Historically, only posix_lock_file() was capable of supporting asynchronous
locks so the check for support was simply file_operations->lock(), but with
recent changes in DLM, both GFS2 and OCFS2 also support asynchronous locks
and have started signalling their support with EXPORT_OP_ASYNC_LOCK.

We recently noticed that those changes dropped the checks for whether a
filesystem simply defaults to posix_lock_file(), so async lock
notifications have not been attempted for NLM and NFSv4.1+ for most
filesystems.  While trying to fix this it has become clear that testing
both the export flag combined with testing ->lock() creates quite a
layering mess.  It seems appropriate to signal support with a fop_flag.

Add FOP_ASYNC_LOCK so that filesystems with ->lock() can signal their
capability to handle lock requests asynchronously.  Add a helper for
lock managers to properly test that support.

Signed-off-by: Benjamin Coddington <bcodding@redhat.com>
Link: https://lore.kernel.org/r/3330d5a324abe2ce9c1dafe89cacdc6db41945d1.1726083391.git.bcodding@redhat.com
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/linux/filelock.h | 5 +++++
 include/linux/fs.h       | 2 ++
 2 files changed, 7 insertions(+)

(limited to 'include')

diff --git a/include/linux/filelock.h b/include/linux/filelock.h
index daee999d05f3..58c1120a8253 100644
--- a/include/linux/filelock.h
+++ b/include/linux/filelock.h
@@ -180,6 +180,11 @@ static inline void locks_wake_up(struct file_lock *fl)
 	wake_up(&fl->c.flc_wait);
 }
 
+static inline bool locks_can_async_lock(const struct file_operations *fops)
+{
+	return !fops->lock || fops->fop_flags & FOP_ASYNC_LOCK;
+}
+
 /* fs/locks.c */
 void locks_free_lock_context(struct inode *inode);
 void locks_free_lock(struct file_lock *fl);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 6ca11e241a24..78221ae589d9 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2074,6 +2074,8 @@ struct file_operations {
 #define FOP_DIO_PARALLEL_WRITE	((__force fop_flags_t)(1 << 3))
 /* Contains huge pages */
 #define FOP_HUGE_PAGES		((__force fop_flags_t)(1 << 4))
+/* Supports asynchronous lock callbacks */
+#define FOP_ASYNC_LOCK		((__force fop_flags_t)(1 << 5))
 
 /* Wrap a directory iterator that needs exclusive inode access */
 int wrap_directory_iterator(struct file *, struct dir_context *,
-- 
cgit v1.2.3


From 319e53f155907cf2c6dabc16ec9dce0179bc04d1 Mon Sep 17 00:00:00 2001
From: Lyude Paul <lyude@redhat.com>
Date: Mon, 16 Sep 2024 19:00:08 -0400
Subject: drm/panic: Fix uninitialized spinlock acquisition with
 CONFIG_DRM_PANIC=n

It turns out that if you happen to have a kernel config where
CONFIG_DRM_PANIC is disabled and spinlock debugging is enabled, along with
KMS being enabled - we'll end up trying to acquire an uninitialized
spin_lock with drm_panic_lock() when we try to do a commit:

  rvkms rvkms.0: [drm:drm_atomic_commit] committing 0000000068d2ade1
  INFO: trying to register non-static key.
  The code is fine but needs lockdep annotation, or maybe
  you didn't initialize this object before use?
  turning off the locking correctness validator.
  CPU: 4 PID: 1347 Comm: modprobe Not tainted 6.10.0-rc1Lyude-Test+ #272
  Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS edk2-20240524-3.fc40 05/24/2024
  Call Trace:
   <TASK>
   dump_stack_lvl+0x77/0xa0
   assign_lock_key+0x114/0x120
   register_lock_class+0xa8/0x2c0
   __lock_acquire+0x7d/0x2bd0
   ? __vmap_pages_range_noflush+0x3a8/0x550
   ? drm_atomic_helper_swap_state+0x2ad/0x3a0
   lock_acquire+0xec/0x290
   ? drm_atomic_helper_swap_state+0x2ad/0x3a0
   ? lock_release+0xee/0x310
   _raw_spin_lock_irqsave+0x4e/0x70
   ? drm_atomic_helper_swap_state+0x2ad/0x3a0
   drm_atomic_helper_swap_state+0x2ad/0x3a0
   drm_atomic_helper_commit+0xb1/0x270
   drm_atomic_commit+0xaf/0xe0
   ? __pfx___drm_printfn_info+0x10/0x10
   drm_client_modeset_commit_atomic+0x1a1/0x250
   drm_client_modeset_commit_locked+0x4b/0x180
   drm_client_modeset_commit+0x27/0x50
   __drm_fb_helper_restore_fbdev_mode_unlocked+0x76/0x90
   drm_fb_helper_set_par+0x38/0x40
   fbcon_init+0x3c4/0x690
   visual_init+0xc0/0x120
   do_bind_con_driver+0x409/0x4c0
   do_take_over_console+0x233/0x280
   do_fb_registered+0x11f/0x210
   fbcon_fb_registered+0x2c/0x60
   register_framebuffer+0x248/0x2a0
   __drm_fb_helper_initial_config_and_unlock+0x58a/0x720
   drm_fbdev_generic_client_hotplug+0x6e/0xb0
   drm_client_register+0x76/0xc0
   _RNvXs_CsHeezP08sTT_5rvkmsNtB4_5RvkmsNtNtCs1cdwasc6FUb_6kernel8platform6Driver5probe+0xed2/0x1060 [rvkms]
   ? _RNvMs_NtCs1cdwasc6FUb_6kernel8platformINtB4_7AdapterNtCsHeezP08sTT_5rvkms5RvkmsE14probe_callbackBQ_+0x2b/0x70 [rvkms]
   ? acpi_dev_pm_attach+0x25/0x110
   ? platform_probe+0x6a/0xa0
   ? really_probe+0x10b/0x400
   ? __driver_probe_device+0x7c/0x140
   ? driver_probe_device+0x22/0x1b0
   ? __device_attach_driver+0x13a/0x1c0
   ? __pfx___device_attach_driver+0x10/0x10
   ? bus_for_each_drv+0x114/0x170
   ? __device_attach+0xd6/0x1b0
   ? bus_probe_device+0x9e/0x120
   ? device_add+0x288/0x4b0
   ? platform_device_add+0x75/0x230
   ? platform_device_register_full+0x141/0x180
   ? rust_helper_platform_device_register_simple+0x85/0xb0
   ? _RNvMs2_NtCs1cdwasc6FUb_6kernel8platformNtB5_6Device13create_simple+0x1d/0x60
   ? _RNvXs0_CsHeezP08sTT_5rvkmsNtB5_5RvkmsNtCs1cdwasc6FUb_6kernel6Module4init+0x11e/0x160 [rvkms]
   ? 0xffffffffc083f000
   ? init_module+0x20/0x1000 [rvkms]
   ? kernfs_xattr_get+0x3e/0x80
   ? do_one_initcall+0x148/0x3f0
   ? __lock_acquire+0x5ef/0x2bd0
   ? __lock_acquire+0x5ef/0x2bd0
   ? __lock_acquire+0x5ef/0x2bd0
   ? put_cpu_partial+0x51/0x1d0
   ? lock_acquire+0xec/0x290
   ? put_cpu_partial+0x51/0x1d0
   ? lock_release+0xee/0x310
   ? put_cpu_partial+0x51/0x1d0
   ? fs_reclaim_acquire+0x69/0xf0
   ? lock_acquire+0xec/0x290
   ? fs_reclaim_acquire+0x69/0xf0
   ? kfree+0x22f/0x340
   ? lock_release+0xee/0x310
   ? kmalloc_trace_noprof+0x48/0x340
   ? do_init_module+0x22/0x240
   ? kmalloc_trace_noprof+0x155/0x340
   ? do_init_module+0x60/0x240
   ? __se_sys_finit_module+0x2e0/0x3f0
   ? do_syscall_64+0xa4/0x180
   ? syscall_exit_to_user_mode+0x108/0x140
   ? do_syscall_64+0xb0/0x180
   ? vma_end_read+0xd0/0xe0
   ? do_user_addr_fault+0x309/0x640
   ? clear_bhb_loop+0x45/0xa0
   ? clear_bhb_loop+0x45/0xa0
   ? clear_bhb_loop+0x45/0xa0
   ? entry_SYSCALL_64_after_hwframe+0x76/0x7e
   </TASK>

Fix this by stubbing these macros out when this config option isn't
enabled, along with fixing the unused variable warning that introduces.

Signed-off-by: Lyude Paul <lyude@redhat.com>
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Fixes: e2a1cda3e0c7 ("drm/panic: Add drm panic locking")
Cc: <stable@vger.kernel.org> # v6.10+
Link: https://patchwork.freedesktop.org/patch/msgid/20240916230103.611490-1-lyude@redhat.com
---
 include/drm/drm_panic.h | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

(limited to 'include')

diff --git a/include/drm/drm_panic.h b/include/drm/drm_panic.h
index 54085d5d05c3..f4e1fa9ae607 100644
--- a/include/drm/drm_panic.h
+++ b/include/drm/drm_panic.h
@@ -64,6 +64,8 @@ struct drm_scanout_buffer {
 
 };
 
+#ifdef CONFIG_DRM_PANIC
+
 /**
  * drm_panic_trylock - try to enter the panic printing critical section
  * @dev: struct drm_device
@@ -149,4 +151,16 @@ struct drm_scanout_buffer {
 #define drm_panic_unlock(dev, flags) \
 	raw_spin_unlock_irqrestore(&(dev)->mode_config.panic_lock, flags)
 
+#else
+
+static inline bool drm_panic_trylock(struct drm_device *dev, unsigned long flags)
+{
+	return true;
+}
+
+static inline void drm_panic_lock(struct drm_device *dev, unsigned long flags) {}
+static inline void drm_panic_unlock(struct drm_device *dev, unsigned long flags) {}
+
+#endif
+
 #endif /* __DRM_PANIC_H__ */
-- 
cgit v1.2.3


From 318580ad7f2828f6269e0b8819e943ddedda3375 Mon Sep 17 00:00:00 2001
From: Hongbo Li <lihongbo22@huawei.com>
Date: Thu, 29 Aug 2024 14:41:09 +0800
Subject: hugetlbfs: support tracepoint

Add basic tracepoints for {alloc, evict, free}_inode, setattr and
fallocate. These can help users to debug hugetlbfs more conveniently.

Signed-off-by: Hongbo Li <lihongbo22@huawei.com>
Link: https://lore.kernel.org/r/20240829064110.67884-2-lihongbo22@huawei.com
Reviewed-by: Steven Rostedt (Google) <rostedt@goodmis.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/trace/events/hugetlbfs.h | 156 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 156 insertions(+)
 create mode 100644 include/trace/events/hugetlbfs.h

(limited to 'include')

diff --git a/include/trace/events/hugetlbfs.h b/include/trace/events/hugetlbfs.h
new file mode 100644
index 000000000000..8331c904a9ba
--- /dev/null
+++ b/include/trace/events/hugetlbfs.h
@@ -0,0 +1,156 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM hugetlbfs
+
+#if !defined(_TRACE_HUGETLBFS_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_HUGETLBFS_H
+
+#include <linux/tracepoint.h>
+
+TRACE_EVENT(hugetlbfs_alloc_inode,
+
+	TP_PROTO(struct inode *inode, struct inode *dir, int mode),
+
+	TP_ARGS(inode, dir, mode),
+
+	TP_STRUCT__entry(
+		__field(dev_t,		dev)
+		__field(ino_t,		ino)
+		__field(ino_t,		dir)
+		__field(__u16,		mode)
+	),
+
+	TP_fast_assign(
+		__entry->dev		= inode->i_sb->s_dev;
+		__entry->ino		= inode->i_ino;
+		__entry->dir		= dir->i_ino;
+		__entry->mode		= mode;
+	),
+
+	TP_printk("dev %d,%d ino %lu dir %lu mode 0%o",
+		MAJOR(__entry->dev), MINOR(__entry->dev),
+		(unsigned long) __entry->ino,
+		(unsigned long) __entry->dir, __entry->mode)
+);
+
+DECLARE_EVENT_CLASS(hugetlbfs__inode,
+
+	TP_PROTO(struct inode *inode),
+
+	TP_ARGS(inode),
+
+	TP_STRUCT__entry(
+		__field(dev_t,		dev)
+		__field(ino_t,		ino)
+		__field(__u16,		mode)
+		__field(loff_t,		size)
+		__field(unsigned int,	nlink)
+		__field(unsigned int,	seals)
+		__field(blkcnt_t,	blocks)
+	),
+
+	TP_fast_assign(
+		__entry->dev		= inode->i_sb->s_dev;
+		__entry->ino		= inode->i_ino;
+		__entry->mode		= inode->i_mode;
+		__entry->size		= inode->i_size;
+		__entry->nlink		= inode->i_nlink;
+		__entry->seals		= HUGETLBFS_I(inode)->seals;
+		__entry->blocks		= inode->i_blocks;
+	),
+
+	TP_printk("dev %d,%d ino %lu mode 0%o size %lld nlink %u seals %u blocks %llu",
+		MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino,
+		__entry->mode, __entry->size, __entry->nlink, __entry->seals,
+		(unsigned long long)__entry->blocks)
+);
+
+DEFINE_EVENT(hugetlbfs__inode, hugetlbfs_evict_inode,
+
+	TP_PROTO(struct inode *inode),
+
+	TP_ARGS(inode)
+);
+
+DEFINE_EVENT(hugetlbfs__inode, hugetlbfs_free_inode,
+
+	TP_PROTO(struct inode *inode),
+
+	TP_ARGS(inode)
+);
+
+TRACE_EVENT(hugetlbfs_setattr,
+
+	TP_PROTO(struct inode *inode, struct dentry *dentry,
+		struct iattr *attr),
+
+	TP_ARGS(inode, dentry, attr),
+
+	TP_STRUCT__entry(
+		__field(dev_t,		dev)
+		__field(ino_t,		ino)
+		__field(unsigned int,	d_len)
+		__string(d_name,	dentry->d_name.name)
+		__field(unsigned int,	ia_valid)
+		__field(unsigned int,	ia_mode)
+		__field(loff_t,		old_size)
+		__field(loff_t,		ia_size)
+	),
+
+	TP_fast_assign(
+		__entry->dev		= inode->i_sb->s_dev;
+		__entry->ino		= inode->i_ino;
+		__entry->d_len		= dentry->d_name.len;
+		__assign_str(d_name);
+		__entry->ia_valid	= attr->ia_valid;
+		__entry->ia_mode	= attr->ia_mode;
+		__entry->old_size	= inode->i_size;
+		__entry->ia_size	= attr->ia_size;
+	),
+
+	TP_printk("dev %d,%d ino %lu name %.*s valid %#x mode 0%o old_size %lld size %lld",
+		MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long)__entry->ino,
+		__entry->d_len, __get_str(d_name), __entry->ia_valid, __entry->ia_mode,
+		__entry->old_size, __entry->ia_size)
+);
+
+TRACE_EVENT(hugetlbfs_fallocate,
+
+	TP_PROTO(struct inode *inode, int mode,
+		loff_t offset, loff_t len, int ret),
+
+	TP_ARGS(inode, mode, offset, len, ret),
+
+	TP_STRUCT__entry(
+		__field(dev_t,		dev)
+		__field(ino_t,		ino)
+		__field(int,		mode)
+		__field(loff_t,		offset)
+		__field(loff_t,		len)
+		__field(loff_t,		size)
+		__field(int,		ret)
+	),
+
+	TP_fast_assign(
+		__entry->dev		= inode->i_sb->s_dev;
+		__entry->ino		= inode->i_ino;
+		__entry->mode		= mode;
+		__entry->offset		= offset;
+		__entry->len		= len;
+		__entry->size		= inode->i_size;
+		__entry->ret		= ret;
+	),
+
+	TP_printk("dev %d,%d ino %lu mode 0%o offset %lld len %lld size %lld ret %d",
+		MAJOR(__entry->dev), MINOR(__entry->dev),
+		(unsigned long)__entry->ino, __entry->mode,
+		(unsigned long long)__entry->offset,
+		(unsigned long long)__entry->len,
+		(unsigned long long)__entry->size,
+		__entry->ret)
+);
+
+#endif /* _TRACE_HUGETLBFS_H */
+
+ /* This part must be outside protection */
+#include <trace/define_trace.h>
-- 
cgit v1.2.3


From 2b0996c7646293c71c1297c00e07e54cee9bec5c Mon Sep 17 00:00:00 2001
From: Rosen Penev <rosenp@gmail.com>
Date: Fri, 6 Sep 2024 12:53:59 -0700
Subject: wifi: ath9k: remove ath9k_platform_data
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Completely unused here in favor of Device Tree based setup. The DT code
in here should currently match what is available with platform files.
Any such lapse can always be added.

Signed-off-by: Rosen Penev <rosenp@gmail.com>
Acked-by: Toke Høiland-Jørgensen <toke@toke.dk>
Signed-off-by: Kalle Valo <quic_kvalo@quicinc.com>
Link: https://patch.msgid.link/20240906195359.6982-4-rosenp@gmail.com
---
 include/linux/ath9k_platform.h | 51 ------------------------------------------
 1 file changed, 51 deletions(-)
 delete mode 100644 include/linux/ath9k_platform.h

(limited to 'include')

diff --git a/include/linux/ath9k_platform.h b/include/linux/ath9k_platform.h
deleted file mode 100644
index 76860a461ed2..000000000000
--- a/include/linux/ath9k_platform.h
+++ /dev/null
@@ -1,51 +0,0 @@
-/*
- * Copyright (c) 2008 Atheros Communications Inc.
- * Copyright (c) 2009 Gabor Juhos <juhosg@openwrt.org>
- * Copyright (c) 2009 Imre Kaloz <kaloz@openwrt.org>
- *
- * Permission to use, copy, modify, and/or distribute this software for any
- * purpose with or without fee is hereby granted, provided that the above
- * copyright notice and this permission notice appear in all copies.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
- * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
- * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
- * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- */
-
-#ifndef _LINUX_ATH9K_PLATFORM_H
-#define _LINUX_ATH9K_PLATFORM_H
-
-#define ATH9K_PLAT_EEP_MAX_WORDS	2048
-
-struct ath9k_platform_data {
-	const char *eeprom_name;
-
-	u16 eeprom_data[ATH9K_PLAT_EEP_MAX_WORDS];
-	u8 *macaddr;
-
-	int led_pin;
-	u32 gpio_mask;
-	u32 gpio_val;
-
-	u32 bt_active_pin;
-	u32 bt_priority_pin;
-	u32 wlan_active_pin;
-
-	bool endian_check;
-	bool is_clk_25mhz;
-	bool tx_gain_buffalo;
-	bool disable_2ghz;
-	bool disable_5ghz;
-	bool led_active_high;
-
-	int (*get_mac_revision)(void);
-	int (*external_reset)(void);
-
-	bool use_eeprom;
-};
-
-#endif /* _LINUX_ATH9K_PLATFORM_H */
-- 
cgit v1.2.3


From 5b40191152282e1f25d7b9826bcda41be927b39f Mon Sep 17 00:00:00 2001
From: Rodrigo Vivi <rodrigo.vivi@intel.com>
Date: Fri, 6 Sep 2024 15:06:03 +0300
Subject: drm/xe/pciids: Add PVC's PCI device ID macros
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add PVC PCI IDs to the xe_pciids.h header. They're not yet used in the
driver.

Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Cc: Lucas De Marchi <lucas.demarchi@intel.com>
Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Acked-by: Simona Vetter <simona.vetter@ffwll.ch>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/6ac1829493a53a3fec889c746648d627a0296892.1725624296.git.jani.nikula@intel.com
---
 include/drm/intel/xe_pciids.h | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

(limited to 'include')

diff --git a/include/drm/intel/xe_pciids.h b/include/drm/intel/xe_pciids.h
index 79001afa7d27..67baa7c2246a 100644
--- a/include/drm/intel/xe_pciids.h
+++ b/include/drm/intel/xe_pciids.h
@@ -191,6 +191,22 @@
 	MACRO__(0x7D60, ## __VA_ARGS__),	\
 	MACRO__(0x7DD5, ## __VA_ARGS__)
 
+/* PVC */
+#define XE_PVC_IDS(MACRO__, ...)		\
+	MACRO__(0x0B69, ## __VA_ARGS__),	\
+	MACRO__(0x0B6E, ## __VA_ARGS__),	\
+	MACRO__(0x0BD4, ## __VA_ARGS__),	\
+	MACRO__(0x0BD5, ## __VA_ARGS__),	\
+	MACRO__(0x0BD6, ## __VA_ARGS__),	\
+	MACRO__(0x0BD7, ## __VA_ARGS__),	\
+	MACRO__(0x0BD8, ## __VA_ARGS__),	\
+	MACRO__(0x0BD9, ## __VA_ARGS__),	\
+	MACRO__(0x0BDA, ## __VA_ARGS__),	\
+	MACRO__(0x0BDB, ## __VA_ARGS__),	\
+	MACRO__(0x0BE0, ## __VA_ARGS__),	\
+	MACRO__(0x0BE1, ## __VA_ARGS__),	\
+	MACRO__(0x0BE5, ## __VA_ARGS__)
+
 #define XE_LNL_IDS(MACRO__, ...) \
 	MACRO__(0x6420, ## __VA_ARGS__), \
 	MACRO__(0x64A0, ## __VA_ARGS__), \
-- 
cgit v1.2.3


From 66b281fd8e599ddd7a00a89dc0dcfb7a13411441 Mon Sep 17 00:00:00 2001
From: Dnyaneshwar Bhadane <dnyaneshwar.bhadane@intel.com>
Date: Tue, 10 Sep 2024 11:53:01 +0530
Subject: drm/i915/pciid: Add new PCI id for ARL

Add new PCI id for ARL platform.

Signed-off-by: Dnyaneshwar Bhadane <dnyaneshwar.bhadane@intel.com>
Reviewed-by: Nemesa Garg <nemesa.garg@intel.com>
Reviewed-by: Sai Teja Pottumuttu <sai.teja.pottumuttu@intel.com>
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240910062301.2006782-1-dnyaneshwar.bhadane@intel.com
---
 include/drm/intel/i915_pciids.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/drm/intel/i915_pciids.h b/include/drm/intel/i915_pciids.h
index cbb12fdbcb7f..02156c6f79b6 100644
--- a/include/drm/intel/i915_pciids.h
+++ b/include/drm/intel/i915_pciids.h
@@ -769,7 +769,8 @@
 	MACRO__(0x7D41, ## __VA_ARGS__), \
 	MACRO__(0x7D51, ## __VA_ARGS__), \
 	MACRO__(0x7D67, ## __VA_ARGS__), \
-	MACRO__(0x7DD1, ## __VA_ARGS__)
+	MACRO__(0x7DD1, ## __VA_ARGS__), \
+	MACRO__(0xB640, ## __VA_ARGS__)
 
 /* MTL */
 #define INTEL_MTL_IDS(MACRO__, ...) \
-- 
cgit v1.2.3


From 35667a0330612bb25a689e4d3a687d47cede1d7a Mon Sep 17 00:00:00 2001
From: Dnyaneshwar Bhadane <dnyaneshwar.bhadane@intel.com>
Date: Thu, 12 Sep 2024 17:29:06 +0530
Subject: drm/xe/pciid: Add new PCI id for ARL

Add new PCI id for ARL platform.

v2: Fix typo in PCI id (SaiTeja)

Signed-off-by: Dnyaneshwar Bhadane <dnyaneshwar.bhadane@intel.com>
Reviewed-by: Sai Teja Pottumuttu <sai.teja.pottumuttu@intel.com>
Reviewed-by: Tejas Upadhyay <tejas.upadhyay@intel.com>
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240912115906.2730577-1-dnyaneshwar.bhadane@intel.com
---
 include/drm/intel/xe_pciids.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/drm/intel/xe_pciids.h b/include/drm/intel/xe_pciids.h
index 67baa7c2246a..65520a90c17c 100644
--- a/include/drm/intel/xe_pciids.h
+++ b/include/drm/intel/xe_pciids.h
@@ -181,7 +181,8 @@
 	MACRO__(0x7D41, ## __VA_ARGS__),	\
 	MACRO__(0x7D51, ## __VA_ARGS__),        \
 	MACRO__(0x7D67, ## __VA_ARGS__),	\
-	MACRO__(0x7DD1, ## __VA_ARGS__)
+	MACRO__(0x7DD1, ## __VA_ARGS__),	\
+	MACRO__(0xB640, ## __VA_ARGS__)
 
 /* MTL */
 #define XE_MTL_IDS(MACRO__, ...)		\
-- 
cgit v1.2.3


From 1e436f4fff1fd1fcc904ee18139f7e284001dc81 Mon Sep 17 00:00:00 2001
From: Shuicheng Lin <shuicheng.lin@intel.com>
Date: Tue, 17 Sep 2024 14:47:32 +0000
Subject: drm/scheduler: Improve documentation

Function drm_sched_entity_push_job() doesn't have a return value,
remove the return value description for it.
Correct several other typo errors.

v2 (Philipp):
- more correction with related comments.

Signed-off-by: Shuicheng Lin <shuicheng.lin@intel.com>
Reviewed-by: Philipp Stanner <pstanner@redhat.com>
Signed-off-by: Simona Vetter <simona.vetter@ffwll.ch>
Link: https://patchwork.freedesktop.org/patch/msgid/20240917144732.2758572-1-shuicheng.lin@intel.com
---
 include/drm/gpu_scheduler.h | 12 ++++++------
 include/linux/dma-resv.h    |  6 +++---
 2 files changed, 9 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
index a8d19b10f9b8..f83e1209b11d 100644
--- a/include/drm/gpu_scheduler.h
+++ b/include/drm/gpu_scheduler.h
@@ -33,11 +33,11 @@
 #define MAX_WAIT_SCHED_ENTITY_Q_EMPTY msecs_to_jiffies(1000)
 
 /**
- * DRM_SCHED_FENCE_DONT_PIPELINE - Prefent dependency pipelining
+ * DRM_SCHED_FENCE_DONT_PIPELINE - Prevent dependency pipelining
  *
  * Setting this flag on a scheduler fence prevents pipelining of jobs depending
  * on this fence. In other words we always insert a full CPU round trip before
- * dependen jobs are pushed to the hw queue.
+ * dependent jobs are pushed to the hw queue.
  */
 #define DRM_SCHED_FENCE_DONT_PIPELINE	DMA_FENCE_FLAG_USER_BITS
 
@@ -71,7 +71,7 @@ enum drm_sched_priority {
 	DRM_SCHED_PRIORITY_COUNT
 };
 
-/* Used to chose between FIFO and RR jobs scheduling */
+/* Used to choose between FIFO and RR job-scheduling */
 extern int drm_sched_policy;
 
 #define DRM_SCHED_POLICY_RR    0
@@ -198,7 +198,7 @@ struct drm_sched_entity {
 	 *
 	 * Points to the finished fence of the last scheduled job. Only written
 	 * by the scheduler thread, can be accessed locklessly from
-	 * drm_sched_job_arm() iff the queue is empty.
+	 * drm_sched_job_arm() if the queue is empty.
 	 */
 	struct dma_fence __rcu		*last_scheduled;
 
@@ -247,7 +247,7 @@ struct drm_sched_entity {
  * @sched: the scheduler to which this rq belongs to.
  * @entities: list of the entities to be scheduled.
  * @current_entity: the entity which is to be scheduled.
- * @rb_tree_root: root of time based priory queue of entities for FIFO scheduling
+ * @rb_tree_root: root of time based priority queue of entities for FIFO scheduling
  *
  * Run queue is a set of entities scheduling command submissions for
  * one specific ring. It implements the scheduling policy that selects
@@ -321,7 +321,7 @@ struct drm_sched_fence *to_drm_sched_fence(struct dma_fence *f);
  * @s_fence: contains the fences for the scheduling of job.
  * @finish_cb: the callback for the finished fence.
  * @credits: the number of credits this job contributes to the scheduler
- * @work: Helper to reschdeule job kill to different context.
+ * @work: Helper to reschedule job kill to different context.
  * @id: a unique id assigned to each job scheduled on the scheduler.
  * @karma: increment on every hang caused by this job. If this exceeds the hang
  *         limit of the scheduler then the job is marked guilty and will not
diff --git a/include/linux/dma-resv.h b/include/linux/dma-resv.h
index 8d0e34dad446..c5ab6fd9ebe8 100644
--- a/include/linux/dma-resv.h
+++ b/include/linux/dma-resv.h
@@ -105,10 +105,10 @@ enum dma_resv_usage {
 	 * This should be used by submissions which don't want to participate in
 	 * any implicit synchronization.
 	 *
-	 * The most common case are preemption fences, page table updates, TLB
-	 * flushes as well as explicit synced user submissions.
+	 * The most common cases are preemption fences, page table updates, TLB
+	 * flushes as well as explicitly synced user submissions.
 	 *
-	 * Explicit synced user user submissions can be promoted to
+	 * Explicitly synced user submissions can be promoted to
 	 * DMA_RESV_USAGE_READ or DMA_RESV_USAGE_WRITE as needed using
 	 * dma_buf_import_sync_file() when implicit synchronization should
 	 * become necessary after initial adding of the fence.
-- 
cgit v1.2.3


From 0992b2541e1cd9580c2e70fab7a78558de054bae Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ma=C3=ADra=20Canal?= <mcanal@igalia.com>
Date: Mon, 23 Sep 2024 10:55:08 -0300
Subject: drm/gem: Create a drm_gem_object_init_with_mnt() function
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

For some applications, such as applications that uses huge pages, we might
want to have a different mountpoint, for which we pass mount flags that
better match our usecase.

Therefore, create a new function `drm_gem_object_init_with_mnt()` that
allow us to define the tmpfs mountpoint where the GEM object will be
created. If this parameter is NULL, then we fallback to `shmem_file_setup()`.

Signed-off-by: Maíra Canal <mcanal@igalia.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240923141348.2422499-5-mcanal@igalia.com
---
 include/drm/drm_gem.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/drm/drm_gem.h b/include/drm/drm_gem.h
index bae4865b2101..2ebf6e10cc44 100644
--- a/include/drm/drm_gem.h
+++ b/include/drm/drm_gem.h
@@ -472,6 +472,9 @@ void drm_gem_object_release(struct drm_gem_object *obj);
 void drm_gem_object_free(struct kref *kref);
 int drm_gem_object_init(struct drm_device *dev,
 			struct drm_gem_object *obj, size_t size);
+int drm_gem_object_init_with_mnt(struct drm_device *dev,
+				 struct drm_gem_object *obj, size_t size,
+				 struct vfsmount *gemfs);
 void drm_gem_private_object_init(struct drm_device *dev,
 				 struct drm_gem_object *obj, size_t size);
 void drm_gem_private_object_fini(struct drm_gem_object *obj);
-- 
cgit v1.2.3


From be431dfec976e553a08883e26d0d0cc2598a8dfa Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ma=C3=ADra=20Canal?= <mcanal@igalia.com>
Date: Mon, 23 Sep 2024 10:55:10 -0300
Subject: drm/gem: Create shmem GEM object in a given mountpoint
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Create a function `drm_gem_shmem_create_with_mnt()`, similar to
`drm_gem_shmem_create()`, that has a mountpoint as a argument. This
function will create a shmem GEM object in a given tmpfs mountpoint.

This function will be useful for drivers that have a special mountpoint
with flags enabled.

Signed-off-by: Maíra Canal <mcanal@igalia.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240923141348.2422499-7-mcanal@igalia.com
---
 include/drm/drm_gem_shmem_helper.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/drm/drm_gem_shmem_helper.h b/include/drm/drm_gem_shmem_helper.h
index efbc9f27312b..d22e3fb53631 100644
--- a/include/drm/drm_gem_shmem_helper.h
+++ b/include/drm/drm_gem_shmem_helper.h
@@ -97,6 +97,9 @@ struct drm_gem_shmem_object {
 	container_of(obj, struct drm_gem_shmem_object, base)
 
 struct drm_gem_shmem_object *drm_gem_shmem_create(struct drm_device *dev, size_t size);
+struct drm_gem_shmem_object *drm_gem_shmem_create_with_mnt(struct drm_device *dev,
+							   size_t size,
+							   struct vfsmount *gemfs);
 void drm_gem_shmem_free(struct drm_gem_shmem_object *shmem);
 
 void drm_gem_shmem_put_pages(struct drm_gem_shmem_object *shmem);
-- 
cgit v1.2.3


From 9f8e1c93a0d459463819d8bd222196b2655c279f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ma=C3=ADra=20Canal?= <mcanal@igalia.com>
Date: Mon, 23 Sep 2024 10:55:15 -0300
Subject: drm/v3d: Expose Super Pages capability
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add a new V3D parameter to expose the support of Super Pages to
userspace. The userspace might want to know this information to
apply optimizations that are specific to kernels with Super Pages
enabled.

Signed-off-by: Maíra Canal <mcanal@igalia.com>
Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240923141348.2422499-12-mcanal@igalia.com
---
 include/uapi/drm/v3d_drm.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/uapi/drm/v3d_drm.h b/include/uapi/drm/v3d_drm.h
index 87fc5bb0a61e..2376c73abca1 100644
--- a/include/uapi/drm/v3d_drm.h
+++ b/include/uapi/drm/v3d_drm.h
@@ -290,6 +290,7 @@ enum drm_v3d_param {
 	DRM_V3D_PARAM_SUPPORTS_MULTISYNC_EXT,
 	DRM_V3D_PARAM_SUPPORTS_CPU_QUEUE,
 	DRM_V3D_PARAM_MAX_PERF_COUNTERS,
+	DRM_V3D_PARAM_SUPPORTS_SUPER_PAGES,
 };
 
 struct drm_v3d_get_param {
-- 
cgit v1.2.3


From f73716fd4550d588a811f11c370e90c303f0829b Mon Sep 17 00:00:00 2001
From: Mary Guillemard <mary.guillemard@collabora.com>
Date: Mon, 9 Sep 2024 08:48:20 +0200
Subject: drm/panthor: Add PANTHOR_GROUP_PRIORITY_REALTIME group priority

This adds a new value to drm_panthor_group_priority exposing the
realtime priority to userspace.

This is required to implement NV_context_priority_realtime in Mesa.

v2:
- Add Steven Price r-b

v3:
- Add Boris Brezillon r-b

Signed-off-by: Mary Guillemard <mary.guillemard@collabora.com>
Reviewed-by: Steven Price <steven.price@arm.com>
Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com>
Signed-off-by: Steven Price <steven.price@arm.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240909064820.34982-3-mary.guillemard@collabora.com
---
 include/uapi/drm/panthor_drm.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include')

diff --git a/include/uapi/drm/panthor_drm.h b/include/uapi/drm/panthor_drm.h
index 1fd8473548ac..011a555e4674 100644
--- a/include/uapi/drm/panthor_drm.h
+++ b/include/uapi/drm/panthor_drm.h
@@ -720,6 +720,13 @@ enum drm_panthor_group_priority {
 	 * Requires CAP_SYS_NICE or DRM_MASTER.
 	 */
 	PANTHOR_GROUP_PRIORITY_HIGH,
+
+	/**
+	 * @PANTHOR_GROUP_PRIORITY_REALTIME: Realtime priority group.
+	 *
+	 * Requires CAP_SYS_NICE or DRM_MASTER.
+	 */
+	PANTHOR_GROUP_PRIORITY_REALTIME,
 };
 
 /**
-- 
cgit v1.2.3


From f70000ef23527f6d928d1175c66c5fafa968814b Mon Sep 17 00:00:00 2001
From: Mary Guillemard <mary.guillemard@collabora.com>
Date: Mon, 9 Sep 2024 08:48:21 +0200
Subject: drm/panthor: Add DEV_QUERY_GROUP_PRIORITIES_INFO dev query

Expose allowed group priorities with a new device query.

This new uAPI will be used in Mesa to properly report what priorities a
user can use for EGL_IMG_context_priority.

Since this extends the uAPI and because userland needs a way to
advertise priorities accordingly, this also bumps the driver minor
version.

v2:
- Remove drm_panthor_group_allow_priority_flags definition
- Document that allowed_mask is a bitmask of drm_panthor_group_priority

v3:
- Use BIT macro in panthor_query_group_priorities_info
- Add r-b from Steven Price and Boris Brezillon

Signed-off-by: Mary Guillemard <mary.guillemard@collabora.com>
Reviewed-by: Steven Price <steven.price@arm.com>
Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com>
Signed-off-by: Steven Price <steven.price@arm.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240909064820.34982-4-mary.guillemard@collabora.com
---
 include/uapi/drm/panthor_drm.h | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

(limited to 'include')

diff --git a/include/uapi/drm/panthor_drm.h b/include/uapi/drm/panthor_drm.h
index 011a555e4674..87c9cb555dd1 100644
--- a/include/uapi/drm/panthor_drm.h
+++ b/include/uapi/drm/panthor_drm.h
@@ -263,6 +263,11 @@ enum drm_panthor_dev_query_type {
 
 	/** @DRM_PANTHOR_DEV_QUERY_TIMESTAMP_INFO: Query timestamp information. */
 	DRM_PANTHOR_DEV_QUERY_TIMESTAMP_INFO,
+
+	/**
+	 * @DRM_PANTHOR_DEV_QUERY_GROUP_PRIORITIES_INFO: Query allowed group priorities information.
+	 */
+	DRM_PANTHOR_DEV_QUERY_GROUP_PRIORITIES_INFO,
 };
 
 /**
@@ -399,6 +404,23 @@ struct drm_panthor_timestamp_info {
 	__u64 timestamp_offset;
 };
 
+/**
+ * struct drm_panthor_group_priorities_info - Group priorities information
+ *
+ * Structure grouping all queryable information relating to the allowed group priorities.
+ */
+struct drm_panthor_group_priorities_info {
+	/**
+	 * @allowed_mask: Bitmask of the allowed group priorities.
+	 *
+	 * Each bit represents a variant of the enum drm_panthor_group_priority.
+	 */
+	__u8 allowed_mask;
+
+	/** @pad: Padding fields, MBZ. */
+	__u8 pad[3];
+};
+
 /**
  * struct drm_panthor_dev_query - Arguments passed to DRM_PANTHOR_IOCTL_DEV_QUERY
  */
-- 
cgit v1.2.3


From b4ad4ef374d66cc8df3188bb1ddb65bce5fc9e50 Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Mon, 16 Sep 2024 15:33:20 +0200
Subject: gpu: host1x: Set up device DMA parameters

In order to store device DMA parameters, the DMA framework depends on
the device's dma_parms field to point at a valid memory location. Add
backing storage for this in struct host1x_memory_context and point to
it.

Reported-by: Jonathan Hunter <jonathanh@nvidia.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Tested-by: Jon Hunter <jonathanh@nvidia.com>
Signed-off-by: Thierry Reding <treding@nvidia.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240916133320.368620-1-thierry.reding@gmail.com
---
 include/linux/host1x.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/host1x.h b/include/linux/host1x.h
index 5a7a81e5f9bd..9fa9c30a34e6 100644
--- a/include/linux/host1x.h
+++ b/include/linux/host1x.h
@@ -471,6 +471,7 @@ struct host1x_memory_context {
 	refcount_t ref;
 	struct pid *owner;
 
+	struct device_dma_parameters dma_parms;
 	struct device dev;
 	u64 dma_mask;
 	u32 stream_id;
-- 
cgit v1.2.3


From eb1f4adf9101573fc2347978a60d71c4f1176cca Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Tue, 24 Sep 2024 09:11:59 +0200
Subject: drm/fbdev-helper: Move color-mode lookup into 4CC format helper

The color mode as specified on the kernel command line gives the user's
preferred color depth and number of bits per pixel. Move the
color-mode-to-format conversion from fbdev helpers into a 4CC helper,
so that it can be shared among DRM clients.

v2:
- fix grammar in commit message (Laurent)

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Reviewed-by: Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240924071734.98201-2-tzimmermann@suse.de
---
 include/drm/drm_fourcc.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/drm/drm_fourcc.h b/include/drm/drm_fourcc.h
index ccf91daa4307..c3f4405d6662 100644
--- a/include/drm/drm_fourcc.h
+++ b/include/drm/drm_fourcc.h
@@ -313,6 +313,7 @@ drm_get_format_info(struct drm_device *dev,
 uint32_t drm_mode_legacy_fb_format(uint32_t bpp, uint32_t depth);
 uint32_t drm_driver_legacy_fb_format(struct drm_device *dev,
 				     uint32_t bpp, uint32_t depth);
+uint32_t drm_driver_color_mode_format(struct drm_device *dev, unsigned int color_mode);
 unsigned int drm_format_info_block_width(const struct drm_format_info *info,
 					 int plane);
 unsigned int drm_format_info_block_height(const struct drm_format_info *info,
-- 
cgit v1.2.3


From 5d08c44e47b9d41366714552bdd374ac4b595591 Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Tue, 24 Sep 2024 09:12:01 +0200
Subject: drm/fbdev: Add memory-agnostic fbdev client

Add an fbdev client that can work with any memory manager. The
client implementation is the same as existing code in fbdev-dma or
fbdev-shmem.

Provide struct drm_driver.fbdev_probe for the new client to allocate
the surface GEM buffer. The new callback replaces fb_probe of struct
drm_fb_helper_funcs, which does the same.

To use the new client, DRM drivers set fbdev_probe in their struct
drm_driver instance and call drm_fbdev_client_setup(). Probing and
creating the fbdev surface buffer is now independent from the other
operations in struct drm_fb_helper. For the pixel format, the fbdev
client either uses a specified format, the value in preferred_depth
or 32-bit RGB.

v2:
- test for struct drm_fb_helper.funcs for NULL (Sui)
- respect struct drm_mode_config.preferred_depth for default format

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Reviewed-by: Javier Martinez Canillas <javierm@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240924071734.98201-4-tzimmermann@suse.de
---
 include/drm/drm_drv.h          | 18 ++++++++++++++++++
 include/drm/drm_fbdev_client.h | 19 +++++++++++++++++++
 2 files changed, 37 insertions(+)
 create mode 100644 include/drm/drm_fbdev_client.h

(limited to 'include')

diff --git a/include/drm/drm_drv.h b/include/drm/drm_drv.h
index 02ea4e3248fd..36a606af4ba1 100644
--- a/include/drm/drm_drv.h
+++ b/include/drm/drm_drv.h
@@ -34,6 +34,8 @@
 
 #include <drm/drm_device.h>
 
+struct drm_fb_helper;
+struct drm_fb_helper_surface_size;
 struct drm_file;
 struct drm_gem_object;
 struct drm_master;
@@ -366,6 +368,22 @@ struct drm_driver {
 			       struct drm_device *dev, uint32_t handle,
 			       uint64_t *offset);
 
+	/**
+	 * @fbdev_probe
+	 *
+	 * Allocates and initialize the fb_info structure for fbdev emulation.
+	 * Furthermore it also needs to allocate the DRM framebuffer used to
+	 * back the fbdev.
+	 *
+	 * This callback is mandatory for fbdev support.
+	 *
+	 * Returns:
+	 *
+	 * 0 on success ot a negative error code otherwise.
+	 */
+	int (*fbdev_probe)(struct drm_fb_helper *fbdev_helper,
+			   struct drm_fb_helper_surface_size *sizes);
+
 	/**
 	 * @show_fdinfo:
 	 *
diff --git a/include/drm/drm_fbdev_client.h b/include/drm/drm_fbdev_client.h
new file mode 100644
index 000000000000..e11a5614f127
--- /dev/null
+++ b/include/drm/drm_fbdev_client.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: MIT */
+
+#ifndef DRM_FBDEV_CLIENT_H
+#define DRM_FBDEV_CLIENT_H
+
+struct drm_device;
+struct drm_format_info;
+
+#ifdef CONFIG_DRM_FBDEV_EMULATION
+int drm_fbdev_client_setup(struct drm_device *dev, const struct drm_format_info *format);
+#else
+static inline int drm_fbdev_client_setup(struct drm_device *dev,
+					 const struct drm_format_info *format)
+{
+	return 0;
+}
+#endif
+
+#endif
-- 
cgit v1.2.3


From d07fdf9225922d3e36ebd13ccab3df62b1ccdab3 Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Tue, 24 Sep 2024 09:12:02 +0200
Subject: drm: Add client-agnostic setup helper

DRM may support multiple in-kernel clients that run as soon as a DRM
driver has been registered. To select the client(s) in a single place,
introduce drm_client_setup().

Drivers that call the new helper automatically instantiate the kernel's
configured default clients. Only fbdev emulation is currently supported.
Later versions can add support for DRM-based logging, a boot logo or even
a console.

Some drivers handle the color mode for clients internally. Provide the
helper drm_client_setup_with_color_mode() for them.

Using the new interface requires the driver to select
DRM_CLIENT_SELECTION in its Kconfig. For now this only enables the
client-setup helpers if the fbdev client has been configured by the
user. A future patchset will further modularize client support and
rework DRM_CLIENT_SELECTION to select the correct dependencies for
all its clients.

v5:
- add CONFIG_DRM_CLIENT_SELECTION und DRM_CLIENT_SETUP
v4:
- fix docs for drm_client_setup_with_fourcc() (Geert)
v3:
- fix build error
v2:
- add drm_client_setup_with_fourcc() (Laurent)
- push default-format handling into actual clients

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Reviewed-by: Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240924071734.98201-5-tzimmermann@suse.de
---
 include/drm/drm_client_setup.h | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)
 create mode 100644 include/drm/drm_client_setup.h

(limited to 'include')

diff --git a/include/drm/drm_client_setup.h b/include/drm/drm_client_setup.h
new file mode 100644
index 000000000000..46aab3fb46be
--- /dev/null
+++ b/include/drm/drm_client_setup.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: MIT */
+
+#ifndef DRM_CLIENT_SETUP_H
+#define DRM_CLIENT_SETUP_H
+
+#include <linux/types.h>
+
+struct drm_device;
+struct drm_format_info;
+
+#if defined(CONFIG_DRM_CLIENT_SETUP)
+void drm_client_setup(struct drm_device *dev, const struct drm_format_info *format);
+void drm_client_setup_with_fourcc(struct drm_device *dev, u32 fourcc);
+void drm_client_setup_with_color_mode(struct drm_device *dev, unsigned int color_mode);
+#else
+static inline void drm_client_setup(struct drm_device *dev,
+				    const struct drm_format_info *format)
+{ }
+static inline void drm_client_setup_with_fourcc(struct drm_device *dev, u32 fourcc)
+{ }
+static inline void drm_client_setup_with_color_mode(struct drm_device *dev,
+						    unsigned int color_mode)
+{ }
+#endif
+
+#endif
-- 
cgit v1.2.3


From 8998eedda2539d2528cfebdc7c17eed0ad35b714 Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Tue, 24 Sep 2024 09:12:03 +0200
Subject: drm/fbdev-dma: Support struct drm_driver.fbdev_probe

Rework fbdev probing to support fbdev_probe in struct drm_driver
and reimplement the old fb_probe callback on top of it. Provide an
initializer macro for struct drm_driver that sets the callback
according to the kernel configuration.

This change allows the common fbdev client to run on top of DMA-
based DRM drivers.

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Reviewed-by: Javier Martinez Canillas <javierm@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240924071734.98201-6-tzimmermann@suse.de
---
 include/drm/drm_fbdev_dma.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'include')

diff --git a/include/drm/drm_fbdev_dma.h b/include/drm/drm_fbdev_dma.h
index 2da7ee784133..6ae4de46497c 100644
--- a/include/drm/drm_fbdev_dma.h
+++ b/include/drm/drm_fbdev_dma.h
@@ -4,12 +4,24 @@
 #define DRM_FBDEV_DMA_H
 
 struct drm_device;
+struct drm_fb_helper;
+struct drm_fb_helper_surface_size;
 
 #ifdef CONFIG_DRM_FBDEV_EMULATION
+int drm_fbdev_dma_driver_fbdev_probe(struct drm_fb_helper *fb_helper,
+				     struct drm_fb_helper_surface_size *sizes);
+
+#define DRM_FBDEV_DMA_DRIVER_OPS \
+	.fbdev_probe = drm_fbdev_dma_driver_fbdev_probe
+
 void drm_fbdev_dma_setup(struct drm_device *dev, unsigned int preferred_bpp);
 #else
 static inline void drm_fbdev_dma_setup(struct drm_device *dev, unsigned int preferred_bpp)
 { }
+
+#define DRM_FBDEV_DMA_DRIVER_OPS \
+	.fbdev_probe = NULL
+
 #endif
 
 #endif
-- 
cgit v1.2.3


From 731fddf4302ec00871fd5ae252c0aa765d61a9ad Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Tue, 24 Sep 2024 09:12:46 +0200
Subject: drm/fbdev-dma: Remove obsolete setup function

The old setup function drm_fbdev_dma_setup() is unused. Remove it and
its internal callbacks. New drivers should call drm_client_setup()
instead.

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Reviewed-by: Javier Martinez Canillas <javierm@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240924071734.98201-49-tzimmermann@suse.de
---
 include/drm/drm_fbdev_dma.h | 7 -------
 1 file changed, 7 deletions(-)

(limited to 'include')

diff --git a/include/drm/drm_fbdev_dma.h b/include/drm/drm_fbdev_dma.h
index 6ae4de46497c..fb3f2a9aa01a 100644
--- a/include/drm/drm_fbdev_dma.h
+++ b/include/drm/drm_fbdev_dma.h
@@ -3,7 +3,6 @@
 #ifndef DRM_FBDEV_DMA_H
 #define DRM_FBDEV_DMA_H
 
-struct drm_device;
 struct drm_fb_helper;
 struct drm_fb_helper_surface_size;
 
@@ -13,15 +12,9 @@ int drm_fbdev_dma_driver_fbdev_probe(struct drm_fb_helper *fb_helper,
 
 #define DRM_FBDEV_DMA_DRIVER_OPS \
 	.fbdev_probe = drm_fbdev_dma_driver_fbdev_probe
-
-void drm_fbdev_dma_setup(struct drm_device *dev, unsigned int preferred_bpp);
 #else
-static inline void drm_fbdev_dma_setup(struct drm_device *dev, unsigned int preferred_bpp)
-{ }
-
 #define DRM_FBDEV_DMA_DRIVER_OPS \
 	.fbdev_probe = NULL
-
 #endif
 
 #endif
-- 
cgit v1.2.3


From f0f195d1a3aedef126c3ed159712ed57a34daa1c Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Tue, 24 Sep 2024 09:12:47 +0200
Subject: drm/fbdev-shmem: Support struct drm_driver.fbdev_probe

Rework fbdev probing to support fbdev_probe in struct drm_driver
and reimplement the old fb_probe callback on top of it. Provide an
initializer macro for struct drm_driver that sets the callback
according to the kernel configuration.

This change allows the common fbdev client to run on top of SHMEM-
based DRM drivers.

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Reviewed-by: Javier Martinez Canillas <javierm@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240924071734.98201-50-tzimmermann@suse.de
---
 include/drm/drm_fbdev_shmem.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'include')

diff --git a/include/drm/drm_fbdev_shmem.h b/include/drm/drm_fbdev_shmem.h
index fb43cadd1950..3a5d1efa9d55 100644
--- a/include/drm/drm_fbdev_shmem.h
+++ b/include/drm/drm_fbdev_shmem.h
@@ -4,12 +4,23 @@
 #define DRM_FBDEV_SHMEM_H
 
 struct drm_device;
+struct drm_fb_helper;
+struct drm_fb_helper_surface_size;
 
 #ifdef CONFIG_DRM_FBDEV_EMULATION
+int drm_fbdev_shmem_driver_fbdev_probe(struct drm_fb_helper *fb_helper,
+				       struct drm_fb_helper_surface_size *sizes);
+
+#define DRM_FBDEV_SHMEM_DRIVER_OPS \
+	.fbdev_probe = drm_fbdev_shmem_driver_fbdev_probe
+
 void drm_fbdev_shmem_setup(struct drm_device *dev, unsigned int preferred_bpp);
 #else
 static inline void drm_fbdev_shmem_setup(struct drm_device *dev, unsigned int preferred_bpp)
 { }
+
+#define DRM_FBDEV_SHMEM_DRIVER_OPS \
+	.fbdev_probe = NULL
 #endif
 
 #endif
-- 
cgit v1.2.3


From bf0978203a746137ce5074a465f83a6cf12e813f Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Tue, 24 Sep 2024 09:13:01 +0200
Subject: drm/fbdev-shmem: Remove obsolete setup function

The old setup function drm_fbdev_shmem_setup() is unused. Remove it
and its internal callbacks. New drivers should call drm_client_setup()
instead.

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Reviewed-by: Javier Martinez Canillas <javierm@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240924071734.98201-64-tzimmermann@suse.de
---
 include/drm/drm_fbdev_shmem.h | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'include')

diff --git a/include/drm/drm_fbdev_shmem.h b/include/drm/drm_fbdev_shmem.h
index 3a5d1efa9d55..2fc708964d75 100644
--- a/include/drm/drm_fbdev_shmem.h
+++ b/include/drm/drm_fbdev_shmem.h
@@ -3,7 +3,6 @@
 #ifndef DRM_FBDEV_SHMEM_H
 #define DRM_FBDEV_SHMEM_H
 
-struct drm_device;
 struct drm_fb_helper;
 struct drm_fb_helper_surface_size;
 
@@ -13,12 +12,7 @@ int drm_fbdev_shmem_driver_fbdev_probe(struct drm_fb_helper *fb_helper,
 
 #define DRM_FBDEV_SHMEM_DRIVER_OPS \
 	.fbdev_probe = drm_fbdev_shmem_driver_fbdev_probe
-
-void drm_fbdev_shmem_setup(struct drm_device *dev, unsigned int preferred_bpp);
 #else
-static inline void drm_fbdev_shmem_setup(struct drm_device *dev, unsigned int preferred_bpp)
-{ }
-
 #define DRM_FBDEV_SHMEM_DRIVER_OPS \
 	.fbdev_probe = NULL
 #endif
-- 
cgit v1.2.3


From c7c1b9e1d52b0a0dbb0ee552efdc3360c0f5363c Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Tue, 24 Sep 2024 09:13:02 +0200
Subject: drm/fbdev-ttm: Support struct drm_driver.fbdev_probe

Rework fbdev probing to support fbdev_probe in struct drm_driver
and reimplement the old fb_probe callback on top of it. Provide an
initializer macro for struct drm_driver that sets the callback
according to the kernel configuration.

This change allows the common fbdev client to run on top of TTM-
based DRM drivers.

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Acked-by: Javier Martinez Canillas <javierm@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240924071734.98201-65-tzimmermann@suse.de
---
 include/drm/drm_fbdev_ttm.h | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'include')

diff --git a/include/drm/drm_fbdev_ttm.h b/include/drm/drm_fbdev_ttm.h
index 9e6c3bdf3537..243685d02eb1 100644
--- a/include/drm/drm_fbdev_ttm.h
+++ b/include/drm/drm_fbdev_ttm.h
@@ -3,11 +3,24 @@
 #ifndef DRM_FBDEV_TTM_H
 #define DRM_FBDEV_TTM_H
 
+#include <linux/stddef.h>
+
 struct drm_device;
+struct drm_fb_helper;
+struct drm_fb_helper_surface_size;
 
 #ifdef CONFIG_DRM_FBDEV_EMULATION
+int drm_fbdev_ttm_driver_fbdev_probe(struct drm_fb_helper *fb_helper,
+				     struct drm_fb_helper_surface_size *sizes);
+
+#define DRM_FBDEV_TTM_DRIVER_OPS \
+	.fbdev_probe = drm_fbdev_ttm_driver_fbdev_probe
+
 void drm_fbdev_ttm_setup(struct drm_device *dev, unsigned int preferred_bpp);
 #else
+#define DRM_FBDEV_TTM_DRIVER_OPS \
+	.fbdev_probe = NULL
+
 static inline void drm_fbdev_ttm_setup(struct drm_device *dev, unsigned int preferred_bpp)
 { }
 #endif
-- 
cgit v1.2.3


From 1000634477d8d178179b1ad45d92e925fabe3deb Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Tue, 24 Sep 2024 09:13:10 +0200
Subject: drm/fbdev-ttm: Remove obsolete setup function

The old setup function drm_fbdev_ttm_setup() is unused. Remove it and
its internal callbacks. New drivers should call drm_client_setup()
instead.

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Acked-by: Javier Martinez Canillas <javierm@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240924071734.98201-73-tzimmermann@suse.de
---
 include/drm/drm_fbdev_ttm.h | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'include')

diff --git a/include/drm/drm_fbdev_ttm.h b/include/drm/drm_fbdev_ttm.h
index 243685d02eb1..ad4a10bb4c78 100644
--- a/include/drm/drm_fbdev_ttm.h
+++ b/include/drm/drm_fbdev_ttm.h
@@ -5,7 +5,6 @@
 
 #include <linux/stddef.h>
 
-struct drm_device;
 struct drm_fb_helper;
 struct drm_fb_helper_surface_size;
 
@@ -15,14 +14,9 @@ int drm_fbdev_ttm_driver_fbdev_probe(struct drm_fb_helper *fb_helper,
 
 #define DRM_FBDEV_TTM_DRIVER_OPS \
 	.fbdev_probe = drm_fbdev_ttm_driver_fbdev_probe
-
-void drm_fbdev_ttm_setup(struct drm_device *dev, unsigned int preferred_bpp);
 #else
 #define DRM_FBDEV_TTM_DRIVER_OPS \
 	.fbdev_probe = NULL
-
-static inline void drm_fbdev_ttm_setup(struct drm_device *dev, unsigned int preferred_bpp)
-{ }
 #endif
 
 #endif
-- 
cgit v1.2.3


From 1cc2e1faafb3b5a2be25112559bdb495736b5af7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@baylibre.com>
Date: Fri, 20 Sep 2024 10:57:57 +0200
Subject: pwm: Add more locking
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This ensures that a pwm_chip that has no corresponding driver isn't used
and that a driver doesn't go away while a callback is still running.

In the presence of device links this isn't necessary yet (so this is no
fix) but for pwm character device support this is needed.

To not serialize all pwm_apply_state() calls, this introduces a per chip
lock. An additional complication is that for atomic chips a mutex cannot
be used (as pwm_apply_atomic() must not sleep) and a spinlock cannot be
held while calling an operation for a sleeping chip. So depending on the
chip being atomic or not a spinlock or a mutex is used.

An additional change implemented here is that on driver remove the
.free() callback is called for each requested pwm_device. This is the
right time because later (e.g. when the consumer calls pwm_put()) the
free function is (maybe) not available any more.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@baylibre.com>
Link: https://lore.kernel.org/r/026aa891c8270a11723a1ba7e4256f456f7e1e86.1726819463.git.u.kleine-koenig@baylibre.com
Signed-off-by: Uwe Kleine-König <ukleinek@kernel.org>
---
 include/linux/pwm.h | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'include')

diff --git a/include/linux/pwm.h b/include/linux/pwm.h
index 8acd60b53f58..3ea73e075abe 100644
--- a/include/linux/pwm.h
+++ b/include/linux/pwm.h
@@ -275,6 +275,9 @@ struct pwm_ops {
  * @of_xlate: request a PWM device given a device tree PWM specifier
  * @atomic: can the driver's ->apply() be called in atomic context
  * @uses_pwmchip_alloc: signals if pwmchip_allow was used to allocate this chip
+ * @operational: signals if the chip can be used (or is already deregistered)
+ * @nonatomic_lock: mutex for nonatomic chips
+ * @atomic_lock: mutex for atomic chips
  * @pwms: array of PWM devices allocated by the framework
  */
 struct pwm_chip {
@@ -290,6 +293,16 @@ struct pwm_chip {
 
 	/* only used internally by the PWM framework */
 	bool uses_pwmchip_alloc;
+	bool operational;
+	union {
+		/*
+		 * depending on the chip being atomic or not either the mutex or
+		 * the spinlock is used. It protects .operational and
+		 * synchronizes the callbacks in .ops
+		 */
+		struct mutex nonatomic_lock;
+		spinlock_t atomic_lock;
+	};
 	struct pwm_device pwms[] __counted_by(npwm);
 };
 
-- 
cgit v1.2.3


From 17e40c25158f2505cbcdeda96624afcbab4af368 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@baylibre.com>
Date: Fri, 20 Sep 2024 10:57:58 +0200
Subject: pwm: New abstraction for PWM waveforms
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Up to now the configuration of a PWM setting is described exclusively by
a struct pwm_state which contains information about period, duty_cycle,
polarity and if the PWM is enabled. (There is another member usage_power
which doesn't completely fit into pwm_state, I ignore it here for
simplicity.)

Instead of a polarity the new abstraction has a member duty_offset_ns
that defines when the rising edge happens after the period start. This
is more general, as with a pwm_state the rising edge can only happen at
the period's start or such that the falling edge is at the end of the
period (i.e. duty_offset_ns == 0 or duty_offset_ns == period_length_ns -
duty_length_ns).

A disabled PWM is modeled by .period_length_ns = 0. In my eyes this is a
nice usage of that otherwise unusable setting, as it doesn't define
anything about the future which matches the fact that consumers should
consider the state of the output as undefined and it's just there to say
"No further requirements about the output, you can save some power.".

Further I renamed period and duty_cycle to period_length_ns and
duty_length_ns. In the past there was confusion from time to time about
duty_cycle being measured in nanoseconds because people expected a
percentage of period instead. With "length_ns" as suffix the semantic
should be more obvious to people unfamiliar with the pwm subsystem.
period is renamed to period_length_ns for consistency.

The API for consumers doesn't change yet, but lowlevel drivers can
implement callbacks that work with pwm_waveforms instead of pwm_states.
A new thing about these callbacks is that the calculation of hardware
settings needed to implement a certain waveform is separated from
actually writing these settings. The motivation for that is that this
allows a consumer to query the hardware capabilities without actually
modifying the hardware state.

The rounding rules that are expected to be implemented in the
round_waveform_tohw() are: First pick the biggest possible period not
bigger than wf->period_length_ns. For that period pick the biggest
possible duty setting not bigger than wf->duty_length_ns. Third pick the
biggest possible offset not bigger than wf->duty_offset_ns. If the
requested period is too small for the hardware, it's expected that a
setting with the minimal period and duty_length_ns = duty_offset_ns = 0
is returned and this fact is signaled by a return value of 1.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@baylibre.com>
Tested-by: Trevor Gamblin <tgamblin@baylibre.com>
Link: https://lore.kernel.org/r/df0faa33bf9e7c9e2e5eab8d31bbf61e861bd401.1726819463.git.u.kleine-koenig@baylibre.com
[ukleinek: Update pwm_check_rounding() to return bool instead of int.]
Signed-off-by: Uwe Kleine-König <ukleinek@kernel.org>
---
 include/linux/pwm.h | 36 ++++++++++++++++++++++++++++++++++++
 1 file changed, 36 insertions(+)

(limited to 'include')

diff --git a/include/linux/pwm.h b/include/linux/pwm.h
index 3ea73e075abe..d8cfe1c9b19d 100644
--- a/include/linux/pwm.h
+++ b/include/linux/pwm.h
@@ -49,6 +49,31 @@ enum {
 	PWMF_EXPORTED = 1,
 };
 
+/**
+ * struct pwm_waveform - description of a PWM waveform
+ * @period_length_ns: PWM period
+ * @duty_length_ns: PWM duty cycle
+ * @duty_offset_ns: offset of the rising edge from the period's start
+ *
+ * This is a representation of a PWM waveform alternative to struct pwm_state
+ * below. It's more expressive than struct pwm_state as it contains a
+ * duty_offset_ns and so can represent offsets other than zero (with .polarity =
+ * PWM_POLARITY_NORMAL) and period - duty_cycle (.polarity =
+ * PWM_POLARITY_INVERSED).
+ *
+ * Note there is no explicit bool for enabled. A "disabled" PWM is represented
+ * by .period_length_ns = 0. Note further that the behaviour of a "disabled" PWM
+ * is undefined. Depending on the hardware's capabilities it might drive the
+ * active or inactive level, go high-z or even continue to toggle.
+ *
+ * The unit for all three members is nanoseconds.
+ */
+struct pwm_waveform {
+	u64 period_length_ns;
+	u64 duty_length_ns;
+	u64 duty_offset_ns;
+};
+
 /*
  * struct pwm_state - state of a PWM channel
  * @period: PWM period (in nanoseconds)
@@ -259,6 +284,17 @@ struct pwm_ops {
 	void (*free)(struct pwm_chip *chip, struct pwm_device *pwm);
 	int (*capture)(struct pwm_chip *chip, struct pwm_device *pwm,
 		       struct pwm_capture *result, unsigned long timeout);
+
+	size_t sizeof_wfhw;
+	int (*round_waveform_tohw)(struct pwm_chip *chip, struct pwm_device *pwm,
+				   const struct pwm_waveform *wf, void *wfhw);
+	int (*round_waveform_fromhw)(struct pwm_chip *chip, struct pwm_device *pwm,
+				     const void *wfhw, struct pwm_waveform *wf);
+	int (*read_waveform)(struct pwm_chip *chip, struct pwm_device *pwm,
+			    void *wfhw);
+	int (*write_waveform)(struct pwm_chip *chip, struct pwm_device *pwm,
+			      const void *wfhw);
+
 	int (*apply)(struct pwm_chip *chip, struct pwm_device *pwm,
 		     const struct pwm_state *state);
 	int (*get_state)(struct pwm_chip *chip, struct pwm_device *pwm,
-- 
cgit v1.2.3


From 6c5126c6406d1c31e91f5b925c621c1c785366be Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@baylibre.com>
Date: Fri, 20 Sep 2024 10:57:59 +0200
Subject: pwm: Provide new consumer API functions for waveforms
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Provide API functions for consumers to work with waveforms.

Note that one relevant difference between pwm_get_state() and
pwm_get_waveform*() is that the latter yields the actually configured
hardware state, while the former yields the last state passed to
pwm_apply*() and so doesn't account for hardware specific rounding.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@baylibre.com>
Tested-by: Trevor Gamblin <tgamblin@baylibre.com>
Link: https://lore.kernel.org/r/6c97d27682853f603e18e9196043886dd671845d.1726819463.git.u.kleine-koenig@baylibre.com
Signed-off-by: Uwe Kleine-König <ukleinek@kernel.org>
---
 include/linux/pwm.h | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/pwm.h b/include/linux/pwm.h
index d8cfe1c9b19d..c3d9ddeafa65 100644
--- a/include/linux/pwm.h
+++ b/include/linux/pwm.h
@@ -358,7 +358,11 @@ static inline void pwmchip_set_drvdata(struct pwm_chip *chip, void *data)
 }
 
 #if IS_ENABLED(CONFIG_PWM)
-/* PWM user APIs */
+
+/* PWM consumer APIs */
+int pwm_round_waveform_might_sleep(struct pwm_device *pwm, struct pwm_waveform *wf);
+int pwm_get_waveform_might_sleep(struct pwm_device *pwm, struct pwm_waveform *wf);
+int pwm_set_waveform_might_sleep(struct pwm_device *pwm, const struct pwm_waveform *wf, bool exact);
 int pwm_apply_might_sleep(struct pwm_device *pwm, const struct pwm_state *state);
 int pwm_apply_atomic(struct pwm_device *pwm, const struct pwm_state *state);
 int pwm_adjust_config(struct pwm_device *pwm);
-- 
cgit v1.2.3


From 1afd01db1a76cdd1d96696e3790d66c79621784c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@baylibre.com>
Date: Fri, 20 Sep 2024 10:58:00 +0200
Subject: pwm: Add tracing for waveform callbacks
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This adds trace events for the recently introduced waveform callbacks.
With the introduction of some helper macros consistency among the
different events is ensured.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@baylibre.com>
Link: https://lore.kernel.org/r/1d71879b0de3bf01459c7a9d0f040d43eb5ace56.1726819463.git.u.kleine-koenig@baylibre.com
Signed-off-by: Uwe Kleine-König <ukleinek@kernel.org>
---
 include/trace/events/pwm.h | 134 ++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 126 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/pwm.h b/include/trace/events/pwm.h
index 8022701c446d..8ba898fd335c 100644
--- a/include/trace/events/pwm.h
+++ b/include/trace/events/pwm.h
@@ -8,15 +8,135 @@
 #include <linux/pwm.h>
 #include <linux/tracepoint.h>
 
+#define TP_PROTO_pwm(args...)							\
+	TP_PROTO(struct pwm_device *pwm, args)
+
+#define TP_ARGS_pwm(args...)							\
+	TP_ARGS(pwm, args)
+
+#define TP_STRUCT__entry_pwm(args...)						\
+	TP_STRUCT__entry(							\
+		__field(unsigned int, chipid)					\
+		__field(unsigned int, hwpwm)					\
+		args)
+
+#define TP_fast_assign_pwm(args...)						\
+	TP_fast_assign(								\
+		__entry->chipid = pwm->chip->id;				\
+		__entry->hwpwm = pwm->hwpwm;					\
+		args)
+
+#define TP_printk_pwm(fmt, args...)						\
+	TP_printk("pwmchip%u.%u: " fmt, __entry->chipid, __entry->hwpwm, args)
+
+#define __field_pwmwf(wf)							\
+	__field(u64, wf ## _period_length_ns)					\
+	__field(u64, wf ## _duty_length_ns)					\
+	__field(u64, wf ## _duty_offset_ns)					\
+
+#define fast_assign_pwmwf(wf)							\
+	__entry->wf ## _period_length_ns = wf->period_length_ns;		\
+	__entry->wf ## _duty_length_ns = wf->duty_length_ns;			\
+	__entry->wf ## _duty_offset_ns = wf->duty_offset_ns
+
+#define printk_pwmwf_format(wf)							\
+	"%lld/%lld [+%lld]"
+
+#define printk_pwmwf_formatargs(wf)						\
+	__entry->wf ## _duty_length_ns, __entry->wf ## _period_length_ns, __entry->wf ## _duty_offset_ns
+
+TRACE_EVENT(pwm_round_waveform_tohw,
+
+	TP_PROTO_pwm(const struct pwm_waveform *wf, void *wfhw, int err),
+
+	TP_ARGS_pwm(wf, wfhw, err),
+
+	TP_STRUCT__entry_pwm(
+		__field_pwmwf(wf)
+		__field(void *, wfhw)
+		__field(int, err)
+	),
+
+	TP_fast_assign_pwm(
+		fast_assign_pwmwf(wf);
+		__entry->wfhw = wfhw;
+		__entry->err = err;
+	),
+
+	TP_printk_pwm(printk_pwmwf_format(wf) " > %p err=%d",
+		printk_pwmwf_formatargs(wf), __entry->wfhw, __entry->err)
+);
+
+TRACE_EVENT(pwm_round_waveform_fromhw,
+
+	TP_PROTO_pwm(const void *wfhw, struct pwm_waveform *wf, int err),
+
+	TP_ARGS_pwm(wfhw, wf, err),
+
+	TP_STRUCT__entry_pwm(
+		__field(const void *, wfhw)
+		__field_pwmwf(wf)
+		__field(int, err)
+	),
+
+	TP_fast_assign_pwm(
+		__entry->wfhw = wfhw;
+		fast_assign_pwmwf(wf);
+		__entry->err = err;
+	),
+
+	TP_printk_pwm("%p > " printk_pwmwf_format(wf) " err=%d",
+		__entry->wfhw, printk_pwmwf_formatargs(wf), __entry->err)
+);
+
+TRACE_EVENT(pwm_read_waveform,
+
+	TP_PROTO_pwm(void *wfhw, int err),
+
+	TP_ARGS_pwm(wfhw, err),
+
+	TP_STRUCT__entry_pwm(
+		__field(void *, wfhw)
+		__field(int, err)
+	),
+
+	TP_fast_assign_pwm(
+		__entry->wfhw = wfhw;
+		__entry->err = err;
+	),
+
+	TP_printk_pwm("%p err=%d",
+		__entry->wfhw, __entry->err)
+);
+
+TRACE_EVENT(pwm_write_waveform,
+
+	TP_PROTO_pwm(const void *wfhw, int err),
+
+	TP_ARGS_pwm(wfhw, err),
+
+	TP_STRUCT__entry_pwm(
+		__field(const void *, wfhw)
+		__field(int, err)
+	),
+
+	TP_fast_assign_pwm(
+		__entry->wfhw = wfhw;
+		__entry->err = err;
+	),
+
+	TP_printk_pwm("%p err=%d",
+		__entry->wfhw, __entry->err)
+);
+
+
 DECLARE_EVENT_CLASS(pwm,
 
 	TP_PROTO(struct pwm_device *pwm, const struct pwm_state *state, int err),
 
 	TP_ARGS(pwm, state, err),
 
-	TP_STRUCT__entry(
-		__field(unsigned int, chipid)
-		__field(unsigned int, hwpwm)
+	TP_STRUCT__entry_pwm(
 		__field(u64, period)
 		__field(u64, duty_cycle)
 		__field(enum pwm_polarity, polarity)
@@ -24,9 +144,7 @@ DECLARE_EVENT_CLASS(pwm,
 		__field(int, err)
 	),
 
-	TP_fast_assign(
-		__entry->chipid = pwm->chip->id;
-		__entry->hwpwm = pwm->hwpwm;
+	TP_fast_assign_pwm(
 		__entry->period = state->period;
 		__entry->duty_cycle = state->duty_cycle;
 		__entry->polarity = state->polarity;
@@ -34,8 +152,8 @@ DECLARE_EVENT_CLASS(pwm,
 		__entry->err = err;
 	),
 
-	TP_printk("pwmchip%u.%u: period=%llu duty_cycle=%llu polarity=%d enabled=%d err=%d",
-		  __entry->chipid, __entry->hwpwm, __entry->period, __entry->duty_cycle,
+	TP_printk_pwm("period=%llu duty_cycle=%llu polarity=%d enabled=%d err=%d",
+		  __entry->period, __entry->duty_cycle,
 		  __entry->polarity, __entry->enabled, __entry->err)
 
 );
-- 
cgit v1.2.3


From caf78b0465053c23aa6211b9815dd5433766627d Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Tue, 24 Sep 2024 12:08:08 +0200
Subject: regcache: Improve documentation of available cache types

There is some user confusion about which cache types to choose when which
is not helped by the lack of any central documentation providing an
overview of what's available. Provide a short overview in the API header to
try to help reduce this.

Signed-off-by: Mark Brown <broonie@kernel.org>
Link: https://patch.msgid.link/20240924-regcache-document-types-v1-1-e157054e1215@kernel.org
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/linux/regmap.h | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/regmap.h b/include/linux/regmap.h
index f9ccad32fc5c..da07584284ab 100644
--- a/include/linux/regmap.h
+++ b/include/linux/regmap.h
@@ -54,7 +54,14 @@ struct sdw_slave;
 #define REGMAP_UPSHIFT(s)	(-(s))
 #define REGMAP_DOWNSHIFT(s)	(s)
 
-/* An enum of all the supported cache types */
+/*
+ * The supported cache types, the default is no cache.  Any new caches
+ * should usually use the maple tree cache unless they specifically
+ * require that there are never any allocations at runtime and can't
+ * provide defaults in which case they should use the flat cache.  The
+ * rbtree cache *may* have some performance advantage for very low end
+ * systems that make heavy use of cache syncs but is mainly legacy.
+ */
 enum regcache_type {
 	REGCACHE_NONE,
 	REGCACHE_RBTREE,
-- 
cgit v1.2.3


From 5441b6975adc26aeaca316b9075e04a98238b1b3 Mon Sep 17 00:00:00 2001
From: Chen-Yu Tsai <wenst@chromium.org>
Date: Wed, 25 Sep 2024 17:38:04 +0800
Subject: regulator: Add of_regulator_get_optional() for pure DT regulator
 lookup

The to-be-introduced I2C component prober needs to enable regulator
supplies (and toggle GPIO pins) for the various components it intends
to probe. To support this, a new "pure DT lookup" method for getting
regulator supplies is needed, since the device normally requesting
the supply won't get created until after the component is probed to
be available.

Add a new of_regulator_get_optional() function for this. This mirrors
the existing regulator_get_optional() function, but is OF-specific.
The underlying code that supports the existing regulator_get*()
functions has been reworked in previous patches to support this
specific case.

Also convert an existing usage of "dev && dev->of_node" to
"dev_of_node(dev)".

Link: https://lore.kernel.org/all/20231220203537.83479-2-jernej.skrabec@gmail.com/ [1]
Signed-off-by: Chen-Yu Tsai <wenst@chromium.org>
Link: https://patch.msgid.link/20240925093807.1026949-2-wenst@chromium.org
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/linux/regulator/consumer.h | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

(limited to 'include')

diff --git a/include/linux/regulator/consumer.h b/include/linux/regulator/consumer.h
index b9ce521910a0..2b22f07e491c 100644
--- a/include/linux/regulator/consumer.h
+++ b/include/linux/regulator/consumer.h
@@ -168,6 +168,19 @@ int devm_regulator_get_enable_read_voltage(struct device *dev, const char *id);
 void regulator_put(struct regulator *regulator);
 void devm_regulator_put(struct regulator *regulator);
 
+#if IS_ENABLED(CONFIG_OF)
+struct regulator *__must_check of_regulator_get_optional(struct device *dev,
+							 struct device_node *node,
+							 const char *id);
+#else
+static inline struct regulator *__must_check of_regulator_get_optional(struct device *dev,
+								       struct device_node *node,
+								       const char *id)
+{
+	return ERR_PTR(-ENODEV);
+}
+#endif
+
 int regulator_register_supply_alias(struct device *dev, const char *id,
 				    struct device *alias_dev,
 				    const char *alias_id);
@@ -350,6 +363,13 @@ devm_regulator_get_optional(struct device *dev, const char *id)
 	return ERR_PTR(-ENODEV);
 }
 
+static inline struct regulator *__must_check of_regulator_get_optional(struct device *dev,
+								       struct device_node *node,
+								       const char *id)
+{
+	return ERR_PTR(-ENODEV);
+}
+
 static inline void regulator_put(struct regulator *regulator)
 {
 }
-- 
cgit v1.2.3


From 36ec3f437227470568e5f460997f367f5446a34d Mon Sep 17 00:00:00 2001
From: Chen-Yu Tsai <wenst@chromium.org>
Date: Wed, 25 Sep 2024 17:38:05 +0800
Subject: regulator: Add devres version of of_regulator_get_optional()

There are existing uses for a devres version of of_regulator_get_optional()
in power domain drivers. On MediaTek platforms, power domains may have
regulator supplies tied to them. The driver currently tries to use
devm_regulator_get() to not have to manage the lifecycle, but ends up
doing it in a very hacky way by replacing the device node of the power
domain controller device to the device node of the power domain that is
currently being registered, getting the supply, and reverting the device
node.

Provide a better API so that the hack can be replaced.

Signed-off-by: Chen-Yu Tsai <wenst@chromium.org>
Link: https://patch.msgid.link/20240925093807.1026949-3-wenst@chromium.org
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/linux/regulator/consumer.h | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

(limited to 'include')

diff --git a/include/linux/regulator/consumer.h b/include/linux/regulator/consumer.h
index 2b22f07e491c..8c3c372ad735 100644
--- a/include/linux/regulator/consumer.h
+++ b/include/linux/regulator/consumer.h
@@ -172,6 +172,9 @@ void devm_regulator_put(struct regulator *regulator);
 struct regulator *__must_check of_regulator_get_optional(struct device *dev,
 							 struct device_node *node,
 							 const char *id);
+struct regulator *__must_check devm_of_regulator_get_optional(struct device *dev,
+							      struct device_node *node,
+							      const char *id);
 #else
 static inline struct regulator *__must_check of_regulator_get_optional(struct device *dev,
 								       struct device_node *node,
@@ -179,6 +182,13 @@ static inline struct regulator *__must_check of_regulator_get_optional(struct de
 {
 	return ERR_PTR(-ENODEV);
 }
+
+static inline struct regulator *__must_check devm_of_regulator_get_optional(struct device *dev,
+									    struct device_node *node,
+									    const char *id)
+{
+	return ERR_PTR(-ENODEV);
+}
 #endif
 
 int regulator_register_supply_alias(struct device *dev, const char *id,
@@ -370,6 +380,13 @@ static inline struct regulator *__must_check of_regulator_get_optional(struct de
 	return ERR_PTR(-ENODEV);
 }
 
+static inline struct regulator *__must_check devm_of_regulator_get_optional(struct device *dev,
+									    struct device_node *node,
+									    const char *id)
+{
+	return ERR_PTR(-ENODEV);
+}
+
 static inline void regulator_put(struct regulator *regulator)
 {
 }
-- 
cgit v1.2.3


From 0809a9ccac4a2ffdfd1561bb551aec6099775545 Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Mon, 2 Sep 2024 20:59:47 +0800
Subject: spi: remove {devm_}spi_alloc_master/slave()

All the {devm_}spi_alloc_master/slave() have been replaced,
so they can be removed and replaced in doc and comment.

No functional changed.

Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Link: https://patch.msgid.link/20240902125947.1368-8-yangyingliang@huaweicloud.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/linux/spi/spi.h | 30 ------------------------------
 1 file changed, 30 deletions(-)

(limited to 'include')

diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h
index 4b95663163e0..8497f4747e24 100644
--- a/include/linux/spi/spi.h
+++ b/include/linux/spi/spi.h
@@ -824,21 +824,6 @@ void spi_take_timestamp_post(struct spi_controller *ctlr,
 extern struct spi_controller *__spi_alloc_controller(struct device *host,
 						unsigned int size, bool slave);
 
-static inline struct spi_controller *spi_alloc_master(struct device *host,
-						      unsigned int size)
-{
-	return __spi_alloc_controller(host, size, false);
-}
-
-static inline struct spi_controller *spi_alloc_slave(struct device *host,
-						     unsigned int size)
-{
-	if (!IS_ENABLED(CONFIG_SPI_SLAVE))
-		return NULL;
-
-	return __spi_alloc_controller(host, size, true);
-}
-
 static inline struct spi_controller *spi_alloc_host(struct device *dev,
 						    unsigned int size)
 {
@@ -858,21 +843,6 @@ struct spi_controller *__devm_spi_alloc_controller(struct device *dev,
 						   unsigned int size,
 						   bool slave);
 
-static inline struct spi_controller *devm_spi_alloc_master(struct device *dev,
-							   unsigned int size)
-{
-	return __devm_spi_alloc_controller(dev, size, false);
-}
-
-static inline struct spi_controller *devm_spi_alloc_slave(struct device *dev,
-							  unsigned int size)
-{
-	if (!IS_ENABLED(CONFIG_SPI_SLAVE))
-		return NULL;
-
-	return __devm_spi_alloc_controller(dev, size, true);
-}
-
 static inline struct spi_controller *devm_spi_alloc_host(struct device *dev,
 							 unsigned int size)
 {
-- 
cgit v1.2.3


From 6074e905023d09f64f2c896f475820a5623deb2c Mon Sep 17 00:00:00 2001
From: Javier Martinez Canillas <javierm@redhat.com>
Date: Mon, 16 Sep 2024 13:00:25 +0200
Subject: firmware: sysfb: Add a sysfb_handles_screen_info() helper function

That can be used by drivers to check if the Generic System Framebuffers
(sysfb) support can handle the data contained in the global screen_info.

Drivers might need this information to know if have to setup the system
framebuffer, or if they have to delegate this action to sysfb instead.

Suggested-by: Thomas Zimmermann <tzimmermann@suse.de>
Signed-off-by: Javier Martinez Canillas <javierm@redhat.com>
Reviewed-by: Thomas Zimmermann <tzimmermann@suse.de>
Link: https://lore.kernel.org/r/20240916110040.1688511-2-javierm@redhat.com
Signed-off-by: Tzung-Bi Shih <tzungbi@kernel.org>
---
 include/linux/sysfb.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include')

diff --git a/include/linux/sysfb.h b/include/linux/sysfb.h
index bef5f06a91de..07cbab516942 100644
--- a/include/linux/sysfb.h
+++ b/include/linux/sysfb.h
@@ -60,12 +60,19 @@ struct efifb_dmi_info {
 
 void sysfb_disable(struct device *dev);
 
+bool sysfb_handles_screen_info(void);
+
 #else /* CONFIG_SYSFB */
 
 static inline void sysfb_disable(struct device *dev)
 {
 }
 
+static inline bool sysfb_handles_screen_info(void)
+{
+	return false;
+}
+
 #endif /* CONFIG_SYSFB */
 
 #ifdef CONFIG_EFI
-- 
cgit v1.2.3


From e4ca0e59c39442546866f3dd514a3a5956577daf Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Tue, 3 Sep 2024 20:59:04 +0300
Subject: types: Complement the aligned types with signed 64-bit one

Some user may want to use aligned signed 64-bit type.
Provide it for them.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://patch.msgid.link/20240903180218.3640501-2-andriy.shevchenko@linux.intel.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 include/linux/types.h      | 3 ++-
 include/uapi/linux/types.h | 1 +
 2 files changed, 3 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/types.h b/include/linux/types.h
index 2bc8766ba20c..2d7b9ae8714c 100644
--- a/include/linux/types.h
+++ b/include/linux/types.h
@@ -115,8 +115,9 @@ typedef u64			u_int64_t;
 typedef s64			int64_t;
 #endif
 
-/* this is a special 64bit data type that is 8-byte aligned */
+/* These are the special 64-bit data types that are 8-byte aligned */
 #define aligned_u64		__aligned_u64
+#define aligned_s64		__aligned_s64
 #define aligned_be64		__aligned_be64
 #define aligned_le64		__aligned_le64
 
diff --git a/include/uapi/linux/types.h b/include/uapi/linux/types.h
index 6375a0684052..48b933938877 100644
--- a/include/uapi/linux/types.h
+++ b/include/uapi/linux/types.h
@@ -53,6 +53,7 @@ typedef __u32 __bitwise __wsum;
  * No conversions are necessary between 32-bit user-space and a 64-bit kernel.
  */
 #define __aligned_u64 __u64 __attribute__((aligned(8)))
+#define __aligned_s64 __s64 __attribute__((aligned(8)))
 #define __aligned_be64 __be64 __attribute__((aligned(8)))
 #define __aligned_le64 __le64 __attribute__((aligned(8)))
 
-- 
cgit v1.2.3


From faf178607772f28006e403d7bab6c4217d4ee447 Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Sat, 7 Sep 2024 19:24:46 +0200
Subject: iio: adc: Constify struct iio_map

'struct iio_map' are not modified in these drivers.

Constifying this structure moves some data to a read-only section, so
increase overall security.

In order to do it, the prototype of iio_map_array_register() and
devm_iio_map_array_register(), and a few structures that hold a
"struct iio_map *" need to be adjusted.

On a x86_64, with allmodconfig, as an example:
Before:
======
   text	   data	    bss	    dec	    hex	filename
  21086	    760	      0	  21846	   5556	drivers/iio/adc/axp20x_adc.o

After:
=====
   text	   data	    bss	    dec	    hex	filename
  21470	    360	      0	  21830	   5546	drivers/iio/adc/axp20x_adc.o
  33842	   1697	    384	  35923	   8c53	drivers/iio/addac/ad74413r.o

--
Compile tested only

Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Link: https://patch.msgid.link/5729dc3cc3892ecf0d8ea28c5f7307b34e27493e.1725729801.git.christophe.jaillet@wanadoo.fr
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 include/linux/iio/driver.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/iio/driver.h b/include/linux/iio/driver.h
index 7a157ed218f6..7f8b55551ed0 100644
--- a/include/linux/iio/driver.h
+++ b/include/linux/iio/driver.h
@@ -18,7 +18,7 @@ struct iio_map;
  * @map:	array of mappings specifying association of channel with client
  */
 int iio_map_array_register(struct iio_dev *indio_dev,
-			   struct iio_map *map);
+			   const struct iio_map *map);
 
 /**
  * iio_map_array_unregister() - tell the core to remove consumer mappings for
@@ -38,6 +38,7 @@ int iio_map_array_unregister(struct iio_dev *indio_dev);
  * handle de-registration of the IIO map object when the device's refcount goes to
  * zero.
  */
-int devm_iio_map_array_register(struct device *dev, struct iio_dev *indio_dev, struct iio_map *maps);
+int devm_iio_map_array_register(struct device *dev, struct iio_dev *indio_dev,
+				const struct iio_map *maps);
 
 #endif
-- 
cgit v1.2.3


From 0f702757c68b9790a844e5c07073d3d1c777b13a Mon Sep 17 00:00:00 2001
From: Gaosheng Cui <cuigaosheng1@huawei.com>
Date: Mon, 26 Aug 2024 11:34:11 +0800
Subject: ARM: samsung: Remove obsoleted declaration for s3c_hwmon_set_platdata

The s3c_hwmon_set_platdata() have been removed since
commit 0d297df03890 ("ARM: s3c: simplify platform code"), and now
it is useless, so remove it.

Signed-off-by: Gaosheng Cui <cuigaosheng1@huawei.com>
Link: https://lore.kernel.org/r/20240826033411.4022822-1-cuigaosheng1@huawei.com
Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
---
 include/linux/platform_data/hwmon-s3c.h | 10 ----------
 1 file changed, 10 deletions(-)

(limited to 'include')

diff --git a/include/linux/platform_data/hwmon-s3c.h b/include/linux/platform_data/hwmon-s3c.h
index 1707ad4147df..7d21e0c41037 100644
--- a/include/linux/platform_data/hwmon-s3c.h
+++ b/include/linux/platform_data/hwmon-s3c.h
@@ -33,14 +33,4 @@ struct s3c_hwmon_pdata {
 	struct s3c_hwmon_chcfg	*in[8];
 };
 
-/**
- * s3c_hwmon_set_platdata - Set platform data for S3C HWMON device
- * @pd: Platform data to register to device.
- *
- * Register the given platform data for use with the S3C HWMON device.
- * The call will copy the platform data, so the board definitions can
- * make the structure itself __initdata.
- */
-extern void __init s3c_hwmon_set_platdata(struct s3c_hwmon_pdata *pd);
-
 #endif /* __HWMON_S3C_H__ */
-- 
cgit v1.2.3


From 3a6ad95d97eb62a7b7c804ef7eeb329a1f697d00 Mon Sep 17 00:00:00 2001
From: Gaosheng Cui <cuigaosheng1@huawei.com>
Date: Mon, 26 Aug 2024 11:31:18 +0800
Subject: ASoC: samsung: Remove obsoleted declaration for
 s3c64xx_ac97_setup_gpio

The s3c64xx_ac97_setup_gpio() have been removed since
commit 0d297df03890 ("ARM: s3c: simplify platform code"), and now
it is useless, so remove it.

Signed-off-by: Gaosheng Cui <cuigaosheng1@huawei.com>
Link: https://lore.kernel.org/r/20240826033118.4021727-1-cuigaosheng1@huawei.com
Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
---
 include/linux/platform_data/asoc-s3c.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/platform_data/asoc-s3c.h b/include/linux/platform_data/asoc-s3c.h
index f9c00f839e9f..085dd8e8af76 100644
--- a/include/linux/platform_data/asoc-s3c.h
+++ b/include/linux/platform_data/asoc-s3c.h
@@ -13,8 +13,6 @@
 
 #include <linux/dmaengine.h>
 
-extern void s3c64xx_ac97_setup_gpio(int);
-
 struct samsung_i2s_type {
 /* If the Primary DAI has 5.1 Channels */
 #define QUIRK_PRI_6CHAN		(1 << 0)
-- 
cgit v1.2.3


From 2d3e0135cefccbcd8459112a8afe260e7b51ff6d Mon Sep 17 00:00:00 2001
From: Inbaraj E <inbaraj.e@samsung.com>
Date: Tue, 17 Sep 2024 15:13:55 +0530
Subject: dt-bindings: clock: samsung: remove define with number of clocks for
 FSD

Number of clocks supported by Linux drivers might vary - sometimes we
add new clocks, not exposed previously.  Therefore these numbers of
clocks should not be in the bindings, as that prevents changing them.
Remove it entirely from the bindings, once Linux drivers stopped using
them.

Signed-off-by: Inbaraj E <inbaraj.e@samsung.com>
Acked-by: Rob Herring (Arm) <robh@kernel.org>
Link: https://lore.kernel.org/r/20240917094355.37887-3-inbaraj.e@samsung.com
Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
---
 include/dt-bindings/clock/fsd-clk.h | 7 -------
 1 file changed, 7 deletions(-)

(limited to 'include')

diff --git a/include/dt-bindings/clock/fsd-clk.h b/include/dt-bindings/clock/fsd-clk.h
index c8a2af1dd1ad..3f7b64d93558 100644
--- a/include/dt-bindings/clock/fsd-clk.h
+++ b/include/dt-bindings/clock/fsd-clk.h
@@ -28,7 +28,6 @@
 #define DOUT_CMU_IMEM_ACLK			13
 #define DOUT_CMU_IMEM_DMACLK			14
 #define GAT_CMU_FSYS0_SHARED0DIV4		15
-#define CMU_NR_CLK				16
 
 /* PERIC */
 #define PERIC_SCLK_UART0			1
@@ -76,7 +75,6 @@
 #define PERIC_EQOS_PHYRXCLK_MUX			43
 #define PERIC_EQOS_PHYRXCLK			44
 #define PERIC_DOUT_RGMII_CLK			45
-#define PERIC_NR_CLK				46
 
 /* FSYS0 */
 #define UFS0_MPHY_REFCLK_IXTAL24		1
@@ -101,7 +99,6 @@
 #define FSYS0_EQOS_TOP0_IPCLKPORT_RGMII_CLK_I	20
 #define FSYS0_EQOS_TOP0_IPCLKPORT_CLK_RX_I	21
 #define FSYS0_DOUT_FSYS0_PERIBUS_GRP		22
-#define FSYS0_NR_CLK				23
 
 /* FSYS1 */
 #define PCIE_LINK0_IPCLKPORT_DBI_ACLK		1
@@ -112,7 +109,6 @@
 #define PCIE_LINK1_IPCLKPORT_AUX_ACLK		6
 #define PCIE_LINK1_IPCLKPORT_MSTR_ACLK		7
 #define PCIE_LINK1_IPCLKPORT_SLV_ACLK		8
-#define FSYS1_NR_CLK				9
 
 /* IMEM */
 #define IMEM_DMA0_IPCLKPORT_ACLK		1
@@ -126,11 +122,9 @@
 #define IMEM_TMU_TOP_IPCLKPORT_I_CLK_TS		9
 #define IMEM_TMU_GPU_IPCLKPORT_I_CLK_TS		10
 #define IMEM_TMU_GT_IPCLKPORT_I_CLK_TS		11
-#define IMEM_NR_CLK				12
 
 /* MFC */
 #define MFC_MFC_IPCLKPORT_ACLK			1
-#define MFC_NR_CLK				2
 
 /* CAM_CSI */
 #define CAM_CSI0_0_IPCLKPORT_I_ACLK		1
@@ -145,6 +139,5 @@
 #define CAM_CSI2_1_IPCLKPORT_I_ACLK		10
 #define CAM_CSI2_2_IPCLKPORT_I_ACLK		11
 #define CAM_CSI2_3_IPCLKPORT_I_ACLK		12
-#define CAM_CSI_NR_CLK				13
 
 #endif /*_DT_BINDINGS_CLOCK_FSD_H */
-- 
cgit v1.2.3


From a370b72ec7165ebe1230d0225cbe66f6526e68ef Mon Sep 17 00:00:00 2001
From: "Masami Hiramatsu (Google)" <mhiramat@kernel.org>
Date: Sun, 18 Aug 2024 21:48:13 +0900
Subject: tracing: Add a comment about ftrace_regs definition

To clarify what will be expected on ftrace_regs, add a comment to the
architecture independent definition of the ftrace_regs.

Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Acked-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 include/linux/ftrace.h | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

(limited to 'include')

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index fd5e84d0ec47..42106b3de396 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -117,6 +117,32 @@ extern int ftrace_enabled;
 
 #ifndef CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS
 
+/**
+ * ftrace_regs - ftrace partial/optimal register set
+ *
+ * ftrace_regs represents a group of registers which is used at the
+ * function entry and exit. There are three types of registers.
+ *
+ * - Registers for passing the parameters to callee, including the stack
+ *   pointer. (e.g. rcx, rdx, rdi, rsi, r8, r9 and rsp on x86_64)
+ * - Registers for passing the return values to caller.
+ *   (e.g. rax and rdx on x86_64)
+ * - Registers for hooking the function call and return including the
+ *   frame pointer (the frame pointer is architecture/config dependent)
+ *   (e.g. rip, rbp and rsp for x86_64)
+ *
+ * Also, architecture dependent fields can be used for internal process.
+ * (e.g. orig_ax on x86_64)
+ *
+ * On the function entry, those registers will be restored except for
+ * the stack pointer, so that user can change the function parameters
+ * and instruction pointer (e.g. live patching.)
+ * On the function exit, only registers which is used for return values
+ * are restored.
+ *
+ * NOTE: user *must not* access regs directly, only do it via APIs, because
+ * the member can be changed according to the architecture.
+ */
 struct ftrace_regs {
 	struct pt_regs		regs;
 };
-- 
cgit v1.2.3


From a312a0f7834e605e7c41570f0e9525d0fc4a70a4 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <rostedt@goodmis.org>
Date: Sat, 14 Sep 2024 17:48:06 -0400
Subject: fgraph: Use fgraph data to store subtime for profiler

Instead of having the "subtime" for the function profiler in the
infrastructure ftrace_ret_stack structure, have it use the fgraph data
reserve and retrieve functions.

This will keep the limited shadow stack from wasting 8 bytes for something
that is seldom used.

Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Jiri Olsa <olsajiri@gmail.com>
Link: https://lore.kernel.org/20240914214826.780323141@goodmis.org
Acked-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 include/linux/ftrace.h | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 42106b3de396..aabd348cad4a 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -1081,6 +1081,7 @@ struct fgraph_ops {
 
 void *fgraph_reserve_data(int idx, int size_bytes);
 void *fgraph_retrieve_data(int idx, int *size_bytes);
+void *fgraph_retrieve_parent_data(int idx, int *size_bytes, int depth);
 
 /*
  * Stack of return addresses for functions
@@ -1091,9 +1092,6 @@ struct ftrace_ret_stack {
 	unsigned long ret;
 	unsigned long func;
 	unsigned long long calltime;
-#ifdef CONFIG_FUNCTION_PROFILER
-	unsigned long long subtime;
-#endif
 #ifdef HAVE_FUNCTION_GRAPH_FP_TEST
 	unsigned long fp;
 #endif
-- 
cgit v1.2.3


From 3c9880f3ab52b52b5b4e1850a70e80dd7329cb4c Mon Sep 17 00:00:00 2001
From: Steven Rostedt <rostedt@goodmis.org>
Date: Sat, 14 Sep 2024 17:48:07 -0400
Subject: ftrace: Use a running sleeptime instead of saving on shadow stack

The fgraph "sleep-time" option tells the function graph tracer and the
profiler whether to include the time a function "sleeps" (is scheduled off
the CPU) in its duration for the function. By default it is true, which
means the duration of a function is calculated by the timestamp of when the
function was entered to the timestamp of when it exits.

If the "sleep-time" option is disabled, it needs to remove the time that the
task was not running on the CPU during the function. Currently it is done in
a sched_switch tracepoint probe where it moves the "calltime" (time of entry
of the function) forward by the sleep time calculated. It updates all the
calltime in the shadow stack.

This is time consuming for those users of the function graph tracer that
does not care about the sleep time. Instead, add a "ftrace_sleeptime" to the
task_struct that gets the sleep time added each time the task wakes up. Then
have the function entry save the current "ftrace_sleeptime" and on function
exit, move the calltime forward by the difference of the current
"ftrace_sleeptime" from the saved sleeptime.

This removes one dependency of "calltime" needed to be on the shadow stack.
It also simplifies the code that removes the sleep time of functions.

TODO: Only enable the sched_switch tracepoint when this is needed.

Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Jiri Olsa <olsajiri@gmail.com>
Link: https://lore.kernel.org/20240914214826.938908568@goodmis.org
Acked-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 include/linux/sched.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index e6ee4258169a..c08f3bdb11a5 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1441,6 +1441,7 @@ struct task_struct {
 
 	/* Timestamp for last schedule: */
 	unsigned long long		ftrace_timestamp;
+	unsigned long long		ftrace_sleeptime;
 
 	/*
 	 * Number of functions that haven't been traced
-- 
cgit v1.2.3


From f1f36e22bee967db5e812a65e24389e54c46f3c2 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <rostedt@goodmis.org>
Date: Sat, 14 Sep 2024 17:48:08 -0400
Subject: ftrace: Have calltime be saved in the fgraph storage

The calltime field in the shadow stack frame is only used by the function
graph tracer and profiler. But now that there's other users of the function
graph infrastructure, this adds overhead and wastes space on the shadow
stack. Move the calltime to the fgraph data storage, where the function
graph and profiler entry functions will save it in its own graph storage and
retrieve it in its exit functions.

Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Jiri Olsa <olsajiri@gmail.com>
Link: https://lore.kernel.org/20240914214827.096968730@goodmis.org
Acked-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 include/linux/ftrace.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index aabd348cad4a..e684addf6508 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -1091,7 +1091,6 @@ void *fgraph_retrieve_parent_data(int idx, int *size_bytes, int depth);
 struct ftrace_ret_stack {
 	unsigned long ret;
 	unsigned long func;
-	unsigned long long calltime;
 #ifdef HAVE_FUNCTION_GRAPH_FP_TEST
 	unsigned long fp;
 #endif
-- 
cgit v1.2.3


From 26228256b796eb0145bdfb2ae34ec8c4c0ef1319 Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Fri, 6 Sep 2024 09:52:16 +0200
Subject: backlight: lcd: Test against struct fb_info.lcd_dev

Add struct fb_info.lcd_dev for fbdev drivers to store a reference to
their lcd device. Update the lcd's fb_notifier_callback() to test for
this field. The lcd module can now detect if an lcd device belongs to
an fbdev device.

This works similar to the bl_dev for backlights and will allow for
the removal of the check_fb callback from several fbdev driver's lcd
devices.

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Reviewed-by: Daniel Thompson <daniel.thompson@linaro.org>
Link: https://lore.kernel.org/r/20240906075439.98476-3-tzimmermann@suse.de
Signed-off-by: Lee Jones <lee@kernel.org>
---
 include/linux/fb.h | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'include')

diff --git a/include/linux/fb.h b/include/linux/fb.h
index 267b59ead432..5ba187e08cf7 100644
--- a/include/linux/fb.h
+++ b/include/linux/fb.h
@@ -21,6 +21,7 @@ struct fb_info;
 struct file;
 struct i2c_adapter;
 struct inode;
+struct lcd_device;
 struct module;
 struct notifier_block;
 struct page;
@@ -480,6 +481,13 @@ struct fb_info {
 	struct mutex bl_curve_mutex;
 	u8 bl_curve[FB_BACKLIGHT_LEVELS];
 #endif
+
+	/*
+	 * Assigned LCD device; set before framebuffer
+	 * registration, remove after unregister
+	 */
+	struct lcd_device *lcd_dev;
+
 #ifdef CONFIG_FB_DEFERRED_IO
 	struct delayed_work deferred_work;
 	unsigned long npagerefs;
@@ -754,6 +762,11 @@ static inline struct backlight_device *fb_bl_device(struct fb_info *info)
 }
 #endif
 
+static inline struct lcd_device *fb_lcd_device(struct fb_info *info)
+{
+	return info->lcd_dev;
+}
+
 /* fbmon.c */
 #define FB_MAXTIMINGS		0
 #define FB_VSYNCTIMINGS		1
-- 
cgit v1.2.3


From 48ffe2074c2864ab64ee2004e7ebf3d6a6730fbf Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Fri, 6 Sep 2024 09:52:17 +0200
Subject: backlight: lcd: Add LCD_POWER_ constants for power states

Duplicate FB_BLANK_ constants as LCD_POWER_ constants in the lcd
header file. Allows lcd drivers to avoid including the fbdev header
file and removes a compile-time dependency between the two subsystems.

The new LCD_POWER_ constants have the same values as their
FB_BLANK_ counterparts. Hence semantics does not change and the lcd
drivers can be converted one by one. Each instance of FB_BLANK_UNBLANK
becomes LCD_POWER_ON, each of FB_BLANK_POWERDOWN becomes LCD_POWER_OFF,
FB_BLANK_NORMAL becomes LCD_POWER_REDUCED and FB_BLANK_VSYNC_SUSPEND
becomes LCD_POWER_REDUCED_VSYNC_SUSPEND.

Lcd code or drivers do not use FB_BLANK_HSYNC_SUSPEND, so no
new constants for this is being added. The tokens LCD_POWER_REDUCED
and LCD_POWER_REDUCED_VSYNC_SUSPEND are deprecated and drivers should
replace them with LCD_POWER_ON and LCD_POWER_OFF.

See also commit a1cacb8a8e70 ("backlight: Add BACKLIGHT_POWER_ constants
for power states"), which added similar constants for backlight drivers.

v2:
- fix typo in commit description

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Reviewed-by: Daniel Thompson <daniel.thompson@linaro.org>
Link: https://lore.kernel.org/r/20240906075439.98476-4-tzimmermann@suse.de
Signed-off-by: Lee Jones <lee@kernel.org>
---
 include/linux/lcd.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include')

diff --git a/include/linux/lcd.h b/include/linux/lcd.h
index 68703a51dc53..dfcc54d327f5 100644
--- a/include/linux/lcd.h
+++ b/include/linux/lcd.h
@@ -14,6 +14,11 @@
 #include <linux/notifier.h>
 #include <linux/fb.h>
 
+#define LCD_POWER_ON			(0)
+#define LCD_POWER_REDUCED		(1) // deprecated; don't use in new code
+#define LCD_POWER_REDUCED_VSYNC_SUSPEND	(2) // deprecated; don't use in new code
+#define LCD_POWER_OFF			(4)
+
 /* Notes on locking:
  *
  * lcd_device->ops_lock is an internal backlight lock protecting the ops
-- 
cgit v1.2.3


From c38a7db56d18b3ec07f3ad52c1e3f1f05c375011 Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Fri, 6 Sep 2024 09:52:31 +0200
Subject: backlight: platform_lcd: Remove match_fb from struct plat_lcd_data

The match_fb callback in struct plat_lcd_data is unused. Remove it.

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Reviewed-by: Daniel Thompson <daniel.thompson@linaro.org>
Link: https://lore.kernel.org/r/20240906075439.98476-18-tzimmermann@suse.de
Signed-off-by: Lee Jones <lee@kernel.org>
---
 include/video/platform_lcd.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include')

diff --git a/include/video/platform_lcd.h b/include/video/platform_lcd.h
index 6a95184a28c1..2bdf46519298 100644
--- a/include/video/platform_lcd.h
+++ b/include/video/platform_lcd.h
@@ -8,11 +8,8 @@
 */
 
 struct plat_lcd_data;
-struct fb_info;
 
 struct plat_lcd_data {
 	int	(*probe)(struct plat_lcd_data *);
 	void	(*set_power)(struct plat_lcd_data *, unsigned int power);
-	int	(*match_fb)(struct plat_lcd_data *, struct fb_info *);
 };
-
-- 
cgit v1.2.3


From 43e1120deb3768c86aa3875c7073658e44a30ea5 Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Fri, 6 Sep 2024 09:52:40 +0200
Subject: backlight: lcd: Replace check_fb with controls_device

Rename check_fb in struct lcd_ops to controls_device. The callback
is now independent from fbdev's struct fb_info and tests if an lcd
device controls a hardware display device. The new naming and semantics
follow similar functionality for backlight devices.

v2:
- fix typos in commit description (Daniel)

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Reviewed-by: Daniel Thompson <daniel.thompson@linaro.org>
Link: https://lore.kernel.org/r/20240906075439.98476-27-tzimmermann@suse.de
Signed-off-by: Lee Jones <lee@kernel.org>
---
 include/linux/lcd.h | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/lcd.h b/include/linux/lcd.h
index dfcc54d327f5..8399b5ed48f2 100644
--- a/include/linux/lcd.h
+++ b/include/linux/lcd.h
@@ -35,7 +35,6 @@
  */
 
 struct lcd_device;
-struct fb_info;
 
 struct lcd_properties {
 	/* The maximum value for contrast (read-only) */
@@ -54,9 +53,18 @@ struct lcd_ops {
         int (*set_contrast)(struct lcd_device *, int contrast);
 	/* Set LCD panel mode (resolutions ...) */
 	int (*set_mode)(struct lcd_device *, struct fb_videomode *);
-	/* Check if given framebuffer device is the one LCD is bound to;
-	   return 0 if not, !=0 if it is. If NULL, lcd always matches the fb. */
-	int (*check_fb)(struct lcd_device *, struct fb_info *);
+
+	/*
+	 * Check if the LCD controls the given display device. This
+	 * operation is optional and if not implemented it is assumed that
+	 * the display is always the one controlled by the LCD.
+	 *
+	 * RETURNS:
+	 *
+	 * If display_dev is NULL or display_dev matches the device controlled by
+	 * the LCD, return true. Otherwise return false.
+	 */
+	bool (*controls_device)(struct lcd_device *lcd, struct device *display_device);
 };
 
 struct lcd_device {
-- 
cgit v1.2.3


From 02e224d096ef58fe59e96609de6018e133f33512 Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Fri, 6 Sep 2024 09:52:41 +0200
Subject: backlight: lcd: Remove struct fb_videomode from set_mode callback

Implementations of struct lcd_ops.set_mode only require the resolution
from struct fb_videomode. Pass the xres and yres fields, but remove the
dependency on the fbdev data structure.

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Reviewed-by: Daniel Thompson <daniel.thompson@linaro.org>
Link: https://lore.kernel.org/r/20240906075439.98476-28-tzimmermann@suse.de
Signed-off-by: Lee Jones <lee@kernel.org>
---
 include/linux/lcd.h | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/lcd.h b/include/linux/lcd.h
index 8399b5ed48f2..59a80b396a71 100644
--- a/include/linux/lcd.h
+++ b/include/linux/lcd.h
@@ -51,8 +51,11 @@ struct lcd_ops {
 	int (*get_contrast)(struct lcd_device *);
 	/* Set LCD panel contrast */
         int (*set_contrast)(struct lcd_device *, int contrast);
-	/* Set LCD panel mode (resolutions ...) */
-	int (*set_mode)(struct lcd_device *, struct fb_videomode *);
+
+	/*
+	 * Set LCD panel mode (resolutions ...)
+	 */
+	int (*set_mode)(struct lcd_device *lcd, u32 xres, u32 yres);
 
 	/*
 	 * Check if the LCD controls the given display device. This
-- 
cgit v1.2.3


From 0d580d99749e759b62dc8e28f511310e9235da7a Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Fri, 6 Sep 2024 09:52:42 +0200
Subject: backlight: lcd: Do not include <linux/fb.h> in lcd header

With the exception of fb_notifier_callback(), none of the lcd code
uses fbdev; especially not the lcd drivers. Remove the include
statement for <linux/fb.h> from the public lcd header.

v2:
- fix typos in commit description

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Reviewed-by: Daniel Thompson <daniel.thompson@linaro.org>
Link: https://lore.kernel.org/r/20240906075439.98476-29-tzimmermann@suse.de
Signed-off-by: Lee Jones <lee@kernel.org>
---
 include/linux/lcd.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/lcd.h b/include/linux/lcd.h
index 59a80b396a71..c3ccdff4519a 100644
--- a/include/linux/lcd.h
+++ b/include/linux/lcd.h
@@ -12,7 +12,6 @@
 #include <linux/device.h>
 #include <linux/mutex.h>
 #include <linux/notifier.h>
-#include <linux/fb.h>
 
 #define LCD_POWER_ON			(0)
 #define LCD_POWER_REDUCED		(1) // deprecated; don't use in new code
-- 
cgit v1.2.3


From fb4c31587adfa9cd50661a535bdbfcc4da57ee38 Mon Sep 17 00:00:00 2001
From: Jerome Brunet <jbrunet@baylibre.com>
Date: Tue, 10 Sep 2024 18:32:51 +0200
Subject: reset: amlogic: add auxiliary reset driver support

Add support for the reset controller present in the audio clock
controller of the g12 and sm1 SoC families, using the auxiliary bus.

This is expected to replace the driver currently present directly
within the related clock driver.

Signed-off-by: Jerome Brunet <jbrunet@baylibre.com>
Reviewed-by: Philipp Zabel <p.zabel@pengutronix.de>
Link: https://lore.kernel.org/r/20240910-meson-rst-aux-v5-9-60be62635d3e@baylibre.com
Signed-off-by: Philipp Zabel <p.zabel@pengutronix.de>
---
 include/soc/amlogic/reset-meson-aux.h | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)
 create mode 100644 include/soc/amlogic/reset-meson-aux.h

(limited to 'include')

diff --git a/include/soc/amlogic/reset-meson-aux.h b/include/soc/amlogic/reset-meson-aux.h
new file mode 100644
index 000000000000..d8a15d48c984
--- /dev/null
+++ b/include/soc/amlogic/reset-meson-aux.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __SOC_RESET_MESON_AUX_H
+#define __SOC_RESET_MESON_AUX_H
+
+#include <linux/err.h>
+
+struct device;
+struct regmap;
+
+#if IS_ENABLED(CONFIG_RESET_MESON_AUX)
+int devm_meson_rst_aux_register(struct device *dev,
+				struct regmap *map,
+				const char *adev_name);
+#else
+static inline int devm_meson_rst_aux_register(struct device *dev,
+					      struct regmap *map,
+					      const char *adev_name)
+{
+	return 0;
+}
+#endif
+
+#endif /* __SOC_RESET_MESON_AUX_H */
-- 
cgit v1.2.3


From 44badc908f2c85711cb18e45e13119c10ad3a05f Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.i.king@gmail.com>
Date: Tue, 24 Sep 2024 09:05:45 +0100
Subject: tcp: Fix spelling mistake "emtpy" -> "empty"

There is a spelling mistake in a WARN_ONCE message. Fix it.

Signed-off-by: Colin Ian King <colin.i.king@gmail.com>
Reviewed-by: Jason Xing <kerneljasonxing@gmail.com>
Link: https://patch.msgid.link/20240924080545.1324962-1-colin.i.king@gmail.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/net/tcp.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index d1948d357dad..739a9fb83d0c 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -2442,7 +2442,7 @@ static inline s64 tcp_rto_delta_us(const struct sock *sk)
 		return rto_time_stamp_us - tcp_sk(sk)->tcp_mstamp;
 	} else {
 		WARN_ONCE(1,
-			"rtx queue emtpy: "
+			"rtx queue empty: "
 			"out:%u sacked:%u lost:%u retrans:%u "
 			"tlp_high_seq:%u sk_state:%u ca_state:%u "
 			"advmss:%u mss_cache:%u pmtu:%u\n",
-- 
cgit v1.2.3


From 86b9ce0a8a6cf9397503920cd5412d207a93fae9 Mon Sep 17 00:00:00 2001
From: Sai Krishna Potthuri <sai.krishna.potthuri@amd.com>
Date: Fri, 6 Sep 2024 16:31:12 +0530
Subject: firmware: xilinx: Add Pinctrl Get Attribute ID

Add Pinctrl Get Attribute ID to the query ids list.

Signed-off-by: Sai Krishna Potthuri <sai.krishna.potthuri@amd.com>
Link: https://lore.kernel.org/20240906110113.3154327-3-sai.krishna.potthuri@amd.com
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 include/linux/firmware/xlnx-zynqmp.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/firmware/xlnx-zynqmp.h b/include/linux/firmware/xlnx-zynqmp.h
index d7d07afc0532..3b4ce4ec5d3f 100644
--- a/include/linux/firmware/xlnx-zynqmp.h
+++ b/include/linux/firmware/xlnx-zynqmp.h
@@ -238,6 +238,7 @@ enum pm_query_id {
 	PM_QID_PINCTRL_GET_PIN_GROUPS = 11,
 	PM_QID_CLOCK_GET_NUM_CLOCKS = 12,
 	PM_QID_CLOCK_GET_MAX_DIVISOR = 13,
+	PM_QID_PINCTRL_GET_ATTRIBUTES = 15,
 };
 
 enum rpu_oper_mode {
-- 
cgit v1.2.3


From b875bd5b381e114115922944f7a01e31f8b07c2a Mon Sep 17 00:00:00 2001
From: Benjamin Coddington <bcodding@redhat.com>
Date: Wed, 11 Sep 2024 15:43:00 -0400
Subject: exportfs: Remove EXPORT_OP_ASYNC_LOCK

Now that GFS2 and OCFS2 are signalling async ->lock() support with
FOP_ASYNC_LOCK and checks for support are converted, we can remove
EXPORT_OP_ASYNC_LOCK.

Signed-off-by: Benjamin Coddington <bcodding@redhat.com>
Link: https://lore.kernel.org/r/0a114db814fec3086f937ae3d44a086f13b8de26.1726083391.git.bcodding@redhat.com
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/linux/exportfs.h | 13 -------------
 1 file changed, 13 deletions(-)

(limited to 'include')

diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h
index 893a1d21dc1c..1ab165c2939f 100644
--- a/include/linux/exportfs.h
+++ b/include/linux/exportfs.h
@@ -250,19 +250,6 @@ struct export_operations {
 	unsigned long	flags;
 };
 
-/**
- * exportfs_lock_op_is_async() - export op supports async lock operation
- * @export_ops:	the nfs export operations to check
- *
- * Returns true if the nfs export_operations structure has
- * EXPORT_OP_ASYNC_LOCK in their flags set
- */
-static inline bool
-exportfs_lock_op_is_async(const struct export_operations *export_ops)
-{
-	return export_ops->flags & EXPORT_OP_ASYNC_LOCK;
-}
-
 extern int exportfs_encode_inode_fh(struct inode *inode, struct fid *fid,
 				    int *max_len, struct inode *parent,
 				    int flags);
-- 
cgit v1.2.3


From dad35f7d2fc14e446669d4cab100597a6798eae5 Mon Sep 17 00:00:00 2001
From: Philipp Zabel <p.zabel@pengutronix.de>
Date: Wed, 25 Sep 2024 18:40:09 +0200
Subject: reset: replace boolean parameters with flags parameter
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Introduce enum reset_control_flags and replace the list of boolean
parameters to the internal reset_control_get functions with a single
flags parameter, before adding more boolean options.

The separate boolean parameters have been shown to be error prone in
the past. See for example commit a57f68ddc886 ("reset: Fix devm bulk
optional exclusive control getter").

Acked-by: Uwe Kleine-König <u.kleine-koenig@baylibre.com>
Link: https://lore.kernel.org/r/20240925-reset-get-deasserted-v2-1-b3601bbd0458@pengutronix.de
Signed-off-by: Philipp Zabel <p.zabel@pengutronix.de>
---
 include/linux/reset.h | 161 +++++++++++++++++++++++++++++++-------------------
 1 file changed, 99 insertions(+), 62 deletions(-)

(limited to 'include')

diff --git a/include/linux/reset.h b/include/linux/reset.h
index 514ddf003efc..99296af98f81 100644
--- a/include/linux/reset.h
+++ b/include/linux/reset.h
@@ -25,6 +25,33 @@ struct reset_control_bulk_data {
 	struct reset_control		*rstc;
 };
 
+#define RESET_CONTROL_FLAGS_BIT_SHARED		BIT(0)	/* not exclusive */
+#define RESET_CONTROL_FLAGS_BIT_OPTIONAL	BIT(1)
+#define RESET_CONTROL_FLAGS_BIT_ACQUIRED	BIT(2)	/* iff exclusive, not released */
+
+/**
+ * enum reset_control_flags - Flags that can be passed to the reset_control_get functions
+ *                    to determine the type of reset control.
+ *                    These values cannot be OR'd.
+ *
+ * @RESET_CONTROL_EXCLUSIVE:				exclusive, acquired,
+ * @RESET_CONTROL_EXCLUSIVE_RELEASED:			exclusive, released,
+ * @RESET_CONTROL_SHARED:				shared
+ * @RESET_CONTROL_OPTIONAL_EXCLUSIVE:			optional, exclusive, acquired
+ * @RESET_CONTROL_OPTIONAL_EXCLUSIVE_RELEASED:		optional, exclusive, released
+ * @RESET_CONTROL_OPTIONAL_SHARED:			optional, shared
+ */
+enum reset_control_flags {
+	RESET_CONTROL_EXCLUSIVE				= RESET_CONTROL_FLAGS_BIT_ACQUIRED,
+	RESET_CONTROL_EXCLUSIVE_RELEASED		= 0,
+	RESET_CONTROL_SHARED				= RESET_CONTROL_FLAGS_BIT_SHARED,
+	RESET_CONTROL_OPTIONAL_EXCLUSIVE		= RESET_CONTROL_FLAGS_BIT_OPTIONAL |
+							  RESET_CONTROL_FLAGS_BIT_ACQUIRED,
+	RESET_CONTROL_OPTIONAL_EXCLUSIVE_RELEASED	= RESET_CONTROL_FLAGS_BIT_OPTIONAL,
+	RESET_CONTROL_OPTIONAL_SHARED			= RESET_CONTROL_FLAGS_BIT_OPTIONAL |
+							  RESET_CONTROL_FLAGS_BIT_SHARED,
+};
+
 #ifdef CONFIG_RESET_CONTROLLER
 
 int reset_control_reset(struct reset_control *rstc);
@@ -42,30 +69,25 @@ int reset_control_bulk_acquire(int num_rstcs, struct reset_control_bulk_data *rs
 void reset_control_bulk_release(int num_rstcs, struct reset_control_bulk_data *rstcs);
 
 struct reset_control *__of_reset_control_get(struct device_node *node,
-				     const char *id, int index, bool shared,
-				     bool optional, bool acquired);
+				     const char *id, int index, enum reset_control_flags flags);
 struct reset_control *__reset_control_get(struct device *dev, const char *id,
-					  int index, bool shared,
-					  bool optional, bool acquired);
+					  int index, enum reset_control_flags flags);
 void reset_control_put(struct reset_control *rstc);
 int __reset_control_bulk_get(struct device *dev, int num_rstcs,
 			     struct reset_control_bulk_data *rstcs,
-			     bool shared, bool optional, bool acquired);
+			     enum reset_control_flags flags);
 void reset_control_bulk_put(int num_rstcs, struct reset_control_bulk_data *rstcs);
 
 int __device_reset(struct device *dev, bool optional);
 struct reset_control *__devm_reset_control_get(struct device *dev,
-				     const char *id, int index, bool shared,
-				     bool optional, bool acquired);
+				     const char *id, int index, enum reset_control_flags flags);
 int __devm_reset_control_bulk_get(struct device *dev, int num_rstcs,
 				  struct reset_control_bulk_data *rstcs,
-				  bool shared, bool optional, bool acquired);
+				  enum reset_control_flags flags);
 
 struct reset_control *devm_reset_control_array_get(struct device *dev,
-						   bool shared, bool optional);
-struct reset_control *of_reset_control_array_get(struct device_node *np,
-						 bool shared, bool optional,
-						 bool acquired);
+						   enum reset_control_flags flags);
+struct reset_control *of_reset_control_array_get(struct device_node *np, enum reset_control_flags);
 
 int reset_control_get_count(struct device *dev);
 
@@ -116,17 +138,19 @@ static inline int __device_reset(struct device *dev, bool optional)
 
 static inline struct reset_control *__of_reset_control_get(
 					struct device_node *node,
-					const char *id, int index, bool shared,
-					bool optional, bool acquired)
+					const char *id, int index, enum reset_control_flags flags)
 {
+	bool optional = flags & RESET_CONTROL_FLAGS_BIT_OPTIONAL;
+
 	return optional ? NULL : ERR_PTR(-ENOTSUPP);
 }
 
 static inline struct reset_control *__reset_control_get(
 					struct device *dev, const char *id,
-					int index, bool shared, bool optional,
-					bool acquired)
+					int index, enum reset_control_flags flags)
 {
+	bool optional = flags & RESET_CONTROL_FLAGS_BIT_OPTIONAL;
+
 	return optional ? NULL : ERR_PTR(-ENOTSUPP);
 }
 
@@ -162,8 +186,10 @@ reset_control_bulk_release(int num_rstcs, struct reset_control_bulk_data *rstcs)
 static inline int
 __reset_control_bulk_get(struct device *dev, int num_rstcs,
 			 struct reset_control_bulk_data *rstcs,
-			 bool shared, bool optional, bool acquired)
+			 enum reset_control_flags flags)
 {
+	bool optional = flags & RESET_CONTROL_FLAGS_BIT_OPTIONAL;
+
 	return optional ? 0 : -EOPNOTSUPP;
 }
 
@@ -174,30 +200,36 @@ reset_control_bulk_put(int num_rstcs, struct reset_control_bulk_data *rstcs)
 
 static inline struct reset_control *__devm_reset_control_get(
 					struct device *dev, const char *id,
-					int index, bool shared, bool optional,
-					bool acquired)
+					int index, enum reset_control_flags flags)
 {
+	bool optional = flags & RESET_CONTROL_FLAGS_BIT_OPTIONAL;
+
 	return optional ? NULL : ERR_PTR(-ENOTSUPP);
 }
 
 static inline int
 __devm_reset_control_bulk_get(struct device *dev, int num_rstcs,
 			      struct reset_control_bulk_data *rstcs,
-			      bool shared, bool optional, bool acquired)
+			      enum reset_control_flags flags)
 {
+	bool optional = flags & RESET_CONTROL_FLAGS_BIT_OPTIONAL;
+
 	return optional ? 0 : -EOPNOTSUPP;
 }
 
 static inline struct reset_control *
-devm_reset_control_array_get(struct device *dev, bool shared, bool optional)
+devm_reset_control_array_get(struct device *dev, enum reset_control_flags flags)
 {
+	bool optional = flags & RESET_CONTROL_FLAGS_BIT_OPTIONAL;
+
 	return optional ? NULL : ERR_PTR(-ENOTSUPP);
 }
 
 static inline struct reset_control *
-of_reset_control_array_get(struct device_node *np, bool shared, bool optional,
-			   bool acquired)
+of_reset_control_array_get(struct device_node *np, enum reset_control_flags flags)
 {
+	bool optional = flags & RESET_CONTROL_FLAGS_BIT_OPTIONAL;
+
 	return optional ? NULL : ERR_PTR(-ENOTSUPP);
 }
 
@@ -236,7 +268,7 @@ static inline int device_reset_optional(struct device *dev)
 static inline struct reset_control *
 __must_check reset_control_get_exclusive(struct device *dev, const char *id)
 {
-	return __reset_control_get(dev, id, 0, false, false, true);
+	return __reset_control_get(dev, id, 0, RESET_CONTROL_EXCLUSIVE);
 }
 
 /**
@@ -253,7 +285,7 @@ static inline int __must_check
 reset_control_bulk_get_exclusive(struct device *dev, int num_rstcs,
 				 struct reset_control_bulk_data *rstcs)
 {
-	return __reset_control_bulk_get(dev, num_rstcs, rstcs, false, false, true);
+	return __reset_control_bulk_get(dev, num_rstcs, rstcs, RESET_CONTROL_EXCLUSIVE);
 }
 
 /**
@@ -274,7 +306,7 @@ static inline struct reset_control *
 __must_check reset_control_get_exclusive_released(struct device *dev,
 						  const char *id)
 {
-	return __reset_control_get(dev, id, 0, false, false, false);
+	return __reset_control_get(dev, id, 0, RESET_CONTROL_EXCLUSIVE_RELEASED);
 }
 
 /**
@@ -295,7 +327,7 @@ static inline int __must_check
 reset_control_bulk_get_exclusive_released(struct device *dev, int num_rstcs,
 					  struct reset_control_bulk_data *rstcs)
 {
-	return __reset_control_bulk_get(dev, num_rstcs, rstcs, false, false, false);
+	return __reset_control_bulk_get(dev, num_rstcs, rstcs, RESET_CONTROL_EXCLUSIVE_RELEASED);
 }
 
 /**
@@ -316,7 +348,8 @@ static inline int __must_check
 reset_control_bulk_get_optional_exclusive_released(struct device *dev, int num_rstcs,
 						   struct reset_control_bulk_data *rstcs)
 {
-	return __reset_control_bulk_get(dev, num_rstcs, rstcs, false, true, false);
+	return __reset_control_bulk_get(dev, num_rstcs, rstcs,
+					RESET_CONTROL_OPTIONAL_EXCLUSIVE_RELEASED);
 }
 
 /**
@@ -344,7 +377,7 @@ reset_control_bulk_get_optional_exclusive_released(struct device *dev, int num_r
 static inline struct reset_control *reset_control_get_shared(
 					struct device *dev, const char *id)
 {
-	return __reset_control_get(dev, id, 0, true, false, false);
+	return __reset_control_get(dev, id, 0, RESET_CONTROL_SHARED);
 }
 
 /**
@@ -361,7 +394,7 @@ static inline int __must_check
 reset_control_bulk_get_shared(struct device *dev, int num_rstcs,
 			      struct reset_control_bulk_data *rstcs)
 {
-	return __reset_control_bulk_get(dev, num_rstcs, rstcs, true, false, false);
+	return __reset_control_bulk_get(dev, num_rstcs, rstcs, RESET_CONTROL_SHARED);
 }
 
 /**
@@ -378,7 +411,7 @@ reset_control_bulk_get_shared(struct device *dev, int num_rstcs,
 static inline struct reset_control *reset_control_get_optional_exclusive(
 					struct device *dev, const char *id)
 {
-	return __reset_control_get(dev, id, 0, false, true, true);
+	return __reset_control_get(dev, id, 0, RESET_CONTROL_OPTIONAL_EXCLUSIVE);
 }
 
 /**
@@ -398,7 +431,7 @@ static inline int __must_check
 reset_control_bulk_get_optional_exclusive(struct device *dev, int num_rstcs,
 					  struct reset_control_bulk_data *rstcs)
 {
-	return __reset_control_bulk_get(dev, num_rstcs, rstcs, false, true, true);
+	return __reset_control_bulk_get(dev, num_rstcs, rstcs, RESET_CONTROL_OPTIONAL_EXCLUSIVE);
 }
 
 /**
@@ -415,7 +448,7 @@ reset_control_bulk_get_optional_exclusive(struct device *dev, int num_rstcs,
 static inline struct reset_control *reset_control_get_optional_shared(
 					struct device *dev, const char *id)
 {
-	return __reset_control_get(dev, id, 0, true, true, false);
+	return __reset_control_get(dev, id, 0, RESET_CONTROL_OPTIONAL_SHARED);
 }
 
 /**
@@ -435,7 +468,7 @@ static inline int __must_check
 reset_control_bulk_get_optional_shared(struct device *dev, int num_rstcs,
 				       struct reset_control_bulk_data *rstcs)
 {
-	return __reset_control_bulk_get(dev, num_rstcs, rstcs, true, true, false);
+	return __reset_control_bulk_get(dev, num_rstcs, rstcs, RESET_CONTROL_OPTIONAL_SHARED);
 }
 
 /**
@@ -451,7 +484,7 @@ reset_control_bulk_get_optional_shared(struct device *dev, int num_rstcs,
 static inline struct reset_control *of_reset_control_get_exclusive(
 				struct device_node *node, const char *id)
 {
-	return __of_reset_control_get(node, id, 0, false, false, true);
+	return __of_reset_control_get(node, id, 0, RESET_CONTROL_EXCLUSIVE);
 }
 
 /**
@@ -471,7 +504,7 @@ static inline struct reset_control *of_reset_control_get_exclusive(
 static inline struct reset_control *of_reset_control_get_optional_exclusive(
 				struct device_node *node, const char *id)
 {
-	return __of_reset_control_get(node, id, 0, false, true, true);
+	return __of_reset_control_get(node, id, 0, RESET_CONTROL_OPTIONAL_EXCLUSIVE);
 }
 
 /**
@@ -496,7 +529,7 @@ static inline struct reset_control *of_reset_control_get_optional_exclusive(
 static inline struct reset_control *of_reset_control_get_shared(
 				struct device_node *node, const char *id)
 {
-	return __of_reset_control_get(node, id, 0, true, false, false);
+	return __of_reset_control_get(node, id, 0, RESET_CONTROL_SHARED);
 }
 
 /**
@@ -513,7 +546,7 @@ static inline struct reset_control *of_reset_control_get_shared(
 static inline struct reset_control *of_reset_control_get_exclusive_by_index(
 					struct device_node *node, int index)
 {
-	return __of_reset_control_get(node, NULL, index, false, false, true);
+	return __of_reset_control_get(node, NULL, index, RESET_CONTROL_EXCLUSIVE);
 }
 
 /**
@@ -541,7 +574,7 @@ static inline struct reset_control *of_reset_control_get_exclusive_by_index(
 static inline struct reset_control *of_reset_control_get_shared_by_index(
 					struct device_node *node, int index)
 {
-	return __of_reset_control_get(node, NULL, index, true, false, false);
+	return __of_reset_control_get(node, NULL, index, RESET_CONTROL_SHARED);
 }
 
 /**
@@ -560,7 +593,7 @@ static inline struct reset_control *
 __must_check devm_reset_control_get_exclusive(struct device *dev,
 					      const char *id)
 {
-	return __devm_reset_control_get(dev, id, 0, false, false, true);
+	return __devm_reset_control_get(dev, id, 0, RESET_CONTROL_EXCLUSIVE);
 }
 
 /**
@@ -580,7 +613,8 @@ static inline int __must_check
 devm_reset_control_bulk_get_exclusive(struct device *dev, int num_rstcs,
 				      struct reset_control_bulk_data *rstcs)
 {
-	return __devm_reset_control_bulk_get(dev, num_rstcs, rstcs, false, false, true);
+	return __devm_reset_control_bulk_get(dev, num_rstcs, rstcs,
+					     RESET_CONTROL_EXCLUSIVE);
 }
 
 /**
@@ -599,7 +633,7 @@ static inline struct reset_control *
 __must_check devm_reset_control_get_exclusive_released(struct device *dev,
 						       const char *id)
 {
-	return __devm_reset_control_get(dev, id, 0, false, false, false);
+	return __devm_reset_control_get(dev, id, 0, RESET_CONTROL_EXCLUSIVE_RELEASED);
 }
 
 /**
@@ -619,7 +653,8 @@ static inline int __must_check
 devm_reset_control_bulk_get_exclusive_released(struct device *dev, int num_rstcs,
 					       struct reset_control_bulk_data *rstcs)
 {
-	return __devm_reset_control_bulk_get(dev, num_rstcs, rstcs, false, false, false);
+	return __devm_reset_control_bulk_get(dev, num_rstcs, rstcs,
+					     RESET_CONTROL_EXCLUSIVE_RELEASED);
 }
 
 /**
@@ -638,7 +673,7 @@ static inline struct reset_control *
 __must_check devm_reset_control_get_optional_exclusive_released(struct device *dev,
 								const char *id)
 {
-	return __devm_reset_control_get(dev, id, 0, false, true, false);
+	return __devm_reset_control_get(dev, id, 0, RESET_CONTROL_OPTIONAL_EXCLUSIVE_RELEASED);
 }
 
 /**
@@ -658,7 +693,8 @@ static inline int __must_check
 devm_reset_control_bulk_get_optional_exclusive_released(struct device *dev, int num_rstcs,
 							struct reset_control_bulk_data *rstcs)
 {
-	return __devm_reset_control_bulk_get(dev, num_rstcs, rstcs, false, true, false);
+	return __devm_reset_control_bulk_get(dev, num_rstcs, rstcs,
+					     RESET_CONTROL_OPTIONAL_EXCLUSIVE_RELEASED);
 }
 
 /**
@@ -673,7 +709,7 @@ devm_reset_control_bulk_get_optional_exclusive_released(struct device *dev, int
 static inline struct reset_control *devm_reset_control_get_shared(
 					struct device *dev, const char *id)
 {
-	return __devm_reset_control_get(dev, id, 0, true, false, false);
+	return __devm_reset_control_get(dev, id, 0, RESET_CONTROL_SHARED);
 }
 
 /**
@@ -693,7 +729,7 @@ static inline int __must_check
 devm_reset_control_bulk_get_shared(struct device *dev, int num_rstcs,
 				   struct reset_control_bulk_data *rstcs)
 {
-	return __devm_reset_control_bulk_get(dev, num_rstcs, rstcs, true, false, false);
+	return __devm_reset_control_bulk_get(dev, num_rstcs, rstcs, RESET_CONTROL_SHARED);
 }
 
 /**
@@ -711,7 +747,7 @@ devm_reset_control_bulk_get_shared(struct device *dev, int num_rstcs,
 static inline struct reset_control *devm_reset_control_get_optional_exclusive(
 					struct device *dev, const char *id)
 {
-	return __devm_reset_control_get(dev, id, 0, false, true, true);
+	return __devm_reset_control_get(dev, id, 0, RESET_CONTROL_OPTIONAL_EXCLUSIVE);
 }
 
 /**
@@ -731,7 +767,8 @@ static inline int __must_check
 devm_reset_control_bulk_get_optional_exclusive(struct device *dev, int num_rstcs,
 					       struct reset_control_bulk_data *rstcs)
 {
-	return __devm_reset_control_bulk_get(dev, num_rstcs, rstcs, false, true, true);
+	return __devm_reset_control_bulk_get(dev, num_rstcs, rstcs,
+					     RESET_CONTROL_OPTIONAL_EXCLUSIVE);
 }
 
 /**
@@ -749,7 +786,7 @@ devm_reset_control_bulk_get_optional_exclusive(struct device *dev, int num_rstcs
 static inline struct reset_control *devm_reset_control_get_optional_shared(
 					struct device *dev, const char *id)
 {
-	return __devm_reset_control_get(dev, id, 0, true, true, false);
+	return __devm_reset_control_get(dev, id, 0, RESET_CONTROL_OPTIONAL_SHARED);
 }
 
 /**
@@ -769,7 +806,7 @@ static inline int __must_check
 devm_reset_control_bulk_get_optional_shared(struct device *dev, int num_rstcs,
 					    struct reset_control_bulk_data *rstcs)
 {
-	return __devm_reset_control_bulk_get(dev, num_rstcs, rstcs, true, true, false);
+	return __devm_reset_control_bulk_get(dev, num_rstcs, rstcs, RESET_CONTROL_OPTIONAL_SHARED);
 }
 
 /**
@@ -787,7 +824,7 @@ devm_reset_control_bulk_get_optional_shared(struct device *dev, int num_rstcs,
 static inline struct reset_control *
 devm_reset_control_get_exclusive_by_index(struct device *dev, int index)
 {
-	return __devm_reset_control_get(dev, NULL, index, false, false, true);
+	return __devm_reset_control_get(dev, NULL, index, RESET_CONTROL_EXCLUSIVE);
 }
 
 /**
@@ -803,7 +840,7 @@ devm_reset_control_get_exclusive_by_index(struct device *dev, int index)
 static inline struct reset_control *
 devm_reset_control_get_shared_by_index(struct device *dev, int index)
 {
-	return __devm_reset_control_get(dev, NULL, index, true, false, false);
+	return __devm_reset_control_get(dev, NULL, index, RESET_CONTROL_SHARED);
 }
 
 /*
@@ -851,54 +888,54 @@ static inline struct reset_control *devm_reset_control_get_by_index(
 static inline struct reset_control *
 devm_reset_control_array_get_exclusive(struct device *dev)
 {
-	return devm_reset_control_array_get(dev, false, false);
+	return devm_reset_control_array_get(dev, RESET_CONTROL_EXCLUSIVE);
 }
 
 static inline struct reset_control *
 devm_reset_control_array_get_shared(struct device *dev)
 {
-	return devm_reset_control_array_get(dev, true, false);
+	return devm_reset_control_array_get(dev, RESET_CONTROL_SHARED);
 }
 
 static inline struct reset_control *
 devm_reset_control_array_get_optional_exclusive(struct device *dev)
 {
-	return devm_reset_control_array_get(dev, false, true);
+	return devm_reset_control_array_get(dev, RESET_CONTROL_OPTIONAL_EXCLUSIVE);
 }
 
 static inline struct reset_control *
 devm_reset_control_array_get_optional_shared(struct device *dev)
 {
-	return devm_reset_control_array_get(dev, true, true);
+	return devm_reset_control_array_get(dev, RESET_CONTROL_OPTIONAL_SHARED);
 }
 
 static inline struct reset_control *
 of_reset_control_array_get_exclusive(struct device_node *node)
 {
-	return of_reset_control_array_get(node, false, false, true);
+	return of_reset_control_array_get(node, RESET_CONTROL_EXCLUSIVE);
 }
 
 static inline struct reset_control *
 of_reset_control_array_get_exclusive_released(struct device_node *node)
 {
-	return of_reset_control_array_get(node, false, false, false);
+	return of_reset_control_array_get(node, RESET_CONTROL_EXCLUSIVE_RELEASED);
 }
 
 static inline struct reset_control *
 of_reset_control_array_get_shared(struct device_node *node)
 {
-	return of_reset_control_array_get(node, true, false, true);
+	return of_reset_control_array_get(node, RESET_CONTROL_SHARED);
 }
 
 static inline struct reset_control *
 of_reset_control_array_get_optional_exclusive(struct device_node *node)
 {
-	return of_reset_control_array_get(node, false, true, true);
+	return of_reset_control_array_get(node, RESET_CONTROL_OPTIONAL_EXCLUSIVE);
 }
 
 static inline struct reset_control *
 of_reset_control_array_get_optional_shared(struct device_node *node)
 {
-	return of_reset_control_array_get(node, true, true, true);
+	return of_reset_control_array_get(node, RESET_CONTROL_OPTIONAL_SHARED);
 }
 #endif
-- 
cgit v1.2.3


From d872bed85036f5e60c66b0dd0994346b4ea6470c Mon Sep 17 00:00:00 2001
From: Philipp Zabel <p.zabel@pengutronix.de>
Date: Wed, 25 Sep 2024 18:40:10 +0200
Subject: reset: Add devres helpers to request pre-deasserted reset controls
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add devres helpers

 - devm_reset_control_bulk_get_exclusive_deasserted
 - devm_reset_control_bulk_get_optional_exclusive_deasserted
 - devm_reset_control_bulk_get_optional_shared_deasserted
 - devm_reset_control_bulk_get_shared_deasserted
 - devm_reset_control_get_exclusive_deasserted
 - devm_reset_control_get_optional_exclusive_deasserted
 - devm_reset_control_get_optional_shared_deasserted
 - devm_reset_control_get_shared_deasserted

to request and immediately deassert reset controls. During cleanup,
reset_control_assert() will be called automatically on the returned
reset controls.

Acked-by: Uwe Kleine-König <u.kleine-koenig@baylibre.com>
Link: https://lore.kernel.org/r/20240925-reset-get-deasserted-v2-2-b3601bbd0458@pengutronix.de
Signed-off-by: Philipp Zabel <p.zabel@pengutronix.de>
---
 include/linux/reset.h | 113 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 113 insertions(+)

(limited to 'include')

diff --git a/include/linux/reset.h b/include/linux/reset.h
index 99296af98f81..2986ced69a02 100644
--- a/include/linux/reset.h
+++ b/include/linux/reset.h
@@ -28,6 +28,7 @@ struct reset_control_bulk_data {
 #define RESET_CONTROL_FLAGS_BIT_SHARED		BIT(0)	/* not exclusive */
 #define RESET_CONTROL_FLAGS_BIT_OPTIONAL	BIT(1)
 #define RESET_CONTROL_FLAGS_BIT_ACQUIRED	BIT(2)	/* iff exclusive, not released */
+#define RESET_CONTROL_FLAGS_BIT_DEASSERTED	BIT(3)
 
 /**
  * enum reset_control_flags - Flags that can be passed to the reset_control_get functions
@@ -35,21 +36,35 @@ struct reset_control_bulk_data {
  *                    These values cannot be OR'd.
  *
  * @RESET_CONTROL_EXCLUSIVE:				exclusive, acquired,
+ * @RESET_CONTROL_EXCLUSIVE_DEASSERTED:			exclusive, acquired, deasserted
  * @RESET_CONTROL_EXCLUSIVE_RELEASED:			exclusive, released,
  * @RESET_CONTROL_SHARED:				shared
+ * @RESET_CONTROL_SHARED_DEASSERTED:			shared, deasserted
  * @RESET_CONTROL_OPTIONAL_EXCLUSIVE:			optional, exclusive, acquired
+ * @RESET_CONTROL_OPTIONAL_EXCLUSIVE_DEASSERTED:	optional, exclusive, acquired, deasserted
  * @RESET_CONTROL_OPTIONAL_EXCLUSIVE_RELEASED:		optional, exclusive, released
  * @RESET_CONTROL_OPTIONAL_SHARED:			optional, shared
+ * @RESET_CONTROL_OPTIONAL_SHARED_DEASSERTED:		optional, shared, deasserted
  */
 enum reset_control_flags {
 	RESET_CONTROL_EXCLUSIVE				= RESET_CONTROL_FLAGS_BIT_ACQUIRED,
+	RESET_CONTROL_EXCLUSIVE_DEASSERTED		= RESET_CONTROL_FLAGS_BIT_ACQUIRED |
+							  RESET_CONTROL_FLAGS_BIT_DEASSERTED,
 	RESET_CONTROL_EXCLUSIVE_RELEASED		= 0,
 	RESET_CONTROL_SHARED				= RESET_CONTROL_FLAGS_BIT_SHARED,
+	RESET_CONTROL_SHARED_DEASSERTED			= RESET_CONTROL_FLAGS_BIT_SHARED |
+							  RESET_CONTROL_FLAGS_BIT_DEASSERTED,
 	RESET_CONTROL_OPTIONAL_EXCLUSIVE		= RESET_CONTROL_FLAGS_BIT_OPTIONAL |
 							  RESET_CONTROL_FLAGS_BIT_ACQUIRED,
+	RESET_CONTROL_OPTIONAL_EXCLUSIVE_DEASSERTED	= RESET_CONTROL_FLAGS_BIT_OPTIONAL |
+							  RESET_CONTROL_FLAGS_BIT_ACQUIRED |
+							  RESET_CONTROL_FLAGS_BIT_DEASSERTED,
 	RESET_CONTROL_OPTIONAL_EXCLUSIVE_RELEASED	= RESET_CONTROL_FLAGS_BIT_OPTIONAL,
 	RESET_CONTROL_OPTIONAL_SHARED			= RESET_CONTROL_FLAGS_BIT_OPTIONAL |
 							  RESET_CONTROL_FLAGS_BIT_SHARED,
+	RESET_CONTROL_OPTIONAL_SHARED_DEASSERTED	= RESET_CONTROL_FLAGS_BIT_OPTIONAL |
+							  RESET_CONTROL_FLAGS_BIT_SHARED |
+							  RESET_CONTROL_FLAGS_BIT_DEASSERTED,
 };
 
 #ifdef CONFIG_RESET_CONTROLLER
@@ -596,6 +611,25 @@ __must_check devm_reset_control_get_exclusive(struct device *dev,
 	return __devm_reset_control_get(dev, id, 0, RESET_CONTROL_EXCLUSIVE);
 }
 
+/**
+ * devm_reset_control_get_exclusive_deasserted - resource managed
+ *                                    reset_control_get_exclusive() +
+ *                                    reset_control_deassert()
+ * @dev: device to be reset by the controller
+ * @id: reset line name
+ *
+ * Managed reset_control_get_exclusive() + reset_control_deassert(). For reset
+ * controllers returned from this function, reset_control_assert() +
+ * reset_control_put() is called automatically on driver detach.
+ *
+ * See reset_control_get_exclusive() for more information.
+ */
+static inline struct reset_control * __must_check
+devm_reset_control_get_exclusive_deasserted(struct device *dev, const char *id)
+{
+	return __devm_reset_control_get(dev, id, 0, RESET_CONTROL_EXCLUSIVE_DEASSERTED);
+}
+
 /**
  * devm_reset_control_bulk_get_exclusive - resource managed
  *                                         reset_control_bulk_get_exclusive()
@@ -712,6 +746,25 @@ static inline struct reset_control *devm_reset_control_get_shared(
 	return __devm_reset_control_get(dev, id, 0, RESET_CONTROL_SHARED);
 }
 
+/**
+ * devm_reset_control_get_shared_deasserted - resource managed
+ *                                            reset_control_get_shared() +
+ *                                            reset_control_deassert()
+ * @dev: device to be reset by the controller
+ * @id: reset line name
+ *
+ * Managed reset_control_get_shared() + reset_control_deassert(). For reset
+ * controllers returned from this function, reset_control_assert() +
+ * reset_control_put() is called automatically on driver detach.
+ *
+ * See devm_reset_control_get_shared() for more information.
+ */
+static inline struct reset_control * __must_check
+devm_reset_control_get_shared_deasserted(struct device *dev, const char *id)
+{
+	return __devm_reset_control_get(dev, id, 0, RESET_CONTROL_SHARED_DEASSERTED);
+}
+
 /**
  * devm_reset_control_bulk_get_shared - resource managed
  *                                      reset_control_bulk_get_shared()
@@ -732,6 +785,28 @@ devm_reset_control_bulk_get_shared(struct device *dev, int num_rstcs,
 	return __devm_reset_control_bulk_get(dev, num_rstcs, rstcs, RESET_CONTROL_SHARED);
 }
 
+/**
+ * devm_reset_control_bulk_get_shared_deasserted - resource managed
+ *                                                 reset_control_bulk_get_shared() +
+ *                                                 reset_control_bulk_deassert()
+ * @dev: device to be reset by the controller
+ * @num_rstcs: number of entries in rstcs array
+ * @rstcs: array of struct reset_control_bulk_data with reset line names set
+ *
+ * Managed reset_control_bulk_get_shared() + reset_control_bulk_deassert(). For
+ * reset controllers returned from this function, reset_control_bulk_assert() +
+ * reset_control_bulk_put() are called automatically on driver detach.
+ *
+ * See devm_reset_control_bulk_get_shared() for more information.
+ */
+static inline int __must_check
+devm_reset_control_bulk_get_shared_deasserted(struct device *dev, int num_rstcs,
+					      struct reset_control_bulk_data *rstcs)
+{
+	return __devm_reset_control_bulk_get(dev, num_rstcs, rstcs,
+					     RESET_CONTROL_SHARED_DEASSERTED);
+}
+
 /**
  * devm_reset_control_get_optional_exclusive - resource managed
  *                                             reset_control_get_optional_exclusive()
@@ -750,6 +825,25 @@ static inline struct reset_control *devm_reset_control_get_optional_exclusive(
 	return __devm_reset_control_get(dev, id, 0, RESET_CONTROL_OPTIONAL_EXCLUSIVE);
 }
 
+/**
+ * devm_reset_control_get_optional_exclusive_deasserted - resource managed
+ *                                                        reset_control_get_optional_exclusive() +
+ *                                                        reset_control_deassert()
+ * @dev: device to be reset by the controller
+ * @id: reset line name
+ *
+ * Managed reset_control_get_optional_exclusive() + reset_control_deassert().
+ * For reset controllers returned from this function, reset_control_assert() +
+ * reset_control_put() is called automatically on driver detach.
+ *
+ * See devm_reset_control_get_optional_exclusive() for more information.
+ */
+static inline struct reset_control *
+devm_reset_control_get_optional_exclusive_deasserted(struct device *dev, const char *id)
+{
+	return __devm_reset_control_get(dev, id, 0, RESET_CONTROL_OPTIONAL_EXCLUSIVE_DEASSERTED);
+}
+
 /**
  * devm_reset_control_bulk_get_optional_exclusive - resource managed
  *                                                  reset_control_bulk_get_optional_exclusive()
@@ -789,6 +883,25 @@ static inline struct reset_control *devm_reset_control_get_optional_shared(
 	return __devm_reset_control_get(dev, id, 0, RESET_CONTROL_OPTIONAL_SHARED);
 }
 
+/**
+ * devm_reset_control_get_optional_shared_deasserted - resource managed
+ *                                                     reset_control_get_optional_shared() +
+ *                                                     reset_control_deassert()
+ * @dev: device to be reset by the controller
+ * @id: reset line name
+ *
+ * Managed reset_control_get_optional_shared() + reset_control_deassert(). For
+ * reset controllers returned from this function, reset_control_assert() +
+ * reset_control_put() is called automatically on driver detach.
+ *
+ * See devm_reset_control_get_optional_shared() for more information.
+ */
+static inline struct reset_control *
+devm_reset_control_get_optional_shared_deasserted(struct device *dev, const char *id)
+{
+	return __devm_reset_control_get(dev, id, 0, RESET_CONTROL_OPTIONAL_SHARED_DEASSERTED);
+}
+
 /**
  * devm_reset_control_bulk_get_optional_shared - resource managed
  *                                               reset_control_bulk_get_optional_shared()
-- 
cgit v1.2.3


From dab9cd4b8e7f5fce4e7a0424991ec4714a780f3f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@baylibre.com>
Date: Tue, 1 Oct 2024 10:51:39 +0200
Subject: pwm: Add kernel doc for members added to pwm_ops recently
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The callbacks for lowlevel pwm drivers were expanded to handle the new
waveform abstraction. When doing that I missed to expand the kernel doc
description. This is catched up here.

Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Link: https://lore.kernel.org/linux-next/20241001135207.125ca7af@canb.auug.org.au
Fixes: 17e40c25158f ("pwm: New abstraction for PWM waveforms")
Signed-off-by: Uwe Kleine-König <u.kleine-koenig@baylibre.com>
Link: https://lore.kernel.org/r/20241001085138.1025818-2-u.kleine-koenig@baylibre.com
Signed-off-by: Uwe Kleine-König <ukleinek@kernel.org>
---
 include/linux/pwm.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include')

diff --git a/include/linux/pwm.h b/include/linux/pwm.h
index c3d9ddeafa65..f1cb1e5b0a36 100644
--- a/include/linux/pwm.h
+++ b/include/linux/pwm.h
@@ -276,6 +276,11 @@ struct pwm_capture {
  * @request: optional hook for requesting a PWM
  * @free: optional hook for freeing a PWM
  * @capture: capture and report PWM signal
+ * @sizeof_wfhw: size (in bytes) of driver specific waveform presentation
+ * @round_waveform_tohw: convert a struct pwm_waveform to driver specific presentation
+ * @round_waveform_fromhw: convert a driver specific waveform presentation to struct pwm_waveform
+ * @read_waveform: read driver specific waveform presentation from hardware
+ * @write_waveform: write driver specific waveform presentation to hardware
  * @apply: atomically apply a new PWM config
  * @get_state: get the current PWM state.
  */
-- 
cgit v1.2.3


From 030ace430afcf847f537227afceb22dfe8fb8fc8 Mon Sep 17 00:00:00 2001
From: Tudor Ambarus <tudor.ambarus@linaro.org>
Date: Thu, 26 Sep 2024 22:19:52 +0800
Subject: spi: spi-mem: Allow specifying the byte order in Octal DTR mode

There are NOR flashes (Macronix) that swap the bytes on a 16-bit
boundary when configured in Octal DTR mode. The byte order of
16-bit words is swapped when read or written in Octal Double
Transfer Rate (DTR) mode compared to Single Transfer Rate (STR)
modes. If one writes D0 D1 D2 D3 bytes using 1-1-1 mode, and uses
8D-8D-8D SPI mode for reading, it will read back D1 D0 D3 D2.
Swapping the bytes may introduce some endianness problems. It can
affect the boot sequence if the entire boot sequence is not handled
in either 8D-8D-8D mode or 1-1-1 mode. Therefore, it is necessary
to swap the bytes back to ensure the same byte order as in STR modes.
Fortunately there are controllers that could swap the bytes back at
runtime, addressing the flash's endianness requirements. Provide a
way for the upper layers to specify the byte order in Octal DTR mode.

Merge Tudor's patch and add modifications for suiting newer version
of Linux kernel.

Suggested-by: Michael Walle <mwalle@kernel.org>
Signed-off-by: JaimeLiao <jaimeliao@mxic.com.tw>
Signed-off-by: AlvinZhou <alvinzhou@mxic.com.tw>
Acked-by: Mark Brown <broonie@kernel.org>
Link: https://lore.kernel.org/r/20240926141956.2386374-3-alvinzhou.tw@gmail.com
Signed-off-by: Tudor Ambarus <tudor.ambarus@linaro.org>
---
 include/linux/spi/spi-mem.h | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/spi/spi-mem.h b/include/linux/spi/spi-mem.h
index f866d5c8ed32..c46d2b8029be 100644
--- a/include/linux/spi/spi-mem.h
+++ b/include/linux/spi/spi-mem.h
@@ -90,6 +90,8 @@ enum spi_mem_data_dir {
  * @data.buswidth: number of IO lanes used to send/receive the data
  * @data.dtr: whether the data should be sent in DTR mode or not
  * @data.ecc: whether error correction is required or not
+ * @data.swap16: whether the byte order of 16-bit words is swapped when read
+ *		 or written in Octal DTR mode compared to STR mode.
  * @data.dir: direction of the transfer
  * @data.nbytes: number of data bytes to send/receive. Can be zero if the
  *		 operation does not involve transferring data
@@ -124,7 +126,8 @@ struct spi_mem_op {
 		u8 buswidth;
 		u8 dtr : 1;
 		u8 ecc : 1;
-		u8 __pad : 6;
+		u8 swap16 : 1;
+		u8 __pad : 5;
 		enum spi_mem_data_dir dir;
 		unsigned int nbytes;
 		union {
@@ -297,10 +300,13 @@ struct spi_controller_mem_ops {
  * struct spi_controller_mem_caps - SPI memory controller capabilities
  * @dtr: Supports DTR operations
  * @ecc: Supports operations with error correction
+ * @swap16: Supports swapping bytes on a 16 bit boundary when configured in
+ *	    Octal DTR
  */
 struct spi_controller_mem_caps {
 	bool dtr;
 	bool ecc;
+	bool swap16;
 };
 
 #define spi_mem_controller_is_capable(ctlr, cap)	\
-- 
cgit v1.2.3


From 848f2bbb363d4cdb4202db328a103fd3c34e21a2 Mon Sep 17 00:00:00 2001
From: Ronak Jain <ronak.jain@amd.com>
Date: Tue, 30 Jul 2024 01:43:42 -0700
Subject: firmware: xilinx: Add missing debug firmware interfaces

Add missing PM EEMI APIs interface in debug firmware driver.

The debugfs firmware driver interface is intended for testing and
debugging the EEMI APIs only. This interface does not contain any
checking regarding improper usage, and the number, type and valid
ranges of the arguments. This interface must be used with a lot of
care. In fact, accessing this interface during normal PM operation
will very likely cause unexpected problems.

The debugfs interface shouldn't be used in the production system and
hence it is disabled by default in defconfig.

Signed-off-by: Ronak Jain <ronak.jain@amd.com>
Signed-off-by: Radhey Shyam Pandey <radhey.shyam.pandey@amd.com>
Signed-off-by: Michal Simek <michal.simek@amd.com>
Link: https://lore.kernel.org/r/20240730084342.1683231-1-ronak.jain@amd.com
---
 include/linux/firmware/xlnx-zynqmp.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/linux/firmware/xlnx-zynqmp.h b/include/linux/firmware/xlnx-zynqmp.h
index d7d07afc0532..563382cf16f2 100644
--- a/include/linux/firmware/xlnx-zynqmp.h
+++ b/include/linux/firmware/xlnx-zynqmp.h
@@ -218,9 +218,13 @@ enum pm_ioctl_id {
 	/* Runtime feature configuration */
 	IOCTL_SET_FEATURE_CONFIG = 26,
 	IOCTL_GET_FEATURE_CONFIG = 27,
+	/* IOCTL for Secure Read/Write Interface */
+	IOCTL_READ_REG = 28,
 	/* Dynamic SD/GEM configuration */
 	IOCTL_SET_SD_CONFIG = 30,
 	IOCTL_SET_GEM_CONFIG = 31,
+	/* IOCTL to get default/current QoS */
+	IOCTL_GET_QOS = 34,
 };
 
 enum pm_query_id {
-- 
cgit v1.2.3


From f33d6099edf78e3c97900c0173fedbfecc025a9e Mon Sep 17 00:00:00 2001
From: Ronak Jain <ronak.jain@amd.com>
Date: Fri, 30 Aug 2024 03:00:42 -0700
Subject: firmware: xilinx: use u32 for reset ID in reset APIs

Refactors the reset handling mechanisms by replacing the reset ID's
enum type with a u32. This update improves flexibility, allowing the
reset ID to accommodate a broader range of values, including those
that may not fit into predefined enum values.

The use of u32 for reset ID enhances extensibility, especially for
hardware platforms or features where more granular control of reset
operations is required. By shifting to a general integer type, this
change reduces constraints and simplifies integration with other
system components that rely on non-enum-based reset IDs.

Signed-off-by: Ronak Jain <ronak.jain@amd.com>
Link: https://lore.kernel.org/r/20240830100042.3163511-1-ronak.jain@amd.com
Signed-off-by: Michal Simek <michal.simek@amd.com>
---
 include/linux/firmware/xlnx-zynqmp.h | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/firmware/xlnx-zynqmp.h b/include/linux/firmware/xlnx-zynqmp.h
index 563382cf16f2..5b938fc2adad 100644
--- a/include/linux/firmware/xlnx-zynqmp.h
+++ b/include/linux/firmware/xlnx-zynqmp.h
@@ -557,9 +557,9 @@ int zynqmp_pm_get_pll_frac_data(u32 clk_id, u32 *data);
 int zynqmp_pm_set_sd_tapdelay(u32 node_id, u32 type, u32 value);
 int zynqmp_pm_sd_dll_reset(u32 node_id, u32 type);
 int zynqmp_pm_ospi_mux_select(u32 dev_id, u32 select);
-int zynqmp_pm_reset_assert(const enum zynqmp_pm_reset reset,
+int zynqmp_pm_reset_assert(const u32 reset,
 			   const enum zynqmp_pm_reset_action assert_flag);
-int zynqmp_pm_reset_get_status(const enum zynqmp_pm_reset reset, u32 *status);
+int zynqmp_pm_reset_get_status(const u32 reset, u32 *status);
 unsigned int zynqmp_pm_bootmode_read(u32 *ps_mode);
 int zynqmp_pm_bootmode_write(u32 ps_mode);
 int zynqmp_pm_init_finalize(void);
@@ -702,14 +702,13 @@ static inline int zynqmp_pm_ospi_mux_select(u32 dev_id, u32 select)
 	return -ENODEV;
 }
 
-static inline int zynqmp_pm_reset_assert(const enum zynqmp_pm_reset reset,
+static inline int zynqmp_pm_reset_assert(const u32 reset,
 					 const enum zynqmp_pm_reset_action assert_flag)
 {
 	return -ENODEV;
 }
 
-static inline int zynqmp_pm_reset_get_status(const enum zynqmp_pm_reset reset,
-					     u32 *status)
+static inline int zynqmp_pm_reset_get_status(const u32 reset, u32 *status)
 {
 	return -ENODEV;
 }
-- 
cgit v1.2.3


From 92fb71333d5737d0296fb968a653dfda4b225175 Mon Sep 17 00:00:00 2001
From: Ronak Jain <ronak.jain@amd.com>
Date: Thu, 19 Sep 2024 22:55:01 -0700
Subject: firmware: xilinx: add support for new SMC call format

Added zynqmp_pm_invoke_fw_fn() to use new SMC format in which
lower 12 bits of SMC id are fixed and firmware header is moved to
subsequent SMC arguments. The new SMC format supports full request and
response buffers.

Added zynqmp_pm_get_sip_svc_version() to get SiP SVC version
number to check if TF-A is newer or older and use the SMC format
accordingly to handle backward compatibility.

Used new SMC format for PM_QUERY_DATA API as more response values are
required in it.

Signed-off-by: Ronak Jain <ronak.jain@amd.com>
Signed-off-by: Jay Buddhabhatti <jay.buddhabhatti@amd.com>
Link: https://lore.kernel.org/r/20240920055501.2658642-1-ronak.jain@amd.com
Signed-off-by: Michal Simek <michal.simek@amd.com>
---
 include/linux/firmware/xlnx-zynqmp.h | 26 ++++++++++++++++++++++++--
 1 file changed, 24 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/firmware/xlnx-zynqmp.h b/include/linux/firmware/xlnx-zynqmp.h
index 5b938fc2adad..76d85ad82ec0 100644
--- a/include/linux/firmware/xlnx-zynqmp.h
+++ b/include/linux/firmware/xlnx-zynqmp.h
@@ -3,7 +3,7 @@
  * Xilinx Zynq MPSoC Firmware layer
  *
  *  Copyright (C) 2014-2021 Xilinx
- *  Copyright (C) 2022 - 2023, Advanced Micro Devices, Inc.
+ *  Copyright (C) 2022 - 2024, Advanced Micro Devices, Inc.
  *
  *  Michal Simek <michal.simek@amd.com>
  *  Davorin Mista <davorin.mista@aggios.com>
@@ -32,6 +32,19 @@
 /* SMC SIP service Call Function Identifier Prefix */
 #define PM_SIP_SVC			0xC2000000
 
+/* SMC function ID to get SiP SVC version */
+#define GET_SIP_SVC_VERSION	(0x8200ff03U)
+
+/* SiP Service Calls version numbers */
+#define SIP_SVC_VERSION_MAJOR		(0U)
+#define SIP_SVC_VERSION_MINOR		(2U)
+
+#define SIP_SVC_PASSTHROUGH_VERSION	((SIP_SVC_VERSION_MAJOR << 16) | \
+					 SIP_SVC_VERSION_MINOR)
+
+/* Fixed ID for FW specific APIs */
+#define PASS_THROUGH_FW_CMD_ID	GENMASK(11, 0)
+
 /* PM API versions */
 #define PM_API_VERSION_1	1
 #define PM_API_VERSION_2	2
@@ -51,6 +64,7 @@
 
 #define API_ID_MASK		GENMASK(7, 0)
 #define MODULE_ID_MASK		GENMASK(11, 8)
+#define PLM_MODULE_ID_MASK	GENMASK(15, 8)
 
 /* Firmware feature check version mask */
 #define FIRMWARE_VERSION_MASK		0xFFFFU
@@ -62,7 +76,13 @@
 #define GET_CALLBACK_DATA		0xa01
 
 /* Number of 32bits values in payload */
-#define PAYLOAD_ARG_CNT	4U
+#define PAYLOAD_ARG_CNT	7U
+
+/* Number of 64bits arguments for SMC call */
+#define SMC_ARG_CNT_64	8U
+
+/* Number of 32bits arguments for SMC call */
+#define SMC_ARG_CNT_32	13U
 
 /* Number of arguments for a callback */
 #define CB_ARG_CNT     4
@@ -130,6 +150,7 @@
 
 enum pm_module_id {
 	PM_MODULE_ID = 0x0,
+	XPM_MODULE_ID = 0x2,
 	XSEM_MODULE_ID = 0x3,
 	TF_A_MODULE_ID = 0xa,
 };
@@ -537,6 +558,7 @@ struct zynqmp_pm_query_data {
 };
 
 int zynqmp_pm_invoke_fn(u32 pm_api_id, u32 *ret_payload, u32 num_args, ...);
+int zynqmp_pm_invoke_fw_fn(u32 pm_api_id, u32 *ret_payload, u32 num_args, ...);
 
 #if IS_REACHABLE(CONFIG_ZYNQMP_FIRMWARE)
 int zynqmp_pm_get_api_version(u32 *version);
-- 
cgit v1.2.3


From a849a0273d0f73a252d14d31c5003a8059ea51fc Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 11 Sep 2024 15:17:37 +0200
Subject: ntp: Remove unused tick_nsec

tick_nsec is only updated in the NTP core, but there are no users.

Remove it.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Anna-Maria Behnsen <anna-maria@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: John Stultz <jstultz@google.com>
Link: https://lore.kernel.org/all/20240911-devel-anna-maria-b4-timers-ptp-ntp-v1-1-2d52f4e13476@linutronix.de
---
 include/linux/timex.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/timex.h b/include/linux/timex.h
index 3871b06bd302..7f7a12fd8200 100644
--- a/include/linux/timex.h
+++ b/include/linux/timex.h
@@ -145,7 +145,6 @@ unsigned long random_get_entropy_fallback(void);
  * estimated error = NTP dispersion.
  */
 extern unsigned long tick_usec;		/* USER_HZ period (usec) */
-extern unsigned long tick_nsec;		/* SHIFTED_HZ period (nsec) */
 
 /* Required to safely shift negative values */
 #define shift_right(x, s) ({	\
-- 
cgit v1.2.3


From 66606a93849bfe3cbe9f0b801b40f60b87c54e11 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 11 Sep 2024 15:17:38 +0200
Subject: ntp: Make tick_usec static

There are no users of tick_usec outside of the NTP core code. Therefore
make tick_usec static.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Anna-Maria Behnsen <anna-maria@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: John Stultz <jstultz@google.com>
Link: https://lore.kernel.org/all/20240911-devel-anna-maria-b4-timers-ptp-ntp-v1-2-2d52f4e13476@linutronix.de
---
 include/linux/timex.h | 7 -------
 1 file changed, 7 deletions(-)

(limited to 'include')

diff --git a/include/linux/timex.h b/include/linux/timex.h
index 7f7a12fd8200..4ee32eff3f22 100644
--- a/include/linux/timex.h
+++ b/include/linux/timex.h
@@ -139,13 +139,6 @@ unsigned long random_get_entropy_fallback(void);
 #define MAXSEC 2048		/* max interval between updates (s) */
 #define NTP_PHASE_LIMIT ((MAXPHASE / NSEC_PER_USEC) << 5) /* beyond max. dispersion */
 
-/*
- * kernel variables
- * Note: maximum error = NTP sync distance = dispersion + delay / 2;
- * estimated error = NTP dispersion.
- */
-extern unsigned long tick_usec;		/* USER_HZ period (usec) */
-
 /* Required to safely shift negative values */
 #define shift_right(x, s) ({	\
 	__typeof__(x) __x = (x);	\
-- 
cgit v1.2.3


From 8102c4daf44ab86c2d2226a8136bec905d6e2bd1 Mon Sep 17 00:00:00 2001
From: Vincent Donnefort <vdonnefort@google.com>
Date: Wed, 11 Sep 2024 10:30:20 +0100
Subject: timekeeping: Add the boot clock to system time snapshot

For tracing purpose, the boot clock is interesting as it doesn't stop on
suspend. Export it as part of the time snapshot. This will later allow
the hypervisor to add boot clock timestamps to its events.

Signed-off-by: Vincent Donnefort <vdonnefort@google.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: John Stultz <jstultz@google.com>
Link: https://lore.kernel.org/all/20240911093029.3279154-5-vdonnefort@google.com
---
 include/linux/timekeeping.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h
index fc12a9ba2c88..e85c27347e44 100644
--- a/include/linux/timekeeping.h
+++ b/include/linux/timekeeping.h
@@ -275,6 +275,7 @@ struct ktime_timestamps {
  *				 counter value
  * @cycles:	Clocksource counter value to produce the system times
  * @real:	Realtime system time
+ * @boot:	Boot time
  * @raw:	Monotonic raw system time
  * @cs_id:	Clocksource ID
  * @clock_was_set_seq:	The sequence number of clock-was-set events
@@ -283,6 +284,7 @@ struct ktime_timestamps {
 struct system_time_snapshot {
 	u64			cycles;
 	ktime_t			real;
+	ktime_t			boot;
 	ktime_t			raw;
 	enum clocksource_ids	cs_id;
 	unsigned int		clock_was_set_seq;
-- 
cgit v1.2.3


From f69767a1ada3ac74be2e1ac0795a05e1d1384eff Mon Sep 17 00:00:00 2001
From: Wei Huang <wei.huang2@amd.com>
Date: Wed, 2 Oct 2024 11:59:50 -0500
Subject: PCI: Add TLP Processing Hints (TPH) support

Add support for PCIe TLP Processing Hints (TPH) support (see PCIe r6.2,
sec 6.17).

Add TPH register definitions in pci_regs.h, including the TPH Requester
capability register, TPH Requester control register, TPH Completer
capability, and the ST fields of MSI-X entry.

Introduce pcie_enable_tph() and pcie_disable_tph(), enabling drivers to
toggle TPH support and configure specific ST mode as needed. Also add a new
kernel parameter, "pci=notph", allowing users to disable TPH support across
the entire system.

Link: https://lore.kernel.org/r/20241002165954.128085-2-wei.huang2@amd.com
Co-developed-by: Jing Liu <jing2.liu@intel.com>
Co-developed-by: Paul Luse <paul.e.luse@linux.intel.com>
Co-developed-by: Eric Van Tassell <Eric.VanTassell@amd.com>
Signed-off-by: Jing Liu <jing2.liu@intel.com>
Signed-off-by: Paul Luse <paul.e.luse@linux.intel.com>
Signed-off-by: Eric Van Tassell <Eric.VanTassell@amd.com>
Signed-off-by: Wei Huang <wei.huang2@amd.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Ajit Khaparde <ajit.khaparde@broadcom.com>
Reviewed-by: Somnath Kotur <somnath.kotur@broadcom.com>
Reviewed-by: Andy Gospodarek <andrew.gospodarek@broadcom.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Reviewed-by: Lukas Wunner <lukas@wunner.de>
---
 include/linux/pci-tph.h       | 21 +++++++++++++++++++++
 include/linux/pci.h           |  7 +++++++
 include/uapi/linux/pci_regs.h | 37 +++++++++++++++++++++++++++++--------
 3 files changed, 57 insertions(+), 8 deletions(-)
 create mode 100644 include/linux/pci-tph.h

(limited to 'include')

diff --git a/include/linux/pci-tph.h b/include/linux/pci-tph.h
new file mode 100644
index 000000000000..58654a334ffb
--- /dev/null
+++ b/include/linux/pci-tph.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * TPH (TLP Processing Hints)
+ *
+ * Copyright (C) 2024 Advanced Micro Devices, Inc.
+ *     Eric Van Tassell <Eric.VanTassell@amd.com>
+ *     Wei Huang <wei.huang2@amd.com>
+ */
+#ifndef LINUX_PCI_TPH_H
+#define LINUX_PCI_TPH_H
+
+#ifdef CONFIG_PCIE_TPH
+void pcie_disable_tph(struct pci_dev *pdev);
+int pcie_enable_tph(struct pci_dev *pdev, int mode);
+#else
+static inline void pcie_disable_tph(struct pci_dev *pdev) { }
+static inline int pcie_enable_tph(struct pci_dev *pdev, int mode)
+{ return -EINVAL; }
+#endif
+
+#endif /* LINUX_PCI_TPH_H */
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 573b4c4c2be6..8351d76b6e12 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -434,6 +434,7 @@ struct pci_dev {
 	unsigned int	ats_enabled:1;		/* Address Translation Svc */
 	unsigned int	pasid_enabled:1;	/* Process Address Space ID */
 	unsigned int	pri_enabled:1;		/* Page Request Interface */
+	unsigned int	tph_enabled:1;		/* TLP Processing Hints */
 	unsigned int	is_managed:1;		/* Managed via devres */
 	unsigned int	is_msi_managed:1;	/* MSI release via devres installed */
 	unsigned int	needs_freset:1;		/* Requires fundamental reset */
@@ -534,6 +535,12 @@ struct pci_dev {
 
 	/* These methods index pci_reset_fn_methods[] */
 	u8 reset_methods[PCI_NUM_RESET_METHODS]; /* In priority order */
+
+#ifdef CONFIG_PCIE_TPH
+	u16		tph_cap;	/* TPH capability offset */
+	u8		tph_mode;	/* TPH mode */
+	u8		tph_req_type;	/* TPH requester type */
+#endif
 };
 
 static inline struct pci_dev *pci_physfn(struct pci_dev *dev)
diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h
index 12323b3334a9..155dea741615 100644
--- a/include/uapi/linux/pci_regs.h
+++ b/include/uapi/linux/pci_regs.h
@@ -340,7 +340,8 @@
 #define PCI_MSIX_ENTRY_UPPER_ADDR	0x4  /* Message Upper Address */
 #define PCI_MSIX_ENTRY_DATA		0x8  /* Message Data */
 #define PCI_MSIX_ENTRY_VECTOR_CTRL	0xc  /* Vector Control */
-#define  PCI_MSIX_ENTRY_CTRL_MASKBIT	0x00000001
+#define  PCI_MSIX_ENTRY_CTRL_MASKBIT	0x00000001  /* Mask Bit */
+#define  PCI_MSIX_ENTRY_CTRL_ST		0xffff0000  /* Steering Tag */
 
 /* CompactPCI Hotswap Register */
 
@@ -659,6 +660,7 @@
 #define  PCI_EXP_DEVCAP2_ATOMIC_COMP64	0x00000100 /* 64b AtomicOp completion */
 #define  PCI_EXP_DEVCAP2_ATOMIC_COMP128	0x00000200 /* 128b AtomicOp completion */
 #define  PCI_EXP_DEVCAP2_LTR		0x00000800 /* Latency tolerance reporting */
+#define  PCI_EXP_DEVCAP2_TPH_COMP_MASK	0x00003000 /* TPH completer support */
 #define  PCI_EXP_DEVCAP2_OBFF_MASK	0x000c0000 /* OBFF support mechanism */
 #define  PCI_EXP_DEVCAP2_OBFF_MSG	0x00040000 /* New message signaling */
 #define  PCI_EXP_DEVCAP2_OBFF_WAKE	0x00080000 /* Re-use WAKE# for OBFF */
@@ -1023,15 +1025,34 @@
 #define  PCI_DPA_CAP_SUBSTATE_MASK	0x1F	/* # substates - 1 */
 #define PCI_DPA_BASE_SIZEOF	16	/* size with 0 substates */
 
+/* TPH Completer Support */
+#define PCI_EXP_DEVCAP2_TPH_COMP_NONE		0x0 /* None */
+#define PCI_EXP_DEVCAP2_TPH_COMP_TPH_ONLY	0x1 /* TPH only */
+#define PCI_EXP_DEVCAP2_TPH_COMP_EXT_TPH	0x3 /* TPH and Extended TPH */
+
 /* TPH Requester */
 #define PCI_TPH_CAP		4	/* capability register */
-#define  PCI_TPH_CAP_LOC_MASK	0x600	/* location mask */
-#define   PCI_TPH_LOC_NONE	0x000	/* no location */
-#define   PCI_TPH_LOC_CAP	0x200	/* in capability */
-#define   PCI_TPH_LOC_MSIX	0x400	/* in MSI-X */
-#define PCI_TPH_CAP_ST_MASK	0x07FF0000	/* ST table mask */
-#define PCI_TPH_CAP_ST_SHIFT	16	/* ST table shift */
-#define PCI_TPH_BASE_SIZEOF	0xc	/* size with no ST table */
+#define  PCI_TPH_CAP_ST_NS	0x00000001 /* No ST Mode Supported */
+#define  PCI_TPH_CAP_ST_IV	0x00000002 /* Interrupt Vector Mode Supported */
+#define  PCI_TPH_CAP_ST_DS	0x00000004 /* Device Specific Mode Supported */
+#define  PCI_TPH_CAP_EXT_TPH	0x00000100 /* Ext TPH Requester Supported */
+#define  PCI_TPH_CAP_LOC_MASK	0x00000600 /* ST Table Location */
+#define   PCI_TPH_LOC_NONE	0x00000000 /* Not present */
+#define   PCI_TPH_LOC_CAP	0x00000200 /* In capability */
+#define   PCI_TPH_LOC_MSIX	0x00000400 /* In MSI-X */
+#define  PCI_TPH_CAP_ST_MASK	0x07FF0000 /* ST Table Size */
+#define  PCI_TPH_CAP_ST_SHIFT	16	/* ST Table Size shift */
+#define PCI_TPH_BASE_SIZEOF	0xc	/* Size with no ST table */
+
+#define PCI_TPH_CTRL		8	/* control register */
+#define  PCI_TPH_CTRL_MODE_SEL_MASK	0x00000007 /* ST Mode Select */
+#define   PCI_TPH_ST_NS_MODE		0x0 /* No ST Mode */
+#define   PCI_TPH_ST_IV_MODE		0x1 /* Interrupt Vector Mode */
+#define   PCI_TPH_ST_DS_MODE		0x2 /* Device Specific Mode */
+#define  PCI_TPH_CTRL_REQ_EN_MASK	0x00000300 /* TPH Requester Enable */
+#define   PCI_TPH_REQ_DISABLE		0x0 /* No TPH requests allowed */
+#define   PCI_TPH_REQ_TPH_ONLY		0x1 /* TPH only requests allowed */
+#define   PCI_TPH_REQ_EXT_TPH		0x3 /* Extended TPH requests allowed */
 
 /* Downstream Port Containment */
 #define PCI_EXP_DPC_CAP			0x04	/* DPC Capability */
-- 
cgit v1.2.3


From d2e8a34876ce69b27f450eebfc550ab8e316f752 Mon Sep 17 00:00:00 2001
From: Wei Huang <wei.huang2@amd.com>
Date: Wed, 2 Oct 2024 11:59:51 -0500
Subject: PCI/TPH: Add Steering Tag support

Add pcie_tph_get_cpu_st() to allow a caller to retrieve Steering Tags for a
target memory associated with a specific CPU. The ST tag is retrieved by
invoking PCI ACPI "_DSM to Query Cache Locality TPH Features" method
(rev=0x7, func=0xF) of the device's Root Port device.

Add pcie_tph_set_st_entry() to update the device's Steering Tags. The tags
will be written into the device's MSI-X table or the ST table located in
the TPH Extended Capability space.

Co-developed-by: Eric Van Tassell <Eric.VanTassell@amd.com>
Link: https://lore.kernel.org/r/20241002165954.128085-3-wei.huang2@amd.com
Signed-off-by: Eric Van Tassell <Eric.VanTassell@amd.com>
Signed-off-by: Wei Huang <wei.huang2@amd.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Ajit Khaparde <ajit.khaparde@broadcom.com>
Reviewed-by: Somnath Kotur <somnath.kotur@broadcom.com>
Reviewed-by: Andy Gospodarek <andrew.gospodarek@broadcom.com>
---
 include/linux/pci-tph.h | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

(limited to 'include')

diff --git a/include/linux/pci-tph.h b/include/linux/pci-tph.h
index 58654a334ffb..c3e806c13d64 100644
--- a/include/linux/pci-tph.h
+++ b/include/linux/pci-tph.h
@@ -9,10 +9,33 @@
 #ifndef LINUX_PCI_TPH_H
 #define LINUX_PCI_TPH_H
 
+/*
+ * According to the ECN for PCI Firmware Spec, Steering Tag can be different
+ * depending on the memory type: Volatile Memory or Persistent Memory. When a
+ * caller query about a target's Steering Tag, it must provide the target's
+ * tph_mem_type. ECN link: https://members.pcisig.com/wg/PCI-SIG/document/15470.
+ */
+enum tph_mem_type {
+	TPH_MEM_TYPE_VM,	/* volatile memory */
+	TPH_MEM_TYPE_PM		/* persistent memory */
+};
+
 #ifdef CONFIG_PCIE_TPH
+int pcie_tph_set_st_entry(struct pci_dev *pdev,
+			  unsigned int index, u16 tag);
+int pcie_tph_get_cpu_st(struct pci_dev *dev,
+			enum tph_mem_type mem_type,
+			unsigned int cpu_uid, u16 *tag);
 void pcie_disable_tph(struct pci_dev *pdev);
 int pcie_enable_tph(struct pci_dev *pdev, int mode);
 #else
+static inline int pcie_tph_set_st_entry(struct pci_dev *pdev,
+					unsigned int index, u16 tag)
+{ return -EINVAL; }
+static inline int pcie_tph_get_cpu_st(struct pci_dev *dev,
+				      enum tph_mem_type mem_type,
+				      unsigned int cpu_uid, u16 *tag)
+{ return -EINVAL; }
 static inline void pcie_disable_tph(struct pci_dev *pdev) { }
 static inline int pcie_enable_tph(struct pci_dev *pdev, int mode)
 { return -EINVAL; }
-- 
cgit v1.2.3


From 277b339c4ba5c3d51f86892efd7bfbb012318ed8 Mon Sep 17 00:00:00 2001
From: "Russell King (Oracle)" <rmk+kernel@armlinux.org.uk>
Date: Tue, 1 Oct 2024 17:04:10 +0100
Subject: net: pcs: xpcs: move PCS reset to .pcs_pre_config()

Move the PCS reset to .pcs_pre_config() rather than at creation time,
which means we call the reset function with the interface that we're
actually going to be using to talk to the downstream device.

Reviewed-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Tested-by: Vladimir Oltean <vladimir.oltean@nxp.com> # sja1105
Signed-off-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
Tested-by: for them?
Link: https://patch.msgid.link/E1svfMA-005ZI3-Va@rmk-PC.armlinux.org.uk
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/pcs/pcs-xpcs.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/pcs/pcs-xpcs.h b/include/linux/pcs/pcs-xpcs.h
index b4a4eb6c8866..fd75d0605bb6 100644
--- a/include/linux/pcs/pcs-xpcs.h
+++ b/include/linux/pcs/pcs-xpcs.h
@@ -61,6 +61,7 @@ struct dw_xpcs {
 	struct clk_bulk_data clks[DW_XPCS_NUM_CLKS];
 	struct phylink_pcs pcs;
 	phy_interface_t interface;
+	bool need_reset;
 };
 
 int xpcs_get_an_mode(struct dw_xpcs *xpcs, phy_interface_t interface);
-- 
cgit v1.2.3


From bedea1539acb808c870ad85b16f8d8f0bef5e0bb Mon Sep 17 00:00:00 2001
From: "Russell King (Oracle)" <rmk+kernel@armlinux.org.uk>
Date: Tue, 1 Oct 2024 17:04:26 +0100
Subject: net: pcs: xpcs: add xpcs_destroy_pcs() and xpcs_create_pcs_mdiodev()

Provide xpcs create/destroy functions that return and take a phylink_pcs
pointer instead of an xpcs pointer. This will be used by drivers that
have been converted to use phylink_pcs pointers internally, rather than
dw_xpcs pointers.

As xpcs_create_mdiodev() no longer makes use of its interface argument,
pass PHY_INTERFACE_MODE_NA into xpcs_create_mdiodev() until it is
removed later in the series.

Reviewed-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
Link: https://patch.msgid.link/E1svfMQ-005ZIL-Bi@rmk-PC.armlinux.org.uk
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/pcs/pcs-xpcs.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/linux/pcs/pcs-xpcs.h b/include/linux/pcs/pcs-xpcs.h
index fd75d0605bb6..a4e2243ce647 100644
--- a/include/linux/pcs/pcs-xpcs.h
+++ b/include/linux/pcs/pcs-xpcs.h
@@ -78,4 +78,7 @@ struct dw_xpcs *xpcs_create_fwnode(struct fwnode_handle *fwnode,
 				   phy_interface_t interface);
 void xpcs_destroy(struct dw_xpcs *xpcs);
 
+struct phylink_pcs *xpcs_create_pcs_mdiodev(struct mii_bus *bus, int addr);
+void xpcs_destroy_pcs(struct phylink_pcs *pcs);
+
 #endif /* __LINUX_PCS_XPCS_H */
-- 
cgit v1.2.3


From bf5a61645bb2d51be53da5b26f948045b571147c Mon Sep 17 00:00:00 2001
From: "Russell King (Oracle)" <rmk+kernel@armlinux.org.uk>
Date: Tue, 1 Oct 2024 17:04:51 +0100
Subject: net: pcs: xpcs: drop interface argument from xpcs_create*()

The XPCS sub-driver no longer uses the "interface" argument to the
xpcs_create_mdiodev() and xpcs_create_fwnode() functions. Remove
this now unnecessary argument, updating the stmmac driver
appropriately.

Reviewed-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
Link: https://patch.msgid.link/E1svfMp-005ZIp-UX@rmk-PC.armlinux.org.uk
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/pcs/pcs-xpcs.h | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/pcs/pcs-xpcs.h b/include/linux/pcs/pcs-xpcs.h
index a4e2243ce647..758daabb76c7 100644
--- a/include/linux/pcs/pcs-xpcs.h
+++ b/include/linux/pcs/pcs-xpcs.h
@@ -72,10 +72,8 @@ int xpcs_do_config(struct dw_xpcs *xpcs, phy_interface_t interface,
 void xpcs_get_interfaces(struct dw_xpcs *xpcs, unsigned long *interfaces);
 int xpcs_config_eee(struct dw_xpcs *xpcs, int mult_fact_100ns,
 		    int enable);
-struct dw_xpcs *xpcs_create_mdiodev(struct mii_bus *bus, int addr,
-				    phy_interface_t interface);
-struct dw_xpcs *xpcs_create_fwnode(struct fwnode_handle *fwnode,
-				   phy_interface_t interface);
+struct dw_xpcs *xpcs_create_mdiodev(struct mii_bus *bus, int addr);
+struct dw_xpcs *xpcs_create_fwnode(struct fwnode_handle *fwnode);
 void xpcs_destroy(struct dw_xpcs *xpcs);
 
 struct phylink_pcs *xpcs_create_pcs_mdiodev(struct mii_bus *bus, int addr);
-- 
cgit v1.2.3


From faefc9730d073e88a490e37b00e3e196b823abcb Mon Sep 17 00:00:00 2001
From: "Russell King (Oracle)" <rmk+kernel@armlinux.org.uk>
Date: Tue, 1 Oct 2024 17:04:57 +0100
Subject: net: pcs: xpcs: make xpcs_do_config() and xpcs_link_up() internal

As nothing outside pcs-xpcs.c calls neither xpcs_do_config() nor
xpcs_link_up(), remove their exports and prototypes.

Reviewed-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
Link: https://patch.msgid.link/E1svfMv-005ZIv-2M@rmk-PC.armlinux.org.uk
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/pcs/pcs-xpcs.h | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/pcs/pcs-xpcs.h b/include/linux/pcs/pcs-xpcs.h
index 758daabb76c7..abda475111d1 100644
--- a/include/linux/pcs/pcs-xpcs.h
+++ b/include/linux/pcs/pcs-xpcs.h
@@ -65,10 +65,6 @@ struct dw_xpcs {
 };
 
 int xpcs_get_an_mode(struct dw_xpcs *xpcs, phy_interface_t interface);
-void xpcs_link_up(struct phylink_pcs *pcs, unsigned int neg_mode,
-		  phy_interface_t interface, int speed, int duplex);
-int xpcs_do_config(struct dw_xpcs *xpcs, phy_interface_t interface,
-		   const unsigned long *advertising, unsigned int neg_mode);
 void xpcs_get_interfaces(struct dw_xpcs *xpcs, unsigned long *interfaces);
 int xpcs_config_eee(struct dw_xpcs *xpcs, int mult_fact_100ns,
 		    int enable);
-- 
cgit v1.2.3


From ced20ea315fe8591093f19574ec32222c1ab71ba Mon Sep 17 00:00:00 2001
From: Vijendar Mukunda <Vijendar.Mukunda@amd.com>
Date: Tue, 24 Sep 2024 13:48:43 +0530
Subject: soundwire: amd: pass acp pci revision id as resource data

Pass ACP pci revision id as resource data and store it in amd SoundWire
manager private data structure. This field will be used to differentiate
ACP variants.

Signed-off-by: Vijendar Mukunda <Vijendar.Mukunda@amd.com>
Link: https://lore.kernel.org/r/20240924081846.1834612-2-Vijendar.Mukunda@amd.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 include/linux/soundwire/sdw_amd.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include')

diff --git a/include/linux/soundwire/sdw_amd.h b/include/linux/soundwire/sdw_amd.h
index 28a4eb77717f..e0abc59d4748 100644
--- a/include/linux/soundwire/sdw_amd.h
+++ b/include/linux/soundwire/sdw_amd.h
@@ -30,6 +30,7 @@
 
 struct acp_sdw_pdata {
 	u16 instance;
+	u32 acp_rev;
 	/* mutex to protect acp common register access */
 	struct mutex *acp_sdw_lock;
 };
@@ -66,6 +67,7 @@ struct sdw_amd_dai_runtime {
  * @instance: SoundWire manager instance
  * @quirks: SoundWire manager quirks
  * @wake_en_mask: wake enable mask per SoundWire manager
+ * @acp_rev: acp pci device revision id
  * @clk_stopped: flag set to true when clock is stopped
  * @power_mode_mask: flag interprets amd SoundWire manager power mode
  * @dai_runtime_array: dai runtime array
@@ -94,6 +96,7 @@ struct amd_sdw_manager {
 	u32 quirks;
 	u32 wake_en_mask;
 	u32 power_mode_mask;
+	u32 acp_rev;
 	bool clk_stopped;
 
 	struct sdw_amd_dai_runtime **dai_runtime_array;
@@ -134,6 +137,7 @@ struct sdw_amd_ctx {
  * struct sdw_amd_res - Soundwire AMD global resource structure,
  * typically populated by the DSP driver/Legacy driver
  *
+ * @acp_rev: acp pci device revision id
  * @addr: acp pci device resource start address
  * @reg_range: ACP register range
  * @link_mask: bit-wise mask listing links selected by the DSP driver/
@@ -146,6 +150,7 @@ struct sdw_amd_ctx {
  * @acp_lock: mutex protecting acp common registers access
  */
 struct sdw_amd_res {
+	u32 acp_rev;
 	u32 addr;
 	u32 reg_range;
 	u32 link_mask;
-- 
cgit v1.2.3


From 7b54323dde29452dd06e6acd2701d9b489c9547d Mon Sep 17 00:00:00 2001
From: Vijendar Mukunda <Vijendar.Mukunda@amd.com>
Date: Tue, 24 Sep 2024 13:48:44 +0530
Subject: soundwire: amd: refactor existing code for acp 6.3 platform

Refactor existing code by adding acp pci revision id coditional checks
for ACP 6.3 platform. Rename the macros and structure names with ACP63
tag.

Signed-off-by: Vijendar Mukunda <Vijendar.Mukunda@amd.com>
Link: https://lore.kernel.org/r/20240924081846.1834612-3-Vijendar.Mukunda@amd.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 include/linux/soundwire/sdw_amd.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/soundwire/sdw_amd.h b/include/linux/soundwire/sdw_amd.h
index e0abc59d4748..c9586f22c5a9 100644
--- a/include/linux/soundwire/sdw_amd.h
+++ b/include/linux/soundwire/sdw_amd.h
@@ -27,6 +27,7 @@
 #define ACP_SDW0	0
 #define ACP_SDW1	1
 #define AMD_SDW_MAX_MANAGER_COUNT	2
+#define ACP63_PCI_REV_ID		0x63
 
 struct acp_sdw_pdata {
 	u16 instance;
-- 
cgit v1.2.3


From 444d6824a4feca142b0a57095a2f1f1bda98e2ab Mon Sep 17 00:00:00 2001
From: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Date: Thu, 3 Oct 2024 15:06:38 +0800
Subject: soundwire: optimize sdw_stream_runtime memory layout

pahole suggestion: swap position of 'm_rt_count'

before: pahole -C sdw_stream_runtime drivers/soundwire/soundwire-bus.ko
struct sdw_stream_runtime {
	const char  *              name;                 /*     0     8 */
	struct sdw_stream_params   params;               /*     8    12 */
	enum sdw_stream_state      state;                /*    20     4 */
	enum sdw_stream_type       type;                 /*    24     4 */

	/* XXX 4 bytes hole, try to pack */

	struct list_head           master_list;          /*    32    16 */
	int                        m_rt_count;           /*    48     4 */

	/* size: 56, cachelines: 1, members: 6 */
	/* sum members: 48, holes: 1, sum holes: 4 */
	/* padding: 4 */
	/* last cacheline: 56 bytes */
};

after: pahole --reorganize -C sdw_stream_runtime drivers/soundwire/soundwire-bus.ko
struct sdw_stream_runtime {
	const char  *              name;                 /*     0     8 */
	struct sdw_stream_params   params;               /*     8    12 */
	enum sdw_stream_state      state;                /*    20     4 */
	enum sdw_stream_type       type;                 /*    24     4 */
	int                        m_rt_count;           /*    28     4 */
	struct list_head           master_list;          /*    32    16 */

	/* size: 48, cachelines: 1, members: 6 */
	/* last cacheline: 48 bytes */
};   /* saved 8 bytes! */

Signed-off-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Signed-off-by: Bard Liao <yung-chuan.liao@linux.intel.com>
Link: https://lore.kernel.org/r/20241003070650.62787-3-yung-chuan.liao@linux.intel.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 include/linux/soundwire/sdw.h | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/soundwire/sdw.h b/include/linux/soundwire/sdw.h
index 5e0dd47a0412..a4fa45132030 100644
--- a/include/linux/soundwire/sdw.h
+++ b/include/linux/soundwire/sdw.h
@@ -820,15 +820,15 @@ struct sdw_master_port_ops {
 struct sdw_msg;
 
 /**
- * struct sdw_defer - SDW deffered message
- * @length: message length
+ * struct sdw_defer - SDW deferred message
  * @complete: message completion
  * @msg: SDW message
+ * @length: message length
  */
 struct sdw_defer {
+	struct sdw_msg *msg;
 	int length;
 	struct completion complete;
-	struct sdw_msg *msg;
 };
 
 /**
@@ -1010,18 +1010,18 @@ struct sdw_stream_params {
  * @params: Stream parameters
  * @state: Current state of the stream
  * @type: Stream type PCM or PDM
+ * @m_rt_count: Count of Master runtime(s) in this stream
  * @master_list: List of Master runtime(s) in this stream.
  * master_list can contain only one m_rt per Master instance
  * for a stream
- * @m_rt_count: Count of Master runtime(s) in this stream
  */
 struct sdw_stream_runtime {
 	const char *name;
 	struct sdw_stream_params params;
 	enum sdw_stream_state state;
 	enum sdw_stream_type type;
-	struct list_head master_list;
 	int m_rt_count;
+	struct list_head master_list;
 };
 
 struct sdw_stream_runtime *sdw_alloc_stream(const char *stream_name);
-- 
cgit v1.2.3


From 6cb2c156439430a7f9db2e1f71a7dccf1ca978bf Mon Sep 17 00:00:00 2001
From: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Date: Thu, 3 Oct 2024 15:06:39 +0800
Subject: soundwire: optimize sdw_master_prop

Make pahole happy by moving pointers and u64 first instead of
interleaving them.

Signed-off-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Signed-off-by: Bard Liao <yung-chuan.liao@linux.intel.com>
Link: https://lore.kernel.org/r/20241003070650.62787-4-yung-chuan.liao@linux.intel.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 include/linux/soundwire/sdw.h | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/linux/soundwire/sdw.h b/include/linux/soundwire/sdw.h
index a4fa45132030..2caea7345c3e 100644
--- a/include/linux/soundwire/sdw.h
+++ b/include/linux/soundwire/sdw.h
@@ -406,13 +406,14 @@ struct sdw_slave_prop {
 
 /**
  * struct sdw_master_prop - Master properties
+ * @clk_gears: Clock gears supported
+ * @clk_freq: Clock frequencies supported, in Hz
+ * @quirks: bitmask identifying optional behavior beyond the scope of the MIPI specification
  * @revision: MIPI spec version of the implementation
  * @clk_stop_modes: Bitmap, bit N set when clock-stop-modeN supported
  * @max_clk_freq: Maximum Bus clock frequency, in Hz
  * @num_clk_gears: Number of clock gears supported
- * @clk_gears: Clock gears supported
  * @num_clk_freq: Number of clock frequencies supported, in Hz
- * @clk_freq: Clock frequencies supported, in Hz
  * @default_frame_rate: Controller default Frame rate, in Hz
  * @default_row: Number of rows
  * @default_col: Number of columns
@@ -421,24 +422,23 @@ struct sdw_slave_prop {
  * command
  * @mclk_freq: clock reference passed to SoundWire Master, in Hz.
  * @hw_disabled: if true, the Master is not functional, typically due to pin-mux
- * @quirks: bitmask identifying optional behavior beyond the scope of the MIPI specification
  */
 struct sdw_master_prop {
+	u32 *clk_gears;
+	u32 *clk_freq;
+	u64 quirks;
 	u32 revision;
 	u32 clk_stop_modes;
 	u32 max_clk_freq;
 	u32 num_clk_gears;
-	u32 *clk_gears;
 	u32 num_clk_freq;
-	u32 *clk_freq;
 	u32 default_frame_rate;
 	u32 default_row;
 	u32 default_col;
-	bool dynamic_frame;
 	u32 err_threshold;
 	u32 mclk_freq;
+	bool dynamic_frame;
 	bool hw_disabled;
-	u64 quirks;
 };
 
 /* Definitions for Master quirks */
-- 
cgit v1.2.3


From 0a323dad1c4e04048988cd04c60eaffd6ae61b1a Mon Sep 17 00:00:00 2001
From: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Date: Thu, 3 Oct 2024 15:06:40 +0800
Subject: soundwire: optimize sdw_bus structure

The sdw_bus structure has seen multiple additions over the years. It's
one of the most used structures in this subsystem, so there's merit in
reshuffling the members a bit with 'pahole' to reduce holes and
structures across cache lines.

before:

struct sdw_bus {
	struct device *            dev;                  /*     0     8 */
	struct sdw_master_device * md;                   /*     8     8 */
	int                        controller_id;        /*    16     4 */
	unsigned int               link_id;              /*    20     4 */
	int                        id;                   /*    24     4 */

	/* XXX 4 bytes hole, try to pack */

	struct list_head           slaves;               /*    32    16 */
	long unsigned int          assigned[1];          /*    48     8 */
	struct mutex               bus_lock;             /*    56   160 */
	/* --- cacheline 3 boundary (192 bytes) was 24 bytes ago --- */
	struct lock_class_key      bus_lock_key;         /*   216    16 */
	struct mutex               msg_lock;             /*   232   160 */
	/* --- cacheline 6 boundary (384 bytes) was 8 bytes ago --- */
	struct lock_class_key      msg_lock_key;         /*   392    16 */
	int                        (*compute_params)(struct sdw_bus *); /*   408     8 */
	const struct sdw_master_ops  * ops;              /*   416     8 */
	const struct sdw_master_port_ops  * port_ops;    /*   424     8 */
	struct sdw_bus_params      params;               /*   432    36 */

	/* XXX 4 bytes hole, try to pack */

	/* --- cacheline 7 boundary (448 bytes) was 24 bytes ago --- */
	struct sdw_master_prop     prop;                 /*   472    72 */

	/* XXX last struct has 6 bytes of padding */

	/* --- cacheline 8 boundary (512 bytes) was 32 bytes ago --- */
	void *                     vendor_specific_prop; /*   544     8 */
	struct list_head           m_rt_list;            /*   552    16 */
	struct dentry *            debugfs;              /*   568     8 */
	/* --- cacheline 9 boundary (576 bytes) --- */
	struct irq_chip            irq_chip;             /*   576   264 */
	/* --- cacheline 13 boundary (832 bytes) was 8 bytes ago --- */
	struct irq_domain *        domain;               /*   840     8 */
	struct sdw_defer           defer_msg;            /*   848   112 */
	/* --- cacheline 15 boundary (960 bytes) --- */
	unsigned int               clk_stop_timeout;     /*   960     4 */
	u32                        bank_switch_timeout;  /*   964     4 */
	bool                       multi_link;           /*   968     1 */

	/* XXX 3 bytes hole, try to pack */

	int                        hw_sync_min_links;    /*   972     4 */
	int                        stream_refcount;      /*   976     4 */

	/* size: 984, cachelines: 16, members: 27 */
	/* sum members: 969, holes: 3, sum holes: 11 */
	/* padding: 4 */
	/* paddings: 1, sum paddings: 6 */
	/* last cacheline: 24 bytes */
};

after:

struct sdw_bus {
	struct device *            dev;                  /*     0     8 */
	struct sdw_master_device * md;                   /*     8     8 */
	struct lock_class_key      bus_lock_key;         /*    16    16 */
	struct mutex               bus_lock;             /*    32   160 */
	/* --- cacheline 3 boundary (192 bytes) --- */
	struct list_head           slaves;               /*   192    16 */
	struct lock_class_key      msg_lock_key;         /*   208    16 */
	struct mutex               msg_lock;             /*   224   160 */
	/* --- cacheline 6 boundary (384 bytes) --- */
	struct list_head           m_rt_list;            /*   384    16 */
	struct sdw_defer           defer_msg;            /*   400   112 */
	/* --- cacheline 8 boundary (512 bytes) --- */
	struct sdw_bus_params      params;               /*   512    36 */
	int                        stream_refcount;      /*   548     4 */
	const struct sdw_master_ops  * ops;              /*   552     8 */
	const struct sdw_master_port_ops  * port_ops;    /*   560     8 */
	struct sdw_master_prop     prop;                 /*   568    72 */

	/* XXX last struct has 6 bytes of padding */

	/* --- cacheline 10 boundary (640 bytes) --- */
	void *                     vendor_specific_prop; /*   640     8 */
	int                        hw_sync_min_links;    /*   648     4 */
	int                        controller_id;        /*   652     4 */
	unsigned int               link_id;              /*   656     4 */
	int                        id;                   /*   660     4 */
	int                        (*compute_params)(struct sdw_bus *); /*   664     8 */
	long unsigned int          assigned[1];          /*   672     8 */
	unsigned int               clk_stop_timeout;     /*   680     4 */
	u32                        bank_switch_timeout;  /*   684     4 */
	struct irq_chip            irq_chip;             /*   688   264 */
	/* --- cacheline 14 boundary (896 bytes) was 56 bytes ago --- */
	struct irq_domain *        domain;               /*   952     8 */
	/* --- cacheline 15 boundary (960 bytes) --- */
	struct dentry *            debugfs;              /*   960     8 */
	bool                       multi_link;           /*   968     1 */

	/* size: 976, cachelines: 16, members: 27 */
	/* padding: 7 */
	/* paddings: 1, sum paddings: 6 */
	/* last cacheline: 16 bytes */
};

Signed-off-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Signed-off-by: Bard Liao <yung-chuan.liao@linux.intel.com>
Link: https://lore.kernel.org/r/20241003070650.62787-5-yung-chuan.liao@linux.intel.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 include/linux/soundwire/sdw.h | 77 ++++++++++++++++++++++---------------------
 1 file changed, 40 insertions(+), 37 deletions(-)

(limited to 'include')

diff --git a/include/linux/soundwire/sdw.h b/include/linux/soundwire/sdw.h
index 2caea7345c3e..6fcf122c1831 100644
--- a/include/linux/soundwire/sdw.h
+++ b/include/linux/soundwire/sdw.h
@@ -871,68 +871,71 @@ struct sdw_master_ops {
  * struct sdw_bus - SoundWire bus
  * @dev: Shortcut to &bus->md->dev to avoid changing the entire code.
  * @md: Master device
- * @controller_id: system-unique controller ID. If set to -1, the bus @id will be used.
- * @link_id: Link id number, can be 0 to N, unique for each Controller
- * @id: bus system-wide unique id
- * @slaves: list of Slaves on this bus
- * @assigned: Bitmap for Slave device numbers.
- * Bit set implies used number, bit clear implies unused number.
+ * @bus_lock_key: bus lock key associated to @bus_lock
  * @bus_lock: bus lock
+ * @slaves: list of Slaves on this bus
+ * @msg_lock_key: message lock key associated to @msg_lock
  * @msg_lock: message lock
- * @compute_params: points to Bus resource management implementation
- * @ops: Master callback ops
- * @port_ops: Master port callback ops
- * @params: Current bus parameters
- * @prop: Master properties
- * @vendor_specific_prop: pointer to non-standard properties
  * @m_rt_list: List of Master instance of all stream(s) running on Bus. This
  * is used to compute and program bus bandwidth, clock, frame shape,
  * transport and port parameters
- * @debugfs: Bus debugfs
- * @domain: IRQ domain
  * @defer_msg: Defer message
- * @clk_stop_timeout: Clock stop timeout computed
- * @bank_switch_timeout: Bank switch timeout computed
- * @multi_link: Store bus property that indicates if multi links
- * are supported. This flag is populated by drivers after reading
- * appropriate firmware (ACPI/DT).
+ * @params: Current bus parameters
+ * @stream_refcount: number of streams currently using this bus
+ * @ops: Master callback ops
+ * @port_ops: Master port callback ops
+ * @prop: Master properties
+ * @vendor_specific_prop: pointer to non-standard properties
  * @hw_sync_min_links: Number of links used by a stream above which
  * hardware-based synchronization is required. This value is only
  * meaningful if multi_link is set. If set to 1, hardware-based
  * synchronization will be used even if a stream only uses a single
  * SoundWire segment.
- * @stream_refcount: number of streams currently using this bus
+ * @controller_id: system-unique controller ID. If set to -1, the bus @id will be used.
+ * @link_id: Link id number, can be 0 to N, unique for each Controller
+ * @id: bus system-wide unique id
+ * @compute_params: points to Bus resource management implementation
+ * @assigned: Bitmap for Slave device numbers.
+ * Bit set implies used number, bit clear implies unused number.
+ * @clk_stop_timeout: Clock stop timeout computed
+ * @bank_switch_timeout: Bank switch timeout computed
+ * @domain: IRQ domain
+ * @irq_chip: IRQ chip
+ * @debugfs: Bus debugfs (optional)
+ * @multi_link: Store bus property that indicates if multi links
+ * are supported. This flag is populated by drivers after reading
+ * appropriate firmware (ACPI/DT).
  */
 struct sdw_bus {
 	struct device *dev;
 	struct sdw_master_device *md;
-	int controller_id;
-	unsigned int link_id;
-	int id;
-	struct list_head slaves;
-	DECLARE_BITMAP(assigned, SDW_MAX_DEVICES);
-	struct mutex bus_lock;
 	struct lock_class_key bus_lock_key;
-	struct mutex msg_lock;
+	struct mutex bus_lock;
+	struct list_head slaves;
 	struct lock_class_key msg_lock_key;
-	int (*compute_params)(struct sdw_bus *bus);
+	struct mutex msg_lock;
+	struct list_head m_rt_list;
+	struct sdw_defer defer_msg;
+	struct sdw_bus_params params;
+	int stream_refcount;
 	const struct sdw_master_ops *ops;
 	const struct sdw_master_port_ops *port_ops;
-	struct sdw_bus_params params;
 	struct sdw_master_prop prop;
 	void *vendor_specific_prop;
-	struct list_head m_rt_list;
+	int hw_sync_min_links;
+	int controller_id;
+	unsigned int link_id;
+	int id;
+	int (*compute_params)(struct sdw_bus *bus);
+	DECLARE_BITMAP(assigned, SDW_MAX_DEVICES);
+	unsigned int clk_stop_timeout;
+	u32 bank_switch_timeout;
+	struct irq_chip irq_chip;
+	struct irq_domain *domain;
 #ifdef CONFIG_DEBUG_FS
 	struct dentry *debugfs;
 #endif
-	struct irq_chip irq_chip;
-	struct irq_domain *domain;
-	struct sdw_defer defer_msg;
-	unsigned int clk_stop_timeout;
-	u32 bank_switch_timeout;
 	bool multi_link;
-	int hw_sync_min_links;
-	int stream_refcount;
 };
 
 int sdw_bus_master_add(struct sdw_bus *bus, struct device *parent,
-- 
cgit v1.2.3


From 1c758df5a83ea0c9b5055536336d8a586b5010b0 Mon Sep 17 00:00:00 2001
From: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Date: Thu, 3 Oct 2024 15:06:41 +0800
Subject: soundwire: optimize sdw_slave_prop

move pointers first, and move booleans together.

before:

struct sdw_slave_prop {
	u32                        mipi_revision;        /*     0     4 */
	bool                       wake_capable;         /*     4     1 */
	bool                       test_mode_capable;    /*     5     1 */
	bool                       clk_stop_mode1;       /*     6     1 */
	bool                       simple_clk_stop_capable; /*     7     1 */
	u32                        clk_stop_timeout;     /*     8     4 */
	u32                        ch_prep_timeout;      /*    12     4 */
	enum sdw_clk_stop_reset_behave reset_behave;     /*    16     4 */
	bool                       high_PHY_capable;     /*    20     1 */
	bool                       paging_support;       /*    21     1 */
	bool                       bank_delay_support;   /*    22     1 */

	/* XXX 1 byte hole, try to pack */

	enum sdw_p15_behave        p15_behave;           /*    24     4 */
	bool                       lane_control_support; /*    28     1 */

	/* XXX 3 bytes hole, try to pack */

	u32                        master_count;         /*    32     4 */
	u32                        source_ports;         /*    36     4 */
	u32                        sink_ports;           /*    40     4 */

	/* XXX 4 bytes hole, try to pack */

	struct sdw_dp0_prop *      dp0_prop;             /*    48     8 */
	struct sdw_dpn_prop *      src_dpn_prop;         /*    56     8 */
	/* --- cacheline 1 boundary (64 bytes) --- */
	struct sdw_dpn_prop *      sink_dpn_prop;        /*    64     8 */
	u8                         scp_int1_mask;        /*    72     1 */

	/* XXX 3 bytes hole, try to pack */

	u32                        quirks;               /*    76     4 */
	bool                       clock_reg_supported;  /*    80     1 */
	bool                       use_domain_irq;       /*    81     1 */

	/* size: 88, cachelines: 2, members: 23 */
	/* sum members: 71, holes: 4, sum holes: 11 */
	/* padding: 6 */
	/* last cacheline: 24 bytes */
};

after:

truct sdw_slave_prop {
	struct sdw_dp0_prop *      dp0_prop;             /*     0     8 */
	struct sdw_dpn_prop *      src_dpn_prop;         /*     8     8 */
	struct sdw_dpn_prop *      sink_dpn_prop;        /*    16     8 */
	u32                        mipi_revision;        /*    24     4 */
	bool                       wake_capable;         /*    28     1 */
	bool                       test_mode_capable;    /*    29     1 */
	bool                       clk_stop_mode1;       /*    30     1 */
	bool                       simple_clk_stop_capable; /*    31     1 */
	u32                        clk_stop_timeout;     /*    32     4 */
	u32                        ch_prep_timeout;      /*    36     4 */
	enum sdw_clk_stop_reset_behave reset_behave;     /*    40     4 */
	bool                       high_PHY_capable;     /*    44     1 */
	bool                       paging_support;       /*    45     1 */
	bool                       bank_delay_support;   /*    46     1 */
	bool                       lane_control_support; /*    47     1 */
	enum sdw_p15_behave        p15_behave;           /*    48     4 */
	u32                        master_count;         /*    52     4 */
	u32                        source_ports;         /*    56     4 */
	u32                        sink_ports;           /*    60     4 */
	/* --- cacheline 1 boundary (64 bytes) --- */
	u32                        quirks;               /*    64     4 */
	u8                         scp_int1_mask;        /*    68     1 */
	bool                       clock_reg_supported;  /*    69     1 */
	bool                       use_domain_irq;       /*    70     1 */

	/* size: 72, cachelines: 2, members: 23 */
	/* padding: 1 */
	/* last cacheline: 8 bytes */
};

Signed-off-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Signed-off-by: Bard Liao <yung-chuan.liao@linux.intel.com>
Link: https://lore.kernel.org/r/20241003070650.62787-6-yung-chuan.liao@linux.intel.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 include/linux/soundwire/sdw.h | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/include/linux/soundwire/sdw.h b/include/linux/soundwire/sdw.h
index 6fcf122c1831..38db81f5bdb9 100644
--- a/include/linux/soundwire/sdw.h
+++ b/include/linux/soundwire/sdw.h
@@ -344,6 +344,9 @@ struct sdw_dpn_prop {
 
 /**
  * struct sdw_slave_prop - SoundWire Slave properties
+ * @dp0_prop: Data Port 0 properties
+ * @src_dpn_prop: Source Data Port N properties
+ * @sink_dpn_prop: Sink Data Port N properties
  * @mipi_revision: Spec version of the implementation
  * @wake_capable: Wake-up events are supported
  * @test_mode_capable: If test mode is supported
@@ -360,15 +363,12 @@ struct sdw_dpn_prop {
  * SCP_AddrPage2
  * @bank_delay_support: Slave implements bank delay/bridge support registers
  * SCP_BankDelay and SCP_NextFrame
+ * @lane_control_support: Slave supports lane control
  * @p15_behave: Slave behavior when the Master attempts a read to the Port15
  * alias
- * @lane_control_support: Slave supports lane control
  * @master_count: Number of Masters present on this Slave
  * @source_ports: Bitmap identifying source ports
  * @sink_ports: Bitmap identifying sink ports
- * @dp0_prop: Data Port 0 properties
- * @src_dpn_prop: Source Data Port N properties
- * @sink_dpn_prop: Sink Data Port N properties
  * @scp_int1_mask: SCP_INT1_MASK desired settings
  * @quirks: bitmask identifying deltas from the MIPI specification
  * @clock_reg_supported: the Peripheral implements the clock base and scale
@@ -377,6 +377,9 @@ struct sdw_dpn_prop {
  * @use_domain_irq: call actual IRQ handler on slave, as well as callback
  */
 struct sdw_slave_prop {
+	struct sdw_dp0_prop *dp0_prop;
+	struct sdw_dpn_prop *src_dpn_prop;
+	struct sdw_dpn_prop *sink_dpn_prop;
 	u32 mipi_revision;
 	bool wake_capable;
 	bool test_mode_capable;
@@ -388,16 +391,13 @@ struct sdw_slave_prop {
 	bool high_PHY_capable;
 	bool paging_support;
 	bool bank_delay_support;
-	enum sdw_p15_behave p15_behave;
 	bool lane_control_support;
+	enum sdw_p15_behave p15_behave;
 	u32 master_count;
 	u32 source_ports;
 	u32 sink_ports;
-	struct sdw_dp0_prop *dp0_prop;
-	struct sdw_dpn_prop *src_dpn_prop;
-	struct sdw_dpn_prop *sink_dpn_prop;
-	u8 scp_int1_mask;
 	u32 quirks;
+	u8 scp_int1_mask;
 	bool clock_reg_supported;
 	bool use_domain_irq;
 };
-- 
cgit v1.2.3


From 557e28f8b53243097162cf4d3e59bcee9fb9713b Mon Sep 17 00:00:00 2001
From: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Date: Thu, 3 Oct 2024 15:06:42 +0800
Subject: soundwire: optimize sdw_dp0_prop

Move pointers and booleans.

Before:

struct sdw_dp0_prop {
	u32                        max_word;             /*     0     4 */
	u32                        min_word;             /*     4     4 */
	u32                        num_words;            /*     8     4 */

	/* XXX 4 bytes hole, try to pack */

	u32 *                      words;                /*    16     8 */
	bool                       BRA_flow_controlled;  /*    24     1 */
	bool                       simple_ch_prep_sm;    /*    25     1 */

	/* XXX 2 bytes hole, try to pack */

	u32                        ch_prep_timeout;      /*    28     4 */
	bool                       imp_def_interrupts;   /*    32     1 */

	/* size: 40, cachelines: 1, members: 8 */
	/* sum members: 27, holes: 2, sum holes: 6 */
	/* padding: 7 */
	/* last cacheline: 40 bytes */
};

after:

struct sdw_dp0_prop {
	u32 *                      words;                /*     0     8 */
	u32                        max_word;             /*     8     4 */
	u32                        min_word;             /*    12     4 */
	u32                        num_words;            /*    16     4 */
	u32                        ch_prep_timeout;      /*    20     4 */
	bool                       BRA_flow_controlled;  /*    24     1 */
	bool                       simple_ch_prep_sm;    /*    25     1 */
	bool                       imp_def_interrupts;   /*    26     1 */

	/* size: 32, cachelines: 1, members: 8 */
	/* padding: 5 */
	/* last cacheline: 32 bytes */
};

Signed-off-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Signed-off-by: Bard Liao <yung-chuan.liao@linux.intel.com>
Link: https://lore.kernel.org/r/20241003070650.62787-7-yung-chuan.liao@linux.intel.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 include/linux/soundwire/sdw.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/soundwire/sdw.h b/include/linux/soundwire/sdw.h
index 38db81f5bdb9..c72095137a35 100644
--- a/include/linux/soundwire/sdw.h
+++ b/include/linux/soundwire/sdw.h
@@ -226,16 +226,16 @@ enum sdw_clk_stop_mode {
 
 /**
  * struct sdw_dp0_prop - DP0 properties
+ * @words: wordlengths supported
  * @max_word: Maximum number of bits in a Payload Channel Sample, 1 to 64
  * (inclusive)
  * @min_word: Minimum number of bits in a Payload Channel Sample, 1 to 64
  * (inclusive)
  * @num_words: number of wordlengths supported
- * @words: wordlengths supported
+ * @ch_prep_timeout: Port-specific timeout value, in milliseconds
  * @BRA_flow_controlled: Slave implementation results in an OK_NotReady
  * response
  * @simple_ch_prep_sm: If channel prepare sequence is required
- * @ch_prep_timeout: Port-specific timeout value, in milliseconds
  * @imp_def_interrupts: If set, each bit corresponds to support for
  * implementation-defined interrupts
  *
@@ -244,13 +244,13 @@ enum sdw_clk_stop_mode {
  * support
  */
 struct sdw_dp0_prop {
+	u32 *words;
 	u32 max_word;
 	u32 min_word;
 	u32 num_words;
-	u32 *words;
+	u32 ch_prep_timeout;
 	bool BRA_flow_controlled;
 	bool simple_ch_prep_sm;
-	u32 ch_prep_timeout;
 	bool imp_def_interrupts;
 };
 
-- 
cgit v1.2.3


From 9942f90bdcc035eb5f01d7343dac99bd805ef3ec Mon Sep 17 00:00:00 2001
From: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Date: Thu, 3 Oct 2024 15:06:43 +0800
Subject: soundwire: optimize sdw_dpn_prop

before:
struct sdw_dpn_prop {
	u32                        num;                  /*     0     4 */
	u32                        max_word;             /*     4     4 */
	u32                        min_word;             /*     8     4 */
	u32                        num_words;            /*    12     4 */
	u32 *                      words;                /*    16     8 */
	enum sdw_dpn_type          type;                 /*    24     4 */
	u32                        max_grouping;         /*    28     4 */
	bool                       simple_ch_prep_sm;    /*    32     1 */

	/* XXX 3 bytes hole, try to pack */

	u32                        ch_prep_timeout;      /*    36     4 */
	u32                        imp_def_interrupts;   /*    40     4 */
	u32                        max_ch;               /*    44     4 */
	u32                        min_ch;               /*    48     4 */
	u32                        num_channels;         /*    52     4 */
	u32 *                      channels;             /*    56     8 */
	/* --- cacheline 1 boundary (64 bytes) --- */
	u32                        num_ch_combinations;  /*    64     4 */

	/* XXX 4 bytes hole, try to pack */

	u32 *                      ch_combinations;      /*    72     8 */
	u32                        modes;                /*    80     4 */
	u32                        max_async_buffer;     /*    84     4 */
	bool                       block_pack_mode;      /*    88     1 */
	bool                       read_only_wordlength; /*    89     1 */

	/* XXX 2 bytes hole, try to pack */

	u32                        port_encoding;        /*    92     4 */
	struct sdw_dpn_audio_mode * audio_modes;         /*    96     8 */

	/* size: 104, cachelines: 2, members: 22 */
	/* sum members: 95, holes: 3, sum holes: 9 */
	/* last cacheline: 40 bytes */
};

after:

struct sdw_dpn_prop {
	struct sdw_dpn_audio_mode * audio_modes;         /*     0     8 */
	u32                        num;                  /*     8     4 */
	u32                        max_word;             /*    12     4 */
	u32                        min_word;             /*    16     4 */
	u32                        num_words;            /*    20     4 */
	u32 *                      words;                /*    24     8 */
	enum sdw_dpn_type          type;                 /*    32     4 */
	u32                        max_grouping;         /*    36     4 */
	u32                        ch_prep_timeout;      /*    40     4 */
	u32                        imp_def_interrupts;   /*    44     4 */
	u32                        max_ch;               /*    48     4 */
	u32                        min_ch;               /*    52     4 */
	u32                        num_channels;         /*    56     4 */
	u32                        num_ch_combinations;  /*    60     4 */
	/* --- cacheline 1 boundary (64 bytes) --- */
	u32 *                      channels;             /*    64     8 */
	u32 *                      ch_combinations;      /*    72     8 */
	u32                        modes;                /*    80     4 */
	u32                        max_async_buffer;     /*    84     4 */
	u32                        port_encoding;        /*    88     4 */
	bool                       block_pack_mode;      /*    92     1 */
	bool                       read_only_wordlength; /*    93     1 */
	bool                       simple_ch_prep_sm;    /*    94     1 */

	/* size: 96, cachelines: 2, members: 22 */
	/* padding: 1 */
	/* last cacheline: 32 bytes */
};

Signed-off-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Signed-off-by: Bard Liao <yung-chuan.liao@linux.intel.com>
Link: https://lore.kernel.org/r/20241003070650.62787-8-yung-chuan.liao@linux.intel.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 include/linux/soundwire/sdw.h | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/include/linux/soundwire/sdw.h b/include/linux/soundwire/sdw.h
index c72095137a35..cc0afb8af333 100644
--- a/include/linux/soundwire/sdw.h
+++ b/include/linux/soundwire/sdw.h
@@ -288,6 +288,7 @@ struct sdw_dpn_audio_mode {
 
 /**
  * struct sdw_dpn_prop - Data Port DPn properties
+ * @audio_modes: Audio modes supported
  * @num: port number
  * @max_word: Maximum number of bits in a Payload Channel Sample, 1 to 64
  * (inclusive)
@@ -298,26 +299,26 @@ struct sdw_dpn_audio_mode {
  * @type: Data port type. Full, Simplified or Reduced
  * @max_grouping: Maximum number of samples that can be grouped together for
  * a full data port
- * @simple_ch_prep_sm: If the port supports simplified channel prepare state
- * machine
  * @ch_prep_timeout: Port-specific timeout value, in milliseconds
  * @imp_def_interrupts: If set, each bit corresponds to support for
  * implementation-defined interrupts
  * @max_ch: Maximum channels supported
  * @min_ch: Minimum channels supported
  * @num_channels: Number of discrete channels supported
- * @channels: Discrete channels supported
  * @num_ch_combinations: Number of channel combinations supported
+ * @channels: Discrete channels supported
  * @ch_combinations: Channel combinations supported
  * @modes: SDW mode supported
  * @max_async_buffer: Number of samples that this port can buffer in
  * asynchronous modes
+ * @port_encoding: Payload Channel Sample encoding schemes supported
  * @block_pack_mode: Type of block port mode supported
  * @read_only_wordlength: Read Only wordlength field in DPN_BlockCtrl1 register
- * @port_encoding: Payload Channel Sample encoding schemes supported
- * @audio_modes: Audio modes supported
+ * @simple_ch_prep_sm: If the port supports simplified channel prepare state
+ * machine
  */
 struct sdw_dpn_prop {
+	struct sdw_dpn_audio_mode *audio_modes;
 	u32 num;
 	u32 max_word;
 	u32 min_word;
@@ -325,21 +326,20 @@ struct sdw_dpn_prop {
 	u32 *words;
 	enum sdw_dpn_type type;
 	u32 max_grouping;
-	bool simple_ch_prep_sm;
 	u32 ch_prep_timeout;
 	u32 imp_def_interrupts;
 	u32 max_ch;
 	u32 min_ch;
 	u32 num_channels;
-	u32 *channels;
 	u32 num_ch_combinations;
+	u32 *channels;
 	u32 *ch_combinations;
 	u32 modes;
 	u32 max_async_buffer;
+	u32 port_encoding;
 	bool block_pack_mode;
 	bool read_only_wordlength;
-	u32 port_encoding;
-	struct sdw_dpn_audio_mode *audio_modes;
+	bool simple_ch_prep_sm;
 };
 
 /**
-- 
cgit v1.2.3


From 1ae4aa59d79399be0591c8d78c44e280406e2c34 Mon Sep 17 00:00:00 2001
From: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Date: Thu, 3 Oct 2024 15:06:44 +0800
Subject: soundwire: mipi-disco: remove DPn audio-modes

The concept of DPn audio-modes was never used by anyone, and was
removed from the DisCo for SoundWire 2.0 specification.

Remove the definitions and TODO.

Signed-off-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Signed-off-by: Bard Liao <yung-chuan.liao@linux.intel.com>
Link: https://lore.kernel.org/r/20241003070650.62787-9-yung-chuan.liao@linux.intel.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 include/linux/soundwire/sdw.h | 34 ----------------------------------
 1 file changed, 34 deletions(-)

(limited to 'include')

diff --git a/include/linux/soundwire/sdw.h b/include/linux/soundwire/sdw.h
index cc0afb8af333..66feaa79ecfc 100644
--- a/include/linux/soundwire/sdw.h
+++ b/include/linux/soundwire/sdw.h
@@ -254,41 +254,8 @@ struct sdw_dp0_prop {
 	bool imp_def_interrupts;
 };
 
-/**
- * struct sdw_dpn_audio_mode - Audio mode properties for DPn
- * @bus_min_freq: Minimum bus frequency, in Hz
- * @bus_max_freq: Maximum bus frequency, in Hz
- * @bus_num_freq: Number of discrete frequencies supported
- * @bus_freq: Discrete bus frequencies, in Hz
- * @min_freq: Minimum sampling frequency, in Hz
- * @max_freq: Maximum sampling bus frequency, in Hz
- * @num_freq: Number of discrete sampling frequency supported
- * @freq: Discrete sampling frequencies, in Hz
- * @prep_ch_behave: Specifies the dependencies between Channel Prepare
- * sequence and bus clock configuration
- * If 0, Channel Prepare can happen at any Bus clock rate
- * If 1, Channel Prepare sequence shall happen only after Bus clock is
- * changed to a frequency supported by this mode or compatible modes
- * described by the next field
- * @glitchless: Bitmap describing possible glitchless transitions from this
- * Audio Mode to other Audio Modes
- */
-struct sdw_dpn_audio_mode {
-	u32 bus_min_freq;
-	u32 bus_max_freq;
-	u32 bus_num_freq;
-	u32 *bus_freq;
-	u32 max_freq;
-	u32 min_freq;
-	u32 num_freq;
-	u32 *freq;
-	u32 prep_ch_behave;
-	u32 glitchless;
-};
-
 /**
  * struct sdw_dpn_prop - Data Port DPn properties
- * @audio_modes: Audio modes supported
  * @num: port number
  * @max_word: Maximum number of bits in a Payload Channel Sample, 1 to 64
  * (inclusive)
@@ -318,7 +285,6 @@ struct sdw_dpn_audio_mode {
  * machine
  */
 struct sdw_dpn_prop {
-	struct sdw_dpn_audio_mode *audio_modes;
 	u32 num;
 	u32 max_word;
 	u32 min_word;
-- 
cgit v1.2.3


From 543bd28a3bfeff31f748ba83348b63313dd37ff9 Mon Sep 17 00:00:00 2001
From: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Date: Thu, 3 Oct 2024 15:06:49 +0800
Subject: soundwire: mipi-disco: add new properties from 2.0 spec

The DisCo for SoundWire 2.0 spec adds support for new
'mipi-sdw-sdca-interrupt-register-list' and
'mipi-sdw-commit-register-supported'.

This patch only adds the definitions and property reads, but the use
of these properties will come at some point in the future when needed.

Note a slight conceptual disconnect between the MIPI DisCo definition
of a boolean property and the Linux implementation. The latter only
checks the presence of the property to set its value to 'true',
whereas the MIPI definitions allow for a property with a 'false'
value. This patch uses the new introduced mipi_device_property_read_bool()
to handle it.

Signed-off-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Signed-off-by: Bard Liao <yung-chuan.liao@linux.intel.com>
Link: https://lore.kernel.org/r/20241003070650.62787-14-yung-chuan.liao@linux.intel.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 include/linux/soundwire/sdw.h | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/soundwire/sdw.h b/include/linux/soundwire/sdw.h
index 66feaa79ecfc..952514f044f0 100644
--- a/include/linux/soundwire/sdw.h
+++ b/include/linux/soundwire/sdw.h
@@ -335,8 +335,11 @@ struct sdw_dpn_prop {
  * @master_count: Number of Masters present on this Slave
  * @source_ports: Bitmap identifying source ports
  * @sink_ports: Bitmap identifying sink ports
- * @scp_int1_mask: SCP_INT1_MASK desired settings
  * @quirks: bitmask identifying deltas from the MIPI specification
+ * @sdca_interrupt_register_list: indicates which sets of SDCA interrupt status
+ * and masks are supported
+ * @commit_register_supported: is PCP_Commit register supported
+ * @scp_int1_mask: SCP_INT1_MASK desired settings
  * @clock_reg_supported: the Peripheral implements the clock base and scale
  * registers introduced with the SoundWire 1.2 specification. SDCA devices
  * do not need to set this boolean property as the registers are required.
@@ -363,6 +366,8 @@ struct sdw_slave_prop {
 	u32 source_ports;
 	u32 sink_ports;
 	u32 quirks;
+	u32 sdca_interrupt_register_list;
+	u8 commit_register_supported;
 	u8 scp_int1_mask;
 	bool clock_reg_supported;
 	bool use_domain_irq;
-- 
cgit v1.2.3


From 71b405b184449fffcb76ea0814104b71dfdb2aee Mon Sep 17 00:00:00 2001
From: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Date: Thu, 3 Oct 2024 15:06:50 +0800
Subject: soundwire: mipi-disco: add support for DP0/DPn 'lane-list' property

The SoundWire specification did not clearly require that ports could
use all Lanes. Some SoundWire/SDCA peripheral adopters added
restrictions on which lanes can be used by what port, and the DisCo
for SoundWire 2.1 specification added a 'lane-list' property to model
this hardware limitation.

When not specified, the ports can use all Lanes. Otherwise, the
'lane-list' indicates which Lanes can be used, sorted by order of
preference (most-preferred-first).

This patch only reads the properties, the use of this property will
come at a later time with multi-lane support.

Signed-off-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Signed-off-by: Bard Liao <yung-chuan.liao@linux.intel.com>
Link: https://lore.kernel.org/r/20241003070650.62787-15-yung-chuan.liao@linux.intel.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 include/linux/soundwire/sdw.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include')

diff --git a/include/linux/soundwire/sdw.h b/include/linux/soundwire/sdw.h
index 952514f044f0..73f655334fe9 100644
--- a/include/linux/soundwire/sdw.h
+++ b/include/linux/soundwire/sdw.h
@@ -238,6 +238,8 @@ enum sdw_clk_stop_mode {
  * @simple_ch_prep_sm: If channel prepare sequence is required
  * @imp_def_interrupts: If set, each bit corresponds to support for
  * implementation-defined interrupts
+ * @num_lanes: array size of @lane_list
+ * @lane_list: indicates which Lanes can be used by DP0
  *
  * The wordlengths are specified by Spec as max, min AND number of
  * discrete values, implementation can define based on the wordlengths they
@@ -252,6 +254,8 @@ struct sdw_dp0_prop {
 	bool BRA_flow_controlled;
 	bool simple_ch_prep_sm;
 	bool imp_def_interrupts;
+	int num_lanes;
+	u32 *lane_list;
 };
 
 /**
@@ -275,6 +279,8 @@ struct sdw_dp0_prop {
  * @num_ch_combinations: Number of channel combinations supported
  * @channels: Discrete channels supported
  * @ch_combinations: Channel combinations supported
+ * @lane_list: indicates which Lanes can be used by DPn
+ * @num_lanes: array size of @lane_list
  * @modes: SDW mode supported
  * @max_async_buffer: Number of samples that this port can buffer in
  * asynchronous modes
@@ -300,6 +306,8 @@ struct sdw_dpn_prop {
 	u32 num_ch_combinations;
 	u32 *channels;
 	u32 *ch_combinations;
+	u32 *lane_list;
+	int num_lanes;
 	u32 modes;
 	u32 max_async_buffer;
 	u32 port_encoding;
-- 
cgit v1.2.3


From e26a0c5d828b225b88f534e2fcf10bf617f85f23 Mon Sep 17 00:00:00 2001
From: Shradha Gupta <shradhagupta@linux.microsoft.com>
Date: Sun, 29 Sep 2024 20:44:35 -0700
Subject: net: mana: Increase the DEF_RX_BUFFERS_PER_QUEUE to 1024

Through some experiments, we found out that increasing the default
RX buffers count from 512 to 1024, gives slightly better throughput
and significantly reduces the no_wqe_rx errs on the receiver side.
Along with these, other parameters like cpu usage, retrans seg etc
also show some improvement with 1024 value.

Following are some snippets from the experiments

ntttcp tests with 512 Rx buffers
---------------------------------------
connections|  throughput|  no_wqe errs|
---------------------------------------
1          |  40.93Gbps | 123,211     |
16         | 180.15Gbps | 190,120     |
128        | 180.20Gbps | 173,508     |
256        | 180.27Gbps | 189,884     |

ntttcp tests with 1024 Rx buffers
---------------------------------------
connections|  throughput|  no_wqe errs|
---------------------------------------
1          |  44.22Gbps | 19,864      |
16         | 180.19Gbps | 4,430       |
128        | 180.21Gbps | 2,560       |
256        | 180.29Gbps | 1,529       |

So, increasing the default RX buffers per queue count to 1024

Signed-off-by: Shradha Gupta <shradhagupta@linux.microsoft.com>
Reviewed-by: Haiyang Zhang <haiyangz@microsoft.com>
Reviewed-by: Pavan Chebbi <pavan.chebbi@broadcom.com>
Link: https://patch.msgid.link/1727667875-29908-1-git-send-email-shradhagupta@linux.microsoft.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/net/mana/mana.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h
index f2a5200d8a0f..9b0faa24b758 100644
--- a/include/net/mana/mana.h
+++ b/include/net/mana/mana.h
@@ -43,7 +43,7 @@ enum TRI_STATE {
  * size beyond this value gets rejected by __alloc_page() call.
  */
 #define MAX_RX_BUFFERS_PER_QUEUE 8192
-#define DEF_RX_BUFFERS_PER_QUEUE 512
+#define DEF_RX_BUFFERS_PER_QUEUE 1024
 #define MIN_RX_BUFFERS_PER_QUEUE 128
 
 /* This max value for TX buffers is derived as the maximum allocatable
-- 
cgit v1.2.3


From aa3ab3336e6052dba95dac9f5c268eb7d1823002 Mon Sep 17 00:00:00 2001
From: Uros Bizjak <ubizjak@gmail.com>
Date: Mon, 30 Sep 2024 14:33:19 +0200
Subject: scsi: libfcoe: Include <linux/prandom.h> instead of <linux/random.h>

Substitute the inclusion of <linux/random.h> header with
<linux/prandom.h> to allow the removal of legacy inclusion
of <linux/prandom.h> from <linux/random.h>.

Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
Reviewed-by: Bart Van Assche <bvanassche@acm.org>
Cc: Hannes Reinecke <hare@suse.de>
Cc: James E.J. Bottomley <James.Bottomley@HansenPartnership.com>
Cc: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
 include/scsi/libfcoe.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/scsi/libfcoe.h b/include/scsi/libfcoe.h
index 3c5899290aed..6616348e59b9 100644
--- a/include/scsi/libfcoe.h
+++ b/include/scsi/libfcoe.h
@@ -15,7 +15,7 @@
 #include <linux/skbuff.h>
 #include <linux/workqueue.h>
 #include <linux/local_lock.h>
-#include <linux/random.h>
+#include <linux/prandom.h>
 #include <scsi/fc/fc_fcoe.h>
 #include <scsi/libfc.h>
 #include <scsi/fcoe_sysfs.h>
-- 
cgit v1.2.3


From 5b3fdc9f2ff10d3cee106ddaa0ee6636c7de381e Mon Sep 17 00:00:00 2001
From: Uros Bizjak <ubizjak@gmail.com>
Date: Mon, 30 Sep 2024 14:33:29 +0200
Subject: random: Do not include <linux/prandom.h> in <linux/random.h>

Files that use prandom infrastructure are now converted to
use <linux/prandom.h> header instead of <linux/random.h>.
Remove the legacy inclusion of <linux/prandom.h> from
<linux/random.h>.

This is the "nice cleanup" part, wished in c0842fbc1b18.

Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
Fixes: c0842fbc1b18 ("random32: move the pseudo-random 32-bit definitions to prandom.h")
Cc: Theodore Ts'o <tytso@mit.edu>
Cc: Jason A. Donenfeld <Jason@zx2c4.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
 include/linux/random.h | 7 -------
 1 file changed, 7 deletions(-)

(limited to 'include')

diff --git a/include/linux/random.h b/include/linux/random.h
index b0a940af4fff..333cecfca93f 100644
--- a/include/linux/random.h
+++ b/include/linux/random.h
@@ -145,13 +145,6 @@ declare_get_random_var_wait(u64, u32)
 declare_get_random_var_wait(long, unsigned long)
 #undef declare_get_random_var
 
-/*
- * This is designed to be standalone for just prandom
- * users, but for now we include it from <linux/random.h>
- * for legacy reasons.
- */
-#include <linux/prandom.h>
-
 #ifdef CONFIG_SMP
 int random_prepare_cpu(unsigned int cpu);
 int random_online_cpu(unsigned int cpu);
-- 
cgit v1.2.3


From d18c13697b4dcbf6a8f06c3d8e564c4f5ad1477c Mon Sep 17 00:00:00 2001
From: Uros Bizjak <ubizjak@gmail.com>
Date: Mon, 30 Sep 2024 14:33:30 +0200
Subject: prandom: Include <linux/percpu.h> in <linux/prandom.h>

<linux/percpu.h> include was removed from <linux/prandom.h>
in d9f29deb7fe8 ("prandom: Remove unused include") because
this inclusion broke arm64 due to a circular dependency
on include files.

__percpu tag is defined in include/linux/compiler_types.h, so there
is currently no direct need for the inclusion of <linux/percpu.h>.
However, in [1] we would like to repurpose __percpu tag as a named
address space qualifier, where __percpu macro uses defines from
<linux/percpu.h>.

The circular dependency was removed in ddd8e37ebaa1 ("random: Do not
include <linux/prandom.h> in <linux/random.h>") and it cleared
the path for the inclusion of <linux/percpu.h> in <linux/prandom.h>.

This patch is basically a revert of d9f29deb7fe8
("prandom: Remove unused include").

[1] https://lore.kernel.org/lkml/20240812115945.484051-4-ubizjak@gmail.com/

Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
Cc: Theodore Ts'o <tytso@mit.edu>
Cc: Jason A. Donenfeld <Jason@zx2c4.com>
Cc: Kent Overstreet <kent.overstreet@linux.dev>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
 include/linux/prandom.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/prandom.h b/include/linux/prandom.h
index f7f1e5251c67..f2ed5b72b3d6 100644
--- a/include/linux/prandom.h
+++ b/include/linux/prandom.h
@@ -10,6 +10,7 @@
 
 #include <linux/types.h>
 #include <linux/once.h>
+#include <linux/percpu.h>
 #include <linux/random.h>
 
 struct rnd_state {
-- 
cgit v1.2.3


From 7788d320ba5ecbfa88d0be8c32ef8f018f2f020f Mon Sep 17 00:00:00 2001
From: Antonino Maniscalco <antomani103@gmail.com>
Date: Thu, 3 Oct 2024 18:12:59 +0200
Subject: drm/msm/a6xx: Add a flag to allow preemption to submitqueue_create

Some userspace changes are necessary so add a flag for userspace to
advertise support for preemption when creating the submitqueue.

When this flag is not set preemption will not be allowed in the middle
of the submitted IBs therefore mantaining compatibility with older
userspace.

The flag is rejected if preemption is not supported on the target, this
allows userspace to know whether preemption is supported.

Tested-by: Rob Clark <robdclark@gmail.com>
Tested-by: Neil Armstrong <neil.armstrong@linaro.org> # on SM8650-QRD
Tested-by: Neil Armstrong <neil.armstrong@linaro.org> # on SM8550-QRD
Tested-by: Neil Armstrong <neil.armstrong@linaro.org> # on SM8450-HDK
Signed-off-by: Antonino Maniscalco <antomani103@gmail.com>
Patchwork: https://patchwork.freedesktop.org/patch/618028/
Signed-off-by: Rob Clark <robdclark@chromium.org>
---
 include/uapi/drm/msm_drm.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/uapi/drm/msm_drm.h b/include/uapi/drm/msm_drm.h
index 2377147b6af0..b916aab80dde 100644
--- a/include/uapi/drm/msm_drm.h
+++ b/include/uapi/drm/msm_drm.h
@@ -347,7 +347,10 @@ struct drm_msm_gem_madvise {
  * backwards compatibility as a "default" submitqueue
  */
 
-#define MSM_SUBMITQUEUE_FLAGS (0)
+#define MSM_SUBMITQUEUE_ALLOW_PREEMPT	0x00000001
+#define MSM_SUBMITQUEUE_FLAGS		    ( \
+		MSM_SUBMITQUEUE_ALLOW_PREEMPT | \
+		0)
 
 /*
  * The submitqueue priority should be between 0 and MSM_PARAM_PRIORITIES-1,
-- 
cgit v1.2.3


From 816ad8f1e498fe5e9e992da137316302219f2137 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Wed, 2 Oct 2024 14:51:52 -0700
Subject: lib: packing: remove kernel-doc from header file

It is not necessary to have the kernel-doc duplicated both in the
header and in the implementation. It is better to have it near the
implementation of the function, since in C, a function can have N
declarations, but only one definition.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Reviewed-by: Przemek Kitszel <przemyslaw.kitszel@intel.com>
Reviewed-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Link: https://patch.msgid.link/20241002-packing-kunit-tests-and-split-pack-unpack-v2-3-8373e551eae3@intel.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/packing.h | 26 --------------------------
 1 file changed, 26 deletions(-)

(limited to 'include')

diff --git a/include/linux/packing.h b/include/linux/packing.h
index 8d6571feb95d..69baefebcd02 100644
--- a/include/linux/packing.h
+++ b/include/linux/packing.h
@@ -17,32 +17,6 @@ enum packing_op {
 	UNPACK,
 };
 
-/**
- * packing - Convert numbers (currently u64) between a packed and an unpacked
- *	     format. Unpacked means laid out in memory in the CPU's native
- *	     understanding of integers, while packed means anything else that
- *	     requires translation.
- *
- * @pbuf: Pointer to a buffer holding the packed value.
- * @uval: Pointer to an u64 holding the unpacked value.
- * @startbit: The index (in logical notation, compensated for quirks) where
- *	      the packed value starts within pbuf. Must be larger than, or
- *	      equal to, endbit.
- * @endbit: The index (in logical notation, compensated for quirks) where
- *	    the packed value ends within pbuf. Must be smaller than, or equal
- *	    to, startbit.
- * @op: If PACK, then uval will be treated as const pointer and copied (packed)
- *	into pbuf, between startbit and endbit.
- *	If UNPACK, then pbuf will be treated as const pointer and the logical
- *	value between startbit and endbit will be copied (unpacked) to uval.
- * @quirks: A bit mask of QUIRK_LITTLE_ENDIAN, QUIRK_LSW32_IS_FIRST and
- *	    QUIRK_MSB_ON_THE_RIGHT.
- *
- * Return: 0 on success, EINVAL or ERANGE if called incorrectly. Assuming
- *	   correct usage, return code may be discarded.
- *	   If op is PACK, pbuf is modified.
- *	   If op is UNPACK, uval is modified.
- */
 int packing(void *pbuf, u64 *uval, int startbit, int endbit, size_t pbuflen,
 	    enum packing_op op, u8 quirks);
 
-- 
cgit v1.2.3


From 7263f64e16d90ded44ce211381b2def83db32fd9 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Wed, 2 Oct 2024 14:51:53 -0700
Subject: lib: packing: add pack() and unpack() wrappers over packing()

Geert Uytterhoeven described packing() as "really bad API" because of
not being able to enforce const correctness. The same function is used
both when "pbuf" is input and "uval" is output, as in the other way
around.

Create 2 wrapper functions where const correctness can be ensured.
Do ugly type casts inside, to be able to reuse packing() as currently
implemented - which will _not_ modify the input argument.

Also, take the opportunity to change the type of startbit and endbit to
size_t - an unsigned type - in these new function prototypes. When int,
an extra check for negative values is necessary. Hopefully, when
packing() goes away completely, that check can be dropped.

My concern is that code which does rely on the conditional directionality
of packing() is harder to refactor without blowing up in size. So it may
take a while to completely eliminate packing(). But let's make alternatives
available for those who do not need that.

Link: https://lore.kernel.org/netdev/20210223112003.2223332-1-geert+renesas@glider.be/
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Reviewed-by: Przemek Kitszel <przemyslaw.kitszel@intel.com>
Reviewed-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Link: https://patch.msgid.link/20241002-packing-kunit-tests-and-split-pack-unpack-v2-4-8373e551eae3@intel.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/packing.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include')

diff --git a/include/linux/packing.h b/include/linux/packing.h
index 69baefebcd02..ea25cb93cc70 100644
--- a/include/linux/packing.h
+++ b/include/linux/packing.h
@@ -20,4 +20,10 @@ enum packing_op {
 int packing(void *pbuf, u64 *uval, int startbit, int endbit, size_t pbuflen,
 	    enum packing_op op, u8 quirks);
 
+int pack(void *pbuf, const u64 *uval, size_t startbit, size_t endbit,
+	 size_t pbuflen, u8 quirks);
+
+int unpack(const void *pbuf, u64 *uval, size_t startbit, size_t endbit,
+	   size_t pbuflen, u8 quirks);
+
 #endif
-- 
cgit v1.2.3


From 28aec9ca29f05dabb835293acc6e202bf51b8092 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Wed, 2 Oct 2024 14:51:54 -0700
Subject: lib: packing: duplicate pack() and unpack() implementations

packing() is now used in some hot paths, and it would be good to get rid
of some ifs and buts that depend on "op", to speed things up a little bit.

With the main implementations now taking size_t endbit, we no longer
have to check for negative values. Update the local integer variables to
also be size_t to match.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Reviewed-by: Przemek Kitszel <przemyslaw.kitszel@intel.com>
Reviewed-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Link: https://patch.msgid.link/20241002-packing-kunit-tests-and-split-pack-unpack-v2-5-8373e551eae3@intel.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/packing.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/packing.h b/include/linux/packing.h
index ea25cb93cc70..5d36dcd06f60 100644
--- a/include/linux/packing.h
+++ b/include/linux/packing.h
@@ -20,8 +20,8 @@ enum packing_op {
 int packing(void *pbuf, u64 *uval, int startbit, int endbit, size_t pbuflen,
 	    enum packing_op op, u8 quirks);
 
-int pack(void *pbuf, const u64 *uval, size_t startbit, size_t endbit,
-	 size_t pbuflen, u8 quirks);
+int pack(void *pbuf, u64 uval, size_t startbit, size_t endbit, size_t pbuflen,
+	 u8 quirks);
 
 int unpack(const void *pbuf, u64 *uval, size_t startbit, size_t endbit,
 	   size_t pbuflen, u8 quirks);
-- 
cgit v1.2.3


From 7e863e5db6185b1add0df4cb01b31a4ed1c4b738 Mon Sep 17 00:00:00 2001
From: Guillaume Nault <gnault@redhat.com>
Date: Tue, 1 Oct 2024 21:28:43 +0200
Subject: ipv4: Convert ip_route_input() to dscp_t.

Pass a dscp_t variable to ip_route_input(), instead of a plain u8, to
prevent accidental setting of ECN bits in ->flowi4_tos.

Callers of ip_route_input() to consider are:

  * input_action_end_dx4_finish() and input_action_end_dt4() in
    net/ipv6/seg6_local.c. These functions set the tos parameter to 0,
    which is already a valid dscp_t value, so they don't need to be
    adjusted for the new prototype.

  * icmp_route_lookup(), which already has a dscp_t variable to pass as
    parameter. We just need to remove the inet_dscp_to_dsfield()
    conversion.

  * br_nf_pre_routing_finish(), ip_options_rcv_srr() and ip4ip6_err(),
    which get the DSCP directly from IPv4 headers. Define a helper to
    read the .tos field of struct iphdr as dscp_t, so that these
    function don't have to do the conversion manually.

While there, declare *iph as const in br_nf_pre_routing_finish(),
declare its local variables in reverse-christmas-tree order and move
the "err = ip_route_input()" assignment out of the conditional to avoid
checkpatch warning.

Signed-off-by: Guillaume Nault <gnault@redhat.com>
Reviewed-by: David Ahern <dsahern@kernel.org>
Link: https://patch.msgid.link/e9d40781d64d3d69f4c79ac8a008b8d67a033e8d.1727807926.git.gnault@redhat.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/ip.h    | 5 +++++
 include/net/route.h | 5 +++--
 2 files changed, 8 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/net/ip.h b/include/net/ip.h
index d92d3bc3ec0e..bab084df1567 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -424,6 +424,11 @@ int ip_decrease_ttl(struct iphdr *iph)
 	return --iph->ttl;
 }
 
+static inline dscp_t ip4h_dscp(const struct iphdr *ip4h)
+{
+	return inet_dsfield_to_dscp(ip4h->tos);
+}
+
 static inline int ip_mtu_locked(const struct dst_entry *dst)
 {
 	const struct rtable *rt = dst_rtable(dst);
diff --git a/include/net/route.h b/include/net/route.h
index 1789f1e6640b..03dd28cf4bc4 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -208,12 +208,13 @@ int ip_route_use_hint(struct sk_buff *skb, __be32 dst, __be32 src,
 		      const struct sk_buff *hint);
 
 static inline int ip_route_input(struct sk_buff *skb, __be32 dst, __be32 src,
-				 u8 tos, struct net_device *devin)
+				 dscp_t dscp, struct net_device *devin)
 {
 	int err;
 
 	rcu_read_lock();
-	err = ip_route_input_noref(skb, dst, src, tos, devin);
+	err = ip_route_input_noref(skb, dst, src, inet_dscp_to_dsfield(dscp),
+				   devin);
 	if (!err) {
 		skb_dst_force(skb);
 		if (!skb_dst(skb))
-- 
cgit v1.2.3


From 66fb6386d358a04edd5c640e38b4a02b323b89d8 Mon Sep 17 00:00:00 2001
From: Guillaume Nault <gnault@redhat.com>
Date: Tue, 1 Oct 2024 21:28:49 +0200
Subject: ipv4: Convert ip_route_input_noref() to dscp_t.

Pass a dscp_t variable to ip_route_input_noref(), instead of a plain
u8, to prevent accidental setting of ECN bits in ->flowi4_tos.

Callers of ip_route_input_noref() to consider are:

  * arp_process() in net/ipv4/arp.c. This function sets the tos
    parameter to 0, which is already a valid dscp_t value, so it
    doesn't need to be adjusted for the new prototype.

  * ip_route_input(), which already has a dscp_t variable to pass as
    parameter. We just need to remove the inet_dscp_to_dsfield()
    conversion.

  * ipvlan_l3_rcv(), bpf_lwt_input_reroute(), ip_expire(),
    ip_rcv_finish_core(), xfrm4_rcv_encap_finish() and
    xfrm4_rcv_encap(), which get the DSCP directly from IPv4 headers
    and can simply use the ip4h_dscp() helper.

While there, declare the IPv4 header pointers as const in
ipvlan_l3_rcv() and bpf_lwt_input_reroute().
Also, modify the declaration of ip_route_input_noref() in
include/net/route.h so that it matches the prototype of its
implementation in net/ipv4/route.c.

Signed-off-by: Guillaume Nault <gnault@redhat.com>
Reviewed-by: David Ahern <dsahern@kernel.org>
Link: https://patch.msgid.link/a8a747bed452519c4d0cc06af32c7e7795d7b627.1727807926.git.gnault@redhat.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/route.h | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/net/route.h b/include/net/route.h
index 03dd28cf4bc4..5e4374d66927 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -201,8 +201,8 @@ static inline struct rtable *ip_route_output_gre(struct net *net, struct flowi4
 int ip_mc_validate_source(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 			  u8 tos, struct net_device *dev,
 			  struct in_device *in_dev, u32 *itag);
-int ip_route_input_noref(struct sk_buff *skb, __be32 dst, __be32 src,
-			 u8 tos, struct net_device *devin);
+int ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr,
+			 dscp_t dscp, struct net_device *dev);
 int ip_route_use_hint(struct sk_buff *skb, __be32 dst, __be32 src,
 		      u8 tos, struct net_device *devin,
 		      const struct sk_buff *hint);
@@ -213,8 +213,7 @@ static inline int ip_route_input(struct sk_buff *skb, __be32 dst, __be32 src,
 	int err;
 
 	rcu_read_lock();
-	err = ip_route_input_noref(skb, dst, src, inet_dscp_to_dsfield(dscp),
-				   devin);
+	err = ip_route_input_noref(skb, dst, src, dscp, devin);
 	if (!err) {
 		skb_dst_force(skb);
 		if (!skb_dst(skb))
-- 
cgit v1.2.3


From 4f647a780f3606acbd2116248d51eadb4d865615 Mon Sep 17 00:00:00 2001
From: Eduard Zingerman <eddyz87@gmail.com>
Date: Mon, 16 Sep 2024 02:17:10 -0700
Subject: bpf: __bpf_fastcall for bpf_get_smp_processor_id in uapi

Since [1] kernel supports __bpf_fastcall attribute for helper function
bpf_get_smp_processor_id(). Update uapi definition for this helper in
order to have this attribute in the generated bpf_helper_defs.h

[1] commit 91b7fbf3936f ("bpf, x86, riscv, arm: no_caller_saved_registers for bpf_get_smp_processor_id()")

Signed-off-by: Eduard Zingerman <eddyz87@gmail.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20240916091712.2929279-3-eddyz87@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/uapi/linux/bpf.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index c6cd7c7aeeee..8ab4d8184b9d 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -1970,6 +1970,8 @@ union bpf_attr {
  * 		program.
  * 	Return
  * 		The SMP id of the processor running the program.
+ * 	Attributes
+ * 		__bpf_fastcall
  *
  * long bpf_skb_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len, u64 flags)
  * 	Description
-- 
cgit v1.2.3


From da7d71bcb0637b7aa18934628fdb5a55f2db49a6 Mon Sep 17 00:00:00 2001
From: Eduard Zingerman <eddyz87@gmail.com>
Date: Mon, 16 Sep 2024 02:17:11 -0700
Subject: bpf: Use KF_FASTCALL to mark kfuncs supporting fastcall contract

In order to allow pahole add btf_decl_tag("bpf_fastcall") for kfuncs
supporting bpf_fastcall, mark such functions with KF_FASTCALL in
id_set8 objects.

Signed-off-by: Eduard Zingerman <eddyz87@gmail.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20240916091712.2929279-4-eddyz87@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/btf.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/btf.h b/include/linux/btf.h
index b8a583194c4a..631060e3ad14 100644
--- a/include/linux/btf.h
+++ b/include/linux/btf.h
@@ -75,6 +75,7 @@
 #define KF_ITER_NEXT    (1 << 9) /* kfunc implements BPF iter next method */
 #define KF_ITER_DESTROY (1 << 10) /* kfunc implements BPF iter destructor */
 #define KF_RCU_PROTECTED (1 << 11) /* kfunc should be protected by rcu cs when they are invoked */
+#define KF_FASTCALL     (1 << 12) /* kfunc supports bpf_fastcall protocol */
 
 /*
  * Tag marking a kernel function as a kfunc. This is meant to minimize the
-- 
cgit v1.2.3


From 22fbabe82cea7af4127f089b7f3553cd75571d9a Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Thu, 12 Sep 2024 15:30:02 -0700
Subject: scsi: ufs: core: Improve the struct ufs_hba documentation

Make the role of the structure members related to UIC command processing
more clear.

Reviewed-by: Peter Wang <peter.wang@mediatek.com>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Link: https://lore.kernel.org/r/20240912223019.3510966-2-bvanassche@acm.org
Reviewed-by: Bao D. Nguyen <quic_nguyenb@quicinc.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 include/ufs/ufshcd.h | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/ufs/ufshcd.h b/include/ufs/ufshcd.h
index 3f68ae3e4330..a95282b9f743 100644
--- a/include/ufs/ufshcd.h
+++ b/include/ufs/ufshcd.h
@@ -877,9 +877,10 @@ enum ufshcd_mcq_opr {
  * @tmf_tag_set: TMF tag set.
  * @tmf_queue: Used to allocate TMF tags.
  * @tmf_rqs: array with pointers to TMF requests while these are in progress.
- * @active_uic_cmd: handle of active UIC command
- * @uic_cmd_mutex: mutex for UIC command
- * @uic_async_done: completion used during UIC processing
+ * @active_uic_cmd: pointer to active UIC command.
+ * @uic_cmd_mutex: mutex used for serializing UIC command processing.
+ * @uic_async_done: completion used to wait for power mode or hibernation state
+ *	changes.
  * @ufshcd_state: UFSHCD state
  * @eh_flags: Error handling flags
  * @intr_mask: Interrupt Mask Bits
-- 
cgit v1.2.3


From a04d5f82fa3893aa06386db93883b151b93b11ed Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Wed, 2 Oct 2024 05:01:05 +0100
Subject: mm: Remove PageMappedToDisk

All callers have now been converted to the folio APIs, so remove
the page API for this flag.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Link: https://lore.kernel.org/r/20241002040111.1023018-4-willy@infradead.org
Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/linux/page-flags.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 1b3a76710487..35d08c30d4a6 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -554,7 +554,7 @@ FOLIO_FLAG(owner_2, FOLIO_HEAD_PAGE)
  */
 TESTPAGEFLAG(Writeback, writeback, PF_NO_TAIL)
 	TESTSCFLAG(Writeback, writeback, PF_NO_TAIL)
-PAGEFLAG(MappedToDisk, mappedtodisk, PF_NO_TAIL)
+FOLIO_FLAG(mappedtodisk, FOLIO_HEAD_PAGE)
 
 /* PG_readahead is only used for reads; PG_reclaim is only for writes */
 PAGEFLAG(Reclaim, reclaim, PF_NO_TAIL)
-- 
cgit v1.2.3


From fd15ba4cb00a43fb4df42e1f95f94857ad122eea Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Wed, 2 Oct 2024 05:01:07 +0100
Subject: ceph: Remove call to PagePrivate2()

Use the folio that we already have to call folio_test_private_2()
instead.  This is the last call to PagePrivate2(), so replace its
PAGEFLAG() definition with FOLIO_FLAG().

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Link: https://lore.kernel.org/r/20241002040111.1023018-6-willy@infradead.org
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/linux/page-flags.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 35d08c30d4a6..4c2dfe289046 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -543,7 +543,7 @@ FOLIO_FLAG(swapbacked, FOLIO_HEAD_PAGE)
  * - PG_private and PG_private_2 cause release_folio() and co to be invoked
  */
 PAGEFLAG(Private, private, PF_ANY)
-PAGEFLAG(Private2, private_2, PF_ANY) TESTSCFLAG(Private2, private_2, PF_ANY)
+FOLIO_FLAG(private_2, FOLIO_HEAD_PAGE)
 
 /* owner_2 can be set on tail pages for anon memory */
 FOLIO_FLAG(owner_2, FOLIO_HEAD_PAGE)
-- 
cgit v1.2.3


From 0e45a09a1da0872786885c505467aab8fb29b5b4 Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Wed, 4 Sep 2024 21:17:06 -0700
Subject: Input: serio - define serio_pause_rx guard to pause and resume serio
 ports

serio_pause_rx() and serio_continue_rx() are usually used together to
temporarily stop receiving interrupts/data for a given serio port.
Define "serio_pause_rx" guard for this so that the port is always
resumed once critical section is over.

Example:

	scoped_guard(serio_pause_rx, elo->serio) {
		elo->expected_packet = toupper(packet[0]);
		init_completion(&elo->cmd_done);
	}

Link: https://lore.kernel.org/r/20240905041732.2034348-2-dmitry.torokhov@gmail.com
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 include/linux/serio.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/linux/serio.h b/include/linux/serio.h
index bf2191f25350..69a47674af65 100644
--- a/include/linux/serio.h
+++ b/include/linux/serio.h
@@ -6,6 +6,7 @@
 #define _SERIO_H
 
 
+#include <linux/cleanup.h>
 #include <linux/types.h>
 #include <linux/interrupt.h>
 #include <linux/list.h>
@@ -161,4 +162,6 @@ static inline void serio_continue_rx(struct serio *serio)
 	spin_unlock_irq(&serio->lock);
 }
 
+DEFINE_GUARD(serio_pause_rx, struct serio *, serio_pause_rx(_T), serio_continue_rx(_T))
+
 #endif
-- 
cgit v1.2.3


From c653ffc283404a6c1c0e65143a833180c7ff799b Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Thu, 3 Oct 2024 07:46:51 -0700
Subject: HID: stop exporting hid_snto32()

The only user of hid_snto32() is Logitech HID++ driver, which always
calls hid_snto32() with valid size (constant, either 12 or 8) and
therefore can simply use sign_extend32().

Make the switch and remove hid_snto32(). Move snto32() and s32ton() to
avoid introducing forward declaration.

Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Link: https://patch.msgid.link/20241003144656.3786064-2-dmitry.torokhov@gmail.com
[bentiss: fix checkpatch warning]
Signed-off-by: Benjamin Tissoires <bentiss@kernel.org>
---
 include/linux/hid.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/hid.h b/include/linux/hid.h
index 121d5b8bc867..f7c3a66647fd 100644
--- a/include/linux/hid.h
+++ b/include/linux/hid.h
@@ -974,7 +974,6 @@ const struct hid_device_id *hid_match_device(struct hid_device *hdev,
 					     struct hid_driver *hdrv);
 bool hid_compare_device_paths(struct hid_device *hdev_a,
 			      struct hid_device *hdev_b, char separator);
-s32 hid_snto32(__u32 value, unsigned n);
 __u32 hid_field_extract(const struct hid_device *hid, __u8 *report,
 		     unsigned offset, unsigned n);
 
-- 
cgit v1.2.3


From 9ab515b18f8463fbb340fece47cd461809e42a9d Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Tue, 1 Oct 2024 23:58:41 +0100
Subject: mm: Define VM_HIGH_ARCH_6

The addition of protection keys means that on arm64 we now use all of the
currently defined VM_HIGH_ARCH_x bits. In order to allow us to allocate a
new flag for GCS pages define VM_HIGH_ARCH_6.

Signed-off-by: Mark Brown <broonie@kernel.org>
Link: https://lore.kernel.org/r/20241001-arm64-gcs-v13-2-222b78d87eee@kernel.org
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 include/linux/mm.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index ecf63d2b0582..182bad0c55df 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -329,12 +329,14 @@ extern unsigned int kobjsize(const void *objp);
 #define VM_HIGH_ARCH_BIT_3	35	/* bit only usable on 64-bit architectures */
 #define VM_HIGH_ARCH_BIT_4	36	/* bit only usable on 64-bit architectures */
 #define VM_HIGH_ARCH_BIT_5	37	/* bit only usable on 64-bit architectures */
+#define VM_HIGH_ARCH_BIT_6	38	/* bit only usable on 64-bit architectures */
 #define VM_HIGH_ARCH_0	BIT(VM_HIGH_ARCH_BIT_0)
 #define VM_HIGH_ARCH_1	BIT(VM_HIGH_ARCH_BIT_1)
 #define VM_HIGH_ARCH_2	BIT(VM_HIGH_ARCH_BIT_2)
 #define VM_HIGH_ARCH_3	BIT(VM_HIGH_ARCH_BIT_3)
 #define VM_HIGH_ARCH_4	BIT(VM_HIGH_ARCH_BIT_4)
 #define VM_HIGH_ARCH_5	BIT(VM_HIGH_ARCH_BIT_5)
+#define VM_HIGH_ARCH_6	BIT(VM_HIGH_ARCH_BIT_6)
 #endif /* CONFIG_ARCH_USES_HIGH_VMA_FLAGS */
 
 #ifdef CONFIG_ARCH_HAS_PKEYS
-- 
cgit v1.2.3


From 91e102e79740ae43ded050ccac71aa3371db4f33 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Tue, 1 Oct 2024 23:58:43 +0100
Subject: prctl: arch-agnostic prctl for shadow stack

Three architectures (x86, aarch64, riscv) have announced support for
shadow stacks with fairly similar functionality.  While x86 is using
arch_prctl() to control the functionality neither arm64 nor riscv uses
that interface so this patch adds arch-agnostic prctl() support to
get and set status of shadow stacks and lock the current configuation to
prevent further changes, with support for turning on and off individual
subfeatures so applications can limit their exposure to features that
they do not need.  The features are:

  - PR_SHADOW_STACK_ENABLE: Tracking and enforcement of shadow stacks,
    including allocation of a shadow stack if one is not already
    allocated.
  - PR_SHADOW_STACK_WRITE: Writes to specific addresses in the shadow
    stack.
  - PR_SHADOW_STACK_PUSH: Push additional values onto the shadow stack.

These features are expected to be inherited by new threads and cleared
on exec(), unknown features should be rejected for enable but accepted
for locking (in order to allow for future proofing).

This is based on a patch originally written by Deepak Gupta but modified
fairly heavily, support for indirect landing pads is removed, additional
modes added and the locking interface reworked.  The set status prctl()
is also reworked to just set flags, if setting/reading the shadow stack
pointer is required this could be a separate prctl.

Reviewed-by: Thiago Jung Bauermann <thiago.bauermann@linaro.org>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Acked-by: Yury Khrustalev <yury.khrustalev@arm.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
Reviewed-by: Deepak Gupta <debug@rivosinc.com>
Link: https://lore.kernel.org/r/20241001-arm64-gcs-v13-4-222b78d87eee@kernel.org
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 include/linux/mm.h         |  4 ++++
 include/uapi/linux/prctl.h | 22 ++++++++++++++++++++++
 2 files changed, 26 insertions(+)

(limited to 'include')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 182bad0c55df..56654306a832 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -4221,4 +4221,8 @@ static inline void pgalloc_tag_copy(struct folio *new, struct folio *old)
 }
 #endif /* CONFIG_MEM_ALLOC_PROFILING */
 
+int arch_get_shadow_stack_status(struct task_struct *t, unsigned long __user *status);
+int arch_set_shadow_stack_status(struct task_struct *t, unsigned long status);
+int arch_lock_shadow_stack_status(struct task_struct *t, unsigned long status);
+
 #endif /* _LINUX_MM_H */
diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
index 35791791a879..557a3d2ac1d4 100644
--- a/include/uapi/linux/prctl.h
+++ b/include/uapi/linux/prctl.h
@@ -328,4 +328,26 @@ struct prctl_mm_map {
 # define PR_PPC_DEXCR_CTRL_CLEAR_ONEXEC	0x10 /* Clear the aspect on exec */
 # define PR_PPC_DEXCR_CTRL_MASK		0x1f
 
+/*
+ * Get the current shadow stack configuration for the current thread,
+ * this will be the value configured via PR_SET_SHADOW_STACK_STATUS.
+ */
+#define PR_GET_SHADOW_STACK_STATUS      74
+
+/*
+ * Set the current shadow stack configuration.  Enabling the shadow
+ * stack will cause a shadow stack to be allocated for the thread.
+ */
+#define PR_SET_SHADOW_STACK_STATUS      75
+# define PR_SHADOW_STACK_ENABLE         (1UL << 0)
+# define PR_SHADOW_STACK_WRITE		(1UL << 1)
+# define PR_SHADOW_STACK_PUSH		(1UL << 2)
+
+/*
+ * Prevent further changes to the specified shadow stack
+ * configuration.  All bits may be locked via this call, including
+ * undefined bits.
+ */
+#define PR_LOCK_SHADOW_STACK_STATUS      76
+
 #endif /* _LINUX_PRCTL_H */
-- 
cgit v1.2.3


From 3630e82ab6bd2642f0fc03b574783ccf2fb0c955 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Tue, 1 Oct 2024 23:58:44 +0100
Subject: mman: Add map_shadow_stack() flags

In preparation for adding arm64 GCS support make the map_shadow_stack()
SHADOW_STACK_SET_TOKEN flag generic and add _SET_MARKER. The existing
flag indicates that a token usable for stack switch should be added to
the top of the newly mapped GCS region while the new flag indicates that
a top of stack marker suitable for use by unwinders should be added
above that.

For arm64 the top of stack marker is all bits 0.

Reviewed-by: Thiago Jung Bauermann <thiago.bauermann@linaro.org>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Acked-by: Yury Khrustalev <yury.khrustalev@arm.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
Link: https://lore.kernel.org/r/20241001-arm64-gcs-v13-5-222b78d87eee@kernel.org
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 include/uapi/asm-generic/mman.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/uapi/asm-generic/mman.h b/include/uapi/asm-generic/mman.h
index 57e8195d0b53..5e3d61ddbd8c 100644
--- a/include/uapi/asm-generic/mman.h
+++ b/include/uapi/asm-generic/mman.h
@@ -19,4 +19,8 @@
 #define MCL_FUTURE	2		/* lock all future mappings */
 #define MCL_ONFAULT	4		/* lock all pages that are faulted in */
 
+#define SHADOW_STACK_SET_TOKEN (1ULL << 0)     /* Set up a restore token in the shadow stack */
+#define SHADOW_STACK_SET_MARKER (1ULL << 1)     /* Set up a top of stack marker in the shadow stack */
+
+
 #endif /* __ASM_GENERIC_MMAN_H */
-- 
cgit v1.2.3


From ae80e1629aeaf5be726a9ea94eb7345b1a44b00d Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Tue, 1 Oct 2024 23:58:53 +0100
Subject: mm: Define VM_SHADOW_STACK for arm64 when we support GCS

Use VM_HIGH_ARCH_5 for guarded control stack pages.

Reviewed-by: Thiago Jung Bauermann <thiago.bauermann@linaro.org>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
Link: https://lore.kernel.org/r/20241001-arm64-gcs-v13-14-222b78d87eee@kernel.org
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 include/linux/mm.h | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 56654306a832..8852c39c7695 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -367,7 +367,17 @@ extern unsigned int kobjsize(const void *objp);
  * for more details on the guard size.
  */
 # define VM_SHADOW_STACK	VM_HIGH_ARCH_5
-#else
+#endif
+
+#if defined(CONFIG_ARM64_GCS)
+/*
+ * arm64's Guarded Control Stack implements similar functionality and
+ * has similar constraints to shadow stacks.
+ */
+# define VM_SHADOW_STACK	VM_HIGH_ARCH_6
+#endif
+
+#ifndef VM_SHADOW_STACK
 # define VM_SHADOW_STACK	VM_NONE
 #endif
 
-- 
cgit v1.2.3


From 7ec3b57cb29f8371bf12a725b6e8f75831a03f27 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Tue, 1 Oct 2024 23:59:06 +0100
Subject: arm64/ptrace: Expose GCS via ptrace and core files

Provide a new register type NT_ARM_GCS reporting the current GCS mode
and pointer for EL0.  Due to the interactions with allocation and
deallocation of Guarded Control Stacks we do not permit any changes to
the GCS mode via ptrace, only GCSPR_EL0 may be changed.

Reviewed-by: Thiago Jung Bauermann <thiago.bauermann@linaro.org>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
Link: https://lore.kernel.org/r/20241001-arm64-gcs-v13-27-222b78d87eee@kernel.org
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 include/uapi/linux/elf.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h
index b9935988da5c..9adc218fb6df 100644
--- a/include/uapi/linux/elf.h
+++ b/include/uapi/linux/elf.h
@@ -443,6 +443,7 @@ typedef struct elf64_shdr {
 #define NT_ARM_ZT	0x40d		/* ARM SME ZT registers */
 #define NT_ARM_FPMR	0x40e		/* ARM floating point mode register */
 #define NT_ARM_POE	0x40f		/* ARM POE registers */
+#define NT_ARM_GCS	0x410		/* ARM GCS state */
 #define NT_ARC_V2	0x600		/* ARCv2 accumulator/extra registers */
 #define NT_VMCOREDD	0x700		/* Vmcore Device Dump Note */
 #define NT_MIPS_DSP	0x800		/* MIPS DSP ASE registers */
-- 
cgit v1.2.3


From b63c755cb65d43c8aba987c4f6b57c77c6f123f2 Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <linux@treblig.org>
Date: Mon, 30 Sep 2024 14:29:53 +0100
Subject: appletalk: Remove deadcode

alloc_ltalkdev in net/appletalk/dev.c is dead since
commit 00f3696f7555 ("net: appletalk: remove cops support")

Removing it (and it's helper) leaves dev.c and if_ltalk.h empty;
remove them and the Makefile entry.

tun.c was including that if_ltalk.h but actually wanted
the uapi version for LTALK_ALEN, fix up the path.

Signed-off-by: Dr. David Alan Gilbert <linux@treblig.org>
Reviewed-by: Willem de Bruijn <willemb@google.com>
Acked-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_ltalk.h | 8 --------
 1 file changed, 8 deletions(-)
 delete mode 100644 include/linux/if_ltalk.h

(limited to 'include')

diff --git a/include/linux/if_ltalk.h b/include/linux/if_ltalk.h
deleted file mode 100644
index 4cc1c0b77870..000000000000
--- a/include/linux/if_ltalk.h
+++ /dev/null
@@ -1,8 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __LINUX_LTALK_H
-#define __LINUX_LTALK_H
-
-#include <uapi/linux/if_ltalk.h>
-
-extern struct net_device *alloc_ltalkdev(int sizeof_priv);
-#endif
-- 
cgit v1.2.3


From ec841b8d73cff37f8960e209017efe1eb2fb21f2 Mon Sep 17 00:00:00 2001
From: Xu Yang <xu.yang_2@nxp.com>
Date: Mon, 23 Sep 2024 16:12:01 +0800
Subject: usb: chipidea: add CI_HDRC_HAS_SHORT_PKT_LIMIT flag

Currently, the imx deivice controller has below limitations:

1. can't generate short packet interrupt if IOC not set in dTD. So if one
   request span more than one dTDs and only the last dTD set IOC, the usb
   request will pending there if no more data comes.
2. the controller can't accurately deliver data to differtent usb requests
   in some cases due to short packet. For example: one usb request span 3
   dTDs, then if the controller received a short packet the next packet
   will go to 2nd dTD of current request rather than the first dTD of next
   request.
3. can't build a bus packet use multiple dTDs. For example: controller
   needs to send one packet of 512 bytes use dTD1 (200 bytes) + dTD2
   (312 bytes), actually the host side will see 200 bytes short packet.

Based on these limits, add CI_HDRC_HAS_SHORT_PKT_LIMIT flag and use it on
imx platforms.

Signed-off-by: Xu Yang <xu.yang_2@nxp.com>
Acked-by: Peter Chen <peter.chen@kernel.org>
Link: https://lore.kernel.org/r/20240923081203.2851768-1-xu.yang_2@nxp.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/usb/chipidea.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/usb/chipidea.h b/include/linux/usb/chipidea.h
index 5a7f96684ea2..ebdfef124b2b 100644
--- a/include/linux/usb/chipidea.h
+++ b/include/linux/usb/chipidea.h
@@ -65,6 +65,7 @@ struct ci_hdrc_platform_data {
 #define CI_HDRC_PHY_VBUS_CONTROL	BIT(16)
 #define CI_HDRC_HAS_PORTSC_PEC_MISSED	BIT(17)
 #define CI_HDRC_FORCE_VBUS_ACTIVE_ALWAYS	BIT(18)
+#define	CI_HDRC_HAS_SHORT_PKT_LIMIT	BIT(19)
 	enum usb_dr_mode	dr_mode;
 #define CI_HDRC_CONTROLLER_RESET_EVENT		0
 #define CI_HDRC_CONTROLLER_STOPPED_EVENT	1
-- 
cgit v1.2.3


From 8b7fd6a15f8c32760c2026a62dcf55219b4da15b Mon Sep 17 00:00:00 2001
From: Benjamin Tissoires <bentiss@kernel.org>
Date: Tue, 1 Oct 2024 16:30:05 +0200
Subject: HID: bpf: move HID-BPF report descriptor fixup earlier

Currently, hid_bpf_rdesc_fixup() is called once the match between the
HID device and the driver is done. This can be problematic in case
the driver selected by the kernel would change the report descriptor
after the fact.

To give a chance for hid_bpf_rdesc_fixup() to provide hints on to how
to select a dedicated driver or not, move the call to that BPF hook
earlier in the .probe() process, when we get the first match.

However, this means that we might get called more than once (typically
once for hid-generic, and once for hid-vendor-specific). So we store the
result of HID-BPF fixup in struct hid_device. Basically, this means that
->bpf_rdesc can replace ->dev_rdesc when it was used in the code.

In order to not grow struct hid_device, some fields are re-ordered. This
was the output of pahole for the first 128 bytes:
struct hid_device {
	__u8 *                     dev_rdesc;            /*     0     8 */
	unsigned int               dev_rsize;            /*     8     4 */

	/* XXX 4 bytes hole, try to pack */

	__u8 *                     rdesc;                /*    16     8 */
	unsigned int               rsize;                /*    24     4 */

	/* XXX 4 bytes hole, try to pack */

	struct hid_collection *    collection;           /*    32     8 */
	unsigned int               collection_size;      /*    40     4 */
	unsigned int               maxcollection;        /*    44     4 */
	unsigned int               maxapplication;       /*    48     4 */
	__u16                      bus;                  /*    52     2 */
	__u16                      group;                /*    54     2 */
	__u32                      vendor;               /*    56     4 */
	__u32                      product;              /*    60     4 */
	/* --- cacheline 1 boundary (64 bytes) --- */
	__u32                      version;              /*    64     4 */
	enum hid_type              type;                 /*    68     4 */
	unsigned int               country;              /*    72     4 */

	/* XXX 4 bytes hole, try to pack */

	struct hid_report_enum     report_enum[3];       /*    80  6216 */

Basically, we got three holes of 4 bytes. We can reorder things a little
and makes those 3 holes a continuous 12 bytes hole, which can be replaced
by the new pointer and the new unsigned int we need.

In terms of code allocation, when not using HID-BPF, we are back to kernel
v6.2 in hid_open_report(). These multiple kmemdup() calls will be fixed
in a later commit.

Link: https://patch.msgid.link/20241001-hid-bpf-hid-generic-v3-1-2ef1019468df@kernel.org
Signed-off-by: Benjamin Tissoires <bentiss@kernel.org>
---
 include/linux/hid.h     | 18 ++++++++++--------
 include/linux/hid_bpf.h | 11 +++--------
 2 files changed, 13 insertions(+), 16 deletions(-)

(limited to 'include')

diff --git a/include/linux/hid.h b/include/linux/hid.h
index 121d5b8bc867..ff58b5ceb62e 100644
--- a/include/linux/hid.h
+++ b/include/linux/hid.h
@@ -599,15 +599,17 @@ enum hid_battery_status {
 struct hid_driver;
 struct hid_ll_driver;
 
-struct hid_device {							/* device report descriptor */
-	const __u8 *dev_rdesc;
-	unsigned dev_rsize;
-	const __u8 *rdesc;
-	unsigned rsize;
+struct hid_device {
+	const __u8 *dev_rdesc;						/* device report descriptor */
+	const __u8 *bpf_rdesc;						/* bpf modified report descriptor, if any */
+	const __u8 *rdesc;						/* currently used report descriptor */
+	unsigned int dev_rsize;
+	unsigned int bpf_rsize;
+	unsigned int rsize;
+	unsigned int collection_size;					/* Number of allocated hid_collections */
 	struct hid_collection *collection;				/* List of HID collections */
-	unsigned collection_size;					/* Number of allocated hid_collections */
-	unsigned maxcollection;						/* Number of parsed collections */
-	unsigned maxapplication;					/* Number of applications */
+	unsigned int maxcollection;						/* Number of parsed collections */
+	unsigned int maxapplication;					/* Number of applications */
 	__u16 bus;							/* BUS ID */
 	__u16 group;							/* Report group */
 	__u32 vendor;							/* Vendor ID */
diff --git a/include/linux/hid_bpf.h b/include/linux/hid_bpf.h
index 6a47223e6460..a6876ab29004 100644
--- a/include/linux/hid_bpf.h
+++ b/include/linux/hid_bpf.h
@@ -212,7 +212,7 @@ int hid_bpf_connect_device(struct hid_device *hdev);
 void hid_bpf_disconnect_device(struct hid_device *hdev);
 void hid_bpf_destroy_device(struct hid_device *hid);
 int hid_bpf_device_init(struct hid_device *hid);
-u8 *call_hid_bpf_rdesc_fixup(struct hid_device *hdev, const u8 *rdesc, unsigned int *size);
+const u8 *call_hid_bpf_rdesc_fixup(struct hid_device *hdev, const u8 *rdesc, unsigned int *size);
 #else /* CONFIG_HID_BPF */
 static inline u8 *dispatch_hid_bpf_device_event(struct hid_device *hid, enum hid_report_type type,
 						u8 *data, u32 *size, int interrupt,
@@ -228,13 +228,8 @@ static inline int hid_bpf_connect_device(struct hid_device *hdev) { return 0; }
 static inline void hid_bpf_disconnect_device(struct hid_device *hdev) {}
 static inline void hid_bpf_destroy_device(struct hid_device *hid) {}
 static inline int hid_bpf_device_init(struct hid_device *hid) { return 0; }
-/*
- * This specialized allocator has to be a macro for its allocations to be
- * accounted separately (to have a separate alloc_tag). The typecast is
- * intentional to enforce typesafety.
- */
-#define call_hid_bpf_rdesc_fixup(_hdev, _rdesc, _size)	\
-		((u8 *)kmemdup(_rdesc, *(_size), GFP_KERNEL))
+static inline const u8 *call_hid_bpf_rdesc_fixup(struct hid_device *hdev, const u8 *rdesc,
+						 unsigned int *size) { return rdesc; }
 
 #endif /* CONFIG_HID_BPF */
 
-- 
cgit v1.2.3


From 645c224ac5f6e0013931c342ea707b398d24d410 Mon Sep 17 00:00:00 2001
From: Benjamin Tissoires <bentiss@kernel.org>
Date: Tue, 1 Oct 2024 16:30:12 +0200
Subject: HID: add per device quirk to force bind to hid-generic

We already have the possibility to force not binding to hid-generic and
rely on a dedicated driver, but we couldn't do the other way around.

This is useful for BPF programs where we are fixing the report descriptor
and the events, but want to avoid a specialized driver to come after BPF
which would unwind everything that is done there.

Reviewed-by: Peter Hutterer <peter.hutterer@who-t.net>
Link: https://patch.msgid.link/20241001-hid-bpf-hid-generic-v3-8-2ef1019468df@kernel.org
Signed-off-by: Benjamin Tissoires <bentiss@kernel.org>
---
 include/linux/hid.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/hid.h b/include/linux/hid.h
index ff58b5ceb62e..63330d623335 100644
--- a/include/linux/hid.h
+++ b/include/linux/hid.h
@@ -359,6 +359,7 @@ struct hid_item {
  * | @HID_QUIRK_NO_OUTPUT_REPORTS_ON_INTR_EP:
  * | @HID_QUIRK_HAVE_SPECIAL_DRIVER:
  * | @HID_QUIRK_INCREMENT_USAGE_ON_DUPLICATE:
+ * | @HID_QUIRK_IGNORE_SPECIAL_DRIVER
  * | @HID_QUIRK_FULLSPEED_INTERVAL:
  * | @HID_QUIRK_NO_INIT_REPORTS:
  * | @HID_QUIRK_NO_IGNORE:
@@ -384,6 +385,7 @@ struct hid_item {
 #define HID_QUIRK_HAVE_SPECIAL_DRIVER		BIT(19)
 #define HID_QUIRK_INCREMENT_USAGE_ON_DUPLICATE	BIT(20)
 #define HID_QUIRK_NOINVERT			BIT(21)
+#define HID_QUIRK_IGNORE_SPECIAL_DRIVER		BIT(22)
 #define HID_QUIRK_FULLSPEED_INTERVAL		BIT(28)
 #define HID_QUIRK_NO_INIT_REPORTS		BIT(29)
 #define HID_QUIRK_NO_IGNORE			BIT(30)
-- 
cgit v1.2.3


From 5acd957a986c167d357e617a887630203be29ea4 Mon Sep 17 00:00:00 2001
From: Cosmin Ratiu <cratiu@nvidia.com>
Date: Tue, 1 Oct 2024 13:37:04 +0300
Subject: net/mlx5: hw counters: Make fc_stats & fc_pool private

The mlx5_fc_stats and mlx5_fc_pool structs are only used from
fs_counters.c. As such, make them private there.

mlx5_fc_pool is not used or referenced at all outside fs_counters.

mlx5_fc_stats is referenced from mlx5_core_dev, so instead of having it
as a direct member (which requires exporting it from fs_counters), store
a pointer to it, allocate it on init and clear it on destroy.
One caveat is that a simple container_of to get from a 'work' struct to
the outermost mlx5_core_dev struct directly no longer works, so an extra
pointer had to be added to mlx5_fc_stats back to the parent dev.

Signed-off-by: Cosmin Ratiu <cratiu@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Link: https://patch.msgid.link/20241001103709.58127-2-tariqt@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/mlx5/driver.h | 33 +--------------------------------
 1 file changed, 1 insertion(+), 32 deletions(-)

(limited to 'include')

diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index e23c692a34c7..fc7e6153b73d 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -45,7 +45,6 @@
 #include <linux/workqueue.h>
 #include <linux/mempool.h>
 #include <linux/interrupt.h>
-#include <linux/idr.h>
 #include <linux/notifier.h>
 #include <linux/refcount.h>
 #include <linux/auxiliary_bus.h>
@@ -474,36 +473,6 @@ struct mlx5_core_sriov {
 	u16			max_ec_vfs;
 };
 
-struct mlx5_fc_pool {
-	struct mlx5_core_dev *dev;
-	struct mutex pool_lock; /* protects pool lists */
-	struct list_head fully_used;
-	struct list_head partially_used;
-	struct list_head unused;
-	int available_fcs;
-	int used_fcs;
-	int threshold;
-};
-
-struct mlx5_fc_stats {
-	spinlock_t counters_idr_lock; /* protects counters_idr */
-	struct idr counters_idr;
-	struct list_head counters;
-	struct llist_head addlist;
-	struct llist_head dellist;
-
-	struct workqueue_struct *wq;
-	struct delayed_work work;
-	unsigned long next_query;
-	unsigned long sampling_interval; /* jiffies */
-	u32 *bulk_query_out;
-	int bulk_query_len;
-	size_t num_counters;
-	bool bulk_query_alloc_failed;
-	unsigned long next_bulk_query_alloc;
-	struct mlx5_fc_pool fc_pool;
-};
-
 struct mlx5_events;
 struct mlx5_mpfs;
 struct mlx5_eswitch;
@@ -630,7 +599,7 @@ struct mlx5_priv {
 	struct mlx5_devcom_comp_dev *hca_devcom_comp;
 	struct mlx5_fw_reset	*fw_reset;
 	struct mlx5_core_roce	roce;
-	struct mlx5_fc_stats		fc_stats;
+	struct mlx5_fc_stats		*fc_stats;
 	struct mlx5_rl_table            rl_table;
 	struct mlx5_ft_pool		*ft_pool;
 
-- 
cgit v1.2.3


From d1c9cffe4b01f4d8bc52169139a5fedd48908abc Mon Sep 17 00:00:00 2001
From: Cosmin Ratiu <cratiu@nvidia.com>
Date: Tue, 1 Oct 2024 13:37:09 +0300
Subject: net/mlx5: hw counters: Remove mlx5_fc_create_ex

It no longer serves any purpose and is identical to mlx5_fc_create upon
which it was originally based of.

Signed-off-by: Cosmin Ratiu <cratiu@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Link: https://patch.msgid.link/20241001103709.58127-7-tariqt@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/mlx5/fs.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h
index b744e554f014..438db888bde0 100644
--- a/include/linux/mlx5/fs.h
+++ b/include/linux/mlx5/fs.h
@@ -298,9 +298,6 @@ int mlx5_modify_rule_destination(struct mlx5_flow_handle *handler,
 
 struct mlx5_fc *mlx5_fc_create(struct mlx5_core_dev *dev, bool aging);
 
-/* As mlx5_fc_create() but doesn't queue stats refresh thread. */
-struct mlx5_fc *mlx5_fc_create_ex(struct mlx5_core_dev *dev, bool aging);
-
 void mlx5_fc_destroy(struct mlx5_core_dev *dev, struct mlx5_fc *counter);
 u64 mlx5_fc_query_lastuse(struct mlx5_fc *counter);
 void mlx5_fc_query_cached(struct mlx5_fc *counter,
-- 
cgit v1.2.3


From 4aecca4c76808f3736056d18ff510df80424bc9f Mon Sep 17 00:00:00 2001
From: Vadim Fedorenko <vadfed@meta.com>
Date: Tue, 1 Oct 2024 05:57:14 -0700
Subject: net_tstamp: add SCM_TS_OPT_ID to provide OPT_ID in control message

SOF_TIMESTAMPING_OPT_ID socket option flag gives a way to correlate TX
timestamps and packets sent via socket. Unfortunately, there is no way
to reliably predict socket timestamp ID value in case of error returned
by sendmsg. For UDP sockets it's impossible because of lockless
nature of UDP transmit, several threads may send packets in parallel. In
case of RAW sockets MSG_MORE option makes things complicated. More
details are in the conversation [1].
This patch adds new control message type to give user-space
software an opportunity to control the mapping between packets and
values by providing ID with each sendmsg for UDP sockets.
The documentation is also added in this patch.

[1] https://lore.kernel.org/netdev/CALCETrU0jB+kg0mhV6A8mrHfTE1D1pr1SD_B9Eaa9aDPfgHdtA@mail.gmail.com/

Reviewed-by: Willem de Bruijn <willemb@google.com>
Reviewed-by: Jason Xing <kerneljasonxing@gmail.com>
Signed-off-by: Vadim Fedorenko <vadfed@meta.com>
Link: https://patch.msgid.link/20241001125716.2832769-2-vadfed@meta.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/inet_sock.h           | 4 +++-
 include/net/sock.h                | 7 +++++++
 include/uapi/asm-generic/socket.h | 2 ++
 3 files changed, 12 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index 394c3b66065e..f01dd273bea6 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -174,6 +174,7 @@ struct inet_cork {
 	__s16			tos;
 	char			priority;
 	__u16			gso_size;
+	u32			ts_opt_id;
 	u64			transmit_time;
 	u32			mark;
 };
@@ -241,7 +242,8 @@ struct inet_sock {
 	struct inet_cork_full	cork;
 };
 
-#define IPCORK_OPT	1	/* ip-options has been held in ipcork.opt */
+#define IPCORK_OPT		1	/* ip-options has been held in ipcork.opt */
+#define IPCORK_TS_OPT_ID	2	/* ts_opt_id field is valid, overriding sk_tskey */
 
 enum {
 	INET_FLAGS_PKTINFO	= 0,
diff --git a/include/net/sock.h b/include/net/sock.h
index c58ca8dd561b..ccf28c2b70b1 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -954,6 +954,12 @@ enum sock_flags {
 };
 
 #define SK_FLAGS_TIMESTAMP ((1UL << SOCK_TIMESTAMP) | (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE))
+/*
+ * The highest bit of sk_tsflags is reserved for kernel-internal
+ * SOCKCM_FLAG_TS_OPT_ID. There is a check in core/sock.c to control that
+ * SOF_TIMESTAMPING* values do not reach this reserved area
+ */
+#define SOCKCM_FLAG_TS_OPT_ID	BIT(31)
 
 static inline void sock_copy_flags(struct sock *nsk, const struct sock *osk)
 {
@@ -1796,6 +1802,7 @@ struct sockcm_cookie {
 	u64 transmit_time;
 	u32 mark;
 	u32 tsflags;
+	u32 ts_opt_id;
 };
 
 static inline void sockcm_init(struct sockcm_cookie *sockc,
diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h
index 3b4e3e815602..deacfd6dd197 100644
--- a/include/uapi/asm-generic/socket.h
+++ b/include/uapi/asm-generic/socket.h
@@ -141,6 +141,8 @@
 #define SCM_DEVMEM_DMABUF	SO_DEVMEM_DMABUF
 #define SO_DEVMEM_DONTNEED	80
 
+#define SCM_TS_OPT_ID		81
+
 #if !defined(__KERNEL__)
 
 #if __BITS_PER_LONG == 64 || (defined(__x86_64__) && defined(__ILP32__))
-- 
cgit v1.2.3


From 822b5bc6db55f1c3ea51659c423784ac6919ddd4 Mon Sep 17 00:00:00 2001
From: Vadim Fedorenko <vadfed@meta.com>
Date: Tue, 1 Oct 2024 05:57:15 -0700
Subject: net_tstamp: add SCM_TS_OPT_ID for RAW sockets

The last type of sockets which supports SOF_TIMESTAMPING_OPT_ID is RAW
sockets. To add new option this patch converts all callers (direct and
indirect) of _sock_tx_timestamp to provide sockcm_cookie instead of
tsflags. And while here fix __sock_tx_timestamp to receive tsflags as
__u32 instead of __u16.

Reviewed-by: Willem de Bruijn <willemb@google.com>
Reviewed-by: Jason Xing <kerneljasonxing@gmail.com>
Signed-off-by: Vadim Fedorenko <vadfed@meta.com>
Link: https://patch.msgid.link/20241001125716.2832769-3-vadfed@meta.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/sock.h | 27 ++++++++++++++++++---------
 1 file changed, 18 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/include/net/sock.h b/include/net/sock.h
index ccf28c2b70b1..e282127092ab 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -2660,39 +2660,48 @@ static inline void sock_recv_cmsgs(struct msghdr *msg, struct sock *sk,
 		sock_write_timestamp(sk, 0);
 }
 
-void __sock_tx_timestamp(__u16 tsflags, __u8 *tx_flags);
+void __sock_tx_timestamp(__u32 tsflags, __u8 *tx_flags);
 
 /**
  * _sock_tx_timestamp - checks whether the outgoing packet is to be time stamped
  * @sk:		socket sending this packet
- * @tsflags:	timestamping flags to use
+ * @sockc:	pointer to socket cmsg cookie to get timestamping info
  * @tx_flags:	completed with instructions for time stamping
  * @tskey:      filled in with next sk_tskey (not for TCP, which uses seqno)
  *
  * Note: callers should take care of initial ``*tx_flags`` value (usually 0)
  */
-static inline void _sock_tx_timestamp(struct sock *sk, __u16 tsflags,
+static inline void _sock_tx_timestamp(struct sock *sk,
+				      const struct sockcm_cookie *sockc,
 				      __u8 *tx_flags, __u32 *tskey)
 {
+	__u32 tsflags = sockc->tsflags;
+
 	if (unlikely(tsflags)) {
 		__sock_tx_timestamp(tsflags, tx_flags);
 		if (tsflags & SOF_TIMESTAMPING_OPT_ID && tskey &&
-		    tsflags & SOF_TIMESTAMPING_TX_RECORD_MASK)
-			*tskey = atomic_inc_return(&sk->sk_tskey) - 1;
+		    tsflags & SOF_TIMESTAMPING_TX_RECORD_MASK) {
+			if (tsflags & SOCKCM_FLAG_TS_OPT_ID)
+				*tskey = sockc->ts_opt_id;
+			else
+				*tskey = atomic_inc_return(&sk->sk_tskey) - 1;
+		}
 	}
 	if (unlikely(sock_flag(sk, SOCK_WIFI_STATUS)))
 		*tx_flags |= SKBTX_WIFI_STATUS;
 }
 
-static inline void sock_tx_timestamp(struct sock *sk, __u16 tsflags,
+static inline void sock_tx_timestamp(struct sock *sk,
+				     const struct sockcm_cookie *sockc,
 				     __u8 *tx_flags)
 {
-	_sock_tx_timestamp(sk, tsflags, tx_flags, NULL);
+	_sock_tx_timestamp(sk, sockc, tx_flags, NULL);
 }
 
-static inline void skb_setup_tx_timestamp(struct sk_buff *skb, __u16 tsflags)
+static inline void skb_setup_tx_timestamp(struct sk_buff *skb,
+					  const struct sockcm_cookie *sockc)
 {
-	_sock_tx_timestamp(skb->sk, tsflags, &skb_shinfo(skb)->tx_flags,
+	_sock_tx_timestamp(skb->sk, sockc, &skb_shinfo(skb)->tx_flags,
 			   &skb_shinfo(skb)->tskey);
 }
 
-- 
cgit v1.2.3


From 5a9071a760a61b00260334ad576fe60debafaafc Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 2 Oct 2024 17:30:40 +0000
Subject: tcp: annotate data-races around icsk->icsk_pending

icsk->icsk_pending can be read locklessly already.

Following patch in the series will add another lockless read.

Add smp_load_acquire() and smp_store_release() annotations
because following patch will add a test in tcp_write_timer(),
and READ_ONCE()/WRITE_ONCE() alone would possibly lead to races.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Link: https://patch.msgid.link/20241002173042.917928-2-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/inet_connection_sock.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index c0deaafebfdc..914d19772704 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -197,7 +197,7 @@ static inline void inet_csk_clear_xmit_timer(struct sock *sk, const int what)
 	struct inet_connection_sock *icsk = inet_csk(sk);
 
 	if (what == ICSK_TIME_RETRANS || what == ICSK_TIME_PROBE0) {
-		icsk->icsk_pending = 0;
+		smp_store_release(&icsk->icsk_pending, 0);
 #ifdef INET_CSK_CLEAR_TIMERS
 		sk_stop_timer(sk, &icsk->icsk_retransmit_timer);
 #endif
@@ -229,7 +229,7 @@ static inline void inet_csk_reset_xmit_timer(struct sock *sk, const int what,
 
 	if (what == ICSK_TIME_RETRANS || what == ICSK_TIME_PROBE0 ||
 	    what == ICSK_TIME_LOSS_PROBE || what == ICSK_TIME_REO_TIMEOUT) {
-		icsk->icsk_pending = what;
+		smp_store_release(&icsk->icsk_pending, what);
 		icsk->icsk_timeout = jiffies + when;
 		sk_reset_timer(sk, &icsk->icsk_retransmit_timer, icsk->icsk_timeout);
 	} else if (what == ICSK_TIME_DACK) {
-- 
cgit v1.2.3


From 81df4fa94ee8c0800ed42c47357435602ed105ad Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 2 Oct 2024 17:30:42 +0000
Subject: tcp: add a fast path in tcp_delack_timer()

delack timer is not stopped from inet_csk_clear_xmit_timer()
because we do not define INET_CSK_CLEAR_TIMERS.

This is a conscious choice : inet_csk_clear_xmit_timer()
is often called from another cpu. Calling del_timer()
would cause false sharing and lock contention.

This means that very often, tcp_delack_timer() is called
at the timer expiration, while there is no ACK to transmit.

This can be detected very early, avoiding the socket spinlock.

Notes:
- test about tp->compressed_ack is racy,
  but in the unlikely case there is a race, the dedicated
  compressed_ack_timer hrtimer would close it.

- Even if the fast path is not taken, reading
  icsk->icsk_ack.pending and tp->compressed_ack
  before acquiring the socket spinlock reduces
  acquisition time and chances of contention.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Link: https://patch.msgid.link/20241002173042.917928-4-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/inet_connection_sock.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index 914d19772704..3c82fad904d4 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -202,7 +202,7 @@ static inline void inet_csk_clear_xmit_timer(struct sock *sk, const int what)
 		sk_stop_timer(sk, &icsk->icsk_retransmit_timer);
 #endif
 	} else if (what == ICSK_TIME_DACK) {
-		icsk->icsk_ack.pending = 0;
+		smp_store_release(&icsk->icsk_ack.pending, 0);
 		icsk->icsk_ack.retry = 0;
 #ifdef INET_CSK_CLEAR_TIMERS
 		sk_stop_timer(sk, &icsk->icsk_delack_timer);
@@ -233,7 +233,8 @@ static inline void inet_csk_reset_xmit_timer(struct sock *sk, const int what,
 		icsk->icsk_timeout = jiffies + when;
 		sk_reset_timer(sk, &icsk->icsk_retransmit_timer, icsk->icsk_timeout);
 	} else if (what == ICSK_TIME_DACK) {
-		icsk->icsk_ack.pending |= ICSK_ACK_TIMER;
+		smp_store_release(&icsk->icsk_ack.pending,
+				  icsk->icsk_ack.pending | ICSK_ACK_TIMER);
 		icsk->icsk_ack.timeout = jiffies + when;
 		sk_reset_timer(sk, &icsk->icsk_delack_timer, icsk->icsk_ack.timeout);
 	} else {
-- 
cgit v1.2.3


From f858cc9eed5b05cbe38d7ffd2787c21e3718eb7d Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 3 Oct 2024 12:12:18 +0000
Subject: net: add IFLA_MAX_PACING_OFFLOAD_HORIZON device attribute

Some network devices have the ability to offload EDT (Earliest
Departure Time) which is the model used for TCP pacing and FQ
packet scheduler.

Some of them implement the timing wheel mechanism described in
https://saeed.github.io/files/carousel-sigcomm17.pdf
with an associated 'timing wheel horizon'.

This patch adds dev->max_pacing_offload_horizon expressing
this timing wheel horizon in nsec units.

This is a read-only attribute.

Unless a driver sets it, dev->max_pacing_offload_horizon
is zero.

v2: addressed Jakub feedback ( https://lore.kernel.org/netdev/20240930152304.472767-2-edumazet@google.com/T/#mf6294d714c41cc459962154cc2580ce3c9693663 )
v3: added yaml doc (also per Jakub feedback)

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Willem de Bruijn <willemb@google.com>
Link: https://patch.msgid.link/20241003121219.2396589-2-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/netdevice.h    | 4 ++++
 include/uapi/linux/if_link.h | 1 +
 2 files changed, 5 insertions(+)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 4d20c776a4ff..49a7e7db0883 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2009,6 +2009,8 @@ enum netdev_reg_state {
  *	@dpll_pin: Pointer to the SyncE source pin of a DPLL subsystem,
  *		   where the clock is recovered.
  *
+ *	@max_pacing_offload_horizon: max EDT offload horizon in nsec.
+ *
  *	FIXME: cleanup struct net_device such that network protocol info
  *	moves out.
  */
@@ -2399,6 +2401,8 @@ struct net_device {
 	/** @irq_moder: dim parameters used if IS_ENABLED(CONFIG_DIMLIB). */
 	struct dim_irq_moder	*irq_moder;
 
+	u64			max_pacing_offload_horizon;
+
 	u8			priv[] ____cacheline_aligned
 				       __counted_by(priv_len);
 } ____cacheline_aligned;
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index 6dc258993b17..506ba9c80e83 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -377,6 +377,7 @@ enum {
 	IFLA_GSO_IPV4_MAX_SIZE,
 	IFLA_GRO_IPV4_MAX_SIZE,
 	IFLA_DPLL_PIN,
+	IFLA_MAX_PACING_OFFLOAD_HORIZON,
 	__IFLA_MAX
 };
 
-- 
cgit v1.2.3


From f26080d47007df2ee90e65b7d390207ff3a588af Mon Sep 17 00:00:00 2001
From: Jeffrey Ji <jeffreyji@google.com>
Date: Thu, 3 Oct 2024 12:12:19 +0000
Subject: net_sched: sch_fq: add the ability to offload pacing

Some network devices have the ability to offload EDT (Earliest
Departure Time) which is the model used for TCP pacing and FQ packet
scheduler.

Some of them implement the timing wheel mechanism described in
https://saeed.github.io/files/carousel-sigcomm17.pdf
with an associated 'timing wheel horizon'.

This patchs adds to FQ packet scheduler TCA_FQ_OFFLOAD_HORIZON
attribute.

Its value is capped by the device max_pacing_offload_horizon,
added in the prior patch.

It allows FQ to let packets within pacing offload horizon
to be delivered to the device, which will handle the needed
delay without host involvement.

Signed-off-by: Jeffrey Ji <jeffreyji@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Willem de Bruijn <willemb@google.com>
Link: https://patch.msgid.link/20241003121219.2396589-3-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/uapi/linux/pkt_sched.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h
index a3cd0c2dc995..25a9a47001cd 100644
--- a/include/uapi/linux/pkt_sched.h
+++ b/include/uapi/linux/pkt_sched.h
@@ -836,6 +836,8 @@ enum {
 
 	TCA_FQ_WEIGHTS,		/* Weights for each band */
 
+	TCA_FQ_OFFLOAD_HORIZON, /* dequeue paced packets within this horizon immediately (us units) */
+
 	__TCA_FQ_MAX
 };
 
-- 
cgit v1.2.3


From 65c4c93caaf1a9fca2855942e338530967162d25 Mon Sep 17 00:00:00 2001
From: Lukas Wunner <lukas@wunner.de>
Date: Tue, 10 Sep 2024 16:30:12 +0200
Subject: crypto: sig - Introduce sig_alg backend

Commit 6cb8815f41a9 ("crypto: sig - Add interface for sign/verify")
began a transition of asymmetric sign/verify operations from
crypto_akcipher to a new crypto_sig frontend.

Internally, the crypto_sig frontend still uses akcipher_alg as backend,
however:

   "The link between sig and akcipher is meant to be temporary.  The
    plan is to create a new low-level API for sig and then migrate
    the signature code over to that from akcipher."
    https://lore.kernel.org/r/ZrG6w9wsb-iiLZIF@gondor.apana.org.au/

   "having a separate alg for sig is definitely where we want to
    be since there is very little that the two types actually share."
    https://lore.kernel.org/r/ZrHlpz4qnre0zWJO@gondor.apana.org.au/

Take the next step of that migration and augment the crypto_sig frontend
with a sig_alg backend to which all algorithms can be moved.

During the migration, there will briefly be signature algorithms that
are still based on crypto_akcipher, whilst others are already based on
crypto_sig.  Allow for that by building a fork into crypto_sig_*() API
calls (i.e. crypto_sig_maxsize() and friends) such that one of the two
backends is selected based on the transform's cra_type.

Signed-off-by: Lukas Wunner <lukas@wunner.de>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/crypto/internal/sig.h   | 80 +++++++++++++++++++++++++++++++++++++++++
 include/crypto/sig.h            | 61 +++++++++++++++++++++++++++++++
 include/uapi/linux/cryptouser.h |  5 +++
 3 files changed, 146 insertions(+)

(limited to 'include')

diff --git a/include/crypto/internal/sig.h b/include/crypto/internal/sig.h
index 97cb26ef8115..b16648c1a986 100644
--- a/include/crypto/internal/sig.h
+++ b/include/crypto/internal/sig.h
@@ -10,8 +10,88 @@
 #include <crypto/algapi.h>
 #include <crypto/sig.h>
 
+struct sig_instance {
+	void (*free)(struct sig_instance *inst);
+	union {
+		struct {
+			char head[offsetof(struct sig_alg, base)];
+			struct crypto_instance base;
+		};
+		struct sig_alg alg;
+	};
+};
+
+struct crypto_sig_spawn {
+	struct crypto_spawn base;
+};
+
 static inline void *crypto_sig_ctx(struct crypto_sig *tfm)
 {
 	return crypto_tfm_ctx(&tfm->base);
 }
+
+/**
+ * crypto_register_sig() -- Register public key signature algorithm
+ *
+ * Function registers an implementation of a public key signature algorithm
+ *
+ * @alg:	algorithm definition
+ *
+ * Return: zero on success; error code in case of error
+ */
+int crypto_register_sig(struct sig_alg *alg);
+
+/**
+ * crypto_unregister_sig() -- Unregister public key signature algorithm
+ *
+ * Function unregisters an implementation of a public key signature algorithm
+ *
+ * @alg:	algorithm definition
+ */
+void crypto_unregister_sig(struct sig_alg *alg);
+
+int sig_register_instance(struct crypto_template *tmpl,
+			  struct sig_instance *inst);
+
+static inline struct sig_instance *sig_instance(struct crypto_instance *inst)
+{
+	return container_of(&inst->alg, struct sig_instance, alg.base);
+}
+
+static inline struct sig_instance *sig_alg_instance(struct crypto_sig *tfm)
+{
+	return sig_instance(crypto_tfm_alg_instance(&tfm->base));
+}
+
+static inline struct crypto_instance *sig_crypto_instance(struct sig_instance
+									*inst)
+{
+	return container_of(&inst->alg.base, struct crypto_instance, alg);
+}
+
+static inline void *sig_instance_ctx(struct sig_instance *inst)
+{
+	return crypto_instance_ctx(sig_crypto_instance(inst));
+}
+
+int crypto_grab_sig(struct crypto_sig_spawn *spawn,
+		    struct crypto_instance *inst,
+		    const char *name, u32 type, u32 mask);
+
+static inline struct crypto_sig *crypto_spawn_sig(struct crypto_sig_spawn
+								   *spawn)
+{
+	return crypto_spawn_tfm2(&spawn->base);
+}
+
+static inline void crypto_drop_sig(struct crypto_sig_spawn *spawn)
+{
+	crypto_drop_spawn(&spawn->base);
+}
+
+static inline struct sig_alg *crypto_spawn_sig_alg(struct crypto_sig_spawn
+								    *spawn)
+{
+	return container_of(spawn->base.alg, struct sig_alg, base);
+}
 #endif
diff --git a/include/crypto/sig.h b/include/crypto/sig.h
index d25186bb2be3..f0f52a7c5ae7 100644
--- a/include/crypto/sig.h
+++ b/include/crypto/sig.h
@@ -19,6 +19,52 @@ struct crypto_sig {
 	struct crypto_tfm base;
 };
 
+/**
+ * struct sig_alg - generic public key signature algorithm
+ *
+ * @sign:	Function performs a sign operation as defined by public key
+ *		algorithm. Optional.
+ * @verify:	Function performs a complete verify operation as defined by
+ *		public key algorithm, returning verification status. Optional.
+ * @set_pub_key: Function invokes the algorithm specific set public key
+ *		function, which knows how to decode and interpret
+ *		the BER encoded public key and parameters. Mandatory.
+ * @set_priv_key: Function invokes the algorithm specific set private key
+ *		function, which knows how to decode and interpret
+ *		the BER encoded private key and parameters. Optional.
+ * @max_size:	Function returns key size. Mandatory.
+ * @init:	Initialize the cryptographic transformation object.
+ *		This function is used to initialize the cryptographic
+ *		transformation object. This function is called only once at
+ *		the instantiation time, right after the transformation context
+ *		was allocated. In case the cryptographic hardware has some
+ *		special requirements which need to be handled by software, this
+ *		function shall check for the precise requirement of the
+ *		transformation and put any software fallbacks in place.
+ * @exit:	Deinitialize the cryptographic transformation object. This is a
+ *		counterpart to @init, used to remove various changes set in
+ *		@init.
+ *
+ * @base:	Common crypto API algorithm data structure
+ */
+struct sig_alg {
+	int (*sign)(struct crypto_sig *tfm,
+		    const void *src, unsigned int slen,
+		    void *dst, unsigned int dlen);
+	int (*verify)(struct crypto_sig *tfm,
+		      const void *src, unsigned int slen,
+		      const void *digest, unsigned int dlen);
+	int (*set_pub_key)(struct crypto_sig *tfm,
+			   const void *key, unsigned int keylen);
+	int (*set_priv_key)(struct crypto_sig *tfm,
+			    const void *key, unsigned int keylen);
+	unsigned int (*max_size)(struct crypto_sig *tfm);
+	int (*init)(struct crypto_sig *tfm);
+	void (*exit)(struct crypto_sig *tfm);
+
+	struct crypto_alg base;
+};
+
 /**
  * DOC: Generic Public Key Signature API
  *
@@ -47,6 +93,21 @@ static inline struct crypto_tfm *crypto_sig_tfm(struct crypto_sig *tfm)
 	return &tfm->base;
 }
 
+static inline struct crypto_sig *__crypto_sig_tfm(struct crypto_tfm *tfm)
+{
+	return container_of(tfm, struct crypto_sig, base);
+}
+
+static inline struct sig_alg *__crypto_sig_alg(struct crypto_alg *alg)
+{
+	return container_of(alg, struct sig_alg, base);
+}
+
+static inline struct sig_alg *crypto_sig_alg(struct crypto_sig *tfm)
+{
+	return __crypto_sig_alg(crypto_sig_tfm(tfm)->__crt_alg);
+}
+
 /**
  * crypto_free_sig() - free signature tfm handle
  *
diff --git a/include/uapi/linux/cryptouser.h b/include/uapi/linux/cryptouser.h
index 20a6c0fc149e..db05e0419972 100644
--- a/include/uapi/linux/cryptouser.h
+++ b/include/uapi/linux/cryptouser.h
@@ -64,6 +64,7 @@ enum crypto_attr_type_t {
 	CRYPTOCFGA_STAT_AKCIPHER,	/* No longer supported, do not use. */
 	CRYPTOCFGA_STAT_KPP,		/* No longer supported, do not use. */
 	CRYPTOCFGA_STAT_ACOMP,		/* No longer supported, do not use. */
+	CRYPTOCFGA_REPORT_SIG,		/* struct crypto_report_sig */
 	__CRYPTOCFGA_MAX
 
 #define CRYPTOCFGA_MAX (__CRYPTOCFGA_MAX - 1)
@@ -207,6 +208,10 @@ struct crypto_report_acomp {
 	char type[CRYPTO_MAX_NAME];
 };
 
+struct crypto_report_sig {
+	char type[CRYPTO_MAX_NAME];
+};
+
 #define CRYPTO_REPORT_MAXSIZE (sizeof(struct crypto_user_alg) + \
 			       sizeof(struct crypto_report_blkcipher))
 
-- 
cgit v1.2.3


From 7964b0d4bd1271f82d6b455366a200d320f7dbf8 Mon Sep 17 00:00:00 2001
From: Lukas Wunner <lukas@wunner.de>
Date: Tue, 10 Sep 2024 16:30:15 +0200
Subject: crypto: rsa-pkcs1pad - Deduplicate set_{pub,priv}_key callbacks

pkcs1pad_set_pub_key() and pkcs1pad_set_priv_key() are almost identical.

The upcoming migration of sign/verify operations from rsa-pkcs1pad.c
into a separate crypto_template will require another copy of the exact
same functions.  When RSASSA-PSS and RSAES-OAEP are introduced, each
will need yet another copy.

Deduplicate the functions into a single one which lives in a common
header file for reuse by RSASSA-PKCS1-v1_5, RSASSA-PSS and RSAES-OAEP.

Signed-off-by: Lukas Wunner <lukas@wunner.de>
Reviewed-by: Stefan Berger <stefanb@linux.ibm.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/crypto/internal/rsa.h | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

(limited to 'include')

diff --git a/include/crypto/internal/rsa.h b/include/crypto/internal/rsa.h
index e870133f4b77..754f687134df 100644
--- a/include/crypto/internal/rsa.h
+++ b/include/crypto/internal/rsa.h
@@ -8,6 +8,7 @@
 #ifndef _RSA_HELPER_
 #define _RSA_HELPER_
 #include <linux/types.h>
+#include <crypto/akcipher.h>
 
 /**
  * rsa_key - RSA key structure
@@ -53,5 +54,32 @@ int rsa_parse_pub_key(struct rsa_key *rsa_key, const void *key,
 int rsa_parse_priv_key(struct rsa_key *rsa_key, const void *key,
 		       unsigned int key_len);
 
+#define RSA_PUB (true)
+#define RSA_PRIV (false)
+
+static inline int rsa_set_key(struct crypto_akcipher *child,
+			      unsigned int *key_size, bool is_pubkey,
+			      const void *key, unsigned int keylen)
+{
+	int err;
+
+	*key_size = 0;
+
+	if (is_pubkey)
+		err = crypto_akcipher_set_pub_key(child, key, keylen);
+	else
+		err = crypto_akcipher_set_priv_key(child, key, keylen);
+	if (err)
+		return err;
+
+	/* Find out new modulus size from rsa implementation */
+	err = crypto_akcipher_maxsize(child);
+	if (err > PAGE_SIZE)
+		return -ENOTSUPP;
+
+	*key_size = err;
+	return 0;
+}
+
 extern struct crypto_template rsa_pkcs1pad_tmpl;
 #endif
-- 
cgit v1.2.3


From 1e562deacecca1f1bec7d23da526904a1e87525e Mon Sep 17 00:00:00 2001
From: Lukas Wunner <lukas@wunner.de>
Date: Tue, 10 Sep 2024 16:30:16 +0200
Subject: crypto: rsassa-pkcs1 - Migrate to sig_alg backend

A sig_alg backend has just been introduced with the intent of moving all
asymmetric sign/verify algorithms to it one by one.

Migrate the sign/verify operations from rsa-pkcs1pad.c to a separate
rsassa-pkcs1.c which uses the new backend.

Consequently there are now two templates which build on the "rsa"
akcipher_alg:

* The existing "pkcs1pad" template, which is instantiated as an
  akcipher_instance and retains the encrypt/decrypt operations of
  RSAES-PKCS1-v1_5 (RFC 8017 sec 7.2).

* The new "pkcs1" template, which is instantiated as a sig_instance
  and contains the sign/verify operations of RSASSA-PKCS1-v1_5
  (RFC 8017 sec 8.2).

In a separate step, rsa-pkcs1pad.c could optionally be renamed to
rsaes-pkcs1.c for clarity.  Additional "oaep" and "pss" templates
could be added for RSAES-OAEP and RSASSA-PSS.

Note that it's currently allowed to allocate a "pkcs1pad(rsa)" transform
without specifying a hash algorithm.  That makes sense if the transform
is only used for encrypt/decrypt and continues to be supported.  But for
sign/verify, such transforms previously did not insert the Full Hash
Prefix into the padding.  The resulting message encoding was incompliant
with EMSA-PKCS1-v1_5 (RFC 8017 sec 9.2) and therefore nonsensical.

From here on in, it is no longer allowed to allocate a transform without
specifying a hash algorithm if the transform is used for sign/verify
operations.  This simplifies the code because the insertion of the Full
Hash Prefix is no longer optional, so various "if (digest_info)" clauses
can be removed.

There has been a previous attempt to forbid transform allocation without
specifying a hash algorithm, namely by commit c0d20d22e0ad ("crypto:
rsa-pkcs1pad - Require hash to be present").  It had to be rolled back
with commit b3a8c8a5ebb5 ("crypto: rsa-pkcs1pad: Allow hash to be
optional [ver #2]"), presumably because it broke allocation of a
transform which was solely used for encrypt/decrypt, not sign/verify.
Avoid such breakage by allowing transform allocation for encrypt/decrypt
with and without specifying a hash algorithm (and simply ignoring the
hash algorithm in the former case).

So again, specifying a hash algorithm is now mandatory for sign/verify,
but optional and ignored for encrypt/decrypt.

The new sig_alg API uses kernel buffers instead of sglists, which
avoids the overhead of copying signature and digest from sglists back
into kernel buffers.  rsassa-pkcs1.c is thus simplified quite a bit.

sig_alg is always synchronous, whereas the underlying "rsa" akcipher_alg
may be asynchronous.  So await the result of the akcipher_alg, similar
to crypto_akcipher_sync_{en,de}crypt().

As part of the migration, rename "rsa_digest_info" to "hash_prefix" to
adhere to the spec language in RFC 9580.  Otherwise keep the code
unmodified wherever possible to ease reviewing and bisecting.  Leave
several simplification and hardening opportunities to separate commits.

rsassa-pkcs1.c uses modern __free() syntax for allocation of buffers
which need to be freed by kfree_sensitive(), hence a DEFINE_FREE()
clause for kfree_sensitive() is introduced herein as a byproduct.

Signed-off-by: Lukas Wunner <lukas@wunner.de>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/crypto/internal/rsa.h | 1 +
 include/linux/slab.h          | 1 +
 2 files changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/crypto/internal/rsa.h b/include/crypto/internal/rsa.h
index 754f687134df..071a1951b992 100644
--- a/include/crypto/internal/rsa.h
+++ b/include/crypto/internal/rsa.h
@@ -82,4 +82,5 @@ static inline int rsa_set_key(struct crypto_akcipher *child,
 }
 
 extern struct crypto_template rsa_pkcs1pad_tmpl;
+extern struct crypto_template rsassa_pkcs1_tmpl;
 #endif
diff --git a/include/linux/slab.h b/include/linux/slab.h
index b35e2db7eb0e..0268ea7abf8b 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -448,6 +448,7 @@ void kfree_sensitive(const void *objp);
 size_t __ksize(const void *objp);
 
 DEFINE_FREE(kfree, void *, if (!IS_ERR_OR_NULL(_T)) kfree(_T))
+DEFINE_FREE(kfree_sensitive, void *, if (_T) kfree_sensitive(_T))
 
 /**
  * ksize - Report actual allocation size of associated object
-- 
cgit v1.2.3


From 5b553e06b3215fa97d222ebddc2bc964f1824c5b Mon Sep 17 00:00:00 2001
From: Lukas Wunner <lukas@wunner.de>
Date: Tue, 10 Sep 2024 16:30:19 +0200
Subject: crypto: virtio - Drop sign/verify operations

The virtio crypto driver exposes akcipher sign/verify operations in a
user space ABI.  This blocks removal of sign/verify from akcipher_alg.

Herbert opines:

   "I would say that this is something that we can break.  Breaking it
    is no different to running virtio on a host that does not support
    these algorithms.  After all, a software implementation must always
    be present.

    I deliberately left akcipher out of crypto_user because the API
    is still in flux.  We should not let virtio constrain ourselves."
    https://lore.kernel.org/all/ZtqoNAgcnXnrYhZZ@gondor.apana.org.au/

   "I would remove virtio akcipher support in its entirety.  This API
    was never meant to be exposed outside of the kernel."
    https://lore.kernel.org/all/Ztqql_gqgZiMW8zz@gondor.apana.org.au/

Drop sign/verify support from virtio crypto.  There's no strong reason
to also remove encrypt/decrypt support, so keep it.

A key selling point of virtio crypto is to allow guest access to crypto
accelerators on the host.  So far the only akcipher algorithm supported
by virtio crypto is RSA.  Dropping sign/verify merely means that the
PKCS#1 padding is now always generated or verified inside the guest,
but the actual signature generation/verification (which is an RSA
decrypt/encrypt operation) may still use an accelerator on the host.

Generating or verifying the PKCS#1 padding is cheap, so a hardware
accelerator won't be of much help there.  Which begs the question
whether virtio crypto support for sign/verify makes sense at all.

It would make sense for the sign operation if the host has a security
chip to store asymmetric private keys.  But the kernel doesn't even
have an asymmetric_key_subtype yet for hardware-based private keys.
There's at least one rudimentary driver for such chips (atmel-ecc.c for
ATECC508A), but it doesn't implement the sign operation.  The kernel
would first have to grow support for a hardware asymmetric_key_subtype
and at least one driver implementing the sign operation before exposure
to guests via virtio makes sense.

Signed-off-by: Lukas Wunner <lukas@wunner.de>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/uapi/linux/virtio_crypto.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/uapi/linux/virtio_crypto.h b/include/uapi/linux/virtio_crypto.h
index 71a54a6849ca..2fccb64c9d6b 100644
--- a/include/uapi/linux/virtio_crypto.h
+++ b/include/uapi/linux/virtio_crypto.h
@@ -329,6 +329,7 @@ struct virtio_crypto_op_header {
 	VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_AKCIPHER, 0x00)
 #define VIRTIO_CRYPTO_AKCIPHER_DECRYPT \
 	VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_AKCIPHER, 0x01)
+	/* akcipher sign/verify opcodes are deprecated */
 #define VIRTIO_CRYPTO_AKCIPHER_SIGN \
 	VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_AKCIPHER, 0x02)
 #define VIRTIO_CRYPTO_AKCIPHER_VERIFY \
-- 
cgit v1.2.3


From 6b34562f0cfe81f1f207fc7c146c4ff4b31eb625 Mon Sep 17 00:00:00 2001
From: Lukas Wunner <lukas@wunner.de>
Date: Tue, 10 Sep 2024 16:30:21 +0200
Subject: crypto: akcipher - Drop sign/verify operations

A sig_alg backend has just been introduced and all asymmetric
sign/verify algorithms have been migrated to it.

The sign/verify operations can thus be dropped from akcipher_alg.
It is now purely for asymmetric encrypt/decrypt.

Move struct crypto_akcipher_sync_data from internal.h to akcipher.c and
unexport crypto_akcipher_sync_{prep,post}():  They're no longer used by
sig.c but only locally in akcipher.c.

In crypto_akcipher_sync_{prep,post}(), drop various NULL pointer checks
for data->dst as they were only necessary for the verify operation.

In the crypto_sig_*() API calls, remove the forks that were necessary
while algorithms were converted from crypto_akcipher to crypto_sig
one by one.

In struct akcipher_testvec, remove the "params", "param_len" and "algo"
elements as they were only needed for the ecrdsa verify operation.
Remove corresponding dead code from test_akcipher_one() as well.

Signed-off-by: Lukas Wunner <lukas@wunner.de>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/crypto/akcipher.h          | 69 +++++---------------------------------
 include/crypto/internal/akcipher.h |  4 +--
 2 files changed, 10 insertions(+), 63 deletions(-)

(limited to 'include')

diff --git a/include/crypto/akcipher.h b/include/crypto/akcipher.h
index 18a10cad07aa..cdf7da74bf2f 100644
--- a/include/crypto/akcipher.h
+++ b/include/crypto/akcipher.h
@@ -12,24 +12,19 @@
 #include <linux/crypto.h>
 
 /**
- * struct akcipher_request - public key request
+ * struct akcipher_request - public key cipher request
  *
  * @base:	Common attributes for async crypto requests
  * @src:	Source data
- *		For verify op this is signature + digest, in that case
- *		total size of @src is @src_len + @dst_len.
- * @dst:	Destination data (Should be NULL for verify op)
+ * @dst:	Destination data
  * @src_len:	Size of the input buffer
- *		For verify op it's size of signature part of @src, this part
- *		is supposed to be operated by cipher.
- * @dst_len:	Size of @dst buffer (for all ops except verify).
+ * @dst_len:	Size of @dst buffer
  *		It needs to be at least	as big as the expected result
  *		depending on the operation.
  *		After operation it will be updated with the actual size of the
  *		result.
  *		In case of error where the dst sgl size was insufficient,
  *		it will be updated to the size required for the operation.
- *		For verify op this is size of digest part in @src.
  * @__ctx:	Start of private context data
  */
 struct akcipher_request {
@@ -55,15 +50,8 @@ struct crypto_akcipher {
 };
 
 /**
- * struct akcipher_alg - generic public key algorithm
+ * struct akcipher_alg - generic public key cipher algorithm
  *
- * @sign:	Function performs a sign operation as defined by public key
- *		algorithm. In case of error, where the dst_len was insufficient,
- *		the req->dst_len will be updated to the size required for the
- *		operation
- * @verify:	Function performs a complete verify operation as defined by
- *		public key algorithm, returning verification status. Requires
- *		digest value as input parameter.
  * @encrypt:	Function performs an encrypt operation as defined by public key
  *		algorithm. In case of error, where the dst_len was insufficient,
  *		the req->dst_len will be updated to the size required for the
@@ -94,8 +82,6 @@ struct crypto_akcipher {
  * @base:	Common crypto API algorithm data structure
  */
 struct akcipher_alg {
-	int (*sign)(struct akcipher_request *req);
-	int (*verify)(struct akcipher_request *req);
 	int (*encrypt)(struct akcipher_request *req);
 	int (*decrypt)(struct akcipher_request *req);
 	int (*set_pub_key)(struct crypto_akcipher *tfm, const void *key,
@@ -110,9 +96,9 @@ struct akcipher_alg {
 };
 
 /**
- * DOC: Generic Public Key API
+ * DOC: Generic Public Key Cipher API
  *
- * The Public Key API is used with the algorithms of type
+ * The Public Key Cipher API is used with the algorithms of type
  * CRYPTO_ALG_TYPE_AKCIPHER (listed as type "akcipher" in /proc/crypto)
  */
 
@@ -243,10 +229,9 @@ static inline void akcipher_request_set_callback(struct akcipher_request *req,
  *
  * @req:	public key request
  * @src:	ptr to input scatter list
- * @dst:	ptr to output scatter list or NULL for verify op
+ * @dst:	ptr to output scatter list
  * @src_len:	size of the src input scatter list to be processed
- * @dst_len:	size of the dst output scatter list or size of signature
- *		portion in @src for verify op
+ * @dst_len:	size of the dst output scatter list
  */
 static inline void akcipher_request_set_crypt(struct akcipher_request *req,
 					      struct scatterlist *src,
@@ -347,44 +332,6 @@ int crypto_akcipher_sync_decrypt(struct crypto_akcipher *tfm,
 				 const void *src, unsigned int slen,
 				 void *dst, unsigned int dlen);
 
-/**
- * crypto_akcipher_sign() - Invoke public key sign operation
- *
- * Function invokes the specific public key sign operation for a given
- * public key algorithm
- *
- * @req:	asymmetric key request
- *
- * Return: zero on success; error code in case of error
- */
-static inline int crypto_akcipher_sign(struct akcipher_request *req)
-{
-	struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req);
-
-	return crypto_akcipher_alg(tfm)->sign(req);
-}
-
-/**
- * crypto_akcipher_verify() - Invoke public key signature verification
- *
- * Function invokes the specific public key signature verification operation
- * for a given public key algorithm.
- *
- * @req:	asymmetric key request
- *
- * Note: req->dst should be NULL, req->src should point to SG of size
- * (req->src_size + req->dst_size), containing signature (of req->src_size
- * length) with appended digest (of req->dst_size length).
- *
- * Return: zero on verification success; error code in case of error.
- */
-static inline int crypto_akcipher_verify(struct akcipher_request *req)
-{
-	struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req);
-
-	return crypto_akcipher_alg(tfm)->verify(req);
-}
-
 /**
  * crypto_akcipher_set_pub_key() - Invoke set public key operation
  *
diff --git a/include/crypto/internal/akcipher.h b/include/crypto/internal/akcipher.h
index a0fba4b2eccf..14ee62bc52b6 100644
--- a/include/crypto/internal/akcipher.h
+++ b/include/crypto/internal/akcipher.h
@@ -124,7 +124,7 @@ static inline struct akcipher_alg *crypto_spawn_akcipher_alg(
 /**
  * crypto_register_akcipher() -- Register public key algorithm
  *
- * Function registers an implementation of a public key verify algorithm
+ * Function registers an implementation of a public key cipher algorithm
  *
  * @alg:	algorithm definition
  *
@@ -135,7 +135,7 @@ int crypto_register_akcipher(struct akcipher_alg *alg);
 /**
  * crypto_unregister_akcipher() -- Unregister public key algorithm
  *
- * Function unregisters an implementation of a public key verify algorithm
+ * Function unregisters an implementation of a public key cipher algorithm
  *
  * @alg:	algorithm definition
  */
-- 
cgit v1.2.3


From 5ba296674e468ddd40b6ef5aa845c2d2ee794b84 Mon Sep 17 00:00:00 2001
From: Lukas Wunner <lukas@wunner.de>
Date: Tue, 10 Sep 2024 16:30:22 +0200
Subject: crypto: sig - Move crypto_sig_*() API calls to include file

The crypto_sig_*() API calls lived in sig.c so far because they needed
access to struct crypto_sig_type:  This was necessary to differentiate
between signature algorithms that had already been migrated from
crypto_akcipher to crypto_sig and those that hadn't yet.

Now that all algorithms have been migrated, the API calls can become
static inlines in <crypto/sig.h> to mimic what <crypto/akcipher.h> is
doing.

Signed-off-by: Lukas Wunner <lukas@wunner.de>
Reviewed-by: Stefan Berger <stefanb@linux.ibm.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/crypto/sig.h | 47 ++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 36 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/include/crypto/sig.h b/include/crypto/sig.h
index f0f52a7c5ae7..bbc902642bf5 100644
--- a/include/crypto/sig.h
+++ b/include/crypto/sig.h
@@ -130,7 +130,12 @@ static inline void crypto_free_sig(struct crypto_sig *tfm)
  *
  * @tfm:	signature tfm handle allocated with crypto_alloc_sig()
  */
-int crypto_sig_maxsize(struct crypto_sig *tfm);
+static inline int crypto_sig_maxsize(struct crypto_sig *tfm)
+{
+	struct sig_alg *alg = crypto_sig_alg(tfm);
+
+	return alg->max_size(tfm);
+}
 
 /**
  * crypto_sig_sign() - Invoke signing operation
@@ -145,9 +150,14 @@ int crypto_sig_maxsize(struct crypto_sig *tfm);
  *
  * Return: zero on success; error code in case of error
  */
-int crypto_sig_sign(struct crypto_sig *tfm,
-		    const void *src, unsigned int slen,
-		    void *dst, unsigned int dlen);
+static inline int crypto_sig_sign(struct crypto_sig *tfm,
+				  const void *src, unsigned int slen,
+				  void *dst, unsigned int dlen)
+{
+	struct sig_alg *alg = crypto_sig_alg(tfm);
+
+	return alg->sign(tfm, src, slen, dst, dlen);
+}
 
 /**
  * crypto_sig_verify() - Invoke signature verification
@@ -163,9 +173,14 @@ int crypto_sig_sign(struct crypto_sig *tfm,
  *
  * Return: zero on verification success; error code in case of error.
  */
-int crypto_sig_verify(struct crypto_sig *tfm,
-		      const void *src, unsigned int slen,
-		      const void *digest, unsigned int dlen);
+static inline int crypto_sig_verify(struct crypto_sig *tfm,
+				    const void *src, unsigned int slen,
+				    const void *digest, unsigned int dlen)
+{
+	struct sig_alg *alg = crypto_sig_alg(tfm);
+
+	return alg->verify(tfm, src, slen, digest, dlen);
+}
 
 /**
  * crypto_sig_set_pubkey() - Invoke set public key operation
@@ -180,8 +195,13 @@ int crypto_sig_verify(struct crypto_sig *tfm,
  *
  * Return: zero on success; error code in case of error
  */
-int crypto_sig_set_pubkey(struct crypto_sig *tfm,
-			  const void *key, unsigned int keylen);
+static inline int crypto_sig_set_pubkey(struct crypto_sig *tfm,
+					const void *key, unsigned int keylen)
+{
+	struct sig_alg *alg = crypto_sig_alg(tfm);
+
+	return alg->set_pub_key(tfm, key, keylen);
+}
 
 /**
  * crypto_sig_set_privkey() - Invoke set private key operation
@@ -196,6 +216,11 @@ int crypto_sig_set_pubkey(struct crypto_sig *tfm,
  *
  * Return: zero on success; error code in case of error
  */
-int crypto_sig_set_privkey(struct crypto_sig *tfm,
-			   const void *key, unsigned int keylen);
+static inline int crypto_sig_set_privkey(struct crypto_sig *tfm,
+					 const void *key, unsigned int keylen)
+{
+	struct sig_alg *alg = crypto_sig_alg(tfm);
+
+	return alg->set_priv_key(tfm, key, keylen);
+}
 #endif
-- 
cgit v1.2.3


From 4df86c6ea5c37fe0452638f39a1e4b189da75c54 Mon Sep 17 00:00:00 2001
From: Lukas Wunner <lukas@wunner.de>
Date: Tue, 10 Sep 2024 16:30:23 +0200
Subject: ASN.1: Clean up include statements in public headers

If <linux/asn1_decoder.h> is the first header included from a .c file
(due to headers being sorted alphabetically), the compiler complains:

  include/linux/asn1_decoder.h:18:29: error: unknown type name 'size_t'

Avoid by including <linux/types.h>.

Jonathan notes that the counterpart <linux/asn1_encoder.h> already
includes <linux/types.h>, but additionally includes the unnecessary
<linux/bug.h>.  Drop it.

Signed-off-by: Lukas Wunner <lukas@wunner.de>
Reviewed-by: Stefan Berger <stefanb@linux.ibm.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/linux/asn1_decoder.h | 1 +
 include/linux/asn1_encoder.h | 1 -
 2 files changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/asn1_decoder.h b/include/linux/asn1_decoder.h
index 83f9c6e1e5e9..b41bce82a191 100644
--- a/include/linux/asn1_decoder.h
+++ b/include/linux/asn1_decoder.h
@@ -9,6 +9,7 @@
 #define _LINUX_ASN1_DECODER_H
 
 #include <linux/asn1.h>
+#include <linux/types.h>
 
 struct asn1_decoder;
 
diff --git a/include/linux/asn1_encoder.h b/include/linux/asn1_encoder.h
index 08cd0c2ad34f..d17484dffb74 100644
--- a/include/linux/asn1_encoder.h
+++ b/include/linux/asn1_encoder.h
@@ -6,7 +6,6 @@
 #include <linux/types.h>
 #include <linux/asn1.h>
 #include <linux/asn1_ber_bytecode.h>
-#include <linux/bug.h>
 
 #define asn1_oid_len(oid) (sizeof(oid)/sizeof(u32))
 unsigned char *
-- 
cgit v1.2.3


From d6793ff974e07e4eea151d1f0805e92d042825a1 Mon Sep 17 00:00:00 2001
From: Lukas Wunner <lukas@wunner.de>
Date: Tue, 10 Sep 2024 16:30:25 +0200
Subject: crypto: ecdsa - Move X9.62 signature decoding into template

Unlike the rsa driver, which separates signature decoding and
signature verification into two steps, the ecdsa driver does both in one.

This restricts users to the one signature format currently supported
(X9.62) and prevents addition of others such as P1363, which is needed
by the forthcoming SPDM library (Security Protocol and Data Model) for
PCI device authentication.

Per Herbert's suggestion, change ecdsa to use a "raw" signature encoding
and then implement X9.62 and P1363 as templates which convert their
respective encodings to the raw one.  One may then specify
"x962(ecdsa-nist-XXX)" or "p1363(ecdsa-nist-XXX)" to pick the encoding.

The present commit moves X9.62 decoding to a template.  A separate
commit is going to introduce another template for P1363 decoding.

The ecdsa driver internally represents a signature as two u64 arrays of
size ECC_MAX_BYTES.  This appears to be the most natural choice for the
raw format as it can directly be used for verification without having to
further decode signature data or copy it around.

Repurpose all the existing test vectors for "x962(ecdsa-nist-XXX)" and
create a duplicate of them to test the raw encoding.

Link: https://lore.kernel.org/all/ZoHXyGwRzVvYkcTP@gondor.apana.org.au/
Signed-off-by: Lukas Wunner <lukas@wunner.de>
Tested-by: Stefan Berger <stefanb@linux.ibm.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/crypto/internal/ecc.h | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'include')

diff --git a/include/crypto/internal/ecc.h b/include/crypto/internal/ecc.h
index 0717a53ae732..68db975e0963 100644
--- a/include/crypto/internal/ecc.h
+++ b/include/crypto/internal/ecc.h
@@ -42,6 +42,18 @@
 
 #define ECC_POINT_INIT(x, y, ndigits)	(struct ecc_point) { x, y, ndigits }
 
+/*
+ * The integers r and s making up the signature are expected to be
+ * formatted as two consecutive u64 arrays of size ECC_MAX_BYTES.
+ * The bytes within each u64 digit are in native endianness,
+ * but the order of the u64 digits themselves is little endian.
+ * This format allows direct use by internal vli_*() functions.
+ */
+struct ecdsa_raw_sig {
+	u64 r[ECC_MAX_DIGITS];
+	u64 s[ECC_MAX_DIGITS];
+};
+
 /**
  * ecc_swap_digits() - Copy ndigits from big endian array to native array
  * @in:       Input array
@@ -293,4 +305,5 @@ void ecc_point_mult_shamir(const struct ecc_point *result,
 			   const u64 *y, const struct ecc_point *q,
 			   const struct ecc_curve *curve);
 
+extern struct crypto_template ecdsa_x962_tmpl;
 #endif
-- 
cgit v1.2.3


From 221f00418e726237dbe38ba627ce08b22d3667f7 Mon Sep 17 00:00:00 2001
From: Lukas Wunner <lukas@wunner.de>
Date: Tue, 10 Sep 2024 16:30:26 +0200
Subject: crypto: sig - Rename crypto_sig_maxsize() to crypto_sig_keysize()

crypto_sig_maxsize() is a bit of a misnomer as it doesn't return the
maximum signature size, but rather the key size.

Rename it as well as all implementations of the ->max_size callback.
A subsequent commit introduces a crypto_sig_maxsize() function which
returns the actual maximum signature size.

While at it, change the return type of crypto_sig_keysize() from int to
unsigned int for consistency with crypto_akcipher_maxsize().  None of
the callers checks for a negative return value and an error condition
can always be indicated by returning zero.

Signed-off-by: Lukas Wunner <lukas@wunner.de>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/crypto/sig.h | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/crypto/sig.h b/include/crypto/sig.h
index bbc902642bf5..a3ef17c5f72f 100644
--- a/include/crypto/sig.h
+++ b/include/crypto/sig.h
@@ -32,7 +32,7 @@ struct crypto_sig {
  * @set_priv_key: Function invokes the algorithm specific set private key
  *		function, which knows how to decode and interpret
  *		the BER encoded private key and parameters. Optional.
- * @max_size:	Function returns key size. Mandatory.
+ * @key_size:	Function returns key size. Mandatory.
  * @init:	Initialize the cryptographic transformation object.
  *		This function is used to initialize the cryptographic
  *		transformation object. This function is called only once at
@@ -58,7 +58,7 @@ struct sig_alg {
 			   const void *key, unsigned int keylen);
 	int (*set_priv_key)(struct crypto_sig *tfm,
 			    const void *key, unsigned int keylen);
-	unsigned int (*max_size)(struct crypto_sig *tfm);
+	unsigned int (*key_size)(struct crypto_sig *tfm);
 	int (*init)(struct crypto_sig *tfm);
 	void (*exit)(struct crypto_sig *tfm);
 
@@ -121,20 +121,20 @@ static inline void crypto_free_sig(struct crypto_sig *tfm)
 }
 
 /**
- * crypto_sig_maxsize() - Get len for output buffer
+ * crypto_sig_keysize() - Get key size
  *
- * Function returns the dest buffer size required for a given key.
+ * Function returns the key size in bytes.
  * Function assumes that the key is already set in the transformation. If this
- * function is called without a setkey or with a failed setkey, you will end up
+ * function is called without a setkey or with a failed setkey, you may end up
  * in a NULL dereference.
  *
  * @tfm:	signature tfm handle allocated with crypto_alloc_sig()
  */
-static inline int crypto_sig_maxsize(struct crypto_sig *tfm)
+static inline unsigned int crypto_sig_keysize(struct crypto_sig *tfm)
 {
 	struct sig_alg *alg = crypto_sig_alg(tfm);
 
-	return alg->max_size(tfm);
+	return alg->key_size(tfm);
 }
 
 /**
-- 
cgit v1.2.3


From a2471684dae23a676b4badea306140d24e6507f5 Mon Sep 17 00:00:00 2001
From: Lukas Wunner <lukas@wunner.de>
Date: Tue, 10 Sep 2024 16:30:27 +0200
Subject: crypto: ecdsa - Move X9.62 signature size calculation into template

software_key_query() returns the maximum signature and digest size for a
given key to user space.  When it only supported RSA keys, calculating
those sizes was trivial as they were always equivalent to the key size.

However when ECDSA was added, the function grew somewhat complicated
calculations which take the ASN.1 encoding and curve into account.
This doesn't scale well and adjusting the calculations is easily
forgotten when adding support for new encodings or curves.  In fact,
when NIST P521 support was recently added, the function was initially
not amended:

https://lore.kernel.org/all/b749d5ee-c3b8-4cbd-b252-7773e4536e07@linux.ibm.com/

Introduce a ->max_size() callback to struct sig_alg and take advantage
of it to move the signature size calculations to ecdsa-x962.c.

Introduce a ->digest_size() callback to struct sig_alg and move the
maximum ECDSA digest size to ecdsa.c.  It is common across ecdsa-x962.c
and the upcoming ecdsa-p1363.c and thus inherited by both of them.

For all other algorithms, continue using the key size as maximum
signature and digest size.

Signed-off-by: Lukas Wunner <lukas@wunner.de>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/crypto/sig.h | 38 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 38 insertions(+)

(limited to 'include')

diff --git a/include/crypto/sig.h b/include/crypto/sig.h
index a3ef17c5f72f..cff41ad93824 100644
--- a/include/crypto/sig.h
+++ b/include/crypto/sig.h
@@ -33,6 +33,8 @@ struct crypto_sig {
  *		function, which knows how to decode and interpret
  *		the BER encoded private key and parameters. Optional.
  * @key_size:	Function returns key size. Mandatory.
+ * @digest_size: Function returns maximum digest size. Optional.
+ * @max_size:	Function returns maximum signature size. Optional.
  * @init:	Initialize the cryptographic transformation object.
  *		This function is used to initialize the cryptographic
  *		transformation object. This function is called only once at
@@ -59,6 +61,8 @@ struct sig_alg {
 	int (*set_priv_key)(struct crypto_sig *tfm,
 			    const void *key, unsigned int keylen);
 	unsigned int (*key_size)(struct crypto_sig *tfm);
+	unsigned int (*digest_size)(struct crypto_sig *tfm);
+	unsigned int (*max_size)(struct crypto_sig *tfm);
 	int (*init)(struct crypto_sig *tfm);
 	void (*exit)(struct crypto_sig *tfm);
 
@@ -137,6 +141,40 @@ static inline unsigned int crypto_sig_keysize(struct crypto_sig *tfm)
 	return alg->key_size(tfm);
 }
 
+/**
+ * crypto_sig_digestsize() - Get maximum digest size
+ *
+ * Function returns the maximum digest size in bytes.
+ * Function assumes that the key is already set in the transformation. If this
+ * function is called without a setkey or with a failed setkey, you may end up
+ * in a NULL dereference.
+ *
+ * @tfm:	signature tfm handle allocated with crypto_alloc_sig()
+ */
+static inline unsigned int crypto_sig_digestsize(struct crypto_sig *tfm)
+{
+	struct sig_alg *alg = crypto_sig_alg(tfm);
+
+	return alg->digest_size(tfm);
+}
+
+/**
+ * crypto_sig_maxsize() - Get maximum signature size
+ *
+ * Function returns the maximum signature size in bytes.
+ * Function assumes that the key is already set in the transformation. If this
+ * function is called without a setkey or with a failed setkey, you may end up
+ * in a NULL dereference.
+ *
+ * @tfm:	signature tfm handle allocated with crypto_alloc_sig()
+ */
+static inline unsigned int crypto_sig_maxsize(struct crypto_sig *tfm)
+{
+	struct sig_alg *alg = crypto_sig_alg(tfm);
+
+	return alg->max_size(tfm);
+}
+
 /**
  * crypto_sig_sign() - Invoke signing operation
  *
-- 
cgit v1.2.3


From b04163863caf599d4348a05af5a71cf5d42f11dc Mon Sep 17 00:00:00 2001
From: Lukas Wunner <lukas@wunner.de>
Date: Tue, 10 Sep 2024 16:30:28 +0200
Subject: crypto: ecdsa - Support P1363 signature decoding

Alternatively to the X9.62 encoding of ecdsa signatures, which uses
ASN.1 and is already supported by the kernel, there's another common
encoding called P1363.  It stores r and s as the concatenation of two
big endian, unsigned integers.  The name originates from IEEE P1363.

Add a P1363 template in support of the forthcoming SPDM library
(Security Protocol and Data Model) for PCI device authentication.

P1363 is prescribed by SPDM 1.2.1 margin no 44:

   "For ECDSA signatures, excluding SM2, in SPDM, the signature shall be
    the concatenation of r and s.  The size of r shall be the size of
    the selected curve.  Likewise, the size of s shall be the size of
    the selected curve.  See BaseAsymAlgo in NEGOTIATE_ALGORITHMS for
    the size of r and s.  The byte order for r and s shall be in big
    endian order.  When placing ECDSA signatures into an SPDM signature
    field, r shall come first followed by s."

Link: https://www.dmtf.org/sites/default/files/standards/documents/DSP0274_1.2.1.pdf
Signed-off-by: Lukas Wunner <lukas@wunner.de>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Reviewed-by: Stefan Berger <stefanb@linux.ibm.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/crypto/internal/ecc.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/crypto/internal/ecc.h b/include/crypto/internal/ecc.h
index 68db975e0963..71483e5305e1 100644
--- a/include/crypto/internal/ecc.h
+++ b/include/crypto/internal/ecc.h
@@ -306,4 +306,5 @@ void ecc_point_mult_shamir(const struct ecc_point *result,
 			   const struct ecc_curve *curve);
 
 extern struct crypto_template ecdsa_x962_tmpl;
+extern struct crypto_template ecdsa_p1363_tmpl;
 #endif
-- 
cgit v1.2.3


From 452c55dcefa958f7e87798d764497485687e4bc3 Mon Sep 17 00:00:00 2001
From: Chenghai Huang <huangchenghai2@huawei.com>
Date: Sun, 29 Sep 2024 19:26:57 +0800
Subject: crypto: hisilicon/qm - fix the coding specifications issue

Ensure that the inline function contains no more than 10 lines.
move q_num_set() from hisi_acc_qm.h to qm.c.

Signed-off-by: Chenghai Huang <huangchenghai2@huawei.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/linux/hisi_acc_qm.h | 33 ++-------------------------------
 1 file changed, 2 insertions(+), 31 deletions(-)

(limited to 'include')

diff --git a/include/linux/hisi_acc_qm.h b/include/linux/hisi_acc_qm.h
index 9d7754ad5e9b..389e95754776 100644
--- a/include/linux/hisi_acc_qm.h
+++ b/include/linux/hisi_acc_qm.h
@@ -436,37 +436,6 @@ struct hisi_qp {
 	struct uacce_queue *uacce_q;
 };
 
-static inline int q_num_set(const char *val, const struct kernel_param *kp,
-			    unsigned int device)
-{
-	struct pci_dev *pdev;
-	u32 n, q_num;
-	int ret;
-
-	if (!val)
-		return -EINVAL;
-
-	pdev = pci_get_device(PCI_VENDOR_ID_HUAWEI, device, NULL);
-	if (!pdev) {
-		q_num = min_t(u32, QM_QNUM_V1, QM_QNUM_V2);
-		pr_info("No device found currently, suppose queue number is %u\n",
-			q_num);
-	} else {
-		if (pdev->revision == QM_HW_V1)
-			q_num = QM_QNUM_V1;
-		else
-			q_num = QM_QNUM_V2;
-
-		pci_dev_put(pdev);
-	}
-
-	ret = kstrtou32(val, 10, &n);
-	if (ret || n < QM_MIN_QNUM || n > q_num)
-		return -EINVAL;
-
-	return param_set_int(val, kp);
-}
-
 static inline int vfs_num_set(const char *val, const struct kernel_param *kp)
 {
 	u32 n;
@@ -526,6 +495,8 @@ static inline void hisi_qm_del_list(struct hisi_qm *qm, struct hisi_qm_list *qm_
 	mutex_unlock(&qm_list->lock);
 }
 
+int hisi_qm_q_num_set(const char *val, const struct kernel_param *kp,
+		      unsigned int device);
 int hisi_qm_init(struct hisi_qm *qm);
 void hisi_qm_uninit(struct hisi_qm *qm);
 int hisi_qm_start(struct hisi_qm *qm);
-- 
cgit v1.2.3


From 21e92806d39c68af2accd1fb238c2daecfcf9fbd Mon Sep 17 00:00:00 2001
From: Donglin Peng <pengdonglin@xiaomi.com>
Date: Sat, 14 Sep 2024 20:29:12 -0700
Subject: function_graph: Support recording and printing the function return
 address

When using function_graph tracer to analyze the flow of kernel function
execution, it is often necessary to quickly locate the exact line of code
where the call occurs. While this may be easy at times, it can be more
time-consuming when some functions are inlined or the flow is too long.

This feature aims to simplify the process by recording the return address
of traced funcions and printing it when outputing trace logs.

To enhance human readability, the prefix 'ret=' is used for the kernel return
value, while '<-' serves as the prefix for the return address in trace logs to
make it look more like the function tracer.

A new trace option named 'funcgraph-retaddr' has been introduced, and the
existing option 'sym-addr' can be used to control the format of the return
address.

See below logs with both funcgraph-retval and funcgraph-retaddr enabled.

0)             | load_elf_binary() { /* <-bprm_execve+0x249/0x600 */
0)             |   load_elf_phdrs() { /* <-load_elf_binary+0x84/0x1730 */
0)             |     __kmalloc_noprof() { /* <-load_elf_phdrs+0x4a/0xb0 */
0)   3.657 us  |       __cond_resched(); /* <-__kmalloc_noprof+0x28c/0x390 ret=0x0 */
0) + 24.335 us |     } /* __kmalloc_noprof ret=0xffff8882007f3000 */
0)             |     kernel_read() { /* <-load_elf_phdrs+0x6c/0xb0 */
0)             |       rw_verify_area() { /* <-kernel_read+0x2b/0x50 */
0)             |         security_file_permission() { /* <-kernel_read+0x2b/0x50 */
0)             |           selinux_file_permission() { /* <-security_file_permission+0x26/0x40 */
0)             |             __inode_security_revalidate() { /* <-selinux_file_permission+0x6d/0x140 */
0)   2.034 us  |               __cond_resched(); /* <-__inode_security_revalidate+0x5f/0x80 ret=0x0 */
0)   6.602 us  |             } /* __inode_security_revalidate ret=0x0 */
0)   2.214 us  |             avc_policy_seqno(); /* <-selinux_file_permission+0x107/0x140 ret=0x0 */
0) + 16.670 us |           } /* selinux_file_permission ret=0x0 */
0) + 20.809 us |         } /* security_file_permission ret=0x0 */
0) + 25.217 us |       } /* rw_verify_area ret=0x0 */
0)             |       __kernel_read() { /* <-load_elf_phdrs+0x6c/0xb0 */
0)             |         ext4_file_read_iter() { /* <-__kernel_read+0x160/0x2e0 */

Then, we can use the faddr2line to locate the source code, for example:

$ ./scripts/faddr2line ./vmlinux load_elf_phdrs+0x6c/0xb0
load_elf_phdrs+0x6c/0xb0:
elf_read at fs/binfmt_elf.c:471
(inlined by) load_elf_phdrs at fs/binfmt_elf.c:531

Link: https://lore.kernel.org/20240915032912.1118397-1-dolinux.peng@gmail.com
Reported-by: kernel test robot <lkp@intel.com>
Closes: https://lore.kernel.org/oe-kbuild-all/202409150605.HgUmU8ea-lkp@intel.com/
Signed-off-by: Donglin Peng <dolinux.peng@gmail.com>
[ Rebased to handle text_delta offsets ]
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 include/linux/ftrace.h | 27 +++++++++++++++++++++++++--
 1 file changed, 25 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index e684addf6508..2ac3b3b53cd0 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -1040,6 +1040,17 @@ struct ftrace_graph_ent {
 	int depth;
 } __packed;
 
+/*
+ * Structure that defines an entry function trace with retaddr.
+ * It's already packed but the attribute "packed" is needed
+ * to remove extra padding at the end.
+ */
+struct fgraph_retaddr_ent {
+	unsigned long func; /* Current function */
+	int depth;
+	unsigned long retaddr;  /* Return address */
+} __packed;
+
 /*
  * Structure that defines a return function trace.
  * It's already packed but the attribute "packed" is needed
@@ -1057,19 +1068,29 @@ struct ftrace_graph_ret {
 	unsigned long long rettime;
 } __packed;
 
+struct fgraph_extras;
 struct fgraph_ops;
 
 /* Type of the callback handlers for tracing function graph*/
 typedef void (*trace_func_graph_ret_t)(struct ftrace_graph_ret *,
 				       struct fgraph_ops *); /* return */
 typedef int (*trace_func_graph_ent_t)(struct ftrace_graph_ent *,
-				      struct fgraph_ops *); /* entry */
+				      struct fgraph_ops *,
+				      struct fgraph_extras *); /* entry */
 
-extern int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace, struct fgraph_ops *gops);
+extern int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace,
+				   struct fgraph_ops *gops,
+				   struct fgraph_extras *extras);
 bool ftrace_pids_enabled(struct ftrace_ops *ops);
 
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 
+/* Used to convey some extra datas when creating a graph entry */
+struct fgraph_extras {
+	u32 flags;
+	unsigned long retaddr;
+};
+
 struct fgraph_ops {
 	trace_func_graph_ent_t		entryfunc;
 	trace_func_graph_ret_t		retfunc;
@@ -1115,6 +1136,8 @@ unsigned long ftrace_graph_ret_addr(struct task_struct *task, int *idx,
 				    unsigned long ret, unsigned long *retp);
 unsigned long *fgraph_get_task_var(struct fgraph_ops *gops);
 
+u32 graph_tracer_flags_get(u32 flags);
+
 /*
  * Sometimes we don't want to trace a function with the function
  * graph tracer but we want them to keep traced by the usual function
-- 
cgit v1.2.3


From 4436df478860bb5da1864df2cd20f281a210f139 Mon Sep 17 00:00:00 2001
From: Erick Archer <erick.archer@outlook.com>
Date: Fri, 7 Jun 2024 18:19:12 +0200
Subject: batman-adv: Add flex array to struct batadv_tvlv_tt_data

The "struct batadv_tvlv_tt_data" uses a dynamically sized set of
trailing elements. Specifically, it uses an array of structures of type
"batadv_tvlv_tt_vlan_data". So, use the preferred way in the kernel
declaring a flexible array [1].

At the same time, prepare for the coming implementation by GCC and Clang
of the __counted_by attribute. Flexible array members annotated with
__counted_by can have their accesses bounds-checked at run-time via
CONFIG_UBSAN_BOUNDS (for array indexing) and CONFIG_FORTIFY_SOURCE (for
strcpy/memcpy-family functions). In this case, it is important to note
that the attribute used is specifically __counted_by_be since variable
"num_vlan" is of type __be16.

The following change to the "batadv_tt_tvlv_ogm_handler_v1" function:

-	tt_vlan = (struct batadv_tvlv_tt_vlan_data *)(tt_data + 1);
-	tt_change = (struct batadv_tvlv_tt_change *)(tt_vlan + num_vlan);

+	tt_change = (struct batadv_tvlv_tt_change *)((void *)tt_data
+						     + flex_size);

is intended to prevent the compiler from generating an "out-of-bounds"
notification due to the __counted_by attribute. The compiler can do a
pointer calculation using the vlan_data flexible array memory, or in
other words, this may be calculated as an array offset, since it is the
same as:

        &tt_data->vlan_data[num_vlan]

Therefore, we go past the end of the array. In other "multiple trailing
flexible array" situations, this has been solved by addressing from the
base pointer, since the compiler either knows the full allocation size
or it knows nothing about it (this case, since it came from a "void *"
function argument).

The order in which the structure batadv_tvlv_tt_data and the structure
batadv_tvlv_tt_vlan_data are defined must be swap to avoid an incomplete
type error.

Also, avoid the open-coded arithmetic in memory allocator functions [2]
using the "struct_size" macro and use the "flex_array_size" helper to
clarify some calculations, when possible.

Moreover, the new structure member also allow us to avoid the open-coded
arithmetic on pointers in some situations. Take advantage of this.

This code was detected with the help of Coccinelle, and audited and
modified manually.

Link: https://www.kernel.org/doc/html/next/process/deprecated.html#zero-length-and-one-element-arrays [1]
Link: https://www.kernel.org/doc/html/next/process/deprecated.html#open-coded-arithmetic-in-allocator-arguments [2]
Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Erick Archer <erick.archer@outlook.com>
Signed-off-by: Sven Eckelmann <sven@narfation.org>
Signed-off-by: Simon Wunderlich <sw@simonwunderlich.de>
---
 include/uapi/linux/batadv_packet.h | 29 ++++++++++++++++-------------
 1 file changed, 16 insertions(+), 13 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/batadv_packet.h b/include/uapi/linux/batadv_packet.h
index 6e25753015df..439132a819ea 100644
--- a/include/uapi/linux/batadv_packet.h
+++ b/include/uapi/linux/batadv_packet.h
@@ -9,6 +9,7 @@
 
 #include <asm/byteorder.h>
 #include <linux/if_ether.h>
+#include <linux/stddef.h>
 #include <linux/types.h>
 
 /**
@@ -592,19 +593,6 @@ struct batadv_tvlv_gateway_data {
 	__be32 bandwidth_up;
 };
 
-/**
- * struct batadv_tvlv_tt_data - tt data propagated through the tt tvlv container
- * @flags: translation table flags (see batadv_tt_data_flags)
- * @ttvn: translation table version number
- * @num_vlan: number of announced VLANs. In the TVLV this struct is followed by
- *  one batadv_tvlv_tt_vlan_data object per announced vlan
- */
-struct batadv_tvlv_tt_data {
-	__u8   flags;
-	__u8   ttvn;
-	__be16 num_vlan;
-};
-
 /**
  * struct batadv_tvlv_tt_vlan_data - vlan specific tt data propagated through
  *  the tt tvlv container
@@ -618,6 +606,21 @@ struct batadv_tvlv_tt_vlan_data {
 	__u16  reserved;
 };
 
+/**
+ * struct batadv_tvlv_tt_data - tt data propagated through the tt tvlv container
+ * @flags: translation table flags (see batadv_tt_data_flags)
+ * @ttvn: translation table version number
+ * @num_vlan: number of announced VLANs. In the TVLV this struct is followed by
+ *  one batadv_tvlv_tt_vlan_data object per announced vlan
+ * @vlan_data: array of batadv_tvlv_tt_vlan_data objects
+ */
+struct batadv_tvlv_tt_data {
+	__u8   flags;
+	__u8   ttvn;
+	__be16 num_vlan;
+	struct batadv_tvlv_tt_vlan_data vlan_data[] __counted_by_be(num_vlan);
+};
+
 /**
  * struct batadv_tvlv_tt_change - translation table diff data
  * @flags: status indicators concerning the non-mesh client (see
-- 
cgit v1.2.3


From 61b17d072d811df5733a1570889b8c6fa6834bf8 Mon Sep 17 00:00:00 2001
From: Danila Tikhonov <danila@jiaxyga.com>
Date: Sun, 18 Aug 2024 23:43:39 +0300
Subject: dt-bindings: clock: qcom,gcc-sm8450: Add SM8475 GCC bindings

Add new entry to the SM8450 dt-bindings and add SM8475-specific clocks
to SM8450 GCC header file.

Signed-off-by: Danila Tikhonov <danila@jiaxyga.com>
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Link: https://lore.kernel.org/r/20240818204348.197788-2-danila@jiaxyga.com
Signed-off-by: Bjorn Andersson <andersson@kernel.org>
---
 include/dt-bindings/clock/qcom,gcc-sm8450.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/dt-bindings/clock/qcom,gcc-sm8450.h b/include/dt-bindings/clock/qcom,gcc-sm8450.h
index 9679410843a0..7320e63c3a2f 100644
--- a/include/dt-bindings/clock/qcom,gcc-sm8450.h
+++ b/include/dt-bindings/clock/qcom,gcc-sm8450.h
@@ -194,6 +194,9 @@
 #define GCC_VIDEO_AXI0_CLK					182
 #define GCC_VIDEO_AXI1_CLK					183
 #define GCC_VIDEO_XO_CLK					184
+/* Additional SM8475-specific clocks */
+#define SM8475_GCC_GPLL2					185
+#define SM8475_GCC_GPLL3					186
 
 /* GCC resets */
 #define GCC_CAMERA_BCR						0
-- 
cgit v1.2.3


From 421d2251fbeac8ab4308ab6653a9252dc4efa6c9 Mon Sep 17 00:00:00 2001
From: Herve Codina <herve.codina@bootlin.com>
Date: Thu, 3 Oct 2024 13:46:39 +0200
Subject: dt-bindings: iio: adc: Add the GE HealthCare PMC ADC

The GE HealthCare PMC Analog to Digital Converter (ADC) is a 16-Channel
(voltage and current), 16-Bit ADC with an I2C Interface.

Signed-off-by: Herve Codina <herve.codina@bootlin.com>
Tested-by: Ian Ray <ian.ray@gehealthcare.com>
Reviewed-by: Conor Dooley <conor.dooley@microchip.com>
Link: https://patch.msgid.link/20241003114641.672086-3-herve.codina@bootlin.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 include/dt-bindings/iio/adc/gehc,pmc-adc.h | 10 ++++++++++
 1 file changed, 10 insertions(+)
 create mode 100644 include/dt-bindings/iio/adc/gehc,pmc-adc.h

(limited to 'include')

diff --git a/include/dt-bindings/iio/adc/gehc,pmc-adc.h b/include/dt-bindings/iio/adc/gehc,pmc-adc.h
new file mode 100644
index 000000000000..2f291e3c76ae
--- /dev/null
+++ b/include/dt-bindings/iio/adc/gehc,pmc-adc.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause */
+
+#ifndef _DT_BINDINGS_IIO_ADC_GEHC_PMC_ADC_H
+#define _DT_BINDINGS_IIO_ADC_GEHC_PMC_ADC_H
+
+/* ADC channel type */
+#define GEHC_PMC_ADC_VOLTAGE	0
+#define GEHC_PMC_ADC_CURRENT	1
+
+#endif
-- 
cgit v1.2.3


From 70c8fd00a9bd0509bbf7bccd9baea8bbd5ddc756 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Wed, 2 Oct 2024 17:27:16 -0400
Subject: timekeeping: Add interfaces for handling timestamps with a floor
 value

Multigrain timestamps allow the kernel to use fine-grained timestamps when
an inode's attributes is being actively observed via ->getattr().  With
this support, it's possible for a file to get a fine-grained timestamp, and
another modified after it to get a coarse-grained stamp that is earlier
than the fine-grained time.  If this happens then the files can appear to
have been modified in reverse order, which breaks VFS ordering guarantees
[1].

To prevent this, maintain a floor value for multigrain timestamps.
Whenever a fine-grained timestamp is handed out, record it, and when later
coarse-grained stamps are handed out, ensure they are not earlier than that
value. If the coarse-grained timestamp is earlier than the fine-grained
floor, return the floor value instead.

Add a static singleton atomic64_t into timekeeper.c that is used to keep
track of the latest fine-grained time ever handed out. This is tracked as a
monotonic ktime_t value to ensure that it isn't affected by clock
jumps. Because it is updated at different times than the rest of the
timekeeper object, the floor value is managed independently of the
timekeeper via a cmpxchg() operation, and sits on its own cacheline.

Add two new public interfaces:

- ktime_get_coarse_real_ts64_mg() fills a timespec64 with the later of the
  coarse-grained clock and the floor time

- ktime_get_real_ts64_mg() gets the fine-grained clock value, and tries
  to swap it into the floor. A timespec64 is filled with the result.

The floor value is global and updated via a single try_cmpxchg(). If
that fails then the operation raced with a concurrent update. Any
concurrent update must be later than the existing floor value, so any
racing tasks can accept any resulting floor value without retrying.

[1]: POSIX requires that files be stamped with realtime clock values, and
     makes no provision for dealing with backward clock jumps. If a backward
     realtime clock jump occurs, then files can appear to have been modified
     in reverse order.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Randy Dunlap <rdunlap@infradead.org> # documentation bits
Acked-by: John Stultz <jstultz@google.com>
Link: https://lore.kernel.org/all/20241002-mgtime-v10-1-d1c4717f5284@kernel.org
---
 include/linux/timekeeping.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h
index fc12a9ba2c88..7aa85246c183 100644
--- a/include/linux/timekeeping.h
+++ b/include/linux/timekeeping.h
@@ -45,6 +45,10 @@ extern void ktime_get_real_ts64(struct timespec64 *tv);
 extern void ktime_get_coarse_ts64(struct timespec64 *ts);
 extern void ktime_get_coarse_real_ts64(struct timespec64 *ts);
 
+/* Multigrain timestamp interfaces */
+extern void ktime_get_coarse_real_ts64_mg(struct timespec64 *ts);
+extern void ktime_get_real_ts64_mg(struct timespec64 *ts);
+
 void getboottime64(struct timespec64 *ts);
 
 /*
-- 
cgit v1.2.3


From 96f9a366ec8abe027326d7aab84d64370019f0f1 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Wed, 2 Oct 2024 17:27:17 -0400
Subject: timekeeping: Add percpu counter for tracking floor swap events

The mgtime_floor value is a global variable for tracking the latest
fine-grained timestamp handed out. Because it's a global, track the
number of times that a new floor value is assigned.

Add a new percpu counter to the timekeeping code to track the number of
floor swap events that have occurred. A later patch will add a debugfs
file to display this counter alongside other stats involving multigrain
timestamps.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Randy Dunlap <rdunlap@infradead.org> # documentation bits
Link: https://lore.kernel.org/all/20241002-mgtime-v10-2-d1c4717f5284@kernel.org
---
 include/linux/timekeeping.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h
index 7aa85246c183..84a035e86ac8 100644
--- a/include/linux/timekeeping.h
+++ b/include/linux/timekeeping.h
@@ -48,6 +48,7 @@ extern void ktime_get_coarse_real_ts64(struct timespec64 *ts);
 /* Multigrain timestamp interfaces */
 extern void ktime_get_coarse_real_ts64_mg(struct timespec64 *ts);
 extern void ktime_get_real_ts64_mg(struct timespec64 *ts);
+extern unsigned long timekeeping_get_mg_floor_swaps(void);
 
 void getboottime64(struct timespec64 *ts);
 
-- 
cgit v1.2.3


From 51976c6cd786151b6a1bdf8b8b3334beac0ba99c Mon Sep 17 00:00:00 2001
From: Chengchang Tang <tangchengchang@huawei.com>
Date: Fri, 27 Sep 2024 18:33:22 +0800
Subject: RDMA/core: Provide rdma_user_mmap_disassociate() to disassociate mmap
 pages

Provide a new api rdma_user_mmap_disassociate() for drivers to
disassociate mmap pages for a device.

Since drivers can now disassociate mmaps by calling this api,
introduce a new disassociation_lock to specifically prevent
races between this disassociation process and new mmaps. And
thus the old hw_destroy_rwsem is not needed in this api.

Signed-off-by: Chengchang Tang <tangchengchang@huawei.com>
Signed-off-by: Junxian Huang <huangjunxian6@hisilicon.com>
Link: https://patch.msgid.link/20240927103323.1897094-2-huangjunxian6@hisilicon.com
Signed-off-by: Leon Romanovsky <leon@kernel.org>
---
 include/rdma/ib_verbs.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include')

diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index aa8ede439905..9cb8b5fe7eee 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -2948,6 +2948,14 @@ int rdma_user_mmap_entry_insert_range(struct ib_ucontext *ucontext,
 				      size_t length, u32 min_pgoff,
 				      u32 max_pgoff);
 
+#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS)
+void rdma_user_mmap_disassociate(struct ib_device *device);
+#else
+static inline void rdma_user_mmap_disassociate(struct ib_device *device)
+{
+}
+#endif
+
 static inline int
 rdma_user_mmap_entry_insert_exact(struct ib_ucontext *ucontext,
 				  struct rdma_user_mmap_entry *entry,
-- 
cgit v1.2.3


From 2382d68d7d43873ba856baf567cab0d5c523f23b Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Wed, 25 Sep 2024 15:31:38 +1000
Subject: sched: change wake_up_bit() and related function to expect unsigned
 long *

wake_up_bit() currently allows a "void *".  While this isn't strictly a
problem as the address is never dereferenced, it is inconsistent with
the corresponding wait_on_bit() which requires "unsigned long *" and
does dereference the pointer.

Any code that needs to wait for a change in something other than an
unsigned long would be better served by wake_up_var()/wait_var_event().

This patch changes all related "void *" to "unsigned long *".

Reported-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: NeilBrown <neilb@suse.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20240925053405.3960701-2-neilb@suse.de
---
 include/linux/wait_bit.h | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/linux/wait_bit.h b/include/linux/wait_bit.h
index 7725b7579b78..48e123839892 100644
--- a/include/linux/wait_bit.h
+++ b/include/linux/wait_bit.h
@@ -8,7 +8,7 @@
 #include <linux/wait.h>
 
 struct wait_bit_key {
-	void			*flags;
+	unsigned long		*flags;
 	int			bit_nr;
 	unsigned long		timeout;
 };
@@ -23,14 +23,14 @@ struct wait_bit_queue_entry {
 
 typedef int wait_bit_action_f(struct wait_bit_key *key, int mode);
 
-void __wake_up_bit(struct wait_queue_head *wq_head, void *word, int bit);
+void __wake_up_bit(struct wait_queue_head *wq_head, unsigned long *word, int bit);
 int __wait_on_bit(struct wait_queue_head *wq_head, struct wait_bit_queue_entry *wbq_entry, wait_bit_action_f *action, unsigned int mode);
 int __wait_on_bit_lock(struct wait_queue_head *wq_head, struct wait_bit_queue_entry *wbq_entry, wait_bit_action_f *action, unsigned int mode);
-void wake_up_bit(void *word, int bit);
-int out_of_line_wait_on_bit(void *word, int, wait_bit_action_f *action, unsigned int mode);
-int out_of_line_wait_on_bit_timeout(void *word, int, wait_bit_action_f *action, unsigned int mode, unsigned long timeout);
-int out_of_line_wait_on_bit_lock(void *word, int, wait_bit_action_f *action, unsigned int mode);
-struct wait_queue_head *bit_waitqueue(void *word, int bit);
+void wake_up_bit(unsigned long *word, int bit);
+int out_of_line_wait_on_bit(unsigned long *word, int, wait_bit_action_f *action, unsigned int mode);
+int out_of_line_wait_on_bit_timeout(unsigned long *word, int, wait_bit_action_f *action, unsigned int mode, unsigned long timeout);
+int out_of_line_wait_on_bit_lock(unsigned long *word, int, wait_bit_action_f *action, unsigned int mode);
+struct wait_queue_head *bit_waitqueue(unsigned long *word, int bit);
 extern void __init wait_bit_init(void);
 
 int wake_bit_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync, void *key);
@@ -327,7 +327,7 @@ do {									\
  * You can use this helper if bitflags are manipulated atomically rather than
  * non-atomically under a lock.
  */
-static inline void clear_and_wake_up_bit(int bit, void *word)
+static inline void clear_and_wake_up_bit(int bit, unsigned long *word)
 {
 	clear_bit_unlock(bit, word);
 	/* See wake_up_bit() for which memory barrier you need to use. */
-- 
cgit v1.2.3


From 3cdee6b359f134da22f7fd4606e0338413cfd79e Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Wed, 25 Sep 2024 15:31:39 +1000
Subject: sched: Improve documentation for wake_up_bit/wait_on_bit family of
 functions

This patch revises the documention for wake_up_bit(),
clear_and_wake_up_bit(), and all the wait_on_bit() family of functions.

The new documentation places less emphasis on the pool of waitqueues
used (an implementation detail) and focuses instead on details of how
the functions behave.

The barriers included in the wait functions and clear_and_wake_up_bit()
and those required for wake_up_bit() are spelled out more clearly.

The error statuses returned are given explicitly.

The fact that the wait_on_bit_lock() function sets the bit is made more
obvious.

Signed-off-by: NeilBrown <neilb@suse.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20240925053405.3960701-3-neilb@suse.de
---
 include/linux/wait_bit.h | 159 +++++++++++++++++++++++++----------------------
 1 file changed, 86 insertions(+), 73 deletions(-)

(limited to 'include')

diff --git a/include/linux/wait_bit.h b/include/linux/wait_bit.h
index 48e123839892..723e7bf35747 100644
--- a/include/linux/wait_bit.h
+++ b/include/linux/wait_bit.h
@@ -53,19 +53,21 @@ extern int bit_wait_io_timeout(struct wait_bit_key *key, int mode);
 
 /**
  * wait_on_bit - wait for a bit to be cleared
- * @word: the word being waited on, a kernel virtual address
- * @bit: the bit of the word being waited on
+ * @word: the address containing the bit being waited on
+ * @bit: the bit at that address being waited on
  * @mode: the task state to sleep in
  *
- * There is a standard hashed waitqueue table for generic use. This
- * is the part of the hashtable's accessor API that waits on a bit.
- * For instance, if one were to have waiters on a bitflag, one would
- * call wait_on_bit() in threads waiting for the bit to clear.
- * One uses wait_on_bit() where one is waiting for the bit to clear,
- * but has no intention of setting it.
- * Returned value will be zero if the bit was cleared, or non-zero
- * if the process received a signal and the mode permitted wakeup
- * on that signal.
+ * Wait for the given bit in an unsigned long or bitmap (see DECLARE_BITMAP())
+ * to be cleared.  The clearing of the bit must be signalled with
+ * wake_up_bit(), often as clear_and_wake_up_bit().
+ *
+ * The process will wait on a waitqueue selected by hash from a shared
+ * pool.  It will only be woken on a wake_up for the target bit, even
+ * if other processes on the same queue are waiting for other bits.
+ *
+ * Returned value will be zero if the bit was cleared in which case the
+ * call has ACQUIRE semantics, or %-EINTR if the process received a
+ * signal and the mode permitted wake up on that signal.
  */
 static inline int
 wait_on_bit(unsigned long *word, int bit, unsigned mode)
@@ -80,17 +82,20 @@ wait_on_bit(unsigned long *word, int bit, unsigned mode)
 
 /**
  * wait_on_bit_io - wait for a bit to be cleared
- * @word: the word being waited on, a kernel virtual address
- * @bit: the bit of the word being waited on
+ * @word: the address containing the bit being waited on
+ * @bit: the bit at that address being waited on
  * @mode: the task state to sleep in
  *
- * Use the standard hashed waitqueue table to wait for a bit
- * to be cleared.  This is similar to wait_on_bit(), but calls
- * io_schedule() instead of schedule() for the actual waiting.
+ * Wait for the given bit in an unsigned long or bitmap (see DECLARE_BITMAP())
+ * to be cleared.  The clearing of the bit must be signalled with
+ * wake_up_bit(), often as clear_and_wake_up_bit().
+ *
+ * This is similar to wait_on_bit(), but calls io_schedule() instead of
+ * schedule() for the actual waiting.
  *
- * Returned value will be zero if the bit was cleared, or non-zero
- * if the process received a signal and the mode permitted wakeup
- * on that signal.
+ * Returned value will be zero if the bit was cleared in which case the
+ * call has ACQUIRE semantics, or %-EINTR if the process received a
+ * signal and the mode permitted wake up on that signal.
  */
 static inline int
 wait_on_bit_io(unsigned long *word, int bit, unsigned mode)
@@ -104,19 +109,24 @@ wait_on_bit_io(unsigned long *word, int bit, unsigned mode)
 }
 
 /**
- * wait_on_bit_timeout - wait for a bit to be cleared or a timeout elapses
- * @word: the word being waited on, a kernel virtual address
- * @bit: the bit of the word being waited on
+ * wait_on_bit_timeout - wait for a bit to be cleared or a timeout to elapse
+ * @word: the address containing the bit being waited on
+ * @bit: the bit at that address being waited on
  * @mode: the task state to sleep in
  * @timeout: timeout, in jiffies
  *
- * Use the standard hashed waitqueue table to wait for a bit
- * to be cleared. This is similar to wait_on_bit(), except also takes a
- * timeout parameter.
+ * Wait for the given bit in an unsigned long or bitmap (see
+ * DECLARE_BITMAP()) to be cleared, or for a timeout to expire.  The
+ * clearing of the bit must be signalled with wake_up_bit(), often as
+ * clear_and_wake_up_bit().
  *
- * Returned value will be zero if the bit was cleared before the
- * @timeout elapsed, or non-zero if the @timeout elapsed or process
- * received a signal and the mode permitted wakeup on that signal.
+ * This is similar to wait_on_bit(), except it also takes a timeout
+ * parameter.
+ *
+ * Returned value will be zero if the bit was cleared in which case the
+ * call has ACQUIRE semantics, or %-EINTR if the process received a
+ * signal and the mode permitted wake up on that signal, or %-EAGAIN if the
+ * timeout elapsed.
  */
 static inline int
 wait_on_bit_timeout(unsigned long *word, int bit, unsigned mode,
@@ -132,19 +142,21 @@ wait_on_bit_timeout(unsigned long *word, int bit, unsigned mode,
 
 /**
  * wait_on_bit_action - wait for a bit to be cleared
- * @word: the word being waited on, a kernel virtual address
- * @bit: the bit of the word being waited on
+ * @word: the address containing the bit waited on
+ * @bit: the bit at that address being waited on
  * @action: the function used to sleep, which may take special actions
  * @mode: the task state to sleep in
  *
- * Use the standard hashed waitqueue table to wait for a bit
- * to be cleared, and allow the waiting action to be specified.
- * This is like wait_on_bit() but allows fine control of how the waiting
- * is done.
+ * Wait for the given bit in an unsigned long or bitmap (see DECLARE_BITMAP())
+ * to be cleared.  The clearing of the bit must be signalled with
+ * wake_up_bit(), often as clear_and_wake_up_bit().
+ *
+ * This is similar to wait_on_bit(), but calls @action() instead of
+ * schedule() for the actual waiting.
  *
- * Returned value will be zero if the bit was cleared, or non-zero
- * if the process received a signal and the mode permitted wakeup
- * on that signal.
+ * Returned value will be zero if the bit was cleared in which case the
+ * call has ACQUIRE semantics, or the error code returned by @action if
+ * that call returned non-zero.
  */
 static inline int
 wait_on_bit_action(unsigned long *word, int bit, wait_bit_action_f *action,
@@ -157,23 +169,22 @@ wait_on_bit_action(unsigned long *word, int bit, wait_bit_action_f *action,
 }
 
 /**
- * wait_on_bit_lock - wait for a bit to be cleared, when wanting to set it
- * @word: the word being waited on, a kernel virtual address
- * @bit: the bit of the word being waited on
+ * wait_on_bit_lock - wait for a bit to be cleared, then set it
+ * @word: the address containing the bit being waited on
+ * @bit: the bit of the word being waited on and set
  * @mode: the task state to sleep in
  *
- * There is a standard hashed waitqueue table for generic use. This
- * is the part of the hashtable's accessor API that waits on a bit
- * when one intends to set it, for instance, trying to lock bitflags.
- * For instance, if one were to have waiters trying to set bitflag
- * and waiting for it to clear before setting it, one would call
- * wait_on_bit() in threads waiting to be able to set the bit.
- * One uses wait_on_bit_lock() where one is waiting for the bit to
- * clear with the intention of setting it, and when done, clearing it.
+ * Wait for the given bit in an unsigned long or bitmap (see
+ * DECLARE_BITMAP()) to be cleared.  The clearing of the bit must be
+ * signalled with wake_up_bit(), often as clear_and_wake_up_bit().  As
+ * soon as it is clear, atomically set it and return.
  *
- * Returns zero if the bit was (eventually) found to be clear and was
- * set.  Returns non-zero if a signal was delivered to the process and
- * the @mode allows that signal to wake the process.
+ * This is similar to wait_on_bit(), but sets the bit before returning.
+ *
+ * Returned value will be zero if the bit was successfully set in which
+ * case the call has the same memory sequencing semantics as
+ * test_and_clear_bit(), or %-EINTR if the process received a signal and
+ * the mode permitted wake up on that signal.
  */
 static inline int
 wait_on_bit_lock(unsigned long *word, int bit, unsigned mode)
@@ -185,15 +196,18 @@ wait_on_bit_lock(unsigned long *word, int bit, unsigned mode)
 }
 
 /**
- * wait_on_bit_lock_io - wait for a bit to be cleared, when wanting to set it
- * @word: the word being waited on, a kernel virtual address
- * @bit: the bit of the word being waited on
+ * wait_on_bit_lock_io - wait for a bit to be cleared, then set it
+ * @word: the address containing the bit being waited on
+ * @bit: the bit of the word being waited on and set
  * @mode: the task state to sleep in
  *
- * Use the standard hashed waitqueue table to wait for a bit
- * to be cleared and then to atomically set it.  This is similar
- * to wait_on_bit(), but calls io_schedule() instead of schedule()
- * for the actual waiting.
+ * Wait for the given bit in an unsigned long or bitmap (see
+ * DECLARE_BITMAP()) to be cleared.  The clearing of the bit must be
+ * signalled with wake_up_bit(), often as clear_and_wake_up_bit().  As
+ * soon as it is clear, atomically set it and return.
+ *
+ * This is similar to wait_on_bit_lock(), but calls io_schedule() instead
+ * of schedule().
  *
  * Returns zero if the bit was (eventually) found to be clear and was
  * set.  Returns non-zero if a signal was delivered to the process and
@@ -209,21 +223,19 @@ wait_on_bit_lock_io(unsigned long *word, int bit, unsigned mode)
 }
 
 /**
- * wait_on_bit_lock_action - wait for a bit to be cleared, when wanting to set it
- * @word: the word being waited on, a kernel virtual address
- * @bit: the bit of the word being waited on
+ * wait_on_bit_lock_action - wait for a bit to be cleared, then set it
+ * @word: the address containing the bit being waited on
+ * @bit: the bit of the word being waited on and set
  * @action: the function used to sleep, which may take special actions
  * @mode: the task state to sleep in
  *
- * Use the standard hashed waitqueue table to wait for a bit
- * to be cleared and then to set it, and allow the waiting action
- * to be specified.
- * This is like wait_on_bit() but allows fine control of how the waiting
- * is done.
+ * This is similar to wait_on_bit_lock(), but calls @action() instead of
+ * schedule() for the actual waiting.
  *
- * Returns zero if the bit was (eventually) found to be clear and was
- * set.  Returns non-zero if a signal was delivered to the process and
- * the @mode allows that signal to wake the process.
+ * Returned value will be zero if the bit was successfully set in which
+ * case the call has the same memory sequencing semantics as
+ * test_and_clear_bit(), or the error code returned by @action if that
+ * call returned non-zero.
  */
 static inline int
 wait_on_bit_lock_action(unsigned long *word, int bit, wait_bit_action_f *action,
@@ -320,12 +332,13 @@ do {									\
 
 /**
  * clear_and_wake_up_bit - clear a bit and wake up anyone waiting on that bit
- *
  * @bit: the bit of the word being waited on
- * @word: the word being waited on, a kernel virtual address
+ * @word: the address containing the bit being waited on
  *
- * You can use this helper if bitflags are manipulated atomically rather than
- * non-atomically under a lock.
+ * The designated bit is cleared and any tasks waiting in wait_on_bit()
+ * or similar will be woken.  This call has RELEASE semantics so that
+ * any changes to memory made before this call are guaranteed to be visible
+ * after the corresponding wait_on_bit() completes.
  */
 static inline void clear_and_wake_up_bit(int bit, unsigned long *word)
 {
-- 
cgit v1.2.3


From bf39882edc798279765ca31751f6e679b50b97ef Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Wed, 25 Sep 2024 15:31:40 +1000
Subject: sched: Document wait_var_event() family of functions and
 wake_up_var()

wake_up_var(), wait_var_event() and related interfaces are not
documented but have important ordering requirements.  This patch adds
documentation and makes these requirements explicit.

The return values for those wait_var_event_* functions which return a
value are documented.  Note that these are, perhaps surprisingly,
sometimes different from comparable wait_on_bit() functions.

Signed-off-by: NeilBrown <neilb@suse.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20240925053405.3960701-4-neilb@suse.de
---
 include/linux/wait_bit.h | 71 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 71 insertions(+)

(limited to 'include')

diff --git a/include/linux/wait_bit.h b/include/linux/wait_bit.h
index 723e7bf35747..06ec99b90bf3 100644
--- a/include/linux/wait_bit.h
+++ b/include/linux/wait_bit.h
@@ -282,6 +282,22 @@ __out:	__ret;								\
 	___wait_var_event(var, condition, TASK_UNINTERRUPTIBLE, 0, 0,	\
 			  schedule())
 
+/**
+ * wait_var_event - wait for a variable to be updated and notified
+ * @var: the address of variable being waited on
+ * @condition: the condition to wait for
+ *
+ * Wait for a @condition to be true, only re-checking when a wake up is
+ * received for the given @var (an arbitrary kernel address which need
+ * not be directly related to the given condition, but usually is).
+ *
+ * The process will wait on a waitqueue selected by hash from a shared
+ * pool.  It will only be woken on a wake_up for the given address.
+ *
+ * The condition should normally use smp_load_acquire() or a similarly
+ * ordered access to ensure that any changes to memory made before the
+ * condition became true will be visible after the wait completes.
+ */
 #define wait_var_event(var, condition)					\
 do {									\
 	might_sleep();							\
@@ -294,6 +310,24 @@ do {									\
 	___wait_var_event(var, condition, TASK_KILLABLE, 0, 0,		\
 			  schedule())
 
+/**
+ * wait_var_event_killable - wait for a variable to be updated and notified
+ * @var: the address of variable being waited on
+ * @condition: the condition to wait for
+ *
+ * Wait for a @condition to be true or a fatal signal to be received,
+ * only re-checking the condition when a wake up is received for the given
+ * @var (an arbitrary kernel address which need not be directly related
+ * to the given condition, but usually is).
+ *
+ * This is similar to wait_var_event() but returns a value which is
+ * 0 if the condition became true, or %-ERESTARTSYS if a fatal signal
+ * was received.
+ *
+ * The condition should normally use smp_load_acquire() or a similarly
+ * ordered access to ensure that any changes to memory made before the
+ * condition became true will be visible after the wait completes.
+ */
 #define wait_var_event_killable(var, condition)				\
 ({									\
 	int __ret = 0;							\
@@ -308,6 +342,26 @@ do {									\
 			  TASK_UNINTERRUPTIBLE, 0, timeout,		\
 			  __ret = schedule_timeout(__ret))
 
+/**
+ * wait_var_event_timeout - wait for a variable to be updated or a timeout to expire
+ * @var: the address of variable being waited on
+ * @condition: the condition to wait for
+ * @timeout: maximum time to wait in jiffies
+ *
+ * Wait for a @condition to be true or a timeout to expire, only
+ * re-checking the condition when a wake up is received for the given
+ * @var (an arbitrary kernel address which need not be directly related
+ * to the given condition, but usually is).
+ *
+ * This is similar to wait_var_event() but returns a value which is 0 if
+ * the timeout expired and the condition was still false, or the
+ * remaining time left in the timeout (but at least 1) if the condition
+ * was found to be true.
+ *
+ * The condition should normally use smp_load_acquire() or a similarly
+ * ordered access to ensure that any changes to memory made before the
+ * condition became true will be visible after the wait completes.
+ */
 #define wait_var_event_timeout(var, condition, timeout)			\
 ({									\
 	long __ret = timeout;						\
@@ -321,6 +375,23 @@ do {									\
 	___wait_var_event(var, condition, TASK_INTERRUPTIBLE, 0, 0,	\
 			  schedule())
 
+/**
+ * wait_var_event_killable - wait for a variable to be updated and notified
+ * @var: the address of variable being waited on
+ * @condition: the condition to wait for
+ *
+ * Wait for a @condition to be true or a signal to be received, only
+ * re-checking the condition when a wake up is received for the given
+ * @var (an arbitrary kernel address which need not be directly related
+ * to the given condition, but usually is).
+ *
+ * This is similar to wait_var_event() but returns a value which is 0 if
+ * the condition became true, or %-ERESTARTSYS if a signal was received.
+ *
+ * The condition should normally use smp_load_acquire() or a similarly
+ * ordered access to ensure that any changes to memory made before the
+ * condition became true will be visible after the wait completes.
+ */
 #define wait_var_event_interruptible(var, condition)			\
 ({									\
 	int __ret = 0;							\
-- 
cgit v1.2.3


From 52d633def56c10fe3e82a2c5d88c3ecb3f4e4852 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Wed, 25 Sep 2024 15:31:41 +1000
Subject: sched: Add test_and_clear_wake_up_bit() and atomic_dec_and_wake_up()

There are common patterns in the kernel of using test_and_clear_bit()
before wake_up_bit(), and atomic_dec_and_test() before wake_up_var().

These combinations don't need extra barriers but sometimes include them
unnecessarily.

To help avoid the unnecessary barriers and to help discourage the
general use of wake_up_bit/var (which is a fragile interface) introduce
two combined functions which implement these patterns.

Also add store_release_wake_up() which supports the task of simply
setting a non-atomic variable and sending a wakeup.  This pattern
requires barriers which are often omitted.

Signed-off-by: NeilBrown <neilb@suse.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20240925053405.3960701-5-neilb@suse.de
---
 include/linux/wait_bit.h | 60 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 60 insertions(+)

(limited to 'include')

diff --git a/include/linux/wait_bit.h b/include/linux/wait_bit.h
index 06ec99b90bf3..0272629b590a 100644
--- a/include/linux/wait_bit.h
+++ b/include/linux/wait_bit.h
@@ -419,4 +419,64 @@ static inline void clear_and_wake_up_bit(int bit, unsigned long *word)
 	wake_up_bit(word, bit);
 }
 
+/**
+ * test_and_clear_wake_up_bit - clear a bit if it was set: wake up anyone waiting on that bit
+ * @bit: the bit of the word being waited on
+ * @word: the address of memory containing that bit
+ *
+ * If the bit is set and can be atomically cleared, any tasks waiting in
+ * wait_on_bit() or similar will be woken.  This call has the same
+ * complete ordering semantics as test_and_clear_bit().  Any changes to
+ * memory made before this call are guaranteed to be visible after the
+ * corresponding wait_on_bit() completes.
+ *
+ * Returns %true if the bit was successfully set and the wake up was sent.
+ */
+static inline bool test_and_clear_wake_up_bit(int bit, unsigned long *word)
+{
+	if (!test_and_clear_bit(bit, word))
+		return false;
+	/* no extra barrier required */
+	wake_up_bit(word, bit);
+	return true;
+}
+
+/**
+ * atomic_dec_and_wake_up - decrement an atomic_t and if zero, wake up waiters
+ * @var: the variable to dec and test
+ *
+ * Decrements the atomic variable and if it reaches zero, send a wake_up to any
+ * processes waiting on the variable.
+ *
+ * This function has the same complete ordering semantics as atomic_dec_and_test.
+ *
+ * Returns %true is the variable reaches zero and the wake up was sent.
+ */
+
+static inline bool atomic_dec_and_wake_up(atomic_t *var)
+{
+	if (!atomic_dec_and_test(var))
+		return false;
+	/* No extra barrier required */
+	wake_up_var(var);
+	return true;
+}
+
+/**
+ * store_release_wake_up - update a variable and send a wake_up
+ * @var: the address of the variable to be updated and woken
+ * @val: the value to store in the variable.
+ *
+ * Store the given value in the variable send a wake up to any tasks
+ * waiting on the variable.  All necessary barriers are included to ensure
+ * the task calling wait_var_event() sees the new value and all values
+ * written to memory before this call.
+ */
+#define store_release_wake_up(var, val)					\
+do {									\
+	smp_store_release(var, val);					\
+	smp_mb();							\
+	wake_up_var(var);						\
+} while (0)
+
 #endif /* _LINUX_WAIT_BIT_H */
-- 
cgit v1.2.3


From cc2e1c82d7e474753681a38b07b63034e107e369 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Wed, 25 Sep 2024 15:31:42 +1000
Subject: sched: Add wait/wake interface for variable updated under a lock.

Sometimes we need to wait for a condition to be true which must be
testing while holding a lock.  Correspondingly the condition is made
true while holding the lock and the wake up is sent under the lock.

This patch provides wake and wait interfaces which can be used for this
situation when the lock is a mutex or a spinlock, or any other lock for
which there are foo_lock() and foo_unlock() functions.

Signed-off-by: NeilBrown <neilb@suse.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20240925053405.3960701-6-neilb@suse.de
---
 include/linux/wait_bit.h | 106 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 106 insertions(+)

(limited to 'include')

diff --git a/include/linux/wait_bit.h b/include/linux/wait_bit.h
index 0272629b590a..6aea10efca3d 100644
--- a/include/linux/wait_bit.h
+++ b/include/linux/wait_bit.h
@@ -401,6 +401,112 @@ do {									\
 	__ret;								\
 })
 
+/**
+ * wait_var_event_any_lock - wait for a variable to be updated under a lock
+ * @var: the address of the variable being waited on
+ * @condition: condition to wait for
+ * @lock: the object that is locked to protect updates to the variable
+ * @type: prefix on lock and unlock operations
+ * @state: waiting state, %TASK_UNINTERRUPTIBLE etc.
+ *
+ * Wait for a condition which can only be reliably tested while holding
+ * a lock.  The variables assessed in the condition will normal be updated
+ * under the same lock, and the wake up should be signalled with
+ * wake_up_var_locked() under the same lock.
+ *
+ * This is similar to wait_var_event(), but assumes a lock is held
+ * while calling this function and while updating the variable.
+ *
+ * This must be called while the given lock is held and the lock will be
+ * dropped when schedule() is called to wait for a wake up, and will be
+ * reclaimed before testing the condition again.  The functions used to
+ * unlock and lock the object are constructed by appending _unlock and _lock
+ * to @type.
+ *
+ * Return %-ERESTARTSYS if a signal arrives which is allowed to interrupt
+ * the wait according to @state.
+ */
+#define wait_var_event_any_lock(var, condition, lock, type, state)	\
+({									\
+	int __ret = 0;							\
+	if (!(condition))						\
+		__ret = ___wait_var_event(var, condition, state, 0, 0,	\
+					  type ## _unlock(lock);	\
+					  schedule();			\
+					  type ## _lock(lock));		\
+	__ret;								\
+})
+
+/**
+ * wait_var_event_spinlock - wait for a variable to be updated under a spinlock
+ * @var: the address of the variable being waited on
+ * @condition: condition to wait for
+ * @lock: the spinlock which protects updates to the variable
+ *
+ * Wait for a condition which can only be reliably tested while holding
+ * a spinlock.  The variables assessed in the condition will normal be updated
+ * under the same spinlock, and the wake up should be signalled with
+ * wake_up_var_locked() under the same spinlock.
+ *
+ * This is similar to wait_var_event(), but assumes a spinlock is held
+ * while calling this function and while updating the variable.
+ *
+ * This must be called while the given lock is held and the lock will be
+ * dropped when schedule() is called to wait for a wake up, and will be
+ * reclaimed before testing the condition again.
+ */
+#define wait_var_event_spinlock(var, condition, lock)			\
+	wait_var_event_any_lock(var, condition, lock, spin, TASK_UNINTERRUPTIBLE)
+
+/**
+ * wait_var_event_mutex - wait for a variable to be updated under a mutex
+ * @var: the address of the variable being waited on
+ * @condition: condition to wait for
+ * @mutex: the mutex which protects updates to the variable
+ *
+ * Wait for a condition which can only be reliably tested while holding
+ * a mutex.  The variables assessed in the condition will normal be
+ * updated under the same mutex, and the wake up should be signalled
+ * with wake_up_var_locked() under the same mutex.
+ *
+ * This is similar to wait_var_event(), but assumes a mutex is held
+ * while calling this function and while updating the variable.
+ *
+ * This must be called while the given mutex is held and the mutex will be
+ * dropped when schedule() is called to wait for a wake up, and will be
+ * reclaimed before testing the condition again.
+ */
+#define wait_var_event_mutex(var, condition, lock)			\
+	wait_var_event_any_lock(var, condition, lock, mutex, TASK_UNINTERRUPTIBLE)
+
+/**
+ * wake_up_var_protected - wake up waiters for a variable asserting that it is safe
+ * @var: the address of the variable being waited on
+ * @cond: the condition which afirms this is safe
+ *
+ * When waking waiters which use wait_var_event_any_lock() the waker must be
+ * holding the reelvant lock to avoid races.  This version of wake_up_var()
+ * asserts that the relevant lock is held and so no barrier is needed.
+ * The @cond is only tested when CONFIG_LOCKDEP is enabled.
+ */
+#define wake_up_var_protected(var, cond)				\
+do {									\
+	lockdep_assert(cond);						\
+	wake_up_var(var);						\
+} while (0)
+
+/**
+ * wake_up_var_locked - wake up waiters for a variable while holding a spinlock or mutex
+ * @var: the address of the variable being waited on
+ * @lock: The spinlock or mutex what protects the variable
+ *
+ * Send a wake up for the given variable which should be waited for with
+ * wait_var_event_spinlock() or wait_var_event_mutex().  Unlike wake_up_var(),
+ * no extra barriers are needed as the locking provides sufficient sequencing.
+ */
+#define wake_up_var_locked(var, lock)					\
+	wake_up_var_protected(var, lockdep_is_held(lock))
+
 /**
  * clear_and_wake_up_bit - clear a bit and wake up anyone waiting on that bit
  * @bit: the bit of the word being waited on
-- 
cgit v1.2.3


From 80681c04c5e8e4297b9ebf201ca3ce6242aa16c3 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Wed, 25 Sep 2024 15:31:43 +1000
Subject: sched: add wait_var_event_io()

It is not currently possible to wait wait_var_event for an io_schedule()
style wait.  This patch adds wait_var_event_io() for that purpose.

Signed-off-by: NeilBrown <neilb@suse.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20240925053405.3960701-7-neilb@suse.de
---
 include/linux/wait_bit.h | 31 +++++++++++++++++++++++++++++++
 1 file changed, 31 insertions(+)

(limited to 'include')

diff --git a/include/linux/wait_bit.h b/include/linux/wait_bit.h
index 6aea10efca3d..6346e26fbfd1 100644
--- a/include/linux/wait_bit.h
+++ b/include/linux/wait_bit.h
@@ -281,6 +281,9 @@ __out:	__ret;								\
 #define __wait_var_event(var, condition)				\
 	___wait_var_event(var, condition, TASK_UNINTERRUPTIBLE, 0, 0,	\
 			  schedule())
+#define __wait_var_event_io(var, condition)				\
+	___wait_var_event(var, condition, TASK_UNINTERRUPTIBLE, 0, 0,	\
+			  io_schedule())
 
 /**
  * wait_var_event - wait for a variable to be updated and notified
@@ -306,6 +309,34 @@ do {									\
 	__wait_var_event(var, condition);				\
 } while (0)
 
+/**
+ * wait_var_event_io - wait for a variable to be updated and notified
+ * @var: the address of variable being waited on
+ * @condition: the condition to wait for
+ *
+ * Wait for an IO related @condition to be true, only re-checking when a
+ * wake up is received for the given @var (an arbitrary kernel address
+ * which need not be directly related to the given condition, but
+ * usually is).
+ *
+ * The process will wait on a waitqueue selected by hash from a shared
+ * pool.  It will only be woken on a wake_up for the given address.
+ *
+ * This is similar to wait_var_event(), but calls io_schedule() instead
+ * of schedule().
+ *
+ * The condition should normally use smp_load_acquire() or a similarly
+ * ordered access to ensure that any changes to memory made before the
+ * condition became true will be visible after the wait completes.
+ */
+#define wait_var_event_io(var, condition)				\
+do {									\
+	might_sleep();							\
+	if (condition)							\
+		break;							\
+	__wait_var_event_io(var, condition);				\
+} while (0)
+
 #define __wait_var_event_killable(var, condition)			\
 	___wait_var_event(var, condition, TASK_KILLABLE, 0, 0,		\
 			  schedule())
-- 
cgit v1.2.3


From 5e9f0c4819deb9459f32f12c4fd2b47993b8c395 Mon Sep 17 00:00:00 2001
From: David Disseldorp <ddiss@suse.de>
Date: Mon, 30 Sep 2024 05:09:46 +0000
Subject: sched: remove unused __HAVE_THREAD_FUNCTIONS hook support

__HAVE_THREAD_FUNCTIONS could be defined by architectures wishing to
provide their own task_thread_info(), task_stack_page(),
setup_thread_stack() and end_of_stack() hooks.

Commit cf8e8658100d ("arch: Remove Itanium (IA-64) architecture")
removed the last upstream consumer of __HAVE_THREAD_FUNCTIONS, so change
the remaining !CONFIG_THREAD_INFO_IN_TASK && !__HAVE_THREAD_FUNCTIONS
conditionals to only check for the former case.

Signed-off-by: David Disseldorp <ddiss@suse.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Ard Biesheuvel <ardb@kernel.org>
Link: https://lkml.kernel.org/r/20240930050945.30304-2-ddiss@suse.de
---
 include/linux/sched.h            | 2 +-
 include/linux/sched/task_stack.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index e6ee4258169a..abf26f1e1447 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1898,7 +1898,7 @@ extern unsigned long init_stack[THREAD_SIZE / sizeof(unsigned long)];
 
 #ifdef CONFIG_THREAD_INFO_IN_TASK
 # define task_thread_info(task)	(&(task)->thread_info)
-#elif !defined(__HAVE_THREAD_FUNCTIONS)
+#else
 # define task_thread_info(task)	((struct thread_info *)(task)->stack)
 #endif
 
diff --git a/include/linux/sched/task_stack.h b/include/linux/sched/task_stack.h
index bf10bdb487dd..2e52cc421bce 100644
--- a/include/linux/sched/task_stack.h
+++ b/include/linux/sched/task_stack.h
@@ -33,7 +33,7 @@ static __always_inline unsigned long *end_of_stack(const struct task_struct *tas
 #endif
 }
 
-#elif !defined(__HAVE_THREAD_FUNCTIONS)
+#else
 
 #define task_stack_page(task)	((void *)(task)->stack)
 
-- 
cgit v1.2.3


From 0ac8f14ef22a1592b44dc90272aab35e43b0106a Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <linux@treblig.org>
Date: Wed, 2 Oct 2024 00:40:16 +0100
Subject: sched/wait: Remove unused bit_wait_io_timeout

bit_wait_io_timeout has been unused since 2016's
commit 62906027091f ("mm: add PageWaiters indicating tasks are waiting for a page bit")

Remove it.

Signed-off-by: "Dr. David Alan Gilbert" <linux@treblig.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Tim Chen <tim.c.chen@linux.intel.com>
Link: https://lore.kernel.org/r/20241001234016.231696-1-linux@treblig.org
---
 include/linux/wait_bit.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/wait_bit.h b/include/linux/wait_bit.h
index 6346e26fbfd1..9e29d79fc790 100644
--- a/include/linux/wait_bit.h
+++ b/include/linux/wait_bit.h
@@ -49,7 +49,6 @@ int wake_bit_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync
 extern int bit_wait(struct wait_bit_key *key, int mode);
 extern int bit_wait_io(struct wait_bit_key *key, int mode);
 extern int bit_wait_timeout(struct wait_bit_key *key, int mode);
-extern int bit_wait_io_timeout(struct wait_bit_key *key, int mode);
 
 /**
  * wait_on_bit - wait for a bit to be cleared
-- 
cgit v1.2.3


From 066c779b094b63754e0742ad8675d72d6c0a46f6 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Thu, 3 Oct 2024 18:48:19 +0300
Subject: platform/x86: intel_scu_ipc: Don't use "proxy" headers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Update header inclusions to follow IWYU (Include What You Use)
principle.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Acked-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Link: https://lore.kernel.org/r/20241003154819.1075141-1-andriy.shevchenko@linux.intel.com
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
---
 include/linux/platform_data/x86/intel_scu_ipc.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/linux/platform_data/x86/intel_scu_ipc.h b/include/linux/platform_data/x86/intel_scu_ipc.h
index 0ca9962e97f2..b287627759f7 100644
--- a/include/linux/platform_data/x86/intel_scu_ipc.h
+++ b/include/linux/platform_data/x86/intel_scu_ipc.h
@@ -2,9 +2,13 @@
 #ifndef __PLATFORM_X86_INTEL_SCU_IPC_H_
 #define __PLATFORM_X86_INTEL_SCU_IPC_H_
 
+#include <linux/init.h>
 #include <linux/ioport.h>
+#include <linux/types.h>
 
 struct device;
+struct module;
+
 struct intel_scu_ipc_dev;
 
 /**
-- 
cgit v1.2.3


From 4e40eff0b5737c0de39e1ae5812509efbc0b986e Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Wed, 2 Oct 2024 17:27:18 -0400
Subject: fs: add infrastructure for multigrain timestamps

The VFS has always used coarse-grained timestamps when updating the
ctime and mtime after a change. This has the benefit of allowing
filesystems to optimize away a lot metadata updates, down to around 1
per jiffy, even when a file is under heavy writes.

Unfortunately, this has always been an issue when we're exporting via
NFSv3, which relies on timestamps to validate caches. A lot of changes
can happen in a jiffy, so timestamps aren't sufficient to help the
client decide when to invalidate the cache. Even with NFSv4, a lot of
exported filesystems don't properly support a change attribute and are
subject to the same problems with timestamp granularity. Other
applications have similar issues with timestamps (e.g backup
applications).

If fine-grained timestamps were always used, that would improve the
situation, but that becomes rather expensive, as the underlying
filesystem would have to log a lot more metadata updates.

What is needed is a way to only use fine-grained timestamps when they
are being actively queried. Use the (unused) top bit in
inode->i_ctime_nsec as a flag that indicates whether the current
timestamps have been queried via stat() or the like. When it's set,
allow the update to use a fine-grained timestamp iff it's necessary to
make the ctime show a different value.

If it has been queried, then first see whether the current coarse time
is later than the existing ctime. If it is, accept that value.  If it
isn't, then get a fine-grained timestamp and attempt to stamp the inode
ctime with that value. If that races with another concurrent stamp, then
abandon the update and take the new value without retrying.

Filesystems can opt into this by setting the FS_MGTIME fstype flag.
Others should be unaffected (other than being subject to the same floor
value as multigrain filesystems).

Tested-by: Randy Dunlap <rdunlap@infradead.org> # documentation bits
Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Jeff Layton <jlayton@kernel.org>
Link: https://lore.kernel.org/r/20241002-mgtime-v10-3-d1c4717f5284@kernel.org
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/linux/fs.h | 34 ++++++++++++++++++++++++++--------
 1 file changed, 26 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 6ca11e241a24..eff688e75f2f 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1613,6 +1613,17 @@ static inline struct timespec64 inode_set_mtime(struct inode *inode,
 	return inode_set_mtime_to_ts(inode, ts);
 }
 
+/*
+ * Multigrain timestamps
+ *
+ * Conditionally use fine-grained ctime and mtime timestamps when there
+ * are users actively observing them via getattr. The primary use-case
+ * for this is NFS clients that use the ctime to distinguish between
+ * different states of the file, and that are often fooled by multiple
+ * operations that occur in the same coarse-grained timer tick.
+ */
+#define I_CTIME_QUERIED		((u32)BIT(31))
+
 static inline time64_t inode_get_ctime_sec(const struct inode *inode)
 {
 	return inode->i_ctime_sec;
@@ -1620,7 +1631,7 @@ static inline time64_t inode_get_ctime_sec(const struct inode *inode)
 
 static inline long inode_get_ctime_nsec(const struct inode *inode)
 {
-	return inode->i_ctime_nsec;
+	return inode->i_ctime_nsec & ~I_CTIME_QUERIED;
 }
 
 static inline struct timespec64 inode_get_ctime(const struct inode *inode)
@@ -1631,13 +1642,7 @@ static inline struct timespec64 inode_get_ctime(const struct inode *inode)
 	return ts;
 }
 
-static inline struct timespec64 inode_set_ctime_to_ts(struct inode *inode,
-						      struct timespec64 ts)
-{
-	inode->i_ctime_sec = ts.tv_sec;
-	inode->i_ctime_nsec = ts.tv_nsec;
-	return ts;
-}
+struct timespec64 inode_set_ctime_to_ts(struct inode *inode, struct timespec64 ts);
 
 /**
  * inode_set_ctime - set the ctime in the inode
@@ -2500,6 +2505,7 @@ struct file_system_type {
 #define FS_USERNS_MOUNT		8	/* Can be mounted by userns root */
 #define FS_DISALLOW_NOTIFY_PERM	16	/* Disable fanotify permission events */
 #define FS_ALLOW_IDMAP         32      /* FS has been updated to handle vfs idmappings. */
+#define FS_MGTIME		64	/* FS uses multigrain timestamps */
 #define FS_RENAME_DOES_D_MOVE	32768	/* FS will handle d_move() during rename() internally. */
 	int (*init_fs_context)(struct fs_context *);
 	const struct fs_parameter_spec *parameters;
@@ -2523,6 +2529,17 @@ struct file_system_type {
 
 #define MODULE_ALIAS_FS(NAME) MODULE_ALIAS("fs-" NAME)
 
+/**
+ * is_mgtime: is this inode using multigrain timestamps
+ * @inode: inode to test for multigrain timestamps
+ *
+ * Return true if the inode uses multigrain timestamps, false otherwise.
+ */
+static inline bool is_mgtime(const struct inode *inode)
+{
+	return inode->i_sb->s_type->fs_flags & FS_MGTIME;
+}
+
 extern struct dentry *mount_bdev(struct file_system_type *fs_type,
 	int flags, const char *dev_name, void *data,
 	int (*fill_super)(struct super_block *, void *, int));
@@ -3262,6 +3279,7 @@ extern void page_put_link(void *);
 extern int page_symlink(struct inode *inode, const char *symname, int len);
 extern const struct inode_operations page_symlink_inode_operations;
 extern void kfree_link(void *);
+void fill_mg_cmtime(struct kstat *stat, u32 request_mask, struct inode *inode);
 void generic_fillattr(struct mnt_idmap *, u32, struct inode *, struct kstat *);
 void generic_fill_statx_attr(struct inode *inode, struct kstat *stat);
 void generic_fill_statx_atomic_writes(struct kstat *stat,
-- 
cgit v1.2.3


From fcd4904e2f6908d5c255fa5818bcf8ad32a6f0e8 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Sat, 5 Oct 2024 19:23:03 +0100
Subject: netfs: Remove call to folio_index()

Calling folio_index() is pointless overhead; directly dereferencing
folio->index is fine.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Link: https://lore.kernel.org/r/20241005182307.3190401-2-willy@infradead.org
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/trace/events/netfs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/trace/events/netfs.h b/include/trace/events/netfs.h
index 1d7c52821e55..72a208fd4496 100644
--- a/include/trace/events/netfs.h
+++ b/include/trace/events/netfs.h
@@ -451,7 +451,7 @@ TRACE_EVENT(netfs_folio,
 		    struct address_space *__m = READ_ONCE(folio->mapping);
 		    __entry->ino = __m ? __m->host->i_ino : 0;
 		    __entry->why = why;
-		    __entry->index = folio_index(folio);
+		    __entry->index = folio->index;
 		    __entry->nr = folio_nr_pages(folio);
 			   ),
 
-- 
cgit v1.2.3


From 8fd3395ec9051a52828fcca2328cb50a69dea8ef Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 31 Jul 2024 11:49:04 -0400
Subject: get rid of ...lookup...fdget_rcu() family

Once upon a time, predecessors of those used to do file lookup
without bumping a refcount, provided that caller held rcu_read_lock()
across the lookup and whatever it wanted to read from the struct
file found.  When struct file allocation switched to SLAB_TYPESAFE_BY_RCU,
that stopped being feasible and these primitives started to bump the
file refcount for lookup result, requiring the caller to call fput()
afterwards.

But that turned them pointless - e.g.
	rcu_read_lock();
	file = lookup_fdget_rcu(fd);
	rcu_read_unlock();
is equivalent to
	file = fget_raw(fd);
and all callers of lookup_fdget_rcu() are of that form.  Similarly,
task_lookup_fdget_rcu() calls can be replaced with calling fget_task().
task_lookup_next_fdget_rcu() doesn't have direct counterparts, but
its callers would be happier if we replaced it with an analogue that
deals with RCU internally.

Reviewed-by: Christian Brauner <brauner@kernel.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/fdtable.h | 4 ----
 include/linux/file.h    | 1 +
 2 files changed, 1 insertion(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h
index b1c5722f2b3c..e25e2cb65d30 100644
--- a/include/linux/fdtable.h
+++ b/include/linux/fdtable.h
@@ -92,10 +92,6 @@ static inline struct file *files_lookup_fd_locked(struct files_struct *files, un
 	return files_lookup_fd_raw(files, fd);
 }
 
-struct file *lookup_fdget_rcu(unsigned int fd);
-struct file *task_lookup_fdget_rcu(struct task_struct *task, unsigned int fd);
-struct file *task_lookup_next_fdget_rcu(struct task_struct *task, unsigned int *fd);
-
 static inline bool close_on_exec(unsigned int fd, const struct files_struct *files)
 {
 	return test_bit(fd, files_fdtable(files)->close_on_exec);
diff --git a/include/linux/file.h b/include/linux/file.h
index f98de143245a..ec4ad5e6a061 100644
--- a/include/linux/file.h
+++ b/include/linux/file.h
@@ -72,6 +72,7 @@ static inline void fdput(struct fd fd)
 extern struct file *fget(unsigned int fd);
 extern struct file *fget_raw(unsigned int fd);
 extern struct file *fget_task(struct task_struct *task, unsigned int fd);
+extern struct file *fget_task_next(struct task_struct *task, unsigned int *fd);
 extern void __f_unlock_pos(struct file *);
 
 struct fd fdget(unsigned int fd);
-- 
cgit v1.2.3


From cab0515211f483e392d6862021ed008f49058561 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 2 Jun 2024 17:48:36 -0400
Subject: move close_range(2) into fs/file.c, fold __close_range() into it

	We never had callers for __close_range() except for close_range(2)
itself.  Nothing of that sort has appeared in four years and if any users
do show up, we can always separate those suckers again.

Reviewed-by: Christian Brauner <brauner@kernel.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/fdtable.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h
index e25e2cb65d30..c45306a9f007 100644
--- a/include/linux/fdtable.h
+++ b/include/linux/fdtable.h
@@ -111,7 +111,6 @@ int iterate_fd(struct files_struct *, unsigned,
 		const void *);
 
 extern int close_fd(unsigned int fd);
-extern int __close_range(unsigned int fd, unsigned int max_fd, unsigned int flags);
 extern struct file *file_close_fd(unsigned int fd);
 
 extern struct kmem_cache *files_cachep;
-- 
cgit v1.2.3


From dad6c45cbd40b57db95c9d46e01ff6d302e86042 Mon Sep 17 00:00:00 2001
From: Philip Yang <Philip.Yang@amd.com>
Date: Fri, 16 Feb 2024 11:41:16 -0500
Subject: drm/amdkfd: Output migrate end event if migrate failed

If page migration failed, also output migrate end event to match with
migrate start event, with failure error_code added to the end of the
migrate message macro. This will not break uAPI because application uses
old message macro sscanf drop and ignore the error_code.

Output GPU page fault restore end event if migration failed.

Signed-off-by: Philip Yang <Philip.Yang@amd.com>
Reviewed-by: James Zhu <James.Zhu@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 include/uapi/linux/kfd_ioctl.h | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
index 717307d6b5b7..fa9f9846b88e 100644
--- a/include/uapi/linux/kfd_ioctl.h
+++ b/include/uapi/linux/kfd_ioctl.h
@@ -609,6 +609,7 @@ struct kfd_ioctl_smi_events_args {
  *    migrate_update: GPU page fault is recovered by 'M' for migrate, 'U' for update
  *    rw: 'W' for write page fault, 'R' for read page fault
  *    rescheduled: 'R' if the queue restore failed and rescheduled to try again
+ *    error_code: migrate failure error code, 0 if no error
  */
 #define KFD_EVENT_FMT_UPDATE_GPU_RESET(reset_seq_num, reset_cause)\
 		"%x %s\n", (reset_seq_num), (reset_cause)
@@ -630,9 +631,9 @@ struct kfd_ioctl_smi_events_args {
 		"%lld -%d @%lx(%lx) %x->%x %x:%x %d\n", (ns), (pid), (start), (size),\
 		(from), (to), (prefetch_loc), (preferred_loc), (migrate_trigger)
 
-#define KFD_EVENT_FMT_MIGRATE_END(ns, pid, start, size, from, to, migrate_trigger)\
-		"%lld -%d @%lx(%lx) %x->%x %d\n", (ns), (pid), (start), (size),\
-		(from), (to), (migrate_trigger)
+#define KFD_EVENT_FMT_MIGRATE_END(ns, pid, start, size, from, to, migrate_trigger, error_code) \
+		"%lld -%d @%lx(%lx) %x->%x %d %d\n", (ns), (pid), (start), (size),\
+		(from), (to), (migrate_trigger), (error_code)
 
 #define KFD_EVENT_FMT_QUEUE_EVICTION(ns, pid, node, evict_trigger)\
 		"%lld -%d %x %d\n", (ns), (pid), (node), (evict_trigger)
-- 
cgit v1.2.3


From a3ab2d45b9887ee609cd3bea39f668236935774c Mon Sep 17 00:00:00 2001
From: Philip Yang <Philip.Yang@amd.com>
Date: Tue, 30 Jul 2024 15:33:23 -0400
Subject: drm/amdkfd: SMI report dropped event count

Add new SMI event to report the dropped event count.

When the event kfifo is full, drop count is not zero, or no enough space
left to store the event message, increase drop count.

After reading event out from kfifo, if event was dropped, drop_count is
not zero, generate a dropped event record and reset drop count to zero.

Signed-off-by: Philip Yang <Philip.Yang@amd.com>
Reviewed-by: James Zhu <James.Zhu@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 include/uapi/linux/kfd_ioctl.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
index fa9f9846b88e..7afd66d45313 100644
--- a/include/uapi/linux/kfd_ioctl.h
+++ b/include/uapi/linux/kfd_ioctl.h
@@ -530,6 +530,7 @@ enum kfd_smi_event {
 	KFD_SMI_EVENT_QUEUE_EVICTION = 9,
 	KFD_SMI_EVENT_QUEUE_RESTORE = 10,
 	KFD_SMI_EVENT_UNMAP_FROM_GPU = 11,
+	KFD_SMI_EVENT_DROPPED_EVENT = 12,
 
 	/*
 	 * max event number, as a flag bit to get events from all processes,
@@ -610,6 +611,7 @@ struct kfd_ioctl_smi_events_args {
  *    rw: 'W' for write page fault, 'R' for read page fault
  *    rescheduled: 'R' if the queue restore failed and rescheduled to try again
  *    error_code: migrate failure error code, 0 if no error
+ *    drop_count: how many events dropped when fifo is full
  */
 #define KFD_EVENT_FMT_UPDATE_GPU_RESET(reset_seq_num, reset_cause)\
 		"%x %s\n", (reset_seq_num), (reset_cause)
@@ -645,6 +647,10 @@ struct kfd_ioctl_smi_events_args {
 		"%lld -%d @%lx(%lx) %x %d\n", (ns), (pid), (addr), (size),\
 		(node), (unmap_trigger)
 
+#define KFD_EVENT_FMT_DROPPED_EVENT(ns, pid, drop_count)\
+		"%lld -%d %d\n", (ns), (pid), (drop_count)
+
+
 /**************************************************************************************************
  * CRIU IOCTLs (Checkpoint Restore In Userspace)
  *
-- 
cgit v1.2.3


From 539770616521e5b046ca7612eb79ba11b53edb1d Mon Sep 17 00:00:00 2001
From: "Russell King (Oracle)" <rmk+kernel@armlinux.org.uk>
Date: Thu, 3 Oct 2024 12:52:17 +0100
Subject: net: dsa: remove obsolete phylink dsa_switch operations

No driver now uses the DSA switch phylink members, so we can now remove
the method pointers, but we need to leave empty shim functions to allow
those drivers that do not provide phylink MAC operations structure to
continue functioning.

Signed-off-by: Russell King (oracle) <rmk+kernel@armlinux.org.uk>
Reviewed-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Tested-by: Vladimir Oltean <vladimir.oltean@nxp.com> # sja1105, felix, dsa_loop
Link: https://patch.msgid.link/E1swKNV-0060oN-1b@rmk-PC.armlinux.org.uk
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/dsa.h | 15 ---------------
 1 file changed, 15 deletions(-)

(limited to 'include')

diff --git a/include/net/dsa.h b/include/net/dsa.h
index d7a6c2930277..72ae65e7246a 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -885,21 +885,6 @@ struct dsa_switch_ops {
 	 */
 	void	(*phylink_get_caps)(struct dsa_switch *ds, int port,
 				    struct phylink_config *config);
-	struct phylink_pcs *(*phylink_mac_select_pcs)(struct dsa_switch *ds,
-						      int port,
-						      phy_interface_t iface);
-	void	(*phylink_mac_config)(struct dsa_switch *ds, int port,
-				      unsigned int mode,
-				      const struct phylink_link_state *state);
-	void	(*phylink_mac_link_down)(struct dsa_switch *ds, int port,
-					 unsigned int mode,
-					 phy_interface_t interface);
-	void	(*phylink_mac_link_up)(struct dsa_switch *ds, int port,
-				       unsigned int mode,
-				       phy_interface_t interface,
-				       struct phy_device *phydev,
-				       int speed, int duplex,
-				       bool tx_pause, bool rx_pause);
 	void	(*phylink_fixed_state)(struct dsa_switch *ds, int port,
 				       struct phylink_link_state *state);
 	/*
-- 
cgit v1.2.3


From a3f5f4c2f9b6bc2aa6f5a3e8e23b7519e4f2e3e3 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Fri, 4 Oct 2024 13:47:20 +0000
Subject: ipv4: remove fib_info_devhash[]

Upcoming per-netns RTNL conversion needs to get rid
of shared hash tables.

fib_info_devhash[] is one of them.

It is unclear why we used a hash table, because
a single hlist_head per net device was cheaper and scalable.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Reviewed-by: David Ahern <dsahern@kernel.org>
Link: https://patch.msgid.link/20241004134720.579244-5-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/netdevice.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 49a7e7db0883..3baf8e539b6f 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2211,6 +2211,9 @@ struct net_device {
 
 	/* Protocol-specific pointers */
 	struct in_device __rcu	*ip_ptr;
+	/** @fib_nh_head: nexthops associated with this netdev */
+	struct hlist_head	fib_nh_head;
+
 #if IS_ENABLED(CONFIG_VLAN_8021Q)
 	struct vlan_info __rcu	*vlan_info;
 #endif
-- 
cgit v1.2.3


From 83134ef4609388f6b9ca31a384f531155196c2a7 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Fri, 4 Oct 2024 12:13:31 +0200
Subject: netkit: Add option for scrubbing skb meta data

Jordan reported that when running Cilium with netkit in per-endpoint-routes
mode, network policy misclassifies traffic. In this direct routing mode
of Cilium which is used in case of GKE/EKS/AKS, the Pod's BPF program to
enforce policy sits on the netkit primary device's egress side.

The issue here is that in case of netkit's netkit_prep_forward(), it will
clear meta data such as skb->mark and skb->priority before executing the
BPF program. Thus, identity data stored in there from earlier BPF programs
(e.g. from tcx ingress on the physical device) gets cleared instead of
being made available for the primary's program to process. While for traffic
egressing the Pod via the peer device this might be desired, this is
different for the primary one where compared to tcx egress on the host
veth this information would be available.

To address this, add a new parameter for the device orchestration to
allow control of skb->mark and skb->priority scrubbing, to make the two
accessible from BPF (and eventually leave it up to the program to scrub).
By default, the current behavior is retained. For netkit peer this also
enables the use case where applications could cooperate/signal intent to
the BPF program.

Note that struct netkit has a 4 byte hole between policy and bundle which
is used here, in other words, struct netkit's first cacheline content used
in fast-path does not get moved around.

Fixes: 35dfaad7188c ("netkit, bpf: Add bpf programmable net device")
Reported-by: Jordan Rife <jrife@google.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Cc: Nikolay Aleksandrov <razor@blackwall.org>
Link: https://github.com/cilium/cilium/issues/34042
Acked-by: Jakub Kicinski <kuba@kernel.org>
Acked-by: Nikolay Aleksandrov <razor@blackwall.org>
Link: https://lore.kernel.org/r/20241004101335.117711-1-daniel@iogearbox.net
Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
---
 include/uapi/linux/if_link.h | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index 6dc258993b17..2acc7687e017 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -1292,6 +1292,19 @@ enum netkit_mode {
 	NETKIT_L3,
 };
 
+/* NETKIT_SCRUB_NONE leaves clearing skb->{mark,priority} up to
+ * the BPF program if attached. This also means the latter can
+ * consume the two fields if they were populated earlier.
+ *
+ * NETKIT_SCRUB_DEFAULT zeroes skb->{mark,priority} fields before
+ * invoking the attached BPF program when the peer device resides
+ * in a different network namespace. This is the default behavior.
+ */
+enum netkit_scrub {
+	NETKIT_SCRUB_NONE,
+	NETKIT_SCRUB_DEFAULT,
+};
+
 enum {
 	IFLA_NETKIT_UNSPEC,
 	IFLA_NETKIT_PEER_INFO,
@@ -1299,6 +1312,8 @@ enum {
 	IFLA_NETKIT_POLICY,
 	IFLA_NETKIT_PEER_POLICY,
 	IFLA_NETKIT_MODE,
+	IFLA_NETKIT_SCRUB,
+	IFLA_NETKIT_PEER_SCRUB,
 	__IFLA_NETKIT_MAX,
 };
 #define IFLA_NETKIT_MAX	(__IFLA_NETKIT_MAX - 1)
-- 
cgit v1.2.3


From 754e707e20e47482384f6e64eb3af08273292010 Mon Sep 17 00:00:00 2001
From: Michal Wajdeczko <michal.wajdeczko@intel.com>
Date: Wed, 2 Oct 2024 17:46:07 -0700
Subject: drm/print: Introduce drm_line_printer

This drm printer wrapper can be used to increase the robustness of
the captured output generated by any other drm_printer to make sure
we didn't lost any intermediate lines of the output by adding line
numbers to each output line. Helpful for capturing some crash data.

v2: Extended short int counters to full int (JohnH)

Signed-off-by: Michal Wajdeczko <michal.wajdeczko@intel.com>
Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
Cc: Jani Nikula <jani.nikula@intel.com>
Cc: dri-devel@lists.freedesktop.org
Reviewed-by: Jani Nikula <jani.nikula@intel.com>
Acked-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20241003004611.2323493-8-John.C.Harrison@Intel.com
---
 include/drm/drm_print.h | 64 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 64 insertions(+)

(limited to 'include')

diff --git a/include/drm/drm_print.h b/include/drm/drm_print.h
index d2676831d765..b3906dc04388 100644
--- a/include/drm/drm_print.h
+++ b/include/drm/drm_print.h
@@ -177,6 +177,10 @@ struct drm_printer {
 	void *arg;
 	const void *origin;
 	const char *prefix;
+	struct {
+		unsigned int series;
+		unsigned int counter;
+	} line;
 	enum drm_debug_category category;
 };
 
@@ -187,6 +191,7 @@ void __drm_puts_seq_file(struct drm_printer *p, const char *str);
 void __drm_printfn_info(struct drm_printer *p, struct va_format *vaf);
 void __drm_printfn_dbg(struct drm_printer *p, struct va_format *vaf);
 void __drm_printfn_err(struct drm_printer *p, struct va_format *vaf);
+void __drm_printfn_line(struct drm_printer *p, struct va_format *vaf);
 
 __printf(2, 3)
 void drm_printf(struct drm_printer *p, const char *f, ...);
@@ -411,6 +416,65 @@ static inline struct drm_printer drm_err_printer(struct drm_device *drm,
 	return p;
 }
 
+/**
+ * drm_line_printer - construct a &drm_printer that prefixes outputs with line numbers
+ * @p: the &struct drm_printer which actually generates the output
+ * @prefix: optional output prefix, or NULL for no prefix
+ * @series: optional unique series identifier, or 0 to omit identifier in the output
+ *
+ * This printer can be used to increase the robustness of the captured output
+ * to make sure we didn't lost any intermediate lines of the output. Helpful
+ * while capturing some crash data.
+ *
+ * Example 1::
+ *
+ *	void crash_dump(struct drm_device *drm)
+ *	{
+ *		static unsigned int id;
+ *		struct drm_printer p = drm_err_printer(drm, "crash");
+ *		struct drm_printer lp = drm_line_printer(&p, "dump", ++id);
+ *
+ *		drm_printf(&lp, "foo");
+ *		drm_printf(&lp, "bar");
+ *	}
+ *
+ * Above code will print into the dmesg something like::
+ *
+ *	[ ] 0000:00:00.0: [drm] *ERROR* crash dump 1.1: foo
+ *	[ ] 0000:00:00.0: [drm] *ERROR* crash dump 1.2: bar
+ *
+ * Example 2::
+ *
+ *	void line_dump(struct device *dev)
+ *	{
+ *		struct drm_printer p = drm_info_printer(dev);
+ *		struct drm_printer lp = drm_line_printer(&p, NULL, 0);
+ *
+ *		drm_printf(&lp, "foo");
+ *		drm_printf(&lp, "bar");
+ *	}
+ *
+ * Above code will print::
+ *
+ *	[ ] 0000:00:00.0: [drm] 1: foo
+ *	[ ] 0000:00:00.0: [drm] 2: bar
+ *
+ * RETURNS:
+ * The &drm_printer object
+ */
+static inline struct drm_printer drm_line_printer(struct drm_printer *p,
+						  const char *prefix,
+						  unsigned int series)
+{
+	struct drm_printer lp = {
+		.printfn = __drm_printfn_line,
+		.arg = p,
+		.prefix = prefix,
+		.line = { .series = series, },
+	};
+	return lp;
+}
+
 /*
  * struct device based logging
  *
-- 
cgit v1.2.3


From 56c594d8df64e726e803652ee9f4ab08659d4574 Mon Sep 17 00:00:00 2001
From: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Date: Thu, 3 Oct 2024 14:43:09 +0200
Subject: drm: add DRM_SET_CLIENT_NAME ioctl
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Giving the opportunity to userspace to associate a free-form
name with a drm_file struct is helpful for tracking and debugging.

This is similar to the existing DMA_BUF_SET_NAME ioctl.

Access to client_name is protected by a mutex, and the 'clients' debugfs
file has been updated to print it.

Userspace MR to use this ioctl:
   https://gitlab.freedesktop.org/virgl/virglrenderer/-/merge_requests/1428

If the string passed by userspace contains chars that would mess up output
when it's going to be printed (in dmesg, fdinfo, etc), -EINVAL is returned.

A 0-length string is a valid use, and clears the existing name.

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>
Reviewed-by: Dmitry Osipenko <dmitry.osipenko@collabora.com>
Signed-off-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20241003124506.470931-2-pierre-eric.pelloux-prayer@amd.com
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Christian König <christian.koenig@amd.com>
---
 include/drm/drm_file.h |  9 +++++++++
 include/uapi/drm/drm.h | 17 +++++++++++++++++
 2 files changed, 26 insertions(+)

(limited to 'include')

diff --git a/include/drm/drm_file.h b/include/drm/drm_file.h
index 8c0030c77308..d4f1c115ea0f 100644
--- a/include/drm/drm_file.h
+++ b/include/drm/drm_file.h
@@ -388,6 +388,15 @@ struct drm_file {
 	 * Per-file buffer caches used by the PRIME buffer sharing code.
 	 */
 	struct drm_prime_file_private prime;
+
+	/**
+	 * @client_name:
+	 *
+	 * Userspace-provided name; useful for accounting and debugging.
+	 */
+	const char *client_name;
+	/** @name_lock: Protects @client_name. */
+	struct mutex client_name_lock;
 };
 
 /**
diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h
index 16122819edfe..7fba37b94401 100644
--- a/include/uapi/drm/drm.h
+++ b/include/uapi/drm/drm.h
@@ -1024,6 +1024,13 @@ struct drm_crtc_queue_sequence {
 	__u64 user_data;	/* user data passed to event */
 };
 
+#define DRM_CLIENT_NAME_MAX_LEN		64
+struct drm_set_client_name {
+	__u64 name_len;
+	__u64 name;
+};
+
+
 #if defined(__cplusplus)
 }
 #endif
@@ -1288,6 +1295,16 @@ extern "C" {
  */
 #define DRM_IOCTL_MODE_CLOSEFB		DRM_IOWR(0xD0, struct drm_mode_closefb)
 
+/**
+ * DRM_IOCTL_SET_CLIENT_NAME - Attach a name to a drm_file
+ *
+ * Having a name allows for easier tracking and debugging.
+ * The length of the name (without null ending char) must be
+ * <= DRM_CLIENT_NAME_MAX_LEN.
+ * The call will fail if the name contains whitespaces or non-printable chars.
+ */
+#define DRM_IOCTL_SET_CLIENT_NAME	DRM_IOWR(0xD1, struct drm_set_client_name)
+
 /*
  * Device specific ioctls should only be in their respective headers
  * The device specific ioctl range is from 0x40 to 0x9f.
-- 
cgit v1.2.3


From 20a4da20e0bd0297bf1ce083362e78b6702f991e Mon Sep 17 00:00:00 2001
From: Oleksij Rempel <o.rempel@pengutronix.de>
Date: Fri, 4 Oct 2024 11:01:00 +0200
Subject: net: phy: Add support for PHY timing-role configuration via device
 tree

Introduce support for configuring the master/slave role of PHYs based on
the `timing-role` property in the device tree. While this functionality
is necessary for Single Pair Ethernet (SPE) PHYs (1000/100/10Base-T1)
where hardware strap pins may be unavailable or incorrectly set, it
works for any PHY type.

Signed-off-by: Oleksij Rempel <o.rempel@pengutronix.de>
Reviewed-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
Reviewed-by: Florian Fainelli <florian.fainelli@broadcom.com>
Reviewed-by: Divya Koppera <divya.koppera@microchip.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/linux/phy.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/phy.h b/include/linux/phy.h
index a98bc91a0cde..ff762a3d8270 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -1260,6 +1260,7 @@ size_t phy_speeds(unsigned int *speeds, size_t size,
 		  unsigned long *mask);
 void of_set_phy_supported(struct phy_device *phydev);
 void of_set_phy_eee_broken(struct phy_device *phydev);
+void of_set_phy_timing_role(struct phy_device *phydev);
 int phy_speed_down_core(struct phy_device *phydev);
 
 /**
-- 
cgit v1.2.3


From 95397784be23f66c5d4280b0a4c4e0d1ee74f2ef Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Date: Mon, 2 Sep 2024 10:42:31 +0200
Subject: media: staging: drop omap4iss

The omap4 camera driver has seen no progress since forever, and
now OMAP4 support has also been dropped from u-boot (1). So it is
time to retire this driver.

(1): https://lists.denx.de/pipermail/u-boot/2024-July/558846.html

Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Acked-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
---
 include/linux/platform_data/media/omap4iss.h | 66 ----------------------------
 1 file changed, 66 deletions(-)
 delete mode 100644 include/linux/platform_data/media/omap4iss.h

(limited to 'include')

diff --git a/include/linux/platform_data/media/omap4iss.h b/include/linux/platform_data/media/omap4iss.h
deleted file mode 100644
index 2a511a8fcda7..000000000000
--- a/include/linux/platform_data/media/omap4iss.h
+++ /dev/null
@@ -1,66 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef ARCH_ARM_PLAT_OMAP4_ISS_H
-#define ARCH_ARM_PLAT_OMAP4_ISS_H
-
-#include <linux/i2c.h>
-
-struct iss_device;
-
-enum iss_interface_type {
-	ISS_INTERFACE_CSI2A_PHY1,
-	ISS_INTERFACE_CSI2B_PHY2,
-};
-
-/**
- * struct iss_csiphy_lane: CSI2 lane position and polarity
- * @pos: position of the lane
- * @pol: polarity of the lane
- */
-struct iss_csiphy_lane {
-	u8 pos;
-	u8 pol;
-};
-
-#define ISS_CSIPHY1_NUM_DATA_LANES	4
-#define ISS_CSIPHY2_NUM_DATA_LANES	1
-
-/**
- * struct iss_csiphy_lanes_cfg - CSI2 lane configuration
- * @data: Configuration of one or two data lanes
- * @clk: Clock lane configuration
- */
-struct iss_csiphy_lanes_cfg {
-	struct iss_csiphy_lane data[ISS_CSIPHY1_NUM_DATA_LANES];
-	struct iss_csiphy_lane clk;
-};
-
-/**
- * struct iss_csi2_platform_data - CSI2 interface platform data
- * @crc: Enable the cyclic redundancy check
- * @vpclk_div: Video port output clock control
- */
-struct iss_csi2_platform_data {
-	unsigned crc:1;
-	unsigned vpclk_div:2;
-	struct iss_csiphy_lanes_cfg lanecfg;
-};
-
-struct iss_subdev_i2c_board_info {
-	struct i2c_board_info *board_info;
-	int i2c_adapter_id;
-};
-
-struct iss_v4l2_subdevs_group {
-	struct iss_subdev_i2c_board_info *subdevs;
-	enum iss_interface_type interface;
-	union {
-		struct iss_csi2_platform_data csi2;
-	} bus; /* gcc < 4.6.0 chokes on anonymous union initializers */
-};
-
-struct iss_platform_data {
-	struct iss_v4l2_subdevs_group *subdevs;
-	void (*set_constraints)(struct iss_device *iss, bool enable);
-};
-
-#endif
-- 
cgit v1.2.3


From ec763c234d7f60c5bce0fa2611ba79f5be1af76b Mon Sep 17 00:00:00 2001
From: Kuniyuki Iwashima <kuniyu@amazon.com>
Date: Fri, 4 Oct 2024 15:10:28 -0700
Subject: Revert "rtnetlink: add guard for RTNL"

This reverts commit 464eb03c4a7cfb32cb3324249193cf6bb5b35152.

Once we have a per-netns RTNL, we won't use guard(rtnl).

Also, there's no users for now.

  $ grep -rnI "guard(rtnl" || true
  $

Suggested-by: Eric Dumazet <edumazet@google.com>
Link: https://lore.kernel.org/netdev/CANn89i+KoYzUH+VPLdGmLABYf5y4TW0hrM4UAeQQJ9AREty0iw@mail.gmail.com/
Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/linux/rtnetlink.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index a7da7dfc06a2..cdfc897f1e3c 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -7,7 +7,6 @@
 #include <linux/netdevice.h>
 #include <linux/wait.h>
 #include <linux/refcount.h>
-#include <linux/cleanup.h>
 #include <uapi/linux/rtnetlink.h>
 
 extern int rtnetlink_send(struct sk_buff *skb, struct net *net, u32 pid, u32 group, int echo);
@@ -47,8 +46,6 @@ extern int rtnl_is_locked(void);
 extern int rtnl_lock_killable(void);
 extern bool refcount_dec_and_rtnl_lock(refcount_t *r);
 
-DEFINE_LOCK_GUARD_0(rtnl, rtnl_lock(), rtnl_unlock())
-
 extern wait_queue_head_t netdev_unregistering_wq;
 extern atomic_t dev_unreg_count;
 extern struct rw_semaphore pernet_ops_rwsem;
-- 
cgit v1.2.3


From 76aed95319da25d6884dff01d5f0149e4b542f96 Mon Sep 17 00:00:00 2001
From: Kuniyuki Iwashima <kuniyu@amazon.com>
Date: Fri, 4 Oct 2024 15:10:29 -0700
Subject: rtnetlink: Add per-netns RTNL.

The goal is to break RTNL down into per-netns mutex.

This patch adds per-netns mutex and its helper functions, rtnl_net_lock()
and rtnl_net_unlock().

rtnl_net_lock() acquires the global RTNL and per-netns RTNL mutex, and
rtnl_net_unlock() releases them.

We will replace 800+ rtnl_lock() with rtnl_net_lock() and finally removes
rtnl_lock() in rtnl_net_lock().

When we need to nest per-netns RTNL mutex, we will use __rtnl_net_lock(),
and its locking order is defined by rtnl_net_lock_cmp_fn() as follows:

  1. init_net is first
  2. netns address ascending order

Note that the conversion will be done under CONFIG_DEBUG_NET_SMALL_RTNL
with LOCKDEP so that we can carefully add the extra mutex without slowing
down RTNL operations during conversion.

Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/linux/rtnetlink.h   | 21 +++++++++++++++++++++
 include/net/net_namespace.h |  4 ++++
 2 files changed, 25 insertions(+)

(limited to 'include')

diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index cdfc897f1e3c..edd840a49989 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -92,6 +92,27 @@ static inline bool lockdep_rtnl_is_held(void)
 #define rcu_replace_pointer_rtnl(rp, p)			\
 	rcu_replace_pointer(rp, p, lockdep_rtnl_is_held())
 
+#ifdef CONFIG_DEBUG_NET_SMALL_RTNL
+void __rtnl_net_lock(struct net *net);
+void __rtnl_net_unlock(struct net *net);
+void rtnl_net_lock(struct net *net);
+void rtnl_net_unlock(struct net *net);
+int rtnl_net_lock_cmp_fn(const struct lockdep_map *a, const struct lockdep_map *b);
+#else
+static inline void __rtnl_net_lock(struct net *net) {}
+static inline void __rtnl_net_unlock(struct net *net) {}
+
+static inline void rtnl_net_lock(struct net *net)
+{
+	rtnl_lock();
+}
+
+static inline void rtnl_net_unlock(struct net *net)
+{
+	rtnl_unlock();
+}
+#endif
+
 static inline struct netdev_queue *dev_ingress_queue(struct net_device *dev)
 {
 	return rtnl_dereference(dev->ingress_queue);
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index e67b483cc8bb..873c0f9fdac6 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -188,6 +188,10 @@ struct net {
 #if IS_ENABLED(CONFIG_SMC)
 	struct netns_smc	smc;
 #endif
+#ifdef CONFIG_DEBUG_NET_SMALL_RTNL
+	/* Move to a better place when the config guard is removed. */
+	struct mutex		rtnl_mutex;
+#endif
 } __randomize_layout;
 
 #include <linux/seq_file_net.h>
-- 
cgit v1.2.3


From 844e5e7e656d3a7a904fd5607f8491d6fd01db8e Mon Sep 17 00:00:00 2001
From: Kuniyuki Iwashima <kuniyu@amazon.com>
Date: Fri, 4 Oct 2024 15:10:30 -0700
Subject: rtnetlink: Add assertion helpers for per-netns RTNL.

Once an RTNL scope is converted with rtnl_net_lock(), we will replace
RTNL helper functions inside the scope with the following per-netns
alternatives:

  ASSERT_RTNL()           -> ASSERT_RTNL_NET(net)
  rcu_dereference_rtnl(p) -> rcu_dereference_rtnl_net(net, p)

Note that the per-netns helpers are equivalent to the conventional
helpers unless CONFIG_DEBUG_NET_SMALL_RTNL is enabled.

Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/linux/rtnetlink.h | 45 +++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 41 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index edd840a49989..8468a4ce8510 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -51,6 +51,10 @@ extern atomic_t dev_unreg_count;
 extern struct rw_semaphore pernet_ops_rwsem;
 extern struct rw_semaphore net_rwsem;
 
+#define ASSERT_RTNL() \
+	WARN_ONCE(!rtnl_is_locked(), \
+		  "RTNL: assertion failed at %s (%d)\n", __FILE__,  __LINE__)
+
 #ifdef CONFIG_PROVE_LOCKING
 extern bool lockdep_rtnl_is_held(void);
 #else
@@ -98,6 +102,22 @@ void __rtnl_net_unlock(struct net *net);
 void rtnl_net_lock(struct net *net);
 void rtnl_net_unlock(struct net *net);
 int rtnl_net_lock_cmp_fn(const struct lockdep_map *a, const struct lockdep_map *b);
+
+bool rtnl_net_is_locked(struct net *net);
+
+#define ASSERT_RTNL_NET(net)						\
+	WARN_ONCE(!rtnl_net_is_locked(net),				\
+		  "RTNL_NET: assertion failed at %s (%d)\n",		\
+		  __FILE__,  __LINE__)
+
+bool lockdep_rtnl_net_is_held(struct net *net);
+
+#define rcu_dereference_rtnl_net(net, p)				\
+	rcu_dereference_check(p, lockdep_rtnl_net_is_held(net))
+#define rtnl_net_dereference(net, p)					\
+	rcu_dereference_protected(p, lockdep_rtnl_net_is_held(net))
+#define rcu_replace_pointer_rtnl_net(net, rp, p)			\
+	rcu_replace_pointer(rp, p, lockdep_rtnl_net_is_held(net))
 #else
 static inline void __rtnl_net_lock(struct net *net) {}
 static inline void __rtnl_net_unlock(struct net *net) {}
@@ -111,6 +131,27 @@ static inline void rtnl_net_unlock(struct net *net)
 {
 	rtnl_unlock();
 }
+
+static inline void ASSERT_RTNL_NET(struct net *net)
+{
+	ASSERT_RTNL();
+}
+
+static inline void *rcu_dereference_rtnl_net(struct net *net, void *p)
+{
+	return rcu_dereference_rtnl(p);
+}
+
+static inline void *rtnl_net_dereference(struct net *net, void *p)
+{
+	return rtnl_dereference(p);
+}
+
+static inline void *rcu_replace_pointer_rtnl_net(struct net *net,
+						 void *rp, void *p)
+{
+	return rcu_replace_pointer_rtnl(rp, p);
+}
 #endif
 
 static inline struct netdev_queue *dev_ingress_queue(struct net_device *dev)
@@ -140,10 +181,6 @@ void rtnetlink_init(void);
 void __rtnl_unlock(void);
 void rtnl_kfree_skbs(struct sk_buff *head, struct sk_buff *tail);
 
-#define ASSERT_RTNL() \
-	WARN_ONCE(!rtnl_is_locked(), \
-		  "RTNL: assertion failed at %s (%d)\n", __FILE__,  __LINE__)
-
 extern int ndo_dflt_fdb_dump(struct sk_buff *skb,
 			     struct netlink_callback *cb,
 			     struct net_device *dev,
-- 
cgit v1.2.3


From 9ab440a9d0426cf7842240891cc457155db1a97e Mon Sep 17 00:00:00 2001
From: Shekhar Chauhan <shekhar.chauhan@intel.com>
Date: Mon, 7 Oct 2024 08:41:44 -0700
Subject: drm/xe/ptl: L3bank mask is not available on the media GT

On PTL platforms with media version 30.00, the fuse registers for
reporting L3 bank availability to the GT just read out as ~0 and do not
provide proper values.  Xe does not use the L3 bank mask for anything
internally; it only passes the mask through to userspace via the GT
topology query.

Since we don't have any way to get the real L3 bank mask, we don't want
to pass garbage to userspace.  Passing a zeroed mask or a copy of the
primary GT's L3 bank mask would also be inaccurate and likely to cause
confusion for userspace.  The best approach is to simply not include L3
in the list of masks returned by the topology query in cases where we
aren't able to provide a meaningful value.  This won't change the
behavior for any existing platforms (where we can always obtain L3 masks
successfully for all GTs), it will only prevent us from mis-reporting
bad information on upcoming platform(s).

There's a good chance this will become a formal workaround in the
future, but for now we don't have a lineage number so "no_media_l3" is
used in place of a lineage as the OOB workaround descriptor.

v2:
 - Re-calculate query size to properly match data returned. (Gustavo)
 - Update kerneldoc to clarify that the L3bank mask may not be included
   in the query results if the hardware doesn't make it available.
   (Gustavo)

Cc: Matt Atwood <matthew.s.atwood@intel.com>
Cc: Gustavo Sousa <gustavo.sousa@intel.com>
Signed-off-by: Shekhar Chauhan <shekhar.chauhan@intel.com>
Co-developed-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Reviewed-by: Jonathan Cavitt <jonathan.cavitt@intel.com>
Reviewed-by: Gustavo Sousa <gustavo.sousa@intel.com>
Acked-by: Francois Dugast <francois.dugast@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20241007154143.2021124-2-matthew.d.roper@intel.com
---
 include/uapi/drm/xe_drm.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index b6fbe4988f2e..c4182e95a619 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -512,7 +512,9 @@ struct drm_xe_query_gt_list {
  *    containing the following in mask:
  *    ``DSS_COMPUTE    ff ff ff ff 00 00 00 00``
  *    means 32 DSS are available for compute.
- *  - %DRM_XE_TOPO_L3_BANK - To query the mask of enabled L3 banks
+ *  - %DRM_XE_TOPO_L3_BANK - To query the mask of enabled L3 banks.  This type
+ *    may be omitted if the driver is unable to query the mask from the
+ *    hardware.
  *  - %DRM_XE_TOPO_EU_PER_DSS - To query the mask of Execution Units (EU)
  *    available per Dual Sub Slices (DSS). For example a query response
  *    containing the following in mask:
-- 
cgit v1.2.3


From a8f2cdd27d114ed6c3354a0e39502e6d56215804 Mon Sep 17 00:00:00 2001
From: Dmitry Perchanov <dmitry.perchanov@intel.com>
Date: Mon, 26 Aug 2024 16:04:23 +0300
Subject: media: v4l: Add luma 16-bit interlaced pixel format

The formats added by this patch are:

        V4L2_PIX_FMT_Y16I

Interlaced lumina format primary use in RealSense Depth cameras with
stereo stream for left and right image sensors.

Signed-off-by: Dmitry Perchanov <dmitry.perchanov@intel.com>
Reviewed-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Link: https://lore.kernel.org/r/568efbd75290e286b8ad9e7347b5f43745121020.camel@intel.com
Signed-off-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
---
 include/uapi/linux/videodev2.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h
index 27239cb64065..21a8aa575ea3 100644
--- a/include/uapi/linux/videodev2.h
+++ b/include/uapi/linux/videodev2.h
@@ -798,6 +798,7 @@ struct v4l2_pix_format {
 #define V4L2_PIX_FMT_S5C_UYVY_JPG v4l2_fourcc('S', '5', 'C', 'I') /* S5C73M3 interleaved UYVY/JPEG */
 #define V4L2_PIX_FMT_Y8I      v4l2_fourcc('Y', '8', 'I', ' ') /* Greyscale 8-bit L/R interleaved */
 #define V4L2_PIX_FMT_Y12I     v4l2_fourcc('Y', '1', '2', 'I') /* Greyscale 12-bit L/R interleaved */
+#define V4L2_PIX_FMT_Y16I     v4l2_fourcc('Y', '1', '6', 'I') /* Greyscale 16-bit L/R interleaved */
 #define V4L2_PIX_FMT_Z16      v4l2_fourcc('Z', '1', '6', ' ') /* Depth data 16-bit */
 #define V4L2_PIX_FMT_MT21C    v4l2_fourcc('M', 'T', '2', '1') /* Mediatek compressed block mode  */
 #define V4L2_PIX_FMT_MM21     v4l2_fourcc('M', 'M', '2', '1') /* Mediatek 8-bit block mode, two non-contiguous planes */
-- 
cgit v1.2.3


From 55b834873e800a5809787ce6aedcb70ee49a596f Mon Sep 17 00:00:00 2001
From: Dmitry Perchanov <dmitry.perchanov@intel.com>
Date: Mon, 26 Aug 2024 16:05:04 +0300
Subject: media: uvcvideo: Add luma 16-bit interlaced pixel format

The formats added by this patch are:

        UVC_GUID_FORMAT_Y16I

Interlaced lumina format primary use in RealSense Depth cameras with
stereo stream for left and right image sensors.

Signed-off-by: Dmitry Perchanov <dmitry.perchanov@intel.com>
Reviewed-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Link: https://lore.kernel.org/r/a717a912035b0a0f82b2f35719cca0c5269e995f.camel@intel.com
Signed-off-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
---
 include/linux/usb/uvc.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/linux/usb/uvc.h b/include/linux/usb/uvc.h
index 88d96095bcb1..1c16be20c966 100644
--- a/include/linux/usb/uvc.h
+++ b/include/linux/usb/uvc.h
@@ -118,6 +118,9 @@
 #define UVC_GUID_FORMAT_Y12I \
 	{ 'Y',  '1',  '2',  'I', 0x00, 0x00, 0x10, 0x00, \
 	 0x80, 0x00, 0x00, 0xaa, 0x00, 0x38, 0x9b, 0x71}
+#define UVC_GUID_FORMAT_Y16I \
+	{ 'Y',  '1',  '6',  'I', 0x00, 0x00, 0x10, 0x00, \
+	 0x80, 0x00, 0x00, 0xaa, 0x00, 0x38, 0x9b, 0x71}
 #define UVC_GUID_FORMAT_Z16 \
 	{ 'Z',  '1',  '6',  ' ', 0x00, 0x00, 0x10, 0x00, \
 	 0x80, 0x00, 0x00, 0xaa, 0x00, 0x38, 0x9b, 0x71}
-- 
cgit v1.2.3


From a7e742e416bc331c0f409d6a3630471896e696d3 Mon Sep 17 00:00:00 2001
From: David Given <dg@cowlark.com>
Date: Wed, 18 Sep 2024 20:05:39 +0200
Subject: media: uvcvideo: Add support for the D3DFMT_R5G6B5 pixmap type

This media format is used by the NXP Semiconductors 1fc9:009b chipset,
used by the Kaiweets KTI-W02 infrared camera.

Signed-off-by: David Given <dg@cowlark.com>
Reviewed-by: Ricardo Ribalda <ribalda@chromium.org>
Reviewed-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Link: https://lore.kernel.org/r/20240918180540.10830-1-dg@cowlark.com
Signed-off-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
---
 include/linux/usb/uvc.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/linux/usb/uvc.h b/include/linux/usb/uvc.h
index 1c16be20c966..bce95153e5a6 100644
--- a/include/linux/usb/uvc.h
+++ b/include/linux/usb/uvc.h
@@ -143,6 +143,9 @@
 #define UVC_GUID_FORMAT_D3DFMT_L8 \
 	{0x32, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x00, \
 	 0x80, 0x00, 0x00, 0xaa, 0x00, 0x38, 0x9b, 0x71}
+#define UVC_GUID_FORMAT_D3DFMT_R5G6B5 \
+	{0x7b, 0xeb, 0x36, 0xe4, 0x4f, 0x52, 0xce, 0x11, \
+	 0x9f, 0x53, 0x00, 0x20, 0xaf, 0x0b, 0xa7, 0x70}
 #define UVC_GUID_FORMAT_KSMEDIA_L8_IR \
 	{0x32, 0x00, 0x00, 0x00, 0x02, 0x00, 0x10, 0x00, \
 	 0x80, 0x00, 0x00, 0xaa, 0x00, 0x38, 0x9b, 0x71}
-- 
cgit v1.2.3


From 2298d8a81f2dc6987448e5ddd823f4892194f5b6 Mon Sep 17 00:00:00 2001
From: Haridhar Kalvala <haridhar.kalvala@intel.com>
Date: Mon, 7 Oct 2024 18:35:08 -0700
Subject: drm/xe/ptl: Add PTL platform definition

PTL is an integrated GPU based on the Xe3 architecture.

v2: explicitly turn off display until display patches land.

Bspec: 72574
Cc: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Haridhar Kalvala <haridhar.kalvala@intel.com>
Signed-off-by: Matt Atwood <matthew.s.atwood@intel.com>
Reviewed-by: Shekhar Chauhan <shekhar.chauhan@intel.com>
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20241008013509.61233-6-matthew.s.atwood@intel.com
---
 include/drm/intel/xe_pciids.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'include')

diff --git a/include/drm/intel/xe_pciids.h b/include/drm/intel/xe_pciids.h
index 65520a90c17c..6d8d013f74e0 100644
--- a/include/drm/intel/xe_pciids.h
+++ b/include/drm/intel/xe_pciids.h
@@ -220,4 +220,15 @@
 	MACRO__(0xE20D, ## __VA_ARGS__), \
 	MACRO__(0xE212, ## __VA_ARGS__)
 
+#define XE_PTL_IDS(MACRO__, ...) \
+	MACRO__(0xB080, ## __VA_ARGS__), \
+	MACRO__(0xB081, ## __VA_ARGS__), \
+	MACRO__(0xB082, ## __VA_ARGS__), \
+	MACRO__(0xB090, ## __VA_ARGS__), \
+	MACRO__(0xB091, ## __VA_ARGS__), \
+	MACRO__(0xB092, ## __VA_ARGS__), \
+	MACRO__(0xB0A0, ## __VA_ARGS__), \
+	MACRO__(0xB0A1, ## __VA_ARGS__), \
+	MACRO__(0xB0A2, ## __VA_ARGS__)
+
 #endif
-- 
cgit v1.2.3


From 36e69b160705b65bf136c2fb6a1194447eeb8478 Mon Sep 17 00:00:00 2001
From: Dragan Simic <dsimic@manjaro.org>
Date: Sun, 29 Sep 2024 11:21:16 +0200
Subject: driver core: Add device probe log helper dev_warn_probe()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Some drivers can still provide their functionality to a certain extent
even when some of their resource acquisitions eventually fail.  In such
cases, emitting errors isn't the desired action, but warnings should be
emitted instead.

To solve this, introduce dev_warn_probe() as a new device probe log helper,
which behaves identically as the already existing dev_err_probe(), while it
produces warnings instead of errors.  The intended use is with the resources
that are actually optional for a particular driver.

While there, copyedit the kerneldoc for dev_err_probe() a bit, to simplify
its wording a bit, and reuse it as the kerneldoc for dev_warn_probe(), with
the necessary wording adjustments, of course.

Signed-off-by: Dragan Simic <dsimic@manjaro.org>
Tested-by: Hélène Vulquin <oss@helene.moe>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Link: https://patch.msgid.link/2be0a28538bb2a3d1bcc91e2ca1f2d0dc09146d9.1727601608.git.dsimic@manjaro.org
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/linux/dev_printk.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/dev_printk.h b/include/linux/dev_printk.h
index ca32b5bb28eb..eb2094e43050 100644
--- a/include/linux/dev_printk.h
+++ b/include/linux/dev_printk.h
@@ -276,6 +276,7 @@ do {									\
 			dev_driver_string(dev), dev_name(dev), ## arg)
 
 __printf(3, 4) int dev_err_probe(const struct device *dev, int err, const char *fmt, ...);
+__printf(3, 4) int dev_warn_probe(const struct device *dev, int err, const char *fmt, ...);
 
 /* Simple helper for dev_err_probe() when ERR_PTR() is to be returned. */
 #define dev_err_ptr_probe(dev, ___err, fmt, ...) \
-- 
cgit v1.2.3


From 0b028ff7e70ecbe5240ad92e36a664af5cf7f382 Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <linux@treblig.org>
Date: Sun, 6 Oct 2024 23:55:11 +0100
Subject: auxdisplay: Remove unused functions

cfag12864b_getrate() and cfag12864b_isenabled() were both added
in commit 70e840499aae ("[PATCH] drivers: add LCD support")
but never used.

Remove them.

Signed-off-by: Dr. David Alan Gilbert <linux@treblig.org>
Reviewed-by: Geert Uytterhoeven <geert@linux-m68k.org>
Acked-by: Miguel Ojeda <ojeda@kernel.org>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
---
 include/linux/cfag12864b.h | 17 -----------------
 1 file changed, 17 deletions(-)

(limited to 'include')

diff --git a/include/linux/cfag12864b.h b/include/linux/cfag12864b.h
index 6617d9c68d86..83e6613d12ae 100644
--- a/include/linux/cfag12864b.h
+++ b/include/linux/cfag12864b.h
@@ -27,13 +27,6 @@
  */
 extern unsigned char * cfag12864b_buffer;
 
-/*
- * Get the refresh rate of the LCD
- *
- * Returns the refresh rate (hertz).
- */
-extern unsigned int cfag12864b_getrate(void);
-
 /*
  * Enable refreshing
  *
@@ -49,16 +42,6 @@ extern unsigned char cfag12864b_enable(void);
  */
 extern void cfag12864b_disable(void);
 
-/*
- * Is enabled refreshing? (is anyone using the module?)
- *
- * Returns 0 if refreshing is not enabled (anyone is using it),
- * or != 0 if refreshing is enabled (someone is using it).
- *
- * Useful for buffer read-only modules.
- */
-extern unsigned char cfag12864b_isenabled(void);
-
 /*
  * Is the module inited?
  */
-- 
cgit v1.2.3


From 581434654e01ec79dd02c21448ac84e2ce2d1a64 Mon Sep 17 00:00:00 2001
From: Chen Ridong <chenridong@huawei.com>
Date: Tue, 8 Oct 2024 11:24:58 +0000
Subject: workqueue: Adjust WQ_MAX_ACTIVE from 512 to 2048

WQ_MAX_ACTIVE is currently set to 512, which was established approximately
15 yeas ago. However, with the significant increase in machine sizes and
capabilities, the previous limit of 256 concurrent tasks is no longer
sufficient. Therefore, we propose to increase WQ_MAX_ACTIVE to 2048.
and WQ_DFL_ACTIVE is 1024 now.

Signed-off-by: Chen Ridong <chenridong@huawei.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/workqueue.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index 59c2695e12e7..b0dc957c3e56 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -412,7 +412,7 @@ enum wq_flags {
 };
 
 enum wq_consts {
-	WQ_MAX_ACTIVE		= 512,	  /* I like 512, better ideas? */
+	WQ_MAX_ACTIVE		= 2048,	  /* I like 2048, better ideas? */
 	WQ_UNBOUND_MAX_ACTIVE	= WQ_MAX_ACTIVE,
 	WQ_DFL_ACTIVE		= WQ_MAX_ACTIVE / 2,
 
-- 
cgit v1.2.3


From aefa398d93d5db7c555be78a605ff015357f127d Mon Sep 17 00:00:00 2001
From: Joshua Hahn <joshua.hahn6@gmail.com>
Date: Wed, 2 Oct 2024 11:47:16 -0700
Subject: cgroup/rstat: Tracking cgroup-level niced CPU time

Cgroup-level CPU statistics currently include time spent on
user/system processes, but do not include niced CPU time (despite
already being tracked). This patch exposes niced CPU time to the
userspace, allowing users to get a better understanding of their
hardware limits and can facilitate more informed workload distribution.

A new field 'ntime' is added to struct cgroup_base_stat as opposed to
struct task_cputime to minimize footprint.

Signed-off-by: Joshua Hahn <joshua.hahnjy@gmail.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/cgroup-defs.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index 47ae4c4d924c..0a80ef9191a6 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -327,6 +327,7 @@ struct cgroup_base_stat {
 #ifdef CONFIG_SCHED_CORE
 	u64 forceidle_sum;
 #endif
+	u64 ntime;
 };
 
 /*
-- 
cgit v1.2.3


From 49e4154f4b16345da5e219b23ed9737a6e735bc1 Mon Sep 17 00:00:00 2001
From: Zheng Yejian <zhengyejian@huaweicloud.com>
Date: Wed, 11 Sep 2024 09:00:26 +0800
Subject: tracing: Remove TRACE_EVENT_FL_FILTERED logic

After commit dcb0b5575d24 ("tracing: Remove TRACE_EVENT_FL_USE_CALL_FILTER
 logic"), no one's going to set the TRACE_EVENT_FL_FILTERED or change the
call->filter, so remove related logic.

Link: https://lore.kernel.org/20240911010026.2302849-1-zhengyejian@huaweicloud.com
Signed-off-by: Zheng Yejian <zhengyejian@huaweicloud.com>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 include/linux/trace_events.h | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index 42bedcddd511..f8f2e52653df 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -326,7 +326,6 @@ void *trace_event_buffer_reserve(struct trace_event_buffer *fbuffer,
 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer);
 
 enum {
-	TRACE_EVENT_FL_FILTERED_BIT,
 	TRACE_EVENT_FL_CAP_ANY_BIT,
 	TRACE_EVENT_FL_NO_SET_FILTER_BIT,
 	TRACE_EVENT_FL_IGNORE_ENABLE_BIT,
@@ -341,7 +340,6 @@ enum {
 
 /*
  * Event flags:
- *  FILTERED	  - The event has a filter attached
  *  CAP_ANY	  - Any user can enable for perf
  *  NO_SET_FILTER - Set when filter has error and is to be ignored
  *  IGNORE_ENABLE - For trace internal events, do not enable with debugfs file
@@ -356,7 +354,6 @@ enum {
  *                   to a tracepoint yet, then it is cleared when it is.
  */
 enum {
-	TRACE_EVENT_FL_FILTERED		= (1 << TRACE_EVENT_FL_FILTERED_BIT),
 	TRACE_EVENT_FL_CAP_ANY		= (1 << TRACE_EVENT_FL_CAP_ANY_BIT),
 	TRACE_EVENT_FL_NO_SET_FILTER	= (1 << TRACE_EVENT_FL_NO_SET_FILTER_BIT),
 	TRACE_EVENT_FL_IGNORE_ENABLE	= (1 << TRACE_EVENT_FL_IGNORE_ENABLE_BIT),
@@ -381,7 +378,6 @@ struct trace_event_call {
 	};
 	struct trace_event	event;
 	char			*print_fmt;
-	struct event_filter	*filter;
 	/*
 	 * Static events can disappear with modules,
 	 * where as dynamic ones need their own ref count.
-- 
cgit v1.2.3


From 02f220b5267042d0de649614eec84ded8aeecb4f Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Mon, 7 Oct 2024 20:26:55 +0200
Subject: wifi: ipw2x00/lib80211: move remaining lib80211 into libipw

There's already much code in libipw that used to be shared
with more drivers, but now with the prior cleanups, those old
Intel ipw2x00 drivers are also the only ones using whatever is
now left of lib80211. Move lib80211 entirely into libipw.

Link: https://patch.msgid.link/20241007202707.915ef7b9e7c7.Ib9876d2fe3c90f11d6df458b16d0b7d4bf551a8d@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/lib80211.h | 122 -------------------------------------------------
 1 file changed, 122 deletions(-)
 delete mode 100644 include/net/lib80211.h

(limited to 'include')

diff --git a/include/net/lib80211.h b/include/net/lib80211.h
deleted file mode 100644
index fd0f15d87d80..000000000000
--- a/include/net/lib80211.h
+++ /dev/null
@@ -1,122 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * lib80211.h -- common bits for IEEE802.11 wireless drivers
- *
- * Copyright (c) 2008, John W. Linville <linville@tuxdriver.com>
- *
- * Some bits copied from old ieee80211 component, w/ original copyright
- * notices below:
- *
- * Original code based on Host AP (software wireless LAN access point) driver
- * for Intersil Prism2/2.5/3.
- *
- * Copyright (c) 2001-2002, SSH Communications Security Corp and Jouni Malinen
- * <j@w1.fi>
- * Copyright (c) 2002-2003, Jouni Malinen <j@w1.fi>
- *
- * Adaption to a generic IEEE 802.11 stack by James Ketrenos
- * <jketreno@linux.intel.com>
- *
- * Copyright (c) 2004, Intel Corporation
- *
- */
-
-#ifndef LIB80211_H
-#define LIB80211_H
-
-#include <linux/types.h>
-#include <linux/list.h>
-#include <linux/atomic.h>
-#include <linux/if.h>
-#include <linux/skbuff.h>
-#include <linux/ieee80211.h>
-#include <linux/timer.h>
-#include <linux/seq_file.h>
-
-#define NUM_WEP_KEYS	4
-
-enum {
-	IEEE80211_CRYPTO_TKIP_COUNTERMEASURES = (1 << 0),
-};
-
-struct module;
-
-struct lib80211_crypto_ops {
-	const char *name;
-	struct list_head list;
-
-	/* init new crypto context (e.g., allocate private data space,
-	 * select IV, etc.); returns NULL on failure or pointer to allocated
-	 * private data on success */
-	void *(*init) (int keyidx);
-
-	/* deinitialize crypto context and free allocated private data */
-	void (*deinit) (void *priv);
-
-	/* encrypt/decrypt return < 0 on error or >= 0 on success. The return
-	 * value from decrypt_mpdu is passed as the keyidx value for
-	 * decrypt_msdu. skb must have enough head and tail room for the
-	 * encryption; if not, error will be returned; these functions are
-	 * called for all MPDUs (i.e., fragments).
-	 */
-	int (*encrypt_mpdu) (struct sk_buff * skb, int hdr_len, void *priv);
-	int (*decrypt_mpdu) (struct sk_buff * skb, int hdr_len, void *priv);
-
-	/* These functions are called for full MSDUs, i.e. full frames.
-	 * These can be NULL if full MSDU operations are not needed. */
-	int (*encrypt_msdu) (struct sk_buff * skb, int hdr_len, void *priv);
-	int (*decrypt_msdu) (struct sk_buff * skb, int keyidx, int hdr_len,
-			     void *priv);
-
-	int (*set_key) (void *key, int len, u8 * seq, void *priv);
-	int (*get_key) (void *key, int len, u8 * seq, void *priv);
-
-	/* procfs handler for printing out key information and possible
-	 * statistics */
-	void (*print_stats) (struct seq_file *m, void *priv);
-
-	/* Crypto specific flag get/set for configuration settings */
-	unsigned long (*get_flags) (void *priv);
-	unsigned long (*set_flags) (unsigned long flags, void *priv);
-
-	/* maximum number of bytes added by encryption; encrypt buf is
-	 * allocated with extra_prefix_len bytes, copy of in_buf, and
-	 * extra_postfix_len; encrypt need not use all this space, but
-	 * the result must start at the beginning of the buffer and correct
-	 * length must be returned */
-	int extra_mpdu_prefix_len, extra_mpdu_postfix_len;
-	int extra_msdu_prefix_len, extra_msdu_postfix_len;
-
-	struct module *owner;
-};
-
-struct lib80211_crypt_data {
-	struct list_head list;	/* delayed deletion list */
-	const struct lib80211_crypto_ops *ops;
-	void *priv;
-	atomic_t refcnt;
-};
-
-struct lib80211_crypt_info {
-	char *name;
-	/* Most clients will already have a lock,
-	   so just point to that. */
-	spinlock_t *lock;
-
-	struct lib80211_crypt_data *crypt[NUM_WEP_KEYS];
-	int tx_keyidx;		/* default TX key index (crypt[tx_keyidx]) */
-	struct list_head crypt_deinit_list;
-	struct timer_list crypt_deinit_timer;
-	int crypt_quiesced;
-};
-
-int lib80211_crypt_info_init(struct lib80211_crypt_info *info, char *name,
-                                spinlock_t *lock);
-void lib80211_crypt_info_free(struct lib80211_crypt_info *info);
-int lib80211_register_crypto_ops(const struct lib80211_crypto_ops *ops);
-int lib80211_unregister_crypto_ops(const struct lib80211_crypto_ops *ops);
-const struct lib80211_crypto_ops *lib80211_get_crypto_ops(const char *name);
-void lib80211_crypt_delayed_deinit(struct lib80211_crypt_info *info,
-				    struct lib80211_crypt_data **crypt);
-
-#endif /* LIB80211_H */
-- 
cgit v1.2.3


From 3a1d429ebd43bcfdf3590096ca72cbf593d1598b Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Mon, 7 Oct 2024 21:02:53 +0200
Subject: wifi: wext/libipw: move spy implementation to libipw

There's no driver left using this other than ipw2200,
so move the data bookkeeping and code into libipw.

Link: https://patch.msgid.link/20241007210254.037d864cda7d.Ib2197cb056ff05746d3521a5fba637062acb7314@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/iw_handler.h | 18 ------------------
 1 file changed, 18 deletions(-)

(limited to 'include')

diff --git a/include/net/iw_handler.h b/include/net/iw_handler.h
index 7af1082ea9a0..a7b502958d27 100644
--- a/include/net/iw_handler.h
+++ b/include/net/iw_handler.h
@@ -418,8 +418,6 @@ struct iw_spy_data {
 struct libipw_device;
 /* The struct */
 struct iw_public_data {
-	/* Driver enhanced spy support */
-	struct iw_spy_data *		spy_data;
 	/* Legacy structure managed by the ipw2x00-specific IEEE 802.11 layer */
 	struct libipw_device *		libipw;
 };
@@ -443,22 +441,6 @@ static inline void wireless_nlevent_flush(void) {}
 /* We may need a function to send a stream of events to user space.
  * More on that later... */
 
-/* Standard handler for SIOCSIWSPY */
-int iw_handler_set_spy(struct net_device *dev, struct iw_request_info *info,
-		       union iwreq_data *wrqu, char *extra);
-/* Standard handler for SIOCGIWSPY */
-int iw_handler_get_spy(struct net_device *dev, struct iw_request_info *info,
-		       union iwreq_data *wrqu, char *extra);
-/* Standard handler for SIOCSIWTHRSPY */
-int iw_handler_set_thrspy(struct net_device *dev, struct iw_request_info *info,
-			  union iwreq_data *wrqu, char *extra);
-/* Standard handler for SIOCGIWTHRSPY */
-int iw_handler_get_thrspy(struct net_device *dev, struct iw_request_info *info,
-			  union iwreq_data *wrqu, char *extra);
-/* Driver call to update spy records */
-void wireless_spy_update(struct net_device *dev, unsigned char *address,
-			 struct iw_quality *wstats);
-
 /************************* INLINE FUNCTIONS *************************/
 /*
  * Function that are so simple that it's more efficient inlining them
-- 
cgit v1.2.3


From 836265d31631e28000fc8917ce697fc687a58724 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Mon, 7 Oct 2024 21:35:25 +0200
Subject: wifi: remove iw_public_data from struct net_device

Given the previous patches, we no longer need the
struct iw_public_data etc., it's only used by the
old Intel drivers (and ps3_gelic creates it but
then doesn't use it). Remove all of that, including
the pointer in struct net_device.

Link: https://patch.msgid.link/20241007213525.8b2d52b60531.I6a27aaf30bded9a0977f07f47fba2bd31a3b3330@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/linux/netdevice.h |  2 --
 include/net/iw_handler.h  | 18 ------------------
 2 files changed, 20 deletions(-)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index e87b5e488325..f0abed1e6e45 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1773,7 +1773,6 @@ enum netdev_reg_state {
  *	@wireless_handlers:	List of functions to handle Wireless Extensions,
  *				instead of ioctl,
  *				see <net/iw_handler.h> for details.
- *	@wireless_data:	Instance data managed by the core of wireless extensions
  *
  *	@netdev_ops:	Includes several pointers to callbacks,
  *			if one wants to override the ndo_*() functions
@@ -2150,7 +2149,6 @@ struct net_device {
 
 #ifdef CONFIG_WIRELESS_EXT
 	const struct iw_handler_def *wireless_handlers;
-	struct iw_public_data	*wireless_data;
 #endif
 	const struct ethtool_ops *ethtool_ops;
 #ifdef CONFIG_NET_L3_MASTER_DEV
diff --git a/include/net/iw_handler.h b/include/net/iw_handler.h
index a7b502958d27..fc44fcca1d5c 100644
--- a/include/net/iw_handler.h
+++ b/include/net/iw_handler.h
@@ -404,24 +404,6 @@ struct iw_spy_data {
 	u_char			spy_thr_under[IW_MAX_SPY];
 };
 
-/* --------------------- DEVICE WIRELESS DATA --------------------- */
-/*
- * This is all the wireless data specific to a device instance that
- * is managed by the core of Wireless Extensions or the 802.11 layer.
- * We only keep pointer to those structures, so that a driver is free
- * to share them between instances.
- * This structure should be initialised before registering the device.
- * Access to this data follow the same rules as any other struct net_device
- * data (i.e. valid as long as struct net_device exist, same locking rules).
- */
-/* Forward declaration */
-struct libipw_device;
-/* The struct */
-struct iw_public_data {
-	/* Legacy structure managed by the ipw2x00-specific IEEE 802.11 layer */
-	struct libipw_device *		libipw;
-};
-
 /**************************** PROTOTYPES ****************************/
 /*
  * Functions part of the Wireless Extensions (defined in net/wireless/wext-core.c).
-- 
cgit v1.2.3


From aee809aaa2d13bf560fe38d28c4969605e6d9d0e Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Mon, 7 Oct 2024 21:47:16 +0200
Subject: wifi: cfg80211: unexport wireless_nlevent_flush()

This no longer needs to be exported, so don't export it.

Link: https://patch.msgid.link/20241007214715.3dd736dc3ac0.I1388536e99c37f28a007dd753c473ad21513d9a9@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/iw_handler.h | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'include')

diff --git a/include/net/iw_handler.h b/include/net/iw_handler.h
index fc44fcca1d5c..804587b7592b 100644
--- a/include/net/iw_handler.h
+++ b/include/net/iw_handler.h
@@ -413,12 +413,6 @@ struct iw_spy_data {
 /* Send a single event to user space */
 void wireless_send_event(struct net_device *dev, unsigned int cmd,
 			 union iwreq_data *wrqu, const char *extra);
-#ifdef CONFIG_WEXT_CORE
-/* flush all previous wext events - if work is done from netdev notifiers */
-void wireless_nlevent_flush(void);
-#else
-static inline void wireless_nlevent_flush(void) {}
-#endif
 
 /* We may need a function to send a stream of events to user space.
  * More on that later... */
-- 
cgit v1.2.3


From 9e1a98aac11b757e2634304bbe6bed5f5a58e0ee Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Mon, 7 Oct 2024 21:50:25 +0200
Subject: wifi: wext: merge adjacent CONFIG_COMPAT ifdef blocks

Simplify this, and also add a comment at the #endif.

Link: https://patch.msgid.link/20241007215025.5ecdad1e02ed.I54efa895efc496e06ba41e1c39c9df9e23b0171f@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/linux/wireless.h | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/wireless.h b/include/linux/wireless.h
index e6e34d74dda0..03e5d3fe226d 100644
--- a/include/linux/wireless.h
+++ b/include/linux/wireless.h
@@ -21,8 +21,7 @@ struct compat_iw_point {
 	__u16 length;
 	__u16 flags;
 };
-#endif
-#ifdef CONFIG_COMPAT
+
 struct __compat_iw_event {
 	__u16		len;			/* Real length of this stuff */
 	__u16		cmd;			/* Wireless IOCTL */
@@ -49,5 +48,5 @@ struct __compat_iw_event {
 #define IW_EV_COMPAT_POINT_LEN	\
 	(IW_EV_COMPAT_LCP_LEN + sizeof(struct compat_iw_point) - \
 	 IW_EV_COMPAT_POINT_OFF)
-#endif
+#endif /* CONFIG_COMPAT */
 #endif	/* _LINUX_WIRELESS_H */
-- 
cgit v1.2.3


From ff919efb5fe8256fba132b5f2c58df3c38bdd0b3 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Mon, 7 Oct 2024 22:00:03 +0200
Subject: wireless: wext: shorten struct iw_ioctl_description

There's no need for "future" extensions in an internal
struct, and we don't need a u32 for flags, use just a
u8. Also remove the unused IW_DESCR_FLAG_WAIT flag.

Link: https://patch.msgid.link/20241007220003.309bd52fa763.I9a1229fa7f2be53d4f50e63671ed441d0968bb41@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/iw_handler.h | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/net/iw_handler.h b/include/net/iw_handler.h
index 804587b7592b..c9b46b996197 100644
--- a/include/net/iw_handler.h
+++ b/include/net/iw_handler.h
@@ -279,8 +279,6 @@
 #define IW_DESCR_FLAG_RESTRICT	0x0004	/* GET : request is ROOT only */
 				/* SET : Omit payload from generated iwevent */
 #define IW_DESCR_FLAG_NOMAX	0x0008	/* GET : no limit on request size */
-/* Driver level flags */
-#define IW_DESCR_FLAG_WAIT	0x0100	/* Wait for driver event */
 
 /****************************** TYPES ******************************/
 
@@ -373,11 +371,10 @@ struct iw_handler_def {
  */
 struct iw_ioctl_description {
 	__u8	header_type;		/* NULL, iw_point or other */
-	__u8	token_type;		/* Future */
+	__u8	flags;			/* Special handling of the request */
 	__u16	token_size;		/* Granularity of payload */
 	__u16	min_tokens;		/* Min acceptable token number */
 	__u16	max_tokens;		/* Max acceptable token number */
-	__u32	flags;			/* Special handling of the request */
 };
 
 /* Need to think of short header translation table. Later. */
-- 
cgit v1.2.3


From da5e06dee58ad153a4933fd40fc53d571bfef373 Mon Sep 17 00:00:00 2001
From: Jason Xing <kernelxing@tencent.com>
Date: Sun, 6 Oct 2024 07:26:09 +0900
Subject: net-timestamp: namespacify the sysctl_tstamp_allow_data

Let it be tuned in per netns by admins.

Signed-off-by: Jason Xing <kernelxing@tencent.com>
Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Willem de Bruijn <willemb@google.com>
Link: https://patch.msgid.link/20241005222609.94980-1-kerneljasonxing@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/netns/core.h | 1 +
 include/net/sock.h       | 2 --
 2 files changed, 1 insertion(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/net/netns/core.h b/include/net/netns/core.h
index 78214f1b43a2..9b36f0ff0c20 100644
--- a/include/net/netns/core.h
+++ b/include/net/netns/core.h
@@ -15,6 +15,7 @@ struct netns_core {
 	int	sysctl_somaxconn;
 	int	sysctl_optmem_max;
 	u8	sysctl_txrehash;
+	u8	sysctl_tstamp_allow_data;
 
 #ifdef CONFIG_PROC_FS
 	struct prot_inuse __percpu *prot_inuse;
diff --git a/include/net/sock.h b/include/net/sock.h
index e282127092ab..b32f1424ecc5 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -2824,8 +2824,6 @@ void sk_get_meminfo(const struct sock *sk, u32 *meminfo);
 extern __u32 sysctl_wmem_max;
 extern __u32 sysctl_rmem_max;
 
-extern int sysctl_tstamp_allow_data;
-
 extern __u32 sysctl_wmem_default;
 extern __u32 sysctl_rmem_default;
 
-- 
cgit v1.2.3


From 3fe3dbaf26723c473d42a58b636a2500586b821e Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <linux@treblig.org>
Date: Mon, 7 Oct 2024 01:44:56 +0100
Subject: caif: Remove unused cfsrvl_getphyid

cfsrvl_getphyid() has been unused since 2011's commit
f36214408470 ("caif: Use RCU and lists in cfcnfg.c for managing caif link layers")

Remove it.

Signed-off-by: Dr. David Alan Gilbert <linux@treblig.org>
Reviewed-by: Simon Horman <horms@kernel.org>
Link: https://patch.msgid.link/20241007004456.149899-1-linux@treblig.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/caif/cfsrvl.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/net/caif/cfsrvl.h b/include/net/caif/cfsrvl.h
index 5ee7b322e18b..a000dc45f966 100644
--- a/include/net/caif/cfsrvl.h
+++ b/include/net/caif/cfsrvl.h
@@ -40,7 +40,6 @@ void cfsrvl_init(struct cfsrvl *service,
 			struct dev_info *dev_info,
 			bool supports_flowctrl);
 bool cfsrvl_ready(struct cfsrvl *service, int *err);
-u8 cfsrvl_getphyid(struct cflayer *layer);
 
 static inline void cfsrvl_get(struct cflayer *layr)
 {
-- 
cgit v1.2.3


From 4a8840af5f53f2902eba91130fae650879f18e7a Mon Sep 17 00:00:00 2001
From: Josh Poimboeuf <jpoimboe@kernel.org>
Date: Tue, 8 Oct 2024 12:17:19 -0700
Subject: tracepoints: Use new static branch API

The old static key API is deprecated.  Switch to the new one.

Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Alice Ryhl <aliceryhl@google.com>
Link: https://lore.kernel.org/7a08dae3c5eddb14b13864923c1b58ac1f4af83c.1728414936.git.jpoimboe@kernel.org
Signed-off-by: Josh Poimboeuf <jpoimboe@kernel.org>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 include/linux/tracepoint-defs.h | 4 ++--
 include/linux/tracepoint.h      | 8 ++++----
 2 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/tracepoint-defs.h b/include/linux/tracepoint-defs.h
index 4dc4955f0fbf..60a6e8314d4c 100644
--- a/include/linux/tracepoint-defs.h
+++ b/include/linux/tracepoint-defs.h
@@ -31,7 +31,7 @@ struct tracepoint_func {
 
 struct tracepoint {
 	const char *name;		/* Tracepoint name */
-	struct static_key key;
+	struct static_key_false key;
 	struct static_call_key *static_call_key;
 	void *static_call_tramp;
 	void *iterator;
@@ -83,7 +83,7 @@ struct bpf_raw_event_map {
 
 #ifdef CONFIG_TRACEPOINTS
 # define tracepoint_enabled(tp) \
-	static_key_false(&(__tracepoint_##tp).key)
+	static_branch_unlikely(&(__tracepoint_##tp).key)
 #else
 # define tracepoint_enabled(tracepoint) false
 #endif
diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index 93a9f3070b48..2a29334bbc02 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -248,7 +248,7 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p)
 #define __DECLARE_TRACE_RCU(name, proto, args, cond)			\
 	static inline void trace_##name##_rcuidle(proto)		\
 	{								\
-		if (static_key_false(&__tracepoint_##name.key))		\
+		if (static_branch_unlikely(&__tracepoint_##name.key))	\
 			__DO_TRACE(name,				\
 				TP_ARGS(args),				\
 				TP_CONDITION(cond), 1);			\
@@ -274,7 +274,7 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p)
 	extern struct tracepoint __tracepoint_##name;			\
 	static inline void trace_##name(proto)				\
 	{								\
-		if (static_key_false(&__tracepoint_##name.key))		\
+		if (static_branch_unlikely(&__tracepoint_##name.key))	\
 			__DO_TRACE(name,				\
 				TP_ARGS(args),				\
 				TP_CONDITION(cond), 0);			\
@@ -311,7 +311,7 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p)
 	static inline bool						\
 	trace_##name##_enabled(void)					\
 	{								\
-		return static_key_false(&__tracepoint_##name.key);	\
+		return static_branch_unlikely(&__tracepoint_##name.key);\
 	}
 
 /*
@@ -328,7 +328,7 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p)
 	struct tracepoint __tracepoint_##_name	__used			\
 	__section("__tracepoints") = {					\
 		.name = __tpstrtab_##_name,				\
-		.key = STATIC_KEY_INIT_FALSE,				\
+		.key = STATIC_KEY_FALSE_INIT,				\
 		.static_call_key = &STATIC_CALL_KEY(tp_func_##_name),	\
 		.static_call_tramp = STATIC_CALL_TRAMP_ADDR(tp_func_##_name), \
 		.iterator = &__traceiter_##_name,			\
-- 
cgit v1.2.3


From 48bcda6848232667f13b4e97588de488c83c37d4 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <rostedt@goodmis.org>
Date: Thu, 3 Oct 2024 18:16:29 -0400
Subject: tracing: Remove definition of trace_*_rcuidle()

The trace_*_rcuidle() variant of a tracepoint was to handle places where a
tracepoint was located but RCU was not "watching". All those locations
have been removed, and RCU should be watching where all tracepoints are
located. We can now remove the trace_*_rcuidle() variant.

Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Joel Fernandes <joel@joelfernandes.org>
Link: https://lore.kernel.org/20241003181629.36209057@gandalf.local.home
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 include/linux/tracepoint.h        | 50 ++-------------------------------------
 include/trace/events/preemptirq.h |  8 -------
 2 files changed, 2 insertions(+), 56 deletions(-)

(limited to 'include')

diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index 2a29334bbc02..7e4af7b3633c 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -196,67 +196,25 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p)
 #define __DO_TRACE_CALL(name, args)	__traceiter_##name(NULL, args)
 #endif /* CONFIG_HAVE_STATIC_CALL */
 
-/*
- * ARCH_WANTS_NO_INSTR archs are expected to have sanitized entry and idle
- * code that disallow any/all tracing/instrumentation when RCU isn't watching.
- */
-#ifdef CONFIG_ARCH_WANTS_NO_INSTR
-#define RCUIDLE_COND(rcuidle)	(rcuidle)
-#else
-/* srcu can't be used from NMI */
-#define RCUIDLE_COND(rcuidle)	(rcuidle && in_nmi())
-#endif
-
 /*
  * it_func[0] is never NULL because there is at least one element in the array
  * when the array itself is non NULL.
  */
-#define __DO_TRACE(name, args, cond, rcuidle)				\
+#define __DO_TRACE(name, args, cond)					\
 	do {								\
 		int __maybe_unused __idx = 0;				\
 									\
 		if (!(cond))						\
 			return;						\
 									\
-		if (WARN_ONCE(RCUIDLE_COND(rcuidle),			\
-			      "Bad RCU usage for tracepoint"))		\
-			return;						\
-									\
 		/* keep srcu and sched-rcu usage consistent */		\
 		preempt_disable_notrace();				\
 									\
-		/*							\
-		 * For rcuidle callers, use srcu since sched-rcu	\
-		 * doesn't work from the idle path.			\
-		 */							\
-		if (rcuidle) {						\
-			__idx = srcu_read_lock_notrace(&tracepoint_srcu);\
-			ct_irq_enter_irqson();				\
-		}							\
-									\
 		__DO_TRACE_CALL(name, TP_ARGS(args));			\
 									\
-		if (rcuidle) {						\
-			ct_irq_exit_irqson();				\
-			srcu_read_unlock_notrace(&tracepoint_srcu, __idx);\
-		}							\
-									\
 		preempt_enable_notrace();				\
 	} while (0)
 
-#ifndef MODULE
-#define __DECLARE_TRACE_RCU(name, proto, args, cond)			\
-	static inline void trace_##name##_rcuidle(proto)		\
-	{								\
-		if (static_branch_unlikely(&__tracepoint_##name.key))	\
-			__DO_TRACE(name,				\
-				TP_ARGS(args),				\
-				TP_CONDITION(cond), 1);			\
-	}
-#else
-#define __DECLARE_TRACE_RCU(name, proto, args, cond)
-#endif
-
 /*
  * Make sure the alignment of the structure in the __tracepoints section will
  * not add unwanted padding between the beginning of the section and the
@@ -277,14 +235,12 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p)
 		if (static_branch_unlikely(&__tracepoint_##name.key))	\
 			__DO_TRACE(name,				\
 				TP_ARGS(args),				\
-				TP_CONDITION(cond), 0);			\
+				TP_CONDITION(cond));			\
 		if (IS_ENABLED(CONFIG_LOCKDEP) && (cond)) {		\
 			WARN_ONCE(!rcu_is_watching(),			\
 				  "RCU not watching for tracepoint");	\
 		}							\
 	}								\
-	__DECLARE_TRACE_RCU(name, PARAMS(proto), PARAMS(args),		\
-			    PARAMS(cond))				\
 	static inline int						\
 	register_trace_##name(void (*probe)(data_proto), void *data)	\
 	{								\
@@ -375,8 +331,6 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p)
 #define __DECLARE_TRACE(name, proto, args, cond, data_proto)		\
 	static inline void trace_##name(proto)				\
 	{ }								\
-	static inline void trace_##name##_rcuidle(proto)		\
-	{ }								\
 	static inline int						\
 	register_trace_##name(void (*probe)(data_proto),		\
 			      void *data)				\
diff --git a/include/trace/events/preemptirq.h b/include/trace/events/preemptirq.h
index 3f249e150c0c..f99562d2b496 100644
--- a/include/trace/events/preemptirq.h
+++ b/include/trace/events/preemptirq.h
@@ -43,8 +43,6 @@ DEFINE_EVENT(preemptirq_template, irq_enable,
 #else
 #define trace_irq_enable(...)
 #define trace_irq_disable(...)
-#define trace_irq_enable_rcuidle(...)
-#define trace_irq_disable_rcuidle(...)
 #endif
 
 #ifdef CONFIG_TRACE_PREEMPT_TOGGLE
@@ -58,8 +56,6 @@ DEFINE_EVENT(preemptirq_template, preempt_enable,
 #else
 #define trace_preempt_enable(...)
 #define trace_preempt_disable(...)
-#define trace_preempt_enable_rcuidle(...)
-#define trace_preempt_disable_rcuidle(...)
 #endif
 
 #endif /* _TRACE_PREEMPTIRQ_H */
@@ -69,10 +65,6 @@ DEFINE_EVENT(preemptirq_template, preempt_enable,
 #else /* !CONFIG_PREEMPTIRQ_TRACEPOINTS */
 #define trace_irq_enable(...)
 #define trace_irq_disable(...)
-#define trace_irq_enable_rcuidle(...)
-#define trace_irq_disable_rcuidle(...)
 #define trace_preempt_enable(...)
 #define trace_preempt_disable(...)
-#define trace_preempt_enable_rcuidle(...)
-#define trace_preempt_disable_rcuidle(...)
 #endif
-- 
cgit v1.2.3


From e53244e2c8931f9e80c1841293aea86ef8ad32a3 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <rostedt@goodmis.org>
Date: Thu, 3 Oct 2024 18:42:20 -0400
Subject: tracepoint: Remove SRCU protection

With the removal of the trace_*_rcuidle() tracepoints, there is no reason
to protect tracepoints with SRCU. The reason the SRCU protection was
added, was because it can protect tracepoints when RCU is not "watching".
Now that tracepoints are only used when RCU is watching, remove the SRCU
protection. It just made things more complex and confusing anyway.

Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Joel Fernandes <joel@joelfernandes.org>
Link: https://lore.kernel.org/20241003184220.0dc21d35@gandalf.local.home
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 include/linux/tracepoint.h | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index 7e4af7b3633c..3d33b9872cec 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -32,8 +32,6 @@ struct trace_eval_map {
 
 #define TRACEPOINT_DEFAULT_PRIO	10
 
-extern struct srcu_struct tracepoint_srcu;
-
 extern int
 tracepoint_probe_register(struct tracepoint *tp, void *probe, void *data);
 extern int
@@ -109,7 +107,6 @@ void for_each_tracepoint_in_module(struct module *mod,
 #ifdef CONFIG_TRACEPOINTS
 static inline void tracepoint_synchronize_unregister(void)
 {
-	synchronize_srcu(&tracepoint_srcu);
 	synchronize_rcu();
 }
 #else
@@ -207,7 +204,6 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p)
 		if (!(cond))						\
 			return;						\
 									\
-		/* keep srcu and sched-rcu usage consistent */		\
 		preempt_disable_notrace();				\
 									\
 		__DO_TRACE_CALL(name, TP_ARGS(args));			\
-- 
cgit v1.2.3


From db03488897a70367aeafe82d07a78943d2a6068e Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Wed, 9 Oct 2024 08:33:05 +0200
Subject: Revert "wifi: cfg80211: unexport wireless_nlevent_flush()"

Revert this, I neglected to take into account the fact that
cfg80211 itself can be a module, but wext is always builtin.

Fixes: aee809aaa2d1 ("wifi: cfg80211: unexport wireless_nlevent_flush()")
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/iw_handler.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include')

diff --git a/include/net/iw_handler.h b/include/net/iw_handler.h
index c9b46b996197..b80e474cb0aa 100644
--- a/include/net/iw_handler.h
+++ b/include/net/iw_handler.h
@@ -410,6 +410,12 @@ struct iw_spy_data {
 /* Send a single event to user space */
 void wireless_send_event(struct net_device *dev, unsigned int cmd,
 			 union iwreq_data *wrqu, const char *extra);
+#ifdef CONFIG_WEXT_CORE
+/* flush all previous wext events - if work is done from netdev notifiers */
+void wireless_nlevent_flush(void);
+#else
+static inline void wireless_nlevent_flush(void) {}
+#endif
 
 /* We may need a function to send a stream of events to user space.
  * More on that later... */
-- 
cgit v1.2.3


From 975bdea8c470cf10637c58129edaae731fec9e93 Mon Sep 17 00:00:00 2001
From: Dzmitry Sankouski <dsankouski@gmail.com>
Date: Sun, 6 Oct 2024 21:18:19 +0300
Subject: drm/mipi-dsi: add mipi_dsi_compression_mode_multi

mipi_dsi_compression_mode_multi can help with
error handling.

Signed-off-by: Dzmitry Sankouski <dsankouski@gmail.com>
Reviewed-by: Neil Armstrong <neil.armstrong@linaro.org>
Link: https://lore.kernel.org/r/20241006-starqltechn_integration_upstream-v6-1-8336b9cd6c34@gmail.com
Signed-off-by: Neil Armstrong <neil.armstrong@linaro.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20241006-starqltechn_integration_upstream-v6-1-8336b9cd6c34@gmail.com
---
 include/drm/drm_mipi_dsi.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/drm/drm_mipi_dsi.h b/include/drm/drm_mipi_dsi.h
index f725f8654611..94400a78031f 100644
--- a/include/drm/drm_mipi_dsi.h
+++ b/include/drm/drm_mipi_dsi.h
@@ -280,6 +280,8 @@ void mipi_dsi_compression_mode_ext_multi(struct mipi_dsi_multi_context *ctx,
 					 bool enable,
 					 enum mipi_dsi_compression_algo algo,
 					 unsigned int pps_selector);
+void mipi_dsi_compression_mode_multi(struct mipi_dsi_multi_context *ctx,
+				     bool enable);
 void mipi_dsi_picture_parameter_set_multi(struct mipi_dsi_multi_context *ctx,
 					  const struct drm_dsc_picture_parameter_set *pps);
 
-- 
cgit v1.2.3


From f7a870d0be12e3ae38cbe899d858994c5b51f22b Mon Sep 17 00:00:00 2001
From: Damien Le Moal <dlemoal@kernel.org>
Date: Wed, 9 Oct 2024 17:15:35 +0900
Subject: ata: libata: Remove unused macro definitions

ATA_TMOUT_BOOT and ATA_TMOUT_BOOT_QUICK are not used anywhere. Delete
these definitions.

Signed-off-by: Damien Le Moal <dlemoal@kernel.org>
Link: https://lore.kernel.org/r/20241009081535.376994-1-dlemoal@kernel.org
Signed-off-by: Niklas Cassel <cassel@kernel.org>
---
 include/linux/libata.h | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/libata.h b/include/linux/libata.h
index 9b4a6ff03235..c1a85d46eba6 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -270,9 +270,7 @@ enum {
 
 	/* bits 24:31 of host->flags are reserved for LLD specific flags */
 
-	/* various lengths of time */
-	ATA_TMOUT_BOOT		= 30000,	/* heuristic */
-	ATA_TMOUT_BOOT_QUICK	=  7000,	/* heuristic */
+	/* Various lengths of time */
 	ATA_TMOUT_INTERNAL_QUICK = 5000,
 	ATA_TMOUT_MAX_PARK	= 30000,
 
-- 
cgit v1.2.3


From d12586e1072d92070783c854819a0ca8c82c5439 Mon Sep 17 00:00:00 2001
From: Armin Wolf <W_Armin@gmx.de>
Date: Sat, 5 Oct 2024 23:38:25 +0200
Subject: platform/x86: wmi: Implement proper shutdown handling
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When performing a system shutdown under Windows, all WMI clients are
terminated. This means that the ACPI BIOS might expect all WMI devices
to be disabled when shutting down.

Emulate this behaviour by disabling all active WMI devices during
shutdown. Also introduce a new WMI driver callback to allow WMI drivers
to perform any device-specific actions before disabling the WMI device.

Tested on a Dell Inspiron 3505.

Signed-off-by: Armin Wolf <W_Armin@gmx.de>
Link: https://lore.kernel.org/r/20241005213825.701887-2-W_Armin@gmx.de
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
---
 include/linux/wmi.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/wmi.h b/include/linux/wmi.h
index 3275470b5531..120019677fc6 100644
--- a/include/linux/wmi.h
+++ b/include/linux/wmi.h
@@ -56,6 +56,7 @@ u8 wmidev_instance_count(struct wmi_device *wdev);
  * @no_singleton: Driver can be instantiated multiple times
  * @probe: Callback for device binding
  * @remove: Callback for device unbinding
+ * @shutdown: Callback for device shutdown
  * @notify: Callback for receiving WMI events
  *
  * This represents WMI drivers which handle WMI devices.
@@ -68,6 +69,7 @@ struct wmi_driver {
 
 	int (*probe)(struct wmi_device *wdev, const void *context);
 	void (*remove)(struct wmi_device *wdev);
+	void (*shutdown)(struct wmi_device *wdev);
 	void (*notify)(struct wmi_device *device, union acpi_object *data);
 };
 
-- 
cgit v1.2.3


From f042365a26b07006064c2cfa2293e16cad243b0d Mon Sep 17 00:00:00 2001
From: "Russell King (Oracle)" <rmk+kernel@armlinux.org.uk>
Date: Fri, 4 Oct 2024 11:20:56 +0100
Subject: net: pcs: xpcs: provide a helper to get the phylink pcs given xpcs

Provide a helper to provide the pointer to the phylink_pcs struct
given a valid xpcs pointer. This will be necessary when we make
struct dw_xpcs private to pcs-xpcs.c

Signed-off-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/pcs/pcs-xpcs.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/pcs/pcs-xpcs.h b/include/linux/pcs/pcs-xpcs.h
index abda475111d1..868515f3cc88 100644
--- a/include/linux/pcs/pcs-xpcs.h
+++ b/include/linux/pcs/pcs-xpcs.h
@@ -64,6 +64,7 @@ struct dw_xpcs {
 	bool need_reset;
 };
 
+struct phylink_pcs *xpcs_to_phylink_pcs(struct dw_xpcs *xpcs);
 int xpcs_get_an_mode(struct dw_xpcs *xpcs, phy_interface_t interface);
 void xpcs_get_interfaces(struct dw_xpcs *xpcs, unsigned long *interfaces);
 int xpcs_config_eee(struct dw_xpcs *xpcs, int mult_fact_100ns,
-- 
cgit v1.2.3


From accd5f5cd2e1b0ed6805a7c24765648d213561e5 Mon Sep 17 00:00:00 2001
From: "Russell King (Oracle)" <rmk+kernel@armlinux.org.uk>
Date: Fri, 4 Oct 2024 11:21:01 +0100
Subject: net: pcs: xpcs: move definition of struct dw_xpcs to private header

There should be no reason for anything outside the XPCS code to know
the contents of struct dw_xpcs - this is a private structure to XPCS.
Move the definition to the private pcs-xpcs.h header, leaving a
declaration in the global pcs/pcs-xpcs.h

Signed-off-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/pcs/pcs-xpcs.h | 18 +-----------------
 1 file changed, 1 insertion(+), 17 deletions(-)

(limited to 'include')

diff --git a/include/linux/pcs/pcs-xpcs.h b/include/linux/pcs/pcs-xpcs.h
index 868515f3cc88..b5b5d17998b8 100644
--- a/include/linux/pcs/pcs-xpcs.h
+++ b/include/linux/pcs/pcs-xpcs.h
@@ -21,8 +21,6 @@
 #define DW_AN_C37_1000BASEX		4
 #define DW_10GBASER			5
 
-struct dw_xpcs_desc;
-
 enum dw_xpcs_pcs_id {
 	DW_XPCS_ID_NATIVE = 0,
 	NXP_SJA1105_XPCS_ID = 0x00000010,
@@ -48,21 +46,7 @@ struct dw_xpcs_info {
 	u32 pma;
 };
 
-enum dw_xpcs_clock {
-	DW_XPCS_CORE_CLK,
-	DW_XPCS_PAD_CLK,
-	DW_XPCS_NUM_CLKS,
-};
-
-struct dw_xpcs {
-	struct dw_xpcs_info info;
-	const struct dw_xpcs_desc *desc;
-	struct mdio_device *mdiodev;
-	struct clk_bulk_data clks[DW_XPCS_NUM_CLKS];
-	struct phylink_pcs pcs;
-	phy_interface_t interface;
-	bool need_reset;
-};
+struct dw_xpcs;
 
 struct phylink_pcs *xpcs_to_phylink_pcs(struct dw_xpcs *xpcs);
 int xpcs_get_an_mode(struct dw_xpcs *xpcs, phy_interface_t interface);
-- 
cgit v1.2.3


From 20503272422693d793b84f88bf23fe4e955d3a33 Mon Sep 17 00:00:00 2001
From: David Woodhouse <dwmw@amazon.co.uk>
Date: Sun, 6 Oct 2024 08:17:58 +0100
Subject: ptp: Add support for the AMZNC10C 'vmclock' device
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The vmclock device addresses the problem of live migration with
precision clocks. The tolerances of a hardware counter (e.g. TSC) are
typically around ±50PPM. A guest will use NTP/PTP/PPS to discipline that
counter against an external source of 'real' time, and track the precise
frequency of the counter as it changes with environmental conditions.

When a guest is live migrated, anything it knows about the frequency of
the underlying counter becomes invalid. It may move from a host where
the counter running at -50PPM of its nominal frequency, to a host where
it runs at +50PPM. There will also be a step change in the value of the
counter, as the correctness of its absolute value at migration is
limited by the accuracy of the source and destination host's time
synchronization.

In its simplest form, the device merely advertises a 'disruption_marker'
which indicates that the guest should throw away any NTP synchronization
it thinks it has, and start again.

Because the shared memory region can be exposed all the way to userspace
through the /dev/vmclock0 node, applications can still use time from a
fast vDSO 'system call', and check the disruption marker to be sure that
their timestamp is indeed truthful.

The structure also allows for the precise time, as known by the host, to
be exposed directly to guests so that they don't have to wait for NTP to
resync from scratch. The PTP driver consumes this information if present.
Like the KVM PTP clock, this PTP driver can convert TSC-based cross
timestamps into KVM clock values. Unlike the KVM PTP clock, it does so
only when such is actually helpful.

The values and fields are based on the nascent virtio-rtc specification,
and the intent is that a version (hopefully precisely this version) of
this structure will be included as an optional part of that spec. In the
meantime, this driver supports the simple ACPI form of the device which
is being shipped in certain commercial hypervisors (and submitted for
inclusion in QEMU).

Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Acked-by: Richard Cochran <richardcochran@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/vmclock-abi.h | 182 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 182 insertions(+)
 create mode 100644 include/uapi/linux/vmclock-abi.h

(limited to 'include')

diff --git a/include/uapi/linux/vmclock-abi.h b/include/uapi/linux/vmclock-abi.h
new file mode 100644
index 000000000000..2d99b29ac44a
--- /dev/null
+++ b/include/uapi/linux/vmclock-abi.h
@@ -0,0 +1,182 @@
+/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-2-Clause) */
+
+/*
+ * This structure provides a vDSO-style clock to VM guests, exposing the
+ * relationship (or lack thereof) between the CPU clock (TSC, timebase, arch
+ * counter, etc.) and real time. It is designed to address the problem of
+ * live migration, which other clock enlightenments do not.
+ *
+ * When a guest is live migrated, this affects the clock in two ways.
+ *
+ * First, even between identical hosts the actual frequency of the underlying
+ * counter will change within the tolerances of its specification (typically
+ * ±50PPM, or 4 seconds a day). This frequency also varies over time on the
+ * same host, but can be tracked by NTP as it generally varies slowly. With
+ * live migration there is a step change in the frequency, with no warning.
+ *
+ * Second, there may be a step change in the value of the counter itself, as
+ * its accuracy is limited by the precision of the NTP synchronization on the
+ * source and destination hosts.
+ *
+ * So any calibration (NTP, PTP, etc.) which the guest has done on the source
+ * host before migration is invalid, and needs to be redone on the new host.
+ *
+ * In its most basic mode, this structure provides only an indication to the
+ * guest that live migration has occurred. This allows the guest to know that
+ * its clock is invalid and take remedial action. For applications that need
+ * reliable accurate timestamps (e.g. distributed databases), the structure
+ * can be mapped all the way to userspace. This allows the application to see
+ * directly for itself that the clock is disrupted and take appropriate
+ * action, even when using a vDSO-style method to get the time instead of a
+ * system call.
+ *
+ * In its more advanced mode. this structure can also be used to expose the
+ * precise relationship of the CPU counter to real time, as calibrated by the
+ * host. This means that userspace applications can have accurate time
+ * immediately after live migration, rather than having to pause operations
+ * and wait for NTP to recover. This mode does, of course, rely on the
+ * counter being reliable and consistent across CPUs.
+ *
+ * Note that this must be true UTC, never with smeared leap seconds. If a
+ * guest wishes to construct a smeared clock, it can do so. Presenting a
+ * smeared clock through this interface would be problematic because it
+ * actually messes with the apparent counter *period*. A linear smearing
+ * of 1 ms per second would effectively tweak the counter period by 1000PPM
+ * at the start/end of the smearing period, while a sinusoidal smear would
+ * basically be impossible to represent.
+ *
+ * This structure is offered with the intent that it be adopted into the
+ * nascent virtio-rtc standard, as a virtio-rtc that does not address the live
+ * migration problem seems a little less than fit for purpose. For that
+ * reason, certain fields use precisely the same numeric definitions as in
+ * the virtio-rtc proposal. The structure can also be exposed through an ACPI
+ * device with the CID "VMCLOCK", modelled on the "VMGENID" device except for
+ * the fact that it uses a real _CRS to convey the address of the structure
+ * (which should be a full page, to allow for mapping directly to userspace).
+ */
+
+#ifndef __VMCLOCK_ABI_H__
+#define __VMCLOCK_ABI_H__
+
+#include <linux/types.h>
+
+struct vmclock_abi {
+	/* CONSTANT FIELDS */
+	__le32 magic;
+#define VMCLOCK_MAGIC	0x4b4c4356 /* "VCLK" */
+	__le32 size;		/* Size of region containing this structure */
+	__le16 version;	/* 1 */
+	__u8 counter_id; /* Matches VIRTIO_RTC_COUNTER_xxx except INVALID */
+#define VMCLOCK_COUNTER_ARM_VCNT	0
+#define VMCLOCK_COUNTER_X86_TSC		1
+#define VMCLOCK_COUNTER_INVALID		0xff
+	__u8 time_type; /* Matches VIRTIO_RTC_TYPE_xxx */
+#define VMCLOCK_TIME_UTC			0	/* Since 1970-01-01 00:00:00z */
+#define VMCLOCK_TIME_TAI			1	/* Since 1970-01-01 00:00:00z */
+#define VMCLOCK_TIME_MONOTONIC			2	/* Since undefined epoch */
+#define VMCLOCK_TIME_INVALID_SMEARED		3	/* Not supported */
+#define VMCLOCK_TIME_INVALID_MAYBE_SMEARED	4	/* Not supported */
+
+	/* NON-CONSTANT FIELDS PROTECTED BY SEQCOUNT LOCK */
+	__le32 seq_count;	/* Low bit means an update is in progress */
+	/*
+	 * This field changes to another non-repeating value when the CPU
+	 * counter is disrupted, for example on live migration. This lets
+	 * the guest know that it should discard any calibration it has
+	 * performed of the counter against external sources (NTP/PTP/etc.).
+	 */
+	__le64 disruption_marker;
+	__le64 flags;
+	/* Indicates that the tai_offset_sec field is valid */
+#define VMCLOCK_FLAG_TAI_OFFSET_VALID		(1 << 0)
+	/*
+	 * Optionally used to notify guests of pending maintenance events.
+	 * A guest which provides latency-sensitive services may wish to
+	 * remove itself from service if an event is coming up. Two flags
+	 * indicate the approximate imminence of the event.
+	 */
+#define VMCLOCK_FLAG_DISRUPTION_SOON		(1 << 1) /* About a day */
+#define VMCLOCK_FLAG_DISRUPTION_IMMINENT	(1 << 2) /* About an hour */
+#define VMCLOCK_FLAG_PERIOD_ESTERROR_VALID	(1 << 3)
+#define VMCLOCK_FLAG_PERIOD_MAXERROR_VALID	(1 << 4)
+#define VMCLOCK_FLAG_TIME_ESTERROR_VALID	(1 << 5)
+#define VMCLOCK_FLAG_TIME_MAXERROR_VALID	(1 << 6)
+	/*
+	 * If the MONOTONIC flag is set then (other than leap seconds) it is
+	 * guaranteed that the time calculated according this structure at
+	 * any given moment shall never appear to be later than the time
+	 * calculated via the structure at any *later* moment.
+	 *
+	 * In particular, a timestamp based on a counter reading taken
+	 * immediately after setting the low bit of seq_count (and the
+	 * associated memory barrier), using the previously-valid time and
+	 * period fields, shall never be later than a timestamp based on
+	 * a counter reading taken immediately before *clearing* the low
+	 * bit again after the update, using the about-to-be-valid fields.
+	 */
+#define VMCLOCK_FLAG_TIME_MONOTONIC		(1 << 7)
+
+	__u8 pad[2];
+	__u8 clock_status;
+#define VMCLOCK_STATUS_UNKNOWN		0
+#define VMCLOCK_STATUS_INITIALIZING	1
+#define VMCLOCK_STATUS_SYNCHRONIZED	2
+#define VMCLOCK_STATUS_FREERUNNING	3
+#define VMCLOCK_STATUS_UNRELIABLE	4
+
+	/*
+	 * The time exposed through this device is never smeared. This field
+	 * corresponds to the 'subtype' field in virtio-rtc, which indicates
+	 * the smearing method. However in this case it provides a *hint* to
+	 * the guest operating system, such that *if* the guest OS wants to
+	 * provide its users with an alternative clock which does not follow
+	 * UTC, it may do so in a fashion consistent with the other systems
+	 * in the nearby environment.
+	 */
+	__u8 leap_second_smearing_hint; /* Matches VIRTIO_RTC_SUBTYPE_xxx */
+#define VMCLOCK_SMEARING_STRICT		0
+#define VMCLOCK_SMEARING_NOON_LINEAR	1
+#define VMCLOCK_SMEARING_UTC_SLS	2
+	__le16 tai_offset_sec; /* Actually two's complement signed */
+	__u8 leap_indicator;
+	/*
+	 * This field is based on the VIRTIO_RTC_LEAP_xxx values as defined
+	 * in the current draft of virtio-rtc, but since smearing cannot be
+	 * used with the shared memory device, some values are not used.
+	 *
+	 * The _POST_POS and _POST_NEG values allow the guest to perform
+	 * its own smearing during the day or so after a leap second when
+	 * such smearing may need to continue being applied for a leap
+	 * second which is now theoretically "historical".
+	 */
+#define VMCLOCK_LEAP_NONE	0x00	/* No known nearby leap second */
+#define VMCLOCK_LEAP_PRE_POS	0x01	/* Positive leap second at EOM */
+#define VMCLOCK_LEAP_PRE_NEG	0x02	/* Negative leap second at EOM */
+#define VMCLOCK_LEAP_POS	0x03	/* Set during 23:59:60 second */
+#define VMCLOCK_LEAP_POST_POS	0x04
+#define VMCLOCK_LEAP_POST_NEG	0x05
+
+	/* Bit shift for counter_period_frac_sec and its error rate */
+	__u8 counter_period_shift;
+	/*
+	 * Paired values of counter and UTC at a given point in time.
+	 */
+	__le64 counter_value;
+	/*
+	 * Counter period, and error margin of same. The unit of these
+	 * fields is 1/2^(64 + counter_period_shift) of a second.
+	 */
+	__le64 counter_period_frac_sec;
+	__le64 counter_period_esterror_rate_frac_sec;
+	__le64 counter_period_maxerror_rate_frac_sec;
+
+	/*
+	 * Time according to time_type field above.
+	 */
+	__le64 time_sec;		/* Seconds since time_type epoch */
+	__le64 time_frac_sec;		/* Units of 1/2^64 of a second */
+	__le64 time_esterror_nanosec;
+	__le64 time_maxerror_nanosec;
+};
+
+#endif /*  __VMCLOCK_ABI_H__ */
-- 
cgit v1.2.3


From 57e3707eb5e3d9a45eef9151f0378313b1d39a17 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= <linux@weissschuh.net>
Date: Mon, 5 Aug 2024 11:39:36 +0200
Subject: bpf: Constify ctl_table argument of filter function
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The sysctl core is moving to allow "struct ctl_table" in read-only memory.
As a preparation for that all functions handling "struct ctl_table" need
to be able to work with "const struct ctl_table".
As __cgroup_bpf_run_filter_sysctl() does not modify its table, it can be
adapted trivially.

Signed-off-by: Thomas Weißschuh <linux@weissschuh.net>
Signed-off-by: Joel Granados <joel.granados@kernel.org>
---
 include/linux/bpf-cgroup.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index ce91d9b2acb9..4dd17128b204 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -138,7 +138,7 @@ int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
 				      short access, enum cgroup_bpf_attach_type atype);
 
 int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head,
-				   struct ctl_table *table, int write,
+				   const struct ctl_table *table, int write,
 				   char **buf, size_t *pcount, loff_t *ppos,
 				   enum cgroup_bpf_attach_type atype);
 
-- 
cgit v1.2.3


From 29e1095bb1ad149b5c417719338d9c81d58bf12b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= <linux@weissschuh.net>
Date: Mon, 5 Aug 2024 11:39:37 +0200
Subject: sysctl: move internal interfaces to const struct ctl_table
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

As a preparation to make all the core sysctl code work with const struct
ctl_table switch over the internal function to use the const variant.

Some pointers to "struct ctl_table" need to stay non-const as they are
newly allocated and modified before registration.

Signed-off-by: Thomas Weißschuh <linux@weissschuh.net>
Signed-off-by: Joel Granados <joel.granados@kernel.org>
---
 include/linux/sysctl.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index aa4c6d44aaa0..a473deaf5a91 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -162,7 +162,7 @@ struct ctl_node {
 struct ctl_table_header {
 	union {
 		struct {
-			struct ctl_table *ctl_table;
+			const struct ctl_table *ctl_table;
 			int ctl_table_size;
 			int used;
 			int count;
-- 
cgit v1.2.3


From 7abc9b53bd515d7953d1f4e069b062ec4b5ba9e7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= <linux@weissschuh.net>
Date: Mon, 5 Aug 2024 11:39:38 +0200
Subject: sysctl: allow registration of const struct ctl_table
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Putting structure, especially those containing function pointers,
into read-only memory makes the safer and easier to reason about.
Change the sysctl registration APIs to allow registration of
"const struct ctl_table".

Signed-off-by: Thomas Weißschuh <linux@weissschuh.net>
Acked-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Kees Cook <keescook@chromium.org> # security/*
Signed-off-by: Joel Granados <joel.granados@kernel.org>
---
 include/linux/sysctl.h | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index a473deaf5a91..202855befa8b 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -223,13 +223,13 @@ extern void retire_sysctl_set(struct ctl_table_set *set);
 
 struct ctl_table_header *__register_sysctl_table(
 	struct ctl_table_set *set,
-	const char *path, struct ctl_table *table, size_t table_size);
-struct ctl_table_header *register_sysctl_sz(const char *path, struct ctl_table *table,
+	const char *path, const struct ctl_table *table, size_t table_size);
+struct ctl_table_header *register_sysctl_sz(const char *path, const struct ctl_table *table,
 					    size_t table_size);
 void unregister_sysctl_table(struct ctl_table_header * table);
 
 extern int sysctl_init_bases(void);
-extern void __register_sysctl_init(const char *path, struct ctl_table *table,
+extern void __register_sysctl_init(const char *path, const struct ctl_table *table,
 				 const char *table_name, size_t table_size);
 #define register_sysctl_init(path, table)	\
 	__register_sysctl_init(path, table, #table, ARRAY_SIZE(table))
@@ -251,7 +251,7 @@ extern int no_unaligned_warning;
 
 #else /* CONFIG_SYSCTL */
 
-static inline void register_sysctl_init(const char *path, struct ctl_table *table)
+static inline void register_sysctl_init(const char *path, const struct ctl_table *table)
 {
 }
 
@@ -261,7 +261,7 @@ static inline struct ctl_table_header *register_sysctl_mount_point(const char *p
 }
 
 static inline struct ctl_table_header *register_sysctl_sz(const char *path,
-							  struct ctl_table *table,
+							  const struct ctl_table *table,
 							  size_t table_size)
 {
 	return NULL;
-- 
cgit v1.2.3


From fc5d96670eb2540d2572a14351e82ffe45d5ac11 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= <thomas.hellstrom@linux.intel.com>
Date: Wed, 11 Sep 2024 14:18:58 +0200
Subject: drm/ttm: Move swapped objects off the manager's LRU list
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Resources of swapped objects remains on the TTM_PL_SYSTEM manager's
LRU list, which is bad for the LRU walk efficiency.

Rename the device-wide "pinned" list to "unevictable" and move
also resources of swapped-out objects to that list.

An alternative would be to create an "UNEVICTABLE" priority to
be able to keep the pinned- and swapped objects on their
respective manager's LRU without affecting the LRU walk efficiency.

v2:
- Remove a bogus WARN_ON (Christian König)
- Update ttm_resource_[add|del] bulk move (Christian König)
- Fix TTM KUNIT tests (Intel CI)
v3:
- Check for non-NULL bo->resource in ttm_bo_populate().
v4:
- Don't move to LRU tail during swapout until the resource
  is properly swapped or there was a swapout failure.
  (Intel Ci)
- Add a newline after checkpatch check.
v5:
- Introduce ttm_resource_is_swapped() to avoid a corner-case where
  a newly created resource was considered swapped. (Intel CI)
v6:
- Move an assert.

Cc: Christian König <christian.koenig@amd.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Cc: <dri-devel@lists.freedesktop.org>
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240911121859.85387-2-thomas.hellstrom@linux.intel.com
---
 include/drm/ttm/ttm_bo.h     | 2 ++
 include/drm/ttm/ttm_device.h | 5 +++--
 include/drm/ttm/ttm_tt.h     | 5 +++++
 3 files changed, 10 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/drm/ttm/ttm_bo.h b/include/drm/ttm/ttm_bo.h
index 7b56d1ca36d7..5804408815be 100644
--- a/include/drm/ttm/ttm_bo.h
+++ b/include/drm/ttm/ttm_bo.h
@@ -462,5 +462,7 @@ int ttm_bo_pipeline_gutting(struct ttm_buffer_object *bo);
 pgprot_t ttm_io_prot(struct ttm_buffer_object *bo, struct ttm_resource *res,
 		     pgprot_t tmp);
 void ttm_bo_tt_destroy(struct ttm_buffer_object *bo);
+int ttm_bo_populate(struct ttm_buffer_object *bo,
+		    struct ttm_operation_ctx *ctx);
 
 #endif
diff --git a/include/drm/ttm/ttm_device.h b/include/drm/ttm/ttm_device.h
index c22f30535c84..438358f72716 100644
--- a/include/drm/ttm/ttm_device.h
+++ b/include/drm/ttm/ttm_device.h
@@ -252,9 +252,10 @@ struct ttm_device {
 	spinlock_t lru_lock;
 
 	/**
-	 * @pinned: Buffer objects which are pinned and so not on any LRU list.
+	 * @unevictable Buffer objects which are pinned or swapped and as such
+	 * not on an LRU list.
 	 */
-	struct list_head pinned;
+	struct list_head unevictable;
 
 	/**
 	 * @dev_mapping: A pointer to the struct address_space for invalidating
diff --git a/include/drm/ttm/ttm_tt.h b/include/drm/ttm/ttm_tt.h
index 2b9d856ff388..991edafdb2dd 100644
--- a/include/drm/ttm/ttm_tt.h
+++ b/include/drm/ttm/ttm_tt.h
@@ -129,6 +129,11 @@ static inline bool ttm_tt_is_populated(struct ttm_tt *tt)
 	return tt->page_flags & TTM_TT_FLAG_PRIV_POPULATED;
 }
 
+static inline bool ttm_tt_is_swapped(const struct ttm_tt *tt)
+{
+	return tt->page_flags & TTM_TT_FLAG_SWAPPED;
+}
+
 /**
  * ttm_tt_create
  *
-- 
cgit v1.2.3


From 6607c17c6c5e029da03a90085db22daf518232bf Mon Sep 17 00:00:00 2001
From: Shradha Gupta <shradhagupta@linux.microsoft.com>
Date: Tue, 8 Oct 2024 00:06:15 -0700
Subject: net: mana: Enable debugfs files for MANA device

Implement debugfs in MANA driver to be able to view RX,TX,EQ queue
specific attributes and dump their gdma queues.
These dumps can be used by other userspace utilities to improve
debuggability and troubleshooting

Following files are added in debugfs:

/sys/kernel/debug/mana/
|-------------- 1
    |--------------- EQs
    |                 |------- eq0
    |                 |          |---head
    |                 |          |---tail
    |                 |          |---eq_dump
    |                 |------- eq1
    |                 .
    |                 .
    |
    |--------------- adapter-MTU
    |--------------- vport0
                      |------- RX-0
                      |          |---cq_budget
                      |          |---cq_dump
                      |          |---cq_head
                      |          |---cq_tail
                      |          |---rq_head
                      |          |---rq_nbuf
                      |          |---rq_tail
                      |          |---rxq_dump
                      |------- RX-1
                      .
                      .
                      |------- TX-0
                      |          |---cq_budget
                      |          |---cq_dump
                      |          |---cq_head
                      |          |---cq_tail
                      |          |---sq_head
                      |          |---sq_pend_skb_qlen
                      |          |---sq_tail
                      |          |---txq_dump
                      |------- TX-1
                      .
                      .

Signed-off-by: Shradha Gupta <shradhagupta@linux.microsoft.com>
Reviewed-by: Haiyang Zhang <haiyangz@microsoft.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/mana/gdma.h | 6 +++++-
 include/net/mana/mana.h | 8 ++++++++
 2 files changed, 13 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/mana/gdma.h b/include/net/mana/gdma.h
index de47fa533b15..90f56656b572 100644
--- a/include/net/mana/gdma.h
+++ b/include/net/mana/gdma.h
@@ -267,7 +267,8 @@ struct gdma_event {
 struct gdma_queue;
 
 struct mana_eq {
-	struct gdma_queue *eq;
+	struct gdma_queue	*eq;
+	struct dentry		*mana_eq_debugfs;
 };
 
 typedef void gdma_eq_callback(void *context, struct gdma_queue *q,
@@ -365,6 +366,7 @@ struct gdma_irq_context {
 
 struct gdma_context {
 	struct device		*dev;
+	struct dentry		*mana_pci_debugfs;
 
 	/* Per-vPort max number of queues */
 	unsigned int		max_num_queues;
@@ -878,5 +880,7 @@ int mana_gd_send_request(struct gdma_context *gc, u32 req_len, const void *req,
 			 u32 resp_len, void *resp);
 
 int mana_gd_destroy_dma_region(struct gdma_context *gc, u64 dma_region_handle);
+void mana_register_debugfs(void);
+void mana_unregister_debugfs(void);
 
 #endif /* _GDMA_H */
diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h
index 9b0faa24b758..0d00b24eacaf 100644
--- a/include/net/mana/mana.h
+++ b/include/net/mana/mana.h
@@ -350,6 +350,7 @@ struct mana_rxq {
 	int xdp_rc; /* XDP redirect return code */
 
 	struct page_pool *page_pool;
+	struct dentry *mana_rx_debugfs;
 
 	/* MUST BE THE LAST MEMBER:
 	 * Each receive buffer has an associated mana_recv_buf_oob.
@@ -363,6 +364,8 @@ struct mana_tx_qp {
 	struct mana_cq tx_cq;
 
 	mana_handle_t tx_object;
+
+	struct dentry *mana_tx_debugfs;
 };
 
 struct mana_ethtool_stats {
@@ -407,6 +410,7 @@ struct mana_context {
 	u16 num_ports;
 
 	struct mana_eq *eqs;
+	struct dentry *mana_eqs_debugfs;
 
 	struct net_device *ports[MAX_PORTS_IN_MANA_DEV];
 };
@@ -468,6 +472,9 @@ struct mana_port_context {
 	bool port_st_save; /* Saved port state */
 
 	struct mana_ethtool_stats eth_stats;
+
+	/* Debugfs */
+	struct dentry *mana_port_debugfs;
 };
 
 netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev);
@@ -494,6 +501,7 @@ int mana_pre_alloc_rxbufs(struct mana_port_context *apc, int mtu, int num_queues
 void mana_pre_dealloc_rxbufs(struct mana_port_context *apc);
 
 extern const struct ethtool_ops mana_ethtool_ops;
+extern struct dentry *mana_debugfs_root;
 
 /* A CQ can be created not associated with any EQ */
 #define GDMA_CQ_NO_EQ  0xffff
-- 
cgit v1.2.3


From 823a566221a5639f6c69424897218e5d6431a970 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= <thomas.hellstrom@linux.intel.com>
Date: Wed, 9 Oct 2024 11:20:31 +0200
Subject: locking/ww_mutex: Adjust to lockdep nest_lock requirements
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When using mutex_acquire_nest() with a nest_lock, lockdep refcounts the
number of acquired lockdep_maps of mutexes of the same class, and also
keeps a pointer to the first acquired lockdep_map of a class. That pointer
is then used for various comparison-, printing- and checking purposes,
but there is no mechanism to actively ensure that lockdep_map stays in
memory. Instead, a warning is printed if the lockdep_map is freed and
there are still held locks of the same lock class, even if the lockdep_map
itself has been released.

In the context of WW/WD transactions that means that if a user unlocks
and frees a ww_mutex from within an ongoing ww transaction, and that
mutex happens to be the first ww_mutex grabbed in the transaction,
such a warning is printed and there might be a risk of a UAF.

Note that this is only problem when lockdep is enabled and affects only
dereferences of struct lockdep_map.

Adjust to this by adding a fake lockdep_map to the acquired context and
make sure it is the first acquired lockdep map of the associated
ww_mutex class. Then hold it for the duration of the WW/WD transaction.

This has the side effect that trying to lock a ww mutex *without* a
ww_acquire_context but where a such context has been acquire, we'd see
a lockdep splat. The test-ww_mutex.c selftest attempts to do that, so
modify that particular test to not acquire a ww_acquire_context if it
is not going to be used.

Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20241009092031.6356-1-thomas.hellstrom@linux.intel.com
---
 include/linux/ww_mutex.h | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

(limited to 'include')

diff --git a/include/linux/ww_mutex.h b/include/linux/ww_mutex.h
index bb763085479a..a401a2f31a77 100644
--- a/include/linux/ww_mutex.h
+++ b/include/linux/ww_mutex.h
@@ -65,6 +65,16 @@ struct ww_acquire_ctx {
 #endif
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 	struct lockdep_map dep_map;
+	/**
+	 * @first_lock_dep_map: fake lockdep_map for first locked ww_mutex.
+	 *
+	 * lockdep requires the lockdep_map for the first locked ww_mutex
+	 * in a ww transaction to remain in memory until all ww_mutexes of
+	 * the transaction have been unlocked. Ensure this by keeping a
+	 * fake locked ww_mutex lockdep map between ww_acquire_init() and
+	 * ww_acquire_fini().
+	 */
+	struct lockdep_map first_lock_dep_map;
 #endif
 #ifdef CONFIG_DEBUG_WW_MUTEX_SLOWPATH
 	unsigned int deadlock_inject_interval;
@@ -146,7 +156,10 @@ static inline void ww_acquire_init(struct ww_acquire_ctx *ctx,
 	debug_check_no_locks_freed((void *)ctx, sizeof(*ctx));
 	lockdep_init_map(&ctx->dep_map, ww_class->acquire_name,
 			 &ww_class->acquire_key, 0);
+	lockdep_init_map(&ctx->first_lock_dep_map, ww_class->mutex_name,
+			 &ww_class->mutex_key, 0);
 	mutex_acquire(&ctx->dep_map, 0, 0, _RET_IP_);
+	mutex_acquire_nest(&ctx->first_lock_dep_map, 0, 0, &ctx->dep_map, _RET_IP_);
 #endif
 #ifdef CONFIG_DEBUG_WW_MUTEX_SLOWPATH
 	ctx->deadlock_inject_interval = 1;
@@ -185,6 +198,7 @@ static inline void ww_acquire_done(struct ww_acquire_ctx *ctx)
 static inline void ww_acquire_fini(struct ww_acquire_ctx *ctx)
 {
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
+	mutex_release(&ctx->first_lock_dep_map, _THIS_IP_);
 	mutex_release(&ctx->dep_map, _THIS_IP_);
 #endif
 #ifdef DEBUG_WW_MUTEXES
-- 
cgit v1.2.3


From c3e91446a3580353672e965165ab37db2bf6a757 Mon Sep 17 00:00:00 2001
From: Jani Nikula <jani.nikula@intel.com>
Date: Wed, 9 Oct 2024 17:03:00 +0300
Subject: drm/file: fix client_name_lock kernel-doc warning
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

It's client_name_lock, not name_lock. Also unify style while at it.

Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Closes: https://lore.kernel.org/r/20241009172650.29169e6f@canb.auug.org.au
Fixes: 56c594d8df64 ("drm: add DRM_SET_CLIENT_NAME ioctl")
Reviewed-by: Dmitry Osipenko <dmitry.osipenko@collabora.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20241009140300.1980746-1-jani.nikula@intel.com
Signed-off-by: Christian König <christian.koenig@amd.com>
---
 include/drm/drm_file.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/drm/drm_file.h b/include/drm/drm_file.h
index d4f1c115ea0f..f0ef32e9fa5e 100644
--- a/include/drm/drm_file.h
+++ b/include/drm/drm_file.h
@@ -395,7 +395,10 @@ struct drm_file {
 	 * Userspace-provided name; useful for accounting and debugging.
 	 */
 	const char *client_name;
-	/** @name_lock: Protects @client_name. */
+
+	/**
+	 * @client_name_lock: Protects @client_name.
+	 */
 	struct mutex client_name_lock;
 };
 
-- 
cgit v1.2.3


From 3639fadc7e98a5b0aef399d7beef24b028fdf898 Mon Sep 17 00:00:00 2001
From: Jani Nikula <jani.nikula@intel.com>
Date: Wed, 9 Oct 2024 17:04:52 +0300
Subject: drm/imx: add forward declarations for types

The imx.h header does not forward declare the types it uses, and the
header is not self-contained. Fix it.

Fixes: cc3e8a216d6b ("drm/imx: add internal bridge handling display-timings DT node")
Cc: Philipp Zabel <p.zabel@pengutronix.de>
Cc: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Cc: imx@lists.linux.dev
Cc: linux-arm-kernel@lists.infradead.org
Reviewed-by: Philipp Zabel <p.zabel@pengutronix.de>
Link: https://patchwork.freedesktop.org/patch/msgid/20241009140452.1981175-1-jani.nikula@intel.com
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 include/drm/bridge/imx.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/drm/bridge/imx.h b/include/drm/bridge/imx.h
index e14f429a9ca2..b93f719fe0e7 100644
--- a/include/drm/bridge/imx.h
+++ b/include/drm/bridge/imx.h
@@ -6,6 +6,10 @@
 #ifndef DRM_IMX_BRIDGE_H
 #define DRM_IMX_BRIDGE_H
 
+struct device;
+struct device_node;
+struct drm_bridge;
+
 struct drm_bridge *devm_imx_drm_legacy_bridge(struct device *dev,
 					      struct device_node *np,
 					      int type);
-- 
cgit v1.2.3


From 5461f3fd74a89757f95f351eb0bc26aafc2a2e91 Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <linux@treblig.org>
Date: Fri, 20 Sep 2024 00:27:58 +0100
Subject: backlight: Remove notifier

backlight_register_notifier and backlight_unregister_notifier have
been unused since commit 6cb634d0dc85 ("ACPI: video: Remove code to
unregister acpi_video backlight when a native backlight registers")

With those not being called, it means that the backlight_notifier
list is always empty.

Remove the functions, the list itself and the enum used in the
notifications.

Signed-off-by: Dr. David Alan Gilbert <linux@treblig.org>
Reviewed-by: Daniel Thompson <daniel.thompson@linaro.org>
Reviewed-by: Simona Vetter <simona.vetter@ffwll.ch>
Link: https://lore.kernel.org/r/20240919232758.639925-1-linux@treblig.org
Signed-off-by: Lee Jones <lee@kernel.org>
---
 include/linux/backlight.h | 20 --------------------
 1 file changed, 20 deletions(-)

(limited to 'include')

diff --git a/include/linux/backlight.h b/include/linux/backlight.h
index ea9c1bc8148e..f5652e5a9060 100644
--- a/include/linux/backlight.h
+++ b/include/linux/backlight.h
@@ -66,24 +66,6 @@ enum backlight_type {
 	BACKLIGHT_TYPE_MAX,
 };
 
-/**
- * enum backlight_notification - the type of notification
- *
- * The notifications that is used for notification sent to the receiver
- * that registered notifications using backlight_register_notifier().
- */
-enum backlight_notification {
-	/**
-	 * @BACKLIGHT_REGISTERED: The backlight device is registered.
-	 */
-	BACKLIGHT_REGISTERED,
-
-	/**
-	 * @BACKLIGHT_UNREGISTERED: The backlight revice is unregistered.
-	 */
-	BACKLIGHT_UNREGISTERED,
-};
-
 /** enum backlight_scale - the type of scale used for brightness values
  *
  * The type of scale used for brightness values.
@@ -421,8 +403,6 @@ void devm_backlight_device_unregister(struct device *dev,
 				      struct backlight_device *bd);
 void backlight_force_update(struct backlight_device *bd,
 			    enum backlight_update_reason reason);
-int backlight_register_notifier(struct notifier_block *nb);
-int backlight_unregister_notifier(struct notifier_block *nb);
 struct backlight_device *backlight_device_get_by_name(const char *name);
 struct backlight_device *backlight_device_get_by_type(enum backlight_type type);
 int backlight_device_set_brightness(struct backlight_device *bd,
-- 
cgit v1.2.3


From 4c93ede2b0c73a7708f46a01669769d15d31e1d2 Mon Sep 17 00:00:00 2001
From: R Sundar <prosunofficial@gmail.com>
Date: Thu, 3 Oct 2024 08:08:06 +0530
Subject: drm: Fix for kernel doc warning

Added colon in kernel-doc comment to fix the warning.

./include/drm/drm_drv.h:372: warning: Incorrect use of kernel-doc format:          * @fbdev_probe
./include/drm/drm_drv.h:435: warning: Function parameter or struct member 'fbdev_probe' not described in 'drm_driver'

Signed-off-by: R Sundar <prosunofficial@gmail.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20241003023806.17537-1-prosunofficial@gmail.com
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 include/drm/drm_drv.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/drm/drm_drv.h b/include/drm/drm_drv.h
index 36a606af4ba1..1bbbcb8e2d23 100644
--- a/include/drm/drm_drv.h
+++ b/include/drm/drm_drv.h
@@ -369,7 +369,7 @@ struct drm_driver {
 			       uint64_t *offset);
 
 	/**
-	 * @fbdev_probe
+	 * @fbdev_probe:
 	 *
 	 * Allocates and initialize the fb_info structure for fbdev emulation.
 	 * Furthermore it also needs to allocate the DRM framebuffer used to
-- 
cgit v1.2.3


From 6f1067cfbee72b04fc42234f7f1588f838cec0b6 Mon Sep 17 00:00:00 2001
From: Thomas Richard <thomas.richard@bootlin.com>
Date: Tue, 1 Oct 2024 13:53:27 +0200
Subject: mfd: Add Congatec Board Controller driver

Add core MFD driver for the Board Controller found on some Congatec SMARC
module. This Board Controller provides functions like watchdog, GPIO, and
I2C busses.

This commit adds support only for the conga-SA7 module.

Signed-off-by: Thomas Richard <thomas.richard@bootlin.com>
Link: https://lore.kernel.org/r/20241001-congatec-board-controller-v3-1-39ceceed5c47@bootlin.com
Signed-off-by: Lee Jones <lee@kernel.org>
---
 include/linux/mfd/cgbc.h | 44 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 44 insertions(+)
 create mode 100644 include/linux/mfd/cgbc.h

(limited to 'include')

diff --git a/include/linux/mfd/cgbc.h b/include/linux/mfd/cgbc.h
new file mode 100644
index 000000000000..badbec4c7033
--- /dev/null
+++ b/include/linux/mfd/cgbc.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Congatec Board Controller driver definitions
+ *
+ * Copyright (C) 2024 Bootlin
+ * Author: Thomas Richard <thomas.richard@bootlin.com>
+ */
+
+#ifndef _LINUX_MFD_CGBC_H_
+
+/**
+ * struct cgbc_version - Board Controller device version structure
+ * @feature:	Board Controller feature number
+ * @major:	Board Controller major revision
+ * @minor:	Board Controller minor revision
+ */
+struct cgbc_version {
+	unsigned char feature;
+	unsigned char major;
+	unsigned char minor;
+};
+
+/**
+ * struct cgbc_device_data - Internal representation of the Board Controller device
+ * @io_session:		Pointer to the session IO memory
+ * @io_cmd:		Pointer to the command IO memory
+ * @session:		Session id returned by the Board Controller
+ * @dev:		Pointer to kernel device structure
+ * @cgbc_version:	Board Controller version structure
+ * @mutex:		Board Controller mutex
+ */
+struct cgbc_device_data {
+	void __iomem		*io_session;
+	void __iomem		*io_cmd;
+	u8			session;
+	struct device		*dev;
+	struct cgbc_version	version;
+	struct mutex		lock;
+};
+
+int cgbc_command(struct cgbc_device_data *cgbc, void *cmd, unsigned int cmd_size,
+		 void *data, unsigned int data_size, u8 *status);
+
+#endif /*_LINUX_MFD_CGBC_H_*/
-- 
cgit v1.2.3


From 0e6caab8db8bc9359d1a8363e1ee9b2205ed704d Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Date: Tue, 8 Oct 2024 21:07:11 -0400
Subject: tracing: Declare system call tracepoints with TRACE_EVENT_SYSCALL

In preparation for allowing system call tracepoints to handle page
faults, introduce TRACE_EVENT_SYSCALL to declare the sys_enter/sys_exit
tracepoints.

Move the common code between __DECLARE_TRACE and __DECLARE_TRACE_SYSCALL
into __DECLARE_TRACE_COMMON.

This change is not meant to alter the generated code, and only prepares
the following modifications.

Cc: Michael Jeanson <mjeanson@efficios.com>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Yonghong Song <yhs@fb.com>
Cc: Paul E. McKenney <paulmck@kernel.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Andrii Nakryiko <andrii.nakryiko@gmail.com>
Cc: bpf@vger.kernel.org
Cc: Joel Fernandes <joel@joelfernandes.org>
Link: https://lore.kernel.org/20241009010718.2050182-2-mathieu.desnoyers@efficios.com
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 include/linux/tracepoint.h      | 53 +++++++++++++++++++++++++++++++----------
 include/trace/bpf_probe.h       |  3 +++
 include/trace/define_trace.h    |  5 ++++
 include/trace/events/syscalls.h |  4 ++--
 include/trace/perf.h            |  3 +++
 include/trace/trace_events.h    | 28 ++++++++++++++++++++++
 6 files changed, 81 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index 3d33b9872cec..76e441b39a96 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -197,7 +197,7 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p)
  * it_func[0] is never NULL because there is at least one element in the array
  * when the array itself is non NULL.
  */
-#define __DO_TRACE(name, args, cond)					\
+#define __DO_TRACE(name, args, cond, syscall)				\
 	do {								\
 		int __maybe_unused __idx = 0;				\
 									\
@@ -222,21 +222,10 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p)
  * site if it is not watching, as it will need to be active when the
  * tracepoint is enabled.
  */
-#define __DECLARE_TRACE(name, proto, args, cond, data_proto)		\
+#define __DECLARE_TRACE_COMMON(name, proto, args, cond, data_proto)	\
 	extern int __traceiter_##name(data_proto);			\
 	DECLARE_STATIC_CALL(tp_func_##name, __traceiter_##name);	\
 	extern struct tracepoint __tracepoint_##name;			\
-	static inline void trace_##name(proto)				\
-	{								\
-		if (static_branch_unlikely(&__tracepoint_##name.key))	\
-			__DO_TRACE(name,				\
-				TP_ARGS(args),				\
-				TP_CONDITION(cond));			\
-		if (IS_ENABLED(CONFIG_LOCKDEP) && (cond)) {		\
-			WARN_ONCE(!rcu_is_watching(),			\
-				  "RCU not watching for tracepoint");	\
-		}							\
-	}								\
 	static inline int						\
 	register_trace_##name(void (*probe)(data_proto), void *data)	\
 	{								\
@@ -266,6 +255,34 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p)
 		return static_branch_unlikely(&__tracepoint_##name.key);\
 	}
 
+#define __DECLARE_TRACE(name, proto, args, cond, data_proto)		\
+	__DECLARE_TRACE_COMMON(name, PARAMS(proto), PARAMS(args), cond, PARAMS(data_proto)) \
+	static inline void trace_##name(proto)				\
+	{								\
+		if (static_branch_unlikely(&__tracepoint_##name.key))	\
+			__DO_TRACE(name,				\
+				TP_ARGS(args),				\
+				TP_CONDITION(cond), 0);			\
+		if (IS_ENABLED(CONFIG_LOCKDEP) && (cond)) {		\
+			WARN_ONCE(!rcu_is_watching(),			\
+				  "RCU not watching for tracepoint");	\
+		}							\
+	}
+
+#define __DECLARE_TRACE_SYSCALL(name, proto, args, cond, data_proto)	\
+	__DECLARE_TRACE_COMMON(name, PARAMS(proto), PARAMS(args), cond, PARAMS(data_proto)) \
+	static inline void trace_##name(proto)				\
+	{								\
+		if (static_branch_unlikely(&__tracepoint_##name.key))	\
+			__DO_TRACE(name,				\
+				TP_ARGS(args),				\
+				TP_CONDITION(cond), 1);			\
+		if (IS_ENABLED(CONFIG_LOCKDEP) && (cond)) {		\
+			WARN_ONCE(!rcu_is_watching(),			\
+				  "RCU not watching for tracepoint");	\
+		}							\
+	}
+
 /*
  * We have no guarantee that gcc and the linker won't up-align the tracepoint
  * structures, so we create an array of pointers that will be used for iteration
@@ -348,6 +365,8 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p)
 		return false;						\
 	}
 
+#define __DECLARE_TRACE_SYSCALL	__DECLARE_TRACE
+
 #define DEFINE_TRACE_FN(name, reg, unreg, proto, args)
 #define DEFINE_TRACE(name, proto, args)
 #define EXPORT_TRACEPOINT_SYMBOL_GPL(name)
@@ -409,6 +428,11 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p)
 			cpu_online(raw_smp_processor_id()) && (PARAMS(cond)), \
 			PARAMS(void *__data, proto))
 
+#define DECLARE_TRACE_SYSCALL(name, proto, args)			\
+	__DECLARE_TRACE_SYSCALL(name, PARAMS(proto), PARAMS(args),	\
+				cpu_online(raw_smp_processor_id()),	\
+				PARAMS(void *__data, proto))
+
 #define TRACE_EVENT_FLAGS(event, flag)
 
 #define TRACE_EVENT_PERF_PERM(event, expr...)
@@ -546,6 +570,9 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p)
 			      struct, assign, print)		\
 	DECLARE_TRACE_CONDITION(name, PARAMS(proto),		\
 				PARAMS(args), PARAMS(cond))
+#define TRACE_EVENT_SYSCALL(name, proto, args, struct, assign,	\
+			    print, reg, unreg)			\
+	DECLARE_TRACE_SYSCALL(name, PARAMS(proto), PARAMS(args))
 
 #define TRACE_EVENT_FLAGS(event, flag)
 
diff --git a/include/trace/bpf_probe.h b/include/trace/bpf_probe.h
index a2ea11cc912e..c85bbce5aaa5 100644
--- a/include/trace/bpf_probe.h
+++ b/include/trace/bpf_probe.h
@@ -53,6 +53,9 @@ __bpf_trace_##call(void *__data, proto)					\
 #define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print)	\
 	__BPF_DECLARE_TRACE(call, PARAMS(proto), PARAMS(args))
 
+#undef DECLARE_EVENT_SYSCALL_CLASS
+#define DECLARE_EVENT_SYSCALL_CLASS DECLARE_EVENT_CLASS
+
 /*
  * This part is compiled out, it is only here as a build time check
  * to make sure that if the tracepoint handling changes, the
diff --git a/include/trace/define_trace.h b/include/trace/define_trace.h
index 00723935dcc7..ff5fa17a6259 100644
--- a/include/trace/define_trace.h
+++ b/include/trace/define_trace.h
@@ -46,6 +46,10 @@
 		assign, print, reg, unreg)			\
 	DEFINE_TRACE_FN(name, reg, unreg, PARAMS(proto), PARAMS(args))
 
+#undef TRACE_EVENT_SYSCALL
+#define TRACE_EVENT_SYSCALL(name, proto, args, struct, assign, print, reg, unreg) \
+	DEFINE_TRACE_FN(name, reg, unreg, PARAMS(proto), PARAMS(args))
+
 #undef TRACE_EVENT_NOP
 #define TRACE_EVENT_NOP(name, proto, args, struct, assign, print)
 
@@ -107,6 +111,7 @@
 #undef TRACE_EVENT
 #undef TRACE_EVENT_FN
 #undef TRACE_EVENT_FN_COND
+#undef TRACE_EVENT_SYSCALL
 #undef TRACE_EVENT_CONDITION
 #undef TRACE_EVENT_NOP
 #undef DEFINE_EVENT_NOP
diff --git a/include/trace/events/syscalls.h b/include/trace/events/syscalls.h
index b6e0cbc2c71f..f31ff446b468 100644
--- a/include/trace/events/syscalls.h
+++ b/include/trace/events/syscalls.h
@@ -15,7 +15,7 @@
 
 #ifdef CONFIG_HAVE_SYSCALL_TRACEPOINTS
 
-TRACE_EVENT_FN(sys_enter,
+TRACE_EVENT_SYSCALL(sys_enter,
 
 	TP_PROTO(struct pt_regs *regs, long id),
 
@@ -41,7 +41,7 @@ TRACE_EVENT_FN(sys_enter,
 
 TRACE_EVENT_FLAGS(sys_enter, TRACE_EVENT_FL_CAP_ANY)
 
-TRACE_EVENT_FN(sys_exit,
+TRACE_EVENT_SYSCALL(sys_exit,
 
 	TP_PROTO(struct pt_regs *regs, long ret),
 
diff --git a/include/trace/perf.h b/include/trace/perf.h
index 2c11181c82e0..ded997af481e 100644
--- a/include/trace/perf.h
+++ b/include/trace/perf.h
@@ -55,6 +55,9 @@ perf_trace_##call(void *__data, proto)					\
 				  head, __task);			\
 }
 
+#undef DECLARE_EVENT_SYSCALL_CLASS
+#define DECLARE_EVENT_SYSCALL_CLASS DECLARE_EVENT_CLASS
+
 /*
  * This part is compiled out, it is only here as a build time check
  * to make sure that if the tracepoint handling changes, the
diff --git a/include/trace/trace_events.h b/include/trace/trace_events.h
index c2f9cabf154d..8bcbb9ee44de 100644
--- a/include/trace/trace_events.h
+++ b/include/trace/trace_events.h
@@ -45,6 +45,16 @@
 			     PARAMS(print));		       \
 	DEFINE_EVENT(name, name, PARAMS(proto), PARAMS(args));
 
+#undef TRACE_EVENT_SYSCALL
+#define TRACE_EVENT_SYSCALL(name, proto, args, tstruct, assign, print, reg, unreg) \
+	DECLARE_EVENT_SYSCALL_CLASS(name,		       \
+			     PARAMS(proto),		       \
+			     PARAMS(args),		       \
+			     PARAMS(tstruct),		       \
+			     PARAMS(assign),		       \
+			     PARAMS(print));		       \
+	DEFINE_EVENT(name, name, PARAMS(proto), PARAMS(args));
+
 #include "stages/stage1_struct_define.h"
 
 #undef DECLARE_EVENT_CLASS
@@ -57,6 +67,9 @@
 									\
 	static struct trace_event_class event_class_##name;
 
+#undef DECLARE_EVENT_SYSCALL_CLASS
+#define DECLARE_EVENT_SYSCALL_CLASS DECLARE_EVENT_CLASS
+
 #undef DEFINE_EVENT
 #define DEFINE_EVENT(template, name, proto, args)	\
 	static struct trace_event_call	__used		\
@@ -117,6 +130,9 @@
 		tstruct;						\
 	};
 
+#undef DECLARE_EVENT_SYSCALL_CLASS
+#define DECLARE_EVENT_SYSCALL_CLASS DECLARE_EVENT_CLASS
+
 #undef DEFINE_EVENT
 #define DEFINE_EVENT(template, name, proto, args)
 
@@ -208,6 +224,9 @@ static struct trace_event_functions trace_event_type_funcs_##call = {	\
 	.trace			= trace_raw_output_##call,		\
 };
 
+#undef DECLARE_EVENT_SYSCALL_CLASS
+#define DECLARE_EVENT_SYSCALL_CLASS DECLARE_EVENT_CLASS
+
 #undef DEFINE_EVENT_PRINT
 #define DEFINE_EVENT_PRINT(template, call, proto, args, print)		\
 static notrace enum print_line_t					\
@@ -265,6 +284,9 @@ static inline notrace int trace_event_get_offsets_##call(		\
 	return __data_size;						\
 }
 
+#undef DECLARE_EVENT_SYSCALL_CLASS
+#define DECLARE_EVENT_SYSCALL_CLASS DECLARE_EVENT_CLASS
+
 #include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
 
 /*
@@ -409,6 +431,9 @@ trace_event_raw_event_##call(void *__data, proto)			\
  * fail to compile unless it too is updated.
  */
 
+#undef DECLARE_EVENT_SYSCALL_CLASS
+#define DECLARE_EVENT_SYSCALL_CLASS DECLARE_EVENT_CLASS
+
 #undef DEFINE_EVENT
 #define DEFINE_EVENT(template, call, proto, args)			\
 static inline void ftrace_test_probe_##call(void)			\
@@ -434,6 +459,9 @@ static struct trace_event_class __used __refdata event_class_##call = { \
 	_TRACE_PERF_INIT(call)						\
 };
 
+#undef DECLARE_EVENT_SYSCALL_CLASS
+#define DECLARE_EVENT_SYSCALL_CLASS DECLARE_EVENT_CLASS
+
 #undef DEFINE_EVENT
 #define DEFINE_EVENT(template, call, proto, args)			\
 									\
-- 
cgit v1.2.3


From 13d750c2c03e9861e15268574ed2c239cca9c9d5 Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Date: Tue, 8 Oct 2024 21:07:12 -0400
Subject: tracing/ftrace: disable preemption in syscall probe

In preparation for allowing system call enter/exit instrumentation to
handle page faults, make sure that ftrace can handle this change by
explicitly disabling preemption within the ftrace system call tracepoint
probes to respect the current expectations within ftrace ring buffer
code.

This change does not yet allow ftrace to take page faults per se within
its probe, but allows its existing probes to adapt to the upcoming
change.

Cc: Michael Jeanson <mjeanson@efficios.com>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Yonghong Song <yhs@fb.com>
Cc: Paul E. McKenney <paulmck@kernel.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Andrii Nakryiko <andrii.nakryiko@gmail.com>
Cc: bpf@vger.kernel.org
Cc: Joel Fernandes <joel@joelfernandes.org>
Link: https://lore.kernel.org/20241009010718.2050182-3-mathieu.desnoyers@efficios.com
Acked-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 include/trace/trace_events.h | 39 ++++++++++++++++++++++++++++++++-------
 1 file changed, 32 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/trace/trace_events.h b/include/trace/trace_events.h
index 8bcbb9ee44de..63071aa5923d 100644
--- a/include/trace/trace_events.h
+++ b/include/trace/trace_events.h
@@ -263,6 +263,9 @@ static struct trace_event_fields trace_event_fields_##call[] = {	\
 	tstruct								\
 	{} };
 
+#undef DECLARE_EVENT_SYSCALL_CLASS
+#define DECLARE_EVENT_SYSCALL_CLASS DECLARE_EVENT_CLASS
+
 #undef DEFINE_EVENT_PRINT
 #define DEFINE_EVENT_PRINT(template, name, proto, args, print)
 
@@ -396,11 +399,11 @@ static inline notrace int trace_event_get_offsets_##call(		\
 
 #include "stages/stage6_event_callback.h"
 
-#undef DECLARE_EVENT_CLASS
-#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print)	\
-									\
+
+#undef __DECLARE_EVENT_CLASS
+#define __DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \
 static notrace void							\
-trace_event_raw_event_##call(void *__data, proto)			\
+do_trace_event_raw_event_##call(void *__data, proto)			\
 {									\
 	struct trace_event_file *trace_file = __data;			\
 	struct trace_event_data_offsets_##call __maybe_unused __data_offsets;\
@@ -425,15 +428,35 @@ trace_event_raw_event_##call(void *__data, proto)			\
 									\
 	trace_event_buffer_commit(&fbuffer);				\
 }
+
+#undef DECLARE_EVENT_CLASS
+#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print)	\
+__DECLARE_EVENT_CLASS(call, PARAMS(proto), PARAMS(args), PARAMS(tstruct), \
+		      PARAMS(assign), PARAMS(print))			\
+static notrace void							\
+trace_event_raw_event_##call(void *__data, proto)			\
+{									\
+	do_trace_event_raw_event_##call(__data, args);			\
+}
+
+#undef DECLARE_EVENT_SYSCALL_CLASS
+#define DECLARE_EVENT_SYSCALL_CLASS(call, proto, args, tstruct, assign, print) \
+__DECLARE_EVENT_CLASS(call, PARAMS(proto), PARAMS(args), PARAMS(tstruct), \
+		      PARAMS(assign), PARAMS(print))			\
+static notrace void							\
+trace_event_raw_event_##call(void *__data, proto)			\
+{									\
+	preempt_disable_notrace();					\
+	do_trace_event_raw_event_##call(__data, args);			\
+	preempt_enable_notrace();					\
+}
+
 /*
  * The ftrace_test_probe is compiled out, it is only here as a build time check
  * to make sure that if the tracepoint handling changes, the ftrace probe will
  * fail to compile unless it too is updated.
  */
 
-#undef DECLARE_EVENT_SYSCALL_CLASS
-#define DECLARE_EVENT_SYSCALL_CLASS DECLARE_EVENT_CLASS
-
 #undef DEFINE_EVENT
 #define DEFINE_EVENT(template, call, proto, args)			\
 static inline void ftrace_test_probe_##call(void)			\
@@ -443,6 +466,8 @@ static inline void ftrace_test_probe_##call(void)			\
 
 #include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
 
+#undef __DECLARE_EVENT_CLASS
+
 #include "stages/stage7_class_define.h"
 
 #undef DECLARE_EVENT_CLASS
-- 
cgit v1.2.3


From 65e7462a16cea593025ca3b34c5d74e69b027ee0 Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Date: Tue, 8 Oct 2024 21:07:13 -0400
Subject: tracing/perf: disable preemption in syscall probe

In preparation for allowing system call enter/exit instrumentation to
handle page faults, make sure that perf can handle this change by
explicitly disabling preemption within the perf system call tracepoint
probes to respect the current expectations within perf ring buffer code.

This change does not yet allow perf to take page faults per se within
its probe, but allows its existing probes to adapt to the upcoming
change.

Cc: Michael Jeanson <mjeanson@efficios.com>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Yonghong Song <yhs@fb.com>
Cc: Paul E. McKenney <paulmck@kernel.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Andrii Nakryiko <andrii.nakryiko@gmail.com>
Cc: bpf@vger.kernel.org
Cc: Joel Fernandes <joel@joelfernandes.org>
Link: https://lore.kernel.org/20241009010718.2050182-4-mathieu.desnoyers@efficios.com
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 include/trace/perf.h | 42 ++++++++++++++++++++++++++++++++++++++----
 1 file changed, 38 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/trace/perf.h b/include/trace/perf.h
index ded997af481e..15cde7eac8b4 100644
--- a/include/trace/perf.h
+++ b/include/trace/perf.h
@@ -12,10 +12,10 @@
 #undef __perf_task
 #define __perf_task(t)	(__task = (t))
 
-#undef DECLARE_EVENT_CLASS
-#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print)	\
+#undef __DECLARE_EVENT_CLASS
+#define __DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \
 static notrace void							\
-perf_trace_##call(void *__data, proto)					\
+do_perf_trace_##call(void *__data, proto)				\
 {									\
 	struct trace_event_call *event_call = __data;			\
 	struct trace_event_data_offsets_##call __maybe_unused __data_offsets;\
@@ -55,8 +55,39 @@ perf_trace_##call(void *__data, proto)					\
 				  head, __task);			\
 }
 
+/*
+ * Define unused __count and __task variables to use @args to pass
+ * arguments to do_perf_trace_##call. This is needed because the
+ * macros __perf_count and __perf_task introduce the side-effect to
+ * store copies into those local variables.
+ */
+#undef DECLARE_EVENT_CLASS
+#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print)	\
+__DECLARE_EVENT_CLASS(call, PARAMS(proto), PARAMS(args), PARAMS(tstruct), \
+		      PARAMS(assign), PARAMS(print))			\
+static notrace void							\
+perf_trace_##call(void *__data, proto)					\
+{									\
+	u64 __count __attribute__((unused));				\
+	struct task_struct *__task __attribute__((unused));		\
+									\
+	do_perf_trace_##call(__data, args);				\
+}
+
 #undef DECLARE_EVENT_SYSCALL_CLASS
-#define DECLARE_EVENT_SYSCALL_CLASS DECLARE_EVENT_CLASS
+#define DECLARE_EVENT_SYSCALL_CLASS(call, proto, args, tstruct, assign, print) \
+__DECLARE_EVENT_CLASS(call, PARAMS(proto), PARAMS(args), PARAMS(tstruct), \
+		      PARAMS(assign), PARAMS(print))			\
+static notrace void							\
+perf_trace_##call(void *__data, proto)					\
+{									\
+	u64 __count __attribute__((unused));				\
+	struct task_struct *__task __attribute__((unused));		\
+									\
+	preempt_disable_notrace();					\
+	do_perf_trace_##call(__data, args);				\
+	preempt_enable_notrace();					\
+}
 
 /*
  * This part is compiled out, it is only here as a build time check
@@ -76,4 +107,7 @@ static inline void perf_test_probe_##call(void)				\
 	DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args))
 
 #include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
+
+#undef __DECLARE_EVENT_CLASS
+
 #endif /* CONFIG_PERF_EVENTS */
-- 
cgit v1.2.3


From 4aadde89d81fbf4cf3105a61dbc48888b819ecfb Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Date: Tue, 8 Oct 2024 21:07:14 -0400
Subject: tracing/bpf: disable preemption in syscall probe

In preparation for allowing system call enter/exit instrumentation to
handle page faults, make sure that bpf can handle this change by
explicitly disabling preemption within the bpf system call tracepoint
probes to respect the current expectations within bpf tracing code.

This change does not yet allow bpf to take page faults per se within its
probe, but allows its existing probes to adapt to the upcoming change.

Cc: Michael Jeanson <mjeanson@efficios.com>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Yonghong Song <yhs@fb.com>
Cc: Paul E. McKenney <paulmck@kernel.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Andrii Nakryiko <andrii.nakryiko@gmail.com>
Cc: bpf@vger.kernel.org
Cc: Joel Fernandes <joel@joelfernandes.org>
Link: https://lore.kernel.org/20241009010718.2050182-5-mathieu.desnoyers@efficios.com
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Tested-by: Andrii Nakryiko <andrii@kernel.org> # BPF parts
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 include/trace/bpf_probe.h | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/trace/bpf_probe.h b/include/trace/bpf_probe.h
index c85bbce5aaa5..fec97c93e1c9 100644
--- a/include/trace/bpf_probe.h
+++ b/include/trace/bpf_probe.h
@@ -53,8 +53,18 @@ __bpf_trace_##call(void *__data, proto)					\
 #define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print)	\
 	__BPF_DECLARE_TRACE(call, PARAMS(proto), PARAMS(args))
 
+#define __BPF_DECLARE_TRACE_SYSCALL(call, proto, args)			\
+static notrace void							\
+__bpf_trace_##call(void *__data, proto)					\
+{									\
+	preempt_disable_notrace();					\
+	CONCATENATE(bpf_trace_run, COUNT_ARGS(args))(__data, CAST_TO_U64(args));	\
+	preempt_enable_notrace();					\
+}
+
 #undef DECLARE_EVENT_SYSCALL_CLASS
-#define DECLARE_EVENT_SYSCALL_CLASS DECLARE_EVENT_CLASS
+#define DECLARE_EVENT_SYSCALL_CLASS(call, proto, args, tstruct, assign, print)	\
+	__BPF_DECLARE_TRACE_SYSCALL(call, PARAMS(proto), PARAMS(args))
 
 /*
  * This part is compiled out, it is only here as a build time check
-- 
cgit v1.2.3


From a363d27cdbc2bc2d1899b5a1520b64e3590fcd9a Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Date: Tue, 8 Oct 2024 21:07:15 -0400
Subject: tracing: Allow system call tracepoints to handle page faults

Use Tasks Trace RCU to protect iteration of system call enter/exit
tracepoint probes to allow those probes to handle page faults.

In preparation for this change, all tracers registering to system call
enter/exit tracepoints should expect those to be called with preemption
enabled.

This allows tracers to fault-in userspace system call arguments such as
path strings within their probe callbacks.

Cc: Michael Jeanson <mjeanson@efficios.com>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Yonghong Song <yhs@fb.com>
Cc: Paul E. McKenney <paulmck@kernel.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Andrii Nakryiko <andrii.nakryiko@gmail.com>
Cc: bpf@vger.kernel.org
Cc: Joel Fernandes <joel@joelfernandes.org>
Link: https://lore.kernel.org/20241009010718.2050182-6-mathieu.desnoyers@efficios.com
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 include/linux/tracepoint.h | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index 76e441b39a96..0dc67fad706c 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -17,6 +17,7 @@
 #include <linux/errno.h>
 #include <linux/types.h>
 #include <linux/rcupdate.h>
+#include <linux/rcupdate_trace.h>
 #include <linux/tracepoint-defs.h>
 #include <linux/static_call.h>
 
@@ -107,6 +108,7 @@ void for_each_tracepoint_in_module(struct module *mod,
 #ifdef CONFIG_TRACEPOINTS
 static inline void tracepoint_synchronize_unregister(void)
 {
+	synchronize_rcu_tasks_trace();
 	synchronize_rcu();
 }
 #else
@@ -196,6 +198,12 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p)
 /*
  * it_func[0] is never NULL because there is at least one element in the array
  * when the array itself is non NULL.
+ *
+ * With @syscall=0, the tracepoint callback array dereference is
+ * protected by disabling preemption.
+ * With @syscall=1, the tracepoint callback array dereference is
+ * protected by Tasks Trace RCU, which allows probes to handle page
+ * faults.
  */
 #define __DO_TRACE(name, args, cond, syscall)				\
 	do {								\
@@ -204,11 +212,17 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p)
 		if (!(cond))						\
 			return;						\
 									\
-		preempt_disable_notrace();				\
+		if (syscall)						\
+			rcu_read_lock_trace();				\
+		else							\
+			preempt_disable_notrace();			\
 									\
 		__DO_TRACE_CALL(name, TP_ARGS(args));			\
 									\
-		preempt_enable_notrace();				\
+		if (syscall)						\
+			rcu_read_unlock_trace();			\
+		else							\
+			preempt_enable_notrace();			\
 	} while (0)
 
 /*
-- 
cgit v1.2.3


From a3204c740a59bebb3b37a294d83f4e353303a52c Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Date: Tue, 8 Oct 2024 21:07:16 -0400
Subject: tracing/ftrace: Add might_fault check to syscall probes

Add a might_fault() check to validate that the ftrace sys_enter/sys_exit
probe callbacks are indeed called from a context where page faults can
be handled.

Cc: Michael Jeanson <mjeanson@efficios.com>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Yonghong Song <yhs@fb.com>
Cc: Paul E. McKenney <paulmck@kernel.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Andrii Nakryiko <andrii.nakryiko@gmail.com>
Cc: bpf@vger.kernel.org
Cc: Joel Fernandes <joel@joelfernandes.org>
Link: https://lore.kernel.org/20241009010718.2050182-7-mathieu.desnoyers@efficios.com
Acked-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 include/trace/trace_events.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/trace/trace_events.h b/include/trace/trace_events.h
index 63071aa5923d..4f22136fd465 100644
--- a/include/trace/trace_events.h
+++ b/include/trace/trace_events.h
@@ -446,6 +446,7 @@ __DECLARE_EVENT_CLASS(call, PARAMS(proto), PARAMS(args), PARAMS(tstruct), \
 static notrace void							\
 trace_event_raw_event_##call(void *__data, proto)			\
 {									\
+	might_fault();							\
 	preempt_disable_notrace();					\
 	do_trace_event_raw_event_##call(__data, args);			\
 	preempt_enable_notrace();					\
-- 
cgit v1.2.3


From cdb537ac417938408ee819992f432c410f2d01a2 Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Date: Tue, 8 Oct 2024 21:07:17 -0400
Subject: tracing/perf: Add might_fault check to syscall probes

Add a might_fault() check to validate that the perf sys_enter/sys_exit
probe callbacks are indeed called from a context where page faults can
be handled.

Cc: Michael Jeanson <mjeanson@efficios.com>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Yonghong Song <yhs@fb.com>
Cc: Paul E. McKenney <paulmck@kernel.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Andrii Nakryiko <andrii.nakryiko@gmail.com>
Cc: bpf@vger.kernel.org
Cc: Joel Fernandes <joel@joelfernandes.org>
Link: https://lore.kernel.org/20241009010718.2050182-8-mathieu.desnoyers@efficios.com
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 include/trace/perf.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/trace/perf.h b/include/trace/perf.h
index 15cde7eac8b4..a1754b73a8f5 100644
--- a/include/trace/perf.h
+++ b/include/trace/perf.h
@@ -84,6 +84,7 @@ perf_trace_##call(void *__data, proto)					\
 	u64 __count __attribute__((unused));				\
 	struct task_struct *__task __attribute__((unused));		\
 									\
+	might_fault();							\
 	preempt_disable_notrace();					\
 	do_perf_trace_##call(__data, args);				\
 	preempt_enable_notrace();					\
-- 
cgit v1.2.3


From 0850e1bc88b1bdc30f7f0b223a92eb22e5f06be0 Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Date: Tue, 8 Oct 2024 21:07:18 -0400
Subject: tracing/bpf: Add might_fault check to syscall probes

Add a might_fault() check to validate that the bpf sys_enter/sys_exit
probe callbacks are indeed called from a context where page faults can
be handled.

Cc: Michael Jeanson <mjeanson@efficios.com>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Yonghong Song <yhs@fb.com>
Cc: Paul E. McKenney <paulmck@kernel.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Andrii Nakryiko <andrii.nakryiko@gmail.com>
Cc: bpf@vger.kernel.org
Cc: Joel Fernandes <joel@joelfernandes.org>
Link: https://lore.kernel.org/20241009010718.2050182-9-mathieu.desnoyers@efficios.com
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Tested-by: Andrii Nakryiko <andrii@kernel.org> # BPF parts
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 include/trace/bpf_probe.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/trace/bpf_probe.h b/include/trace/bpf_probe.h
index fec97c93e1c9..183fa2aa2935 100644
--- a/include/trace/bpf_probe.h
+++ b/include/trace/bpf_probe.h
@@ -57,6 +57,7 @@ __bpf_trace_##call(void *__data, proto)					\
 static notrace void							\
 __bpf_trace_##call(void *__data, proto)					\
 {									\
+	might_fault();							\
 	preempt_disable_notrace();					\
 	CONCATENATE(bpf_trace_run, COUNT_ARGS(args))(__data, CAST_TO_U64(args));	\
 	preempt_enable_notrace();					\
-- 
cgit v1.2.3


From 21291491e3f39d1dac6453e376f7619b21239b5e Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <linux@treblig.org>
Date: Wed, 9 Oct 2024 01:35:52 +0100
Subject: clk: Remove unused clk_hw_rate_is_protected

clk_hw_rate_is_protected() was added in 2017's commit
e55a839a7a1c ("clk: add clock protection mechanism to clk core")

but has been unused.

Remove it.

Signed-off-by: Dr. David Alan Gilbert <linux@treblig.org>
Link: https://lore.kernel.org/r/20241009003552.254675-1-linux@treblig.org
Signed-off-by: Stephen Boyd <sboyd@kernel.org>
---
 include/linux/clk-provider.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h
index 7e43caabb54b..5cbd112d1187 100644
--- a/include/linux/clk-provider.h
+++ b/include/linux/clk-provider.h
@@ -1360,7 +1360,6 @@ unsigned long clk_hw_get_flags(const struct clk_hw *hw);
 	(clk_hw_get_flags((hw)) & CLK_SET_RATE_PARENT)
 
 bool clk_hw_is_prepared(const struct clk_hw *hw);
-bool clk_hw_rate_is_protected(const struct clk_hw *hw);
 bool clk_hw_is_enabled(const struct clk_hw *hw);
 bool __clk_is_enabled(struct clk *clk);
 struct clk *__clk_lookup(const char *name);
-- 
cgit v1.2.3


From a82fcb16d977fa8ac52bdf37d5feb240366722ed Mon Sep 17 00:00:00 2001
From: Stephen Boyd <sboyd@kernel.org>
Date: Wed, 21 Aug 2024 17:24:28 -0700
Subject: clk: test: Add test managed of_clk_add_hw_provider()

Add a test managed version of of_clk_add_hw_provider() that
automatically unregisters the clk_hw provider upon test conclusion.

Cc: Brendan Higgins <brendan.higgins@linux.dev>
Cc: David Gow <davidgow@google.com>
Cc: Rae Moar <rmoar@google.com>
Cc: Peng Fan <peng.fan@nxp.com>
Signed-off-by: Stephen Boyd <sboyd@kernel.org>
Link: https://lore.kernel.org/r/20240822002433.1163814-2-sboyd@kernel.org
---
 include/kunit/clk.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/kunit/clk.h b/include/kunit/clk.h
index 73bc99cefe7b..0afae7688157 100644
--- a/include/kunit/clk.h
+++ b/include/kunit/clk.h
@@ -25,4 +25,8 @@ int clk_hw_register_kunit(struct kunit *test, struct device *dev, struct clk_hw
 int of_clk_hw_register_kunit(struct kunit *test, struct device_node *node,
 			     struct clk_hw *hw);
 
+int of_clk_add_hw_provider_kunit(struct kunit *test, struct device_node *np,
+				 struct clk_hw *(*get)(struct of_phandle_args *clkspec, void *data),
+				 void *data);
+
 #endif
-- 
cgit v1.2.3


From 00977af42106e82bb2971c59e20d5e02c52e2a65 Mon Sep 17 00:00:00 2001
From: Stephen Boyd <sboyd@kernel.org>
Date: Wed, 21 Aug 2024 17:24:29 -0700
Subject: of: kunit: Extract some overlay boiler plate into macros

Make the lives of __of_overlay_apply_kunit() callers easier by
extracting some of the boiler plate involved in referencing the DT
overlays.

Cc: Brendan Higgins <brendan.higgins@linux.dev>
Cc: David Gow <davidgow@google.com>
Cc: Rae Moar <rmoar@google.com>
Cc: Peng Fan <peng.fan@nxp.com>
Signed-off-by: Stephen Boyd <sboyd@kernel.org>
Link: https://lore.kernel.org/r/20240822002433.1163814-3-sboyd@kernel.org
---
 include/kunit/of.h | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/kunit/of.h b/include/kunit/of.h
index 48d4e70c9666..75a760a4e2a5 100644
--- a/include/kunit/of.h
+++ b/include/kunit/of.h
@@ -62,6 +62,13 @@ static inline int __of_overlay_apply_kunit(struct kunit *test,
 					  &unused);
 }
 
+#define of_overlay_begin(overlay_name) __dtbo_##overlay_name##_begin
+#define of_overlay_end(overlay_name) __dtbo_##overlay_name##_end
+
+#define OF_OVERLAY_DECLARE(overlay_name)			\
+	extern uint8_t of_overlay_begin(overlay_name)[];	\
+	extern uint8_t of_overlay_end(overlay_name)[]		\
+
 /**
  * of_overlay_apply_kunit() - Test managed of_overlay_fdt_apply() for built-in overlays
  * @test: test context
@@ -104,12 +111,11 @@ static inline int __of_overlay_apply_kunit(struct kunit *test,
  */
 #define of_overlay_apply_kunit(test, overlay_name)		\
 ({								\
-	extern uint8_t __dtbo_##overlay_name##_begin[];		\
-	extern uint8_t __dtbo_##overlay_name##_end[];		\
+	OF_OVERLAY_DECLARE(overlay_name);			\
 								\
 	__of_overlay_apply_kunit((test),			\
-			__dtbo_##overlay_name##_begin,		\
-			__dtbo_##overlay_name##_end);		\
+			of_overlay_begin(overlay_name),		\
+			of_overlay_end(overlay_name));		\
 })
 
 #endif
-- 
cgit v1.2.3


From 2b78d30620d7f8a9f9ce312ad21200ec7a554bd9 Mon Sep 17 00:00:00 2001
From: Guillaume Nault <gnault@redhat.com>
Date: Mon, 7 Oct 2024 20:24:29 +0200
Subject: ipv4: Convert ip_route_use_hint() to dscp_t.

Pass a dscp_t variable to ip_route_use_hint(), instead of a plain u8,
to prevent accidental setting of ECN bits in ->flowi4_tos.

Only ip_rcv_finish_core() actually calls ip_route_use_hint(). Use the
ip4h_dscp() helper to get the DSCP from the IPv4 header.

While there, modify the declaration of ip_route_use_hint() in
include/net/route.h so that it matches the prototype of its
implementation in net/ipv4/route.c.

Signed-off-by: Guillaume Nault <gnault@redhat.com>
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Tested-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: David Ahern <dsahern@kernel.org>
Link: https://patch.msgid.link/c40994fdf804db7a363d04fdee01bf48dddda676.1728302212.git.gnault@redhat.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/route.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/net/route.h b/include/net/route.h
index 5e4374d66927..c219c0fecdcf 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -203,8 +203,8 @@ int ip_mc_validate_source(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 			  struct in_device *in_dev, u32 *itag);
 int ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 			 dscp_t dscp, struct net_device *dev);
-int ip_route_use_hint(struct sk_buff *skb, __be32 dst, __be32 src,
-		      u8 tos, struct net_device *devin,
+int ip_route_use_hint(struct sk_buff *skb, __be32 daddr, __be32 saddr,
+		      dscp_t dscp, struct net_device *dev,
 		      const struct sk_buff *hint);
 
 static inline int ip_route_input(struct sk_buff *skb, __be32 dst, __be32 src,
-- 
cgit v1.2.3


From d32976408744a589f04b5c939f8f01f7167e5167 Mon Sep 17 00:00:00 2001
From: Guillaume Nault <gnault@redhat.com>
Date: Mon, 7 Oct 2024 20:24:54 +0200
Subject: ipv4: Convert ip_mc_validate_source() to dscp_t.

Pass a dscp_t variable to ip_mc_validate_source(), instead of a plain
u8, to prevent accidental setting of ECN bits in ->flowi4_tos.

Callers of ip_mc_validate_source() to consider are:

  * ip_route_input_mc() which already has a dscp_t variable to pass as
    parameter. We just need to remove the inet_dscp_to_dsfield()
    conversion.

  * udp_v4_early_demux() which gets the DSCP directly from the IPv4
    header and can simply use the ip4h_dscp() helper.

Also, stop including net/inet_dscp.h in udp.c as we don't use any of
its declarations anymore.

Signed-off-by: Guillaume Nault <gnault@redhat.com>
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Tested-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: David Ahern <dsahern@kernel.org>
Link: https://patch.msgid.link/c91b2cca04718b7ee6cf5b9c1d5b40507d65a8d4.1728302212.git.gnault@redhat.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/route.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/route.h b/include/net/route.h
index c219c0fecdcf..586e59f7ed8a 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -198,8 +198,9 @@ static inline struct rtable *ip_route_output_gre(struct net *net, struct flowi4
 	fl4->fl4_gre_key = gre_key;
 	return ip_route_output_key(net, fl4);
 }
+
 int ip_mc_validate_source(struct sk_buff *skb, __be32 daddr, __be32 saddr,
-			  u8 tos, struct net_device *dev,
+			  dscp_t dscp, struct net_device *dev,
 			  struct in_device *in_dev, u32 *itag);
 int ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 			 dscp_t dscp, struct net_device *dev);
-- 
cgit v1.2.3


From d36236ab52754ef6bd083be945e9c2e93f466022 Mon Sep 17 00:00:00 2001
From: Guillaume Nault <gnault@redhat.com>
Date: Mon, 7 Oct 2024 20:25:02 +0200
Subject: ipv4: Convert fib_validate_source() to dscp_t.

Pass a dscp_t variable to fib_validate_source(), instead of a plain u8,
to prevent accidental setting of ECN bits in ->flowi4_tos.

All callers of fib_validate_source() already have a dscp_t variable to
pass as parameter. We just need to remove the inet_dscp_to_dsfield()
conversions.

Signed-off-by: Guillaume Nault <gnault@redhat.com>
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Tested-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: David Ahern <dsahern@kernel.org>
Link: https://patch.msgid.link/08612a4519bc5a3578bb493fbaad82437ebb73dc.1728302212.git.gnault@redhat.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/ip_fib.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index 967e4dc555fa..06130933542d 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -449,8 +449,9 @@ int fib_gw_from_via(struct fib_config *cfg, struct nlattr *nla,
 __be32 fib_compute_spec_dst(struct sk_buff *skb);
 bool fib_info_nh_uses_dev(struct fib_info *fi, const struct net_device *dev);
 int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
-			u8 tos, int oif, struct net_device *dev,
+			dscp_t dscp, int oif, struct net_device *dev,
 			struct in_device *idev, u32 *itag);
+
 #ifdef CONFIG_IP_ROUTE_CLASSID
 static inline int fib_num_tclassid_users(struct net *net)
 {
-- 
cgit v1.2.3


From 80c549cd1ab0241a7af262690a0ff9991fc74ec5 Mon Sep 17 00:00:00 2001
From: Alexander Zubkov <green@qrator.net>
Date: Tue, 8 Oct 2024 18:27:57 +0200
Subject: Fix misspelling of "accept*" in net

Several files have "accept*" misspelled as "accpet*" in the comments.
Fix all such occurrences.

Signed-off-by: Alexander Zubkov <green@qrator.net>
Reviewed-by: Simon Horman <horms@kernel.org>
Link: https://patch.msgid.link/20241008162756.22618-2-green@qrator.net
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/uapi/linux/udp.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/uapi/linux/udp.h b/include/uapi/linux/udp.h
index 1a0fe8b151fb..d85d671deed3 100644
--- a/include/uapi/linux/udp.h
+++ b/include/uapi/linux/udp.h
@@ -31,7 +31,7 @@ struct udphdr {
 #define UDP_CORK	1	/* Never send partially complete segments */
 #define UDP_ENCAP	100	/* Set the socket to accept encapsulated packets */
 #define UDP_NO_CHECK6_TX 101	/* Disable sending checksum for UDP6X */
-#define UDP_NO_CHECK6_RX 102	/* Disable accpeting checksum for UDP6 */
+#define UDP_NO_CHECK6_RX 102	/* Disable accepting checksum for UDP6 */
 #define UDP_SEGMENT	103	/* Set GSO segmentation size */
 #define UDP_GRO		104	/* This socket can receive UDP GRO packets */
 
-- 
cgit v1.2.3


From 87173021f1583ee37f4801fcde354729da8db3dc Mon Sep 17 00:00:00 2001
From: Kuniyuki Iwashima <kuniyu@amazon.com>
Date: Tue, 8 Oct 2024 10:29:03 -0700
Subject: ipv4: Link IPv4 address to per-netns hash table.

As a prep for per-netns RTNL conversion, we want to namespacify
the IPv4 address hash table and the GC work.

Let's allocate the per-netns IPv4 address hash table to
net->ipv4.inet_addr_lst and link IPv4 addresses into it.

The actual users will be converted later.

Note that the IPv6 address hash table is already namespacified.

Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Link: https://patch.msgid.link/20241008172906.1326-2-kuniyu@amazon.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/inetdevice.h | 1 +
 include/net/netns/ipv4.h   | 1 +
 2 files changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h
index cb5280e6cc21..d0c2bf67a9b0 100644
--- a/include/linux/inetdevice.h
+++ b/include/linux/inetdevice.h
@@ -142,6 +142,7 @@ static inline void ipv4_devconf_setall(struct in_device *in_dev)
 
 struct in_ifaddr {
 	struct hlist_node	hash;
+	struct hlist_node	addr_lst;
 	struct in_ifaddr	__rcu *ifa_next;
 	struct in_device	*ifa_dev;
 	struct rcu_head		rcu_head;
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 276f622f3516..29eba2eaaa26 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -270,5 +270,6 @@ struct netns_ipv4 {
 
 	atomic_t	rt_genid;
 	siphash_key_t	ip_id_key;
+	struct hlist_head	*inet_addr_lst;
 };
 #endif
-- 
cgit v1.2.3


From 1675f385213edc14ed849e079d6866b48e552252 Mon Sep 17 00:00:00 2001
From: Kuniyuki Iwashima <kuniyu@amazon.com>
Date: Tue, 8 Oct 2024 10:29:05 -0700
Subject: ipv4: Namespacify IPv4 address GC.

Each IPv4 address could have a lifetime, which is useful for DHCP,
and GC is periodically executed as check_lifetime_work.

check_lifetime() does the actual GC under RTNL.

  1. Acquire RTNL
  2. Iterate inet_addr_lst
  3. Remove IPv4 address if expired
  4. Release RTNL

Namespacifying the GC is required for per-netns RTNL, but using the
per-netns hash table will shorten the time on the hash bucket iteration
under RTNL.

Let's add per-netns GC work and use the per-netns hash table.

Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Link: https://patch.msgid.link/20241008172906.1326-4-kuniyu@amazon.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/netns/ipv4.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 29eba2eaaa26..66a4cffc44ee 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -271,5 +271,6 @@ struct netns_ipv4 {
 	atomic_t	rt_genid;
 	siphash_key_t	ip_id_key;
 	struct hlist_head	*inet_addr_lst;
+	struct delayed_work	addr_chk_work;
 };
 #endif
-- 
cgit v1.2.3


From 99ee348e6a41cf24b334a1bb7cde87239e8e2d95 Mon Sep 17 00:00:00 2001
From: Kuniyuki Iwashima <kuniyu@amazon.com>
Date: Tue, 8 Oct 2024 10:29:06 -0700
Subject: ipv4: Retire global IPv4 hash table inet_addr_lst.

No one uses inet_addr_lst anymore, so let's remove it.

Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Link: https://patch.msgid.link/20241008172906.1326-5-kuniyu@amazon.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/inetdevice.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h
index d0c2bf67a9b0..d9c690c8c80b 100644
--- a/include/linux/inetdevice.h
+++ b/include/linux/inetdevice.h
@@ -141,7 +141,6 @@ static inline void ipv4_devconf_setall(struct in_device *in_dev)
 							  ARP_EVICT_NOCARRIER)
 
 struct in_ifaddr {
-	struct hlist_node	hash;
 	struct hlist_node	addr_lst;
 	struct in_ifaddr	__rcu *ifa_next;
 	struct in_device	*ifa_dev;
-- 
cgit v1.2.3


From ee3283c608dfa21251b0821d7bb198c7ae3189f6 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Wed, 2 Oct 2024 17:27:16 -0400
Subject: timekeeping: Add interfaces for handling timestamps with a floor
 value

Multigrain timestamps allow the kernel to use fine-grained timestamps when
an inode's attributes is being actively observed via ->getattr().  With
this support, it's possible for a file to get a fine-grained timestamp, and
another modified after it to get a coarse-grained stamp that is earlier
than the fine-grained time.  If this happens then the files can appear to
have been modified in reverse order, which breaks VFS ordering guarantees
[1].

To prevent this, maintain a floor value for multigrain timestamps.
Whenever a fine-grained timestamp is handed out, record it, and when later
coarse-grained stamps are handed out, ensure they are not earlier than that
value. If the coarse-grained timestamp is earlier than the fine-grained
floor, return the floor value instead.

Add a static singleton atomic64_t into timekeeper.c that is used to keep
track of the latest fine-grained time ever handed out. This is tracked as a
monotonic ktime_t value to ensure that it isn't affected by clock
jumps. Because it is updated at different times than the rest of the
timekeeper object, the floor value is managed independently of the
timekeeper via a cmpxchg() operation, and sits on its own cacheline.

Add two new public interfaces:

- ktime_get_coarse_real_ts64_mg() fills a timespec64 with the later of the
  coarse-grained clock and the floor time

- ktime_get_real_ts64_mg() gets the fine-grained clock value, and tries
  to swap it into the floor. A timespec64 is filled with the result.

The floor value is global and updated via a single try_cmpxchg(). If
that fails then the operation raced with a concurrent update. Any
concurrent update must be later than the existing floor value, so any
racing tasks can accept any resulting floor value without retrying.

[1]: POSIX requires that files be stamped with realtime clock values, and
     makes no provision for dealing with backward clock jumps. If a backward
     realtime clock jump occurs, then files can appear to have been modified
     in reverse order.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Randy Dunlap <rdunlap@infradead.org> # documentation bits
Acked-by: John Stultz <jstultz@google.com>
Link: https://lore.kernel.org/all/20241002-mgtime-v10-1-d1c4717f5284@kernel.org
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/linux/timekeeping.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h
index fc12a9ba2c88..7aa85246c183 100644
--- a/include/linux/timekeeping.h
+++ b/include/linux/timekeeping.h
@@ -45,6 +45,10 @@ extern void ktime_get_real_ts64(struct timespec64 *tv);
 extern void ktime_get_coarse_ts64(struct timespec64 *ts);
 extern void ktime_get_coarse_real_ts64(struct timespec64 *ts);
 
+/* Multigrain timestamp interfaces */
+extern void ktime_get_coarse_real_ts64_mg(struct timespec64 *ts);
+extern void ktime_get_real_ts64_mg(struct timespec64 *ts);
+
 void getboottime64(struct timespec64 *ts);
 
 /*
-- 
cgit v1.2.3


From 2a15385742c689a271345dcbb4c28b9c568bc7ce Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Wed, 2 Oct 2024 17:27:17 -0400
Subject: timekeeping: Add percpu counter for tracking floor swap events

The mgtime_floor value is a global variable for tracking the latest
fine-grained timestamp handed out. Because it's a global, track the
number of times that a new floor value is assigned.

Add a new percpu counter to the timekeeping code to track the number of
floor swap events that have occurred. A later patch will add a debugfs
file to display this counter alongside other stats involving multigrain
timestamps.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Randy Dunlap <rdunlap@infradead.org> # documentation bits
Link: https://lore.kernel.org/all/20241002-mgtime-v10-2-d1c4717f5284@kernel.org
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/linux/timekeeping.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h
index 7aa85246c183..84a035e86ac8 100644
--- a/include/linux/timekeeping.h
+++ b/include/linux/timekeeping.h
@@ -48,6 +48,7 @@ extern void ktime_get_coarse_real_ts64(struct timespec64 *ts);
 /* Multigrain timestamp interfaces */
 extern void ktime_get_coarse_real_ts64_mg(struct timespec64 *ts);
 extern void ktime_get_real_ts64_mg(struct timespec64 *ts);
+extern unsigned long timekeeping_get_mg_floor_swaps(void);
 
 void getboottime64(struct timespec64 *ts);
 
-- 
cgit v1.2.3


From 7f2c86cba3c584c7227cddaabdf0ab54c8151e60 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Wed, 2 Oct 2024 17:27:20 -0400
Subject: fs: handle delegated timestamps in setattr_copy_mgtime

An update to the inode ctime typically requires the latest clock
value possible. The exception to this rule is when there is a nfsd write
delegation and the server is proxying timestamps from the client.

When nfsd gets a CB_GETATTR response, update the timestamp value in the
inode to the values that the client is tracking. The client doesn't send
a ctime value (since that's always determined by the exported
filesystem), but it can send a mtime value. In the case where it does,
update the ctime to a value commensurate with that instead of the
current time.

If ATTR_DELEG is set, then use ia_ctime value instead of setting the
timestamp to the current time.

With the addition of delegated timestamps, the server may receive a
request to update only the atime, which doesn't involve a ctime update.
Trust the ATTR_CTIME flag in the update and only update the ctime when
it's set.

Tested-by: Randy Dunlap <rdunlap@infradead.org> # documentation bits
Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Jeff Layton <jlayton@kernel.org>
Link: https://lore.kernel.org/r/20241002-mgtime-v10-5-d1c4717f5284@kernel.org
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/linux/fs.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index eff688e75f2f..ea7ed437d2b1 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1544,6 +1544,8 @@ static inline bool fsuidgid_has_mapping(struct super_block *sb,
 
 struct timespec64 current_time(struct inode *inode);
 struct timespec64 inode_set_ctime_current(struct inode *inode);
+struct timespec64 inode_set_ctime_deleg(struct inode *inode,
+					struct timespec64 update);
 
 static inline time64_t inode_get_atime_sec(const struct inode *inode)
 {
-- 
cgit v1.2.3


From c86e3c47187ad7fa17f8533a4142453991684b46 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Wed, 2 Oct 2024 17:27:21 -0400
Subject: fs: tracepoints around multigrain timestamp events

Add some tracepoints around various multigrain timestamp events.

Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Reviewed-by: Steven Rostedt (Google) <rostedt@goodmis.org>
Tested-by: Randy Dunlap <rdunlap@infradead.org> # documentation bits
Signed-off-by: Jeff Layton <jlayton@kernel.org>
Link: https://lore.kernel.org/r/20241002-mgtime-v10-6-d1c4717f5284@kernel.org
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/trace/events/timestamp.h | 124 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 124 insertions(+)
 create mode 100644 include/trace/events/timestamp.h

(limited to 'include')

diff --git a/include/trace/events/timestamp.h b/include/trace/events/timestamp.h
new file mode 100644
index 000000000000..c9e5ec930054
--- /dev/null
+++ b/include/trace/events/timestamp.h
@@ -0,0 +1,124 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM timestamp
+
+#if !defined(_TRACE_TIMESTAMP_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_TIMESTAMP_H
+
+#include <linux/tracepoint.h>
+#include <linux/fs.h>
+
+#define CTIME_QUERIED_FLAGS \
+	{ I_CTIME_QUERIED, "Q" }
+
+DECLARE_EVENT_CLASS(ctime,
+	TP_PROTO(struct inode *inode,
+		 struct timespec64 *ctime),
+
+	TP_ARGS(inode, ctime),
+
+	TP_STRUCT__entry(
+		__field(dev_t,		dev)
+		__field(ino_t,		ino)
+		__field(time64_t,	ctime_s)
+		__field(u32,		ctime_ns)
+		__field(u32,		gen)
+	),
+
+	TP_fast_assign(
+		__entry->dev		= inode->i_sb->s_dev;
+		__entry->ino		= inode->i_ino;
+		__entry->gen		= inode->i_generation;
+		__entry->ctime_s	= ctime->tv_sec;
+		__entry->ctime_ns	= ctime->tv_nsec;
+	),
+
+	TP_printk("ino=%d:%d:%ld:%u ctime=%lld.%u",
+		MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino, __entry->gen,
+		__entry->ctime_s, __entry->ctime_ns
+	)
+);
+
+DEFINE_EVENT(ctime, inode_set_ctime_to_ts,
+		TP_PROTO(struct inode *inode,
+			 struct timespec64 *ctime),
+		TP_ARGS(inode, ctime));
+
+DEFINE_EVENT(ctime, ctime_xchg_skip,
+		TP_PROTO(struct inode *inode,
+			 struct timespec64 *ctime),
+		TP_ARGS(inode, ctime));
+
+TRACE_EVENT(ctime_ns_xchg,
+	TP_PROTO(struct inode *inode,
+		 u32 old,
+		 u32 new,
+		 u32 cur),
+
+	TP_ARGS(inode, old, new, cur),
+
+	TP_STRUCT__entry(
+		__field(dev_t,		dev)
+		__field(ino_t,		ino)
+		__field(u32,		gen)
+		__field(u32,		old)
+		__field(u32,		new)
+		__field(u32,		cur)
+	),
+
+	TP_fast_assign(
+		__entry->dev		= inode->i_sb->s_dev;
+		__entry->ino		= inode->i_ino;
+		__entry->gen		= inode->i_generation;
+		__entry->old		= old;
+		__entry->new		= new;
+		__entry->cur		= cur;
+	),
+
+	TP_printk("ino=%d:%d:%ld:%u old=%u:%s new=%u cur=%u:%s",
+		MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino, __entry->gen,
+		__entry->old & ~I_CTIME_QUERIED,
+		__print_flags(__entry->old & I_CTIME_QUERIED, "|", CTIME_QUERIED_FLAGS),
+		__entry->new,
+		__entry->cur & ~I_CTIME_QUERIED,
+		__print_flags(__entry->cur & I_CTIME_QUERIED, "|", CTIME_QUERIED_FLAGS)
+	)
+);
+
+TRACE_EVENT(fill_mg_cmtime,
+	TP_PROTO(struct inode *inode,
+		 struct timespec64 *ctime,
+		 struct timespec64 *mtime),
+
+	TP_ARGS(inode, ctime, mtime),
+
+	TP_STRUCT__entry(
+		__field(dev_t,		dev)
+		__field(ino_t,		ino)
+		__field(time64_t,	ctime_s)
+		__field(time64_t,	mtime_s)
+		__field(u32,		ctime_ns)
+		__field(u32,		mtime_ns)
+		__field(u32,		gen)
+	),
+
+	TP_fast_assign(
+		__entry->dev		= inode->i_sb->s_dev;
+		__entry->ino		= inode->i_ino;
+		__entry->gen		= inode->i_generation;
+		__entry->ctime_s	= ctime->tv_sec;
+		__entry->mtime_s	= mtime->tv_sec;
+		__entry->ctime_ns	= ctime->tv_nsec;
+		__entry->mtime_ns	= mtime->tv_nsec;
+	),
+
+	TP_printk("ino=%d:%d:%ld:%u ctime=%lld.%u mtime=%lld.%u",
+		MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino, __entry->gen,
+		__entry->ctime_s, __entry->ctime_ns,
+		__entry->mtime_s, __entry->mtime_ns
+	)
+);
+#endif /* _TRACE_TIMESTAMP_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
-- 
cgit v1.2.3


From 440e3dcd7c739ba5b256196a89e796fb7e59c755 Mon Sep 17 00:00:00 2001
From: Sunyeal Hong <sunyeal.hong@samsung.com>
Date: Wed, 9 Oct 2024 13:21:08 +0900
Subject: dt-bindings: clock: exynosautov920: add peric1, misc and hsi0/1 clock
 definitions

Add peric1, misc and hsi0/1 clock definitions.

- CMU_PERIC1 for USI, IC2 and I3C
- CMU_MISC for MISC, GIC and OTP
- HSI0 for PCIE
- HSI1 for USB and MMC

Signed-off-by: Sunyeal Hong <sunyeal.hong@samsung.com>
Acked-by: Rob Herring (Arm) <robh@kernel.org>
Link: https://lore.kernel.org/r/20241009042110.2379903-2-sunyeal.hong@samsung.com
Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
---
 include/dt-bindings/clock/samsung,exynosautov920.h | 47 ++++++++++++++++++++++
 1 file changed, 47 insertions(+)

(limited to 'include')

diff --git a/include/dt-bindings/clock/samsung,exynosautov920.h b/include/dt-bindings/clock/samsung,exynosautov920.h
index c720f344b6bf..0c681f2ba3d0 100644
--- a/include/dt-bindings/clock/samsung,exynosautov920.h
+++ b/include/dt-bindings/clock/samsung,exynosautov920.h
@@ -160,6 +160,7 @@
 #define DOUT_CLKCMU_SNW_NOC		144
 #define DOUT_CLKCMU_SSP_NOC		145
 #define DOUT_CLKCMU_TAA_NOC		146
+#define DOUT_TCXO_DIV2			147
 
 /* CMU_PERIC0 */
 #define CLK_MOUT_PERIC0_IP_USER		1
@@ -188,4 +189,50 @@
 #define CLK_DOUT_PERIC0_USI_I2C		23
 #define CLK_DOUT_PERIC0_I3C		24
 
+/* CMU_PERIC1 */
+#define CLK_MOUT_PERIC1_IP_USER		1
+#define CLK_MOUT_PERIC1_NOC_USER	2
+#define CLK_MOUT_PERIC1_USI09_USI	3
+#define CLK_MOUT_PERIC1_USI10_USI	4
+#define CLK_MOUT_PERIC1_USI11_USI	5
+#define CLK_MOUT_PERIC1_USI12_USI	6
+#define CLK_MOUT_PERIC1_USI13_USI	7
+#define CLK_MOUT_PERIC1_USI14_USI	8
+#define CLK_MOUT_PERIC1_USI15_USI	9
+#define CLK_MOUT_PERIC1_USI16_USI	10
+#define CLK_MOUT_PERIC1_USI17_USI	11
+#define CLK_MOUT_PERIC1_USI_I2C		12
+#define CLK_MOUT_PERIC1_I3C		13
+
+#define CLK_DOUT_PERIC1_USI09_USI	14
+#define CLK_DOUT_PERIC1_USI10_USI	15
+#define CLK_DOUT_PERIC1_USI11_USI	16
+#define CLK_DOUT_PERIC1_USI12_USI	17
+#define CLK_DOUT_PERIC1_USI13_USI	18
+#define CLK_DOUT_PERIC1_USI14_USI	19
+#define CLK_DOUT_PERIC1_USI15_USI	20
+#define CLK_DOUT_PERIC1_USI16_USI	21
+#define CLK_DOUT_PERIC1_USI17_USI	22
+#define CLK_DOUT_PERIC1_USI_I2C		23
+#define CLK_DOUT_PERIC1_I3C		24
+
+/* CMU_MISC */
+#define CLK_MOUT_MISC_NOC_USER		1
+#define CLK_MOUT_MISC_GIC		2
+
+#define CLK_DOUT_MISC_OTP		3
+#define CLK_DOUT_MISC_NOCP		4
+#define CLK_DOUT_MISC_OSC_DIV2		5
+
+/* CMU_HSI0 */
+#define CLK_MOUT_HSI0_NOC_USER		1
+
+#define CLK_DOUT_HSI0_PCIE_APB		2
+
+/* CMU_HSI1 */
+#define CLK_MOUT_HSI1_MMC_CARD_USER	1
+#define CLK_MOUT_HSI1_NOC_USER		2
+#define CLK_MOUT_HSI1_USBDRD_USER	3
+#define CLK_MOUT_HSI1_USBDRD		4
+
 #endif /* _DT_BINDINGS_CLOCK_EXYNOSAUTOV920_H */
-- 
cgit v1.2.3


From 9e542ff8b79ae1871074d3a7dca7de9e7374eeda Mon Sep 17 00:00:00 2001
From: Breno Leitao <leitao@debian.org>
Date: Tue, 8 Oct 2024 09:32:04 -0700
Subject: net: Remove likely from l3mdev_master_ifindex_by_index

The likely() annotation in l3mdev_master_ifindex_by_index() has been
found to be incorrect 100% of the time in real-world workloads (e.g.,
web servers).

Annotated branches shows the following in these servers:

	correct incorrect  %        Function                  File              Line
	      0 169053813 100 l3mdev_master_ifindex_by_index l3mdev.h             81

This is happening because l3mdev_master_ifindex_by_index() is called
from __inet_check_established(), which calls
l3mdev_master_ifindex_by_index() passing the socked bounded interface.

	l3mdev_master_ifindex_by_index(net, sk->sk_bound_dev_if);

Since most sockets are not going to be bound to a network device,
the likely() is giving the wrong assumption.

Remove the likely() annotation to ensure more accurate branch
prediction.

Signed-off-by: Breno Leitao <leitao@debian.org>
Reviewed-by: David Ahern <dsahern@kernel.org>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Link: https://patch.msgid.link/20241008163205.3939629-1-leitao@debian.org
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/net/l3mdev.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/l3mdev.h b/include/net/l3mdev.h
index 031c661aa14d..2d6141f28b53 100644
--- a/include/net/l3mdev.h
+++ b/include/net/l3mdev.h
@@ -78,7 +78,7 @@ static inline int l3mdev_master_ifindex_by_index(struct net *net, int ifindex)
 	struct net_device *dev;
 	int rc = 0;
 
-	if (likely(ifindex)) {
+	if (ifindex) {
 		rcu_read_lock();
 
 		dev = dev_get_by_index_rcu(net, ifindex);
-- 
cgit v1.2.3


From 016f426a14f09faa8bdb68b063c2947edf3108a1 Mon Sep 17 00:00:00 2001
From: Cosmin Ratiu <cratiu@nvidia.com>
Date: Tue, 8 Oct 2024 21:32:09 +0300
Subject: net/mlx5: qos: Flesh out element_attributes in mlx5_ifc.h

This is used for multiple purposes, depending on the scheduling element
created. There are a few helper struct defined a long time ago, but they
are not easy to find in the file and they are about to get new members.
This commit cleans up this area a bit by:
- moving the helper structs closer to where they are relevant.
- defining a helper union to include all of them to help
  discoverability.
- making use of it everywhere element_attributes is used.
- using a consistent 'attr' name.

Signed-off-by: Cosmin Ratiu <cratiu@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/linux/mlx5/mlx5_ifc.h | 67 ++++++++++++++++++++++++-------------------
 1 file changed, 37 insertions(+), 30 deletions(-)

(limited to 'include')

diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 96d369112bfa..c79ba6197673 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -4105,11 +4105,47 @@ enum {
 	ELEMENT_TYPE_CAP_MASK_QUEUE_GROUP	= 1 << 4,
 };
 
+enum {
+	TSAR_ELEMENT_TSAR_TYPE_DWRR = 0x0,
+	TSAR_ELEMENT_TSAR_TYPE_ROUND_ROBIN = 0x1,
+	TSAR_ELEMENT_TSAR_TYPE_ETS = 0x2,
+};
+
+enum {
+	TSAR_TYPE_CAP_MASK_DWRR		= 1 << 0,
+	TSAR_TYPE_CAP_MASK_ROUND_ROBIN	= 1 << 1,
+	TSAR_TYPE_CAP_MASK_ETS		= 1 << 2,
+};
+
+struct mlx5_ifc_tsar_element_bits {
+	u8         reserved_at_0[0x8];
+	u8         tsar_type[0x8];
+	u8         reserved_at_10[0x10];
+};
+
+struct mlx5_ifc_vport_element_bits {
+	u8         reserved_at_0[0x10];
+	u8         vport_number[0x10];
+};
+
+struct mlx5_ifc_vport_tc_element_bits {
+	u8         traffic_class[0x4];
+	u8         reserved_at_4[0xc];
+	u8         vport_number[0x10];
+};
+
+union mlx5_ifc_element_attributes_bits {
+	struct mlx5_ifc_tsar_element_bits tsar;
+	struct mlx5_ifc_vport_element_bits vport;
+	struct mlx5_ifc_vport_tc_element_bits vport_tc;
+	u8 reserved_at_0[0x20];
+};
+
 struct mlx5_ifc_scheduling_context_bits {
 	u8         element_type[0x8];
 	u8         reserved_at_8[0x18];
 
-	u8         element_attributes[0x20];
+	union mlx5_ifc_element_attributes_bits element_attributes;
 
 	u8         parent_element_id[0x20];
 
@@ -4798,35 +4834,6 @@ struct mlx5_ifc_register_loopback_control_bits {
 	u8         reserved_at_20[0x60];
 };
 
-struct mlx5_ifc_vport_tc_element_bits {
-	u8         traffic_class[0x4];
-	u8         reserved_at_4[0xc];
-	u8         vport_number[0x10];
-};
-
-struct mlx5_ifc_vport_element_bits {
-	u8         reserved_at_0[0x10];
-	u8         vport_number[0x10];
-};
-
-enum {
-	TSAR_ELEMENT_TSAR_TYPE_DWRR = 0x0,
-	TSAR_ELEMENT_TSAR_TYPE_ROUND_ROBIN = 0x1,
-	TSAR_ELEMENT_TSAR_TYPE_ETS = 0x2,
-};
-
-enum {
-	TSAR_TYPE_CAP_MASK_DWRR		= 1 << 0,
-	TSAR_TYPE_CAP_MASK_ROUND_ROBIN	= 1 << 1,
-	TSAR_TYPE_CAP_MASK_ETS		= 1 << 2,
-};
-
-struct mlx5_ifc_tsar_element_bits {
-	u8         reserved_at_0[0x8];
-	u8         tsar_type[0x8];
-	u8         reserved_at_10[0x10];
-};
-
 enum {
 	MLX5_TEARDOWN_HCA_OUT_FORCE_STATE_SUCCESS = 0x0,
 	MLX5_TEARDOWN_HCA_OUT_FORCE_STATE_FAIL = 0x1,
-- 
cgit v1.2.3


From fceffbfe57af7d9941d08e1a995cccf558d08451 Mon Sep 17 00:00:00 2001
From: Patrick Rudolph <patrick.rudolph@9elements.com>
Date: Wed, 2 Oct 2024 14:54:58 +0200
Subject: regulator: max5970: Drop unused structs

After splitting the max5970 into a MFD device clean the remaining
code and drop unused structs.

The struct max5970_data and enum max5970_chip_type aren't used.

Signed-off-by: Patrick Rudolph <patrick.rudolph@9elements.com>
Acked-by: Lee Jones <lee@kernel.org>
Link: https://patch.msgid.link/20241002125500.78278-1-patrick.rudolph@9elements.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/linux/mfd/max5970.h | 12 ------------
 1 file changed, 12 deletions(-)

(limited to 'include')

diff --git a/include/linux/mfd/max5970.h b/include/linux/mfd/max5970.h
index 762a7d40c843..fc50e89edfaa 100644
--- a/include/linux/mfd/max5970.h
+++ b/include/linux/mfd/max5970.h
@@ -16,18 +16,6 @@
 #define MAX5978_NUM_SWITCHES 1
 #define MAX5970_NUM_LEDS     4
 
-struct max5970_data {
-	int num_switches;
-	u32 irng[MAX5970_NUM_SWITCHES];
-	u32 mon_rng[MAX5970_NUM_SWITCHES];
-	u32 shunt_micro_ohms[MAX5970_NUM_SWITCHES];
-};
-
-enum max5970_chip_type {
-	TYPE_MAX5978 = 1,
-	TYPE_MAX5970,
-};
-
 #define MAX5970_REG_CURRENT_L(ch)		(0x01 + (ch) * 4)
 #define MAX5970_REG_CURRENT_H(ch)		(0x00 + (ch) * 4)
 #define MAX5970_REG_VOLTAGE_L(ch)		(0x03 + (ch) * 4)
-- 
cgit v1.2.3


From 0e8158b4a82eb5bfb69df17510d210b073896f00 Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Wed, 2 Oct 2024 14:22:24 +0200
Subject: OPP: Rework _set_required_devs() to manage a single device per call

At this point there are no consumer drivers that makes use of
_set_required_devs(), hence it should be straightforward to rework the code
to enable it to better integrate with the PM domain attach procedure.

During attach, one device at the time is being hooked up to its
corresponding PM domain. Therefore, let's update the _set_required_devs()
to align to this behaviour, allowing callers to fill out one required_dev
per call. Subsequent changes starts making use of this.

Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Link: https://lore.kernel.org/r/20241002122232.194245-4-ulf.hansson@linaro.org
---
 include/linux/pm_opp.h | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h
index 6424692c30b7..bc74bc69107a 100644
--- a/include/linux/pm_opp.h
+++ b/include/linux/pm_opp.h
@@ -63,10 +63,11 @@ typedef int (*config_clks_t)(struct device *dev, struct opp_table *opp_table,
  * @supported_hw_count: Number of elements in the array.
  * @regulator_names: Array of pointers to the names of the regulator, NULL terminated.
  * @genpd_names: Null terminated array of pointers containing names of genpd to
- *		attach. Mutually exclusive with required_devs.
+ *		attach. Mutually exclusive with required_dev.
  * @virt_devs: Pointer to return the array of genpd virtual devices. Mutually
- *		exclusive with required_devs.
- * @required_devs: Required OPP devices. Mutually exclusive with genpd_names/virt_devs.
+ *		exclusive with required_dev.
+ * @required_dev: Required OPP device. Mutually exclusive with genpd_names/virt_devs.
+ * @required_dev_index: The index of the required OPP for the @required_dev.
  *
  * This structure contains platform specific OPP configurations for the device.
  */
@@ -81,7 +82,8 @@ struct dev_pm_opp_config {
 	const char * const *regulator_names;
 	const char * const *genpd_names;
 	struct device ***virt_devs;
-	struct device **required_devs;
+	struct device *required_dev;
+	unsigned int required_dev_index;
 };
 
 #define OPP_LEVEL_UNSET			U32_MAX
-- 
cgit v1.2.3


From 98d277a79126a2df8a2617215846d01f823cfffa Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Wed, 2 Oct 2024 14:22:25 +0200
Subject: PM: domains: Support required OPPs in dev_pm_domain_attach_list()

In the multiple PM domain case we need platform code to specify the index
of the corresponding required OPP in DT for a device, which is what
*_opp_attach_genpd() is there to help us with.

However, attaching a device to its PM domains is in general better done
with dev_pm_domain_attach_list(). To avoid having two different ways to
manage this and to prepare for the removal of *_opp_attach_genpd(), let's
extend dev_pm_domain_attach|detach_list() to manage the required OPPs too.

Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Link: https://lore.kernel.org/r/20241002122232.194245-5-ulf.hansson@linaro.org
---
 include/linux/pm_domain.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include')

diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h
index b637ec14025f..92f9d56f623d 100644
--- a/include/linux/pm_domain.h
+++ b/include/linux/pm_domain.h
@@ -30,9 +30,16 @@
  *				supplier and its PM domain when creating the
  *				device-links.
  *
+ * PD_FLAG_REQUIRED_OPP:	Assign required_devs for the required OPPs. The
+ *				index of the required OPP must correspond to the
+ *				index in the array of the pd_names. If pd_names
+ *				isn't specified, the index just follows the
+ *				index for the attached PM domain.
+ *
  */
 #define PD_FLAG_NO_DEV_LINK		BIT(0)
 #define PD_FLAG_DEV_LINK_ON		BIT(1)
+#define PD_FLAG_REQUIRED_OPP		BIT(2)
 
 struct dev_pm_domain_attach_data {
 	const char * const *pd_names;
@@ -43,6 +50,7 @@ struct dev_pm_domain_attach_data {
 struct dev_pm_domain_list {
 	struct device **pd_devs;
 	struct device_link **pd_links;
+	u32 *opp_tokens;
 	u32 num_pds;
 };
 
-- 
cgit v1.2.3


From e130ca9d4873f8d01e3e7b8137e0c14196a3cfb4 Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Wed, 2 Oct 2024 14:22:27 +0200
Subject: pmdomain: core: Set the required dev for a required OPP during genpd
 attach

In the single PM domain case there is no need for platform code to specify
the index of the corresponding required OPP in DT, as the index must be
zero. This allows us to assign a required dev for the required OPP from
genpd, while attaching a device to its PM domain.

In this way, we can remove some of the genpd specific code in the OPP core
for the single PM domain case. Although, this cleanup is made from a
subsequent change.

Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Link: https://lore.kernel.org/r/20241002122232.194245-7-ulf.hansson@linaro.org
---
 include/linux/pm_domain.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h
index 92f9d56f623d..76775ab38898 100644
--- a/include/linux/pm_domain.h
+++ b/include/linux/pm_domain.h
@@ -252,6 +252,7 @@ struct generic_pm_domain_data {
 	unsigned int performance_state;
 	unsigned int default_pstate;
 	unsigned int rpm_pstate;
+	unsigned int opp_token;
 	bool hw_mode;
 	void *data;
 };
-- 
cgit v1.2.3


From d6caca30a548764f8cfd78393ea09fefdf285212 Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Wed, 2 Oct 2024 14:22:32 +0200
Subject: OPP: Drop redundant *_opp_attach|detach_genpd()

All users of *_opp_attach|detach_genpd(), have been converted to use
dev|devm_pm_domain_attach|detach_list(), hence let's drop it along with its
corresponding exported functions.

Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Link: https://lore.kernel.org/r/20241002122232.194245-12-ulf.hansson@linaro.org
---
 include/linux/pm_opp.h | 38 +-------------------------------------
 1 file changed, 1 insertion(+), 37 deletions(-)

(limited to 'include')

diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h
index bc74bc69107a..568183e3e641 100644
--- a/include/linux/pm_opp.h
+++ b/include/linux/pm_opp.h
@@ -62,11 +62,7 @@ typedef int (*config_clks_t)(struct device *dev, struct opp_table *opp_table,
  * @supported_hw: Array of hierarchy of versions to match.
  * @supported_hw_count: Number of elements in the array.
  * @regulator_names: Array of pointers to the names of the regulator, NULL terminated.
- * @genpd_names: Null terminated array of pointers containing names of genpd to
- *		attach. Mutually exclusive with required_dev.
- * @virt_devs: Pointer to return the array of genpd virtual devices. Mutually
- *		exclusive with required_dev.
- * @required_dev: Required OPP device. Mutually exclusive with genpd_names/virt_devs.
+ * @required_dev: The required OPP device.
  * @required_dev_index: The index of the required OPP for the @required_dev.
  *
  * This structure contains platform specific OPP configurations for the device.
@@ -80,8 +76,6 @@ struct dev_pm_opp_config {
 	const unsigned int *supported_hw;
 	unsigned int supported_hw_count;
 	const char * const *regulator_names;
-	const char * const *genpd_names;
-	struct device ***virt_devs;
 	struct device *required_dev;
 	unsigned int required_dev_index;
 };
@@ -677,36 +671,6 @@ static inline void dev_pm_opp_put_config_regulators(int token)
 	dev_pm_opp_clear_config(token);
 }
 
-/* genpd helpers */
-static inline int dev_pm_opp_attach_genpd(struct device *dev,
-					  const char * const *names,
-					  struct device ***virt_devs)
-{
-	struct dev_pm_opp_config config = {
-		.genpd_names = names,
-		.virt_devs = virt_devs,
-	};
-
-	return dev_pm_opp_set_config(dev, &config);
-}
-
-static inline void dev_pm_opp_detach_genpd(int token)
-{
-	dev_pm_opp_clear_config(token);
-}
-
-static inline int devm_pm_opp_attach_genpd(struct device *dev,
-					   const char * const *names,
-					   struct device ***virt_devs)
-{
-	struct dev_pm_opp_config config = {
-		.genpd_names = names,
-		.virt_devs = virt_devs,
-	};
-
-	return devm_pm_opp_set_config(dev, &config);
-}
-
 /* prop-name helpers */
 static inline int dev_pm_opp_set_prop_name(struct device *dev, const char *name)
 {
-- 
cgit v1.2.3


From 13d68a16430312fc21990f48326366eb73891202 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Wed, 9 Oct 2024 10:09:47 +0200
Subject: genetlink: extend info user-storage to match NL cb ctx

This allows a more uniform implementation of non-dump and dump
operations, and will be used later in the series to avoid some
per-operation allocation.

Additionally rename the NL_ASSERT_DUMP_CTX_FITS macro, to
fit a more extended usage.

Suggested-by: Jakub Kicinski <kuba@kernel.org>
Reviewed-by: Jakub Kicinski <kuba@kernel.org>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Link: https://patch.msgid.link/1130cc2896626b84587a2a5f96a5c6829638f4da.1728460186.git.pabeni@redhat.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/netlink.h | 5 +++--
 include/net/genetlink.h | 8 ++++++--
 2 files changed, 9 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/netlink.h b/include/linux/netlink.h
index b332c2048c75..a3ca198a3a9e 100644
--- a/include/linux/netlink.h
+++ b/include/linux/netlink.h
@@ -34,6 +34,7 @@ struct netlink_skb_parms {
 
 #define NETLINK_CB(skb)		(*(struct netlink_skb_parms*)&((skb)->cb))
 #define NETLINK_CREDS(skb)	(&NETLINK_CB((skb)).creds)
+#define NETLINK_CTX_SIZE	48
 
 
 void netlink_table_grab(void);
@@ -293,7 +294,7 @@ struct netlink_callback {
 	int			flags;
 	bool			strict_check;
 	union {
-		u8		ctx[48];
+		u8		ctx[NETLINK_CTX_SIZE];
 
 		/* args is deprecated. Cast a struct over ctx instead
 		 * for proper type safety.
@@ -302,7 +303,7 @@ struct netlink_callback {
 	};
 };
 
-#define NL_ASSERT_DUMP_CTX_FITS(type_name)				\
+#define NL_ASSERT_CTX_FITS(type_name)					\
 	BUILD_BUG_ON(sizeof(type_name) >				\
 		     sizeof_field(struct netlink_callback, ctx))
 
diff --git a/include/net/genetlink.h b/include/net/genetlink.h
index 9ab49bfeae78..9d3726e8f90e 100644
--- a/include/net/genetlink.h
+++ b/include/net/genetlink.h
@@ -124,7 +124,8 @@ struct genl_family {
  * @genlhdr: generic netlink message header
  * @attrs: netlink attributes
  * @_net: network namespace
- * @user_ptr: user pointers
+ * @ctx: storage space for the use by the family
+ * @user_ptr: user pointers (deprecated, use ctx instead)
  * @extack: extended ACK report struct
  */
 struct genl_info {
@@ -135,7 +136,10 @@ struct genl_info {
 	struct genlmsghdr *	genlhdr;
 	struct nlattr **	attrs;
 	possible_net_t		_net;
-	void *			user_ptr[2];
+	union {
+		u8		ctx[NETLINK_CTX_SIZE];
+		void *		user_ptr[2];
+	};
 	struct netlink_ext_ack *extack;
 };
 
-- 
cgit v1.2.3


From 04e65df94b3112a1b319b6deb5bab83fd740bc7d Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Wed, 9 Oct 2024 10:09:48 +0200
Subject: netlink: spec: add shaper YAML spec

Define the user-space visible interface to query, configure and delete
network shapers via yaml definition.

Add dummy implementations for the relevant NL callbacks.

set() and delete() operations touch a single shaper creating/updating or
deleting it.
The group() operation creates a shaper's group, nesting multiple input
shapers under the specified output shaper.

Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Link: https://patch.msgid.link/7a33a1ff370bdbcd0cd3f909575c912cd56f41da.1728460186.git.pabeni@redhat.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/uapi/linux/net_shaper.h | 78 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 78 insertions(+)
 create mode 100644 include/uapi/linux/net_shaper.h

(limited to 'include')

diff --git a/include/uapi/linux/net_shaper.h b/include/uapi/linux/net_shaper.h
new file mode 100644
index 000000000000..9e3fa63618ee
--- /dev/null
+++ b/include/uapi/linux/net_shaper.h
@@ -0,0 +1,78 @@
+/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */
+/* Do not edit directly, auto-generated from: */
+/*	Documentation/netlink/specs/net_shaper.yaml */
+/* YNL-GEN uapi header */
+
+#ifndef _UAPI_LINUX_NET_SHAPER_H
+#define _UAPI_LINUX_NET_SHAPER_H
+
+#define NET_SHAPER_FAMILY_NAME		"net-shaper"
+#define NET_SHAPER_FAMILY_VERSION	1
+
+/**
+ * enum net_shaper_scope - Defines the shaper @id interpretation.
+ * @NET_SHAPER_SCOPE_UNSPEC: The scope is not specified.
+ * @NET_SHAPER_SCOPE_NETDEV: The main shaper for the given network device.
+ * @NET_SHAPER_SCOPE_QUEUE: The shaper is attached to the given device queue,
+ *   the @id represents the queue number.
+ * @NET_SHAPER_SCOPE_NODE: The shaper allows grouping of queues or other node
+ *   shapers; can be nested in either @netdev shapers or other @node shapers,
+ *   allowing placement in any location of the scheduling tree, except leaves
+ *   and root.
+ */
+enum net_shaper_scope {
+	NET_SHAPER_SCOPE_UNSPEC,
+	NET_SHAPER_SCOPE_NETDEV,
+	NET_SHAPER_SCOPE_QUEUE,
+	NET_SHAPER_SCOPE_NODE,
+
+	/* private: */
+	__NET_SHAPER_SCOPE_MAX,
+	NET_SHAPER_SCOPE_MAX = (__NET_SHAPER_SCOPE_MAX - 1)
+};
+
+/**
+ * enum net_shaper_metric - Different metric supported by the shaper.
+ * @NET_SHAPER_METRIC_BPS: Shaper operates on a bits per second basis.
+ * @NET_SHAPER_METRIC_PPS: Shaper operates on a packets per second basis.
+ */
+enum net_shaper_metric {
+	NET_SHAPER_METRIC_BPS,
+	NET_SHAPER_METRIC_PPS,
+};
+
+enum {
+	NET_SHAPER_A_HANDLE = 1,
+	NET_SHAPER_A_METRIC,
+	NET_SHAPER_A_BW_MIN,
+	NET_SHAPER_A_BW_MAX,
+	NET_SHAPER_A_BURST,
+	NET_SHAPER_A_PRIORITY,
+	NET_SHAPER_A_WEIGHT,
+	NET_SHAPER_A_IFINDEX,
+	NET_SHAPER_A_PARENT,
+	NET_SHAPER_A_LEAVES,
+
+	__NET_SHAPER_A_MAX,
+	NET_SHAPER_A_MAX = (__NET_SHAPER_A_MAX - 1)
+};
+
+enum {
+	NET_SHAPER_A_HANDLE_SCOPE = 1,
+	NET_SHAPER_A_HANDLE_ID,
+
+	__NET_SHAPER_A_HANDLE_MAX,
+	NET_SHAPER_A_HANDLE_MAX = (__NET_SHAPER_A_HANDLE_MAX - 1)
+};
+
+enum {
+	NET_SHAPER_CMD_GET = 1,
+	NET_SHAPER_CMD_SET,
+	NET_SHAPER_CMD_DELETE,
+	NET_SHAPER_CMD_GROUP,
+
+	__NET_SHAPER_CMD_MAX,
+	NET_SHAPER_CMD_MAX = (__NET_SHAPER_CMD_MAX - 1)
+};
+
+#endif /* _UAPI_LINUX_NET_SHAPER_H */
-- 
cgit v1.2.3


From 4b623f9f0f59652ea71fcb27d60b4c3b65126dbb Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Wed, 9 Oct 2024 10:09:49 +0200
Subject: net-shapers: implement NL get operation

Introduce the basic infrastructure to implement the net-shaper
core functionality. Each network devices carries a net-shaper cache,
the NL get() operation fetches the data from such cache.

The cache is initially empty, will be fill by the set()/group()
operation implemented later and is destroyed at device cleanup time.

The net_shaper_fill_handle(), net_shaper_ctx_init(), and
net_shaper_generic_pre() implementations handle generic index type
attributes, despite the current caller always pass a constant value
to avoid more noise in later patches using them with different
attributes.

Reviewed-by: Jakub Kicinski <kuba@kernel.org>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Link: https://patch.msgid.link/ddd10fd645a9367803ad02fca4a5664ea5ace170.1728460186.git.pabeni@redhat.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/netdevice.h |  21 ++++++++
 include/net/net_shaper.h  | 120 ++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 141 insertions(+)
 create mode 100644 include/net/net_shaper.h

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 3baf8e539b6f..e6b93d01e631 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1603,6 +1603,14 @@ struct net_device_ops {
 	int			(*ndo_hwtstamp_set)(struct net_device *dev,
 						    struct kernel_hwtstamp_config *kernel_config,
 						    struct netlink_ext_ack *extack);
+
+#if IS_ENABLED(CONFIG_NET_SHAPER)
+	/**
+	 * @net_shaper_ops: Device shaping offload operations
+	 * see include/net/net_shapers.h
+	 */
+	const struct net_shaper_ops *net_shaper_ops;
+#endif
 };
 
 /**
@@ -2406,6 +2414,19 @@ struct net_device {
 
 	u64			max_pacing_offload_horizon;
 
+	/**
+	 * @lock: protects @net_shaper_hierarchy, feel free to use for other
+	 * netdev-scope protection. Ordering: take after rtnl_lock.
+	 */
+	struct mutex		lock;
+
+#if IS_ENABLED(CONFIG_NET_SHAPER)
+	/**
+	 * @net_shaper_hierarchy: data tracking the current shaper status
+	 *  see include/net/net_shapers.h
+	 */
+	struct net_shaper_hierarchy *net_shaper_hierarchy;
+#endif
 	u8			priv[] ____cacheline_aligned
 				       __counted_by(priv_len);
 } ____cacheline_aligned;
diff --git a/include/net/net_shaper.h b/include/net/net_shaper.h
new file mode 100644
index 000000000000..5c3f49b52fe9
--- /dev/null
+++ b/include/net/net_shaper.h
@@ -0,0 +1,120 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#ifndef _NET_SHAPER_H_
+#define _NET_SHAPER_H_
+
+#include <linux/types.h>
+
+#include <uapi/linux/net_shaper.h>
+
+struct net_device;
+struct devlink;
+struct netlink_ext_ack;
+
+enum net_shaper_binding_type {
+	NET_SHAPER_BINDING_TYPE_NETDEV,
+	/* NET_SHAPER_BINDING_TYPE_DEVLINK_PORT */
+};
+
+struct net_shaper_binding {
+	enum net_shaper_binding_type type;
+	union {
+		struct net_device *netdev;
+		struct devlink *devlink;
+	};
+};
+
+struct net_shaper_handle {
+	enum net_shaper_scope scope;
+	u32 id;
+};
+
+/**
+ * struct net_shaper - represents a shaping node on the NIC H/W
+ * zeroed field are considered not set.
+ * @parent: Unique identifier for the shaper parent, usually implied
+ * @handle: Unique identifier for this shaper
+ * @metric: Specify if the rate limits refers to PPS or BPS
+ * @bw_min: Minimum guaranteed rate for this shaper
+ * @bw_max: Maximum peak rate allowed for this shaper
+ * @burst: Maximum burst for the peek rate of this shaper
+ * @priority: Scheduling priority for this shaper
+ * @weight: Scheduling weight for this shaper
+ */
+struct net_shaper {
+	struct net_shaper_handle parent;
+	struct net_shaper_handle handle;
+	enum net_shaper_metric metric;
+	u64 bw_min;
+	u64 bw_max;
+	u64 burst;
+	u32 priority;
+	u32 weight;
+
+	/* private: */
+	u32 leaves; /* accounted only for NODE scope */
+	struct rcu_head rcu;
+};
+
+/**
+ * struct net_shaper_ops - Operations on device H/W shapers
+ *
+ * The operations applies to either net_device and devlink objects.
+ * The initial shaping configuration at device initialization is empty:
+ * does not constraint the rate in any way.
+ * The network core keeps track of the applied user-configuration in
+ * the net_device or devlink structure.
+ * The operations are serialized via a per device lock.
+ *
+ * Device not supporting any kind of nesting should not provide the
+ * group operation.
+ *
+ * Each shaper is uniquely identified within the device with a 'handle'
+ * comprising the shaper scope and a scope-specific id.
+ */
+struct net_shaper_ops {
+	/**
+	 * @group: create the specified shapers scheduling group
+	 *
+	 * Nest the @leaves shapers identified under the * @node shaper.
+	 * All the shapers belong to the device specified by @binding.
+	 * The @leaves arrays size is specified by @leaves_count.
+	 * Create either the @leaves and the @node shaper; or if they already
+	 * exists, links them together in the desired way.
+	 * @leaves scope must be NET_SHAPER_SCOPE_QUEUE.
+	 */
+	int (*group)(struct net_shaper_binding *binding, int leaves_count,
+		     const struct net_shaper *leaves,
+		     const struct net_shaper *node,
+		     struct netlink_ext_ack *extack);
+
+	/**
+	 * @set: Updates the specified shaper
+	 *
+	 * Updates or creates the @shaper on the device specified by @binding.
+	 */
+	int (*set)(struct net_shaper_binding *binding,
+		   const struct net_shaper *shaper,
+		   struct netlink_ext_ack *extack);
+
+	/**
+	 * @delete: Removes the specified shaper
+	 *
+	 * Removes the shaper configuration as identified by the given @handle
+	 * on the device specified by @binding, restoring the default behavior.
+	 */
+	int (*delete)(struct net_shaper_binding *binding,
+		      const struct net_shaper_handle *handle,
+		      struct netlink_ext_ack *extack);
+
+	/**
+	 * @capabilities: get the shaper features supported by the device
+	 *
+	 * Fills the bitmask @cap with the supported capabilities for the
+	 * specified @scope and device specified by @binding.
+	 */
+	void (*capabilities)(struct net_shaper_binding *binding,
+			     enum net_shaper_scope scope, unsigned long *cap);
+};
+
+#endif
-- 
cgit v1.2.3


From 14bba9285aedefb99647d716b0f61bf32081e387 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Wed, 9 Oct 2024 10:09:54 +0200
Subject: netlink: spec: add shaper introspection support

Allow the user-space to fine-grain query the shaping features
supported by the NIC on each domain.

Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Reviewed-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Link: https://patch.msgid.link/3ddd10e450e3fe7d4b944c0d0b886d4483529ee6.1728460186.git.pabeni@redhat.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/uapi/linux/net_shaper.h | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/net_shaper.h b/include/uapi/linux/net_shaper.h
index 9e3fa63618ee..d8834b59f7d7 100644
--- a/include/uapi/linux/net_shaper.h
+++ b/include/uapi/linux/net_shaper.h
@@ -65,11 +65,28 @@ enum {
 	NET_SHAPER_A_HANDLE_MAX = (__NET_SHAPER_A_HANDLE_MAX - 1)
 };
 
+enum {
+	NET_SHAPER_A_CAPS_IFINDEX = 1,
+	NET_SHAPER_A_CAPS_SCOPE,
+	NET_SHAPER_A_CAPS_SUPPORT_METRIC_BPS,
+	NET_SHAPER_A_CAPS_SUPPORT_METRIC_PPS,
+	NET_SHAPER_A_CAPS_SUPPORT_NESTING,
+	NET_SHAPER_A_CAPS_SUPPORT_BW_MIN,
+	NET_SHAPER_A_CAPS_SUPPORT_BW_MAX,
+	NET_SHAPER_A_CAPS_SUPPORT_BURST,
+	NET_SHAPER_A_CAPS_SUPPORT_PRIORITY,
+	NET_SHAPER_A_CAPS_SUPPORT_WEIGHT,
+
+	__NET_SHAPER_A_CAPS_MAX,
+	NET_SHAPER_A_CAPS_MAX = (__NET_SHAPER_A_CAPS_MAX - 1)
+};
+
 enum {
 	NET_SHAPER_CMD_GET = 1,
 	NET_SHAPER_CMD_SET,
 	NET_SHAPER_CMD_DELETE,
 	NET_SHAPER_CMD_GROUP,
+	NET_SHAPER_CMD_CAP_GET,
 
 	__NET_SHAPER_CMD_MAX,
 	NET_SHAPER_CMD_MAX = (__NET_SHAPER_CMD_MAX - 1)
-- 
cgit v1.2.3


From 608a5c05c39b75fa2539ce9e521d289c5a5326f7 Mon Sep 17 00:00:00 2001
From: Wenjun Wu <wenjun1.wu@intel.com>
Date: Wed, 9 Oct 2024 10:09:58 +0200
Subject: virtchnl: support queue rate limit and quanta size configuration

This patch adds new virtchnl opcodes and structures for rate limit
and quanta size configuration, which include:
1. VIRTCHNL_OP_CONFIG_QUEUE_BW, to configure max bandwidth for each
VF per queue.
2. VIRTCHNL_OP_CONFIG_QUANTA, to configure quanta size per queue.
3. VIRTCHNL_OP_GET_QOS_CAPS, VF queries current QoS configuration, such
as enabled TCs, arbiter type, up2tc and bandwidth of VSI node. The
configuration is previously set by DCB and PF, and now is the potential
QoS capability of VF. VF can take it as reference to configure queue TC
mapping.

Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: Wenjun Wu <wenjun1.wu@intel.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Link: https://patch.msgid.link/839002f7bd6f63b985a060a51b079f6e6dbbe237.1728460186.git.pabeni@redhat.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/avf/virtchnl.h | 119 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 119 insertions(+)

(limited to 'include')

diff --git a/include/linux/avf/virtchnl.h b/include/linux/avf/virtchnl.h
index f41395264dca..223e433c39fe 100644
--- a/include/linux/avf/virtchnl.h
+++ b/include/linux/avf/virtchnl.h
@@ -89,6 +89,9 @@ enum virtchnl_rx_hsplit {
 	VIRTCHNL_RX_HSPLIT_SPLIT_SCTP    = 8,
 };
 
+enum virtchnl_bw_limit_type {
+	VIRTCHNL_BW_SHAPER = 0,
+};
 /* END GENERIC DEFINES */
 
 /* Opcodes for VF-PF communication. These are placed in the v_opcode field
@@ -151,6 +154,11 @@ enum virtchnl_ops {
 	VIRTCHNL_OP_DISABLE_VLAN_STRIPPING_V2 = 55,
 	VIRTCHNL_OP_ENABLE_VLAN_INSERTION_V2 = 56,
 	VIRTCHNL_OP_DISABLE_VLAN_INSERTION_V2 = 57,
+	/* opcode 57 - 65 are reserved */
+	VIRTCHNL_OP_GET_QOS_CAPS = 66,
+	/* opcode 68 through 111 are reserved */
+	VIRTCHNL_OP_CONFIG_QUEUE_BW = 112,
+	VIRTCHNL_OP_CONFIG_QUANTA = 113,
 	VIRTCHNL_OP_MAX,
 };
 
@@ -261,6 +269,7 @@ VIRTCHNL_CHECK_STRUCT_LEN(16, virtchnl_vsi_resource);
 #define VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC	BIT(26)
 #define VIRTCHNL_VF_OFFLOAD_ADV_RSS_PF		BIT(27)
 #define VIRTCHNL_VF_OFFLOAD_FDIR_PF		BIT(28)
+#define VIRTCHNL_VF_OFFLOAD_QOS			BIT(29)
 
 #define VF_BASE_MODE_OFFLOADS (VIRTCHNL_VF_OFFLOAD_L2 | \
 			       VIRTCHNL_VF_OFFLOAD_VLAN | \
@@ -1416,6 +1425,85 @@ struct virtchnl_fdir_del {
 
 VIRTCHNL_CHECK_STRUCT_LEN(12, virtchnl_fdir_del);
 
+struct virtchnl_shaper_bw {
+	/* Unit is Kbps */
+	u32 committed;
+	u32 peak;
+};
+
+VIRTCHNL_CHECK_STRUCT_LEN(8, virtchnl_shaper_bw);
+
+/* VIRTCHNL_OP_GET_QOS_CAPS
+ * VF sends this message to get its QoS Caps, such as
+ * TC number, Arbiter and Bandwidth.
+ */
+struct virtchnl_qos_cap_elem {
+	u8 tc_num;
+	u8 tc_prio;
+#define VIRTCHNL_ABITER_STRICT      0
+#define VIRTCHNL_ABITER_ETS         2
+	u8 arbiter;
+#define VIRTCHNL_STRICT_WEIGHT      1
+	u8 weight;
+	enum virtchnl_bw_limit_type type;
+	union {
+		struct virtchnl_shaper_bw shaper;
+		u8 pad2[32];
+	};
+};
+
+VIRTCHNL_CHECK_STRUCT_LEN(40, virtchnl_qos_cap_elem);
+
+struct virtchnl_qos_cap_list {
+	u16 vsi_id;
+	u16 num_elem;
+	struct virtchnl_qos_cap_elem cap[];
+};
+
+VIRTCHNL_CHECK_STRUCT_LEN(4, virtchnl_qos_cap_list);
+#define virtchnl_qos_cap_list_LEGACY_SIZEOF	44
+
+/* VIRTCHNL_OP_CONFIG_QUEUE_BW */
+struct virtchnl_queue_bw {
+	u16 queue_id;
+	u8 tc;
+	u8 pad;
+	struct virtchnl_shaper_bw shaper;
+};
+
+VIRTCHNL_CHECK_STRUCT_LEN(12, virtchnl_queue_bw);
+
+struct virtchnl_queues_bw_cfg {
+	u16 vsi_id;
+	u16 num_queues;
+	struct virtchnl_queue_bw cfg[];
+};
+
+VIRTCHNL_CHECK_STRUCT_LEN(4, virtchnl_queues_bw_cfg);
+#define virtchnl_queues_bw_cfg_LEGACY_SIZEOF	16
+
+enum virtchnl_queue_type {
+	VIRTCHNL_QUEUE_TYPE_TX			= 0,
+	VIRTCHNL_QUEUE_TYPE_RX			= 1,
+};
+
+/* structure to specify a chunk of contiguous queues */
+struct virtchnl_queue_chunk {
+	/* see enum virtchnl_queue_type */
+	s32 type;
+	u16 start_queue_id;
+	u16 num_queues;
+};
+
+VIRTCHNL_CHECK_STRUCT_LEN(8, virtchnl_queue_chunk);
+
+struct virtchnl_quanta_cfg {
+	u16 quanta_size;
+	struct virtchnl_queue_chunk queue_select;
+};
+
+VIRTCHNL_CHECK_STRUCT_LEN(12, virtchnl_quanta_cfg);
+
 #define __vss_byone(p, member, count, old)				      \
 	(struct_size(p, member, count) + (old - 1 - struct_size(p, member, 0)))
 
@@ -1438,6 +1526,8 @@ VIRTCHNL_CHECK_STRUCT_LEN(12, virtchnl_fdir_del);
 		 __vss(virtchnl_vlan_filter_list_v2, __vss_byelem, p, m, c),  \
 		 __vss(virtchnl_tc_info, __vss_byelem, p, m, c),	      \
 		 __vss(virtchnl_rdma_qvlist_info, __vss_byelem, p, m, c),     \
+		 __vss(virtchnl_qos_cap_list, __vss_byelem, p, m, c),	      \
+		 __vss(virtchnl_queues_bw_cfg, __vss_byelem, p, m, c),	      \
 		 __vss(virtchnl_rss_key, __vss_byone, p, m, c),		      \
 		 __vss(virtchnl_rss_lut, __vss_byone, p, m, c))
 
@@ -1637,6 +1727,35 @@ virtchnl_vc_validate_vf_msg(struct virtchnl_version_info *ver, u32 v_opcode,
 	case VIRTCHNL_OP_DISABLE_VLAN_INSERTION_V2:
 		valid_len = sizeof(struct virtchnl_vlan_setting);
 		break;
+	case VIRTCHNL_OP_GET_QOS_CAPS:
+		break;
+	case VIRTCHNL_OP_CONFIG_QUEUE_BW:
+		valid_len = virtchnl_queues_bw_cfg_LEGACY_SIZEOF;
+		if (msglen >= valid_len) {
+			struct virtchnl_queues_bw_cfg *q_bw =
+				(struct virtchnl_queues_bw_cfg *)msg;
+
+			valid_len = virtchnl_struct_size(q_bw, cfg,
+							 q_bw->num_queues);
+			if (q_bw->num_queues == 0) {
+				err_msg_format = true;
+				break;
+			}
+		}
+		break;
+	case VIRTCHNL_OP_CONFIG_QUANTA:
+		valid_len = sizeof(struct virtchnl_quanta_cfg);
+		if (msglen >= valid_len) {
+			struct virtchnl_quanta_cfg *q_quanta =
+				(struct virtchnl_quanta_cfg *)msg;
+
+			if (q_quanta->quanta_size == 0 ||
+			    q_quanta->queue_select.num_queues == 0) {
+				err_msg_format = true;
+				break;
+			}
+		}
+		break;
 	/* These are always errors coming from the VF. */
 	case VIRTCHNL_OP_EVENT:
 	case VIRTCHNL_OP_UNKNOWN:
-- 
cgit v1.2.3


From 5bd48a3a14df4b3ee1be0757efcc0f40d4f57b35 Mon Sep 17 00:00:00 2001
From: Matteo Croce <teknoraver@meta.com>
Date: Thu, 10 Oct 2024 04:56:52 +0100
Subject: bpf: fix argument type in bpf_loop documentation

The `index` argument to bpf_loop() is threaded as an u64.
This lead in a subtle verifier denial where clang cloned the argument
in another register[1].

[1] https://github.com/systemd/systemd/pull/34650#issuecomment-2401092895

Signed-off-by: Matteo Croce <teknoraver@meta.com>
Link: https://lore.kernel.org/r/20241010035652.17830-1-technoboy85@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/uapi/linux/bpf.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 8ab4d8184b9d..874af0186fe8 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -5371,7 +5371,7 @@ union bpf_attr {
  *		Currently, the **flags** must be 0. Currently, nr_loops is
  *		limited to 1 << 23 (~8 million) loops.
  *
- *		long (\*callback_fn)(u32 index, void \*ctx);
+ *		long (\*callback_fn)(u64 index, void \*ctx);
  *
  *		where **index** is the current index in the loop. The index
  *		is zero-indexed.
-- 
cgit v1.2.3


From 0a6c61bc9c636e9a32d9f5a4d6d3b031d08763ab Mon Sep 17 00:00:00 2001
From: "Masami Hiramatsu (Google)" <mhiramat@kernel.org>
Date: Thu, 10 Oct 2024 23:59:09 +0900
Subject: fgraph: Simplify return address printing in function graph tracer

Simplify return address printing in the function graph tracer by removing
fgraph_extras. Since this feature is only used by the function graph
tracer and the feature flags can directly accessible from the function
graph tracer, fgraph_extras can be removed from the fgraph callback.

Cc: Donglin Peng <dolinux.peng@gmail.com>
Link: https://lore.kernel.org/172857234900.270774.15378354017601069781.stgit@devnote2
Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 include/linux/ftrace.h | 16 +++-------------
 1 file changed, 3 insertions(+), 13 deletions(-)

(limited to 'include')

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 2ac3b3b53cd0..4c7dd5e58c9f 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -1068,29 +1068,20 @@ struct ftrace_graph_ret {
 	unsigned long long rettime;
 } __packed;
 
-struct fgraph_extras;
 struct fgraph_ops;
 
 /* Type of the callback handlers for tracing function graph*/
 typedef void (*trace_func_graph_ret_t)(struct ftrace_graph_ret *,
 				       struct fgraph_ops *); /* return */
 typedef int (*trace_func_graph_ent_t)(struct ftrace_graph_ent *,
-				      struct fgraph_ops *,
-				      struct fgraph_extras *); /* entry */
+				      struct fgraph_ops *); /* entry */
 
 extern int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace,
-				   struct fgraph_ops *gops,
-				   struct fgraph_extras *extras);
+				   struct fgraph_ops *gops);
 bool ftrace_pids_enabled(struct ftrace_ops *ops);
 
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 
-/* Used to convey some extra datas when creating a graph entry */
-struct fgraph_extras {
-	u32 flags;
-	unsigned long retaddr;
-};
-
 struct fgraph_ops {
 	trace_func_graph_ent_t		entryfunc;
 	trace_func_graph_ret_t		retfunc;
@@ -1131,13 +1122,12 @@ function_graph_enter(unsigned long ret, unsigned long func,
 
 struct ftrace_ret_stack *
 ftrace_graph_get_ret_stack(struct task_struct *task, int skip);
+unsigned long ftrace_graph_top_ret_addr(struct task_struct *task);
 
 unsigned long ftrace_graph_ret_addr(struct task_struct *task, int *idx,
 				    unsigned long ret, unsigned long *retp);
 unsigned long *fgraph_get_task_var(struct fgraph_ops *gops);
 
-u32 graph_tracer_flags_get(u32 flags);
-
 /*
  * Sometimes we don't want to trace a function with the function
  * graph tracer but we want them to keep traced by the usual function
-- 
cgit v1.2.3


From bafffd56c608106d11e7aec851f114dcd66b2091 Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <linux@treblig.org>
Date: Thu, 10 Oct 2024 14:54:46 +0100
Subject: clocksource: Remove unused clocksource_change_rating

clocksource_change_rating() has been unused since 2017's commit
63ed4e0c67df ("Drivers: hv: vmbus: Consolidate all Hyper-V specific clocksource code")

Remove it.

__clocksource_change_rating now only has one use which is ifdef'd.
Move it into the ifdef'd section.

Signed-off-by: Dr. David Alan Gilbert <linux@treblig.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/all/20241010135446.213098-1-linux@treblig.org
---
 include/linux/clocksource.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index d35b677b08fe..ef1b16da6ad5 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -215,7 +215,6 @@ static inline s64 clocksource_cyc2ns(u64 cycles, u32 mult, u32 shift)
 
 extern int clocksource_unregister(struct clocksource*);
 extern void clocksource_touch_watchdog(void);
-extern void clocksource_change_rating(struct clocksource *cs, int rating);
 extern void clocksource_suspend(void);
 extern void clocksource_resume(void);
 extern struct clocksource * __init clocksource_default_clock(void);
-- 
cgit v1.2.3


From c6ca31981b545ad3081007b6aa88b6aab1b0cece Mon Sep 17 00:00:00 2001
From: Martin Kelly <martin.kelly@crowdstrike.com>
Date: Thu, 10 Oct 2024 12:33:01 -0700
Subject: bpf: Update bpf_override_return() comment

The documentation says CONFIG_FUNCTION_ERROR_INJECTION is supported only
on x86. This was presumably true at the time of writing, but it's now
supported on many other architectures too. Drop this statement, since
it's not correct anymore and it fits better in other documentation
anyway.

Signed-off-by: Martin Kelly <martin.kelly@crowdstrike.com>
Link: https://lore.kernel.org/r/20241010193301.995909-1-martin.kelly@crowdstrike.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/uapi/linux/bpf.h | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 874af0186fe8..627c4195f04f 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -3103,10 +3103,6 @@ union bpf_attr {
  * 		with the **CONFIG_BPF_KPROBE_OVERRIDE** configuration
  * 		option, and in this case it only works on functions tagged with
  * 		**ALLOW_ERROR_INJECTION** in the kernel code.
- *
- * 		Also, the helper is only available for the architectures having
- * 		the CONFIG_FUNCTION_ERROR_INJECTION option. As of this writing,
- * 		x86 architecture is the only one to support this feature.
  * 	Return
  * 		0
  *
-- 
cgit v1.2.3


From 445936f9e258eca624c8239056bd8cd6e853b3fd Mon Sep 17 00:00:00 2001
From: Daniel Lezcano <daniel.lezcano@linaro.org>
Date: Mon, 23 Sep 2024 11:59:57 +0200
Subject: thermal: core: Add user thresholds support

The user thresholds mechanism is a way to have the userspace to tell
the thermal framework to send a notification when a temperature limit
is crossed. There is no id, no hysteresis, just the temperature and
the direction of the limit crossing. That means we can be notified
when a threshold is crossed the way up only, or the way down only or
both ways. That allows to create hysteresis values if it is needed.

A threshold can be added, deleted or flushed. The latter means all
thresholds belonging to a thermal zone will be deleted.

When a threshold is added:

 - if the same threshold (temperature and direction) exists, an error
   is returned

 - if a threshold is specified with the same temperature but a
   different direction, the specified direction is added

 - if there is no threshold with the same temperature then it is
   created

When a threshold is deleted:

 - if the same threshold (temperature and direction) exists, it is
   deleted

 - if a threshold is specified with the same temperature but a
   different direction, the specified direction is removed

 - if there is no threshold with the same temperature, then an error
   is returned

When the threshold are flushed:

 - All thresholds related to a thermal zone are deleted

When a threshold is crossed:

 - the userspace does not need to know which threshold(s) have been
   crossed, it will be notified with the current temperature and the
   previous temperature

 - if multiple thresholds have been crossed between two updates only
   one notification will be send to the userspace, it is pointless to
   send a notification per thresholds crossed as the userspace can
   handle that easily when it has the temperature delta information

Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
Link: https://patch.msgid.link/20240923100005.2532430-2-daniel.lezcano@linaro.org
[ rjw: Subject edit, use BIT(0) and BIT(1) in symbol definitions ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/thermal.h      | 3 +++
 include/uapi/linux/thermal.h | 2 ++
 2 files changed, 5 insertions(+)

(limited to 'include')

diff --git a/include/linux/thermal.h b/include/linux/thermal.h
index 25ea8fe2313e..bcaa92732e14 100644
--- a/include/linux/thermal.h
+++ b/include/linux/thermal.h
@@ -56,6 +56,9 @@ enum thermal_notify_event {
 	THERMAL_TZ_UNBIND_CDEV, /* Cooling dev is unbind from the thermal zone */
 	THERMAL_INSTANCE_WEIGHT_CHANGED, /* Thermal instance weight changed */
 	THERMAL_TZ_RESUME, /* Thermal zone is resuming after system sleep */
+	THERMAL_TZ_ADD_THRESHOLD, /* Threshold added */
+	THERMAL_TZ_DEL_THRESHOLD, /* Threshold deleted */
+	THERMAL_TZ_FLUSH_THRESHOLDS, /* All thresholds deleted */
 };
 
 /**
diff --git a/include/uapi/linux/thermal.h b/include/uapi/linux/thermal.h
index fc78bf3aead7..2e6f60a36173 100644
--- a/include/uapi/linux/thermal.h
+++ b/include/uapi/linux/thermal.h
@@ -3,6 +3,8 @@
 #define _UAPI_LINUX_THERMAL_H
 
 #define THERMAL_NAME_LENGTH	20
+#define THERMAL_THRESHOLD_WAY_UP	BIT(0)
+#define THERMAL_THRESHOLD_WAY_DOWN	BIT(1)
 
 enum thermal_device_mode {
 	THERMAL_DEVICE_DISABLED = 0,
-- 
cgit v1.2.3


From 9fb6fef0fb49124291837af1da5028f79d53f98e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@linux.intel.com>
Date: Fri, 14 Jun 2024 13:06:03 +0300
Subject: resource: Add resource set range and size helpers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Setting the end address for a resource with a given size lacks a helper and
is therefore coded manually unlike the getter side which has a helper for
resource size calculation. Also, almost all callsites that calculate the
end address for a resource also set the start address right before it like
this:

  res->start = start_addr;
  res->end = res->start + size - 1;

Add resource_set_range(res, start_addr, size) that sets the start address
and calculates the end address to simplify this often repeated fragment.

Also add resource_set_size() for the cases where setting the start address
of the resource is not necessary but mention in its kerneldoc that
resource_set_range() is preferred when setting both addresses.

Link: https://lore.kernel.org/r/20240614100606.15830-2-ilpo.jarvinen@linux.intel.com
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 include/linux/ioport.h | 32 ++++++++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)

(limited to 'include')

diff --git a/include/linux/ioport.h b/include/linux/ioport.h
index 6e9fb667a1c5..5385349f0b8a 100644
--- a/include/linux/ioport.h
+++ b/include/linux/ioport.h
@@ -249,6 +249,38 @@ struct resource *lookup_resource(struct resource *root, resource_size_t start);
 int adjust_resource(struct resource *res, resource_size_t start,
 		    resource_size_t size);
 resource_size_t resource_alignment(struct resource *res);
+
+/**
+ * resource_set_size - Calculate resource end address from size and start
+ * @res: Resource descriptor
+ * @size: Size of the resource
+ *
+ * Calculate the end address for @res based on @size.
+ *
+ * Note: The start address of @res must be set when calling this function.
+ * Prefer resource_set_range() if setting both the start address and @size.
+ */
+static inline void resource_set_size(struct resource *res, resource_size_t size)
+{
+	res->end = res->start + size - 1;
+}
+
+/**
+ * resource_set_range - Set resource start and end addresses
+ * @res: Resource descriptor
+ * @start: Start address for the resource
+ * @size: Size of the resource
+ *
+ * Set @res start address and calculate the end address based on @size.
+ */
+static inline void resource_set_range(struct resource *res,
+				      resource_size_t start,
+				      resource_size_t size)
+{
+	res->start = start;
+	resource_set_size(res, size);
+}
+
 static inline resource_size_t resource_size(const struct resource *res)
 {
 	return res->end - res->start + 1;
-- 
cgit v1.2.3


From 7888af4166d4ab07ba51234be6ba332b7807e901 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <rostedt@goodmis.org>
Date: Tue, 8 Oct 2024 19:05:28 -0400
Subject: ftrace: Make ftrace_regs abstract from direct use

ftrace_regs was created to hold registers that store information to save
function parameters, return value and stack. Since it is a subset of
pt_regs, it should only be used by its accessor functions. But because
pt_regs can easily be taken from ftrace_regs (on most archs), it is
tempting to use it directly. But when running on other architectures, it
may fail to build or worse, build but crash the kernel!

Instead, make struct ftrace_regs an empty structure and have the
architectures define __arch_ftrace_regs and all the accessor functions
will typecast to it to get to the actual fields. This will help avoid
usage of ftrace_regs directly.

Link: https://lore.kernel.org/all/20241007171027.629bdafd@gandalf.local.home/

Cc: "linux-arch@vger.kernel.org" <linux-arch@vger.kernel.org>
Cc: "x86@kernel.org" <x86@kernel.org>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Will Deacon <will@kernel.org>
Cc: Huacai Chen <chenhuacai@kernel.org>
Cc: WANG Xuerui <kernel@xen0n.name>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
Cc: Naveen N Rao <naveen@kernel.org>
Cc: Madhavan Srinivasan <maddy@linux.ibm.com>
Cc: Paul  Walmsley <paul.walmsley@sifive.com>
Cc: Palmer Dabbelt <palmer@dabbelt.com>
Cc: Albert Ou <aou@eecs.berkeley.edu>
Cc: Heiko Carstens <hca@linux.ibm.com>
Cc: Vasily Gorbik <gor@linux.ibm.com>
Cc: Alexander Gordeev <agordeev@linux.ibm.com>
Cc: Christian Borntraeger <borntraeger@linux.ibm.com>
Cc: Sven Schnelle <svens@linux.ibm.com>
Cc: Thomas  Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Borislav  Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Link: https://lore.kernel.org/20241008230628.958778821@goodmis.org
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
Acked-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Acked-by: Heiko Carstens <hca@linux.ibm.com> # s390
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 include/linux/ftrace.h | 21 ++++++++++++++++++---
 1 file changed, 18 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 4c7dd5e58c9f..66f10291a0b2 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -115,8 +115,6 @@ static inline int ftrace_mod_get_kallsym(unsigned int symnum, unsigned long *val
 
 extern int ftrace_enabled;
 
-#ifndef CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS
-
 /**
  * ftrace_regs - ftrace partial/optimal register set
  *
@@ -142,11 +140,28 @@ extern int ftrace_enabled;
  *
  * NOTE: user *must not* access regs directly, only do it via APIs, because
  * the member can be changed according to the architecture.
+ * This is why the structure is empty here, so that nothing accesses
+ * the ftrace_regs directly.
  */
 struct ftrace_regs {
+	/* Nothing to see here, use the accessor functions! */
+};
+
+#define ftrace_regs_size()	sizeof(struct __arch_ftrace_regs)
+
+#ifndef CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS
+
+struct __arch_ftrace_regs {
 	struct pt_regs		regs;
 };
-#define arch_ftrace_get_regs(fregs) (&(fregs)->regs)
+
+struct ftrace_regs;
+#define arch_ftrace_regs(fregs) ((struct __arch_ftrace_regs *)(fregs))
+
+static inline struct pt_regs *arch_ftrace_get_regs(struct ftrace_regs *fregs)
+{
+	return &arch_ftrace_regs(fregs)->regs;
+}
 
 /*
  * ftrace_regs_set_instruction_pointer() is to be defined by the architecture
-- 
cgit v1.2.3


From 483c5c2bc6b1c0d17214e2672032def605ff2ba9 Mon Sep 17 00:00:00 2001
From: Yanteng Si <siyanteng@cqsoftware.com.cn>
Date: Fri, 20 Sep 2024 13:34:23 +0800
Subject: serial: clean up uart_info

Since commit ebd2c8f6d2ec ("serial: kill off uart_info") has
removed uart_info, the uart_info declaration looks lonely,
let it go.

Signed-off-by: Yanteng Si <siyanteng@cqsoftware.com.cn>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20240920053423.1373354-1-siyanteng@cqsoftware.com.cn
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/platform_data/sa11x0-serial.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/platform_data/sa11x0-serial.h b/include/linux/platform_data/sa11x0-serial.h
index 8b79ab08af45..a88096bc74e4 100644
--- a/include/linux/platform_data/sa11x0-serial.h
+++ b/include/linux/platform_data/sa11x0-serial.h
@@ -10,7 +10,6 @@
 #define SA11X0_SERIAL_H
 
 struct uart_port;
-struct uart_info;
 
 /*
  * This is a temporary structure for registering these
-- 
cgit v1.2.3


From 7738a7ab9d12c5371ed97114ee2132d4512e9fd5 Mon Sep 17 00:00:00 2001
From: Parker Newman <pnewman@connecttech.com>
Date: Wed, 2 Oct 2024 11:12:33 -0400
Subject: misc: eeprom: eeprom_93cx6: Add quirk for extra read clock cycle

Add a quirk similar to eeprom_93xx46 to add an extra clock cycle before
reading data from the EEPROM.

The 93Cx6 family of EEPROMs output a "dummy 0 bit" between the writing
of the op-code/address from the host to the EEPROM and the reading of
the actual data from the EEPROM.

More info can be found on page 6 of the AT93C46 datasheet (linked below).
Similar notes are found in other 93xx6 datasheets.

In summary the read operation for a 93Cx6 EEPROM is:
Write to EEPROM:	110[A5-A0]	(9 bits)
Read from EEPROM:	0[D15-D0]	(17 bits)

Where:
	110 is the start bit and READ OpCode
	[A5-A0] is the address to read from
	0 is a "dummy bit" preceding the actual data
	[D15-D0] is the actual data.

Looking at the READ timing diagrams in the 93Cx6 datasheets the dummy
bit should be clocked out on the last address bit clock cycle meaning it
should be discarded naturally.

However, depending on the hardware configuration sometimes this dummy
bit is not discarded. This is the case with Exar PCI UARTs which require
an extra clock cycle between sending the address and reading the data.

Datasheet: https://ww1.microchip.com/downloads/en/DeviceDoc/Atmel-5193-SEEPROM-AT93C46D-Datasheet.pdf
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Parker Newman <pnewman@connecttech.com>
Link: https://lore.kernel.org/r/0f23973efefccd2544705a0480b4ad4c2353e407.1727880931.git.pnewman@connecttech.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/eeprom_93cx6.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'include')

diff --git a/include/linux/eeprom_93cx6.h b/include/linux/eeprom_93cx6.h
index c860c72a921d..3a485cc0e0fa 100644
--- a/include/linux/eeprom_93cx6.h
+++ b/include/linux/eeprom_93cx6.h
@@ -11,6 +11,8 @@
 	Supported chipsets: 93c46, 93c56 and 93c66.
  */
 
+#include <linux/bits.h>
+
 /*
  * EEPROM operation defines.
  */
@@ -34,6 +36,7 @@
  * @register_write(struct eeprom_93cx6 *eeprom): handler to
  * write to the eeprom register by using all reg_* fields.
  * @width: eeprom width, should be one of the PCI_EEPROM_WIDTH_* defines
+ * @quirks: eeprom or controller quirks
  * @drive_data: Set if we're driving the data line.
  * @reg_data_in: register field to indicate data input
  * @reg_data_out: register field to indicate data output
@@ -50,6 +53,9 @@ struct eeprom_93cx6 {
 	void (*register_write)(struct eeprom_93cx6 *eeprom);
 
 	int width;
+	unsigned int quirks;
+/* Some EEPROMs require an extra clock cycle before reading */
+#define PCI_EEPROM_QUIRK_EXTRA_READ_CYCLE	BIT(0)
 
 	char drive_data;
 	char reg_data_in;
@@ -71,3 +77,8 @@ extern void eeprom_93cx6_wren(struct eeprom_93cx6 *eeprom, bool enable);
 
 extern void eeprom_93cx6_write(struct eeprom_93cx6 *eeprom,
 			       u8 addr, u16 data);
+
+static inline bool has_quirk_extra_read_cycle(struct eeprom_93cx6 *eeprom)
+{
+	return eeprom->quirks & PCI_EEPROM_QUIRK_EXTRA_READ_CYCLE;
+}
-- 
cgit v1.2.3


From 11dad94b50263bbe87d015041c77be61c8c44161 Mon Sep 17 00:00:00 2001
From: Andrew Kreimer <algonell@gmail.com>
Date: Thu, 10 Oct 2024 12:13:55 +0300
Subject: phy: sun4i-usb: Fix a typo

Fix a typo in comments: wether -> whether.

Signed-off-by: Andrew Kreimer <algonell@gmail.com>
Acked-by: Chen-Yu Tsai <wens@csie.org>
Link: https://lore.kernel.org/r/20241010091355.8271-1-algonell@gmail.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 include/linux/phy/phy-sun4i-usb.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/phy/phy-sun4i-usb.h b/include/linux/phy/phy-sun4i-usb.h
index 91eb755ee73b..f3e7b13608e4 100644
--- a/include/linux/phy/phy-sun4i-usb.h
+++ b/include/linux/phy/phy-sun4i-usb.h
@@ -11,7 +11,7 @@
 /**
  * sun4i_usb_phy_set_squelch_detect() - Enable/disable squelch detect
  * @phy: reference to a sun4i usb phy
- * @enabled: wether to enable or disable squelch detect
+ * @enabled: whether to enable or disable squelch detect
  */
 void sun4i_usb_phy_set_squelch_detect(struct phy *phy, bool enabled);
 
-- 
cgit v1.2.3


From e38501cee5364aeb3bd265b484a8e47baa6634aa Mon Sep 17 00:00:00 2001
From: Jacek Lawrynowicz <jacek.lawrynowicz@linux.intel.com>
Date: Mon, 30 Sep 2024 21:53:03 +0200
Subject: accel/ivpu: Stop using hardcoded DRIVER_DATE

Hardcoded driver date is useless, so use kernel version as a driver date
to make identifying .ko file easier. Also allow to pass DRIVER_DATE
on build time to allow versioning the driver in case it is built out
of the tree.

Reviewed-by: Karol Wachowski <karol.wachowski@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240930195322.461209-13-jacek.lawrynowicz@linux.intel.com
Signed-off-by: Jacek Lawrynowicz <jacek.lawrynowicz@linux.intel.com>
---
 include/uapi/drm/ivpu_accel.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include')

diff --git a/include/uapi/drm/ivpu_accel.h b/include/uapi/drm/ivpu_accel.h
index 084fb529e1e9..234664d35250 100644
--- a/include/uapi/drm/ivpu_accel.h
+++ b/include/uapi/drm/ivpu_accel.h
@@ -12,9 +12,6 @@
 extern "C" {
 #endif
 
-#define DRM_IVPU_DRIVER_MAJOR 1
-#define DRM_IVPU_DRIVER_MINOR 0
-
 #define DRM_IVPU_GET_PARAM		  0x00
 #define DRM_IVPU_SET_PARAM		  0x01
 #define DRM_IVPU_BO_CREATE		  0x02
-- 
cgit v1.2.3


From 65b5353193e5a8476814a184e8e1a2627d59f2b5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= <thomas.hellstrom@linux.intel.com>
Date: Thu, 10 Oct 2024 14:45:45 +0200
Subject: drm/ttm: Fix incorrect use of kernel-doc format
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add a missing colon.

Cc: dri-devel@lists.freedesktop.org
Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Closes: https://lore.kernel.org/linux-next/20241010160942.192caf60@canb.auug.org.au/
Fixes: fc5d96670eb2 ("drm/ttm: Move swapped objects off the manager's LRU list")
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Reviewed-by: Nirmoy Das <nirmoy.das@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20241010124545.82023-1-thomas.hellstrom@linux.intel.com
---
 include/drm/ttm/ttm_device.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/drm/ttm/ttm_device.h b/include/drm/ttm/ttm_device.h
index 438358f72716..39b8636b1845 100644
--- a/include/drm/ttm/ttm_device.h
+++ b/include/drm/ttm/ttm_device.h
@@ -252,7 +252,7 @@ struct ttm_device {
 	spinlock_t lru_lock;
 
 	/**
-	 * @unevictable Buffer objects which are pinned or swapped and as such
+	 * @unevictable: Buffer objects which are pinned or swapped and as such
 	 * not on an LRU list.
 	 */
 	struct list_head unevictable;
-- 
cgit v1.2.3


From d677aebd663ddc287f2b2bda098474694a0ca875 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 10 Oct 2024 03:41:00 +0000
Subject: tcp: move sysctl_tcp_l3mdev_accept to netns_ipv4_read_rx

sysctl_tcp_l3mdev_accept is read from TCP receive fast path from
tcp_v6_early_demux(),
 __inet6_lookup_established,
  inet_request_bound_dev_if().

Move it to netns_ipv4_read_rx.

Remove the '#ifdef CONFIG_NET_L3_MASTER_DEV' that was guarding
its definition.

Note this adds a hole of three bytes that could be filled later.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: David Ahern <dsahern@kernel.org>
Cc: Wei Wang <weiwan@google.com>
Cc: Coco Li <lixiaoyan@google.com>
Link: https://patch.msgid.link/20241010034100.320832-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/netns/ipv4.h | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 66a4cffc44ee..3b1de80b5c25 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -76,6 +76,8 @@ struct netns_ipv4 {
 	__cacheline_group_begin(netns_ipv4_read_rx);
 	u8 sysctl_ip_early_demux;
 	u8 sysctl_tcp_early_demux;
+	u8 sysctl_tcp_l3mdev_accept;
+	/* 3 bytes hole, try to pack */
 	int sysctl_tcp_reordering;
 	int sysctl_tcp_rmem[3];
 	__cacheline_group_end(netns_ipv4_read_rx);
@@ -151,9 +153,6 @@ struct netns_ipv4 {
 
 	u8 sysctl_fwmark_reflect;
 	u8 sysctl_tcp_fwmark_accept;
-#ifdef CONFIG_NET_L3_MASTER_DEV
-	u8 sysctl_tcp_l3mdev_accept;
-#endif
 	u8 sysctl_tcp_mtu_probing;
 	int sysctl_tcp_mtu_probe_floor;
 	int sysctl_tcp_base_mss;
-- 
cgit v1.2.3


From ed870e35db660724ff0d815d9a3ef9a6247ffbab Mon Sep 17 00:00:00 2001
From: Casey Schaufler <casey@schaufler-ca.com>
Date: Wed, 9 Oct 2024 10:32:09 -0700
Subject: lsm: add the lsm_prop data structure

When more than one security module is exporting data to audit and
networking sub-systems a single 32 bit integer is no longer
sufficient to represent the data. Add a structure to be used instead.

The lsm_prop structure definition is intended to keep the LSM
specific information private to the individual security modules.
The module specific information is included in a new set of
header files under include/lsm. Each security module is allowed
to define the information included for its use in the lsm_prop.
SELinux includes a u32 secid. Smack includes a pointer into its
global label list. The conditional compilation based on feature
inclusion is contained in the include/lsm files.

Cc: apparmor@lists.ubuntu.com
Cc: bpf@vger.kernel.org
Cc: selinux@vger.kernel.org
Cc: linux-security-module@vger.kernel.org
Suggested-by: Paul Moore <paul@paul-moore.com>
Signed-off-by: Casey Schaufler <casey@schaufler-ca.com>
Acked-by: John Johansen <john.johansen@canonical.com>
[PM: added include/linux/lsm/ to MAINTAINERS, subj tweak]
Signed-off-by: Paul Moore <paul@paul-moore.com>
---
 include/linux/lsm/apparmor.h | 17 +++++++++++++++++
 include/linux/lsm/bpf.h      | 16 ++++++++++++++++
 include/linux/lsm/selinux.h  | 16 ++++++++++++++++
 include/linux/lsm/smack.h    | 17 +++++++++++++++++
 include/linux/security.h     | 20 ++++++++++++++++++++
 5 files changed, 86 insertions(+)
 create mode 100644 include/linux/lsm/apparmor.h
 create mode 100644 include/linux/lsm/bpf.h
 create mode 100644 include/linux/lsm/selinux.h
 create mode 100644 include/linux/lsm/smack.h

(limited to 'include')

diff --git a/include/linux/lsm/apparmor.h b/include/linux/lsm/apparmor.h
new file mode 100644
index 000000000000..612cbfacb072
--- /dev/null
+++ b/include/linux/lsm/apparmor.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Linux Security Module interface to other subsystems.
+ * AppArmor presents single pointer to an aa_label structure.
+ */
+#ifndef __LINUX_LSM_APPARMOR_H
+#define __LINUX_LSM_APPARMOR_H
+
+struct aa_label;
+
+struct lsm_prop_apparmor {
+#ifdef CONFIG_SECURITY_APPARMOR
+	struct aa_label *label;
+#endif
+};
+
+#endif /* ! __LINUX_LSM_APPARMOR_H */
diff --git a/include/linux/lsm/bpf.h b/include/linux/lsm/bpf.h
new file mode 100644
index 000000000000..8106e206fcef
--- /dev/null
+++ b/include/linux/lsm/bpf.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Linux Security Module interface to other subsystems.
+ * BPF may present a single u32 value.
+ */
+#ifndef __LINUX_LSM_BPF_H
+#define __LINUX_LSM_BPF_H
+#include <linux/types.h>
+
+struct lsm_prop_bpf {
+#ifdef CONFIG_BPF_LSM
+	u32 secid;
+#endif
+};
+
+#endif /* ! __LINUX_LSM_BPF_H */
diff --git a/include/linux/lsm/selinux.h b/include/linux/lsm/selinux.h
new file mode 100644
index 000000000000..9455a6b5b910
--- /dev/null
+++ b/include/linux/lsm/selinux.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Linux Security Module interface to other subsystems.
+ * SELinux presents a single u32 value which is known as a secid.
+ */
+#ifndef __LINUX_LSM_SELINUX_H
+#define __LINUX_LSM_SELINUX_H
+#include <linux/types.h>
+
+struct lsm_prop_selinux {
+#ifdef CONFIG_SECURITY_SELINUX
+	u32 secid;
+#endif
+};
+
+#endif /* ! __LINUX_LSM_SELINUX_H */
diff --git a/include/linux/lsm/smack.h b/include/linux/lsm/smack.h
new file mode 100644
index 000000000000..ff730dd7a734
--- /dev/null
+++ b/include/linux/lsm/smack.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Linux Security Module interface to other subsystems.
+ * Smack presents a pointer into the global Smack label list.
+ */
+#ifndef __LINUX_LSM_SMACK_H
+#define __LINUX_LSM_SMACK_H
+
+struct smack_known;
+
+struct lsm_prop_smack {
+#ifdef CONFIG_SECURITY_SMACK
+	struct smack_known *skp;
+#endif
+};
+
+#endif /* ! __LINUX_LSM_SMACK_H */
diff --git a/include/linux/security.h b/include/linux/security.h
index b86ec2afc691..555249a8d121 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -34,6 +34,10 @@
 #include <linux/sockptr.h>
 #include <linux/bpf.h>
 #include <uapi/linux/lsm.h>
+#include <linux/lsm/selinux.h>
+#include <linux/lsm/smack.h>
+#include <linux/lsm/apparmor.h>
+#include <linux/lsm/bpf.h>
 
 struct linux_binprm;
 struct cred;
@@ -152,6 +156,22 @@ enum lockdown_reason {
 	LOCKDOWN_CONFIDENTIALITY_MAX,
 };
 
+/* scaffolding */
+struct lsm_prop_scaffold {
+	u32 secid;
+};
+
+/*
+ * Data exported by the security modules
+ */
+struct lsm_prop {
+	struct lsm_prop_selinux selinux;
+	struct lsm_prop_smack smack;
+	struct lsm_prop_apparmor apparmor;
+	struct lsm_prop_bpf bpf;
+	struct lsm_prop_scaffold scaffold;
+};
+
 extern const char *const lockdown_reasons[LOCKDOWN_CONFIDENTIALITY_MAX+1];
 extern u32 lsm_active_cnt;
 extern const struct lsm_id *lsm_idlist[];
-- 
cgit v1.2.3


From 870b7fdc660b38c4e1bd8bf48e62aa352ddf8f42 Mon Sep 17 00:00:00 2001
From: Casey Schaufler <casey@schaufler-ca.com>
Date: Wed, 9 Oct 2024 10:32:10 -0700
Subject: lsm: use lsm_prop in security_audit_rule_match

Change the secid parameter of security_audit_rule_match
to a lsm_prop structure pointer. Pass the entry from the
lsm_prop structure for the approprite slot to the LSM hook.

Change the users of security_audit_rule_match to use the
lsm_prop instead of a u32. The scaffolding function lsmprop_init()
fills the structure with the value of the old secid, ensuring that
it is available to the appropriate module hook. The sources of
the secid, security_task_getsecid() and security_inode_getsecid(),
will be converted to use the lsm_prop structure later in the series.
At that point the use of lsmprop_init() is dropped.

Signed-off-by: Casey Schaufler <casey@schaufler-ca.com>
[PM: subject line tweak]
Signed-off-by: Paul Moore <paul@paul-moore.com>
---
 include/linux/lsm_hook_defs.h | 3 ++-
 include/linux/security.h      | 7 ++++---
 2 files changed, 6 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h
index 9eca013aa5e1..ea7f17e37756 100644
--- a/include/linux/lsm_hook_defs.h
+++ b/include/linux/lsm_hook_defs.h
@@ -416,7 +416,8 @@ LSM_HOOK(void, LSM_RET_VOID, key_post_create_or_update, struct key *keyring,
 LSM_HOOK(int, 0, audit_rule_init, u32 field, u32 op, char *rulestr,
 	 void **lsmrule, gfp_t gfp)
 LSM_HOOK(int, 0, audit_rule_known, struct audit_krule *krule)
-LSM_HOOK(int, 0, audit_rule_match, u32 secid, u32 field, u32 op, void *lsmrule)
+LSM_HOOK(int, 0, audit_rule_match, struct lsm_prop *prop, u32 field, u32 op,
+	 void *lsmrule)
 LSM_HOOK(void, LSM_RET_VOID, audit_rule_free, void *lsmrule)
 #endif /* CONFIG_AUDIT */
 
diff --git a/include/linux/security.h b/include/linux/security.h
index 555249a8d121..a4f020491e7c 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -2115,7 +2115,8 @@ static inline void security_key_post_create_or_update(struct key *keyring,
 int security_audit_rule_init(u32 field, u32 op, char *rulestr, void **lsmrule,
 			     gfp_t gfp);
 int security_audit_rule_known(struct audit_krule *krule);
-int security_audit_rule_match(u32 secid, u32 field, u32 op, void *lsmrule);
+int security_audit_rule_match(struct lsm_prop *prop, u32 field, u32 op,
+			      void *lsmrule);
 void security_audit_rule_free(void *lsmrule);
 
 #else
@@ -2131,8 +2132,8 @@ static inline int security_audit_rule_known(struct audit_krule *krule)
 	return 0;
 }
 
-static inline int security_audit_rule_match(u32 secid, u32 field, u32 op,
-					    void *lsmrule)
+static inline int security_audit_rule_match(struct lsm_prop *prop, u32 field,
+					    u32 op, void *lsmrule)
 {
 	return 0;
 }
-- 
cgit v1.2.3


From 6f2f724f0e116d9ea960ff3dd645add12e60e176 Mon Sep 17 00:00:00 2001
From: Casey Schaufler <casey@schaufler-ca.com>
Date: Wed, 9 Oct 2024 10:32:11 -0700
Subject: lsm: add lsmprop_to_secctx hook

Add a new hook security_lsmprop_to_secctx() and its LSM specific
implementations. The LSM specific code will use the lsm_prop element
allocated for that module. This allows for the possibility that more
than one module may be called upon to translate a secid to a string,
as can occur in the audit code.

Signed-off-by: Casey Schaufler <casey@schaufler-ca.com>
[PM: subject line tweak]
Signed-off-by: Paul Moore <paul@paul-moore.com>
---
 include/linux/lsm_hook_defs.h |  2 ++
 include/linux/security.h      | 10 +++++++++-
 2 files changed, 11 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h
index ea7f17e37756..ed6ea0b1ec57 100644
--- a/include/linux/lsm_hook_defs.h
+++ b/include/linux/lsm_hook_defs.h
@@ -294,6 +294,8 @@ LSM_HOOK(int, -EINVAL, setprocattr, const char *name, void *value, size_t size)
 LSM_HOOK(int, 0, ismaclabel, const char *name)
 LSM_HOOK(int, -EOPNOTSUPP, secid_to_secctx, u32 secid, char **secdata,
 	 u32 *seclen)
+LSM_HOOK(int, -EOPNOTSUPP, lsmprop_to_secctx, struct lsm_prop *prop,
+	 char **secdata, u32 *seclen)
 LSM_HOOK(int, 0, secctx_to_secid, const char *secdata, u32 seclen, u32 *secid)
 LSM_HOOK(void, LSM_RET_VOID, release_secctx, char *secdata, u32 seclen)
 LSM_HOOK(void, LSM_RET_VOID, inode_invalidate_secctx, struct inode *inode)
diff --git a/include/linux/security.h b/include/linux/security.h
index a4f020491e7c..f1c68e38b15d 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -535,6 +535,7 @@ int security_setprocattr(int lsmid, const char *name, void *value, size_t size);
 int security_netlink_send(struct sock *sk, struct sk_buff *skb);
 int security_ismaclabel(const char *name);
 int security_secid_to_secctx(u32 secid, char **secdata, u32 *seclen);
+int security_lsmprop_to_secctx(struct lsm_prop *prop, char **secdata, u32 *seclen);
 int security_secctx_to_secid(const char *secdata, u32 seclen, u32 *secid);
 void security_release_secctx(char *secdata, u32 seclen);
 void security_inode_invalidate_secctx(struct inode *inode);
@@ -1488,7 +1489,14 @@ static inline int security_ismaclabel(const char *name)
 	return 0;
 }
 
-static inline int security_secid_to_secctx(u32 secid, char **secdata, u32 *seclen)
+static inline int security_secid_to_secctx(u32 secid, char **secdata,
+					   u32 *seclen)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline int security_lsmprop_to_secctx(struct lsm_prop *prop,
+					     char **secdata, u32 *seclen)
 {
 	return -EOPNOTSUPP;
 }
-- 
cgit v1.2.3


From 7183abccd8ac2c486363e267b5d84032818eb725 Mon Sep 17 00:00:00 2001
From: Casey Schaufler <casey@schaufler-ca.com>
Date: Wed, 9 Oct 2024 10:32:12 -0700
Subject: audit: maintain an lsm_prop in audit_context

Replace the secid value stored in struct audit_context with a struct
lsm_prop. Change the code that uses this value to accommodate the
change. security_audit_rule_match() expects a lsm_prop, so existing
scaffolding can be removed. A call to security_secid_to_secctx()
is changed to security_lsmprop_to_secctx().  The call to
security_ipc_getsecid() is scaffolded.

A new function lsmprop_is_set() is introduced to identify whether
an lsm_prop contains a non-zero value.

Signed-off-by: Casey Schaufler <casey@schaufler-ca.com>
[PM: subject line tweak, fix lsmprop_is_set() typo]
Signed-off-by: Paul Moore <paul@paul-moore.com>
---
 include/linux/security.h | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

(limited to 'include')

diff --git a/include/linux/security.h b/include/linux/security.h
index f1c68e38b15d..c029bfe2c5bb 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -291,6 +291,19 @@ static inline const char *kernel_load_data_id_str(enum kernel_load_data_id id)
 
 #ifdef CONFIG_SECURITY
 
+/**
+ * lsmprop_is_set - report if there is a value in the lsm_prop
+ * @prop: Pointer to the exported LSM data
+ *
+ * Returns true if there is a value set, false otherwise
+ */
+static inline bool lsmprop_is_set(struct lsm_prop *prop)
+{
+	const struct lsm_prop empty = {};
+
+	return !!memcmp(prop, &empty, sizeof(*prop));
+}
+
 int call_blocking_lsm_notifier(enum lsm_event event, void *data);
 int register_blocking_lsm_notifier(struct notifier_block *nb);
 int unregister_blocking_lsm_notifier(struct notifier_block *nb);
@@ -552,6 +565,17 @@ int security_bdev_setintegrity(struct block_device *bdev,
 			       size_t size);
 #else /* CONFIG_SECURITY */
 
+/**
+ * lsmprop_is_set - report if there is a value in the lsm_prop
+ * @prop: Pointer to the exported LSM data
+ *
+ * Returns true if there is a value set, false otherwise
+ */
+static inline bool lsmprop_is_set(struct lsm_prop *prop)
+{
+	return false;
+}
+
 static inline int call_blocking_lsm_notifier(enum lsm_event event, void *data)
 {
 	return 0;
-- 
cgit v1.2.3


From f4602f163c98bc93c118e196466c1c98186adb67 Mon Sep 17 00:00:00 2001
From: Casey Schaufler <casey@schaufler-ca.com>
Date: Wed, 9 Oct 2024 10:32:13 -0700
Subject: lsm: use lsm_prop in security_ipc_getsecid

There may be more than one LSM that provides IPC data for auditing.
Change security_ipc_getsecid() to fill in a lsm_prop structure instead
of the u32 secid.  Change the name to security_ipc_getlsmprop() to
reflect the change.

Cc: audit@vger.kernel.org
Cc: linux-security-module@vger.kernel.org
Cc: selinux@vger.kernel.org
Signed-off-by: Casey Schaufler <casey@schaufler-ca.com>
[PM: subject line tweak]
Signed-off-by: Paul Moore <paul@paul-moore.com>
---
 include/linux/lsm_hook_defs.h |  4 ++--
 include/linux/security.h      | 18 +++++++++++++++---
 2 files changed, 17 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h
index ed6ea0b1ec57..6ef2a345ea03 100644
--- a/include/linux/lsm_hook_defs.h
+++ b/include/linux/lsm_hook_defs.h
@@ -256,8 +256,8 @@ LSM_HOOK(void, LSM_RET_VOID, task_to_inode, struct task_struct *p,
 	 struct inode *inode)
 LSM_HOOK(int, 0, userns_create, const struct cred *cred)
 LSM_HOOK(int, 0, ipc_permission, struct kern_ipc_perm *ipcp, short flag)
-LSM_HOOK(void, LSM_RET_VOID, ipc_getsecid, struct kern_ipc_perm *ipcp,
-	 u32 *secid)
+LSM_HOOK(void, LSM_RET_VOID, ipc_getlsmprop, struct kern_ipc_perm *ipcp,
+	 struct lsm_prop *prop)
 LSM_HOOK(int, 0, msg_msg_alloc_security, struct msg_msg *msg)
 LSM_HOOK(void, LSM_RET_VOID, msg_msg_free_security, struct msg_msg *msg)
 LSM_HOOK(int, 0, msg_queue_alloc_security, struct kern_ipc_perm *perm)
diff --git a/include/linux/security.h b/include/linux/security.h
index c029bfe2c5bb..15aef5f68e77 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -289,6 +289,17 @@ static inline const char *kernel_load_data_id_str(enum kernel_load_data_id id)
 	return kernel_load_data_str[id];
 }
 
+/**
+ * lsmprop_init - initialize a lsm_prop structure
+ * @prop: Pointer to the data to initialize
+ *
+ * Set all secid for all modules to the specified value.
+ */
+static inline void lsmprop_init(struct lsm_prop *prop)
+{
+	memset(prop, 0, sizeof(*prop));
+}
+
 #ifdef CONFIG_SECURITY
 
 /**
@@ -515,7 +526,7 @@ int security_task_prctl(int option, unsigned long arg2, unsigned long arg3,
 void security_task_to_inode(struct task_struct *p, struct inode *inode);
 int security_create_user_ns(const struct cred *cred);
 int security_ipc_permission(struct kern_ipc_perm *ipcp, short flag);
-void security_ipc_getsecid(struct kern_ipc_perm *ipcp, u32 *secid);
+void security_ipc_getlsmprop(struct kern_ipc_perm *ipcp, struct lsm_prop *prop);
 int security_msg_msg_alloc(struct msg_msg *msg);
 void security_msg_msg_free(struct msg_msg *msg);
 int security_msg_queue_alloc(struct kern_ipc_perm *msq);
@@ -1377,9 +1388,10 @@ static inline int security_ipc_permission(struct kern_ipc_perm *ipcp,
 	return 0;
 }
 
-static inline void security_ipc_getsecid(struct kern_ipc_perm *ipcp, u32 *secid)
+static inline void security_ipc_getlsmprop(struct kern_ipc_perm *ipcp,
+					   struct lsm_prop *prop)
 {
-	*secid = 0;
+	lsmprop_init(prop);
 }
 
 static inline int security_msg_msg_alloc(struct msg_msg *msg)
-- 
cgit v1.2.3


From 37f670aacd481128ad9a940ac2d3372aecd92824 Mon Sep 17 00:00:00 2001
From: Casey Schaufler <casey@schaufler-ca.com>
Date: Wed, 9 Oct 2024 10:32:15 -0700
Subject: lsm: use lsm_prop in security_current_getsecid

Change the security_current_getsecid_subj() and
security_task_getsecid_obj() interfaces to fill in a lsm_prop structure
instead of a u32 secid.  Audit interfaces will need to collect all
possible security data for possible reporting.

Cc: linux-integrity@vger.kernel.org
Cc: audit@vger.kernel.org
Cc: selinux@vger.kernel.org
Signed-off-by: Casey Schaufler <casey@schaufler-ca.com>
[PM: subject line tweak]
Signed-off-by: Paul Moore <paul@paul-moore.com>
---
 include/linux/lsm_hook_defs.h |  6 +++---
 include/linux/security.h      | 13 +++++++------
 2 files changed, 10 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h
index 6ef2a345ea03..8a90fd9ff3c8 100644
--- a/include/linux/lsm_hook_defs.h
+++ b/include/linux/lsm_hook_defs.h
@@ -235,9 +235,9 @@ LSM_HOOK(int, 0, task_fix_setgroups, struct cred *new, const struct cred * old)
 LSM_HOOK(int, 0, task_setpgid, struct task_struct *p, pid_t pgid)
 LSM_HOOK(int, 0, task_getpgid, struct task_struct *p)
 LSM_HOOK(int, 0, task_getsid, struct task_struct *p)
-LSM_HOOK(void, LSM_RET_VOID, current_getsecid_subj, u32 *secid)
-LSM_HOOK(void, LSM_RET_VOID, task_getsecid_obj,
-	 struct task_struct *p, u32 *secid)
+LSM_HOOK(void, LSM_RET_VOID, current_getlsmprop_subj, struct lsm_prop *prop)
+LSM_HOOK(void, LSM_RET_VOID, task_getlsmprop_obj,
+	 struct task_struct *p, struct lsm_prop *prop)
 LSM_HOOK(int, 0, task_setnice, struct task_struct *p, int nice)
 LSM_HOOK(int, 0, task_setioprio, struct task_struct *p, int ioprio)
 LSM_HOOK(int, 0, task_getioprio, struct task_struct *p)
diff --git a/include/linux/security.h b/include/linux/security.h
index 15aef5f68e77..9bc8153f4e8b 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -507,8 +507,8 @@ int security_task_fix_setgroups(struct cred *new, const struct cred *old);
 int security_task_setpgid(struct task_struct *p, pid_t pgid);
 int security_task_getpgid(struct task_struct *p);
 int security_task_getsid(struct task_struct *p);
-void security_current_getsecid_subj(u32 *secid);
-void security_task_getsecid_obj(struct task_struct *p, u32 *secid);
+void security_current_getlsmprop_subj(struct lsm_prop *prop);
+void security_task_getlsmprop_obj(struct task_struct *p, struct lsm_prop *prop);
 int security_task_setnice(struct task_struct *p, int nice);
 int security_task_setioprio(struct task_struct *p, int ioprio);
 int security_task_getioprio(struct task_struct *p);
@@ -1305,14 +1305,15 @@ static inline int security_task_getsid(struct task_struct *p)
 	return 0;
 }
 
-static inline void security_current_getsecid_subj(u32 *secid)
+static inline void security_current_getlsmprop_subj(struct lsm_prop *prop)
 {
-	*secid = 0;
+	lsmprop_init(prop);
 }
 
-static inline void security_task_getsecid_obj(struct task_struct *p, u32 *secid)
+static inline void security_task_getlsmprop_obj(struct task_struct *p,
+						struct lsm_prop *prop)
 {
-	*secid = 0;
+	lsmprop_init(prop);
 }
 
 static inline int security_task_setnice(struct task_struct *p, int nice)
-- 
cgit v1.2.3


From 07f9d2c1132c9b838538b606dfcdab2506cd2ae4 Mon Sep 17 00:00:00 2001
From: Casey Schaufler <casey@schaufler-ca.com>
Date: Wed, 9 Oct 2024 10:32:16 -0700
Subject: lsm: use lsm_prop in security_inode_getsecid

Change the security_inode_getsecid() interface to fill in a
lsm_prop structure instead of a u32 secid. This allows for its
callers to gather data from all registered LSMs. Data is provided
for IMA and audit. Change the name to security_inode_getlsmprop().

Cc: linux-integrity@vger.kernel.org
Cc: selinux@vger.kernel.org
Signed-off-by: Casey Schaufler <casey@schaufler-ca.com>
[PM: subj line tweak]
Signed-off-by: Paul Moore <paul@paul-moore.com>
---
 include/linux/lsm_hook_defs.h | 3 ++-
 include/linux/security.h      | 7 ++++---
 2 files changed, 6 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h
index 8a90fd9ff3c8..23ad7e4f8c67 100644
--- a/include/linux/lsm_hook_defs.h
+++ b/include/linux/lsm_hook_defs.h
@@ -176,7 +176,8 @@ LSM_HOOK(int, -EOPNOTSUPP, inode_setsecurity, struct inode *inode,
 	 const char *name, const void *value, size_t size, int flags)
 LSM_HOOK(int, 0, inode_listsecurity, struct inode *inode, char *buffer,
 	 size_t buffer_size)
-LSM_HOOK(void, LSM_RET_VOID, inode_getsecid, struct inode *inode, u32 *secid)
+LSM_HOOK(void, LSM_RET_VOID, inode_getlsmprop, struct inode *inode,
+	 struct lsm_prop *prop)
 LSM_HOOK(int, 0, inode_copy_up, struct dentry *src, struct cred **new)
 LSM_HOOK(int, -EOPNOTSUPP, inode_copy_up_xattr, struct dentry *src,
 	 const char *name)
diff --git a/include/linux/security.h b/include/linux/security.h
index 9bc8153f4e8b..2b19ef5d799c 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -452,7 +452,7 @@ int security_inode_getsecurity(struct mnt_idmap *idmap,
 			       void **buffer, bool alloc);
 int security_inode_setsecurity(struct inode *inode, const char *name, const void *value, size_t size, int flags);
 int security_inode_listsecurity(struct inode *inode, char *buffer, size_t buffer_size);
-void security_inode_getsecid(struct inode *inode, u32 *secid);
+void security_inode_getlsmprop(struct inode *inode, struct lsm_prop *prop);
 int security_inode_copy_up(struct dentry *src, struct cred **new);
 int security_inode_copy_up_xattr(struct dentry *src, const char *name);
 int security_inode_setintegrity(const struct inode *inode,
@@ -1076,9 +1076,10 @@ static inline int security_inode_listsecurity(struct inode *inode, char *buffer,
 	return 0;
 }
 
-static inline void security_inode_getsecid(struct inode *inode, u32 *secid)
+static inline void security_inode_getlsmprop(struct inode *inode,
+					     struct lsm_prop *prop)
 {
-	*secid = 0;
+	lsmprop_init(prop);
 }
 
 static inline int security_inode_copy_up(struct dentry *src, struct cred **new)
-- 
cgit v1.2.3


From b0654ca42998440df42ba2ccc3b7dbe3bf5b7bb5 Mon Sep 17 00:00:00 2001
From: Casey Schaufler <casey@schaufler-ca.com>
Date: Wed, 9 Oct 2024 10:32:18 -0700
Subject: lsm: create new security_cred_getlsmprop LSM hook

Create a new LSM hook security_cred_getlsmprop() which, like
security_cred_getsecid(), fetches LSM specific attributes from the
cred structure.  The associated data elements in the audit sub-system
are changed from a secid to a lsm_prop to accommodate multiple possible
LSM audit users.

Cc: linux-integrity@vger.kernel.org
Cc: audit@vger.kernel.org
Cc: selinux@vger.kernel.org
Signed-off-by: Casey Schaufler <casey@schaufler-ca.com>
[PM: subj line tweak]
Signed-off-by: Paul Moore <paul@paul-moore.com>
---
 include/linux/lsm_hook_defs.h | 2 ++
 include/linux/security.h      | 5 +++++
 2 files changed, 7 insertions(+)

(limited to 'include')

diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h
index 23ad7e4f8c67..eb2937599cb0 100644
--- a/include/linux/lsm_hook_defs.h
+++ b/include/linux/lsm_hook_defs.h
@@ -218,6 +218,8 @@ LSM_HOOK(int, 0, cred_prepare, struct cred *new, const struct cred *old,
 LSM_HOOK(void, LSM_RET_VOID, cred_transfer, struct cred *new,
 	 const struct cred *old)
 LSM_HOOK(void, LSM_RET_VOID, cred_getsecid, const struct cred *c, u32 *secid)
+LSM_HOOK(void, LSM_RET_VOID, cred_getlsmprop, const struct cred *c,
+	 struct lsm_prop *prop)
 LSM_HOOK(int, 0, kernel_act_as, struct cred *new, u32 secid)
 LSM_HOOK(int, 0, kernel_create_files_as, struct cred *new, struct inode *inode)
 LSM_HOOK(int, 0, kernel_module_request, char *kmod_name)
diff --git a/include/linux/security.h b/include/linux/security.h
index 2b19ef5d799c..acd2e5d1b0ff 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -488,6 +488,7 @@ void security_cred_free(struct cred *cred);
 int security_prepare_creds(struct cred *new, const struct cred *old, gfp_t gfp);
 void security_transfer_creds(struct cred *new, const struct cred *old);
 void security_cred_getsecid(const struct cred *c, u32 *secid);
+void security_cred_getlsmprop(const struct cred *c, struct lsm_prop *prop);
 int security_kernel_act_as(struct cred *new, u32 secid);
 int security_kernel_create_files_as(struct cred *new, struct inode *inode);
 int security_kernel_module_request(char *kmod_name);
@@ -1229,6 +1230,10 @@ static inline void security_cred_getsecid(const struct cred *c, u32 *secid)
 	*secid = 0;
 }
 
+static inline void security_cred_getlsmprop(const struct cred *c,
+					    struct lsm_prop *prop)
+{ }
+
 static inline int security_kernel_act_as(struct cred *cred, u32 secid)
 {
 	return 0;
-- 
cgit v1.2.3


From 05a344e54d0b4892736526e4a309851da8ee9c89 Mon Sep 17 00:00:00 2001
From: Casey Schaufler <casey@schaufler-ca.com>
Date: Wed, 9 Oct 2024 10:32:20 -0700
Subject: netlabel,smack: use lsm_prop for audit data

Replace the secid in the netlbl_audit structure with an lsm_prop.
Remove scaffolding that was required when the value was a secid.

Signed-off-by: Casey Schaufler <casey@schaufler-ca.com>
[PM: fix the subject line]
Signed-off-by: Paul Moore <paul@paul-moore.com>
---
 include/net/netlabel.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/netlabel.h b/include/net/netlabel.h
index 529160f76cac..8de8344ee93c 100644
--- a/include/net/netlabel.h
+++ b/include/net/netlabel.h
@@ -97,7 +97,7 @@ struct calipso_doi;
 
 /* NetLabel audit information */
 struct netlbl_audit {
-	u32 secid;
+	struct lsm_prop prop;
 	kuid_t loginuid;
 	unsigned int sessionid;
 };
-- 
cgit v1.2.3


From 8afd8c8faa24249e48f5007aee46209299377588 Mon Sep 17 00:00:00 2001
From: Casey Schaufler <casey@schaufler-ca.com>
Date: Wed, 9 Oct 2024 10:32:21 -0700
Subject: lsm: remove lsm_prop scaffolding

Remove the scaffold member from the lsm_prop. Remove the
remaining places it is being set.

Signed-off-by: Casey Schaufler <casey@schaufler-ca.com>
[PM: subj line tweak]
Signed-off-by: Paul Moore <paul@paul-moore.com>
---
 include/linux/security.h | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/security.h b/include/linux/security.h
index acd2e5d1b0ff..fd690fa73162 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -156,11 +156,6 @@ enum lockdown_reason {
 	LOCKDOWN_CONFIDENTIALITY_MAX,
 };
 
-/* scaffolding */
-struct lsm_prop_scaffold {
-	u32 secid;
-};
-
 /*
  * Data exported by the security modules
  */
@@ -169,7 +164,6 @@ struct lsm_prop {
 	struct lsm_prop_smack smack;
 	struct lsm_prop_apparmor apparmor;
 	struct lsm_prop_bpf bpf;
-	struct lsm_prop_scaffold scaffold;
 };
 
 extern const char *const lockdown_reasons[LOCKDOWN_CONFIDENTIALITY_MAX+1];
-- 
cgit v1.2.3


From 7e6c0cb33f7c2aa78b20724239bd7bda3a882652 Mon Sep 17 00:00:00 2001
From: Clint Taylor <clinton.a.taylor@intel.com>
Date: Thu, 10 Oct 2024 15:43:02 -0700
Subject: drm/i915/xe3lpd: reuse xe2lpd definition

xe3_lpd display is functionally identical to xe2_lpd for now so reuse
the device description.  A separate xe3 definition will be added in the
future if/when new feature flags are required.

Signed-off-by: Clint Taylor <clinton.a.taylor@intel.com>
Signed-off-by: Matt Atwood <matthew.s.atwood@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20241010224311.50133-2-matthew.s.atwood@intel.com
---
 include/drm/intel/i915_pciids.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'include')

diff --git a/include/drm/intel/i915_pciids.h b/include/drm/intel/i915_pciids.h
index 02156c6f79b6..6b92f8c3731b 100644
--- a/include/drm/intel/i915_pciids.h
+++ b/include/drm/intel/i915_pciids.h
@@ -794,4 +794,16 @@
 	MACRO__(0xE20D, ## __VA_ARGS__), \
 	MACRO__(0xE212, ## __VA_ARGS__)
 
+/* PTL */
+#define INTEL_PTL_IDS(MACRO__, ...) \
+	MACRO__(0xB080, ## __VA_ARGS__), \
+	MACRO__(0xB081, ## __VA_ARGS__), \
+	MACRO__(0xB082, ## __VA_ARGS__), \
+	MACRO__(0xB090, ## __VA_ARGS__), \
+	MACRO__(0xB091, ## __VA_ARGS__), \
+	MACRO__(0xB092, ## __VA_ARGS__), \
+	MACRO__(0xB0A0, ## __VA_ARGS__), \
+	MACRO__(0xB0A1, ## __VA_ARGS__), \
+	MACRO__(0xB0A2, ## __VA_ARGS__)
+
 #endif /* _I915_PCIIDS_H */
-- 
cgit v1.2.3


From a716ff52bebfe806fbcf5227cee4c7bda4e6724b Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 9 Oct 2024 18:44:01 +0000
Subject: fib: rules: use READ_ONCE()/WRITE_ONCE() on ops->fib_rules_seq

Using RTNL to protect ops->fib_rules_seq reads seems a big hammer.

Writes are protected by RTNL.
We can use READ_ONCE() on readers.

Constify 'struct net' argument of fib_rules_seq_read()
and lookup_rules_ops().

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Reviewed-by: David Ahern <dsahern@kernel.org>
Link: https://patch.msgid.link/20241009184405.3752829-2-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/fib_rules.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h
index d17855c52ef9..04383d90a1e3 100644
--- a/include/net/fib_rules.h
+++ b/include/net/fib_rules.h
@@ -176,7 +176,7 @@ int fib_default_rule_add(struct fib_rules_ops *, u32 pref, u32 table);
 bool fib_rule_matchall(const struct fib_rule *rule);
 int fib_rules_dump(struct net *net, struct notifier_block *nb, int family,
 		   struct netlink_ext_ack *extack);
-unsigned int fib_rules_seq_read(struct net *net, int family);
+unsigned int fib_rules_seq_read(const struct net *net, int family);
 
 int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh,
 		   struct netlink_ext_ack *extack);
-- 
cgit v1.2.3


From 16207384d29287a19f81436e1953b41946aa8258 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 9 Oct 2024 18:44:02 +0000
Subject: ipv4: use READ_ONCE()/WRITE_ONCE() on net->ipv4.fib_seq

Using RTNL to protect ops->fib_rules_seq reads seems a big hammer.

Writes are protected by RTNL.
We can use READ_ONCE() when reading it.

Constify 'struct net' argument of fib4_rules_seq_read()

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Reviewed-by: David Ahern <dsahern@kernel.org>
Link: https://patch.msgid.link/20241009184405.3752829-3-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/ip_fib.h     | 4 ++--
 include/net/netns/ipv4.h | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index 06130933542d..b6e44f4eaa4c 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -347,7 +347,7 @@ static inline int fib4_rules_dump(struct net *net, struct notifier_block *nb,
 	return 0;
 }
 
-static inline unsigned int fib4_rules_seq_read(struct net *net)
+static inline unsigned int fib4_rules_seq_read(const struct net *net)
 {
 	return 0;
 }
@@ -411,7 +411,7 @@ static inline bool fib4_has_custom_rules(const struct net *net)
 bool fib4_rule_default(const struct fib_rule *rule);
 int fib4_rules_dump(struct net *net, struct notifier_block *nb,
 		    struct netlink_ext_ack *extack);
-unsigned int fib4_rules_seq_read(struct net *net);
+unsigned int fib4_rules_seq_read(const struct net *net);
 
 static inline bool fib4_rules_early_flow_dissect(struct net *net,
 						 struct sk_buff *skb,
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 3b1de80b5c25..3c014170e001 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -262,7 +262,7 @@ struct netns_ipv4 {
 #endif
 
 	struct fib_notifier_ops	*notifier_ops;
-	unsigned int	fib_seq;	/* protected by rtnl_mutex */
+	unsigned int	fib_seq;	/* writes protected by rtnl_mutex */
 
 	struct fib_notifier_ops	*ipmr_notifier_ops;
 	unsigned int	ipmr_seq;	/* protected by rtnl_mutex */
-- 
cgit v1.2.3


From e60ea45447768c48309b944596a8a34f6bae50e2 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 9 Oct 2024 18:44:03 +0000
Subject: ipv6: use READ_ONCE()/WRITE_ONCE() on fib6_table->fib_seq

Using RTNL to protect ops->fib_rules_seq reads seems a big hammer.

Writes are protected by RTNL.
We can use READ_ONCE() when reading it.

Constify 'struct net' argument of fib6_tables_seq_read() and
fib6_rules_seq_read().

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Reviewed-by: David Ahern <dsahern@kernel.org>
Link: https://patch.msgid.link/20241009184405.3752829-4-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/ip6_fib.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index 6cb867ce4878..7c87873ae211 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -394,7 +394,7 @@ struct fib6_table {
 	struct fib6_node	tb6_root;
 	struct inet_peer_base	tb6_peers;
 	unsigned int		flags;
-	unsigned int		fib_seq;
+	unsigned int		fib_seq; /* writes protected by rtnl_mutex */
 	struct hlist_head       tb6_gc_hlist;	/* GC candidates */
 #define RT6_TABLE_HAS_DFLT_ROUTER	BIT(0)
 };
@@ -563,7 +563,7 @@ int call_fib6_notifiers(struct net *net, enum fib_event_type event_type,
 int __net_init fib6_notifier_init(struct net *net);
 void __net_exit fib6_notifier_exit(struct net *net);
 
-unsigned int fib6_tables_seq_read(struct net *net);
+unsigned int fib6_tables_seq_read(const struct net *net);
 int fib6_tables_dump(struct net *net, struct notifier_block *nb,
 		     struct netlink_ext_ack *extack);
 
@@ -632,7 +632,7 @@ void fib6_rules_cleanup(void);
 bool fib6_rule_default(const struct fib_rule *rule);
 int fib6_rules_dump(struct net *net, struct notifier_block *nb,
 		    struct netlink_ext_ack *extack);
-unsigned int fib6_rules_seq_read(struct net *net);
+unsigned int fib6_rules_seq_read(const struct net *net);
 
 static inline bool fib6_rules_early_flow_dissect(struct net *net,
 						 struct sk_buff *skb,
@@ -676,7 +676,7 @@ static inline int fib6_rules_dump(struct net *net, struct notifier_block *nb,
 {
 	return 0;
 }
-static inline unsigned int fib6_rules_seq_read(struct net *net)
+static inline unsigned int fib6_rules_seq_read(const struct net *net)
 {
 	return 0;
 }
-- 
cgit v1.2.3


From 2698acd6ea4770809af7e65bb8b3250e0a3a807e Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 9 Oct 2024 18:44:05 +0000
Subject: net: do not acquire rtnl in fib_seq_sum()

After we made sure no fib_seq_read() handlers needs RTNL anymore,
we can remove RTNL from fib_seq_sum().

Note that after RTNL was dropped, fib_seq_sum() result was possibly
outdated anyway.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Reviewed-by: David Ahern <dsahern@kernel.org>
Link: https://patch.msgid.link/20241009184405.3752829-6-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/fib_notifier.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/fib_notifier.h b/include/net/fib_notifier.h
index 6d59221ff05a..48aad6128fea 100644
--- a/include/net/fib_notifier.h
+++ b/include/net/fib_notifier.h
@@ -28,7 +28,7 @@ enum fib_event_type {
 struct fib_notifier_ops {
 	int family;
 	struct list_head list;
-	unsigned int (*fib_seq_read)(struct net *net);
+	unsigned int (*fib_seq_read)(const struct net *net);
 	int (*fib_dump)(struct net *net, struct notifier_block *nb,
 			struct netlink_ext_ack *extack);
 	struct module *owner;
-- 
cgit v1.2.3


From ccb32f2357c08f64947e0fc32f9a9551ae69c6b3 Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Date: Wed, 28 Aug 2024 16:24:07 +0200
Subject: media: v4l2-core: add v4l2_debugfs_root()

This new function returns the dentry of the top-level debugfs "v4l2"
directory. If it does not exist yet, then it is created first.

Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 include/media/v4l2-dev.h | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

(limited to 'include')

diff --git a/include/media/v4l2-dev.h b/include/media/v4l2-dev.h
index d82dfdbf6e58..1b6222fab24e 100644
--- a/include/media/v4l2-dev.h
+++ b/include/media/v4l2-dev.h
@@ -62,6 +62,7 @@ struct v4l2_ioctl_callbacks;
 struct video_device;
 struct v4l2_device;
 struct v4l2_ctrl_handler;
+struct dentry;
 
 /**
  * enum v4l2_video_device_flags - Flags used by &struct video_device
@@ -539,6 +540,20 @@ static inline int video_is_registered(struct video_device *vdev)
 	return test_bit(V4L2_FL_REGISTERED, &vdev->flags);
 }
 
+/**
+ * v4l2_debugfs_root - returns the dentry of the top-level "v4l2" debugfs dir
+ *
+ * If this directory does not yet exist, then it will be created.
+ */
+#ifdef CONFIG_DEBUG_FS
+struct dentry *v4l2_debugfs_root(void);
+#else
+static inline struct dentry *v4l2_debugfs_root(void)
+{
+	return NULL;
+}
+#endif
+
 #if defined(CONFIG_MEDIA_CONTROLLER)
 
 /**
-- 
cgit v1.2.3


From 01c76fc995cdb1e701fa721c5b373777f0469b37 Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Date: Wed, 28 Aug 2024 16:24:08 +0200
Subject: media: v4l2-core: add v4l2_debugfs_if_alloc/free()

Add new helpers to export received or transmitted HDMI InfoFrames to
debugfs.

This complements similar code in drm where the transmitted HDMI infoframes
are exported to debugfs.

The same names have been used as in drm, so this is consistent.

The exported infoframes can be parsed with the edid-decode utility.

Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 include/media/v4l2-dv-timings.h | 48 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 48 insertions(+)

(limited to 'include')

diff --git a/include/media/v4l2-dv-timings.h b/include/media/v4l2-dv-timings.h
index 8fa963326bf6..13830411bd6c 100644
--- a/include/media/v4l2-dv-timings.h
+++ b/include/media/v4l2-dv-timings.h
@@ -8,6 +8,7 @@
 #ifndef __V4L2_DV_TIMINGS_H
 #define __V4L2_DV_TIMINGS_H
 
+#include <linux/debugfs.h>
 #include <linux/videodev2.h>
 
 /**
@@ -251,4 +252,51 @@ void v4l2_set_edid_phys_addr(u8 *edid, unsigned int size, u16 phys_addr);
 u16 v4l2_phys_addr_for_input(u16 phys_addr, u8 input);
 int v4l2_phys_addr_validate(u16 phys_addr, u16 *parent, u16 *port);
 
+/* Add support for exporting InfoFrames to debugfs */
+
+/*
+ * HDMI InfoFrames start with a 3 byte header, then a checksum,
+ * followed by the actual IF payload.
+ *
+ * The payload length is limited to 30 bytes according to the HDMI spec,
+ * but since the length is encoded in 5 bits, it can be 31 bytes theoretically.
+ * So set the max length as 31 + 3 (header) + 1 (checksum) = 35.
+ */
+#define V4L2_DEBUGFS_IF_MAX_LEN (35)
+
+#define V4L2_DEBUGFS_IF_AVI	BIT(0)
+#define V4L2_DEBUGFS_IF_AUDIO	BIT(1)
+#define V4L2_DEBUGFS_IF_SPD	BIT(2)
+#define V4L2_DEBUGFS_IF_HDMI	BIT(3)
+
+typedef ssize_t (*v4l2_debugfs_if_read_t)(u32 type, void *priv,
+					  struct file *filp, char __user *ubuf,
+					  size_t count, loff_t *ppos);
+
+struct v4l2_debugfs_if {
+	struct dentry *if_dir;
+	void *priv;
+
+	v4l2_debugfs_if_read_t if_read;
+};
+
+#ifdef CONFIG_DEBUG_FS
+struct v4l2_debugfs_if *v4l2_debugfs_if_alloc(struct dentry *root, u32 if_types,
+					      void *priv,
+					      v4l2_debugfs_if_read_t if_read);
+void v4l2_debugfs_if_free(struct v4l2_debugfs_if *infoframes);
+#else
+static inline
+struct v4l2_debugfs_if *v4l2_debugfs_if_alloc(struct dentry *root, u32 if_types,
+					      void *priv,
+					      v4l2_debugfs_if_read_t if_read)
+{
+	return NULL;
+}
+
+static inline void v4l2_debugfs_if_free(struct v4l2_debugfs_if *infoframes)
+{
+}
+#endif
+
 #endif
-- 
cgit v1.2.3


From 8fa714ca334e0880665a14fed13b16e3e01a67b2 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Thu, 10 Oct 2024 21:15:35 +0300
Subject: iio: Convert unsigned to unsigned int

Simple type conversion with no functional change implied.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://patch.msgid.link/20241010181535.3083262-1-andriy.shevchenko@linux.intel.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 include/linux/iio/iio-opaque.h |  2 +-
 include/linux/iio/iio.h        | 20 ++++++++++----------
 2 files changed, 11 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/include/linux/iio/iio-opaque.h b/include/linux/iio/iio-opaque.h
index 5aec3945555b..a89e7e43e441 100644
--- a/include/linux/iio/iio-opaque.h
+++ b/include/linux/iio/iio-opaque.h
@@ -70,7 +70,7 @@ struct iio_dev_opaque {
 
 #if defined(CONFIG_DEBUG_FS)
 	struct dentry			*debugfs_dentry;
-	unsigned			cached_reg_addr;
+	unsigned int			cached_reg_addr;
 	char				read_buf[20];
 	unsigned int			read_buf_len;
 #endif
diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h
index 18779b631e90..3a9b57187a95 100644
--- a/include/linux/iio/iio.h
+++ b/include/linux/iio/iio.h
@@ -282,11 +282,11 @@ struct iio_chan_spec {
 	const struct iio_chan_spec_ext_info *ext_info;
 	const char		*extend_name;
 	const char		*datasheet_name;
-	unsigned		modified:1;
-	unsigned		indexed:1;
-	unsigned		output:1;
-	unsigned		differential:1;
-	unsigned		has_ext_scan_type:1;
+	unsigned int		modified:1;
+	unsigned int		indexed:1;
+	unsigned int		output:1;
+	unsigned int		differential:1;
+	unsigned int		has_ext_scan_type:1;
 };
 
 
@@ -541,13 +541,13 @@ struct iio_info {
 	int (*update_scan_mode)(struct iio_dev *indio_dev,
 				const unsigned long *scan_mask);
 	int (*debugfs_reg_access)(struct iio_dev *indio_dev,
-				  unsigned reg, unsigned writeval,
-				  unsigned *readval);
+				  unsigned int reg, unsigned int writeval,
+				  unsigned int *readval);
 	int (*fwnode_xlate)(struct iio_dev *indio_dev,
 			    const struct fwnode_reference_args *iiospec);
-	int (*hwfifo_set_watermark)(struct iio_dev *indio_dev, unsigned val);
+	int (*hwfifo_set_watermark)(struct iio_dev *indio_dev, unsigned int val);
 	int (*hwfifo_flush_to_buffer)(struct iio_dev *indio_dev,
-				      unsigned count);
+				      unsigned int count);
 };
 
 /**
@@ -609,7 +609,7 @@ struct iio_dev {
 	int				scan_bytes;
 
 	const unsigned long		*available_scan_masks;
-	unsigned			__private masklength;
+	unsigned int			__private masklength;
 	const unsigned long		*active_scan_mask;
 	bool				scan_timestamp;
 	struct iio_trigger		*trig;
-- 
cgit v1.2.3


From 06f5531958dd5decaabb21c6fa1da3dcaf8dfc24 Mon Sep 17 00:00:00 2001
From: Benjamin Gaignard <benjamin.gaignard@collabora.com>
Date: Mon, 26 Aug 2024 17:24:09 +0000
Subject: media: videodev2: Add flag to unconditionally enumerate pixel formats

When the index is ORed with V4L2_FMTDESC_FLAG_ENUM_ALL the
driver clears the flag and enumerate all the possible formats,
ignoring any limitations from the current configuration.
Drivers which do not support this flag yet always return an EINVAL.

Signed-off-by: Benjamin Gaignard <benjamin.gaignard@collabora.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
[hverkuil: improved doc when the new flag is not supported by the driver]
---
 include/uapi/linux/videodev2.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h
index 21a8aa575ea3..ded023edac70 100644
--- a/include/uapi/linux/videodev2.h
+++ b/include/uapi/linux/videodev2.h
@@ -907,6 +907,9 @@ struct v4l2_fmtdesc {
 #define V4L2_FMT_FLAG_CSC_QUANTIZATION		0x0100
 #define V4L2_FMT_FLAG_META_LINE_BASED		0x0200
 
+/*  Format description flag, to be ORed with the index */
+#define V4L2_FMTDESC_FLAG_ENUM_ALL		0x80000000
+
 	/* Frame Size and frame rate enumeration */
 /*
  *	F R A M E   S I Z E   E N U M E R A T I O N
-- 
cgit v1.2.3


From 454bbde8f0d465e93e5a3a4003ac6c7e62fa4473 Mon Sep 17 00:00:00 2001
From: Menglong Dong <menglong8.dong@gmail.com>
Date: Wed, 9 Oct 2024 10:28:19 +0800
Subject: net: skb: add pskb_network_may_pull_reason() helper

Introduce the function pskb_network_may_pull_reason() and make
pskb_network_may_pull() a simple inline call to it. The drop reasons of
it just come from pskb_may_pull_reason.

Signed-off-by: Menglong Dong <dongml2@chinatelecom.cn>
Reviewed-by: Simon Horman <horms@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 39f1d16f3628..48f1e0fa2a13 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -3130,9 +3130,15 @@ static inline int skb_inner_network_offset(const struct sk_buff *skb)
 	return skb_inner_network_header(skb) - skb->data;
 }
 
+static inline enum skb_drop_reason
+pskb_network_may_pull_reason(struct sk_buff *skb, unsigned int len)
+{
+	return pskb_may_pull_reason(skb, skb_network_offset(skb) + len);
+}
+
 static inline int pskb_network_may_pull(struct sk_buff *skb, unsigned int len)
 {
-	return pskb_may_pull(skb, skb_network_offset(skb) + len);
+	return pskb_network_may_pull_reason(skb, len) == SKB_NOT_DROPPED_YET;
 }
 
 /*
-- 
cgit v1.2.3


From 7f20dbd7de7b9b2804e6bf54b0c22f2bc447cd64 Mon Sep 17 00:00:00 2001
From: Menglong Dong <menglong8.dong@gmail.com>
Date: Wed, 9 Oct 2024 10:28:20 +0800
Subject: net: tunnel: add pskb_inet_may_pull_reason() helper

Introduce the function pskb_inet_may_pull_reason() and make
pskb_inet_may_pull a simple inline call to it. The drop reasons of it just
come from pskb_may_pull_reason().

Signed-off-by: Menglong Dong <dongml2@chinatelecom.cn>
Reviewed-by: Simon Horman <horms@kernel.org>
Reviewed-by: Kalesh AP <kalesh-anakkur.purayil@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip_tunnels.h | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
index 6194fbb564c6..7fc2f7bf837a 100644
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@ -439,7 +439,8 @@ int ip_tunnel_encap_del_ops(const struct ip_tunnel_encap_ops *op,
 int ip_tunnel_encap_setup(struct ip_tunnel *t,
 			  struct ip_tunnel_encap *ipencap);
 
-static inline bool pskb_inet_may_pull(struct sk_buff *skb)
+static inline enum skb_drop_reason
+pskb_inet_may_pull_reason(struct sk_buff *skb)
 {
 	int nhlen;
 
@@ -456,7 +457,12 @@ static inline bool pskb_inet_may_pull(struct sk_buff *skb)
 		nhlen = 0;
 	}
 
-	return pskb_network_may_pull(skb, nhlen);
+	return pskb_network_may_pull_reason(skb, nhlen);
+}
+
+static inline bool pskb_inet_may_pull(struct sk_buff *skb)
+{
+	return pskb_inet_may_pull_reason(skb) == SKB_NOT_DROPPED_YET;
 }
 
 /* Variant of pskb_inet_may_pull().
-- 
cgit v1.2.3


From 9990ddf47d4168088e2246c3d418bf526e40830d Mon Sep 17 00:00:00 2001
From: Menglong Dong <menglong8.dong@gmail.com>
Date: Wed, 9 Oct 2024 10:28:21 +0800
Subject: net: tunnel: make skb_vlan_inet_prepare() return drop reasons

Make skb_vlan_inet_prepare return the skb drop reasons, which is just
what pskb_may_pull_reason() returns. Meanwhile, adjust all the call of
it.

Signed-off-by: Menglong Dong <dongml2@chinatelecom.cn>
Reviewed-by: Simon Horman <horms@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip_tunnels.h | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
index 7fc2f7bf837a..4e4f9e24c9c1 100644
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@ -467,11 +467,12 @@ static inline bool pskb_inet_may_pull(struct sk_buff *skb)
 
 /* Variant of pskb_inet_may_pull().
  */
-static inline bool skb_vlan_inet_prepare(struct sk_buff *skb,
-					 bool inner_proto_inherit)
+static inline enum skb_drop_reason
+skb_vlan_inet_prepare(struct sk_buff *skb, bool inner_proto_inherit)
 {
 	int nhlen = 0, maclen = inner_proto_inherit ? 0 : ETH_HLEN;
 	__be16 type = skb->protocol;
+	enum skb_drop_reason reason;
 
 	/* Essentially this is skb_protocol(skb, true)
 	 * And we get MAC len.
@@ -492,11 +493,13 @@ static inline bool skb_vlan_inet_prepare(struct sk_buff *skb,
 	/* For ETH_P_IPV6/ETH_P_IP we make sure to pull
 	 * a base network header in skb->head.
 	 */
-	if (!pskb_may_pull(skb, maclen + nhlen))
-		return false;
+	reason = pskb_may_pull_reason(skb, maclen + nhlen);
+	if (reason)
+		return reason;
 
 	skb_set_network_header(skb, maclen);
-	return true;
+
+	return SKB_NOT_DROPPED_YET;
 }
 
 static inline int ip_encap_hlen(struct ip_tunnel_encap *e)
-- 
cgit v1.2.3


From 4c06d9daf8e6215447ca8a2ddd59fa09862c9bae Mon Sep 17 00:00:00 2001
From: Menglong Dong <menglong8.dong@gmail.com>
Date: Wed, 9 Oct 2024 10:28:22 +0800
Subject: net: vxlan: add skb drop reasons to vxlan_rcv()

Introduce skb drop reasons to the function vxlan_rcv(). Following new
drop reasons are added:

  SKB_DROP_REASON_VXLAN_INVALID_HDR
  SKB_DROP_REASON_VXLAN_VNI_NOT_FOUND
  SKB_DROP_REASON_IP_TUNNEL_ECN

Signed-off-by: Menglong Dong <dongml2@chinatelecom.cn>
Reviewed-by: Simon Horman <horms@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dropreason-core.h | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

(limited to 'include')

diff --git a/include/net/dropreason-core.h b/include/net/dropreason-core.h
index 4748680e8c88..98259d2b3e92 100644
--- a/include/net/dropreason-core.h
+++ b/include/net/dropreason-core.h
@@ -92,6 +92,9 @@
 	FN(PACKET_SOCK_ERROR)		\
 	FN(TC_CHAIN_NOTFOUND)		\
 	FN(TC_RECLASSIFY_LOOP)		\
+	FN(VXLAN_INVALID_HDR)		\
+	FN(VXLAN_VNI_NOT_FOUND)		\
+	FN(IP_TUNNEL_ECN)		\
 	FNe(MAX)
 
 /**
@@ -418,6 +421,19 @@ enum skb_drop_reason {
 	 * iterations.
 	 */
 	SKB_DROP_REASON_TC_RECLASSIFY_LOOP,
+	/**
+	 * @SKB_DROP_REASON_VXLAN_INVALID_HDR: VXLAN header is invalid. E.g.:
+	 * 1) reserved fields are not zero
+	 * 2) "I" flag is not set
+	 */
+	SKB_DROP_REASON_VXLAN_INVALID_HDR,
+	/** @SKB_DROP_REASON_VXLAN_VNI_NOT_FOUND: no VXLAN device found for VNI */
+	SKB_DROP_REASON_VXLAN_VNI_NOT_FOUND,
+	/**
+	 * @SKB_DROP_REASON_IP_TUNNEL_ECN: skb is dropped according to
+	 * RFC 6040 4.2, see __INET_ECN_decapsulate() for detail.
+	 */
+	SKB_DROP_REASON_IP_TUNNEL_ECN,
 	/**
 	 * @SKB_DROP_REASON_MAX: the maximum of core drop reasons, which
 	 * shouldn't be used as a real 'reason' - only for tracing code gen
-- 
cgit v1.2.3


From 289fd4e75219a96f77c5d679166035cd5118d139 Mon Sep 17 00:00:00 2001
From: Menglong Dong <menglong8.dong@gmail.com>
Date: Wed, 9 Oct 2024 10:28:24 +0800
Subject: net: vxlan: make vxlan_snoop() return drop reasons

Change the return type of vxlan_snoop() from bool to enum
skb_drop_reason. In this commit, two drop reasons are introduced:

  SKB_DROP_REASON_MAC_INVALID_SOURCE
  SKB_DROP_REASON_VXLAN_ENTRY_EXISTS

Signed-off-by: Menglong Dong <dongml2@chinatelecom.cn>
Reviewed-by: Simon Horman <horms@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dropreason-core.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include')

diff --git a/include/net/dropreason-core.h b/include/net/dropreason-core.h
index 98259d2b3e92..1cb8d7c953be 100644
--- a/include/net/dropreason-core.h
+++ b/include/net/dropreason-core.h
@@ -94,6 +94,8 @@
 	FN(TC_RECLASSIFY_LOOP)		\
 	FN(VXLAN_INVALID_HDR)		\
 	FN(VXLAN_VNI_NOT_FOUND)		\
+	FN(MAC_INVALID_SOURCE)		\
+	FN(VXLAN_ENTRY_EXISTS)		\
 	FN(IP_TUNNEL_ECN)		\
 	FNe(MAX)
 
@@ -429,6 +431,13 @@ enum skb_drop_reason {
 	SKB_DROP_REASON_VXLAN_INVALID_HDR,
 	/** @SKB_DROP_REASON_VXLAN_VNI_NOT_FOUND: no VXLAN device found for VNI */
 	SKB_DROP_REASON_VXLAN_VNI_NOT_FOUND,
+	/** @SKB_DROP_REASON_MAC_INVALID_SOURCE: source mac is invalid */
+	SKB_DROP_REASON_MAC_INVALID_SOURCE,
+	/**
+	 * @SKB_DROP_REASON_VXLAN_ENTRY_EXISTS: trying to migrate a static
+	 * entry or an entry pointing to a nexthop.
+	 */
+	SKB_DROP_REASON_VXLAN_ENTRY_EXISTS,
 	/**
 	 * @SKB_DROP_REASON_IP_TUNNEL_ECN: skb is dropped according to
 	 * RFC 6040 4.2, see __INET_ECN_decapsulate() for detail.
-- 
cgit v1.2.3


From d209706f562ee4fa81bdf24cf6b679c3222aa06c Mon Sep 17 00:00:00 2001
From: Menglong Dong <menglong8.dong@gmail.com>
Date: Wed, 9 Oct 2024 10:28:25 +0800
Subject: net: vxlan: make vxlan_set_mac() return drop reasons

Change the return type of vxlan_set_mac() from bool to enum
skb_drop_reason. In this commit, the drop reason
"SKB_DROP_REASON_LOCAL_MAC" is introduced for the case that the source
mac of the packet is a local mac.

Signed-off-by: Menglong Dong <dongml2@chinatelecom.cn>
Reviewed-by: Simon Horman <horms@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dropreason-core.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include')

diff --git a/include/net/dropreason-core.h b/include/net/dropreason-core.h
index 1cb8d7c953be..fbf92d442c1b 100644
--- a/include/net/dropreason-core.h
+++ b/include/net/dropreason-core.h
@@ -97,6 +97,7 @@
 	FN(MAC_INVALID_SOURCE)		\
 	FN(VXLAN_ENTRY_EXISTS)		\
 	FN(IP_TUNNEL_ECN)		\
+	FN(LOCAL_MAC)			\
 	FNe(MAX)
 
 /**
@@ -443,6 +444,11 @@ enum skb_drop_reason {
 	 * RFC 6040 4.2, see __INET_ECN_decapsulate() for detail.
 	 */
 	SKB_DROP_REASON_IP_TUNNEL_ECN,
+	/**
+	 * @SKB_DROP_REASON_LOCAL_MAC: the source MAC address is equal to
+	 * the MAC address of the local netdev.
+	 */
+	SKB_DROP_REASON_LOCAL_MAC,
 	/**
 	 * @SKB_DROP_REASON_MAX: the maximum of core drop reasons, which
 	 * shouldn't be used as a real 'reason' - only for tracing code gen
-- 
cgit v1.2.3


From b71a576e452b800efeac49ecca116d954601d911 Mon Sep 17 00:00:00 2001
From: Menglong Dong <menglong8.dong@gmail.com>
Date: Wed, 9 Oct 2024 10:28:26 +0800
Subject: net: vxlan: use kfree_skb_reason() in vxlan_xmit()

Replace kfree_skb() with kfree_skb_reason() in vxlan_xmit(). Following
new skb drop reasons are introduced for vxlan:

/* no remote found for xmit */
SKB_DROP_REASON_VXLAN_NO_REMOTE
/* packet without necessary metadata reached a device which is
 * in "external" mode
 */
SKB_DROP_REASON_TUNNEL_TXINFO

Signed-off-by: Menglong Dong <dongml2@chinatelecom.cn>
Reviewed-by: Simon Horman <horms@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dropreason-core.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include')

diff --git a/include/net/dropreason-core.h b/include/net/dropreason-core.h
index fbf92d442c1b..d59bb96c5a02 100644
--- a/include/net/dropreason-core.h
+++ b/include/net/dropreason-core.h
@@ -96,7 +96,9 @@
 	FN(VXLAN_VNI_NOT_FOUND)		\
 	FN(MAC_INVALID_SOURCE)		\
 	FN(VXLAN_ENTRY_EXISTS)		\
+	FN(VXLAN_NO_REMOTE)		\
 	FN(IP_TUNNEL_ECN)		\
+	FN(TUNNEL_TXINFO)		\
 	FN(LOCAL_MAC)			\
 	FNe(MAX)
 
@@ -439,11 +441,18 @@ enum skb_drop_reason {
 	 * entry or an entry pointing to a nexthop.
 	 */
 	SKB_DROP_REASON_VXLAN_ENTRY_EXISTS,
+	/** @SKB_DROP_REASON_VXLAN_NO_REMOTE: no remote found for xmit */
+	SKB_DROP_REASON_VXLAN_NO_REMOTE,
 	/**
 	 * @SKB_DROP_REASON_IP_TUNNEL_ECN: skb is dropped according to
 	 * RFC 6040 4.2, see __INET_ECN_decapsulate() for detail.
 	 */
 	SKB_DROP_REASON_IP_TUNNEL_ECN,
+	/**
+	 * @SKB_DROP_REASON_TUNNEL_TXINFO: packet without necessary metadata
+	 * reached a device which is in "external" mode.
+	 */
+	SKB_DROP_REASON_TUNNEL_TXINFO,
 	/**
 	 * @SKB_DROP_REASON_LOCAL_MAC: the source MAC address is equal to
 	 * the MAC address of the local netdev.
-- 
cgit v1.2.3


From 7948483001039c00dcff4b94c181d7e14693a38c Mon Sep 17 00:00:00 2001
From: Gerhard Engleder <eg@keba.com>
Date: Fri, 11 Oct 2024 21:12:52 +0200
Subject: misc: keba: Add SPI controller device

Add support for the SPI controller auxiliary device. This enables access
to the SPI flash of the FPGA and some other SPI devices.

The actual list of SPI devices is detected by reading some bits out of
the previously registered I2C EEPROM.

Signed-off-by: Gerhard Engleder <eg@keba.com>
Link: https://lore.kernel.org/r/20241011191257.19702-4-gerhard@engleder-embedded.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/misc/keba.h | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

(limited to 'include')

diff --git a/include/linux/misc/keba.h b/include/linux/misc/keba.h
index 323b31a847c5..1bd5409c6f6f 100644
--- a/include/linux/misc/keba.h
+++ b/include/linux/misc/keba.h
@@ -7,6 +7,7 @@
 #include <linux/auxiliary_bus.h>
 
 struct i2c_board_info;
+struct spi_board_info;
 
 /**
  * struct keba_i2c_auxdev - KEBA I2C auxiliary device
@@ -22,4 +23,18 @@ struct keba_i2c_auxdev {
 	struct i2c_board_info *info;
 };
 
+/**
+ * struct keba_spi_auxdev - KEBA SPI auxiliary device
+ * @auxdev: auxiliary device object
+ * @io: address range of SPI controller IO memory
+ * @info_size: number of SPI devices to be probed
+ * @info: SPI devices to be probed
+ */
+struct keba_spi_auxdev {
+	struct auxiliary_device auxdev;
+	struct resource io;
+	int info_size;
+	struct spi_board_info *info;
+};
+
 #endif /* _LINUX_MISC_KEBA_H */
-- 
cgit v1.2.3


From f965d315bcbd65adfe5e3c161e46b5dc0a463f68 Mon Sep 17 00:00:00 2001
From: Gerhard Engleder <eg@keba.com>
Date: Fri, 11 Oct 2024 21:12:55 +0200
Subject: misc: keba: Add fan device

Add support for the fan auxiliary device. This enables monitoring of the
fan.

Signed-off-by: Gerhard Engleder <eg@keba.com>
Link: https://lore.kernel.org/r/20241011191257.19702-7-gerhard@engleder-embedded.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/misc/keba.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include')

diff --git a/include/linux/misc/keba.h b/include/linux/misc/keba.h
index 1bd5409c6f6f..451777acc262 100644
--- a/include/linux/misc/keba.h
+++ b/include/linux/misc/keba.h
@@ -37,4 +37,14 @@ struct keba_spi_auxdev {
 	struct spi_board_info *info;
 };
 
+/**
+ * struct keba_fan_auxdev - KEBA fan auxiliary device
+ * @auxdev: auxiliary device object
+ * @io: address range of fan controller IO memory
+ */
+struct keba_fan_auxdev {
+	struct auxiliary_device auxdev;
+	struct resource io;
+};
+
 #endif /* _LINUX_MISC_KEBA_H */
-- 
cgit v1.2.3


From ca7b844b91920573835ad11daaa30630ce112fe1 Mon Sep 17 00:00:00 2001
From: Gerhard Engleder <eg@keba.com>
Date: Fri, 11 Oct 2024 21:12:56 +0200
Subject: misc: keba: Add battery device

Add support for the battery auxiliary device. This enables monitoring of
the battery.

Signed-off-by: Gerhard Engleder <eg@keba.com>
Link: https://lore.kernel.org/r/20241011191257.19702-8-gerhard@engleder-embedded.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/misc/keba.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include')

diff --git a/include/linux/misc/keba.h b/include/linux/misc/keba.h
index 451777acc262..ca52716f8437 100644
--- a/include/linux/misc/keba.h
+++ b/include/linux/misc/keba.h
@@ -47,4 +47,14 @@ struct keba_fan_auxdev {
 	struct resource io;
 };
 
+/**
+ * struct keba_batt_auxdev - KEBA battery auxiliary device
+ * @auxdev: auxiliary device object
+ * @io: address range of battery controller IO memory
+ */
+struct keba_batt_auxdev {
+	struct auxiliary_device auxdev;
+	struct resource io;
+};
+
 #endif /* _LINUX_MISC_KEBA_H */
-- 
cgit v1.2.3


From a27b406a49225a17849c86221a32f2d598702719 Mon Sep 17 00:00:00 2001
From: Gerhard Engleder <eg@keba.com>
Date: Fri, 11 Oct 2024 21:12:57 +0200
Subject: misc: keba: Add UART devices

Add support for the UART auxiliary devices. This enables access to up to
3 different UARTs, which are implemented in the FPGA.

Signed-off-by: Gerhard Engleder <eg@keba.com>
Link: https://lore.kernel.org/r/20241011191257.19702-9-gerhard@engleder-embedded.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/misc/keba.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'include')

diff --git a/include/linux/misc/keba.h b/include/linux/misc/keba.h
index ca52716f8437..a81d6fa70851 100644
--- a/include/linux/misc/keba.h
+++ b/include/linux/misc/keba.h
@@ -57,4 +57,16 @@ struct keba_batt_auxdev {
 	struct resource io;
 };
 
+/**
+ * struct keba_uart_auxdev - KEBA UART auxiliary device
+ * @auxdev: auxiliary device object
+ * @io: address range of UART controller IO memory
+ * @irq: number of UART controller interrupt
+ */
+struct keba_uart_auxdev {
+	struct auxiliary_device auxdev;
+	struct resource io;
+	unsigned int irq;
+};
+
 #endif /* _LINUX_MISC_KEBA_H */
-- 
cgit v1.2.3


From cec78a59abc9b1a06f742cb96479fc0bb1fe4e90 Mon Sep 17 00:00:00 2001
From: Zijun Hu <quic_zijuhu@quicinc.com>
Date: Tue, 17 Sep 2024 18:42:56 +0800
Subject: list: Remove duplicated and unused macro list_for_each_reverse

Remove macro list_for_each_reverse due to below reasons:

- it is same as list_for_each_prev.
- it is not used by current kernel tree.

Signed-off-by: Zijun Hu <quic_zijuhu@quicinc.com>
Link: https://lore.kernel.org/r/20240917-fix_list-v2-1-d2914665e89f@quicinc.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/list.h | 8 --------
 1 file changed, 8 deletions(-)

(limited to 'include')

diff --git a/include/linux/list.h b/include/linux/list.h
index 5f4b0a39cf46..29a375889fb8 100644
--- a/include/linux/list.h
+++ b/include/linux/list.h
@@ -686,14 +686,6 @@ static inline void list_splice_tail_init(struct list_head *list,
 #define list_for_each(pos, head) \
 	for (pos = (head)->next; !list_is_head(pos, (head)); pos = pos->next)
 
-/**
- * list_for_each_reverse - iterate backwards over a list
- * @pos:	the &struct list_head to use as a loop cursor.
- * @head:	the head for your list.
- */
-#define list_for_each_reverse(pos, head) \
-	for (pos = (head)->prev; pos != (head); pos = pos->prev)
-
 /**
  * list_for_each_rcu - Iterate over a list in an RCU-safe fashion
  * @pos:	the &struct list_head to use as a loop cursor.
-- 
cgit v1.2.3


From 0ebe74c53b8b62bde7b02415d28e75aa25d6c2e6 Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <linux@treblig.org>
Date: Sun, 29 Sep 2024 15:11:12 +0100
Subject: drivers/base: Remove unused auxiliary_find_device

auxiliary_find_device has been unused since commit
1c5de097bea3 ("net/mlx5: Fix mlx5_get_next_dev() peer device matching")

which was the only use since it was originally added.

Remove it.

Signed-off-by: Dr. David Alan Gilbert <linux@treblig.org>
Reviewed-by: Ira Weiny <ira.weiny@intel.com>
Link: https://lore.kernel.org/r/20240929141112.69824-1-linux@treblig.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/auxiliary_bus.h | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/auxiliary_bus.h b/include/linux/auxiliary_bus.h
index 31762324bcc9..65dd7f154374 100644
--- a/include/linux/auxiliary_bus.h
+++ b/include/linux/auxiliary_bus.h
@@ -269,8 +269,4 @@ void auxiliary_driver_unregister(struct auxiliary_driver *auxdrv);
 #define module_auxiliary_driver(__auxiliary_driver) \
 	module_driver(__auxiliary_driver, auxiliary_driver_register, auxiliary_driver_unregister)
 
-struct auxiliary_device *auxiliary_find_device(struct device *start,
-					       const void *data,
-					       device_match_t match);
-
 #endif /* _AUXILIARY_BUS_H_ */
-- 
cgit v1.2.3


From cd068d51594d9635bf6688fc78717572b78bce6a Mon Sep 17 00:00:00 2001
From: Keita Aihara <keita.aihara@sony.com>
Date: Fri, 13 Sep 2024 18:44:17 +0900
Subject: mmc: core: Add SD card quirk for broken poweroff notification

GIGASTONE Gaming Plus microSD cards manufactured on 02/2022 report that
they support poweroff notification and cache, but they are not working
correctly.

Flush Cache bit never gets cleared in sd_flush_cache() and Poweroff
Notification Ready bit also never gets set to 1 within 1 second from the
end of busy of CMD49 in sd_poweroff_notify().

This leads to I/O error and runtime PM error state.

I observed that the same card manufactured on 01/2024 works as expected.

This problem seems similar to the Kingston cards fixed with
commit c467c8f08185 ("mmc: Add MMC_QUIRK_BROKEN_SD_CACHE for Kingston
Canvas Go Plus from 11/2019") and should be handled using quirks.

CID for the problematic card is here.
12345641535443002000000145016200

Manufacturer ID is 0x12 and defined as CID_MANFID_GIGASTONE as of now,
but would like comments on what naming is appropriate because MID list
is not public and not sure it's right.

Signed-off-by: Keita Aihara <keita.aihara@sony.com>
Link: https://lore.kernel.org/r/20240913094417.GA4191647@sony.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 include/linux/mmc/card.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h
index f34407cc2788..543446392776 100644
--- a/include/linux/mmc/card.h
+++ b/include/linux/mmc/card.h
@@ -294,6 +294,7 @@ struct mmc_card {
 #define MMC_QUIRK_BROKEN_SD_DISCARD	(1<<14)	/* Disable broken SD discard support */
 #define MMC_QUIRK_BROKEN_SD_CACHE	(1<<15)	/* Disable broken SD cache support */
 #define MMC_QUIRK_BROKEN_CACHE_FLUSH	(1<<16)	/* Don't flush cache until the write has occurred */
+#define MMC_QUIRK_BROKEN_SD_POWEROFF_NOTIFY	(1<<17) /* Disable broken SD poweroff notify support */
 
 	bool			written_flag;	/* Indicates eMMC has been written since power on */
 	bool			reenable_cmdq;	/* Re-enable Command Queue */
-- 
cgit v1.2.3


From fce2ce78af1e14dc1316aaddb5b3308be05cf452 Mon Sep 17 00:00:00 2001
From: Avri Altman <avri.altman@wdc.com>
Date: Sun, 6 Oct 2024 08:11:39 +0300
Subject: mmc: sd: SDUC Support Recognition

Ultra Capacity SD cards (SDUC) was already introduced in SD7.0.  Those
cards support capacity larger than 2TB and up to including 128TB.

ACMD41 was extended to support the host-card handshake during
initialization.  The card expects that the HCS & HO2T bits to be set in
the command argument, and sets the applicable bits in the R3 returned
response.  On the contrary, if a SDUC card is inserted to a
non-supporting host, it will never respond to this ACMD41 until
eventually, the host will timed out and give up.

Also, add SD CSD version 3.0 - designated for SDUC, and properly parse
the csd register as the c_size field got expanded to 28 bits.

Do not enable SDUC for now - leave it to the last patch in the series.

Tested-by: Ricky WU <ricky_wu@realtek.com>
Reviewed-by: Adrian Hunter <adrian.hunter@intel.com>
Signed-off-by: Avri Altman <avri.altman@wdc.com>
Link: https://lore.kernel.org/r/20241006051148.160278-2-avri.altman@wdc.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 include/linux/mmc/card.h | 2 +-
 include/linux/mmc/sd.h   | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h
index 543446392776..eb67d3d5ff5b 100644
--- a/include/linux/mmc/card.h
+++ b/include/linux/mmc/card.h
@@ -35,7 +35,7 @@ struct mmc_csd {
 	unsigned int		wp_grp_size;
 	unsigned int		read_blkbits;
 	unsigned int		write_blkbits;
-	unsigned int		capacity;
+	sector_t		capacity;
 	unsigned int		read_partial:1,
 				read_misalign:1,
 				write_partial:1,
diff --git a/include/linux/mmc/sd.h b/include/linux/mmc/sd.h
index 6727576a8755..865cc0ca8543 100644
--- a/include/linux/mmc/sd.h
+++ b/include/linux/mmc/sd.h
@@ -36,6 +36,7 @@
 /* OCR bit definitions */
 #define SD_OCR_S18R		(1 << 24)    /* 1.8V switching request */
 #define SD_ROCR_S18A		SD_OCR_S18R  /* 1.8V switching accepted by card */
+#define SD_OCR_2T		(1 << 27)    /* HO2T/CO2T - SDUC support */
 #define SD_OCR_XPC		(1 << 28)    /* SDXC power control */
 #define SD_OCR_CCS		(1 << 30)    /* Card Capacity Status */
 
-- 
cgit v1.2.3


From 375b535941bea65b37451f0fd398e28bf4f3bdc3 Mon Sep 17 00:00:00 2001
From: Avri Altman <avri.altman@wdc.com>
Date: Sun, 6 Oct 2024 08:11:40 +0300
Subject: mmc: sd: Add Extension memory addressing

SDUC memory addressing spans beyond 2TB and up to 128TB.  Therefore, 38
bits are required to access the entire memory space of all sectors.
Those extra 6 bits are to be carried by CMD22 prior of sending
read/write/erase commands: CMD17, CMD18, CMD24, CMD25, CMD32, and CMD33.

CMD22 will carry the higher order 6 bits, and must precedes any of the
above commands even if it targets sector < 2TB.

No error related to address or length is indicated in CMD22 but rather
in the read/write command itself.

Tested-by: Ricky WU <ricky_wu@realtek.com>
Reviewed-by: Adrian Hunter <adrian.hunter@intel.com>
Signed-off-by: Avri Altman <avri.altman@wdc.com>
Link: https://lore.kernel.org/r/20241006051148.160278-3-avri.altman@wdc.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 include/linux/mmc/sd.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/linux/mmc/sd.h b/include/linux/mmc/sd.h
index 865cc0ca8543..af5fc70e09a2 100644
--- a/include/linux/mmc/sd.h
+++ b/include/linux/mmc/sd.h
@@ -15,6 +15,9 @@
 #define SD_SEND_IF_COND           8   /* bcr  [11:0] See below   R7  */
 #define SD_SWITCH_VOLTAGE         11  /* ac                      R1  */
 
+/* Class 2 */
+#define SD_ADDR_EXT		 22   /* ac   [5:0]              R1  */
+
   /* class 10 */
 #define SD_SWITCH                 6   /* adtc [31:0] See below   R1  */
 
-- 
cgit v1.2.3


From 403a0293f1c230524e0185b31f69c02a6aed12c7 Mon Sep 17 00:00:00 2001
From: Avri Altman <avri.altman@wdc.com>
Date: Sun, 6 Oct 2024 08:11:42 +0300
Subject: mmc: core: Add open-ended Ext memory addressing

For open-ended read/write - just send CMD22 before issuing the command.

Reviewed-by: Adrian Hunter <adrian.hunter@intel.com>
Signed-off-by: Avri Altman <avri.altman@wdc.com>
Link: https://lore.kernel.org/r/20241006051148.160278-5-avri.altman@wdc.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 include/linux/mmc/core.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/linux/mmc/core.h b/include/linux/mmc/core.h
index f0ac2e469b32..a890a71288ef 100644
--- a/include/linux/mmc/core.h
+++ b/include/linux/mmc/core.h
@@ -96,6 +96,10 @@ struct mmc_command {
 	unsigned int		busy_timeout;	/* busy detect timeout in ms */
 	struct mmc_data		*data;		/* data segment associated with cmd */
 	struct mmc_request	*mrq;		/* associated request */
+
+	/* for SDUC */
+	bool has_ext_addr;
+	u8 ext_addr;
 };
 
 struct mmc_data {
-- 
cgit v1.2.3


From 79daeb241db7901e4bd53cce9ab046f376a63a4c Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Fri, 13 Sep 2024 18:28:16 +0800
Subject: mmc: core: Prepare to support SD UHS-II cards

The SD UHS-II interface was introduced to the SD spec v4.00 several years
ago. The interface is fundamentally different from an electrical and a
protocol point of view, comparing to the legacy SD interface.

However, the legacy SD protocol is supported through a specific transport
layer (SD-TRAN) defined in the UHS-II addendum of the spec. This allows the
SD card to be managed in a very similar way as a legacy SD card, hence a
lot of code can be re-used to support these new types of cards through the
mmc subsystem.

Moreover, an SD card that supports the UHS-II interface shall also be
backwards compatible with the legacy SD interface, which allows a UHS-II
card to be inserted into a legacy slot. As a matter of fact, this is
already supported by mmc subsystem as of today.

To prepare to add support for UHS-II, this change puts the basic foundation
in the mmc core in place, allowing it to be more easily reviewed before
subsequent changes implements the actual support.

Basically, the approach here adds a new UHS-II bus_ops type and adds a
separate initialization path for the UHS-II card. The intent is to avoid us
from sprinkling the legacy initialization path, but also to simplify
implementation of the UHS-II specific bits.

At this point, there is only one new host ops added to manage the various
ios settings needed for UHS-II. Additional host ops that are needed, are
being added from subsequent changes.

Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Link: https://lore.kernel.org/r/20240913102836.6144-3-victorshihgli@gmail.com
---
 include/linux/mmc/card.h |  7 +++++++
 include/linux/mmc/host.h | 23 +++++++++++++++++++++++
 2 files changed, 30 insertions(+)

(limited to 'include')

diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h
index eb67d3d5ff5b..0bb1ad1ea4dc 100644
--- a/include/linux/mmc/card.h
+++ b/include/linux/mmc/card.h
@@ -209,6 +209,11 @@ struct sd_ext_reg {
 #define SD_EXT_PERF_CMD_QUEUE	(1<<4)
 };
 
+struct sd_uhs2_config {
+	u32			node_id;
+	/* TODO: Extend with more register configs. */
+};
+
 struct sdio_cccr {
 	unsigned int		sdio_vsn;
 	unsigned int		sd_vsn;
@@ -320,6 +325,8 @@ struct mmc_card {
 	struct sd_ext_reg	ext_power;	/* SD extension reg for PM */
 	struct sd_ext_reg	ext_perf;	/* SD extension reg for PERF */
 
+	struct sd_uhs2_config	uhs2_config;	/* SD UHS-II config */
+
 	unsigned int		sdio_funcs;	/* number of SDIO functions */
 	atomic_t		sdio_funcs_probed; /* number of probed SDIO funcs */
 	struct sdio_cccr	cccr;		/* common card info */
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index 8fc2b328ec4d..e1d380de3aaa 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -64,6 +64,10 @@ struct mmc_ios {
 #define MMC_TIMING_MMC_HS400	10
 #define MMC_TIMING_SD_EXP	11
 #define MMC_TIMING_SD_EXP_1_2V	12
+#define MMC_TIMING_UHS2_SPEED_A    13
+#define MMC_TIMING_UHS2_SPEED_A_HD 14
+#define MMC_TIMING_UHS2_SPEED_B    15
+#define MMC_TIMING_UHS2_SPEED_B_HD 16
 
 	unsigned char	signal_voltage;		/* signalling voltage (1.8V or 3.3V) */
 
@@ -92,6 +96,14 @@ struct mmc_clk_phase_map {
 	struct mmc_clk_phase phase[MMC_NUM_CLK_PHASES];
 };
 
+struct sd_uhs2_caps {
+	/* TODO: Add UHS-II capabilities for the host. */
+};
+
+enum sd_uhs2_operation {
+	UHS2_SET_IOS,
+};
+
 struct mmc_host;
 
 enum mmc_err_stat {
@@ -219,6 +231,14 @@ struct mmc_host_ops {
 
 	/* Initialize an SD express card, mandatory for MMC_CAP2_SD_EXP. */
 	int	(*init_sd_express)(struct mmc_host *host, struct mmc_ios *ios);
+
+	/*
+	 * The uhs2_control callback is used to execute SD UHS-II specific
+	 * operations. It's mandatory to implement for hosts that supports the
+	 * SD UHS-II interface (MMC_CAP2_SD_UHS2). Expected return values are a
+	 * negative errno in case of a failure or zero for success.
+	 */
+	int	(*uhs2_control)(struct mmc_host *host, enum sd_uhs2_operation op);
 };
 
 struct mmc_cqe_ops {
@@ -379,6 +399,7 @@ struct mmc_host {
 				 MMC_CAP2_HS200_1_2V_SDR)
 #define MMC_CAP2_SD_EXP		(1 << 7)	/* SD express via PCIe */
 #define MMC_CAP2_SD_EXP_1_2V	(1 << 8)	/* SD express 1.2V */
+#define MMC_CAP2_SD_UHS2	(1 << 9)	/* SD UHS-II support */
 #define MMC_CAP2_CD_ACTIVE_HIGH	(1 << 10)	/* Card-detect signal active high */
 #define MMC_CAP2_RO_ACTIVE_HIGH	(1 << 11)	/* Write-protect signal active high */
 #define MMC_CAP2_NO_PRESCAN_POWERUP (1 << 14)	/* Don't power up before scan */
@@ -405,6 +426,8 @@ struct mmc_host {
 #endif
 #define MMC_CAP2_ALT_GPT_TEGRA	(1 << 28)	/* Host with eMMC that has GPT entry at a non-standard location */
 
+	struct sd_uhs2_caps	uhs2_caps;	/* Host UHS-II capabilities */
+
 	int			fixed_drv_type;	/* fixed driver type for non-removable media */
 
 	mmc_pm_flag_t		pm_caps;	/* supported pm features */
-- 
cgit v1.2.3


From 153196d550c747367bdbec5cd545a572c5310451 Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Fri, 13 Sep 2024 18:28:17 +0800
Subject: mmc: core: Announce successful insertion of an SD UHS-II card

To inform the users about SD UHS-II cards, let's extend the print at card
insertion with a "UHS-II" substring. Within this change, it seems
reasonable to convert from using "ultra high speed" into "UHS-I speed", for
the UHS-I type, as it should makes it more clear.

Note that, the new print for UHS-II cards doesn't include the actual
selected speed mode. Instead, this is going to be added from subsequent
change.

Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Link: https://lore.kernel.org/r/20240913102836.6144-4-victorshihgli@gmail.com
---
 include/linux/mmc/host.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include')

diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index e1d380de3aaa..9a148280bf42 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -638,6 +638,14 @@ static inline int mmc_card_uhs(struct mmc_card *card)
 		card->host->ios.timing <= MMC_TIMING_UHS_DDR50;
 }
 
+static inline bool mmc_card_uhs2(struct mmc_host *host)
+{
+	return host->ios.timing == MMC_TIMING_UHS2_SPEED_A ||
+	       host->ios.timing == MMC_TIMING_UHS2_SPEED_A_HD ||
+	       host->ios.timing == MMC_TIMING_UHS2_SPEED_B ||
+	       host->ios.timing == MMC_TIMING_UHS2_SPEED_B_HD;
+}
+
 void mmc_retune_timer_stop(struct mmc_host *host);
 
 static inline void mmc_retune_needed(struct mmc_host *host)
-- 
cgit v1.2.3


From a56ffd3a83ed2e10e0d9e0b199547bfa0d206aac Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Fri, 13 Sep 2024 18:28:18 +0800
Subject: mmc: core: Extend support for mmc regulators with a vqmmc2

To allow an additional external regulator to be controlled by an mmc host
driver, let's add support for a vqmmc2 regulator to the mmc core.

For an SD UHS-II interface the vqmmc2 regulator may correspond to the so
called vdd2 supply, as described by the SD spec. Initially, only 1.8V is
needed, hence limit the new helper function, mmc_regulator_set_vqmmc2() to
this too.

Note that, to allow for flexibility mmc host drivers need to manage the
enable/disable of the vqmmc2 regulator themselves, while the regulator is
looked up through the common mmc_regulator_get_supply().

Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Link: https://lore.kernel.org/r/20240913102836.6144-5-victorshihgli@gmail.com
---
 include/linux/mmc/host.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'include')

diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index 9a148280bf42..a7f790c75bc8 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -75,6 +75,9 @@ struct mmc_ios {
 #define MMC_SIGNAL_VOLTAGE_180	1
 #define MMC_SIGNAL_VOLTAGE_120	2
 
+	unsigned char	vqmmc2_voltage;
+#define MMC_VQMMC2_VOLTAGE_180	0
+
 	unsigned char	drv_type;		/* driver type (A, B, C, D) */
 
 #define MMC_SET_DRIVER_TYPE_B	0
@@ -308,6 +311,7 @@ struct mmc_pwrseq;
 struct mmc_supply {
 	struct regulator *vmmc;		/* Card power supply */
 	struct regulator *vqmmc;	/* Optional Vccq supply */
+	struct regulator *vqmmc2;	/* Optional supply for phy */
 };
 
 struct mmc_ctx {
@@ -590,6 +594,7 @@ int mmc_regulator_set_ocr(struct mmc_host *mmc,
 			struct regulator *supply,
 			unsigned short vdd_bit);
 int mmc_regulator_set_vqmmc(struct mmc_host *mmc, struct mmc_ios *ios);
+int mmc_regulator_set_vqmmc2(struct mmc_host *mmc, struct mmc_ios *ios);
 #else
 static inline int mmc_regulator_set_ocr(struct mmc_host *mmc,
 				 struct regulator *supply,
@@ -603,6 +608,12 @@ static inline int mmc_regulator_set_vqmmc(struct mmc_host *mmc,
 {
 	return -EINVAL;
 }
+
+static inline int mmc_regulator_set_vqmmc2(struct mmc_host *mmc,
+					   struct mmc_ios *ios)
+{
+	return -EINVAL;
+}
 #endif
 
 int mmc_regulator_get_supply(struct mmc_host *mmc);
-- 
cgit v1.2.3


From a9a75f9dc23c1562dcb261c0a8f3d6fc70d246cd Mon Sep 17 00:00:00 2001
From: Victor Shih <victor.shih@genesyslogic.com.tw>
Date: Fri, 13 Sep 2024 18:28:19 +0800
Subject: mmc: core: Add definitions for SD UHS-II cards

Add UHS-II specific data structures for commands and defines for
registers, as described in Part 1 UHS-II Addendum Version 1.01.

UHS-II related definitions are listed below:
  1. UHS-II card capability: sd_uhs2_caps{}
  2. UHS-II configuration: sd_uhs2_config{}
  3. UHS-II register I/O address and register field definitions: sd_uhs2.h

Signed-off-by: Jason Lai <jason.lai@genesyslogic.com.tw>
Signed-off-by: Victor Shih <victor.shih@genesyslogic.com.tw>
Link: https://lore.kernel.org/r/20240913102836.6144-6-victorshihgli@gmail.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 include/linux/mmc/card.h    |  31 +++++-
 include/linux/mmc/host.h    |  25 ++++-
 include/linux/mmc/sd_uhs2.h | 240 ++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 294 insertions(+), 2 deletions(-)
 create mode 100644 include/linux/mmc/sd_uhs2.h

(limited to 'include')

diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h
index 0bb1ad1ea4dc..526fce581657 100644
--- a/include/linux/mmc/card.h
+++ b/include/linux/mmc/card.h
@@ -188,6 +188,12 @@ struct sd_switch_caps {
 #define SD_MAX_CURRENT_400	(1 << SD_SET_CURRENT_LIMIT_400)
 #define SD_MAX_CURRENT_600	(1 << SD_SET_CURRENT_LIMIT_600)
 #define SD_MAX_CURRENT_800	(1 << SD_SET_CURRENT_LIMIT_800)
+
+#define SD4_SET_POWER_LIMIT_0_72W	0
+#define SD4_SET_POWER_LIMIT_1_44W	1
+#define SD4_SET_POWER_LIMIT_2_16W	2
+#define SD4_SET_POWER_LIMIT_2_88W	3
+#define SD4_SET_POWER_LIMIT_1_80W	4
 };
 
 struct sd_ext_reg {
@@ -211,7 +217,30 @@ struct sd_ext_reg {
 
 struct sd_uhs2_config {
 	u32			node_id;
-	/* TODO: Extend with more register configs. */
+
+	u32			n_fcu;
+	u32			maxblk_len;
+	u8			n_lanes;
+	u8			dadr_len;
+	u8			app_type;
+	u8			phy_minor_rev;
+	u8			phy_major_rev;
+	u8			can_hibernate;
+	u8			n_lss_sync;
+	u8			n_lss_dir;
+	u8			link_minor_rev;
+	u8			link_major_rev;
+	u8			dev_type;
+	u8			n_data_gap;
+
+	u32			n_fcu_set;
+	u32			maxblk_len_set;
+	u8			n_lanes_set;
+	u8			speed_range_set;
+	u8			n_lss_sync_set;
+	u8			n_lss_dir_set;
+	u8			n_data_gap_set;
+	u8			max_retry_set;
 };
 
 struct sdio_cccr {
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index a7f790c75bc8..0980d06ed419 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -17,6 +17,7 @@
 #include <linux/mmc/pm.h>
 #include <linux/dma-direction.h>
 #include <linux/blk-crypto-profile.h>
+#include <linux/mmc/sd_uhs2.h>
 
 struct mmc_ios {
 	unsigned int	clock;			/* clock rate */
@@ -100,7 +101,29 @@ struct mmc_clk_phase_map {
 };
 
 struct sd_uhs2_caps {
-	/* TODO: Add UHS-II capabilities for the host. */
+	u32	dap;
+	u32	gap;
+	u32	group_desc;
+	u32	maxblk_len;
+	u32	n_fcu;
+	u8	n_lanes;
+	u8	addr64;
+	u8	card_type;
+	u8	phy_rev;
+	u8	speed_range;
+	u8	n_lss_sync;
+	u8	n_lss_dir;
+	u8	link_rev;
+	u8	host_type;
+	u8	n_data_gap;
+
+	u32	maxblk_len_set;
+	u32	n_fcu_set;
+	u8	n_lanes_set;
+	u8	n_lss_sync_set;
+	u8	n_lss_dir_set;
+	u8	n_data_gap_set;
+	u8	max_retry_set;
 };
 
 enum sd_uhs2_operation {
diff --git a/include/linux/mmc/sd_uhs2.h b/include/linux/mmc/sd_uhs2.h
new file mode 100644
index 000000000000..7abe9bd870c7
--- /dev/null
+++ b/include/linux/mmc/sd_uhs2.h
@@ -0,0 +1,240 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Header file for UHS-II packets, Host Controller registers and I/O
+ * accessors.
+ *
+ *  Copyright (C) 2014 Intel Corp, All Rights Reserved.
+ */
+#ifndef LINUX_MMC_UHS2_H
+#define LINUX_MMC_UHS2_H
+
+/* LINK Layer definition */
+/*
+ * UHS2 Header:
+ * Refer to UHS-II Addendum Version 1.02 Figure 5-2, the format of CCMD Header is described below:
+ *      bit [3:0]  : DID(Destination ID = Node ID of UHS2 card)
+ *      bit [6:4]  : TYP(Packet Type)
+ *                   000b: CCMD(Control command packet)
+ *                   001b: DCMD(Data command packet)
+ *                   010b: RES(Response packet)
+ *                   011b: DATA(Data payload packet)
+ *                   111b: MSG(Message packet)
+ *                   Others: Reserved
+ *      bit [7]    : NP(Native Packet)
+ *      bit [10:8] : TID(Transaction ID)
+ *      bit [11]   : Reserved
+ *      bit [15:12]: SID(Source ID 0: Node ID of Host)
+ *
+ * Broadcast CCMD issued by Host is represented as DID=SID=0.
+ */
+/*
+ * UHS2 Argument:
+ * Refer to UHS-II Addendum Version 1.02 Figure 6-5, the format of CCMD Argument is described below:
+ *      bit [3:0]  : MSB of IOADR
+ *      bit [5:4]  : PLEN(Payload Length)
+ *                   00b: 0 byte
+ *                   01b: 4 bytes
+ *                   10b: 8 bytes
+ *                   11b: 16 bytes
+ *      bit [6]    : Reserved
+ *      bit [7]    : R/W(Read/Write)
+ *                   0: Control read command
+ *                   1: Control write command
+ *      bit [15:8] : LSB of IOADR
+ *
+ * I/O Address specifies the address of register in UHS-II I/O space accessed by CCMD.
+ * The unit of I/O Address is 4 Bytes. It is transmitted in MSB first, LSB last.
+ */
+#define UHS2_NATIVE_PACKET_POS	7
+#define UHS2_NATIVE_PACKET	(1 << UHS2_NATIVE_PACKET_POS)
+
+#define UHS2_PACKET_TYPE_POS	4
+#define UHS2_PACKET_TYPE_CCMD	(0 << UHS2_PACKET_TYPE_POS)
+#define UHS2_PACKET_TYPE_DCMD	(1 << UHS2_PACKET_TYPE_POS)
+#define UHS2_PACKET_TYPE_RES	(2 << UHS2_PACKET_TYPE_POS)
+#define UHS2_PACKET_TYPE_DATA	(3 << UHS2_PACKET_TYPE_POS)
+#define UHS2_PACKET_TYPE_MSG	(7 << UHS2_PACKET_TYPE_POS)
+
+#define UHS2_DEST_ID_MASK	0x0F
+#define UHS2_DEST_ID		0x1
+
+#define UHS2_SRC_ID_POS		12
+#define UHS2_SRC_ID_MASK	0xF000
+
+#define UHS2_TRANS_ID_POS	8
+#define UHS2_TRANS_ID_MASK	0x0700
+
+/* UHS2 MSG */
+#define UHS2_MSG_CTG_POS	5
+#define UHS2_MSG_CTG_LMSG	0x00
+#define UHS2_MSG_CTG_INT	0x60
+#define UHS2_MSG_CTG_AMSG	0x80
+
+#define UHS2_MSG_CTG_FCREQ	0x00
+#define UHS2_MSG_CTG_FCRDY	0x01
+#define UHS2_MSG_CTG_STAT	0x02
+
+#define UHS2_MSG_CODE_POS			8
+#define UHS2_MSG_CODE_FC_UNRECOVER_ERR		0x8
+#define UHS2_MSG_CODE_STAT_UNRECOVER_ERR	0x8
+#define UHS2_MSG_CODE_STAT_RECOVER_ERR		0x1
+
+/* TRANS Layer definition */
+
+/* Native packets*/
+#define UHS2_NATIVE_CMD_RW_POS	7
+#define UHS2_NATIVE_CMD_WRITE	(1 << UHS2_NATIVE_CMD_RW_POS)
+#define UHS2_NATIVE_CMD_READ	(0 << UHS2_NATIVE_CMD_RW_POS)
+
+#define UHS2_NATIVE_CMD_PLEN_POS	4
+#define UHS2_NATIVE_CMD_PLEN_4B		(1 << UHS2_NATIVE_CMD_PLEN_POS)
+#define UHS2_NATIVE_CMD_PLEN_8B		(2 << UHS2_NATIVE_CMD_PLEN_POS)
+#define UHS2_NATIVE_CMD_PLEN_16B	(3 << UHS2_NATIVE_CMD_PLEN_POS)
+
+#define UHS2_NATIVE_CCMD_GET_MIOADR_MASK	0xF00
+#define UHS2_NATIVE_CCMD_MIOADR_MASK		0x0F
+
+#define UHS2_NATIVE_CCMD_LIOADR_POS		8
+#define UHS2_NATIVE_CCMD_GET_LIOADR_MASK	0x0FF
+
+#define UHS2_CCMD_DEV_INIT_COMPLETE_FLAG	BIT(11)
+#define UHS2_DEV_INIT_PAYLOAD_LEN		1
+#define UHS2_DEV_INIT_RESP_LEN			6
+#define UHS2_DEV_ENUM_PAYLOAD_LEN		1
+#define UHS2_DEV_ENUM_RESP_LEN			8
+#define UHS2_CFG_WRITE_PAYLOAD_LEN		2
+#define UHS2_CFG_WRITE_PHY_SET_RESP_LEN		4
+#define UHS2_CFG_WRITE_GENERIC_SET_RESP_LEN	5
+#define UHS2_GO_DORMANT_PAYLOAD_LEN		1
+
+/*
+ * UHS2 Argument:
+ * Refer to UHS-II Addendum Version 1.02 Figure 6-8, the format of DCMD Argument is described below:
+ *      bit [3:0]  : Reserved
+ *      bit [6:3]  : TMODE(Transfer Mode)
+ *                   bit 3: DAM(Data Access Mode)
+ *                   bit 4: TLUM(TLEN Unit Mode)
+ *                   bit 5: LM(Length Mode)
+ *                   bit 6: DM(Duplex Mode)
+ *      bit [7]    : R/W(Read/Write)
+ *                   0: Control read command
+ *                   1: Control write command
+ *      bit [15:8] : Reserved
+ *
+ * I/O Address specifies the address of register in UHS-II I/O space accessed by CCMD.
+ * The unit of I/O Address is 4 Bytes. It is transmitted in MSB first, LSB last.
+ */
+#define UHS2_DCMD_DM_POS		6
+#define UHS2_DCMD_2L_HD_MODE		(1 << UHS2_DCMD_DM_POS)
+#define UHS2_DCMD_LM_POS		5
+#define UHS2_DCMD_LM_TLEN_EXIST		(1 << UHS2_DCMD_LM_POS)
+#define UHS2_DCMD_TLUM_POS		4
+#define UHS2_DCMD_TLUM_BYTE_MODE	(1 << UHS2_DCMD_TLUM_POS)
+#define UHS2_NATIVE_DCMD_DAM_POS	3
+#define UHS2_NATIVE_DCMD_DAM_IO		(1 << UHS2_NATIVE_DCMD_DAM_POS)
+
+#define UHS2_RES_NACK_POS	7
+#define UHS2_RES_NACK_MASK	(0x1 << UHS2_RES_NACK_POS)
+
+#define UHS2_RES_ECODE_POS	4
+#define UHS2_RES_ECODE_MASK	0x7
+#define UHS2_RES_ECODE_COND	1
+#define UHS2_RES_ECODE_ARG	2
+#define UHS2_RES_ECODE_GEN	3
+
+/* IOADR of device registers */
+#define UHS2_IOADR_GENERIC_CAPS		0x00
+#define UHS2_IOADR_PHY_CAPS		0x02
+#define UHS2_IOADR_LINK_CAPS		0x04
+#define UHS2_IOADR_RSV_CAPS		0x06
+#define UHS2_IOADR_GENERIC_SETTINGS	0x08
+#define UHS2_IOADR_PHY_SETTINGS		0x0A
+#define UHS2_IOADR_LINK_SETTINGS	0x0C
+#define UHS2_IOADR_PRESET		0x40
+
+/* SD application packets */
+#define UHS2_SD_CMD_INDEX_POS	8
+
+#define UHS2_SD_CMD_APP_POS	14
+#define UHS2_SD_CMD_APP		(1 << UHS2_SD_CMD_APP_POS)
+
+/* UHS-II Device Registers */
+#define UHS2_DEV_CONFIG_REG	0x000
+
+/* General Caps and Settings registers */
+#define UHS2_DEV_CONFIG_GEN_CAPS	(UHS2_DEV_CONFIG_REG + 0x000)
+#define UHS2_DEV_CONFIG_N_LANES_POS	8
+#define UHS2_DEV_CONFIG_N_LANES_MASK	0x3F
+#define UHS2_DEV_CONFIG_2L_HD_FD	0x1
+#define UHS2_DEV_CONFIG_2D1U_FD		0x2
+#define UHS2_DEV_CONFIG_1D2U_FD		0x4
+#define UHS2_DEV_CONFIG_2D2U_FD		0x8
+#define UHS2_DEV_CONFIG_DADR_POS	14
+#define UHS2_DEV_CONFIG_DADR_MASK	0x1
+#define UHS2_DEV_CONFIG_APP_POS		16
+#define UHS2_DEV_CONFIG_APP_MASK	0xFF
+#define UHS2_DEV_CONFIG_APP_SD_MEM	0x1
+
+#define UHS2_DEV_CONFIG_GEN_SET			(UHS2_DEV_CONFIG_REG + 0x008)
+#define UHS2_DEV_CONFIG_GEN_SET_N_LANES_POS	8
+#define UHS2_DEV_CONFIG_GEN_SET_2L_FD_HD	0x0
+#define UHS2_DEV_CONFIG_GEN_SET_2D1U_FD		0x2
+#define UHS2_DEV_CONFIG_GEN_SET_1D2U_FD		0x3
+#define UHS2_DEV_CONFIG_GEN_SET_2D2U_FD		0x4
+#define UHS2_DEV_CONFIG_GEN_SET_CFG_COMPLETE	BIT(31)
+
+/* PHY Caps and Settings registers */
+#define UHS2_DEV_CONFIG_PHY_CAPS	(UHS2_DEV_CONFIG_REG + 0x002)
+#define UHS2_DEV_CONFIG_PHY_MINOR_MASK	0xF
+#define UHS2_DEV_CONFIG_PHY_MAJOR_POS	4
+#define UHS2_DEV_CONFIG_PHY_MAJOR_MASK	0x3
+#define UHS2_DEV_CONFIG_CAN_HIBER_POS	15
+#define UHS2_DEV_CONFIG_CAN_HIBER_MASK	0x1
+#define UHS2_DEV_CONFIG_PHY_CAPS1	(UHS2_DEV_CONFIG_REG + 0x003)
+#define UHS2_DEV_CONFIG_N_LSS_SYN_MASK	0xF
+#define UHS2_DEV_CONFIG_N_LSS_DIR_POS	4
+#define UHS2_DEV_CONFIG_N_LSS_DIR_MASK	0xF
+
+#define UHS2_DEV_CONFIG_PHY_SET			(UHS2_DEV_CONFIG_REG + 0x00A)
+#define UHS2_DEV_CONFIG_PHY_SET_SPEED_POS	6
+#define UHS2_DEV_CONFIG_PHY_SET_SPEED_A		0x0
+#define UHS2_DEV_CONFIG_PHY_SET_SPEED_B		0x1
+
+/* LINK-TRAN Caps and Settings registers */
+#define UHS2_DEV_CONFIG_LINK_TRAN_CAPS		(UHS2_DEV_CONFIG_REG + 0x004)
+#define UHS2_DEV_CONFIG_LT_MINOR_MASK		0xF
+#define UHS2_DEV_CONFIG_LT_MAJOR_POS		4
+#define UHS2_DEV_CONFIG_LT_MAJOR_MASK		0x3
+#define UHS2_DEV_CONFIG_N_FCU_POS		8
+#define UHS2_DEV_CONFIG_N_FCU_MASK		0xFF
+#define UHS2_DEV_CONFIG_DEV_TYPE_POS		16
+#define UHS2_DEV_CONFIG_DEV_TYPE_MASK		0x7
+#define UHS2_DEV_CONFIG_MAX_BLK_LEN_POS		20
+#define UHS2_DEV_CONFIG_MAX_BLK_LEN_MASK	0xFFF
+#define UHS2_DEV_CONFIG_LINK_TRAN_CAPS1		(UHS2_DEV_CONFIG_REG + 0x005)
+#define UHS2_DEV_CONFIG_N_DATA_GAP_MASK		0xFF
+
+#define UHS2_DEV_CONFIG_LINK_TRAN_SET		(UHS2_DEV_CONFIG_REG + 0x00C)
+#define UHS2_DEV_CONFIG_LT_SET_MAX_BLK_LEN	0x200
+#define UHS2_DEV_CONFIG_LT_SET_MAX_RETRY_POS	16
+
+/* Preset register */
+#define UHS2_DEV_CONFIG_PRESET	(UHS2_DEV_CONFIG_REG + 0x040)
+
+#define UHS2_DEV_INT_REG	0x100
+
+#define UHS2_DEV_STATUS_REG	0x180
+
+#define UHS2_DEV_CMD_REG		0x200
+#define UHS2_DEV_CMD_FULL_RESET		(UHS2_DEV_CMD_REG + 0x000)
+#define UHS2_DEV_CMD_GO_DORMANT_STATE	(UHS2_DEV_CMD_REG + 0x001)
+#define UHS2_DEV_CMD_DORMANT_HIBER	BIT(7)
+#define UHS2_DEV_CMD_DEVICE_INIT	(UHS2_DEV_CMD_REG + 0x002)
+#define UHS2_DEV_INIT_COMPLETE_FLAG	BIT(11)
+#define UHS2_DEV_CMD_ENUMERATE		(UHS2_DEV_CMD_REG + 0x003)
+#define UHS2_DEV_CMD_TRANS_ABORT	(UHS2_DEV_CMD_REG + 0x004)
+
+#define UHS2_RCLK_MAX	52000000
+#define UHS2_RCLK_MIN	26000000
+
+#endif /* LINUX_MMC_UHS2_H */
-- 
cgit v1.2.3


From 5bb798cfdfd00182065cf55c1ff4b2c08d3be13f Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Sat, 28 Sep 2024 18:20:56 +0200
Subject: memstick: Constify struct memstick_device_id

'struct memstick_device_id' are not modified in these drivers.

Constifying this structure moves some data to a read-only section, so
increases overall security.

Update memstick_dev_match(), memstick_bus_match() and struct
memstick_driver accordingly.

On a x86_64, with allmodconfig, as an example:
Before:
======
   text	   data	    bss	    dec	    hex	filename
  74055	   3455	     88	  77598	  12f1e	drivers/memstick/core/ms_block.o

After:
=====
   text	   data	    bss	    dec	    hex	filename
  74087	   3423	     88	  77598	  12f1e	drivers/memstick/core/ms_block.o

Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Link: https://lore.kernel.org/r/6509d6f6ed64193f04e747a98ccea7492c976ca8.1727540434.git.christophe.jaillet@wanadoo.fr
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 include/linux/memstick.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/memstick.h b/include/linux/memstick.h
index ebf73d4ee969..107bdcbedf79 100644
--- a/include/linux/memstick.h
+++ b/include/linux/memstick.h
@@ -293,7 +293,7 @@ struct memstick_host {
 };
 
 struct memstick_driver {
-	struct memstick_device_id *id_table;
+	const struct memstick_device_id *id_table;
 	int                       (*probe)(struct memstick_dev *card);
 	void                      (*remove)(struct memstick_dev *card);
 	int                       (*suspend)(struct memstick_dev *card,
-- 
cgit v1.2.3


From 7e019dcc470f27066c98697e43d930df8d54bd9c Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Date: Wed, 9 Oct 2024 09:50:07 -0400
Subject: sched: Improve cache locality of RSEQ concurrency IDs for
 intermittent workloads

commit 223baf9d17f25 ("sched: Fix performance regression introduced by mm_cid")
introduced a per-mm/cpu current concurrency id (mm_cid), which keeps
a reference to the concurrency id allocated for each CPU. This reference
expires shortly after a 100ms delay.

These per-CPU references keep the per-mm-cid data cache-local in
situations where threads are running at least once on each CPU within
each 100ms window, thus keeping the per-cpu reference alive.

However, intermittent workloads behaving in bursts spaced by more than
100ms on each CPU exhibit bad cache locality and degraded performance
compared to purely per-cpu data indexing, because concurrency IDs are
allocated over various CPUs and cores, therefore losing cache locality
of the associated data.

Introduce the following changes to improve per-mm-cid cache locality:

- Add a "recent_cid" field to the per-mm/cpu mm_cid structure to keep
  track of which mm_cid value was last used, and use it as a hint to
  attempt re-allocating the same concurrency ID the next time this
  mm/cpu needs to allocate a concurrency ID,

- Add a per-mm CPUs allowed mask, which keeps track of the union of
  CPUs allowed for all threads belonging to this mm. This cpumask is
  only set during the lifetime of the mm, never cleared, so it
  represents the union of all the CPUs allowed since the beginning of
  the mm lifetime (note that the mm_cpumask() is really arch-specific
  and tailored to the TLB flush needs, and is thus _not_ a viable
  approach for this),

- Add a per-mm nr_cpus_allowed to keep track of the weight of the
  per-mm CPUs allowed mask (for fast access),

- Add a per-mm max_nr_cid to keep track of the highest number of
  concurrency IDs allocated for the mm. This is used for expanding the
  concurrency ID allocation within the upper bound defined by:

    min(mm->nr_cpus_allowed, mm->mm_users)

  When the next unused CID value reaches this threshold, stop trying
  to expand the cid allocation and use the first available cid value
  instead.

  Spreading allocation to use all the cid values within the range

    [ 0, min(mm->nr_cpus_allowed, mm->mm_users) - 1 ]

  improves cache locality while preserving mm_cid compactness within the
  expected user limits,

- In __mm_cid_try_get, only return cid values within the range
  [ 0, mm->nr_cpus_allowed ] rather than [ 0, nr_cpu_ids ]. This
  prevents allocating cids above the number of allowed cpus in
  rare scenarios where cid allocation races with a concurrent
  remote-clear of the per-mm/cpu cid. This improvement is made
  possible by the addition of the per-mm CPUs allowed mask,

- In sched_mm_cid_migrate_to, use mm->nr_cpus_allowed rather than
  t->nr_cpus_allowed. This criterion was really meant to compare
  the number of mm->mm_users to the number of CPUs allowed for the
  entire mm. Therefore, the prior comparison worked fine when all
  threads shared the same CPUs allowed mask, but not so much in
  scenarios where those threads have different masks (e.g. each
  thread pinned to a single CPU). This improvement is made
  possible by the addition of the per-mm CPUs allowed mask.

* Benchmarks

Each thread increments 16kB worth of 8-bit integers in bursts, with
a configurable delay between each thread's execution. Each thread run
one after the other (no threads run concurrently). The order of
thread execution in the sequence is random. The thread execution
sequence begins again after all threads have executed. The 16kB areas
are allocated with rseq_mempool and indexed by either cpu_id, mm_cid
(not cache-local), or cache-local mm_cid. Each thread is pinned to its
own core.

Testing configurations:

8-core/1-L3:        Use 8 cores within a single L3
24-core/24-L3:      Use 24 cores, 1 core per L3
192-core/24-L3:     Use 192 cores (all cores in the system)
384-thread/24-L3:   Use 384 HW threads (all HW threads in the system)

Intermittent workload delays between threads: 200ms, 10ms.

Hardware:

CPU(s):                   384
  On-line CPU(s) list:    0-383
Vendor ID:                AuthenticAMD
  Model name:             AMD EPYC 9654 96-Core Processor
    Thread(s) per core:   2
    Core(s) per socket:   96
    Socket(s):            2
Caches (sum of all):
  L1d:                    6 MiB (192 instances)
  L1i:                    6 MiB (192 instances)
  L2:                     192 MiB (192 instances)
  L3:                     768 MiB (24 instances)

Each result is an average of 5 test runs. The cache-local speedup
is calculated as: (cache-local mm_cid) / (mm_cid).

Intermittent workload delay: 200ms

                     per-cpu     mm_cid    cache-local mm_cid    cache-local speedup
                         (ns)      (ns)                  (ns)
8-core/1-L3             1374      19289                  1336            14.4x
24-core/24-L3           2423      26721                  1594            16.7x
192-core/24-L3          2291      15826                  2153             7.3x
384-thread/24-L3        1874      13234                  1907             6.9x

Intermittent workload delay: 10ms

                     per-cpu     mm_cid    cache-local mm_cid    cache-local speedup
                         (ns)      (ns)                  (ns)
8-core/1-L3               662       756                   686             1.1x
24-core/24-L3            1378      3648                  1035             3.5x
192-core/24-L3           1439     10833                  1482             7.3x
384-thread/24-L3         1503     10570                  1556             6.8x

[ This deprecates the prior "sched: NUMA-aware per-memory-map concurrency IDs"
  patch series with a simpler and more general approach. ]

[ This patch applies on top of v6.12-rc1. ]

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Marco Elver <elver@google.com>
Link: https://lore.kernel.org/lkml/20240823185946.418340-1-mathieu.desnoyers@efficios.com/
---
 include/linux/mm_types.h | 72 ++++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 63 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 6e3bdf8e38bc..381d22eba088 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -782,6 +782,7 @@ struct vm_area_struct {
 struct mm_cid {
 	u64 time;
 	int cid;
+	int recent_cid;
 };
 #endif
 
@@ -852,6 +853,27 @@ struct mm_struct {
 		 * When the next mm_cid scan is due (in jiffies).
 		 */
 		unsigned long mm_cid_next_scan;
+		/**
+		 * @nr_cpus_allowed: Number of CPUs allowed for mm.
+		 *
+		 * Number of CPUs allowed in the union of all mm's
+		 * threads allowed CPUs.
+		 */
+		unsigned int nr_cpus_allowed;
+		/**
+		 * @max_nr_cid: Maximum number of concurrency IDs allocated.
+		 *
+		 * Track the highest number of concurrency IDs allocated for the
+		 * mm.
+		 */
+		atomic_t max_nr_cid;
+		/**
+		 * @cpus_allowed_lock: Lock protecting mm cpus_allowed.
+		 *
+		 * Provide mutual exclusion for mm cpus_allowed and
+		 * mm nr_cpus_allowed updates.
+		 */
+		raw_spinlock_t cpus_allowed_lock;
 #endif
 #ifdef CONFIG_MMU
 		atomic_long_t pgtables_bytes;	/* size of all page tables */
@@ -1170,18 +1192,30 @@ static inline int mm_cid_clear_lazy_put(int cid)
 	return cid & ~MM_CID_LAZY_PUT;
 }
 
+/*
+ * mm_cpus_allowed: Union of all mm's threads allowed CPUs.
+ */
+static inline cpumask_t *mm_cpus_allowed(struct mm_struct *mm)
+{
+	unsigned long bitmap = (unsigned long)mm;
+
+	bitmap += offsetof(struct mm_struct, cpu_bitmap);
+	/* Skip cpu_bitmap */
+	bitmap += cpumask_size();
+	return (struct cpumask *)bitmap;
+}
+
 /* Accessor for struct mm_struct's cidmask. */
 static inline cpumask_t *mm_cidmask(struct mm_struct *mm)
 {
-	unsigned long cid_bitmap = (unsigned long)mm;
+	unsigned long cid_bitmap = (unsigned long)mm_cpus_allowed(mm);
 
-	cid_bitmap += offsetof(struct mm_struct, cpu_bitmap);
-	/* Skip cpu_bitmap */
+	/* Skip mm_cpus_allowed */
 	cid_bitmap += cpumask_size();
 	return (struct cpumask *)cid_bitmap;
 }
 
-static inline void mm_init_cid(struct mm_struct *mm)
+static inline void mm_init_cid(struct mm_struct *mm, struct task_struct *p)
 {
 	int i;
 
@@ -1189,17 +1223,22 @@ static inline void mm_init_cid(struct mm_struct *mm)
 		struct mm_cid *pcpu_cid = per_cpu_ptr(mm->pcpu_cid, i);
 
 		pcpu_cid->cid = MM_CID_UNSET;
+		pcpu_cid->recent_cid = MM_CID_UNSET;
 		pcpu_cid->time = 0;
 	}
+	mm->nr_cpus_allowed = p->nr_cpus_allowed;
+	atomic_set(&mm->max_nr_cid, 0);
+	raw_spin_lock_init(&mm->cpus_allowed_lock);
+	cpumask_copy(mm_cpus_allowed(mm), &p->cpus_mask);
 	cpumask_clear(mm_cidmask(mm));
 }
 
-static inline int mm_alloc_cid_noprof(struct mm_struct *mm)
+static inline int mm_alloc_cid_noprof(struct mm_struct *mm, struct task_struct *p)
 {
 	mm->pcpu_cid = alloc_percpu_noprof(struct mm_cid);
 	if (!mm->pcpu_cid)
 		return -ENOMEM;
-	mm_init_cid(mm);
+	mm_init_cid(mm, p);
 	return 0;
 }
 #define mm_alloc_cid(...)	alloc_hooks(mm_alloc_cid_noprof(__VA_ARGS__))
@@ -1212,16 +1251,31 @@ static inline void mm_destroy_cid(struct mm_struct *mm)
 
 static inline unsigned int mm_cid_size(void)
 {
-	return cpumask_size();
+	return 2 * cpumask_size();	/* mm_cpus_allowed(), mm_cidmask(). */
+}
+
+static inline void mm_set_cpus_allowed(struct mm_struct *mm, const struct cpumask *cpumask)
+{
+	struct cpumask *mm_allowed = mm_cpus_allowed(mm);
+
+	if (!mm)
+		return;
+	/* The mm_cpus_allowed is the union of each thread allowed CPUs masks. */
+	raw_spin_lock(&mm->cpus_allowed_lock);
+	cpumask_or(mm_allowed, mm_allowed, cpumask);
+	WRITE_ONCE(mm->nr_cpus_allowed, cpumask_weight(mm_allowed));
+	raw_spin_unlock(&mm->cpus_allowed_lock);
 }
 #else /* CONFIG_SCHED_MM_CID */
-static inline void mm_init_cid(struct mm_struct *mm) { }
-static inline int mm_alloc_cid(struct mm_struct *mm) { return 0; }
+static inline void mm_init_cid(struct mm_struct *mm, struct task_struct *p) { }
+static inline int mm_alloc_cid(struct mm_struct *mm, struct task_struct *p) { return 0; }
 static inline void mm_destroy_cid(struct mm_struct *mm) { }
+
 static inline unsigned int mm_cid_size(void)
 {
 	return 0;
 }
+static inline void mm_set_cpus_allowed(struct mm_struct *mm, const struct cpumask *cpumask) { }
 #endif /* CONFIG_SCHED_MM_CID */
 
 struct mmu_gather;
-- 
cgit v1.2.3


From 689274a56c0c088796d359f6c6267323931a2429 Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Mon, 30 Sep 2024 15:03:26 +0200
Subject: drm: Remove DRM aperture helpers

The DRM aperture helpers are wrappers around video helpers from
<linux/aperture.h>. There are no callers of these functions. Remove
them entirely.

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Cc: Jonathan Corbet <corbet@lwn.net>
Acked-by: Javier Martinez Canillas <javierm@redhat.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240930130921.689876-29-tzimmermann@suse.de
---
 include/drm/drm_aperture.h | 38 --------------------------------------
 1 file changed, 38 deletions(-)
 delete mode 100644 include/drm/drm_aperture.h

(limited to 'include')

diff --git a/include/drm/drm_aperture.h b/include/drm/drm_aperture.h
deleted file mode 100644
index cbe33b49fd5d..000000000000
--- a/include/drm/drm_aperture.h
+++ /dev/null
@@ -1,38 +0,0 @@
-/* SPDX-License-Identifier: MIT */
-
-#ifndef _DRM_APERTURE_H_
-#define _DRM_APERTURE_H_
-
-#include <linux/types.h>
-
-struct drm_device;
-struct drm_driver;
-struct pci_dev;
-
-int devm_aperture_acquire_from_firmware(struct drm_device *dev, resource_size_t base,
-					resource_size_t size);
-
-int drm_aperture_remove_conflicting_framebuffers(resource_size_t base, resource_size_t size,
-						 const struct drm_driver *req_driver);
-
-int drm_aperture_remove_conflicting_pci_framebuffers(struct pci_dev *pdev,
-						     const struct drm_driver *req_driver);
-
-/**
- * drm_aperture_remove_framebuffers - remove all existing framebuffers
- * @req_driver: requesting DRM driver
- *
- * This function removes all graphics device drivers. Use this function on systems
- * that can have their framebuffer located anywhere in memory.
- *
- * Returns:
- * 0 on success, or a negative errno code otherwise
- */
-static inline int
-drm_aperture_remove_framebuffers(const struct drm_driver *req_driver)
-{
-	return drm_aperture_remove_conflicting_framebuffers(0, (resource_size_t)-1,
-							    req_driver);
-}
-
-#endif
-- 
cgit v1.2.3


From c2f8fde8689272a55b9319b69dfe7e8f0e2e9dfe Mon Sep 17 00:00:00 2001
From: Christian Brauner <brauner@kernel.org>
Date: Mon, 14 Oct 2024 11:40:56 +0200
Subject: fs: add helper to use mount option as path or fd

Allow filesystems to use a mount option either as a
file or path.

Link: https://lore.kernel.org/r/20241014-work-overlayfs-v3-1-32b3fed1286e@kernel.org
Reviewed-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/linux/fs_parser.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/fs_parser.h b/include/linux/fs_parser.h
index 6cf713a7e6c6..3cef566088fc 100644
--- a/include/linux/fs_parser.h
+++ b/include/linux/fs_parser.h
@@ -28,7 +28,8 @@ typedef int fs_param_type(struct p_log *,
  */
 fs_param_type fs_param_is_bool, fs_param_is_u32, fs_param_is_s32, fs_param_is_u64,
 	fs_param_is_enum, fs_param_is_string, fs_param_is_blob, fs_param_is_blockdev,
-	fs_param_is_path, fs_param_is_fd, fs_param_is_uid, fs_param_is_gid;
+	fs_param_is_path, fs_param_is_fd, fs_param_is_uid, fs_param_is_gid,
+	fs_param_is_file_or_string;
 
 /*
  * Specification of the type of value a parameter wants.
@@ -133,6 +134,8 @@ static inline bool fs_validate_description(const char *name,
 #define fsparam_bdev(NAME, OPT)	__fsparam(fs_param_is_blockdev, NAME, OPT, 0, NULL)
 #define fsparam_path(NAME, OPT)	__fsparam(fs_param_is_path, NAME, OPT, 0, NULL)
 #define fsparam_fd(NAME, OPT)	__fsparam(fs_param_is_fd, NAME, OPT, 0, NULL)
+#define fsparam_file_or_string(NAME, OPT) \
+				__fsparam(fs_param_is_file_or_string, NAME, OPT, 0, NULL)
 #define fsparam_uid(NAME, OPT) __fsparam(fs_param_is_uid, NAME, OPT, 0, NULL)
 #define fsparam_gid(NAME, OPT) __fsparam(fs_param_is_gid, NAME, OPT, 0, NULL)
 
-- 
cgit v1.2.3


From b692bf9a7543af7ad11a59d182a3757578f0ba53 Mon Sep 17 00:00:00 2001
From: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Date: Mon, 7 Oct 2024 14:24:53 +0200
Subject: xsk: Get rid of xdp_buff_xsk::xskb_list_node

Let's bring xdp_buff_xsk back to occupying 2 cachelines by removing
xskb_list_node - for the purpose of gathering the xskb frags
free_list_node can be used, head of the list (xsk_buff_pool::xskb_list)
stays as-is, just reuse the node ptr.

It is safe to do as a single xdp_buff_xsk can never reside in two
pool's lists simultaneously.

Signed-off-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Magnus Karlsson <magnus.karlsson@intel.com>
Link: https://lore.kernel.org/bpf/20241007122458.282590-2-maciej.fijalkowski@intel.com
---
 include/net/xdp_sock_drv.h  | 14 +++++++-------
 include/net/xsk_buff_pool.h |  1 -
 2 files changed, 7 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/net/xdp_sock_drv.h b/include/net/xdp_sock_drv.h
index 0a5dca2b2b3f..360bc1244c6a 100644
--- a/include/net/xdp_sock_drv.h
+++ b/include/net/xdp_sock_drv.h
@@ -126,8 +126,8 @@ static inline void xsk_buff_free(struct xdp_buff *xdp)
 	if (likely(!xdp_buff_has_frags(xdp)))
 		goto out;
 
-	list_for_each_entry_safe(pos, tmp, xskb_list, xskb_list_node) {
-		list_del(&pos->xskb_list_node);
+	list_for_each_entry_safe(pos, tmp, xskb_list, free_list_node) {
+		list_del(&pos->free_list_node);
 		xp_free(pos);
 	}
 
@@ -140,7 +140,7 @@ static inline void xsk_buff_add_frag(struct xdp_buff *xdp)
 {
 	struct xdp_buff_xsk *frag = container_of(xdp, struct xdp_buff_xsk, xdp);
 
-	list_add_tail(&frag->xskb_list_node, &frag->pool->xskb_list);
+	list_add_tail(&frag->free_list_node, &frag->pool->xskb_list);
 }
 
 static inline struct xdp_buff *xsk_buff_get_frag(struct xdp_buff *first)
@@ -150,9 +150,9 @@ static inline struct xdp_buff *xsk_buff_get_frag(struct xdp_buff *first)
 	struct xdp_buff_xsk *frag;
 
 	frag = list_first_entry_or_null(&xskb->pool->xskb_list,
-					struct xdp_buff_xsk, xskb_list_node);
+					struct xdp_buff_xsk, free_list_node);
 	if (frag) {
-		list_del(&frag->xskb_list_node);
+		list_del(&frag->free_list_node);
 		ret = &frag->xdp;
 	}
 
@@ -163,7 +163,7 @@ static inline void xsk_buff_del_tail(struct xdp_buff *tail)
 {
 	struct xdp_buff_xsk *xskb = container_of(tail, struct xdp_buff_xsk, xdp);
 
-	list_del(&xskb->xskb_list_node);
+	list_del(&xskb->free_list_node);
 }
 
 static inline struct xdp_buff *xsk_buff_get_tail(struct xdp_buff *first)
@@ -172,7 +172,7 @@ static inline struct xdp_buff *xsk_buff_get_tail(struct xdp_buff *first)
 	struct xdp_buff_xsk *frag;
 
 	frag = list_last_entry(&xskb->pool->xskb_list, struct xdp_buff_xsk,
-			       xskb_list_node);
+			       free_list_node);
 	return &frag->xdp;
 }
 
diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h
index bacb33f1e3e5..aa7f1d0b3a5e 100644
--- a/include/net/xsk_buff_pool.h
+++ b/include/net/xsk_buff_pool.h
@@ -30,7 +30,6 @@ struct xdp_buff_xsk {
 	struct xsk_buff_pool *pool;
 	u64 orig_addr;
 	struct list_head free_list_node;
-	struct list_head xskb_list_node;
 };
 
 #define XSK_CHECK_PRIV_TYPE(t) BUILD_BUG_ON(sizeof(t) > offsetofend(struct xdp_buff_xsk, cb))
-- 
cgit v1.2.3


From 30ec2c1baaead43903ad63ff8e3083949059083c Mon Sep 17 00:00:00 2001
From: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Date: Mon, 7 Oct 2024 14:24:54 +0200
Subject: xsk: s/free_list_node/list_node/

Now that free_list_node's purpose is two-folded, make it just a
'list_node'.

Signed-off-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Magnus Karlsson <magnus.karlsson@intel.com>
Link: https://lore.kernel.org/bpf/20241007122458.282590-3-maciej.fijalkowski@intel.com
---
 include/net/xdp_sock_drv.h  | 14 +++++++-------
 include/net/xsk_buff_pool.h |  2 +-
 2 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/net/xdp_sock_drv.h b/include/net/xdp_sock_drv.h
index 360bc1244c6a..40085afd9160 100644
--- a/include/net/xdp_sock_drv.h
+++ b/include/net/xdp_sock_drv.h
@@ -126,8 +126,8 @@ static inline void xsk_buff_free(struct xdp_buff *xdp)
 	if (likely(!xdp_buff_has_frags(xdp)))
 		goto out;
 
-	list_for_each_entry_safe(pos, tmp, xskb_list, free_list_node) {
-		list_del(&pos->free_list_node);
+	list_for_each_entry_safe(pos, tmp, xskb_list, list_node) {
+		list_del(&pos->list_node);
 		xp_free(pos);
 	}
 
@@ -140,7 +140,7 @@ static inline void xsk_buff_add_frag(struct xdp_buff *xdp)
 {
 	struct xdp_buff_xsk *frag = container_of(xdp, struct xdp_buff_xsk, xdp);
 
-	list_add_tail(&frag->free_list_node, &frag->pool->xskb_list);
+	list_add_tail(&frag->list_node, &frag->pool->xskb_list);
 }
 
 static inline struct xdp_buff *xsk_buff_get_frag(struct xdp_buff *first)
@@ -150,9 +150,9 @@ static inline struct xdp_buff *xsk_buff_get_frag(struct xdp_buff *first)
 	struct xdp_buff_xsk *frag;
 
 	frag = list_first_entry_or_null(&xskb->pool->xskb_list,
-					struct xdp_buff_xsk, free_list_node);
+					struct xdp_buff_xsk, list_node);
 	if (frag) {
-		list_del(&frag->free_list_node);
+		list_del(&frag->list_node);
 		ret = &frag->xdp;
 	}
 
@@ -163,7 +163,7 @@ static inline void xsk_buff_del_tail(struct xdp_buff *tail)
 {
 	struct xdp_buff_xsk *xskb = container_of(tail, struct xdp_buff_xsk, xdp);
 
-	list_del(&xskb->free_list_node);
+	list_del(&xskb->list_node);
 }
 
 static inline struct xdp_buff *xsk_buff_get_tail(struct xdp_buff *first)
@@ -172,7 +172,7 @@ static inline struct xdp_buff *xsk_buff_get_tail(struct xdp_buff *first)
 	struct xdp_buff_xsk *frag;
 
 	frag = list_last_entry(&xskb->pool->xskb_list, struct xdp_buff_xsk,
-			       free_list_node);
+			       list_node);
 	return &frag->xdp;
 }
 
diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h
index aa7f1d0b3a5e..af8b6f776f86 100644
--- a/include/net/xsk_buff_pool.h
+++ b/include/net/xsk_buff_pool.h
@@ -29,7 +29,7 @@ struct xdp_buff_xsk {
 	dma_addr_t frame_dma;
 	struct xsk_buff_pool *pool;
 	u64 orig_addr;
-	struct list_head free_list_node;
+	struct list_head list_node;
 };
 
 #define XSK_CHECK_PRIV_TYPE(t) BUILD_BUG_ON(sizeof(t) > offsetofend(struct xdp_buff_xsk, cb))
-- 
cgit v1.2.3


From bea14124bacbe5c9366381e62635eed28ac892ae Mon Sep 17 00:00:00 2001
From: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Date: Mon, 7 Oct 2024 14:24:55 +0200
Subject: xsk: Get rid of xdp_buff_xsk::orig_addr

Continue the process of dieting xdp_buff_xsk by removing orig_addr
member. It can be calculated from xdp->data_hard_start where it was
previously used, so it is not anything that has to be carried around in
struct used widely in hot path.

This has been used for initializing xdp_buff_xsk::frame_dma during pool
setup and as a shortcut in xp_get_handle() to retrieve address provided
to xsk Rx queue.

Signed-off-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Magnus Karlsson <magnus.karlsson@intel.com>
Link: https://lore.kernel.org/bpf/20241007122458.282590-4-maciej.fijalkowski@intel.com
---
 include/net/xsk_buff_pool.h | 19 +++++++++++--------
 1 file changed, 11 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h
index af8b6f776f86..468a23b1b4c5 100644
--- a/include/net/xsk_buff_pool.h
+++ b/include/net/xsk_buff_pool.h
@@ -28,7 +28,6 @@ struct xdp_buff_xsk {
 	dma_addr_t dma;
 	dma_addr_t frame_dma;
 	struct xsk_buff_pool *pool;
-	u64 orig_addr;
 	struct list_head list_node;
 };
 
@@ -119,7 +118,6 @@ void xp_free(struct xdp_buff_xsk *xskb);
 static inline void xp_init_xskb_addr(struct xdp_buff_xsk *xskb, struct xsk_buff_pool *pool,
 				     u64 addr)
 {
-	xskb->orig_addr = addr;
 	xskb->xdp.data_hard_start = pool->addrs + addr + pool->headroom;
 }
 
@@ -221,14 +219,19 @@ static inline void xp_release(struct xdp_buff_xsk *xskb)
 		xskb->pool->free_heads[xskb->pool->free_heads_cnt++] = xskb;
 }
 
-static inline u64 xp_get_handle(struct xdp_buff_xsk *xskb)
+static inline u64 xp_get_handle(struct xdp_buff_xsk *xskb,
+				struct xsk_buff_pool *pool)
 {
-	u64 offset = xskb->xdp.data - xskb->xdp.data_hard_start;
+	u64 orig_addr = xskb->xdp.data - pool->addrs;
+	u64 offset;
 
-	offset += xskb->pool->headroom;
-	if (!xskb->pool->unaligned)
-		return xskb->orig_addr + offset;
-	return xskb->orig_addr + (offset << XSK_UNALIGNED_BUF_OFFSET_SHIFT);
+	if (!pool->unaligned)
+		return orig_addr;
+
+	offset = xskb->xdp.data - xskb->xdp.data_hard_start;
+	orig_addr -= offset;
+	offset += pool->headroom;
+	return orig_addr + (offset << XSK_UNALIGNED_BUF_OFFSET_SHIFT);
 }
 
 static inline bool xp_tx_metadata_enabled(const struct xsk_buff_pool *pool)
-- 
cgit v1.2.3


From 6e126872191df946a6fe01b79273119d32d96711 Mon Sep 17 00:00:00 2001
From: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Date: Mon, 7 Oct 2024 14:24:56 +0200
Subject: xsk: Carry a copy of xdp_zc_max_segs within xsk_buff_pool

This so we avoid dereferencing struct net_device within hot path.

Signed-off-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Magnus Karlsson <magnus.karlsson@intel.com>
Link: https://lore.kernel.org/bpf/20241007122458.282590-5-maciej.fijalkowski@intel.com
---
 include/net/xsk_buff_pool.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h
index 468a23b1b4c5..bb03cee716b3 100644
--- a/include/net/xsk_buff_pool.h
+++ b/include/net/xsk_buff_pool.h
@@ -76,6 +76,7 @@ struct xsk_buff_pool {
 	u32 chunk_size;
 	u32 chunk_shift;
 	u32 frame_len;
+	u32 xdp_zc_max_segs;
 	u8 tx_metadata_len; /* inherited from umem */
 	u8 cached_need_wakeup;
 	bool uses_need_wakeup;
-- 
cgit v1.2.3


From 6e3ea06240adfef7b46e2338dd824541c31de06d Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Mon, 7 Oct 2024 18:03:23 +0300
Subject: dmaengine: acpi: Drop unused devm_acpi_dma_controller_free()

After introduction a few years ago the devm_acpi_dma_controller_free()
was never used. Drop it.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20241007150436.2183575-2-andriy.shevchenko@linux.intel.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 include/linux/acpi_dma.h | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/acpi_dma.h b/include/linux/acpi_dma.h
index 72cedb916a9c..3ef1ec7a04cb 100644
--- a/include/linux/acpi_dma.h
+++ b/include/linux/acpi_dma.h
@@ -65,7 +65,6 @@ int devm_acpi_dma_controller_register(struct device *dev,
 		struct dma_chan *(*acpi_dma_xlate)
 		(struct acpi_dma_spec *, struct acpi_dma *),
 		void *data);
-void devm_acpi_dma_controller_free(struct device *dev);
 
 struct dma_chan *acpi_dma_request_slave_chan_by_index(struct device *dev,
 						      size_t index);
@@ -94,9 +93,6 @@ static inline int devm_acpi_dma_controller_register(struct device *dev,
 {
 	return -ENODEV;
 }
-static inline void devm_acpi_dma_controller_free(struct device *dev)
-{
-}
 
 static inline struct dma_chan *acpi_dma_request_slave_chan_by_index(
 		struct device *dev, size_t index)
-- 
cgit v1.2.3


From 662f045332addc961940e48eb920caa954abbf09 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Mon, 7 Oct 2024 18:03:25 +0300
Subject: dmaengine: acpi: Clean up headers

There is a few things done:
- include only the headers we are direct user of
- when pointer is in use, provide a forward declaration
- add missing headers
- sort alphabetically

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20241007150436.2183575-4-andriy.shevchenko@linux.intel.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 include/linux/acpi_dma.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/acpi_dma.h b/include/linux/acpi_dma.h
index 3ef1ec7a04cb..e748b2877602 100644
--- a/include/linux/acpi_dma.h
+++ b/include/linux/acpi_dma.h
@@ -11,10 +11,11 @@
 #ifndef __LINUX_ACPI_DMA_H
 #define __LINUX_ACPI_DMA_H
 
-#include <linux/list.h>
-#include <linux/device.h>
 #include <linux/err.h>
 #include <linux/dmaengine.h>
+#include <linux/types.h>
+
+struct device;
 
 /**
  * struct acpi_dma_spec - slave device DMA resources
-- 
cgit v1.2.3


From 6ba55951e70bf7a8d89c03aa5612d2e0c81ded6d Mon Sep 17 00:00:00 2001
From: "Rob Herring (Arm)" <robh@kernel.org>
Date: Thu, 10 Oct 2024 11:27:15 -0500
Subject: logic_pio: Constify fwnode_handle

The fwnode_handle passed into find_io_range_by_fwnode() and
logic_pio_trans_hwaddr() are not modified, so make them const.

Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Link: https://lore.kernel.org/r/20241010-dt-const-v1-2-87a51f558425@kernel.org
Signed-off-by: Rob Herring (Arm) <robh@kernel.org>
---
 include/linux/logic_pio.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/logic_pio.h b/include/linux/logic_pio.h
index babf4e3c28ba..8f1a9408302f 100644
--- a/include/linux/logic_pio.h
+++ b/include/linux/logic_pio.h
@@ -17,7 +17,7 @@ enum {
 
 struct logic_pio_hwaddr {
 	struct list_head list;
-	struct fwnode_handle *fwnode;
+	const struct fwnode_handle *fwnode;
 	resource_size_t hw_start;
 	resource_size_t io_start;
 	resource_size_t size; /* range size populated */
@@ -110,8 +110,8 @@ void logic_outsl(unsigned long addr, const void *buffer, unsigned int count);
 #endif /* CONFIG_INDIRECT_PIO */
 #define MMIO_UPPER_LIMIT (IO_SPACE_LIMIT - PIO_INDIRECT_SIZE)
 
-struct logic_pio_hwaddr *find_io_range_by_fwnode(struct fwnode_handle *fwnode);
-unsigned long logic_pio_trans_hwaddr(struct fwnode_handle *fwnode,
+struct logic_pio_hwaddr *find_io_range_by_fwnode(const struct fwnode_handle *fwnode);
+unsigned long logic_pio_trans_hwaddr(const struct fwnode_handle *fwnode,
 			resource_size_t hw_addr, resource_size_t size);
 int logic_pio_register_range(struct logic_pio_hwaddr *newrange);
 void logic_pio_unregister_range(struct logic_pio_hwaddr *range);
-- 
cgit v1.2.3


From 78e2baf3d96edd21c6f26d8afc0e68d02ec2c51c Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 10 Oct 2024 17:48:13 +0000
Subject: net: add TIME_WAIT logic to sk_to_full_sk()

TCP will soon attach TIME_WAIT sockets to some ACK and RST.

Make sure sk_to_full_sk() detects this and does not return
a non full socket.

v3: also changed sk_const_to_full_sk()

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Reviewed-by: Martin KaFai Lau <martin.lau@kernel.org>
Reviewed-by: Brian Vazquez <brianvv@google.com>
Link: https://patch.msgid.link/20241010174817.1543642-2-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/bpf-cgroup.h | 2 +-
 include/net/inet_sock.h    | 8 ++++++--
 2 files changed, 7 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index ce91d9b2acb9..f0f219271daf 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -209,7 +209,7 @@ static inline bool cgroup_bpf_sock_enabled(struct sock *sk,
 	int __ret = 0;							       \
 	if (cgroup_bpf_enabled(CGROUP_INET_EGRESS) && sk) {		       \
 		typeof(sk) __sk = sk_to_full_sk(sk);			       \
-		if (sk_fullsock(__sk) && __sk == skb_to_full_sk(skb) &&	       \
+		if (__sk && __sk == skb_to_full_sk(skb) &&	       \
 		    cgroup_bpf_sock_enabled(__sk, CGROUP_INET_EGRESS))	       \
 			__ret = __cgroup_bpf_run_filter_skb(__sk, skb,	       \
 						      CGROUP_INET_EGRESS); \
diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index f01dd273bea6..56d8bc5593d3 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -321,8 +321,10 @@ static inline unsigned long inet_cmsg_flags(const struct inet_sock *inet)
 static inline struct sock *sk_to_full_sk(struct sock *sk)
 {
 #ifdef CONFIG_INET
-	if (sk && sk->sk_state == TCP_NEW_SYN_RECV)
+	if (sk && READ_ONCE(sk->sk_state) == TCP_NEW_SYN_RECV)
 		sk = inet_reqsk(sk)->rsk_listener;
+	if (sk && READ_ONCE(sk->sk_state) == TCP_TIME_WAIT)
+		sk = NULL;
 #endif
 	return sk;
 }
@@ -331,8 +333,10 @@ static inline struct sock *sk_to_full_sk(struct sock *sk)
 static inline const struct sock *sk_const_to_full_sk(const struct sock *sk)
 {
 #ifdef CONFIG_INET
-	if (sk && sk->sk_state == TCP_NEW_SYN_RECV)
+	if (sk && READ_ONCE(sk->sk_state) == TCP_NEW_SYN_RECV)
 		sk = ((const struct request_sock *)sk)->rsk_listener;
+	if (sk && READ_ONCE(sk->sk_state) == TCP_TIME_WAIT)
+		sk = NULL;
 #endif
 	return sk;
 }
-- 
cgit v1.2.3


From bc43a3c83cad46a27d6e3bf869acdd926bbe79ad Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 10 Oct 2024 17:48:14 +0000
Subject: net_sched: sch_fq: prepare for TIME_WAIT sockets

TCP stack is not attaching skb to TIME_WAIT sockets yet,
but we would like to allow this in the future.

Add sk_listener_or_tw() helper to detect the three states
that FQ needs to take care.

Like NEW_SYN_RECV, TIME_WAIT are not full sockets and
do not contain sk->sk_pacing_status, sk->sk_pacing_rate.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Reviewed-by: Brian Vazquez <brianvv@google.com>
Link: https://patch.msgid.link/20241010174817.1543642-3-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/sock.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include')

diff --git a/include/net/sock.h b/include/net/sock.h
index 6da420ab1ee1..2f200d91ef11 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -2802,6 +2802,16 @@ static inline bool sk_listener(const struct sock *sk)
 	return (1 << sk->sk_state) & (TCPF_LISTEN | TCPF_NEW_SYN_RECV);
 }
 
+/* This helper checks if a socket is a LISTEN or NEW_SYN_RECV or TIME_WAIT
+ * TCP SYNACK messages can be attached to LISTEN or NEW_SYN_RECV (depending on SYNCOOKIE)
+ * TCP RST and ACK can be attached to TIME_WAIT.
+ */
+static inline bool sk_listener_or_tw(const struct sock *sk)
+{
+	return (1 << READ_ONCE(sk->sk_state)) &
+	       (TCPF_LISTEN | TCPF_NEW_SYN_RECV | TCPF_TIME_WAIT);
+}
+
 void sock_enable_timestamp(struct sock *sk, enum sock_flags flag);
 int sock_recv_errqueue(struct sock *sk, struct msghdr *msg, int len, int level,
 		       int type);
-- 
cgit v1.2.3


From 5ced52fa8f0dc23adb067f7b8a009a5ee051efb7 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 10 Oct 2024 17:48:15 +0000
Subject: net: add skb_set_owner_edemux() helper

This can be used to attach a socket to an skb,
taking a reference on sk->sk_refcnt.

This helper might be a NOP if sk->sk_refcnt is zero.

Use it from tcp_make_synack().

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Reviewed-by: Brian Vazquez <brianvv@google.com>
Link: https://patch.msgid.link/20241010174817.1543642-4-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/sock.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include')

diff --git a/include/net/sock.h b/include/net/sock.h
index 2f200d91ef11..bf7fa3db10ae 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1760,6 +1760,15 @@ void sock_efree(struct sk_buff *skb);
 #ifdef CONFIG_INET
 void sock_edemux(struct sk_buff *skb);
 void sock_pfree(struct sk_buff *skb);
+
+static inline void skb_set_owner_edemux(struct sk_buff *skb, struct sock *sk)
+{
+	skb_orphan(skb);
+	if (refcount_inc_not_zero(&sk->sk_refcnt)) {
+		skb->sk = sk;
+		skb->destructor = sock_edemux;
+	}
+}
 #else
 #define sock_edemux sock_efree
 #endif
-- 
cgit v1.2.3


From 79636038d37e7bd4d078238f2a3f002cab4423bc Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 10 Oct 2024 17:48:17 +0000
Subject: ipv4: tcp: give socket pointer to control skbs

ip_send_unicast_reply() send orphaned 'control packets'.

These are RST packets and also ACK packets sent from TIME_WAIT.

Some eBPF programs would prefer to have a meaningful skb->sk
pointer as much as possible.

This means that TCP can now attach TIME_WAIT sockets to outgoing
skbs.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Reviewed-by: Brian Vazquez <brianvv@google.com>
Link: https://patch.msgid.link/20241010174817.1543642-6-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/ip.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/ip.h b/include/net/ip.h
index bab084df1567..4be0a6a603b2 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -288,7 +288,8 @@ static inline __u8 ip_reply_arg_flowi_flags(const struct ip_reply_arg *arg)
 	return (arg->flags & IP_REPLY_ARG_NOSRCCHECK) ? FLOWI_FLAG_ANYSRC : 0;
 }
 
-void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb,
+void ip_send_unicast_reply(struct sock *sk, const struct sock *orig_sk,
+			   struct sk_buff *skb,
 			   const struct ip_options *sopt,
 			   __be32 daddr, __be32 saddr,
 			   const struct ip_reply_arg *arg,
-- 
cgit v1.2.3


From f15e3b3ddb9fab1c1731b6154e2cd6573fb54c4d Mon Sep 17 00:00:00 2001
From: Joe Damato <jdamato@fastly.com>
Date: Fri, 11 Oct 2024 18:44:56 +0000
Subject: net: napi: Make napi_defer_hard_irqs per-NAPI

Add defer_hard_irqs to napi_struct in preparation for per-NAPI
settings.

The existing sysfs parameter is respected; writes to sysfs will write to
all NAPI structs for the device and the net_device defer_hard_irq field.
Reads from sysfs show the net_device field.

The ability to set defer_hard_irqs on specific NAPI instances will be
added in a later commit, via netdev-genl.

Signed-off-by: Joe Damato <jdamato@fastly.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Jakub Kicinski <kuba@kernel.org>
Link: https://patch.msgid.link/20241011184527.16393-2-jdamato@fastly.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/netdevice.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index e6b93d01e631..2e7bc23660ec 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -373,6 +373,7 @@ struct napi_struct {
 	unsigned int		napi_id;
 	struct hrtimer		timer;
 	struct task_struct	*thread;
+	u32			defer_hard_irqs;
 	/* control-path-only fields follow */
 	struct list_head	dev_list;
 	struct hlist_node	napi_hash_node;
@@ -2085,7 +2086,6 @@ struct net_device {
 	unsigned int		real_num_rx_queues;
 	struct netdev_rx_queue	*_rx;
 	unsigned long		gro_flush_timeout;
-	u32			napi_defer_hard_irqs;
 	unsigned int		gro_max_size;
 	unsigned int		gro_ipv4_max_size;
 	rx_handler_func_t __rcu	*rx_handler;
@@ -2413,6 +2413,7 @@ struct net_device {
 	struct dim_irq_moder	*irq_moder;
 
 	u64			max_pacing_offload_horizon;
+	u32			napi_defer_hard_irqs;
 
 	/**
 	 * @lock: protects @net_shaper_hierarchy, feel free to use for other
-- 
cgit v1.2.3


From 516010460011ae74ac3b7383cf90ed27e2711cd6 Mon Sep 17 00:00:00 2001
From: Joe Damato <jdamato@fastly.com>
Date: Fri, 11 Oct 2024 18:44:57 +0000
Subject: netdev-genl: Dump napi_defer_hard_irqs

Support dumping defer_hard_irqs for a NAPI ID.

Signed-off-by: Joe Damato <jdamato@fastly.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Jakub Kicinski <kuba@kernel.org>
Link: https://patch.msgid.link/20241011184527.16393-3-jdamato@fastly.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/uapi/linux/netdev.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/uapi/linux/netdev.h b/include/uapi/linux/netdev.h
index 7c308f04e7a0..13dc0b027e86 100644
--- a/include/uapi/linux/netdev.h
+++ b/include/uapi/linux/netdev.h
@@ -122,6 +122,7 @@ enum {
 	NETDEV_A_NAPI_ID,
 	NETDEV_A_NAPI_IRQ,
 	NETDEV_A_NAPI_PID,
+	NETDEV_A_NAPI_DEFER_HARD_IRQS,
 
 	__NETDEV_A_NAPI_MAX,
 	NETDEV_A_NAPI_MAX = (__NETDEV_A_NAPI_MAX - 1)
-- 
cgit v1.2.3


From acb8d4ed5661d05f794ef2ce34fd11e699e9ca32 Mon Sep 17 00:00:00 2001
From: Joe Damato <jdamato@fastly.com>
Date: Fri, 11 Oct 2024 18:44:58 +0000
Subject: net: napi: Make gro_flush_timeout per-NAPI

Allow per-NAPI gro_flush_timeout setting.

The existing sysfs parameter is respected; writes to sysfs will write to
all NAPI structs for the device and the net_device gro_flush_timeout
field. Reads from sysfs will read from the net_device field.

The ability to set gro_flush_timeout on specific NAPI instances will be
added in a later commit, via netdev-genl.

Signed-off-by: Joe Damato <jdamato@fastly.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Jakub Kicinski <kuba@kernel.org>
Link: https://patch.msgid.link/20241011184527.16393-4-jdamato@fastly.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/netdevice.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 2e7bc23660ec..93241d4de437 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -373,6 +373,7 @@ struct napi_struct {
 	unsigned int		napi_id;
 	struct hrtimer		timer;
 	struct task_struct	*thread;
+	unsigned long		gro_flush_timeout;
 	u32			defer_hard_irqs;
 	/* control-path-only fields follow */
 	struct list_head	dev_list;
@@ -2085,7 +2086,6 @@ struct net_device {
 	int			ifindex;
 	unsigned int		real_num_rx_queues;
 	struct netdev_rx_queue	*_rx;
-	unsigned long		gro_flush_timeout;
 	unsigned int		gro_max_size;
 	unsigned int		gro_ipv4_max_size;
 	rx_handler_func_t __rcu	*rx_handler;
@@ -2413,6 +2413,7 @@ struct net_device {
 	struct dim_irq_moder	*irq_moder;
 
 	u64			max_pacing_offload_horizon;
+	unsigned long		gro_flush_timeout;
 	u32			napi_defer_hard_irqs;
 
 	/**
-- 
cgit v1.2.3


From 0137891e74576f77a7901718dc0ce08ca074ae74 Mon Sep 17 00:00:00 2001
From: Joe Damato <jdamato@fastly.com>
Date: Fri, 11 Oct 2024 18:44:59 +0000
Subject: netdev-genl: Dump gro_flush_timeout

Support dumping gro_flush_timeout for a NAPI ID.

Signed-off-by: Joe Damato <jdamato@fastly.com>
Reviewed-by: Jakub Kicinski <kuba@kernel.org>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Link: https://patch.msgid.link/20241011184527.16393-5-jdamato@fastly.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/uapi/linux/netdev.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/uapi/linux/netdev.h b/include/uapi/linux/netdev.h
index 13dc0b027e86..cacd33359c76 100644
--- a/include/uapi/linux/netdev.h
+++ b/include/uapi/linux/netdev.h
@@ -123,6 +123,7 @@ enum {
 	NETDEV_A_NAPI_IRQ,
 	NETDEV_A_NAPI_PID,
 	NETDEV_A_NAPI_DEFER_HARD_IRQS,
+	NETDEV_A_NAPI_GRO_FLUSH_TIMEOUT,
 
 	__NETDEV_A_NAPI_MAX,
 	NETDEV_A_NAPI_MAX = (__NETDEV_A_NAPI_MAX - 1)
-- 
cgit v1.2.3


From 86e25f40aa1e9e54e081e55016f65b5c92523989 Mon Sep 17 00:00:00 2001
From: Joe Damato <jdamato@fastly.com>
Date: Fri, 11 Oct 2024 18:45:00 +0000
Subject: net: napi: Add napi_config

Add a persistent NAPI config area for NAPI configuration to the core.
Drivers opt-in to setting the persistent config for a NAPI by passing an
index when calling netif_napi_add_config.

napi_config is allocated in alloc_netdev_mqs, freed in free_netdev
(after the NAPIs are deleted).

Drivers which call netif_napi_add_config will have persistent per-NAPI
settings: NAPI IDs, gro_flush_timeout, and defer_hard_irq settings.

Per-NAPI settings are saved in napi_disable and restored in napi_enable.

Co-developed-by: Martin Karsten <mkarsten@uwaterloo.ca>
Signed-off-by: Martin Karsten <mkarsten@uwaterloo.ca>
Signed-off-by: Joe Damato <jdamato@fastly.com>
Reviewed-by: Jakub Kicinski <kuba@kernel.org>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Link: https://patch.msgid.link/20241011184527.16393-6-jdamato@fastly.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/netdevice.h | 36 +++++++++++++++++++++++++++++++++---
 1 file changed, 33 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 93241d4de437..8feaca12655e 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -342,6 +342,15 @@ struct gro_list {
  */
 #define GRO_HASH_BUCKETS	8
 
+/*
+ * Structure for per-NAPI config
+ */
+struct napi_config {
+	u64 gro_flush_timeout;
+	u32 defer_hard_irqs;
+	unsigned int napi_id;
+};
+
 /*
  * Structure for NAPI scheduling similar to tasklet but with weighting
  */
@@ -379,6 +388,8 @@ struct napi_struct {
 	struct list_head	dev_list;
 	struct hlist_node	napi_hash_node;
 	int			irq;
+	int			index;
+	struct napi_config	*config;
 };
 
 enum {
@@ -1868,9 +1879,6 @@ enum netdev_reg_state {
  *				allocated at register_netdev() time
  *	@real_num_rx_queues: 	Number of RX queues currently active in device
  *	@xdp_prog:		XDP sockets filter program pointer
- *	@gro_flush_timeout:	timeout for GRO layer in NAPI
- *	@napi_defer_hard_irqs:	If not zero, provides a counter that would
- *				allow to avoid NIC hard IRQ, on busy queues.
  *
  *	@rx_handler:		handler for received packets
  *	@rx_handler_data: 	XXX: need comments on this one
@@ -2020,6 +2028,11 @@ enum netdev_reg_state {
  *		   where the clock is recovered.
  *
  *	@max_pacing_offload_horizon: max EDT offload horizon in nsec.
+ *	@napi_config: An array of napi_config structures containing per-NAPI
+ *		      settings.
+ *	@gro_flush_timeout:	timeout for GRO layer in NAPI
+ *	@napi_defer_hard_irqs:	If not zero, provides a counter that would
+ *				allow to avoid NIC hard IRQ, on busy queues.
  *
  *	FIXME: cleanup struct net_device such that network protocol info
  *	moves out.
@@ -2413,6 +2426,7 @@ struct net_device {
 	struct dim_irq_moder	*irq_moder;
 
 	u64			max_pacing_offload_horizon;
+	struct napi_config	*napi_config;
 	unsigned long		gro_flush_timeout;
 	u32			napi_defer_hard_irqs;
 
@@ -2678,6 +2692,22 @@ netif_napi_add_tx_weight(struct net_device *dev,
 	netif_napi_add_weight(dev, napi, poll, weight);
 }
 
+/**
+ * netif_napi_add_config - initialize a NAPI context with persistent config
+ * @dev: network device
+ * @napi: NAPI context
+ * @poll: polling function
+ * @index: the NAPI index
+ */
+static inline void
+netif_napi_add_config(struct net_device *dev, struct napi_struct *napi,
+		      int (*poll)(struct napi_struct *, int), int index)
+{
+	napi->index = index;
+	napi->config = &dev->napi_config[index];
+	netif_napi_add_weight(dev, napi, poll, NAPI_POLL_WEIGHT);
+}
+
 /**
  * netif_napi_add_tx() - initialize a NAPI context to be used for Tx only
  * @dev:  network device
-- 
cgit v1.2.3


From 1287c1ae0fc227e5acef11a539eb4e75646e31c7 Mon Sep 17 00:00:00 2001
From: Joe Damato <jdamato@fastly.com>
Date: Fri, 11 Oct 2024 18:45:01 +0000
Subject: netdev-genl: Support setting per-NAPI config values

Add support to set per-NAPI defer_hard_irqs and gro_flush_timeout.

Signed-off-by: Joe Damato <jdamato@fastly.com>
Reviewed-by: Jakub Kicinski <kuba@kernel.org>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Link: https://patch.msgid.link/20241011184527.16393-7-jdamato@fastly.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/uapi/linux/netdev.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/uapi/linux/netdev.h b/include/uapi/linux/netdev.h
index cacd33359c76..e3ebb49f60d2 100644
--- a/include/uapi/linux/netdev.h
+++ b/include/uapi/linux/netdev.h
@@ -201,6 +201,7 @@ enum {
 	NETDEV_CMD_NAPI_GET,
 	NETDEV_CMD_QSTATS_GET,
 	NETDEV_CMD_BIND_RX,
+	NETDEV_CMD_NAPI_SET,
 
 	__NETDEV_CMD_MAX,
 	NETDEV_CMD_MAX = (__NETDEV_CMD_MAX - 1)
-- 
cgit v1.2.3


From 4971266e1595f76be3f844c834c1f9357a97dbde Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Thu, 10 Oct 2024 16:25:03 -0700
Subject: bpf: Add kmem_cache iterator

The new "kmem_cache" iterator will traverse the list of slab caches
and call attached BPF programs for each entry.  It should check the
argument (ctx.s) if it's NULL before using it.

Now the iteration grabs the slab_mutex only if it traverse the list and
releases the mutex when it runs the BPF program.  The kmem_cache entry
is protected by a refcount during the execution.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Vlastimil Babka <vbabka@suse.cz> #slab
Link: https://lore.kernel.org/r/20241010232505.1339892-2-namhyung@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/btf_ids.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/btf_ids.h b/include/linux/btf_ids.h
index c0e3e1426a82..139bdececdcf 100644
--- a/include/linux/btf_ids.h
+++ b/include/linux/btf_ids.h
@@ -283,5 +283,6 @@ extern u32 btf_tracing_ids[];
 extern u32 bpf_cgroup_btf_id[];
 extern u32 bpf_local_storage_map_btf_id[];
 extern u32 btf_bpf_map_id[];
+extern u32 bpf_kmem_cache_btf_id[];
 
 #endif
-- 
cgit v1.2.3


From 6632863226d88383c9e2bedfeeb928ac7f8232b9 Mon Sep 17 00:00:00 2001
From: Lu Baolu <baolu.lu@linux.intel.com>
Date: Wed, 9 Oct 2024 13:18:08 +0800
Subject: iommu: Remove iommu_present()

The last callsite of iommu_present() is removed by commit <45c690aea8ee>
("drm/tegra: Use iommu_paging_domain_alloc()"). Remove it to avoid dead
code.

Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Yi Liu <yi.l.liu@intel.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Link: https://lore.kernel.org/r/20241009051808.29455-1-baolu.lu@linux.intel.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 include/linux/iommu.h | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index bd722f473635..62d1b85c80d3 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -785,7 +785,6 @@ static inline void iommu_iotlb_gather_init(struct iommu_iotlb_gather *gather)
 }
 
 extern int bus_iommu_probe(const struct bus_type *bus);
-extern bool iommu_present(const struct bus_type *bus);
 extern bool device_iommu_capable(struct device *dev, enum iommu_cap cap);
 extern bool iommu_group_has_isolated_msi(struct iommu_group *group);
 extern struct iommu_domain *iommu_domain_alloc(const struct bus_type *bus);
@@ -1081,11 +1080,6 @@ struct iommu_iotlb_gather {};
 struct iommu_dirty_bitmap {};
 struct iommu_dirty_ops {};
 
-static inline bool iommu_present(const struct bus_type *bus)
-{
-	return false;
-}
-
 static inline bool device_iommu_capable(struct device *dev, enum iommu_cap cap)
 {
 	return false;
-- 
cgit v1.2.3


From a274465cc3bef2dfd9c9ea5100848dda0a8641e1 Mon Sep 17 00:00:00 2001
From: Daniel Golle <daniel@makrotopia.org>
Date: Thu, 10 Oct 2024 13:54:19 +0100
Subject: net: phy: support 'active-high' property for PHY LEDs

In addition to 'active-low' and 'inactive-high-impedance' also
support 'active-high' property for PHY LED pin configuration.
As only either 'active-high' or 'active-low' can be set at the
same time, WARN and return an error in case both are set.

Signed-off-by: Daniel Golle <daniel@makrotopia.org>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Link: https://patch.msgid.link/91598487773d768f254d5faf06cf65b13e972f0e.1728558223.git.daniel@makrotopia.org
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/linux/phy.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/phy.h b/include/linux/phy.h
index ff762a3d8270..bf0eb4e5d35c 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -877,8 +877,9 @@ struct phy_plca_status {
 
 /* Modes for PHY LED configuration */
 enum phy_led_modes {
-	PHY_LED_ACTIVE_LOW = 0,
-	PHY_LED_INACTIVE_HIGH_IMPEDANCE = 1,
+	PHY_LED_ACTIVE_HIGH = 0,
+	PHY_LED_ACTIVE_LOW = 1,
+	PHY_LED_INACTIVE_HIGH_IMPEDANCE = 2,
 
 	/* keep it last */
 	__PHY_LED_MODES_NUM,
-- 
cgit v1.2.3


From f68303cf1cf2fb96e20df5499c194f1fe5bab9e2 Mon Sep 17 00:00:00 2001
From: "Rob Herring (Arm)" <robh@kernel.org>
Date: Thu, 10 Oct 2024 11:27:14 -0500
Subject: PCI: Constify pci_register_io_range() fwnode_handle

pci_register_io_range() does not modify the passed in fwnode_handle, so
make it const.

Acked-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Link: https://lore.kernel.org/r/20241010-dt-const-v1-1-87a51f558425@kernel.org
Signed-off-by: Rob Herring (Arm) <robh@kernel.org>
---
 include/linux/pci.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/pci.h b/include/linux/pci.h
index 573b4c4c2be6..733ff6570e2d 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1556,7 +1556,7 @@ int __must_check pci_bus_alloc_resource(struct pci_bus *bus,
 			void *alignf_data);
 
 
-int pci_register_io_range(struct fwnode_handle *fwnode, phys_addr_t addr,
+int pci_register_io_range(const struct fwnode_handle *fwnode, phys_addr_t addr,
 			resource_size_t size);
 unsigned long pci_address_to_pio(phys_addr_t addr);
 phys_addr_t pci_pio_to_address(unsigned long pio);
@@ -2019,7 +2019,7 @@ static inline int pci_request_regions(struct pci_dev *dev, const char *res_name)
 { return -EIO; }
 static inline void pci_release_regions(struct pci_dev *dev) { }
 
-static inline int pci_register_io_range(struct fwnode_handle *fwnode,
+static inline int pci_register_io_range(const struct fwnode_handle *fwnode,
 					phys_addr_t addr, resource_size_t size)
 { return -EINVAL; }
 
-- 
cgit v1.2.3


From ec8c2329da1a8695b3fc80ba67cad9dc75d9a3ec Mon Sep 17 00:00:00 2001
From: "Rob Herring (Arm)" <robh@kernel.org>
Date: Thu, 10 Oct 2024 11:27:16 -0500
Subject: of: Constify struct device_node function arguments

Functions which don't change the refcount or otherwise modify struct
device_node can make struct device_node const.

Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Link: https://lore.kernel.org/r/20241010-dt-const-v1-3-87a51f558425@kernel.org
Signed-off-by: Rob Herring (Arm) <robh@kernel.org>
---
 include/linux/of.h         | 14 +++++++-------
 include/linux/of_address.h |  4 ++--
 include/linux/of_irq.h     |  4 ++--
 3 files changed, 11 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/include/linux/of.h b/include/linux/of.h
index 85b60ac9eec5..7875b308f13c 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -357,7 +357,7 @@ extern struct device_node *of_get_cpu_node(int cpu, unsigned int *thread);
 extern struct device_node *of_cpu_device_node_get(int cpu);
 extern int of_cpu_node_to_id(struct device_node *np);
 extern struct device_node *of_get_next_cpu_node(struct device_node *prev);
-extern struct device_node *of_get_cpu_state_node(struct device_node *cpu_node,
+extern struct device_node *of_get_cpu_state_node(const struct device_node *cpu_node,
 						 int index);
 extern u64 of_get_cpu_hwid(struct device_node *cpun, unsigned int thread);
 
@@ -395,7 +395,7 @@ extern int of_phandle_iterator_args(struct of_phandle_iterator *it,
 				    int size);
 
 extern void of_alias_scan(void * (*dt_alloc)(u64 size, u64 align));
-extern int of_alias_get_id(struct device_node *np, const char *stem);
+extern int of_alias_get_id(const struct device_node *np, const char *stem);
 extern int of_alias_get_highest_id(const char *stem);
 
 bool of_machine_compatible_match(const char *const *compats);
@@ -446,9 +446,9 @@ const __be32 *of_prop_next_u32(struct property *prop, const __be32 *cur,
  */
 const char *of_prop_next_string(struct property *prop, const char *cur);
 
-bool of_console_check(struct device_node *dn, char *name, int index);
+bool of_console_check(const struct device_node *dn, char *name, int index);
 
-int of_map_id(struct device_node *np, u32 id,
+int of_map_id(const struct device_node *np, u32 id,
 	       const char *map_name, const char *map_mask_name,
 	       struct device_node **target, u32 *id_out);
 
@@ -871,7 +871,7 @@ static inline void of_property_clear_flag(struct property *p, unsigned long flag
 {
 }
 
-static inline int of_map_id(struct device_node *np, u32 id,
+static inline int of_map_id(const struct device_node *np, u32 id,
 			     const char *map_name, const char *map_mask_name,
 			     struct device_node **target, u32 *id_out)
 {
@@ -1734,7 +1734,7 @@ struct of_overlay_notify_data {
 #ifdef CONFIG_OF_OVERLAY
 
 int of_overlay_fdt_apply(const void *overlay_fdt, u32 overlay_fdt_size,
-			 int *ovcs_id, struct device_node *target_base);
+			 int *ovcs_id, const struct device_node *target_base);
 int of_overlay_remove(int *ovcs_id);
 int of_overlay_remove_all(void);
 
@@ -1744,7 +1744,7 @@ int of_overlay_notifier_unregister(struct notifier_block *nb);
 #else
 
 static inline int of_overlay_fdt_apply(const void *overlay_fdt, u32 overlay_fdt_size,
-				       int *ovcs_id, struct device_node *target_base)
+				       int *ovcs_id, const struct device_node *target_base)
 {
 	return -ENOTSUPP;
 }
diff --git a/include/linux/of_address.h b/include/linux/of_address.h
index 26a19daf0d09..bd46dbcc6e88 100644
--- a/include/linux/of_address.h
+++ b/include/linux/of_address.h
@@ -83,8 +83,8 @@ extern struct of_pci_range *of_pci_range_parser_one(
 					struct of_pci_range *range);
 extern int of_pci_address_to_resource(struct device_node *dev, int bar,
 				      struct resource *r);
-extern int of_pci_range_to_resource(struct of_pci_range *range,
-				    struct device_node *np,
+extern int of_pci_range_to_resource(const struct of_pci_range *range,
+				    const struct device_node *np,
 				    struct resource *res);
 extern int of_range_to_resource(struct device_node *np, int index,
 				struct resource *res);
diff --git a/include/linux/of_irq.h b/include/linux/of_irq.h
index d6d3eae2f145..6337ad4e5fe8 100644
--- a/include/linux/of_irq.h
+++ b/include/linux/of_irq.h
@@ -48,12 +48,12 @@ extern int of_irq_to_resource_table(struct device_node *dev,
 		struct resource *res, int nr_irqs);
 extern struct device_node *of_irq_find_parent(struct device_node *child);
 extern struct irq_domain *of_msi_get_domain(struct device *dev,
-					    struct device_node *np,
+					    const struct device_node *np,
 					    enum irq_domain_bus_token token);
 extern struct irq_domain *of_msi_map_get_device_domain(struct device *dev,
 							u32 id,
 							u32 bus_token);
-extern void of_msi_configure(struct device *dev, struct device_node *np);
+extern void of_msi_configure(struct device *dev, const struct device_node *np);
 u32 of_msi_map_id(struct device *dev, struct device_node *msi_np, u32 id_in);
 #else
 static inline void of_irq_init(const struct of_device_id *matches)
-- 
cgit v1.2.3


From 9c63fea9acd077701fd67bbc21b1e77d6b98b7e0 Mon Sep 17 00:00:00 2001
From: "Rob Herring (Arm)" <robh@kernel.org>
Date: Thu, 10 Oct 2024 11:27:17 -0500
Subject: of: Constify struct property pointers

Most accesses to struct property do not modify it, so constify struct
property pointers where ever possible in the DT core code.

Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Link: https://lore.kernel.org/r/20241010-dt-const-v1-4-87a51f558425@kernel.org
Signed-off-by: Rob Herring (Arm) <robh@kernel.org>
---
 include/linux/of.h | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/linux/of.h b/include/linux/of.h
index 7875b308f13c..086a60f3b8a6 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -435,7 +435,7 @@ extern int of_detach_node(struct device_node *);
  * of_property_for_each_u32(np, "propname", u)
  *         printk("U32 value: %x\n", u);
  */
-const __be32 *of_prop_next_u32(struct property *prop, const __be32 *cur,
+const __be32 *of_prop_next_u32(const struct property *prop, const __be32 *cur,
 			       u32 *pu);
 /*
  * struct property *prop;
@@ -444,7 +444,7 @@ const __be32 *of_prop_next_u32(struct property *prop, const __be32 *cur,
  * of_property_for_each_string(np, "propname", prop, s)
  *         printk("String value: %s\n", s);
  */
-const char *of_prop_next_string(struct property *prop, const char *cur);
+const char *of_prop_next_string(const struct property *prop, const char *cur);
 
 bool of_console_check(const struct device_node *dn, char *name, int index);
 
@@ -826,13 +826,13 @@ static inline bool of_console_check(const struct device_node *dn, const char *na
 	return false;
 }
 
-static inline const __be32 *of_prop_next_u32(struct property *prop,
+static inline const __be32 *of_prop_next_u32(const struct property *prop,
 		const __be32 *cur, u32 *pu)
 {
 	return NULL;
 }
 
-static inline const char *of_prop_next_string(struct property *prop,
+static inline const char *of_prop_next_string(const struct property *prop,
 		const char *cur)
 {
 	return NULL;
@@ -899,7 +899,7 @@ static inline const void *of_device_get_match_data(const struct device *dev)
 #define of_node_cmp(s1, s2)		strcasecmp((s1), (s2))
 #endif
 
-static inline int of_prop_val_eq(struct property *p1, struct property *p2)
+static inline int of_prop_val_eq(const struct property *p1, const struct property *p2)
 {
 	return p1->length == p2->length &&
 	       !memcmp(p1->value, p2->value, (size_t)p1->length);
@@ -1252,7 +1252,7 @@ static inline int of_property_read_string_index(const struct device_node *np,
 static inline bool of_property_read_bool(const struct device_node *np,
 					 const char *propname)
 {
-	struct property *prop = of_find_property(np, propname, NULL);
+	const struct property *prop = of_find_property(np, propname, NULL);
 
 	return prop ? true : false;
 }
@@ -1430,7 +1430,7 @@ static inline int of_property_read_s32(const struct device_node *np,
 	     err = of_phandle_iterator_next(it))
 
 #define of_property_for_each_u32(np, propname, u)			\
-	for (struct {struct property *prop; const __be32 *item; } _it =	\
+	for (struct {const struct property *prop; const __be32 *item; } _it =	\
 		{of_find_property(np, propname, NULL),			\
 		 of_prop_next_u32(_it.prop, NULL, &u)};			\
 	     _it.item;							\
-- 
cgit v1.2.3


From d79616b04f0e08178ceb716a5d2ef60ab723d532 Mon Sep 17 00:00:00 2001
From: "Rob Herring (Arm)" <robh@kernel.org>
Date: Thu, 10 Oct 2024 11:27:20 -0500
Subject: of/address: Constify of_busses[] array and pointers

The of_busses array is fixed, so it and all struct of_bus pointers can
be const.

Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Link: https://lore.kernel.org/r/20241010-dt-const-v1-7-87a51f558425@kernel.org
Signed-off-by: Rob Herring (Arm) <robh@kernel.org>
---
 include/linux/of_address.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/of_address.h b/include/linux/of_address.h
index bd46dbcc6e88..9e034363788a 100644
--- a/include/linux/of_address.h
+++ b/include/linux/of_address.h
@@ -10,7 +10,7 @@ struct of_bus;
 
 struct of_pci_range_parser {
 	struct device_node *node;
-	struct of_bus *bus;
+	const struct of_bus *bus;
 	const __be32 *range;
 	const __be32 *end;
 	int na;
-- 
cgit v1.2.3


From 74fe1ad4132234f04fcc75e16600449496a67b5b Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 7 Oct 2024 18:50:14 +0200
Subject: debugobjects: Prepare for batching

Move the debug_obj::object pointer into a union and add a pointer to the
last node in a batch. That allows to implement batch processing efficiently
by utilizing the stack property of hlist:

When the first object of a batch is added to the list, then the batch
pointer is set to the hlist node of the object itself. Any subsequent add
retrieves the pointer to the last node from the first object in the list
and uses that for storing the last node pointer in the newly added object.

Add the pointer to the data structure and ensure that all relevant pool
sizes are strictly batch sized. The actual batching implementation follows
in subsequent changes.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Zhen Lei <thunder.leizhen@huawei.com>
Link: https://lore.kernel.org/all/20241007164914.139204961@linutronix.de
---
 include/linux/debugobjects.h | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/debugobjects.h b/include/linux/debugobjects.h
index 32444686b6ff..8b95545e7924 100644
--- a/include/linux/debugobjects.h
+++ b/include/linux/debugobjects.h
@@ -23,13 +23,17 @@ struct debug_obj_descr;
  * @state:	tracked object state
  * @astate:	current active state
  * @object:	pointer to the real object
+ * @batch_last:	pointer to the last hlist node in a batch
  * @descr:	pointer to an object type specific debug description structure
  */
 struct debug_obj {
-	struct hlist_node	node;
-	enum debug_obj_state	state;
-	unsigned int		astate;
-	void			*object;
+	struct hlist_node		node;
+	enum debug_obj_state		state;
+	unsigned int			astate;
+	union {
+		void			*object;
+		struct hlist_node	*batch_last;
+	};
 	const struct debug_obj_descr *descr;
 };
 
-- 
cgit v1.2.3


From e4cf33ca48128d580e25ebe779b7ba7b4b4cf733 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <rostedt@goodmis.org>
Date: Thu, 10 Oct 2024 20:21:14 -0400
Subject: ftrace: Consolidate ftrace_regs accessor functions for archs using
 pt_regs

Most architectures use pt_regs within ftrace_regs making a lot of the
accessor functions just calls to the pt_regs internally. Instead of
duplication this effort, use a HAVE_ARCH_FTRACE_REGS for architectures
that have their own ftrace_regs that is not based on pt_regs and will
define all the accessor functions, and for the architectures that just use
pt_regs, it will leave it undefined, and the default accessor functions
will be used.

Note, this will also make it easier to add new accessor functions to
ftrace_regs as it will mean having to touch less architectures.

Cc: <linux-arch@vger.kernel.org>
Cc: "x86@kernel.org" <x86@kernel.org>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Will Deacon <will@kernel.org>
Cc: Huacai Chen <chenhuacai@kernel.org>
Cc: WANG Xuerui <kernel@xen0n.name>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
Cc: Naveen N Rao <naveen@kernel.org>
Cc: Madhavan Srinivasan <maddy@linux.ibm.com>
Cc: Paul Walmsley <paul.walmsley@sifive.com>
Cc: Palmer Dabbelt <palmer@dabbelt.com>
Cc: Albert Ou <aou@eecs.berkeley.edu>
Cc: Heiko Carstens <hca@linux.ibm.com>
Cc: Vasily Gorbik <gor@linux.ibm.com>
Cc: Alexander Gordeev <agordeev@linux.ibm.com>
Cc: Christian Borntraeger <borntraeger@linux.ibm.com>
Cc: Sven Schnelle <svens@linux.ibm.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Link: https://lore.kernel.org/20241010202114.2289f6fd@gandalf.local.home
Acked-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Acked-by: Heiko Carstens <hca@linux.ibm.com> # s390
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Acked-by: Michael Ellerman <mpe@ellerman.id.au> # powerpc
Suggested-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 include/linux/ftrace.h      | 32 +++++++-------------------------
 include/linux/ftrace_regs.h | 36 ++++++++++++++++++++++++++++++++++++
 2 files changed, 43 insertions(+), 25 deletions(-)
 create mode 100644 include/linux/ftrace_regs.h

(limited to 'include')

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 66f10291a0b2..aa9ddd1e4bb6 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -113,6 +113,8 @@ static inline int ftrace_mod_get_kallsym(unsigned int symnum, unsigned long *val
 
 #ifdef CONFIG_FUNCTION_TRACER
 
+#include <linux/ftrace_regs.h>
+
 extern int ftrace_enabled;
 
 /**
@@ -150,14 +152,11 @@ struct ftrace_regs {
 #define ftrace_regs_size()	sizeof(struct __arch_ftrace_regs)
 
 #ifndef CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS
-
-struct __arch_ftrace_regs {
-	struct pt_regs		regs;
-};
-
-struct ftrace_regs;
-#define arch_ftrace_regs(fregs) ((struct __arch_ftrace_regs *)(fregs))
-
+/*
+ * Architectures that define HAVE_DYNAMIC_FTRACE_WITH_ARGS must define their own
+ * arch_ftrace_get_regs() where it only returns pt_regs *if* it is fully
+ * populated. It should return NULL otherwise.
+ */
 static inline struct pt_regs *arch_ftrace_get_regs(struct ftrace_regs *fregs)
 {
 	return &arch_ftrace_regs(fregs)->regs;
@@ -191,23 +190,6 @@ static __always_inline bool ftrace_regs_has_args(struct ftrace_regs *fregs)
 	return ftrace_get_regs(fregs) != NULL;
 }
 
-#ifndef CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS
-#define ftrace_regs_get_instruction_pointer(fregs) \
-	instruction_pointer(ftrace_get_regs(fregs))
-#define ftrace_regs_get_argument(fregs, n) \
-	regs_get_kernel_argument(ftrace_get_regs(fregs), n)
-#define ftrace_regs_get_stack_pointer(fregs) \
-	kernel_stack_pointer(ftrace_get_regs(fregs))
-#define ftrace_regs_return_value(fregs) \
-	regs_return_value(ftrace_get_regs(fregs))
-#define ftrace_regs_set_return_value(fregs, ret) \
-	regs_set_return_value(ftrace_get_regs(fregs), ret)
-#define ftrace_override_function_with_return(fregs) \
-	override_function_with_return(ftrace_get_regs(fregs))
-#define ftrace_regs_query_register_offset(name) \
-	regs_query_register_offset(name)
-#endif
-
 typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip,
 			      struct ftrace_ops *op, struct ftrace_regs *fregs);
 
diff --git a/include/linux/ftrace_regs.h b/include/linux/ftrace_regs.h
new file mode 100644
index 000000000000..dea6a0851b74
--- /dev/null
+++ b/include/linux/ftrace_regs.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_FTRACE_REGS_H
+#define _LINUX_FTRACE_REGS_H
+
+/*
+ * For archs that just copy pt_regs in ftrace regs, it can use this default.
+ * If an architecture does not use pt_regs, it must define all the below
+ * accessor functions.
+ */
+#ifndef HAVE_ARCH_FTRACE_REGS
+struct __arch_ftrace_regs {
+	struct pt_regs		regs;
+};
+
+#define arch_ftrace_regs(fregs) ((struct __arch_ftrace_regs *)(fregs))
+
+struct ftrace_regs;
+
+#define ftrace_regs_get_instruction_pointer(fregs) \
+	instruction_pointer(arch_ftrace_get_regs(fregs))
+#define ftrace_regs_get_argument(fregs, n) \
+	regs_get_kernel_argument(arch_ftrace_get_regs(fregs), n)
+#define ftrace_regs_get_stack_pointer(fregs) \
+	kernel_stack_pointer(arch_ftrace_get_regs(fregs))
+#define ftrace_regs_return_value(fregs) \
+	regs_return_value(arch_ftrace_get_regs(fregs))
+#define ftrace_regs_set_return_value(fregs, ret) \
+	regs_set_return_value(arch_ftrace_get_regs(fregs), ret)
+#define ftrace_override_function_with_return(fregs) \
+	override_function_with_return(arch_ftrace_get_regs(fregs))
+#define ftrace_regs_query_register_offset(name) \
+	regs_query_register_offset(name)
+
+#endif /* HAVE_ARCH_FTRACE_REGS */
+
+#endif /* _LINUX_FTRACE_REGS_H */
-- 
cgit v1.2.3


From 39c089a01a7e431383710a566864644cbbc0f8fe Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= <thomas.weissschuh@linutronix.de>
Date: Thu, 10 Oct 2024 17:44:44 +0200
Subject: vdso: Remove timekeeper argument of __arch_update_vsyscall()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

No implementation of this hook uses the passed in timekeeper anymore.

This avoids including a non-VDSO header while building the VDSO, which can
lead to compilation errors.

Signed-off-by: Thomas Weißschuh <thomas.weissschuh@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Will Deacon <will@kernel.org>
Link: https://lore.kernel.org/all/20241010-vdso-generic-arch_update_vsyscall-v1-1-7fe5a3ea4382@linutronix.de
---
 include/asm-generic/vdso/vsyscall.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/asm-generic/vdso/vsyscall.h b/include/asm-generic/vdso/vsyscall.h
index c835607f78ae..01dafd604188 100644
--- a/include/asm-generic/vdso/vsyscall.h
+++ b/include/asm-generic/vdso/vsyscall.h
@@ -12,8 +12,7 @@ static __always_inline struct vdso_data *__arch_get_k_vdso_data(void)
 #endif /* __arch_get_k_vdso_data */
 
 #ifndef __arch_update_vsyscall
-static __always_inline void __arch_update_vsyscall(struct vdso_data *vdata,
-						   struct timekeeper *tk)
+static __always_inline void __arch_update_vsyscall(struct vdso_data *vdata)
 {
 }
 #endif /* __arch_update_vsyscall */
-- 
cgit v1.2.3


From a066397e8ed1036e8b959050ab6e830ee90d9f58 Mon Sep 17 00:00:00 2001
From: Gregory Price <gourry@gourry.net>
Date: Fri, 13 Sep 2024 19:19:53 -0400
Subject: tpm: fix unsigned/signed mismatch errors related to
 __calc_tpm2_event_size

__calc_tpm2_event_size returns 0 or a positive length, but return values
are often interpreted as ints.  Convert everything over to u32 to avoid
signed/unsigned logic errors.

Signed-off-by: Gregory Price <gourry@gourry.net>
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
 include/linux/tpm_eventlog.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/tpm_eventlog.h b/include/linux/tpm_eventlog.h
index 7d68a5cc5881..891368e82558 100644
--- a/include/linux/tpm_eventlog.h
+++ b/include/linux/tpm_eventlog.h
@@ -157,7 +157,7 @@ struct tcg_algorithm_info {
  * Return: size of the event on success, 0 on failure
  */
 
-static __always_inline int __calc_tpm2_event_size(struct tcg_pcr_event2_head *event,
+static __always_inline u32 __calc_tpm2_event_size(struct tcg_pcr_event2_head *event,
 					 struct tcg_pcr_event *event_header,
 					 bool do_mapping)
 {
-- 
cgit v1.2.3


From 58797abed49d6b78c7af99b03b037f20c7ffb203 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= <linux@weissschuh.net>
Date: Sat, 5 Oct 2024 12:04:17 +0200
Subject: power: supply: core: constify power_supply_battery_info::resist_table
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The power supply core never modifies the resist table.
Reflect this in the API, so drivers can mark their static tables as
const.

Signed-off-by: Thomas Weißschuh <linux@weissschuh.net>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Link: https://lore.kernel.org/r/20241005-power-supply-battery-const-v1-1-c1f721927048@weissschuh.net
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 include/linux/power_supply.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h
index 910d407ebe63..9253411c105f 100644
--- a/include/linux/power_supply.h
+++ b/include/linux/power_supply.h
@@ -752,7 +752,7 @@ struct power_supply_battery_info {
 	int temp_max;
 	struct power_supply_battery_ocv_table *ocv_table[POWER_SUPPLY_OCV_TEMP_MAX];
 	int ocv_table_size[POWER_SUPPLY_OCV_TEMP_MAX];
-	struct power_supply_resistance_temp_table *resist_table;
+	const struct power_supply_resistance_temp_table *resist_table;
 	int resist_table_size;
 	const struct power_supply_vbat_ri_table *vbat2ri_discharging;
 	int vbat2ri_discharging_size;
@@ -805,7 +805,7 @@ power_supply_find_ocv2cap_table(struct power_supply_battery_info *info,
 extern int power_supply_batinfo_ocv2cap(struct power_supply_battery_info *info,
 					int ocv, int temp);
 extern int
-power_supply_temp2resist_simple(struct power_supply_resistance_temp_table *table,
+power_supply_temp2resist_simple(const struct power_supply_resistance_temp_table *table,
 				int table_len, int temp);
 extern int power_supply_vbat2ri(struct power_supply_battery_info *info,
 				int vbat_uv, bool charging);
-- 
cgit v1.2.3


From ce20d5b9e37099a035ab34d4d3f59e1744756385 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= <linux@weissschuh.net>
Date: Sat, 5 Oct 2024 12:04:21 +0200
Subject: power: supply: core: constify power_supply_battery_info::ocv_table
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The power supply core never modifies the ocv table.
Reflect this in the API, so drivers can mark their static tables as
const.

Signed-off-by: Thomas Weißschuh <linux@weissschuh.net>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Link: https://lore.kernel.org/r/20241005-power-supply-battery-const-v1-5-c1f721927048@weissschuh.net
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 include/linux/power_supply.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h
index 9253411c105f..4e29ec39c18f 100644
--- a/include/linux/power_supply.h
+++ b/include/linux/power_supply.h
@@ -750,7 +750,7 @@ struct power_supply_battery_info {
 	int temp_alert_max;
 	int temp_min;
 	int temp_max;
-	struct power_supply_battery_ocv_table *ocv_table[POWER_SUPPLY_OCV_TEMP_MAX];
+	const struct power_supply_battery_ocv_table *ocv_table[POWER_SUPPLY_OCV_TEMP_MAX];
 	int ocv_table_size[POWER_SUPPLY_OCV_TEMP_MAX];
 	const struct power_supply_resistance_temp_table *resist_table;
 	int resist_table_size;
@@ -797,9 +797,9 @@ extern bool power_supply_battery_info_has_prop(struct power_supply_battery_info
 extern int power_supply_battery_info_get_prop(struct power_supply_battery_info *info,
 					      enum power_supply_property psp,
 					      union power_supply_propval *val);
-extern int power_supply_ocv2cap_simple(struct power_supply_battery_ocv_table *table,
+extern int power_supply_ocv2cap_simple(const struct power_supply_battery_ocv_table *table,
 				       int table_len, int ocv);
-extern struct power_supply_battery_ocv_table *
+extern const struct power_supply_battery_ocv_table *
 power_supply_find_ocv2cap_table(struct power_supply_battery_info *info,
 				int temp, int *table_len);
 extern int power_supply_batinfo_ocv2cap(struct power_supply_battery_info *info,
-- 
cgit v1.2.3


From 49000fee9e639f62ba1f965ed2ae4c5ad18d19e2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= <linux@weissschuh.net>
Date: Sat, 5 Oct 2024 12:05:03 +0200
Subject: power: supply: core: add wakeup source inhibit by power_supply_config
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

To inhibit wakeup users currently have to use dedicated functions to
register the power supply: {,devm_}power_supply_register_no_ws().
This is inconsistent to other runtime settings which can be configured
through struct power_supply_config.
It's also not obvious what _no_ws() is meant to mean.

Extend power_supply_config to also be able to inhibit the wakeup source.

Signed-off-by: Thomas Weißschuh <linux@weissschuh.net>
Reviewed-by: Tzung-Bi Shih <tzungbi@kernel.org>
Link: https://lore.kernel.org/r/20241005-power-supply-no-wakeup-source-v1-1-1d62bf9bcb1d@weissschuh.net
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 include/linux/power_supply.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h
index 4e29ec39c18f..9a64043798bd 100644
--- a/include/linux/power_supply.h
+++ b/include/linux/power_supply.h
@@ -236,6 +236,8 @@ struct power_supply_config {
 
 	char **supplied_to;
 	size_t num_supplicants;
+
+	bool no_wakeup_source;
 };
 
 /* Description of power supply */
-- 
cgit v1.2.3


From 85d319e14f301e1c68131b74c1dceabae73d1e81 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= <linux@weissschuh.net>
Date: Sat, 5 Oct 2024 12:05:10 +0200
Subject: power: supply: core: remove {,devm_}power_supply_register_no_ws()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The same functionality is available through
power_supply_config::no_wakeup_source, which is more idiomatic.
All users of the old API have been converted.

Also remove the argument "ws" from __power_supply_register(),
as it is now always "true".

Signed-off-by: Thomas Weißschuh <linux@weissschuh.net>
Reviewed-by: Tzung-Bi Shih <tzungbi@kernel.org>
Link: https://lore.kernel.org/r/20241005-power-supply-no-wakeup-source-v1-8-1d62bf9bcb1d@weissschuh.net
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 include/linux/power_supply.h | 8 --------
 1 file changed, 8 deletions(-)

(limited to 'include')

diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h
index 9a64043798bd..14eedefb3ac8 100644
--- a/include/linux/power_supply.h
+++ b/include/linux/power_supply.h
@@ -874,17 +874,9 @@ power_supply_register(struct device *parent,
 				 const struct power_supply_desc *desc,
 				 const struct power_supply_config *cfg);
 extern struct power_supply *__must_check
-power_supply_register_no_ws(struct device *parent,
-				 const struct power_supply_desc *desc,
-				 const struct power_supply_config *cfg);
-extern struct power_supply *__must_check
 devm_power_supply_register(struct device *parent,
 				 const struct power_supply_desc *desc,
 				 const struct power_supply_config *cfg);
-extern struct power_supply *__must_check
-devm_power_supply_register_no_ws(struct device *parent,
-				 const struct power_supply_desc *desc,
-				 const struct power_supply_config *cfg);
 extern void power_supply_unregister(struct power_supply *psy);
 extern int power_supply_powers(struct power_supply *psy, struct device *dev);
 
-- 
cgit v1.2.3


From 0b582611a8f4270fa357a22a546909b2dd5fc5fe Mon Sep 17 00:00:00 2001
From: "Masami Hiramatsu (Google)" <mhiramat@kernel.org>
Date: Tue, 15 Oct 2024 10:28:43 +0900
Subject: ftrace: Use arch_ftrace_regs() for ftrace_regs_*() macros

Since the arch_ftrace_get_regs(fregs) is only valid when the
FL_SAVE_REGS is set, we need to use `&arch_ftrace_regs()->regs` for
ftrace_regs_*() APIs because those APIs are for ftrace_regs, not
complete pt_regs.

Cc: Alexei Starovoitov <alexei.starovoitov@gmail.com>
Cc: Florent Revest <revest@chromium.org>
Cc: Martin KaFai Lau <martin.lau@linux.dev>
Cc: bpf <bpf@vger.kernel.org>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Alan Maguire <alan.maguire@oracle.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Link: https://lore.kernel.org/172895572290.107311.16057631001860177198.stgit@devnote2
Fixes: e4cf33ca4812 ("ftrace: Consolidate ftrace_regs accessor functions for archs using pt_regs")
Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 include/linux/ftrace_regs.h | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/ftrace_regs.h b/include/linux/ftrace_regs.h
index dea6a0851b74..b78a0a60515b 100644
--- a/include/linux/ftrace_regs.h
+++ b/include/linux/ftrace_regs.h
@@ -17,17 +17,17 @@ struct __arch_ftrace_regs {
 struct ftrace_regs;
 
 #define ftrace_regs_get_instruction_pointer(fregs) \
-	instruction_pointer(arch_ftrace_get_regs(fregs))
+	instruction_pointer(&arch_ftrace_regs(fregs)->regs)
 #define ftrace_regs_get_argument(fregs, n) \
-	regs_get_kernel_argument(arch_ftrace_get_regs(fregs), n)
+	regs_get_kernel_argument(&arch_ftrace_regs(fregs)->regs, n)
 #define ftrace_regs_get_stack_pointer(fregs) \
-	kernel_stack_pointer(arch_ftrace_get_regs(fregs))
+	kernel_stack_pointer(&arch_ftrace_regs(fregs)->regs)
 #define ftrace_regs_return_value(fregs) \
-	regs_return_value(arch_ftrace_get_regs(fregs))
+	regs_return_value(&arch_ftrace_regs(fregs)->regs)
 #define ftrace_regs_set_return_value(fregs, ret) \
-	regs_set_return_value(arch_ftrace_get_regs(fregs), ret)
+	regs_set_return_value(&arch_ftrace_regs(fregs)->regs, ret)
 #define ftrace_override_function_with_return(fregs) \
-	override_function_with_return(arch_ftrace_get_regs(fregs))
+	override_function_with_return(&arch_ftrace_regs(fregs)->regs)
 #define ftrace_regs_query_register_offset(name) \
 	regs_query_register_offset(name)
 
-- 
cgit v1.2.3


From 2d17932da44fdc1ba835ad05110ab996d2912dbf Mon Sep 17 00:00:00 2001
From: "Masami Hiramatsu (Google)" <mhiramat@kernel.org>
Date: Tue, 15 Oct 2024 10:28:53 +0900
Subject: ftrace: Rename ftrace_regs_return_value to
 ftrace_regs_get_return_value

Rename ftrace_regs_return_value to ftrace_regs_get_return_value as same as
other ftrace_regs_get/set_* APIs. arm64 and riscv are already using this
new name.

Cc: Alexei Starovoitov <alexei.starovoitov@gmail.com>
Cc: Florent Revest <revest@chromium.org>
Cc: Martin KaFai Lau <martin.lau@linux.dev>
Cc: bpf <bpf@vger.kernel.org>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Alan Maguire <alan.maguire@oracle.com>
Link: https://lore.kernel.org/172895573350.107311.7564634260652361511.stgit@devnote2
Acked-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 include/linux/ftrace_regs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/ftrace_regs.h b/include/linux/ftrace_regs.h
index b78a0a60515b..be1ed0c891d0 100644
--- a/include/linux/ftrace_regs.h
+++ b/include/linux/ftrace_regs.h
@@ -22,7 +22,7 @@ struct ftrace_regs;
 	regs_get_kernel_argument(&arch_ftrace_regs(fregs)->regs, n)
 #define ftrace_regs_get_stack_pointer(fregs) \
 	kernel_stack_pointer(&arch_ftrace_regs(fregs)->regs)
-#define ftrace_regs_return_value(fregs) \
+#define ftrace_regs_get_return_value(fregs) \
 	regs_return_value(&arch_ftrace_regs(fregs)->regs)
 #define ftrace_regs_set_return_value(fregs, ret) \
 	regs_set_return_value(&arch_ftrace_regs(fregs)->regs, ret)
-- 
cgit v1.2.3


From efe8419ae78d65e83edc31aad74b605c12e7d60c Mon Sep 17 00:00:00 2001
From: Vincenzo Frascino <vincenzo.frascino@arm.com>
Date: Mon, 14 Oct 2024 16:13:39 +0100
Subject: vdso: Introduce vdso/page.h

The VDSO implementation includes headers from outside of the
vdso/ namespace.

Introduce vdso/page.h to make sure that the generic library
uses only the allowed namespace.

Signed-off-by: Vincenzo Frascino <vincenzo.frascino@arm.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Geert Uytterhoeven <geert@linux-m68k.org> # m68k
Link: https://lore.kernel.org/all/20241014151340.1639555-3-vincenzo.frascino@arm.com
---
 include/vdso/page.h | 30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)
 create mode 100644 include/vdso/page.h

(limited to 'include')

diff --git a/include/vdso/page.h b/include/vdso/page.h
new file mode 100644
index 000000000000..4ada1ba6bd1f
--- /dev/null
+++ b/include/vdso/page.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __VDSO_PAGE_H
+#define __VDSO_PAGE_H
+
+#include <uapi/linux/const.h>
+
+/*
+ * PAGE_SHIFT determines the page size.
+ *
+ * Note: This definition is required because PAGE_SHIFT is used
+ * in several places throuout the codebase.
+ */
+#define PAGE_SHIFT      CONFIG_PAGE_SHIFT
+
+#define PAGE_SIZE	(_AC(1,UL) << CONFIG_PAGE_SHIFT)
+
+#if defined(CONFIG_PHYS_ADDR_T_64BIT) && !defined(CONFIG_64BIT)
+/*
+ * Applies only to 32-bit architectures with a 64-bit phys_addr_t.
+ *
+ * Subtle: (1 << CONFIG_PAGE_SHIFT) is an int, not an unsigned long.
+ * So if we assign PAGE_MASK to a larger type it gets extended the
+ * way we want (i.e. with 1s in the high bits)
+ */
+#define PAGE_MASK	(~((1 << CONFIG_PAGE_SHIFT) - 1))
+#else
+#define PAGE_MASK	(~(PAGE_SIZE - 1))
+#endif
+
+#endif	/* __VDSO_PAGE_H */
-- 
cgit v1.2.3


From 102f085d84607462234ac60f6027973b45a9bde2 Mon Sep 17 00:00:00 2001
From: Anna-Maria Behnsen <anna-maria@linutronix.de>
Date: Mon, 14 Oct 2024 10:22:21 +0200
Subject: timers: Rename usleep_idle_range() to usleep_range_idle()

usleep_idle_range() is a variant of usleep_range(). Both are using
usleep_range_state() as a base. To be able to find all the related
functions in one go, rename it usleep_idle_range() to usleep_range_idle().

No functional change.

Signed-off-by: Anna-Maria Behnsen <anna-maria@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Frederic Weisbecker <frederic@kernel.org>
Reviewed-by: SeongJae Park <sj@kernel.org>
Link: https://lore.kernel.org/all/20241014-devel-anna-maria-b4-timers-flseep-v3-4-dc8b907cb62f@linutronix.de
---
 include/linux/delay.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/delay.h b/include/linux/delay.h
index ff9cda975e30..2bc586aa2068 100644
--- a/include/linux/delay.h
+++ b/include/linux/delay.h
@@ -68,7 +68,7 @@ static inline void usleep_range(unsigned long min, unsigned long max)
 	usleep_range_state(min, max, TASK_UNINTERRUPTIBLE);
 }
 
-static inline void usleep_idle_range(unsigned long min, unsigned long max)
+static inline void usleep_range_idle(unsigned long min, unsigned long max)
 {
 	usleep_range_state(min, max, TASK_IDLE);
 }
-- 
cgit v1.2.3


From f36eb171410839325fff9cd9b7b7400f7e606962 Mon Sep 17 00:00:00 2001
From: Anna-Maria Behnsen <anna-maria@linutronix.de>
Date: Mon, 14 Oct 2024 10:22:22 +0200
Subject: timers: Update function descriptions of sleep/delay related functions

A lot of commonly used functions for inserting a sleep or delay lack a
proper function description. Add function descriptions to all of them to
have important information in a central place close to the code.

No functional change.

Signed-off-by: Anna-Maria Behnsen <anna-maria@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Frederic Weisbecker <frederic@kernel.org>
Link: https://lore.kernel.org/all/20241014-devel-anna-maria-b4-timers-flseep-v3-5-dc8b907cb62f@linutronix.de
---
 include/asm-generic/delay.h | 41 ++++++++++++++++++++++++++++++++++----
 include/linux/delay.h       | 48 +++++++++++++++++++++++++++++++++------------
 2 files changed, 73 insertions(+), 16 deletions(-)

(limited to 'include')

diff --git a/include/asm-generic/delay.h b/include/asm-generic/delay.h
index e448ac61430c..a8cee41cc51b 100644
--- a/include/asm-generic/delay.h
+++ b/include/asm-generic/delay.h
@@ -12,11 +12,39 @@ extern void __const_udelay(unsigned long xloops);
 extern void __delay(unsigned long loops);
 
 /*
- * The weird n/20000 thing suppresses a "comparison is always false due to
- * limited range of data type" warning with non-const 8-bit arguments.
+ * Implementation details:
+ *
+ * * The weird n/20000 thing suppresses a "comparison is always false due to
+ *   limited range of data type" warning with non-const 8-bit arguments.
+ * * 0x10c7 is 2**32 / 1000000 (rounded up) -> udelay
+ * * 0x5 is 2**32 / 1000000000 (rounded up) -> ndelay
  */
 
-/* 0x10c7 is 2**32 / 1000000 (rounded up) */
+/**
+ * udelay - Inserting a delay based on microseconds with busy waiting
+ * @usec:	requested delay in microseconds
+ *
+ * When delaying in an atomic context ndelay(), udelay() and mdelay() are the
+ * only valid variants of delaying/sleeping to go with.
+ *
+ * When inserting delays in non atomic context which are shorter than the time
+ * which is required to queue e.g. an hrtimer and to enter then the scheduler,
+ * it is also valuable to use udelay(). But it is not simple to specify a
+ * generic threshold for this which will fit for all systems. An approximation
+ * is a threshold for all delays up to 10 microseconds.
+ *
+ * When having a delay which is larger than the architecture specific
+ * %MAX_UDELAY_MS value, please make sure mdelay() is used. Otherwise a overflow
+ * risk is given.
+ *
+ * Please note that ndelay(), udelay() and mdelay() may return early for several
+ * reasons (https://lists.openwall.net/linux-kernel/2011/01/09/56):
+ *
+ * #. computed loops_per_jiffy too low (due to the time taken to execute the
+ *    timer interrupt.)
+ * #. cache behaviour affecting the time it takes to execute the loop function.
+ * #. CPU clock rate changes.
+ */
 #define udelay(n)							\
 	({								\
 		if (__builtin_constant_p(n)) {				\
@@ -29,7 +57,12 @@ extern void __delay(unsigned long loops);
 		}							\
 	})
 
-/* 0x5 is 2**32 / 1000000000 (rounded up) */
+/**
+ * ndelay - Inserting a delay based on nanoseconds with busy waiting
+ * @nsec:	requested delay in nanoseconds
+ *
+ * See udelay() for basic information about ndelay() and it's variants.
+ */
 #define ndelay(n)							\
 	({								\
 		if (__builtin_constant_p(n)) {				\
diff --git a/include/linux/delay.h b/include/linux/delay.h
index 2bc586aa2068..2de509e4adce 100644
--- a/include/linux/delay.h
+++ b/include/linux/delay.h
@@ -6,17 +6,7 @@
  * Copyright (C) 1993 Linus Torvalds
  *
  * Delay routines, using a pre-computed "loops_per_jiffy" value.
- *
- * Please note that ndelay(), udelay() and mdelay() may return early for
- * several reasons:
- *  1. computed loops_per_jiffy too low (due to the time taken to
- *     execute the timer interrupt.)
- *  2. cache behaviour affecting the time it takes to execute the
- *     loop function.
- *  3. CPU clock rate changes.
- *
- * Please see this thread:
- *   https://lists.openwall.net/linux-kernel/2011/01/09/56
+ * Sleep routines using timer list timers or hrtimers.
  */
 
 #include <linux/math.h>
@@ -35,12 +25,21 @@ extern unsigned long loops_per_jiffy;
  * The 2nd mdelay() definition ensures GCC will optimize away the 
  * while loop for the common cases where n <= MAX_UDELAY_MS  --  Paul G.
  */
-
 #ifndef MAX_UDELAY_MS
 #define MAX_UDELAY_MS	5
 #endif
 
 #ifndef mdelay
+/**
+ * mdelay - Inserting a delay based on milliseconds with busy waiting
+ * @n:	requested delay in milliseconds
+ *
+ * See udelay() for basic information about mdelay() and it's variants.
+ *
+ * Please double check, whether mdelay() is the right way to go or whether a
+ * refactoring of the code is the better variant to be able to use msleep()
+ * instead.
+ */
 #define mdelay(n) (\
 	(__builtin_constant_p(n) && (n)<=MAX_UDELAY_MS) ? udelay((n)*1000) : \
 	({unsigned long __ms=(n); while (__ms--) udelay(1000);}))
@@ -63,16 +62,41 @@ unsigned long msleep_interruptible(unsigned int msecs);
 void usleep_range_state(unsigned long min, unsigned long max,
 			unsigned int state);
 
+/**
+ * usleep_range - Sleep for an approximate time
+ * @min:	Minimum time in microseconds to sleep
+ * @max:	Maximum time in microseconds to sleep
+ *
+ * For basic information please refere to usleep_range_state().
+ *
+ * The task will be in the state TASK_UNINTERRUPTIBLE during the sleep.
+ */
 static inline void usleep_range(unsigned long min, unsigned long max)
 {
 	usleep_range_state(min, max, TASK_UNINTERRUPTIBLE);
 }
 
+/**
+ * usleep_range_idle - Sleep for an approximate time with idle time accounting
+ * @min:	Minimum time in microseconds to sleep
+ * @max:	Maximum time in microseconds to sleep
+ *
+ * For basic information please refere to usleep_range_state().
+ *
+ * The sleeping task has the state TASK_IDLE during the sleep to prevent
+ * contribution to the load avarage.
+ */
 static inline void usleep_range_idle(unsigned long min, unsigned long max)
 {
 	usleep_range_state(min, max, TASK_IDLE);
 }
 
+/**
+ * ssleep - wrapper for seconds around msleep
+ * @seconds:	Requested sleep duration in seconds
+ *
+ * Please refere to msleep() for detailed information.
+ */
 static inline void ssleep(unsigned int seconds)
 {
 	msleep(seconds * 1000);
-- 
cgit v1.2.3


From 19e2d91d8cb1f333adf04731f2788ff6ca06cebd Mon Sep 17 00:00:00 2001
From: Anna-Maria Behnsen <anna-maria@linutronix.de>
Date: Mon, 14 Oct 2024 10:22:23 +0200
Subject: delay: Rework udelay and ndelay

udelay() as well as ndelay() are defines and no functions and are using
constants to be able to transform a sleep time into loops and to prevent
too long udelays/ndelays. There was a compiler error with non-const 8 bit
arguments which was fixed by commit a87e553fabe8 ("asm-generic: delay.h fix
udelay and ndelay for 8 bit args"). When using a function, the non-const 8
bit argument is type casted and the problem would be gone.

Transform udelay() and ndelay() into proper functions, remove the no longer
and confusing division, add defines for the magic values and add some
explanations as well.

Suggested-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Anna-Maria Behnsen <anna-maria@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Frederic Weisbecker <frederic@kernel.org>
Link: https://lore.kernel.org/all/20241014-devel-anna-maria-b4-timers-flseep-v3-6-dc8b907cb62f@linutronix.de
---
 include/asm-generic/delay.h | 65 ++++++++++++++++++++++++++-------------------
 1 file changed, 37 insertions(+), 28 deletions(-)

(limited to 'include')

diff --git a/include/asm-generic/delay.h b/include/asm-generic/delay.h
index a8cee41cc51b..76cf237b6e4c 100644
--- a/include/asm-generic/delay.h
+++ b/include/asm-generic/delay.h
@@ -2,6 +2,9 @@
 #ifndef __ASM_GENERIC_DELAY_H
 #define __ASM_GENERIC_DELAY_H
 
+#include <linux/math.h>
+#include <vdso/time64.h>
+
 /* Undefined functions to get compile-time errors */
 extern void __bad_udelay(void);
 extern void __bad_ndelay(void);
@@ -12,13 +15,18 @@ extern void __const_udelay(unsigned long xloops);
 extern void __delay(unsigned long loops);
 
 /*
- * Implementation details:
- *
- * * The weird n/20000 thing suppresses a "comparison is always false due to
- *   limited range of data type" warning with non-const 8-bit arguments.
- * * 0x10c7 is 2**32 / 1000000 (rounded up) -> udelay
- * * 0x5 is 2**32 / 1000000000 (rounded up) -> ndelay
+ * The microseconds/nanosecond delay multiplicators are used to convert a
+ * constant microseconds/nanoseconds value to a value which can be used by the
+ * architectures specific implementation to transform it into loops.
+ */
+#define UDELAY_CONST_MULT	((unsigned long)DIV_ROUND_UP(1ULL << 32, USEC_PER_SEC))
+#define NDELAY_CONST_MULT	((unsigned long)DIV_ROUND_UP(1ULL << 32, NSEC_PER_SEC))
+
+/*
+ * The maximum constant udelay/ndelay value picked out of thin air to prevent
+ * too long constant udelays/ndelays.
  */
+#define DELAY_CONST_MAX   20000
 
 /**
  * udelay - Inserting a delay based on microseconds with busy waiting
@@ -45,17 +53,17 @@ extern void __delay(unsigned long loops);
  * #. cache behaviour affecting the time it takes to execute the loop function.
  * #. CPU clock rate changes.
  */
-#define udelay(n)							\
-	({								\
-		if (__builtin_constant_p(n)) {				\
-			if ((n) / 20000 >= 1)				\
-				 __bad_udelay();			\
-			else						\
-				__const_udelay((n) * 0x10c7ul);		\
-		} else {						\
-			__udelay(n);					\
-		}							\
-	})
+static __always_inline void udelay(unsigned long usec)
+{
+	if (__builtin_constant_p(usec)) {
+		if (usec >= DELAY_CONST_MAX)
+			__bad_udelay();
+		else
+			__const_udelay(usec * UDELAY_CONST_MULT);
+	} else {
+		__udelay(usec);
+	}
+}
 
 /**
  * ndelay - Inserting a delay based on nanoseconds with busy waiting
@@ -63,16 +71,17 @@ extern void __delay(unsigned long loops);
  *
  * See udelay() for basic information about ndelay() and it's variants.
  */
-#define ndelay(n)							\
-	({								\
-		if (__builtin_constant_p(n)) {				\
-			if ((n) / 20000 >= 1)				\
-				__bad_ndelay();				\
-			else						\
-				__const_udelay((n) * 5ul);		\
-		} else {						\
-			__ndelay(n);					\
-		}							\
-	})
+static __always_inline void ndelay(unsigned long nsec)
+{
+	if (__builtin_constant_p(nsec)) {
+		if (nsec >= DELAY_CONST_MAX)
+			__bad_udelay();
+		else
+			__const_udelay(nsec * NDELAY_CONST_MULT);
+	} else {
+		__udelay(nsec);
+	}
+}
+#define ndelay(x) ndelay(x)
 
 #endif /* __ASM_GENERIC_DELAY_H */
-- 
cgit v1.2.3


From 82e11e47c1880362e05c065bef7dbe28a749555c Mon Sep 17 00:00:00 2001
From: Anna-Maria Behnsen <anna-maria@linutronix.de>
Date: Mon, 14 Oct 2024 10:22:24 +0200
Subject: timers: Adjust flseep() to reflect reality

fsleep() simply implements the recommendations of the outdated
documentation in "Documentation/timers/timers-howto.rst". This should be a
user friendly interface to choose always the best timeout function
approach:

- udelay() for very short sleep durations shorter than 10 microseconds
- usleep_range() for sleep durations until 20 milliseconds
- msleep() for the others

The actual implementation has several problems:

- It does not take into account that HZ resolution also has an impact on
  granularity of jiffies and has also an impact on the granularity of the
  buckets of timer wheel levels. This means that accuracy for the timeout
  does not have an upper limit. When executing fsleep(20000) on a HZ=100
  system, the possible additional slack will be 50% as the granularity of
  the buckets in the lowest level is 10 milliseconds.

- The upper limit of usleep_range() is twice the requested timeout. When no
  other interrupts occur in this range, the maximum value is used. This
  means that the requested sleep length has then an additional delay of
  100%.

Change the thresholds for the decisions in fsleep() to make sure the
maximum slack which is added to the sleep duration is 25%.

Note: Outdated documentation will be updated in a followup patch.

Signed-off-by: Anna-Maria Behnsen <anna-maria@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Frederic Weisbecker <frederic@kernel.org>
Link: https://lore.kernel.org/all/20241014-devel-anna-maria-b4-timers-flseep-v3-7-dc8b907cb62f@linutronix.de
---
 include/linux/delay.h | 29 +++++++++++++++++++++++++----
 1 file changed, 25 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/delay.h b/include/linux/delay.h
index 2de509e4adce..89866bab100d 100644
--- a/include/linux/delay.h
+++ b/include/linux/delay.h
@@ -11,6 +11,7 @@
 
 #include <linux/math.h>
 #include <linux/sched.h>
+#include <linux/jiffies.h>
 
 extern unsigned long loops_per_jiffy;
 
@@ -102,15 +103,35 @@ static inline void ssleep(unsigned int seconds)
 	msleep(seconds * 1000);
 }
 
-/* see Documentation/timers/timers-howto.rst for the thresholds */
+static const unsigned int max_slack_shift = 2;
+#define USLEEP_RANGE_UPPER_BOUND	((TICK_NSEC << max_slack_shift) / NSEC_PER_USEC)
+
+/**
+ * fsleep - flexible sleep which autoselects the best mechanism
+ * @usecs:	requested sleep duration in microseconds
+ *
+ * flseep() selects the best mechanism that will provide maximum 25% slack
+ * to the requested sleep duration. Therefore it uses:
+ *
+ * * udelay() loop for sleep durations <= 10 microseconds to avoid hrtimer
+ *   overhead for really short sleep durations.
+ * * usleep_range() for sleep durations which would lead with the usage of
+ *   msleep() to a slack larger than 25%. This depends on the granularity of
+ *   jiffies.
+ * * msleep() for all other sleep durations.
+ *
+ * Note: When %CONFIG_HIGH_RES_TIMERS is not set, all sleeps are processed with
+ * the granularity of jiffies and the slack might exceed 25% especially for
+ * short sleep durations.
+ */
 static inline void fsleep(unsigned long usecs)
 {
 	if (usecs <= 10)
 		udelay(usecs);
-	else if (usecs <= 20000)
-		usleep_range(usecs, 2 * usecs);
+	else if (usecs < USLEEP_RANGE_UPPER_BOUND)
+		usleep_range(usecs, usecs + (usecs >> max_slack_shift));
 	else
-		msleep(DIV_ROUND_UP(usecs, 1000));
+		msleep(DIV_ROUND_UP(usecs, USEC_PER_MSEC));
 }
 
 #endif /* defined(_LINUX_DELAY_H) */
-- 
cgit v1.2.3


From 89124747f096fc0fe44be0162c7b4fb3271739e8 Mon Sep 17 00:00:00 2001
From: Anna-Maria Behnsen <anna-maria@linutronix.de>
Date: Mon, 14 Oct 2024 10:22:29 +0200
Subject: iopoll/regmap/phy/snd: Fix comment referencing outdated timer
 documentation

Function descriptions in iopoll.h, regmap.h, phy.h and sound/soc/sof/ops.h
copied all the same outdated documentation about sleep/delay function
limitations. In those comments, the generic (and still outdated) timer
documentation file is referenced.

As proper function descriptions for used delay and sleep functions are in
place, simply update the descriptions to reference to them. While at it fix
missing colon after "Returns" in function description and move return value
description to the end of the function description.

Signed-off-by: Anna-Maria Behnsen <anna-maria@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Andrew Lunn <andrew@lunn.ch> # for phy.h
Reviewed-by: Frederic Weisbecker <frederic@kernel.org>
Link: https://lore.kernel.org/all/20241014-devel-anna-maria-b4-timers-flseep-v3-12-dc8b907cb62f@linutronix.de
---
 include/linux/iopoll.h | 52 +++++++++++++++++++++++++-------------------------
 include/linux/phy.h    |  9 +++++----
 include/linux/regmap.h | 38 ++++++++++++++++++------------------
 3 files changed, 50 insertions(+), 49 deletions(-)

(limited to 'include')

diff --git a/include/linux/iopoll.h b/include/linux/iopoll.h
index 19a7b00baff4..91324c331a4b 100644
--- a/include/linux/iopoll.h
+++ b/include/linux/iopoll.h
@@ -19,19 +19,19 @@
  * @op: accessor function (takes @args as its arguments)
  * @val: Variable to read the value into
  * @cond: Break condition (usually involving @val)
- * @sleep_us: Maximum time to sleep between reads in us (0
- *            tight-loops).  Should be less than ~20ms since usleep_range
- *            is used (see Documentation/timers/timers-howto.rst).
+ * @sleep_us: Maximum time to sleep between reads in us (0 tight-loops). Please
+ *            read usleep_range() function description for details and
+ *            limitations.
  * @timeout_us: Timeout in us, 0 means never timeout
  * @sleep_before_read: if it is true, sleep @sleep_us before read.
  * @args: arguments for @op poll
  *
- * Returns 0 on success and -ETIMEDOUT upon a timeout. In either
- * case, the last read value at @args is stored in @val. Must not
- * be called from atomic context if sleep_us or timeout_us are used.
- *
  * When available, you'll probably want to use one of the specialized
  * macros defined below rather than this macro directly.
+ *
+ * Returns: 0 on success and -ETIMEDOUT upon a timeout. In either
+ * case, the last read value at @args is stored in @val. Must not
+ * be called from atomic context if sleep_us or timeout_us are used.
  */
 #define read_poll_timeout(op, val, cond, sleep_us, timeout_us, \
 				sleep_before_read, args...) \
@@ -64,22 +64,22 @@
  * @op: accessor function (takes @args as its arguments)
  * @val: Variable to read the value into
  * @cond: Break condition (usually involving @val)
- * @delay_us: Time to udelay between reads in us (0 tight-loops).  Should
- *            be less than ~10us since udelay is used (see
- *            Documentation/timers/timers-howto.rst).
+ * @delay_us: Time to udelay between reads in us (0 tight-loops). Please
+ *            read udelay() function description for details and
+ *            limitations.
  * @timeout_us: Timeout in us, 0 means never timeout
  * @delay_before_read: if it is true, delay @delay_us before read.
  * @args: arguments for @op poll
  *
- * Returns 0 on success and -ETIMEDOUT upon a timeout. In either
- * case, the last read value at @args is stored in @val.
- *
  * This macro does not rely on timekeeping.  Hence it is safe to call even when
  * timekeeping is suspended, at the expense of an underestimation of wall clock
  * time, which is rather minimal with a non-zero delay_us.
  *
  * When available, you'll probably want to use one of the specialized
  * macros defined below rather than this macro directly.
+ *
+ * Returns: 0 on success and -ETIMEDOUT upon a timeout. In either
+ * case, the last read value at @args is stored in @val.
  */
 #define read_poll_timeout_atomic(op, val, cond, delay_us, timeout_us, \
 					delay_before_read, args...) \
@@ -119,17 +119,17 @@
  * @addr: Address to poll
  * @val: Variable to read the value into
  * @cond: Break condition (usually involving @val)
- * @sleep_us: Maximum time to sleep between reads in us (0
- *            tight-loops).  Should be less than ~20ms since usleep_range
- *            is used (see Documentation/timers/timers-howto.rst).
+ * @sleep_us: Maximum time to sleep between reads in us (0 tight-loops). Please
+ *            read usleep_range() function description for details and
+ *            limitations.
  * @timeout_us: Timeout in us, 0 means never timeout
  *
- * Returns 0 on success and -ETIMEDOUT upon a timeout. In either
- * case, the last read value at @addr is stored in @val. Must not
- * be called from atomic context if sleep_us or timeout_us are used.
- *
  * When available, you'll probably want to use one of the specialized
  * macros defined below rather than this macro directly.
+ *
+ * Returns: 0 on success and -ETIMEDOUT upon a timeout. In either
+ * case, the last read value at @addr is stored in @val. Must not
+ * be called from atomic context if sleep_us or timeout_us are used.
  */
 #define readx_poll_timeout(op, addr, val, cond, sleep_us, timeout_us)	\
 	read_poll_timeout(op, val, cond, sleep_us, timeout_us, false, addr)
@@ -140,16 +140,16 @@
  * @addr: Address to poll
  * @val: Variable to read the value into
  * @cond: Break condition (usually involving @val)
- * @delay_us: Time to udelay between reads in us (0 tight-loops).  Should
- *            be less than ~10us since udelay is used (see
- *            Documentation/timers/timers-howto.rst).
+ * @delay_us: Time to udelay between reads in us (0 tight-loops). Please
+ *            read udelay() function description for details and
+ *            limitations.
  * @timeout_us: Timeout in us, 0 means never timeout
  *
- * Returns 0 on success and -ETIMEDOUT upon a timeout. In either
- * case, the last read value at @addr is stored in @val.
- *
  * When available, you'll probably want to use one of the specialized
  * macros defined below rather than this macro directly.
+ *
+ * Returns: 0 on success and -ETIMEDOUT upon a timeout. In either
+ * case, the last read value at @addr is stored in @val.
  */
 #define readx_poll_timeout_atomic(op, addr, val, cond, delay_us, timeout_us) \
 	read_poll_timeout_atomic(op, val, cond, delay_us, timeout_us, false, addr)
diff --git a/include/linux/phy.h b/include/linux/phy.h
index a98bc91a0cde..504766d4b2d5 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -1378,12 +1378,13 @@ int phy_read_mmd(struct phy_device *phydev, int devad, u32 regnum);
  * @regnum: The register on the MMD to read
  * @val: Variable to read the register into
  * @cond: Break condition (usually involving @val)
- * @sleep_us: Maximum time to sleep between reads in us (0
- *            tight-loops).  Should be less than ~20ms since usleep_range
- *            is used (see Documentation/timers/timers-howto.rst).
+ * @sleep_us: Maximum time to sleep between reads in us (0 tight-loops). Please
+ *            read usleep_range() function description for details and
+ *            limitations.
  * @timeout_us: Timeout in us, 0 means never timeout
  * @sleep_before_read: if it is true, sleep @sleep_us before read.
- * Returns 0 on success and -ETIMEDOUT upon a timeout. In either
+ *
+ * Returns: 0 on success and -ETIMEDOUT upon a timeout. In either
  * case, the last read value at @args is stored in @val. Must not
  * be called from atomic context if sleep_us or timeout_us are used.
  */
diff --git a/include/linux/regmap.h b/include/linux/regmap.h
index f9ccad32fc5c..75f162b60ba1 100644
--- a/include/linux/regmap.h
+++ b/include/linux/regmap.h
@@ -106,17 +106,17 @@ struct reg_sequence {
  * @addr: Address to poll
  * @val: Unsigned integer variable to read the value into
  * @cond: Break condition (usually involving @val)
- * @sleep_us: Maximum time to sleep between reads in us (0
- *            tight-loops).  Should be less than ~20ms since usleep_range
- *            is used (see Documentation/timers/timers-howto.rst).
+ * @sleep_us: Maximum time to sleep between reads in us (0 tight-loops). Please
+ *            read usleep_range() function description for details and
+ *            limitations.
  * @timeout_us: Timeout in us, 0 means never timeout
  *
- * Returns 0 on success and -ETIMEDOUT upon a timeout or the regmap_read
+ * This is modelled after the readx_poll_timeout macros in linux/iopoll.h.
+ *
+ * Returns: 0 on success and -ETIMEDOUT upon a timeout or the regmap_read
  * error return value in case of a error read. In the two former cases,
  * the last read value at @addr is stored in @val. Must not be called
  * from atomic context if sleep_us or timeout_us are used.
- *
- * This is modelled after the readx_poll_timeout macros in linux/iopoll.h.
  */
 #define regmap_read_poll_timeout(map, addr, val, cond, sleep_us, timeout_us) \
 ({ \
@@ -133,20 +133,20 @@ struct reg_sequence {
  * @addr: Address to poll
  * @val: Unsigned integer variable to read the value into
  * @cond: Break condition (usually involving @val)
- * @delay_us: Time to udelay between reads in us (0 tight-loops).
- *            Should be less than ~10us since udelay is used
- *            (see Documentation/timers/timers-howto.rst).
+ * @delay_us: Time to udelay between reads in us (0 tight-loops). Please
+ *            read udelay() function description for details and
+ *            limitations.
  * @timeout_us: Timeout in us, 0 means never timeout
  *
- * Returns 0 on success and -ETIMEDOUT upon a timeout or the regmap_read
- * error return value in case of a error read. In the two former cases,
- * the last read value at @addr is stored in @val.
- *
  * This is modelled after the readx_poll_timeout_atomic macros in linux/iopoll.h.
  *
  * Note: In general regmap cannot be used in atomic context. If you want to use
  * this macro then first setup your regmap for atomic use (flat or no cache
  * and MMIO regmap).
+ *
+ * Returns: 0 on success and -ETIMEDOUT upon a timeout or the regmap_read
+ * error return value in case of a error read. In the two former cases,
+ * the last read value at @addr is stored in @val.
  */
 #define regmap_read_poll_timeout_atomic(map, addr, val, cond, delay_us, timeout_us) \
 ({ \
@@ -177,17 +177,17 @@ struct reg_sequence {
  * @field: Regmap field to read from
  * @val: Unsigned integer variable to read the value into
  * @cond: Break condition (usually involving @val)
- * @sleep_us: Maximum time to sleep between reads in us (0
- *            tight-loops).  Should be less than ~20ms since usleep_range
- *            is used (see Documentation/timers/timers-howto.rst).
+ * @sleep_us: Maximum time to sleep between reads in us (0 tight-loops). Please
+ *            read usleep_range() function description for details and
+ *            limitations.
  * @timeout_us: Timeout in us, 0 means never timeout
  *
- * Returns 0 on success and -ETIMEDOUT upon a timeout or the regmap_field_read
+ * This is modelled after the readx_poll_timeout macros in linux/iopoll.h.
+ *
+ * Returns: 0 on success and -ETIMEDOUT upon a timeout or the regmap_field_read
  * error return value in case of a error read. In the two former cases,
  * the last read value at @addr is stored in @val. Must not be called
  * from atomic context if sleep_us or timeout_us are used.
- *
- * This is modelled after the readx_poll_timeout macros in linux/iopoll.h.
  */
 #define regmap_field_read_poll_timeout(field, val, cond, sleep_us, timeout_us) \
 ({ \
-- 
cgit v1.2.3


From 95b3120a485f77a9bb8060bf3398311e3dcb6c65 Mon Sep 17 00:00:00 2001
From: Kuniyuki Iwashima <kuniyu@amazon.com>
Date: Mon, 14 Oct 2024 16:52:16 -0700
Subject: neighbour: Remove NEIGH_DN_TABLE.

Since commit 1202cdd66531 ("Remove DECnet support from kernel"),
NEIGH_DN_TABLE is no longer used.

MPLS has implicit dependency on it in nla_put_via(), but nla_get_via()
does not support DECnet.

Let's remove NEIGH_DN_TABLE.

Now, neigh_tables[] has only 2 elements and no extra iteration
for DECnet in many places.

Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Link: https://patch.msgid.link/20241014235216.10785-1-kuniyu@amazon.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/neighbour.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/net/neighbour.h b/include/net/neighbour.h
index a44f262a7384..3887ed9e5026 100644
--- a/include/net/neighbour.h
+++ b/include/net/neighbour.h
@@ -239,7 +239,6 @@ struct neigh_table {
 enum {
 	NEIGH_ARP_TABLE = 0,
 	NEIGH_ND_TABLE = 1,
-	NEIGH_DN_TABLE = 2,
 	NEIGH_NR_TABLES,
 	NEIGH_LINK_TABLE = NEIGH_NR_TABLES /* Pseudo table for neigh_xmit */
 };
-- 
cgit v1.2.3


From e1c6c383123ab1caadbfe39b3362ce0cc09dd766 Mon Sep 17 00:00:00 2001
From: Kuniyuki Iwashima <kuniyu@amazon.com>
Date: Mon, 14 Oct 2024 13:18:28 -0700
Subject: rtnetlink: Remove rtnl_register() and rtnl_register_module().

No one uses rtnl_register() and rtnl_register_module().

Let's remove them.

Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Link: https://patch.msgid.link/20241014201828.91221-12-kuniyu@amazon.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/rtnetlink.h | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h
index 2d3eb7cb4dff..bb49c5708ce7 100644
--- a/include/net/rtnetlink.h
+++ b/include/net/rtnetlink.h
@@ -29,6 +29,16 @@ static inline enum rtnl_kinds rtnl_msgtype_kind(int msgtype)
 	return msgtype & RTNL_KIND_MASK;
 }
 
+/**
+ *	struct rtnl_msg_handler - rtnetlink message type and handlers
+ *
+ *	@owner: NULL for built-in, THIS_MODULE for module
+ *	@protocol: Protocol family or PF_UNSPEC
+ *	@msgtype: rtnetlink message type
+ *	@doit: Function pointer called for each request message
+ *	@dumpit: Function pointer called for each dump request (NLM_F_DUMP) message
+ *	@flags: rtnl_link_flags to modify behaviour of doit/dumpit functions
+ */
 struct rtnl_msg_handler {
 	struct module *owner;
 	int protocol;
@@ -38,11 +48,6 @@ struct rtnl_msg_handler {
 	int flags;
 };
 
-void rtnl_register(int protocol, int msgtype,
-		   rtnl_doit_func, rtnl_dumpit_func, unsigned int flags);
-int rtnl_register_module(struct module *owner, int protocol, int msgtype,
-			 rtnl_doit_func, rtnl_dumpit_func, unsigned int flags);
-int rtnl_unregister(int protocol, int msgtype);
 void rtnl_unregister_all(int protocol);
 
 int __rtnl_register_many(const struct rtnl_msg_handler *handlers, int n);
-- 
cgit v1.2.3


From 6390834c6f9b2c5e33f52f34579efa0d0df073db Mon Sep 17 00:00:00 2001
From: Tomi Valkeinen <tomi.valkeinen@ideasonboard.com>
Date: Thu, 3 Oct 2024 13:31:10 +0300
Subject: media: uapi: Add meta formats for PiSP FE config and stats

Add two meta formats for PiSP FE: V4L2_META_FMT_RPI_FE_CFG and
V4L2_META_FMT_RPI_FE_STATS. The former is used to provide configuration
for the FE and the latter is used to read the statistics from the FE.

Signed-off-by: Tomi Valkeinen <tomi.valkeinen@ideasonboard.com>
Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 include/uapi/linux/videodev2.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h
index ded023edac70..e7c4dce39007 100644
--- a/include/uapi/linux/videodev2.h
+++ b/include/uapi/linux/videodev2.h
@@ -860,6 +860,8 @@ struct v4l2_pix_format {
 
 /* Vendor specific - used for RaspberryPi PiSP */
 #define V4L2_META_FMT_RPI_BE_CFG	v4l2_fourcc('R', 'P', 'B', 'C') /* PiSP BE configuration */
+#define V4L2_META_FMT_RPI_FE_CFG	v4l2_fourcc('R', 'P', 'F', 'C') /* PiSP FE configuration */
+#define V4L2_META_FMT_RPI_FE_STATS	v4l2_fourcc('R', 'P', 'F', 'S') /* PiSP FE stats */
 
 #ifdef __KERNEL__
 /*
-- 
cgit v1.2.3


From 6edb685abb2af445773876a326292b989dcb3c9f Mon Sep 17 00:00:00 2001
From: Tomi Valkeinen <tomi.valkeinen@ideasonboard.com>
Date: Thu, 3 Oct 2024 13:31:12 +0300
Subject: media: raspberrypi: Add support for RP1-CFE

Add support for Raspberry Pi CFE. The CFE is a hardware block that
contains:

- MIPI D-PHY
- MIPI CSI-2 receiver
- Front End ISP (FE)

The driver has been upported from the Raspberry Pi kernel commit
88a681df9623 ("ARM: dts: bcm2712-rpi: Add i2c<n>_pins labels").

Co-developed-by: Naushir Patuck <naush@raspberrypi.com>
Signed-off-by: Naushir Patuck <naush@raspberrypi.com>
Signed-off-by: Tomi Valkeinen <tomi.valkeinen@ideasonboard.com>
Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 .../uapi/linux/media/raspberrypi/pisp_fe_config.h  | 273 +++++++++++++++++++++
 .../linux/media/raspberrypi/pisp_fe_statistics.h   |  64 +++++
 2 files changed, 337 insertions(+)
 create mode 100644 include/uapi/linux/media/raspberrypi/pisp_fe_config.h
 create mode 100644 include/uapi/linux/media/raspberrypi/pisp_fe_statistics.h

(limited to 'include')

diff --git a/include/uapi/linux/media/raspberrypi/pisp_fe_config.h b/include/uapi/linux/media/raspberrypi/pisp_fe_config.h
new file mode 100644
index 000000000000..77237460a3b5
--- /dev/null
+++ b/include/uapi/linux/media/raspberrypi/pisp_fe_config.h
@@ -0,0 +1,273 @@
+/* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */
+/*
+ * RP1 PiSP Front End Driver Configuration structures
+ *
+ * Copyright (C) 2021 - Raspberry Pi Ltd.
+ *
+ */
+#ifndef _UAPI_PISP_FE_CONFIG_
+#define _UAPI_PISP_FE_CONFIG_
+
+#include <linux/types.h>
+
+#include "pisp_common.h"
+#include "pisp_fe_statistics.h"
+
+#define PISP_FE_NUM_OUTPUTS 2
+
+enum pisp_fe_enable {
+	PISP_FE_ENABLE_INPUT = 0x000001,
+	PISP_FE_ENABLE_DECOMPRESS = 0x000002,
+	PISP_FE_ENABLE_DECOMPAND = 0x000004,
+	PISP_FE_ENABLE_BLA = 0x000008,
+	PISP_FE_ENABLE_DPC = 0x000010,
+	PISP_FE_ENABLE_STATS_CROP = 0x000020,
+	PISP_FE_ENABLE_DECIMATE = 0x000040,
+	PISP_FE_ENABLE_BLC = 0x000080,
+	PISP_FE_ENABLE_CDAF_STATS = 0x000100,
+	PISP_FE_ENABLE_AWB_STATS = 0x000200,
+	PISP_FE_ENABLE_RGBY = 0x000400,
+	PISP_FE_ENABLE_LSC = 0x000800,
+	PISP_FE_ENABLE_AGC_STATS = 0x001000,
+	PISP_FE_ENABLE_CROP0 = 0x010000,
+	PISP_FE_ENABLE_DOWNSCALE0 = 0x020000,
+	PISP_FE_ENABLE_COMPRESS0 = 0x040000,
+	PISP_FE_ENABLE_OUTPUT0 = 0x080000,
+	PISP_FE_ENABLE_CROP1 = 0x100000,
+	PISP_FE_ENABLE_DOWNSCALE1 = 0x200000,
+	PISP_FE_ENABLE_COMPRESS1 = 0x400000,
+	PISP_FE_ENABLE_OUTPUT1 = 0x800000
+};
+
+#define PISP_FE_ENABLE_CROP(i) (PISP_FE_ENABLE_CROP0 << (4 * (i)))
+#define PISP_FE_ENABLE_DOWNSCALE(i) (PISP_FE_ENABLE_DOWNSCALE0 << (4 * (i)))
+#define PISP_FE_ENABLE_COMPRESS(i) (PISP_FE_ENABLE_COMPRESS0 << (4 * (i)))
+#define PISP_FE_ENABLE_OUTPUT(i) (PISP_FE_ENABLE_OUTPUT0 << (4 * (i)))
+
+/*
+ * We use the enable flags to show when blocks are "dirty", but we need some
+ * extra ones too.
+ */
+enum pisp_fe_dirty {
+	PISP_FE_DIRTY_GLOBAL = 0x0001,
+	PISP_FE_DIRTY_FLOATING = 0x0002,
+	PISP_FE_DIRTY_OUTPUT_AXI = 0x0004
+};
+
+struct pisp_fe_global_config {
+	__u32 enables;
+	__u8 bayer_order;
+	__u8 pad[3];
+} __attribute__((packed));
+
+struct pisp_fe_input_axi_config {
+	/* burst length minus one, in the range 0..15; OR'd with flags */
+	__u8 maxlen_flags;
+	/* { prot[2:0], cache[3:0] } fields */
+	__u8 cache_prot;
+	/* QoS (only 4 LS bits are used) */
+	__u16 qos;
+} __attribute__((packed));
+
+struct pisp_fe_output_axi_config {
+	/* burst length minus one, in the range 0..15; OR'd with flags */
+	__u8 maxlen_flags;
+	/* { prot[2:0], cache[3:0] } fields */
+	__u8 cache_prot;
+	/* QoS (4 bitfields of 4 bits each for different panic levels) */
+	__u16 qos;
+	/*  For Panic mode: Output FIFO panic threshold */
+	__u16 thresh;
+	/*  For Panic mode: Output FIFO statistics throttle threshold */
+	__u16 throttle;
+} __attribute__((packed));
+
+struct pisp_fe_input_config {
+	__u8 streaming;
+	__u8 pad[3];
+	struct pisp_image_format_config format;
+	struct pisp_fe_input_axi_config axi;
+	/* Extra cycles delay before issuing each burst request */
+	__u8 holdoff;
+	__u8 pad2[3];
+} __attribute__((packed));
+
+struct pisp_fe_output_config {
+	struct pisp_image_format_config format;
+	__u16 ilines;
+	__u8 pad[2];
+} __attribute__((packed));
+
+struct pisp_fe_input_buffer_config {
+	__u32 addr_lo;
+	__u32 addr_hi;
+	__u16 frame_id;
+	__u16 pad;
+} __attribute__((packed));
+
+#define PISP_FE_DECOMPAND_LUT_SIZE 65
+
+struct pisp_fe_decompand_config {
+	__u16 lut[PISP_FE_DECOMPAND_LUT_SIZE];
+	__u16 pad;
+} __attribute__((packed));
+
+struct pisp_fe_dpc_config {
+	__u8 coeff_level;
+	__u8 coeff_range;
+	__u8 coeff_range2;
+#define PISP_FE_DPC_FLAG_FOLDBACK 1
+#define PISP_FE_DPC_FLAG_VFLAG 2
+	__u8 flags;
+} __attribute__((packed));
+
+#define PISP_FE_LSC_LUT_SIZE 16
+
+struct pisp_fe_lsc_config {
+	__u8 shift;
+	__u8 pad0;
+	__u16 scale;
+	__u16 centre_x;
+	__u16 centre_y;
+	__u16 lut[PISP_FE_LSC_LUT_SIZE];
+} __attribute__((packed));
+
+struct pisp_fe_rgby_config {
+	__u16 gain_r;
+	__u16 gain_g;
+	__u16 gain_b;
+	__u8 maxflag;
+	__u8 pad;
+} __attribute__((packed));
+
+struct pisp_fe_agc_stats_config {
+	__u16 offset_x;
+	__u16 offset_y;
+	__u16 size_x;
+	__u16 size_y;
+	/* each weight only 4 bits */
+	__u8 weights[PISP_AGC_STATS_NUM_ZONES / 2];
+	__u16 row_offset_x;
+	__u16 row_offset_y;
+	__u16 row_size_x;
+	__u16 row_size_y;
+	__u8 row_shift;
+	__u8 float_shift;
+	__u8 pad1[2];
+} __attribute__((packed));
+
+struct pisp_fe_awb_stats_config {
+	__u16 offset_x;
+	__u16 offset_y;
+	__u16 size_x;
+	__u16 size_y;
+	__u8 shift;
+	__u8 pad[3];
+	__u16 r_lo;
+	__u16 r_hi;
+	__u16 g_lo;
+	__u16 g_hi;
+	__u16 b_lo;
+	__u16 b_hi;
+} __attribute__((packed));
+
+struct pisp_fe_floating_stats_region {
+	__u16 offset_x;
+	__u16 offset_y;
+	__u16 size_x;
+	__u16 size_y;
+} __attribute__((packed));
+
+struct pisp_fe_floating_stats_config {
+	struct pisp_fe_floating_stats_region
+		regions[PISP_FLOATING_STATS_NUM_ZONES];
+} __attribute__((packed));
+
+#define PISP_FE_CDAF_NUM_WEIGHTS 8
+
+struct pisp_fe_cdaf_stats_config {
+	__u16 noise_constant;
+	__u16 noise_slope;
+	__u16 offset_x;
+	__u16 offset_y;
+	__u16 size_x;
+	__u16 size_y;
+	__u16 skip_x;
+	__u16 skip_y;
+	__u32 mode;
+} __attribute__((packed));
+
+struct pisp_fe_stats_buffer_config {
+	__u32 addr_lo;
+	__u32 addr_hi;
+} __attribute__((packed));
+
+struct pisp_fe_crop_config {
+	__u16 offset_x;
+	__u16 offset_y;
+	__u16 width;
+	__u16 height;
+} __attribute__((packed));
+
+enum pisp_fe_downscale_flags {
+	/* downscale the four Bayer components independently... */
+	DOWNSCALE_BAYER = 1,
+	/* ...without trying to preserve their spatial relationship */
+	DOWNSCALE_BIN = 2,
+};
+
+struct pisp_fe_downscale_config {
+	__u8 xin;
+	__u8 xout;
+	__u8 yin;
+	__u8 yout;
+	__u8 flags; /* enum pisp_fe_downscale_flags */
+	__u8 pad[3];
+	__u16 output_width;
+	__u16 output_height;
+} __attribute__((packed));
+
+struct pisp_fe_output_buffer_config {
+	__u32 addr_lo;
+	__u32 addr_hi;
+} __attribute__((packed));
+
+/* Each of the two output channels/branches: */
+struct pisp_fe_output_branch_config {
+	struct pisp_fe_crop_config crop;
+	struct pisp_fe_downscale_config downscale;
+	struct pisp_compress_config compress;
+	struct pisp_fe_output_config output;
+	__u32 pad;
+} __attribute__((packed));
+
+/* And finally one to rule them all: */
+struct pisp_fe_config {
+	/* I/O configuration: */
+	struct pisp_fe_stats_buffer_config stats_buffer;
+	struct pisp_fe_output_buffer_config output_buffer[PISP_FE_NUM_OUTPUTS];
+	struct pisp_fe_input_buffer_config input_buffer;
+	/* processing configuration: */
+	struct pisp_fe_global_config global;
+	struct pisp_fe_input_config input;
+	struct pisp_decompress_config decompress;
+	struct pisp_fe_decompand_config decompand;
+	struct pisp_bla_config bla;
+	struct pisp_fe_dpc_config dpc;
+	struct pisp_fe_crop_config stats_crop;
+	__u32 spare1; /* placeholder for future decimate configuration */
+	struct pisp_bla_config blc;
+	struct pisp_fe_rgby_config rgby;
+	struct pisp_fe_lsc_config lsc;
+	struct pisp_fe_agc_stats_config agc_stats;
+	struct pisp_fe_awb_stats_config awb_stats;
+	struct pisp_fe_cdaf_stats_config cdaf_stats;
+	struct pisp_fe_floating_stats_config floating_stats;
+	struct pisp_fe_output_axi_config output_axi;
+	struct pisp_fe_output_branch_config ch[PISP_FE_NUM_OUTPUTS];
+	/* non-register fields: */
+	__u32 dirty_flags; /* these use pisp_fe_enable */
+	__u32 dirty_flags_extra; /* these use pisp_fe_dirty */
+} __attribute__((packed));
+
+#endif /* _UAPI_PISP_FE_CONFIG_ */
diff --git a/include/uapi/linux/media/raspberrypi/pisp_fe_statistics.h b/include/uapi/linux/media/raspberrypi/pisp_fe_statistics.h
new file mode 100644
index 000000000000..a7d42985aee8
--- /dev/null
+++ b/include/uapi/linux/media/raspberrypi/pisp_fe_statistics.h
@@ -0,0 +1,64 @@
+/* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */
+/*
+ * RP1 PiSP Front End statistics definitions
+ *
+ * Copyright (C) 2021 - Raspberry Pi Ltd.
+ *
+ */
+#ifndef _UAPI_PISP_FE_STATISTICS_H_
+#define _UAPI_PISP_FE_STATISTICS_H_
+
+#include <linux/types.h>
+
+#define PISP_FLOATING_STATS_NUM_ZONES 4
+#define PISP_AGC_STATS_NUM_BINS 1024
+#define PISP_AGC_STATS_SIZE 16
+#define PISP_AGC_STATS_NUM_ZONES (PISP_AGC_STATS_SIZE * PISP_AGC_STATS_SIZE)
+#define PISP_AGC_STATS_NUM_ROW_SUMS 512
+
+struct pisp_agc_statistics_zone {
+	__u64 Y_sum;
+	__u32 counted;
+	__u32 pad;
+} __attribute__((packed));
+
+struct pisp_agc_statistics {
+	__u32 row_sums[PISP_AGC_STATS_NUM_ROW_SUMS];
+	/*
+	 * 32-bits per bin means an image (just less than) 16384x16384 pixels
+	 * in size can weight every pixel from 0 to 15.
+	 */
+	__u32 histogram[PISP_AGC_STATS_NUM_BINS];
+	struct pisp_agc_statistics_zone floating[PISP_FLOATING_STATS_NUM_ZONES];
+} __attribute__((packed));
+
+#define PISP_AWB_STATS_SIZE 32
+#define PISP_AWB_STATS_NUM_ZONES (PISP_AWB_STATS_SIZE * PISP_AWB_STATS_SIZE)
+
+struct pisp_awb_statistics_zone {
+	__u32 R_sum;
+	__u32 G_sum;
+	__u32 B_sum;
+	__u32 counted;
+} __attribute__((packed));
+
+struct pisp_awb_statistics {
+	struct pisp_awb_statistics_zone zones[PISP_AWB_STATS_NUM_ZONES];
+	struct pisp_awb_statistics_zone floating[PISP_FLOATING_STATS_NUM_ZONES];
+} __attribute__((packed));
+
+#define PISP_CDAF_STATS_SIZE 8
+#define PISP_CDAF_STATS_NUM_FOMS (PISP_CDAF_STATS_SIZE * PISP_CDAF_STATS_SIZE)
+
+struct pisp_cdaf_statistics {
+	__u64 foms[PISP_CDAF_STATS_NUM_FOMS];
+	__u64 floating[PISP_FLOATING_STATS_NUM_ZONES];
+} __attribute__((packed));
+
+struct pisp_statistics {
+	struct pisp_awb_statistics awb;
+	struct pisp_agc_statistics agc;
+	struct pisp_cdaf_statistics cdaf;
+} __attribute__((packed));
+
+#endif /* _UAPI_PISP_FE_STATISTICS_H_ */
-- 
cgit v1.2.3


From 30fe661eb9d32d0aa65a86507572bd654a374c56 Mon Sep 17 00:00:00 2001
From: Sakari Ailus <sakari.ailus@linux.intel.com>
Date: Tue, 17 Sep 2024 15:14:44 +0300
Subject: media: Documentation: Deprecate s_stream video op, update docs

The scope of the s_stream video operation is now fully supported by
{enable,disable}_streams. Explicitly document the s_stream() op as
deprecated and update the related documentation.

Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Reviewed-by: Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
Reviewed-by: Tomi Valkeinen <tomi.valkeinen@ideasonboard.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 include/media/v4l2-subdev.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/media/v4l2-subdev.h b/include/media/v4l2-subdev.h
index 8daa0929865c..3cc6b4a5935f 100644
--- a/include/media/v4l2-subdev.h
+++ b/include/media/v4l2-subdev.h
@@ -450,8 +450,9 @@ enum v4l2_subdev_pre_streamon_flags {
  *	already started or stopped subdev. Also see call_s_stream wrapper in
  *	v4l2-subdev.c.
  *
- *	New drivers should instead implement &v4l2_subdev_pad_ops.enable_streams
- *	and &v4l2_subdev_pad_ops.disable_streams operations, and use
+ *	This callback is DEPRECATED. New drivers should instead implement
+ *	&v4l2_subdev_pad_ops.enable_streams and
+ *	&v4l2_subdev_pad_ops.disable_streams operations, and use
  *	v4l2_subdev_s_stream_helper for the &v4l2_subdev_video_ops.s_stream
  *	operation to support legacy users.
  *
-- 
cgit v1.2.3


From 646aed96d51088c1f24b4d710fa6fa4a17ecc484 Mon Sep 17 00:00:00 2001
From: Sakari Ailus <sakari.ailus@linux.intel.com>
Date: Tue, 17 Sep 2024 15:31:47 +0300
Subject: media: Documentation: Update {enable,disable}_streams documentation

Document the expected {enable,disable}_streams callback behaviour for
drivers that are stream-unaware i.e. don't specify the
V4L2_SUBDEV_CAP_STREAMS sub-device capability flag. In this specific case,
the mask argument can be ignored.

Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Reviewed-by: Tomi Valkeinen <tomi.valkeinen@ideasonboard.com>
Reviewed-by: Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 include/media/v4l2-subdev.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include')

diff --git a/include/media/v4l2-subdev.h b/include/media/v4l2-subdev.h
index 3cc6b4a5935f..ff63fb6046b1 100644
--- a/include/media/v4l2-subdev.h
+++ b/include/media/v4l2-subdev.h
@@ -834,11 +834,19 @@ struct v4l2_subdev_state {
  *	v4l2_subdev_init_finalize() at initialization time). Do not call
  *	directly, use v4l2_subdev_enable_streams() instead.
  *
+ *	Drivers that support only a single stream without setting the
+ *	V4L2_SUBDEV_CAP_STREAMS sub-device capability flag can ignore the mask
+ *	argument.
+ *
  * @disable_streams: Disable the streams defined in streams_mask on the given
  *	source pad. Subdevs that implement this operation must use the active
  *	state management provided by the subdev core (enabled through a call to
  *	v4l2_subdev_init_finalize() at initialization time). Do not call
  *	directly, use v4l2_subdev_disable_streams() instead.
+ *
+ *	Drivers that support only a single stream without setting the
+ *	V4L2_SUBDEV_CAP_STREAMS sub-device capability flag can ignore the mask
+ *	argument.
  */
 struct v4l2_subdev_pad_ops {
 	int (*enum_mbus_code)(struct v4l2_subdev *sd,
-- 
cgit v1.2.3


From fd0e579bc62cb1766469866a89471d2d8c8b721d Mon Sep 17 00:00:00 2001
From: Sakari Ailus <sakari.ailus@linux.intel.com>
Date: Tue, 17 Sep 2024 17:58:58 +0300
Subject: media: Documentation: Improve v4l2_subdev_{en,dis}able_streams
 documentation

Document that callers of v4l2_subdev_{en,dis}able_streams() need to set
the mask to BIT_ULL(0).

Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Reviewed-by: Tomi Valkeinen <tomi.valkeinen@ideasonboard.com>
Reviewed-by: Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 include/media/v4l2-subdev.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/media/v4l2-subdev.h b/include/media/v4l2-subdev.h
index ff63fb6046b1..2f2200875b03 100644
--- a/include/media/v4l2-subdev.h
+++ b/include/media/v4l2-subdev.h
@@ -1685,6 +1685,8 @@ int v4l2_subdev_routing_validate(struct v4l2_subdev *sd,
  * function implements a best-effort compatibility by calling the .s_stream()
  * operation, limited to subdevs that have a single source pad.
  *
+ * Drivers that are not stream-aware shall set @streams_mask to BIT_ULL(0).
+ *
  * Return:
  * * 0: Success
  * * -EALREADY: One of the streams in streams_mask is already enabled
@@ -1715,6 +1717,8 @@ int v4l2_subdev_enable_streams(struct v4l2_subdev *sd, u32 pad,
  * function implements a best-effort compatibility by calling the .s_stream()
  * operation, limited to subdevs that have a single source pad.
  *
+ * Drivers that are not stream-aware shall set @streams_mask to BIT_ULL(0).
+ *
  * Return:
  * * 0: Success
  * * -EALREADY: One of the streams in streams_mask is not enabled
-- 
cgit v1.2.3


From 719258c55f7e9c123ef2a0941c6730b6d43c1bc2 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Date: Mon, 9 Sep 2024 15:49:41 +0200
Subject: mfd: palmas: Constify strings with regulator names

The names of regulators are static const strings, so pointers can be made
as pointers to const for code safety.

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Link: https://lore.kernel.org/r/20240909134941.121847-1-krzysztof.kozlowski@linaro.org
Signed-off-by: Lee Jones <lee@kernel.org>
---
 include/linux/mfd/palmas.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/mfd/palmas.h b/include/linux/mfd/palmas.h
index eda1ffd99c1a..dabcc0dea802 100644
--- a/include/linux/mfd/palmas.h
+++ b/include/linux/mfd/palmas.h
@@ -98,8 +98,8 @@ struct palmas_sleep_requestor_info {
 };
 
 struct palmas_regs_info {
-	char	*name;
-	char	*sname;
+	const char	*name;
+	const char	*sname;
 	u8	vsel_addr;
 	u8	ctrl_addr;
 	u8	tstep_addr;
-- 
cgit v1.2.3


From d5340a18cd321075442677ce1002d8e163b17b67 Mon Sep 17 00:00:00 2001
From: Dzmitry Sankouski <dsankouski@gmail.com>
Date: Thu, 26 Sep 2024 13:28:10 +0300
Subject: mfd: max77693: Remove unused max77693_irq_source declarations

Remove `enum max77693_irq_source` declaration because unused.

Signed-off-by: Dzmitry Sankouski <dsankouski@gmail.com>
Reviewed-by: Krzysztof Kozlowski <krzk@kernel.org>
Link: https://lore.kernel.org/r/20240617-starqltechn_integration_upstream-v5-1-125d9228d751@gmail.com
Signed-off-by: Lee Jones <lee@kernel.org>
---
 include/linux/mfd/max77693-private.h | 11 -----------
 1 file changed, 11 deletions(-)

(limited to 'include')

diff --git a/include/linux/mfd/max77693-private.h b/include/linux/mfd/max77693-private.h
index 20c5e02ed9da..c324d548619e 100644
--- a/include/linux/mfd/max77693-private.h
+++ b/include/linux/mfd/max77693-private.h
@@ -419,17 +419,6 @@ enum max77693_haptic_reg {
 #define MAX77693_CONFIG2_MEN            6
 #define MAX77693_CONFIG2_HTYP           5
 
-enum max77693_irq_source {
-	LED_INT = 0,
-	TOPSYS_INT,
-	CHG_INT,
-	MUIC_INT1,
-	MUIC_INT2,
-	MUIC_INT3,
-
-	MAX77693_IRQ_GROUP_NR,
-};
-
 #define SRC_IRQ_CHARGER			BIT(0)
 #define SRC_IRQ_TOP			BIT(1)
 #define SRC_IRQ_FLASH			BIT(2)
-- 
cgit v1.2.3


From bf231e5febcf9358d7e70a2c6974548f7f3e4f61 Mon Sep 17 00:00:00 2001
From: Dzmitry Sankouski <dsankouski@gmail.com>
Date: Thu, 26 Sep 2024 12:47:31 +0300
Subject: mfd: sec-core: Add support for the Samsung s2dos05

S2DOS05 is a panel/touchscreen PMIC, often found in
Samsung phones. We define regulator sub-device for which driver will
be added in subsequent patch. The device also has ADC for power and
current measurements.

Signed-off-by: Dzmitry Sankouski <dsankouski@gmail.com>
Reviewed-by: Krzysztof Kozlowski <krzk@kernel.org>
Link: https://lore.kernel.org/r/20240617-starqltechn_integration_upstream-v5-2-ea1109029ba5@gmail.com
Signed-off-by: Lee Jones <lee@kernel.org>
---
 include/linux/mfd/samsung/core.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/mfd/samsung/core.h b/include/linux/mfd/samsung/core.h
index a212b9f72bc9..750274d41fc0 100644
--- a/include/linux/mfd/samsung/core.h
+++ b/include/linux/mfd/samsung/core.h
@@ -37,6 +37,7 @@ struct gpio_desc;
 
 enum sec_device_type {
 	S5M8767X,
+	S2DOS05,
 	S2MPA01,
 	S2MPS11X,
 	S2MPS13X,
-- 
cgit v1.2.3


From 2b627246c0cf2b54f1b17531dde964cf5b99b2a5 Mon Sep 17 00:00:00 2001
From: Pavan Holla <pholla@chromium.org>
Date: Tue, 10 Sep 2024 10:15:21 +0000
Subject: platform/chrome: Update EC feature flags
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Define EC_FEATURE_UCSI_PPM to enable usage of the cros_ec_ucsi
driver. Also, add any feature flags that are implemented by the EC
but are missing in the kernel header.

Signed-off-by: Pavan Holla <pholla@chromium.org>
Signed-off-by: Łukasz Bartosik <ukaszb@chromium.org>
Acked-by: Tzung-Bi Shih <tzungbi@kernel.org>
Link: https://lore.kernel.org/r/20240910101527.603452-3-ukaszb@chromium.org
Signed-off-by: Lee Jones <lee@kernel.org>
---
 include/linux/platform_data/cros_ec_commands.h | 32 ++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)

(limited to 'include')

diff --git a/include/linux/platform_data/cros_ec_commands.h b/include/linux/platform_data/cros_ec_commands.h
index e574b790be6f..b3c4993e656e 100644
--- a/include/linux/platform_data/cros_ec_commands.h
+++ b/include/linux/platform_data/cros_ec_commands.h
@@ -1312,6 +1312,38 @@ enum ec_feature_code {
 	 * The EC supports the AP composing VDMs for us to send.
 	 */
 	EC_FEATURE_TYPEC_AP_VDM_SEND = 46,
+	/*
+	 * The EC supports system safe mode panic recovery.
+	 */
+	EC_FEATURE_SYSTEM_SAFE_MODE = 47,
+	/*
+	 * The EC will reboot on runtime assertion failures.
+	 */
+	EC_FEATURE_ASSERT_REBOOTS = 48,
+	/*
+	 * The EC image is built with tokenized logging enabled.
+	 */
+	EC_FEATURE_TOKENIZED_LOGGING = 49,
+	/*
+	 * The EC supports triggering an STB dump.
+	 */
+	EC_FEATURE_AMD_STB_DUMP = 50,
+	/*
+	 * The EC supports memory dump commands.
+	 */
+	EC_FEATURE_MEMORY_DUMP = 51,
+	/*
+	 * The EC supports DP2.1 capability
+	 */
+	EC_FEATURE_TYPEC_DP2_1 = 52,
+	/*
+	 * The MCU is System Companion Processor Core 1
+	 */
+	EC_FEATURE_SCP_C1 = 53,
+	/*
+	 * The EC supports UCSI PPM.
+	 */
+	EC_FEATURE_UCSI_PPM = 54,
 };
 
 #define EC_FEATURE_MASK_0(event_code) BIT(event_code % 32)
-- 
cgit v1.2.3


From 7b5a58952fc3b51905c2963647485565df1e5e26 Mon Sep 17 00:00:00 2001
From: Akash Kumar <quic_akakum@quicinc.com>
Date: Fri, 27 Sep 2024 20:51:38 +0530
Subject: usb: gadget: uvc: configfs: Add frame-based frame format support

Add support for frame-based frame format, which can be used to support
multiple formats like H264 or H265, in addition to MJPEG and YUV frames.

The frame-based format is set to H264 by default, but it can be updated
to other formats by modifying the GUID through the guid configfs
attribute. Different structures are used for all three formats, as
H264 has a different structure compared to MJPEG and uncompressed
formats. These structures will be passed to the frame make function
based on the active format, using a common frame structure with
additional parameters needed only for frame-based formats. These
parameters are handled at runtime in the UVC driver.

Signed-off-by: Akash Kumar <quic_akakum@quicinc.com>
Link: https://lore.kernel.org/r/20240927152138.31416-1-quic_akakum@quicinc.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/uapi/linux/usb/video.h | 58 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 58 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/usb/video.h b/include/uapi/linux/usb/video.h
index 2ff0e8a3a683..526b5155e23c 100644
--- a/include/uapi/linux/usb/video.h
+++ b/include/uapi/linux/usb/video.h
@@ -597,5 +597,63 @@ struct UVC_FRAME_MJPEG(n) {				\
 	__le32 dwFrameInterval[n];			\
 } __attribute__ ((packed))
 
+/* Frame Based Payload - 3.1.1. Frame Based Video Format Descriptor */
+struct uvc_format_framebased {
+	__u8  bLength;
+	__u8  bDescriptorType;
+	__u8  bDescriptorSubType;
+	__u8  bFormatIndex;
+	__u8  bNumFrameDescriptors;
+	__u8  guidFormat[16];
+	__u8  bBitsPerPixel;
+	__u8  bDefaultFrameIndex;
+	__u8  bAspectRatioX;
+	__u8  bAspectRatioY;
+	__u8  bmInterfaceFlags;
+	__u8  bCopyProtect;
+	__u8  bVariableSize;
+} __attribute__((__packed__));
+
+#define UVC_DT_FORMAT_FRAMEBASED_SIZE                  28
+
+/* Frame Based Payload - 3.1.2. Frame Based Video Frame Descriptor */
+struct uvc_frame_framebased {
+	__u8  bLength;
+	__u8  bDescriptorType;
+	__u8  bDescriptorSubType;
+	__u8  bFrameIndex;
+	__u8  bmCapabilities;
+	__u16 wWidth;
+	__u16 wHeight;
+	__u32 dwMinBitRate;
+	__u32 dwMaxBitRate;
+	__u32 dwDefaultFrameInterval;
+	__u8  bFrameIntervalType;
+	__u32 dwBytesPerLine;
+	__u32 dwFrameInterval[];
+} __attribute__((__packed__));
+
+#define UVC_DT_FRAME_FRAMEBASED_SIZE(n)                        (26+4*(n))
+
+#define UVC_FRAME_FRAMEBASED(n) \
+	uvc_frame_framebased_##n
+
+#define DECLARE_UVC_FRAME_FRAMEBASED(n)			\
+struct UVC_FRAME_FRAMEBASED(n) {			\
+	__u8  bLength;					\
+	__u8  bDescriptorType;				\
+	__u8  bDescriptorSubType;                       \
+	__u8  bFrameIndex;                              \
+	__u8  bmCapabilities;                           \
+	__u16 wWidth;                                   \
+	__u16 wHeight;                                  \
+	__u32 dwMinBitRate;                             \
+	__u32 dwMaxBitRate;                             \
+	__u32 dwDefaultFrameInterval;                   \
+	__u8  bFrameIntervalType;                       \
+	__u32 dwBytesPerLine;                           \
+	__u32 dwFrameInterval[n];                       \
+} __attribute__ ((packed))
+
 #endif /* __LINUX_USB_VIDEO_H */
 
-- 
cgit v1.2.3


From 522249f05c5551aec9ec0ba9b6438f1ec19c138d Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Thu, 3 Oct 2024 16:29:22 +0200
Subject: fanotify: allow reporting errors on failure to open fd

When working in "fd mode", fanotify_read() needs to open an fd
from a dentry to report event->fd to userspace.

Opening an fd from dentry can fail for several reasons.
For example, when tasks are gone and we try to open their
/proc files or we try to open a WRONLY file like in sysfs
or when trying to open a file that was deleted on the
remote network server.

Add a new flag FAN_REPORT_FD_ERROR for fanotify_init().
For a group with FAN_REPORT_FD_ERROR, we will send the
event with the error instead of the open fd, otherwise
userspace may not get the error at all.

For an overflow event, we report -EBADF to avoid confusing FAN_NOFD
with -EPERM.  Similarly for pidfd open errors we report either -ESRCH
or the open error instead of FAN_NOPIDFD and FAN_EPIDFD.

In any case, userspace will not know which file failed to
open, so add a debug print for further investigation.

Reported-by: Krishna Vivek Vitta <kvitta@microsoft.com>
Link: https://lore.kernel.org/linux-fsdevel/SI2P153MB07182F3424619EDDD1F393EED46D2@SI2P153MB0718.APCP153.PROD.OUTLOOK.COM/
Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Jan Kara <jack@suse.cz>
Link: https://patch.msgid.link/20241003142922.111539-1-amir73il@gmail.com
---
 include/linux/fanotify.h      | 1 +
 include/uapi/linux/fanotify.h | 1 +
 2 files changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h
index 4f1c4f603118..89ff45bd6f01 100644
--- a/include/linux/fanotify.h
+++ b/include/linux/fanotify.h
@@ -36,6 +36,7 @@
 #define FANOTIFY_ADMIN_INIT_FLAGS	(FANOTIFY_PERM_CLASSES | \
 					 FAN_REPORT_TID | \
 					 FAN_REPORT_PIDFD | \
+					 FAN_REPORT_FD_ERROR | \
 					 FAN_UNLIMITED_QUEUE | \
 					 FAN_UNLIMITED_MARKS)
 
diff --git a/include/uapi/linux/fanotify.h b/include/uapi/linux/fanotify.h
index a37de58ca571..34f221d3a1b9 100644
--- a/include/uapi/linux/fanotify.h
+++ b/include/uapi/linux/fanotify.h
@@ -60,6 +60,7 @@
 #define FAN_REPORT_DIR_FID	0x00000400	/* Report unique directory id */
 #define FAN_REPORT_NAME		0x00000800	/* Report events with name */
 #define FAN_REPORT_TARGET_FID	0x00001000	/* Report dirent target id  */
+#define FAN_REPORT_FD_ERROR	0x00002000	/* event->fd can report error */
 
 /* Convenience macro - FAN_REPORT_NAME requires FAN_REPORT_DIR_FID */
 #define FAN_REPORT_DFID_NAME	(FAN_REPORT_DIR_FID | FAN_REPORT_NAME)
-- 
cgit v1.2.3


From d6083f040d5d8f8d748462c77e90547097df936e Mon Sep 17 00:00:00 2001
From: Leon Hwang <leon.hwang@linux.dev>
Date: Tue, 15 Oct 2024 23:02:06 +0800
Subject: bpf: Prevent tailcall infinite loop caused by freplace

There is a potential infinite loop issue that can occur when using a
combination of tail calls and freplace.

In an upcoming selftest, the attach target for entry_freplace of
tailcall_freplace.c is subprog_tc of tc_bpf2bpf.c, while the tail call in
entry_freplace leads to entry_tc. This results in an infinite loop:

entry_tc -> subprog_tc -> entry_freplace --tailcall-> entry_tc.

The problem arises because the tail_call_cnt in entry_freplace resets to
zero each time entry_freplace is executed, causing the tail call mechanism
to never terminate, eventually leading to a kernel panic.

To fix this issue, the solution is twofold:

1. Prevent updating a program extended by an freplace program to a
   prog_array map.
2. Prevent extending a program that is already part of a prog_array map
   with an freplace program.

This ensures that:

* If a program or its subprogram has been extended by an freplace program,
  it can no longer be updated to a prog_array map.
* If a program has been added to a prog_array map, neither it nor its
  subprograms can be extended by an freplace program.

Moreover, an extension program should not be tailcalled. As such, return
-EINVAL if the program has a type of BPF_PROG_TYPE_EXT when adding it to a
prog_array map.

Additionally, fix a minor code style issue by replacing eight spaces with a
tab for proper formatting.

Reviewed-by: Eduard Zingerman <eddyz87@gmail.com>
Signed-off-by: Leon Hwang <leon.hwang@linux.dev>
Link: https://lore.kernel.org/r/20241015150207.70264-2-leon.hwang@linux.dev
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/bpf.h | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 19d8ca8ac960..0c216e71cec7 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1292,8 +1292,12 @@ void *__bpf_dynptr_data_rw(const struct bpf_dynptr_kern *ptr, u32 len);
 bool __bpf_dynptr_is_rdonly(const struct bpf_dynptr_kern *ptr);
 
 #ifdef CONFIG_BPF_JIT
-int bpf_trampoline_link_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr);
-int bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr);
+int bpf_trampoline_link_prog(struct bpf_tramp_link *link,
+			     struct bpf_trampoline *tr,
+			     struct bpf_prog *tgt_prog);
+int bpf_trampoline_unlink_prog(struct bpf_tramp_link *link,
+			       struct bpf_trampoline *tr,
+			       struct bpf_prog *tgt_prog);
 struct bpf_trampoline *bpf_trampoline_get(u64 key,
 					  struct bpf_attach_target_info *tgt_info);
 void bpf_trampoline_put(struct bpf_trampoline *tr);
@@ -1374,12 +1378,14 @@ void bpf_jit_uncharge_modmem(u32 size);
 bool bpf_prog_has_trampoline(const struct bpf_prog *prog);
 #else
 static inline int bpf_trampoline_link_prog(struct bpf_tramp_link *link,
-					   struct bpf_trampoline *tr)
+					   struct bpf_trampoline *tr,
+					   struct bpf_prog *tgt_prog)
 {
 	return -ENOTSUPP;
 }
 static inline int bpf_trampoline_unlink_prog(struct bpf_tramp_link *link,
-					     struct bpf_trampoline *tr)
+					     struct bpf_trampoline *tr,
+					     struct bpf_prog *tgt_prog)
 {
 	return -ENOTSUPP;
 }
@@ -1483,6 +1489,9 @@ struct bpf_prog_aux {
 	bool xdp_has_frags;
 	bool exception_cb;
 	bool exception_boundary;
+	bool is_extended; /* true if extended by freplace program */
+	u64 prog_array_member_cnt; /* counts how many times as member of prog_array */
+	struct mutex ext_mutex; /* mutex for is_extended and prog_array_member_cnt */
 	struct bpf_arena *arena;
 	/* BTF_KIND_FUNC_PROTO for valid attach_btf_id */
 	const struct btf_type *attach_func_proto;
-- 
cgit v1.2.3


From ce1dfe6d328966b75821c1f043a940eb2569768a Mon Sep 17 00:00:00 2001
From: Damien Le Moal <dlemoal@kernel.org>
Date: Sat, 12 Oct 2024 20:32:43 +0900
Subject: PCI: endpoint: Introduce pci_epc_mem_map()/unmap()

Some endpoint controllers have requirements on the alignment of the
controller physical memory address that must be used to map a RC PCI
address region. For instance, the endpoint controller of the RK3399 SoC
uses at most the lower 20 bits of a physical memory address region as
the lower bits of a RC PCI address region. For mapping a PCI address
region of size bytes starting from pci_addr, the exact number of
address bits used is the number of address bits changing in the address
range [pci_addr..pci_addr + size - 1]. For this example, this creates
the following constraints:
1) The offset into the controller physical memory allocated for a
   mapping depends on the mapping size *and* the starting PCI address
   for the mapping.
2) A mapping size cannot exceed the controller windows size (1MB) minus
   the offset needed into the allocated physical memory, which can end
   up being a smaller size than the desired mapping size.

Handling these constraints independently of the controller being used
in an endpoint function driver is not possible with the current EPC
API as only the ->align field in struct pci_epc_features is provided
but used for BAR (inbound ATU mappings) mapping only. A new API is
needed for function drivers to discover mapping constraints and handle
non-static requirements based on the RC PCI address range to access.

Introduce the endpoint controller operation ->align_addr() to allow
the EPC core functions to obtain the size and the offset into a
controller address region that must be allocated and mapped to access
a RC PCI address region. The size of the mapping provided by the
align_addr() operation can then be used as the size argument for the
function pci_epc_mem_alloc_addr() and the offset into the allocated
controller memory provided can be used to correctly handle data
transfers. For endpoint controllers that have PCI address alignment
constraints, the align_addr() operation may indicate upon return an
effective PCI address mapping size that is smaller (but not 0) than the
requested PCI address region size.

The controller ->align_addr() operation is optional: controllers that
do not have any alignment constraints for mapping RC PCI address regions
do not need to implement this operation. For such controllers, it is
always assumed that the mapping size is equal to the requested size of
the PCI region and that the mapping offset is 0.

The function pci_epc_mem_map() is introduced to use this new controller
operation (if it is defined) to handle controller memory allocation and
mapping to a RC PCI address region in endpoint function drivers.

This function first uses the ->align_addr() controller operation to
determine the controller memory address size (and offset into) needed
for mapping an RC PCI address region. The result of this operation is
used to allocate a controller physical memory region using
pci_epc_mem_alloc_addr() and then to map that memory to the RC PCI
address space with pci_epc_map_addr().

Since ->align_addr() () may indicate that not all of a RC PCI address
region can be mapped, pci_epc_mem_map() may only partially map the RC
PCI address region specified. It is the responsibility of the caller
(an endpoint function driver) to handle such smaller mapping by
repeatedly using pci_epc_mem_map() over the desried PCI address range.

The counterpart of pci_epc_mem_map() to unmap and free a mapped
controller memory address region is pci_epc_mem_unmap().

Both functions operate using the new struct pci_epc_map data structure.
This new structure represents a mapping PCI address, mapping effective
size, the size of the controller memory needed for the mapping as well
as the physical and virtual CPU addresses of the mapping (phys_base and
virt_base fields). For convenience, the physical and virtual CPU
addresses within that mapping to use to access the target RC PCI address
region are also provided (phys_addr and virt_addr fields).

Endpoint function drivers can use struct pci_epc_map to access the
mapped RC PCI address region using the ->virt_addr and ->pci_size
fields.

Co-developed-by: Rick Wertenbroek <rick.wertenbroek@gmail.com>
Signed-off-by: Rick Wertenbroek <rick.wertenbroek@gmail.com>
Signed-off-by: Damien Le Moal <dlemoal@kernel.org>
Reviewed-by: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
Link: https://lore.kernel.org/r/20241012113246.95634-4-dlemoal@kernel.org
[mani: squashed the patch that changed phy_addr_t to u64]
Signed-off-by: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
---
 include/linux/pci-epc.h | 38 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 38 insertions(+)

(limited to 'include')

diff --git a/include/linux/pci-epc.h b/include/linux/pci-epc.h
index 42ef06136bd1..de8cc3658220 100644
--- a/include/linux/pci-epc.h
+++ b/include/linux/pci-epc.h
@@ -32,11 +32,43 @@ pci_epc_interface_string(enum pci_epc_interface_type type)
 	}
 }
 
+/**
+ * struct pci_epc_map - information about EPC memory for mapping a RC PCI
+ *                      address range
+ * @pci_addr: start address of the RC PCI address range to map
+ * @pci_size: size of the RC PCI address range mapped from @pci_addr
+ * @map_pci_addr: RC PCI address used as the first address mapped (may be lower
+ *                than @pci_addr)
+ * @map_size: size of the controller memory needed for mapping the RC PCI address
+ *            range @pci_addr..@pci_addr+@pci_size
+ * @phys_base: base physical address of the allocated EPC memory for mapping the
+ *             RC PCI address range
+ * @phys_addr: physical address at which @pci_addr is mapped
+ * @virt_base: base virtual address of the allocated EPC memory for mapping the
+ *             RC PCI address range
+ * @virt_addr: virtual address at which @pci_addr is mapped
+ */
+struct pci_epc_map {
+	u64		pci_addr;
+	size_t		pci_size;
+
+	u64		map_pci_addr;
+	size_t		map_size;
+
+	phys_addr_t	phys_base;
+	phys_addr_t	phys_addr;
+	void __iomem	*virt_base;
+	void __iomem	*virt_addr;
+};
+
 /**
  * struct pci_epc_ops - set of function pointers for performing EPC operations
  * @write_header: ops to populate configuration space header
  * @set_bar: ops to configure the BAR
  * @clear_bar: ops to reset the BAR
+ * @align_addr: operation to get the mapping address, mapping size and offset
+ *		into a controller memory window needed to map an RC PCI address
+ *		region
  * @map_addr: ops to map CPU address to PCI address
  * @unmap_addr: ops to unmap CPU address and PCI address
  * @set_msi: ops to set the requested number of MSI interrupts in the MSI
@@ -61,6 +93,8 @@ struct pci_epc_ops {
 			   struct pci_epf_bar *epf_bar);
 	void	(*clear_bar)(struct pci_epc *epc, u8 func_no, u8 vfunc_no,
 			     struct pci_epf_bar *epf_bar);
+	u64	(*align_addr)(struct pci_epc *epc, u64 pci_addr, size_t *size,
+			      size_t *offset);
 	int	(*map_addr)(struct pci_epc *epc, u8 func_no, u8 vfunc_no,
 			    phys_addr_t addr, u64 pci_addr, size_t size);
 	void	(*unmap_addr)(struct pci_epc *epc, u8 func_no, u8 vfunc_no,
@@ -278,6 +312,10 @@ void __iomem *pci_epc_mem_alloc_addr(struct pci_epc *epc,
 				     phys_addr_t *phys_addr, size_t size);
 void pci_epc_mem_free_addr(struct pci_epc *epc, phys_addr_t phys_addr,
 			   void __iomem *virt_addr, size_t size);
+int pci_epc_mem_map(struct pci_epc *epc, u8 func_no, u8 vfunc_no,
+		    u64 pci_addr, size_t pci_size, struct pci_epc_map *map);
+void pci_epc_mem_unmap(struct pci_epc *epc, u8 func_no, u8 vfunc_no,
+		       struct pci_epc_map *map);
 
 #else
 static inline void pci_epc_init_notify(struct pci_epc *epc)
-- 
cgit v1.2.3


From 5280a14a6079040205a1d968cd80f20448d047c7 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Tue, 15 Oct 2024 12:09:32 -0700
Subject: genirq: Introduce irq_get_nr_irqs() and irq_set_nr_irqs()

Prepare for changing 'nr_irqs' from an exported global variable into a
variable with file scope.

This will prevent accidental changes of assignments to a local variable
'nr_irqs' into assignments to the global 'nr_irqs' variable.

Suppose that a patch would be submitted for review that removes a
declaration of a local variable with the name 'nr_irqs' and that that patch
does not remove all assignments to that local variable. Such a patch
converts an assignment to a local variable into an assignment into a global
variable. If the 'nr_irqs' assignment is more than three lines away from
other changes, the assignment won't be included in the diff context lines
and hence won't be visible without inspecting the modified file.

With these abstraction series applied, such accidental conversions from
assignments to a local variable into an assignment to a global variable are
converted into a compilation error.

Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/all/20241015190953.1266194-2-bvanassche@acm.org
---
 include/linux/irqnr.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/irqnr.h b/include/linux/irqnr.h
index 3496baa0b07f..7419b807b71b 100644
--- a/include/linux/irqnr.h
+++ b/include/linux/irqnr.h
@@ -6,6 +6,8 @@
 
 
 extern int nr_irqs;
+unsigned int irq_get_nr_irqs(void) __pure;
+unsigned int irq_set_nr_irqs(unsigned int nr);
 extern struct irq_desc *irq_to_desc(unsigned int irq);
 unsigned int irq_get_next_irq(unsigned int offset);
 
-- 
cgit v1.2.3


From 1ad2048bf7146efb83bc033147ca1611a7fe8494 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Tue, 15 Oct 2024 12:09:52 -0700
Subject: genirq: Switch to irq_get_nr_irqs()

Use the irq_get_nr_irqs() function instead of the global variable
'nr_irqs'. Cache the result of this function in a local variable in
order not to rely on CSE (common subexpression elimination). Prepare
for changing 'nr_irqs' from an exported global variable into a variable
with file scope.

Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/all/20241015190953.1266194-22-bvanassche@acm.org
---
 include/linux/irqnr.h | 33 +++++++++++++++++++--------------
 1 file changed, 19 insertions(+), 14 deletions(-)

(limited to 'include')

diff --git a/include/linux/irqnr.h b/include/linux/irqnr.h
index 7419b807b71b..a33088d27c54 100644
--- a/include/linux/irqnr.h
+++ b/include/linux/irqnr.h
@@ -11,26 +11,31 @@ unsigned int irq_set_nr_irqs(unsigned int nr);
 extern struct irq_desc *irq_to_desc(unsigned int irq);
 unsigned int irq_get_next_irq(unsigned int offset);
 
-# define for_each_irq_desc(irq, desc)					\
-	for (irq = 0, desc = irq_to_desc(irq); irq < nr_irqs;		\
-	     irq++, desc = irq_to_desc(irq))				\
-		if (!desc)						\
-			;						\
-		else
-
+#define for_each_irq_desc(irq, desc)                                      \
+	for (unsigned int __nr_irqs__ = irq_get_nr_irqs(); __nr_irqs__;   \
+	     __nr_irqs__ = 0)                                             \
+		for (irq = 0, desc = irq_to_desc(irq); irq < __nr_irqs__; \
+		     irq++, desc = irq_to_desc(irq))                      \
+			if (!desc)                                        \
+				;                                         \
+			else
 
 # define for_each_irq_desc_reverse(irq, desc)				\
-	for (irq = nr_irqs - 1, desc = irq_to_desc(irq); irq >= 0;	\
-	     irq--, desc = irq_to_desc(irq))				\
+	for (irq = irq_get_nr_irqs() - 1, desc = irq_to_desc(irq);	\
+	     irq >= 0; irq--, desc = irq_to_desc(irq))			\
 		if (!desc)						\
 			;						\
 		else
 
-# define for_each_active_irq(irq)			\
-	for (irq = irq_get_next_irq(0); irq < nr_irqs;	\
-	     irq = irq_get_next_irq(irq + 1))
+#define for_each_active_irq(irq)                                        \
+	for (unsigned int __nr_irqs__ = irq_get_nr_irqs(); __nr_irqs__; \
+	     __nr_irqs__ = 0)                                           \
+		for (irq = irq_get_next_irq(0); irq < __nr_irqs__;      \
+		     irq = irq_get_next_irq(irq + 1))
 
-#define for_each_irq_nr(irq)                   \
-       for (irq = 0; irq < nr_irqs; irq++)
+#define for_each_irq_nr(irq)                                            \
+	for (unsigned int __nr_irqs__ = irq_get_nr_irqs(); __nr_irqs__; \
+	     __nr_irqs__ = 0)                                           \
+		for (irq = 0; irq < __nr_irqs__; irq++)
 
 #endif
-- 
cgit v1.2.3


From ef4c675dc2961ee533bdc1ea20390761df0af5be Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Tue, 15 Oct 2024 12:09:53 -0700
Subject: genirq: Unexport nr_irqs

Unexport nr_irqs and declare it static now that all code that reads or
modifies nr_irqs has been converted to number_of_interrupts() /
set_number_of_interrupts(). Change the type of 'nr_irqs' from 'int' into
'unsigned int' to match the return type and argument type of the
irq_get_nr_iqs() / irq_set_nr_irqs() functions.

Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/all/20241015190953.1266194-23-bvanassche@acm.org
---
 include/linux/irqnr.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/irqnr.h b/include/linux/irqnr.h
index a33088d27c54..e97206c721a0 100644
--- a/include/linux/irqnr.h
+++ b/include/linux/irqnr.h
@@ -5,7 +5,6 @@
 #include <uapi/linux/irqnr.h>
 
 
-extern int nr_irqs;
 unsigned int irq_get_nr_irqs(void) __pure;
 unsigned int irq_set_nr_irqs(unsigned int nr);
 extern struct irq_desc *irq_to_desc(unsigned int irq);
-- 
cgit v1.2.3


From cf70da29c4993bf23df68b67a82dfa3da8234e75 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= <linux@weissschuh.net>
Date: Sat, 5 Oct 2024 12:06:16 +0200
Subject: power: supply: core: unexport power_supply_property_is_writeable()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Since commit ("power: supply: Drop use_cnt check from power_supply_property_is_writeable()"),
this function does not check use_cnt anymore, making it unsuitable for
general usage. As it is only used by the psy core anyways, remove it
from the public header and unexport it to avoid misusage.

Signed-off-by: Thomas Weißschuh <linux@weissschuh.net>
Link: https://lore.kernel.org/r/20241005-power-supply-cleanups-v1-2-45303b2d0a4d@weissschuh.net
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 include/linux/power_supply.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h
index 14eedefb3ac8..c1d6cb7f4c40 100644
--- a/include/linux/power_supply.h
+++ b/include/linux/power_supply.h
@@ -865,8 +865,6 @@ static inline int power_supply_set_property(struct power_supply *psy,
 			    const union power_supply_propval *val)
 { return 0; }
 #endif
-extern int power_supply_property_is_writeable(struct power_supply *psy,
-					enum power_supply_property psp);
 extern void power_supply_external_power_changed(struct power_supply *psy);
 
 extern struct power_supply *__must_check
-- 
cgit v1.2.3


From 8060bcb109f2d9b85451e84a7a08042da40368df Mon Sep 17 00:00:00 2001
From: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Date: Wed, 16 Oct 2024 16:18:31 +0300
Subject: usb: typec: Add attribute file showing the supported USB modes of the
 port

This attribute file, named "usb_capability", will show the
supported USB modes, which are USB 2.0, USB 3.2 and USB4.
These modes are defined in the USB Type-C (R2.0) and USB
Power Delivery (R3.0 V2.0) Specifications.

Signed-off-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Link: https://lore.kernel.org/r/20241016131834.898599-2-heikki.krogerus@linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/usb/typec.h | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

(limited to 'include')

diff --git a/include/linux/usb/typec.h b/include/linux/usb/typec.h
index 549275f8ac1b..f7edced5b10b 100644
--- a/include/linux/usb/typec.h
+++ b/include/linux/usb/typec.h
@@ -87,6 +87,17 @@ enum typec_orientation {
 	TYPEC_ORIENTATION_REVERSE,
 };
 
+enum usb_mode {
+	USB_MODE_NONE,
+	USB_MODE_USB2,
+	USB_MODE_USB3,
+	USB_MODE_USB4
+};
+
+#define USB_CAPABILITY_USB2	BIT(0)
+#define USB_CAPABILITY_USB3	BIT(1)
+#define USB_CAPABILITY_USB4	BIT(2)
+
 /*
  * struct enter_usb_data - Enter_USB Message details
  * @eudo: Enter_USB Data Object
@@ -240,6 +251,7 @@ struct typec_partner_desc {
  * @port_type_set: Set port type
  * @pd_get: Get available USB Power Delivery Capabilities.
  * @pd_set: Set USB Power Delivery Capabilities.
+ * @default_usb_mode_set: USB Mode to be used by default with Enter_USB Message
  */
 struct typec_operations {
 	int (*try_role)(struct typec_port *port, int role);
@@ -250,6 +262,7 @@ struct typec_operations {
 			     enum typec_port_type type);
 	struct usb_power_delivery **(*pd_get)(struct typec_port *port);
 	int (*pd_set)(struct typec_port *port, struct usb_power_delivery *pd);
+	int (*default_usb_mode_set)(struct typec_port *port, enum usb_mode mode);
 };
 
 enum usb_pd_svdm_ver {
@@ -267,6 +280,7 @@ enum usb_pd_svdm_ver {
  * @svdm_version: USB PD Structured VDM version if supported
  * @prefer_role: Initial role preference (DRP ports).
  * @accessory: Supported Accessory Modes
+ * @usb_capability: Supported USB Modes
  * @fwnode: Optional fwnode of the port
  * @driver_data: Private pointer for driver specific info
  * @pd: Optional USB Power Delivery Support
@@ -283,6 +297,7 @@ struct typec_capability {
 	int			prefer_role;
 	enum typec_accessory	accessory[TYPEC_MAX_ACCESSORY];
 	unsigned int		orientation_aware:1;
+	u8			usb_capability;
 
 	struct fwnode_handle	*fwnode;
 	void			*driver_data;
@@ -350,6 +365,8 @@ int typec_port_set_usb_power_delivery(struct typec_port *port, struct usb_power_
 int typec_partner_set_usb_power_delivery(struct typec_partner *partner,
 					 struct usb_power_delivery *pd);
 
+void typec_port_set_usb_mode(struct typec_port *port, enum usb_mode mode);
+
 /**
  * struct typec_connector - Representation of Type-C port for external drivers
  * @attach: notification about device removal
-- 
cgit v1.2.3


From 2140a952c4e9c73993ae6d9c2cc674d263d4beab Mon Sep 17 00:00:00 2001
From: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Date: Wed, 16 Oct 2024 16:18:32 +0300
Subject: usb: typec: Add attribute file showing the USB Modes of the partner

This attribute file shows the supported USB modes (USB 2.0,
USB 3.0 and USB4) of the partner, and the currently active
mode.

The active mode is determined primarily by checking the
speed of the enumerated USB device. When USB Power Delivery
is supported, the active USB mode should be always the mode
that was used with the Enter_USB Message, regardless of the
result of the USB enumeration. The port drivers can
separately assign the mode with a dedicated API.

If USB Power Delivery Identity is supplied for the partner
device, the supported modes are extracted from it.

Signed-off-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Link: https://lore.kernel.org/r/20241016131834.898599-3-heikki.krogerus@linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/usb/typec.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include')

diff --git a/include/linux/usb/typec.h b/include/linux/usb/typec.h
index f7edced5b10b..d616b8807000 100644
--- a/include/linux/usb/typec.h
+++ b/include/linux/usb/typec.h
@@ -220,6 +220,7 @@ struct typec_cable_desc {
  * @accessory: Audio, Debug or none.
  * @identity: Discover Identity command data
  * @pd_revision: USB Power Delivery Specification Revision if supported
+ * @usb_capability: Supported USB Modes
  * @attach: Notification about attached USB device
  * @deattach: Notification about removed USB device
  *
@@ -237,6 +238,7 @@ struct typec_partner_desc {
 	enum typec_accessory	accessory;
 	struct usb_pd_identity	*identity;
 	u16			pd_revision; /* 0300H = "3.0" */
+	u8			usb_capability;
 
 	void (*attach)(struct typec_partner *partner, struct device *dev);
 	void (*deattach)(struct typec_partner *partner, struct device *dev);
@@ -252,6 +254,7 @@ struct typec_partner_desc {
  * @pd_get: Get available USB Power Delivery Capabilities.
  * @pd_set: Set USB Power Delivery Capabilities.
  * @default_usb_mode_set: USB Mode to be used by default with Enter_USB Message
+ * @enter_usb_mode: Change the active USB Mode
  */
 struct typec_operations {
 	int (*try_role)(struct typec_port *port, int role);
@@ -263,6 +266,7 @@ struct typec_operations {
 	struct usb_power_delivery **(*pd_get)(struct typec_port *port);
 	int (*pd_set)(struct typec_port *port, struct usb_power_delivery *pd);
 	int (*default_usb_mode_set)(struct typec_port *port, enum usb_mode mode);
+	int (*enter_usb_mode)(struct typec_port *port, enum usb_mode mode);
 };
 
 enum usb_pd_svdm_ver {
@@ -365,6 +369,7 @@ int typec_port_set_usb_power_delivery(struct typec_port *port, struct usb_power_
 int typec_partner_set_usb_power_delivery(struct typec_partner *partner,
 					 struct usb_power_delivery *pd);
 
+void typec_partner_set_usb_mode(struct typec_partner *partner, enum usb_mode usb_mode);
 void typec_port_set_usb_mode(struct typec_port *port, enum usb_mode mode);
 
 /**
-- 
cgit v1.2.3


From d42a254633c773921884a19e8a1a0f53a31150c3 Mon Sep 17 00:00:00 2001
From: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>
Date: Wed, 16 Oct 2024 13:20:09 +0100
Subject: drm/sched: Optimise drm_sched_entity_push_job
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In FIFO mode (which is the default), both drm_sched_entity_push_job() and
drm_sched_rq_update_fifo(), where the latter calls the former, are
currently taking and releasing the same entity->rq_lock.

We can avoid that design inelegance, and also have a miniscule
efficiency improvement on the submit from idle path, by introducing a new
drm_sched_rq_update_fifo_locked() helper and pulling up the lock taking to
its callers.

v2:
 * Remove drm_sched_rq_update_fifo() altogether. (Christian)

v3:
 * Improved commit message. (Philipp)

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>
Cc: Christian König <christian.koenig@amd.com>
Cc: Alex Deucher <alexander.deucher@amd.com>
Cc: Luben Tuikov <ltuikov89@gmail.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Cc: Philipp Stanner <pstanner@redhat.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Philipp Stanner <pstanner@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20241016122013.7857-2-tursulin@igalia.com
---
 include/drm/gpu_scheduler.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
index e9f075f51db3..3658a6cb048e 100644
--- a/include/drm/gpu_scheduler.h
+++ b/include/drm/gpu_scheduler.h
@@ -593,7 +593,7 @@ void drm_sched_rq_add_entity(struct drm_sched_rq *rq,
 void drm_sched_rq_remove_entity(struct drm_sched_rq *rq,
 				struct drm_sched_entity *entity);
 
-void drm_sched_rq_update_fifo(struct drm_sched_entity *entity, ktime_t ts);
+void drm_sched_rq_update_fifo_locked(struct drm_sched_entity *entity, ktime_t ts);
 
 int drm_sched_entity_init(struct drm_sched_entity *entity,
 			  enum drm_sched_priority priority,
-- 
cgit v1.2.3


From a6f46283e952fe50dea5f932a1e4f0b6b2370968 Mon Sep 17 00:00:00 2001
From: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>
Date: Wed, 16 Oct 2024 13:20:11 +0100
Subject: drm/sched: Re-order struct drm_sched_rq members for clarity
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Current kerneldoc for struct drm_sched_rq incompletely documents what
fields are protected by the lock.

This is not good because it is misleading.

Lets fix it by listing all the elements which are protected by the lock.

While at it, lets also re-order the members so all protected by the lock
are in a single group.

v2:
 * Refer variables by kerneldoc syntax, more verbose commit text. (Philipp)

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>
Cc: Christian König <christian.koenig@amd.com>
Cc: Alex Deucher <alexander.deucher@amd.com>
Cc: Luben Tuikov <ltuikov89@gmail.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Cc: Philipp Stanner <pstanner@redhat.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Philipp Stanner <pstanner@redhat.com>
Signed-off-by: Philipp Stanner <pstanner@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20241016122013.7857-4-tursulin@igalia.com
---
 include/drm/gpu_scheduler.h | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
index 3658a6cb048e..b6d095074c19 100644
--- a/include/drm/gpu_scheduler.h
+++ b/include/drm/gpu_scheduler.h
@@ -243,10 +243,10 @@ struct drm_sched_entity {
 /**
  * struct drm_sched_rq - queue of entities to be scheduled.
  *
- * @lock: to modify the entities list.
  * @sched: the scheduler to which this rq belongs to.
- * @entities: list of the entities to be scheduled.
+ * @lock: protects @entities, @rb_tree_root and @current_entity.
  * @current_entity: the entity which is to be scheduled.
+ * @entities: list of the entities to be scheduled.
  * @rb_tree_root: root of time based priority queue of entities for FIFO scheduling
  *
  * Run queue is a set of entities scheduling command submissions for
@@ -254,10 +254,12 @@ struct drm_sched_entity {
  * the next entity to emit commands from.
  */
 struct drm_sched_rq {
-	spinlock_t			lock;
 	struct drm_gpu_scheduler	*sched;
-	struct list_head		entities;
+
+	spinlock_t			lock;
+	/* Following members are protected by the @lock: */
 	struct drm_sched_entity		*current_entity;
+	struct list_head		entities;
 	struct rb_root_cached		rb_tree_root;
 };
 
-- 
cgit v1.2.3


From f93126f5d55920d1447ef00a3fbe6706f40f53de Mon Sep 17 00:00:00 2001
From: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>
Date: Wed, 16 Oct 2024 13:20:12 +0100
Subject: drm/sched: Re-group and rename the entity run-queue lock
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When writing to a drm_sched_entity's run-queue, writers are protected
through the lock drm_sched_entity.rq_lock. This naming, however,
frequently collides with the separate internal lock of struct
drm_sched_rq, resulting in uses like this:

	spin_lock(&entity->rq_lock);
	spin_lock(&entity->rq->lock);

Rename drm_sched_entity.rq_lock to improve readability. While at it,
re-order that struct's members to make it more obvious what the lock
protects.

v2:
 * Rename some rq_lock straddlers in kerneldoc, improve commit text. (Philipp)

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>
Suggested-by: Christian König <christian.koenig@amd.com>
Cc: Alex Deucher <alexander.deucher@amd.com>
Cc: Luben Tuikov <ltuikov89@gmail.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Cc: Philipp Stanner <pstanner@redhat.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
[pstanner: Fix typo in docstring]
Signed-off-by: Philipp Stanner <pstanner@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20241016122013.7857-5-tursulin@igalia.com
---
 include/drm/gpu_scheduler.h | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
index b6d095074c19..e2b612817e3b 100644
--- a/include/drm/gpu_scheduler.h
+++ b/include/drm/gpu_scheduler.h
@@ -96,14 +96,22 @@ struct drm_sched_entity {
 	 */
 	struct list_head		list;
 
+	/**
+	 * @lock:
+	 *
+	 * Lock protecting the run-queue (@rq) to which this entity belongs,
+	 * @priority and the list of schedulers (@sched_list, @num_sched_list).
+	 */
+	spinlock_t			lock;
+
 	/**
 	 * @rq:
 	 *
 	 * Runqueue on which this entity is currently scheduled.
 	 *
 	 * FIXME: Locking is very unclear for this. Writers are protected by
-	 * @rq_lock, but readers are generally lockless and seem to just race
-	 * with not even a READ_ONCE.
+	 * @lock, but readers are generally lockless and seem to just race with
+	 * not even a READ_ONCE.
 	 */
 	struct drm_sched_rq		*rq;
 
@@ -136,17 +144,10 @@ struct drm_sched_entity {
 	 * @priority:
 	 *
 	 * Priority of the entity. This can be modified by calling
-	 * drm_sched_entity_set_priority(). Protected by &rq_lock.
+	 * drm_sched_entity_set_priority(). Protected by @lock.
 	 */
 	enum drm_sched_priority         priority;
 
-	/**
-	 * @rq_lock:
-	 *
-	 * Lock to modify the runqueue to which this entity belongs.
-	 */
-	spinlock_t			rq_lock;
-
 	/**
 	 * @job_queue: the list of jobs of this entity.
 	 */
-- 
cgit v1.2.3


From 134e71bd1edcc7252b64ca31efe88edfef86d784 Mon Sep 17 00:00:00 2001
From: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>
Date: Wed, 16 Oct 2024 13:20:13 +0100
Subject: drm/sched: Further optimise drm_sched_entity_push_job
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Having removed one re-lock cycle on the entity->lock in a patch titled
"drm/sched: Optimise drm_sched_entity_push_job", with only a tiny bit
larger refactoring we can do the same optimisation on the rq->lock.
(Currently both drm_sched_rq_add_entity() and
drm_sched_rq_update_fifo_locked() take and release the same lock.)

To achieve this we make drm_sched_rq_update_fifo_locked() and
drm_sched_rq_add_entity() expect the rq->lock to be held.

We also align drm_sched_rq_update_fifo_locked(),
drm_sched_rq_add_entity() and
drm_sched_rq_remove_fifo_locked() function signatures, by adding rq as a
parameter to the latter.

v2:
 * Fix after rebase of the series.
 * Avoid naming inconsistency between drm_sched_rq_add/remove. (Christian)

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>
Cc: Christian König <christian.koenig@amd.com>
Cc: Alex Deucher <alexander.deucher@amd.com>
Cc: Luben Tuikov <ltuikov89@gmail.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Cc: Philipp Stanner <pstanner@redhat.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Philipp Stanner <pstanner@redhat.com>
Signed-off-by: Philipp Stanner <pstanner@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20241016122013.7857-6-tursulin@igalia.com
---
 include/drm/gpu_scheduler.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
index e2b612817e3b..ab161289d1bf 100644
--- a/include/drm/gpu_scheduler.h
+++ b/include/drm/gpu_scheduler.h
@@ -596,7 +596,8 @@ void drm_sched_rq_add_entity(struct drm_sched_rq *rq,
 void drm_sched_rq_remove_entity(struct drm_sched_rq *rq,
 				struct drm_sched_entity *entity);
 
-void drm_sched_rq_update_fifo_locked(struct drm_sched_entity *entity, ktime_t ts);
+void drm_sched_rq_update_fifo_locked(struct drm_sched_entity *entity,
+				     struct drm_sched_rq *rq, ktime_t ts);
 
 int drm_sched_entity_init(struct drm_sched_entity *entity,
 			  enum drm_sched_priority priority,
-- 
cgit v1.2.3


From 5bd0d8e687bf04fdd3d4a733a6bb17e25d4a1de2 Mon Sep 17 00:00:00 2001
From: Cristian Ciocaltea <cristian.ciocaltea@collabora.com>
Date: Wed, 16 Oct 2024 23:06:51 +0300
Subject: drm/bridge: synopsys: Add DW HDMI QP TX Controller support library

The Synopsys DesignWare HDMI 2.1 Quad-Pixel (QP) TX Controller IP
supports the following features, among others:

* Fixed Rate Link (FRL)
* Display Stream Compression (DSC)
* 4K@120Hz and 8K@60Hz video modes
* Variable Refresh Rate (VRR) including Quick Media Switching (QMS), aka
  Cinema VRR
* Fast Vactive (FVA), aka Quick Frame Transport (QFT)
* SCDC I2C DDC access
* TMDS Scrambler enabling 2160p@60Hz with RGB/YCbCr4:4:4
* YCbCr4:2:0 enabling 2160p@60Hz at lower HDMI link speeds
* Multi-stream audio
* Enhanced Audio Return Channel (EARC)

Add library containing common helpers to enable basic support, i.e. RGB
output up to 4K@30Hz, without audio, CEC or any HDMI 2.1 specific
features.

Co-developed-by: Algea Cao <algea.cao@rock-chips.com>
Signed-off-by: Algea Cao <algea.cao@rock-chips.com>
Tested-by: Heiko Stuebner <heiko@sntech.de>
Reviewed-by: Maxime Ripard <mripard@kernel.org>
Signed-off-by: Cristian Ciocaltea <cristian.ciocaltea@collabora.com>
Acked-by: Maxime Ripard <mripard@kernel.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20241016-b4-rk3588-bridge-upstream-v10-1-87ef92a6d14e@collabora.com
Signed-off-by: Maxime Ripard <mripard@kernel.org>
---
 include/drm/bridge/dw_hdmi_qp.h | 32 ++++++++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)
 create mode 100644 include/drm/bridge/dw_hdmi_qp.h

(limited to 'include')

diff --git a/include/drm/bridge/dw_hdmi_qp.h b/include/drm/bridge/dw_hdmi_qp.h
new file mode 100644
index 000000000000..e9be6d507ad9
--- /dev/null
+++ b/include/drm/bridge/dw_hdmi_qp.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (c) 2021-2022 Rockchip Electronics Co., Ltd.
+ * Copyright (c) 2024 Collabora Ltd.
+ */
+
+#ifndef __DW_HDMI_QP__
+#define __DW_HDMI_QP__
+
+struct device;
+struct drm_encoder;
+struct dw_hdmi_qp;
+struct platform_device;
+
+struct dw_hdmi_qp_phy_ops {
+	int (*init)(struct dw_hdmi_qp *hdmi, void *data);
+	void (*disable)(struct dw_hdmi_qp *hdmi, void *data);
+	enum drm_connector_status (*read_hpd)(struct dw_hdmi_qp *hdmi, void *data);
+	void (*setup_hpd)(struct dw_hdmi_qp *hdmi, void *data);
+};
+
+struct dw_hdmi_qp_plat_data {
+	const struct dw_hdmi_qp_phy_ops *phy_ops;
+	void *phy_data;
+	int main_irq;
+};
+
+struct dw_hdmi_qp *dw_hdmi_qp_bind(struct platform_device *pdev,
+				   struct drm_encoder *encoder,
+				   const struct dw_hdmi_qp_plat_data *plat_data);
+void dw_hdmi_qp_resume(struct device *dev, struct dw_hdmi_qp *hdmi);
+#endif /* __DW_HDMI_QP__ */
-- 
cgit v1.2.3


From bae7aff5818b89d5d72f6ab0135c8faf3fa54318 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Th=C3=A9o=20Lebrun?= <theo.lebrun@bootlin.com>
Date: Mon, 7 Oct 2024 15:49:17 +0200
Subject: dt-bindings: clock: add Mobileye EyeQ6L/EyeQ6H clock indexes
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add #defines for Mobileye EyeQ6L and EyeQ6H SoC clocks.

Constant prefixes are:
 - EQ6LC_PLL_: EyeQ6L clock PLLs
 - EQ6HC_SOUTH_PLL_: EyeQ6H south OLB PLLs
 - EQ6HC_SOUTH_DIV_: EyeQ6H south OLB divider clocks
 - EQ6HC_ACC_PLL_: EyeQ6H accelerator OLB PLLs

Acked-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Signed-off-by: Théo Lebrun <theo.lebrun@bootlin.com>
Link: https://lore.kernel.org/r/20241007-mbly-clk-v5-2-e9d8994269cb@bootlin.com
Signed-off-by: Stephen Boyd <sboyd@kernel.org>
---
 include/dt-bindings/clock/mobileye,eyeq5-clk.h | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

(limited to 'include')

diff --git a/include/dt-bindings/clock/mobileye,eyeq5-clk.h b/include/dt-bindings/clock/mobileye,eyeq5-clk.h
index 26d8930335e4..b433c1772c28 100644
--- a/include/dt-bindings/clock/mobileye,eyeq5-clk.h
+++ b/include/dt-bindings/clock/mobileye,eyeq5-clk.h
@@ -19,4 +19,25 @@
 
 #define EQ5C_DIV_OSPI	10
 
+#define EQ6LC_PLL_DDR		0
+#define EQ6LC_PLL_CPU		1
+#define EQ6LC_PLL_PER		2
+#define EQ6LC_PLL_VDI		3
+
+#define EQ6HC_SOUTH_PLL_VDI		0
+#define EQ6HC_SOUTH_PLL_PCIE		1
+#define EQ6HC_SOUTH_PLL_PER		2
+#define EQ6HC_SOUTH_PLL_ISP		3
+
+#define EQ6HC_SOUTH_DIV_EMMC		4
+#define EQ6HC_SOUTH_DIV_OSPI_REF	5
+#define EQ6HC_SOUTH_DIV_OSPI_SYS	6
+#define EQ6HC_SOUTH_DIV_TSU		7
+
+#define EQ6HC_ACC_PLL_XNN		0
+#define EQ6HC_ACC_PLL_VMP		1
+#define EQ6HC_ACC_PLL_PMA		2
+#define EQ6HC_ACC_PLL_MPC		3
+#define EQ6HC_ACC_PLL_NOC		4
+
 #endif
-- 
cgit v1.2.3


From 6a136805e3c1532d2414b298c024429943cfd482 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Th=C3=A9o=20Lebrun?= <theo.lebrun@bootlin.com>
Date: Mon, 7 Oct 2024 15:49:18 +0200
Subject: clk: divider: Introduce CLK_DIVIDER_EVEN_INTEGERS flag
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add CLK_DIVIDER_EVEN_INTEGERS flag to support divisor of 2, 4, 6, etc.
The same divisor can be done using a table, which would be big and
wasteful for a clock dividor of width 8 (256 entries).

Require increasing flags size from u8 to u16 because
CLK_DIVIDER_EVEN_INTEGERS is the eighth flag. u16 is used inside struct
clk_divider; `unsigned long` is used for function arguments.

Signed-off-by: Théo Lebrun <theo.lebrun@bootlin.com>
Link: https://lore.kernel.org/r/20241007-mbly-clk-v5-3-e9d8994269cb@bootlin.com
Signed-off-by: Stephen Boyd <sboyd@kernel.org>
---
 include/linux/clk-provider.h | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h
index 7e43caabb54b..dbe793964c24 100644
--- a/include/linux/clk-provider.h
+++ b/include/linux/clk-provider.h
@@ -689,13 +689,15 @@ struct clk_div_table {
  * CLK_DIVIDER_BIG_ENDIAN - By default little endian register accesses are used
  *	for the divider register.  Setting this flag makes the register accesses
  *	big endian.
+ * CLK_DIVIDER_EVEN_INTEGERS - clock divisor is 2, 4, 6, 8, 10, etc.
+ *	Formula is 2 * (value read from hardware + 1).
  */
 struct clk_divider {
 	struct clk_hw	hw;
 	void __iomem	*reg;
 	u8		shift;
 	u8		width;
-	u8		flags;
+	u16		flags;
 	const struct clk_div_table	*table;
 	spinlock_t	*lock;
 };
@@ -711,6 +713,7 @@ struct clk_divider {
 #define CLK_DIVIDER_READ_ONLY		BIT(5)
 #define CLK_DIVIDER_MAX_AT_ZERO		BIT(6)
 #define CLK_DIVIDER_BIG_ENDIAN		BIT(7)
+#define CLK_DIVIDER_EVEN_INTEGERS	BIT(8)
 
 extern const struct clk_ops clk_divider_ops;
 extern const struct clk_ops clk_divider_ro_ops;
@@ -740,19 +743,21 @@ struct clk_hw *__clk_hw_register_divider(struct device *dev,
 		struct device_node *np, const char *name,
 		const char *parent_name, const struct clk_hw *parent_hw,
 		const struct clk_parent_data *parent_data, unsigned long flags,
-		void __iomem *reg, u8 shift, u8 width, u8 clk_divider_flags,
+		void __iomem *reg, u8 shift, u8 width,
+		unsigned long clk_divider_flags,
 		const struct clk_div_table *table, spinlock_t *lock);
 struct clk_hw *__devm_clk_hw_register_divider(struct device *dev,
 		struct device_node *np, const char *name,
 		const char *parent_name, const struct clk_hw *parent_hw,
 		const struct clk_parent_data *parent_data, unsigned long flags,
-		void __iomem *reg, u8 shift, u8 width, u8 clk_divider_flags,
+		void __iomem *reg, u8 shift, u8 width,
+		unsigned long clk_divider_flags,
 		const struct clk_div_table *table, spinlock_t *lock);
 struct clk *clk_register_divider_table(struct device *dev, const char *name,
 		const char *parent_name, unsigned long flags,
 		void __iomem *reg, u8 shift, u8 width,
-		u8 clk_divider_flags, const struct clk_div_table *table,
-		spinlock_t *lock);
+		unsigned long clk_divider_flags,
+		const struct clk_div_table *table, spinlock_t *lock);
 /**
  * clk_register_divider - register a divider clock with the clock framework
  * @dev: device registering this clock
-- 
cgit v1.2.3


From ea1cca026842bc83af92785e61a4433bcc71ea70 Mon Sep 17 00:00:00 2001
From: Yassine Oudjana <y.oudjana@protonmail.com>
Date: Thu, 17 Oct 2024 10:17:05 +0300
Subject: dt-bindings: clock: Add MediaTek MT6735 clock and reset bindings

Add clock definitions for the main clock and reset controllers of MT6735
(apmixedsys, topckgen, infracfg and pericfg).

Signed-off-by: Yassine Oudjana <y.oudjana@protonmail.com>
Reviewed-by: Conor Dooley <conor.dooley@microchip.com>
Reviewed-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
Link: https://lore.kernel.org/r/20241017071708.38663-2-y.oudjana@protonmail.com
Signed-off-by: Stephen Boyd <sboyd@kernel.org>
---
 .../dt-bindings/clock/mediatek,mt6735-apmixedsys.h | 16 +++++
 .../dt-bindings/clock/mediatek,mt6735-infracfg.h   | 25 +++++++
 .../dt-bindings/clock/mediatek,mt6735-pericfg.h    | 37 ++++++++++
 .../dt-bindings/clock/mediatek,mt6735-topckgen.h   | 79 ++++++++++++++++++++++
 .../dt-bindings/reset/mediatek,mt6735-infracfg.h   | 27 ++++++++
 .../dt-bindings/reset/mediatek,mt6735-pericfg.h    | 31 +++++++++
 6 files changed, 215 insertions(+)
 create mode 100644 include/dt-bindings/clock/mediatek,mt6735-apmixedsys.h
 create mode 100644 include/dt-bindings/clock/mediatek,mt6735-infracfg.h
 create mode 100644 include/dt-bindings/clock/mediatek,mt6735-pericfg.h
 create mode 100644 include/dt-bindings/clock/mediatek,mt6735-topckgen.h
 create mode 100644 include/dt-bindings/reset/mediatek,mt6735-infracfg.h
 create mode 100644 include/dt-bindings/reset/mediatek,mt6735-pericfg.h

(limited to 'include')

diff --git a/include/dt-bindings/clock/mediatek,mt6735-apmixedsys.h b/include/dt-bindings/clock/mediatek,mt6735-apmixedsys.h
new file mode 100644
index 000000000000..b4705204409c
--- /dev/null
+++ b/include/dt-bindings/clock/mediatek,mt6735-apmixedsys.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+
+#ifndef _DT_BINDINGS_CLK_MT6735_APMIXEDSYS_H
+#define _DT_BINDINGS_CLK_MT6735_APMIXEDSYS_H
+
+#define CLK_APMIXED_ARMPLL		0
+#define CLK_APMIXED_MAINPLL		1
+#define CLK_APMIXED_UNIVPLL		2
+#define CLK_APMIXED_MMPLL		3
+#define CLK_APMIXED_MSDCPLL		4
+#define CLK_APMIXED_VENCPLL		5
+#define CLK_APMIXED_TVDPLL		6
+#define CLK_APMIXED_APLL1		7
+#define CLK_APMIXED_APLL2		8
+
+#endif
diff --git a/include/dt-bindings/clock/mediatek,mt6735-infracfg.h b/include/dt-bindings/clock/mediatek,mt6735-infracfg.h
new file mode 100644
index 000000000000..d8dd51e15637
--- /dev/null
+++ b/include/dt-bindings/clock/mediatek,mt6735-infracfg.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+
+#ifndef _DT_BINDINGS_CLK_MT6735_INFRACFG_H
+#define _DT_BINDINGS_CLK_MT6735_INFRACFG_H
+
+#define CLK_INFRA_DBG			0
+#define CLK_INFRA_GCE			1
+#define CLK_INFRA_TRBG			2
+#define CLK_INFRA_CPUM			3
+#define CLK_INFRA_DEVAPC		4
+#define CLK_INFRA_AUDIO			5
+#define CLK_INFRA_GCPU			6
+#define CLK_INFRA_L2C_SRAM		7
+#define CLK_INFRA_M4U			8
+#define CLK_INFRA_CLDMA			9
+#define CLK_INFRA_CONNMCU_BUS		10
+#define CLK_INFRA_KP			11
+#define CLK_INFRA_APXGPT		12
+#define CLK_INFRA_SEJ			13
+#define CLK_INFRA_CCIF0_AP		14
+#define CLK_INFRA_CCIF1_AP		15
+#define CLK_INFRA_PMIC_SPI		16
+#define CLK_INFRA_PMIC_WRAP		17
+
+#endif
diff --git a/include/dt-bindings/clock/mediatek,mt6735-pericfg.h b/include/dt-bindings/clock/mediatek,mt6735-pericfg.h
new file mode 100644
index 000000000000..16bc21bbd95b
--- /dev/null
+++ b/include/dt-bindings/clock/mediatek,mt6735-pericfg.h
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+
+#ifndef _DT_BINDINGS_CLK_MT6735_PERICFG_H
+#define _DT_BINDINGS_CLK_MT6735_PERICFG_H
+
+#define CLK_PERI_DISP_PWM		0
+#define CLK_PERI_THERM			1
+#define CLK_PERI_PWM1			2
+#define CLK_PERI_PWM2			3
+#define CLK_PERI_PWM3			4
+#define CLK_PERI_PWM4			5
+#define CLK_PERI_PWM5			6
+#define CLK_PERI_PWM6			7
+#define CLK_PERI_PWM7			8
+#define CLK_PERI_PWM			9
+#define CLK_PERI_USB0			10
+#define CLK_PERI_IRDA			11
+#define CLK_PERI_APDMA			12
+#define CLK_PERI_MSDC30_0		13
+#define CLK_PERI_MSDC30_1		14
+#define CLK_PERI_MSDC30_2		15
+#define CLK_PERI_MSDC30_3		16
+#define CLK_PERI_UART0			17
+#define CLK_PERI_UART1			18
+#define CLK_PERI_UART2			19
+#define CLK_PERI_UART3			20
+#define CLK_PERI_UART4			21
+#define CLK_PERI_BTIF			22
+#define CLK_PERI_I2C0			23
+#define CLK_PERI_I2C1			24
+#define CLK_PERI_I2C2			25
+#define CLK_PERI_I2C3			26
+#define CLK_PERI_AUXADC			27
+#define CLK_PERI_SPI0			28
+#define CLK_PERI_IRTX			29
+
+#endif
diff --git a/include/dt-bindings/clock/mediatek,mt6735-topckgen.h b/include/dt-bindings/clock/mediatek,mt6735-topckgen.h
new file mode 100644
index 000000000000..d4b1e113cc0a
--- /dev/null
+++ b/include/dt-bindings/clock/mediatek,mt6735-topckgen.h
@@ -0,0 +1,79 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+
+#ifndef _DT_BINDINGS_CLK_MT6735_TOPCKGEN_H
+#define _DT_BINDINGS_CLK_MT6735_TOPCKGEN_H
+
+#define CLK_TOP_AD_SYS_26M_CK		0
+#define CLK_TOP_CLKPH_MCK_O		1
+#define CLK_TOP_DMPLL			2
+#define CLK_TOP_DPI_CK			3
+#define CLK_TOP_WHPLL_AUDIO_CK		4
+
+#define CLK_TOP_SYSPLL_D2		5
+#define CLK_TOP_SYSPLL_D3		6
+#define CLK_TOP_SYSPLL_D5		7
+#define CLK_TOP_SYSPLL1_D2		8
+#define CLK_TOP_SYSPLL1_D4		9
+#define CLK_TOP_SYSPLL1_D8		10
+#define CLK_TOP_SYSPLL1_D16		11
+#define CLK_TOP_SYSPLL2_D2		12
+#define CLK_TOP_SYSPLL2_D4		13
+#define CLK_TOP_SYSPLL3_D2		14
+#define CLK_TOP_SYSPLL3_D4		15
+#define CLK_TOP_SYSPLL4_D2		16
+#define CLK_TOP_SYSPLL4_D4		17
+#define CLK_TOP_UNIVPLL_D2		18
+#define CLK_TOP_UNIVPLL_D3		19
+#define CLK_TOP_UNIVPLL_D5		20
+#define CLK_TOP_UNIVPLL_D26		21
+#define CLK_TOP_UNIVPLL1_D2		22
+#define CLK_TOP_UNIVPLL1_D4		23
+#define CLK_TOP_UNIVPLL1_D8		24
+#define CLK_TOP_UNIVPLL2_D2		25
+#define CLK_TOP_UNIVPLL2_D4		26
+#define CLK_TOP_UNIVPLL2_D8		27
+#define CLK_TOP_UNIVPLL3_D2		28
+#define CLK_TOP_UNIVPLL3_D4		29
+#define CLK_TOP_MSDCPLL_D2		30
+#define CLK_TOP_MSDCPLL_D4		31
+#define CLK_TOP_MSDCPLL_D8		32
+#define CLK_TOP_MSDCPLL_D16		33
+#define CLK_TOP_VENCPLL_D3		34
+#define CLK_TOP_TVDPLL_D2		35
+#define CLK_TOP_TVDPLL_D4		36
+#define CLK_TOP_DMPLL_D2		37
+#define CLK_TOP_DMPLL_D4		38
+#define CLK_TOP_DMPLL_D8		39
+#define CLK_TOP_AD_SYS_26M_D2		40
+
+#define CLK_TOP_AXI_SEL			41
+#define CLK_TOP_MEM_SEL			42
+#define CLK_TOP_DDRPHY_SEL		43
+#define CLK_TOP_MM_SEL			44
+#define CLK_TOP_PWM_SEL			45
+#define CLK_TOP_VDEC_SEL		46
+#define CLK_TOP_MFG_SEL			47
+#define CLK_TOP_CAMTG_SEL		48
+#define CLK_TOP_UART_SEL		49
+#define CLK_TOP_SPI_SEL			50
+#define CLK_TOP_USB20_SEL		51
+#define CLK_TOP_MSDC50_0_SEL		52
+#define CLK_TOP_MSDC30_0_SEL		53
+#define CLK_TOP_MSDC30_1_SEL		54
+#define CLK_TOP_MSDC30_2_SEL		55
+#define CLK_TOP_MSDC30_3_SEL		56
+#define CLK_TOP_AUDIO_SEL		57
+#define CLK_TOP_AUDINTBUS_SEL		58
+#define CLK_TOP_PMICSPI_SEL		59
+#define CLK_TOP_SCP_SEL			60
+#define CLK_TOP_ATB_SEL			61
+#define CLK_TOP_DPI0_SEL		62
+#define CLK_TOP_SCAM_SEL		63
+#define CLK_TOP_MFG13M_SEL		64
+#define CLK_TOP_AUD1_SEL		65
+#define CLK_TOP_AUD2_SEL		66
+#define CLK_TOP_IRDA_SEL		67
+#define CLK_TOP_IRTX_SEL		68
+#define CLK_TOP_DISPPWM_SEL		69
+
+#endif
diff --git a/include/dt-bindings/reset/mediatek,mt6735-infracfg.h b/include/dt-bindings/reset/mediatek,mt6735-infracfg.h
new file mode 100644
index 000000000000..9df969090377
--- /dev/null
+++ b/include/dt-bindings/reset/mediatek,mt6735-infracfg.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+
+#ifndef _DT_BINDINGS_RESET_MT6735_INFRACFG_H
+#define _DT_BINDINGS_RESET_MT6735_INFRACFG_H
+
+#define MT6735_INFRA_RST0_EMI_REG		0
+#define MT6735_INFRA_RST0_DRAMC0_AO		1
+#define MT6735_INFRA_RST0_AP_CIRQ_EINT		2
+#define MT6735_INFRA_RST0_APXGPT		3
+#define MT6735_INFRA_RST0_SCPSYS		4
+#define MT6735_INFRA_RST0_KP			5
+#define MT6735_INFRA_RST0_PMIC_WRAP		6
+#define MT6735_INFRA_RST0_CLDMA_AO_TOP		7
+#define MT6735_INFRA_RST0_USBSIF_TOP		8
+#define MT6735_INFRA_RST0_EMI			9
+#define MT6735_INFRA_RST0_CCIF			10
+#define MT6735_INFRA_RST0_DRAMC0		11
+#define MT6735_INFRA_RST0_EMI_AO_REG		12
+#define MT6735_INFRA_RST0_CCIF_AO		13
+#define MT6735_INFRA_RST0_TRNG			14
+#define MT6735_INFRA_RST0_SYS_CIRQ		15
+#define MT6735_INFRA_RST0_GCE			16
+#define MT6735_INFRA_RST0_M4U			17
+#define MT6735_INFRA_RST0_CCIF1			18
+#define MT6735_INFRA_RST0_CLDMA_TOP_PD		19
+
+#endif
diff --git a/include/dt-bindings/reset/mediatek,mt6735-pericfg.h b/include/dt-bindings/reset/mediatek,mt6735-pericfg.h
new file mode 100644
index 000000000000..a62bb192835a
--- /dev/null
+++ b/include/dt-bindings/reset/mediatek,mt6735-pericfg.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+
+#ifndef _DT_BINDINGS_RESET_MT6735_PERICFG_H
+#define _DT_BINDINGS_RESET_MT6735_PERICFG_H
+
+#define MT6735_PERI_RST0_UART0			0
+#define MT6735_PERI_RST0_UART1			1
+#define MT6735_PERI_RST0_UART2			2
+#define MT6735_PERI_RST0_UART3			3
+#define MT6735_PERI_RST0_UART4			4
+#define MT6735_PERI_RST0_BTIF			5
+#define MT6735_PERI_RST0_DISP_PWM_PERI		6
+#define MT6735_PERI_RST0_PWM			7
+#define MT6735_PERI_RST0_AUXADC			8
+#define MT6735_PERI_RST0_DMA			9
+#define MT6735_PERI_RST0_IRDA			10
+#define MT6735_PERI_RST0_IRTX			11
+#define MT6735_PERI_RST0_THERM			12
+#define MT6735_PERI_RST0_MSDC2			13
+#define MT6735_PERI_RST0_MSDC3			14
+#define MT6735_PERI_RST0_MSDC0			15
+#define MT6735_PERI_RST0_MSDC1			16
+#define MT6735_PERI_RST0_I2C0			17
+#define MT6735_PERI_RST0_I2C1			18
+#define MT6735_PERI_RST0_I2C2			19
+#define MT6735_PERI_RST0_I2C3			20
+#define MT6735_PERI_RST0_USB			21
+
+#define MT6735_PERI_RST1_SPI0			22
+
+#endif
-- 
cgit v1.2.3


From 44fcc479a574a4055fb6aed1f786d39999466383 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= <linux@weissschuh.net>
Date: Thu, 17 Oct 2024 06:37:33 +0200
Subject: power: supply: hwmon: move interface to private header
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The interface of power_supply_hwmon.c is only meant to be used by the
psy core. Remove it from the public header file and use the private one
instead.

Signed-off-by: Thomas Weißschuh <linux@weissschuh.net>
Link: https://lore.kernel.org/r/20241017-power-supply-cleanups-v2-1-cb0f5deab088@weissschuh.net
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 include/linux/power_supply.h | 13 -------------
 1 file changed, 13 deletions(-)

(limited to 'include')

diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h
index c1d6cb7f4c40..b98106e1a90f 100644
--- a/include/linux/power_supply.h
+++ b/include/linux/power_supply.h
@@ -937,19 +937,6 @@ static inline bool power_supply_is_watt_property(enum power_supply_property psp)
 	return false;
 }
 
-#ifdef CONFIG_POWER_SUPPLY_HWMON
-int power_supply_add_hwmon_sysfs(struct power_supply *psy);
-void power_supply_remove_hwmon_sysfs(struct power_supply *psy);
-#else
-static inline int power_supply_add_hwmon_sysfs(struct power_supply *psy)
-{
-	return 0;
-}
-
-static inline
-void power_supply_remove_hwmon_sysfs(struct power_supply *psy) {}
-#endif
-
 #ifdef CONFIG_SYSFS
 ssize_t power_supply_charge_behaviour_show(struct device *dev,
 					   unsigned int available_behaviours,
-- 
cgit v1.2.3


From 22823157d90c4631a951920090686c20c459b36f Mon Sep 17 00:00:00 2001
From: Tomer Maimon <tmaimon77@gmail.com>
Date: Thu, 12 Sep 2024 22:10:37 +0300
Subject: reset: npcm: register npcm8xx clock auxiliary bus device

Add NPCM8xx clock controller auxiliary bus device registration.

The NPCM8xx clock controller is registered as an aux device because the
reset and the clock controller share the same register region.

Signed-off-by: Tomer Maimon <tmaimon77@gmail.com>
Tested-by: Benjamin Fair <benjaminfair@google.com>
Reviewed-by: Philipp Zabel <p.zabel@pengutronix.de>
Link: https://lore.kernel.org/r/20240912191038.981105-3-tmaimon77@gmail.com
Signed-off-by: Stephen Boyd <sboyd@kernel.org>
---
 include/soc/nuvoton/clock-npcm8xx.h | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)
 create mode 100644 include/soc/nuvoton/clock-npcm8xx.h

(limited to 'include')

diff --git a/include/soc/nuvoton/clock-npcm8xx.h b/include/soc/nuvoton/clock-npcm8xx.h
new file mode 100644
index 000000000000..1d974e89d8a8
--- /dev/null
+++ b/include/soc/nuvoton/clock-npcm8xx.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __SOC_NPCM8XX_CLOCK_H
+#define __SOC_NPCM8XX_CLOCK_H
+
+#include <linux/auxiliary_bus.h>
+#include <linux/container_of.h>
+
+struct npcm_clock_adev {
+	void __iomem *base;
+	struct auxiliary_device adev;
+};
+
+static inline struct npcm_clock_adev *to_npcm_clock_adev(struct auxiliary_device *_adev)
+{
+	return container_of(_adev, struct npcm_clock_adev, adev);
+}
+
+#endif
-- 
cgit v1.2.3


From 0784181b44af831a3fa52e1e5ff77c388d699dba Mon Sep 17 00:00:00 2001
From: David Woodhouse <dwmw@amazon.co.uk>
Date: Thu, 26 Sep 2024 16:17:37 +0100
Subject: lockdep: Add lockdep_cleanup_dead_cpu()

Add a function to check that an offline CPU has left the tracing
infrastructure in a sane state.

Commit 9bb69ba4c177 ("ACPI: processor_idle: use raw_safe_halt() in
acpi_idle_play_dead()") fixed an issue where the acpi_idle_play_dead()
function called safe_halt() instead of raw_safe_halt(), which had the
side-effect of setting the hardirqs_enabled flag for the offline CPU.

On x86 this triggered warnings from lockdep_assert_irqs_disabled() when
the CPU was brought back online again later. These warnings were too
early for the exception to be handled correctly, leading to a
triple-fault.

Add lockdep_cleanup_dead_cpu() to check for this kind of failure mode,
print the events leading up to it, and correct it so that the CPU can
come online again correctly. Re-introducing the original bug now merely
results in this warning instead:

[   61.556652] smpboot: CPU 1 is now offline
[   61.556769] CPU 1 left hardirqs enabled!
[   61.556915] irq event stamp: 128149
[   61.556965] hardirqs last  enabled at (128149): [<ffffffff81720a36>] acpi_idle_play_dead+0x46/0x70
[   61.557055] hardirqs last disabled at (128148): [<ffffffff81124d50>] do_idle+0x90/0xe0
[   61.557117] softirqs last  enabled at (128078): [<ffffffff81cec74c>] __do_softirq+0x31c/0x423
[   61.557199] softirqs last disabled at (128065): [<ffffffff810baae1>] __irq_exit_rcu+0x91/0x100

[boqun: Capitalize the title and reword the message a bit]

Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Boqun Feng <boqun.feng@gmail.com>
Link: https://lore.kernel.org/r/f7bd2b3b999051bb3ef4be34526a9262008285f5.camel@infradead.org
---
 include/linux/irqflags.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include')

diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h
index 3f003d5fde53..57b074e0cfbb 100644
--- a/include/linux/irqflags.h
+++ b/include/linux/irqflags.h
@@ -18,6 +18,8 @@
 #include <asm/irqflags.h>
 #include <asm/percpu.h>
 
+struct task_struct;
+
 /* Currently lockdep_softirqs_on/off is used only by lockdep */
 #ifdef CONFIG_PROVE_LOCKING
   extern void lockdep_softirqs_on(unsigned long ip);
@@ -25,12 +27,16 @@
   extern void lockdep_hardirqs_on_prepare(void);
   extern void lockdep_hardirqs_on(unsigned long ip);
   extern void lockdep_hardirqs_off(unsigned long ip);
+  extern void lockdep_cleanup_dead_cpu(unsigned int cpu,
+				       struct task_struct *idle);
 #else
   static inline void lockdep_softirqs_on(unsigned long ip) { }
   static inline void lockdep_softirqs_off(unsigned long ip) { }
   static inline void lockdep_hardirqs_on_prepare(void) { }
   static inline void lockdep_hardirqs_on(unsigned long ip) { }
   static inline void lockdep_hardirqs_off(unsigned long ip) { }
+  static inline void lockdep_cleanup_dead_cpu(unsigned int cpu,
+					      struct task_struct *idle) {}
 #endif
 
 #ifdef CONFIG_TRACE_IRQFLAGS
-- 
cgit v1.2.3


From d7fe143cb115076fed0126ad8cf5ba6c3e575e43 Mon Sep 17 00:00:00 2001
From: Ahmed Ehab <bottaawesome633@gmail.com>
Date: Sun, 25 Aug 2024 01:10:30 +0300
Subject: locking/lockdep: Avoid creating new name string literals in
 lockdep_set_subclass()

Syzbot reports a problem that a warning will be triggered while
searching a lock class in look_up_lock_class().

The cause of the issue is that a new name is created and used by
lockdep_set_subclass() instead of using the existing one. This results
in a lock instance has a different name pointer than previous registered
one stored in lock class, and WARN_ONCE() is triggered because of that
in look_up_lock_class().

To fix this, change lockdep_set_subclass() to use the existing name
instead of a new one. Hence, no new name will be created by
lockdep_set_subclass(). Hence, the warning is avoided.

[boqun: Reword the commit log to state the correct issue]

Reported-by: <syzbot+7f4a6f7f7051474e40ad@syzkaller.appspotmail.com>
Fixes: de8f5e4f2dc1f ("lockdep: Introduce wait-type checks")
Cc: stable@vger.kernel.org
Signed-off-by: Ahmed Ehab <bottaawesome633@gmail.com>
Signed-off-by: Boqun Feng <boqun.feng@gmail.com>
Link: https://lore.kernel.org/lkml/20240824221031.7751-1-bottaawesome633@gmail.com/
---
 include/linux/lockdep.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index 217f7abf2cbf..67964dc4db95 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -173,7 +173,7 @@ static inline void lockdep_init_map(struct lockdep_map *lock, const char *name,
 			      (lock)->dep_map.lock_type)
 
 #define lockdep_set_subclass(lock, sub)					\
-	lockdep_init_map_type(&(lock)->dep_map, #lock, (lock)->dep_map.key, sub,\
+	lockdep_init_map_type(&(lock)->dep_map, (lock)->dep_map.name, (lock)->dep_map.key, sub,\
 			      (lock)->dep_map.wait_type_inner,		\
 			      (lock)->dep_map.wait_type_outer,		\
 			      (lock)->dep_map.lock_type)
-- 
cgit v1.2.3


From df7e8b522a6090162ecb50fd298ebc4db137562b Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Mon, 14 Oct 2024 10:55:20 +0200
Subject: drm/client: Move client event handlers to drm_client_event.c

A number of DRM-client functions serve as entry points from device
operations to client code. Moving them info a separate file will later
allow for a more fine-grained kernel configuration. For most of the
users it is sufficient to include <drm/drm_client_event.h> instead of
the full driver-side interface in <drm/drm_client.h>

v2:
- rename new files to drm_client_event.{c,h}

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Cc: Jani Nikula <jani.nikula@linux.intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Tvrtko Ursulin <tursulin@ursulin.net>
Cc: Karol Herbst <kherbst@redhat.com>
Cc: Lyude Paul <lyude@redhat.com>
Cc: Danilo Krummrich <dakr@redhat.com>
Reviewed-by: Jonathan Cavitt <jonathan.cavitt@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20241014085740.582287-7-tzimmermann@suse.de
---
 include/drm/drm_client.h       |  4 ----
 include/drm/drm_client_event.h | 12 ++++++++++++
 2 files changed, 12 insertions(+), 4 deletions(-)
 create mode 100644 include/drm/drm_client_event.h

(limited to 'include')

diff --git a/include/drm/drm_client.h b/include/drm/drm_client.h
index bc0e66f9c425..dfd5afcc9463 100644
--- a/include/drm/drm_client.h
+++ b/include/drm/drm_client.h
@@ -121,10 +121,6 @@ int drm_client_init(struct drm_device *dev, struct drm_client_dev *client,
 void drm_client_release(struct drm_client_dev *client);
 void drm_client_register(struct drm_client_dev *client);
 
-void drm_client_dev_unregister(struct drm_device *dev);
-void drm_client_dev_hotplug(struct drm_device *dev);
-void drm_client_dev_restore(struct drm_device *dev);
-
 /**
  * struct drm_client_buffer - DRM client buffer
  */
diff --git a/include/drm/drm_client_event.h b/include/drm/drm_client_event.h
new file mode 100644
index 000000000000..2c8915241120
--- /dev/null
+++ b/include/drm/drm_client_event.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 or MIT */
+
+#ifndef _DRM_CLIENT_EVENT_H_
+#define _DRM_CLIENT_EVENT_H_
+
+struct drm_device;
+
+void drm_client_dev_unregister(struct drm_device *dev);
+void drm_client_dev_hotplug(struct drm_device *dev);
+void drm_client_dev_restore(struct drm_device *dev);
+
+#endif
-- 
cgit v1.2.3


From bf17766f108309027aac2bfe184df6088dfd7384 Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Mon, 14 Oct 2024 10:55:21 +0200
Subject: drm/client: Move suspend/resume into DRM client callbacks

Suspend and resume is still tied to fbdev emulation. Modeset helpers
and several drivers call drm_fb_helper_set_suspend_unlocked() to inform
the fbdev client about suspend/resume events.

To make it work with arbitrary clients, add per-client callback
functions for suspend and resume. Implement them for fbdev emulation
with the existing drm_fb_helper_set_suspend_unlocked(). Then update
DRM's modeset helpers to call the new interface.

Clients that are not fbdev can now implement suspend/resume to their
requirements.

The callback parameter holds_console_lock is a workaround for i915,
radeon and xe, which possibly call the interface while having the
console lock acquired. Even though the commit doesn't modify these
drivers, it already adds the flag to avoid churn later on. New code
should not hold the console lock.

v4:
- clarify holds_console_lock in commit description (Jonathan)

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Reviewed-by: Jonathan Cavitt <jonathan.cavitt@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20241014085740.582287-8-tzimmermann@suse.de
---
 include/drm/drm_client.h       | 35 +++++++++++++++++++++++++++++++++++
 include/drm/drm_client_event.h |  2 ++
 2 files changed, 37 insertions(+)

(limited to 'include')

diff --git a/include/drm/drm_client.h b/include/drm/drm_client.h
index dfd5afcc9463..c03c4b0f3e94 100644
--- a/include/drm/drm_client.h
+++ b/include/drm/drm_client.h
@@ -63,6 +63,34 @@ struct drm_client_funcs {
 	 * This callback is optional.
 	 */
 	int (*hotplug)(struct drm_client_dev *client);
+
+	/**
+	 * @suspend:
+	 *
+	 * Called when suspending the device.
+	 *
+	 * This callback is optional.
+	 *
+	 * FIXME: Some callers hold the console lock when invoking this
+	 *        function. This interferes with fbdev emulation, which
+	 *        also tries to acquire the lock. Push the console lock
+	 *        into the callback and remove 'holds_console_lock'.
+	 */
+	int (*suspend)(struct drm_client_dev *client, bool holds_console_lock);
+
+	/**
+	 * @resume:
+	 *
+	 * Called when resuming the device from suspend.
+	 *
+	 * This callback is optional.
+	 *
+	 * FIXME: Some callers hold the console lock when invoking this
+	 *        function. This interferes with fbdev emulation, which
+	 *        also tries to acquire the lock. Push the console lock
+	 *        into the callback and remove 'holds_console_lock'.
+	 */
+	int (*resume)(struct drm_client_dev *client, bool holds_console_lock);
 };
 
 /**
@@ -107,6 +135,13 @@ struct drm_client_dev {
 	 */
 	struct drm_mode_set *modesets;
 
+	/**
+	 * @suspended:
+	 *
+	 * The client has been suspended.
+	 */
+	bool suspended;
+
 	/**
 	 * @hotplug_failed:
 	 *
diff --git a/include/drm/drm_client_event.h b/include/drm/drm_client_event.h
index 2c8915241120..72c97d111169 100644
--- a/include/drm/drm_client_event.h
+++ b/include/drm/drm_client_event.h
@@ -8,5 +8,7 @@ struct drm_device;
 void drm_client_dev_unregister(struct drm_device *dev);
 void drm_client_dev_hotplug(struct drm_device *dev);
 void drm_client_dev_restore(struct drm_device *dev);
+void drm_client_dev_suspend(struct drm_device *dev, bool holds_console_lock);
+void drm_client_dev_resume(struct drm_device *dev, bool holds_console_lock);
 
 #endif
-- 
cgit v1.2.3


From 1f828b4dd40264028d9b481c0412e63837d968f6 Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Mon, 14 Oct 2024 10:55:25 +0200
Subject: drm/client: Make client support optional
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Only build client code if DRM_CLIENT has been selected. Automatially
do so if one of the default clients has been enabled. If client support
has been disabled, the helpers for client-related events are empty and
the regular client functions are not present.

Amdgpu has an internal DRM client, so it has to select DRM_CLIENT by
itself unconditionally.

v3:
- provide empty drm_client_debugfs_init() if DRM_CLIENT=n (kernel
  test robot)

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Cc: Alex Deucher <alexander.deucher@amd.com>
Cc: "Christian König" <christian.koenig@amd.com>
Cc: Xinhui Pan <Xinhui.Pan@amd.com>
Reviewed-by: Jonathan Cavitt <jonathan.cavitt@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20241014085740.582287-12-tzimmermann@suse.de
---
 include/drm/drm_client.h       |  2 --
 include/drm/drm_client_event.h | 13 +++++++++++++
 2 files changed, 13 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/drm/drm_client.h b/include/drm/drm_client.h
index c03c4b0f3e94..3b13cf29ed55 100644
--- a/include/drm/drm_client.h
+++ b/include/drm/drm_client.h
@@ -236,6 +236,4 @@ int drm_client_modeset_dpms(struct drm_client_dev *client, int mode);
 	drm_for_each_connector_iter(connector, iter) \
 		if (connector->connector_type != DRM_MODE_CONNECTOR_WRITEBACK)
 
-void drm_client_debugfs_init(struct drm_device *dev);
-
 #endif
diff --git a/include/drm/drm_client_event.h b/include/drm/drm_client_event.h
index 72c97d111169..99863554b055 100644
--- a/include/drm/drm_client_event.h
+++ b/include/drm/drm_client_event.h
@@ -5,10 +5,23 @@
 
 struct drm_device;
 
+#if defined(CONFIG_DRM_CLIENT)
 void drm_client_dev_unregister(struct drm_device *dev);
 void drm_client_dev_hotplug(struct drm_device *dev);
 void drm_client_dev_restore(struct drm_device *dev);
 void drm_client_dev_suspend(struct drm_device *dev, bool holds_console_lock);
 void drm_client_dev_resume(struct drm_device *dev, bool holds_console_lock);
+#else
+static inline void drm_client_dev_unregister(struct drm_device *dev)
+{ }
+static inline void drm_client_dev_hotplug(struct drm_device *dev)
+{ }
+static inline void drm_client_dev_restore(struct drm_device *dev)
+{ }
+static inline void drm_client_dev_suspend(struct drm_device *dev, bool holds_console_lock)
+{ }
+static inline void drm_client_dev_resume(struct drm_device *dev, bool holds_console_lock)
+{ }
+#endif
 
 #endif
-- 
cgit v1.2.3


From 6eaa83ec229b1e99130456c4f6658430d509c76c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@linux.intel.com>
Date: Thu, 17 Oct 2024 17:11:08 +0300
Subject: PCI: Remove unused PCI_SUBTRACTIVE_DECODE
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

2fe2abf896c1 ("PCI: augment bus resource table with a list") added
PCI_SUBTRACTIVE_DECODE which is put into the struct pci_bus_resource flags
field but is never read. There seems to never have been users for it.

Remove both PCI_SUBTRACTIVE_DECODE and the flags field from the struct
pci_bus_resource.

Link: https://lore.kernel.org/r/20241017141111.44612-1-ilpo.jarvinen@linux.intel.com
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 include/linux/pci.h | 12 +-----------
 1 file changed, 1 insertion(+), 11 deletions(-)

(limited to 'include')

diff --git a/include/linux/pci.h b/include/linux/pci.h
index 573b4c4c2be6..6a9cf80d0d4b 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -633,18 +633,9 @@ int pcibios_root_bridge_prepare(struct pci_host_bridge *bridge);
  * Use pci_bus_for_each_resource() to iterate through all the resources.
  */
 
-/*
- * PCI_SUBTRACTIVE_DECODE means the bridge forwards the window implicitly
- * and there's no way to program the bridge with the details of the window.
- * This does not apply to ACPI _CRS windows, even with the _DEC subtractive-
- * decode bit set, because they are explicit and can be programmed with _SRS.
- */
-#define PCI_SUBTRACTIVE_DECODE	0x1
-
 struct pci_bus_resource {
 	struct list_head	list;
 	struct resource		*res;
-	unsigned int		flags;
 };
 
 #define PCI_REGION_FLAG_MASK	0x0fU	/* These bits of resource flags tell us the PCI region flags */
@@ -1498,8 +1489,7 @@ void pci_add_resource(struct list_head *resources, struct resource *res);
 void pci_add_resource_offset(struct list_head *resources, struct resource *res,
 			     resource_size_t offset);
 void pci_free_resource_list(struct list_head *resources);
-void pci_bus_add_resource(struct pci_bus *bus, struct resource *res,
-			  unsigned int flags);
+void pci_bus_add_resource(struct pci_bus *bus, struct resource *res);
 struct resource *pci_bus_resource_n(const struct pci_bus *bus, int n);
 void pci_bus_remove_resources(struct pci_bus *bus);
 void pci_bus_remove_resource(struct pci_bus *bus, struct resource *res);
-- 
cgit v1.2.3


From 469c9cb941480718db52876bbdb95a7a79b34889 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@linux.intel.com>
Date: Thu, 17 Oct 2024 17:11:09 +0300
Subject: PCI: Move struct pci_bus_resource into bus.c
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The struct pci_bus_resource is only used in bus.c, so move it there.

Link: https://lore.kernel.org/r/20241017141111.44612-2-ilpo.jarvinen@linux.intel.com
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 include/linux/pci.h | 12 ------------
 1 file changed, 12 deletions(-)

(limited to 'include')

diff --git a/include/linux/pci.h b/include/linux/pci.h
index 6a9cf80d0d4b..5a9d849b28ef 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -626,18 +626,6 @@ void pci_set_host_bridge_release(struct pci_host_bridge *bridge,
 
 int pcibios_root_bridge_prepare(struct pci_host_bridge *bridge);
 
-/*
- * The first PCI_BRIDGE_RESOURCE_NUM PCI bus resources (those that correspond
- * to P2P or CardBus bridge windows) go in a table.  Additional ones (for
- * buses below host bridges or subtractive decode bridges) go in the list.
- * Use pci_bus_for_each_resource() to iterate through all the resources.
- */
-
-struct pci_bus_resource {
-	struct list_head	list;
-	struct resource		*res;
-};
-
 #define PCI_REGION_FLAG_MASK	0x0fU	/* These bits of resource flags tell us the PCI region flags */
 
 struct pci_bus {
-- 
cgit v1.2.3


From 08ef26ea9ab315b895d57f8fbad41e02ff345bb9 Mon Sep 17 00:00:00 2001
From: Christian Brauner <brauner@kernel.org>
Date: Mon, 7 Oct 2024 16:23:58 +0200
Subject: fs: add file_ref

As atomic_inc_not_zero() is implemented with a try_cmpxchg() loop it has
O(N^2) behaviour under contention with N concurrent operations and it is
in a hot path in __fget_files_rcu().

The rcuref infrastructures remedies this problem by using an
unconditional increment relying on safe- and dead zones to make this
work and requiring rcu protection for the data structure in question.
This not just scales better it also introduces overflow protection.

However, in contrast to generic rcuref, files require a memory barrier
and thus cannot rely on *_relaxed() atomic operations and also require
to be built on atomic_long_t as having massive amounts of reference
isn't unheard of even if it is just an attack.

As suggested by Linus, add a file specific variant instead of making
this a generic library.

Files are SLAB_TYPESAFE_BY_RCU and thus don't have "regular" rcu
protection. In short, freeing of files isn't delayed until a grace
period has elapsed. Instead, they are freed immediately and thus can be
reused (multiple times) within the same grace period.

So when picking a file from the file descriptor table via its file
descriptor number it is thus possible to see an elevated reference count
on file->f_count even though the file has already been recycled possibly
multiple times by another task.

To guard against this the vfs will pick the file from the file
descriptor table twice. Once before the refcount increment and once
after to compare the pointers (grossly simplified). If they match then
the file is still valid. If not the caller needs to fput() it.

The unconditional increment makes the following race possible as
illustrated by rcuref:

> Deconstruction race
> ===================
>
> The release operation must be protected by prohibiting a grace period in
> order to prevent a possible use after free:
>
>      T1                              T2
>      put()                           get()
>      // ref->refcnt = ONEREF
>      if (!atomic_add_negative(-1, &ref->refcnt))
>              return false;                           <- Not taken
>
>      // ref->refcnt == NOREF
>      --> preemption
>                                      // Elevates ref->refcnt to ONEREF
>                                      if (!atomic_add_negative(1, &ref->refcnt))
>                                              return true;                    <- taken
>
>                                      if (put(&p->ref)) { <-- Succeeds
>                                              remove_pointer(p);
>                                              kfree_rcu(p, rcu);
>                                      }
>
>              RCU grace period ends, object is freed
>
>      atomic_cmpxchg(&ref->refcnt, NOREF, DEAD);      <- UAF
>
> [...] it prevents the grace period which keeps the object alive until
> all put() operations complete.

Having files by SLAB_TYPESAFE_BY_RCU shouldn't cause any problems for
this deconstruction race. Afaict, the only interesting case would be
someone freeing the file and someone immediately recycling it within the
same grace period and reinitializing file->f_count to ONEREF while a
concurrent fput() is doing atomic_cmpxchg(&ref->refcnt, NOREF, DEAD) as
in the race above.

But this is safe from SLAB_TYPESAFE_BY_RCU's perspective and it should
be safe from rcuref's perspective.

      T1                              T2                                                    T3
      fput()                          fget()
      // f_count->refcnt = ONEREF
      if (!atomic_add_negative(-1, &f_count->refcnt))
              return false;                           <- Not taken

      // f_count->refcnt == NOREF
      --> preemption
                                      // Elevates f_count->refcnt to ONEREF
                                      if (!atomic_add_negative(1, &f_count->refcnt))
                                              return true;                    <- taken

                                      if (put(&f_count)) { <-- Succeeds
                                              remove_pointer(p);
                                              /*
                                               * Cache is SLAB_TYPESAFE_BY_RCU
                                               * so this is freed without a grace period.
                                               */
                                              kmem_cache_free(p);
                                      }

                                                                                             kmem_cache_alloc()
                                                                                             init_file() {
                                                                                                     // Sets f_count->refcnt to ONEREF
                                                                                                     rcuref_long_init(&f->f_count, 1);
                                                                                             }

                        Object has been reused within the same grace period
                        via kmem_cache_alloc()'s SLAB_TYPESAFE_BY_RCU.

      /*
       * With SLAB_TYPESAFE_BY_RCU this would be a safe UAF access and
       * it would work correctly because the atomic_cmpxchg()
       * will fail because the refcount has been reset to ONEREF by T3.
       */
      atomic_cmpxchg(&ref->refcnt, NOREF, DEAD);      <- UAF

However, there are other cases to consider:

(1) Benign race due to multiple atomic_long_read()

    CPU1                                                    CPU2

    file_ref_put()
    // last reference
    // => count goes negative/FILE_REF_NOREF
    atomic_long_add_negative_release(-1, &ref->refcnt)
    -> __file_ref_put()
                                                        file_ref_get()
                                                        // goes back from negative/FILE_REF_NOREF to 0
                                                        // and file_ref_get() succeeds
                                                        atomic_long_add_negative(1, &ref->refcnt)

                                                        // This is immediately followed by file_ref_put()
                                                        // managing to set FILE_REF_DEAD
                                                        file_ref_put()

       // __file_ref_put() continues and sees
       // cnt > FILE_REF_RELEASED // and splats with
       // "imbalanced put on file reference count"
       cnt = atomic_long_read(&ref->refcnt);

    The race however is benign and the problem is the
    atomic_long_read(). Instead of performing a separate read this uses
    atomic_long_dec_return() and pass the value to __file_ref_put().
    Thanks to Linus for pointing out that braino.

(2) SLAB_TYPESAFE_BY_RCU may cause recycled files to be marked dead

    When a file is recycled the following race exists:

    CPU1                                                       CPU2
    // @file is already dead and thus
    // cnt >= FILE_REF_RELEASED.
    file_ref_get(file)
    atomic_long_add_negative(1, &ref->refcnt)
       // We thus call into __file_ref_get()
    -> __file_ref_get()

       // which sees cnt >= FILE_REF_RELEASED
       cnt = atomic_long_read(&ref->refcnt);
                                                               // In the meantime @file gets freed
                                                               kmem_cache_free()

                                                               // and is immediately recycled
                                                               file = kmem_cache_zalloc()
                                                               // and the reference count is reinitialized
                                                               // and the file alive again in someone
                                                               // else's file descriptor table
                                                               file_ref_init(&ref->refcnt, 1);

       // the __file_ref_get() slowpath now continues
       // and as it saw earlier that cnt >= FILE_REF_RELEASED
       // it wants to ensure that we're staying in the middle
       // of the deadzone and unconditionally sets
       // FILE_REF_DEAD.
       // This marks @file dead for CPU2...
       atomic_long_set(&ref->refcnt, FILE_REF_DEAD);

                                                               // Caller issues a close() system call to close @file
                                                               close(fd)
                                                               file = file_close_fd_locked()
                                                               filp_flush()
                                                               // The caller sees that cnt >= FILE_REF_RELEASED
                                                               // and warns the first time...
                                                               CHECK_DATA_CORRUPTION(file_count(file) == 0)

                                                               // and then splats a second time because
                                                               // __file_ref_put() sees cnt >= FILE_REF_RELEASED
                                                               file_ref_put(&ref->refcnt);
                                                               -> __file_ref_put()

    My initial inclination was to replace the unconditional
    atomic_long_set() with an atomic_long_try_cmpxchg() but Linus
    pointed out that:

    > I think we should just make file_ref_get() do a simple
    >
    >        return !atomic_long_add_negative(1, &ref->refcnt));
    >
    > and nothing else. Yes, multiple CPU's can race, and you can increment
    > more than once, but the gap - even on 32-bit - between DEAD and
    > becoming close to REF_RELEASED is so big that we simply don't care.
    > That's the point of having a gap.

I've been testing this with will-it-scale using fstat() on a machine
that Jens gave me access (thank you very much!):

processor       : 511
vendor_id       : AuthenticAMD
cpu family      : 25
model           : 160
model name      : AMD EPYC 9754 128-Core Processor

and I consistently get a 3-5% improvement on 256+ threads.

Reported-by: kernel test robot <oliver.sang@intel.com>
Closes: https://lore.kernel.org/oe-lkp/202410151043.5d224a27-oliver.sang@intel.com
Closes: https://lore.kernel.org/all/202410151611.f4cd71f2-oliver.sang@intel.com
Link: https://lore.kernel.org/r/20241007-brauner-file-rcuref-v2-2-387e24dc9163@kernel.org
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/linux/file_ref.h | 177 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 177 insertions(+)
 create mode 100644 include/linux/file_ref.h

(limited to 'include')

diff --git a/include/linux/file_ref.h b/include/linux/file_ref.h
new file mode 100644
index 000000000000..9b3a8d9b17ab
--- /dev/null
+++ b/include/linux/file_ref.h
@@ -0,0 +1,177 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef _LINUX_FILE_REF_H
+#define _LINUX_FILE_REF_H
+
+#include <linux/atomic.h>
+#include <linux/preempt.h>
+#include <linux/types.h>
+
+/*
+ * file_ref is a reference count implementation specifically for use by
+ * files. It takes inspiration from rcuref but differs in key aspects
+ * such as support for SLAB_TYPESAFE_BY_RCU type caches.
+ *
+ * FILE_REF_ONEREF                FILE_REF_MAXREF
+ * 0x0000000000000000UL      0x7FFFFFFFFFFFFFFFUL
+ * <-------------------valid ------------------->
+ *
+ *                       FILE_REF_SATURATED
+ * 0x8000000000000000UL 0xA000000000000000UL 0xBFFFFFFFFFFFFFFFUL
+ * <-----------------------saturation zone---------------------->
+ *
+ * FILE_REF_RELEASED                   FILE_REF_DEAD
+ * 0xC000000000000000UL         0xE000000000000000UL
+ * <-------------------dead zone------------------->
+ *
+ * FILE_REF_NOREF
+ * 0xFFFFFFFFFFFFFFFFUL
+ */
+
+#ifdef CONFIG_64BIT
+#define FILE_REF_ONEREF		0x0000000000000000UL
+#define FILE_REF_MAXREF		0x7FFFFFFFFFFFFFFFUL
+#define FILE_REF_SATURATED	0xA000000000000000UL
+#define FILE_REF_RELEASED	0xC000000000000000UL
+#define FILE_REF_DEAD		0xE000000000000000UL
+#define FILE_REF_NOREF		0xFFFFFFFFFFFFFFFFUL
+#else
+#define FILE_REF_ONEREF		0x00000000U
+#define FILE_REF_MAXREF		0x7FFFFFFFU
+#define FILE_REF_SATURATED	0xA0000000U
+#define FILE_REF_RELEASED	0xC0000000U
+#define FILE_REF_DEAD		0xE0000000U
+#define FILE_REF_NOREF		0xFFFFFFFFU
+#endif
+
+typedef struct {
+#ifdef CONFIG_64BIT
+	atomic64_t refcnt;
+#else
+	atomic_t refcnt;
+#endif
+} file_ref_t;
+
+/**
+ * file_ref_init - Initialize a file reference count
+ * @ref: Pointer to the reference count
+ * @cnt: The initial reference count typically '1'
+ */
+static inline void file_ref_init(file_ref_t *ref, unsigned long cnt)
+{
+	atomic_long_set(&ref->refcnt, cnt - 1);
+}
+
+bool __file_ref_put(file_ref_t *ref, unsigned long cnt);
+
+/**
+ * file_ref_get - Acquire one reference on a file
+ * @ref: Pointer to the reference count
+ *
+ * Similar to atomic_inc_not_zero() but saturates at FILE_REF_MAXREF.
+ *
+ * Provides full memory ordering.
+ *
+ * Return: False if the attempt to acquire a reference failed. This happens
+ *         when the last reference has been put already. True if a reference
+ *         was successfully acquired
+ */
+static __always_inline __must_check bool file_ref_get(file_ref_t *ref)
+{
+	/*
+	 * Unconditionally increase the reference count with full
+	 * ordering. The saturation and dead zones provide enough
+	 * tolerance for this.
+	 *
+	 * If this indicates negative the file in question the fail can
+	 * be freed and immediately reused due to SLAB_TYPSAFE_BY_RCU.
+	 * Hence, unconditionally altering the file reference count to
+	 * e.g., reset the file reference count back to the middle of
+	 * the deadzone risk end up marking someone else's file as dead
+	 * behind their back.
+	 *
+	 * It would be possible to do a careful:
+	 *
+	 * cnt = atomic_long_inc_return();
+	 * if (likely(cnt >= 0))
+	 *	return true;
+	 *
+	 * and then something like:
+	 *
+	 * if (cnt >= FILE_REF_RELEASE)
+	 *	atomic_long_try_cmpxchg(&ref->refcnt, &cnt, FILE_REF_DEAD),
+	 *
+	 * to set the value back to the middle of the deadzone. But it's
+	 * practically impossible to go from FILE_REF_DEAD to
+	 * FILE_REF_ONEREF. It would need 2305843009213693952/2^61
+	 * file_ref_get()s to resurrect such a dead file.
+	 */
+	return !atomic_long_add_negative(1, &ref->refcnt);
+}
+
+/**
+ * file_ref_inc - Acquire one reference on a file
+ * @ref: Pointer to the reference count
+ *
+ * Acquire an additional reference on a file. Warns if the caller didn't
+ * already hold a reference.
+ */
+static __always_inline void file_ref_inc(file_ref_t *ref)
+{
+	long prior = atomic_long_fetch_inc_relaxed(&ref->refcnt);
+	WARN_ONCE(prior < 0, "file_ref_inc() on a released file reference");
+}
+
+/**
+ * file_ref_put -- Release a file reference
+ * @ref:	Pointer to the reference count
+ *
+ * Provides release memory ordering, such that prior loads and stores
+ * are done before, and provides an acquire ordering on success such
+ * that free() must come after.
+ *
+ * Return: True if this was the last reference with no future references
+ *         possible. This signals the caller that it can safely release
+ *         the object which is protected by the reference counter.
+ *         False if there are still active references or the put() raced
+ *         with a concurrent get()/put() pair. Caller is not allowed to
+ *         release the protected object.
+ */
+static __always_inline __must_check bool file_ref_put(file_ref_t *ref)
+{
+	long cnt;
+
+	/*
+	 * While files are SLAB_TYPESAFE_BY_RCU and thus file_ref_put()
+	 * calls don't risk UAFs when a file is recyclyed, it is still
+	 * vulnerable to UAFs caused by freeing the whole slab page once
+	 * it becomes unused. Prevent file_ref_put() from being
+	 * preempted protects against this.
+	 */
+	guard(preempt)();
+	/*
+	 * Unconditionally decrease the reference count. The saturation
+	 * and dead zones provide enough tolerance for this. If this
+	 * fails then we need to handle the last reference drop and
+	 * cases inside the saturation and dead zones.
+	 */
+	cnt = atomic_long_dec_return(&ref->refcnt);
+	if (cnt >= 0)
+		return false;
+	return __file_ref_put(ref, cnt);
+}
+
+/**
+ * file_ref_read - Read the number of file references
+ * @ref: Pointer to the reference count
+ *
+ * Return: The number of held references (0 ... N)
+ */
+static inline unsigned long file_ref_read(file_ref_t *ref)
+{
+	unsigned long c = atomic_long_read(&ref->refcnt);
+
+	/* Return 0 if within the DEAD zone. */
+	return c >= FILE_REF_RELEASED ? 0 : c + 1;
+}
+
+#endif
-- 
cgit v1.2.3


From 9a8dbdadae509e5717ff6e5aa572ca0974d2101d Mon Sep 17 00:00:00 2001
From: John Garry <john.g.garry@oracle.com>
Date: Sat, 19 Oct 2024 12:51:06 +0000
Subject: block/fs: Pass an iocb to generic_atomic_write_valid()

Darrick and Hannes both thought it better that generic_atomic_write_valid()
should be passed a struct iocb, and not just the member of that struct
which is referenced; see [0] and [1].

I think that makes a more generic and clean API, so make that change.

[0] https://lore.kernel.org/linux-block/680ce641-729b-4150-b875-531a98657682@suse.de/
[1] https://lore.kernel.org/linux-xfs/20240620212401.GA3058325@frogsfrogsfrogs/

Fixes: c34fc6f26ab8 ("fs: Initial atomic write support")
Suggested-by: Darrick J. Wong <djwong@kernel.org>
Suggested-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: John Garry <john.g.garry@oracle.com>
Link: https://lore.kernel.org/r/20241019125113.369994-2-john.g.garry@oracle.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/fs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index e3c603d01337..fbfa032d1d90 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -3721,6 +3721,6 @@ static inline bool vfs_empty_path(int dfd, const char __user *path)
 	return !c;
 }
 
-bool generic_atomic_write_valid(struct iov_iter *iter, loff_t pos);
+bool generic_atomic_write_valid(struct kiocb *iocb, struct iov_iter *iter);
 
 #endif /* _LINUX_FS_H */
-- 
cgit v1.2.3


From c3be7ebbbce5201e151f17e28a6c807602f369c9 Mon Sep 17 00:00:00 2001
From: John Garry <john.g.garry@oracle.com>
Date: Sat, 19 Oct 2024 12:51:07 +0000
Subject: fs/block: Check for IOCB_DIRECT in generic_atomic_write_valid()

Currently FMODE_CAN_ATOMIC_WRITE is set if the bdev can atomic write and
the file is open for direct IO. This does not work if the file is not
opened for direct IO, yet fcntl(O_DIRECT) is used on the fd later.

Change to check for direct IO on a per-IO basis in
generic_atomic_write_valid(). Since we want to report -EOPNOTSUPP for
non-direct IO for an atomic write, change to return an error code.

Relocate the block fops atomic write checks to the common write path, as to
catch non-direct IO.

Fixes: c34fc6f26ab8 ("fs: Initial atomic write support")
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: John Garry <john.g.garry@oracle.com>
Link: https://lore.kernel.org/r/20241019125113.369994-3-john.g.garry@oracle.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/fs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index fbfa032d1d90..ba47fb283730 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -3721,6 +3721,6 @@ static inline bool vfs_empty_path(int dfd, const char __user *path)
 	return !c;
 }
 
-bool generic_atomic_write_valid(struct kiocb *iocb, struct iov_iter *iter);
+int generic_atomic_write_valid(struct kiocb *iocb, struct iov_iter *iter);
 
 #endif /* _LINUX_FS_H */
-- 
cgit v1.2.3


From 1eadb157947163ca72ba8963b915fdc099ce6cca Mon Sep 17 00:00:00 2001
From: John Garry <john.g.garry@oracle.com>
Date: Sat, 19 Oct 2024 12:51:08 +0000
Subject: block: Add bdev atomic write limits helpers

Add helpers to get atomic write limits for a bdev, so that we don't access
request_queue helpers outside the block layer.

We check if the bdev can actually atomic write in these helpers, so we
can avoid users missing using this check.

Suggested-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: John Garry <john.g.garry@oracle.com>
Link: https://lore.kernel.org/r/20241019125113.369994-4-john.g.garry@oracle.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blkdev.h | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

(limited to 'include')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 50c3b959da28..c2cc3c146d74 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1674,6 +1674,22 @@ static inline bool bdev_can_atomic_write(struct block_device *bdev)
 	return true;
 }
 
+static inline unsigned int
+bdev_atomic_write_unit_min_bytes(struct block_device *bdev)
+{
+	if (!bdev_can_atomic_write(bdev))
+		return 0;
+	return queue_atomic_write_unit_min_bytes(bdev_get_queue(bdev));
+}
+
+static inline unsigned int
+bdev_atomic_write_unit_max_bytes(struct block_device *bdev)
+{
+	if (!bdev_can_atomic_write(bdev))
+		return 0;
+	return queue_atomic_write_unit_max_bytes(bdev_get_queue(bdev));
+}
+
 #define DEFINE_IO_COMP_BATCH(name)	struct io_comp_batch name = { }
 
 #endif /* _LINUX_BLKDEV_H */
-- 
cgit v1.2.3


From e896474fe4851ffc4dd860c92daa906783090346 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 7 Oct 2024 23:08:34 -0400
Subject: getname_maybe_null() - the third variant of pathname copy-in

Semantics used by statx(2) (and later *xattrat(2)): without AT_EMPTY_PATH
it's standard getname() (i.e. ERR_PTR(-ENOENT) on empty string,
ERR_PTR(-EFAULT) on NULL), with AT_EMPTY_PATH both empty string and
NULL are accepted.

Calling conventions: getname_maybe_null(user_pointer, flags) returns
	* pointer to struct filename when non-empty string had been
successfully read
	* ERR_PTR(...) on error
	* NULL if an empty string or NULL pointer had been given
with AT_EMPTY_PATH in the flags argument.

It tries to avoid allocation in the last case; it's not always
able to do so, in which case the temporary struct filename instance
is freed and NULL returned anyway.

Fast path is inlined.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/fs.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index e3c603d01337..403258ac2ea2 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2766,6 +2766,16 @@ extern struct filename *getname_flags(const char __user *, int);
 extern struct filename *getname_uflags(const char __user *, int);
 extern struct filename *getname(const char __user *);
 extern struct filename *getname_kernel(const char *);
+extern struct filename *__getname_maybe_null(const char __user *);
+static inline struct filename *getname_maybe_null(const char __user *name, int flags)
+{
+	if (!(flags & AT_EMPTY_PATH))
+		return getname(name);
+
+	if (!name)
+		return NULL;
+	return __getname_maybe_null(name);
+}
 extern void putname(struct filename *name);
 
 extern int finish_open(struct file *file, struct dentry *dentry,
-- 
cgit v1.2.3


From af264e5989055ac33f413c4c80874345cda0cc97 Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Fri, 18 Oct 2024 19:05:57 +0200
Subject: mtd: spinand: Constify struct nand_ecc_engine_ops

'struct nand_ecc_engine_ops' are not modified in these drivers.

Constifying this structure moves some data to a read-only section, so
increases overall security, especially when the structure holds some
function pointers.

Update the prototype of mxic_ecc_get_pipelined_ops() accordingly.

On a x86_64, with allmodconfig, as an example:
Before:
======
   text	   data	    bss	    dec	    hex	filename
  16709	   1374	     16	  18099	   46b3	drivers/mtd/nand/ecc-mxic.o

After:
=====
   text	   data	    bss	    dec	    hex	filename
  16789	   1294	     16	  18099	   46b3	drivers/mtd/nand/ecc-mxic.o

Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
Link: https://lore.kernel.org/linux-mtd/72597e9de2320a4109be2112e696399592edacd4.1729271136.git.christophe.jaillet@wanadoo.fr
---
 include/linux/mtd/nand-ecc-mxic.h | 4 ++--
 include/linux/mtd/nand.h          | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/mtd/nand-ecc-mxic.h b/include/linux/mtd/nand-ecc-mxic.h
index b125926e458c..0da4b2999576 100644
--- a/include/linux/mtd/nand-ecc-mxic.h
+++ b/include/linux/mtd/nand-ecc-mxic.h
@@ -16,7 +16,7 @@ struct mxic_ecc_engine;
 
 #if IS_ENABLED(CONFIG_MTD_NAND_ECC_MXIC) && IS_REACHABLE(CONFIG_MTD_NAND_CORE)
 
-struct nand_ecc_engine_ops *mxic_ecc_get_pipelined_ops(void);
+const struct nand_ecc_engine_ops *mxic_ecc_get_pipelined_ops(void);
 struct nand_ecc_engine *mxic_ecc_get_pipelined_engine(struct platform_device *spi_pdev);
 void mxic_ecc_put_pipelined_engine(struct nand_ecc_engine *eng);
 int mxic_ecc_process_data_pipelined(struct nand_ecc_engine *eng,
@@ -24,7 +24,7 @@ int mxic_ecc_process_data_pipelined(struct nand_ecc_engine *eng,
 
 #else /* !CONFIG_MTD_NAND_ECC_MXIC */
 
-static inline struct nand_ecc_engine_ops *mxic_ecc_get_pipelined_ops(void)
+static inline const struct nand_ecc_engine_ops *mxic_ecc_get_pipelined_ops(void)
 {
 	return NULL;
 }
diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h
index 1e4208040956..0e2f228e8b4a 100644
--- a/include/linux/mtd/nand.h
+++ b/include/linux/mtd/nand.h
@@ -293,7 +293,7 @@ enum nand_ecc_engine_integration {
 struct nand_ecc_engine {
 	struct device *dev;
 	struct list_head node;
-	struct nand_ecc_engine_ops *ops;
+	const struct nand_ecc_engine_ops *ops;
 	enum nand_ecc_engine_integration integration;
 	void *priv;
 };
-- 
cgit v1.2.3


From 192514ae05687a60ac230569b2a215fb2d3b8d90 Mon Sep 17 00:00:00 2001
From: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
Date: Mon, 10 Jun 2024 10:57:32 +0200
Subject: soc: mediatek: Add MediaTek DVFS Resource Collector (DVFSRC) driver

The Dynamic Voltage and Frequency Scaling Resource Collector (DVFSRC) is a
Hardware module used to collect all the requests from both software and the
various remote processors embedded into the SoC and decide about a minimum
operating voltage and a minimum DRAM frequency to fulfill those requests in
an effort to provide the best achievable performance per watt.

This hardware IP is capable of transparently performing direct register R/W
on all of the DVFSRC-controlled regulators and SoC bandwidth knobs.

This driver includes support for MT8183, MT8192 and MT8195.

Co-Developed-by: Dawei Chien <dawei.chien@mediatek.com>
[Angelo: Partial refactoring and cleanups]
Reviewed-by: Georgi Djakov <djakov@kerenl.org>
Link: https://lore.kernel.org/r/20240610085735.147134-5-angelogioacchino.delregno@collabora.com
Signed-off-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
---
 include/linux/soc/mediatek/dvfsrc.h      | 36 ++++++++++++++++++++++++++++++++
 include/linux/soc/mediatek/mtk_sip_svc.h |  3 +++
 2 files changed, 39 insertions(+)
 create mode 100644 include/linux/soc/mediatek/dvfsrc.h

(limited to 'include')

diff --git a/include/linux/soc/mediatek/dvfsrc.h b/include/linux/soc/mediatek/dvfsrc.h
new file mode 100644
index 000000000000..1498b3ed396b
--- /dev/null
+++ b/include/linux/soc/mediatek/dvfsrc.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright (c) 2021 MediaTek Inc.
+ * Copyright (c) 2024 Collabora Ltd.
+ *                    AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
+ */
+
+#ifndef __MEDIATEK_DVFSRC_H
+#define __MEDIATEK_DVFSRC_H
+
+enum mtk_dvfsrc_cmd {
+	MTK_DVFSRC_CMD_BW,
+	MTK_DVFSRC_CMD_HRT_BW,
+	MTK_DVFSRC_CMD_PEAK_BW,
+	MTK_DVFSRC_CMD_OPP,
+	MTK_DVFSRC_CMD_VCORE_LEVEL,
+	MTK_DVFSRC_CMD_VSCP_LEVEL,
+	MTK_DVFSRC_CMD_MAX,
+};
+
+#if IS_ENABLED(CONFIG_MTK_DVFSRC)
+
+int mtk_dvfsrc_send_request(const struct device *dev, u32 cmd, u64 data);
+int mtk_dvfsrc_query_info(const struct device *dev, u32 cmd, int *data);
+
+#else
+
+static inline int mtk_dvfsrc_send_request(const struct device *dev, u32 cmd, u64 data)
+{ return -ENODEV; }
+
+static inline int mtk_dvfsrc_query_info(const struct device *dev, u32 cmd, int *data)
+{ return -ENODEV; }
+
+#endif /* CONFIG_MTK_DVFSRC */
+
+#endif
diff --git a/include/linux/soc/mediatek/mtk_sip_svc.h b/include/linux/soc/mediatek/mtk_sip_svc.h
index 0761128b4354..abe24a73ee19 100644
--- a/include/linux/soc/mediatek/mtk_sip_svc.h
+++ b/include/linux/soc/mediatek/mtk_sip_svc.h
@@ -22,6 +22,9 @@
 	ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, MTK_SIP_SMC_CONVENTION, \
 			   ARM_SMCCC_OWNER_SIP, fn_id)
 
+/* DVFSRC SMC calls */
+#define MTK_SIP_DVFSRC_VCOREFS_CONTROL	MTK_SIP_SMC_CMD(0x506)
+
 /* IOMMU related SMC call */
 #define MTK_SIP_KERNEL_IOMMU_CONTROL	MTK_SIP_SMC_CMD(0x514)
 
-- 
cgit v1.2.3


From fb6f20ecb121cef4d7946f834a6ee867c4e21b4a Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Thu, 17 Oct 2024 12:28:23 +0200
Subject: reiserfs: The last commit

Deprecation period of reiserfs ends with the end of this year so it is
time to remove it from the kernel.

Acked-by: Darrick J. Wong <djwong@kernel.org>
Acked-by: Christian Brauner <brauner@kernel.org>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 include/uapi/linux/reiserfs_fs.h    | 27 ---------------------------
 include/uapi/linux/reiserfs_xattr.h | 25 -------------------------
 2 files changed, 52 deletions(-)
 delete mode 100644 include/uapi/linux/reiserfs_fs.h
 delete mode 100644 include/uapi/linux/reiserfs_xattr.h

(limited to 'include')

diff --git a/include/uapi/linux/reiserfs_fs.h b/include/uapi/linux/reiserfs_fs.h
deleted file mode 100644
index 5bb921409f2b..000000000000
--- a/include/uapi/linux/reiserfs_fs.h
+++ /dev/null
@@ -1,27 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-/*
- * Copyright 1996, 1997, 1998 Hans Reiser, see reiserfs/README for licensing and copyright details
- */
-#ifndef _LINUX_REISER_FS_H
-#define _LINUX_REISER_FS_H
-
-#include <linux/types.h>
-#include <linux/magic.h>
-
-/*
- *  include/linux/reiser_fs.h
- *
- *  Reiser File System constants and structures
- *
- */
-
-/* ioctl's command */
-#define REISERFS_IOC_UNPACK		_IOW(0xCD,1,long)
-/* define following flags to be the same as in ext2, so that chattr(1),
-   lsattr(1) will work with us. */
-#define REISERFS_IOC_GETFLAGS		FS_IOC_GETFLAGS
-#define REISERFS_IOC_SETFLAGS		FS_IOC_SETFLAGS
-#define REISERFS_IOC_GETVERSION		FS_IOC_GETVERSION
-#define REISERFS_IOC_SETVERSION		FS_IOC_SETVERSION
-
-#endif				/* _LINUX_REISER_FS_H */
diff --git a/include/uapi/linux/reiserfs_xattr.h b/include/uapi/linux/reiserfs_xattr.h
deleted file mode 100644
index 503ad018ce5b..000000000000
--- a/include/uapi/linux/reiserfs_xattr.h
+++ /dev/null
@@ -1,25 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-/*
-  File: linux/reiserfs_xattr.h
-*/
-
-#ifndef _LINUX_REISERFS_XATTR_H
-#define _LINUX_REISERFS_XATTR_H
-
-#include <linux/types.h>
-
-/* Magic value in header */
-#define REISERFS_XATTR_MAGIC 0x52465841	/* "RFXA" */
-
-struct reiserfs_xattr_header {
-	__le32 h_magic;		/* magic number for identification */
-	__le32 h_hash;		/* hash of the value */
-};
-
-struct reiserfs_security_handle {
-	const char *name;
-	void *value;
-	__kernel_size_t length;
-};
-
-#endif  /*  _LINUX_REISERFS_XATTR_H  */
-- 
cgit v1.2.3


From 424a55a4a9087887fcfcee561648df497701a4a2 Mon Sep 17 00:00:00 2001
From: Aleksa Sarai <cyphar@cyphar.com>
Date: Thu, 10 Oct 2024 07:40:34 +1100
Subject: uaccess: add copy_struct_to_user helper

This is based on copy_struct_from_user(), but there is one additional
case to consider when creating a syscall that returns an
extensible-struct to userspace -- how should data in the struct that
cannot fit into the userspace struct be handled (ksize > usize)?

There are three possibilies:

 1. The interface is like sched_getattr(2), where new information will
    be silently not provided to userspace. This is probably what most
    interfaces will want to do, as it provides the most possible
    backwards-compatibility.

 2. The interface is like lsm_list_modules(2), where you want to return
    an error like -EMSGSIZE if not providing information could result in
    the userspace program making a serious mistake (such as one that
    could lead to a security problem) or if you want to provide some
    flag to userspace so they know that they are missing some
    information.

 3. The interface is like statx(2), where there some kind of a request
    mask that indicates what data userspace would like. One could
    imagine that statx2(2) (using extensible structs) would want to
    return -EMSGSIZE if the user explicitly requested a field that their
    structure is too small to fit, but not return an error if the field
    was not explicitly requested. This is kind of a mix between (1) and
    (2) based on the requested mask.

The copy_struct_to_user() helper includes a an extra argument that is
used to return a boolean flag indicating whether there was a non-zero
byte in the trailing bytes that were not copied to userspace. This can
be used in the following ways to handle all three cases, respectively:

 1. Just pass NULL, as you don't care about this case.

 2. Return an error (say -EMSGSIZE) if the argument was set to true by
    copy_struct_to_user().

 3. If the argument was set to true by copy_struct_to_user(), check if
    there is a flag that implies a field larger than usize.

    This is the only case where callers of copy_struct_to_user() should
    check usize themselves. This will probably require scanning an array
    that specifies what flags were added for each version of the flags
    struct and returning an error if the request mask matches any of the
    flags that were added in versions of the struct that are larger than
    usize.

At the moment we don't have any users of (3), so this patch doesn't
include any helpers to make the necessary scanning easier, but it should
be fairly easy to add some if necessary.

Signed-off-by: Aleksa Sarai <cyphar@cyphar.com>
Link: https://lore.kernel.org/r/20241010-extensible-structs-check_fields-v3-1-d2833dfe6edd@cyphar.com
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/linux/uaccess.h | 97 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 97 insertions(+)

(limited to 'include')

diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h
index 39c7cf82b0c2..1e7c68b4c262 100644
--- a/include/linux/uaccess.h
+++ b/include/linux/uaccess.h
@@ -394,6 +394,103 @@ copy_struct_from_user(void *dst, size_t ksize, const void __user *src,
 	return 0;
 }
 
+/**
+ * copy_struct_to_user: copy a struct to userspace
+ * @dst:   Destination address, in userspace. This buffer must be @ksize
+ *         bytes long.
+ * @usize: (Alleged) size of @dst struct.
+ * @src:   Source address, in kernel space.
+ * @ksize: Size of @src struct.
+ * @ignored_trailing: Set to %true if there was a non-zero byte in @src that
+ * userspace cannot see because they are using an smaller struct.
+ *
+ * Copies a struct from kernel space to userspace, in a way that guarantees
+ * backwards-compatibility for struct syscall arguments (as long as future
+ * struct extensions are made such that all new fields are *appended* to the
+ * old struct, and zeroed-out new fields have the same meaning as the old
+ * struct).
+ *
+ * Some syscalls may wish to make sure that userspace knows about everything in
+ * the struct, and if there is a non-zero value that userspce doesn't know
+ * about, they want to return an error (such as -EMSGSIZE) or have some other
+ * fallback (such as adding a "you're missing some information" flag). If
+ * @ignored_trailing is non-%NULL, it will be set to %true if there was a
+ * non-zero byte that could not be copied to userspace (ie. was past @usize).
+ *
+ * While unconditionally returning an error in this case is the simplest
+ * solution, for maximum backward compatibility you should try to only return
+ * -EMSGSIZE if the user explicitly requested the data that couldn't be copied.
+ * Note that structure sizes can change due to header changes and simple
+ * recompilations without code changes(!), so if you care about
+ * @ignored_trailing you probably want to make sure that any new field data is
+ * associated with a flag. Otherwise you might assume that a program knows
+ * about data it does not.
+ *
+ * @ksize is just sizeof(*src), and @usize should've been passed by userspace.
+ * The recommended usage is something like the following:
+ *
+ *   SYSCALL_DEFINE2(foobar, struct foo __user *, uarg, size_t, usize)
+ *   {
+ *      int err;
+ *      bool ignored_trailing;
+ *      struct foo karg = {};
+ *
+ *      if (usize > PAGE_SIZE)
+ *		return -E2BIG;
+ *      if (usize < FOO_SIZE_VER0)
+ *		return -EINVAL;
+ *
+ *      // ... modify karg somehow ...
+ *
+ *      err = copy_struct_to_user(uarg, usize, &karg, sizeof(karg),
+ *				  &ignored_trailing);
+ *      if (err)
+ *		return err;
+ *      if (ignored_trailing)
+ *		return -EMSGSIZE:
+ *
+ *      // ...
+ *   }
+ *
+ * There are three cases to consider:
+ *  * If @usize == @ksize, then it's copied verbatim.
+ *  * If @usize < @ksize, then the kernel is trying to pass userspace a newer
+ *    struct than it supports. Thus we only copy the interoperable portions
+ *    (@usize) and ignore the rest (but @ignored_trailing is set to %true if
+ *    any of the trailing (@ksize - @usize) bytes are non-zero).
+ *  * If @usize > @ksize, then the kernel is trying to pass userspace an older
+ *    struct than userspace supports. In order to make sure the
+ *    unknown-to-the-kernel fields don't contain garbage values, we zero the
+ *    trailing (@usize - @ksize) bytes.
+ *
+ * Returns (in all cases, some data may have been copied):
+ *  * -EFAULT: access to userspace failed.
+ */
+static __always_inline __must_check int
+copy_struct_to_user(void __user *dst, size_t usize, const void *src,
+		    size_t ksize, bool *ignored_trailing)
+{
+	size_t size = min(ksize, usize);
+	size_t rest = max(ksize, usize) - size;
+
+	/* Double check if ksize is larger than a known object size. */
+	if (WARN_ON_ONCE(ksize > __builtin_object_size(src, 1)))
+		return -E2BIG;
+
+	/* Deal with trailing bytes. */
+	if (usize > ksize) {
+		if (clear_user(dst + size, rest))
+			return -EFAULT;
+	}
+	if (ignored_trailing)
+		*ignored_trailing = ksize < usize &&
+			memchr_inv(src + size, 0, rest) != NULL;
+	/* Copy the interoperable parts of the struct. */
+	if (copy_to_user(dst, src, size))
+		return -EFAULT;
+	return 0;
+}
+
 bool copy_from_kernel_nofault_allowed(const void *unsafe_src, size_t size);
 
 long copy_from_kernel_nofault(void *dst, const void *src, size_t size);
-- 
cgit v1.2.3


From 48931f65e9f785b65244550cc8f0c8bf9eab7acd Mon Sep 17 00:00:00 2001
From: Michael Margolin <mrgolin@amazon.com>
Date: Tue, 15 Oct 2024 17:42:42 +0000
Subject: RDMA/efa: Add option to set QP service level on create

Using modify QP with AH attributes and IB_QP_AV flag set doesn't make
much sense for connectionless QP types like SRD. Add SL parameter to EFA
create QP user ABI and pass it to the device.

Link: https://patch.msgid.link/r/20241015174242.3490-3-mrgolin@amazon.com
Reviewed-by: Firas Jahjah <firasj@amazon.com>
Reviewed-by: Yonatan Nachum <ynachum@amazon.com>
Signed-off-by: Michael Margolin <mrgolin@amazon.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 include/uapi/rdma/efa-abi.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/uapi/rdma/efa-abi.h b/include/uapi/rdma/efa-abi.h
index d689b8b34189..11b94b0b035b 100644
--- a/include/uapi/rdma/efa-abi.h
+++ b/include/uapi/rdma/efa-abi.h
@@ -95,7 +95,8 @@ struct efa_ibv_create_qp {
 	__u32 sq_ring_size; /* bytes */
 	__u32 driver_qp_type;
 	__u16 flags;
-	__u8 reserved_90[6];
+	__u8 sl;
+	__u8 reserved_98[5];
 };
 
 struct efa_ibv_create_qp_resp {
-- 
cgit v1.2.3


From 3e482e2840546a3ff725d5adf8d0779ac1c3cf4f Mon Sep 17 00:00:00 2001
From: Fenghua Yu <fenghua.yu@intel.com>
Date: Fri, 18 Oct 2024 14:37:25 -0700
Subject: dmaengine: idxd: Move DSA/IAA device IDs to IDXD driver

Since the DSA/IAA device IDs are only used by the IDXD driver, there is
no need to define them as public IDs. Move their definitions to the IDXD
driver to limit their scope. This change helps reduce unnecessary
exposure of the device IDs in the global space, making the codebase
cleaner and better encapsulated.

There is no functional change.

Signed-off-by: Fenghua Yu <fenghua.yu@intel.com>
Link: https://lore.kernel.org/r/20241018213725.4167413-1-fenghua.yu@intel.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 include/linux/pci_ids.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 4cf6aaed5f35..e4bddb927795 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -2709,9 +2709,6 @@
 #define PCI_DEVICE_ID_INTEL_82815_MC	0x1130
 #define PCI_DEVICE_ID_INTEL_82815_CGC	0x1132
 #define PCI_DEVICE_ID_INTEL_SST_TNG	0x119a
-#define PCI_DEVICE_ID_INTEL_DSA_GNRD	0x11fb
-#define PCI_DEVICE_ID_INTEL_DSA_DMR	0x1212
-#define PCI_DEVICE_ID_INTEL_IAA_DMR	0x1216
 #define PCI_DEVICE_ID_INTEL_82092AA_0	0x1221
 #define PCI_DEVICE_ID_INTEL_82437	0x122d
 #define PCI_DEVICE_ID_INTEL_82371FB_0	0x122e
-- 
cgit v1.2.3


From 6fa115569d980fca1969c075d8d958d205a405ca Mon Sep 17 00:00:00 2001
From: Raviteja Laggyshetty <quic_rlaggysh@quicinc.com>
Date: Tue, 10 Sep 2024 10:10:12 +0000
Subject: dt-bindings: interconnect: document the RPMh Network-On-Chip
 interconnect in QCS8300 SoC

Document the RPMh Network-On-Chip Interconnect of the QCS8300 platform.

Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Signed-off-by: Raviteja Laggyshetty <quic_rlaggysh@quicinc.com>
Link: https://lore.kernel.org/r/20240910101013.3020-2-quic_rlaggysh@quicinc.com
Signed-off-by: Georgi Djakov <djakov@kernel.org>
---
 .../dt-bindings/interconnect/qcom,qcs8300-rpmh.h   | 189 +++++++++++++++++++++
 1 file changed, 189 insertions(+)
 create mode 100644 include/dt-bindings/interconnect/qcom,qcs8300-rpmh.h

(limited to 'include')

diff --git a/include/dt-bindings/interconnect/qcom,qcs8300-rpmh.h b/include/dt-bindings/interconnect/qcom,qcs8300-rpmh.h
new file mode 100644
index 000000000000..c5eeafa1b1dd
--- /dev/null
+++ b/include/dt-bindings/interconnect/qcom,qcs8300-rpmh.h
@@ -0,0 +1,189 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/*
+ * Copyright (c) 2024, Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#ifndef __DT_BINDINGS_INTERCONNECT_QCOM_QCS8300_H
+#define __DT_BINDINGS_INTERCONNECT_QCOM_QCS8300_H
+
+#define MASTER_QUP_3				0
+#define MASTER_EMAC				1
+#define MASTER_SDC				2
+#define MASTER_UFS_MEM				3
+#define MASTER_USB2				4
+#define MASTER_USB3_0				5
+#define SLAVE_A1NOC_SNOC			6
+
+#define MASTER_QDSS_BAM				0
+#define MASTER_QUP_0				1
+#define MASTER_QUP_1				2
+#define MASTER_CNOC_A2NOC			3
+#define MASTER_CRYPTO_CORE0			4
+#define MASTER_CRYPTO_CORE1			5
+#define MASTER_IPA				6
+#define MASTER_QDSS_ETR_0			7
+#define MASTER_QDSS_ETR_1			8
+#define SLAVE_A2NOC_SNOC			9
+
+#define MASTER_QUP_CORE_0			0
+#define MASTER_QUP_CORE_1			1
+#define MASTER_QUP_CORE_3			2
+#define SLAVE_QUP_CORE_0			3
+#define SLAVE_QUP_CORE_1			4
+#define SLAVE_QUP_CORE_3			5
+
+#define MASTER_GEM_NOC_CNOC			0
+#define MASTER_GEM_NOC_PCIE_SNOC		1
+#define SLAVE_AHB2PHY_2				2
+#define SLAVE_AHB2PHY_3				3
+#define SLAVE_ANOC_THROTTLE_CFG			4
+#define SLAVE_AOSS				5
+#define SLAVE_APPSS				6
+#define SLAVE_BOOT_ROM				7
+#define SLAVE_CAMERA_CFG			8
+#define SLAVE_CAMERA_NRT_THROTTLE_CFG		9
+#define SLAVE_CAMERA_RT_THROTTLE_CFG		10
+#define SLAVE_CLK_CTL				11
+#define SLAVE_CDSP_CFG				12
+#define SLAVE_RBCPR_CX_CFG			13
+#define SLAVE_RBCPR_MMCX_CFG			14
+#define SLAVE_RBCPR_MX_CFG			15
+#define SLAVE_CPR_NSPCX				16
+#define SLAVE_CPR_NSPHMX			17
+#define SLAVE_CRYPTO_0_CFG			18
+#define SLAVE_CX_RDPM				19
+#define SLAVE_DISPLAY_CFG			20
+#define SLAVE_DISPLAY_RT_THROTTLE_CFG		21
+#define SLAVE_EMAC_CFG				22
+#define SLAVE_GP_DSP0_CFG			23
+#define SLAVE_GPDSP0_THROTTLE_CFG		24
+#define SLAVE_GPU_TCU_THROTTLE_CFG		25
+#define SLAVE_GFX3D_CFG				26
+#define SLAVE_HWKM				27
+#define SLAVE_IMEM_CFG				28
+#define SLAVE_IPA_CFG				29
+#define SLAVE_IPC_ROUTER_CFG			30
+#define SLAVE_LPASS				31
+#define SLAVE_LPASS_THROTTLE_CFG		32
+#define SLAVE_MX_RDPM				33
+#define SLAVE_MXC_RDPM				34
+#define SLAVE_PCIE_0_CFG			35
+#define SLAVE_PCIE_1_CFG			36
+#define SLAVE_PCIE_TCU_THROTTLE_CFG		37
+#define SLAVE_PCIE_THROTTLE_CFG			38
+#define SLAVE_PDM				39
+#define SLAVE_PIMEM_CFG				40
+#define SLAVE_PKA_WRAPPER_CFG			41
+#define SLAVE_QDSS_CFG				42
+#define SLAVE_QM_CFG				43
+#define SLAVE_QM_MPU_CFG			44
+#define SLAVE_QUP_0				45
+#define SLAVE_QUP_1				46
+#define SLAVE_QUP_3				47
+#define SLAVE_SAIL_THROTTLE_CFG			48
+#define SLAVE_SDC1				49
+#define SLAVE_SECURITY				50
+#define SLAVE_SNOC_THROTTLE_CFG			51
+#define SLAVE_TCSR				52
+#define SLAVE_TLMM				53
+#define SLAVE_TSC_CFG				54
+#define SLAVE_UFS_MEM_CFG			55
+#define SLAVE_USB2				56
+#define SLAVE_USB3_0				57
+#define SLAVE_VENUS_CFG				58
+#define SLAVE_VENUS_CVP_THROTTLE_CFG		59
+#define SLAVE_VENUS_V_CPU_THROTTLE_CFG		60
+#define SLAVE_VENUS_VCODEC_THROTTLE_CFG		61
+#define SLAVE_DDRSS_CFG				62
+#define SLAVE_GPDSP_NOC_CFG			63
+#define SLAVE_CNOC_MNOC_HF_CFG			64
+#define SLAVE_CNOC_MNOC_SF_CFG			65
+#define SLAVE_PCIE_ANOC_CFG			66
+#define SLAVE_SNOC_CFG				67
+#define SLAVE_BOOT_IMEM				68
+#define SLAVE_IMEM				69
+#define SLAVE_PIMEM				70
+#define SLAVE_PCIE_0				71
+#define SLAVE_PCIE_1				72
+#define SLAVE_QDSS_STM				73
+#define SLAVE_TCU				74
+
+#define MASTER_CNOC_DC_NOC			0
+#define SLAVE_LLCC_CFG				1
+#define SLAVE_GEM_NOC_CFG			2
+
+#define MASTER_GPU_TCU				0
+#define MASTER_PCIE_TCU				1
+#define MASTER_SYS_TCU				2
+#define MASTER_APPSS_PROC			3
+#define MASTER_COMPUTE_NOC			4
+#define MASTER_GEM_NOC_CFG			5
+#define MASTER_GPDSP_SAIL			6
+#define MASTER_GFX3D				7
+#define MASTER_MNOC_HF_MEM_NOC			8
+#define MASTER_MNOC_SF_MEM_NOC			9
+#define MASTER_ANOC_PCIE_GEM_NOC		10
+#define MASTER_SNOC_GC_MEM_NOC			11
+#define MASTER_SNOC_SF_MEM_NOC			12
+#define SLAVE_GEM_NOC_CNOC			13
+#define SLAVE_LLCC				14
+#define SLAVE_GEM_NOC_PCIE_CNOC			15
+#define SLAVE_SERVICE_GEM_NOC_1			16
+#define SLAVE_SERVICE_GEM_NOC_2			17
+#define SLAVE_SERVICE_GEM_NOC			18
+#define SLAVE_SERVICE_GEM_NOC2			19
+
+#define MASTER_SAILSS_MD0			0
+#define MASTER_DSP0				1
+#define SLAVE_GP_DSP_SAIL_NOC			2
+
+#define MASTER_CNOC_LPASS_AG_NOC		0
+#define MASTER_LPASS_PROC			1
+#define SLAVE_LPASS_CORE_CFG			2
+#define SLAVE_LPASS_LPI_CFG			3
+#define SLAVE_LPASS_MPU_CFG			4
+#define SLAVE_LPASS_TOP_CFG			5
+#define SLAVE_LPASS_SNOC			6
+#define SLAVE_SERVICES_LPASS_AML_NOC		7
+#define SLAVE_SERVICE_LPASS_AG_NOC		8
+
+#define MASTER_LLCC				0
+#define SLAVE_EBI1				1
+
+#define MASTER_CAMNOC_HF			0
+#define MASTER_CAMNOC_ICP			1
+#define MASTER_CAMNOC_SF			2
+#define MASTER_MDP0				3
+#define MASTER_MDP1				4
+#define MASTER_CNOC_MNOC_HF_CFG			5
+#define MASTER_CNOC_MNOC_SF_CFG			6
+#define MASTER_VIDEO_P0				7
+#define MASTER_VIDEO_PROC			8
+#define MASTER_VIDEO_V_PROC			9
+#define SLAVE_MNOC_HF_MEM_NOC			10
+#define SLAVE_MNOC_SF_MEM_NOC			11
+#define SLAVE_SERVICE_MNOC_HF			12
+#define SLAVE_SERVICE_MNOC_SF			13
+
+#define MASTER_CDSP_NOC_CFG			0
+#define MASTER_CDSP_PROC			1
+#define SLAVE_HCP_A				2
+#define SLAVE_CDSP_MEM_NOC			3
+#define SLAVE_SERVICE_NSP_NOC			4
+
+#define MASTER_PCIE_0				0
+#define MASTER_PCIE_1				1
+#define SLAVE_ANOC_PCIE_GEM_NOC			2
+
+#define MASTER_GIC_AHB				0
+#define MASTER_A1NOC_SNOC			1
+#define MASTER_A2NOC_SNOC			2
+#define MASTER_LPASS_ANOC			3
+#define MASTER_SNOC_CFG				4
+#define MASTER_PIMEM				5
+#define MASTER_GIC				6
+#define SLAVE_SNOC_GEM_NOC_GC			7
+#define SLAVE_SNOC_GEM_NOC_SF			8
+#define SLAVE_SERVICE_SNOC			9
+
+#endif
-- 
cgit v1.2.3


From 6c5e948f1fffdb1ae2c6d2f3d5336dbf07189334 Mon Sep 17 00:00:00 2001
From: Raviteja Laggyshetty <quic_rlaggysh@quicinc.com>
Date: Tue, 24 Sep 2024 14:39:57 +0000
Subject: dt-bindings: interconnect: document the RPMh Network-On-Chip
 interconnect in QCS615 SoC

Document the RPMh Network-On-Chip Interconnect of the QCS615 platform.

Signed-off-by: Raviteja Laggyshetty <quic_rlaggysh@quicinc.com>
Reviewed-by: Krzysztof Kozlowski <krzk@kernel.org>
Link: https://lore.kernel.org/r/20240924143958.25-2-quic_rlaggysh@quicinc.com
Signed-off-by: Georgi Djakov <djakov@kernel.org>
---
 .../dt-bindings/interconnect/qcom,qcs615-rpmh.h    | 136 +++++++++++++++++++++
 1 file changed, 136 insertions(+)
 create mode 100644 include/dt-bindings/interconnect/qcom,qcs615-rpmh.h

(limited to 'include')

diff --git a/include/dt-bindings/interconnect/qcom,qcs615-rpmh.h b/include/dt-bindings/interconnect/qcom,qcs615-rpmh.h
new file mode 100644
index 000000000000..84ae0d39e73c
--- /dev/null
+++ b/include/dt-bindings/interconnect/qcom,qcs615-rpmh.h
@@ -0,0 +1,136 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/*
+ * Copyright (c) 2024, Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#ifndef __DT_BINDINGS_INTERCONNECT_QCOM_QCS615_H
+#define __DT_BINDINGS_INTERCONNECT_QCOM_QCS615_H
+
+#define MASTER_A1NOC_CFG		1
+#define MASTER_QDSS_BAM			2
+#define MASTER_QSPI			3
+#define MASTER_QUP_0			4
+#define MASTER_BLSP_1			5
+#define MASTER_CNOC_A2NOC		6
+#define MASTER_CRYPTO			7
+#define MASTER_IPA			8
+#define MASTER_EMAC_EVB			9
+#define MASTER_PCIE			10
+#define MASTER_QDSS_ETR			11
+#define MASTER_SDCC_1			12
+#define MASTER_SDCC_2			13
+#define MASTER_UFS_MEM			14
+#define MASTER_USB2			15
+#define MASTER_USB3_0			16
+#define SLAVE_A1NOC_SNOC		17
+#define SLAVE_LPASS_SNOC		18
+#define SLAVE_ANOC_PCIE_SNOC		19
+#define SLAVE_SERVICE_A2NOC		20
+
+#define MASTER_CAMNOC_HF0_UNCOMP	1
+#define MASTER_CAMNOC_HF1_UNCOMP	2
+#define MASTER_CAMNOC_SF_UNCOMP		3
+#define SLAVE_CAMNOC_UNCOMP		4
+
+#define MASTER_SPDM			1
+#define MASTER_SNOC_CNOC		2
+#define MASTER_QDSS_DAP			3
+#define SLAVE_A1NOC_CFG			4
+#define SLAVE_AHB2PHY_EAST		5
+#define SLAVE_AHB2PHY_WEST		6
+#define SLAVE_AOP			7
+#define SLAVE_AOSS			8
+#define SLAVE_CAMERA_CFG		9
+#define SLAVE_CLK_CTL			10
+#define SLAVE_RBCPR_CX_CFG		11
+#define SLAVE_RBCPR_MX_CFG		12
+#define SLAVE_CRYPTO_0_CFG		13
+#define SLAVE_CNOC_DDRSS		14
+#define SLAVE_DISPLAY_CFG		15
+#define SLAVE_EMAC_AVB_CFG		16
+#define SLAVE_GLM			17
+#define SLAVE_GFX3D_CFG			18
+#define SLAVE_IMEM_CFG			19
+#define SLAVE_IPA_CFG			20
+#define SLAVE_CNOC_MNOC_CFG		21
+#define SLAVE_PCIE_CFG			22
+#define SLAVE_PIMEM_CFG			23
+#define SLAVE_PRNG			24
+#define SLAVE_QDSS_CFG			25
+#define SLAVE_QSPI			26
+#define SLAVE_QUP_0			27
+#define SLAVE_QUP_1			28
+#define SLAVE_SDCC_1			29
+#define SLAVE_SDCC_2			30
+#define SLAVE_SNOC_CFG			31
+#define SLAVE_SPDM_WRAPPER		32
+#define SLAVE_TCSR			33
+#define SLAVE_TLMM_EAST			34
+#define SLAVE_TLMM_SOUTH		35
+#define SLAVE_TLMM_WEST			36
+#define SLAVE_UFS_MEM_CFG		37
+#define SLAVE_USB2			38
+#define SLAVE_USB3			39
+#define SLAVE_VENUS_CFG			40
+#define SLAVE_VSENSE_CTRL_CFG		41
+#define SLAVE_CNOC_A2NOC		42
+#define SLAVE_SERVICE_CNOC		43
+
+#define MASTER_CNOC_DC_NOC		1
+#define SLAVE_DC_NOC_GEMNOC		2
+#define SLAVE_LLCC_CFG			3
+
+#define MASTER_APPSS_PROC		1
+#define MASTER_GPU_TCU			2
+#define MASTER_SYS_TCU			3
+#define MASTER_GEM_NOC_CFG		4
+#define MASTER_GFX3D			5
+#define MASTER_MNOC_HF_MEM_NOC		6
+#define MASTER_MNOC_SF_MEM_NOC		7
+#define MASTER_SNOC_GC_MEM_NOC		8
+#define MASTER_SNOC_SF_MEM_NOC		9
+#define SLAVE_MSS_PROC_MS_MPU_CFG	10
+#define SLAVE_GEM_NOC_SNOC		11
+#define SLAVE_LLCC			12
+#define SLAVE_MEM_NOC_PCIE_SNOC		13
+#define SLAVE_SERVICE_GEM_NOC		14
+
+#define MASTER_IPA_CORE			1
+#define SLAVE_IPA_CORE			2
+
+#define MASTER_LLCC			1
+#define SLAVE_EBI1			2
+
+#define MASTER_CNOC_MNOC_CFG		1
+#define MASTER_CAMNOC_HF0		2
+#define MASTER_CAMNOC_HF1		3
+#define MASTER_CAMNOC_SF		4
+#define MASTER_MDP0			5
+#define MASTER_ROTATOR			6
+#define MASTER_VIDEO_P0			7
+#define MASTER_VIDEO_PROC		8
+#define SLAVE_MNOC_SF_MEM_NOC		9
+#define SLAVE_MNOC_HF_MEM_NOC		10
+#define SLAVE_SERVICE_MNOC		11
+
+#define MASTER_SNOC_CFG			1
+#define MASTER_A1NOC_SNOC		2
+#define MASTER_GEM_NOC_SNOC		3
+#define MASTER_GEM_NOC_PCIE_SNOC	4
+#define MASTER_LPASS_ANOC		5
+#define MASTER_ANOC_PCIE_SNOC		6
+#define MASTER_PIMEM			7
+#define MASTER_GIC			8
+#define SLAVE_APPSS			9
+#define SLAVE_SNOC_CNOC			10
+#define SLAVE_SNOC_GEM_NOC_SF		11
+#define SLAVE_SNOC_MEM_NOC_GC		12
+#define SLAVE_IMEM			13
+#define SLAVE_PIMEM			14
+#define SLAVE_SERVICE_SNOC		15
+#define SLAVE_PCIE_0			16
+#define SLAVE_QDSS_STM			17
+#define SLAVE_TCU			18
+
+#endif
+
-- 
cgit v1.2.3


From 3ccddc3991beef2705e8097b01ae07054bf11022 Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Date: Mon, 21 Oct 2024 16:37:46 +0300
Subject: drm/fbdev: fix drm_fb_helper_deferred_io() build failure

The drm_fb_helper_deferred_io() uses struct fb_deferred_io_pageref,
which isn't available without CONFIG_FB_DEFERRED_IO. Put the function
under corresponding #ifdef to fix build failure if deferred I/O isn't
enabled.

Fixes: 8058944f5226 ("drm/fbdev: Select fbdev I/O helpers from modules that require them")
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Reviewed-by: Thomas Zimmermann <tzimmermann@suse.de>
Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Link: https://patchwork.freedesktop.org/patch/msgid/20241021-fix-drm-deferred-v2-1-db1de4c6b042@linaro.org
---
 include/drm/drm_fb_helper.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/drm/drm_fb_helper.h b/include/drm/drm_fb_helper.h
index 699f2790b9ac..8426b9921a03 100644
--- a/include/drm/drm_fb_helper.h
+++ b/include/drm/drm_fb_helper.h
@@ -256,7 +256,9 @@ void drm_fb_helper_fill_info(struct fb_info *info,
 void drm_fb_helper_damage_range(struct fb_info *info, off_t off, size_t len);
 void drm_fb_helper_damage_area(struct fb_info *info, u32 x, u32 y, u32 width, u32 height);
 
+#ifdef CONFIG_FB_DEFERRED_IO
 void drm_fb_helper_deferred_io(struct fb_info *info, struct list_head *pagereflist);
+#endif
 
 void drm_fb_helper_set_suspend(struct drm_fb_helper *fb_helper, bool suspend);
 void drm_fb_helper_set_suspend_unlocked(struct drm_fb_helper *fb_helper,
@@ -361,10 +363,12 @@ static inline int drm_fb_helper_ioctl(struct fb_info *info, unsigned int cmd,
 	return 0;
 }
 
+#ifdef CONFIG_FB_DEFERRED_IO
 static inline void drm_fb_helper_deferred_io(struct fb_info *info,
 					     struct list_head *pagelist)
 {
 }
+#endif
 
 static inline void drm_fb_helper_set_suspend(struct drm_fb_helper *fb_helper,
 					     bool suspend)
-- 
cgit v1.2.3


From 43c7ce69d28e185f62fe2b8be2c681c5cac0bc6b Mon Sep 17 00:00:00 2001
From: Kuniyuki Iwashima <kuniyu@amazon.com>
Date: Wed, 16 Oct 2024 11:53:50 -0700
Subject: rtnetlink: Protect struct rtnl_link_ops with SRCU.

Once RTNL is replaced with rtnl_net_lock(), we need a mechanism to
guarantee that rtnl_link_ops is alive during inflight RTM_NEWLINK
even when its module is being unloaded.

Let's use SRCU to protect ops.

rtnl_link_ops_get() now iterates link_ops under RCU and returns
SRCU-protected ops pointer.  The caller must call rtnl_link_ops_put()
to release the pointer after the use.

Also, __rtnl_link_unregister() unlinks the ops first and calls
synchronize_srcu() to wait for inflight RTM_NEWLINK requests to
complete.

Note that link_ops needs to be protected by its dedicated lock
when RTNL is removed.

Suggested-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/net/rtnetlink.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h
index bb49c5708ce7..1a6aa5ca74f3 100644
--- a/include/net/rtnetlink.h
+++ b/include/net/rtnetlink.h
@@ -3,6 +3,7 @@
 #define __NET_RTNETLINK_H
 
 #include <linux/rtnetlink.h>
+#include <linux/srcu.h>
 #include <net/netlink.h>
 
 typedef int (*rtnl_doit_func)(struct sk_buff *, struct nlmsghdr *,
@@ -69,7 +70,8 @@ static inline int rtnl_msg_family(const struct nlmsghdr *nlh)
 /**
  *	struct rtnl_link_ops - rtnetlink link operations
  *
- *	@list: Used internally
+ *	@list: Used internally, protected by RTNL and SRCU
+ *	@srcu: Used internally
  *	@kind: Identifier
  *	@netns_refund: Physical device, move to init_net on netns exit
  *	@maxtype: Highest device specific netlink attribute number
@@ -100,6 +102,7 @@ static inline int rtnl_msg_family(const struct nlmsghdr *nlh)
  */
 struct rtnl_link_ops {
 	struct list_head	list;
+	struct srcu_struct	srcu;
 
 	const char		*kind;
 
-- 
cgit v1.2.3


From 26eebdc4b005ccd4cf63f4fef4c9c0adf9bfa380 Mon Sep 17 00:00:00 2001
From: Kuniyuki Iwashima <kuniyu@amazon.com>
Date: Wed, 16 Oct 2024 11:53:56 -0700
Subject: rtnetlink: Return int from rtnl_af_register().

The next patch will add init_srcu_struct() in rtnl_af_register(),
then we need to handle its error.

Let's add the error handling in advance to make the following
patch cleaner.

Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Reviewed-by: Matt Johnston <matt@codeconstruct.com.au>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/net/rtnetlink.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h
index 1a6aa5ca74f3..969138ae2f4b 100644
--- a/include/net/rtnetlink.h
+++ b/include/net/rtnetlink.h
@@ -204,7 +204,7 @@ struct rtnl_af_ops {
 	size_t			(*get_stats_af_size)(const struct net_device *dev);
 };
 
-void rtnl_af_register(struct rtnl_af_ops *ops);
+int rtnl_af_register(struct rtnl_af_ops *ops);
 void rtnl_af_unregister(struct rtnl_af_ops *ops);
 
 struct net *rtnl_link_get_net(struct net *src_net, struct nlattr *tb[]);
-- 
cgit v1.2.3


From 6ab0f866948323724e95cf14d9e47fd77703c192 Mon Sep 17 00:00:00 2001
From: Kuniyuki Iwashima <kuniyu@amazon.com>
Date: Wed, 16 Oct 2024 11:53:57 -0700
Subject: rtnetlink: Protect struct rtnl_af_ops with SRCU.

Once RTNL is replaced with rtnl_net_lock(), we need a mechanism to
guarantee that rtnl_af_ops is alive during inflight RTM_SETLINK
even when its module is being unloaded.

Let's use SRCU to protect ops.

rtnl_af_lookup() now iterates rtnl_af_ops under RCU and returns
SRCU-protected ops pointer.  The caller must call rtnl_af_put()
to release the pointer after the use.

Also, rtnl_af_unregister() unlinks the ops first and calls
synchronize_srcu() to wait for inflight RTM_SETLINK requests to
complete.

Note that rtnl_af_ops needs to be protected by its dedicated lock
when RTNL is removed.

Note also that BUG_ON() in do_setlink() is changed to the normal
error handling as a different af_ops might be found after
validate_linkmsg().

Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/net/rtnetlink.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h
index 969138ae2f4b..e0d9a8eae6b6 100644
--- a/include/net/rtnetlink.h
+++ b/include/net/rtnetlink.h
@@ -172,7 +172,8 @@ void rtnl_link_unregister(struct rtnl_link_ops *ops);
 /**
  * 	struct rtnl_af_ops - rtnetlink address family operations
  *
- *	@list: Used internally
+ *	@list: Used internally, protected by RTNL and SRCU
+ *	@srcu: Used internally
  * 	@family: Address family
  * 	@fill_link_af: Function to fill IFLA_AF_SPEC with address family
  * 		       specific netlink attributes.
@@ -185,6 +186,8 @@ void rtnl_link_unregister(struct rtnl_link_ops *ops);
  */
 struct rtnl_af_ops {
 	struct list_head	list;
+	struct srcu_struct	srcu;
+
 	int			family;
 
 	int			(*fill_link_af)(struct sk_buff *skb,
-- 
cgit v1.2.3


From 6474353a5e3d0b2cf610153cea0c61f576a36d0a Mon Sep 17 00:00:00 2001
From: Christian Brauner <brauner@kernel.org>
Date: Wed, 25 Sep 2024 11:05:16 +0200
Subject: epoll: annotate racy check

Epoll relies on a racy fastpath check during __fput() in
eventpoll_release() to avoid the hit of pointlessly acquiring a
semaphore. Annotate that race by using WRITE_ONCE() and READ_ONCE().

Link: https://lore.kernel.org/r/66edfb3c.050a0220.3195df.001a.GAE@google.com
Link: https://lore.kernel.org/r/20240925-fungieren-anbauen-79b334b00542@brauner
Reviewed-by: Jan Kara <jack@suse.cz>
Reported-by: syzbot+3b6b32dc50537a49bb4a@syzkaller.appspotmail.com
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/linux/eventpoll.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/eventpoll.h b/include/linux/eventpoll.h
index 3337745d81bd..0c0d00fcd131 100644
--- a/include/linux/eventpoll.h
+++ b/include/linux/eventpoll.h
@@ -42,7 +42,7 @@ static inline void eventpoll_release(struct file *file)
 	 * because the file in on the way to be removed and nobody ( but
 	 * eventpoll ) has still a reference to this file.
 	 */
-	if (likely(!file->f_ep))
+	if (likely(!READ_ONCE(file->f_ep)))
 		return;
 
 	/*
-- 
cgit v1.2.3


From 900bbaae67e980945dec74d36f8afe0de7556d5a Mon Sep 17 00:00:00 2001
From: Xuewen Yan <xuewen.yan@unisoc.com>
Date: Fri, 26 Apr 2024 16:05:48 +0800
Subject: epoll: Add synchronous wakeup support for ep_poll_callback

Now, the epoll only use wake_up() interface to wake up task.
However, sometimes, there are epoll users which want to use
the synchronous wakeup flag to hint the scheduler, such as
Android binder driver.
So add a wake_up_sync() define, and use the wake_up_sync()
when the sync is true in ep_poll_callback().

Co-developed-by: Jing Xia <jing.xia@unisoc.com>
Signed-off-by: Jing Xia <jing.xia@unisoc.com>
Signed-off-by: Xuewen Yan <xuewen.yan@unisoc.com>
Link: https://lore.kernel.org/r/20240426080548.8203-1-xuewen.yan@unisoc.com
Tested-by: Brian Geffon <bgeffon@google.com>
Reviewed-by: Brian Geffon <bgeffon@google.com>
Reported-by: Benoit Lize <lizeb@google.com>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/linux/wait.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/wait.h b/include/linux/wait.h
index 8aa3372f21a0..2b322a9b88a2 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -221,6 +221,7 @@ void __wake_up_pollfree(struct wait_queue_head *wq_head);
 #define wake_up_all(x)			__wake_up(x, TASK_NORMAL, 0, NULL)
 #define wake_up_locked(x)		__wake_up_locked((x), TASK_NORMAL, 1)
 #define wake_up_all_locked(x)		__wake_up_locked((x), TASK_NORMAL, 0)
+#define wake_up_sync(x)			__wake_up_sync(x, TASK_NORMAL)
 
 #define wake_up_interruptible(x)	__wake_up(x, TASK_INTERRUPTIBLE, 1, NULL)
 #define wake_up_interruptible_nr(x, nr)	__wake_up(x, TASK_INTERRUPTIBLE, nr, NULL)
-- 
cgit v1.2.3


From 99bdadbde9c418f29b78b7241732268dbc0a05cc Mon Sep 17 00:00:00 2001
From: Thorsten Blum <thorsten.blum@linux.dev>
Date: Tue, 15 Oct 2024 22:21:54 +0200
Subject: acl: Realign struct posix_acl to save 8 bytes

Reduce posix_acl's struct size by 8 bytes by realigning its members.

Cc: Christian Brauner <brauner@kernel.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Thorsten Blum <thorsten.blum@linux.dev>
Link: https://lore.kernel.org/r/20241015202158.2376-1-thorsten.blum@linux.dev
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/linux/posix_acl.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/posix_acl.h b/include/linux/posix_acl.h
index 0e65b3d634d9..2d6a4badd306 100644
--- a/include/linux/posix_acl.h
+++ b/include/linux/posix_acl.h
@@ -28,8 +28,8 @@ struct posix_acl_entry {
 
 struct posix_acl {
 	refcount_t		a_refcount;
-	struct rcu_head		a_rcu;
 	unsigned int		a_count;
+	struct rcu_head		a_rcu;
 	struct posix_acl_entry	a_entries[];
 };
 
-- 
cgit v1.2.3


From 8c6e03ffedc5463d1aa1ba89f6ceb082518a3520 Mon Sep 17 00:00:00 2001
From: Thorsten Blum <thorsten.blum@linux.dev>
Date: Fri, 18 Oct 2024 14:14:21 +0200
Subject: acl: Annotate struct posix_acl with __counted_by()

Add the __counted_by compiler attribute to the flexible array member
a_entries to improve access bounds-checking via CONFIG_UBSAN_BOUNDS and
CONFIG_FORTIFY_SOURCE.

Use struct_size() to calculate the number of bytes to allocate for new
and cloned acls and remove the local size variables.

Change the posix_acl_alloc() function parameter count from int to
unsigned int to match posix_acl's a_count data type. Add identifier
names to the function definition to silence two checkpatch warnings.

Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Thorsten Blum <thorsten.blum@linux.dev>
Link: https://lore.kernel.org/r/20241018121426.155247-2-thorsten.blum@linux.dev
Cc: Nathan Chancellor <nathan@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/linux/posix_acl.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/posix_acl.h b/include/linux/posix_acl.h
index 2d6a4badd306..e2d47eb1a7f3 100644
--- a/include/linux/posix_acl.h
+++ b/include/linux/posix_acl.h
@@ -30,7 +30,7 @@ struct posix_acl {
 	refcount_t		a_refcount;
 	unsigned int		a_count;
 	struct rcu_head		a_rcu;
-	struct posix_acl_entry	a_entries[];
+	struct posix_acl_entry	a_entries[] __counted_by(a_count);
 };
 
 #define FOREACH_ACL_ENTRY(pa, acl, pe) \
@@ -62,7 +62,7 @@ posix_acl_release(struct posix_acl *acl)
 /* posix_acl.c */
 
 extern void posix_acl_init(struct posix_acl *, int);
-extern struct posix_acl *posix_acl_alloc(int, gfp_t);
+extern struct posix_acl *posix_acl_alloc(unsigned int count, gfp_t flags);
 extern struct posix_acl *posix_acl_from_mode(umode_t, gfp_t);
 extern int posix_acl_equiv_mode(const struct posix_acl *, umode_t *);
 extern int __posix_acl_create(struct posix_acl **, gfp_t, umode_t *);
-- 
cgit v1.2.3


From bc2bb732162fb183e5c8df9d42604ddb7ec36e8b Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Date: Fri, 18 Oct 2024 11:33:24 +0300
Subject: dt-bindings: interconnect: qcom: document SAR2130P NoC

Add bindings for the Network of Connects (NoC) present on the
Qualcomm SAR2130P platform.

Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Link: https://lore.kernel.org/r/20241018-sar2130p-icc-v2-1-c58c73dcd19d@linaro.org
Signed-off-by: Georgi Djakov <djakov@kernel.org>
---
 .../dt-bindings/interconnect/qcom,sar2130p-rpmh.h  | 137 +++++++++++++++++++++
 1 file changed, 137 insertions(+)
 create mode 100644 include/dt-bindings/interconnect/qcom,sar2130p-rpmh.h

(limited to 'include')

diff --git a/include/dt-bindings/interconnect/qcom,sar2130p-rpmh.h b/include/dt-bindings/interconnect/qcom,sar2130p-rpmh.h
new file mode 100644
index 000000000000..aec7cbb7cd70
--- /dev/null
+++ b/include/dt-bindings/interconnect/qcom,sar2130p-rpmh.h
@@ -0,0 +1,137 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/*
+ * Copyright (c) 2021, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2022, Qualcomm Innovation Center, Inc. All rights reserved.
+ * Copyright (c) 2024, Linaro Ltd.
+ */
+
+#ifndef __DT_BINDINGS_INTERCONNECT_QCOM_SAR2130P_H
+#define __DT_BINDINGS_INTERCONNECT_QCOM_SAR2130P_H
+
+#define MASTER_QUP_CORE_0			0
+#define MASTER_QUP_CORE_1			1
+#define SLAVE_QUP_CORE_0			2
+#define SLAVE_QUP_CORE_1			3
+
+#define MASTER_GEM_NOC_CNOC			0
+#define MASTER_GEM_NOC_PCIE_SNOC		1
+#define MASTER_QDSS_DAP				2
+#define SLAVE_AHB2PHY_SOUTH			3
+#define SLAVE_AOSS				4
+#define SLAVE_CAMERA_CFG			5
+#define SLAVE_CLK_CTL				6
+#define SLAVE_CDSP_CFG				7
+#define SLAVE_RBCPR_CX_CFG			8
+#define SLAVE_RBCPR_MMCX_CFG			9
+#define SLAVE_RBCPR_MXA_CFG			10
+#define SLAVE_RBCPR_MXC_CFG			11
+#define SLAVE_CPR_NSPCX				12
+#define SLAVE_CRYPTO_0_CFG			13
+#define SLAVE_CX_RDPM				14
+#define SLAVE_DISPLAY_CFG			15
+#define SLAVE_GFX3D_CFG				16
+#define SLAVE_IMEM_CFG				17
+#define SLAVE_IPC_ROUTER_CFG			18
+#define SLAVE_LPASS				19
+#define SLAVE_MX_RDPM				20
+#define SLAVE_PCIE_0_CFG			21
+#define SLAVE_PCIE_1_CFG			22
+#define SLAVE_PDM				23
+#define SLAVE_PIMEM_CFG				24
+#define SLAVE_PRNG				25
+#define SLAVE_QDSS_CFG				26
+#define SLAVE_QSPI_0				27
+#define SLAVE_QUP_0				28
+#define SLAVE_QUP_1				29
+#define SLAVE_SDCC_1				30
+#define SLAVE_TCSR				31
+#define SLAVE_TLMM				32
+#define SLAVE_TME_CFG				33
+#define SLAVE_USB3_0				34
+#define SLAVE_VENUS_CFG				35
+#define SLAVE_VSENSE_CTRL_CFG			36
+#define SLAVE_WLAN_Q6_CFG			37
+#define SLAVE_DDRSS_CFG				38
+#define SLAVE_CNOC_MNOC_CFG			39
+#define SLAVE_SNOC_CFG				40
+#define SLAVE_IMEM				41
+#define SLAVE_PIMEM				42
+#define SLAVE_SERVICE_CNOC			43
+#define SLAVE_PCIE_0				44
+#define SLAVE_PCIE_1				45
+#define SLAVE_QDSS_STM				46
+#define SLAVE_TCU				47
+
+#define MASTER_GPU_TCU				0
+#define MASTER_SYS_TCU				1
+#define MASTER_APPSS_PROC			2
+#define MASTER_GFX3D				3
+#define MASTER_MNOC_HF_MEM_NOC			4
+#define MASTER_MNOC_SF_MEM_NOC			5
+#define MASTER_COMPUTE_NOC			6
+#define MASTER_ANOC_PCIE_GEM_NOC		7
+#define MASTER_SNOC_GC_MEM_NOC			8
+#define MASTER_SNOC_SF_MEM_NOC			9
+#define MASTER_WLAN_Q6				10
+#define SLAVE_GEM_NOC_CNOC			11
+#define SLAVE_LLCC				12
+#define SLAVE_MEM_NOC_PCIE_SNOC			13
+
+#define MASTER_CNOC_LPASS_AG_NOC		0
+#define MASTER_LPASS_PROC			1
+#define SLAVE_LPASS_CORE_CFG			2
+#define SLAVE_LPASS_LPI_CFG			3
+#define SLAVE_LPASS_MPU_CFG			4
+#define SLAVE_LPASS_TOP_CFG			5
+#define SLAVE_LPASS_SNOC			6
+#define SLAVE_SERVICES_LPASS_AML_NOC		7
+#define SLAVE_SERVICE_LPASS_AG_NOC		8
+
+#define MASTER_LLCC				0
+#define SLAVE_EBI1				1
+
+#define MASTER_CAMNOC_HF			0
+#define MASTER_CAMNOC_ICP			1
+#define MASTER_CAMNOC_SF			2
+#define MASTER_LSR				3
+#define MASTER_MDP				4
+#define MASTER_CNOC_MNOC_CFG			5
+#define MASTER_VIDEO				6
+#define MASTER_VIDEO_CV_PROC			7
+#define MASTER_VIDEO_PROC			8
+#define MASTER_VIDEO_V_PROC			9
+#define SLAVE_MNOC_HF_MEM_NOC			10
+#define SLAVE_MNOC_SF_MEM_NOC			11
+#define SLAVE_SERVICE_MNOC			12
+
+#define MASTER_CDSP_NOC_CFG			0
+#define MASTER_CDSP_PROC			1
+#define SLAVE_CDSP_MEM_NOC			2
+#define SLAVE_SERVICE_NSP_NOC			3
+
+#define MASTER_PCIE_0				0
+#define MASTER_PCIE_1				1
+#define SLAVE_ANOC_PCIE_GEM_NOC			2
+
+#define MASTER_GIC_AHB				0
+#define MASTER_QDSS_BAM				1
+#define MASTER_QSPI_0				2
+#define MASTER_QUP_0				3
+#define MASTER_QUP_1				4
+#define MASTER_A2NOC_SNOC			5
+#define MASTER_CNOC_DATAPATH			6
+#define MASTER_LPASS_ANOC			7
+#define MASTER_SNOC_CFG				8
+#define MASTER_CRYPTO				9
+#define MASTER_PIMEM				10
+#define MASTER_GIC				11
+#define MASTER_QDSS_ETR				12
+#define MASTER_QDSS_ETR_1			13
+#define MASTER_SDCC_1				14
+#define MASTER_USB3_0				15
+#define SLAVE_A2NOC_SNOC			16
+#define SLAVE_SNOC_GEM_NOC_GC			17
+#define SLAVE_SNOC_GEM_NOC_SF			18
+#define SLAVE_SERVICE_SNOC			19
+
+#endif
-- 
cgit v1.2.3


From c16a7d3ef029494a6f5b5cd20606dd08aeaa2b0f Mon Sep 17 00:00:00 2001
From: Yassine Oudjana <y.oudjana@protonmail.com>
Date: Thu, 17 Oct 2024 11:51:34 +0300
Subject: dt-bindings: power: Add binding for MediaTek MT6735 power controller

Add DT binding for MediaTek MT6735 SCPSYS power controller.

Signed-off-by: Yassine Oudjana <y.oudjana@protonmail.com>
Reviewed-by: AngeloGioacchino Del Regno <angelogiaocchino.delregno@collabora.com>
Acked-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Link: https://lore.kernel.org/r/20241017085136.68053-2-y.oudjana@protonmail.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 .../dt-bindings/power/mediatek,mt6735-power-controller.h   | 14 ++++++++++++++
 1 file changed, 14 insertions(+)
 create mode 100644 include/dt-bindings/power/mediatek,mt6735-power-controller.h

(limited to 'include')

diff --git a/include/dt-bindings/power/mediatek,mt6735-power-controller.h b/include/dt-bindings/power/mediatek,mt6735-power-controller.h
new file mode 100644
index 000000000000..6957075fcb9e
--- /dev/null
+++ b/include/dt-bindings/power/mediatek,mt6735-power-controller.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+
+#ifndef _DT_BINDINGS_POWER_MT6735_POWER_CONTROLLER_H
+#define _DT_BINDINGS_POWER_MT6735_POWER_CONTROLLER_H
+
+#define MT6735_POWER_DOMAIN_MD1		0
+#define MT6735_POWER_DOMAIN_CONN	1
+#define MT6735_POWER_DOMAIN_DIS		2
+#define MT6735_POWER_DOMAIN_MFG		3
+#define MT6735_POWER_DOMAIN_ISP		4
+#define MT6735_POWER_DOMAIN_VDE		5
+#define MT6735_POWER_DOMAIN_VEN		6
+
+#endif
-- 
cgit v1.2.3


From c2114a0d17631497df99388905353c11f4d5f0dd Mon Sep 17 00:00:00 2001
From: Yassine Oudjana <y.oudjana@protonmail.com>
Date: Thu, 17 Oct 2024 11:51:35 +0300
Subject: pmdomain: mediatek: Add support for MT6735

Add support for SCPSYS power domains of MT6735. All non-CPU power domains
are added except for MD2 (C2K modem), which is left out due to issues
with powering it on.

Signed-off-by: Yassine Oudjana <y.oudjana@protonmail.com>
Reviewed-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
Link: https://lore.kernel.org/r/20241017085136.68053-3-y.oudjana@protonmail.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 include/linux/soc/mediatek/infracfg.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include')

diff --git a/include/linux/soc/mediatek/infracfg.h b/include/linux/soc/mediatek/infracfg.h
index 6c6cccc848f4..9956e18c5ffa 100644
--- a/include/linux/soc/mediatek/infracfg.h
+++ b/include/linux/soc/mediatek/infracfg.h
@@ -434,6 +434,11 @@
 #define MT7622_TOP_AXI_PROT_EN_WB		(BIT(2) | BIT(6) | \
 						 BIT(7) | BIT(8))
 
+#define MT6735_TOP_AXI_PROT_EN_CONN		(BIT(2) | BIT(8))
+#define MT6735_TOP_AXI_PROT_EN_MD1		(BIT(24) | BIT(25) | \
+						 BIT(26) | BIT(27) | \
+						 BIT(28))
+
 #define INFRA_TOPAXI_PROTECTEN			0x0220
 #define INFRA_TOPAXI_PROTECTSTA1		0x0228
 #define INFRA_TOPAXI_PROTECTEN_SET		0x0260
-- 
cgit v1.2.3


From d811ac148f0afd2f3f7e1cd7f54de8da973ec5e3 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Tue, 15 Oct 2024 15:56:35 +0200
Subject: virtchnl: fix m68k build.

The kernel test robot reported a build failure on m68k in the intel
driver due to the recent shapers-related changes.

The mentioned arch has funny alignment properties, let's be explicit
about the binary layout expectation introducing a padding field.

Fixes: 608a5c05c39b ("virtchnl: support queue rate limit and quanta size configuration")
Reported-by: kernel test robot <lkp@intel.com>
Closes: https://lore.kernel.org/oe-kbuild-all/202410131710.71Wt6LKO-lkp@intel.com/
Reviewed-by: Alexander Lobakin <aleksander.lobakin@intel.com>
Reviewed-by: Paul Menzel <pmenzel@molgen.mpg.de>
Reviewed-by: Jacob Keller <jacob.e.keller@intel.com>
Link: https://patch.msgid.link/e45d1c9f17356d431b03b419f60b8b763d2ff768.1729000481.git.pabeni@redhat.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/linux/avf/virtchnl.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/avf/virtchnl.h b/include/linux/avf/virtchnl.h
index 223e433c39fe..13a11f3c09b8 100644
--- a/include/linux/avf/virtchnl.h
+++ b/include/linux/avf/virtchnl.h
@@ -1499,6 +1499,7 @@ VIRTCHNL_CHECK_STRUCT_LEN(8, virtchnl_queue_chunk);
 
 struct virtchnl_quanta_cfg {
 	u16 quanta_size;
+	u16 pad;
 	struct virtchnl_queue_chunk queue_select;
 };
 
-- 
cgit v1.2.3


From 2c50ec98fc6cab28df35e0a22a2bcc7957d9d0ab Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Wed, 2 Oct 2024 14:06:47 -0600
Subject: block: remove redundant passthrough check in blk_mq_need_time_stamp()

Simply checking the rq_flags is enough to determine if accounting is
being done for this request.

Reviewed-by: Keith Busch <kbusch@kernel.org>
Reviewed-by: Anuj Gupta <anuj20.g@samsung.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blk-mq.h | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 4fecf46ef681..59e9adf815a4 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -857,12 +857,6 @@ void blk_mq_end_request_batch(struct io_comp_batch *ib);
  */
 static inline bool blk_mq_need_time_stamp(struct request *rq)
 {
-	/*
-	 * passthrough io doesn't use iostat accounting, cgroup stats
-	 * and io scheduler functionalities.
-	 */
-	if (blk_rq_is_passthrough(rq))
-		return false;
 	return (rq->rq_flags & (RQF_IO_STAT | RQF_STATS | RQF_USE_SCHED));
 }
 
-- 
cgit v1.2.3


From 9dfd9ea93aeab57d897bb7fc7c0707f26b0b9af8 Mon Sep 17 00:00:00 2001
From: Christian Marangi <ansuelsmth@gmail.com>
Date: Thu, 3 Oct 2024 00:11:43 +0200
Subject: block: introduce add_disk_fwnode()

Introduce add_disk_fwnode() as a replacement of device_add_disk() that
permits to pass and attach a fwnode to disk dev.

This variant can be useful for eMMC that might have the partition table
for the disk defined in DT. A parser can later make use of the attached
fwnode to parse the related table and init the hardcoded partition for
the disk.

device_add_disk() is converted to a simple wrapper of add_disk_fwnode()
with the fwnode entry set as NULL.

Signed-off-by: Christian Marangi <ansuelsmth@gmail.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20241002221306.4403-4-ansuelsmth@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blkdev.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 50c3b959da28..a6aae750b4ac 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -725,6 +725,9 @@ static inline unsigned int blk_queue_depth(struct request_queue *q)
 #define for_each_bio(_bio)		\
 	for (; _bio; _bio = _bio->bi_next)
 
+int __must_check add_disk_fwnode(struct device *parent, struct gendisk *disk,
+				 const struct attribute_group **groups,
+				 struct fwnode_handle *fwnode);
 int __must_check device_add_disk(struct device *parent, struct gendisk *disk,
 				 const struct attribute_group **groups);
 static inline int __must_check add_disk(struct gendisk *disk)
-- 
cgit v1.2.3


From 110234da18ab482f6f583d28eff26b9569bf5622 Mon Sep 17 00:00:00 2001
From: Keith Busch <kbusch@kernel.org>
Date: Mon, 7 Oct 2024 08:32:35 -0700
Subject: block: enable passthrough command statistics

Applications using the passthrough interfaces for IO want to continue
seeing the disk stats. These requests had been fenced off from this
block layer feature. While the block layer doesn't necessarily know what
a passthrough command does, we do know the data size and direction,
which is enough to account for the command's stats.

Since tracking these has the potential to produce unexpected results,
the passthrough stats are locked behind a new queue flag that needs to
be enabled with the /sys/block/<dev>/queue/iostats_passthrough
attribute.

Signed-off-by: Keith Busch <kbusch@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20241007153236.2818562-1-kbusch@meta.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blkdev.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index a6aae750b4ac..6b78a68e0bd9 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -349,6 +349,9 @@ typedef unsigned int __bitwise blk_flags_t;
 /* I/O topology is misaligned */
 #define BLK_FLAG_MISALIGNED		((__force blk_flags_t)(1u << 1))
 
+/* passthrough command IO accounting */
+#define BLK_FLAG_IOSTATS_PASSTHROUGH	((__force blk_flags_t)(1u << 2))
+
 struct queue_limits {
 	blk_features_t		features;
 	blk_flags_t		flags;
@@ -617,6 +620,8 @@ void blk_queue_flag_clear(unsigned int flag, struct request_queue *q);
 	test_bit(QUEUE_FLAG_NOXMERGES, &(q)->queue_flags)
 #define blk_queue_nonrot(q)	(!((q)->limits.features & BLK_FEAT_ROTATIONAL))
 #define blk_queue_io_stat(q)	((q)->limits.features & BLK_FEAT_IO_STAT)
+#define blk_queue_passthrough_stat(q)	\
+	((q)->limits.flags & BLK_FLAG_IOSTATS_PASSTHROUGH)
 #define blk_queue_dax(q)	((q)->limits.features & BLK_FEAT_DAX)
 #define blk_queue_pci_p2pdma(q)	((q)->limits.features & BLK_FEAT_PCI_P2PDMA)
 #ifdef CONFIG_BLK_RQ_ALLOC_TIME
-- 
cgit v1.2.3


From 59eaa01ce7a6cbc5c36b928f52888f99fca6b295 Mon Sep 17 00:00:00 2001
From: Uday Shankar <ushankar@purestorage.com>
Date: Mon, 7 Oct 2024 12:24:17 -0600
Subject: ublk: support device recovery without I/O queueing

ublk currently supports the following behaviors on ublk server exit:

A: outstanding I/Os get errors, subsequently issued I/Os get errors
B: outstanding I/Os get errors, subsequently issued I/Os queue
C: outstanding I/Os get reissued, subsequently issued I/Os queue

and the following behaviors for recovery of preexisting block devices by
a future incarnation of the ublk server:

1: ublk devices stopped on ublk server exit (no recovery possible)
2: ublk devices are recoverable using start/end_recovery commands

The userspace interface allows selection of combinations of these
behaviors using flags specified at device creation time, namely:

default behavior: A + 1
UBLK_F_USER_RECOVERY: B + 2
UBLK_F_USER_RECOVERY|UBLK_F_USER_RECOVERY_REISSUE: C + 2

The behavior A + 2 is currently unsupported. Add support for this
behavior under the new flag combination
UBLK_F_USER_RECOVERY|UBLK_F_USER_RECOVERY_FAIL_IO.

Signed-off-by: Uday Shankar <ushankar@purestorage.com>
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Link: https://lore.kernel.org/r/20241007182419.3263186-5-ushankar@purestorage.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/uapi/linux/ublk_cmd.h | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/ublk_cmd.h b/include/uapi/linux/ublk_cmd.h
index 12873639ea96..a8bc98bb69fc 100644
--- a/include/uapi/linux/ublk_cmd.h
+++ b/include/uapi/linux/ublk_cmd.h
@@ -147,8 +147,18 @@
  */
 #define UBLK_F_NEED_GET_DATA (1UL << 2)
 
+/*
+ * - Block devices are recoverable if ublk server exits and restarts
+ * - Outstanding I/O when ublk server exits is met with errors
+ * - I/O issued while there is no ublk server queues
+ */
 #define UBLK_F_USER_RECOVERY	(1UL << 3)
 
+/*
+ * - Block devices are recoverable if ublk server exits and restarts
+ * - Outstanding I/O when ublk server exits is reissued
+ * - I/O issued while there is no ublk server queues
+ */
 #define UBLK_F_USER_RECOVERY_REISSUE	(1UL << 4)
 
 /*
@@ -190,10 +200,18 @@
  */
 #define UBLK_F_ZONED (1ULL << 8)
 
+/*
+ * - Block devices are recoverable if ublk server exits and restarts
+ * - Outstanding I/O when ublk server exits is met with errors
+ * - I/O issued while there is no ublk server is met with errors
+ */
+#define UBLK_F_USER_RECOVERY_FAIL_IO (1ULL << 9)
+
 /* device state */
 #define UBLK_S_DEV_DEAD	0
 #define UBLK_S_DEV_LIVE	1
 #define UBLK_S_DEV_QUIESCED	2
+#define UBLK_S_DEV_FAIL_IO 	3
 
 /* shipped via sqe->cmd of io_uring command */
 struct ublksrv_ctrl_cmd {
-- 
cgit v1.2.3


From b21d948f4cc73e3296f2365c7afca721dd6893fa Mon Sep 17 00:00:00 2001
From: Greg Joyce <gjoyce@linux.ibm.com>
Date: Thu, 29 Aug 2024 12:56:11 -0500
Subject: block: sed-opal: add ioctl IOC_OPAL_SET_SID_PW

After a SED drive is provisioned, there is no way to change the SID
password via the ioctl() interface. A new ioctl IOC_OPAL_SET_SID_PW
will allow the password to be changed. The valid current password is
required.

Signed-off-by: Greg Joyce <gjoyce@linux.ibm.com>
Reviewed-by: Daniel Wagner <dwagner@suse.de>
Link: https://lore.kernel.org/r/20240829175639.6478-2-gjoyce@linux.ibm.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/sed-opal.h      | 1 +
 include/uapi/linux/sed-opal.h | 1 +
 2 files changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/sed-opal.h b/include/linux/sed-opal.h
index 2ac50822554e..80f33a93f944 100644
--- a/include/linux/sed-opal.h
+++ b/include/linux/sed-opal.h
@@ -52,6 +52,7 @@ static inline bool is_sed_ioctl(unsigned int cmd)
 	case IOC_OPAL_GET_GEOMETRY:
 	case IOC_OPAL_DISCOVERY:
 	case IOC_OPAL_REVERT_LSP:
+	case IOC_OPAL_SET_SID_PW:
 		return true;
 	}
 	return false;
diff --git a/include/uapi/linux/sed-opal.h b/include/uapi/linux/sed-opal.h
index d3994b7716bc..9025dd5a4f0f 100644
--- a/include/uapi/linux/sed-opal.h
+++ b/include/uapi/linux/sed-opal.h
@@ -215,5 +215,6 @@ struct opal_revert_lsp {
 #define IOC_OPAL_GET_GEOMETRY       _IOR('p', 238, struct opal_geometry)
 #define IOC_OPAL_DISCOVERY          _IOW('p', 239, struct opal_discovery)
 #define IOC_OPAL_REVERT_LSP         _IOW('p', 240, struct opal_revert_lsp)
+#define IOC_OPAL_SET_SID_PW         _IOW('p', 241, struct opal_new_pw)
 
 #endif /* _UAPI_SED_OPAL_H */
-- 
cgit v1.2.3


From 8cf8dfceebdaf282da8a836b2bb578808a12698c Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Tue, 22 Oct 2024 15:41:34 +0200
Subject: seccomp: Stub for !HAVE_ARCH_SECCOMP_FILTER

If we have CONFIG_SECCOMP but not CONFIG_HAVE_ARCH_SECCOMP_FILTER
we get a compilation error:

../kernel/entry/common.c: In function 'syscall_trace_enter':
../kernel/entry/common.c:55:23: error: implicit declaration of function '__secure_computing' [-Werror=implicit-function-declaration]
   55 |                 ret = __secure_computing(NULL);
      |                       ^~~~~~~~~~~~~~~~~~

This is because generic entry calls __secure_computing()
unconditionally.

Provide the needed stub similar to how current ARM does
this by calling secure_computing_strict() in the absence
of secure_computing(). This is similar to what is done for
ARM in arch/arm/kernel/ptrace.c.

Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Link: https://lore.kernel.org/r/20241022-seccomp-compile-error-v2-1-c9f08a4f8ebb@linaro.org
Signed-off-by: Kees Cook <kees@kernel.org>
---
 include/linux/seccomp.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include')

diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h
index 709ad84809e1..341980599c71 100644
--- a/include/linux/seccomp.h
+++ b/include/linux/seccomp.h
@@ -32,6 +32,11 @@ static inline int secure_computing(void)
 }
 #else
 extern void secure_computing_strict(int this_syscall);
+static inline int __secure_computing(const struct seccomp_data *sd)
+{
+	secure_computing_strict(sd->nr);
+	return 0;
+}
 #endif
 
 extern long prctl_get_seccomp(void);
-- 
cgit v1.2.3


From cfcdf395c21eeac4543d2b8fef9d29ae9e4559e9 Mon Sep 17 00:00:00 2001
From: Jerome Brunet <jbrunet@baylibre.com>
Date: Tue, 8 Oct 2024 18:07:02 +0200
Subject: regulator: core: add callback to perform runtime init

Provide an initialisation callback to handle runtime parameters.
The idea is similar to the regulator_init() callback, but it provides
regulator specific structures, instead of just the driver specific data.

As an example, this allows the driver to amend the regulator constraints
based on runtime parameters if necessary.

Signed-off-by: Jerome Brunet <jbrunet@baylibre.com>
Link: https://patch.msgid.link/20241008-regulator-ignored-data-v2-2-d1251e0ee507@baylibre.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/linux/regulator/driver.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h
index f230a472ccd3..d2f4427504f0 100644
--- a/include/linux/regulator/driver.h
+++ b/include/linux/regulator/driver.h
@@ -365,6 +365,8 @@ struct regulator_desc {
 	int (*of_parse_cb)(struct device_node *,
 			    const struct regulator_desc *,
 			    struct regulator_config *);
+	int (*init_cb)(struct regulator_dev *,
+		       struct regulator_config *);
 	int id;
 	unsigned int continuous_voltage_range:1;
 	unsigned n_voltages;
-- 
cgit v1.2.3


From 602ff58ae4fe4289b0ca71cba9fb82f7de92cd64 Mon Sep 17 00:00:00 2001
From: Jerome Brunet <jbrunet@baylibre.com>
Date: Tue, 8 Oct 2024 18:07:03 +0200
Subject: regulator: core: remove machine init callback from config

The machine specific regulator_init() appears to be unused.
It does not allow a lot of interaction with the regulator framework,
since nothing from the framework is passed along (desc, config,
etc ...)

Machine specific init may also be done with the added init_cb() in
the regulator description, so remove regulator_init().

Signed-off-by: Jerome Brunet <jbrunet@baylibre.com>
Link: https://patch.msgid.link/20241008-regulator-ignored-data-v2-3-d1251e0ee507@baylibre.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/linux/regulator/machine.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/regulator/machine.h b/include/linux/regulator/machine.h
index 0cd76d264727..d0d700ff337a 100644
--- a/include/linux/regulator/machine.h
+++ b/include/linux/regulator/machine.h
@@ -285,8 +285,7 @@ struct regulator_init_data {
 	int num_consumer_supplies;
 	struct regulator_consumer_supply *consumer_supplies;
 
-	/* optional regulator machine specific init */
-	int (*regulator_init)(void *driver_data);
+	/* optional regulator machine specific data */
 	void *driver_data;	/* core does not touch this */
 };
 
-- 
cgit v1.2.3


From 51e32e897539663957f7a0950f66b48f8896efee Mon Sep 17 00:00:00 2001
From: Cristian Ciocaltea <cristian.ciocaltea@collabora.com>
Date: Sat, 19 Oct 2024 14:16:00 +0300
Subject: clk: Provide devm_clk_bulk_get_all_enabled() helper

Commit 265b07df758a ("clk: Provide managed helper to get and enable bulk
clocks") added devm_clk_bulk_get_all_enable() function, but missed to
return the number of clocks stored in the clk_bulk_data table referenced
by the clks argument.  Without knowing the number, it's not possible to
iterate these clocks when needed, hence the argument is useless and
could have been simply removed.

Introduce devm_clk_bulk_get_all_enabled() variant, which is consistent
with devm_clk_bulk_get_all() in terms of the returned value:

 > 0 if one or more clocks have been stored
 = 0 if there are no clocks
 < 0 if an error occurred

Moreover, the naming is consistent with devm_clk_get_enabled(), i.e. use
the past form of 'enable'.

To reduce code duplication and improve patch readability, make
devm_clk_bulk_get_all_enable() use the new helper, as suggested by
Stephen Boyd.

Reviewed-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
Reviewed-by: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
Signed-off-by: Cristian Ciocaltea <cristian.ciocaltea@collabora.com>
Link: https://lore.kernel.org/r/20241019-clk_bulk_ena_fix-v4-1-57f108f64e70@collabora.com
Signed-off-by: Stephen Boyd <sboyd@kernel.org>
---
 include/linux/clk.h | 21 ++++++++++++++++-----
 1 file changed, 16 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/clk.h b/include/linux/clk.h
index 851a0f2cf42c..1dcee6d701e4 100644
--- a/include/linux/clk.h
+++ b/include/linux/clk.h
@@ -496,11 +496,13 @@ int __must_check devm_clk_bulk_get_all(struct device *dev,
 				       struct clk_bulk_data **clks);
 
 /**
- * devm_clk_bulk_get_all_enable - Get and enable all clocks of the consumer (managed)
+ * devm_clk_bulk_get_all_enabled - Get and enable all clocks of the consumer (managed)
  * @dev: device for clock "consumer"
  * @clks: pointer to the clk_bulk_data table of consumer
  *
- * Returns success (0) or negative errno.
+ * Returns a positive value for the number of clocks obtained while the
+ * clock references are stored in the clk_bulk_data table in @clks field.
+ * Returns 0 if there're none and a negative value if something failed.
  *
  * This helper function allows drivers to get all clocks of the
  * consumer and enables them in one operation with management.
@@ -508,8 +510,8 @@ int __must_check devm_clk_bulk_get_all(struct device *dev,
  * is unbound.
  */
 
-int __must_check devm_clk_bulk_get_all_enable(struct device *dev,
-					      struct clk_bulk_data **clks);
+int __must_check devm_clk_bulk_get_all_enabled(struct device *dev,
+					       struct clk_bulk_data **clks);
 
 /**
  * devm_clk_get - lookup and obtain a managed reference to a clock producer.
@@ -1034,7 +1036,7 @@ static inline int __must_check devm_clk_bulk_get_all(struct device *dev,
 	return 0;
 }
 
-static inline int __must_check devm_clk_bulk_get_all_enable(struct device *dev,
+static inline int __must_check devm_clk_bulk_get_all_enabled(struct device *dev,
 						struct clk_bulk_data **clks)
 {
 	return 0;
@@ -1136,6 +1138,15 @@ static inline void clk_restore_context(void) {}
 
 #endif
 
+/* Deprecated. Use devm_clk_bulk_get_all_enabled() */
+static inline int __must_check
+devm_clk_bulk_get_all_enable(struct device *dev, struct clk_bulk_data **clks)
+{
+	int ret = devm_clk_bulk_get_all_enabled(dev, clks);
+
+	return ret > 0 ? 0 : ret;
+}
+
 /* clk_prepare_enable helps cases using clk_enable in non-atomic context. */
 static inline int clk_prepare_enable(struct clk *clk)
 {
-- 
cgit v1.2.3


From 97ddae76ddd20ea35d2059086aacd85b707a09c5 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Mon, 21 Oct 2024 13:41:13 -0400
Subject: Revert "drm/amdkfd: SMI report dropped event count"

This reverts commit a3ab2d45b9887ee609cd3bea39f668236935774c.

The userspace side for this code is not ready yet so revert
for now.

Reviewed-by: Philip Yang <Philip.Yang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: Philip Yang <Philip.Yang@amd.com>
---
 include/uapi/linux/kfd_ioctl.h | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
index 7afd66d45313..fa9f9846b88e 100644
--- a/include/uapi/linux/kfd_ioctl.h
+++ b/include/uapi/linux/kfd_ioctl.h
@@ -530,7 +530,6 @@ enum kfd_smi_event {
 	KFD_SMI_EVENT_QUEUE_EVICTION = 9,
 	KFD_SMI_EVENT_QUEUE_RESTORE = 10,
 	KFD_SMI_EVENT_UNMAP_FROM_GPU = 11,
-	KFD_SMI_EVENT_DROPPED_EVENT = 12,
 
 	/*
 	 * max event number, as a flag bit to get events from all processes,
@@ -611,7 +610,6 @@ struct kfd_ioctl_smi_events_args {
  *    rw: 'W' for write page fault, 'R' for read page fault
  *    rescheduled: 'R' if the queue restore failed and rescheduled to try again
  *    error_code: migrate failure error code, 0 if no error
- *    drop_count: how many events dropped when fifo is full
  */
 #define KFD_EVENT_FMT_UPDATE_GPU_RESET(reset_seq_num, reset_cause)\
 		"%x %s\n", (reset_seq_num), (reset_cause)
@@ -647,10 +645,6 @@ struct kfd_ioctl_smi_events_args {
 		"%lld -%d @%lx(%lx) %x %d\n", (ns), (pid), (addr), (size),\
 		(node), (unmap_trigger)
 
-#define KFD_EVENT_FMT_DROPPED_EVENT(ns, pid, drop_count)\
-		"%lld -%d %d\n", (ns), (pid), (drop_count)
-
-
 /**************************************************************************************************
  * CRIU IOCTLs (Checkpoint Restore In Userspace)
  *
-- 
cgit v1.2.3


From 7867cb6575ac2f93a2daccdf3df49e4f1a4594e4 Mon Sep 17 00:00:00 2001
From: Taniya Das <quic_tdas@quicinc.com>
Date: Fri, 11 Oct 2024 00:28:31 +0530
Subject: dt-bindings: clock: qcom: Add SA8775P video clock controller

Add device tree bindings for the video clock controller on Qualcomm
SA8775P platform.

Reviewed-by: Krzysztof Kozlowski <krzk@kernel.org>
Signed-off-by: Taniya Das <quic_tdas@quicinc.com>
Link: https://lore.kernel.org/r/20241011-sa8775p-mm-v4-resend-patches-v5-1-4a9f17dc683a@quicinc.com
Signed-off-by: Bjorn Andersson <andersson@kernel.org>
---
 include/dt-bindings/clock/qcom,sa8775p-videocc.h | 47 ++++++++++++++++++++++++
 1 file changed, 47 insertions(+)
 create mode 100644 include/dt-bindings/clock/qcom,sa8775p-videocc.h

(limited to 'include')

diff --git a/include/dt-bindings/clock/qcom,sa8775p-videocc.h b/include/dt-bindings/clock/qcom,sa8775p-videocc.h
new file mode 100644
index 000000000000..e6325f68c317
--- /dev/null
+++ b/include/dt-bindings/clock/qcom,sa8775p-videocc.h
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/*
+ * Copyright (c) 2024, Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#ifndef _DT_BINDINGS_CLK_QCOM_SA8775P_VIDEO_CC_H
+#define _DT_BINDINGS_CLK_QCOM_SA8775P_VIDEO_CC_H
+
+/* VIDEO_CC clocks */
+#define VIDEO_CC_AHB_CLK					0
+#define VIDEO_CC_AHB_CLK_SRC					1
+#define VIDEO_CC_MVS0_CLK					2
+#define VIDEO_CC_MVS0_CLK_SRC					3
+#define VIDEO_CC_MVS0_DIV_CLK_SRC				4
+#define VIDEO_CC_MVS0C_CLK					5
+#define VIDEO_CC_MVS0C_DIV2_DIV_CLK_SRC				6
+#define VIDEO_CC_MVS1_CLK					7
+#define VIDEO_CC_MVS1_CLK_SRC					8
+#define VIDEO_CC_MVS1_DIV_CLK_SRC				9
+#define VIDEO_CC_MVS1C_CLK					10
+#define VIDEO_CC_MVS1C_DIV2_DIV_CLK_SRC				11
+#define VIDEO_CC_PLL_LOCK_MONITOR_CLK				12
+#define VIDEO_CC_SLEEP_CLK					13
+#define VIDEO_CC_SLEEP_CLK_SRC					14
+#define VIDEO_CC_SM_DIV_CLK_SRC					15
+#define VIDEO_CC_SM_OBS_CLK					16
+#define VIDEO_CC_XO_CLK						17
+#define VIDEO_CC_XO_CLK_SRC					18
+#define VIDEO_PLL0						19
+#define VIDEO_PLL1						20
+
+/* VIDEO_CC power domains */
+#define VIDEO_CC_MVS0C_GDSC					0
+#define VIDEO_CC_MVS0_GDSC					1
+#define VIDEO_CC_MVS1C_GDSC					2
+#define VIDEO_CC_MVS1_GDSC					3
+
+/* VIDEO_CC resets */
+#define VIDEO_CC_INTERFACE_BCR					0
+#define VIDEO_CC_MVS0_BCR					1
+#define VIDEO_CC_MVS0C_CLK_ARES					2
+#define VIDEO_CC_MVS0C_BCR					3
+#define VIDEO_CC_MVS1_BCR					4
+#define VIDEO_CC_MVS1C_CLK_ARES					5
+#define VIDEO_CC_MVS1C_BCR					6
+
+#endif
-- 
cgit v1.2.3


From 9b1873d2350b3f08463563603fdbfcea4f068e67 Mon Sep 17 00:00:00 2001
From: Taniya Das <quic_tdas@quicinc.com>
Date: Fri, 11 Oct 2024 00:28:33 +0530
Subject: dt-bindings: clock: qcom: Add SA8775P camera clock controller

Add device tree bindings for the camera clock controller
on Qualcomm SA8775P platform.

Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Signed-off-by: Taniya Das <quic_tdas@quicinc.com>
Link: https://lore.kernel.org/r/20241011-sa8775p-mm-v4-resend-patches-v5-3-4a9f17dc683a@quicinc.com
Signed-off-by: Bjorn Andersson <andersson@kernel.org>
---
 include/dt-bindings/clock/qcom,sa8775p-camcc.h | 108 +++++++++++++++++++++++++
 1 file changed, 108 insertions(+)
 create mode 100644 include/dt-bindings/clock/qcom,sa8775p-camcc.h

(limited to 'include')

diff --git a/include/dt-bindings/clock/qcom,sa8775p-camcc.h b/include/dt-bindings/clock/qcom,sa8775p-camcc.h
new file mode 100644
index 000000000000..38531acd699f
--- /dev/null
+++ b/include/dt-bindings/clock/qcom,sa8775p-camcc.h
@@ -0,0 +1,108 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/*
+ * Copyright (c) 2024, Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#ifndef _DT_BINDINGS_CLK_QCOM_SA8775P_CAM_CC_H
+#define _DT_BINDINGS_CLK_QCOM_SA8775P_CAM_CC_H
+
+/* CAM_CC clocks */
+#define CAM_CC_CAMNOC_AXI_CLK					0
+#define CAM_CC_CAMNOC_AXI_CLK_SRC				1
+#define CAM_CC_CAMNOC_DCD_XO_CLK				2
+#define CAM_CC_CAMNOC_XO_CLK					3
+#define CAM_CC_CCI_0_CLK					4
+#define CAM_CC_CCI_0_CLK_SRC					5
+#define CAM_CC_CCI_1_CLK					6
+#define CAM_CC_CCI_1_CLK_SRC					7
+#define CAM_CC_CCI_2_CLK					8
+#define CAM_CC_CCI_2_CLK_SRC					9
+#define CAM_CC_CCI_3_CLK					10
+#define CAM_CC_CCI_3_CLK_SRC					11
+#define CAM_CC_CORE_AHB_CLK					12
+#define CAM_CC_CPAS_AHB_CLK					13
+#define CAM_CC_CPAS_FAST_AHB_CLK				14
+#define CAM_CC_CPAS_IFE_0_CLK					15
+#define CAM_CC_CPAS_IFE_1_CLK					16
+#define CAM_CC_CPAS_IFE_LITE_CLK				17
+#define CAM_CC_CPAS_IPE_CLK					18
+#define CAM_CC_CPAS_SFE_LITE_0_CLK				19
+#define CAM_CC_CPAS_SFE_LITE_1_CLK				20
+#define CAM_CC_CPHY_RX_CLK_SRC					21
+#define CAM_CC_CSI0PHYTIMER_CLK					22
+#define CAM_CC_CSI0PHYTIMER_CLK_SRC				23
+#define CAM_CC_CSI1PHYTIMER_CLK					24
+#define CAM_CC_CSI1PHYTIMER_CLK_SRC				25
+#define CAM_CC_CSI2PHYTIMER_CLK					26
+#define CAM_CC_CSI2PHYTIMER_CLK_SRC				27
+#define CAM_CC_CSI3PHYTIMER_CLK					28
+#define CAM_CC_CSI3PHYTIMER_CLK_SRC				29
+#define CAM_CC_CSID_CLK						30
+#define CAM_CC_CSID_CLK_SRC					31
+#define CAM_CC_CSID_CSIPHY_RX_CLK				32
+#define CAM_CC_CSIPHY0_CLK					33
+#define CAM_CC_CSIPHY1_CLK					34
+#define CAM_CC_CSIPHY2_CLK					35
+#define CAM_CC_CSIPHY3_CLK					36
+#define CAM_CC_FAST_AHB_CLK_SRC					37
+#define CAM_CC_GDSC_CLK						38
+#define CAM_CC_ICP_AHB_CLK					39
+#define CAM_CC_ICP_CLK						40
+#define CAM_CC_ICP_CLK_SRC					41
+#define CAM_CC_IFE_0_CLK					42
+#define CAM_CC_IFE_0_CLK_SRC					43
+#define CAM_CC_IFE_0_FAST_AHB_CLK				44
+#define CAM_CC_IFE_1_CLK					45
+#define CAM_CC_IFE_1_CLK_SRC					46
+#define CAM_CC_IFE_1_FAST_AHB_CLK				47
+#define CAM_CC_IFE_LITE_AHB_CLK					48
+#define CAM_CC_IFE_LITE_CLK					49
+#define CAM_CC_IFE_LITE_CLK_SRC					50
+#define CAM_CC_IFE_LITE_CPHY_RX_CLK				51
+#define CAM_CC_IFE_LITE_CSID_CLK				52
+#define CAM_CC_IFE_LITE_CSID_CLK_SRC				53
+#define CAM_CC_IPE_AHB_CLK					54
+#define CAM_CC_IPE_CLK						55
+#define CAM_CC_IPE_CLK_SRC					56
+#define CAM_CC_IPE_FAST_AHB_CLK					57
+#define CAM_CC_MCLK0_CLK					58
+#define CAM_CC_MCLK0_CLK_SRC					59
+#define CAM_CC_MCLK1_CLK					60
+#define CAM_CC_MCLK1_CLK_SRC					61
+#define CAM_CC_MCLK2_CLK					62
+#define CAM_CC_MCLK2_CLK_SRC					63
+#define CAM_CC_MCLK3_CLK					64
+#define CAM_CC_MCLK3_CLK_SRC					65
+#define CAM_CC_PLL0						66
+#define CAM_CC_PLL0_OUT_EVEN					67
+#define CAM_CC_PLL0_OUT_ODD					68
+#define CAM_CC_PLL2						69
+#define CAM_CC_PLL3						70
+#define CAM_CC_PLL3_OUT_EVEN					71
+#define CAM_CC_PLL4						72
+#define CAM_CC_PLL4_OUT_EVEN					73
+#define CAM_CC_PLL5						74
+#define CAM_CC_PLL5_OUT_EVEN					75
+#define CAM_CC_SFE_LITE_0_CLK					76
+#define CAM_CC_SFE_LITE_0_FAST_AHB_CLK				77
+#define CAM_CC_SFE_LITE_1_CLK					78
+#define CAM_CC_SFE_LITE_1_FAST_AHB_CLK				79
+#define CAM_CC_SLEEP_CLK					80
+#define CAM_CC_SLEEP_CLK_SRC					81
+#define CAM_CC_SLOW_AHB_CLK_SRC					82
+#define CAM_CC_SM_OBS_CLK					83
+#define CAM_CC_XO_CLK_SRC					84
+#define CAM_CC_QDSS_DEBUG_XO_CLK				85
+
+/* CAM_CC power domains */
+#define CAM_CC_TITAN_TOP_GDSC					0
+
+/* CAM_CC resets */
+#define CAM_CC_ICP_BCR						0
+#define CAM_CC_IFE_0_BCR					1
+#define CAM_CC_IFE_1_BCR					2
+#define CAM_CC_IPE_0_BCR					3
+#define CAM_CC_SFE_LITE_0_BCR					4
+#define CAM_CC_SFE_LITE_1_BCR					5
+
+#endif
-- 
cgit v1.2.3


From 33b5cd95d801cab8c634c822e6877470f4209612 Mon Sep 17 00:00:00 2001
From: Taniya Das <quic_tdas@quicinc.com>
Date: Fri, 11 Oct 2024 00:28:35 +0530
Subject: dt-bindings: clock: qcom: Add SA8775P display clock controllers

Add device tree bindings for the display clock controllers
on Qualcomm SA8775P platform.

Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Signed-off-by: Taniya Das <quic_tdas@quicinc.com>
Link: https://lore.kernel.org/r/20241011-sa8775p-mm-v4-resend-patches-v5-5-4a9f17dc683a@quicinc.com
Signed-off-by: Bjorn Andersson <andersson@kernel.org>
---
 include/dt-bindings/clock/qcom,sa8775p-dispcc.h | 87 +++++++++++++++++++++++++
 1 file changed, 87 insertions(+)
 create mode 100644 include/dt-bindings/clock/qcom,sa8775p-dispcc.h

(limited to 'include')

diff --git a/include/dt-bindings/clock/qcom,sa8775p-dispcc.h b/include/dt-bindings/clock/qcom,sa8775p-dispcc.h
new file mode 100644
index 000000000000..e2049e510658
--- /dev/null
+++ b/include/dt-bindings/clock/qcom,sa8775p-dispcc.h
@@ -0,0 +1,87 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/*
+ * Copyright (c) 2024, Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#ifndef _DT_BINDINGS_CLK_QCOM_SA8775P_DISP_CC_H
+#define _DT_BINDINGS_CLK_QCOM_SA8775P_DISP_CC_H
+
+/* DISP_CC_0/1 clocks */
+#define MDSS_DISP_CC_MDSS_AHB1_CLK				0
+#define MDSS_DISP_CC_MDSS_AHB_CLK				1
+#define MDSS_DISP_CC_MDSS_AHB_CLK_SRC				2
+#define MDSS_DISP_CC_MDSS_BYTE0_CLK				3
+#define MDSS_DISP_CC_MDSS_BYTE0_CLK_SRC				4
+#define MDSS_DISP_CC_MDSS_BYTE0_DIV_CLK_SRC			5
+#define MDSS_DISP_CC_MDSS_BYTE0_INTF_CLK			6
+#define MDSS_DISP_CC_MDSS_BYTE1_CLK				7
+#define MDSS_DISP_CC_MDSS_BYTE1_CLK_SRC				8
+#define MDSS_DISP_CC_MDSS_BYTE1_DIV_CLK_SRC			9
+#define MDSS_DISP_CC_MDSS_BYTE1_INTF_CLK			10
+#define MDSS_DISP_CC_MDSS_DPTX0_AUX_CLK				11
+#define MDSS_DISP_CC_MDSS_DPTX0_AUX_CLK_SRC			12
+#define MDSS_DISP_CC_MDSS_DPTX0_CRYPTO_CLK			13
+#define MDSS_DISP_CC_MDSS_DPTX0_CRYPTO_CLK_SRC			14
+#define MDSS_DISP_CC_MDSS_DPTX0_LINK_CLK			15
+#define MDSS_DISP_CC_MDSS_DPTX0_LINK_CLK_SRC			16
+#define MDSS_DISP_CC_MDSS_DPTX0_LINK_DIV_CLK_SRC		17
+#define MDSS_DISP_CC_MDSS_DPTX0_LINK_INTF_CLK			18
+#define MDSS_DISP_CC_MDSS_DPTX0_PIXEL0_CLK			19
+#define MDSS_DISP_CC_MDSS_DPTX0_PIXEL0_CLK_SRC			20
+#define MDSS_DISP_CC_MDSS_DPTX0_PIXEL1_CLK			21
+#define MDSS_DISP_CC_MDSS_DPTX0_PIXEL1_CLK_SRC			22
+#define MDSS_DISP_CC_MDSS_DPTX0_PIXEL2_CLK			23
+#define MDSS_DISP_CC_MDSS_DPTX0_PIXEL2_CLK_SRC			24
+#define MDSS_DISP_CC_MDSS_DPTX0_PIXEL3_CLK			25
+#define MDSS_DISP_CC_MDSS_DPTX0_PIXEL3_CLK_SRC			26
+#define MDSS_DISP_CC_MDSS_DPTX0_USB_ROUTER_LINK_INTF_CLK	27
+#define MDSS_DISP_CC_MDSS_DPTX1_AUX_CLK				28
+#define MDSS_DISP_CC_MDSS_DPTX1_AUX_CLK_SRC			29
+#define MDSS_DISP_CC_MDSS_DPTX1_CRYPTO_CLK			30
+#define MDSS_DISP_CC_MDSS_DPTX1_CRYPTO_CLK_SRC			31
+#define MDSS_DISP_CC_MDSS_DPTX1_LINK_CLK			32
+#define MDSS_DISP_CC_MDSS_DPTX1_LINK_CLK_SRC			33
+#define MDSS_DISP_CC_MDSS_DPTX1_LINK_DIV_CLK_SRC		34
+#define MDSS_DISP_CC_MDSS_DPTX1_LINK_INTF_CLK			35
+#define MDSS_DISP_CC_MDSS_DPTX1_PIXEL0_CLK			36
+#define MDSS_DISP_CC_MDSS_DPTX1_PIXEL0_CLK_SRC			37
+#define MDSS_DISP_CC_MDSS_DPTX1_PIXEL1_CLK			38
+#define MDSS_DISP_CC_MDSS_DPTX1_PIXEL1_CLK_SRC			39
+#define MDSS_DISP_CC_MDSS_DPTX1_USB_ROUTER_LINK_INTF_CLK	40
+#define MDSS_DISP_CC_MDSS_ESC0_CLK				41
+#define MDSS_DISP_CC_MDSS_ESC0_CLK_SRC				42
+#define MDSS_DISP_CC_MDSS_ESC1_CLK				43
+#define MDSS_DISP_CC_MDSS_ESC1_CLK_SRC				44
+#define MDSS_DISP_CC_MDSS_MDP1_CLK				45
+#define MDSS_DISP_CC_MDSS_MDP_CLK				46
+#define MDSS_DISP_CC_MDSS_MDP_CLK_SRC				47
+#define MDSS_DISP_CC_MDSS_MDP_LUT1_CLK				48
+#define MDSS_DISP_CC_MDSS_MDP_LUT_CLK				49
+#define MDSS_DISP_CC_MDSS_NON_GDSC_AHB_CLK			50
+#define MDSS_DISP_CC_MDSS_PCLK0_CLK				51
+#define MDSS_DISP_CC_MDSS_PCLK0_CLK_SRC				52
+#define MDSS_DISP_CC_MDSS_PCLK1_CLK				53
+#define MDSS_DISP_CC_MDSS_PCLK1_CLK_SRC				54
+#define MDSS_DISP_CC_MDSS_PLL_LOCK_MONITOR_CLK			55
+#define MDSS_DISP_CC_MDSS_RSCC_AHB_CLK				56
+#define MDSS_DISP_CC_MDSS_RSCC_VSYNC_CLK			57
+#define MDSS_DISP_CC_MDSS_VSYNC1_CLK				58
+#define MDSS_DISP_CC_MDSS_VSYNC_CLK				59
+#define MDSS_DISP_CC_MDSS_VSYNC_CLK_SRC				60
+#define MDSS_DISP_CC_PLL0					61
+#define MDSS_DISP_CC_PLL1					62
+#define MDSS_DISP_CC_SLEEP_CLK					63
+#define MDSS_DISP_CC_SLEEP_CLK_SRC				64
+#define MDSS_DISP_CC_SM_OBS_CLK					65
+#define MDSS_DISP_CC_XO_CLK					66
+#define MDSS_DISP_CC_XO_CLK_SRC					67
+
+/* DISP_CC_0/1 power domains */
+#define MDSS_DISP_CC_MDSS_CORE_GDSC				0
+#define MDSS_DISP_CC_MDSS_CORE_INT2_GDSC			1
+
+/* DISP_CC_0/1 resets */
+#define MDSS_DISP_CC_MDSS_CORE_BCR				0
+#define MDSS_DISP_CC_MDSS_RSCC_BCR				1
+
+#endif
-- 
cgit v1.2.3


From dbc81e680a0f007cf062963c40f145074aacab2d Mon Sep 17 00:00:00 2001
From: Lijuan Gao <quic_lijuang@quicinc.com>
Date: Tue, 22 Oct 2024 16:54:30 +0800
Subject: dt-bindings: arm: qcom,ids: add SoC ID for QCS615

Add the ID for the Qualcomm QCS615 SoC.

Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Signed-off-by: Lijuan Gao <quic_lijuang@quicinc.com>
Link: https://lore.kernel.org/r/20241022-add_initial_support_for_qcs615-v4-2-0a551c6dd342@quicinc.com
Signed-off-by: Bjorn Andersson <andersson@kernel.org>
---
 include/dt-bindings/arm/qcom,ids.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/dt-bindings/arm/qcom,ids.h b/include/dt-bindings/arm/qcom,ids.h
index 8332f8d82f96..73a69fc535f6 100644
--- a/include/dt-bindings/arm/qcom,ids.h
+++ b/include/dt-bindings/arm/qcom,ids.h
@@ -278,6 +278,7 @@
 #define QCOM_ID_IPQ5321			650
 #define QCOM_ID_QCS8300			674
 #define QCOM_ID_QCS8275			675
+#define QCOM_ID_QCS615			680
 
 /*
  * The board type and revision information, used by Qualcomm bootloaders and
-- 
cgit v1.2.3


From 3b3214acd7f2dd506924cd57ce6e380cdaede423 Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Date: Fri, 18 Oct 2024 19:32:36 +0300
Subject: dt-bindings: arm: qcom,ids: add SoC ID for SAR2130P and SAR1130P

Add the IDs for Qualcomm SAR2130P and SAR1130P platforms.

Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Link: https://lore.kernel.org/r/20241018-sar2130p-socinfo-v1-1-b1e97ea963fe@linaro.org
Signed-off-by: Bjorn Andersson <andersson@kernel.org>
---
 include/dt-bindings/arm/qcom,ids.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/dt-bindings/arm/qcom,ids.h b/include/dt-bindings/arm/qcom,ids.h
index 73a69fc535f6..1a53ca2df523 100644
--- a/include/dt-bindings/arm/qcom,ids.h
+++ b/include/dt-bindings/arm/qcom,ids.h
@@ -255,6 +255,7 @@
 #define QCOM_ID_IPQ9510			521
 #define QCOM_ID_QRB4210			523
 #define QCOM_ID_QRB2210			524
+#define QCOM_ID_SAR2130P		525
 #define QCOM_ID_SM8475			530
 #define QCOM_ID_SM8475P			531
 #define QCOM_ID_SA8775P			534
@@ -264,6 +265,7 @@
 #define QCOM_ID_X1E80100		555
 #define QCOM_ID_SM8650			557
 #define QCOM_ID_SM4450			568
+#define QCOM_ID_SAR1130P		579
 #define QCOM_ID_QDU1010			587
 #define QCOM_ID_QRU1032			588
 #define QCOM_ID_QRU1052			589
-- 
cgit v1.2.3


From 44059790a5cb9258ae6137387e4c39b717fd2ced Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 23 Oct 2024 07:53:04 +0200
Subject: kfifo: don't include dma-mapping.h in kfifo.h

Nothing in kfifo.h directly needs dma-mapping.h, only two macros
use DMA_MAPPING_ERROR when actually instantiated.  Drop the
dma-mapping.h include to reduce include bloat.

Add an explicity <linux/io.h> include to drivers/mailbox/omap-mailbox.c
as that file uses __raw_readl and __raw_writel through a complicated
include chain involving <linux/dma-mapping.h>

Fixes: d52b761e4b1a ("kfifo: add kfifo_dma_out_prepare_mapped()")
Signed-off-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20241023055317.313234-1-hch@lst.de
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/kfifo.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h
index 564868bdce89..fd743d4c4b4b 100644
--- a/include/linux/kfifo.h
+++ b/include/linux/kfifo.h
@@ -37,7 +37,6 @@
  */
 
 #include <linux/array_size.h>
-#include <linux/dma-mapping.h>
 #include <linux/spinlock.h>
 #include <linux/stddef.h>
 #include <linux/types.h>
-- 
cgit v1.2.3


From 83c289e81e88d01e55d6d56531502ed7b4886a05 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Thu, 17 Oct 2024 19:19:34 +0300
Subject: net/sched: act_api: unexport tcf_action_dump_1()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This isn't used outside act_api.c, but is called by tcf_dump_walker()
prior to its definition. So move it upwards and make it static.

Simultaneously, reorder the variable declarations so that they follow
the networking "reverse Christmas tree" coding style.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com>
Link: https://patch.msgid.link/20241017161934.3599046-1-vladimir.oltean@nxp.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/net/act_api.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/net/act_api.h b/include/net/act_api.h
index 77ee0c657e2c..404df8557f6a 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -219,7 +219,6 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
 int tcf_action_dump(struct sk_buff *skb, struct tc_action *actions[], int bind,
 		    int ref, bool terse);
 int tcf_action_dump_old(struct sk_buff *skb, struct tc_action *a, int, int);
-int tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int, int);
 
 static inline void tcf_action_update_bstats(struct tc_action *a,
 					    struct sk_buff *skb)
-- 
cgit v1.2.3


From c972c1c41d9b20fb38b54e77dcee763e27e715a9 Mon Sep 17 00:00:00 2001
From: Kuniyuki Iwashima <kuniyu@amazon.com>
Date: Thu, 17 Oct 2024 18:41:00 -0700
Subject: ipv4: Switch inet_addr_hash() to less predictable hash.

Recently, commit 4a0ec2aa0704 ("ipv6: switch inet6_addr_hash()
to less predictable hash") and commit 4daf4dc275f1 ("ipv6: switch
inet6_acaddr_hash() to less predictable hash") hardened IPv6
address hash functions.

inet_addr_hash() is also highly predictable, and a malicious use
could abuse a specific bucket.

Let's follow the change on IPv4 by using jhash_1word().

Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Link: https://patch.msgid.link/20241018014100.93776-1-kuniyu@amazon.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/net/ip.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include')

diff --git a/include/net/ip.h b/include/net/ip.h
index 4be0a6a603b2..0e548c1f2a0e 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -690,6 +690,11 @@ static inline unsigned int ipv4_addr_hash(__be32 ip)
 	return (__force unsigned int) ip;
 }
 
+static inline u32 __ipv4_addr_hash(const __be32 ip, const u32 initval)
+{
+	return jhash_1word((__force u32)ip, initval);
+}
+
 static inline u32 ipv4_portaddr_hash(const struct net *net,
 				     __be32 saddr,
 				     unsigned int port)
-- 
cgit v1.2.3


From e44ef3f66c5472c2cbc6957c684d7279c26b0db1 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Fri, 18 Oct 2024 05:21:08 +0000
Subject: netpoll: remove ndo_netpoll_setup() second argument

npinfo is not used in any of the ndo_netpoll_setup() methods.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Link: https://patch.msgid.link/20241018052108.2610827-1-edumazet@google.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/linux/netdevice.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 8feaca12655e..86a0b7eb9461 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1425,8 +1425,7 @@ struct net_device_ops {
 						        __be16 proto, u16 vid);
 #ifdef CONFIG_NET_POLL_CONTROLLER
 	void                    (*ndo_poll_controller)(struct net_device *dev);
-	int			(*ndo_netpoll_setup)(struct net_device *dev,
-						     struct netpoll_info *info);
+	int			(*ndo_netpoll_setup)(struct net_device *dev);
 	void			(*ndo_netpoll_cleanup)(struct net_device *dev);
 #endif
 	int			(*ndo_set_vf_mac)(struct net_device *dev,
-- 
cgit v1.2.3


From 7cfc1b1fa8673fe386304194d4cc2c8fe555bbf9 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Fri, 18 Oct 2024 05:23:10 +0000
Subject: net: netdev_tx_sent_queue() small optimization

Change smp_mb() imediately following a set_bit()
with smp_mb__after_atomic().

Signed-off-by: Eric Dumazet <edumazet@google.com>
Link: https://patch.msgid.link/20241018052310.2612084-1-edumazet@google.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/linux/netdevice.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 86a0b7eb9461..bbd30f3c5d29 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3517,7 +3517,7 @@ static inline void netdev_tx_sent_queue(struct netdev_queue *dev_queue,
 	 * because in netdev_tx_completed_queue we update the dql_completed
 	 * before checking the XOFF flag.
 	 */
-	smp_mb();
+	smp_mb__after_atomic();
 
 	/* check again in case another CPU has just made room avail */
 	if (unlikely(dql_avail(&dev_queue->dql) >= 0))
-- 
cgit v1.2.3


From e55f45b0cda71ac2e9b4c6dee8852dc8d6f22ef0 Mon Sep 17 00:00:00 2001
From: Jerome Brunet <jbrunet@baylibre.com>
Date: Wed, 23 Oct 2024 10:35:43 +0200
Subject: regulator: doc: add missing documentation for init_cb

Add comment documenting introduced init_cb.
This solves the following warning when building the kernel documentation:

./include/linux/regulator/driver.h:435: warning: Function parameter or struct member 'init_cb' not described in 'regulator_desc'

Fixes: cfcdf395c21e ("regulator: core: add callback to perform runtime init")
Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Closes: https://lore.kernel.org/lkml/20241023155120.6c4fea20@canb.auug.org.au/
Signed-off-by: Jerome Brunet <jbrunet@baylibre.com>
Link: https://patch.msgid.link/20241023-regulator-doc-fixup-v1-1-ec018742ad73@baylibre.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/linux/regulator/driver.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include')

diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h
index d2f4427504f0..5b66caf1695d 100644
--- a/include/linux/regulator/driver.h
+++ b/include/linux/regulator/driver.h
@@ -269,6 +269,11 @@ enum regulator_type {
  *               config but it cannot store it for later usage.
  *               Callback should return 0 on success or negative ERRNO
  *               indicating failure.
+ * @init_cb: Optional callback called after the parsing of init_data.
+ *           Allows the regulator to perform runtime init if necessary,
+ *           such as synching the regulator and the parsed constraints.
+ *           Callback should return 0 on success or negative ERRNO
+ *           indicating failure.
  * @id: Numerical identifier for the regulator.
  * @ops: Regulator operations table.
  * @irq: Interrupt number for the regulator.
-- 
cgit v1.2.3


From d1bc2d5cca434e7c854fd16ef021c16d74293b60 Mon Sep 17 00:00:00 2001
From: Jerome Brunet <jbrunet@baylibre.com>
Date: Wed, 23 Oct 2024 10:35:44 +0200
Subject: regulator: doc: remove documentation comment for regulator_init

Remove documentation comment related to regulator_init callback.
This solves the following warning when building the kernel documentation:

./include/linux/regulator/machine.h:290: warning: Excess struct member 'regulator_init' description in 'regulator_init_data'

Fixes: 602ff58ae4fe ("regulator: core: remove machine init callback from config")
Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Closes: https://lore.kernel.org/lkml/20241023155257.0fa7211d@canb.auug.org.au/
Signed-off-by: Jerome Brunet <jbrunet@baylibre.com>
Link: https://patch.msgid.link/20241023-regulator-doc-fixup-v1-2-ec018742ad73@baylibre.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/linux/regulator/machine.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/regulator/machine.h b/include/linux/regulator/machine.h
index d0d700ff337a..b3db09a7429b 100644
--- a/include/linux/regulator/machine.h
+++ b/include/linux/regulator/machine.h
@@ -273,8 +273,6 @@ struct regulator_consumer_supply {
  *               be usable.
  * @num_consumer_supplies: Number of consumer device supplies.
  * @consumer_supplies: Consumer device supply configuration.
- *
- * @regulator_init: Callback invoked when the regulator has been registered.
  * @driver_data: Data passed to regulator_init.
  */
 struct regulator_init_data {
-- 
cgit v1.2.3


From 50a191a8a12b33dfad3b27c6ba4e76c5ba39db73 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= <linux@weissschuh.net>
Date: Sat, 10 Aug 2024 19:00:35 +0200
Subject: sysctl: update comments to new registration APIs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The sysctl registration APIs do not need a terminating table entry
anymore and with commit acc154691fc7 ("sysctl: Warn on an empty procname
element") even emit warnings if such a sentinel entry is supplied.

While at it also remove the mention of "table->de" which was removed in
commit 3fbfa98112fc ("[PATCH] sysctl: remove the proc_dir_entry member
for the sysctl tables") back in 2007.

Signed-off-by: Thomas Weißschuh <linux@weissschuh.net>
Reviewed-by: Kees Cook <kees@kernel.org>
Signed-off-by: Joel Granados <joel.granados@kernel.org>
---
 include/linux/sysctl.h | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 202855befa8b..40a6ac6c9713 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -90,9 +90,7 @@ int proc_do_static_key(const struct ctl_table *table, int write, void *buffer,
 
 /*
  * Register a set of sysctl names by calling register_sysctl
- * with an initialised array of struct ctl_table's.  An entry with 
- * NULL procname terminates the table.  table->de will be
- * set up by the registration and need not be initialised in advance.
+ * with an initialised array of struct ctl_table's.
  *
  * sysctl names can be mirrored automatically under /proc/sys.  The
  * procname supplied controls /proc naming.
@@ -133,7 +131,7 @@ static inline void *proc_sys_poll_event(struct ctl_table_poll *poll)
 
 /* A sysctl table is an array of struct ctl_table: */
 struct ctl_table {
-	const char *procname;		/* Text ID for /proc/sys, or zero */
+	const char *procname;		/* Text ID for /proc/sys */
 	void *data;
 	int maxlen;
 	umode_t mode;
-- 
cgit v1.2.3


From 074a8b54dacc1920f54381f3661ecee6786b0c21 Mon Sep 17 00:00:00 2001
From: Ilan Peer <ilan.peer@intel.com>
Date: Mon, 7 Oct 2024 15:00:45 +0300
Subject: wifi: mac80211: Add support to indicate that a new interface is to be
 added

Add support to indicate to the driver that an interface is about to be
added so that the driver could prepare its resources early if it needs
so.

Signed-off-by: Ilan Peer <ilan.peer@intel.com>
Signed-off-by: Miri Korenblit <miriam.rachel.korenblit@intel.com>
Link: https://patch.msgid.link/20241007144851.e0e8563e1c30.Ifccc96a46a347eb15752caefc9f4eff31f75ed47@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/mac80211.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 333e0fae6796..0b8df8ec5a3b 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -4444,6 +4444,12 @@ struct ieee80211_prep_tx_info {
  *	if the requested TID-To-Link mapping can be accepted or not.
  *	If it's not accepted the driver may suggest a preferred mapping and
  *	modify @ttlm parameter with the suggested TID-to-Link mapping.
+ * @prep_add_interface: prepare for interface addition. This can be used by
+ *      drivers to prepare for the addition of a new interface, e.g., allocate
+ *      the needed resources etc. This callback doesn't guarantee that an
+ *      interface with the specified type would be added, and thus drivers that
+ *      implement this callback need to handle such cases. The type is the full
+ *      &enum nl80211_iftype.
  */
 struct ieee80211_ops {
 	void (*tx)(struct ieee80211_hw *hw,
@@ -4828,6 +4834,8 @@ struct ieee80211_ops {
 	enum ieee80211_neg_ttlm_res
 	(*can_neg_ttlm)(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
 			struct ieee80211_neg_ttlm *ttlm);
+	void (*prep_add_interface)(struct ieee80211_hw *hw,
+				   enum nl80211_iftype type);
 };
 
 /**
-- 
cgit v1.2.3


From 62262dd00c319195f2e14022903b7ebbb53119bc Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Mon, 7 Oct 2024 15:00:46 +0300
Subject: wifi: cfg80211: disallow SMPS in AP mode

In practice, userspace hasn't been able to set this for many
years, and mac80211 has already rejected it (which is now no
longer needed), so reject SMPS mode (other than "OFF" to be
a bit more compatible) in AP mode. Also remove the parameter
from the AP settings struct.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Miri Korenblit <miriam.rachel.korenblit@intel.com>
Link: https://patch.msgid.link/20241007144851.fe1fc46484cf.I8676fb52b818a4bedeb9c25b901e1396277ffc0b@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 69ec1eb41a09..c8ce5c2e14f4 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -1460,7 +1460,6 @@ struct cfg80211_unsol_bcast_probe_resp {
  * @crypto: crypto settings
  * @privacy: the BSS uses privacy
  * @auth_type: Authentication type (algorithm)
- * @smps_mode: SMPS mode
  * @inactivity_timeout: time in seconds to determine station's inactivity.
  * @p2p_ctwindow: P2P CT Window
  * @p2p_opp_ps: P2P opportunistic PS
@@ -1498,7 +1497,6 @@ struct cfg80211_ap_settings {
 	struct cfg80211_crypto_settings crypto;
 	bool privacy;
 	enum nl80211_auth_type auth_type;
-	enum nl80211_smps_mode smps_mode;
 	int inactivity_timeout;
 	u8 p2p_ctwindow;
 	bool p2p_opp_ps;
-- 
cgit v1.2.3


From 9c5f2c7eeb585834f8dadb552b4fd811dd2dee6f Mon Sep 17 00:00:00 2001
From: Miri Korenblit <miriam.rachel.korenblit@intel.com>
Date: Mon, 7 Oct 2024 15:00:47 +0300
Subject: wifi: mac80211: rename IEEE80211_CHANCTX_CHANGE_MIN_WIDTH

The name is misleading, this actually indicates that
ieee80211_chanctx_conf::min_def was updated.
Rename it to IEEE80211_CHANCTX_CHANGE_MIN_DEF.

Signed-off-by: Miri Korenblit <miriam.rachel.korenblit@intel.com>
Link: https://patch.msgid.link/20241007144851.726b5f12ae0c.I3bd9e594c9d2735183ec049a4c7224bd0a9599c9@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/mac80211.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 0b8df8ec5a3b..c42ad5a0c303 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -213,7 +213,7 @@ struct ieee80211_low_level_stats {
  * @IEEE80211_CHANCTX_CHANGE_RADAR: radar detection flag changed
  * @IEEE80211_CHANCTX_CHANGE_CHANNEL: switched to another operating channel,
  *	this is used only with channel switching with CSA
- * @IEEE80211_CHANCTX_CHANGE_MIN_WIDTH: The min required channel width changed
+ * @IEEE80211_CHANCTX_CHANGE_MIN_DEF: The min chandef changed
  * @IEEE80211_CHANCTX_CHANGE_AP: The AP channel definition changed, so (wider
  *	bandwidth) OFDMA settings need to be changed
  * @IEEE80211_CHANCTX_CHANGE_PUNCTURING: The punctured channel(s) bitmap
@@ -224,7 +224,7 @@ enum ieee80211_chanctx_change {
 	IEEE80211_CHANCTX_CHANGE_RX_CHAINS	= BIT(1),
 	IEEE80211_CHANCTX_CHANGE_RADAR		= BIT(2),
 	IEEE80211_CHANCTX_CHANGE_CHANNEL	= BIT(3),
-	IEEE80211_CHANCTX_CHANGE_MIN_WIDTH	= BIT(4),
+	IEEE80211_CHANCTX_CHANGE_MIN_DEF	= BIT(4),
 	IEEE80211_CHANCTX_CHANGE_AP		= BIT(5),
 	IEEE80211_CHANCTX_CHANGE_PUNCTURING	= BIT(6),
 };
-- 
cgit v1.2.3


From e21dd758cf4c51d508e6665b653c5836103d1027 Mon Sep 17 00:00:00 2001
From: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Date: Mon, 7 Oct 2024 15:00:48 +0300
Subject: wifi: mac80211: make bss_param_ch_cnt available for the low level
 driver

Drivers may need to track this. Make it available for them, and maintain
the value when beacons are received.
When link X receives a beacon, iterate the RNR elements and update all
the links with their respective data.
Track the link id that updated the data so that each link can know
whether the update came from its own beacon or from another link.
In case, the update came from the link's own beacon, always update the
updater link id.
The purpose is to let the low level driver know if a link is losing its
beacons. If link X is losing its beacons, it can still track the
bss_param_ch_cnt and know where the update came from.

Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Signed-off-by: Miri Korenblit <miriam.rachel.korenblit@intel.com>
Link: https://patch.msgid.link/20241007144851.e2d8d1a722ad.I04b883daba2cd48e5730659eb62ca1614c899cbb@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/mac80211.h | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

(limited to 'include')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index c42ad5a0c303..20562676e9cd 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -740,6 +740,19 @@ struct ieee80211_parsed_tpe {
  * @eht_80mhz_full_bw_ul_mumimo: in AP-mode, does this BSS support the
  *	reception of an EHT TB PPDU on an RU that spans the entire PPDU
  *	bandwidth
+ * @bss_param_ch_cnt: in BSS-mode, the BSS params change count. This
+ *	information is the latest known value. It can come from this link's
+ *	beacon or from a beacon sent by another link.
+ * @bss_param_ch_cnt_link_id: in BSS-mode, the link_id to which the beacon
+ *	that updated &bss_param_ch_cnt belongs. E.g. if link 1 doesn't hear
+ *	its beacons, and link 2 sent a beacon with an RNR element that updated
+ *	link 1's BSS params change count, then, link 1's
+ *	bss_param_ch_cnt_link_id will be 2. That means that link 1 knows that
+ *	link 2 was the link that updated its bss_param_ch_cnt value.
+ *	In case link 1 hears its beacon again, bss_param_ch_cnt_link_id will
+ *	be updated to 1, even if bss_param_ch_cnt didn't change. This allows
+ *	the link to know that it heard the latest value from its own beacon
+ *	(as opposed to hearing its value from another link's beacon).
  */
 struct ieee80211_bss_conf {
 	struct ieee80211_vif *vif;
@@ -834,6 +847,8 @@ struct ieee80211_bss_conf {
 	bool eht_su_beamformee;
 	bool eht_mu_beamformer;
 	bool eht_80mhz_full_bw_ul_mumimo;
+	u8 bss_param_ch_cnt;
+	u8 bss_param_ch_cnt_link_id;
 };
 
 /**
-- 
cgit v1.2.3


From 88b67e91e2928c3311f3658d1270b40708b0de00 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Mon, 7 Oct 2024 15:00:54 +0300
Subject: wifi: mac80211: call rate_control_rate_update() for link STA

In order to update the right link information, call the update
rate_control_rate_update() with the right link_sta, and then
pass that through to the driver's sta_rc_update() method. The
software rate control still doesn't support it, but that'll be
skipped by not having a rate control ref.

Since it now operates on a link sta, rename the driver method.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Miri Korenblit <miriam.rachel.korenblit@intel.com>
Link: https://patch.msgid.link/20241007144851.5851b6b5fd41.Ibdf50d96afa4b761dd9b9dfd54a1147e77a75329@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/mac80211.h | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 20562676e9cd..7a6edc04e212 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -4090,8 +4090,8 @@ struct ieee80211_prep_tx_info {
  *	in @sta_state.
  *	The callback can sleep.
  *
- * @sta_rc_update: Notifies the driver of changes to the bitrates that can be
- *	used to transmit to the station. The changes are advertised with bits
+ * @link_sta_rc_update: Notifies the driver of changes to the bitrates that can
+ *	be used to transmit to the station. The changes are advertised with bits
  *	from &enum ieee80211_rate_control_changed and the values are reflected
  *	in the station data. This callback should only be used when the driver
  *	uses hardware rate control (%IEEE80211_HW_HAS_RATE_CONTROL) since
@@ -4581,10 +4581,10 @@ struct ieee80211_ops {
 	void (*sta_pre_rcu_remove)(struct ieee80211_hw *hw,
 				   struct ieee80211_vif *vif,
 				   struct ieee80211_sta *sta);
-	void (*sta_rc_update)(struct ieee80211_hw *hw,
-			      struct ieee80211_vif *vif,
-			      struct ieee80211_sta *sta,
-			      u32 changed);
+	void (*link_sta_rc_update)(struct ieee80211_hw *hw,
+				   struct ieee80211_vif *vif,
+				   struct ieee80211_link_sta *link_sta,
+				   u32 changed);
 	void (*sta_rate_tbl_update)(struct ieee80211_hw *hw,
 				    struct ieee80211_vif *vif,
 				    struct ieee80211_sta *sta);
-- 
cgit v1.2.3


From 751e7489c1d74b94ffffbed619d8fd724eeff4ee Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Mon, 7 Oct 2024 15:00:58 +0300
Subject: wifi: mac80211: expose ieee80211_chan_width_to_rx_bw() to drivers

Drivers might need to also do this calculation, no point in
them duplicating the code. Since it's so simple, just make
it an inline.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Miri Korenblit <miriam.rachel.korenblit@intel.com>
Link: https://patch.msgid.link/20241007144851.af003cb4a088.I8b5d29504b726caae24af6013c65b3daebe842a2@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/mac80211.h | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

(limited to 'include')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 7a6edc04e212..32bdda90a4ac 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -7704,6 +7704,33 @@ void ieee80211_set_active_links_async(struct ieee80211_vif *vif,
  */
 void ieee80211_send_teardown_neg_ttlm(struct ieee80211_vif *vif);
 
+/**
+ * ieee80211_chan_width_to_rx_bw - convert channel width to STA RX bandwidth
+ * @width: the channel width value to convert
+ * Return: the STA RX bandwidth value for the channel width
+ */
+static inline enum ieee80211_sta_rx_bandwidth
+ieee80211_chan_width_to_rx_bw(enum nl80211_chan_width width)
+{
+	switch (width) {
+	default:
+		WARN_ON_ONCE(1);
+		fallthrough;
+	case NL80211_CHAN_WIDTH_20_NOHT:
+	case NL80211_CHAN_WIDTH_20:
+		return IEEE80211_STA_RX_BW_20;
+	case NL80211_CHAN_WIDTH_40:
+		return IEEE80211_STA_RX_BW_40;
+	case NL80211_CHAN_WIDTH_80:
+		return IEEE80211_STA_RX_BW_80;
+	case NL80211_CHAN_WIDTH_160:
+	case NL80211_CHAN_WIDTH_80P80:
+		return IEEE80211_STA_RX_BW_160;
+	case NL80211_CHAN_WIDTH_320:
+		return IEEE80211_STA_RX_BW_320;
+	}
+}
+
 /* for older drivers - let's not document these ... */
 int ieee80211_emulate_add_chanctx(struct ieee80211_hw *hw,
 				  struct ieee80211_chanctx_conf *ctx);
-- 
cgit v1.2.3


From 3607798ad9bdef35ad08489a8239390fccaac6b5 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Wed, 9 Oct 2024 10:25:42 +0200
Subject: wifi: cfg80211: add option for vif allowed radios

This allows users to prevent a vif from affecting radios other than the
configured ones. This can be useful in cases where e.g. an AP is running
on one radio, and triggering a scan on another radio should not disturb it.

Changing the allowed radios list for a vif is supported, but only while
it is down.

While it is possible to achieve the same by always explicitly specifying
a frequency list for scan requests and ensuring that the wrong channel/band
is never accidentally set on an unrelated interface, this change makes
multi-radio wiphy setups a lot easier to deal with for CLI users.

By itself, this patch only enforces the radio mask for scanning requests
and remain-on-channel. Follow-up changes build on this to limit configured
frequencies.

Signed-off-by: Felix Fietkau <nbd@nbd.name>
Link: https://patch.msgid.link/eefcb218780f71a1549875d149f1196486762756.1728462320.git-series.nbd@nbd.name
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h       | 14 ++++++++++++++
 include/uapi/linux/nl80211.h |  5 +++++
 2 files changed, 19 insertions(+)

(limited to 'include')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index c8ce5c2e14f4..95d05e67e69a 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -6221,6 +6221,7 @@ enum ieee80211_ap_reg_power {
  *	entered.
  * @links.cac_time_ms: CAC time in ms
  * @valid_links: bitmap describing what elements of @links are valid
+ * @radio_mask: Bitmask of radios that this interface is allowed to operate on.
  */
 struct wireless_dev {
 	struct wiphy *wiphy;
@@ -6333,6 +6334,8 @@ struct wireless_dev {
 		unsigned int cac_time_ms;
 	} links[IEEE80211_MLD_MAX_NUM_LINKS];
 	u16 valid_links;
+
+	u32 radio_mask;
 };
 
 static inline const u8 *wdev_address(struct wireless_dev *wdev)
@@ -6518,6 +6521,17 @@ static inline bool cfg80211_channel_is_psc(struct ieee80211_channel *chan)
 bool cfg80211_radio_chandef_valid(const struct wiphy_radio *radio,
 				  const struct cfg80211_chan_def *chandef);
 
+/**
+ * cfg80211_wdev_channel_allowed - Check if the wdev may use the channel
+ *
+ * @wdev: the wireless device
+ * @chan: channel to check
+ *
+ * Return: whether or not the wdev may use the channel
+ */
+bool cfg80211_wdev_channel_allowed(struct wireless_dev *wdev,
+				   struct ieee80211_channel *chan);
+
 /**
  * ieee80211_get_response_rate - get basic rate for a given rate
  *
diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index f97f5adc8d51..d31ccee99cc7 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -2868,6 +2868,9 @@ enum nl80211_commands {
  *	nested item, it contains attributes defined in
  *	&enum nl80211_if_combination_attrs.
  *
+ * @NL80211_ATTR_VIF_RADIO_MASK: Bitmask of allowed radios (u32).
+ *	A value of 0 means all radios.
+ *
  * @NUM_NL80211_ATTR: total number of nl80211_attrs available
  * @NL80211_ATTR_MAX: highest attribute number currently defined
  * @__NL80211_ATTR_AFTER_LAST: internal use
@@ -3416,6 +3419,8 @@ enum nl80211_attrs {
 	NL80211_ATTR_WIPHY_RADIOS,
 	NL80211_ATTR_WIPHY_INTERFACE_COMBINATIONS,
 
+	NL80211_ATTR_VIF_RADIO_MASK,
+
 	/* add attributes here, update the policy in nl80211.c */
 
 	__NL80211_ATTR_AFTER_LAST,
-- 
cgit v1.2.3


From ebda716ea4da03326ac4d0a71604d18aa8a2e695 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Wed, 9 Oct 2024 10:25:45 +0200
Subject: wifi: cfg80211: report per wiphy radio antenna mask

With multi-radio devices, each radio typically gets a fixed set of antennas.
In order to be able to disable specific antennas for some radios, user space
needs to know which antenna mask bits are assigned to which radio.

Signed-off-by: Felix Fietkau <nbd@nbd.name>
Link: https://patch.msgid.link/e0a26afa2c88eaa188ec96ec6d17ecac4e827641.1728462320.git-series.nbd@nbd.name
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h       | 4 ++++
 include/uapi/linux/nl80211.h | 3 +++
 2 files changed, 7 insertions(+)

(limited to 'include')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 95d05e67e69a..3100733f3e23 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -5434,6 +5434,8 @@ struct wiphy_radio_freq_range {
  * @iface_combinations: Valid interface combinations array, should not
  *	list single interface types.
  * @n_iface_combinations: number of entries in @iface_combinations array.
+ *
+ * @antenna_mask: bitmask of antennas connected to this radio.
  */
 struct wiphy_radio {
 	const struct wiphy_radio_freq_range *freq_range;
@@ -5441,6 +5443,8 @@ struct wiphy_radio {
 
 	const struct ieee80211_iface_combination *iface_combinations;
 	int n_iface_combinations;
+
+	u32 antenna_mask;
 };
 
 #define CFG80211_HW_TIMESTAMP_ALL_PEERS	0xffff
diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index d31ccee99cc7..1b8827f920ff 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -8036,6 +8036,8 @@ enum nl80211_ap_settings_flags {
  * @NL80211_WIPHY_RADIO_ATTR_INTERFACE_COMBINATION: Supported interface
  *	combination for this radio. Attribute may be present multiple times
  *	and contains attributes defined in &enum nl80211_if_combination_attrs.
+ * @NL80211_WIPHY_RADIO_ATTR_ANTENNA_MASK: bitmask (u32) of antennas
+ *	connected to this radio.
  *
  * @__NL80211_WIPHY_RADIO_ATTR_LAST: Internal
  * @NL80211_WIPHY_RADIO_ATTR_MAX: Highest attribute
@@ -8046,6 +8048,7 @@ enum nl80211_wiphy_radio_attrs {
 	NL80211_WIPHY_RADIO_ATTR_INDEX,
 	NL80211_WIPHY_RADIO_ATTR_FREQ_RANGE,
 	NL80211_WIPHY_RADIO_ATTR_INTERFACE_COMBINATION,
+	NL80211_WIPHY_RADIO_ATTR_ANTENNA_MASK,
 
 	/* keep last */
 	__NL80211_WIPHY_RADIO_ATTR_LAST,
-- 
cgit v1.2.3


From 006a97ceb6732c861c0e2fa3b6a34512caac9354 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Wed, 9 Oct 2024 10:25:46 +0200
Subject: wifi: mac80211: remove status->ampdu_delimiter_crc

This was never used by any driver, so remove it to free up some space.

Signed-off-by: Felix Fietkau <nbd@nbd.name>
Link: https://patch.msgid.link/e6fee6eed49b105261830db1c74f13841fb9616c.1728462320.git-series.nbd@nbd.name
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/mac80211.h | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 32bdda90a4ac..d4f23224eff9 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -1463,8 +1463,6 @@ ieee80211_tx_info_clear_status(struct ieee80211_tx_info *info)
  * @RX_FLAG_AMPDU_IS_LAST: this subframe is the last subframe of the A-MPDU
  * @RX_FLAG_AMPDU_DELIM_CRC_ERROR: A delimiter CRC error has been detected
  *	on this subframe
- * @RX_FLAG_AMPDU_DELIM_CRC_KNOWN: The delimiter CRC field is known (the CRC
- *	is stored in the @ampdu_delimiter_crc field)
  * @RX_FLAG_MIC_STRIPPED: The mic was stripped of this packet. Decryption was
  *	done by the hardware
  * @RX_FLAG_ONLY_MONITOR: Report frame only to monitor interfaces without
@@ -1536,7 +1534,7 @@ enum mac80211_rx_flags {
 	RX_FLAG_AMPDU_LAST_KNOWN	= BIT(12),
 	RX_FLAG_AMPDU_IS_LAST		= BIT(13),
 	RX_FLAG_AMPDU_DELIM_CRC_ERROR	= BIT(14),
-	RX_FLAG_AMPDU_DELIM_CRC_KNOWN	= BIT(15),
+	/* one free bit at 15 */
 	RX_FLAG_MACTIME			= BIT(16) | BIT(17),
 	RX_FLAG_MACTIME_PLCP_START	= 1 << 16,
 	RX_FLAG_MACTIME_START		= 2 << 16,
@@ -1633,7 +1631,6 @@ enum mac80211_rx_encoding {
  * @rx_flags: internal RX flags for mac80211
  * @ampdu_reference: A-MPDU reference number, must be a different value for
  *	each A-MPDU but the same for each subframe within one A-MPDU
- * @ampdu_delimiter_crc: A-MPDU delimiter CRC
  * @zero_length_psdu_type: radiotap type of the 0-length PSDU
  * @link_valid: if the link which is identified by @link_id is valid. This flag
  *	is set only when connection is MLO.
@@ -1671,7 +1668,6 @@ struct ieee80211_rx_status {
 	s8 signal;
 	u8 chains;
 	s8 chain_signal[IEEE80211_MAX_CHAINS];
-	u8 ampdu_delimiter_crc;
 	u8 zero_length_psdu_type;
 	u8 link_valid:1, link_id:4;
 };
-- 
cgit v1.2.3


From 9c4f830927750a2bf9fd9426a5257f0fdce3b662 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Wed, 9 Oct 2024 10:25:47 +0200
Subject: wifi: cfg80211: pass net_device to .set_monitor_channel

Preparation for allowing multiple monitor interfaces with different channels
on a multi-radio wiphy.

Signed-off-by: Felix Fietkau <nbd@nbd.name>
Link: https://patch.msgid.link/35fa652dbfebf93343f8b9a08fdef0467a2a02dc.1728462320.git-series.nbd@nbd.name
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 3100733f3e23..5feb93ba0400 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -4694,6 +4694,7 @@ struct cfg80211_ops {
 					     struct ieee80211_channel *chan);
 
 	int	(*set_monitor_channel)(struct wiphy *wiphy,
+				       struct net_device *dev,
 				       struct cfg80211_chan_def *chandef);
 
 	int	(*scan)(struct wiphy *wiphy,
-- 
cgit v1.2.3


From 9d40f7e32774279be7e5a7a278d7a290872b2f81 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Wed, 9 Oct 2024 10:25:48 +0200
Subject: wifi: mac80211: add flag to opt out of virtual monitor support

This is useful for multi-radio devices that are capable of monitoring on
multiple channels simultanenously. When this flag is set, each monitor
interface is passed to the driver individually and can have a configured
channel.
The vif mac address for non-active monitor interfaces is cleared, in order
to allow the driver to tell them apart from active ones.

Signed-off-by: Felix Fietkau <nbd@nbd.name>
Link: https://patch.msgid.link/3c55505ee0cf0a5f141fbcb30d1e8be8d9f40373.1728462320.git-series.nbd@nbd.name
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/mac80211.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index d4f23224eff9..b4f246cdcca4 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -2694,6 +2694,11 @@ struct ieee80211_txq {
  *	a virtual monitor interface when monitor interfaces are the only
  *	active interfaces.
  *
+ * @IEEE80211_HW_NO_VIRTUAL_MONITOR: The driver would like to be informed
+ *	of any monitor interface, as well as their configured channel.
+ *	This is useful for supporting multiple monitor interfaces on different
+ *	channels.
+ *
  * @IEEE80211_HW_NO_AUTO_VIF: The driver would like for no wlanX to
  *	be created.  It is expected user-space will create vifs as
  *	desired (and thus have them named as desired).
@@ -2853,6 +2858,7 @@ enum ieee80211_hw_flags {
 	IEEE80211_HW_SUPPORTS_DYNAMIC_PS,
 	IEEE80211_HW_MFP_CAPABLE,
 	IEEE80211_HW_WANT_MONITOR_VIF,
+	IEEE80211_HW_NO_VIRTUAL_MONITOR,
 	IEEE80211_HW_NO_AUTO_VIF,
 	IEEE80211_HW_SW_CRYPTO_CONTROL,
 	IEEE80211_HW_SUPPORT_FAST_XMIT,
-- 
cgit v1.2.3


From a77e527b470cc38754c730bce1483711f643bb60 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Wed, 9 Oct 2024 10:25:49 +0200
Subject: wifi: cfg80211: add monitor SKIP_TX flag

This can be used to indicate that the user is not interested in receiving
locally sent packets on the monitor interface.

Signed-off-by: Felix Fietkau <nbd@nbd.name>
Link: https://patch.msgid.link/f0c20f832eadd36c71fba9a2a16ba57d78389b6c.1728462320.git-series.nbd@nbd.name
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h       | 2 ++
 include/uapi/linux/nl80211.h | 2 ++
 2 files changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 5feb93ba0400..8f9853b1a5d1 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -2267,6 +2267,7 @@ static inline int cfg80211_get_station(struct net_device *dev,
  * @MONITOR_FLAG_OTHER_BSS: disable BSSID filtering
  * @MONITOR_FLAG_COOK_FRAMES: report frames after processing
  * @MONITOR_FLAG_ACTIVE: active monitor, ACKs frames on its MAC address
+ * @MONITOR_FLAG_SKIP_TX: do not pass locally transmitted frames
  */
 enum monitor_flags {
 	MONITOR_FLAG_CHANGED		= BIT(__NL80211_MNTR_FLAG_INVALID),
@@ -2276,6 +2277,7 @@ enum monitor_flags {
 	MONITOR_FLAG_OTHER_BSS		= BIT(NL80211_MNTR_FLAG_OTHER_BSS),
 	MONITOR_FLAG_COOK_FRAMES	= BIT(NL80211_MNTR_FLAG_COOK_FRAMES),
 	MONITOR_FLAG_ACTIVE		= BIT(NL80211_MNTR_FLAG_ACTIVE),
+	MONITOR_FLAG_SKIP_TX		= BIT(NL80211_MNTR_FLAG_SKIP_TX),
 };
 
 /**
diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index 1b8827f920ff..6d11437596b9 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -4703,6 +4703,7 @@ enum nl80211_survey_info {
  *	overrides all other flags.
  * @NL80211_MNTR_FLAG_ACTIVE: use the configured MAC address
  *	and ACK incoming unicast packets.
+ * @NL80211_MNTR_FLAG_SKIP_TX: do not pass local tx packets
  *
  * @__NL80211_MNTR_FLAG_AFTER_LAST: internal use
  * @NL80211_MNTR_FLAG_MAX: highest possible monitor flag
@@ -4715,6 +4716,7 @@ enum nl80211_mntr_flags {
 	NL80211_MNTR_FLAG_OTHER_BSS,
 	NL80211_MNTR_FLAG_COOK_FRAMES,
 	NL80211_MNTR_FLAG_ACTIVE,
+	NL80211_MNTR_FLAG_SKIP_TX,
 
 	/* keep last */
 	__NL80211_MNTR_FLAG_AFTER_LAST,
-- 
cgit v1.2.3


From 8dc6d81c6b2acc434b00c4585f0594739031c4e4 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Tue, 22 Oct 2024 15:18:34 +0200
Subject: debugfs: add small file operations for most files

As struct file_operations is really big, but (most) debugfs
files only use simple_open, read, write and perhaps seek, and
don't need anything else, this wastes a lot of space for NULL
pointers.

Add a struct debugfs_short_fops and some bookkeeping code in
debugfs so that users can use that with debugfs_create_file()
using _Generic to figure out which function to use.

Converting mac80211 to use it where possible saves quite a
bit of space:

1010127  205064    1220 1216411  128f9b net/mac80211/mac80211.ko (before)
 981199  205064    1220 1187483  121e9b net/mac80211/mac80211.ko (after)
-------
 -28928 = ~28KiB

With a marginal space cost in debugfs:

   8701	    550	     16	   9267	   2433	fs/debugfs/inode.o (before)
  25233	    325	     32	  25590	   63f6	fs/debugfs/file.o  (before)
   8914	    558	     16	   9488	   2510	fs/debugfs/inode.o (after)
  25380	    325	     32	  25737	   6489	fs/debugfs/file.o  (after)
---------------
   +360      +8

(All on x86-64)

A simple spatch suggests there are more than 300 instances,
not even counting the ones hidden in macros like in mac80211,
that could be trivially converted, for additional savings of
about 240 bytes for each.

Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Link: https://patch.msgid.link/20241022151838.26f9925fb959.Ia80b55e934bbfc45ce0df42a3233d34b35508046@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/linux/debugfs.h | 62 +++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 58 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h
index 0928a6c8ae1e..59444b495d49 100644
--- a/include/linux/debugfs.h
+++ b/include/linux/debugfs.h
@@ -71,9 +71,63 @@ typedef struct vfsmount *(*debugfs_automount_t)(struct dentry *, void *);
 
 struct dentry *debugfs_lookup(const char *name, struct dentry *parent);
 
-struct dentry *debugfs_create_file(const char *name, umode_t mode,
-				   struct dentry *parent, void *data,
-				   const struct file_operations *fops);
+struct debugfs_short_fops {
+	ssize_t (*read)(struct file *, char __user *, size_t, loff_t *);
+	ssize_t (*write)(struct file *, const char __user *, size_t, loff_t *);
+	loff_t (*llseek) (struct file *, loff_t, int);
+};
+
+struct dentry *debugfs_create_file_full(const char *name, umode_t mode,
+					struct dentry *parent, void *data,
+					const struct file_operations *fops);
+struct dentry *debugfs_create_file_short(const char *name, umode_t mode,
+					 struct dentry *parent, void *data,
+					 const struct debugfs_short_fops *fops);
+
+/**
+ * debugfs_create_file - create a file in the debugfs filesystem
+ * @name: a pointer to a string containing the name of the file to create.
+ * @mode: the permission that the file should have.
+ * @parent: a pointer to the parent dentry for this file.  This should be a
+ *          directory dentry if set.  If this parameter is NULL, then the
+ *          file will be created in the root of the debugfs filesystem.
+ * @data: a pointer to something that the caller will want to get to later
+ *        on.  The inode.i_private pointer will point to this value on
+ *        the open() call.
+ * @fops: a pointer to a struct file_operations or struct debugfs_short_fops that
+ *        should be used for this file.
+ *
+ * This is the basic "create a file" function for debugfs.  It allows for a
+ * wide range of flexibility in creating a file, or a directory (if you want
+ * to create a directory, the debugfs_create_dir() function is
+ * recommended to be used instead.)
+ *
+ * This function will return a pointer to a dentry if it succeeds.  This
+ * pointer must be passed to the debugfs_remove() function when the file is
+ * to be removed (no automatic cleanup happens if your module is unloaded,
+ * you are responsible here.)  If an error occurs, ERR_PTR(-ERROR) will be
+ * returned.
+ *
+ * If debugfs is not enabled in the kernel, the value -%ENODEV will be
+ * returned.
+ *
+ * If fops points to a struct debugfs_short_fops, then simple_open() will be
+ * used for the open, and only read/write/llseek are supported and are proxied,
+ * so no module reference or release are needed.
+ *
+ * NOTE: it's expected that most callers should _ignore_ the errors returned
+ * by this function. Other debugfs functions handle the fact that the "dentry"
+ * passed to them could be an error and they don't crash in that case.
+ * Drivers should generally work fine even if debugfs fails to init anyway.
+ */
+#define debugfs_create_file(name, mode, parent, data, fops)			\
+	_Generic(fops,								\
+		 const struct file_operations *: debugfs_create_file_full,	\
+		 const struct debugfs_short_fops *: debugfs_create_file_short,	\
+		 struct file_operations *: debugfs_create_file_full,		\
+		 struct debugfs_short_fops *: debugfs_create_file_short)	\
+		(name, mode, parent, data, fops)
+
 struct dentry *debugfs_create_file_unsafe(const char *name, umode_t mode,
 				   struct dentry *parent, void *data,
 				   const struct file_operations *fops);
@@ -207,7 +261,7 @@ static inline struct dentry *debugfs_lookup(const char *name,
 
 static inline struct dentry *debugfs_create_file(const char *name, umode_t mode,
 					struct dentry *parent, void *data,
-					const struct file_operations *fops)
+					const void *fops)
 {
 	return ERR_PTR(-ENODEV);
 }
-- 
cgit v1.2.3


From 26ff1fb02991e1260481185bb5ccab1ee498d5e4 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Wed, 16 Oct 2024 09:19:29 -0700
Subject: rcu: Delete unused rcu_gp_might_be_stalled() function

The rcu_gp_might_be_stalled() function is no longer used, so this commit
removes it.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Reviewed-by: Joel Fernandes (Google) <joel@joelfernandes.org>
Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
---
 include/linux/rcutiny.h | 1 -
 include/linux/rcutree.h | 1 -
 2 files changed, 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index 0ee270b3f5ed..fe42315f667f 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -165,7 +165,6 @@ static inline bool rcu_inkernel_boot_has_ended(void) { return true; }
 static inline bool rcu_is_watching(void) { return true; }
 static inline void rcu_momentary_eqs(void) { }
 static inline void kfree_rcu_scheduler_running(void) { }
-static inline bool rcu_gp_might_be_stalled(void) { return false; }
 
 /* Avoid RCU read-side critical sections leaking across. */
 static inline void rcu_all_qs(void) { barrier(); }
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index 90a684f94776..27d86d912781 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -40,7 +40,6 @@ void kvfree_rcu_barrier(void);
 void rcu_barrier(void);
 void rcu_momentary_eqs(void);
 void kfree_rcu_scheduler_running(void);
-bool rcu_gp_might_be_stalled(void);
 
 struct rcu_gp_oldstate {
 	unsigned long rgos_norm;
-- 
cgit v1.2.3


From b3aba04883de872488e5dabda199427b2bfa0395 Mon Sep 17 00:00:00 2001
From: Manikanta Mylavarapu <quic_mmanikan@quicinc.com>
Date: Tue, 20 Aug 2024 11:26:16 +0530
Subject: dt-bindings: clock: qcom: gcc-ipq5332: remove q6 bring up clock
 macros

Q6 firmware takes care of bringup clocks, so remove them.

Signed-off-by: Manikanta Mylavarapu <quic_mmanikan@quicinc.com>
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Signed-off-by: Gokul Sriram Palanisamy <quic_gokulsri@quicinc.com>
Link: https://lore.kernel.org/r/20240820055618.267554-4-quic_gokulsri@quicinc.com
Signed-off-by: Bjorn Andersson <andersson@kernel.org>
---
 include/dt-bindings/clock/qcom,ipq5332-gcc.h | 20 --------------------
 1 file changed, 20 deletions(-)

(limited to 'include')

diff --git a/include/dt-bindings/clock/qcom,ipq5332-gcc.h b/include/dt-bindings/clock/qcom,ipq5332-gcc.h
index 8a405a0a96d0..da9b507c30bf 100644
--- a/include/dt-bindings/clock/qcom,ipq5332-gcc.h
+++ b/include/dt-bindings/clock/qcom,ipq5332-gcc.h
@@ -96,15 +96,7 @@
 #define GCC_PCNOC_BFDCD_CLK_SRC				87
 #define GCC_PCNOC_LPASS_CLK				88
 #define GCC_PRNG_AHB_CLK				89
-#define GCC_Q6_AHB_CLK					90
-#define GCC_Q6_AHB_S_CLK				91
-#define GCC_Q6_AXIM_CLK					92
 #define GCC_Q6_AXIM_CLK_SRC				93
-#define GCC_Q6_AXIS_CLK					94
-#define GCC_Q6_TSCTR_1TO2_CLK				95
-#define GCC_Q6SS_ATBM_CLK				96
-#define GCC_Q6SS_PCLKDBG_CLK				97
-#define GCC_Q6SS_TRIG_CLK				98
 #define GCC_QDSS_AT_CLK					99
 #define GCC_QDSS_AT_CLK_SRC				100
 #define GCC_QDSS_CFG_AHB_CLK				101
@@ -134,7 +126,6 @@
 #define GCC_SNOC_PCIE3_2LANE_S_CLK			125
 #define GCC_SNOC_USB_CLK				126
 #define GCC_SYS_NOC_AT_CLK				127
-#define GCC_SYS_NOC_WCSS_AHB_CLK			128
 #define GCC_SYSTEM_NOC_BFDCD_CLK_SRC			129
 #define GCC_UNIPHY0_AHB_CLK				130
 #define GCC_UNIPHY0_SYS_CLK				131
@@ -155,17 +146,6 @@
 #define GCC_USB0_PIPE_CLK				146
 #define GCC_USB0_SLEEP_CLK				147
 #define GCC_WCSS_AHB_CLK_SRC				148
-#define GCC_WCSS_AXIM_CLK				149
-#define GCC_WCSS_AXIS_CLK				150
-#define GCC_WCSS_DBG_IFC_APB_BDG_CLK			151
-#define GCC_WCSS_DBG_IFC_APB_CLK			152
-#define GCC_WCSS_DBG_IFC_ATB_BDG_CLK			153
-#define GCC_WCSS_DBG_IFC_ATB_CLK			154
-#define GCC_WCSS_DBG_IFC_NTS_BDG_CLK			155
-#define GCC_WCSS_DBG_IFC_NTS_CLK			156
-#define GCC_WCSS_ECAHB_CLK				157
-#define GCC_WCSS_MST_ASYNC_BDG_CLK			158
-#define GCC_WCSS_SLV_ASYNC_BDG_CLK			159
 #define GCC_XO_CLK					160
 #define GCC_XO_CLK_SRC					161
 #define GCC_XO_DIV4_CLK					162
-- 
cgit v1.2.3


From da040d56031976144740bddba942485999f6a16f Mon Sep 17 00:00:00 2001
From: Manikanta Mylavarapu <quic_mmanikan@quicinc.com>
Date: Tue, 20 Aug 2024 11:26:17 +0530
Subject: dt-bindings: clock: qcom: gcc-ipq9574: remove q6 bring up clock
 macros

Q6 firmware takes care of bringup clocks, so remove them.

Signed-off-by: Manikanta Mylavarapu <quic_mmanikan@quicinc.com>
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Signed-off-by: Gokul Sriram Palanisamy <quic_gokulsri@quicinc.com>
Acked-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Link: https://lore.kernel.org/r/20240820055618.267554-5-quic_gokulsri@quicinc.com
Signed-off-by: Bjorn Andersson <andersson@kernel.org>
---
 include/dt-bindings/clock/qcom,ipq9574-gcc.h | 18 ------------------
 1 file changed, 18 deletions(-)

(limited to 'include')

diff --git a/include/dt-bindings/clock/qcom,ipq9574-gcc.h b/include/dt-bindings/clock/qcom,ipq9574-gcc.h
index 52123c5a09fa..f238aa4794a8 100644
--- a/include/dt-bindings/clock/qcom,ipq9574-gcc.h
+++ b/include/dt-bindings/clock/qcom,ipq9574-gcc.h
@@ -132,16 +132,8 @@
 #define GCC_NSSNOC_SNOC_1_CLK				123
 #define GCC_QDSS_ETR_USB_CLK				124
 #define WCSS_AHB_CLK_SRC				125
-#define GCC_Q6_AHB_CLK					126
-#define GCC_Q6_AHB_S_CLK				127
-#define GCC_WCSS_ECAHB_CLK				128
-#define GCC_WCSS_ACMT_CLK				129
-#define GCC_SYS_NOC_WCSS_AHB_CLK			130
 #define WCSS_AXI_M_CLK_SRC				131
-#define GCC_ANOC_WCSS_AXI_M_CLK				132
 #define QDSS_AT_CLK_SRC					133
-#define GCC_Q6SS_ATBM_CLK				134
-#define GCC_WCSS_DBG_IFC_ATB_CLK			135
 #define GCC_NSSNOC_ATB_CLK				136
 #define GCC_QDSS_AT_CLK					137
 #define GCC_SYS_NOC_AT_CLK				138
@@ -154,27 +146,18 @@
 #define QDSS_TRACECLKIN_CLK_SRC				145
 #define GCC_QDSS_TRACECLKIN_CLK				146
 #define QDSS_TSCTR_CLK_SRC				147
-#define GCC_Q6_TSCTR_1TO2_CLK				148
-#define GCC_WCSS_DBG_IFC_NTS_CLK			149
 #define GCC_QDSS_TSCTR_DIV2_CLK				150
 #define GCC_QDSS_TS_CLK					151
 #define GCC_QDSS_TSCTR_DIV4_CLK				152
 #define GCC_NSS_TS_CLK					153
 #define GCC_QDSS_TSCTR_DIV8_CLK				154
 #define GCC_QDSS_TSCTR_DIV16_CLK			155
-#define GCC_Q6SS_PCLKDBG_CLK				156
-#define GCC_Q6SS_TRIG_CLK				157
-#define GCC_WCSS_DBG_IFC_APB_CLK			158
-#define GCC_WCSS_DBG_IFC_DAPBUS_CLK			159
 #define GCC_QDSS_DAP_CLK				160
 #define GCC_QDSS_APB2JTAG_CLK				161
 #define GCC_QDSS_TSCTR_DIV3_CLK				162
 #define QPIC_IO_MACRO_CLK_SRC				163
 #define GCC_QPIC_IO_MACRO_CLK                           164
 #define Q6_AXI_CLK_SRC					165
-#define GCC_Q6_AXIM_CLK					166
-#define GCC_WCSS_Q6_TBU_CLK				167
-#define GCC_MEM_NOC_Q6_AXI_CLK				168
 #define Q6_AXIM2_CLK_SRC				169
 #define NSSNOC_MEMNOC_BFDCD_CLK_SRC			170
 #define GCC_NSSNOC_MEMNOC_CLK				171
@@ -199,7 +182,6 @@
 #define GCC_UNIPHY2_SYS_CLK				190
 #define GCC_CMN_12GPLL_SYS_CLK				191
 #define GCC_NSSNOC_XO_DCD_CLK				192
-#define GCC_Q6SS_BOOT_CLK				193
 #define UNIPHY_SYS_CLK_SRC				194
 #define NSS_TS_CLK_SRC					195
 #define GCC_ANOC_PCIE0_1LANE_M_CLK			196
-- 
cgit v1.2.3


From da09a9e0c3eab164af950be44ee6bdea8527c3e5 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Fri, 18 Oct 2024 22:22:51 +0200
Subject: uprobe: Add data pointer to consumer handlers

Adding data pointer to both entry and exit consumer handlers and all
its users. The functionality itself is coming in following change.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/r/20241018202252.693462-2-jolsa@kernel.org
---
 include/linux/uprobes.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h
index 2b294bf1881f..bb265a632b91 100644
--- a/include/linux/uprobes.h
+++ b/include/linux/uprobes.h
@@ -37,10 +37,10 @@ struct uprobe_consumer {
 	 * for the current process. If filter() is omitted or returns true,
 	 * UPROBE_HANDLER_REMOVE is effectively ignored.
 	 */
-	int (*handler)(struct uprobe_consumer *self, struct pt_regs *regs);
+	int (*handler)(struct uprobe_consumer *self, struct pt_regs *regs, __u64 *data);
 	int (*ret_handler)(struct uprobe_consumer *self,
 				unsigned long func,
-				struct pt_regs *regs);
+				struct pt_regs *regs, __u64 *data);
 	bool (*filter)(struct uprobe_consumer *self, struct mm_struct *mm);
 
 	struct list_head cons_node;
-- 
cgit v1.2.3


From 4d756095d3994cb41393817dc696b458938a6bd0 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Fri, 18 Oct 2024 22:22:52 +0200
Subject: uprobe: Add support for session consumer

This change allows the uprobe consumer to behave as session which
means that 'handler' and 'ret_handler' callbacks are connected in
a way that allows to:

  - control execution of 'ret_handler' from 'handler' callback
  - share data between 'handler' and 'ret_handler' callbacks

The session concept fits to our common use case where we do filtering
on entry uprobe and based on the result we decide to run the return
uprobe (or not).

It's also convenient to share the data between session callbacks.

To achive this we are adding new return value the uprobe consumer
can return from 'handler' callback:

  UPROBE_HANDLER_IGNORE
  - Ignore 'ret_handler' callback for this consumer.

And store cookie and pass it to 'ret_handler' when consumer has both
'handler' and 'ret_handler' callbacks defined.

We store shared data in the return_consumer object array as part of
the return_instance object. This way the handle_uretprobe_chain can
find related return_consumer and its shared data.

We also store entry handler return value, for cases when there are
multiple consumers on single uprobe and some of them are ignored and
some of them not, in which case the return probe gets installed and
we need to have a way to find out which consumer needs to be ignored.

The tricky part is when consumer is registered 'after' the uprobe
entry handler is hit. In such case this consumer's 'ret_handler' gets
executed as well, but it won't have the proper data pointer set,
so we can filter it out.

Suggested-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/r/20241018202252.693462-3-jolsa@kernel.org
---
 include/linux/uprobes.h | 21 ++++++++++++++++++++-
 1 file changed, 20 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h
index bb265a632b91..dbaf04189548 100644
--- a/include/linux/uprobes.h
+++ b/include/linux/uprobes.h
@@ -23,8 +23,17 @@ struct inode;
 struct notifier_block;
 struct page;
 
+/*
+ * Allowed return values from uprobe consumer's handler callback
+ * with following meaning:
+ *
+ * UPROBE_HANDLER_REMOVE
+ * - Remove the uprobe breakpoint from current->mm.
+ * UPROBE_HANDLER_IGNORE
+ * - Ignore ret_handler callback for this consumer.
+ */
 #define UPROBE_HANDLER_REMOVE		1
-#define UPROBE_HANDLER_MASK		1
+#define UPROBE_HANDLER_IGNORE		2
 
 #define MAX_URETPROBE_DEPTH		64
 
@@ -44,6 +53,8 @@ struct uprobe_consumer {
 	bool (*filter)(struct uprobe_consumer *self, struct mm_struct *mm);
 
 	struct list_head cons_node;
+
+	__u64 id;	/* set when uprobe_consumer is registered */
 };
 
 #ifdef CONFIG_UPROBES
@@ -83,14 +94,22 @@ struct uprobe_task {
 	unsigned int			depth;
 };
 
+struct return_consumer {
+	__u64	cookie;
+	__u64	id;
+};
+
 struct return_instance {
 	struct uprobe		*uprobe;
 	unsigned long		func;
 	unsigned long		stack;		/* stack pointer */
 	unsigned long		orig_ret_vaddr; /* original return address */
 	bool			chained;	/* true, if instance is nested */
+	int			consumers_cnt;
 
 	struct return_instance	*next;		/* keep as stack */
+
+	struct return_consumer	consumers[] __counted_by(consumers_cnt);
 };
 
 enum rp_check {
-- 
cgit v1.2.3


From c8507a25cebd179db935dd266a33c51bef1b1e80 Mon Sep 17 00:00:00 2001
From: Ashutosh Dixit <ashutosh.dixit@intel.com>
Date: Tue, 22 Oct 2024 13:03:47 -0700
Subject: drm/xe/oa/uapi: Define and parse OA sync properties
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Now that we have laid the groundwork, introduce OA sync properties in the
uapi and parse the input xe_sync array as is done elsewhere in the
driver. Also add DRM_XE_OA_CAPS_SYNCS bit in OA capabilities for userspace.

v2: Fix and document DRM_XE_SYNC_TYPE_USER_FENCE for OA (Matt B)
    Add DRM_XE_OA_CAPS_SYNCS bit to OA capabilities (Jose)

Acked-by: José Roberto de Souza <jose.souza@intel.com>
Reviewed-by: Jonathan Cavitt <jonathan.cavitt@intel.com>
Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20241022200352.1192560-3-ashutosh.dixit@intel.com
---
 include/uapi/drm/xe_drm.h | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

(limited to 'include')

diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index c4182e95a619..4a8a4a63e99c 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -1485,6 +1485,7 @@ struct drm_xe_oa_unit {
 	/** @capabilities: OA capabilities bit-mask */
 	__u64 capabilities;
 #define DRM_XE_OA_CAPS_BASE		(1 << 0)
+#define DRM_XE_OA_CAPS_SYNCS		(1 << 1)
 
 	/** @oa_timestamp_freq: OA timestamp freq */
 	__u64 oa_timestamp_freq;
@@ -1634,6 +1635,22 @@ enum drm_xe_oa_property_id {
 	 * to be disabled for the stream exec queue.
 	 */
 	DRM_XE_OA_PROPERTY_NO_PREEMPT,
+
+	/**
+	 * @DRM_XE_OA_PROPERTY_NUM_SYNCS: Number of syncs in the sync array
+	 * specified in @DRM_XE_OA_PROPERTY_SYNCS
+	 */
+	DRM_XE_OA_PROPERTY_NUM_SYNCS,
+
+	/**
+	 * @DRM_XE_OA_PROPERTY_SYNCS: Pointer to struct @drm_xe_sync array
+	 * with array size specified via @DRM_XE_OA_PROPERTY_NUM_SYNCS. OA
+	 * configuration will wait till input fences signal. Output fences
+	 * will signal after the new OA configuration takes effect. For
+	 * @DRM_XE_SYNC_TYPE_USER_FENCE, @addr is a user pointer, similar
+	 * to the VM bind case.
+	 */
+	DRM_XE_OA_PROPERTY_SYNCS,
 };
 
 /**
-- 
cgit v1.2.3


From ee1251fc0c4e799a48025318f262739919deb977 Mon Sep 17 00:00:00 2001
From: Chen Ridong <chenridong@huawei.com>
Date: Tue, 22 Oct 2024 11:49:45 +0000
Subject: cgroup/freezer: Reduce redundant traversal for cgroup_freeze
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Whether a cgroup is frozen is determined solely by whether it is set to
to be frozen and whether its parent is frozen. Currently, when is cgroup
is frozen or unfrozen, it iterates through the entire subtree to freeze
or unfreeze its descentdants. However, this is unesessary for a cgroup
that does not change its effective frozen status. This path aims to skip
the subtree if its parent does not have a change in effective freeze.

For an example, subtree like, a-b-c-d-e-f-g, when a is frozen, the
entire tree is frozen. If we freeze b and c again, it is unesessary to
iterate d, e, f and g. So does that If we unfreeze b/c.

Reviewed-by: Michal Koutný <mkoutny@suse.com>
Signed-off-by: Chen Ridong <chenridong@huawei.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/cgroup-defs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index 0a80ef9191a6..1b20d2d8ef7c 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -398,7 +398,7 @@ struct cgroup_freezer_state {
 	bool freeze;
 
 	/* Should the cgroup actually be frozen? */
-	int e_freeze;
+	bool e_freeze;
 
 	/* Fields below are protected by css_set_lock */
 
-- 
cgit v1.2.3


From c0813ce2e5b0d1174782aff30d366509377abc7b Mon Sep 17 00:00:00 2001
From: Pengfei Li <pengfei.li_1@nxp.com>
Date: Wed, 23 Oct 2024 11:46:48 -0700
Subject: dt-bindings: clock: imx93: Drop IMX93_CLK_END macro definition

IMX93_CLK_END should be dropped as it is not part of the ABI.

Signed-off-by: Pengfei Li <pengfei.li_1@nxp.com>
Acked-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Acked-by: Peng Fan <peng.fan@nxp.com>
Acked-by: Conor Dooley <conor.dooley@microchip.com>
Link: https://lore.kernel.org/r/20241023184651.381265-3-pengfei.li_1@nxp.com
Signed-off-by: Abel Vesa <abel.vesa@linaro.org>
---
 include/dt-bindings/clock/imx93-clock.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/dt-bindings/clock/imx93-clock.h b/include/dt-bindings/clock/imx93-clock.h
index 787c9e74dc96..a1d0b326bb6b 100644
--- a/include/dt-bindings/clock/imx93-clock.h
+++ b/include/dt-bindings/clock/imx93-clock.h
@@ -204,6 +204,5 @@
 #define IMX93_CLK_A55_SEL		199
 #define IMX93_CLK_A55_CORE		200
 #define IMX93_CLK_PDM_IPG		201
-#define IMX93_CLK_END			202
 
 #endif
-- 
cgit v1.2.3


From f029d870096fcd8565a2ee8c2d0078b9aaec4fdb Mon Sep 17 00:00:00 2001
From: Pengfei Li <pengfei.li_1@nxp.com>
Date: Wed, 23 Oct 2024 11:46:49 -0700
Subject: dt-bindings: clock: Add i.MX91 clock support

i.MX91 has similar Clock Control Module(CCM) design as i.MX93, only add
few new clock compared to i.MX93.
Add a new compatible string and some new clocks for i.MX91.

Signed-off-by: Pengfei Li <pengfei.li_1@nxp.com>
Reviewed-by: Frank Li <Frank.Li@nxp.com>
Acked-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Link: https://lore.kernel.org/r/20241023184651.381265-4-pengfei.li_1@nxp.com
Signed-off-by: Abel Vesa <abel.vesa@linaro.org>
---
 include/dt-bindings/clock/imx93-clock.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include')

diff --git a/include/dt-bindings/clock/imx93-clock.h b/include/dt-bindings/clock/imx93-clock.h
index a1d0b326bb6b..6c685067288b 100644
--- a/include/dt-bindings/clock/imx93-clock.h
+++ b/include/dt-bindings/clock/imx93-clock.h
@@ -204,5 +204,10 @@
 #define IMX93_CLK_A55_SEL		199
 #define IMX93_CLK_A55_CORE		200
 #define IMX93_CLK_PDM_IPG		201
+#define IMX91_CLK_ENET1_QOS_TSN     202
+#define IMX91_CLK_ENET_TIMER        203
+#define IMX91_CLK_ENET2_REGULAR     204
+#define IMX91_CLK_ENET2_REGULAR_GATE		205
+#define IMX91_CLK_ENET1_QOS_TSN_GATE		206
 
 #endif
-- 
cgit v1.2.3


From 04af8a399fa40310f831b4f1dc9f757085f41983 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@linux.intel.com>
Date: Fri, 18 Oct 2024 17:47:48 +0300
Subject: PCI: Protect Link Control 2 Register with RMW locking
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The PCIe Bandwidth Controller performs RMW accesses the Link Control 2
Register which can occur concurrently to other sources of Link Control 2
Register writes. Therefore, add Link Control 2 Register among the PCI
Express Capability Registers that need RMW locking.

Link: https://lore.kernel.org/r/20241018144755.7875-3-ilpo.jarvinen@linux.intel.com
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Lukas Wunner <lukas@wunner.de>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 include/linux/pci.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/pci.h b/include/linux/pci.h
index 573b4c4c2be6..be5ed534c39c 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1274,6 +1274,7 @@ static inline int pcie_capability_clear_and_set_word(struct pci_dev *dev,
 {
 	switch (pos) {
 	case PCI_EXP_LNKCTL:
+	case PCI_EXP_LNKCTL2:
 	case PCI_EXP_RTCTL:
 		return pcie_capability_clear_and_set_word_locked(dev, pos,
 								 clear, set);
-- 
cgit v1.2.3


From 52e0874fc16bd26e9ea1871e30ffb2c6dff187cf Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Mon, 12 Aug 2024 12:39:02 +0200
Subject: locking/rt: Add sparse annotation PREEMPT_RT's sleeping locks.

The sleeping locks on PREEMPT_RT (rt_spin_lock() and friends) lack
sparse annotation. Therefore a missing spin_unlock() won't be spotted by
sparse in a PREEMPT_RT build while it is noticed on a !PREEMPT_RT build.

Add the __acquires/__releases macros to the lock/ unlock functions. The
trylock functions already use the __cond_lock() wrapper.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/all/20240812104200.2239232-2-bigeasy@linutronix.de
---
 include/linux/rwlock_rt.h   | 10 +++++-----
 include/linux/spinlock_rt.h |  8 ++++----
 2 files changed, 9 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/include/linux/rwlock_rt.h b/include/linux/rwlock_rt.h
index 8544ff05e594..7d81fc6918ee 100644
--- a/include/linux/rwlock_rt.h
+++ b/include/linux/rwlock_rt.h
@@ -24,13 +24,13 @@ do {							\
 	__rt_rwlock_init(rwl, #rwl, &__key);		\
 } while (0)
 
-extern void rt_read_lock(rwlock_t *rwlock);
+extern void rt_read_lock(rwlock_t *rwlock)	__acquires(rwlock);
 extern int rt_read_trylock(rwlock_t *rwlock);
-extern void rt_read_unlock(rwlock_t *rwlock);
-extern void rt_write_lock(rwlock_t *rwlock);
-extern void rt_write_lock_nested(rwlock_t *rwlock, int subclass);
+extern void rt_read_unlock(rwlock_t *rwlock)	__releases(rwlock);
+extern void rt_write_lock(rwlock_t *rwlock)	__acquires(rwlock);
+extern void rt_write_lock_nested(rwlock_t *rwlock, int subclass)	__acquires(rwlock);
 extern int rt_write_trylock(rwlock_t *rwlock);
-extern void rt_write_unlock(rwlock_t *rwlock);
+extern void rt_write_unlock(rwlock_t *rwlock)	__releases(rwlock);
 
 static __always_inline void read_lock(rwlock_t *rwlock)
 {
diff --git a/include/linux/spinlock_rt.h b/include/linux/spinlock_rt.h
index 61c49b16f69a..babc3e028779 100644
--- a/include/linux/spinlock_rt.h
+++ b/include/linux/spinlock_rt.h
@@ -32,10 +32,10 @@ do {								\
 	__rt_spin_lock_init(slock, #slock, &__key, true);	\
 } while (0)
 
-extern void rt_spin_lock(spinlock_t *lock);
-extern void rt_spin_lock_nested(spinlock_t *lock, int subclass);
-extern void rt_spin_lock_nest_lock(spinlock_t *lock, struct lockdep_map *nest_lock);
-extern void rt_spin_unlock(spinlock_t *lock);
+extern void rt_spin_lock(spinlock_t *lock) __acquires(lock);
+extern void rt_spin_lock_nested(spinlock_t *lock, int subclass)	__acquires(lock);
+extern void rt_spin_lock_nest_lock(spinlock_t *lock, struct lockdep_map *nest_lock) __acquires(lock);
+extern void rt_spin_unlock(spinlock_t *lock)	__releases(lock);
 extern void rt_spin_lock_unlock(spinlock_t *lock);
 extern int rt_spin_trylock_bh(spinlock_t *lock);
 extern int rt_spin_trylock(spinlock_t *lock);
-- 
cgit v1.2.3


From b1f01f9e54b1aaadb6740f86017e8fabdee77fe2 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Mon, 12 Aug 2024 12:39:03 +0200
Subject: locking/rt: Remove one __cond_lock() in RT's spin_trylock_irqsave()

spin_trylock_irqsave() has a __cond_lock() wrapper which points to
__spin_trylock_irqsave(). The function then invokes spin_trylock() which
has another __cond_lock() finally pointing to rt_spin_trylock().

The compiler has no problem to parse this but sparse does not recognise
that users of spin_trylock_irqsave() acquire a conditional lock and
complains.

Remove one layer of __cond_lock() so that sparse recognises conditional
locking.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/all/20240812104200.2239232-3-bigeasy@linutronix.de
---
 include/linux/spinlock_rt.h | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/spinlock_rt.h b/include/linux/spinlock_rt.h
index babc3e028779..f9f14e135be7 100644
--- a/include/linux/spinlock_rt.h
+++ b/include/linux/spinlock_rt.h
@@ -132,7 +132,7 @@ static __always_inline void spin_unlock_irqrestore(spinlock_t *lock,
 #define spin_trylock_irq(lock)				\
 	__cond_lock(lock, rt_spin_trylock(lock))
 
-#define __spin_trylock_irqsave(lock, flags)		\
+#define spin_trylock_irqsave(lock, flags)		\
 ({							\
 	int __locked;					\
 							\
@@ -142,9 +142,6 @@ static __always_inline void spin_unlock_irqrestore(spinlock_t *lock,
 	__locked;					\
 })
 
-#define spin_trylock_irqsave(lock, flags)		\
-	__cond_lock(lock, __spin_trylock_irqsave(lock, flags))
-
 #define spin_is_contended(lock)		(((void)(lock), 0))
 
 static inline int spin_is_locked(spinlock_t *lock)
-- 
cgit v1.2.3


From cdda1f26e74bac732eca537a69f19f6a37b641be Mon Sep 17 00:00:00 2001
From: Luca Boccassi <luca.boccassi@gmail.com>
Date: Thu, 10 Oct 2024 16:52:32 +0100
Subject: pidfd: add ioctl to retrieve pid info

A common pattern when using pid fds is having to get information
about the process, which currently requires /proc being mounted,
resolving the fd to a pid, and then do manual string parsing of
/proc/N/status and friends. This needs to be reimplemented over
and over in all userspace projects (e.g.: I have reimplemented
resolving in systemd, dbus, dbus-daemon, polkit so far), and
requires additional care in checking that the fd is still valid
after having parsed the data, to avoid races.

Having a programmatic API that can be used directly removes all
these requirements, including having /proc mounted.

As discussed at LPC24, add an ioctl with an extensible struct
so that more parameters can be added later if needed. Start with
returning pid/tgid/ppid and creds unconditionally, and cgroupid
optionally.

Signed-off-by: Luca Boccassi <luca.boccassi@gmail.com>
Link: https://lore.kernel.org/r/20241010155401.2268522-1-luca.boccassi@gmail.com
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/uapi/linux/pidfd.h | 50 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 50 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/pidfd.h b/include/uapi/linux/pidfd.h
index 565fc0629fff..4540f6301b8c 100644
--- a/include/uapi/linux/pidfd.h
+++ b/include/uapi/linux/pidfd.h
@@ -16,6 +16,55 @@
 #define PIDFD_SIGNAL_THREAD_GROUP	(1UL << 1)
 #define PIDFD_SIGNAL_PROCESS_GROUP	(1UL << 2)
 
+/* Flags for pidfd_info. */
+#define PIDFD_INFO_PID			(1UL << 0) /* Always returned, even if not requested */
+#define PIDFD_INFO_CREDS		(1UL << 1) /* Always returned, even if not requested */
+#define PIDFD_INFO_CGROUPID		(1UL << 2) /* Always returned if available, even if not requested */
+
+#define PIDFD_INFO_SIZE_VER0		64 /* sizeof first published struct */
+
+struct pidfd_info {
+	/*
+	 * This mask is similar to the request_mask in statx(2).
+	 *
+	 * Userspace indicates what extensions or expensive-to-calculate fields
+	 * they want by setting the corresponding bits in mask. The kernel
+	 * will ignore bits that it does not know about.
+	 *
+	 * When filling the structure, the kernel will only set bits
+	 * corresponding to the fields that were actually filled by the kernel.
+	 * This also includes any future extensions that might be automatically
+	 * filled. If the structure size is too small to contain a field
+	 * (requested or not), to avoid confusion the mask will not
+	 * contain a bit for that field.
+	 *
+	 * As such, userspace MUST verify that mask contains the
+	 * corresponding flags after the ioctl(2) returns to ensure that it is
+	 * using valid data.
+	 */
+	__u64 mask;
+	/*
+	 * The information contained in the following fields might be stale at the
+	 * time it is received, as the target process might have exited as soon as
+	 * the IOCTL was processed, and there is no way to avoid that. However, it
+	 * is guaranteed that if the call was successful, then the information was
+	 * correct and referred to the intended process at the time the work was
+	 * performed. */
+	__u64 cgroupid;
+	__u32 pid;
+	__u32 tgid;
+	__u32 ppid;
+	__u32 ruid;
+	__u32 rgid;
+	__u32 euid;
+	__u32 egid;
+	__u32 suid;
+	__u32 sgid;
+	__u32 fsuid;
+	__u32 fsgid;
+	__u32 spare0[1];
+};
+
 #define PIDFS_IOCTL_MAGIC 0xFF
 
 #define PIDFD_GET_CGROUP_NAMESPACE            _IO(PIDFS_IOCTL_MAGIC, 1)
@@ -28,5 +77,6 @@
 #define PIDFD_GET_TIME_FOR_CHILDREN_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 8)
 #define PIDFD_GET_USER_NAMESPACE              _IO(PIDFS_IOCTL_MAGIC, 9)
 #define PIDFD_GET_UTS_NAMESPACE               _IO(PIDFS_IOCTL_MAGIC, 10)
+#define PIDFD_GET_INFO                        _IOWR(PIDFS_IOCTL_MAGIC, 11, struct pidfd_info)
 
 #endif /* _UAPI_LINUX_PIDFD_H */
-- 
cgit v1.2.3


From 9a9f7e13952b2638bc57bc9b34e6bdd106509836 Mon Sep 17 00:00:00 2001
From: Victor Shih <victor.shih@genesyslogic.com.tw>
Date: Fri, 18 Oct 2024 18:53:18 +0800
Subject: mmc: core: Support UHS-II card control and access

Embed UHS-II access/control functionality into the MMC request
processing flow.

Signed-off-by: Jason Lai <jason.lai@genesyslogic.com.tw>
Signed-off-by: Victor Shih <victor.shih@genesyslogic.com.tw>
Message-ID: <20241018105333.4569-2-victorshihgli@gmail.com>
[Ulf: A couple of cleanups and fixed sd_uhs2_power_off()]
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 include/linux/mmc/core.h | 17 +++++++++++++++++
 include/linux/mmc/host.h | 15 +++++++++++++++
 2 files changed, 32 insertions(+)

(limited to 'include')

diff --git a/include/linux/mmc/core.h b/include/linux/mmc/core.h
index a890a71288ef..56972bd78462 100644
--- a/include/linux/mmc/core.h
+++ b/include/linux/mmc/core.h
@@ -11,6 +11,20 @@
 struct mmc_data;
 struct mmc_request;
 
+#define UHS2_MAX_PAYLOAD_LEN 2
+#define UHS2_MAX_RESP_LEN 20
+
+struct uhs2_command {
+	u16	header;
+	u16	arg;
+	__be32	payload[UHS2_MAX_PAYLOAD_LEN];
+	u8	payload_len;
+	u8	packet_len;
+	u8	tmode_half_duplex;
+	u8	uhs2_resp[UHS2_MAX_RESP_LEN];	/* UHS2 native cmd resp */
+	u8	uhs2_resp_len;			/* UHS2 native cmd resp len */
+};
+
 struct mmc_command {
 	u32			opcode;
 	u32			arg;
@@ -97,6 +111,8 @@ struct mmc_command {
 	struct mmc_data		*data;		/* data segment associated with cmd */
 	struct mmc_request	*mrq;		/* associated request */
 
+	struct uhs2_command	*uhs2_cmd;	/* UHS2 command */
+
 	/* for SDUC */
 	bool has_ext_addr;
 	u8 ext_addr;
@@ -158,6 +174,7 @@ struct mmc_request {
 	const struct bio_crypt_ctx *crypto_ctx;
 	int			crypto_key_slot;
 #endif
+	struct uhs2_command	uhs2_cmd;
 };
 
 struct mmc_card;
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index 0980d06ed419..f166d6611ddb 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -127,6 +127,13 @@ struct sd_uhs2_caps {
 };
 
 enum sd_uhs2_operation {
+	UHS2_PHY_INIT = 0,
+	UHS2_SET_CONFIG,
+	UHS2_ENABLE_INT,
+	UHS2_DISABLE_INT,
+	UHS2_ENABLE_CLK,
+	UHS2_DISABLE_CLK,
+	UHS2_CHECK_DORMANT,
 	UHS2_SET_IOS,
 };
 
@@ -453,6 +460,8 @@ struct mmc_host {
 #endif
 #define MMC_CAP2_ALT_GPT_TEGRA	(1 << 28)	/* Host with eMMC that has GPT entry at a non-standard location */
 
+	bool			uhs2_sd_tran;	/* UHS-II flag for SD_TRAN state */
+	bool			uhs2_app_cmd;	/* UHS-II flag for APP command */
 	struct sd_uhs2_caps	uhs2_caps;	/* Host UHS-II capabilities */
 
 	int			fixed_drv_type;	/* fixed driver type for non-removable media */
@@ -714,6 +723,12 @@ static inline void mmc_debugfs_err_stats_inc(struct mmc_host *host,
 	host->err_stats[stat] += 1;
 }
 
+static inline int mmc_card_uhs2_hd_mode(struct mmc_host *host)
+{
+	return host->ios.timing == MMC_TIMING_UHS2_SPEED_A_HD ||
+	       host->ios.timing == MMC_TIMING_UHS2_SPEED_B_HD;
+}
+
 int mmc_sd_switch(struct mmc_card *card, bool mode, int group,
 		u8 value, u8 *resp);
 int mmc_send_status(struct mmc_card *card, u32 *status);
-- 
cgit v1.2.3


From a5a98a786e5e31e85a69d30935254e8730734706 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Mon, 14 Oct 2024 16:59:46 +0200
Subject: thermal: core: Add and use cooling device guard

Add and use a special guard for cooling devices.

This allows quite a few error code paths to be simplified among
other things and brings in code size reduction for a good measure.

No intentional functional impact.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Link: https://patch.msgid.link/5837621.DvuYhMxLoT@rjwysocki.net
Reviewed-by: Lukasz Luba <lukasz.luba@arm.com>
---
 include/linux/thermal.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/linux/thermal.h b/include/linux/thermal.h
index bcaa92732e14..754802478b96 100644
--- a/include/linux/thermal.h
+++ b/include/linux/thermal.h
@@ -140,6 +140,9 @@ struct thermal_cooling_device {
 #endif
 };
 
+DEFINE_GUARD(cooling_dev, struct thermal_cooling_device *, mutex_lock(&_T->lock),
+	     mutex_unlock(&_T->lock))
+
 /* Structure to define Thermal Zone parameters */
 struct thermal_zone_params {
 	const char *governor_name;
-- 
cgit v1.2.3


From 1773572863c43a14a3e45f0591f28b7dec1ee52a Mon Sep 17 00:00:00 2001
From: Daniel Lezcano <daniel.lezcano@linaro.org>
Date: Tue, 22 Oct 2024 17:51:42 +0200
Subject: thermal: netlink: Add the commands and the events for the thresholds

The thresholds exist but there is no notification neither action code
related to them yet.

These changes implement the netlink for the notifications when the
thresholds are crossed, added, deleted or flushed as well as the
commands which allows to get the list of the thresholds, flush them,
add and delete.

Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
Reviewed-by: Lukasz Luba <lukasz.luba@arm.com>
Link: https://patch.msgid.link/20241022155147.463475-3-daniel.lezcano@linaro.org
[ rjw: Use the thermal_zone guard for locking, subject edit ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/uapi/linux/thermal.h | 27 ++++++++++++++++++++-------
 1 file changed, 20 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/thermal.h b/include/uapi/linux/thermal.h
index 2e6f60a36173..ba8604bdf206 100644
--- a/include/uapi/linux/thermal.h
+++ b/include/uapi/linux/thermal.h
@@ -20,7 +20,7 @@ enum thermal_trip_type {
 
 /* Adding event notification support elements */
 #define THERMAL_GENL_FAMILY_NAME		"thermal"
-#define THERMAL_GENL_VERSION			0x01
+#define THERMAL_GENL_VERSION			0x02
 #define THERMAL_GENL_SAMPLING_GROUP_NAME	"sampling"
 #define THERMAL_GENL_EVENT_GROUP_NAME		"event"
 
@@ -30,6 +30,7 @@ enum thermal_genl_attr {
 	THERMAL_GENL_ATTR_TZ,
 	THERMAL_GENL_ATTR_TZ_ID,
 	THERMAL_GENL_ATTR_TZ_TEMP,
+	THERMAL_GENL_ATTR_TZ_PREV_TEMP,
 	THERMAL_GENL_ATTR_TZ_TRIP,
 	THERMAL_GENL_ATTR_TZ_TRIP_ID,
 	THERMAL_GENL_ATTR_TZ_TRIP_TYPE,
@@ -50,6 +51,9 @@ enum thermal_genl_attr {
 	THERMAL_GENL_ATTR_CPU_CAPABILITY_ID,
 	THERMAL_GENL_ATTR_CPU_CAPABILITY_PERFORMANCE,
 	THERMAL_GENL_ATTR_CPU_CAPABILITY_EFFICIENCY,
+	THERMAL_GENL_ATTR_THRESHOLD,
+	THERMAL_GENL_ATTR_THRESHOLD_TEMP,
+	THERMAL_GENL_ATTR_THRESHOLD_DIRECTION,
 	__THERMAL_GENL_ATTR_MAX,
 };
 #define THERMAL_GENL_ATTR_MAX (__THERMAL_GENL_ATTR_MAX - 1)
@@ -77,6 +81,11 @@ enum thermal_genl_event {
 	THERMAL_GENL_EVENT_CDEV_STATE_UPDATE,	/* Cdev state updated */
 	THERMAL_GENL_EVENT_TZ_GOV_CHANGE,	/* Governor policy changed  */
 	THERMAL_GENL_EVENT_CPU_CAPABILITY_CHANGE,	/* CPU capability changed */
+	THERMAL_GENL_EVENT_THRESHOLD_ADD,	/* A thresold has been added */
+	THERMAL_GENL_EVENT_THRESHOLD_DELETE,	/* A thresold has been deleted */
+	THERMAL_GENL_EVENT_THRESHOLD_FLUSH,	/* All thresolds have been deleted */
+	THERMAL_GENL_EVENT_THRESHOLD_UP,	/* A thresold has been crossed the way up */
+	THERMAL_GENL_EVENT_THRESHOLD_DOWN,	/* A thresold has been crossed the way down */
 	__THERMAL_GENL_EVENT_MAX,
 };
 #define THERMAL_GENL_EVENT_MAX (__THERMAL_GENL_EVENT_MAX - 1)
@@ -84,12 +93,16 @@ enum thermal_genl_event {
 /* Commands supported by the thermal_genl_family */
 enum thermal_genl_cmd {
 	THERMAL_GENL_CMD_UNSPEC,
-	THERMAL_GENL_CMD_TZ_GET_ID,	/* List of thermal zones id */
-	THERMAL_GENL_CMD_TZ_GET_TRIP,	/* List of thermal trips */
-	THERMAL_GENL_CMD_TZ_GET_TEMP,	/* Get the thermal zone temperature */
-	THERMAL_GENL_CMD_TZ_GET_GOV,	/* Get the thermal zone governor */
-	THERMAL_GENL_CMD_TZ_GET_MODE,	/* Get the thermal zone mode */
-	THERMAL_GENL_CMD_CDEV_GET,	/* List of cdev id */
+	THERMAL_GENL_CMD_TZ_GET_ID,		/* List of thermal zones id */
+	THERMAL_GENL_CMD_TZ_GET_TRIP,		/* List of thermal trips */
+	THERMAL_GENL_CMD_TZ_GET_TEMP,		/* Get the thermal zone temperature */
+	THERMAL_GENL_CMD_TZ_GET_GOV,		/* Get the thermal zone governor */
+	THERMAL_GENL_CMD_TZ_GET_MODE,		/* Get the thermal zone mode */
+	THERMAL_GENL_CMD_CDEV_GET,		/* List of cdev id */
+	THERMAL_GENL_CMD_THRESHOLD_GET,		/* List of thresholds */
+	THERMAL_GENL_CMD_THRESHOLD_ADD,		/* Add a threshold */
+	THERMAL_GENL_CMD_THRESHOLD_DELETE,	/* Delete a threshold */
+	THERMAL_GENL_CMD_THRESHOLD_FLUSH,	/* Flush all the thresholds */
 	__THERMAL_GENL_CMD_MAX,
 };
 #define THERMAL_GENL_CMD_MAX (__THERMAL_GENL_CMD_MAX - 1)
-- 
cgit v1.2.3


From 68ed5c38b512b734caf3da1f87db4a99fcfe3002 Mon Sep 17 00:00:00 2001
From: Kuniyuki Iwashima <kuniyu@amazon.com>
Date: Thu, 17 Oct 2024 11:31:33 -0700
Subject: phonet: Pass net and ifindex to phonet_address_notify().

Currently, phonet_address_notify() fetches netns and ifindex from dev.

Once addr_doit() is converted to RCU, phonet_address_notify() will be
called outside of RCU due to GFP_KERNEL, and dev will be unavailable
there.

Let's pass net and ifindex to phonet_address_notify().

Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/net/phonet/pn_dev.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/phonet/pn_dev.h b/include/net/phonet/pn_dev.h
index e9dc8dca5817..6b2102b4ece3 100644
--- a/include/net/phonet/pn_dev.h
+++ b/include/net/phonet/pn_dev.h
@@ -38,7 +38,7 @@ int phonet_address_add(struct net_device *dev, u8 addr);
 int phonet_address_del(struct net_device *dev, u8 addr);
 u8 phonet_address_get(struct net_device *dev, u8 addr);
 int phonet_address_lookup(struct net *net, u8 addr);
-void phonet_address_notify(int event, struct net_device *dev, u8 addr);
+void phonet_address_notify(struct net *net, int event, u32 ifindex, u8 addr);
 
 int phonet_route_add(struct net_device *dev, u8 daddr);
 int phonet_route_del(struct net_device *dev, u8 daddr);
-- 
cgit v1.2.3


From 42f5fe1dc4babad1c49bcc4121983fffccee3cd9 Mon Sep 17 00:00:00 2001
From: Kuniyuki Iwashima <kuniyu@amazon.com>
Date: Thu, 17 Oct 2024 11:31:34 -0700
Subject: phonet: Convert phonet_device_list.lock to spinlock_t.

addr_doit() calls phonet_address_add() or phonet_address_del()
for RTM_NEWADDR or RTM_DELADDR, respectively.

Both functions only touch phonet_device_list(dev_net(dev)),
which is currently protected by RTNL and its dedicated mutex,
phonet_device_list.lock.

We will convert addr_doit() to RCU and cannot use mutex inside RCU.

Let's convert the mutex to spinlock_t.

Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/net/phonet/pn_dev.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/phonet/pn_dev.h b/include/net/phonet/pn_dev.h
index 6b2102b4ece3..ac0331d83a81 100644
--- a/include/net/phonet/pn_dev.h
+++ b/include/net/phonet/pn_dev.h
@@ -12,12 +12,13 @@
 
 #include <linux/list.h>
 #include <linux/mutex.h>
+#include <linux/spinlock.h>
 
 struct net;
 
 struct phonet_device_list {
 	struct list_head list;
-	struct mutex lock;
+	spinlock_t lock;
 };
 
 struct phonet_device_list *phonet_device_list(struct net *net);
-- 
cgit v1.2.3


From de51ad08b1177bbbb8b60cb7dd4c3c5dd50d262f Mon Sep 17 00:00:00 2001
From: Kuniyuki Iwashima <kuniyu@amazon.com>
Date: Thu, 17 Oct 2024 11:31:38 -0700
Subject: phonet: Pass net and ifindex to rtm_phonet_notify().

Currently, rtm_phonet_notify() fetches netns and ifindex from dev.

Once route_doit() is converted to RCU, rtm_phonet_notify() will be
called outside of RCU due to GFP_KERNEL, and dev will be unavailable
there.

Let's pass net and ifindex to rtm_phonet_notify().

Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/net/phonet/pn_dev.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/phonet/pn_dev.h b/include/net/phonet/pn_dev.h
index ac0331d83a81..021e524fd20a 100644
--- a/include/net/phonet/pn_dev.h
+++ b/include/net/phonet/pn_dev.h
@@ -43,7 +43,7 @@ void phonet_address_notify(struct net *net, int event, u32 ifindex, u8 addr);
 
 int phonet_route_add(struct net_device *dev, u8 daddr);
 int phonet_route_del(struct net_device *dev, u8 daddr);
-void rtm_phonet_notify(int event, struct net_device *dev, u8 dst);
+void rtm_phonet_notify(struct net *net, int event, u32 ifindex, u8 dst);
 struct net_device *phonet_route_get_rcu(struct net *net, u8 daddr);
 struct net_device *phonet_route_output(struct net *net, u8 daddr);
 
-- 
cgit v1.2.3


From 3deec3b4afb4c767007eae1eeedbcf3da599395b Mon Sep 17 00:00:00 2001
From: Kuniyuki Iwashima <kuniyu@amazon.com>
Date: Thu, 17 Oct 2024 11:31:39 -0700
Subject: phonet: Convert phonet_routes.lock to spinlock_t.

route_doit() calls phonet_route_add() or phonet_route_del()
for RTM_NEWROUTE or RTM_DELROUTE, respectively.

Both functions only touch phonet_pernet(dev_net(dev))->routes,
which is currently protected by RTNL and its dedicated mutex,
phonet_routes.lock.

We will convert route_doit() to RCU and cannot use mutex inside RCU.

Let's convert the mutex to spinlock_t.

Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/net/phonet/pn_dev.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/net/phonet/pn_dev.h b/include/net/phonet/pn_dev.h
index 021e524fd20a..37a3e83531c6 100644
--- a/include/net/phonet/pn_dev.h
+++ b/include/net/phonet/pn_dev.h
@@ -11,7 +11,6 @@
 #define PN_DEV_H
 
 #include <linux/list.h>
-#include <linux/mutex.h>
 #include <linux/spinlock.h>
 
 struct net;
-- 
cgit v1.2.3


From a6021aa24f6417416d93318bbfa022ab229c33c8 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 11 Oct 2024 06:18:17 +0000
Subject: ACPI: EC: make EC support compile-time conditional

The embedded controller code is mainly used on x86 laptops and cannot
work without PC style I/O port access.

Make this a user-visible configuration option that is default enabled
on x86 but otherwise disabled, and that can never be enabled unless
CONFIG_HAS_IOPORT is also available.

The empty stubs in internal.h help ignore the EC code in configurations
that don't support it. In order to see those stubs, the sbshc code also
has to include this header and drop duplicate declarations.

All the direct callers of ec_read/ec_write already had an x86
dependency and now also need to depend on APCI_EC.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Guenter Roeck <linux@roeck-us.net>
Acked-by: Hans de Goede <hdegoede@redhat.com>
Link: https://patch.msgid.link/20241011061948.3211423-1-arnd@kernel.org
[ rjw: Subject edits ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/acpi.h | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 4d5ee84c468b..7dd24acd9ffe 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -1164,8 +1164,6 @@ int acpi_subsys_suspend_noirq(struct device *dev);
 int acpi_subsys_suspend(struct device *dev);
 int acpi_subsys_freeze(struct device *dev);
 int acpi_subsys_poweroff(struct device *dev);
-void acpi_ec_mark_gpe_for_wake(void);
-void acpi_ec_set_gpe_wake_mask(u8 action);
 int acpi_subsys_restore_early(struct device *dev);
 #else
 static inline int acpi_subsys_prepare(struct device *dev) { return 0; }
@@ -1176,6 +1174,12 @@ static inline int acpi_subsys_suspend(struct device *dev) { return 0; }
 static inline int acpi_subsys_freeze(struct device *dev) { return 0; }
 static inline int acpi_subsys_poweroff(struct device *dev) { return 0; }
 static inline int acpi_subsys_restore_early(struct device *dev) { return 0; }
+#endif
+
+#if defined(CONFIG_ACPI_EC) && defined(CONFIG_PM_SLEEP)
+void acpi_ec_mark_gpe_for_wake(void);
+void acpi_ec_set_gpe_wake_mask(u8 action);
+#else
 static inline void acpi_ec_mark_gpe_for_wake(void) {}
 static inline void acpi_ec_set_gpe_wake_mask(u8 action) {}
 #endif
-- 
cgit v1.2.3


From 1cb80d9e93f861018fabe81a69ea0ded20f5a2d0 Mon Sep 17 00:00:00 2001
From: Kui-Feng Lee <thinker.li@gmail.com>
Date: Wed, 23 Oct 2024 16:47:48 -0700
Subject: bpf: Support __uptr type tag in BTF
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch introduces the "__uptr" type tag to BTF. It is to define
a pointer pointing to the user space memory. This patch adds BTF
logic to pass the "__uptr" type tag.

btf_find_kptr() is reused for the "__uptr" tag. The "__uptr" will only
be supported in the map_value of the task storage map. However,
btf_parse_struct_meta() also uses btf_find_kptr() but it is not
interested in "__uptr". This patch adds a "field_mask" argument
to btf_find_kptr() which will return BTF_FIELD_IGNORE if the
caller is not interested in a “__uptr” field.

btf_parse_kptr() is also reused to parse the uptr.
The btf_check_and_fixup_fields() is changed to do extra
checks on the uptr to ensure that its struct size is not larger
than PAGE_SIZE. It is not clear how a uptr pointing to a CO-RE
supported kernel struct will be used, so it is also not allowed now.

Signed-off-by: Kui-Feng Lee <thinker.li@gmail.com>
Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
Link: https://lore.kernel.org/r/20241023234759.860539-2-martin.lau@linux.dev
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/bpf.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 0c216e71cec7..bb31bc6d0c4d 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -203,6 +203,7 @@ enum btf_field_type {
 	BPF_GRAPH_ROOT = BPF_RB_ROOT | BPF_LIST_HEAD,
 	BPF_REFCOUNT   = (1 << 9),
 	BPF_WORKQUEUE  = (1 << 10),
+	BPF_UPTR       = (1 << 11),
 };
 
 typedef void (*btf_dtor_kfunc_t)(void *);
@@ -322,6 +323,8 @@ static inline const char *btf_field_type_name(enum btf_field_type type)
 		return "kptr";
 	case BPF_KPTR_PERCPU:
 		return "percpu_kptr";
+	case BPF_UPTR:
+		return "uptr";
 	case BPF_LIST_HEAD:
 		return "bpf_list_head";
 	case BPF_LIST_NODE:
@@ -350,6 +353,7 @@ static inline u32 btf_field_type_size(enum btf_field_type type)
 	case BPF_KPTR_UNREF:
 	case BPF_KPTR_REF:
 	case BPF_KPTR_PERCPU:
+	case BPF_UPTR:
 		return sizeof(u64);
 	case BPF_LIST_HEAD:
 		return sizeof(struct bpf_list_head);
@@ -379,6 +383,7 @@ static inline u32 btf_field_type_align(enum btf_field_type type)
 	case BPF_KPTR_UNREF:
 	case BPF_KPTR_REF:
 	case BPF_KPTR_PERCPU:
+	case BPF_UPTR:
 		return __alignof__(u64);
 	case BPF_LIST_HEAD:
 		return __alignof__(struct bpf_list_head);
-- 
cgit v1.2.3


From b9a5a07aeaa2a903fb1306eb422880b2fa5f937f Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <martin.lau@kernel.org>
Date: Wed, 23 Oct 2024 16:47:50 -0700
Subject: bpf: Add "bool swap_uptrs" arg to bpf_local_storage_update() and
 bpf_selem_alloc()

In a later patch, the task local storage will only accept uptr
from the syscall update_elem and will not accept uptr from
the bpf prog. The reason is the bpf prog does not have a way
to provide a valid user space address.

bpf_local_storage_update() and bpf_selem_alloc() are used by
both bpf prog bpf_task_storage_get(BPF_LOCAL_STORAGE_GET_F_CREATE)
and bpf syscall update_elem. "bool swap_uptrs" arg is added
to bpf_local_storage_update() and bpf_selem_alloc() to tell if
it is called by the bpf prog or by the bpf syscall. When
swap_uptrs==true, it is called by the syscall.

The arg is named (swap_)uptrs because the later patch will swap
the uptrs between the newly allocated selem and the user space
provided map_value. It will make error handling easier in case
map->ops->map_update_elem() fails and the caller can decide
if it needs to unpin the uptr in the user space provided
map_value or the bpf_local_storage_update() has already
taken the uptr ownership and will take care of unpinning it also.

Only swap_uptrs==false is passed now. The logic to handle
the true case will be added in a later patch.

Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
Link: https://lore.kernel.org/r/20241023234759.860539-4-martin.lau@linux.dev
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/bpf_local_storage.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/bpf_local_storage.h b/include/linux/bpf_local_storage.h
index dcddb0aef7d8..0c7216c065d5 100644
--- a/include/linux/bpf_local_storage.h
+++ b/include/linux/bpf_local_storage.h
@@ -181,7 +181,7 @@ void bpf_selem_link_map(struct bpf_local_storage_map *smap,
 
 struct bpf_local_storage_elem *
 bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner, void *value,
-		bool charge_mem, gfp_t gfp_flags);
+		bool charge_mem, bool swap_uptrs, gfp_t gfp_flags);
 
 void bpf_selem_free(struct bpf_local_storage_elem *selem,
 		    struct bpf_local_storage_map *smap,
@@ -195,7 +195,7 @@ bpf_local_storage_alloc(void *owner,
 
 struct bpf_local_storage_data *
 bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
-			 void *value, u64 map_flags, gfp_t gfp_flags);
+			 void *value, u64 map_flags, bool swap_uptrs, gfp_t gfp_flags);
 
 u64 bpf_local_storage_map_mem_usage(const struct bpf_map *map);
 
-- 
cgit v1.2.3


From 5bd5bab76669b1e1551f03f5fcbc165f3fa8d269 Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <martin.lau@kernel.org>
Date: Wed, 23 Oct 2024 16:47:51 -0700
Subject: bpf: Postpone bpf_selem_free() in bpf_selem_unlink_storage_nolock()

In a later patch, bpf_selem_free() will call unpin_user_page()
through bpf_obj_free_fields(). unpin_user_page() may take spin_lock.
However, some bpf_selem_free() call paths have held a raw_spin_lock.
Like this:

raw_spin_lock_irqsave()
  bpf_selem_unlink_storage_nolock()
    bpf_selem_free()
      unpin_user_page()
        spin_lock()

To avoid spinlock nested in raw_spinlock, bpf_selem_free() should be
done after releasing the raw_spinlock. The "bool reuse_now" arg is
replaced with "struct hlist_head *free_selem_list" in
bpf_selem_unlink_storage_nolock(). The bpf_selem_unlink_storage_nolock()
will append the to-be-free selem at the free_selem_list. The caller of
bpf_selem_unlink_storage_nolock() will need to call the new
bpf_selem_free_list(free_selem_list, reuse_now) to free the selem
after releasing the raw_spinlock.

Note that the selem->snode cannot be reused for linking to
the free_selem_list because the selem->snode is protected by the
raw_spinlock that we want to avoid holding. A new
"struct hlist_node free_node;" is union-ized with
the rcu_head. Only the first one successfully
hlist_del_init_rcu(&selem->snode) will be able
to use the free_node. After succeeding hlist_del_init_rcu(&selem->snode),
the free_node and rcu_head usage is serialized such that they
can share the 16 bytes in a union.

Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
Link: https://lore.kernel.org/r/20241023234759.860539-5-martin.lau@linux.dev
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/bpf_local_storage.h | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/bpf_local_storage.h b/include/linux/bpf_local_storage.h
index 0c7216c065d5..ab7244d8108f 100644
--- a/include/linux/bpf_local_storage.h
+++ b/include/linux/bpf_local_storage.h
@@ -77,7 +77,13 @@ struct bpf_local_storage_elem {
 	struct hlist_node map_node;	/* Linked to bpf_local_storage_map */
 	struct hlist_node snode;	/* Linked to bpf_local_storage */
 	struct bpf_local_storage __rcu *local_storage;
-	struct rcu_head rcu;
+	union {
+		struct rcu_head rcu;
+		struct hlist_node free_node;	/* used to postpone
+						 * bpf_selem_free
+						 * after raw_spin_unlock
+						 */
+	};
 	/* 8 bytes hole */
 	/* The data is stored in another cacheline to minimize
 	 * the number of cachelines access during a cache hit.
-- 
cgit v1.2.3


From ba512b00e5efbf7e19cfb7fa9f66ce82669b7077 Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <martin.lau@kernel.org>
Date: Wed, 23 Oct 2024 16:47:53 -0700
Subject: bpf: Add uptr support in the map_value of the task local storage.

This patch adds uptr support in the map_value of the task local storage.

struct map_value {
	struct user_data __uptr *uptr;
};

struct {
	__uint(type, BPF_MAP_TYPE_TASK_STORAGE);
	__uint(map_flags, BPF_F_NO_PREALLOC);
	__type(key, int);
	__type(value, struct value_type);
} datamap SEC(".maps");

A new bpf_obj_pin_uptrs() is added to pin the user page and
also stores the kernel address back to the uptr for the
bpf prog to use later. It currently does not support
the uptr pointing to a user struct across two pages.
It also excludes PageHighMem support to keep it simple.
As of now, the 32bit bpf jit is missing other more crucial bpf
features. For example, many important bpf features depend on
bpf kfunc now but so far only one arch (x86-32) supports it
which was added by me as an example when kfunc was first
introduced to bpf.

The uptr can only be stored to the task local storage by the
syscall update_elem. Meaning the uptr will not be considered
if it is provided by the bpf prog through
bpf_task_storage_get(BPF_LOCAL_STORAGE_GET_F_CREATE).
This is enforced by only calling
bpf_local_storage_update(swap_uptrs==true) in
bpf_pid_task_storage_update_elem. Everywhere else will
have swap_uptrs==false.

This will pump down to bpf_selem_alloc(swap_uptrs==true). It is
the only case that bpf_selem_alloc() will take the uptr value when
updating the newly allocated selem. bpf_obj_swap_uptrs() is added
to swap the uptr between the SDATA(selem)->data and the user provided
map_value in "void *value". bpf_obj_swap_uptrs() makes the
SDATA(selem)->data takes the ownership of the uptr and the user space
provided map_value will have NULL in the uptr.

The bpf_obj_unpin_uptrs() is called after map->ops->map_update_elem()
returning error. If the map->ops->map_update_elem has reached
a state that the local storage has taken the uptr ownership,
the bpf_obj_unpin_uptrs() will be a no op because the uptr
is NULL. A "__"bpf_obj_unpin_uptrs is added to make this
error path unpin easier such that it does not have to check
the map->record is NULL or not.

BPF_F_LOCK is not supported when the map_value has uptr.
This can be revisited later if there is a use case. A similar
swap_uptrs idea can be considered.

The final bit is to do unpin_user_page in the bpf_obj_free_fields().
The earlier patch has ensured that the bpf_obj_free_fields() has
gone through the rcu gp when needed.

Cc: linux-mm@kvack.org
Cc: Shakeel Butt <shakeel.butt@linux.dev>
Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
Acked-by: Shakeel Butt <shakeel.butt@linux.dev>
Link: https://lore.kernel.org/r/20241023234759.860539-7-martin.lau@linux.dev
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/bpf.h | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

(limited to 'include')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index bb31bc6d0c4d..8888689aa917 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -424,6 +424,7 @@ static inline void bpf_obj_init_field(const struct btf_field *field, void *addr)
 	case BPF_KPTR_UNREF:
 	case BPF_KPTR_REF:
 	case BPF_KPTR_PERCPU:
+	case BPF_UPTR:
 		break;
 	default:
 		WARN_ON_ONCE(1);
@@ -512,6 +513,25 @@ static inline void copy_map_value_long(struct bpf_map *map, void *dst, void *src
 	bpf_obj_memcpy(map->record, dst, src, map->value_size, true);
 }
 
+static inline void bpf_obj_swap_uptrs(const struct btf_record *rec, void *dst, void *src)
+{
+	unsigned long *src_uptr, *dst_uptr;
+	const struct btf_field *field;
+	int i;
+
+	if (!btf_record_has_field(rec, BPF_UPTR))
+		return;
+
+	for (i = 0, field = rec->fields; i < rec->cnt; i++, field++) {
+		if (field->type != BPF_UPTR)
+			continue;
+
+		src_uptr = src + field->offset;
+		dst_uptr = dst + field->offset;
+		swap(*src_uptr, *dst_uptr);
+	}
+}
+
 static inline void bpf_obj_memzero(struct btf_record *rec, void *dst, u32 size)
 {
 	u32 curr_off = 0;
-- 
cgit v1.2.3


From 3ced1c68751299c0cdf6a1ceeafdbe77db7d4956 Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Date: Sat, 19 Oct 2024 00:49:12 +0300
Subject: drm/display: bridge_connector: handle ycbcr_420_allowed

Follow the interlace_allowed example and calculate drm_connector's
ycbcr_420_allowed flag as AND of all drm_bridge's ycbcr_420_allowed
flags in a chain. This is one of the gaps between several
bridge-specific connector implementations and drm_bridge_connector.

Reviewed-by: Neil Armstrong <neil.armstrong@linaro.org>
Reviewed-by: Maxime Ripard <mripard@kernel.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20241019-bridge-yuv420-v1-1-d74efac9e4e6@linaro.org
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
---
 include/drm/drm_bridge.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include')

diff --git a/include/drm/drm_bridge.h b/include/drm/drm_bridge.h
index 75019d16be64..e8d735b7f6a4 100644
--- a/include/drm/drm_bridge.h
+++ b/include/drm/drm_bridge.h
@@ -802,6 +802,11 @@ struct drm_bridge {
 	 * modes.
 	 */
 	bool interlace_allowed;
+	/**
+	 * @ycbcr_420_allowed: Indicate that the bridge can handle YCbCr 420
+	 * output.
+	 */
+	bool ycbcr_420_allowed;
 	/**
 	 * @pre_enable_prev_first: The bridge requires that the prev
 	 * bridge @pre_enable function is called before its @pre_enable,
-- 
cgit v1.2.3


From 09d6775f503b393d0457c7126aa43208e1724004 Mon Sep 17 00:00:00 2001
From: Samuel Holland <samuel.holland@sifive.com>
Date: Wed, 16 Oct 2024 13:27:45 -0700
Subject: riscv: Add support for userspace pointer masking

RISC-V supports pointer masking with a variable number of tag bits
(which is called "PMLEN" in the specification) and which is configured
at the next higher privilege level.

Wire up the PR_SET_TAGGED_ADDR_CTRL and PR_GET_TAGGED_ADDR_CTRL prctls
so userspace can request a lower bound on the number of tag bits and
determine the actual number of tag bits. As with arm64's
PR_TAGGED_ADDR_ENABLE, the pointer masking configuration is
thread-scoped, inherited on clone() and fork() and cleared on execve().

Reviewed-by: Charlie Jenkins <charlie@rivosinc.com>
Tested-by: Charlie Jenkins <charlie@rivosinc.com>
Signed-off-by: Samuel Holland <samuel.holland@sifive.com>
Link: https://lore.kernel.org/r/20241016202814.4061541-5-samuel.holland@sifive.com
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
---
 include/uapi/linux/prctl.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
index 35791791a879..cefd656ebf43 100644
--- a/include/uapi/linux/prctl.h
+++ b/include/uapi/linux/prctl.h
@@ -230,7 +230,7 @@ struct prctl_mm_map {
 # define PR_PAC_APDBKEY			(1UL << 3)
 # define PR_PAC_APGAKEY			(1UL << 4)
 
-/* Tagged user address controls for arm64 */
+/* Tagged user address controls for arm64 and RISC-V */
 #define PR_SET_TAGGED_ADDR_CTRL		55
 #define PR_GET_TAGGED_ADDR_CTRL		56
 # define PR_TAGGED_ADDR_ENABLE		(1UL << 0)
@@ -244,6 +244,9 @@ struct prctl_mm_map {
 # define PR_MTE_TAG_MASK		(0xffffUL << PR_MTE_TAG_SHIFT)
 /* Unused; kept only for source compatibility */
 # define PR_MTE_TCF_SHIFT		1
+/* RISC-V pointer masking tag length */
+# define PR_PMLEN_SHIFT			24
+# define PR_PMLEN_MASK			(0x7fUL << PR_PMLEN_SHIFT)
 
 /* Control reclaim behavior when allocating memory */
 #define PR_SET_IO_FLUSHER		57
-- 
cgit v1.2.3


From 78844482a1c939a972681842f8ee2a8ddb202441 Mon Sep 17 00:00:00 2001
From: Samuel Holland <samuel.holland@sifive.com>
Date: Wed, 16 Oct 2024 13:27:47 -0700
Subject: riscv: Allow ptrace control of the tagged address ABI

This allows a tracer to control the ABI of the tracee, as on arm64.

Signed-off-by: Samuel Holland <samuel.holland@sifive.com>
Link: https://lore.kernel.org/r/20241016202814.4061541-7-samuel.holland@sifive.com
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
---
 include/uapi/linux/elf.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h
index b9935988da5c..a920cf8934dc 100644
--- a/include/uapi/linux/elf.h
+++ b/include/uapi/linux/elf.h
@@ -450,6 +450,7 @@ typedef struct elf64_shdr {
 #define NT_MIPS_MSA	0x802		/* MIPS SIMD registers */
 #define NT_RISCV_CSR	0x900		/* RISC-V Control and Status Registers */
 #define NT_RISCV_VECTOR	0x901		/* RISC-V vector registers */
+#define NT_RISCV_TAGGED_ADDR_CTRL 0x902	/* RISC-V tagged address control (prctl()) */
 #define NT_LOONGARCH_CPUCFG	0xa00	/* LoongArch CPU config registers */
 #define NT_LOONGARCH_CSR	0xa01	/* LoongArch control and status registers */
 #define NT_LOONGARCH_LSX	0xa02	/* LoongArch Loongson SIMD Extension registers */
-- 
cgit v1.2.3


From 02ac5f9d6caec96071103f7c62b5117526e47b64 Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Date: Thu, 24 Oct 2024 02:20:02 +0000
Subject: of: property: add of_graph_get_next_port()

We have endpoint base functions
	- of_graph_get_next_endpoint()
	- of_graph_get_endpoint_count()
	- for_each_endpoint_of_node()

Here, for_each_endpoint_of_node() loop finds each endpoints

	ports {
		port@0 {
(1)			endpoint {...};
		};
		port@1 {
(2)			endpoint {...};
		};
		...
	};

In above case, it finds endpoint as (1) -> (2) -> ...

Basically, user/driver knows which port is used for what, but not in
all cases. For example on flexible/generic driver case, how many ports
are used is not fixed.

For example Sound Generic Card driver which is very flexible/generic and
used from many venders can't know how many ports are used, and used for
what, because it depends on each vender SoC and/or its used board.

And more, the port can have multi endpoints. For example Generic Sound
Card case, it supports many type of connection between CPU / Codec, and
some of them uses multi endpoint in one port. see below.

	ports {
(A)		port@0 {
(1)			endpoint@0 {...};
(2)			endpoint@1 {...};
		};
(B)		port@1 {
(3)			endpoint {...};
		};
		...
	};

Generic Sound Card want to handle each connection via "port" base instead
of "endpoint" base. But, it is very difficult to handle each "port" via
existing for_each_endpoint_of_node(). Because getting each "port" via
of_get_parent() from each "endpoint" doesn't work. For example in above
case, both (1) (2) endpoint has same "port" (= A).

Add "port" base functions.

Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Link: https://lore.kernel.org/r/87ldyeb5t9.wl-kuninori.morimoto.gx@renesas.com
Signed-off-by: Rob Herring (Arm) <robh@kernel.org>
---
 include/linux/of_graph.h | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

(limited to 'include')

diff --git a/include/linux/of_graph.h b/include/linux/of_graph.h
index a4bea62bfa29..44518f3583a4 100644
--- a/include/linux/of_graph.h
+++ b/include/linux/of_graph.h
@@ -11,6 +11,7 @@
 #ifndef __LINUX_OF_GRAPH_H
 #define __LINUX_OF_GRAPH_H
 
+#include <linux/cleanup.h>
 #include <linux/types.h>
 #include <linux/errno.h>
 
@@ -37,14 +38,29 @@ struct of_endpoint {
 	for (child = of_graph_get_next_endpoint(parent, NULL); child != NULL; \
 	     child = of_graph_get_next_endpoint(parent, child))
 
+/**
+ * for_each_of_graph_port - iterate over every port in a device or ports node
+ * @parent: parent device or ports node containing port
+ * @child: loop variable pointing to the current port node
+ *
+ * When breaking out of the loop, and continue to use the @child, you need to
+ * use return_ptr(@child) or no_free_ptr(@child) not to call __free() for it.
+ */
+#define for_each_of_graph_port(parent, child)			\
+	for (struct device_node *child __free(device_node) = of_graph_get_next_port(parent, NULL);\
+	     child != NULL; child = of_graph_get_next_port(parent, child))
+
 #ifdef CONFIG_OF
 bool of_graph_is_present(const struct device_node *node);
 int of_graph_parse_endpoint(const struct device_node *node,
 				struct of_endpoint *endpoint);
 unsigned int of_graph_get_endpoint_count(const struct device_node *np);
+unsigned int of_graph_get_port_count(struct device_node *np);
 struct device_node *of_graph_get_port_by_id(struct device_node *node, u32 id);
 struct device_node *of_graph_get_next_endpoint(const struct device_node *parent,
 					struct device_node *previous);
+struct device_node *of_graph_get_next_port(const struct device_node *parent,
+					   struct device_node *port);
 struct device_node *of_graph_get_endpoint_by_regs(
 		const struct device_node *parent, int port_reg, int reg);
 struct device_node *of_graph_get_remote_endpoint(
@@ -73,6 +89,11 @@ static inline unsigned int of_graph_get_endpoint_count(const struct device_node
 	return 0;
 }
 
+static inline unsigned int of_graph_get_port_count(struct device_node *np)
+{
+	return 0;
+}
+
 static inline struct device_node *of_graph_get_port_by_id(
 					struct device_node *node, u32 id)
 {
@@ -86,6 +107,13 @@ static inline struct device_node *of_graph_get_next_endpoint(
 	return NULL;
 }
 
+static inline struct device_node *of_graph_get_next_port(
+					const struct device_node *parent,
+					struct device_node *previous)
+{
+	return NULL;
+}
+
 static inline struct device_node *of_graph_get_endpoint_by_regs(
 		const struct device_node *parent, int port_reg, int reg)
 {
-- 
cgit v1.2.3


From 58fe47d6ac7413172e1fa88324fb1b4c0eb2c0a2 Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Date: Thu, 24 Oct 2024 02:20:06 +0000
Subject: of: property: add of_graph_get_next_port_endpoint()

We already have of_graph_get_next_endpoint(), but it is not
intuitive to use in some case.

(X)	node {
(Y)		ports {
(P0)			port@0 { endpoint { remote-endpoint = ...; };};
(P10)			port@1 { endpoint { remote-endpoint = ...; };
(P11)				 endpoint { remote-endpoint = ...; };};
(P2)			port@2 { endpoint { remote-endpoint = ...; };};
		};
	};

For example, if I want to handle port@1's 2 endpoints (= P10, P11),
I want to use like below

	P10 = of_graph_get_next_endpoint(port1, NULL);
	P11 = of_graph_get_next_endpoint(port1, P10);

But 1st one will be error, because of_graph_get_next_endpoint()
requested 1st parameter is "node" (X) or "ports" (Y), not but "port".
Below works well, but it will get P0

	P0 = of_graph_get_next_endpoint(node,  NULL);
	P0 = of_graph_get_next_endpoint(ports, NULL);

In other words, we can't handle P10/P11 directly via
of_graph_get_next_endpoint().

There is another non intuitive behavior on of_graph_get_next_endpoint().
In case of if I could get P10 pointer for some way, and if I want to
handle port@1 things by loop, I would like use it like below

	/*
	 * "ep" is now P10, and handle port1 things here,
	 * but we don't know how many endpoints port1 have.
	 *
	 * Because "ep" is non NULL now, we can use port1
	 * as of_graph_get_next_endpoint(port1, xxx)
	 */
	do {
		/* do something for port1 specific things here */
	} while (ep = of_graph_get_next_endpoint(port1, ep))

But it also not worked as I expected.
I expect it will be P10 -> P11 -> NULL,
but      it will be P10 -> P11 -> P2,    because
of_graph_get_next_endpoint() will fetch "endpoint" beyond the "port".
It is not useful for generic driver.

To handle endpoint more intuitive, create of_graph_get_next_port_endpoint()

	of_graph_get_next_port_endpoint(port1, NULL); // P10
	of_graph_get_next_port_endpoint(port1, P10);  // P11
	of_graph_get_next_port_endpoint(port1, P11);  // NULL

Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Link: https://lore.kernel.org/r/87jzdyb5t5.wl-kuninori.morimoto.gx@renesas.com
Signed-off-by: Rob Herring (Arm) <robh@kernel.org>
---
 include/linux/of_graph.h | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

(limited to 'include')

diff --git a/include/linux/of_graph.h b/include/linux/of_graph.h
index 44518f3583a4..a692d9d979a6 100644
--- a/include/linux/of_graph.h
+++ b/include/linux/of_graph.h
@@ -50,6 +50,18 @@ struct of_endpoint {
 	for (struct device_node *child __free(device_node) = of_graph_get_next_port(parent, NULL);\
 	     child != NULL; child = of_graph_get_next_port(parent, child))
 
+/**
+ * for_each_of_graph_port_endpoint - iterate over every endpoint in a port node
+ * @parent: parent port node
+ * @child: loop variable pointing to the current endpoint node
+ *
+ * When breaking out of the loop, and continue to use the @child, you need to
+ * use return_ptr(@child) or no_free_ptr(@child) not to call __free() for it.
+ */
+#define for_each_of_graph_port_endpoint(parent, child)			\
+	for (struct device_node *child __free(device_node) = of_graph_get_next_port_endpoint(parent, NULL);\
+	     child != NULL; child = of_graph_get_next_port_endpoint(parent, child))
+
 #ifdef CONFIG_OF
 bool of_graph_is_present(const struct device_node *node);
 int of_graph_parse_endpoint(const struct device_node *node,
@@ -61,6 +73,8 @@ struct device_node *of_graph_get_next_endpoint(const struct device_node *parent,
 					struct device_node *previous);
 struct device_node *of_graph_get_next_port(const struct device_node *parent,
 					   struct device_node *port);
+struct device_node *of_graph_get_next_port_endpoint(const struct device_node *port,
+						    struct device_node *prev);
 struct device_node *of_graph_get_endpoint_by_regs(
 		const struct device_node *parent, int port_reg, int reg);
 struct device_node *of_graph_get_remote_endpoint(
@@ -114,6 +128,13 @@ static inline struct device_node *of_graph_get_next_port(
 	return NULL;
 }
 
+static inline struct device_node *of_graph_get_next_port_endpoint(
+					const struct device_node *parent,
+					struct device_node *previous)
+{
+	return NULL;
+}
+
 static inline struct device_node *of_graph_get_endpoint_by_regs(
 		const struct device_node *parent, int port_reg, int reg)
 {
-- 
cgit v1.2.3


From 78cd57bbb4529690d152a5dce409dd91fa1a8b9b Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Date: Thu, 24 Oct 2024 02:20:35 +0000
Subject: fbdev: omapfb: use new of_graph functions

Now we can use new port related functions for port parsing. Use it.

Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Acked-by: Helge Deller <deller@gmx.de>
Link: https://lore.kernel.org/r/87bjzab5sd.wl-kuninori.morimoto.gx@renesas.com
Signed-off-by: Rob Herring (Arm) <robh@kernel.org>
---
 include/video/omapfb_dss.h | 8 --------
 1 file changed, 8 deletions(-)

(limited to 'include')

diff --git a/include/video/omapfb_dss.h b/include/video/omapfb_dss.h
index a8c0c3eeeb5b..d133190e3143 100644
--- a/include/video/omapfb_dss.h
+++ b/include/video/omapfb_dss.h
@@ -811,14 +811,6 @@ static inline bool omapdss_device_is_enabled(struct omap_dss_device *dssdev)
 	return dssdev->state == OMAP_DSS_DISPLAY_ACTIVE;
 }
 
-struct device_node *
-omapdss_of_get_next_port(const struct device_node *parent,
-			 struct device_node *prev);
-
-struct device_node *
-omapdss_of_get_next_endpoint(const struct device_node *parent,
-			     struct device_node *prev);
-
 struct omap_dss_device *
 omapdss_of_find_source_for_first_ep(struct device_node *node);
 #else
-- 
cgit v1.2.3


From 2f2d46959808e9b039ecb241ff13d50be2d6e231 Mon Sep 17 00:00:00 2001
From: David Woodhouse <dwmw@amazon.co.uk>
Date: Sat, 19 Oct 2024 18:15:42 +0100
Subject: firmware/psci: Add definitions for PSCI v1.3 specification

The v1.3 PSCI spec (https://developer.arm.com/documentation/den0022) adds
the SYSTEM_OFF2 function. Add definitions for it and its hibernation type
parameter.

Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Reviewed-by: Miguel Luis <miguel.luis@oracle.com>
Link: https://lore.kernel.org/r/20241019172459.2241939-2-dwmw2@infradead.org
Signed-off-by: Oliver Upton <oliver.upton@linux.dev>
---
 include/uapi/linux/psci.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/psci.h b/include/uapi/linux/psci.h
index 42a40ad3fb62..81759ff385e6 100644
--- a/include/uapi/linux/psci.h
+++ b/include/uapi/linux/psci.h
@@ -59,6 +59,7 @@
 #define PSCI_1_1_FN_SYSTEM_RESET2		PSCI_0_2_FN(18)
 #define PSCI_1_1_FN_MEM_PROTECT			PSCI_0_2_FN(19)
 #define PSCI_1_1_FN_MEM_PROTECT_CHECK_RANGE	PSCI_0_2_FN(20)
+#define PSCI_1_3_FN_SYSTEM_OFF2			PSCI_0_2_FN(21)
 
 #define PSCI_1_0_FN64_CPU_DEFAULT_SUSPEND	PSCI_0_2_FN64(12)
 #define PSCI_1_0_FN64_NODE_HW_STATE		PSCI_0_2_FN64(13)
@@ -68,6 +69,7 @@
 
 #define PSCI_1_1_FN64_SYSTEM_RESET2		PSCI_0_2_FN64(18)
 #define PSCI_1_1_FN64_MEM_PROTECT_CHECK_RANGE	PSCI_0_2_FN64(20)
+#define PSCI_1_3_FN64_SYSTEM_OFF2		PSCI_0_2_FN64(21)
 
 /* PSCI v0.2 power state encoding for CPU_SUSPEND function */
 #define PSCI_0_2_POWER_STATE_ID_MASK		0xffff
@@ -100,6 +102,9 @@
 #define PSCI_1_1_RESET_TYPE_SYSTEM_WARM_RESET	0
 #define PSCI_1_1_RESET_TYPE_VENDOR_START	0x80000000U
 
+/* PSCI v1.3 hibernate type for SYSTEM_OFF2 */
+#define PSCI_1_3_OFF_TYPE_HIBERNATE_OFF		BIT(0)
+
 /* PSCI version decoding (independent of PSCI version) */
 #define PSCI_VERSION_MAJOR_SHIFT		16
 #define PSCI_VERSION_MINOR_MASK			\
-- 
cgit v1.2.3


From 8be82d536a9f8d9974cf746d235c1f1c24dd31ed Mon Sep 17 00:00:00 2001
From: David Woodhouse <dwmw@amazon.co.uk>
Date: Sat, 19 Oct 2024 18:15:44 +0100
Subject: KVM: arm64: Add support for PSCI v1.2 and v1.3

As with PSCI v1.1 in commit 512865d83fd9 ("KVM: arm64: Bump guest PSCI
version to 1.1"), expose v1.3 to the guest by default. The SYSTEM_OFF2
call which is exposed by doing so is compatible for userspace because
it's just a new flag in the event that KVM raises, in precisely the same
way that SYSTEM_RESET2 was compatible when v1.1 was enabled by default.

Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Reviewed-by: Miguel Luis <miguel.luis@oracle.com>
Link: https://lore.kernel.org/r/20241019172459.2241939-4-dwmw2@infradead.org
Signed-off-by: Oliver Upton <oliver.upton@linux.dev>
---
 include/kvm/arm_psci.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/kvm/arm_psci.h b/include/kvm/arm_psci.h
index e8fb624013d1..cbaec804eb83 100644
--- a/include/kvm/arm_psci.h
+++ b/include/kvm/arm_psci.h
@@ -14,8 +14,10 @@
 #define KVM_ARM_PSCI_0_2	PSCI_VERSION(0, 2)
 #define KVM_ARM_PSCI_1_0	PSCI_VERSION(1, 0)
 #define KVM_ARM_PSCI_1_1	PSCI_VERSION(1, 1)
+#define KVM_ARM_PSCI_1_2	PSCI_VERSION(1, 2)
+#define KVM_ARM_PSCI_1_3	PSCI_VERSION(1, 3)
 
-#define KVM_ARM_PSCI_LATEST	KVM_ARM_PSCI_1_1
+#define KVM_ARM_PSCI_LATEST	KVM_ARM_PSCI_1_3
 
 static inline int kvm_psci_version(struct kvm_vcpu *vcpu)
 {
-- 
cgit v1.2.3


From f730fd535fc51573f982fad629f2fc6b4a0cde2f Mon Sep 17 00:00:00 2001
From: Uros Bizjak <ubizjak@gmail.com>
Date: Mon, 19 Aug 2024 09:41:15 +0200
Subject: cleanup: Remove address space of returned pointer

Guard functions in local_lock.h are defined using DEFINE_GUARD() and
DEFINE_LOCK_GUARD_1() macros having lock type defined as pointer in
the percpu address space. The functions, defined by these macros
return value in generic address space, causing:

cleanup.h:157:18: error: return from pointer to non-enclosed address space

and

cleanup.h:214:18: error: return from pointer to non-enclosed address space

when strict percpu checks are enabled.

Add explicit casts to remove address space of the returned pointer.

Found by GCC's named address space checks.

Fixes: e4ab322fbaaa ("cleanup: Add conditional guard support")
Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20240819074124.143565-1-ubizjak@gmail.com
---
 include/linux/cleanup.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/cleanup.h b/include/linux/cleanup.h
index 038b2d523bf8..518bd1fd86fb 100644
--- a/include/linux/cleanup.h
+++ b/include/linux/cleanup.h
@@ -290,7 +290,7 @@ static inline class_##_name##_t class_##_name##ext##_constructor(_init_args) \
 #define DEFINE_GUARD(_name, _type, _lock, _unlock) \
 	DEFINE_CLASS(_name, _type, if (_T) { _unlock; }, ({ _lock; _T; }), _type _T); \
 	static inline void * class_##_name##_lock_ptr(class_##_name##_t *_T) \
-	{ return *_T; }
+	{ return (void *)(__force unsigned long)*_T; }
 
 #define DEFINE_GUARD_COND(_name, _ext, _condlock) \
 	EXTEND_CLASS(_name, _ext, \
@@ -347,7 +347,7 @@ static inline void class_##_name##_destructor(class_##_name##_t *_T)	\
 									\
 static inline void *class_##_name##_lock_ptr(class_##_name##_t *_T)	\
 {									\
-	return _T->lock;						\
+	return (void *)(__force unsigned long)_T->lock;			\
 }
 
 
-- 
cgit v1.2.3


From fcc22ac5baf06dd17193de44b60dbceea6461983 Mon Sep 17 00:00:00 2001
From: Przemek Kitszel <przemyslaw.kitszel@intel.com>
Date: Fri, 18 Oct 2024 13:38:14 +0200
Subject: cleanup: Adjust scoped_guard() macros to avoid potential warning

Change scoped_guard() and scoped_cond_guard() macros to make reasoning
about them easier for static analysis tools (smatch, compiler
diagnostics), especially to enable them to tell if the given usage of
scoped_guard() is with a conditional lock class (interruptible-locks,
try-locks) or not (like simple mutex_lock()).

Add compile-time error if scoped_cond_guard() is used for non-conditional
lock class.

Beyond easier tooling and a little shrink reported by bloat-o-meter
this patch enables developer to write code like:

int foo(struct my_drv *adapter)
{
	scoped_guard(spinlock, &adapter->some_spinlock)
		return adapter->spinlock_protected_var;
}

Current scoped_guard() implementation does not support that,
due to compiler complaining:
error: control reaches end of non-void function [-Werror=return-type]

Technical stuff about the change:
scoped_guard() macro uses common idiom of using "for" statement to declare
a scoped variable. Unfortunately, current logic is too hard for compiler
diagnostics to be sure that there is exactly one loop step; fix that.

To make any loop so trivial that there is no above warning, it must not
depend on any non-const variable to tell if there are more steps. There is
no obvious solution for that in C, but one could use the compound
statement expression with "goto" jumping past the "loop", effectively
leaving only the subscope part of the loop semantics.

More impl details:
one more level of macro indirection is now needed to avoid duplicating
label names;
I didn't spot any other place that is using the
"for (...; goto label) if (0) label: break;" idiom, so it's not packed for
reuse beyond scoped_guard() family, what makes actual macros code cleaner.

There was also a need to introduce const true/false variable per lock
class, it is used to aid compiler diagnostics reasoning about "exactly
1 step" loops (note that converting that to function would undo the whole
benefit).

Big thanks to Andy Shevchenko for help on this patch, both internal and
public, ranging from whitespace/formatting, through commit message
clarifications, general improvements, ending with presenting alternative
approaches - all despite not even liking the idea.

Big thanks to Dmitry Torokhov for the idea of compile-time check for
scoped_cond_guard() (to use it only with conditional locsk), and general
improvements for the patch.

Big thanks to David Lechner for idea to cover also scoped_cond_guard().

Signed-off-by: Przemek Kitszel <przemyslaw.kitszel@intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Link: https://lkml.kernel.org/r/20241018113823.171256-1-przemyslaw.kitszel@intel.com
---
 include/linux/cleanup.h | 52 +++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 42 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/include/linux/cleanup.h b/include/linux/cleanup.h
index 518bd1fd86fb..0cc66f8d28e7 100644
--- a/include/linux/cleanup.h
+++ b/include/linux/cleanup.h
@@ -285,14 +285,20 @@ static inline class_##_name##_t class_##_name##ext##_constructor(_init_args) \
  *      similar to scoped_guard(), except it does fail when the lock
  *      acquire fails.
  *
+ *      Only for conditional locks.
  */
 
+#define __DEFINE_CLASS_IS_CONDITIONAL(_name, _is_cond)	\
+static __maybe_unused const bool class_##_name##_is_conditional = _is_cond
+
 #define DEFINE_GUARD(_name, _type, _lock, _unlock) \
+	__DEFINE_CLASS_IS_CONDITIONAL(_name, false); \
 	DEFINE_CLASS(_name, _type, if (_T) { _unlock; }, ({ _lock; _T; }), _type _T); \
 	static inline void * class_##_name##_lock_ptr(class_##_name##_t *_T) \
 	{ return (void *)(__force unsigned long)*_T; }
 
 #define DEFINE_GUARD_COND(_name, _ext, _condlock) \
+	__DEFINE_CLASS_IS_CONDITIONAL(_name##_ext, true); \
 	EXTEND_CLASS(_name, _ext, \
 		     ({ void *_t = _T; if (_T && !(_condlock)) _t = NULL; _t; }), \
 		     class_##_name##_t _T) \
@@ -303,17 +309,40 @@ static inline class_##_name##_t class_##_name##ext##_constructor(_init_args) \
 	CLASS(_name, __UNIQUE_ID(guard))
 
 #define __guard_ptr(_name) class_##_name##_lock_ptr
+#define __is_cond_ptr(_name) class_##_name##_is_conditional
 
-#define scoped_guard(_name, args...)					\
-	for (CLASS(_name, scope)(args),					\
-	     *done = NULL; __guard_ptr(_name)(&scope) && !done; done = (void *)1)
-
-#define scoped_cond_guard(_name, _fail, args...) \
-	for (CLASS(_name, scope)(args), \
-	     *done = NULL; !done; done = (void *)1) \
-		if (!__guard_ptr(_name)(&scope)) _fail; \
-		else
-
+/*
+ * Helper macro for scoped_guard().
+ *
+ * Note that the "!__is_cond_ptr(_name)" part of the condition ensures that
+ * compiler would be sure that for the unconditional locks the body of the
+ * loop (caller-provided code glued to the else clause) could not be skipped.
+ * It is needed because the other part - "__guard_ptr(_name)(&scope)" - is too
+ * hard to deduce (even if could be proven true for unconditional locks).
+ */
+#define __scoped_guard(_name, _label, args...)				\
+	for (CLASS(_name, scope)(args);					\
+	     __guard_ptr(_name)(&scope) || !__is_cond_ptr(_name);	\
+	     ({ goto _label; }))					\
+		if (0) {						\
+_label:									\
+			break;						\
+		} else
+
+#define scoped_guard(_name, args...)	\
+	__scoped_guard(_name, __UNIQUE_ID(label), args)
+
+#define __scoped_cond_guard(_name, _fail, _label, args...)		\
+	for (CLASS(_name, scope)(args); true; ({ goto _label; }))	\
+		if (!__guard_ptr(_name)(&scope)) {			\
+			BUILD_BUG_ON(!__is_cond_ptr(_name));		\
+			_fail;						\
+_label:									\
+			break;						\
+		} else
+
+#define scoped_cond_guard(_name, _fail, args...)	\
+	__scoped_cond_guard(_name, _fail, __UNIQUE_ID(label), args)
 /*
  * Additional helper macros for generating lock guards with types, either for
  * locks that don't have a native type (eg. RCU, preempt) or those that need a
@@ -369,14 +398,17 @@ static inline class_##_name##_t class_##_name##_constructor(void)	\
 }
 
 #define DEFINE_LOCK_GUARD_1(_name, _type, _lock, _unlock, ...)		\
+__DEFINE_CLASS_IS_CONDITIONAL(_name, false);				\
 __DEFINE_UNLOCK_GUARD(_name, _type, _unlock, __VA_ARGS__)		\
 __DEFINE_LOCK_GUARD_1(_name, _type, _lock)
 
 #define DEFINE_LOCK_GUARD_0(_name, _lock, _unlock, ...)			\
+__DEFINE_CLASS_IS_CONDITIONAL(_name, false);				\
 __DEFINE_UNLOCK_GUARD(_name, void, _unlock, __VA_ARGS__)		\
 __DEFINE_LOCK_GUARD_0(_name, _lock)
 
 #define DEFINE_LOCK_GUARD_1_COND(_name, _ext, _condlock)		\
+	__DEFINE_CLASS_IS_CONDITIONAL(_name##_ext, true);		\
 	EXTEND_CLASS(_name, _ext,					\
 		     ({ class_##_name##_t _t = { .lock = l }, *_T = &_t;\
 		        if (_T->lock && !(_condlock)) _T->lock = NULL;	\
-- 
cgit v1.2.3


From 36c2cf88808d47e926d11b98734f154fe4a9f50f Mon Sep 17 00:00:00 2001
From: David Lechner <dlechner@baylibre.com>
Date: Tue, 1 Oct 2024 17:30:18 -0500
Subject: cleanup: Add conditional guard helper

Add a new if_not_guard() macro to cleanup.h for handling
conditional guards such as mutext_trylock().

This is more ergonomic than scoped_guard() for most use cases.
Instead of hiding the error handling statement in the macro args, it
works like a normal if statement and allow the error path to be indented
while the normal code flow path is not indented. And it avoid unwanted
side-effect from hidden for loop in scoped_guard().

Signed-off-by: David Lechner <dlechner@baylibre.com>
Co-developed-by: Fabio M. De Francesco <fabio.m.de.francesco@linux.intel.com>
Signed-off-by: Fabio M. De Francesco <fabio.m.de.francesco@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Dan Williams <dan.j.williams@intel.com>
Link: https://lkml.kernel.org/r/20241001-cleanup-if_not_cond_guard-v1-1-7753810b0f7a@baylibre.com
---
 include/linux/cleanup.h | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

(limited to 'include')

diff --git a/include/linux/cleanup.h b/include/linux/cleanup.h
index 0cc66f8d28e7..e859f79b9d2d 100644
--- a/include/linux/cleanup.h
+++ b/include/linux/cleanup.h
@@ -273,6 +273,12 @@ static inline class_##_name##_t class_##_name##ext##_constructor(_init_args) \
  *	an anonymous instance of the (guard) class, not recommended for
  *	conditional locks.
  *
+ * if_not_guard(name, args...) { <error handling> }:
+ *	convenience macro for conditional guards that calls the statement that
+ *	follows only if the lock was not acquired (typically an error return).
+ *
+ *	Only for conditional locks.
+ *
  * scoped_guard (name, args...) { }:
  *	similar to CLASS(name, scope)(args), except the variable (with the
  *	explicit name 'scope') is declard in a for-loop such that its scope is
@@ -343,6 +349,15 @@ _label:									\
 
 #define scoped_cond_guard(_name, _fail, args...)	\
 	__scoped_cond_guard(_name, _fail, __UNIQUE_ID(label), args)
+
+#define __if_not_guard(_name, _id, args...)		\
+	BUILD_BUG_ON(!__is_cond_ptr(_name));		\
+	CLASS(_name, _id)(args);			\
+	if (!__guard_ptr(_name)(&_id))
+
+#define if_not_guard(_name, args...) \
+	__if_not_guard(_name, __UNIQUE_ID(guard), args)
+
 /*
  * Additional helper macros for generating lock guards with types, either for
  * locks that don't have a native type (eg. RCU, preempt) or those that need a
-- 
cgit v1.2.3


From 49991cca67d584a59cb10d48825cce3d11f7d843 Mon Sep 17 00:00:00 2001
From: Claudiu Beznea <claudiu.beznea.uj@bp.renesas.com>
Date: Sat, 19 Oct 2024 11:47:27 +0300
Subject: dt-bindings: clock: r9a08g045-cpg: Add power domain ID for RTC

The RTC and VBATTB don't share the MSTOP control bit (but only the bus
clock and the reset signal). As the MSTOP control is modeled though power
domains add power domain ID for the RTC device available on the
Renesas RZ/G3S SoC.

Signed-off-by: Claudiu Beznea <claudiu.beznea.uj@bp.renesas.com>
Acked-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Link: https://lore.kernel.org/20241019084738.3370489-2-claudiu.beznea.uj@bp.renesas.com
Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
---
 include/dt-bindings/clock/r9a08g045-cpg.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/dt-bindings/clock/r9a08g045-cpg.h b/include/dt-bindings/clock/r9a08g045-cpg.h
index 8281e9caf3a9..311521fe4b59 100644
--- a/include/dt-bindings/clock/r9a08g045-cpg.h
+++ b/include/dt-bindings/clock/r9a08g045-cpg.h
@@ -308,5 +308,6 @@
 #define R9A08G045_PD_DDR		64
 #define R9A08G045_PD_TZCDDR		65
 #define R9A08G045_PD_OTFDE_DDR		66
+#define R9A08G045_PD_RTC		67
 
 #endif /* __DT_BINDINGS_CLOCK_R9A08G045_CPG_H__ */
-- 
cgit v1.2.3


From ec24643bdd625971933451f22b8e33d364920f6e Mon Sep 17 00:00:00 2001
From: Vibhore Vardhan <vibhore@ti.com>
Date: Mon, 7 Oct 2024 08:08:56 +0200
Subject: firmware: ti_sci: Add system suspend and resume call

Introduce system suspend call that enables the ti_sci driver to support
low power mode when the user space issues a suspend to mem.

The following power management operations defined in the TISCI
Low Power Mode API [1] are implemented to support suspend and resume:

1) TISCI_MSG_PREPARE_SLEEP
Prepare the SOC for entering into a low power mode and
provide details to firmware about the state being entered.

2) TISCI_MSG_SET_IO_ISOLATION
Control the IO isolation for Low Power Mode.

Also, write a ti_sci_prepare_system_suspend call to be used in the driver
suspend handler to allow the system to identify the low power mode being
entered and if necessary, send TISCI_MSG_PREPARE_SLEEP with information
about the mode being entered.

Sysfw version >= 10.00.04 support LPM_DM_MANAGED capability [2], where
Device Mgr firmware now manages which low power mode is chosen. Going
forward, this is the default configuration supported for TI AM62 family
of devices. The state chosen by the DM can be influenced by sending
constraints using the new LPM constraint APIs.

In case the firmware does not support LPM_DM_MANAGED mode, the mode
selection logic can be extended as needed. If no suspend-to-RAM modes
are supported, return without taking any action.

We're using "pm_suspend_target_state" to map the kernel's target suspend
state to SysFW low power mode. Make sure this is available only when
CONFIG_SUSPEND is enabled.

Suspend has to be split into two parts, ti_sci_suspend() will send
the prepare sleep message to prepare suspend. ti_sci_suspend_noirq()
sets IO isolation which needs to be done as late as possible to avoid
any issues. On resume this has to be done as early as possible.

[1] https://software-dl.ti.com/tisci/esd/latest/2_tisci_msgs/pm/lpm.html

Co-developed-by: Dave Gerlach <d-gerlach@ti.com>
Signed-off-by: Dave Gerlach <d-gerlach@ti.com>
Signed-off-by: Georgi Vlaev <g-vlaev@ti.com>
Signed-off-by: Dhruva Gole <d-gole@ti.com>
Signed-off-by: Vibhore Vardhan <vibhore@ti.com>
Signed-off-by: Kevin Hilman <khilman@baylibre.com>
Tested-by: Dhruva Gole <d-gole@ti.com>
Signed-off-by: Markus Schneider-Pargmann <msp@baylibre.com>
Tested-by: Roger Quadros <rogerq@kernel.org>
Link: https://lore.kernel.org/r/20241007-tisci-syssuspendresume-v13-3-ed54cd659a49@baylibre.com
Signed-off-by: Nishanth Menon <nm@ti.com>
---
 include/linux/soc/ti/ti_sci_protocol.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/linux/soc/ti/ti_sci_protocol.h b/include/linux/soc/ti/ti_sci_protocol.h
index bd0d11af76c5..1f1871e23f76 100644
--- a/include/linux/soc/ti/ti_sci_protocol.h
+++ b/include/linux/soc/ti/ti_sci_protocol.h
@@ -195,6 +195,10 @@ struct ti_sci_clk_ops {
 			u64 *current_freq);
 };
 
+/* TISCI LPM IO isolation control values */
+#define TISCI_MSG_VALUE_IO_ENABLE			1
+#define TISCI_MSG_VALUE_IO_DISABLE			0
+
 /**
  * struct ti_sci_resource_desc - Description of TI SCI resource instance range.
  * @start:	Start index of the first resource range.
-- 
cgit v1.2.3


From 60357991f6b9d4bd4dc442a368da3f468cfa4903 Mon Sep 17 00:00:00 2001
From: Dave Gerlach <d-gerlach@ti.com>
Date: Mon, 7 Oct 2024 08:08:57 +0200
Subject: firmware: ti_sci: Introduce Power Management Ops

Introduce power management ops supported by the TISCI
Low Power Mode API [1].

1) TISCI_MSG_LPM_WAKE_REASON
Get which wake up source woke the SoC from Low Power Mode.
The wake up source IDs will be common for all K3 platforms.

2) TISCI_MSG_LPM_SET_DEVICE_CONSTRAINT
Set LPM constraint on behalf of a device. By setting a constraint, the
device ensures that it will not be powered off or reset in the selected
mode.

3) TISCI_MSG_LPM_SET_LATENCY_CONSTRAINT
Set LPM resume latency constraint. By setting a constraint, the host
ensures that the resume time from selected mode will be less than the
constraint value.

[1] https://software-dl.ti.com/tisci/esd/latest/2_tisci_msgs/pm/lpm.html

Signed-off-by: Dave Gerlach <d-gerlach@ti.com>
[g-vlaev@ti.com: LPM_WAKE_REASON and IO_ISOLATION support]
Signed-off-by: Georgi Vlaev <g-vlaev@ti.com>
[a-kaur@ti.com: SET_DEVICE_CONSTRAINT support]
Signed-off-by: Akashdeep Kaur <a-kaur@ti.com>
[vibhore@ti.com: SET_LATENCY_CONSTRAINT support]
Signed-off-by: Vibhore Vardhan <vibhore@ti.com>
Signed-off-by: Kevin Hilman <khilman@baylibre.com>
Reviewed-by: Akashdeep Kaur <a-kaur@ti.com>
Tested-by: Dhruva Gole <d-gole@ti.com>
Signed-off-by: Markus Schneider-Pargmann <msp@baylibre.com>
Tested-by: Kevin Hilman <khilman@baylibre.com>
Tested-by: Roger Quadros <rogerq@kernel.org>
Acked-by: Dhruva Gole <d-gole@ti.com>
Link: https://lore.kernel.org/r/20241007-tisci-syssuspendresume-v13-4-ed54cd659a49@baylibre.com
Signed-off-by: Nishanth Menon <nm@ti.com>
---
 include/linux/soc/ti/ti_sci_protocol.h | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

(limited to 'include')

diff --git a/include/linux/soc/ti/ti_sci_protocol.h b/include/linux/soc/ti/ti_sci_protocol.h
index 1f1871e23f76..fd104b666836 100644
--- a/include/linux/soc/ti/ti_sci_protocol.h
+++ b/include/linux/soc/ti/ti_sci_protocol.h
@@ -199,6 +199,31 @@ struct ti_sci_clk_ops {
 #define TISCI_MSG_VALUE_IO_ENABLE			1
 #define TISCI_MSG_VALUE_IO_DISABLE			0
 
+/* TISCI LPM constraint state values */
+#define TISCI_MSG_CONSTRAINT_SET			1
+#define TISCI_MSG_CONSTRAINT_CLR			0
+
+/**
+ * struct ti_sci_pm_ops - Low Power Mode (LPM) control operations
+ * @lpm_wake_reason: Get the wake up source that woke the SoC from LPM
+ *		- source: The wake up source that woke soc from LPM.
+ *		- timestamp: Timestamp at which soc woke.
+ * @set_device_constraint: Set LPM constraint on behalf of a device
+ *		- id: Device Identifier
+ *		- state: The desired state of device constraint: set or clear.
+ * @set_latency_constraint: Set LPM resume latency constraint
+ *		- latency: maximum acceptable latency to wake up from low power mode
+ *		- state: The desired state of latency constraint: set or clear.
+ */
+struct ti_sci_pm_ops {
+	int (*lpm_wake_reason)(const struct ti_sci_handle *handle,
+			       u32 *source, u64 *timestamp, u8 *pin, u8 *mode);
+	int (*set_device_constraint)(const struct ti_sci_handle *handle,
+				     u32 id, u8 state);
+	int (*set_latency_constraint)(const struct ti_sci_handle *handle,
+				      u16 latency, u8 state);
+};
+
 /**
  * struct ti_sci_resource_desc - Description of TI SCI resource instance range.
  * @start:	Start index of the first resource range.
@@ -543,6 +568,7 @@ struct ti_sci_ops {
 	struct ti_sci_core_ops core_ops;
 	struct ti_sci_dev_ops dev_ops;
 	struct ti_sci_clk_ops clk_ops;
+	struct ti_sci_pm_ops pm_ops;
 	struct ti_sci_rm_core_ops rm_core_ops;
 	struct ti_sci_rm_irq_ops rm_irq_ops;
 	struct ti_sci_rm_ringacc_ops rm_ring_ops;
-- 
cgit v1.2.3


From 3ae80b375739495e36fc6143ff27716fe390a13e Mon Sep 17 00:00:00 2001
From: Philipp Stanner <pstanner@redhat.com>
Date: Wed, 23 Oct 2024 16:15:31 +0200
Subject: drm/sched: warn about drm_sched_job_init()'s partial init

drm_sched_job_init()'s name suggests that after the function succeeded,
parameter "job" will be fully initialized. This is not the case; some
members are only later set, notably drm_sched_job.sched by
drm_sched_job_arm().

Document that drm_sched_job_init() does not set all struct members.

Document the lifetime of drm_sched_job.sched.

Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Philipp Stanner <pstanner@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20241023141530.113370-2-pstanner@redhat.com
---
 include/drm/gpu_scheduler.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include')

diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
index ab161289d1bf..95e17504e46a 100644
--- a/include/drm/gpu_scheduler.h
+++ b/include/drm/gpu_scheduler.h
@@ -340,6 +340,14 @@ struct drm_sched_fence *to_drm_sched_fence(struct dma_fence *f);
 struct drm_sched_job {
 	struct spsc_node		queue_node;
 	struct list_head		list;
+
+	/**
+	 * @sched:
+	 *
+	 * The scheduler this job is or will be scheduled on. Gets set by
+	 * drm_sched_job_arm(). Valid until drm_sched_backend_ops.free_job()
+	 * has finished.
+	 */
 	struct drm_gpu_scheduler	*sched;
 	struct drm_sched_fence		*s_fence;
 
-- 
cgit v1.2.3


From 037bc38b298c9a8de64f84b253c0b472228bbb10 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Thu, 10 Oct 2024 11:23:03 -0700
Subject: KVM: Drop KVM_ERR_PTR_BAD_PAGE and instead return NULL to indicate an
 error
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Remove KVM_ERR_PTR_BAD_PAGE and instead return NULL, as "bad page" is just
a leftover bit of weirdness from days of old when KVM stuffed a "bad" page
into the guest instead of actually handling missing pages.  See commit
cea7bb21280e ("KVM: MMU: Make gfn_to_page() always safe").

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Tested-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Tested-by: Dmitry Osipenko <dmitry.osipenko@collabora.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-ID: <20241010182427.1434605-2-seanjc@google.com>
---
 include/linux/kvm_host.h | 7 -------
 1 file changed, 7 deletions(-)

(limited to 'include')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 45be36e5285f..0b280e5bad0a 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -153,13 +153,6 @@ static inline bool kvm_is_error_gpa(gpa_t gpa)
 	return gpa == INVALID_GPA;
 }
 
-#define KVM_ERR_PTR_BAD_PAGE	(ERR_PTR(-ENOENT))
-
-static inline bool is_error_page(struct page *page)
-{
-	return IS_ERR(page);
-}
-
 #define KVM_REQUEST_MASK           GENMASK(7,0)
 #define KVM_REQUEST_NO_WAKEUP      BIT(8)
 #define KVM_REQUEST_WAIT           BIT(9)
-- 
cgit v1.2.3


From 3af91068b7e10dba438f70eae94d877f20842fa1 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Thu, 10 Oct 2024 11:23:05 -0700
Subject: KVM: Add kvm_release_page_unused() API to put pages that KVM never
 consumes
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add an API to release an unused page, i.e. to put a page without marking
it accessed or dirty.  The API will be used when KVM faults-in a page but
bails before installing the guest mapping (and other similar flows).

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Tested-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Tested-by: Dmitry Osipenko <dmitry.osipenko@collabora.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-ID: <20241010182427.1434605-4-seanjc@google.com>
---
 include/linux/kvm_host.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 0b280e5bad0a..8de9acb0b35e 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1216,6 +1216,15 @@ unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable);
 unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot, gfn_t gfn);
 unsigned long gfn_to_hva_memslot_prot(struct kvm_memory_slot *slot, gfn_t gfn,
 				      bool *writable);
+
+static inline void kvm_release_page_unused(struct page *page)
+{
+	if (!page)
+		return;
+
+	put_page(page);
+}
+
 void kvm_release_page_clean(struct page *page);
 void kvm_release_page_dirty(struct page *page);
 
-- 
cgit v1.2.3


From 6419bc52072b928acb764766968c672d5fede802 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Thu, 10 Oct 2024 11:23:13 -0700
Subject: KVM: Rename gfn_to_page_many_atomic() to kvm_prefetch_pages()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Rename gfn_to_page_many_atomic() to kvm_prefetch_pages() to try and
communicate its true purpose, as the "atomic" aspect is essentially a
side effect of the fact that x86 uses the API while holding mmu_lock.
E.g. even if mmu_lock weren't held, KVM wouldn't want to fault-in pages,
as the goal is to opportunistically grab surrounding pages that have
already been accessed and/or dirtied by the host, and to do so quickly.

Tested-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Tested-by: Dmitry Osipenko <dmitry.osipenko@collabora.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-ID: <20241010182427.1434605-12-seanjc@google.com>
---
 include/linux/kvm_host.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 8de9acb0b35e..6a3976c1a218 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1207,8 +1207,8 @@ void kvm_arch_flush_shadow_all(struct kvm *kvm);
 void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
 				   struct kvm_memory_slot *slot);
 
-int gfn_to_page_many_atomic(struct kvm_memory_slot *slot, gfn_t gfn,
-			    struct page **pages, int nr_pages);
+int kvm_prefetch_pages(struct kvm_memory_slot *slot, gfn_t gfn,
+		       struct page **pages, int nr_pages);
 
 struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn);
 unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn);
-- 
cgit v1.2.3


From e2d2ca71ac03c748dbc44e0dd7dc1557befb1ab6 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Thu, 10 Oct 2024 11:23:14 -0700
Subject: KVM: Drop @atomic param from gfn=>pfn and hva=>pfn APIs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Drop @atomic from the myriad "to_pfn" APIs now that all callers pass
"false", and remove a comment blurb about KVM running only the "GUP fast"
part in atomic context.

No functional change intended.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Tested-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Tested-by: Dmitry Osipenko <dmitry.osipenko@collabora.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-ID: <20241010182427.1434605-13-seanjc@google.com>
---
 include/linux/kvm_host.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 6a3976c1a218..32e23e05a8c3 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1232,9 +1232,8 @@ kvm_pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn);
 kvm_pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault,
 		      bool *writable);
 kvm_pfn_t gfn_to_pfn_memslot(const struct kvm_memory_slot *slot, gfn_t gfn);
-kvm_pfn_t gfn_to_pfn_memslot_atomic(const struct kvm_memory_slot *slot, gfn_t gfn);
 kvm_pfn_t __gfn_to_pfn_memslot(const struct kvm_memory_slot *slot, gfn_t gfn,
-			       bool atomic, bool interruptible, bool *async,
+			       bool interruptible, bool *async,
 			       bool write_fault, bool *writable, hva_t *hva);
 
 void kvm_release_pfn_clean(kvm_pfn_t pfn);
-- 
cgit v1.2.3


From 6769d1bcd3509ad2d2ee04da122c465a11a165b4 Mon Sep 17 00:00:00 2001
From: David Stevens <stevensd@chromium.org>
Date: Thu, 10 Oct 2024 11:23:18 -0700
Subject: KVM: Replace "async" pointer in gfn=>pfn with "no_wait" and error
 code
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add a pfn error code to communicate that hva_to_pfn() failed because I/O
was needed and disallowed, and convert @async to a constant @no_wait
boolean.  This will allow eliminating the @no_wait param by having callers
pass in FOLL_NOWAIT along with other FOLL_* flags.

Tested-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: David Stevens <stevensd@chromium.org>
Co-developed-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Tested-by: Dmitry Osipenko <dmitry.osipenko@collabora.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-ID: <20241010182427.1434605-17-seanjc@google.com>
---
 include/linux/kvm_host.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 32e23e05a8c3..dc15a9a64408 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -97,6 +97,7 @@
 #define KVM_PFN_ERR_HWPOISON	(KVM_PFN_ERR_MASK + 1)
 #define KVM_PFN_ERR_RO_FAULT	(KVM_PFN_ERR_MASK + 2)
 #define KVM_PFN_ERR_SIGPENDING	(KVM_PFN_ERR_MASK + 3)
+#define KVM_PFN_ERR_NEEDS_IO	(KVM_PFN_ERR_MASK + 4)
 
 /*
  * error pfns indicate that the gfn is in slot but faild to
@@ -1233,7 +1234,7 @@ kvm_pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault,
 		      bool *writable);
 kvm_pfn_t gfn_to_pfn_memslot(const struct kvm_memory_slot *slot, gfn_t gfn);
 kvm_pfn_t __gfn_to_pfn_memslot(const struct kvm_memory_slot *slot, gfn_t gfn,
-			       bool interruptible, bool *async,
+			       bool interruptible, bool no_wait,
 			       bool write_fault, bool *writable, hva_t *hva);
 
 void kvm_release_pfn_clean(kvm_pfn_t pfn);
-- 
cgit v1.2.3


From cccefb0a0d3b4f7b41b1921538087dd7031876ac Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Thu, 10 Oct 2024 11:23:20 -0700
Subject: KVM: Drop unused "hva" pointer from __gfn_to_pfn_memslot()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Drop @hva from __gfn_to_pfn_memslot() now that all callers pass NULL.

No functional change intended.

Tested-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Tested-by: Dmitry Osipenko <dmitry.osipenko@collabora.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-ID: <20241010182427.1434605-19-seanjc@google.com>
---
 include/linux/kvm_host.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index dc15a9a64408..2c9eb472f059 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1235,7 +1235,7 @@ kvm_pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault,
 kvm_pfn_t gfn_to_pfn_memslot(const struct kvm_memory_slot *slot, gfn_t gfn);
 kvm_pfn_t __gfn_to_pfn_memslot(const struct kvm_memory_slot *slot, gfn_t gfn,
 			       bool interruptible, bool no_wait,
-			       bool write_fault, bool *writable, hva_t *hva);
+			       bool write_fault, bool *writable);
 
 void kvm_release_pfn_clean(kvm_pfn_t pfn);
 void kvm_release_pfn_dirty(kvm_pfn_t pfn);
-- 
cgit v1.2.3


From ef7db98e477f4b379fac3131bf94c33774c4a211 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Thu, 10 Oct 2024 11:23:24 -0700
Subject: KVM: Use NULL for struct page pointer to indicate mremapped memory
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Drop yet another unnecessary magic page value from KVM, as there's zero
reason to use a poisoned pointer to indicate "no page".  If KVM uses a
NULL page pointer, the kernel will explode just as quickly as if KVM uses
a poisoned pointer.  Never mind the fact that such usage would be a
blatant and egregious KVM bug.

Tested-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Tested-by: Dmitry Osipenko <dmitry.osipenko@collabora.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-ID: <20241010182427.1434605-23-seanjc@google.com>
---
 include/linux/kvm_host.h | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 2c9eb472f059..cd6f5cc1930f 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -273,16 +273,12 @@ enum {
 	READING_SHADOW_PAGE_TABLES,
 };
 
-#define KVM_UNMAPPED_PAGE	((void *) 0x500 + POISON_POINTER_DELTA)
-
 struct kvm_host_map {
 	/*
 	 * Only valid if the 'pfn' is managed by the host kernel (i.e. There is
 	 * a 'struct page' for it. When using mem= kernel parameter some memory
 	 * can be used as guest memory but they are not managed by host
 	 * kernel).
-	 * If 'pfn' is not managed by the host kernel, this field is
-	 * initialized to KVM_UNMAPPED_PAGE.
 	 */
 	struct page *page;
 	void *hva;
-- 
cgit v1.2.3


From 2ff072ba7ad2deb1c3b2d231faa0ac3a25e2451b Mon Sep 17 00:00:00 2001
From: David Stevens <stevensd@chromium.org>
Date: Thu, 10 Oct 2024 11:23:32 -0700
Subject: KVM: Migrate kvm_vcpu_map() to kvm_follow_pfn()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Migrate kvm_vcpu_map() to kvm_follow_pfn(), and have it track whether or
not the map holds a refcounted struct page.  Precisely tracking struct
page references will eventually allow removing kvm_pfn_to_refcounted_page()
and its various wrappers.

Signed-off-by: David Stevens <stevensd@chromium.org>
[sean: use a pointer instead of a boolean]
Tested-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Tested-by: Dmitry Osipenko <dmitry.osipenko@collabora.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-ID: <20241010182427.1434605-31-seanjc@google.com>
---
 include/linux/kvm_host.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index cd6f5cc1930f..35e1beb017dd 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -280,6 +280,7 @@ struct kvm_host_map {
 	 * can be used as guest memory but they are not managed by host
 	 * kernel).
 	 */
+	struct page *refcounted_page;
 	struct page *page;
 	void *hva;
 	kvm_pfn_t pfn;
@@ -1238,7 +1239,6 @@ void kvm_release_pfn_dirty(kvm_pfn_t pfn);
 void kvm_set_pfn_dirty(kvm_pfn_t pfn);
 void kvm_set_pfn_accessed(kvm_pfn_t pfn);
 
-void kvm_release_pfn(kvm_pfn_t pfn, bool dirty);
 int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset,
 			int len);
 int kvm_read_guest(struct kvm *kvm, gpa_t gpa, void *data, unsigned long len);
-- 
cgit v1.2.3


From 2bcb52a3602bf4cbc55d8fb4da00c930f83d7789 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Thu, 10 Oct 2024 11:23:33 -0700
Subject: KVM: Pin (as in FOLL_PIN) pages during kvm_vcpu_map()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Pin, as in FOLL_PIN, pages when mapping them for direct access by KVM.
As per Documentation/core-api/pin_user_pages.rst, writing to a page that
was gotten via FOLL_GET is explicitly disallowed.

  Correct (uses FOLL_PIN calls):
      pin_user_pages()
      write to the data within the pages
      unpin_user_pages()

  INCORRECT (uses FOLL_GET calls):
      get_user_pages()
      write to the data within the pages
      put_page()

Unfortunately, FOLL_PIN is a "private" flag, and so kvm_follow_pfn must
use a one-off bool instead of being able to piggyback the "flags" field.

Link: https://lwn.net/Articles/930667
Link: https://lore.kernel.org/all/cover.1683044162.git.lstoakes@gmail.com
Tested-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Tested-by: Dmitry Osipenko <dmitry.osipenko@collabora.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-ID: <20241010182427.1434605-32-seanjc@google.com>
---
 include/linux/kvm_host.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 35e1beb017dd..b4c541fa5a1f 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -280,7 +280,7 @@ struct kvm_host_map {
 	 * can be used as guest memory but they are not managed by host
 	 * kernel).
 	 */
-	struct page *refcounted_page;
+	struct page *pinned_page;
 	struct page *page;
 	void *hva;
 	kvm_pfn_t pfn;
-- 
cgit v1.2.3


From 365e319208442a0807a96e9ea4d0b1fa338f1929 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Thu, 10 Oct 2024 11:23:35 -0700
Subject: KVM: Pass in write/dirty to kvm_vcpu_map(), not kvm_vcpu_unmap()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Now that all kvm_vcpu_{,un}map() users pass "true" for @dirty, have them
pass "true" as a @writable param to kvm_vcpu_map(), and thus create a
read-only mapping when possible.

Note, creating read-only mappings can be theoretically slower, as they
don't play nice with fast GUP due to the need to break CoW before mapping
the underlying PFN.  But practically speaking, creating a mapping isn't
a super hot path, and getting a writable mapping for reading is weird and
confusing.

Tested-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Tested-by: Dmitry Osipenko <dmitry.osipenko@collabora.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-ID: <20241010182427.1434605-34-seanjc@google.com>
---
 include/linux/kvm_host.h | 20 ++++++++++++++++++--
 1 file changed, 18 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index b4c541fa5a1f..101dbf2be1ce 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -285,6 +285,7 @@ struct kvm_host_map {
 	void *hva;
 	kvm_pfn_t pfn;
 	kvm_pfn_t gfn;
+	bool writable;
 };
 
 /*
@@ -1311,8 +1312,23 @@ void mark_page_dirty(struct kvm *kvm, gfn_t gfn);
 
 struct kvm_memslots *kvm_vcpu_memslots(struct kvm_vcpu *vcpu);
 struct kvm_memory_slot *kvm_vcpu_gfn_to_memslot(struct kvm_vcpu *vcpu, gfn_t gfn);
-int kvm_vcpu_map(struct kvm_vcpu *vcpu, gpa_t gpa, struct kvm_host_map *map);
-void kvm_vcpu_unmap(struct kvm_vcpu *vcpu, struct kvm_host_map *map, bool dirty);
+
+int __kvm_vcpu_map(struct kvm_vcpu *vcpu, gpa_t gpa, struct kvm_host_map *map,
+		   bool writable);
+void kvm_vcpu_unmap(struct kvm_vcpu *vcpu, struct kvm_host_map *map);
+
+static inline int kvm_vcpu_map(struct kvm_vcpu *vcpu, gpa_t gpa,
+			       struct kvm_host_map *map)
+{
+	return __kvm_vcpu_map(vcpu, gpa, map, true);
+}
+
+static inline int kvm_vcpu_map_readonly(struct kvm_vcpu *vcpu, gpa_t gpa,
+					struct kvm_host_map *map)
+{
+	return __kvm_vcpu_map(vcpu, gpa, map, false);
+}
+
 unsigned long kvm_vcpu_gfn_to_hva(struct kvm_vcpu *vcpu, gfn_t gfn);
 unsigned long kvm_vcpu_gfn_to_hva_prot(struct kvm_vcpu *vcpu, gfn_t gfn, bool *writable);
 int kvm_vcpu_read_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn, void *data, int offset,
-- 
cgit v1.2.3


From 21dd877060d49f1f57901f929189653fc42ac37a Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Thu, 10 Oct 2024 11:23:44 -0700
Subject: KVM: Move declarations of memslot accessors up in kvm_host.h
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Move the memslot lookup helpers further up in kvm_host.h so that they can
be used by inlined "to pfn" wrappers.

No functional change intended.

Tested-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Tested-by: Dmitry Osipenko <dmitry.osipenko@collabora.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-ID: <20241010182427.1434605-43-seanjc@google.com>
---
 include/linux/kvm_host.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 101dbf2be1ce..8d35a36e7707 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1168,6 +1168,10 @@ static inline bool kvm_memslot_iter_is_valid(struct kvm_memslot_iter *iter, gfn_
 	     kvm_memslot_iter_is_valid(iter, end);			\
 	     kvm_memslot_iter_next(iter))
 
+struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn);
+struct kvm_memslots *kvm_vcpu_memslots(struct kvm_vcpu *vcpu);
+struct kvm_memory_slot *kvm_vcpu_gfn_to_memslot(struct kvm_vcpu *vcpu, gfn_t gfn);
+
 /*
  * KVM_SET_USER_MEMORY_REGION ioctl allows the following operations:
  * - create a new memory slot
@@ -1303,16 +1307,12 @@ int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
 })
 
 int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len);
-struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn);
 bool kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn);
 bool kvm_vcpu_is_visible_gfn(struct kvm_vcpu *vcpu, gfn_t gfn);
 unsigned long kvm_host_page_size(struct kvm_vcpu *vcpu, gfn_t gfn);
 void mark_page_dirty_in_slot(struct kvm *kvm, const struct kvm_memory_slot *memslot, gfn_t gfn);
 void mark_page_dirty(struct kvm *kvm, gfn_t gfn);
 
-struct kvm_memslots *kvm_vcpu_memslots(struct kvm_vcpu *vcpu);
-struct kvm_memory_slot *kvm_vcpu_gfn_to_memslot(struct kvm_vcpu *vcpu, gfn_t gfn);
-
 int __kvm_vcpu_map(struct kvm_vcpu *vcpu, gpa_t gpa, struct kvm_host_map *map,
 		   bool writable);
 void kvm_vcpu_unmap(struct kvm_vcpu *vcpu, struct kvm_host_map *map);
-- 
cgit v1.2.3


From 1c7b627e930624dd64ee906df554c8f2bad628ff Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Thu, 10 Oct 2024 11:23:45 -0700
Subject: KVM: Add kvm_faultin_pfn() to specifically service guest page faults
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add a new dedicated API, kvm_faultin_pfn(), for servicing guest page
faults, i.e. for getting pages/pfns that will be mapped into the guest via
an mmu_notifier-protected KVM MMU.  Keep struct kvm_follow_pfn buried in
internal code, as having __kvm_faultin_pfn() take "out" params is actually
cleaner for several architectures, e.g. it allows the caller to have its
own "page fault" structure without having to marshal data to/from
kvm_follow_pfn.

Long term, common KVM would ideally provide a kvm_page_fault structure, a
la x86's struct of the same name.  But all architectures need to be
converted to a common API before that can happen.

Tested-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Tested-by: Dmitry Osipenko <dmitry.osipenko@collabora.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-ID: <20241010182427.1434605-44-seanjc@google.com>
---
 include/linux/kvm_host.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'include')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 8d35a36e7707..a63b0325d3e2 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1231,6 +1231,18 @@ static inline void kvm_release_page_unused(struct page *page)
 void kvm_release_page_clean(struct page *page);
 void kvm_release_page_dirty(struct page *page);
 
+kvm_pfn_t __kvm_faultin_pfn(const struct kvm_memory_slot *slot, gfn_t gfn,
+			    unsigned int foll, bool *writable,
+			    struct page **refcounted_page);
+
+static inline kvm_pfn_t kvm_faultin_pfn(struct kvm_vcpu *vcpu, gfn_t gfn,
+					bool write, bool *writable,
+					struct page **refcounted_page)
+{
+	return __kvm_faultin_pfn(kvm_vcpu_gfn_to_memslot(vcpu, gfn), gfn,
+				 write ? FOLL_WRITE : 0, writable, refcounted_page);
+}
+
 kvm_pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn);
 kvm_pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault,
 		      bool *writable);
-- 
cgit v1.2.3


From 1fbee5b01a0fd27db571eed757682a7c20045107 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Thu, 10 Oct 2024 11:23:48 -0700
Subject: KVM: guest_memfd: Provide "struct page" as output from
 kvm_gmem_get_pfn()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Provide the "struct page" associated with a guest_memfd pfn as an output
from __kvm_gmem_get_pfn() so that KVM guest page fault handlers can
directly put the page instead of having to rely on
kvm_pfn_to_refcounted_page().

Tested-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Tested-by: Dmitry Osipenko <dmitry.osipenko@collabora.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-ID: <20241010182427.1434605-47-seanjc@google.com>
---
 include/linux/kvm_host.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index a63b0325d3e2..6efdc00b4254 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -2487,11 +2487,13 @@ static inline bool kvm_mem_is_private(struct kvm *kvm, gfn_t gfn)
 
 #ifdef CONFIG_KVM_PRIVATE_MEM
 int kvm_gmem_get_pfn(struct kvm *kvm, struct kvm_memory_slot *slot,
-		     gfn_t gfn, kvm_pfn_t *pfn, int *max_order);
+		     gfn_t gfn, kvm_pfn_t *pfn, struct page **page,
+		     int *max_order);
 #else
 static inline int kvm_gmem_get_pfn(struct kvm *kvm,
 				   struct kvm_memory_slot *slot, gfn_t gfn,
-				   kvm_pfn_t *pfn, int *max_order)
+				   kvm_pfn_t *pfn, struct page **page,
+				   int *max_order)
 {
 	KVM_BUG_ON(1, kvm);
 	return -EIO;
-- 
cgit v1.2.3


From dc06193532af4ba88ed20daeef88f22b053ebb91 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Thu, 10 Oct 2024 11:23:51 -0700
Subject: KVM: Move x86's API to release a faultin page to common KVM
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Move KVM x86's helper that "finishes" the faultin process to common KVM
so that the logic can be shared across all architectures.  Note, not all
architectures implement a fast page fault path, but the gist of the
comment applies to all architectures.

Tested-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Tested-by: Dmitry Osipenko <dmitry.osipenko@collabora.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-ID: <20241010182427.1434605-50-seanjc@google.com>
---
 include/linux/kvm_host.h | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

(limited to 'include')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 6efdc00b4254..3e06393e5f1e 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1231,6 +1231,32 @@ static inline void kvm_release_page_unused(struct page *page)
 void kvm_release_page_clean(struct page *page);
 void kvm_release_page_dirty(struct page *page);
 
+static inline void kvm_release_faultin_page(struct kvm *kvm, struct page *page,
+					    bool unused, bool dirty)
+{
+	lockdep_assert_once(lockdep_is_held(&kvm->mmu_lock) || unused);
+
+	if (!page)
+		return;
+
+	/*
+	 * If the page that KVM got from the *primary MMU* is writable, and KVM
+	 * installed or reused a SPTE, mark the page/folio dirty.  Note, this
+	 * may mark a folio dirty even if KVM created a read-only SPTE, e.g. if
+	 * the GFN is write-protected.  Folios can't be safely marked dirty
+	 * outside of mmu_lock as doing so could race with writeback on the
+	 * folio.  As a result, KVM can't mark folios dirty in the fast page
+	 * fault handler, and so KVM must (somewhat) speculatively mark the
+	 * folio dirty if KVM could locklessly make the SPTE writable.
+	 */
+	if (unused)
+		kvm_release_page_unused(page);
+	else if (dirty)
+		kvm_release_page_dirty(page);
+	else
+		kvm_release_page_clean(page);
+}
+
 kvm_pfn_t __kvm_faultin_pfn(const struct kvm_memory_slot *slot, gfn_t gfn,
 			    unsigned int foll, bool *writable,
 			    struct page **refcounted_page);
-- 
cgit v1.2.3


From f42e289a2095f61755e6ca5fd1370d441bf589d5 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Thu, 10 Oct 2024 11:24:18 -0700
Subject: KVM: Add support for read-only usage of gfn_to_page()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Rework gfn_to_page() to support read-only accesses so that it can be used
by arm64 to get MTE tags out of guest memory.

Opportunistically rewrite the comment to be even more stern about using
gfn_to_page(), as there are very few scenarios where requiring a struct
page is actually the right thing to do (though there are such scenarios).
Add a FIXME to call out that KVM probably should be pinning pages, not
just getting pages.

Tested-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Tested-by: Dmitry Osipenko <dmitry.osipenko@collabora.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-ID: <20241010182427.1434605-77-seanjc@google.com>
---
 include/linux/kvm_host.h | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 3e06393e5f1e..96cf9e5660c3 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1213,7 +1213,12 @@ void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
 int kvm_prefetch_pages(struct kvm_memory_slot *slot, gfn_t gfn,
 		       struct page **pages, int nr_pages);
 
-struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn);
+struct page *__gfn_to_page(struct kvm *kvm, gfn_t gfn, bool write);
+static inline struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn)
+{
+	return __gfn_to_page(kvm, gfn, true);
+}
+
 unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn);
 unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable);
 unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot, gfn_t gfn);
-- 
cgit v1.2.3


From 06cdaff80e50e3fb74e5e3101e1d5d7aa8b68da6 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Thu, 10 Oct 2024 11:24:21 -0700
Subject: KVM: Drop gfn_to_pfn() APIs now that all users are gone
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Drop gfn_to_pfn() and all its variants now that all users are gone.

No functional change intended.

Tested-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Tested-by: Dmitry Osipenko <dmitry.osipenko@collabora.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-ID: <20241010182427.1434605-80-seanjc@google.com>
---
 include/linux/kvm_host.h | 8 --------
 1 file changed, 8 deletions(-)

(limited to 'include')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 96cf9e5660c3..4a1eaa40a215 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1274,14 +1274,6 @@ static inline kvm_pfn_t kvm_faultin_pfn(struct kvm_vcpu *vcpu, gfn_t gfn,
 				 write ? FOLL_WRITE : 0, writable, refcounted_page);
 }
 
-kvm_pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn);
-kvm_pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault,
-		      bool *writable);
-kvm_pfn_t gfn_to_pfn_memslot(const struct kvm_memory_slot *slot, gfn_t gfn);
-kvm_pfn_t __gfn_to_pfn_memslot(const struct kvm_memory_slot *slot, gfn_t gfn,
-			       bool interruptible, bool no_wait,
-			       bool write_fault, bool *writable);
-
 void kvm_release_pfn_clean(kvm_pfn_t pfn);
 void kvm_release_pfn_dirty(kvm_pfn_t pfn);
 void kvm_set_pfn_dirty(kvm_pfn_t pfn);
-- 
cgit v1.2.3


From 93b7da404f5b0b02a4211bbb784889f001d27953 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Thu, 10 Oct 2024 11:24:26 -0700
Subject: KVM: Drop APIs that manipulate "struct page" via pfns
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Remove all kvm_{release,set}_pfn_*() APIs now that all users are gone.

No functional change intended.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Tested-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Tested-by: Dmitry Osipenko <dmitry.osipenko@collabora.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-ID: <20241010182427.1434605-85-seanjc@google.com>
---
 include/linux/kvm_host.h | 5 -----
 1 file changed, 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 4a1eaa40a215..d045f8310a48 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1274,11 +1274,6 @@ static inline kvm_pfn_t kvm_faultin_pfn(struct kvm_vcpu *vcpu, gfn_t gfn,
 				 write ? FOLL_WRITE : 0, writable, refcounted_page);
 }
 
-void kvm_release_pfn_clean(kvm_pfn_t pfn);
-void kvm_release_pfn_dirty(kvm_pfn_t pfn);
-void kvm_set_pfn_dirty(kvm_pfn_t pfn);
-void kvm_set_pfn_accessed(kvm_pfn_t pfn);
-
 int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset,
 			int len);
 int kvm_read_guest(struct kvm *kvm, gpa_t gpa, void *data, unsigned long len);
-- 
cgit v1.2.3


From 8b15c3764c05ed8766709711d2054d96349dee8e Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Thu, 10 Oct 2024 11:24:27 -0700
Subject: KVM: Don't grab reference on VM_MIXEDMAP pfns that have a "struct
 page"
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Now that KVM no longer relies on an ugly heuristic to find its struct page
references, i.e. now that KVM can't get false positives on VM_MIXEDMAP
pfns, remove KVM's hack to elevate the refcount for pfns that happen to
have a valid struct page.  In addition to removing a long-standing wart
in KVM, this allows KVM to map non-refcounted struct page memory into the
guest, e.g. for exposing GPU TTM buffers to KVM guests.

Tested-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Tested-by: Dmitry Osipenko <dmitry.osipenko@collabora.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-ID: <20241010182427.1434605-86-seanjc@google.com>
---
 include/linux/kvm_host.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index d045f8310a48..02f0206fd2dc 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1730,9 +1730,6 @@ void kvm_arch_sync_events(struct kvm *kvm);
 
 int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu);
 
-struct page *kvm_pfn_to_refcounted_page(kvm_pfn_t pfn);
-bool kvm_is_zone_device_page(struct page *page);
-
 struct kvm_irq_ack_notifier {
 	struct hlist_node link;
 	unsigned gsi;
-- 
cgit v1.2.3


From 6860d28ccb2390b4eeda32ab2ce7eb10f71921e1 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 15 Oct 2024 12:08:39 +0200
Subject: timekeeping: Reorder struct timekeeper

struct timekeeper is ordered suboptimal vs. cachelines. The layout,
including the preceding seqcount (see struct tk_core in timekeeper.c) is:

 cacheline 0:   seqcount, tkr_mono
 cacheline 1:   tkr_raw, xtime_sec
 cacheline 2:   ktime_sec ... tai_offset, internal variables
 cacheline 3:	next_leap_ktime, raw_sec, internal variables
 cacheline 4:	internal variables

So any access to via ktime_get*() except for access to CLOCK_MONOTONIC_RAW
will use either cachelines 0 + 1 or cachelines 0 + 2. Access to
CLOCK_MONOTONIC_RAW uses cachelines 0 + 1 + 3.

Reorder the members so that the result is more efficient:

 cacheline 0:   seqcount, tkr_mono
 cacheline 1:   xtime_sec, ktime_sec ... tai_offset
 cacheline 2:	tkr_raw, raw_sec
 cacheline 3:	internal variables
 cacheline 4:	internal variables

That means ktime_get*() will access cacheline 0 + 1 and CLOCK_MONOTONIC_RAW
access will use cachelines 0 + 2.

Update kernel-doc and fix formatting issues while at it. Also fix a typo
in struct tk_read_base kernel-doc.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Anna-Maria Behnsen <anna-maria@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: John Stultz <jstultz@google.com>
Link: https://lore.kernel.org/all/20241015100839.12702-1-anna-maria@linutronix.de
---
 include/linux/timekeeper_internal.h | 106 ++++++++++++++++++++++--------------
 1 file changed, 65 insertions(+), 41 deletions(-)

(limited to 'include')

diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h
index 902c20ef495a..a3b6380a7777 100644
--- a/include/linux/timekeeper_internal.h
+++ b/include/linux/timekeeper_internal.h
@@ -26,7 +26,7 @@
  * occupies a single 64byte cache line.
  *
  * The struct is separate from struct timekeeper as it is also used
- * for a fast NMI safe accessors.
+ * for the fast NMI safe accessors.
  *
  * @base_real is for the fast NMI safe accessor to allow reading clock
  * realtime from any context.
@@ -44,33 +44,41 @@ struct tk_read_base {
 
 /**
  * struct timekeeper - Structure holding internal timekeeping values.
- * @tkr_mono:		The readout base structure for CLOCK_MONOTONIC
- * @tkr_raw:		The readout base structure for CLOCK_MONOTONIC_RAW
- * @xtime_sec:		Current CLOCK_REALTIME time in seconds
- * @ktime_sec:		Current CLOCK_MONOTONIC time in seconds
- * @wall_to_monotonic:	CLOCK_REALTIME to CLOCK_MONOTONIC offset
- * @offs_real:		Offset clock monotonic -> clock realtime
- * @offs_boot:		Offset clock monotonic -> clock boottime
- * @offs_tai:		Offset clock monotonic -> clock tai
- * @tai_offset:		The current UTC to TAI offset in seconds
- * @clock_was_set_seq:	The sequence number of clock was set events
- * @cs_was_changed_seq:	The sequence number of clocksource change events
- * @next_leap_ktime:	CLOCK_MONOTONIC time value of a pending leap-second
- * @raw_sec:		CLOCK_MONOTONIC_RAW  time in seconds
- * @monotonic_to_boot:	CLOCK_MONOTONIC to CLOCK_BOOTTIME offset
- * @cycle_interval:	Number of clock cycles in one NTP interval
- * @xtime_interval:	Number of clock shifted nano seconds in one NTP
- *			interval.
- * @xtime_remainder:	Shifted nano seconds left over when rounding
- *			@cycle_interval
- * @raw_interval:	Shifted raw nano seconds accumulated per NTP interval.
- * @ntp_error:		Difference between accumulated time and NTP time in ntp
- *			shifted nano seconds.
- * @ntp_error_shift:	Shift conversion between clock shifted nano seconds and
- *			ntp shifted nano seconds.
- * @last_warning:	Warning ratelimiter (DEBUG_TIMEKEEPING)
- * @underflow_seen:	Underflow warning flag (DEBUG_TIMEKEEPING)
- * @overflow_seen:	Overflow warning flag (DEBUG_TIMEKEEPING)
+ * @tkr_mono:			The readout base structure for CLOCK_MONOTONIC
+ * @xtime_sec:			Current CLOCK_REALTIME time in seconds
+ * @ktime_sec:			Current CLOCK_MONOTONIC time in seconds
+ * @wall_to_monotonic:		CLOCK_REALTIME to CLOCK_MONOTONIC offset
+ * @offs_real:			Offset clock monotonic -> clock realtime
+ * @offs_boot:			Offset clock monotonic -> clock boottime
+ * @offs_tai:			Offset clock monotonic -> clock tai
+ * @tai_offset:			The current UTC to TAI offset in seconds
+ * @tkr_raw:			The readout base structure for CLOCK_MONOTONIC_RAW
+ * @raw_sec:			CLOCK_MONOTONIC_RAW  time in seconds
+ * @clock_was_set_seq:		The sequence number of clock was set events
+ * @cs_was_changed_seq:		The sequence number of clocksource change events
+ * @monotonic_to_boot:		CLOCK_MONOTONIC to CLOCK_BOOTTIME offset
+ * @cycle_interval:		Number of clock cycles in one NTP interval
+ * @xtime_interval:		Number of clock shifted nano seconds in one NTP
+ *				interval.
+ * @xtime_remainder:		Shifted nano seconds left over when rounding
+ *				@cycle_interval
+ * @raw_interval:		Shifted raw nano seconds accumulated per NTP interval.
+ * @next_leap_ktime:		CLOCK_MONOTONIC time value of a pending leap-second
+ * @ntp_tick:			The ntp_tick_length() value currently being
+ *				used. This cached copy ensures we consistently
+ *				apply the tick length for an entire tick, as
+ *				ntp_tick_length may change mid-tick, and we don't
+ *				want to apply that new value to the tick in
+ *				progress.
+ * @ntp_error:			Difference between accumulated time and NTP time in ntp
+ *				shifted nano seconds.
+ * @ntp_error_shift:		Shift conversion between clock shifted nano seconds and
+ *				ntp shifted nano seconds.
+ * @ntp_err_mult:		Multiplication factor for scaled math conversion
+ * @skip_second_overflow:	Flag used to avoid updating NTP twice with same second
+ * @last_warning:		Warning ratelimiter (DEBUG_TIMEKEEPING)
+ * @underflow_seen:		Underflow warning flag (DEBUG_TIMEKEEPING)
+ * @overflow_seen:		Overflow warning flag (DEBUG_TIMEKEEPING)
  *
  * Note: For timespec(64) based interfaces wall_to_monotonic is what
  * we need to add to xtime (or xtime corrected for sub jiffy times)
@@ -88,10 +96,28 @@ struct tk_read_base {
  *
  * @monotonic_to_boottime is a timespec64 representation of @offs_boot to
  * accelerate the VDSO update for CLOCK_BOOTTIME.
+ *
+ * The cacheline ordering of the structure is optimized for in kernel usage of
+ * the ktime_get() and ktime_get_ts64() family of time accessors. Struct
+ * timekeeper is prepended in the core timekeeping code with a sequence count,
+ * which results in the following cacheline layout:
+ *
+ * 0:	seqcount, tkr_mono
+ * 1:	xtime_sec ... tai_offset
+ * 2:	tkr_raw, raw_sec
+ * 3,4: Internal variables
+ *
+ * Cacheline 0,1 contain the data which is used for accessing
+ * CLOCK_MONOTONIC/REALTIME/BOOTTIME/TAI, while cacheline 2 contains the
+ * data for accessing CLOCK_MONOTONIC_RAW.  Cacheline 3,4 are internal
+ * variables which are only accessed during timekeeper updates once per
+ * tick.
  */
 struct timekeeper {
+	/* Cacheline 0 (together with prepended seqcount of timekeeper core): */
 	struct tk_read_base	tkr_mono;
-	struct tk_read_base	tkr_raw;
+
+	/* Cacheline 1: */
 	u64			xtime_sec;
 	unsigned long		ktime_sec;
 	struct timespec64	wall_to_monotonic;
@@ -99,31 +125,29 @@ struct timekeeper {
 	ktime_t			offs_boot;
 	ktime_t			offs_tai;
 	s32			tai_offset;
+
+	/* Cacheline 2: */
+	struct tk_read_base	tkr_raw;
+	u64			raw_sec;
+
+	/* Cachline 3 and 4 (timekeeping internal variables): */
 	unsigned int		clock_was_set_seq;
 	u8			cs_was_changed_seq;
-	ktime_t			next_leap_ktime;
-	u64			raw_sec;
+
 	struct timespec64	monotonic_to_boot;
 
-	/* The following members are for timekeeping internal use */
 	u64			cycle_interval;
 	u64			xtime_interval;
 	s64			xtime_remainder;
 	u64			raw_interval;
-	/* The ntp_tick_length() value currently being used.
-	 * This cached copy ensures we consistently apply the tick
-	 * length for an entire tick, as ntp_tick_length may change
-	 * mid-tick, and we don't want to apply that new value to
-	 * the tick in progress.
-	 */
+
+	ktime_t			next_leap_ktime;
 	u64			ntp_tick;
-	/* Difference between accumulated time and NTP time in ntp
-	 * shifted nano seconds. */
 	s64			ntp_error;
 	u32			ntp_error_shift;
 	u32			ntp_err_mult;
-	/* Flag used to avoid updating NTP twice with same second */
 	u32			skip_second_overflow;
+
 #ifdef CONFIG_DEBUG_TIMEKEEPING
 	long			last_warning;
 	/*
-- 
cgit v1.2.3


From 92b043fd995a63a57aae29ff85a39b6f30cd440c Mon Sep 17 00:00:00 2001
From: Miguel Ojeda <ojeda@kernel.org>
Date: Fri, 25 Oct 2024 13:01:41 +0200
Subject: time: Fix references to _msecs_to_jiffies() handling of values

The details about the handling of the "normal" values were moved
to the _msecs_to_jiffies() helpers in commit ca42aaf0c861 ("time:
Refactor msecs_to_jiffies"). However, the same commit still mentioned
__msecs_to_jiffies() in the added documentation.

Thus point to _msecs_to_jiffies() instead.

Fixes: ca42aaf0c861 ("time: Refactor msecs_to_jiffies")
Signed-off-by: Miguel Ojeda <ojeda@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/all/20241025110141.157205-2-ojeda@kernel.org
---
 include/linux/jiffies.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h
index 1220f0fbe5bf..5d21dacd62bc 100644
--- a/include/linux/jiffies.h
+++ b/include/linux/jiffies.h
@@ -502,7 +502,7 @@ static inline unsigned long _msecs_to_jiffies(const unsigned int m)
  * - all other values are converted to jiffies by either multiplying
  *   the input value by a factor or dividing it with a factor and
  *   handling any 32-bit overflows.
- *   for the details see __msecs_to_jiffies()
+ *   for the details see _msecs_to_jiffies()
  *
  * msecs_to_jiffies() checks for the passed in value being a constant
  * via __builtin_constant_p() allowing gcc to eliminate most of the
-- 
cgit v1.2.3


From 78bc671bd1501e2f6c571e063301a4fdc5db53b2 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Fri, 18 Oct 2024 12:47:39 -0700
Subject: scsi: ufs: core: Make DMA mask configuration more flexible

Replace UFSHCD_QUIRK_BROKEN_64BIT_ADDRESS with
ufs_hba_variant_ops::set_dma_mask.  Update the Renesas driver
accordingly.  This patch enables supporting other configurations than
32-bit or 64-bit DMA addresses, e.g. 36-bit DMA addresses.

Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Link: https://lore.kernel.org/r/20241018194753.775074-1-bvanassche@acm.org
Reviewed-by: Avri Altman <Avri.Altman@wdc.com>
Reviewed-by: Peter Wang <peter.wang@mediatek.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 include/ufs/ufshcd.h | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/ufs/ufshcd.h b/include/ufs/ufshcd.h
index a95282b9f743..a2d600ff4372 100644
--- a/include/ufs/ufshcd.h
+++ b/include/ufs/ufshcd.h
@@ -299,6 +299,8 @@ struct ufs_pwr_mode_info {
  * @max_num_rtt: maximum RTT supported by the host
  * @init: called when the driver is initialized
  * @exit: called to cleanup everything done in init
+ * @set_dma_mask: For setting another DMA mask than indicated by the 64AS
+ *	capability bit.
  * @get_ufs_hci_version: called to get UFS HCI version
  * @clk_scale_notify: notifies that clks are scaled up/down
  * @setup_clocks: called before touching any of the controller registers
@@ -341,6 +343,7 @@ struct ufs_hba_variant_ops {
 	int	(*init)(struct ufs_hba *);
 	void    (*exit)(struct ufs_hba *);
 	u32	(*get_ufs_hci_version)(struct ufs_hba *);
+	int	(*set_dma_mask)(struct ufs_hba *);
 	int	(*clk_scale_notify)(struct ufs_hba *, bool,
 				    enum ufs_notify_change_status);
 	int	(*setup_clocks)(struct ufs_hba *, bool,
@@ -623,12 +626,6 @@ enum ufshcd_quirks {
 	 */
 	UFSHCD_QUIRK_SKIP_PH_CONFIGURATION		= 1 << 16,
 
-	/*
-	 * This quirk needs to be enabled if the host controller has
-	 * 64-bit addressing supported capability but it doesn't work.
-	 */
-	UFSHCD_QUIRK_BROKEN_64BIT_ADDRESS		= 1 << 17,
-
 	/*
 	 * This quirk needs to be enabled if the host controller has
 	 * auto-hibernate capability but it's FASTAUTO only.
-- 
cgit v1.2.3


From a085e03758b87ee5aea45de27c811576574d795b Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Tue, 22 Oct 2024 12:30:57 -0700
Subject: scsi: ufs: core: Move the ufshcd_mcq_enable_esi() definition

Move the ufshcd_mcq_enable_esi() definition such that it occurs
immediately before the ufshcd_mcq_config_esi() definition.

Reviewed-by: Peter Wang <peter.wang@mediatek.com>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Link: https://lore.kernel.org/r/20241022193130.2733293-2-bvanassche@acm.org
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 include/ufs/ufshcd.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/ufs/ufshcd.h b/include/ufs/ufshcd.h
index a2d600ff4372..d2dad221c12b 100644
--- a/include/ufs/ufshcd.h
+++ b/include/ufs/ufshcd.h
@@ -1318,8 +1318,8 @@ void ufshcd_mcq_write_cqis(struct ufs_hba *hba, u32 val, int i);
 unsigned long ufshcd_mcq_poll_cqe_lock(struct ufs_hba *hba,
 					 struct ufs_hw_queue *hwq);
 void ufshcd_mcq_make_queues_operational(struct ufs_hba *hba);
-void ufshcd_mcq_enable_esi(struct ufs_hba *hba);
 void ufshcd_mcq_enable(struct ufs_hba *hba);
+void ufshcd_mcq_enable_esi(struct ufs_hba *hba);
 void ufshcd_mcq_config_esi(struct ufs_hba *hba, struct msi_msg *msg);
 
 int ufshcd_opp_config_clks(struct device *dev, struct opp_table *opp_table,
-- 
cgit v1.2.3


From 2a36646012fc58e6262435ff5d2c8c97456c253f Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Tue, 22 Oct 2024 12:31:01 -0700
Subject: scsi: ufs: core: Simplify ufshcd_err_handling_prepare()

Use blk_mq_quiesce_tagset() instead of ufshcd_scsi_block_requests() and
blk_mq_wait_quiesce_done(). Since this patch removes the last callers of
ufshcd_scsi_block_requests() and ufshcd_scsi_unblock_requests(), remove
these functions.

Reviewed-by: Avri Altman <avri.altman@wdc.com>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Link: https://lore.kernel.org/r/20241022193130.2733293-6-bvanassche@acm.org
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 include/ufs/ufshcd.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include')

diff --git a/include/ufs/ufshcd.h b/include/ufs/ufshcd.h
index d2dad221c12b..9ea2a7411bb5 100644
--- a/include/ufs/ufshcd.h
+++ b/include/ufs/ufshcd.h
@@ -925,7 +925,6 @@ enum ufshcd_mcq_opr {
  * @wb_mutex: used to serialize devfreq and sysfs write booster toggling
  * @clk_scaling_lock: used to serialize device commands and clock scaling
  * @desc_size: descriptor sizes reported by device
- * @scsi_block_reqs_cnt: reference counting for scsi block requests
  * @bsg_dev: struct device associated with the BSG queue
  * @bsg_queue: BSG queue associated with the UFS controller
  * @rpm_dev_flush_recheck_work: used to suspend from RPM (runtime power
@@ -1086,7 +1085,6 @@ struct ufs_hba {
 
 	struct mutex wb_mutex;
 	struct rw_semaphore clk_scaling_lock;
-	atomic_t scsi_block_reqs_cnt;
 
 	struct device		bsg_dev;
 	struct request_queue	*bsg_queue;
-- 
cgit v1.2.3


From a81dca057273c32b8554b3bc562480d4b3816155 Mon Sep 17 00:00:00 2001
From: Ivaylo Ivanov <ivo.ivanov.ivanov1@gmail.com>
Date: Wed, 23 Oct 2024 12:01:34 +0300
Subject: dt-bindings: clock: samsung: Add Exynos8895 SoC

Provide dt-schema documentation for Samsung Exynos8895 SoC clock
controller CMU blocks:
- CMU_FSYS0/1
- CMU_PERIC0/1
- CMU_PERIS
- CMU_TOP

Signed-off-by: Ivaylo Ivanov <ivo.ivanov.ivanov1@gmail.com>
Link: https://lore.kernel.org/r/20241023090136.537395-2-ivo.ivanov.ivanov1@gmail.com
Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
---
 include/dt-bindings/clock/samsung,exynos8895.h | 453 +++++++++++++++++++++++++
 1 file changed, 453 insertions(+)
 create mode 100644 include/dt-bindings/clock/samsung,exynos8895.h

(limited to 'include')

diff --git a/include/dt-bindings/clock/samsung,exynos8895.h b/include/dt-bindings/clock/samsung,exynos8895.h
new file mode 100644
index 000000000000..27998c53f929
--- /dev/null
+++ b/include/dt-bindings/clock/samsung,exynos8895.h
@@ -0,0 +1,453 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/*
+ * Copyright (C) 2024 Ivaylo Ivanov <ivo.ivanov.ivanov1@gmail.com>
+ * Author: Ivaylo Ivanov <ivo.ivanov.ivanov1@gmail.com>
+ *
+ * Device Tree binding constants for Exynos8895 clock controller.
+ */
+
+#ifndef _DT_BINDINGS_CLOCK_EXYNOS8895_H
+#define _DT_BINDINGS_CLOCK_EXYNOS8895_H
+
+/* CMU_TOP */
+#define CLK_FOUT_SHARED0_PLL			1
+#define CLK_FOUT_SHARED1_PLL			2
+#define CLK_FOUT_SHARED2_PLL			3
+#define CLK_FOUT_SHARED3_PLL			4
+#define CLK_FOUT_SHARED4_PLL			5
+#define CLK_MOUT_PLL_SHARED0			6
+#define CLK_MOUT_PLL_SHARED1			7
+#define CLK_MOUT_PLL_SHARED2			8
+#define CLK_MOUT_PLL_SHARED3			9
+#define CLK_MOUT_PLL_SHARED4			10
+#define CLK_MOUT_CP2AP_MIF_CLK_USER		11
+#define CLK_MOUT_CMU_ABOX_CPUABOX		12
+#define CLK_MOUT_CMU_APM_BUS			13
+#define CLK_MOUT_CMU_BUS1_BUS			14
+#define CLK_MOUT_CMU_BUSC_BUS			15
+#define CLK_MOUT_CMU_BUSC_BUSPHSI2C		16
+#define CLK_MOUT_CMU_CAM_BUS			17
+#define CLK_MOUT_CMU_CAM_TPU0			18
+#define CLK_MOUT_CMU_CAM_TPU1			19
+#define CLK_MOUT_CMU_CAM_VRA			20
+#define CLK_MOUT_CMU_CIS_CLK0			21
+#define CLK_MOUT_CMU_CIS_CLK1			22
+#define CLK_MOUT_CMU_CIS_CLK2			23
+#define CLK_MOUT_CMU_CIS_CLK3			24
+#define CLK_MOUT_CMU_CORE_BUS			25
+#define CLK_MOUT_CMU_CPUCL0_SWITCH		26
+#define CLK_MOUT_CMU_CPUCL1_SWITCH		27
+#define CLK_MOUT_CMU_DBG_BUS			28
+#define CLK_MOUT_CMU_DCAM_BUS			29
+#define CLK_MOUT_CMU_DCAM_IMGD			30
+#define CLK_MOUT_CMU_DPU_BUS			31
+#define CLK_MOUT_CMU_DROOPDETECTOR		32
+#define CLK_MOUT_CMU_DSP_BUS			33
+#define CLK_MOUT_CMU_FSYS0_BUS			34
+#define CLK_MOUT_CMU_FSYS0_DPGTC		35
+#define CLK_MOUT_CMU_FSYS0_MMC_EMBD		36
+#define CLK_MOUT_CMU_FSYS0_UFS_EMBD		37
+#define CLK_MOUT_CMU_FSYS0_USBDRD30		38
+#define CLK_MOUT_CMU_FSYS1_BUS			39
+#define CLK_MOUT_CMU_FSYS1_MMC_CARD		40
+#define CLK_MOUT_CMU_FSYS1_PCIE			41
+#define CLK_MOUT_CMU_FSYS1_UFS_CARD		42
+#define CLK_MOUT_CMU_G2D_G2D			43
+#define CLK_MOUT_CMU_G2D_JPEG			44
+#define CLK_MOUT_CMU_HPM			45
+#define CLK_MOUT_CMU_IMEM_BUS			46
+#define CLK_MOUT_CMU_ISPHQ_BUS			47
+#define CLK_MOUT_CMU_ISPLP_BUS			48
+#define CLK_MOUT_CMU_IVA_BUS			49
+#define CLK_MOUT_CMU_MFC_BUS			50
+#define CLK_MOUT_CMU_MIF_SWITCH			51
+#define CLK_MOUT_CMU_PERIC0_BUS			52
+#define CLK_MOUT_CMU_PERIC0_UART_DBG		53
+#define CLK_MOUT_CMU_PERIC0_USI00		54
+#define CLK_MOUT_CMU_PERIC0_USI01		55
+#define CLK_MOUT_CMU_PERIC0_USI02		56
+#define CLK_MOUT_CMU_PERIC0_USI03		57
+#define CLK_MOUT_CMU_PERIC1_BUS			58
+#define CLK_MOUT_CMU_PERIC1_SPEEDY2		59
+#define CLK_MOUT_CMU_PERIC1_SPI_CAM0		60
+#define CLK_MOUT_CMU_PERIC1_SPI_CAM1		61
+#define CLK_MOUT_CMU_PERIC1_UART_BT		62
+#define CLK_MOUT_CMU_PERIC1_USI04		63
+#define CLK_MOUT_CMU_PERIC1_USI05		64
+#define CLK_MOUT_CMU_PERIC1_USI06		65
+#define CLK_MOUT_CMU_PERIC1_USI07		66
+#define CLK_MOUT_CMU_PERIC1_USI08		67
+#define CLK_MOUT_CMU_PERIC1_USI09		68
+#define CLK_MOUT_CMU_PERIC1_USI10		69
+#define CLK_MOUT_CMU_PERIC1_USI11		70
+#define CLK_MOUT_CMU_PERIC1_USI12		71
+#define CLK_MOUT_CMU_PERIC1_USI13		72
+#define CLK_MOUT_CMU_PERIS_BUS			73
+#define CLK_MOUT_CMU_SRDZ_BUS			74
+#define CLK_MOUT_CMU_SRDZ_IMGD			75
+#define CLK_MOUT_CMU_VPU_BUS			76
+#define CLK_DOUT_CMU_ABOX_CPUABOX		77
+#define CLK_DOUT_CMU_APM_BUS			78
+#define CLK_DOUT_CMU_BUS1_BUS			79
+#define CLK_DOUT_CMU_BUSC_BUS			80
+#define CLK_DOUT_CMU_BUSC_BUSPHSI2C		81
+#define CLK_DOUT_CMU_CAM_BUS			82
+#define CLK_DOUT_CMU_CAM_TPU0			83
+#define CLK_DOUT_CMU_CAM_TPU1			84
+#define CLK_DOUT_CMU_CAM_VRA			85
+#define CLK_DOUT_CMU_CIS_CLK0			86
+#define CLK_DOUT_CMU_CIS_CLK1			87
+#define CLK_DOUT_CMU_CIS_CLK2			88
+#define CLK_DOUT_CMU_CIS_CLK3			89
+#define CLK_DOUT_CMU_CORE_BUS			90
+#define CLK_DOUT_CMU_CPUCL0_SWITCH		91
+#define CLK_DOUT_CMU_CPUCL1_SWITCH		92
+#define CLK_DOUT_CMU_DBG_BUS			93
+#define CLK_DOUT_CMU_DCAM_BUS			94
+#define CLK_DOUT_CMU_DCAM_IMGD			95
+#define CLK_DOUT_CMU_DPU_BUS			96
+#define CLK_DOUT_CMU_DSP_BUS			97
+#define CLK_DOUT_CMU_FSYS0_BUS			98
+#define CLK_DOUT_CMU_FSYS0_DPGTC		99
+#define CLK_DOUT_CMU_FSYS0_MMC_EMBD		100
+#define CLK_DOUT_CMU_FSYS0_UFS_EMBD		101
+#define CLK_DOUT_CMU_FSYS0_USBDRD30		102
+#define CLK_DOUT_CMU_FSYS1_BUS			103
+#define CLK_DOUT_CMU_FSYS1_MMC_CARD		104
+#define CLK_DOUT_CMU_FSYS1_UFS_CARD		105
+#define CLK_DOUT_CMU_G2D_G2D			106
+#define CLK_DOUT_CMU_G2D_JPEG			107
+#define CLK_DOUT_CMU_G3D_SWITCH			108
+#define CLK_DOUT_CMU_HPM			109
+#define CLK_DOUT_CMU_IMEM_BUS			110
+#define CLK_DOUT_CMU_ISPHQ_BUS			111
+#define CLK_DOUT_CMU_ISPLP_BUS			112
+#define CLK_DOUT_CMU_IVA_BUS			113
+#define CLK_DOUT_CMU_MFC_BUS			114
+#define CLK_DOUT_CMU_MODEM_SHARED0		115
+#define CLK_DOUT_CMU_MODEM_SHARED1		116
+#define CLK_DOUT_CMU_PERIC0_BUS			117
+#define CLK_DOUT_CMU_PERIC0_UART_DBG		118
+#define CLK_DOUT_CMU_PERIC0_USI00		119
+#define CLK_DOUT_CMU_PERIC0_USI01		120
+#define CLK_DOUT_CMU_PERIC0_USI02		121
+#define CLK_DOUT_CMU_PERIC0_USI03		122
+#define CLK_DOUT_CMU_PERIC1_BUS			123
+#define CLK_DOUT_CMU_PERIC1_SPEEDY2		124
+#define CLK_DOUT_CMU_PERIC1_SPI_CAM0		125
+#define CLK_DOUT_CMU_PERIC1_SPI_CAM1		126
+#define CLK_DOUT_CMU_PERIC1_UART_BT		127
+#define CLK_DOUT_CMU_PERIC1_USI04		128
+#define CLK_DOUT_CMU_PERIC1_USI05		129
+#define CLK_DOUT_CMU_PERIC1_USI06		130
+#define CLK_DOUT_CMU_PERIC1_USI07		131
+#define CLK_DOUT_CMU_PERIC1_USI08		132
+#define CLK_DOUT_CMU_PERIC1_USI09		133
+#define CLK_DOUT_CMU_PERIC1_USI10		134
+#define CLK_DOUT_CMU_PERIC1_USI11		135
+#define CLK_DOUT_CMU_PERIC1_USI12		136
+#define CLK_DOUT_CMU_PERIC1_USI13		137
+#define CLK_DOUT_CMU_PERIS_BUS			138
+#define CLK_DOUT_CMU_SRDZ_BUS			139
+#define CLK_DOUT_CMU_SRDZ_IMGD			140
+#define CLK_DOUT_CMU_VPU_BUS			141
+#define CLK_DOUT_CMU_SHARED0_DIV2		142
+#define CLK_DOUT_CMU_SHARED0_DIV4		143
+#define CLK_DOUT_CMU_SHARED1_DIV2		144
+#define CLK_DOUT_CMU_SHARED1_DIV4		145
+#define CLK_DOUT_CMU_SHARED2_DIV2		146
+#define CLK_DOUT_CMU_SHARED3_DIV2		147
+#define CLK_DOUT_CMU_SHARED4_DIV2		148
+#define CLK_DOUT_CMU_FSYS1_PCIE			149
+#define CLK_DOUT_CMU_CP2AP_MIF_CLK_DIV2		150
+#define CLK_DOUT_CMU_CMU_OTP			151
+#define CLK_GOUT_CMU_DROOPDETECTOR		152
+#define CLK_GOUT_CMU_MIF_SWITCH			153
+#define CLK_GOUT_CMU_ABOX_CPUABOX		154
+#define CLK_GOUT_CMU_APM_BUS			155
+#define CLK_GOUT_CMU_BUS1_BUS			156
+#define CLK_GOUT_CMU_BUSC_BUS			157
+#define CLK_GOUT_CMU_BUSC_BUSPHSI2C		158
+#define CLK_GOUT_CMU_CAM_BUS			159
+#define CLK_GOUT_CMU_CAM_TPU0			160
+#define CLK_GOUT_CMU_CAM_TPU1			161
+#define CLK_GOUT_CMU_CAM_VRA			162
+#define CLK_GOUT_CMU_CIS_CLK0			163
+#define CLK_GOUT_CMU_CIS_CLK1			164
+#define CLK_GOUT_CMU_CIS_CLK2			165
+#define CLK_GOUT_CMU_CIS_CLK3			166
+#define CLK_GOUT_CMU_CORE_BUS			167
+#define CLK_GOUT_CMU_CPUCL0_SWITCH		168
+#define CLK_GOUT_CMU_CPUCL1_SWITCH		169
+#define CLK_GOUT_CMU_DBG_BUS			170
+#define CLK_GOUT_CMU_DCAM_BUS			171
+#define CLK_GOUT_CMU_DCAM_IMGD			172
+#define CLK_GOUT_CMU_DPU_BUS			173
+#define CLK_GOUT_CMU_DSP_BUS			174
+#define CLK_GOUT_CMU_FSYS0_BUS			175
+#define CLK_GOUT_CMU_FSYS0_DPGTC		176
+#define CLK_GOUT_CMU_FSYS0_MMC_EMBD		177
+#define CLK_GOUT_CMU_FSYS0_UFS_EMBD		178
+#define CLK_GOUT_CMU_FSYS0_USBDRD30		179
+#define CLK_GOUT_CMU_FSYS1_BUS			180
+#define CLK_GOUT_CMU_FSYS1_MMC_CARD		181
+#define CLK_GOUT_CMU_FSYS1_PCIE			182
+#define CLK_GOUT_CMU_FSYS1_UFS_CARD		183
+#define CLK_GOUT_CMU_G2D_G2D			184
+#define CLK_GOUT_CMU_G2D_JPEG			185
+#define CLK_GOUT_CMU_G3D_SWITCH			186
+#define CLK_GOUT_CMU_HPM			187
+#define CLK_GOUT_CMU_IMEM_BUS			188
+#define CLK_GOUT_CMU_ISPHQ_BUS			189
+#define CLK_GOUT_CMU_ISPLP_BUS			190
+#define CLK_GOUT_CMU_IVA_BUS			191
+#define CLK_GOUT_CMU_MFC_BUS			192
+#define CLK_GOUT_CMU_MODEM_SHARED0		193
+#define CLK_GOUT_CMU_MODEM_SHARED1		194
+#define CLK_GOUT_CMU_PERIC0_BUS			195
+#define CLK_GOUT_CMU_PERIC0_UART_DBG		196
+#define CLK_GOUT_CMU_PERIC0_USI00		197
+#define CLK_GOUT_CMU_PERIC0_USI01		198
+#define CLK_GOUT_CMU_PERIC0_USI02		199
+#define CLK_GOUT_CMU_PERIC0_USI03		200
+#define CLK_GOUT_CMU_PERIC1_BUS			201
+#define CLK_GOUT_CMU_PERIC1_SPEEDY2		202
+#define CLK_GOUT_CMU_PERIC1_SPI_CAM0		203
+#define CLK_GOUT_CMU_PERIC1_SPI_CAM1		204
+#define CLK_GOUT_CMU_PERIC1_UART_BT		205
+#define CLK_GOUT_CMU_PERIC1_USI04		206
+#define CLK_GOUT_CMU_PERIC1_USI05		207
+#define CLK_GOUT_CMU_PERIC1_USI06		208
+#define CLK_GOUT_CMU_PERIC1_USI07		209
+#define CLK_GOUT_CMU_PERIC1_USI08		210
+#define CLK_GOUT_CMU_PERIC1_USI09		211
+#define CLK_GOUT_CMU_PERIC1_USI10		212
+#define CLK_GOUT_CMU_PERIC1_USI11		213
+#define CLK_GOUT_CMU_PERIC1_USI12		214
+#define CLK_GOUT_CMU_PERIC1_USI13		215
+#define CLK_GOUT_CMU_PERIS_BUS			216
+#define CLK_GOUT_CMU_SRDZ_BUS			217
+#define CLK_GOUT_CMU_SRDZ_IMGD			218
+#define CLK_GOUT_CMU_VPU_BUS			219
+
+/* CMU_PERIS */
+#define CLK_MOUT_PERIS_BUS_USER				1
+#define CLK_MOUT_PERIS_GIC				2
+#define CLK_GOUT_PERIS_CMU_PERIS_PCLK			3
+#define CLK_GOUT_PERIS_AD_AXI_P_PERIS_ACLKM		4
+#define CLK_GOUT_PERIS_AD_AXI_P_PERIS_ACLKS		5
+#define CLK_GOUT_PERIS_AXI2APB_PERISP0_ACLK		6
+#define CLK_GOUT_PERIS_AXI2APB_PERISP1_ACLK		7
+#define CLK_GOUT_PERIS_BUSIF_TMU_PCLK			8
+#define CLK_GOUT_PERIS_GIC_CLK				9
+#define CLK_GOUT_PERIS_LHM_AXI_P_PERIS_I_CLK		10
+#define CLK_GOUT_PERIS_MCT_PCLK				11
+#define CLK_GOUT_PERIS_OTP_CON_BIRA_PCLK		12
+#define CLK_GOUT_PERIS_OTP_CON_TOP_PCLK			13
+#define CLK_GOUT_PERIS_PMU_PERIS_PCLK			14
+#define CLK_GOUT_PERIS_RSTNSYNC_CLK_PERIS_BUSP_CLK	15
+#define CLK_GOUT_PERIS_RSTNSYNC_CLK_PERIS_GIC_CLK	16
+#define CLK_GOUT_PERIS_SYSREG_PERIS_PCLK		17
+#define CLK_GOUT_PERIS_TZPC00_PCLK			18
+#define CLK_GOUT_PERIS_TZPC01_PCLK			19
+#define CLK_GOUT_PERIS_TZPC02_PCLK			20
+#define CLK_GOUT_PERIS_TZPC03_PCLK			21
+#define CLK_GOUT_PERIS_TZPC04_PCLK			22
+#define CLK_GOUT_PERIS_TZPC05_PCLK			23
+#define CLK_GOUT_PERIS_TZPC06_PCLK			24
+#define CLK_GOUT_PERIS_TZPC07_PCLK			25
+#define CLK_GOUT_PERIS_TZPC08_PCLK			26
+#define CLK_GOUT_PERIS_TZPC09_PCLK			27
+#define CLK_GOUT_PERIS_TZPC10_PCLK			28
+#define CLK_GOUT_PERIS_TZPC11_PCLK			29
+#define CLK_GOUT_PERIS_TZPC12_PCLK			30
+#define CLK_GOUT_PERIS_TZPC13_PCLK			31
+#define CLK_GOUT_PERIS_TZPC14_PCLK			32
+#define CLK_GOUT_PERIS_TZPC15_PCLK			33
+#define CLK_GOUT_PERIS_WDT_CLUSTER0_PCLK		34
+#define CLK_GOUT_PERIS_WDT_CLUSTER1_PCLK		35
+#define CLK_GOUT_PERIS_XIU_P_PERIS_ACLK			36
+
+/* CMU_FSYS0 */
+#define CLK_MOUT_FSYS0_BUS_USER				1
+#define CLK_MOUT_FSYS0_DPGTC_USER			2
+#define CLK_MOUT_FSYS0_MMC_EMBD_USER			3
+#define CLK_MOUT_FSYS0_UFS_EMBD_USER			4
+#define CLK_MOUT_FSYS0_USBDRD30_USER			5
+#define CLK_GOUT_FSYS0_FSYS0_CMU_FSYS0_PCLK		6
+#define CLK_GOUT_FSYS0_AHBBR_FSYS0_HCLK			7
+#define CLK_GOUT_FSYS0_AXI2AHB_FSYS0_ACLK		8
+#define CLK_GOUT_FSYS0_AXI2AHB_USB_FSYS0_ACLK		9
+#define CLK_GOUT_FSYS0_AXI2APB_FSYS0_ACLK		10
+#define CLK_GOUT_FSYS0_BTM_FSYS0_I_ACLK			11
+#define CLK_GOUT_FSYS0_BTM_FSYS0_I_PCLK			12
+#define CLK_GOUT_FSYS0_DP_LINK_I_GTC_EXT_CLK		13
+#define CLK_GOUT_FSYS0_DP_LINK_I_PCLK			14
+#define CLK_GOUT_FSYS0_ETR_MIU_I_ACLK			15
+#define CLK_GOUT_FSYS0_ETR_MIU_I_PCLK			16
+#define CLK_GOUT_FSYS0_GPIO_FSYS0_PCLK			17
+#define CLK_GOUT_FSYS0_LHM_AXI_D_USBTV_I_CLK		18
+#define CLK_GOUT_FSYS0_LHM_AXI_G_ETR_I_CLK		19
+#define CLK_GOUT_FSYS0_LHM_AXI_P_FSYS0_I_CLK		20
+#define CLK_GOUT_FSYS0_LHS_ACEL_D_FSYS0_I_CLK		21
+#define CLK_GOUT_FSYS0_MMC_EMBD_I_ACLK			22
+#define CLK_GOUT_FSYS0_MMC_EMBD_SDCLKIN			23
+#define CLK_GOUT_FSYS0_PMU_FSYS0_PCLK			24
+#define CLK_GOUT_FSYS0_BCM_FSYS0_ACLK			25
+#define CLK_GOUT_FSYS0_BCM_FSYS0_PCLK			26
+#define CLK_GOUT_FSYS0_RSTNSYNC_CLK_FSYS0_BUS_CLK	27
+#define CLK_GOUT_FSYS0_SYSREG_FSYS0_PCLK		28
+#define CLK_GOUT_FSYS0_UFS_EMBD_I_ACLK			29
+#define CLK_GOUT_FSYS0_UFS_EMBD_I_CLK_UNIPRO		30
+#define CLK_GOUT_FSYS0_UFS_EMBD_I_FMP_CLK		31
+#define CLK_GOUT_FSYS0_USBTV_I_USB30DRD_ACLK		32
+#define CLK_GOUT_FSYS0_USBTV_I_USB30DRD_REF_CLK		33
+#define CLK_GOUT_FSYS0_USBTV_I_USB30DRD_SUSPEND_CLK	34
+#define CLK_GOUT_FSYS0_USBTV_I_USBTVH_AHB_CLK		35
+#define CLK_GOUT_FSYS0_USBTV_I_USBTVH_CORE_CLK		36
+#define CLK_GOUT_FSYS0_USBTV_I_USBTVH_XIU_CLK		37
+#define CLK_GOUT_FSYS0_US_D_FSYS0_USB_ACLK		38
+#define CLK_GOUT_FSYS0_XIU_D_FSYS0_ACLK			39
+#define CLK_GOUT_FSYS0_XIU_D_FSYS0_USB_ACLK		40
+#define CLK_GOUT_FSYS0_XIU_P_FSYS0_ACLK			41
+
+/* CMU_FSYS1 */
+#define CLK_MOUT_FSYS1_BUS_USER								1
+#define CLK_MOUT_FSYS1_MMC_CARD_USER							2
+#define CLK_MOUT_FSYS1_PCIE_USER							3
+#define CLK_MOUT_FSYS1_UFS_CARD_USER							4
+#define CLK_GOUT_FSYS1_PCIE_PHY_REF_CLK_IN						5
+#define CLK_GOUT_FSYS1_ADM_AHB_SSS_HCLKM						6
+#define CLK_GOUT_FSYS1_AHBBR_FSYS1_HCLK							7
+#define CLK_GOUT_FSYS1_AXI2AHB_FSYS1_ACLK						8
+#define CLK_GOUT_FSYS1_AXI2APB_FSYS1P0_ACLK						9
+#define CLK_GOUT_FSYS1_AXI2APB_FSYS1P1_ACLK						10
+#define CLK_GOUT_FSYS1_BTM_FSYS1_I_ACLK							11
+#define CLK_GOUT_FSYS1_BTM_FSYS1_I_PCLK							12
+#define CLK_GOUT_FSYS1_FSYS1_CMU_FSYS1_PCLK						13
+#define CLK_GOUT_FSYS1_GPIO_FSYS1_PCLK							14
+#define CLK_GOUT_FSYS1_LHM_AXI_P_FSYS1_I_CLK						15
+#define CLK_GOUT_FSYS1_LHS_ACEL_D_FSYS1_I_CLK						16
+#define CLK_GOUT_FSYS1_MMC_CARD_I_ACLK							17
+#define CLK_GOUT_FSYS1_MMC_CARD_SDCLKIN							18
+#define CLK_GOUT_FSYS1_PCIE_DBI_ACLK_0							19
+#define CLK_GOUT_FSYS1_PCIE_DBI_ACLK_1							20
+#define CLK_GOUT_FSYS1_PCIE_IEEE1500_WRAPPER_FOR_PCIE_PHY_LC_X2_INST_0_I_SCL_APB_PCLK	21
+#define CLK_GOUT_FSYS1_PCIE_MSTR_ACLK_0							22
+#define CLK_GOUT_FSYS1_PCIE_MSTR_ACLK_1							23
+#define CLK_GOUT_FSYS1_PCIE_PCIE_SUB_CTRL_INST_0_I_DRIVER_APB_CLK			24
+#define CLK_GOUT_FSYS1_PCIE_PCIE_SUB_CTRL_INST_1_I_DRIVER_APB_CLK			25
+#define CLK_GOUT_FSYS1_PCIE_PIPE2_DIGITAL_X2_WRAP_INST_0_I_APB_PCLK_SCL			26
+#define CLK_GOUT_FSYS1_PCIE_SLV_ACLK_0							27
+#define CLK_GOUT_FSYS1_PCIE_SLV_ACLK_1							28
+#define CLK_GOUT_FSYS1_PMU_FSYS1_PCLK							29
+#define CLK_GOUT_FSYS1_BCM_FSYS1_ACLK							30
+#define CLK_GOUT_FSYS1_BCM_FSYS1_PCLK							31
+#define CLK_GOUT_FSYS1_RSTNSYNC_CLK_FSYS1_BUS_CLK					32
+#define CLK_GOUT_FSYS1_RTIC_I_ACLK							33
+#define CLK_GOUT_FSYS1_RTIC_I_PCLK							34
+#define CLK_GOUT_FSYS1_SSS_I_ACLK							35
+#define CLK_GOUT_FSYS1_SSS_I_PCLK							36
+#define CLK_GOUT_FSYS1_SYSREG_FSYS1_PCLK						37
+#define CLK_GOUT_FSYS1_TOE_WIFI0_I_CLK							38
+#define CLK_GOUT_FSYS1_TOE_WIFI1_I_CLK							39
+#define CLK_GOUT_FSYS1_UFS_CARD_I_ACLK							40
+#define CLK_GOUT_FSYS1_UFS_CARD_I_CLK_UNIPRO						41
+#define CLK_GOUT_FSYS1_UFS_CARD_I_FMP_CLK						42
+#define CLK_GOUT_FSYS1_XIU_D_FSYS1_ACLK							43
+#define CLK_GOUT_FSYS1_XIU_P_FSYS1_ACLK							44
+
+/* CMU_PERIC0 */
+#define CLK_MOUT_PERIC0_BUS_USER			1
+#define CLK_MOUT_PERIC0_UART_DBG_USER			2
+#define CLK_MOUT_PERIC0_USI00_USER			3
+#define CLK_MOUT_PERIC0_USI01_USER			4
+#define CLK_MOUT_PERIC0_USI02_USER			5
+#define CLK_MOUT_PERIC0_USI03_USER			6
+#define CLK_GOUT_PERIC0_PERIC0_CMU_PERIC0_PCLK		7
+#define CLK_GOUT_PERIC0_AXI2APB_PERIC0_ACLK		8
+#define CLK_GOUT_PERIC0_GPIO_PERIC0_PCLK		9
+#define CLK_GOUT_PERIC0_LHM_AXI_P_PERIC0_I_CLK		10
+#define CLK_GOUT_PERIC0_PMU_PERIC0_PCLK			11
+#define CLK_GOUT_PERIC0_PWM_I_PCLK_S0			12
+#define CLK_GOUT_PERIC0_RSTNSYNC_CLK_PERIC0_BUSP_CLK	13
+#define CLK_GOUT_PERIC0_SPEEDY2_TSP_CLK			14
+#define CLK_GOUT_PERIC0_SYSREG_PERIC0_PCLK		15
+#define CLK_GOUT_PERIC0_UART_DBG_EXT_UCLK		16
+#define CLK_GOUT_PERIC0_UART_DBG_PCLK			17
+#define CLK_GOUT_PERIC0_USI00_I_PCLK			18
+#define CLK_GOUT_PERIC0_USI00_I_SCLK_USI		19
+#define CLK_GOUT_PERIC0_USI01_I_PCLK			20
+#define CLK_GOUT_PERIC0_USI01_I_SCLK_USI		21
+#define CLK_GOUT_PERIC0_USI02_I_PCLK			22
+#define CLK_GOUT_PERIC0_USI02_I_SCLK_USI		23
+#define CLK_GOUT_PERIC0_USI03_I_PCLK			24
+#define CLK_GOUT_PERIC0_USI03_I_SCLK_USI		25
+
+/* CMU_PERIC1 */
+#define CLK_MOUT_PERIC1_BUS_USER			1
+#define CLK_MOUT_PERIC1_SPEEDY2_USER			2
+#define CLK_MOUT_PERIC1_SPI_CAM0_USER			3
+#define CLK_MOUT_PERIC1_SPI_CAM1_USER			4
+#define CLK_MOUT_PERIC1_UART_BT_USER			5
+#define CLK_MOUT_PERIC1_USI04_USER			6
+#define CLK_MOUT_PERIC1_USI05_USER			7
+#define CLK_MOUT_PERIC1_USI06_USER			8
+#define CLK_MOUT_PERIC1_USI07_USER			9
+#define CLK_MOUT_PERIC1_USI08_USER			10
+#define CLK_MOUT_PERIC1_USI09_USER			11
+#define CLK_MOUT_PERIC1_USI10_USER			12
+#define CLK_MOUT_PERIC1_USI11_USER			13
+#define CLK_MOUT_PERIC1_USI12_USER			14
+#define CLK_MOUT_PERIC1_USI13_USER			15
+#define CLK_GOUT_PERIC1_PERIC1_CMU_PERIC1_PCLK		16
+#define CLK_GOUT_PERIC1_RSTNSYNC_CLK_PERIC1_SPEEDY2_CLK	17
+#define CLK_GOUT_PERIC1_AXI2APB_PERIC1P0_ACLK		18
+#define CLK_GOUT_PERIC1_AXI2APB_PERIC1P1_ACLK		19
+#define CLK_GOUT_PERIC1_AXI2APB_PERIC1P2_ACLK		20
+#define CLK_GOUT_PERIC1_GPIO_PERIC1_PCLK		21
+#define CLK_GOUT_PERIC1_HSI2C_CAM0_IPCLK		22
+#define CLK_GOUT_PERIC1_HSI2C_CAM1_IPCLK		23
+#define CLK_GOUT_PERIC1_HSI2C_CAM2_IPCLK		24
+#define CLK_GOUT_PERIC1_HSI2C_CAM3_IPCLK		25
+#define CLK_GOUT_PERIC1_LHM_AXI_P_PERIC1_I_CLK		26
+#define CLK_GOUT_PERIC1_PMU_PERIC1_PCLK			27
+#define CLK_GOUT_PERIC1_RSTNSYNC_CLK_PERIC1_BUSP_CLK	28
+#define CLK_GOUT_PERIC1_SPEEDY2_DDI1_CLK		29
+#define CLK_GOUT_PERIC1_SPEEDY2_DDI1_SCLK		30
+#define CLK_GOUT_PERIC1_SPEEDY2_DDI2_CLK		31
+#define CLK_GOUT_PERIC1_SPEEDY2_DDI2_SCLK		32
+#define CLK_GOUT_PERIC1_SPEEDY2_DDI_CLK			33
+#define CLK_GOUT_PERIC1_SPEEDY2_DDI_SCLK		34
+#define CLK_GOUT_PERIC1_SPEEDY2_TSP1_CLK		35
+#define CLK_GOUT_PERIC1_SPEEDY2_TSP2_CLK		36
+#define CLK_GOUT_PERIC1_SPI_CAM0_PCLK			37
+#define CLK_GOUT_PERIC1_SPI_CAM0_SPI_EXT_CLK		38
+#define CLK_GOUT_PERIC1_SPI_CAM1_PCLK			39
+#define CLK_GOUT_PERIC1_SPI_CAM1_SPI_EXT_CLK		40
+#define CLK_GOUT_PERIC1_SYSREG_PERIC1_PCLK		41
+#define CLK_GOUT_PERIC1_UART_BT_EXT_UCLK		42
+#define CLK_GOUT_PERIC1_UART_BT_PCLK			43
+#define CLK_GOUT_PERIC1_USI04_I_PCLK			44
+#define CLK_GOUT_PERIC1_USI04_I_SCLK_USI		45
+#define CLK_GOUT_PERIC1_USI05_I_PCLK			46
+#define CLK_GOUT_PERIC1_USI05_I_SCLK_USI		47
+#define CLK_GOUT_PERIC1_USI06_I_PCLK			48
+#define CLK_GOUT_PERIC1_USI06_I_SCLK_USI		49
+#define CLK_GOUT_PERIC1_USI07_I_PCLK			50
+#define CLK_GOUT_PERIC1_USI07_I_SCLK_USI		51
+#define CLK_GOUT_PERIC1_USI08_I_PCLK			52
+#define CLK_GOUT_PERIC1_USI08_I_SCLK_USI		53
+#define CLK_GOUT_PERIC1_USI09_I_PCLK			54
+#define CLK_GOUT_PERIC1_USI09_I_SCLK_USI		55
+#define CLK_GOUT_PERIC1_USI10_I_PCLK			56
+#define CLK_GOUT_PERIC1_USI10_I_SCLK_USI		57
+#define CLK_GOUT_PERIC1_USI11_I_PCLK			58
+#define CLK_GOUT_PERIC1_USI11_I_SCLK_USI		59
+#define CLK_GOUT_PERIC1_USI12_I_PCLK			60
+#define CLK_GOUT_PERIC1_USI12_I_SCLK_USI		61
+#define CLK_GOUT_PERIC1_USI13_I_PCLK			62
+#define CLK_GOUT_PERIC1_USI13_I_SCLK_USI		63
+#define CLK_GOUT_PERIC1_XIU_P_PERIC1_ACLK		64
+
+#endif /* _DT_BINDINGS_CLOCK_EXYNOS8895_H */
-- 
cgit v1.2.3


From 8acdd0e7bfadda6b5103f2960d293581954454ed Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@redhat.com>
Date: Fri, 25 Oct 2024 08:37:18 +0800
Subject: blk-mq: add non_owner variant of start_freeze/unfreeze queue APIs

Add non_owner variant of start_freeze/unfreeze queue APIs, so that the
caller knows that what they are doing, and we can skip lockdep support
for non_owner variant in per-call level.

Prepare for supporting lockdep for freezing/unfreezing queue.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Suggested-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Link: https://lore.kernel.org/r/20241025003722.3630252-2-ming.lei@redhat.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blk-mq.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 59e9adf815a4..2035fad3131f 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -919,6 +919,8 @@ void blk_freeze_queue_start(struct request_queue *q);
 void blk_mq_freeze_queue_wait(struct request_queue *q);
 int blk_mq_freeze_queue_wait_timeout(struct request_queue *q,
 				     unsigned long timeout);
+void blk_mq_unfreeze_queue_non_owner(struct request_queue *q);
+void blk_freeze_queue_start_non_owner(struct request_queue *q);
 
 void blk_mq_map_queues(struct blk_mq_queue_map *qmap);
 void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues);
-- 
cgit v1.2.3


From f1be1788a32e8fa63416ad4518bbd1a85a825c9d Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@redhat.com>
Date: Fri, 25 Oct 2024 08:37:20 +0800
Subject: block: model freeze & enter queue as lock for supporting lockdep

Recently we got several deadlock report[1][2][3] caused by
blk_mq_freeze_queue and blk_enter_queue().

Turns out the two are just like acquiring read/write lock, so model them
as read/write lock for supporting lockdep:

1) model q->q_usage_counter as two locks(io and queue lock)

- queue lock covers sync with blk_enter_queue()

- io lock covers sync with bio_enter_queue()

2) make the lockdep class/key as per-queue:

- different subsystem has very different lock use pattern, shared lock
 class causes false positive easily

- freeze_queue degrades to no lock in case that disk state becomes DEAD
  because bio_enter_queue() won't be blocked any more

- freeze_queue degrades to no lock in case that request queue becomes dying
  because blk_enter_queue() won't be blocked any more

3) model blk_mq_freeze_queue() as acquire_exclusive & try_lock
- it is exclusive lock, so dependency with blk_enter_queue() is covered

- it is trylock because blk_mq_freeze_queue() are allowed to run
  concurrently

4) model blk_enter_queue() & bio_enter_queue() as acquire_read()
- nested blk_enter_queue() are allowed

- dependency with blk_mq_freeze_queue() is covered

- blk_queue_exit() is often called from other contexts(such as irq), and
it can't be annotated as lock_release(), so simply do it in
blk_enter_queue(), this way still covered cases as many as possible

With lockdep support, such kind of reports may be reported asap and
needn't wait until the real deadlock is triggered.

For example, lockdep report can be triggered in the report[3] with this
patch applied.

[1] occasional block layer hang when setting 'echo noop > /sys/block/sda/queue/scheduler'
https://bugzilla.kernel.org/show_bug.cgi?id=219166

[2] del_gendisk() vs blk_queue_enter() race condition
https://lore.kernel.org/linux-block/20241003085610.GK11458@google.com/

[3] queue_freeze & queue_enter deadlock in scsi
https://lore.kernel.org/linux-block/ZxG38G9BuFdBpBHZ@fedora/T/#u

Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Link: https://lore.kernel.org/r/20241025003722.3630252-4-ming.lei@redhat.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blkdev.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 55bec14fe55f..d0a52ed05e60 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -25,6 +25,7 @@
 #include <linux/uuid.h>
 #include <linux/xarray.h>
 #include <linux/file.h>
+#include <linux/lockdep.h>
 
 struct module;
 struct request_queue;
@@ -474,6 +475,11 @@ struct request_queue {
 	struct xarray		hctx_table;
 
 	struct percpu_ref	q_usage_counter;
+	struct lock_class_key	io_lock_cls_key;
+	struct lockdep_map	io_lockdep_map;
+
+	struct lock_class_key	q_lock_cls_key;
+	struct lockdep_map	q_lockdep_map;
 
 	struct request		*last_merge;
 
-- 
cgit v1.2.3


From 30dac24e14b52e1787572d1d4e06eeabe8a63630 Mon Sep 17 00:00:00 2001
From: Pankaj Raghav <p.raghav@samsung.com>
Date: Thu, 26 Sep 2024 16:01:21 +0200
Subject: fs/writeback: convert wbc_account_cgroup_owner to take a folio

Most of the callers of wbc_account_cgroup_owner() are converting a folio
to page before calling the function. wbc_account_cgroup_owner() is
converting the page back to a folio to call mem_cgroup_css_from_folio().

Convert wbc_account_cgroup_owner() to take a folio instead of a page,
and convert all callers to pass a folio directly except f2fs.

Convert the page to folio for all the callers from f2fs as they were the
only callers calling wbc_account_cgroup_owner() with a page. As f2fs is
already in the process of converting to folios, these call sites might
also soon be calling wbc_account_cgroup_owner() with a folio directly in
the future.

No functional changes. Only compile tested.

Signed-off-by: Pankaj Raghav <p.raghav@samsung.com>
Link: https://lore.kernel.org/r/20240926140121.203821-1-kernel@pankajraghav.com
Acked-by: David Sterba <dsterba@suse.com>
Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/linux/writeback.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index d6db822e4bb3..641a057e0413 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -217,7 +217,7 @@ void wbc_attach_and_unlock_inode(struct writeback_control *wbc,
 				 struct inode *inode)
 	__releases(&inode->i_lock);
 void wbc_detach_inode(struct writeback_control *wbc);
-void wbc_account_cgroup_owner(struct writeback_control *wbc, struct page *page,
+void wbc_account_cgroup_owner(struct writeback_control *wbc, struct folio *folio,
 			      size_t bytes);
 int cgroup_writeback_by_id(u64 bdi_id, int memcg_id,
 			   enum wb_reason reason, struct wb_completion *done);
@@ -324,7 +324,7 @@ static inline void wbc_init_bio(struct writeback_control *wbc, struct bio *bio)
 }
 
 static inline void wbc_account_cgroup_owner(struct writeback_control *wbc,
-					    struct page *page, size_t bytes)
+					    struct folio *folio, size_t bytes)
 {
 }
 
-- 
cgit v1.2.3


From 0e152beb5aa1ccdac9aae9fa570a9e039aff7a03 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andr=C3=A9=20Almeida?= <andrealmeid@igalia.com>
Date: Mon, 21 Oct 2024 13:37:17 -0300
Subject: libfs: Create the helper function generic_ci_validate_strict_name()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Create a helper function for filesystems do the checks required for
casefold directories and strict encoding.

Suggested-by: Gabriel Krisman Bertazi <krisman@suse.de>
Reviewed-by: Gabriel Krisman Bertazi <gabriel@krisman.be>
Signed-off-by: André Almeida <andrealmeid@igalia.com>
Link: https://lore.kernel.org/r/20241021-tonyk-tmpfs-v8-1-f443d5814194@igalia.com
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/linux/fs.h | 45 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 45 insertions(+)

(limited to 'include')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 3559446279c1..403ee5d54c60 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -45,6 +45,7 @@
 #include <linux/slab.h>
 #include <linux/maple_tree.h>
 #include <linux/rw_hint.h>
+#include <linux/unicode.h>
 
 #include <asm/byteorder.h>
 #include <uapi/linux/fs.h>
@@ -3456,6 +3457,50 @@ extern int generic_ci_match(const struct inode *parent,
 			    const struct qstr *folded_name,
 			    const u8 *de_name, u32 de_name_len);
 
+#if IS_ENABLED(CONFIG_UNICODE)
+/**
+ * generic_ci_validate_strict_name - Check if a given name is suitable
+ * for a directory
+ *
+ * This functions checks if the proposed filename is valid for the
+ * parent directory. That means that only valid UTF-8 filenames will be
+ * accepted for casefold directories from filesystems created with the
+ * strict encoding flag.  That also means that any name will be
+ * accepted for directories that doesn't have casefold enabled, or
+ * aren't being strict with the encoding.
+ *
+ * @dir: inode of the directory where the new file will be created
+ * @name: name of the new file
+ *
+ * Return:
+ * * True if the filename is suitable for this directory. It can be
+ * true if a given name is not suitable for a strict encoding
+ * directory, but the directory being used isn't strict
+ * * False if the filename isn't suitable for this directory. This only
+ * happens when a directory is casefolded and the filesystem is strict
+ * about its encoding.
+ */
+static inline bool generic_ci_validate_strict_name(struct inode *dir, struct qstr *name)
+{
+	if (!IS_CASEFOLDED(dir) || !sb_has_strict_encoding(dir->i_sb))
+		return true;
+
+	/*
+	 * A casefold dir must have a encoding set, unless the filesystem
+	 * is corrupted
+	 */
+	if (WARN_ON_ONCE(!dir->i_sb->s_encoding))
+		return true;
+
+	return !utf8_validate(dir->i_sb->s_encoding, name);
+}
+#else
+static inline bool generic_ci_validate_strict_name(struct inode *dir, struct qstr *name)
+{
+	return true;
+}
+#endif
+
 static inline bool sb_has_encoding(const struct super_block *sb)
 {
 #if IS_ENABLED(CONFIG_UNICODE)
-- 
cgit v1.2.3


From 04dad6c6d37d741bad9946a92171bfa637e989f0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andr=C3=A9=20Almeida?= <andrealmeid@igalia.com>
Date: Mon, 21 Oct 2024 13:37:19 -0300
Subject: unicode: Export latest available UTF-8 version number
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Export latest available UTF-8 version number so filesystems can easily
load the newest one.

Signed-off-by: André Almeida <andrealmeid@igalia.com>
Link: https://lore.kernel.org/r/20241021-tonyk-tmpfs-v8-3-f443d5814194@igalia.com
Acked-by: Gabriel Krisman Bertazi <krisman@suse.de>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/linux/unicode.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/unicode.h b/include/linux/unicode.h
index 4d39e6e11a95..0c0ab04e84ee 100644
--- a/include/linux/unicode.h
+++ b/include/linux/unicode.h
@@ -16,6 +16,8 @@ struct utf8data_table;
 	 ((unsigned int)(MIN) << UNICODE_MIN_SHIFT) |	\
 	 ((unsigned int)(REV)))
 
+#define UTF8_LATEST        UNICODE_AGE(12, 1, 0)
+
 static inline u8 unicode_major(unsigned int age)
 {
 	return (age >> UNICODE_MAJ_SHIFT) & 0xff;
-- 
cgit v1.2.3


From 142fa60f61f93805471012f24e029af6d113c5cc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andr=C3=A9=20Almeida?= <andrealmeid@igalia.com>
Date: Mon, 21 Oct 2024 13:37:20 -0300
Subject: unicode: Recreate utf8_parse_version()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

All filesystems that currently support UTF-8 casefold can fetch the
UTF-8 version from the filesystem metadata stored on disk. They can get
the data stored and directly match it to a integer, so they can skip the
string parsing step, which motivated the removal of this function in the
first place.

However, for tmpfs, the only way to tell the kernel which UTF-8 version
we are about to use is via mount options, using a string. Re-introduce
utf8_parse_version() to be used by tmpfs.

This version differs from the original by skipping the intermediate step
of copying the version string to an auxiliary string before calling
match_token(). This versions calls match_token() in the argument string.
The paramenters are simpler now as well.

utf8_parse_version() was created by 9d53690f0d4 ("unicode: implement
higher level API for string handling") and later removed by 49bd03cc7e9
("unicode: pass a UNICODE_AGE() tripple to utf8_load").

Signed-off-by: André Almeida <andrealmeid@igalia.com>
Link: https://lore.kernel.org/r/20241021-tonyk-tmpfs-v8-4-f443d5814194@igalia.com
Reviewed-by: Theodore Ts'o <tytso@mit.edu>
Reviewed-by: Gabriel Krisman Bertazi <krisman@suse.de>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/linux/unicode.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/unicode.h b/include/linux/unicode.h
index 0c0ab04e84ee..5e6b212a2aed 100644
--- a/include/linux/unicode.h
+++ b/include/linux/unicode.h
@@ -78,4 +78,6 @@ int utf8_casefold_hash(const struct unicode_map *um, const void *salt,
 struct unicode_map *utf8_load(unsigned int version);
 void utf8_unload(struct unicode_map *um);
 
+int utf8_parse_version(char *version);
+
 #endif /* _LINUX_UNICODE_H */
-- 
cgit v1.2.3


From 458532c8dfeb24edd5e07467605a6484a728e5c2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andr=C3=A9=20Almeida?= <andrealmeid@igalia.com>
Date: Mon, 21 Oct 2024 13:37:21 -0300
Subject: libfs: Export generic_ci_ dentry functions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Export generic_ci_ dentry functions so they can be used by
case-insensitive filesystems that need something more custom than the
default one set by `struct generic_ci_dentry_ops`.

Reviewed-by: Gabriel Krisman Bertazi <gabriel@krisman.be>
Signed-off-by: André Almeida <andrealmeid@igalia.com>
Link: https://lore.kernel.org/r/20241021-tonyk-tmpfs-v8-5-f443d5814194@igalia.com
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/linux/fs.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 403ee5d54c60..b277369672a1 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -3458,6 +3458,10 @@ extern int generic_ci_match(const struct inode *parent,
 			    const u8 *de_name, u32 de_name_len);
 
 #if IS_ENABLED(CONFIG_UNICODE)
+int generic_ci_d_hash(const struct dentry *dentry, struct qstr *str);
+int generic_ci_d_compare(const struct dentry *dentry, unsigned int len,
+			 const char *str, const struct qstr *name);
+
 /**
  * generic_ci_validate_strict_name - Check if a given name is suitable
  * for a directory
-- 
cgit v1.2.3


From 5cd9aecbc72c07e8b83e900078669a7d0bc5f98f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andr=C3=A9=20Almeida?= <andrealmeid@igalia.com>
Date: Mon, 21 Oct 2024 13:37:23 -0300
Subject: tmpfs: Add flag FS_CASEFOLD_FL support for tmpfs dirs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Enable setting flag FS_CASEFOLD_FL for tmpfs directories, when tmpfs is
mounted with casefold support. A special check is need for this flag,
since it can't be set for non-empty directories.

Reviewed-by: Gabriel Krisman Bertazi <krisman@suse.de>
Reviewed-by: Gabriel Krisman Bertazi <gabriel@krisman.be>
Signed-off-by: André Almeida <andrealmeid@igalia.com>
Link: https://lore.kernel.org/r/20241021-tonyk-tmpfs-v8-7-f443d5814194@igalia.com
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/linux/shmem_fs.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
index 515a9a6a3c6f..018da28c01e7 100644
--- a/include/linux/shmem_fs.h
+++ b/include/linux/shmem_fs.h
@@ -42,10 +42,10 @@ struct shmem_inode_info {
 	struct inode		vfs_inode;
 };
 
-#define SHMEM_FL_USER_VISIBLE		FS_FL_USER_VISIBLE
+#define SHMEM_FL_USER_VISIBLE		(FS_FL_USER_VISIBLE | FS_CASEFOLD_FL)
 #define SHMEM_FL_USER_MODIFIABLE \
-	(FS_IMMUTABLE_FL | FS_APPEND_FL | FS_NODUMP_FL | FS_NOATIME_FL)
-#define SHMEM_FL_INHERITED		(FS_NODUMP_FL | FS_NOATIME_FL)
+	(FS_IMMUTABLE_FL | FS_APPEND_FL | FS_NODUMP_FL | FS_NOATIME_FL | FS_CASEFOLD_FL)
+#define SHMEM_FL_INHERITED		(FS_NODUMP_FL | FS_NOATIME_FL | FS_CASEFOLD_FL)
 
 struct shmem_quota_limits {
 	qsize_t usrquota_bhardlimit; /* Default user quota block hard limit */
-- 
cgit v1.2.3


From 9f070b1862f3411b8bcdfd51a8eaad25286f9deb Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hverkuil@xs4all.nl>
Date: Mon, 14 Oct 2024 16:52:41 +0200
Subject: media: v4l2-core: v4l2-dv-timings: check cvt/gtf result

The v4l2_detect_cvt/gtf functions should check the result against the
timing capabilities: these functions calculate the timings, so if they
are out of bounds, they should be rejected.

To do this, add the struct v4l2_dv_timings_cap as argument to those
functions.

This required updates to the adv7604 and adv7842 drivers since the
prototype of these functions has now changed. The timings struct
that is passed to v4l2_detect_cvt/gtf in those two drivers is filled
with the timings detected by the hardware.

The vivid driver was also updated, but an additional check was added:
the width and height specified by VIDIOC_S_DV_TIMINGS has to match the
calculated result, otherwise something went wrong. Note that vivid
*emulates* hardware, so all the values passed to the v4l2_detect_cvt/gtf
functions came from the timings struct that was filled by userspace
and passed on to the driver via VIDIOC_S_DV_TIMINGS. So these fields
can contain random data. Both the constraints check via
struct v4l2_dv_timings_cap and the additional width/height check
ensure that the resulting timings are sane and not messed up by the
v4l2_detect_cvt/gtf calculations.

Signed-off-by: Hans Verkuil <hverkuil@xs4all.nl>
Fixes: 2576415846bc ("[media] v4l2: move dv-timings related code to v4l2-dv-timings.c")
Cc: stable@vger.kernel.org
Reported-by: syzbot+a828133770f62293563e@syzkaller.appspotmail.com
Closes: https://lore.kernel.org/linux-media/000000000000013050062127830a@google.com/
---
 include/media/v4l2-dv-timings.h | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/media/v4l2-dv-timings.h b/include/media/v4l2-dv-timings.h
index 13830411bd6c..ff07dc6b103c 100644
--- a/include/media/v4l2-dv-timings.h
+++ b/include/media/v4l2-dv-timings.h
@@ -147,15 +147,18 @@ void v4l2_print_dv_timings(const char *dev_prefix, const char *prefix,
  * @polarities: the horizontal and vertical polarities (same as struct
  *		v4l2_bt_timings polarities).
  * @interlaced: if this flag is true, it indicates interlaced format
+ * @cap: the v4l2_dv_timings_cap capabilities.
  * @fmt: the resulting timings.
  *
  * This function will attempt to detect if the given values correspond to a
  * valid CVT format. If so, then it will return true, and fmt will be filled
  * in with the found CVT timings.
  */
-bool v4l2_detect_cvt(unsigned frame_height, unsigned hfreq, unsigned vsync,
-		unsigned active_width, u32 polarities, bool interlaced,
-		struct v4l2_dv_timings *fmt);
+bool v4l2_detect_cvt(unsigned int frame_height, unsigned int hfreq,
+		     unsigned int vsync, unsigned int active_width,
+		     u32 polarities, bool interlaced,
+		     const struct v4l2_dv_timings_cap *cap,
+		     struct v4l2_dv_timings *fmt);
 
 /**
  * v4l2_detect_gtf - detect if the given timings follow the GTF standard
@@ -171,15 +174,18 @@ bool v4l2_detect_cvt(unsigned frame_height, unsigned hfreq, unsigned vsync,
  *		image height, so it has to be passed explicitly. Usually
  *		the native screen aspect ratio is used for this. If it
  *		is not filled in correctly, then 16:9 will be assumed.
+ * @cap: the v4l2_dv_timings_cap capabilities.
  * @fmt: the resulting timings.
  *
  * This function will attempt to detect if the given values correspond to a
  * valid GTF format. If so, then it will return true, and fmt will be filled
  * in with the found GTF timings.
  */
-bool v4l2_detect_gtf(unsigned frame_height, unsigned hfreq, unsigned vsync,
-		u32 polarities, bool interlaced, struct v4l2_fract aspect,
-		struct v4l2_dv_timings *fmt);
+bool v4l2_detect_gtf(unsigned int frame_height, unsigned int hfreq,
+		     unsigned int vsync, u32 polarities, bool interlaced,
+		     struct v4l2_fract aspect,
+		     const struct v4l2_dv_timings_cap *cap,
+		     struct v4l2_dv_timings *fmt);
 
 /**
  * v4l2_calc_aspect_ratio - calculate the aspect ratio based on bytes
-- 
cgit v1.2.3


From d4a65302dd849fade9e2ca712826c35b8d068ecb Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Thu, 24 Oct 2024 13:34:26 +0000
Subject: vdso: Change PAGE_MASK to signed on all 32-bit architectures

With the introduction of an architecture-independent defintion of
PAGE_MASK, we had to make a choice between defining it as 'unsigned long'
as on 64-bit architectures, or as signed 'long' as required for
architectures with a 64-bit phys_addr_t.

To reduce the risk for regressions and minimize the changes in behavior,
the result was using the signed value only when CONFIG_PHYS_ADDR_T_64BIT
is set, but that ended up causing a regression after all in the
early_init_dt_add_memory_arch() function that uses 64-bit integers for
address calculation.

Presumably the same regression also affects mips32 and powerpc32 when
dealing with large amounts of memory on DT platforms: like arm32, they were
using the signed version unconditionally.

The two most sensible options for addressing the regression are either to
go back to an architecture specific definition, using a signed constant on
arm/powerpc/mips and unsigned on the others, or to use the same definition
everywhere.

Use the simpler of those two and change them all to the signed version, in
the hope that this does not cause a different type of bug. Most of the
other 32-bit architectures have no large physical address support and are
rarely used, so it seems more likely that using the same definition helps
than hurts here.

In particular, x86-32 does have physical addressing extensions, so it
already changed to the signed version after the previous patch, so it makes
sense to use the same version on non-PAE as well.

Fixes: efe8419ae78d ("vdso: Introduce vdso/page.h")
Reported-by: Naresh Kamboju <naresh.kamboju@linaro.org>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Anders Roxell <anders.roxell@linaro.org>
Tested-by: Vincenzo Frascino <vincenzo.frascino@arm.com>
Reviewed-by: Vincenzo Frascino <vincenzo.frascino@arm.com>
Link: https://lore.kernel.org/all/20241024133447.3117273-1-arnd@kernel.org
Link: https://lore.kernel.org/lkml/CA+G9fYt86bUAu_v5dXPWnDUwQNVipj+Wq3Djir1KUSKdr9QLNg@mail.gmail.com/
---
 include/vdso/page.h | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/vdso/page.h b/include/vdso/page.h
index 4ada1ba6bd1f..710ae2414e68 100644
--- a/include/vdso/page.h
+++ b/include/vdso/page.h
@@ -14,13 +14,14 @@
 
 #define PAGE_SIZE	(_AC(1,UL) << CONFIG_PAGE_SHIFT)
 
-#if defined(CONFIG_PHYS_ADDR_T_64BIT) && !defined(CONFIG_64BIT)
+#if !defined(CONFIG_64BIT)
 /*
- * Applies only to 32-bit architectures with a 64-bit phys_addr_t.
+ * Applies only to 32-bit architectures.
  *
  * Subtle: (1 << CONFIG_PAGE_SHIFT) is an int, not an unsigned long.
  * So if we assign PAGE_MASK to a larger type it gets extended the
- * way we want (i.e. with 1s in the high bits)
+ * way we want (i.e. with 1s in the high bits) while masking a
+ * 64-bit value such as phys_addr_t.
  */
 #define PAGE_MASK	(~((1 << CONFIG_PAGE_SHIFT) - 1))
 #else
-- 
cgit v1.2.3


From a2ad1b8101a36c927bcbd1317475b9576c352155 Mon Sep 17 00:00:00 2001
From: Steve Sistare <steven.sistare@oracle.com>
Date: Fri, 25 Oct 2024 06:11:51 -0700
Subject: mm/gup: Add folio_add_pins()

Export a function that adds pins to an already-pinned huge-page folio.
This allows any range of small pages within the folio to be unpinned later.
For example, pages pinned via memfd_pin_folios and modified by
folio_add_pins could be unpinned via unpin_user_page(s).

Link: https://patch.msgid.link/r/1729861919-234514-2-git-send-email-steven.sistare@oracle.com
Suggested-by: Jason Gunthorpe <jgg@nvidia.com>
Suggested-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Steve Sistare <steven.sistare@oracle.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Acked-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 include/linux/mm.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index ecf63d2b0582..f9de33ac2add 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2524,6 +2524,7 @@ long pin_user_pages_unlocked(unsigned long start, unsigned long nr_pages,
 long memfd_pin_folios(struct file *memfd, loff_t start, loff_t end,
 		      struct folio **folios, unsigned int max_folios,
 		      pgoff_t *offset);
+int folio_add_pins(struct folio *folio, unsigned int pins);
 
 int get_user_pages_fast(unsigned long start, int nr_pages,
 			unsigned int gup_flags, struct page **pages);
-- 
cgit v1.2.3


From f4986a72d6e4be78ec0e4ee0e03531474621183f Mon Sep 17 00:00:00 2001
From: Steve Sistare <steven.sistare@oracle.com>
Date: Fri, 25 Oct 2024 06:11:57 -0700
Subject: iommufd: Add IOMMU_IOAS_MAP_FILE

Define the IOMMU_IOAS_MAP_FILE ioctl interface, which allows a user to
register memory by passing a memfd plus offset and length.  Implement it
using the memfd_pin_folios() kAPI.

Link: https://patch.msgid.link/r/1729861919-234514-8-git-send-email-steven.sistare@oracle.com
Suggested-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Steve Sistare <steven.sistare@oracle.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 include/uapi/linux/iommufd.h | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h
index 72010f71c5e4..41b1a01e9293 100644
--- a/include/uapi/linux/iommufd.h
+++ b/include/uapi/linux/iommufd.h
@@ -51,6 +51,7 @@ enum {
 	IOMMUFD_CMD_HWPT_GET_DIRTY_BITMAP = 0x8c,
 	IOMMUFD_CMD_HWPT_INVALIDATE = 0x8d,
 	IOMMUFD_CMD_FAULT_QUEUE_ALLOC = 0x8e,
+	IOMMUFD_CMD_IOAS_MAP_FILE = 0x8f,
 };
 
 /**
@@ -213,6 +214,30 @@ struct iommu_ioas_map {
 };
 #define IOMMU_IOAS_MAP _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_MAP)
 
+/**
+ * struct iommu_ioas_map_file - ioctl(IOMMU_IOAS_MAP_FILE)
+ * @size: sizeof(struct iommu_ioas_map_file)
+ * @flags: same as for iommu_ioas_map
+ * @ioas_id: same as for iommu_ioas_map
+ * @fd: the memfd to map
+ * @start: byte offset from start of file to map from
+ * @length: same as for iommu_ioas_map
+ * @iova: same as for iommu_ioas_map
+ *
+ * Set an IOVA mapping from a memfd file.  All other arguments and semantics
+ * match those of IOMMU_IOAS_MAP.
+ */
+struct iommu_ioas_map_file {
+	__u32 size;
+	__u32 flags;
+	__u32 ioas_id;
+	__s32 fd;
+	__aligned_u64 start;
+	__aligned_u64 length;
+	__aligned_u64 iova;
+};
+#define IOMMU_IOAS_MAP_FILE _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_MAP_FILE)
+
 /**
  * struct iommu_ioas_copy - ioctl(IOMMU_IOAS_COPY)
  * @size: sizeof(struct iommu_ioas_copy)
-- 
cgit v1.2.3


From 66418687ac895717dc2f6ddffe24cf9b74cd0d3e Mon Sep 17 00:00:00 2001
From: Ira Weiny <ira.weiny@intel.com>
Date: Thu, 10 Oct 2024 10:24:42 -0500
Subject: kernel/range: Const-ify range_contains parameters

range_contains() does not modify the range values.  David suggested it
is safer to keep those parameters as const.[1]

Make range parameters const

Link: https://lore.kernel.org/all/20241008161032.GB1609@twin.jikos.cz/ [1]
Reviewed-by: Dan Williams <dan.j.williams@intel.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Link: https://patch.msgid.link/20241010-const-range-v1-1-afb6e4bfd8ce@intel.com
Signed-off-by: Ira Weiny <ira.weiny@intel.com>
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
---
 include/linux/range.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/range.h b/include/linux/range.h
index 6ad0b73cb7ad..7dc5e835e079 100644
--- a/include/linux/range.h
+++ b/include/linux/range.h
@@ -13,7 +13,8 @@ static inline u64 range_len(const struct range *range)
 	return range->end - range->start + 1;
 }
 
-static inline bool range_contains(struct range *r1, struct range *r2)
+static inline bool range_contains(const struct range *r1,
+				  const struct range *r2)
 {
 	return r1->start <= r2->start && r1->end >= r2->end;
 }
-- 
cgit v1.2.3


From 0bbff9ed81654d5f06bfca484681756ee407f924 Mon Sep 17 00:00:00 2001
From: "Rob Herring (Arm)" <robh@kernel.org>
Date: Wed, 2 Oct 2024 13:43:24 -0500
Subject: perf/arm_pmuv3: Add PMUv3.9 per counter EL0 access control

Armv8.9/9.4 PMUv3.9 adds per counter EL0 access controls. Per counter
access is enabled with the UEN bit in PMUSERENR_EL1 register. Individual
counters are enabled/disabled in the PMUACR_EL1 register. When UEN is
set, the CR/ER bits control EL0 write access and must be set to disable
write access.

With the access controls, the clearing of unused counters can be
skipped.

KVM also configures PMUSERENR_EL1 in order to trap to EL2. UEN does not
need to be set for it since only PMUv3.5 is exposed to guests.

Signed-off-by: Rob Herring (Arm) <robh@kernel.org>
Link: https://lore.kernel.org/r/20241002184326.1105499-1-robh@kernel.org
Signed-off-by: Will Deacon <will@kernel.org>
---
 include/linux/perf/arm_pmuv3.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/perf/arm_pmuv3.h b/include/linux/perf/arm_pmuv3.h
index 3372c1b56486..d698efba28a2 100644
--- a/include/linux/perf/arm_pmuv3.h
+++ b/include/linux/perf/arm_pmuv3.h
@@ -257,6 +257,7 @@
 #define ARMV8_PMU_USERENR_SW	(1 << 1) /* PMSWINC can be written at EL0 */
 #define ARMV8_PMU_USERENR_CR	(1 << 2) /* Cycle counter can be read at EL0 */
 #define ARMV8_PMU_USERENR_ER	(1 << 3) /* Event counter can be read at EL0 */
+#define ARMV8_PMU_USERENR_UEN	(1 << 4) /* Fine grained per counter access at EL0 */
 /* Mask for writable bits */
 #define ARMV8_PMU_USERENR_MASK	(ARMV8_PMU_USERENR_EN | ARMV8_PMU_USERENR_SW | \
 				 ARMV8_PMU_USERENR_CR | ARMV8_PMU_USERENR_ER)
-- 
cgit v1.2.3


From e1dce56443a4a18978fe39ee4af663e5b6b31422 Mon Sep 17 00:00:00 2001
From: Gowthami Thiagarajan <gthiagarajan@marvell.com>
Date: Mon, 28 Oct 2024 11:23:09 +0530
Subject: perf/marvell: Marvell PEM performance monitor support

PCI Express Interface PMU includes various performance counters
to monitor the data that is transmitted over the PCIe link. The
counters track various inbound and outbound transactions which
includes separate counters for posted/non-posted/completion TLPs.
Also, inbound and outbound memory read requests along with their
latencies can also be monitored. Address Translation Services(ATS)events
such as ATS Translation, ATS Page Request, ATS Invalidation along with
their corresponding latencies are also supported.

The performance counters are 64 bits wide.

For instance,
perf stat -e ib_tlp_pr <workload>
tracks the inbound posted TLPs for the workload.

Co-developed-by: Linu Cherian <lcherian@marvell.com>
Signed-off-by: Linu Cherian <lcherian@marvell.com>
Signed-off-by: Gowthami Thiagarajan <gthiagarajan@marvell.com>
Link: https://lore.kernel.org/r/20241028055309.17893-1-gthiagarajan@marvell.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 include/linux/cpuhotplug.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 2361ed4d2b15..61d9a66d1807 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -227,6 +227,7 @@ enum cpuhp_state {
 	CPUHP_AP_PERF_ARM_APM_XGENE_ONLINE,
 	CPUHP_AP_PERF_ARM_CAVIUM_TX2_UNCORE_ONLINE,
 	CPUHP_AP_PERF_ARM_MARVELL_CN10K_DDR_ONLINE,
+	CPUHP_AP_PERF_ARM_MRVL_PEM_ONLINE,
 	CPUHP_AP_PERF_POWERPC_NEST_IMC_ONLINE,
 	CPUHP_AP_PERF_POWERPC_CORE_IMC_ONLINE,
 	CPUHP_AP_PERF_POWERPC_THREAD_IMC_ONLINE,
-- 
cgit v1.2.3


From dc60de4eb0a431bde94e5abe55be6f646cdb435d Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Thu, 24 Oct 2024 22:04:54 +0300
Subject: iio: acpi: Add iio_get_acpi_device_name_and_data() helper function

A few drivers duplicate the code to retrieve ACPI device instance name.
Some of them want an associated driver data as well.

In order of deduplication introduce the common helper functions.

Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://patch.msgid.link/20241024191200.229894-6-andriy.shevchenko@linux.intel.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 include/linux/iio/iio.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include')

diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h
index 3a9b57187a95..445d6666a291 100644
--- a/include/linux/iio/iio.h
+++ b/include/linux/iio/iio.h
@@ -831,6 +831,7 @@ int iio_device_resume_triggering(struct iio_dev *indio_dev);
 bool iio_read_acpi_mount_matrix(struct device *dev,
 				struct iio_mount_matrix *orientation,
 				char *acpi_method);
+const char *iio_get_acpi_device_name_and_data(struct device *dev, const void **data);
 #else
 static inline bool iio_read_acpi_mount_matrix(struct device *dev,
 					      struct iio_mount_matrix *orientation,
@@ -838,7 +839,16 @@ static inline bool iio_read_acpi_mount_matrix(struct device *dev,
 {
 	return false;
 }
+static inline const char *
+iio_get_acpi_device_name_and_data(struct device *dev, const void **data)
+{
+	return NULL;
+}
 #endif
+static inline const char *iio_get_acpi_device_name(struct device *dev)
+{
+	return iio_get_acpi_device_name_and_data(dev, NULL);
+}
 
 /**
  * iio_get_current_scan_type - Get the current scan type for a channel
-- 
cgit v1.2.3


From cb67ff6272eceb5fcb2fe3b74f0293fa0706841a Mon Sep 17 00:00:00 2001
From: Jonathan Kim <jonathan.kim@amd.com>
Date: Tue, 22 Oct 2024 12:30:50 -0400
Subject: drm/amdkfd: flag per-queue reset support for gfx9

Flag KFD support for per-queue reset on GFX9 devices.

Signed-off-by: Jonathan Kim <jonathan.kim@amd.com>
Reviewed-by: Harish Kasiviswanathan <harish.kasiviswanathan@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 include/uapi/linux/kfd_sysfs.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/uapi/linux/kfd_sysfs.h b/include/uapi/linux/kfd_sysfs.h
index 5e8d28617efa..859b8e91d4d3 100644
--- a/include/uapi/linux/kfd_sysfs.h
+++ b/include/uapi/linux/kfd_sysfs.h
@@ -60,7 +60,8 @@
 #define HSA_CAP_FLAGS_COHERENTHOSTACCESS			0x10000000
 #define HSA_CAP_TRAP_DEBUG_FIRMWARE_SUPPORTED			0x20000000
 #define HSA_CAP_TRAP_DEBUG_PRECISE_ALU_OPERATIONS_SUPPORTED	0x40000000
-#define HSA_CAP_RESERVED					0x800f8000
+#define HSA_CAP_PER_QUEUE_RESET_SUPPORTED			0x80000000
+#define HSA_CAP_RESERVED					0x000f8000
 
 /* debug_prop bits in node properties */
 #define HSA_DBG_WATCH_ADDR_MASK_LO_BIT_MASK     0x0000000f
-- 
cgit v1.2.3


From 4261974701851630951e9ab31f0de4ade0faea53 Mon Sep 17 00:00:00 2001
From: Ira Weiny <ira.weiny@intel.com>
Date: Fri, 25 Oct 2024 19:46:55 -0500
Subject: printf: Add print format (%pra) for struct range

The use of struct range in the CXL subsystem is growing.  In particular,
the addition of Dynamic Capacity devices uses struct range in a number
of places which are reported in debug and error messages.

To wit requiring the printing of the start/end fields in each print
became cumbersome.  Dan Williams mentions in [1] that it might be time
to have a print specifier for struct range similar to struct resource.

A few alternatives were considered including '%par', '%r', and '%pn'.
%pra follows that struct range is similar to struct resource (%p[rR])
but needs to be different.  Based on discussions with Petr and Andy
'%pra' was chosen.[2]

Andy also suggested to keep the range prints similar to struct resource
though combined code.  Add hex_range() to handle printing for both
pointer types.

Finally introduce DEFINE_RANGE() as a parallel to DEFINE_RES_*() and use
it in the tests.

Cc: Jonathan Corbet <corbet@lwn.net>
Cc: linux-doc@vger.kernel.org
Cc: open list <linux-kernel@vger.kernel.org>
Link: https://lore.kernel.org/all/663922b475e50_d54d72945b@dwillia2-xfh.jf.intel.com.notmuch/ [1]
Link: https://lore.kernel.org/all/66cea3bf3332f_f937b29424@iweiny-mobl.notmuch/ [2]
Suggested-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Ira Weiny <ira.weiny@intel.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://patch.msgid.link/20241025-cxl-pra-v2-3-123a825daba2@intel.com
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
---
 include/linux/range.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include')

diff --git a/include/linux/range.h b/include/linux/range.h
index 6ad0b73cb7ad..1358d4b1807a 100644
--- a/include/linux/range.h
+++ b/include/linux/range.h
@@ -31,4 +31,10 @@ int clean_sort_range(struct range *range, int az);
 
 void sort_range(struct range *range, int nr_range);
 
+#define DEFINE_RANGE(_start, _end)		\
+(struct range) {				\
+		.start = (_start),		\
+		.end = (_end),			\
+	}
+
 #endif
-- 
cgit v1.2.3


From 6d89ead19946181df1e41d38917fddc951dbd95b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@baylibre.com>
Date: Fri, 12 Jul 2024 11:35:23 +0200
Subject: UAPI/ioctl: Improve parameter name of ioctl request definition
 helpers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The third parameter to _IOR et al is a type name, not a size. So the
parameter being named "size" is irritating. Rename it to "argtype"
instead to reduce confusion.

There is a very minor chance that this breaks stuff. It only hurts
however if there is a variable (or macro) in userspace that is called
"argtype" *and* it's used in the parameters of _IOR and friends. IMHO
this is negligible because usually definitions making use of these
macros are provided by kernel headers (i.e. us) or if they are
replicated in userspace code, they are replicated and so supposed to
match the kernel definitions (e.g. to make them usable by programs
without the need to update the kernel headers used to compile the
program).

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@baylibre.com>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 include/uapi/asm-generic/ioctl.h | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/uapi/asm-generic/ioctl.h b/include/uapi/asm-generic/ioctl.h
index a84f4db8a250..e3290a5824c9 100644
--- a/include/uapi/asm-generic/ioctl.h
+++ b/include/uapi/asm-generic/ioctl.h
@@ -82,13 +82,13 @@
  * NOTE: _IOW means userland is writing and kernel is reading. _IOR
  * means userland is reading and kernel is writing.
  */
-#define _IO(type,nr)		_IOC(_IOC_NONE,(type),(nr),0)
-#define _IOR(type,nr,size)	_IOC(_IOC_READ,(type),(nr),(_IOC_TYPECHECK(size)))
-#define _IOW(type,nr,size)	_IOC(_IOC_WRITE,(type),(nr),(_IOC_TYPECHECK(size)))
-#define _IOWR(type,nr,size)	_IOC(_IOC_READ|_IOC_WRITE,(type),(nr),(_IOC_TYPECHECK(size)))
-#define _IOR_BAD(type,nr,size)	_IOC(_IOC_READ,(type),(nr),sizeof(size))
-#define _IOW_BAD(type,nr,size)	_IOC(_IOC_WRITE,(type),(nr),sizeof(size))
-#define _IOWR_BAD(type,nr,size)	_IOC(_IOC_READ|_IOC_WRITE,(type),(nr),sizeof(size))
+#define _IO(type,nr)			_IOC(_IOC_NONE,(type),(nr),0)
+#define _IOR(type,nr,argtype)		_IOC(_IOC_READ,(type),(nr),(_IOC_TYPECHECK(argtype)))
+#define _IOW(type,nr,argtype)		_IOC(_IOC_WRITE,(type),(nr),(_IOC_TYPECHECK(argtype)))
+#define _IOWR(type,nr,argtype)		_IOC(_IOC_READ|_IOC_WRITE,(type),(nr),(_IOC_TYPECHECK(argtype)))
+#define _IOR_BAD(type,nr,argtype)	_IOC(_IOC_READ,(type),(nr),sizeof(argtype))
+#define _IOW_BAD(type,nr,argtype)	_IOC(_IOC_WRITE,(type),(nr),sizeof(argtype))
+#define _IOWR_BAD(type,nr,argtype)	_IOC(_IOC_READ|_IOC_WRITE,(type),(nr),sizeof(argtype))
 
 /* used to decode ioctl numbers.. */
 #define _IOC_DIR(nr)		(((nr) >> _IOC_DIRSHIFT) & _IOC_DIRMASK)
-- 
cgit v1.2.3


From 7c7e6c8924e7bf98db4e5b2edb202842003c00c2 Mon Sep 17 00:00:00 2001
From: Niklas Schnelle <schnelle@linux.ibm.com>
Date: Thu, 24 Oct 2024 19:54:43 +0200
Subject: tty: serial: handle HAS_IOPORT dependencies

In a future patch HAS_IOPORT=n will disable inb()/outb() and friends at
compile time. We thus need to add HAS_IOPORT as dependency for those
drivers using them unconditionally. Some 8250 serial drivers support
MMIO only use, so fence only the parts requiring I/O ports and print an
error message if a device can't be supported with the current
configuration.

Co-developed-by: Arnd Bergmann <arnd@kernel.org>
Signed-off-by: Arnd Bergmann <arnd@kernel.org>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Reviewed-by: Maciej W. Rozycki <macro@orcam.me.uk>
Signed-off-by: Niklas Schnelle <schnelle@linux.ibm.com>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 include/linux/serial_core.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h
index 4ab65874a850..743b4afaad4c 100644
--- a/include/linux/serial_core.h
+++ b/include/linux/serial_core.h
@@ -505,7 +505,11 @@ struct uart_port {
 	 * The remaining bits are serial-core specific and not modifiable by
 	 * userspace.
 	 */
+#ifdef CONFIG_HAS_IOPORT
 #define UPF_FOURPORT		((__force upf_t) ASYNC_FOURPORT       /* 1  */ )
+#else
+#define UPF_FOURPORT		0
+#endif
 #define UPF_SAK			((__force upf_t) ASYNC_SAK            /* 2  */ )
 #define UPF_SPD_HI		((__force upf_t) ASYNC_SPD_HI         /* 4  */ )
 #define UPF_SPD_VHI		((__force upf_t) ASYNC_SPD_VHI        /* 5  */ )
-- 
cgit v1.2.3


From 6f043e75744596968b6547c4bd43e4d30bbb6d6e Mon Sep 17 00:00:00 2001
From: Niklas Schnelle <schnelle@linux.ibm.com>
Date: Thu, 24 Oct 2024 19:54:44 +0200
Subject: asm-generic/io.h: Remove I/O port accessors for HAS_IOPORT=n
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

With all subsystems and drivers either declaring their dependence on
HAS_IOPORT or fencing I/O port specific code sections we can finally
make inb()/outb() and friends compile-time dependent on HAS_IOPORT as
suggested by Linus in the linked mail. The main benefit of this is that
on platforms such as s390 which have no meaningful way of implementing
inb()/outb() their use without the proper HAS_IOPORT dependency will
result in easy to catch and fix compile-time errors instead of compiling
code that can never work.

Link: https://lore.kernel.org/lkml/CAHk-=wg80je=K7madF4e7WrRNp37e3qh6y10Svhdc7O8SZ_-8g@mail.gmail.com/
Co-developed-by: Arnd Bergmann <arnd@kernel.org>
Signed-off-by: Arnd Bergmann <arnd@kernel.org>
Signed-off-by: Niklas Schnelle <schnelle@linux.ibm.com>
Reviewed-by: Takashi Iwai <tiwai@suse.de>
Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Reviewed-by: Jarkko Sakkinen <jarkko@kernel.org>
Acked-by: Lucas De Marchi <lucas.demarchi@intel.com>
Acked-by: Maciej W. Rozycki <macro@orcam.me.uk>
Acked-by: Damien Le Moal <dlemoal@kernel.org>
Acked-by: Jaroslav Kysela <perex@perex.cz>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Johannes Berg <johannes@sipsolutions.net> # for ARCH=um
Acked-by: Geert Uytterhoeven <geert@linux-m68k.org>
Acked-by: Kalle Valo <kvalo@kernel.org>
Acked-by: Jakub Kicinski <kuba@kernel.org>
Acked-by: Corey Minyard <cminyard@mvista.com>
Acked-by: Guenter Roeck <linux@roeck-us.net>
Acked-by: Damien Le Moal <damien.lemoal@opensource.wdc.com>
Acked-by: Sebastian Reichel <sre@kernel.org>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 include/asm-generic/io.h | 60 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 60 insertions(+)

(limited to 'include')

diff --git a/include/asm-generic/io.h b/include/asm-generic/io.h
index 80de699bf6af..1027be6a62bc 100644
--- a/include/asm-generic/io.h
+++ b/include/asm-generic/io.h
@@ -540,6 +540,7 @@ static inline void writesq(volatile void __iomem *addr, const void *buffer,
 
 #if !defined(inb) && !defined(_inb)
 #define _inb _inb
+#ifdef CONFIG_HAS_IOPORT
 static inline u8 _inb(unsigned long addr)
 {
 	u8 val;
@@ -549,10 +550,15 @@ static inline u8 _inb(unsigned long addr)
 	__io_par(val);
 	return val;
 }
+#else
+u8 _inb(unsigned long addr)
+	__compiletime_error("inb()) requires CONFIG_HAS_IOPORT");
+#endif
 #endif
 
 #if !defined(inw) && !defined(_inw)
 #define _inw _inw
+#ifdef CONFIG_HAS_IOPORT
 static inline u16 _inw(unsigned long addr)
 {
 	u16 val;
@@ -562,10 +568,15 @@ static inline u16 _inw(unsigned long addr)
 	__io_par(val);
 	return val;
 }
+#else
+u16 _inw(unsigned long addr)
+	__compiletime_error("inw() requires CONFIG_HAS_IOPORT");
+#endif
 #endif
 
 #if !defined(inl) && !defined(_inl)
 #define _inl _inl
+#ifdef CONFIG_HAS_IOPORT
 static inline u32 _inl(unsigned long addr)
 {
 	u32 val;
@@ -575,36 +586,55 @@ static inline u32 _inl(unsigned long addr)
 	__io_par(val);
 	return val;
 }
+#else
+u32 _inl(unsigned long addr)
+	__compiletime_error("inl() requires CONFIG_HAS_IOPORT");
+#endif
 #endif
 
 #if !defined(outb) && !defined(_outb)
 #define _outb _outb
+#ifdef CONFIG_HAS_IOPORT
 static inline void _outb(u8 value, unsigned long addr)
 {
 	__io_pbw();
 	__raw_writeb(value, PCI_IOBASE + addr);
 	__io_paw();
 }
+#else
+void _outb(u8 value, unsigned long addr)
+	__compiletime_error("outb() requires CONFIG_HAS_IOPORT");
+#endif
 #endif
 
 #if !defined(outw) && !defined(_outw)
 #define _outw _outw
+#ifdef CONFIG_HAS_IOPORT
 static inline void _outw(u16 value, unsigned long addr)
 {
 	__io_pbw();
 	__raw_writew((u16 __force)cpu_to_le16(value), PCI_IOBASE + addr);
 	__io_paw();
 }
+#else
+void _outw(u16 value, unsigned long addr)
+	__compiletime_error("outw() requires CONFIG_HAS_IOPORT");
+#endif
 #endif
 
 #if !defined(outl) && !defined(_outl)
 #define _outl _outl
+#ifdef CONFIG_HAS_IOPORT
 static inline void _outl(u32 value, unsigned long addr)
 {
 	__io_pbw();
 	__raw_writel((u32 __force)cpu_to_le32(value), PCI_IOBASE + addr);
 	__io_paw();
 }
+#else
+void _outl(u32 value, unsigned long addr)
+	__compiletime_error("outl() requires CONFIG_HAS_IOPORT");
+#endif
 #endif
 
 #include <linux/logic_pio.h>
@@ -688,53 +718,83 @@ static inline void outl_p(u32 value, unsigned long addr)
 
 #ifndef insb
 #define insb insb
+#ifdef CONFIG_HAS_IOPORT
 static inline void insb(unsigned long addr, void *buffer, unsigned int count)
 {
 	readsb(PCI_IOBASE + addr, buffer, count);
 }
+#else
+void insb(unsigned long addr, void *buffer, unsigned int count)
+	__compiletime_error("insb() requires HAS_IOPORT");
+#endif
 #endif
 
 #ifndef insw
 #define insw insw
+#ifdef CONFIG_HAS_IOPORT
 static inline void insw(unsigned long addr, void *buffer, unsigned int count)
 {
 	readsw(PCI_IOBASE + addr, buffer, count);
 }
+#else
+void insw(unsigned long addr, void *buffer, unsigned int count)
+	__compiletime_error("insw() requires HAS_IOPORT");
+#endif
 #endif
 
 #ifndef insl
 #define insl insl
+#ifdef CONFIG_HAS_IOPORT
 static inline void insl(unsigned long addr, void *buffer, unsigned int count)
 {
 	readsl(PCI_IOBASE + addr, buffer, count);
 }
+#else
+void insl(unsigned long addr, void *buffer, unsigned int count)
+	__compiletime_error("insl() requires HAS_IOPORT");
+#endif
 #endif
 
 #ifndef outsb
 #define outsb outsb
+#ifdef CONFIG_HAS_IOPORT
 static inline void outsb(unsigned long addr, const void *buffer,
 			 unsigned int count)
 {
 	writesb(PCI_IOBASE + addr, buffer, count);
 }
+#else
+void outsb(unsigned long addr, const void *buffer, unsigned int count)
+	__compiletime_error("outsb() requires HAS_IOPORT");
+#endif
 #endif
 
 #ifndef outsw
 #define outsw outsw
+#ifdef CONFIG_HAS_IOPORT
 static inline void outsw(unsigned long addr, const void *buffer,
 			 unsigned int count)
 {
 	writesw(PCI_IOBASE + addr, buffer, count);
 }
+#else
+void outsw(unsigned long addr, const void *buffer, unsigned int count)
+	__compiletime_error("outsw() requires HAS_IOPORT");
+#endif
 #endif
 
 #ifndef outsl
 #define outsl outsl
+#ifdef CONFIG_HAS_IOPORT
 static inline void outsl(unsigned long addr, const void *buffer,
 			 unsigned int count)
 {
 	writesl(PCI_IOBASE + addr, buffer, count);
 }
+#else
+void outsl(unsigned long addr, const void *buffer, unsigned int count)
+	__compiletime_error("outsl() requires HAS_IOPORT");
+#endif
 #endif
 
 #ifndef insb_p
-- 
cgit v1.2.3


From c5c3238d9b8cee58cd4b08bbbe9347a94a566390 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 23 Oct 2024 07:36:36 +0200
Subject: asm-generic: provide generic page_to_phys and phys_to_page
 implementations

page_to_phys is duplicated by all architectures, and from some strange
reason placed in <asm/io.h> where it doesn't fit at all.

phys_to_page is only provided by a few architectures despite having a lot
of open coded users.

Provide generic versions in <asm-generic/memory_model.h> to make these
helpers more easily usable.

Note with this patch powerpc loses the CONFIG_DEBUG_VIRTUAL pfn_valid
check.  It will be added back in a generic version later.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 include/asm-generic/memory_model.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/asm-generic/memory_model.h b/include/asm-generic/memory_model.h
index 6796abe1900e..a73a140cbecd 100644
--- a/include/asm-generic/memory_model.h
+++ b/include/asm-generic/memory_model.h
@@ -64,6 +64,9 @@ static inline int pfn_valid(unsigned long pfn)
 #define page_to_pfn __page_to_pfn
 #define pfn_to_page __pfn_to_page
 
+#define page_to_phys(page)	PFN_PHYS(page_to_pfn(page))
+#define phys_to_page(phys)	pfn_to_page(PHYS_PFN(phys))
+
 #endif /* __ASSEMBLY__ */
 
 #endif
-- 
cgit v1.2.3


From 3e25d5a49f99b75be2c6cfb165e4f77dc6d739a2 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 23 Oct 2024 07:36:37 +0200
Subject: asm-generic: add an optional pfn_valid check to page_to_phys

page_to_pfn is usually implemented by pointer arithmetics on the memory
map, which means that bogus input can lead to even more bogus output.

Powerpc had a pfn_valid check on the intermediate pfn in the page_to_phys
implementation when CONFIG_DEBUG_VIRTUAL is defined, which seems
generally useful, so add that to the generic version.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 include/asm-generic/memory_model.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include')

diff --git a/include/asm-generic/memory_model.h b/include/asm-generic/memory_model.h
index a73a140cbecd..6d1fb6162ac1 100644
--- a/include/asm-generic/memory_model.h
+++ b/include/asm-generic/memory_model.h
@@ -64,7 +64,17 @@ static inline int pfn_valid(unsigned long pfn)
 #define page_to_pfn __page_to_pfn
 #define pfn_to_page __pfn_to_page
 
+#ifdef CONFIG_DEBUG_VIRTUAL
+#define page_to_phys(page)						\
+({									\
+	unsigned long __pfn = page_to_pfn(page);			\
+									\
+	WARN_ON_ONCE(!pfn_valid(__pfn));				\
+	PFN_PHYS(__pfn);						\
+})
+#else
 #define page_to_phys(page)	PFN_PHYS(page_to_pfn(page))
+#endif /* CONFIG_DEBUG_VIRTUAL */
 #define phys_to_page(phys)	pfn_to_page(PHYS_PFN(phys))
 
 #endif /* __ASSEMBLY__ */
-- 
cgit v1.2.3


From 00a31dd3acea0f88f947fc71e268ebb34b59f218 Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <npitre@baylibre.com>
Date: Thu, 3 Oct 2024 17:16:14 -0400
Subject: asm-generic/div64: optimize/simplify __div64_const32()

Several years later I just realized that this code could be greatly
simplified.

First, let's formalize the need for overflow handling in
__arch_xprod64(). Assuming n = UINT64_MAX, there are 2 cases where
an overflow may occur:

1) If a bias must be added, we have m_lo * n_lo + m or
   m_lo * 0xffffffff + ((m_hi << 32) + m_lo) or
   ((m_lo << 32) - m_lo) + ((m_hi << 32) + m_lo) or
   (m_lo + m_hi) << 32 which must be < (1 << 64). So the criteria for no
   overflow is m_lo + m_hi < (1 << 32).

2) The cross product m_lo * n_hi + m_hi * n_lo or
   m_lo * 0xffffffff + m_hi * 0xffffffff or
   ((m_lo << 32) - m_lo) + ((m_hi << 32) - m_hi). Assuming the top
   result from the previous step (m_lo + m_hi) that must be added to
   this, we get (m_lo + m_hi) << 32 again.

So let's have a straight and simpler version when this is true.
Otherwise some reordering allows for taking care of possible overflows
without any actual conditionals. And prevent from generating both code
variants by making sure this is considered only if m is perceived as
constant by the compiler.

This, in turn, allows for greatly simplifying __div64_const32(). The
"special case" may go as well as the regular case works just fine
without needing a bias. Then reduction should be applied all the time as
minimizing m is the key.

Signed-off-by: Nicolas Pitre <npitre@baylibre.com>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 include/asm-generic/div64.h | 114 ++++++++++++++------------------------------
 1 file changed, 35 insertions(+), 79 deletions(-)

(limited to 'include')

diff --git a/include/asm-generic/div64.h b/include/asm-generic/div64.h
index 13f5aa68a455..5d59cf7e73d9 100644
--- a/include/asm-generic/div64.h
+++ b/include/asm-generic/div64.h
@@ -74,7 +74,8 @@
 	 * do the trick here).						\
 	 */								\
 	uint64_t ___res, ___x, ___t, ___m, ___n = (n);			\
-	uint32_t ___p, ___bias;						\
+	uint32_t ___p;							\
+	bool ___bias = false;						\
 									\
 	/* determine MSB of b */					\
 	___p = 1 << ilog2(___b);					\
@@ -87,22 +88,14 @@
 	___x = ~0ULL / ___b * ___b - 1;					\
 									\
 	/* test our ___m with res = m * x / (p << 64) */		\
-	___res = ((___m & 0xffffffff) * (___x & 0xffffffff)) >> 32;	\
-	___t = ___res += (___m & 0xffffffff) * (___x >> 32);		\
-	___res += (___x & 0xffffffff) * (___m >> 32);			\
-	___t = (___res < ___t) ? (1ULL << 32) : 0;			\
-	___res = (___res >> 32) + ___t;					\
-	___res += (___m >> 32) * (___x >> 32);				\
-	___res /= ___p;							\
+	___res = (___m & 0xffffffff) * (___x & 0xffffffff);		\
+	___t = (___m & 0xffffffff) * (___x >> 32) + (___res >> 32);	\
+	___res = (___m >> 32) * (___x >> 32) + (___t >> 32);		\
+	___t = (___m >> 32) * (___x & 0xffffffff) + (___t & 0xffffffff);\
+	___res = (___res + (___t >> 32)) / ___p;			\
 									\
-	/* Now sanitize and optimize what we've got. */			\
-	if (~0ULL % (___b / (___b & -___b)) == 0) {			\
-		/* special case, can be simplified to ... */		\
-		___n /= (___b & -___b);					\
-		___m = ~0ULL / (___b / (___b & -___b));			\
-		___p = 1;						\
-		___bias = 1;						\
-	} else if (___res != ___x / ___b) {				\
+	/* Now validate what we've got. */				\
+	if (___res != ___x / ___b) {					\
 		/*							\
 		 * We can't get away without a bias to compensate	\
 		 * for bit truncation errors.  To avoid it we'd need an	\
@@ -111,45 +104,18 @@
 		 *							\
 		 * Instead we do m = p / b and n / b = (n * m + m) / p.	\
 		 */							\
-		___bias = 1;						\
+		___bias = true;						\
 		/* Compute m = (p << 64) / b */				\
 		___m = (~0ULL / ___b) * ___p;				\
 		___m += ((~0ULL % ___b + 1) * ___p) / ___b;		\
-	} else {							\
-		/*							\
-		 * Reduce m / p, and try to clear bit 31 of m when	\
-		 * possible, otherwise that'll need extra overflow	\
-		 * handling later.					\
-		 */							\
-		uint32_t ___bits = -(___m & -___m);			\
-		___bits |= ___m >> 32;					\
-		___bits = (~___bits) << 1;				\
-		/*							\
-		 * If ___bits == 0 then setting bit 31 is  unavoidable.	\
-		 * Simply apply the maximum possible reduction in that	\
-		 * case. Otherwise the MSB of ___bits indicates the	\
-		 * best reduction we should apply.			\
-		 */							\
-		if (!___bits) {						\
-			___p /= (___m & -___m);				\
-			___m /= (___m & -___m);				\
-		} else {						\
-			___p >>= ilog2(___bits);			\
-			___m >>= ilog2(___bits);			\
-		}							\
-		/* No bias needed. */					\
-		___bias = 0;						\
 	}								\
 									\
+	/* Reduce m / p to help avoid overflow handling later. */	\
+	___p /= (___m & -___m);						\
+	___m /= (___m & -___m);						\
+									\
 	/*								\
-	 * Now we have a combination of 2 conditions:			\
-	 *								\
-	 * 1) whether or not we need to apply a bias, and		\
-	 *								\
-	 * 2) whether or not there might be an overflow in the cross	\
-	 *    product determined by (___m & ((1 << 63) | (1 << 31))).	\
-	 *								\
-	 * Select the best way to do (m_bias + m * n) / (1 << 64).	\
+	 * Perform (m_bias + m * n) / (1 << 64).			\
 	 * From now on there will be actual runtime code generated.	\
 	 */								\
 	___res = __arch_xprod_64(___m, ___n, ___bias);			\
@@ -165,7 +131,7 @@
  * Semantic:  retval = ((bias ? m : 0) + m * n) >> 64
  *
  * The product is a 128-bit value, scaled down to 64 bits.
- * Assuming constant propagation to optimize away unused conditional code.
+ * Hoping for compile-time optimization of  conditional code.
  * Architectures may provide their own optimized assembly implementation.
  */
 static inline uint64_t __arch_xprod_64(const uint64_t m, uint64_t n, bool bias)
@@ -174,38 +140,28 @@ static inline uint64_t __arch_xprod_64(const uint64_t m, uint64_t n, bool bias)
 	uint32_t m_hi = m >> 32;
 	uint32_t n_lo = n;
 	uint32_t n_hi = n >> 32;
-	uint64_t res;
-	uint32_t res_lo, res_hi, tmp;
-
-	if (!bias) {
-		res = ((uint64_t)m_lo * n_lo) >> 32;
-	} else if (!(m & ((1ULL << 63) | (1ULL << 31)))) {
-		/* there can't be any overflow here */
-		res = (m + (uint64_t)m_lo * n_lo) >> 32;
-	} else {
-		res = m + (uint64_t)m_lo * n_lo;
-		res_lo = res >> 32;
-		res_hi = (res_lo < m_hi);
-		res = res_lo | ((uint64_t)res_hi << 32);
-	}
-
-	if (!(m & ((1ULL << 63) | (1ULL << 31)))) {
-		/* there can't be any overflow here */
-		res += (uint64_t)m_lo * n_hi;
-		res += (uint64_t)m_hi * n_lo;
-		res >>= 32;
+	uint64_t x, y;
+
+	/* Determine if overflow handling can be dispensed with. */
+	bool no_ovf = __builtin_constant_p(m) &&
+		      ((m >> 32) + (m & 0xffffffff) < 0x100000000);
+
+	if (no_ovf) {
+		x = (uint64_t)m_lo * n_lo + (bias ? m : 0);
+		x >>= 32;
+		x += (uint64_t)m_lo * n_hi;
+		x += (uint64_t)m_hi * n_lo;
+		x >>= 32;
+		x += (uint64_t)m_hi * n_hi;
 	} else {
-		res += (uint64_t)m_lo * n_hi;
-		tmp = res >> 32;
-		res += (uint64_t)m_hi * n_lo;
-		res_lo = res >> 32;
-		res_hi = (res_lo < tmp);
-		res = res_lo | ((uint64_t)res_hi << 32);
+		x = (uint64_t)m_lo * n_lo + (bias ? m_lo : 0);
+		y = (uint64_t)m_lo * n_hi + (uint32_t)(x >> 32) + (bias ? m_hi : 0);
+		x = (uint64_t)m_hi * n_hi + (uint32_t)(y >> 32);
+		y = (uint64_t)m_hi * n_lo + (uint32_t)y;
+		x += (uint32_t)(y >> 32);
 	}
 
-	res += (uint64_t)m_hi * n_hi;
-
-	return res;
+	return x;
 }
 #endif
 
-- 
cgit v1.2.3


From d533cb2d2af400f4689d8c5ca41db6d83ff173be Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <npitre@baylibre.com>
Date: Thu, 3 Oct 2024 17:16:16 -0400
Subject: __arch_xprod64(): make __always_inline when optimizing for
 performance

Recent gcc versions started not systematically inline __arch_xprod64()
and that has performance implications. Give the compiler the freedom to
decide only when optimizing for size.

Here's some timing numbers from lib/math/test_div64.c

Using __always_inline:

```
test_div64: Starting 64bit/32bit division and modulo test
test_div64: Completed 64bit/32bit division and modulo test, 0.048285584s elapsed
```

Without __always_inline:

```
test_div64: Starting 64bit/32bit division and modulo test
test_div64: Completed 64bit/32bit division and modulo test, 0.053023584s elapsed
```

Forcing constant base through the non-constant base code path:

```
test_div64: Starting 64bit/32bit division and modulo test
test_div64: Completed 64bit/32bit division and modulo test, 0.103263776s elapsed
```

It is worth noting that test_div64 does half the test with non constant
divisors already so the impact is greater than what those numbers show.
And for what it is worth, those numbers were obtained using QEMU. The
gcc version is 14.1.0.

Signed-off-by: Nicolas Pitre <npitre@baylibre.com>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 include/asm-generic/div64.h | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/asm-generic/div64.h b/include/asm-generic/div64.h
index 5d59cf7e73d9..25e7b4b58dcf 100644
--- a/include/asm-generic/div64.h
+++ b/include/asm-generic/div64.h
@@ -134,7 +134,12 @@
  * Hoping for compile-time optimization of  conditional code.
  * Architectures may provide their own optimized assembly implementation.
  */
-static inline uint64_t __arch_xprod_64(const uint64_t m, uint64_t n, bool bias)
+#ifdef CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE
+static __always_inline
+#else
+static inline
+#endif
+uint64_t __arch_xprod_64(const uint64_t m, uint64_t n, bool bias)
 {
 	uint32_t m_lo = m;
 	uint32_t m_hi = m >> 32;
-- 
cgit v1.2.3


From b660d0a2acb9053d627befbb259a397d26aa9582 Mon Sep 17 00:00:00 2001
From: Julian Vetter <jvetter@kalrayinc.com>
Date: Mon, 28 Oct 2024 14:42:24 +0100
Subject: New implementation for IO memcpy and IO memset

The IO memcpy and IO memset functions in asm-generic/io.h simply call
memcpy and memset. This can lead to alignment problems or faults on
architectures that do not define their own version and fall back to
these defaults.
This patch introduces new implementations for IO memcpy and IO memset,
that use read{l,q} accessor functions, align accesses to machine word
size, and resort to byte accesses when the target memory is not aligned.
For new architectures and existing ones that were using the old
fallbacks these functions are save to use, because IO memory constraints
are taken into account. Moreover, architectures with similar
implementations can now use these new versions, not needing to implement
their own.

Reviewed-by: Yann Sionneau <ysionneau@kalrayinc.com>
Signed-off-by: Julian Vetter <jvetter@kalrayinc.com>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 include/asm-generic/io.h | 22 +++-------------------
 1 file changed, 3 insertions(+), 19 deletions(-)

(limited to 'include')

diff --git a/include/asm-generic/io.h b/include/asm-generic/io.h
index 1027be6a62bc..a5cbbf3e26ec 100644
--- a/include/asm-generic/io.h
+++ b/include/asm-generic/io.h
@@ -1211,7 +1211,6 @@ static inline void unxlate_dev_mem_ptr(phys_addr_t phys, void *addr)
 #endif
 
 #ifndef memset_io
-#define memset_io memset_io
 /**
  * memset_io	Set a range of I/O memory to a constant value
  * @addr:	The beginning of the I/O-memory range to set
@@ -1220,15 +1219,10 @@ static inline void unxlate_dev_mem_ptr(phys_addr_t phys, void *addr)
  *
  * Set a range of I/O memory to a given value.
  */
-static inline void memset_io(volatile void __iomem *addr, int value,
-			     size_t size)
-{
-	memset(__io_virt(addr), value, size);
-}
+void memset_io(volatile void __iomem *addr, int val, size_t count);
 #endif
 
 #ifndef memcpy_fromio
-#define memcpy_fromio memcpy_fromio
 /**
  * memcpy_fromio	Copy a block of data from I/O memory
  * @dst:		The (RAM) destination for the copy
@@ -1237,16 +1231,10 @@ static inline void memset_io(volatile void __iomem *addr, int value,
  *
  * Copy a block of data from I/O memory.
  */
-static inline void memcpy_fromio(void *buffer,
-				 const volatile void __iomem *addr,
-				 size_t size)
-{
-	memcpy(buffer, __io_virt(addr), size);
-}
+void memcpy_fromio(void *dst, const volatile void __iomem *src, size_t count);
 #endif
 
 #ifndef memcpy_toio
-#define memcpy_toio memcpy_toio
 /**
  * memcpy_toio		Copy a block of data into I/O memory
  * @dst:		The (I/O memory) destination for the copy
@@ -1255,11 +1243,7 @@ static inline void memcpy_fromio(void *buffer,
  *
  * Copy a block of data to I/O memory.
  */
-static inline void memcpy_toio(volatile void __iomem *addr, const void *buffer,
-			       size_t size)
-{
-	memcpy(__io_virt(addr), buffer, size);
-}
+void memcpy_toio(volatile void __iomem *dst, const void *src, size_t count);
 #endif
 
 extern int devmem_is_allowed(unsigned long pfn);
-- 
cgit v1.2.3


From f390525d27bc03dc5925293cbd9f22329648b248 Mon Sep 17 00:00:00 2001
From: Dingyan Li <18500469033@163.com>
Date: Sun, 20 Oct 2024 15:47:21 +0800
Subject: usb: storage: fix wrong comments for struct bulk_cb_wrap

In the flags, direction is in bit 7 instead of bit 0 based
on the specification.

Signed-off-by: Dingyan Li <18500469033@163.com>
Link: https://lore.kernel.org/r/20241020074721.26905-1-18500469033@163.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/usb/storage.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/usb/storage.h b/include/linux/usb/storage.h
index 2827ce72e502..8539956bc2be 100644
--- a/include/linux/usb/storage.h
+++ b/include/linux/usb/storage.h
@@ -53,7 +53,7 @@ struct bulk_cb_wrap {
 	__le32	Signature;		/* contains 'USBC' */
 	__u32	Tag;			/* unique per command id */
 	__le32	DataTransferLength;	/* size of data */
-	__u8	Flags;			/* direction in bit 0 */
+	__u8	Flags;			/* direction in bit 7 */
 	__u8	Lun;			/* LUN normally 0 */
 	__u8	Length;			/* length of the CDB */
 	__u8	CDB[16];		/* max command */
-- 
cgit v1.2.3


From 4fd06a5358e0d888d1bf23d274971ea7d1f45aad Mon Sep 17 00:00:00 2001
From: David Dai <davidai@google.com>
Date: Wed, 18 Sep 2024 17:08:33 -0700
Subject: cpufreq: add virtual-cpufreq driver

Introduce a virtualized cpufreq driver for guest kernels to improve
performance and power of workloads within VMs.

This driver does two main things:

1. Sends the frequency of vCPUs as a hint to the host. The host uses the
hint to schedule the vCPU threads and decide physical CPU frequency.

2. If a VM does not support a virtualized FIE(like AMUs), it queries the
host CPU frequency by reading a MMIO region of a virtual cpufreq device
to update the guest's frequency scaling factor periodically. This enables
accurate Per-Entity Load Tracking for tasks running in the guest.

Co-developed-by: Saravana Kannan <saravanak@google.com>
Signed-off-by: Saravana Kannan <saravanak@google.com>
Signed-off-by: David Dai <davidai@google.com>
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 include/linux/arch_topology.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/arch_topology.h b/include/linux/arch_topology.h
index b721f360d759..d5d848849408 100644
--- a/include/linux/arch_topology.h
+++ b/include/linux/arch_topology.h
@@ -49,6 +49,7 @@ enum scale_freq_source {
 	SCALE_FREQ_SOURCE_CPUFREQ = 0,
 	SCALE_FREQ_SOURCE_ARCH,
 	SCALE_FREQ_SOURCE_CPPC,
+	SCALE_FREQ_SOURCE_VIRT,
 };
 
 struct scale_freq_data {
-- 
cgit v1.2.3


From 5935b8377a0f3b2401e4e487336ed90fe6b9254d Mon Sep 17 00:00:00 2001
From: Sui Jingfeng <sui.jingfeng@linux.dev>
Date: Wed, 23 Oct 2024 11:25:44 +0800
Subject: dma-mapping: remove an outdated comment from dma-map-ops.h

The "/* CONFIG_ARCH_HAS_DMA_COHERENCE_H */" was an description about the
ARCH_HAS_DMA_COHERENCE_H configure option, but it has been removed since
the dma_default_coherent variable was lifted from the mips architecture
to the driver core. Therefore it doesn't match any compile guard now.
Just remove it.

Fixes: 6d4e9a8efe3d ("driver core: lift dma_default_coherent into common code")
Signed-off-by: Sui Jingfeng <sui.jingfeng@linux.dev>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 include/linux/dma-map-ops.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/dma-map-ops.h b/include/linux/dma-map-ops.h
index b7773201414c..e172522cd936 100644
--- a/include/linux/dma-map-ops.h
+++ b/include/linux/dma-map-ops.h
@@ -242,7 +242,7 @@ static inline bool dev_is_dma_coherent(struct device *dev)
 {
 	return true;
 }
-#endif /* CONFIG_ARCH_HAS_DMA_COHERENCE_H */
+#endif
 
 static inline void dma_reset_need_sync(struct device *dev)
 {
-- 
cgit v1.2.3


From 5af5fc895fb9deec3d7b225f5bc2bd4949f8bbd5 Mon Sep 17 00:00:00 2001
From: Sean Anderson <sean.anderson@linux.dev>
Date: Fri, 18 Oct 2024 11:00:34 -0400
Subject: dma-mapping: use macros to define events in a class

Use a macro to avoid repeating the parameters and arguments for each event
in a class.

Signed-off-by: Sean Anderson <sean.anderson@linux.dev>
Reviewed-by: Steven Rostedt (Google) <rostedt@goodmis.org>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 include/trace/events/dma.h | 60 ++++++++++++++++++++++------------------------
 1 file changed, 28 insertions(+), 32 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/dma.h b/include/trace/events/dma.h
index b0f41265191c..3d348cea4d7c 100644
--- a/include/trace/events/dma.h
+++ b/include/trace/events/dma.h
@@ -65,15 +65,14 @@ DECLARE_EVENT_CLASS(dma_map,
 		decode_dma_attrs(__entry->attrs))
 );
 
-DEFINE_EVENT(dma_map, dma_map_page,
-	TP_PROTO(struct device *dev, phys_addr_t phys_addr, dma_addr_t dma_addr,
-		 size_t size, enum dma_data_direction dir, unsigned long attrs),
-	TP_ARGS(dev, phys_addr, dma_addr, size, dir, attrs));
+#define DEFINE_MAP_EVENT(name) \
+DEFINE_EVENT(dma_map, name, \
+	TP_PROTO(struct device *dev, phys_addr_t phys_addr, dma_addr_t dma_addr, \
+		 size_t size, enum dma_data_direction dir, unsigned long attrs), \
+	TP_ARGS(dev, phys_addr, dma_addr, size, dir, attrs))
 
-DEFINE_EVENT(dma_map, dma_map_resource,
-	TP_PROTO(struct device *dev, phys_addr_t phys_addr, dma_addr_t dma_addr,
-		 size_t size, enum dma_data_direction dir, unsigned long attrs),
-	TP_ARGS(dev, phys_addr, dma_addr, size, dir, attrs));
+DEFINE_MAP_EVENT(dma_map_page);
+DEFINE_MAP_EVENT(dma_map_resource);
 
 DECLARE_EVENT_CLASS(dma_unmap,
 	TP_PROTO(struct device *dev, dma_addr_t addr, size_t size,
@@ -104,15 +103,14 @@ DECLARE_EVENT_CLASS(dma_unmap,
 		decode_dma_attrs(__entry->attrs))
 );
 
-DEFINE_EVENT(dma_unmap, dma_unmap_page,
-	TP_PROTO(struct device *dev, dma_addr_t addr, size_t size,
-		 enum dma_data_direction dir, unsigned long attrs),
-	TP_ARGS(dev, addr, size, dir, attrs));
+#define DEFINE_UNMAP_EVENT(name) \
+DEFINE_EVENT(dma_unmap, name, \
+	TP_PROTO(struct device *dev, dma_addr_t addr, size_t size, \
+		 enum dma_data_direction dir, unsigned long attrs), \
+	TP_ARGS(dev, addr, size, dir, attrs))
 
-DEFINE_EVENT(dma_unmap, dma_unmap_resource,
-	TP_PROTO(struct device *dev, dma_addr_t addr, size_t size,
-		 enum dma_data_direction dir, unsigned long attrs),
-	TP_ARGS(dev, addr, size, dir, attrs));
+DEFINE_UNMAP_EVENT(dma_unmap_page);
+DEFINE_UNMAP_EVENT(dma_unmap_resource);
 
 TRACE_EVENT(dma_alloc,
 	TP_PROTO(struct device *dev, void *virt_addr, dma_addr_t dma_addr,
@@ -279,15 +277,14 @@ DECLARE_EVENT_CLASS(dma_sync_single,
 		__entry->size)
 );
 
-DEFINE_EVENT(dma_sync_single, dma_sync_single_for_cpu,
-	TP_PROTO(struct device *dev, dma_addr_t dma_addr, size_t size,
-		 enum dma_data_direction dir),
-	TP_ARGS(dev, dma_addr, size, dir));
+#define DEFINE_SYNC_SINGLE_EVENT(name) \
+DEFINE_EVENT(dma_sync_single, name, \
+	TP_PROTO(struct device *dev, dma_addr_t dma_addr, size_t size, \
+		 enum dma_data_direction dir), \
+	TP_ARGS(dev, dma_addr, size, dir))
 
-DEFINE_EVENT(dma_sync_single, dma_sync_single_for_device,
-	TP_PROTO(struct device *dev, dma_addr_t dma_addr, size_t size,
-		 enum dma_data_direction dir),
-	TP_ARGS(dev, dma_addr, size, dir));
+DEFINE_SYNC_SINGLE_EVENT(dma_sync_single_for_cpu);
+DEFINE_SYNC_SINGLE_EVENT(dma_sync_single_for_device);
 
 DECLARE_EVENT_CLASS(dma_sync_sg,
 	TP_PROTO(struct device *dev, struct scatterlist *sgl, int nents,
@@ -326,15 +323,14 @@ DECLARE_EVENT_CLASS(dma_sync_sg,
 				sizeof(unsigned int), sizeof(unsigned int)))
 );
 
-DEFINE_EVENT(dma_sync_sg, dma_sync_sg_for_cpu,
-	TP_PROTO(struct device *dev, struct scatterlist *sg, int nents,
-		 enum dma_data_direction dir),
-	TP_ARGS(dev, sg, nents, dir));
+#define DEFINE_SYNC_SG_EVENT(name) \
+DEFINE_EVENT(dma_sync_sg, name, \
+	TP_PROTO(struct device *dev, struct scatterlist *sg, int nents, \
+		 enum dma_data_direction dir), \
+	TP_ARGS(dev, sg, nents, dir))
 
-DEFINE_EVENT(dma_sync_sg, dma_sync_sg_for_device,
-	TP_PROTO(struct device *dev, struct scatterlist *sg, int nents,
-		 enum dma_data_direction dir),
-	TP_ARGS(dev, sg, nents, dir));
+DEFINE_SYNC_SG_EVENT(dma_sync_sg_for_cpu);
+DEFINE_SYNC_SG_EVENT(dma_sync_sg_for_device);
 
 #endif /*  _TRACE_DMA_H */
 
-- 
cgit v1.2.3


From 3afff779a725cba914e6caba360b696ae6f90249 Mon Sep 17 00:00:00 2001
From: Sean Anderson <sean.anderson@linux.dev>
Date: Fri, 18 Oct 2024 11:00:35 -0400
Subject: dma-mapping: trace dma_alloc/free direction

In preparation for using these tracepoints in a few more places, trace
the DMA direction as well. For coherent allocations this is always
bidirectional.

Signed-off-by: Sean Anderson <sean.anderson@linux.dev>
Reviewed-by: Steven Rostedt (Google) <rostedt@goodmis.org>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 include/trace/events/dma.h | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/dma.h b/include/trace/events/dma.h
index 3d348cea4d7c..267cfa49d9d5 100644
--- a/include/trace/events/dma.h
+++ b/include/trace/events/dma.h
@@ -114,8 +114,9 @@ DEFINE_UNMAP_EVENT(dma_unmap_resource);
 
 TRACE_EVENT(dma_alloc,
 	TP_PROTO(struct device *dev, void *virt_addr, dma_addr_t dma_addr,
-		 size_t size, gfp_t flags, unsigned long attrs),
-	TP_ARGS(dev, virt_addr, dma_addr, size, flags, attrs),
+		 size_t size, enum dma_data_direction dir, gfp_t flags,
+		 unsigned long attrs),
+	TP_ARGS(dev, virt_addr, dma_addr, size, dir, flags, attrs),
 
 	TP_STRUCT__entry(
 		__string(device, dev_name(dev))
@@ -123,6 +124,7 @@ TRACE_EVENT(dma_alloc,
 		__field(u64, dma_addr)
 		__field(size_t, size)
 		__field(gfp_t, flags)
+		__field(enum dma_data_direction, dir)
 		__field(unsigned long, attrs)
 	),
 
@@ -135,8 +137,9 @@ TRACE_EVENT(dma_alloc,
 		__entry->attrs = attrs;
 	),
 
-	TP_printk("%s dma_addr=%llx size=%zu virt_addr=%p flags=%s attrs=%s",
+	TP_printk("%s dir=%s dma_addr=%llx size=%zu virt_addr=%p flags=%s attrs=%s",
 		__get_str(device),
+		decode_dma_data_direction(__entry->dir),
 		__entry->dma_addr,
 		__entry->size,
 		__entry->virt_addr,
@@ -146,14 +149,15 @@ TRACE_EVENT(dma_alloc,
 
 TRACE_EVENT(dma_free,
 	TP_PROTO(struct device *dev, void *virt_addr, dma_addr_t dma_addr,
-		 size_t size, unsigned long attrs),
-	TP_ARGS(dev, virt_addr, dma_addr, size, attrs),
+		 size_t size, enum dma_data_direction dir, unsigned long attrs),
+	TP_ARGS(dev, virt_addr, dma_addr, size, dir, attrs),
 
 	TP_STRUCT__entry(
 		__string(device, dev_name(dev))
 		__field(void *, virt_addr)
 		__field(u64, dma_addr)
 		__field(size_t, size)
+		__field(enum dma_data_direction, dir)
 		__field(unsigned long, attrs)
 	),
 
@@ -162,11 +166,13 @@ TRACE_EVENT(dma_free,
 		__entry->virt_addr = virt_addr;
 		__entry->dma_addr = dma_addr;
 		__entry->size = size;
+		__entry->dir = dir;
 		__entry->attrs = attrs;
 	),
 
-	TP_printk("%s dma_addr=%llx size=%zu virt_addr=%p attrs=%s",
+	TP_printk("%s dir=%s dma_addr=%llx size=%zu virt_addr=%p attrs=%s",
 		__get_str(device),
+		decode_dma_data_direction(__entry->dir),
 		__entry->dma_addr,
 		__entry->size,
 		__entry->virt_addr,
-- 
cgit v1.2.3


From c4484ab86ee00f2d9236e2851621ea02c105f4cc Mon Sep 17 00:00:00 2001
From: Sean Anderson <sean.anderson@linux.dev>
Date: Fri, 18 Oct 2024 11:00:36 -0400
Subject: dma-mapping: use trace_dma_alloc for dma_alloc* instead of using
 trace_dma_map

In some cases, we use trace_dma_map to trace dma_alloc* functions. This
generally follows dma_debug. However, this does not record all of the
relevant information for allocations, such as GFP flags. Create new
dma_alloc tracepoints for these functions. Note that while
dma_alloc_noncontiguous may allocate discontiguous pages (from the CPU's
point of view), the device will only see one contiguous mapping.
Therefore, we just need to trace dma_addr and size.

Signed-off-by: Sean Anderson <sean.anderson@linux.dev>
Reviewed-by: Steven Rostedt (Google) <rostedt@goodmis.org>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 include/trace/events/dma.h | 99 +++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 97 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/dma.h b/include/trace/events/dma.h
index 267cfa49d9d5..7a9606b8934e 100644
--- a/include/trace/events/dma.h
+++ b/include/trace/events/dma.h
@@ -112,7 +112,7 @@ DEFINE_EVENT(dma_unmap, name, \
 DEFINE_UNMAP_EVENT(dma_unmap_page);
 DEFINE_UNMAP_EVENT(dma_unmap_resource);
 
-TRACE_EVENT(dma_alloc,
+DECLARE_EVENT_CLASS(dma_alloc_class,
 	TP_PROTO(struct device *dev, void *virt_addr, dma_addr_t dma_addr,
 		 size_t size, enum dma_data_direction dir, gfp_t flags,
 		 unsigned long attrs),
@@ -147,7 +147,58 @@ TRACE_EVENT(dma_alloc,
 		decode_dma_attrs(__entry->attrs))
 );
 
-TRACE_EVENT(dma_free,
+#define DEFINE_ALLOC_EVENT(name) \
+DEFINE_EVENT(dma_alloc_class, name, \
+	TP_PROTO(struct device *dev, void *virt_addr, dma_addr_t dma_addr, \
+		 size_t size, enum dma_data_direction dir, gfp_t flags, \
+		 unsigned long attrs), \
+	TP_ARGS(dev, virt_addr, dma_addr, size, dir, flags, attrs))
+
+DEFINE_ALLOC_EVENT(dma_alloc);
+DEFINE_ALLOC_EVENT(dma_alloc_pages);
+
+TRACE_EVENT(dma_alloc_sgt,
+	TP_PROTO(struct device *dev, struct sg_table *sgt, size_t size,
+		 enum dma_data_direction dir, gfp_t flags, unsigned long attrs),
+	TP_ARGS(dev, sgt, size, dir, flags, attrs),
+
+	TP_STRUCT__entry(
+		__string(device, dev_name(dev))
+		__dynamic_array(u64, phys_addrs, sgt->orig_nents)
+		__field(u64, dma_addr)
+		__field(size_t, size)
+		__field(enum dma_data_direction, dir)
+		__field(gfp_t, flags)
+		__field(unsigned long, attrs)
+	),
+
+	TP_fast_assign(
+		struct scatterlist *sg;
+		int i;
+
+		__assign_str(device);
+		for_each_sg(sgt->sgl, sg, sgt->orig_nents, i)
+			((u64 *)__get_dynamic_array(phys_addrs))[i] = sg_phys(sg);
+		__entry->dma_addr = sg_dma_address(sgt->sgl);
+		__entry->size = size;
+		__entry->dir = dir;
+		__entry->flags = flags;
+		__entry->attrs = attrs;
+	),
+
+	TP_printk("%s dir=%s dma_addr=%llx size=%zu phys_addrs=%s flags=%s attrs=%s",
+		__get_str(device),
+		decode_dma_data_direction(__entry->dir),
+		__entry->dma_addr,
+		__entry->size,
+		__print_array(__get_dynamic_array(phys_addrs),
+			      __get_dynamic_array_len(phys_addrs) /
+				sizeof(u64), sizeof(u64)),
+		show_gfp_flags(__entry->flags),
+		decode_dma_attrs(__entry->attrs))
+);
+
+DECLARE_EVENT_CLASS(dma_free_class,
 	TP_PROTO(struct device *dev, void *virt_addr, dma_addr_t dma_addr,
 		 size_t size, enum dma_data_direction dir, unsigned long attrs),
 	TP_ARGS(dev, virt_addr, dma_addr, size, dir, attrs),
@@ -179,6 +230,50 @@ TRACE_EVENT(dma_free,
 		decode_dma_attrs(__entry->attrs))
 );
 
+#define DEFINE_FREE_EVENT(name) \
+DEFINE_EVENT(dma_free_class, name, \
+	TP_PROTO(struct device *dev, void *virt_addr, dma_addr_t dma_addr, \
+		 size_t size, enum dma_data_direction dir, unsigned long attrs), \
+	TP_ARGS(dev, virt_addr, dma_addr, size, dir, attrs))
+
+DEFINE_FREE_EVENT(dma_free);
+DEFINE_FREE_EVENT(dma_free_pages);
+
+TRACE_EVENT(dma_free_sgt,
+	TP_PROTO(struct device *dev, struct sg_table *sgt, size_t size,
+		 enum dma_data_direction dir),
+	TP_ARGS(dev, sgt, size, dir),
+
+	TP_STRUCT__entry(
+		__string(device, dev_name(dev))
+		__dynamic_array(u64, phys_addrs, sgt->orig_nents)
+		__field(u64, dma_addr)
+		__field(size_t, size)
+		__field(enum dma_data_direction, dir)
+	),
+
+	TP_fast_assign(
+		struct scatterlist *sg;
+		int i;
+
+		__assign_str(device);
+		for_each_sg(sgt->sgl, sg, sgt->orig_nents, i)
+			((u64 *)__get_dynamic_array(phys_addrs))[i] = sg_phys(sg);
+		__entry->dma_addr = sg_dma_address(sgt->sgl);
+		__entry->size = size;
+		__entry->dir = dir;
+	),
+
+	TP_printk("%s dir=%s dma_addr=%llx size=%zu phys_addrs=%s",
+		__get_str(device),
+		decode_dma_data_direction(__entry->dir),
+		__entry->dma_addr,
+		__entry->size,
+		__print_array(__get_dynamic_array(phys_addrs),
+			      __get_dynamic_array_len(phys_addrs) /
+				sizeof(u64), sizeof(u64)))
+);
+
 TRACE_EVENT(dma_map_sg,
 	TP_PROTO(struct device *dev, struct scatterlist *sgl, int nents,
 		 int ents, enum dma_data_direction dir, unsigned long attrs),
-- 
cgit v1.2.3


From 68b6dbf1f441c4eba3b8511728a41cf9b01dca35 Mon Sep 17 00:00:00 2001
From: Sean Anderson <sean.anderson@linux.dev>
Date: Fri, 18 Oct 2024 11:00:37 -0400
Subject: dma-mapping: trace more error paths

It can be surprising to the user if DMA functions are only traced on
success. On failure, it can be unclear what the source of the problem
is. Fix this by tracing all functions even when they fail. Cases where
we BUG/WARN are skipped, since those should be sufficiently noisy
already.

Signed-off-by: Sean Anderson <sean.anderson@linux.dev>
Reviewed-by: Steven Rostedt (Google) <rostedt@goodmis.org>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 include/trace/events/dma.h | 36 ++++++++++++++++++++++++++++++++++++
 1 file changed, 36 insertions(+)

(limited to 'include')

diff --git a/include/trace/events/dma.h b/include/trace/events/dma.h
index 7a9606b8934e..d8ddc27b6a7c 100644
--- a/include/trace/events/dma.h
+++ b/include/trace/events/dma.h
@@ -156,6 +156,7 @@ DEFINE_EVENT(dma_alloc_class, name, \
 
 DEFINE_ALLOC_EVENT(dma_alloc);
 DEFINE_ALLOC_EVENT(dma_alloc_pages);
+DEFINE_ALLOC_EVENT(dma_alloc_sgt_err);
 
 TRACE_EVENT(dma_alloc_sgt,
 	TP_PROTO(struct device *dev, struct sg_table *sgt, size_t size,
@@ -320,6 +321,41 @@ TRACE_EVENT(dma_map_sg,
 		decode_dma_attrs(__entry->attrs))
 );
 
+TRACE_EVENT(dma_map_sg_err,
+	TP_PROTO(struct device *dev, struct scatterlist *sgl, int nents,
+		 int err, enum dma_data_direction dir, unsigned long attrs),
+	TP_ARGS(dev, sgl, nents, err, dir, attrs),
+
+	TP_STRUCT__entry(
+		__string(device, dev_name(dev))
+		__dynamic_array(u64, phys_addrs, nents)
+		__field(int, err)
+		__field(enum dma_data_direction, dir)
+		__field(unsigned long, attrs)
+	),
+
+	TP_fast_assign(
+		struct scatterlist *sg;
+		int i;
+
+		__assign_str(device);
+		for_each_sg(sgl, sg, nents, i)
+			((u64 *)__get_dynamic_array(phys_addrs))[i] = sg_phys(sg);
+		__entry->err = err;
+		__entry->dir = dir;
+		__entry->attrs = attrs;
+	),
+
+	TP_printk("%s dir=%s dma_addrs=%s err=%d attrs=%s",
+		__get_str(device),
+		decode_dma_data_direction(__entry->dir),
+		__print_array(__get_dynamic_array(phys_addrs),
+			      __get_dynamic_array_len(phys_addrs) /
+				sizeof(u64), sizeof(u64)),
+		__entry->err,
+		decode_dma_attrs(__entry->attrs))
+);
+
 TRACE_EVENT(dma_unmap_sg,
 	TP_PROTO(struct device *dev, struct scatterlist *sgl, int nents,
 		 enum dma_data_direction dir, unsigned long attrs),
-- 
cgit v1.2.3


From be164349e173a8e71cd76f17c7ed720813b8d69b Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Sun, 13 Oct 2024 07:19:48 +0200
Subject: dma-mapping: drop unneeded includes from dma-mapping.h

Back in the day a lot of logic was implemented inline in dma-mapping.h and
needed various includes.  Move of this has long been moved out of line,
so we can drop various includes to improve kernel rebuild times.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 include/linux/dma-mapping.h | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index 1524da363734..b79925b1c433 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -2,15 +2,11 @@
 #ifndef _LINUX_DMA_MAPPING_H
 #define _LINUX_DMA_MAPPING_H
 
-#include <linux/cache.h>
-#include <linux/sizes.h>
-#include <linux/string.h>
 #include <linux/device.h>
 #include <linux/err.h>
 #include <linux/dma-direction.h>
 #include <linux/scatterlist.h>
 #include <linux/bug.h>
-#include <linux/mem_encrypt.h>
 
 /**
  * List of possible attributes associated with a DMA mapping. The semantics
-- 
cgit v1.2.3


From 69e5a17511f654db0fe058d22719cef008c9faf1 Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@nvidia.com>
Date: Fri, 18 Oct 2024 14:01:19 -0300
Subject: iommu: Remove useless flush from
 iommu_create_device_direct_mappings()

These days iommu_map() does not require external flushing, it always
internally handles any required flushes. Since
iommu_create_device_direct_mappings() only calls iommu_map(), remove
the extra call.

Since this is the last call site for iommu_flush_iotlb_all() remove it
too.

Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Lu Baolu <baolu.lu@linux.intel.com>
Link: https://lore.kernel.org/r/0-v1-bb6c694e1b07+a29e1-iommu_no_flush_all_jgg@nvidia.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 include/linux/iommu.h | 10 ----------
 1 file changed, 10 deletions(-)

(limited to 'include')

diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 62d1b85c80d3..03f9fa833db9 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -852,12 +852,6 @@ void iommu_set_dma_strict(void);
 extern int report_iommu_fault(struct iommu_domain *domain, struct device *dev,
 			      unsigned long iova, int flags);
 
-static inline void iommu_flush_iotlb_all(struct iommu_domain *domain)
-{
-	if (domain->ops->flush_iotlb_all)
-		domain->ops->flush_iotlb_all(domain);
-}
-
 static inline void iommu_iotlb_sync(struct iommu_domain *domain,
 				  struct iommu_iotlb_gather *iotlb_gather)
 {
@@ -1141,10 +1135,6 @@ static inline ssize_t iommu_map_sg(struct iommu_domain *domain,
 	return -ENODEV;
 }
 
-static inline void iommu_flush_iotlb_all(struct iommu_domain *domain)
-{
-}
-
 static inline void iommu_iotlb_sync(struct iommu_domain *domain,
 				  struct iommu_iotlb_gather *iotlb_gather)
 {
-- 
cgit v1.2.3


From f6440fcc9c7b7aaad2e52830ab83fa71990f76ff Mon Sep 17 00:00:00 2001
From: Lu Baolu <baolu.lu@linux.intel.com>
Date: Wed, 9 Oct 2024 12:11:47 +0800
Subject: iommu: Remove iommu_domain_alloc()

The iommu_domain_alloc() interface is no longer used in the tree anymore.
Remove it to avoid dead code.

There is increasing demand for supporting multiple IOMMU drivers, and this
is the last bus-based thing standing in the way of that.

Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Link: https://lore.kernel.org/r/20241009041147.28391-5-baolu.lu@linux.intel.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 include/linux/iommu.h | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 03f9fa833db9..d93c87f0c003 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -787,7 +787,6 @@ static inline void iommu_iotlb_gather_init(struct iommu_iotlb_gather *gather)
 extern int bus_iommu_probe(const struct bus_type *bus);
 extern bool device_iommu_capable(struct device *dev, enum iommu_cap cap);
 extern bool iommu_group_has_isolated_msi(struct iommu_group *group);
-extern struct iommu_domain *iommu_domain_alloc(const struct bus_type *bus);
 struct iommu_domain *iommu_paging_domain_alloc(struct device *dev);
 extern void iommu_domain_free(struct iommu_domain *domain);
 extern int iommu_attach_device(struct iommu_domain *domain,
@@ -1079,11 +1078,6 @@ static inline bool device_iommu_capable(struct device *dev, enum iommu_cap cap)
 	return false;
 }
 
-static inline struct iommu_domain *iommu_domain_alloc(const struct bus_type *bus)
-{
-	return NULL;
-}
-
 static inline struct iommu_domain *iommu_paging_domain_alloc(struct device *dev)
 {
 	return ERR_PTR(-ENODEV);
-- 
cgit v1.2.3


From 20858d4ebb423826b7a978d54cde195f51d87e20 Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@ziepe.ca>
Date: Mon, 28 Oct 2024 09:38:00 +0000
Subject: iommu: Introduce iommu_paging_domain_alloc_flags()

Currently drivers calls iommu_paging_domain_alloc(dev) to get an
UNMANAGED domain. This is not sufficient to support PASID with
UNMANAGED domain as some HW like AMD requires certain page table type
to support PASIDs.

Also the domain_alloc_paging op only passes device as param for domain
allocation. This is not sufficient for AMD driver to decide the right
page table.

Instead of extending ops->domain_alloc_paging() it was decided to
enhance ops->domain_alloc_user() so that caller can pass various
additional flags.

Hence add iommu_paging_domain_alloc_flags() API which takes flags as
parameter. Caller can pass additional parameter to indicate type of
domain required, etc. iommu_paging_domain_alloc_flags() internally calls
appropriate callback function to allocate a domain.

Signed-off-by: Jason Gunthorpe <jgg@ziepe.ca>
[Added description - Vasant]
Signed-off-by: Vasant Hegde <vasant.hegde@amd.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Lu Baolu <baolu.lu@linux.intel.com>
Reviewed-by: Yi Liu <yi.l.liu@intel.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Link: https://lore.kernel.org/r/20241028093810.5901-3-vasant.hegde@amd.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 include/linux/iommu.h | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index d93c87f0c003..aa78d911fdda 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -511,8 +511,6 @@ static inline int __iommu_copy_struct_from_user_array(
  *                the caller iommu_domain_alloc() returns.
  * @domain_alloc_user: Allocate an iommu domain corresponding to the input
  *                     parameters as defined in include/uapi/linux/iommufd.h.
- *                     Unlike @domain_alloc, it is called only by IOMMUFD and
- *                     must fully initialize the new domain before return.
  *                     Upon success, if the @user_data is valid and the @parent
  *                     points to a kernel-managed domain, the new domain must be
  *                     IOMMU_DOMAIN_NESTED type; otherwise, the @parent must be
@@ -787,7 +785,11 @@ static inline void iommu_iotlb_gather_init(struct iommu_iotlb_gather *gather)
 extern int bus_iommu_probe(const struct bus_type *bus);
 extern bool device_iommu_capable(struct device *dev, enum iommu_cap cap);
 extern bool iommu_group_has_isolated_msi(struct iommu_group *group);
-struct iommu_domain *iommu_paging_domain_alloc(struct device *dev);
+struct iommu_domain *iommu_paging_domain_alloc_flags(struct device *dev, unsigned int flags);
+static inline struct iommu_domain *iommu_paging_domain_alloc(struct device *dev)
+{
+	return iommu_paging_domain_alloc_flags(dev, 0);
+}
 extern void iommu_domain_free(struct iommu_domain *domain);
 extern int iommu_attach_device(struct iommu_domain *domain,
 			       struct device *dev);
@@ -1078,6 +1080,12 @@ static inline bool device_iommu_capable(struct device *dev, enum iommu_cap cap)
 	return false;
 }
 
+struct iommu_domain *iommu_paging_domain_alloc_flags(struct device *dev,
+						     unsigned int flags)
+{
+	return ERR_PTR(-ENODEV);
+}
+
 static inline struct iommu_domain *iommu_paging_domain_alloc(struct device *dev)
 {
 	return ERR_PTR(-ENODEV);
-- 
cgit v1.2.3


From b7a0855eb95f6db8ac8e17596e76f7b94a790fe6 Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@ziepe.ca>
Date: Mon, 28 Oct 2024 09:38:01 +0000
Subject: iommu: Add new flag to explictly request PASID capable domain

Introduce new flag (IOMMU_HWPT_ALLOC_PASID) to domain_alloc_users() ops.
If IOMMU supports PASID it will allocate domain. Otherwise return error.
In error path check for -EOPNOTSUPP and try to allocate non-PASID
domain so that DMA-API mode work fine for drivers which does not support
PASID as well.

Also modify __iommu_group_alloc_default_domain() to call
iommu_paging_domain_alloc_flags() with appropriate flag when allocating
paging domain.

Signed-off-by: Jason Gunthorpe <jgg@ziepe.ca>
Co-developed-by: Vasant Hegde <vasant.hegde@amd.com>
Signed-off-by: Vasant Hegde <vasant.hegde@amd.com>
Reviewed-by: Lu Baolu <baolu.lu@linux.intel.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Link: https://lore.kernel.org/r/20241028093810.5901-4-vasant.hegde@amd.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 include/uapi/linux/iommufd.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h
index 72010f71c5e4..0c0ed28ee113 100644
--- a/include/uapi/linux/iommufd.h
+++ b/include/uapi/linux/iommufd.h
@@ -359,11 +359,19 @@ struct iommu_vfio_ioas {
  *                                   enforced on device attachment
  * @IOMMU_HWPT_FAULT_ID_VALID: The fault_id field of hwpt allocation data is
  *                             valid.
+ * @IOMMU_HWPT_ALLOC_PASID: Requests a domain that can be used with PASID. The
+ *                          domain can be attached to any PASID on the device.
+ *                          Any domain attached to the non-PASID part of the
+ *                          device must also be flaged, otherwise attaching a
+ *                          PASID will blocked.
+ *                          If IOMMU does not support PASID it will return
+ *                          error (-EOPNOTSUPP).
  */
 enum iommufd_hwpt_alloc_flags {
 	IOMMU_HWPT_ALLOC_NEST_PARENT = 1 << 0,
 	IOMMU_HWPT_ALLOC_DIRTY_TRACKING = 1 << 1,
 	IOMMU_HWPT_FAULT_ID_VALID = 1 << 2,
+	IOMMU_HWPT_ALLOC_PASID = 1 << 3,
 };
 
 /**
-- 
cgit v1.2.3


From b6da940130579769a42605b2c7f529b6f14ef1f8 Mon Sep 17 00:00:00 2001
From: Vlastimil Babka <vbabka@suse.cz>
Date: Wed, 9 Oct 2024 16:29:37 +0200
Subject: mm, slab: add kerneldocs for common SLAB_ flags

We have many SLAB_ flags but many are used only internally, by kunit
tests or debugging subsystems cooperating with slab, or are set
according to slab_debug boot parameter.

Create kerneldocs for the commonly used flags that may be passed to
kmem_cache_create(). SLAB_TYPESAFE_BY_RCU already had a detailed
description, so turn it to a kerneldoc. Add some details for
SLAB_ACCOUNT, SLAB_RECLAIM_ACCOUNT and SLAB_HWCACHE_ALIGN. Reference
them from the __kmem_cache_create_args() kerneldoc.

Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
---
 include/linux/slab.h | 60 +++++++++++++++++++++++++++++++++++-----------------
 1 file changed, 41 insertions(+), 19 deletions(-)

(limited to 'include')

diff --git a/include/linux/slab.h b/include/linux/slab.h
index b35e2db7eb0e..49e9fb93e864 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -77,7 +77,17 @@ enum _slab_flag_bits {
 #define SLAB_POISON		__SLAB_FLAG_BIT(_SLAB_POISON)
 /* Indicate a kmalloc slab */
 #define SLAB_KMALLOC		__SLAB_FLAG_BIT(_SLAB_KMALLOC)
-/* Align objs on cache lines */
+/**
+ * define SLAB_HWCACHE_ALIGN - Align objects on cache line boundaries.
+ *
+ * Sufficiently large objects are aligned on cache line boundary. For object
+ * size smaller than a half of cache line size, the alignment is on the half of
+ * cache line size. In general, if object size is smaller than 1/2^n of cache
+ * line size, the alignment is adjusted to 1/2^n.
+ *
+ * If explicit alignment is also requested by the respective
+ * &struct kmem_cache_args field, the greater of both is alignments is applied.
+ */
 #define SLAB_HWCACHE_ALIGN	__SLAB_FLAG_BIT(_SLAB_HWCACHE_ALIGN)
 /* Use GFP_DMA memory */
 #define SLAB_CACHE_DMA		__SLAB_FLAG_BIT(_SLAB_CACHE_DMA)
@@ -87,8 +97,8 @@ enum _slab_flag_bits {
 #define SLAB_STORE_USER		__SLAB_FLAG_BIT(_SLAB_STORE_USER)
 /* Panic if kmem_cache_create() fails */
 #define SLAB_PANIC		__SLAB_FLAG_BIT(_SLAB_PANIC)
-/*
- * SLAB_TYPESAFE_BY_RCU - **WARNING** READ THIS!
+/**
+ * define SLAB_TYPESAFE_BY_RCU - **WARNING** READ THIS!
  *
  * This delays freeing the SLAB page by a grace period, it does _NOT_
  * delay object freeing. This means that if you do kmem_cache_free()
@@ -99,20 +109,22 @@ enum _slab_flag_bits {
  * stays valid, the trick to using this is relying on an independent
  * object validation pass. Something like:
  *
- * begin:
- *  rcu_read_lock();
- *  obj = lockless_lookup(key);
- *  if (obj) {
- *    if (!try_get_ref(obj)) // might fail for free objects
- *      rcu_read_unlock();
- *      goto begin;
+ * ::
+ *
+ *  begin:
+ *   rcu_read_lock();
+ *   obj = lockless_lookup(key);
+ *   if (obj) {
+ *     if (!try_get_ref(obj)) // might fail for free objects
+ *       rcu_read_unlock();
+ *       goto begin;
  *
- *    if (obj->key != key) { // not the object we expected
- *      put_ref(obj);
- *      rcu_read_unlock();
- *      goto begin;
- *    }
- *  }
+ *     if (obj->key != key) { // not the object we expected
+ *       put_ref(obj);
+ *       rcu_read_unlock();
+ *       goto begin;
+ *     }
+ *   }
  *  rcu_read_unlock();
  *
  * This is useful if we need to approach a kernel structure obliquely,
@@ -137,7 +149,6 @@ enum _slab_flag_bits {
  *
  * Note that SLAB_TYPESAFE_BY_RCU was originally named SLAB_DESTROY_BY_RCU.
  */
-/* Defer freeing slabs to RCU */
 #define SLAB_TYPESAFE_BY_RCU	__SLAB_FLAG_BIT(_SLAB_TYPESAFE_BY_RCU)
 /* Trace allocations and frees */
 #define SLAB_TRACE		__SLAB_FLAG_BIT(_SLAB_TRACE)
@@ -170,7 +181,12 @@ enum _slab_flag_bits {
 #else
 # define SLAB_FAILSLAB		__SLAB_FLAG_UNUSED
 #endif
-/* Account to memcg */
+/**
+ * define SLAB_ACCOUNT - Account allocations to memcg.
+ *
+ * All object allocations from this cache will be memcg accounted, regardless of
+ * __GFP_ACCOUNT being or not being passed to individual allocations.
+ */
 #ifdef CONFIG_MEMCG
 # define SLAB_ACCOUNT		__SLAB_FLAG_BIT(_SLAB_ACCOUNT)
 #else
@@ -197,7 +213,13 @@ enum _slab_flag_bits {
 #endif
 
 /* The following flags affect the page allocator grouping pages by mobility */
-/* Objects are reclaimable */
+/**
+ * define SLAB_RECLAIM_ACCOUNT - Objects are reclaimable.
+ *
+ * Use this flag for caches that have an associated shrinker. As a result, slab
+ * pages are allocated with __GFP_RECLAIMABLE, which affects grouping pages by
+ * mobility, and are accounted in SReclaimable counter in /proc/meminfo
+ */
 #ifndef CONFIG_SLUB_TINY
 #define SLAB_RECLAIM_ACCOUNT	__SLAB_FLAG_BIT(_SLAB_RECLAIM_ACCOUNT)
 #else
-- 
cgit v1.2.3


From 68f99be287a59d50a9ad231d523f7e578f8bd28a Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 1 Oct 2024 10:42:00 +0200
Subject: signal: Confine POSIX_TIMERS properly

Move the itimer rearming out of the signal code and consolidate all posix
timer related functions in the signal code under one ifdef.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Frederic Weisbecker <frederic@kernel.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/all/20241001083835.314100569@linutronix.de
---
 include/linux/posix-timers.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h
index 453691710839..670bf03a56ef 100644
--- a/include/linux/posix-timers.h
+++ b/include/linux/posix-timers.h
@@ -100,6 +100,8 @@ static inline void posix_cputimers_rt_watchdog(struct posix_cputimers *pct,
 {
 	pct->bases[CPUCLOCK_SCHED].nextevt = runtime;
 }
+void posixtimer_rearm_itimer(struct task_struct *p);
+void posixtimer_rearm(struct kernel_siginfo *info);
 
 /* Init task static initializer */
 #define INIT_CPU_TIMERBASE(b) {						\
@@ -122,6 +124,8 @@ struct cpu_timer { };
 static inline void posix_cputimers_init(struct posix_cputimers *pct) { }
 static inline void posix_cputimers_group_init(struct posix_cputimers *pct,
 					      u64 cpu_limit) { }
+static inline void posixtimer_rearm_itimer(struct task_struct *p) { }
+static inline void posixtimer_rearm(struct kernel_siginfo *info) { }
 #endif
 
 #ifdef CONFIG_POSIX_CPU_TIMERS_TASK_WORK
@@ -196,5 +200,4 @@ void set_process_cpu_timer(struct task_struct *task, unsigned int clock_idx,
 
 int update_rlimit_cpu(struct task_struct *task, unsigned long rlim_new);
 
-void posixtimer_rearm(struct kernel_siginfo *info);
 #endif
-- 
cgit v1.2.3


From 4febce44cfebcb490b196d5d10ae9f403ca4c956 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 1 Oct 2024 10:42:03 +0200
Subject: posix-timers: Cure si_sys_private race

The si_sys_private member of the siginfo which is embedded in the
preallocated sigqueue is used by the posix timer code to decide whether a
timer must be reprogrammed on signal delivery.

The handling of this is racy as a long standing comment in that code
documents. It is modified with the timer lock held, but without sighand
lock being held. The actual signal delivery code checks for it under
sighand lock without holding the timer lock.

Hand the new value to send_sigqueue() as argument and store it with sighand
lock held. This is an intermediate change to address this issue.

The arguments to this function will be cleanup in subsequent changes.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Frederic Weisbecker <frederic@kernel.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/all/20241001083835.434338954@linutronix.de
---
 include/linux/sched/signal.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
index c8ed09ac29ac..bd9f569231d9 100644
--- a/include/linux/sched/signal.h
+++ b/include/linux/sched/signal.h
@@ -340,7 +340,7 @@ extern int send_sig(int, struct task_struct *, int);
 extern int zap_other_threads(struct task_struct *p);
 extern struct sigqueue *sigqueue_alloc(void);
 extern void sigqueue_free(struct sigqueue *);
-extern int send_sigqueue(struct sigqueue *, struct pid *, enum pid_type);
+extern int send_sigqueue(struct sigqueue *, struct pid *, enum pid_type, int si_private);
 extern int do_sigaction(int, struct k_sigaction *, struct k_sigaction *);
 
 static inline void clear_notify_signal(void)
-- 
cgit v1.2.3


From c775ea28d4e23f5e58b6953645ef90c1b27a8e83 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 1 Oct 2024 10:42:04 +0200
Subject: signal: Allow POSIX timer signals to be dropped

In case that a timer was reprogrammed or deleted an already pending signal
is obsolete. Right now such signals are kept around and eventually
delivered. While POSIX is blury about this:

 - "The effect of disarming or resetting a timer with pending expiration
    notifications is unspecified."

 - "The disposition of pending signals for the deleted timer is
    unspecified."

it is reasonable in both cases to expect that pending signals are discarded
as they have no meaning anymore.

Prepare the signal code to allow dropping posix timer signals.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Frederic Weisbecker <frederic@kernel.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/all/20241001083835.494416923@linutronix.de
---
 include/linux/posix-timers.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h
index 670bf03a56ef..4ab49e5c42af 100644
--- a/include/linux/posix-timers.h
+++ b/include/linux/posix-timers.h
@@ -100,8 +100,9 @@ static inline void posix_cputimers_rt_watchdog(struct posix_cputimers *pct,
 {
 	pct->bases[CPUCLOCK_SCHED].nextevt = runtime;
 }
+
 void posixtimer_rearm_itimer(struct task_struct *p);
-void posixtimer_rearm(struct kernel_siginfo *info);
+bool posixtimer_deliver_signal(struct kernel_siginfo *info);
 
 /* Init task static initializer */
 #define INIT_CPU_TIMERBASE(b) {						\
@@ -125,7 +126,7 @@ static inline void posix_cputimers_init(struct posix_cputimers *pct) { }
 static inline void posix_cputimers_group_init(struct posix_cputimers *pct,
 					      u64 cpu_limit) { }
 static inline void posixtimer_rearm_itimer(struct task_struct *p) { }
-static inline void posixtimer_rearm(struct kernel_siginfo *info) { }
+static inline bool posixtimer_deliver_signal(struct kernel_siginfo *info) { return false; }
 #endif
 
 #ifdef CONFIG_POSIX_CPU_TIMERS_TASK_WORK
-- 
cgit v1.2.3


From cd1e93aedab7f749760a33e9e094381973b1120e Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 1 Oct 2024 10:42:07 +0200
Subject: posix-timers: Rename k_itimer:: It_requeue_pending

Prepare for using this struct member to do a proper reprogramming and
deletion accounting so that stale signals can be dropped.

No functional change.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Frederic Weisbecker <frederic@kernel.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/all/20241001083835.611997737@linutronix.de
---
 include/linux/posix-timers.h | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h
index 4ab49e5c42af..253d106fac2c 100644
--- a/include/linux/posix-timers.h
+++ b/include/linux/posix-timers.h
@@ -150,8 +150,7 @@ static inline void posix_cputimers_init_work(void) { }
  * @it_active:		Marker that timer is active
  * @it_overrun:		The overrun counter for pending signals
  * @it_overrun_last:	The overrun at the time of the last delivered signal
- * @it_requeue_pending:	Indicator that timer waits for being requeued on
- *			signal delivery
+ * @it_signal_seq:	Sequence count to control signal delivery
  * @it_sigev_notify:	The notify word of sigevent struct for signal delivery
  * @it_interval:	The interval for periodic timers
  * @it_signal:		Pointer to the creators signal struct
@@ -172,7 +171,7 @@ struct k_itimer {
 	int			it_active;
 	s64			it_overrun;
 	s64			it_overrun_last;
-	int			it_requeue_pending;
+	unsigned int		it_signal_seq;
 	int			it_sigev_notify;
 	ktime_t			it_interval;
 	struct signal_struct	*it_signal;
-- 
cgit v1.2.3


From 1550dde8a537b35dbf066c7f9cfe5f9b360bce0d Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 1 Oct 2024 10:42:09 +0200
Subject: posix-timers: Add proper state tracking

Right now the state tracking is done by two struct members:

 - it_active:
     A boolean which tracks armed/disarmed state

 - it_signal_seq:
     A sequence counter which is used to invalidate settings
     and prevent rearming

Replace it_active with it_status and keep properly track about the states
in one place.

This allows to reuse it_signal_seq to track reprogramming, disarm and
delete operations in order to drop signals which are related to the state
previous of those operations.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Frederic Weisbecker <frederic@kernel.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/all/20241001083835.670337048@linutronix.de
---
 include/linux/posix-timers.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h
index 253d106fac2c..02afbb4da7f7 100644
--- a/include/linux/posix-timers.h
+++ b/include/linux/posix-timers.h
@@ -147,7 +147,7 @@ static inline void posix_cputimers_init_work(void) { }
  * @kclock:		Pointer to the k_clock struct handling this timer
  * @it_clock:		The posix timer clock id
  * @it_id:		The posix timer id for identifying the timer
- * @it_active:		Marker that timer is active
+ * @it_status:		The status of the timer
  * @it_overrun:		The overrun counter for pending signals
  * @it_overrun_last:	The overrun at the time of the last delivered signal
  * @it_signal_seq:	Sequence count to control signal delivery
@@ -168,7 +168,7 @@ struct k_itimer {
 	const struct k_clock	*kclock;
 	clockid_t		it_clock;
 	timer_t			it_id;
-	int			it_active;
+	int			it_status;
 	s64			it_overrun;
 	s64			it_overrun_last;
 	unsigned int		it_signal_seq;
-- 
cgit v1.2.3


From 9cb7e40d388d6c0e4677809c6b2950bc67fd8830 Mon Sep 17 00:00:00 2001
From: Kuniyuki Iwashima <kuniyu@amazon.com>
Date: Mon, 21 Oct 2024 11:32:28 -0700
Subject: rtnetlink: Make per-netns RTNL dereference helpers to macro.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When CONFIG_DEBUG_NET_SMALL_RTNL is off, rtnl_net_dereference() is the
static inline wrapper of rtnl_dereference() returning a plain (void *)
pointer to make sure net is always evaluated as requested in [0].

But, it makes sparse complain [1] when the pointer has __rcu annotation:

  net/ipv4/devinet.c:674:47: sparse: warning: incorrect type in argument 2 (different address spaces)
  net/ipv4/devinet.c:674:47: sparse:    expected void *p
  net/ipv4/devinet.c:674:47: sparse:    got struct in_ifaddr [noderef] __rcu *

Also, if we evaluate net as (void *) in a macro, then the compiler
in turn fails to build due to -Werror=unused-value.

  #define rtnl_net_dereference(net, p)                  \
        ({                                              \
                (void *)net;                            \
                rtnl_dereference(p);                    \
        })

  net/ipv4/devinet.c: In function ‘inet_rtm_deladdr’:
  ./include/linux/rtnetlink.h:154:17: error: statement with no effect [-Werror=unused-value]
    154 |                 (void *)net;                            \
  net/ipv4/devinet.c:674:21: note: in expansion of macro ‘rtnl_net_dereference’
    674 |              (ifa = rtnl_net_dereference(net, *ifap)) != NULL;
        |                     ^~~~~~~~~~~~~~~~~~~~

Let's go back to the original simplest macro.

Note that checkpatch complains about this approach, but it's one-shot and
less noisy than the other two.

  WARNING: Argument 'net' is not used in function-like macro
  #76: FILE: include/linux/rtnetlink.h:142:
  +#define rtnl_net_dereference(net, p)			\
  +	rtnl_dereference(p)

Fixes: 844e5e7e656d ("rtnetlink: Add assertion helpers for per-netns RTNL.")
Link: https://lore.kernel.org/netdev/20241004132145.7fd208e9@kernel.org/ [0]
Reported-by: kernel test robot <lkp@intel.com>
Closes: https://lore.kernel.org/oe-kbuild-all/202410200325.SaEJmyZS-lkp@intel.com/ [1]
Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/linux/rtnetlink.h | 21 ++++++---------------
 1 file changed, 6 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index 8468a4ce8510..0e62918de63b 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -137,21 +137,12 @@ static inline void ASSERT_RTNL_NET(struct net *net)
 	ASSERT_RTNL();
 }
 
-static inline void *rcu_dereference_rtnl_net(struct net *net, void *p)
-{
-	return rcu_dereference_rtnl(p);
-}
-
-static inline void *rtnl_net_dereference(struct net *net, void *p)
-{
-	return rtnl_dereference(p);
-}
-
-static inline void *rcu_replace_pointer_rtnl_net(struct net *net,
-						 void *rp, void *p)
-{
-	return rcu_replace_pointer_rtnl(rp, p);
-}
+#define rcu_dereference_rtnl_net(net, p)		\
+	rcu_dereference_rtnl(p)
+#define rtnl_net_dereference(net, p)			\
+	rtnl_dereference(p)
+#define rcu_replace_pointer_rtnl_net(net, rp, p)	\
+	rcu_replace_pointer_rtnl(rp, p)
 #endif
 
 static inline struct netdev_queue *dev_ingress_queue(struct net_device *dev)
-- 
cgit v1.2.3


From 26d8db55eeacb7dc78672523f57825916d203de4 Mon Sep 17 00:00:00 2001
From: Kuniyuki Iwashima <kuniyu@amazon.com>
Date: Mon, 21 Oct 2024 11:32:29 -0700
Subject: rtnetlink: Define RTNL_FLAG_DOIT_PERNET for per-netns RTNL doit().

We will push RTNL down to each doit() as rtnl_net_lock().

We can use RTNL_FLAG_DOIT_UNLOCKED to call doit() without RTNL, but doit()
will still hold RTNL.

Let's define RTNL_FLAG_DOIT_PERNET as an alias of RTNL_FLAG_DOIT_UNLOCKED.

Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/net/rtnetlink.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h
index e0d9a8eae6b6..b260c0cc9671 100644
--- a/include/net/rtnetlink.h
+++ b/include/net/rtnetlink.h
@@ -12,6 +12,7 @@ typedef int (*rtnl_dumpit_func)(struct sk_buff *, struct netlink_callback *);
 
 enum rtnl_link_flags {
 	RTNL_FLAG_DOIT_UNLOCKED		= BIT(0),
+#define RTNL_FLAG_DOIT_PERNET		RTNL_FLAG_DOIT_UNLOCKED
 	RTNL_FLAG_BULK_DEL_SUPPORTED	= BIT(1),
 	RTNL_FLAG_DUMP_UNLOCKED		= BIT(2),
 	RTNL_FLAG_DUMP_SPLIT_NLM_DONE	= BIT(3),	/* legacy behavior */
-- 
cgit v1.2.3


From d4b483208b2606add41a22bdd3c8cd6d36009319 Mon Sep 17 00:00:00 2001
From: Kuniyuki Iwashima <kuniyu@amazon.com>
Date: Mon, 21 Oct 2024 11:32:33 -0700
Subject: ipv4: Use per-netns RTNL helpers in inet_rtm_newaddr().

inet_rtm_to_ifa() and find_matching_ifa() are called
under rtnl_net_lock().

__in_dev_get_rtnl() and in_dev_for_each_ifa_rtnl() there
can use per-netns RTNL helpers.

Let's define and use __in_dev_get_rtnl_net() and
in_dev_for_each_ifa_rtnl_net().

Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/linux/inetdevice.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include')

diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h
index d9c690c8c80b..5730ba6b1cfa 100644
--- a/include/linux/inetdevice.h
+++ b/include/linux/inetdevice.h
@@ -226,6 +226,10 @@ static __inline__ bool bad_mask(__be32 mask, __be32 addr)
 	for (ifa = rtnl_dereference((in_dev)->ifa_list); ifa;	\
 	     ifa = rtnl_dereference(ifa->ifa_next))
 
+#define in_dev_for_each_ifa_rtnl_net(net, ifa, in_dev)			\
+	for (ifa = rtnl_net_dereference(net, (in_dev)->ifa_list); ifa;	\
+	     ifa = rtnl_net_dereference(net, ifa->ifa_next))
+
 #define in_dev_for_each_ifa_rcu(ifa, in_dev)			\
 	for (ifa = rcu_dereference((in_dev)->ifa_list); ifa;	\
 	     ifa = rcu_dereference(ifa->ifa_next))
@@ -252,6 +256,11 @@ static inline struct in_device *__in_dev_get_rtnl(const struct net_device *dev)
 	return rtnl_dereference(dev->ip_ptr);
 }
 
+static inline struct in_device *__in_dev_get_rtnl_net(const struct net_device *dev)
+{
+	return rtnl_net_dereference(dev_net(dev), dev->ip_ptr);
+}
+
 /* called with rcu_read_lock or rtnl held */
 static inline bool ip_ignore_linkdown(const struct net_device *dev)
 {
-- 
cgit v1.2.3


From d1c81818aa227b37d65b40f9883109c5256b9bfb Mon Sep 17 00:00:00 2001
From: Kuniyuki Iwashima <kuniyu@amazon.com>
Date: Mon, 21 Oct 2024 11:32:36 -0700
Subject: rtnetlink: Define rtnl_net_trylock().

We will need the per-netns version of rtnl_trylock().

rtnl_net_trylock() calls __rtnl_net_lock() only when rtnl_trylock()
successfully holds RTNL.

When RTNL is removed, we will use mutex_trylock() for per-netns RTNL.

Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/linux/rtnetlink.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include')

diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index 0e62918de63b..14b88f551920 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -101,6 +101,7 @@ void __rtnl_net_lock(struct net *net);
 void __rtnl_net_unlock(struct net *net);
 void rtnl_net_lock(struct net *net);
 void rtnl_net_unlock(struct net *net);
+int rtnl_net_trylock(struct net *net);
 int rtnl_net_lock_cmp_fn(const struct lockdep_map *a, const struct lockdep_map *b);
 
 bool rtnl_net_is_locked(struct net *net);
@@ -132,6 +133,11 @@ static inline void rtnl_net_unlock(struct net *net)
 	rtnl_unlock();
 }
 
+static inline int rtnl_net_trylock(struct net *net)
+{
+	return rtnl_trylock();
+}
+
 static inline void ASSERT_RTNL_NET(struct net *net)
 {
 	ASSERT_RTNL();
-- 
cgit v1.2.3


From 1ddf9916ac09313128e40d6581cef889c0b4ce84 Mon Sep 17 00:00:00 2001
From: Steffen Klassert <steffen.klassert@secunet.com>
Date: Wed, 23 Oct 2024 12:53:42 +0200
Subject: xfrm: Add support for per cpu xfrm state handling.

Currently all flows for a certain SA must be processed by the same
cpu to avoid packet reordering and lock contention of the xfrm
state lock.

To get rid of this limitation, the IETF standardized per cpu SAs
in RFC 9611. This patch implements the xfrm part of it.

We add the cpu as a lookup key for xfrm states and a config option
to generate acquire messages for each cpu.

With that, we can have on each cpu a SA with identical traffic selector
so that flows can be processed in parallel on all cpus.

Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
Tested-by: Antony Antony <antony.antony@secunet.com>
Tested-by: Tobias Brunner <tobias@strongswan.org>
---
 include/net/xfrm.h        | 5 +++--
 include/uapi/linux/xfrm.h | 2 ++
 2 files changed, 5 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index a0bdd58f401c..f5275618e744 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -188,6 +188,7 @@ struct xfrm_state {
 	refcount_t		refcnt;
 	spinlock_t		lock;
 
+	u32			pcpu_num;
 	struct xfrm_id		id;
 	struct xfrm_selector	sel;
 	struct xfrm_mark	mark;
@@ -1684,7 +1685,7 @@ struct xfrmk_spdinfo {
 	u32 spdhmcnt;
 };
 
-struct xfrm_state *xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq);
+struct xfrm_state *xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq, u32 pcpu_num);
 int xfrm_state_delete(struct xfrm_state *x);
 int xfrm_state_flush(struct net *net, u8 proto, bool task_valid, bool sync);
 int xfrm_dev_state_flush(struct net *net, struct net_device *dev, bool task_valid);
@@ -1796,7 +1797,7 @@ int verify_spi_info(u8 proto, u32 min, u32 max, struct netlink_ext_ack *extack);
 int xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi,
 		   struct netlink_ext_ack *extack);
 struct xfrm_state *xfrm_find_acq(struct net *net, const struct xfrm_mark *mark,
-				 u8 mode, u32 reqid, u32 if_id, u8 proto,
+				 u8 mode, u32 reqid, u32 if_id, u32 pcpu_num, u8 proto,
 				 const xfrm_address_t *daddr,
 				 const xfrm_address_t *saddr, int create,
 				 unsigned short family);
diff --git a/include/uapi/linux/xfrm.h b/include/uapi/linux/xfrm.h
index f28701500714..d73a97e3030a 100644
--- a/include/uapi/linux/xfrm.h
+++ b/include/uapi/linux/xfrm.h
@@ -322,6 +322,7 @@ enum xfrm_attr_type_t {
 	XFRMA_MTIMER_THRESH,	/* __u32 in seconds for input SA */
 	XFRMA_SA_DIR,		/* __u8 */
 	XFRMA_NAT_KEEPALIVE_INTERVAL,	/* __u32 in seconds for NAT keepalive */
+	XFRMA_SA_PCPU,		/* __u32 */
 	__XFRMA_MAX
 
 #define XFRMA_OUTPUT_MARK XFRMA_SET_MARK	/* Compatibility */
@@ -437,6 +438,7 @@ struct xfrm_userpolicy_info {
 #define XFRM_POLICY_LOCALOK	1	/* Allow user to override global policy */
 	/* Automatically expand selector to include matching ICMP payloads. */
 #define XFRM_POLICY_ICMP	2
+#define XFRM_POLICY_CPU_ACQUIRE	4
 	__u8				share;
 };
 
-- 
cgit v1.2.3


From 0045e3d80613cc7174dc15f189ee6fc4e73b9365 Mon Sep 17 00:00:00 2001
From: Steffen Klassert <steffen.klassert@secunet.com>
Date: Wed, 23 Oct 2024 12:53:43 +0200
Subject: xfrm: Cache used outbound xfrm states at the policy.

Now that we can have percpu xfrm states, the number of active
states might increase. To get a better lookup performance,
we cache the used xfrm states at the policy for outbound
IPsec traffic.

Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
Tested-by: Antony Antony <antony.antony@secunet.com>
Tested-by: Tobias Brunner <tobias@strongswan.org>
---
 include/net/xfrm.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index f5275618e744..0b394c5fb5f3 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -184,6 +184,7 @@ struct xfrm_state {
 	};
 	struct hlist_node	byspi;
 	struct hlist_node	byseq;
+	struct hlist_node	state_cache;
 
 	refcount_t		refcnt;
 	spinlock_t		lock;
@@ -537,6 +538,7 @@ struct xfrm_policy_queue {
  *	@xp_net: network namespace the policy lives in
  *	@bydst: hlist node for SPD hash table or rbtree list
  *	@byidx: hlist node for index hash table
+ *	@state_cache_list: hlist head for policy cached xfrm states
  *	@lock: serialize changes to policy structure members
  *	@refcnt: reference count, freed once it reaches 0
  *	@pos: kernel internal tie-breaker to determine age of policy
@@ -567,6 +569,8 @@ struct xfrm_policy {
 	struct hlist_node	bydst;
 	struct hlist_node	byidx;
 
+	struct hlist_head	state_cache_list;
+
 	/* This lock only affects elements except for entry. */
 	rwlock_t		lock;
 	refcount_t		refcnt;
-- 
cgit v1.2.3


From 81a331a0e72ddc2f75092603d9577bd1a0ca23ad Mon Sep 17 00:00:00 2001
From: Steffen Klassert <steffen.klassert@secunet.com>
Date: Wed, 23 Oct 2024 12:53:44 +0200
Subject: xfrm: Add an inbound percpu state cache.

Now that we can have percpu xfrm states, the number of active
states might increase. To get a better lookup performance,
we add a percpu cache to cache the used inbound xfrm states.

Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
Tested-by: Antony Antony <antony.antony@secunet.com>
Tested-by: Tobias Brunner <tobias@strongswan.org>
---
 include/net/netns/xfrm.h | 1 +
 include/net/xfrm.h       | 5 +++++
 2 files changed, 6 insertions(+)

(limited to 'include')

diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h
index ae60d6664095..23dd647fe024 100644
--- a/include/net/netns/xfrm.h
+++ b/include/net/netns/xfrm.h
@@ -43,6 +43,7 @@ struct netns_xfrm {
 	struct hlist_head	__rcu *state_bysrc;
 	struct hlist_head	__rcu *state_byspi;
 	struct hlist_head	__rcu *state_byseq;
+	struct hlist_head	 __percpu *state_cache_input;
 	unsigned int		state_hmask;
 	unsigned int		state_num;
 	struct work_struct	state_hash_work;
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 0b394c5fb5f3..2b87999bd5aa 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -185,6 +185,7 @@ struct xfrm_state {
 	struct hlist_node	byspi;
 	struct hlist_node	byseq;
 	struct hlist_node	state_cache;
+	struct hlist_node	state_cache_input;
 
 	refcount_t		refcnt;
 	spinlock_t		lock;
@@ -1650,6 +1651,10 @@ int xfrm_state_update(struct xfrm_state *x);
 struct xfrm_state *xfrm_state_lookup(struct net *net, u32 mark,
 				     const xfrm_address_t *daddr, __be32 spi,
 				     u8 proto, unsigned short family);
+struct xfrm_state *xfrm_input_state_lookup(struct net *net, u32 mark,
+					   const xfrm_address_t *daddr,
+					   __be32 spi, u8 proto,
+					   unsigned short family);
 struct xfrm_state *xfrm_state_lookup_byaddr(struct net *net, u32 mark,
 					    const xfrm_address_t *daddr,
 					    const xfrm_address_t *saddr,
-- 
cgit v1.2.3


From d14772c0d88c387f881a577aa136e1e9b1291d07 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <jroedel@suse.de>
Date: Tue, 29 Oct 2024 11:54:25 +0100
Subject: iommu: Fix prototype of iommu_paging_domain_alloc_flags()

The iommu_paging_domain_alloc_flags() prototype for
non-iommu kernel configurations lacks the 'static inline'
prefixes.

Cc: Jason Gunthorpe <jgg@ziepe.ca>
Cc: Vasant Hegde <vasant.hegde@amd.com>
Fixes: 20858d4ebb42 ("iommu: Introduce iommu_paging_domain_alloc_flags()")
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 include/linux/iommu.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index aa78d911fdda..522efdc7d815 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -1080,7 +1080,7 @@ static inline bool device_iommu_capable(struct device *dev, enum iommu_cap cap)
 	return false;
 }
 
-struct iommu_domain *iommu_paging_domain_alloc_flags(struct device *dev,
+static inline struct iommu_domain *iommu_paging_domain_alloc_flags(struct device *dev,
 						     unsigned int flags)
 {
 	return ERR_PTR(-ENODEV);
-- 
cgit v1.2.3


From a33bf8d8ce7e06bf0f033865b0cea5887cd2ac8c Mon Sep 17 00:00:00 2001
From: Joerg Roedel <jroedel@suse.de>
Date: Tue, 29 Oct 2024 12:11:49 +0100
Subject: iommu: Restore iommu_flush_iotlb_all()

This patch restores the iommu_flush_iotlb_all() function.
Commit

	69e5a17511f6 ("iommu: Remove useless flush from iommu_create_device_direct_mappings()")

claims it removed the last call-site, except it did not. There is still
at least one caller in

	drivers/gpu/drm/msm/msm_iommu.c

so keep the function around until all call-sites are updated.

Cc: Jason Gunthorpe <jgg@ziepe.ca>
Fixes: 69e5a17511f6 ("iommu: Remove useless flush from iommu_create_device_direct_mappings()")
Acked-by: Will Deacon <will@kernel.org>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 include/linux/iommu.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include')

diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 522efdc7d815..8cce372a33f1 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -853,6 +853,12 @@ void iommu_set_dma_strict(void);
 extern int report_iommu_fault(struct iommu_domain *domain, struct device *dev,
 			      unsigned long iova, int flags);
 
+static inline void iommu_flush_iotlb_all(struct iommu_domain *domain)
+{
+	if (domain->ops->flush_iotlb_all)
+		domain->ops->flush_iotlb_all(domain);
+}
+
 static inline void iommu_iotlb_sync(struct iommu_domain *domain,
 				  struct iommu_iotlb_gather *iotlb_gather)
 {
@@ -1137,6 +1143,10 @@ static inline ssize_t iommu_map_sg(struct iommu_domain *domain,
 	return -ENODEV;
 }
 
+static inline void iommu_flush_iotlb_all(struct iommu_domain *domain)
+{
+}
+
 static inline void iommu_iotlb_sync(struct iommu_domain *domain,
 				  struct iommu_iotlb_gather *iotlb_gather)
 {
-- 
cgit v1.2.3


From c382429b587ac49bd179d768f13e7fa5e7ed1787 Mon Sep 17 00:00:00 2001
From: Armin Wolf <W_Armin@gmx.de>
Date: Sat, 26 Oct 2024 21:38:02 +0200
Subject: platform/x86: wmi: Replace dev_to_wdev() with to_wmi_device()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Replace dev_to_wdev() with to_wmi_device() to stop duplicating
functionality.

Also switch to_wmi_device() to use container_of_const() so const
values are handled correctly.

Signed-off-by: Armin Wolf <W_Armin@gmx.de>
Link: https://lore.kernel.org/r/20241026193803.8802-2-W_Armin@gmx.de
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
---
 include/linux/wmi.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/wmi.h b/include/linux/wmi.h
index 120019677fc6..fca5fd43c707 100644
--- a/include/linux/wmi.h
+++ b/include/linux/wmi.h
@@ -34,7 +34,7 @@ struct wmi_device {
  *
  * Cast a struct device to a struct wmi_device.
  */
-#define to_wmi_device(device)	container_of(device, struct wmi_device, dev)
+#define to_wmi_device(device)	container_of_const(device, struct wmi_device, dev)
 
 extern acpi_status wmidev_evaluate_method(struct wmi_device *wdev,
 					  u8 instance, u32 method_id,
-- 
cgit v1.2.3


From e001341a984e709e377b275123aecb5a763eaef9 Mon Sep 17 00:00:00 2001
From: Armin Wolf <W_Armin@gmx.de>
Date: Sat, 26 Oct 2024 21:38:03 +0200
Subject: platform/x86: wmi: Introduce to_wmi_driver()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Introduce to_wmi_driver() as a replacement for dev_to_wdrv()
so WMI drivers can use this support macro instead of having
to duplicate its functionality.

Signed-off-by: Armin Wolf <W_Armin@gmx.de>
Link: https://lore.kernel.org/r/20241026193803.8802-3-W_Armin@gmx.de
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
---
 include/linux/wmi.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include')

diff --git a/include/linux/wmi.h b/include/linux/wmi.h
index fca5fd43c707..10751c8e5e6a 100644
--- a/include/linux/wmi.h
+++ b/include/linux/wmi.h
@@ -73,6 +73,14 @@ struct wmi_driver {
 	void (*notify)(struct wmi_device *device, union acpi_object *data);
 };
 
+/**
+ * to_wmi_driver() - Helper macro to cast a driver to a wmi_driver
+ * @drv: driver struct
+ *
+ * Cast a struct device_driver to a struct wmi_driver.
+ */
+#define to_wmi_driver(drv)	container_of_const(drv, struct wmi_driver, driver)
+
 extern int __must_check __wmi_driver_register(struct wmi_driver *driver,
 					      struct module *owner);
 extern void wmi_driver_unregister(struct wmi_driver *driver);
-- 
cgit v1.2.3


From 1db363f6979d1fc8a94ea561a50f79bac40d39e4 Mon Sep 17 00:00:00 2001
From: Jani Nikula <jani.nikula@intel.com>
Date: Tue, 22 Oct 2024 12:41:49 +0300
Subject: drm/i915/pciids: add PVC PCI ID macros
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The xe PCI ID macros are a subset of the i915 PCI IDs macros, apart from
the PVC PCI IDs (naturally, because i915 does not and will not support
PVC). In preparation of using a shared file, add PVC PCI IDs to
i915_pciids.h.

Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Lucas De Marchi <lucas.demarchi@intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Cc: Tvrtko Ursulin <tursulin@ursulin.net>
Reviewed-by: Andi Shyti <andi.shyti@linux.intel.com>
Acked-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/bc62e37cbfa3ed4dbfc75a7ca69b87afae6a727b.1729590029.git.jani.nikula@intel.com
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 include/drm/intel/i915_pciids.h | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

(limited to 'include')

diff --git a/include/drm/intel/i915_pciids.h b/include/drm/intel/i915_pciids.h
index 6b92f8c3731b..81d4dc5d9242 100644
--- a/include/drm/intel/i915_pciids.h
+++ b/include/drm/intel/i915_pciids.h
@@ -780,6 +780,22 @@
 	MACRO__(0x7D60, ## __VA_ARGS__), \
 	MACRO__(0x7DD5, ## __VA_ARGS__)
 
+/* PVC */
+#define INTEL_PVC_IDS(MACRO__, ...) \
+	MACRO__(0x0B69, ## __VA_ARGS__), \
+	MACRO__(0x0B6E, ## __VA_ARGS__), \
+	MACRO__(0x0BD4, ## __VA_ARGS__), \
+	MACRO__(0x0BD5, ## __VA_ARGS__), \
+	MACRO__(0x0BD6, ## __VA_ARGS__), \
+	MACRO__(0x0BD7, ## __VA_ARGS__), \
+	MACRO__(0x0BD8, ## __VA_ARGS__), \
+	MACRO__(0x0BD9, ## __VA_ARGS__), \
+	MACRO__(0x0BDA, ## __VA_ARGS__), \
+	MACRO__(0x0BDB, ## __VA_ARGS__), \
+	MACRO__(0x0BE0, ## __VA_ARGS__), \
+	MACRO__(0x0BE1, ## __VA_ARGS__), \
+	MACRO__(0x0BE5, ## __VA_ARGS__)
+
 /* LNL */
 #define INTEL_LNL_IDS(MACRO__, ...) \
 	MACRO__(0x6420, ## __VA_ARGS__), \
-- 
cgit v1.2.3


From f719c2a2d1e7fb891d45998f241ff4273d7ae7e6 Mon Sep 17 00:00:00 2001
From: Jani Nikula <jani.nikula@intel.com>
Date: Tue, 22 Oct 2024 12:41:50 +0300
Subject: drm/intel/pciids: rename i915_pciids.h to just pciids.h
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In preparation of sharing the PCI ID macros between i915 and xe, rename
i915_pciids.h to pciids.h.

Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Lucas De Marchi <lucas.demarchi@intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Cc: Tvrtko Ursulin <tursulin@ursulin.net>
Reviewed-by: Andi Shyti <andi.shyti@linux.intel.com>
Acked-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/835143845faa5310e4bb58405a8a0848392bbf06.1729590029.git.jani.nikula@intel.com
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 include/drm/intel/i915_pciids.h | 825 ----------------------------------------
 include/drm/intel/pciids.h      | 825 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 825 insertions(+), 825 deletions(-)
 delete mode 100644 include/drm/intel/i915_pciids.h
 create mode 100644 include/drm/intel/pciids.h

(limited to 'include')

diff --git a/include/drm/intel/i915_pciids.h b/include/drm/intel/i915_pciids.h
deleted file mode 100644
index 81d4dc5d9242..000000000000
--- a/include/drm/intel/i915_pciids.h
+++ /dev/null
@@ -1,825 +0,0 @@
-/*
- * Copyright 2013 Intel Corporation
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
-#ifndef _I915_PCIIDS_H
-#define _I915_PCIIDS_H
-
-#ifdef __KERNEL__
-#define INTEL_VGA_DEVICE(_id, _info) { \
-	PCI_DEVICE(PCI_VENDOR_ID_INTEL, (_id)), \
-	.class = PCI_BASE_CLASS_DISPLAY << 16, .class_mask = 0xff << 16, \
-	.driver_data = (kernel_ulong_t)(_info), \
-}
-
-#define INTEL_QUANTA_VGA_DEVICE(_info) { \
-	.vendor = PCI_VENDOR_ID_INTEL, .device = 0x16a, \
-	.subvendor = 0x152d, .subdevice = 0x8990, \
-	.class = PCI_BASE_CLASS_DISPLAY << 16, .class_mask = 0xff << 16, \
-	.driver_data = (kernel_ulong_t)(_info), \
-}
-#endif
-
-#define INTEL_I810_IDS(MACRO__, ...) \
-	MACRO__(0x7121, ## __VA_ARGS__), /* I810 */ \
-	MACRO__(0x7123, ## __VA_ARGS__), /* I810_DC100 */ \
-	MACRO__(0x7125, ## __VA_ARGS__)  /* I810_E */
-
-#define INTEL_I815_IDS(MACRO__, ...) \
-	MACRO__(0x1132, ## __VA_ARGS__)  /* I815*/
-
-#define INTEL_I830_IDS(MACRO__, ...) \
-	MACRO__(0x3577, ## __VA_ARGS__)
-
-#define INTEL_I845G_IDS(MACRO__, ...) \
-	MACRO__(0x2562, ## __VA_ARGS__)
-
-#define INTEL_I85X_IDS(MACRO__, ...) \
-	MACRO__(0x3582, ## __VA_ARGS__), /* I855_GM */ \
-	MACRO__(0x358e, ## __VA_ARGS__)
-
-#define INTEL_I865G_IDS(MACRO__, ...) \
-	MACRO__(0x2572, ## __VA_ARGS__) /* I865_G */
-
-#define INTEL_I915G_IDS(MACRO__, ...) \
-	MACRO__(0x2582, ## __VA_ARGS__), /* I915_G */ \
-	MACRO__(0x258a, ## __VA_ARGS__)  /* E7221_G */
-
-#define INTEL_I915GM_IDS(MACRO__, ...) \
-	MACRO__(0x2592, ## __VA_ARGS__) /* I915_GM */
-
-#define INTEL_I945G_IDS(MACRO__, ...) \
-	MACRO__(0x2772, ## __VA_ARGS__) /* I945_G */
-
-#define INTEL_I945GM_IDS(MACRO__, ...) \
-	MACRO__(0x27a2, ## __VA_ARGS__), /* I945_GM */ \
-	MACRO__(0x27ae, ## __VA_ARGS__)  /* I945_GME */
-
-#define INTEL_I965G_IDS(MACRO__, ...) \
-	MACRO__(0x2972, ## __VA_ARGS__), /* I946_GZ */ \
-	MACRO__(0x2982, ## __VA_ARGS__),	/* G35_G */ \
-	MACRO__(0x2992, ## __VA_ARGS__),	/* I965_Q */ \
-	MACRO__(0x29a2, ## __VA_ARGS__)	/* I965_G */
-
-#define INTEL_G33_IDS(MACRO__, ...) \
-	MACRO__(0x29b2, ## __VA_ARGS__), /* Q35_G */ \
-	MACRO__(0x29c2, ## __VA_ARGS__),	/* G33_G */ \
-	MACRO__(0x29d2, ## __VA_ARGS__)	/* Q33_G */
-
-#define INTEL_I965GM_IDS(MACRO__, ...) \
-	MACRO__(0x2a02, ## __VA_ARGS__),	/* I965_GM */ \
-	MACRO__(0x2a12, ## __VA_ARGS__)  /* I965_GME */
-
-#define INTEL_GM45_IDS(MACRO__, ...) \
-	MACRO__(0x2a42, ## __VA_ARGS__) /* GM45_G */
-
-#define INTEL_G45_IDS(MACRO__, ...) \
-	MACRO__(0x2e02, ## __VA_ARGS__), /* IGD_E_G */ \
-	MACRO__(0x2e12, ## __VA_ARGS__), /* Q45_G */ \
-	MACRO__(0x2e22, ## __VA_ARGS__), /* G45_G */ \
-	MACRO__(0x2e32, ## __VA_ARGS__), /* G41_G */ \
-	MACRO__(0x2e42, ## __VA_ARGS__), /* B43_G */ \
-	MACRO__(0x2e92, ## __VA_ARGS__)	/* B43_G.1 */
-
-#define INTEL_PNV_G_IDS(MACRO__, ...) \
-	MACRO__(0xa001, ## __VA_ARGS__)
-
-#define INTEL_PNV_M_IDS(MACRO__, ...) \
-	MACRO__(0xa011, ## __VA_ARGS__)
-
-#define INTEL_PNV_IDS(MACRO__, ...) \
-	INTEL_PNV_G_IDS(MACRO__, ## __VA_ARGS__), \
-	INTEL_PNV_M_IDS(MACRO__, ## __VA_ARGS__)
-
-#define INTEL_ILK_D_IDS(MACRO__, ...) \
-	MACRO__(0x0042, ## __VA_ARGS__)
-
-#define INTEL_ILK_M_IDS(MACRO__, ...) \
-	MACRO__(0x0046, ## __VA_ARGS__)
-
-#define INTEL_ILK_IDS(MACRO__, ...) \
-	INTEL_ILK_D_IDS(MACRO__, ## __VA_ARGS__), \
-	INTEL_ILK_M_IDS(MACRO__, ## __VA_ARGS__)
-
-#define INTEL_SNB_D_GT1_IDS(MACRO__, ...) \
-	MACRO__(0x0102, ## __VA_ARGS__), \
-	MACRO__(0x010A, ## __VA_ARGS__)
-
-#define INTEL_SNB_D_GT2_IDS(MACRO__, ...) \
-	MACRO__(0x0112, ## __VA_ARGS__), \
-	MACRO__(0x0122, ## __VA_ARGS__)
-
-#define INTEL_SNB_D_IDS(MACRO__, ...) \
-	INTEL_SNB_D_GT1_IDS(MACRO__, ## __VA_ARGS__), \
-	INTEL_SNB_D_GT2_IDS(MACRO__, ## __VA_ARGS__)
-
-#define INTEL_SNB_M_GT1_IDS(MACRO__, ...) \
-	MACRO__(0x0106, ## __VA_ARGS__)
-
-#define INTEL_SNB_M_GT2_IDS(MACRO__, ...) \
-	MACRO__(0x0116, ## __VA_ARGS__), \
-	MACRO__(0x0126, ## __VA_ARGS__)
-
-#define INTEL_SNB_M_IDS(MACRO__, ...) \
-	INTEL_SNB_M_GT1_IDS(MACRO__, ## __VA_ARGS__), \
-	INTEL_SNB_M_GT2_IDS(MACRO__, ## __VA_ARGS__)
-
-#define INTEL_SNB_IDS(MACRO__, ...) \
-	INTEL_SNB_D_IDS(MACRO__, ## __VA_ARGS__), \
-	INTEL_SNB_M_IDS(MACRO__, ## __VA_ARGS__)
-
-#define INTEL_IVB_M_GT1_IDS(MACRO__, ...) \
-	MACRO__(0x0156, ## __VA_ARGS__) /* GT1 mobile */
-
-#define INTEL_IVB_M_GT2_IDS(MACRO__, ...) \
-	MACRO__(0x0166, ## __VA_ARGS__) /* GT2 mobile */
-
-#define INTEL_IVB_M_IDS(MACRO__, ...) \
-	INTEL_IVB_M_GT1_IDS(MACRO__, ## __VA_ARGS__), \
-	INTEL_IVB_M_GT2_IDS(MACRO__, ## __VA_ARGS__)
-
-#define INTEL_IVB_D_GT1_IDS(MACRO__, ...) \
-	MACRO__(0x0152, ## __VA_ARGS__), /* GT1 desktop */ \
-	MACRO__(0x015a, ## __VA_ARGS__)  /* GT1 server */
-
-#define INTEL_IVB_D_GT2_IDS(MACRO__, ...) \
-	MACRO__(0x0162, ## __VA_ARGS__), /* GT2 desktop */ \
-	MACRO__(0x016a, ## __VA_ARGS__)  /* GT2 server */
-
-#define INTEL_IVB_D_IDS(MACRO__, ...) \
-	INTEL_IVB_D_GT1_IDS(MACRO__, ## __VA_ARGS__), \
-	INTEL_IVB_D_GT2_IDS(MACRO__, ## __VA_ARGS__)
-
-#define INTEL_IVB_IDS(MACRO__, ...) \
-	INTEL_IVB_M_IDS(MACRO__, ## __VA_ARGS__), \
-	INTEL_IVB_D_IDS(MACRO__, ## __VA_ARGS__)
-
-#define INTEL_IVB_Q_IDS(MACRO__, ...) \
-	INTEL_QUANTA_VGA_DEVICE(__VA_ARGS__) /* Quanta transcode */
-
-#define INTEL_HSW_ULT_GT1_IDS(MACRO__, ...) \
-	MACRO__(0x0A02, ## __VA_ARGS__), /* ULT GT1 desktop */ \
-	MACRO__(0x0A06, ## __VA_ARGS__), /* ULT GT1 mobile */ \
-	MACRO__(0x0A0A, ## __VA_ARGS__), /* ULT GT1 server */ \
-	MACRO__(0x0A0B, ## __VA_ARGS__)  /* ULT GT1 reserved */
-
-#define INTEL_HSW_ULX_GT1_IDS(MACRO__, ...) \
-	MACRO__(0x0A0E, ## __VA_ARGS__) /* ULX GT1 mobile */
-
-#define INTEL_HSW_GT1_IDS(MACRO__, ...) \
-	INTEL_HSW_ULT_GT1_IDS(MACRO__, ## __VA_ARGS__), \
-	INTEL_HSW_ULX_GT1_IDS(MACRO__, ## __VA_ARGS__), \
-	MACRO__(0x0402, ## __VA_ARGS__), /* GT1 desktop */ \
-	MACRO__(0x0406, ## __VA_ARGS__), /* GT1 mobile */ \
-	MACRO__(0x040A, ## __VA_ARGS__), /* GT1 server */ \
-	MACRO__(0x040B, ## __VA_ARGS__), /* GT1 reserved */ \
-	MACRO__(0x040E, ## __VA_ARGS__), /* GT1 reserved */ \
-	MACRO__(0x0C02, ## __VA_ARGS__), /* SDV GT1 desktop */ \
-	MACRO__(0x0C06, ## __VA_ARGS__), /* SDV GT1 mobile */ \
-	MACRO__(0x0C0A, ## __VA_ARGS__), /* SDV GT1 server */ \
-	MACRO__(0x0C0B, ## __VA_ARGS__), /* SDV GT1 reserved */ \
-	MACRO__(0x0C0E, ## __VA_ARGS__), /* SDV GT1 reserved */ \
-	MACRO__(0x0D02, ## __VA_ARGS__), /* CRW GT1 desktop */ \
-	MACRO__(0x0D06, ## __VA_ARGS__), /* CRW GT1 mobile */ \
-	MACRO__(0x0D0A, ## __VA_ARGS__), /* CRW GT1 server */ \
-	MACRO__(0x0D0B, ## __VA_ARGS__), /* CRW GT1 reserved */ \
-	MACRO__(0x0D0E, ## __VA_ARGS__)  /* CRW GT1 reserved */
-
-#define INTEL_HSW_ULT_GT2_IDS(MACRO__, ...) \
-	MACRO__(0x0A12, ## __VA_ARGS__), /* ULT GT2 desktop */ \
-	MACRO__(0x0A16, ## __VA_ARGS__), /* ULT GT2 mobile */ \
-	MACRO__(0x0A1A, ## __VA_ARGS__), /* ULT GT2 server */ \
-	MACRO__(0x0A1B, ## __VA_ARGS__)  /* ULT GT2 reserved */ \
-
-#define INTEL_HSW_ULX_GT2_IDS(MACRO__, ...) \
-	MACRO__(0x0A1E, ## __VA_ARGS__) /* ULX GT2 mobile */ \
-
-#define INTEL_HSW_GT2_IDS(MACRO__, ...) \
-	INTEL_HSW_ULT_GT2_IDS(MACRO__, ## __VA_ARGS__), \
-	INTEL_HSW_ULX_GT2_IDS(MACRO__, ## __VA_ARGS__), \
-	MACRO__(0x0412, ## __VA_ARGS__), /* GT2 desktop */ \
-	MACRO__(0x0416, ## __VA_ARGS__), /* GT2 mobile */ \
-	MACRO__(0x041A, ## __VA_ARGS__), /* GT2 server */ \
-	MACRO__(0x041B, ## __VA_ARGS__), /* GT2 reserved */ \
-	MACRO__(0x041E, ## __VA_ARGS__), /* GT2 reserved */ \
-	MACRO__(0x0C12, ## __VA_ARGS__), /* SDV GT2 desktop */ \
-	MACRO__(0x0C16, ## __VA_ARGS__), /* SDV GT2 mobile */ \
-	MACRO__(0x0C1A, ## __VA_ARGS__), /* SDV GT2 server */ \
-	MACRO__(0x0C1B, ## __VA_ARGS__), /* SDV GT2 reserved */ \
-	MACRO__(0x0C1E, ## __VA_ARGS__), /* SDV GT2 reserved */ \
-	MACRO__(0x0D12, ## __VA_ARGS__), /* CRW GT2 desktop */ \
-	MACRO__(0x0D16, ## __VA_ARGS__), /* CRW GT2 mobile */ \
-	MACRO__(0x0D1A, ## __VA_ARGS__), /* CRW GT2 server */ \
-	MACRO__(0x0D1B, ## __VA_ARGS__), /* CRW GT2 reserved */ \
-	MACRO__(0x0D1E, ## __VA_ARGS__)  /* CRW GT2 reserved */
-
-#define INTEL_HSW_ULT_GT3_IDS(MACRO__, ...) \
-	MACRO__(0x0A22, ## __VA_ARGS__), /* ULT GT3 desktop */ \
-	MACRO__(0x0A26, ## __VA_ARGS__), /* ULT GT3 mobile */ \
-	MACRO__(0x0A2A, ## __VA_ARGS__), /* ULT GT3 server */ \
-	MACRO__(0x0A2B, ## __VA_ARGS__), /* ULT GT3 reserved */ \
-	MACRO__(0x0A2E, ## __VA_ARGS__)  /* ULT GT3 reserved */
-
-#define INTEL_HSW_GT3_IDS(MACRO__, ...) \
-	INTEL_HSW_ULT_GT3_IDS(MACRO__, ## __VA_ARGS__), \
-	MACRO__(0x0422, ## __VA_ARGS__), /* GT3 desktop */ \
-	MACRO__(0x0426, ## __VA_ARGS__), /* GT3 mobile */ \
-	MACRO__(0x042A, ## __VA_ARGS__), /* GT3 server */ \
-	MACRO__(0x042B, ## __VA_ARGS__), /* GT3 reserved */ \
-	MACRO__(0x042E, ## __VA_ARGS__), /* GT3 reserved */ \
-	MACRO__(0x0C22, ## __VA_ARGS__), /* SDV GT3 desktop */ \
-	MACRO__(0x0C26, ## __VA_ARGS__), /* SDV GT3 mobile */ \
-	MACRO__(0x0C2A, ## __VA_ARGS__), /* SDV GT3 server */ \
-	MACRO__(0x0C2B, ## __VA_ARGS__), /* SDV GT3 reserved */ \
-	MACRO__(0x0C2E, ## __VA_ARGS__), /* SDV GT3 reserved */ \
-	MACRO__(0x0D22, ## __VA_ARGS__), /* CRW GT3 desktop */ \
-	MACRO__(0x0D26, ## __VA_ARGS__), /* CRW GT3 mobile */ \
-	MACRO__(0x0D2A, ## __VA_ARGS__), /* CRW GT3 server */ \
-	MACRO__(0x0D2B, ## __VA_ARGS__), /* CRW GT3 reserved */ \
-	MACRO__(0x0D2E, ## __VA_ARGS__)  /* CRW GT3 reserved */
-
-#define INTEL_HSW_IDS(MACRO__, ...) \
-	INTEL_HSW_GT1_IDS(MACRO__, ## __VA_ARGS__), \
-	INTEL_HSW_GT2_IDS(MACRO__, ## __VA_ARGS__), \
-	INTEL_HSW_GT3_IDS(MACRO__, ## __VA_ARGS__)
-
-#define INTEL_VLV_IDS(MACRO__, ...) \
-	MACRO__(0x0f30, ## __VA_ARGS__), \
-	MACRO__(0x0f31, ## __VA_ARGS__), \
-	MACRO__(0x0f32, ## __VA_ARGS__), \
-	MACRO__(0x0f33, ## __VA_ARGS__)
-
-#define INTEL_BDW_ULT_GT1_IDS(MACRO__, ...) \
-	MACRO__(0x1606, ## __VA_ARGS__), /* GT1 ULT */ \
-	MACRO__(0x160B, ## __VA_ARGS__)  /* GT1 Iris */
-
-#define INTEL_BDW_ULX_GT1_IDS(MACRO__, ...) \
-	MACRO__(0x160E, ## __VA_ARGS__) /* GT1 ULX */
-
-#define INTEL_BDW_GT1_IDS(MACRO__, ...) \
-	INTEL_BDW_ULT_GT1_IDS(MACRO__, ## __VA_ARGS__), \
-	INTEL_BDW_ULX_GT1_IDS(MACRO__, ## __VA_ARGS__), \
-	MACRO__(0x1602, ## __VA_ARGS__), /* GT1 ULT */ \
-	MACRO__(0x160A, ## __VA_ARGS__), /* GT1 Server */ \
-	MACRO__(0x160D, ## __VA_ARGS__)  /* GT1 Workstation */
-
-#define INTEL_BDW_ULT_GT2_IDS(MACRO__, ...) \
-	MACRO__(0x1616, ## __VA_ARGS__), /* GT2 ULT */ \
-	MACRO__(0x161B, ## __VA_ARGS__)  /* GT2 ULT */
-
-#define INTEL_BDW_ULX_GT2_IDS(MACRO__, ...) \
-	MACRO__(0x161E, ## __VA_ARGS__) /* GT2 ULX */
-
-#define INTEL_BDW_GT2_IDS(MACRO__, ...) \
-	INTEL_BDW_ULT_GT2_IDS(MACRO__, ## __VA_ARGS__), \
-	INTEL_BDW_ULX_GT2_IDS(MACRO__, ## __VA_ARGS__), \
-	MACRO__(0x1612, ## __VA_ARGS__), /* GT2 Halo */ \
-	MACRO__(0x161A, ## __VA_ARGS__), /* GT2 Server */ \
-	MACRO__(0x161D, ## __VA_ARGS__)  /* GT2 Workstation */
-
-#define INTEL_BDW_ULT_GT3_IDS(MACRO__, ...) \
-	MACRO__(0x1626, ## __VA_ARGS__), /* ULT */ \
-	MACRO__(0x162B, ## __VA_ARGS__)  /* Iris */ \
-
-#define INTEL_BDW_ULX_GT3_IDS(MACRO__, ...) \
-	MACRO__(0x162E, ## __VA_ARGS__)  /* ULX */
-
-#define INTEL_BDW_GT3_IDS(MACRO__, ...) \
-	INTEL_BDW_ULT_GT3_IDS(MACRO__, ## __VA_ARGS__), \
-	INTEL_BDW_ULX_GT3_IDS(MACRO__, ## __VA_ARGS__), \
-	MACRO__(0x1622, ## __VA_ARGS__), /* ULT */ \
-	MACRO__(0x162A, ## __VA_ARGS__), /* Server */ \
-	MACRO__(0x162D, ## __VA_ARGS__)  /* Workstation */
-
-#define INTEL_BDW_ULT_RSVD_IDS(MACRO__, ...) \
-	MACRO__(0x1636, ## __VA_ARGS__), /* ULT */ \
-	MACRO__(0x163B, ## __VA_ARGS__)  /* Iris */
-
-#define INTEL_BDW_ULX_RSVD_IDS(MACRO__, ...) \
-	MACRO__(0x163E, ## __VA_ARGS__) /* ULX */
-
-#define INTEL_BDW_RSVD_IDS(MACRO__, ...) \
-	INTEL_BDW_ULT_RSVD_IDS(MACRO__, ## __VA_ARGS__), \
-	INTEL_BDW_ULX_RSVD_IDS(MACRO__, ## __VA_ARGS__), \
-	MACRO__(0x1632, ## __VA_ARGS__), /* ULT */ \
-	MACRO__(0x163A, ## __VA_ARGS__), /* Server */ \
-	MACRO__(0x163D, ## __VA_ARGS__)  /* Workstation */
-
-#define INTEL_BDW_IDS(MACRO__, ...) \
-	INTEL_BDW_GT1_IDS(MACRO__, ## __VA_ARGS__), \
-	INTEL_BDW_GT2_IDS(MACRO__, ## __VA_ARGS__), \
-	INTEL_BDW_GT3_IDS(MACRO__, ## __VA_ARGS__), \
-	INTEL_BDW_RSVD_IDS(MACRO__, ## __VA_ARGS__)
-
-#define INTEL_CHV_IDS(MACRO__, ...) \
-	MACRO__(0x22b0, ## __VA_ARGS__), \
-	MACRO__(0x22b1, ## __VA_ARGS__), \
-	MACRO__(0x22b2, ## __VA_ARGS__), \
-	MACRO__(0x22b3, ## __VA_ARGS__)
-
-#define INTEL_SKL_ULT_GT1_IDS(MACRO__, ...) \
-	MACRO__(0x1906, ## __VA_ARGS__), /* ULT GT1 */ \
-	MACRO__(0x1913, ## __VA_ARGS__)  /* ULT GT1.5 */
-
-#define INTEL_SKL_ULX_GT1_IDS(MACRO__, ...) \
-	MACRO__(0x190E, ## __VA_ARGS__), /* ULX GT1 */ \
-	MACRO__(0x1915, ## __VA_ARGS__)  /* ULX GT1.5 */
-
-#define INTEL_SKL_GT1_IDS(MACRO__, ...) \
-	INTEL_SKL_ULT_GT1_IDS(MACRO__, ## __VA_ARGS__), \
-	INTEL_SKL_ULX_GT1_IDS(MACRO__, ## __VA_ARGS__), \
-	MACRO__(0x1902, ## __VA_ARGS__), /* DT  GT1 */ \
-	MACRO__(0x190A, ## __VA_ARGS__), /* SRV GT1 */ \
-	MACRO__(0x190B, ## __VA_ARGS__), /* Halo GT1 */ \
-	MACRO__(0x1917, ## __VA_ARGS__)  /* DT  GT1.5 */
-
-#define INTEL_SKL_ULT_GT2_IDS(MACRO__, ...) \
-	MACRO__(0x1916, ## __VA_ARGS__), /* ULT GT2 */ \
-	MACRO__(0x1921, ## __VA_ARGS__)  /* ULT GT2F */
-
-#define INTEL_SKL_ULX_GT2_IDS(MACRO__, ...) \
-	MACRO__(0x191E, ## __VA_ARGS__) /* ULX GT2 */
-
-#define INTEL_SKL_GT2_IDS(MACRO__, ...) \
-	INTEL_SKL_ULT_GT2_IDS(MACRO__, ## __VA_ARGS__), \
-	INTEL_SKL_ULX_GT2_IDS(MACRO__, ## __VA_ARGS__), \
-	MACRO__(0x1912, ## __VA_ARGS__), /* DT  GT2 */ \
-	MACRO__(0x191A, ## __VA_ARGS__), /* SRV GT2 */ \
-	MACRO__(0x191B, ## __VA_ARGS__), /* Halo GT2 */ \
-	MACRO__(0x191D, ## __VA_ARGS__)  /* WKS GT2 */
-
-#define INTEL_SKL_ULT_GT3_IDS(MACRO__, ...) \
-	MACRO__(0x1923, ## __VA_ARGS__), /* ULT GT3 */ \
-	MACRO__(0x1926, ## __VA_ARGS__), /* ULT GT3e */ \
-	MACRO__(0x1927, ## __VA_ARGS__)  /* ULT GT3e */
-
-#define INTEL_SKL_GT3_IDS(MACRO__, ...) \
-	INTEL_SKL_ULT_GT3_IDS(MACRO__, ## __VA_ARGS__), \
-	MACRO__(0x192A, ## __VA_ARGS__), /* SRV GT3 */ \
-	MACRO__(0x192B, ## __VA_ARGS__), /* Halo GT3e */ \
-	MACRO__(0x192D, ## __VA_ARGS__)  /* SRV GT3e */
-
-#define INTEL_SKL_GT4_IDS(MACRO__, ...) \
-	MACRO__(0x1932, ## __VA_ARGS__), /* DT GT4 */ \
-	MACRO__(0x193A, ## __VA_ARGS__), /* SRV GT4e */ \
-	MACRO__(0x193B, ## __VA_ARGS__), /* Halo GT4e */ \
-	MACRO__(0x193D, ## __VA_ARGS__) /* WKS GT4e */
-
-#define INTEL_SKL_IDS(MACRO__, ...) \
-	INTEL_SKL_GT1_IDS(MACRO__, ## __VA_ARGS__), \
-	INTEL_SKL_GT2_IDS(MACRO__, ## __VA_ARGS__), \
-	INTEL_SKL_GT3_IDS(MACRO__, ## __VA_ARGS__), \
-	INTEL_SKL_GT4_IDS(MACRO__, ## __VA_ARGS__)
-
-#define INTEL_BXT_IDS(MACRO__, ...) \
-	MACRO__(0x0A84, ## __VA_ARGS__), \
-	MACRO__(0x1A84, ## __VA_ARGS__), \
-	MACRO__(0x1A85, ## __VA_ARGS__), \
-	MACRO__(0x5A84, ## __VA_ARGS__), /* APL HD Graphics 505 */ \
-	MACRO__(0x5A85, ## __VA_ARGS__)  /* APL HD Graphics 500 */
-
-#define INTEL_GLK_IDS(MACRO__, ...) \
-	MACRO__(0x3184, ## __VA_ARGS__), \
-	MACRO__(0x3185, ## __VA_ARGS__)
-
-#define INTEL_KBL_ULT_GT1_IDS(MACRO__, ...) \
-	MACRO__(0x5906, ## __VA_ARGS__), /* ULT GT1 */ \
-	MACRO__(0x5913, ## __VA_ARGS__)  /* ULT GT1.5 */
-
-#define INTEL_KBL_ULX_GT1_IDS(MACRO__, ...) \
-	MACRO__(0x590E, ## __VA_ARGS__), /* ULX GT1 */ \
-	MACRO__(0x5915, ## __VA_ARGS__)  /* ULX GT1.5 */
-
-#define INTEL_KBL_GT1_IDS(MACRO__, ...) \
-	INTEL_KBL_ULT_GT1_IDS(MACRO__, ## __VA_ARGS__), \
-	INTEL_KBL_ULX_GT1_IDS(MACRO__, ## __VA_ARGS__), \
-	MACRO__(0x5902, ## __VA_ARGS__), /* DT  GT1 */ \
-	MACRO__(0x5908, ## __VA_ARGS__), /* Halo GT1 */ \
-	MACRO__(0x590A, ## __VA_ARGS__), /* SRV GT1 */ \
-	MACRO__(0x590B, ## __VA_ARGS__) /* Halo GT1 */
-
-#define INTEL_KBL_ULT_GT2_IDS(MACRO__, ...) \
-	MACRO__(0x5916, ## __VA_ARGS__), /* ULT GT2 */ \
-	MACRO__(0x5921, ## __VA_ARGS__)  /* ULT GT2F */
-
-#define INTEL_KBL_ULX_GT2_IDS(MACRO__, ...) \
-	MACRO__(0x591E, ## __VA_ARGS__)  /* ULX GT2 */
-
-#define INTEL_KBL_GT2_IDS(MACRO__, ...) \
-	INTEL_KBL_ULT_GT2_IDS(MACRO__, ## __VA_ARGS__), \
-	INTEL_KBL_ULX_GT2_IDS(MACRO__, ## __VA_ARGS__), \
-	MACRO__(0x5912, ## __VA_ARGS__), /* DT  GT2 */ \
-	MACRO__(0x5917, ## __VA_ARGS__), /* Mobile GT2 */ \
-	MACRO__(0x591A, ## __VA_ARGS__), /* SRV GT2 */ \
-	MACRO__(0x591B, ## __VA_ARGS__), /* Halo GT2 */ \
-	MACRO__(0x591D, ## __VA_ARGS__) /* WKS GT2 */
-
-#define INTEL_KBL_ULT_GT3_IDS(MACRO__, ...) \
-	MACRO__(0x5926, ## __VA_ARGS__) /* ULT GT3 */
-
-#define INTEL_KBL_GT3_IDS(MACRO__, ...) \
-	INTEL_KBL_ULT_GT3_IDS(MACRO__, ## __VA_ARGS__), \
-	MACRO__(0x5923, ## __VA_ARGS__), /* ULT GT3 */ \
-	MACRO__(0x5927, ## __VA_ARGS__) /* ULT GT3 */
-
-#define INTEL_KBL_GT4_IDS(MACRO__, ...) \
-	MACRO__(0x593B, ## __VA_ARGS__) /* Halo GT4 */
-
-/* AML/KBL Y GT2 */
-#define INTEL_AML_KBL_GT2_IDS(MACRO__, ...) \
-	MACRO__(0x591C, ## __VA_ARGS__),  /* ULX GT2 */ \
-	MACRO__(0x87C0, ## __VA_ARGS__) /* ULX GT2 */
-
-/* AML/CFL Y GT2 */
-#define INTEL_AML_CFL_GT2_IDS(MACRO__, ...) \
-	MACRO__(0x87CA, ## __VA_ARGS__)
-
-/* CML GT1 */
-#define INTEL_CML_GT1_IDS(MACRO__, ...) \
-	MACRO__(0x9BA2, ## __VA_ARGS__), \
-	MACRO__(0x9BA4, ## __VA_ARGS__), \
-	MACRO__(0x9BA5, ## __VA_ARGS__), \
-	MACRO__(0x9BA8, ## __VA_ARGS__)
-
-#define INTEL_CML_U_GT1_IDS(MACRO__, ...) \
-	MACRO__(0x9B21, ## __VA_ARGS__), \
-	MACRO__(0x9BAA, ## __VA_ARGS__), \
-	MACRO__(0x9BAC, ## __VA_ARGS__)
-
-/* CML GT2 */
-#define INTEL_CML_GT2_IDS(MACRO__, ...) \
-	MACRO__(0x9BC2, ## __VA_ARGS__), \
-	MACRO__(0x9BC4, ## __VA_ARGS__), \
-	MACRO__(0x9BC5, ## __VA_ARGS__), \
-	MACRO__(0x9BC6, ## __VA_ARGS__), \
-	MACRO__(0x9BC8, ## __VA_ARGS__), \
-	MACRO__(0x9BE6, ## __VA_ARGS__), \
-	MACRO__(0x9BF6, ## __VA_ARGS__)
-
-#define INTEL_CML_U_GT2_IDS(MACRO__, ...) \
-	MACRO__(0x9B41, ## __VA_ARGS__), \
-	MACRO__(0x9BCA, ## __VA_ARGS__), \
-	MACRO__(0x9BCC, ## __VA_ARGS__)
-
-#define INTEL_CML_IDS(MACRO__, ...) \
-	INTEL_CML_GT1_IDS(MACRO__, ## __VA_ARGS__), \
-	INTEL_CML_GT2_IDS(MACRO__, ## __VA_ARGS__), \
-	INTEL_CML_U_GT1_IDS(MACRO__, ## __VA_ARGS__), \
-	INTEL_CML_U_GT2_IDS(MACRO__, ## __VA_ARGS__)
-
-#define INTEL_KBL_IDS(MACRO__, ...) \
-	INTEL_KBL_GT1_IDS(MACRO__, ## __VA_ARGS__), \
-	INTEL_KBL_GT2_IDS(MACRO__, ## __VA_ARGS__), \
-	INTEL_KBL_GT3_IDS(MACRO__, ## __VA_ARGS__), \
-	INTEL_KBL_GT4_IDS(MACRO__, ## __VA_ARGS__), \
-	INTEL_AML_KBL_GT2_IDS(MACRO__, ## __VA_ARGS__)
-
-/* CFL S */
-#define INTEL_CFL_S_GT1_IDS(MACRO__, ...) \
-	MACRO__(0x3E90, ## __VA_ARGS__), /* SRV GT1 */ \
-	MACRO__(0x3E93, ## __VA_ARGS__), /* SRV GT1 */ \
-	MACRO__(0x3E99, ## __VA_ARGS__)  /* SRV GT1 */
-
-#define INTEL_CFL_S_GT2_IDS(MACRO__, ...) \
-	MACRO__(0x3E91, ## __VA_ARGS__), /* SRV GT2 */ \
-	MACRO__(0x3E92, ## __VA_ARGS__), /* SRV GT2 */ \
-	MACRO__(0x3E96, ## __VA_ARGS__), /* SRV GT2 */ \
-	MACRO__(0x3E98, ## __VA_ARGS__), /* SRV GT2 */ \
-	MACRO__(0x3E9A, ## __VA_ARGS__)  /* SRV GT2 */
-
-/* CFL H */
-#define INTEL_CFL_H_GT1_IDS(MACRO__, ...) \
-	MACRO__(0x3E9C, ## __VA_ARGS__)
-
-#define INTEL_CFL_H_GT2_IDS(MACRO__, ...) \
-	MACRO__(0x3E94, ## __VA_ARGS__),  /* Halo GT2 */ \
-	MACRO__(0x3E9B, ## __VA_ARGS__) /* Halo GT2 */
-
-/* CFL U GT2 */
-#define INTEL_CFL_U_GT2_IDS(MACRO__, ...) \
-	MACRO__(0x3EA9, ## __VA_ARGS__)
-
-/* CFL U GT3 */
-#define INTEL_CFL_U_GT3_IDS(MACRO__, ...) \
-	MACRO__(0x3EA5, ## __VA_ARGS__), /* ULT GT3 */ \
-	MACRO__(0x3EA6, ## __VA_ARGS__), /* ULT GT3 */ \
-	MACRO__(0x3EA7, ## __VA_ARGS__), /* ULT GT3 */ \
-	MACRO__(0x3EA8, ## __VA_ARGS__)  /* ULT GT3 */
-
-#define INTEL_CFL_IDS(MACRO__, ...) \
-	INTEL_CFL_S_GT1_IDS(MACRO__, ## __VA_ARGS__), \
-	INTEL_CFL_S_GT2_IDS(MACRO__, ## __VA_ARGS__), \
-	INTEL_CFL_H_GT1_IDS(MACRO__, ## __VA_ARGS__), \
-	INTEL_CFL_H_GT2_IDS(MACRO__, ## __VA_ARGS__), \
-	INTEL_CFL_U_GT2_IDS(MACRO__, ## __VA_ARGS__), \
-	INTEL_CFL_U_GT3_IDS(MACRO__, ## __VA_ARGS__), \
-	INTEL_AML_CFL_GT2_IDS(MACRO__, ## __VA_ARGS__)
-
-/* WHL/CFL U GT1 */
-#define INTEL_WHL_U_GT1_IDS(MACRO__, ...) \
-	MACRO__(0x3EA1, ## __VA_ARGS__), \
-	MACRO__(0x3EA4, ## __VA_ARGS__)
-
-/* WHL/CFL U GT2 */
-#define INTEL_WHL_U_GT2_IDS(MACRO__, ...) \
-	MACRO__(0x3EA0, ## __VA_ARGS__), \
-	MACRO__(0x3EA3, ## __VA_ARGS__)
-
-/* WHL/CFL U GT3 */
-#define INTEL_WHL_U_GT3_IDS(MACRO__, ...) \
-	MACRO__(0x3EA2, ## __VA_ARGS__)
-
-#define INTEL_WHL_IDS(MACRO__, ...) \
-	INTEL_WHL_U_GT1_IDS(MACRO__, ## __VA_ARGS__), \
-	INTEL_WHL_U_GT2_IDS(MACRO__, ## __VA_ARGS__), \
-	INTEL_WHL_U_GT3_IDS(MACRO__, ## __VA_ARGS__)
-
-/* CNL */
-#define INTEL_CNL_PORT_F_IDS(MACRO__, ...) \
-	MACRO__(0x5A44, ## __VA_ARGS__), \
-	MACRO__(0x5A4C, ## __VA_ARGS__), \
-	MACRO__(0x5A54, ## __VA_ARGS__), \
-	MACRO__(0x5A5C, ## __VA_ARGS__)
-
-#define INTEL_CNL_IDS(MACRO__, ...) \
-	INTEL_CNL_PORT_F_IDS(MACRO__, ## __VA_ARGS__), \
-	MACRO__(0x5A40, ## __VA_ARGS__), \
-	MACRO__(0x5A41, ## __VA_ARGS__), \
-	MACRO__(0x5A42, ## __VA_ARGS__), \
-	MACRO__(0x5A49, ## __VA_ARGS__), \
-	MACRO__(0x5A4A, ## __VA_ARGS__), \
-	MACRO__(0x5A50, ## __VA_ARGS__), \
-	MACRO__(0x5A51, ## __VA_ARGS__), \
-	MACRO__(0x5A52, ## __VA_ARGS__), \
-	MACRO__(0x5A59, ## __VA_ARGS__), \
-	MACRO__(0x5A5A, ## __VA_ARGS__)
-
-/* ICL */
-#define INTEL_ICL_PORT_F_IDS(MACRO__, ...) \
-	MACRO__(0x8A50, ## __VA_ARGS__), \
-	MACRO__(0x8A52, ## __VA_ARGS__), \
-	MACRO__(0x8A53, ## __VA_ARGS__), \
-	MACRO__(0x8A54, ## __VA_ARGS__), \
-	MACRO__(0x8A56, ## __VA_ARGS__), \
-	MACRO__(0x8A57, ## __VA_ARGS__), \
-	MACRO__(0x8A58, ## __VA_ARGS__), \
-	MACRO__(0x8A59, ## __VA_ARGS__), \
-	MACRO__(0x8A5A, ## __VA_ARGS__), \
-	MACRO__(0x8A5B, ## __VA_ARGS__), \
-	MACRO__(0x8A5C, ## __VA_ARGS__), \
-	MACRO__(0x8A70, ## __VA_ARGS__), \
-	MACRO__(0x8A71, ## __VA_ARGS__)
-
-#define INTEL_ICL_IDS(MACRO__, ...) \
-	INTEL_ICL_PORT_F_IDS(MACRO__, ## __VA_ARGS__), \
-	MACRO__(0x8A51, ## __VA_ARGS__), \
-	MACRO__(0x8A5D, ## __VA_ARGS__)
-
-/* EHL */
-#define INTEL_EHL_IDS(MACRO__, ...) \
-	MACRO__(0x4541, ## __VA_ARGS__), \
-	MACRO__(0x4551, ## __VA_ARGS__), \
-	MACRO__(0x4555, ## __VA_ARGS__), \
-	MACRO__(0x4557, ## __VA_ARGS__), \
-	MACRO__(0x4570, ## __VA_ARGS__), \
-	MACRO__(0x4571, ## __VA_ARGS__)
-
-/* JSL */
-#define INTEL_JSL_IDS(MACRO__, ...) \
-	MACRO__(0x4E51, ## __VA_ARGS__), \
-	MACRO__(0x4E55, ## __VA_ARGS__), \
-	MACRO__(0x4E57, ## __VA_ARGS__), \
-	MACRO__(0x4E61, ## __VA_ARGS__), \
-	MACRO__(0x4E71, ## __VA_ARGS__)
-
-/* TGL */
-#define INTEL_TGL_GT1_IDS(MACRO__, ...) \
-	MACRO__(0x9A60, ## __VA_ARGS__), \
-	MACRO__(0x9A68, ## __VA_ARGS__), \
-	MACRO__(0x9A70, ## __VA_ARGS__)
-
-#define INTEL_TGL_GT2_IDS(MACRO__, ...) \
-	MACRO__(0x9A40, ## __VA_ARGS__), \
-	MACRO__(0x9A49, ## __VA_ARGS__), \
-	MACRO__(0x9A59, ## __VA_ARGS__), \
-	MACRO__(0x9A78, ## __VA_ARGS__), \
-	MACRO__(0x9AC0, ## __VA_ARGS__), \
-	MACRO__(0x9AC9, ## __VA_ARGS__), \
-	MACRO__(0x9AD9, ## __VA_ARGS__), \
-	MACRO__(0x9AF8, ## __VA_ARGS__)
-
-#define INTEL_TGL_IDS(MACRO__, ...) \
-	INTEL_TGL_GT1_IDS(MACRO__, ## __VA_ARGS__), \
-	INTEL_TGL_GT2_IDS(MACRO__, ## __VA_ARGS__)
-
-/* RKL */
-#define INTEL_RKL_IDS(MACRO__, ...) \
-	MACRO__(0x4C80, ## __VA_ARGS__), \
-	MACRO__(0x4C8A, ## __VA_ARGS__), \
-	MACRO__(0x4C8B, ## __VA_ARGS__), \
-	MACRO__(0x4C8C, ## __VA_ARGS__), \
-	MACRO__(0x4C90, ## __VA_ARGS__), \
-	MACRO__(0x4C9A, ## __VA_ARGS__)
-
-/* DG1 */
-#define INTEL_DG1_IDS(MACRO__, ...) \
-	MACRO__(0x4905, ## __VA_ARGS__), \
-	MACRO__(0x4906, ## __VA_ARGS__), \
-	MACRO__(0x4907, ## __VA_ARGS__), \
-	MACRO__(0x4908, ## __VA_ARGS__), \
-	MACRO__(0x4909, ## __VA_ARGS__)
-
-/* ADL-S */
-#define INTEL_ADLS_IDS(MACRO__, ...) \
-	MACRO__(0x4680, ## __VA_ARGS__), \
-	MACRO__(0x4682, ## __VA_ARGS__), \
-	MACRO__(0x4688, ## __VA_ARGS__), \
-	MACRO__(0x468A, ## __VA_ARGS__), \
-	MACRO__(0x468B, ## __VA_ARGS__), \
-	MACRO__(0x4690, ## __VA_ARGS__), \
-	MACRO__(0x4692, ## __VA_ARGS__), \
-	MACRO__(0x4693, ## __VA_ARGS__)
-
-/* ADL-P */
-#define INTEL_ADLP_IDS(MACRO__, ...) \
-	MACRO__(0x46A0, ## __VA_ARGS__), \
-	MACRO__(0x46A1, ## __VA_ARGS__), \
-	MACRO__(0x46A2, ## __VA_ARGS__), \
-	MACRO__(0x46A3, ## __VA_ARGS__), \
-	MACRO__(0x46A6, ## __VA_ARGS__), \
-	MACRO__(0x46A8, ## __VA_ARGS__), \
-	MACRO__(0x46AA, ## __VA_ARGS__), \
-	MACRO__(0x462A, ## __VA_ARGS__), \
-	MACRO__(0x4626, ## __VA_ARGS__), \
-	MACRO__(0x4628, ## __VA_ARGS__), \
-	MACRO__(0x46B0, ## __VA_ARGS__), \
-	MACRO__(0x46B1, ## __VA_ARGS__), \
-	MACRO__(0x46B2, ## __VA_ARGS__), \
-	MACRO__(0x46B3, ## __VA_ARGS__), \
-	MACRO__(0x46C0, ## __VA_ARGS__), \
-	MACRO__(0x46C1, ## __VA_ARGS__), \
-	MACRO__(0x46C2, ## __VA_ARGS__), \
-	MACRO__(0x46C3, ## __VA_ARGS__)
-
-/* ADL-N */
-#define INTEL_ADLN_IDS(MACRO__, ...) \
-	MACRO__(0x46D0, ## __VA_ARGS__), \
-	MACRO__(0x46D1, ## __VA_ARGS__), \
-	MACRO__(0x46D2, ## __VA_ARGS__), \
-	MACRO__(0x46D3, ## __VA_ARGS__), \
-	MACRO__(0x46D4, ## __VA_ARGS__)
-
-/* RPL-S */
-#define INTEL_RPLS_IDS(MACRO__, ...) \
-	MACRO__(0xA780, ## __VA_ARGS__), \
-	MACRO__(0xA781, ## __VA_ARGS__), \
-	MACRO__(0xA782, ## __VA_ARGS__), \
-	MACRO__(0xA783, ## __VA_ARGS__), \
-	MACRO__(0xA788, ## __VA_ARGS__), \
-	MACRO__(0xA789, ## __VA_ARGS__), \
-	MACRO__(0xA78A, ## __VA_ARGS__), \
-	MACRO__(0xA78B, ## __VA_ARGS__)
-
-/* RPL-U */
-#define INTEL_RPLU_IDS(MACRO__, ...) \
-	MACRO__(0xA721, ## __VA_ARGS__), \
-	MACRO__(0xA7A1, ## __VA_ARGS__), \
-	MACRO__(0xA7A9, ## __VA_ARGS__), \
-	MACRO__(0xA7AC, ## __VA_ARGS__), \
-	MACRO__(0xA7AD, ## __VA_ARGS__)
-
-/* RPL-P */
-#define INTEL_RPLP_IDS(MACRO__, ...) \
-	MACRO__(0xA720, ## __VA_ARGS__), \
-	MACRO__(0xA7A0, ## __VA_ARGS__), \
-	MACRO__(0xA7A8, ## __VA_ARGS__), \
-	MACRO__(0xA7AA, ## __VA_ARGS__), \
-	MACRO__(0xA7AB, ## __VA_ARGS__)
-
-/* DG2 */
-#define INTEL_DG2_G10_IDS(MACRO__, ...) \
-	MACRO__(0x5690, ## __VA_ARGS__), \
-	MACRO__(0x5691, ## __VA_ARGS__), \
-	MACRO__(0x5692, ## __VA_ARGS__), \
-	MACRO__(0x56A0, ## __VA_ARGS__), \
-	MACRO__(0x56A1, ## __VA_ARGS__), \
-	MACRO__(0x56A2, ## __VA_ARGS__), \
-	MACRO__(0x56BE, ## __VA_ARGS__), \
-	MACRO__(0x56BF, ## __VA_ARGS__)
-
-#define INTEL_DG2_G11_IDS(MACRO__, ...) \
-	MACRO__(0x5693, ## __VA_ARGS__), \
-	MACRO__(0x5694, ## __VA_ARGS__), \
-	MACRO__(0x5695, ## __VA_ARGS__), \
-	MACRO__(0x56A5, ## __VA_ARGS__), \
-	MACRO__(0x56A6, ## __VA_ARGS__), \
-	MACRO__(0x56B0, ## __VA_ARGS__), \
-	MACRO__(0x56B1, ## __VA_ARGS__), \
-	MACRO__(0x56BA, ## __VA_ARGS__), \
-	MACRO__(0x56BB, ## __VA_ARGS__), \
-	MACRO__(0x56BC, ## __VA_ARGS__), \
-	MACRO__(0x56BD, ## __VA_ARGS__)
-
-#define INTEL_DG2_G12_IDS(MACRO__, ...) \
-	MACRO__(0x5696, ## __VA_ARGS__), \
-	MACRO__(0x5697, ## __VA_ARGS__), \
-	MACRO__(0x56A3, ## __VA_ARGS__), \
-	MACRO__(0x56A4, ## __VA_ARGS__), \
-	MACRO__(0x56B2, ## __VA_ARGS__), \
-	MACRO__(0x56B3, ## __VA_ARGS__)
-
-#define INTEL_DG2_IDS(MACRO__, ...) \
-	INTEL_DG2_G10_IDS(MACRO__, ## __VA_ARGS__), \
-	INTEL_DG2_G11_IDS(MACRO__, ## __VA_ARGS__), \
-	INTEL_DG2_G12_IDS(MACRO__, ## __VA_ARGS__)
-
-#define INTEL_ATS_M150_IDS(MACRO__, ...) \
-	MACRO__(0x56C0, ## __VA_ARGS__), \
-	MACRO__(0x56C2, ## __VA_ARGS__)
-
-#define INTEL_ATS_M75_IDS(MACRO__, ...) \
-	MACRO__(0x56C1, ## __VA_ARGS__)
-
-#define INTEL_ATS_M_IDS(MACRO__, ...) \
-	INTEL_ATS_M150_IDS(MACRO__, ## __VA_ARGS__), \
-	INTEL_ATS_M75_IDS(MACRO__, ## __VA_ARGS__)
-
-/* ARL */
-#define INTEL_ARL_IDS(MACRO__, ...) \
-	MACRO__(0x7D41, ## __VA_ARGS__), \
-	MACRO__(0x7D51, ## __VA_ARGS__), \
-	MACRO__(0x7D67, ## __VA_ARGS__), \
-	MACRO__(0x7DD1, ## __VA_ARGS__), \
-	MACRO__(0xB640, ## __VA_ARGS__)
-
-/* MTL */
-#define INTEL_MTL_IDS(MACRO__, ...) \
-	MACRO__(0x7D40, ## __VA_ARGS__), \
-	MACRO__(0x7D45, ## __VA_ARGS__), \
-	MACRO__(0x7D55, ## __VA_ARGS__), \
-	MACRO__(0x7D60, ## __VA_ARGS__), \
-	MACRO__(0x7DD5, ## __VA_ARGS__)
-
-/* PVC */
-#define INTEL_PVC_IDS(MACRO__, ...) \
-	MACRO__(0x0B69, ## __VA_ARGS__), \
-	MACRO__(0x0B6E, ## __VA_ARGS__), \
-	MACRO__(0x0BD4, ## __VA_ARGS__), \
-	MACRO__(0x0BD5, ## __VA_ARGS__), \
-	MACRO__(0x0BD6, ## __VA_ARGS__), \
-	MACRO__(0x0BD7, ## __VA_ARGS__), \
-	MACRO__(0x0BD8, ## __VA_ARGS__), \
-	MACRO__(0x0BD9, ## __VA_ARGS__), \
-	MACRO__(0x0BDA, ## __VA_ARGS__), \
-	MACRO__(0x0BDB, ## __VA_ARGS__), \
-	MACRO__(0x0BE0, ## __VA_ARGS__), \
-	MACRO__(0x0BE1, ## __VA_ARGS__), \
-	MACRO__(0x0BE5, ## __VA_ARGS__)
-
-/* LNL */
-#define INTEL_LNL_IDS(MACRO__, ...) \
-	MACRO__(0x6420, ## __VA_ARGS__), \
-	MACRO__(0x64A0, ## __VA_ARGS__), \
-	MACRO__(0x64B0, ## __VA_ARGS__)
-
-/* BMG */
-#define INTEL_BMG_IDS(MACRO__, ...) \
-	MACRO__(0xE202, ## __VA_ARGS__), \
-	MACRO__(0xE20B, ## __VA_ARGS__), \
-	MACRO__(0xE20C, ## __VA_ARGS__), \
-	MACRO__(0xE20D, ## __VA_ARGS__), \
-	MACRO__(0xE212, ## __VA_ARGS__)
-
-/* PTL */
-#define INTEL_PTL_IDS(MACRO__, ...) \
-	MACRO__(0xB080, ## __VA_ARGS__), \
-	MACRO__(0xB081, ## __VA_ARGS__), \
-	MACRO__(0xB082, ## __VA_ARGS__), \
-	MACRO__(0xB090, ## __VA_ARGS__), \
-	MACRO__(0xB091, ## __VA_ARGS__), \
-	MACRO__(0xB092, ## __VA_ARGS__), \
-	MACRO__(0xB0A0, ## __VA_ARGS__), \
-	MACRO__(0xB0A1, ## __VA_ARGS__), \
-	MACRO__(0xB0A2, ## __VA_ARGS__)
-
-#endif /* _I915_PCIIDS_H */
diff --git a/include/drm/intel/pciids.h b/include/drm/intel/pciids.h
new file mode 100644
index 000000000000..7632507af166
--- /dev/null
+++ b/include/drm/intel/pciids.h
@@ -0,0 +1,825 @@
+/*
+ * Copyright 2013 Intel Corporation
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+#ifndef __PCIIDS_H__
+#define __PCIIDS_H__
+
+#ifdef __KERNEL__
+#define INTEL_VGA_DEVICE(_id, _info) { \
+	PCI_DEVICE(PCI_VENDOR_ID_INTEL, (_id)), \
+	.class = PCI_BASE_CLASS_DISPLAY << 16, .class_mask = 0xff << 16, \
+	.driver_data = (kernel_ulong_t)(_info), \
+}
+
+#define INTEL_QUANTA_VGA_DEVICE(_info) { \
+	.vendor = PCI_VENDOR_ID_INTEL, .device = 0x16a, \
+	.subvendor = 0x152d, .subdevice = 0x8990, \
+	.class = PCI_BASE_CLASS_DISPLAY << 16, .class_mask = 0xff << 16, \
+	.driver_data = (kernel_ulong_t)(_info), \
+}
+#endif
+
+#define INTEL_I810_IDS(MACRO__, ...) \
+	MACRO__(0x7121, ## __VA_ARGS__), /* I810 */ \
+	MACRO__(0x7123, ## __VA_ARGS__), /* I810_DC100 */ \
+	MACRO__(0x7125, ## __VA_ARGS__)  /* I810_E */
+
+#define INTEL_I815_IDS(MACRO__, ...) \
+	MACRO__(0x1132, ## __VA_ARGS__)  /* I815*/
+
+#define INTEL_I830_IDS(MACRO__, ...) \
+	MACRO__(0x3577, ## __VA_ARGS__)
+
+#define INTEL_I845G_IDS(MACRO__, ...) \
+	MACRO__(0x2562, ## __VA_ARGS__)
+
+#define INTEL_I85X_IDS(MACRO__, ...) \
+	MACRO__(0x3582, ## __VA_ARGS__), /* I855_GM */ \
+	MACRO__(0x358e, ## __VA_ARGS__)
+
+#define INTEL_I865G_IDS(MACRO__, ...) \
+	MACRO__(0x2572, ## __VA_ARGS__) /* I865_G */
+
+#define INTEL_I915G_IDS(MACRO__, ...) \
+	MACRO__(0x2582, ## __VA_ARGS__), /* I915_G */ \
+	MACRO__(0x258a, ## __VA_ARGS__)  /* E7221_G */
+
+#define INTEL_I915GM_IDS(MACRO__, ...) \
+	MACRO__(0x2592, ## __VA_ARGS__) /* I915_GM */
+
+#define INTEL_I945G_IDS(MACRO__, ...) \
+	MACRO__(0x2772, ## __VA_ARGS__) /* I945_G */
+
+#define INTEL_I945GM_IDS(MACRO__, ...) \
+	MACRO__(0x27a2, ## __VA_ARGS__), /* I945_GM */ \
+	MACRO__(0x27ae, ## __VA_ARGS__)  /* I945_GME */
+
+#define INTEL_I965G_IDS(MACRO__, ...) \
+	MACRO__(0x2972, ## __VA_ARGS__), /* I946_GZ */ \
+	MACRO__(0x2982, ## __VA_ARGS__),	/* G35_G */ \
+	MACRO__(0x2992, ## __VA_ARGS__),	/* I965_Q */ \
+	MACRO__(0x29a2, ## __VA_ARGS__)	/* I965_G */
+
+#define INTEL_G33_IDS(MACRO__, ...) \
+	MACRO__(0x29b2, ## __VA_ARGS__), /* Q35_G */ \
+	MACRO__(0x29c2, ## __VA_ARGS__),	/* G33_G */ \
+	MACRO__(0x29d2, ## __VA_ARGS__)	/* Q33_G */
+
+#define INTEL_I965GM_IDS(MACRO__, ...) \
+	MACRO__(0x2a02, ## __VA_ARGS__),	/* I965_GM */ \
+	MACRO__(0x2a12, ## __VA_ARGS__)  /* I965_GME */
+
+#define INTEL_GM45_IDS(MACRO__, ...) \
+	MACRO__(0x2a42, ## __VA_ARGS__) /* GM45_G */
+
+#define INTEL_G45_IDS(MACRO__, ...) \
+	MACRO__(0x2e02, ## __VA_ARGS__), /* IGD_E_G */ \
+	MACRO__(0x2e12, ## __VA_ARGS__), /* Q45_G */ \
+	MACRO__(0x2e22, ## __VA_ARGS__), /* G45_G */ \
+	MACRO__(0x2e32, ## __VA_ARGS__), /* G41_G */ \
+	MACRO__(0x2e42, ## __VA_ARGS__), /* B43_G */ \
+	MACRO__(0x2e92, ## __VA_ARGS__)	/* B43_G.1 */
+
+#define INTEL_PNV_G_IDS(MACRO__, ...) \
+	MACRO__(0xa001, ## __VA_ARGS__)
+
+#define INTEL_PNV_M_IDS(MACRO__, ...) \
+	MACRO__(0xa011, ## __VA_ARGS__)
+
+#define INTEL_PNV_IDS(MACRO__, ...) \
+	INTEL_PNV_G_IDS(MACRO__, ## __VA_ARGS__), \
+	INTEL_PNV_M_IDS(MACRO__, ## __VA_ARGS__)
+
+#define INTEL_ILK_D_IDS(MACRO__, ...) \
+	MACRO__(0x0042, ## __VA_ARGS__)
+
+#define INTEL_ILK_M_IDS(MACRO__, ...) \
+	MACRO__(0x0046, ## __VA_ARGS__)
+
+#define INTEL_ILK_IDS(MACRO__, ...) \
+	INTEL_ILK_D_IDS(MACRO__, ## __VA_ARGS__), \
+	INTEL_ILK_M_IDS(MACRO__, ## __VA_ARGS__)
+
+#define INTEL_SNB_D_GT1_IDS(MACRO__, ...) \
+	MACRO__(0x0102, ## __VA_ARGS__), \
+	MACRO__(0x010A, ## __VA_ARGS__)
+
+#define INTEL_SNB_D_GT2_IDS(MACRO__, ...) \
+	MACRO__(0x0112, ## __VA_ARGS__), \
+	MACRO__(0x0122, ## __VA_ARGS__)
+
+#define INTEL_SNB_D_IDS(MACRO__, ...) \
+	INTEL_SNB_D_GT1_IDS(MACRO__, ## __VA_ARGS__), \
+	INTEL_SNB_D_GT2_IDS(MACRO__, ## __VA_ARGS__)
+
+#define INTEL_SNB_M_GT1_IDS(MACRO__, ...) \
+	MACRO__(0x0106, ## __VA_ARGS__)
+
+#define INTEL_SNB_M_GT2_IDS(MACRO__, ...) \
+	MACRO__(0x0116, ## __VA_ARGS__), \
+	MACRO__(0x0126, ## __VA_ARGS__)
+
+#define INTEL_SNB_M_IDS(MACRO__, ...) \
+	INTEL_SNB_M_GT1_IDS(MACRO__, ## __VA_ARGS__), \
+	INTEL_SNB_M_GT2_IDS(MACRO__, ## __VA_ARGS__)
+
+#define INTEL_SNB_IDS(MACRO__, ...) \
+	INTEL_SNB_D_IDS(MACRO__, ## __VA_ARGS__), \
+	INTEL_SNB_M_IDS(MACRO__, ## __VA_ARGS__)
+
+#define INTEL_IVB_M_GT1_IDS(MACRO__, ...) \
+	MACRO__(0x0156, ## __VA_ARGS__) /* GT1 mobile */
+
+#define INTEL_IVB_M_GT2_IDS(MACRO__, ...) \
+	MACRO__(0x0166, ## __VA_ARGS__) /* GT2 mobile */
+
+#define INTEL_IVB_M_IDS(MACRO__, ...) \
+	INTEL_IVB_M_GT1_IDS(MACRO__, ## __VA_ARGS__), \
+	INTEL_IVB_M_GT2_IDS(MACRO__, ## __VA_ARGS__)
+
+#define INTEL_IVB_D_GT1_IDS(MACRO__, ...) \
+	MACRO__(0x0152, ## __VA_ARGS__), /* GT1 desktop */ \
+	MACRO__(0x015a, ## __VA_ARGS__)  /* GT1 server */
+
+#define INTEL_IVB_D_GT2_IDS(MACRO__, ...) \
+	MACRO__(0x0162, ## __VA_ARGS__), /* GT2 desktop */ \
+	MACRO__(0x016a, ## __VA_ARGS__)  /* GT2 server */
+
+#define INTEL_IVB_D_IDS(MACRO__, ...) \
+	INTEL_IVB_D_GT1_IDS(MACRO__, ## __VA_ARGS__), \
+	INTEL_IVB_D_GT2_IDS(MACRO__, ## __VA_ARGS__)
+
+#define INTEL_IVB_IDS(MACRO__, ...) \
+	INTEL_IVB_M_IDS(MACRO__, ## __VA_ARGS__), \
+	INTEL_IVB_D_IDS(MACRO__, ## __VA_ARGS__)
+
+#define INTEL_IVB_Q_IDS(MACRO__, ...) \
+	INTEL_QUANTA_VGA_DEVICE(__VA_ARGS__) /* Quanta transcode */
+
+#define INTEL_HSW_ULT_GT1_IDS(MACRO__, ...) \
+	MACRO__(0x0A02, ## __VA_ARGS__), /* ULT GT1 desktop */ \
+	MACRO__(0x0A06, ## __VA_ARGS__), /* ULT GT1 mobile */ \
+	MACRO__(0x0A0A, ## __VA_ARGS__), /* ULT GT1 server */ \
+	MACRO__(0x0A0B, ## __VA_ARGS__)  /* ULT GT1 reserved */
+
+#define INTEL_HSW_ULX_GT1_IDS(MACRO__, ...) \
+	MACRO__(0x0A0E, ## __VA_ARGS__) /* ULX GT1 mobile */
+
+#define INTEL_HSW_GT1_IDS(MACRO__, ...) \
+	INTEL_HSW_ULT_GT1_IDS(MACRO__, ## __VA_ARGS__), \
+	INTEL_HSW_ULX_GT1_IDS(MACRO__, ## __VA_ARGS__), \
+	MACRO__(0x0402, ## __VA_ARGS__), /* GT1 desktop */ \
+	MACRO__(0x0406, ## __VA_ARGS__), /* GT1 mobile */ \
+	MACRO__(0x040A, ## __VA_ARGS__), /* GT1 server */ \
+	MACRO__(0x040B, ## __VA_ARGS__), /* GT1 reserved */ \
+	MACRO__(0x040E, ## __VA_ARGS__), /* GT1 reserved */ \
+	MACRO__(0x0C02, ## __VA_ARGS__), /* SDV GT1 desktop */ \
+	MACRO__(0x0C06, ## __VA_ARGS__), /* SDV GT1 mobile */ \
+	MACRO__(0x0C0A, ## __VA_ARGS__), /* SDV GT1 server */ \
+	MACRO__(0x0C0B, ## __VA_ARGS__), /* SDV GT1 reserved */ \
+	MACRO__(0x0C0E, ## __VA_ARGS__), /* SDV GT1 reserved */ \
+	MACRO__(0x0D02, ## __VA_ARGS__), /* CRW GT1 desktop */ \
+	MACRO__(0x0D06, ## __VA_ARGS__), /* CRW GT1 mobile */ \
+	MACRO__(0x0D0A, ## __VA_ARGS__), /* CRW GT1 server */ \
+	MACRO__(0x0D0B, ## __VA_ARGS__), /* CRW GT1 reserved */ \
+	MACRO__(0x0D0E, ## __VA_ARGS__)  /* CRW GT1 reserved */
+
+#define INTEL_HSW_ULT_GT2_IDS(MACRO__, ...) \
+	MACRO__(0x0A12, ## __VA_ARGS__), /* ULT GT2 desktop */ \
+	MACRO__(0x0A16, ## __VA_ARGS__), /* ULT GT2 mobile */ \
+	MACRO__(0x0A1A, ## __VA_ARGS__), /* ULT GT2 server */ \
+	MACRO__(0x0A1B, ## __VA_ARGS__)  /* ULT GT2 reserved */ \
+
+#define INTEL_HSW_ULX_GT2_IDS(MACRO__, ...) \
+	MACRO__(0x0A1E, ## __VA_ARGS__) /* ULX GT2 mobile */ \
+
+#define INTEL_HSW_GT2_IDS(MACRO__, ...) \
+	INTEL_HSW_ULT_GT2_IDS(MACRO__, ## __VA_ARGS__), \
+	INTEL_HSW_ULX_GT2_IDS(MACRO__, ## __VA_ARGS__), \
+	MACRO__(0x0412, ## __VA_ARGS__), /* GT2 desktop */ \
+	MACRO__(0x0416, ## __VA_ARGS__), /* GT2 mobile */ \
+	MACRO__(0x041A, ## __VA_ARGS__), /* GT2 server */ \
+	MACRO__(0x041B, ## __VA_ARGS__), /* GT2 reserved */ \
+	MACRO__(0x041E, ## __VA_ARGS__), /* GT2 reserved */ \
+	MACRO__(0x0C12, ## __VA_ARGS__), /* SDV GT2 desktop */ \
+	MACRO__(0x0C16, ## __VA_ARGS__), /* SDV GT2 mobile */ \
+	MACRO__(0x0C1A, ## __VA_ARGS__), /* SDV GT2 server */ \
+	MACRO__(0x0C1B, ## __VA_ARGS__), /* SDV GT2 reserved */ \
+	MACRO__(0x0C1E, ## __VA_ARGS__), /* SDV GT2 reserved */ \
+	MACRO__(0x0D12, ## __VA_ARGS__), /* CRW GT2 desktop */ \
+	MACRO__(0x0D16, ## __VA_ARGS__), /* CRW GT2 mobile */ \
+	MACRO__(0x0D1A, ## __VA_ARGS__), /* CRW GT2 server */ \
+	MACRO__(0x0D1B, ## __VA_ARGS__), /* CRW GT2 reserved */ \
+	MACRO__(0x0D1E, ## __VA_ARGS__)  /* CRW GT2 reserved */
+
+#define INTEL_HSW_ULT_GT3_IDS(MACRO__, ...) \
+	MACRO__(0x0A22, ## __VA_ARGS__), /* ULT GT3 desktop */ \
+	MACRO__(0x0A26, ## __VA_ARGS__), /* ULT GT3 mobile */ \
+	MACRO__(0x0A2A, ## __VA_ARGS__), /* ULT GT3 server */ \
+	MACRO__(0x0A2B, ## __VA_ARGS__), /* ULT GT3 reserved */ \
+	MACRO__(0x0A2E, ## __VA_ARGS__)  /* ULT GT3 reserved */
+
+#define INTEL_HSW_GT3_IDS(MACRO__, ...) \
+	INTEL_HSW_ULT_GT3_IDS(MACRO__, ## __VA_ARGS__), \
+	MACRO__(0x0422, ## __VA_ARGS__), /* GT3 desktop */ \
+	MACRO__(0x0426, ## __VA_ARGS__), /* GT3 mobile */ \
+	MACRO__(0x042A, ## __VA_ARGS__), /* GT3 server */ \
+	MACRO__(0x042B, ## __VA_ARGS__), /* GT3 reserved */ \
+	MACRO__(0x042E, ## __VA_ARGS__), /* GT3 reserved */ \
+	MACRO__(0x0C22, ## __VA_ARGS__), /* SDV GT3 desktop */ \
+	MACRO__(0x0C26, ## __VA_ARGS__), /* SDV GT3 mobile */ \
+	MACRO__(0x0C2A, ## __VA_ARGS__), /* SDV GT3 server */ \
+	MACRO__(0x0C2B, ## __VA_ARGS__), /* SDV GT3 reserved */ \
+	MACRO__(0x0C2E, ## __VA_ARGS__), /* SDV GT3 reserved */ \
+	MACRO__(0x0D22, ## __VA_ARGS__), /* CRW GT3 desktop */ \
+	MACRO__(0x0D26, ## __VA_ARGS__), /* CRW GT3 mobile */ \
+	MACRO__(0x0D2A, ## __VA_ARGS__), /* CRW GT3 server */ \
+	MACRO__(0x0D2B, ## __VA_ARGS__), /* CRW GT3 reserved */ \
+	MACRO__(0x0D2E, ## __VA_ARGS__)  /* CRW GT3 reserved */
+
+#define INTEL_HSW_IDS(MACRO__, ...) \
+	INTEL_HSW_GT1_IDS(MACRO__, ## __VA_ARGS__), \
+	INTEL_HSW_GT2_IDS(MACRO__, ## __VA_ARGS__), \
+	INTEL_HSW_GT3_IDS(MACRO__, ## __VA_ARGS__)
+
+#define INTEL_VLV_IDS(MACRO__, ...) \
+	MACRO__(0x0f30, ## __VA_ARGS__), \
+	MACRO__(0x0f31, ## __VA_ARGS__), \
+	MACRO__(0x0f32, ## __VA_ARGS__), \
+	MACRO__(0x0f33, ## __VA_ARGS__)
+
+#define INTEL_BDW_ULT_GT1_IDS(MACRO__, ...) \
+	MACRO__(0x1606, ## __VA_ARGS__), /* GT1 ULT */ \
+	MACRO__(0x160B, ## __VA_ARGS__)  /* GT1 Iris */
+
+#define INTEL_BDW_ULX_GT1_IDS(MACRO__, ...) \
+	MACRO__(0x160E, ## __VA_ARGS__) /* GT1 ULX */
+
+#define INTEL_BDW_GT1_IDS(MACRO__, ...) \
+	INTEL_BDW_ULT_GT1_IDS(MACRO__, ## __VA_ARGS__), \
+	INTEL_BDW_ULX_GT1_IDS(MACRO__, ## __VA_ARGS__), \
+	MACRO__(0x1602, ## __VA_ARGS__), /* GT1 ULT */ \
+	MACRO__(0x160A, ## __VA_ARGS__), /* GT1 Server */ \
+	MACRO__(0x160D, ## __VA_ARGS__)  /* GT1 Workstation */
+
+#define INTEL_BDW_ULT_GT2_IDS(MACRO__, ...) \
+	MACRO__(0x1616, ## __VA_ARGS__), /* GT2 ULT */ \
+	MACRO__(0x161B, ## __VA_ARGS__)  /* GT2 ULT */
+
+#define INTEL_BDW_ULX_GT2_IDS(MACRO__, ...) \
+	MACRO__(0x161E, ## __VA_ARGS__) /* GT2 ULX */
+
+#define INTEL_BDW_GT2_IDS(MACRO__, ...) \
+	INTEL_BDW_ULT_GT2_IDS(MACRO__, ## __VA_ARGS__), \
+	INTEL_BDW_ULX_GT2_IDS(MACRO__, ## __VA_ARGS__), \
+	MACRO__(0x1612, ## __VA_ARGS__), /* GT2 Halo */ \
+	MACRO__(0x161A, ## __VA_ARGS__), /* GT2 Server */ \
+	MACRO__(0x161D, ## __VA_ARGS__)  /* GT2 Workstation */
+
+#define INTEL_BDW_ULT_GT3_IDS(MACRO__, ...) \
+	MACRO__(0x1626, ## __VA_ARGS__), /* ULT */ \
+	MACRO__(0x162B, ## __VA_ARGS__)  /* Iris */ \
+
+#define INTEL_BDW_ULX_GT3_IDS(MACRO__, ...) \
+	MACRO__(0x162E, ## __VA_ARGS__)  /* ULX */
+
+#define INTEL_BDW_GT3_IDS(MACRO__, ...) \
+	INTEL_BDW_ULT_GT3_IDS(MACRO__, ## __VA_ARGS__), \
+	INTEL_BDW_ULX_GT3_IDS(MACRO__, ## __VA_ARGS__), \
+	MACRO__(0x1622, ## __VA_ARGS__), /* ULT */ \
+	MACRO__(0x162A, ## __VA_ARGS__), /* Server */ \
+	MACRO__(0x162D, ## __VA_ARGS__)  /* Workstation */
+
+#define INTEL_BDW_ULT_RSVD_IDS(MACRO__, ...) \
+	MACRO__(0x1636, ## __VA_ARGS__), /* ULT */ \
+	MACRO__(0x163B, ## __VA_ARGS__)  /* Iris */
+
+#define INTEL_BDW_ULX_RSVD_IDS(MACRO__, ...) \
+	MACRO__(0x163E, ## __VA_ARGS__) /* ULX */
+
+#define INTEL_BDW_RSVD_IDS(MACRO__, ...) \
+	INTEL_BDW_ULT_RSVD_IDS(MACRO__, ## __VA_ARGS__), \
+	INTEL_BDW_ULX_RSVD_IDS(MACRO__, ## __VA_ARGS__), \
+	MACRO__(0x1632, ## __VA_ARGS__), /* ULT */ \
+	MACRO__(0x163A, ## __VA_ARGS__), /* Server */ \
+	MACRO__(0x163D, ## __VA_ARGS__)  /* Workstation */
+
+#define INTEL_BDW_IDS(MACRO__, ...) \
+	INTEL_BDW_GT1_IDS(MACRO__, ## __VA_ARGS__), \
+	INTEL_BDW_GT2_IDS(MACRO__, ## __VA_ARGS__), \
+	INTEL_BDW_GT3_IDS(MACRO__, ## __VA_ARGS__), \
+	INTEL_BDW_RSVD_IDS(MACRO__, ## __VA_ARGS__)
+
+#define INTEL_CHV_IDS(MACRO__, ...) \
+	MACRO__(0x22b0, ## __VA_ARGS__), \
+	MACRO__(0x22b1, ## __VA_ARGS__), \
+	MACRO__(0x22b2, ## __VA_ARGS__), \
+	MACRO__(0x22b3, ## __VA_ARGS__)
+
+#define INTEL_SKL_ULT_GT1_IDS(MACRO__, ...) \
+	MACRO__(0x1906, ## __VA_ARGS__), /* ULT GT1 */ \
+	MACRO__(0x1913, ## __VA_ARGS__)  /* ULT GT1.5 */
+
+#define INTEL_SKL_ULX_GT1_IDS(MACRO__, ...) \
+	MACRO__(0x190E, ## __VA_ARGS__), /* ULX GT1 */ \
+	MACRO__(0x1915, ## __VA_ARGS__)  /* ULX GT1.5 */
+
+#define INTEL_SKL_GT1_IDS(MACRO__, ...) \
+	INTEL_SKL_ULT_GT1_IDS(MACRO__, ## __VA_ARGS__), \
+	INTEL_SKL_ULX_GT1_IDS(MACRO__, ## __VA_ARGS__), \
+	MACRO__(0x1902, ## __VA_ARGS__), /* DT  GT1 */ \
+	MACRO__(0x190A, ## __VA_ARGS__), /* SRV GT1 */ \
+	MACRO__(0x190B, ## __VA_ARGS__), /* Halo GT1 */ \
+	MACRO__(0x1917, ## __VA_ARGS__)  /* DT  GT1.5 */
+
+#define INTEL_SKL_ULT_GT2_IDS(MACRO__, ...) \
+	MACRO__(0x1916, ## __VA_ARGS__), /* ULT GT2 */ \
+	MACRO__(0x1921, ## __VA_ARGS__)  /* ULT GT2F */
+
+#define INTEL_SKL_ULX_GT2_IDS(MACRO__, ...) \
+	MACRO__(0x191E, ## __VA_ARGS__) /* ULX GT2 */
+
+#define INTEL_SKL_GT2_IDS(MACRO__, ...) \
+	INTEL_SKL_ULT_GT2_IDS(MACRO__, ## __VA_ARGS__), \
+	INTEL_SKL_ULX_GT2_IDS(MACRO__, ## __VA_ARGS__), \
+	MACRO__(0x1912, ## __VA_ARGS__), /* DT  GT2 */ \
+	MACRO__(0x191A, ## __VA_ARGS__), /* SRV GT2 */ \
+	MACRO__(0x191B, ## __VA_ARGS__), /* Halo GT2 */ \
+	MACRO__(0x191D, ## __VA_ARGS__)  /* WKS GT2 */
+
+#define INTEL_SKL_ULT_GT3_IDS(MACRO__, ...) \
+	MACRO__(0x1923, ## __VA_ARGS__), /* ULT GT3 */ \
+	MACRO__(0x1926, ## __VA_ARGS__), /* ULT GT3e */ \
+	MACRO__(0x1927, ## __VA_ARGS__)  /* ULT GT3e */
+
+#define INTEL_SKL_GT3_IDS(MACRO__, ...) \
+	INTEL_SKL_ULT_GT3_IDS(MACRO__, ## __VA_ARGS__), \
+	MACRO__(0x192A, ## __VA_ARGS__), /* SRV GT3 */ \
+	MACRO__(0x192B, ## __VA_ARGS__), /* Halo GT3e */ \
+	MACRO__(0x192D, ## __VA_ARGS__)  /* SRV GT3e */
+
+#define INTEL_SKL_GT4_IDS(MACRO__, ...) \
+	MACRO__(0x1932, ## __VA_ARGS__), /* DT GT4 */ \
+	MACRO__(0x193A, ## __VA_ARGS__), /* SRV GT4e */ \
+	MACRO__(0x193B, ## __VA_ARGS__), /* Halo GT4e */ \
+	MACRO__(0x193D, ## __VA_ARGS__) /* WKS GT4e */
+
+#define INTEL_SKL_IDS(MACRO__, ...) \
+	INTEL_SKL_GT1_IDS(MACRO__, ## __VA_ARGS__), \
+	INTEL_SKL_GT2_IDS(MACRO__, ## __VA_ARGS__), \
+	INTEL_SKL_GT3_IDS(MACRO__, ## __VA_ARGS__), \
+	INTEL_SKL_GT4_IDS(MACRO__, ## __VA_ARGS__)
+
+#define INTEL_BXT_IDS(MACRO__, ...) \
+	MACRO__(0x0A84, ## __VA_ARGS__), \
+	MACRO__(0x1A84, ## __VA_ARGS__), \
+	MACRO__(0x1A85, ## __VA_ARGS__), \
+	MACRO__(0x5A84, ## __VA_ARGS__), /* APL HD Graphics 505 */ \
+	MACRO__(0x5A85, ## __VA_ARGS__)  /* APL HD Graphics 500 */
+
+#define INTEL_GLK_IDS(MACRO__, ...) \
+	MACRO__(0x3184, ## __VA_ARGS__), \
+	MACRO__(0x3185, ## __VA_ARGS__)
+
+#define INTEL_KBL_ULT_GT1_IDS(MACRO__, ...) \
+	MACRO__(0x5906, ## __VA_ARGS__), /* ULT GT1 */ \
+	MACRO__(0x5913, ## __VA_ARGS__)  /* ULT GT1.5 */
+
+#define INTEL_KBL_ULX_GT1_IDS(MACRO__, ...) \
+	MACRO__(0x590E, ## __VA_ARGS__), /* ULX GT1 */ \
+	MACRO__(0x5915, ## __VA_ARGS__)  /* ULX GT1.5 */
+
+#define INTEL_KBL_GT1_IDS(MACRO__, ...) \
+	INTEL_KBL_ULT_GT1_IDS(MACRO__, ## __VA_ARGS__), \
+	INTEL_KBL_ULX_GT1_IDS(MACRO__, ## __VA_ARGS__), \
+	MACRO__(0x5902, ## __VA_ARGS__), /* DT  GT1 */ \
+	MACRO__(0x5908, ## __VA_ARGS__), /* Halo GT1 */ \
+	MACRO__(0x590A, ## __VA_ARGS__), /* SRV GT1 */ \
+	MACRO__(0x590B, ## __VA_ARGS__) /* Halo GT1 */
+
+#define INTEL_KBL_ULT_GT2_IDS(MACRO__, ...) \
+	MACRO__(0x5916, ## __VA_ARGS__), /* ULT GT2 */ \
+	MACRO__(0x5921, ## __VA_ARGS__)  /* ULT GT2F */
+
+#define INTEL_KBL_ULX_GT2_IDS(MACRO__, ...) \
+	MACRO__(0x591E, ## __VA_ARGS__)  /* ULX GT2 */
+
+#define INTEL_KBL_GT2_IDS(MACRO__, ...) \
+	INTEL_KBL_ULT_GT2_IDS(MACRO__, ## __VA_ARGS__), \
+	INTEL_KBL_ULX_GT2_IDS(MACRO__, ## __VA_ARGS__), \
+	MACRO__(0x5912, ## __VA_ARGS__), /* DT  GT2 */ \
+	MACRO__(0x5917, ## __VA_ARGS__), /* Mobile GT2 */ \
+	MACRO__(0x591A, ## __VA_ARGS__), /* SRV GT2 */ \
+	MACRO__(0x591B, ## __VA_ARGS__), /* Halo GT2 */ \
+	MACRO__(0x591D, ## __VA_ARGS__) /* WKS GT2 */
+
+#define INTEL_KBL_ULT_GT3_IDS(MACRO__, ...) \
+	MACRO__(0x5926, ## __VA_ARGS__) /* ULT GT3 */
+
+#define INTEL_KBL_GT3_IDS(MACRO__, ...) \
+	INTEL_KBL_ULT_GT3_IDS(MACRO__, ## __VA_ARGS__), \
+	MACRO__(0x5923, ## __VA_ARGS__), /* ULT GT3 */ \
+	MACRO__(0x5927, ## __VA_ARGS__) /* ULT GT3 */
+
+#define INTEL_KBL_GT4_IDS(MACRO__, ...) \
+	MACRO__(0x593B, ## __VA_ARGS__) /* Halo GT4 */
+
+/* AML/KBL Y GT2 */
+#define INTEL_AML_KBL_GT2_IDS(MACRO__, ...) \
+	MACRO__(0x591C, ## __VA_ARGS__),  /* ULX GT2 */ \
+	MACRO__(0x87C0, ## __VA_ARGS__) /* ULX GT2 */
+
+/* AML/CFL Y GT2 */
+#define INTEL_AML_CFL_GT2_IDS(MACRO__, ...) \
+	MACRO__(0x87CA, ## __VA_ARGS__)
+
+/* CML GT1 */
+#define INTEL_CML_GT1_IDS(MACRO__, ...) \
+	MACRO__(0x9BA2, ## __VA_ARGS__), \
+	MACRO__(0x9BA4, ## __VA_ARGS__), \
+	MACRO__(0x9BA5, ## __VA_ARGS__), \
+	MACRO__(0x9BA8, ## __VA_ARGS__)
+
+#define INTEL_CML_U_GT1_IDS(MACRO__, ...) \
+	MACRO__(0x9B21, ## __VA_ARGS__), \
+	MACRO__(0x9BAA, ## __VA_ARGS__), \
+	MACRO__(0x9BAC, ## __VA_ARGS__)
+
+/* CML GT2 */
+#define INTEL_CML_GT2_IDS(MACRO__, ...) \
+	MACRO__(0x9BC2, ## __VA_ARGS__), \
+	MACRO__(0x9BC4, ## __VA_ARGS__), \
+	MACRO__(0x9BC5, ## __VA_ARGS__), \
+	MACRO__(0x9BC6, ## __VA_ARGS__), \
+	MACRO__(0x9BC8, ## __VA_ARGS__), \
+	MACRO__(0x9BE6, ## __VA_ARGS__), \
+	MACRO__(0x9BF6, ## __VA_ARGS__)
+
+#define INTEL_CML_U_GT2_IDS(MACRO__, ...) \
+	MACRO__(0x9B41, ## __VA_ARGS__), \
+	MACRO__(0x9BCA, ## __VA_ARGS__), \
+	MACRO__(0x9BCC, ## __VA_ARGS__)
+
+#define INTEL_CML_IDS(MACRO__, ...) \
+	INTEL_CML_GT1_IDS(MACRO__, ## __VA_ARGS__), \
+	INTEL_CML_GT2_IDS(MACRO__, ## __VA_ARGS__), \
+	INTEL_CML_U_GT1_IDS(MACRO__, ## __VA_ARGS__), \
+	INTEL_CML_U_GT2_IDS(MACRO__, ## __VA_ARGS__)
+
+#define INTEL_KBL_IDS(MACRO__, ...) \
+	INTEL_KBL_GT1_IDS(MACRO__, ## __VA_ARGS__), \
+	INTEL_KBL_GT2_IDS(MACRO__, ## __VA_ARGS__), \
+	INTEL_KBL_GT3_IDS(MACRO__, ## __VA_ARGS__), \
+	INTEL_KBL_GT4_IDS(MACRO__, ## __VA_ARGS__), \
+	INTEL_AML_KBL_GT2_IDS(MACRO__, ## __VA_ARGS__)
+
+/* CFL S */
+#define INTEL_CFL_S_GT1_IDS(MACRO__, ...) \
+	MACRO__(0x3E90, ## __VA_ARGS__), /* SRV GT1 */ \
+	MACRO__(0x3E93, ## __VA_ARGS__), /* SRV GT1 */ \
+	MACRO__(0x3E99, ## __VA_ARGS__)  /* SRV GT1 */
+
+#define INTEL_CFL_S_GT2_IDS(MACRO__, ...) \
+	MACRO__(0x3E91, ## __VA_ARGS__), /* SRV GT2 */ \
+	MACRO__(0x3E92, ## __VA_ARGS__), /* SRV GT2 */ \
+	MACRO__(0x3E96, ## __VA_ARGS__), /* SRV GT2 */ \
+	MACRO__(0x3E98, ## __VA_ARGS__), /* SRV GT2 */ \
+	MACRO__(0x3E9A, ## __VA_ARGS__)  /* SRV GT2 */
+
+/* CFL H */
+#define INTEL_CFL_H_GT1_IDS(MACRO__, ...) \
+	MACRO__(0x3E9C, ## __VA_ARGS__)
+
+#define INTEL_CFL_H_GT2_IDS(MACRO__, ...) \
+	MACRO__(0x3E94, ## __VA_ARGS__),  /* Halo GT2 */ \
+	MACRO__(0x3E9B, ## __VA_ARGS__) /* Halo GT2 */
+
+/* CFL U GT2 */
+#define INTEL_CFL_U_GT2_IDS(MACRO__, ...) \
+	MACRO__(0x3EA9, ## __VA_ARGS__)
+
+/* CFL U GT3 */
+#define INTEL_CFL_U_GT3_IDS(MACRO__, ...) \
+	MACRO__(0x3EA5, ## __VA_ARGS__), /* ULT GT3 */ \
+	MACRO__(0x3EA6, ## __VA_ARGS__), /* ULT GT3 */ \
+	MACRO__(0x3EA7, ## __VA_ARGS__), /* ULT GT3 */ \
+	MACRO__(0x3EA8, ## __VA_ARGS__)  /* ULT GT3 */
+
+#define INTEL_CFL_IDS(MACRO__, ...) \
+	INTEL_CFL_S_GT1_IDS(MACRO__, ## __VA_ARGS__), \
+	INTEL_CFL_S_GT2_IDS(MACRO__, ## __VA_ARGS__), \
+	INTEL_CFL_H_GT1_IDS(MACRO__, ## __VA_ARGS__), \
+	INTEL_CFL_H_GT2_IDS(MACRO__, ## __VA_ARGS__), \
+	INTEL_CFL_U_GT2_IDS(MACRO__, ## __VA_ARGS__), \
+	INTEL_CFL_U_GT3_IDS(MACRO__, ## __VA_ARGS__), \
+	INTEL_AML_CFL_GT2_IDS(MACRO__, ## __VA_ARGS__)
+
+/* WHL/CFL U GT1 */
+#define INTEL_WHL_U_GT1_IDS(MACRO__, ...) \
+	MACRO__(0x3EA1, ## __VA_ARGS__), \
+	MACRO__(0x3EA4, ## __VA_ARGS__)
+
+/* WHL/CFL U GT2 */
+#define INTEL_WHL_U_GT2_IDS(MACRO__, ...) \
+	MACRO__(0x3EA0, ## __VA_ARGS__), \
+	MACRO__(0x3EA3, ## __VA_ARGS__)
+
+/* WHL/CFL U GT3 */
+#define INTEL_WHL_U_GT3_IDS(MACRO__, ...) \
+	MACRO__(0x3EA2, ## __VA_ARGS__)
+
+#define INTEL_WHL_IDS(MACRO__, ...) \
+	INTEL_WHL_U_GT1_IDS(MACRO__, ## __VA_ARGS__), \
+	INTEL_WHL_U_GT2_IDS(MACRO__, ## __VA_ARGS__), \
+	INTEL_WHL_U_GT3_IDS(MACRO__, ## __VA_ARGS__)
+
+/* CNL */
+#define INTEL_CNL_PORT_F_IDS(MACRO__, ...) \
+	MACRO__(0x5A44, ## __VA_ARGS__), \
+	MACRO__(0x5A4C, ## __VA_ARGS__), \
+	MACRO__(0x5A54, ## __VA_ARGS__), \
+	MACRO__(0x5A5C, ## __VA_ARGS__)
+
+#define INTEL_CNL_IDS(MACRO__, ...) \
+	INTEL_CNL_PORT_F_IDS(MACRO__, ## __VA_ARGS__), \
+	MACRO__(0x5A40, ## __VA_ARGS__), \
+	MACRO__(0x5A41, ## __VA_ARGS__), \
+	MACRO__(0x5A42, ## __VA_ARGS__), \
+	MACRO__(0x5A49, ## __VA_ARGS__), \
+	MACRO__(0x5A4A, ## __VA_ARGS__), \
+	MACRO__(0x5A50, ## __VA_ARGS__), \
+	MACRO__(0x5A51, ## __VA_ARGS__), \
+	MACRO__(0x5A52, ## __VA_ARGS__), \
+	MACRO__(0x5A59, ## __VA_ARGS__), \
+	MACRO__(0x5A5A, ## __VA_ARGS__)
+
+/* ICL */
+#define INTEL_ICL_PORT_F_IDS(MACRO__, ...) \
+	MACRO__(0x8A50, ## __VA_ARGS__), \
+	MACRO__(0x8A52, ## __VA_ARGS__), \
+	MACRO__(0x8A53, ## __VA_ARGS__), \
+	MACRO__(0x8A54, ## __VA_ARGS__), \
+	MACRO__(0x8A56, ## __VA_ARGS__), \
+	MACRO__(0x8A57, ## __VA_ARGS__), \
+	MACRO__(0x8A58, ## __VA_ARGS__), \
+	MACRO__(0x8A59, ## __VA_ARGS__), \
+	MACRO__(0x8A5A, ## __VA_ARGS__), \
+	MACRO__(0x8A5B, ## __VA_ARGS__), \
+	MACRO__(0x8A5C, ## __VA_ARGS__), \
+	MACRO__(0x8A70, ## __VA_ARGS__), \
+	MACRO__(0x8A71, ## __VA_ARGS__)
+
+#define INTEL_ICL_IDS(MACRO__, ...) \
+	INTEL_ICL_PORT_F_IDS(MACRO__, ## __VA_ARGS__), \
+	MACRO__(0x8A51, ## __VA_ARGS__), \
+	MACRO__(0x8A5D, ## __VA_ARGS__)
+
+/* EHL */
+#define INTEL_EHL_IDS(MACRO__, ...) \
+	MACRO__(0x4541, ## __VA_ARGS__), \
+	MACRO__(0x4551, ## __VA_ARGS__), \
+	MACRO__(0x4555, ## __VA_ARGS__), \
+	MACRO__(0x4557, ## __VA_ARGS__), \
+	MACRO__(0x4570, ## __VA_ARGS__), \
+	MACRO__(0x4571, ## __VA_ARGS__)
+
+/* JSL */
+#define INTEL_JSL_IDS(MACRO__, ...) \
+	MACRO__(0x4E51, ## __VA_ARGS__), \
+	MACRO__(0x4E55, ## __VA_ARGS__), \
+	MACRO__(0x4E57, ## __VA_ARGS__), \
+	MACRO__(0x4E61, ## __VA_ARGS__), \
+	MACRO__(0x4E71, ## __VA_ARGS__)
+
+/* TGL */
+#define INTEL_TGL_GT1_IDS(MACRO__, ...) \
+	MACRO__(0x9A60, ## __VA_ARGS__), \
+	MACRO__(0x9A68, ## __VA_ARGS__), \
+	MACRO__(0x9A70, ## __VA_ARGS__)
+
+#define INTEL_TGL_GT2_IDS(MACRO__, ...) \
+	MACRO__(0x9A40, ## __VA_ARGS__), \
+	MACRO__(0x9A49, ## __VA_ARGS__), \
+	MACRO__(0x9A59, ## __VA_ARGS__), \
+	MACRO__(0x9A78, ## __VA_ARGS__), \
+	MACRO__(0x9AC0, ## __VA_ARGS__), \
+	MACRO__(0x9AC9, ## __VA_ARGS__), \
+	MACRO__(0x9AD9, ## __VA_ARGS__), \
+	MACRO__(0x9AF8, ## __VA_ARGS__)
+
+#define INTEL_TGL_IDS(MACRO__, ...) \
+	INTEL_TGL_GT1_IDS(MACRO__, ## __VA_ARGS__), \
+	INTEL_TGL_GT2_IDS(MACRO__, ## __VA_ARGS__)
+
+/* RKL */
+#define INTEL_RKL_IDS(MACRO__, ...) \
+	MACRO__(0x4C80, ## __VA_ARGS__), \
+	MACRO__(0x4C8A, ## __VA_ARGS__), \
+	MACRO__(0x4C8B, ## __VA_ARGS__), \
+	MACRO__(0x4C8C, ## __VA_ARGS__), \
+	MACRO__(0x4C90, ## __VA_ARGS__), \
+	MACRO__(0x4C9A, ## __VA_ARGS__)
+
+/* DG1 */
+#define INTEL_DG1_IDS(MACRO__, ...) \
+	MACRO__(0x4905, ## __VA_ARGS__), \
+	MACRO__(0x4906, ## __VA_ARGS__), \
+	MACRO__(0x4907, ## __VA_ARGS__), \
+	MACRO__(0x4908, ## __VA_ARGS__), \
+	MACRO__(0x4909, ## __VA_ARGS__)
+
+/* ADL-S */
+#define INTEL_ADLS_IDS(MACRO__, ...) \
+	MACRO__(0x4680, ## __VA_ARGS__), \
+	MACRO__(0x4682, ## __VA_ARGS__), \
+	MACRO__(0x4688, ## __VA_ARGS__), \
+	MACRO__(0x468A, ## __VA_ARGS__), \
+	MACRO__(0x468B, ## __VA_ARGS__), \
+	MACRO__(0x4690, ## __VA_ARGS__), \
+	MACRO__(0x4692, ## __VA_ARGS__), \
+	MACRO__(0x4693, ## __VA_ARGS__)
+
+/* ADL-P */
+#define INTEL_ADLP_IDS(MACRO__, ...) \
+	MACRO__(0x46A0, ## __VA_ARGS__), \
+	MACRO__(0x46A1, ## __VA_ARGS__), \
+	MACRO__(0x46A2, ## __VA_ARGS__), \
+	MACRO__(0x46A3, ## __VA_ARGS__), \
+	MACRO__(0x46A6, ## __VA_ARGS__), \
+	MACRO__(0x46A8, ## __VA_ARGS__), \
+	MACRO__(0x46AA, ## __VA_ARGS__), \
+	MACRO__(0x462A, ## __VA_ARGS__), \
+	MACRO__(0x4626, ## __VA_ARGS__), \
+	MACRO__(0x4628, ## __VA_ARGS__), \
+	MACRO__(0x46B0, ## __VA_ARGS__), \
+	MACRO__(0x46B1, ## __VA_ARGS__), \
+	MACRO__(0x46B2, ## __VA_ARGS__), \
+	MACRO__(0x46B3, ## __VA_ARGS__), \
+	MACRO__(0x46C0, ## __VA_ARGS__), \
+	MACRO__(0x46C1, ## __VA_ARGS__), \
+	MACRO__(0x46C2, ## __VA_ARGS__), \
+	MACRO__(0x46C3, ## __VA_ARGS__)
+
+/* ADL-N */
+#define INTEL_ADLN_IDS(MACRO__, ...) \
+	MACRO__(0x46D0, ## __VA_ARGS__), \
+	MACRO__(0x46D1, ## __VA_ARGS__), \
+	MACRO__(0x46D2, ## __VA_ARGS__), \
+	MACRO__(0x46D3, ## __VA_ARGS__), \
+	MACRO__(0x46D4, ## __VA_ARGS__)
+
+/* RPL-S */
+#define INTEL_RPLS_IDS(MACRO__, ...) \
+	MACRO__(0xA780, ## __VA_ARGS__), \
+	MACRO__(0xA781, ## __VA_ARGS__), \
+	MACRO__(0xA782, ## __VA_ARGS__), \
+	MACRO__(0xA783, ## __VA_ARGS__), \
+	MACRO__(0xA788, ## __VA_ARGS__), \
+	MACRO__(0xA789, ## __VA_ARGS__), \
+	MACRO__(0xA78A, ## __VA_ARGS__), \
+	MACRO__(0xA78B, ## __VA_ARGS__)
+
+/* RPL-U */
+#define INTEL_RPLU_IDS(MACRO__, ...) \
+	MACRO__(0xA721, ## __VA_ARGS__), \
+	MACRO__(0xA7A1, ## __VA_ARGS__), \
+	MACRO__(0xA7A9, ## __VA_ARGS__), \
+	MACRO__(0xA7AC, ## __VA_ARGS__), \
+	MACRO__(0xA7AD, ## __VA_ARGS__)
+
+/* RPL-P */
+#define INTEL_RPLP_IDS(MACRO__, ...) \
+	MACRO__(0xA720, ## __VA_ARGS__), \
+	MACRO__(0xA7A0, ## __VA_ARGS__), \
+	MACRO__(0xA7A8, ## __VA_ARGS__), \
+	MACRO__(0xA7AA, ## __VA_ARGS__), \
+	MACRO__(0xA7AB, ## __VA_ARGS__)
+
+/* DG2 */
+#define INTEL_DG2_G10_IDS(MACRO__, ...) \
+	MACRO__(0x5690, ## __VA_ARGS__), \
+	MACRO__(0x5691, ## __VA_ARGS__), \
+	MACRO__(0x5692, ## __VA_ARGS__), \
+	MACRO__(0x56A0, ## __VA_ARGS__), \
+	MACRO__(0x56A1, ## __VA_ARGS__), \
+	MACRO__(0x56A2, ## __VA_ARGS__), \
+	MACRO__(0x56BE, ## __VA_ARGS__), \
+	MACRO__(0x56BF, ## __VA_ARGS__)
+
+#define INTEL_DG2_G11_IDS(MACRO__, ...) \
+	MACRO__(0x5693, ## __VA_ARGS__), \
+	MACRO__(0x5694, ## __VA_ARGS__), \
+	MACRO__(0x5695, ## __VA_ARGS__), \
+	MACRO__(0x56A5, ## __VA_ARGS__), \
+	MACRO__(0x56A6, ## __VA_ARGS__), \
+	MACRO__(0x56B0, ## __VA_ARGS__), \
+	MACRO__(0x56B1, ## __VA_ARGS__), \
+	MACRO__(0x56BA, ## __VA_ARGS__), \
+	MACRO__(0x56BB, ## __VA_ARGS__), \
+	MACRO__(0x56BC, ## __VA_ARGS__), \
+	MACRO__(0x56BD, ## __VA_ARGS__)
+
+#define INTEL_DG2_G12_IDS(MACRO__, ...) \
+	MACRO__(0x5696, ## __VA_ARGS__), \
+	MACRO__(0x5697, ## __VA_ARGS__), \
+	MACRO__(0x56A3, ## __VA_ARGS__), \
+	MACRO__(0x56A4, ## __VA_ARGS__), \
+	MACRO__(0x56B2, ## __VA_ARGS__), \
+	MACRO__(0x56B3, ## __VA_ARGS__)
+
+#define INTEL_DG2_IDS(MACRO__, ...) \
+	INTEL_DG2_G10_IDS(MACRO__, ## __VA_ARGS__), \
+	INTEL_DG2_G11_IDS(MACRO__, ## __VA_ARGS__), \
+	INTEL_DG2_G12_IDS(MACRO__, ## __VA_ARGS__)
+
+#define INTEL_ATS_M150_IDS(MACRO__, ...) \
+	MACRO__(0x56C0, ## __VA_ARGS__), \
+	MACRO__(0x56C2, ## __VA_ARGS__)
+
+#define INTEL_ATS_M75_IDS(MACRO__, ...) \
+	MACRO__(0x56C1, ## __VA_ARGS__)
+
+#define INTEL_ATS_M_IDS(MACRO__, ...) \
+	INTEL_ATS_M150_IDS(MACRO__, ## __VA_ARGS__), \
+	INTEL_ATS_M75_IDS(MACRO__, ## __VA_ARGS__)
+
+/* ARL */
+#define INTEL_ARL_IDS(MACRO__, ...) \
+	MACRO__(0x7D41, ## __VA_ARGS__), \
+	MACRO__(0x7D51, ## __VA_ARGS__), \
+	MACRO__(0x7D67, ## __VA_ARGS__), \
+	MACRO__(0x7DD1, ## __VA_ARGS__), \
+	MACRO__(0xB640, ## __VA_ARGS__)
+
+/* MTL */
+#define INTEL_MTL_IDS(MACRO__, ...) \
+	MACRO__(0x7D40, ## __VA_ARGS__), \
+	MACRO__(0x7D45, ## __VA_ARGS__), \
+	MACRO__(0x7D55, ## __VA_ARGS__), \
+	MACRO__(0x7D60, ## __VA_ARGS__), \
+	MACRO__(0x7DD5, ## __VA_ARGS__)
+
+/* PVC */
+#define INTEL_PVC_IDS(MACRO__, ...) \
+	MACRO__(0x0B69, ## __VA_ARGS__), \
+	MACRO__(0x0B6E, ## __VA_ARGS__), \
+	MACRO__(0x0BD4, ## __VA_ARGS__), \
+	MACRO__(0x0BD5, ## __VA_ARGS__), \
+	MACRO__(0x0BD6, ## __VA_ARGS__), \
+	MACRO__(0x0BD7, ## __VA_ARGS__), \
+	MACRO__(0x0BD8, ## __VA_ARGS__), \
+	MACRO__(0x0BD9, ## __VA_ARGS__), \
+	MACRO__(0x0BDA, ## __VA_ARGS__), \
+	MACRO__(0x0BDB, ## __VA_ARGS__), \
+	MACRO__(0x0BE0, ## __VA_ARGS__), \
+	MACRO__(0x0BE1, ## __VA_ARGS__), \
+	MACRO__(0x0BE5, ## __VA_ARGS__)
+
+/* LNL */
+#define INTEL_LNL_IDS(MACRO__, ...) \
+	MACRO__(0x6420, ## __VA_ARGS__), \
+	MACRO__(0x64A0, ## __VA_ARGS__), \
+	MACRO__(0x64B0, ## __VA_ARGS__)
+
+/* BMG */
+#define INTEL_BMG_IDS(MACRO__, ...) \
+	MACRO__(0xE202, ## __VA_ARGS__), \
+	MACRO__(0xE20B, ## __VA_ARGS__), \
+	MACRO__(0xE20C, ## __VA_ARGS__), \
+	MACRO__(0xE20D, ## __VA_ARGS__), \
+	MACRO__(0xE212, ## __VA_ARGS__)
+
+/* PTL */
+#define INTEL_PTL_IDS(MACRO__, ...) \
+	MACRO__(0xB080, ## __VA_ARGS__), \
+	MACRO__(0xB081, ## __VA_ARGS__), \
+	MACRO__(0xB082, ## __VA_ARGS__), \
+	MACRO__(0xB090, ## __VA_ARGS__), \
+	MACRO__(0xB091, ## __VA_ARGS__), \
+	MACRO__(0xB092, ## __VA_ARGS__), \
+	MACRO__(0xB0A0, ## __VA_ARGS__), \
+	MACRO__(0xB0A1, ## __VA_ARGS__), \
+	MACRO__(0xB0A2, ## __VA_ARGS__)
+
+#endif /* __PCIIDS_H__ */
-- 
cgit v1.2.3


From 493454445c9531051bd27a0305a61953780bd453 Mon Sep 17 00:00:00 2001
From: Jani Nikula <jani.nikula@intel.com>
Date: Tue, 22 Oct 2024 12:41:51 +0300
Subject: drm/xe: switch to common PCI ID macros
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Switch to the shared PCI ID macros in drm/intel/pciids.h. Remove
xe_pciids.h.

Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Lucas De Marchi <lucas.demarchi@intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Cc: Tvrtko Ursulin <tursulin@ursulin.net>
Reviewed-by: Andi Shyti <andi.shyti@linux.intel.com>
Acked-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/84e08172184bdc6409cf6dd13f6c52971c647dbb.1729590029.git.jani.nikula@intel.com
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 include/drm/intel/xe_pciids.h | 234 ------------------------------------------
 1 file changed, 234 deletions(-)
 delete mode 100644 include/drm/intel/xe_pciids.h

(limited to 'include')

diff --git a/include/drm/intel/xe_pciids.h b/include/drm/intel/xe_pciids.h
deleted file mode 100644
index 6d8d013f74e0..000000000000
--- a/include/drm/intel/xe_pciids.h
+++ /dev/null
@@ -1,234 +0,0 @@
-/* SPDX-License-Identifier: MIT */
-/*
- * Copyright © 2022 Intel Corporation
- */
-
-#ifndef _XE_PCIIDS_H_
-#define _XE_PCIIDS_H_
-
-/*
- * Lists below can be turned into initializers for a struct pci_device_id
- * by defining INTEL_VGA_DEVICE:
- *
- * #define INTEL_VGA_DEVICE(id, info) { \
- *	0x8086, id,			\
- *	~0, ~0,				\
- *	0x030000, 0xff0000,		\
- *	(unsigned long) info }
- *
- * And then calling like:
- *
- * XE_TGL_12_GT1_IDS(INTEL_VGA_DEVICE, ## __VA_ARGS__)
- *
- * To turn them into something else, just provide a different macro passed as
- * first argument.
- */
-
-/* TGL */
-#define XE_TGL_GT1_IDS(MACRO__, ...)		\
-	MACRO__(0x9A60, ## __VA_ARGS__),	\
-	MACRO__(0x9A68, ## __VA_ARGS__),	\
-	MACRO__(0x9A70, ## __VA_ARGS__)
-
-#define XE_TGL_GT2_IDS(MACRO__, ...)		\
-	MACRO__(0x9A40, ## __VA_ARGS__),	\
-	MACRO__(0x9A49, ## __VA_ARGS__),	\
-	MACRO__(0x9A59, ## __VA_ARGS__),	\
-	MACRO__(0x9A78, ## __VA_ARGS__),	\
-	MACRO__(0x9AC0, ## __VA_ARGS__),	\
-	MACRO__(0x9AC9, ## __VA_ARGS__),	\
-	MACRO__(0x9AD9, ## __VA_ARGS__),	\
-	MACRO__(0x9AF8, ## __VA_ARGS__)
-
-#define XE_TGL_IDS(MACRO__, ...)		\
-	XE_TGL_GT1_IDS(MACRO__, ## __VA_ARGS__),\
-	XE_TGL_GT2_IDS(MACRO__, ## __VA_ARGS__)
-
-/* RKL */
-#define XE_RKL_IDS(MACRO__, ...)		\
-	MACRO__(0x4C80, ## __VA_ARGS__),	\
-	MACRO__(0x4C8A, ## __VA_ARGS__),	\
-	MACRO__(0x4C8B, ## __VA_ARGS__),	\
-	MACRO__(0x4C8C, ## __VA_ARGS__),	\
-	MACRO__(0x4C90, ## __VA_ARGS__),	\
-	MACRO__(0x4C9A, ## __VA_ARGS__)
-
-/* DG1 */
-#define XE_DG1_IDS(MACRO__, ...)		\
-	MACRO__(0x4905, ## __VA_ARGS__),	\
-	MACRO__(0x4906, ## __VA_ARGS__),	\
-	MACRO__(0x4907, ## __VA_ARGS__),	\
-	MACRO__(0x4908, ## __VA_ARGS__),	\
-	MACRO__(0x4909, ## __VA_ARGS__)
-
-/* ADL-S */
-#define XE_ADLS_IDS(MACRO__, ...)		\
-	MACRO__(0x4680, ## __VA_ARGS__),	\
-	MACRO__(0x4682, ## __VA_ARGS__),	\
-	MACRO__(0x4688, ## __VA_ARGS__),	\
-	MACRO__(0x468A, ## __VA_ARGS__),	\
-	MACRO__(0x468B, ## __VA_ARGS__),	\
-	MACRO__(0x4690, ## __VA_ARGS__),	\
-	MACRO__(0x4692, ## __VA_ARGS__),	\
-	MACRO__(0x4693, ## __VA_ARGS__)
-
-/* ADL-P */
-#define XE_ADLP_IDS(MACRO__, ...)		\
-	MACRO__(0x46A0, ## __VA_ARGS__),	\
-	MACRO__(0x46A1, ## __VA_ARGS__),	\
-	MACRO__(0x46A2, ## __VA_ARGS__),	\
-	MACRO__(0x46A3, ## __VA_ARGS__),	\
-	MACRO__(0x46A6, ## __VA_ARGS__),	\
-	MACRO__(0x46A8, ## __VA_ARGS__),	\
-	MACRO__(0x46AA, ## __VA_ARGS__),	\
-	MACRO__(0x462A, ## __VA_ARGS__),	\
-	MACRO__(0x4626, ## __VA_ARGS__),	\
-	MACRO__(0x4628, ## __VA_ARGS__),	\
-	MACRO__(0x46B0, ## __VA_ARGS__),	\
-	MACRO__(0x46B1, ## __VA_ARGS__),	\
-	MACRO__(0x46B2, ## __VA_ARGS__),	\
-	MACRO__(0x46B3, ## __VA_ARGS__),	\
-	MACRO__(0x46C0, ## __VA_ARGS__),	\
-	MACRO__(0x46C1, ## __VA_ARGS__),	\
-	MACRO__(0x46C2, ## __VA_ARGS__),	\
-	MACRO__(0x46C3, ## __VA_ARGS__)
-
-/* ADL-N */
-#define XE_ADLN_IDS(MACRO__, ...)		\
-	MACRO__(0x46D0, ## __VA_ARGS__),	\
-	MACRO__(0x46D1, ## __VA_ARGS__),	\
-	MACRO__(0x46D2, ## __VA_ARGS__),	\
-	MACRO__(0x46D3, ## __VA_ARGS__),	\
-	MACRO__(0x46D4, ## __VA_ARGS__)
-
-/* RPL-S */
-#define XE_RPLS_IDS(MACRO__, ...)		\
-	MACRO__(0xA780, ## __VA_ARGS__),	\
-	MACRO__(0xA781, ## __VA_ARGS__),	\
-	MACRO__(0xA782, ## __VA_ARGS__),	\
-	MACRO__(0xA783, ## __VA_ARGS__),	\
-	MACRO__(0xA788, ## __VA_ARGS__),	\
-	MACRO__(0xA789, ## __VA_ARGS__),	\
-	MACRO__(0xA78A, ## __VA_ARGS__),	\
-	MACRO__(0xA78B, ## __VA_ARGS__)
-
-/* RPL-U */
-#define XE_RPLU_IDS(MACRO__, ...)		\
-	MACRO__(0xA721, ## __VA_ARGS__),	\
-	MACRO__(0xA7A1, ## __VA_ARGS__),	\
-	MACRO__(0xA7A9, ## __VA_ARGS__),	\
-	MACRO__(0xA7AC, ## __VA_ARGS__),	\
-	MACRO__(0xA7AD, ## __VA_ARGS__)
-
-/* RPL-P */
-#define XE_RPLP_IDS(MACRO__, ...)		\
-	MACRO__(0xA720, ## __VA_ARGS__),	\
-	MACRO__(0xA7A0, ## __VA_ARGS__),	\
-	MACRO__(0xA7A8, ## __VA_ARGS__),	\
-	MACRO__(0xA7AA, ## __VA_ARGS__),	\
-	MACRO__(0xA7AB, ## __VA_ARGS__)
-
-/* DG2 */
-#define XE_DG2_G10_IDS(MACRO__, ...)		\
-	MACRO__(0x5690, ## __VA_ARGS__),	\
-	MACRO__(0x5691, ## __VA_ARGS__),	\
-	MACRO__(0x5692, ## __VA_ARGS__),	\
-	MACRO__(0x56A0, ## __VA_ARGS__),	\
-	MACRO__(0x56A1, ## __VA_ARGS__),	\
-	MACRO__(0x56A2, ## __VA_ARGS__),	\
-	MACRO__(0x56BE, ## __VA_ARGS__),	\
-	MACRO__(0x56BF, ## __VA_ARGS__)
-
-#define XE_DG2_G11_IDS(MACRO__, ...)		\
-	MACRO__(0x5693, ## __VA_ARGS__),	\
-	MACRO__(0x5694, ## __VA_ARGS__),	\
-	MACRO__(0x5695, ## __VA_ARGS__),	\
-	MACRO__(0x56A5, ## __VA_ARGS__),	\
-	MACRO__(0x56A6, ## __VA_ARGS__),	\
-	MACRO__(0x56B0, ## __VA_ARGS__),	\
-	MACRO__(0x56B1, ## __VA_ARGS__),	\
-	MACRO__(0x56BA, ## __VA_ARGS__),	\
-	MACRO__(0x56BB, ## __VA_ARGS__),	\
-	MACRO__(0x56BC, ## __VA_ARGS__),	\
-	MACRO__(0x56BD, ## __VA_ARGS__)
-
-#define XE_DG2_G12_IDS(MACRO__, ...)		\
-	MACRO__(0x5696, ## __VA_ARGS__),	\
-	MACRO__(0x5697, ## __VA_ARGS__),	\
-	MACRO__(0x56A3, ## __VA_ARGS__),	\
-	MACRO__(0x56A4, ## __VA_ARGS__),	\
-	MACRO__(0x56B2, ## __VA_ARGS__),	\
-	MACRO__(0x56B3, ## __VA_ARGS__)
-
-#define XE_DG2_IDS(MACRO__, ...)		\
-	XE_DG2_G10_IDS(MACRO__, ## __VA_ARGS__),\
-	XE_DG2_G11_IDS(MACRO__, ## __VA_ARGS__),\
-	XE_DG2_G12_IDS(MACRO__, ## __VA_ARGS__)
-
-#define XE_ATS_M150_IDS(MACRO__, ...)		\
-	MACRO__(0x56C0, ## __VA_ARGS__),	\
-	MACRO__(0x56C2, ## __VA_ARGS__)
-
-#define XE_ATS_M75_IDS(MACRO__, ...)		\
-	MACRO__(0x56C1, ## __VA_ARGS__)
-
-#define XE_ATS_M_IDS(MACRO__, ...)		\
-	XE_ATS_M150_IDS(MACRO__, ## __VA_ARGS__),\
-	XE_ATS_M75_IDS(MACRO__, ## __VA_ARGS__)
-
-/* ARL */
-#define XE_ARL_IDS(MACRO__, ...)		\
-	MACRO__(0x7D41, ## __VA_ARGS__),	\
-	MACRO__(0x7D51, ## __VA_ARGS__),        \
-	MACRO__(0x7D67, ## __VA_ARGS__),	\
-	MACRO__(0x7DD1, ## __VA_ARGS__),	\
-	MACRO__(0xB640, ## __VA_ARGS__)
-
-/* MTL */
-#define XE_MTL_IDS(MACRO__, ...)		\
-	MACRO__(0x7D40, ## __VA_ARGS__),	\
-	MACRO__(0x7D45, ## __VA_ARGS__),	\
-	MACRO__(0x7D55, ## __VA_ARGS__),	\
-	MACRO__(0x7D60, ## __VA_ARGS__),	\
-	MACRO__(0x7DD5, ## __VA_ARGS__)
-
-/* PVC */
-#define XE_PVC_IDS(MACRO__, ...)		\
-	MACRO__(0x0B69, ## __VA_ARGS__),	\
-	MACRO__(0x0B6E, ## __VA_ARGS__),	\
-	MACRO__(0x0BD4, ## __VA_ARGS__),	\
-	MACRO__(0x0BD5, ## __VA_ARGS__),	\
-	MACRO__(0x0BD6, ## __VA_ARGS__),	\
-	MACRO__(0x0BD7, ## __VA_ARGS__),	\
-	MACRO__(0x0BD8, ## __VA_ARGS__),	\
-	MACRO__(0x0BD9, ## __VA_ARGS__),	\
-	MACRO__(0x0BDA, ## __VA_ARGS__),	\
-	MACRO__(0x0BDB, ## __VA_ARGS__),	\
-	MACRO__(0x0BE0, ## __VA_ARGS__),	\
-	MACRO__(0x0BE1, ## __VA_ARGS__),	\
-	MACRO__(0x0BE5, ## __VA_ARGS__)
-
-#define XE_LNL_IDS(MACRO__, ...) \
-	MACRO__(0x6420, ## __VA_ARGS__), \
-	MACRO__(0x64A0, ## __VA_ARGS__), \
-	MACRO__(0x64B0, ## __VA_ARGS__)
-
-#define XE_BMG_IDS(MACRO__, ...) \
-	MACRO__(0xE202, ## __VA_ARGS__), \
-	MACRO__(0xE20B, ## __VA_ARGS__), \
-	MACRO__(0xE20C, ## __VA_ARGS__), \
-	MACRO__(0xE20D, ## __VA_ARGS__), \
-	MACRO__(0xE212, ## __VA_ARGS__)
-
-#define XE_PTL_IDS(MACRO__, ...) \
-	MACRO__(0xB080, ## __VA_ARGS__), \
-	MACRO__(0xB081, ## __VA_ARGS__), \
-	MACRO__(0xB082, ## __VA_ARGS__), \
-	MACRO__(0xB090, ## __VA_ARGS__), \
-	MACRO__(0xB091, ## __VA_ARGS__), \
-	MACRO__(0xB092, ## __VA_ARGS__), \
-	MACRO__(0xB0A0, ## __VA_ARGS__), \
-	MACRO__(0xB0A1, ## __VA_ARGS__), \
-	MACRO__(0xB0A2, ## __VA_ARGS__)
-
-#endif
-- 
cgit v1.2.3


From 2f5a65ef30a636d5030917eebd283ac447a212af Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 29 Oct 2024 15:19:37 +0100
Subject: block: add a bdev_limits helper

Add a helper to get the queue_limits from the bdev without having to
poke into the request_queue.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: John Garry <john.g.garry@oracle.com>
Link: https://lore.kernel.org/r/20241029141937.249920-1-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blkdev.h | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index d0a52ed05e60..7bfc877e159e 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1159,6 +1159,11 @@ enum blk_default_limits {
  */
 #define BLK_DEF_MAX_SECTORS_CAP	2560u
 
+static inline struct queue_limits *bdev_limits(struct block_device *bdev)
+{
+	return &bdev_get_queue(bdev)->limits;
+}
+
 static inline unsigned long queue_segment_boundary(const struct request_queue *q)
 {
 	return q->limits.seg_boundary_mask;
@@ -1293,23 +1298,23 @@ unsigned int bdev_discard_alignment(struct block_device *bdev);
 
 static inline unsigned int bdev_max_discard_sectors(struct block_device *bdev)
 {
-	return bdev_get_queue(bdev)->limits.max_discard_sectors;
+	return bdev_limits(bdev)->max_discard_sectors;
 }
 
 static inline unsigned int bdev_discard_granularity(struct block_device *bdev)
 {
-	return bdev_get_queue(bdev)->limits.discard_granularity;
+	return bdev_limits(bdev)->discard_granularity;
 }
 
 static inline unsigned int
 bdev_max_secure_erase_sectors(struct block_device *bdev)
 {
-	return bdev_get_queue(bdev)->limits.max_secure_erase_sectors;
+	return bdev_limits(bdev)->max_secure_erase_sectors;
 }
 
 static inline unsigned int bdev_write_zeroes_sectors(struct block_device *bdev)
 {
-	return bdev_get_queue(bdev)->limits.max_write_zeroes_sectors;
+	return bdev_limits(bdev)->max_write_zeroes_sectors;
 }
 
 static inline bool bdev_nonrot(struct block_device *bdev)
@@ -1345,7 +1350,7 @@ static inline bool bdev_write_cache(struct block_device *bdev)
 
 static inline bool bdev_fua(struct block_device *bdev)
 {
-	return bdev_get_queue(bdev)->limits.features & BLK_FEAT_FUA;
+	return bdev_limits(bdev)->features & BLK_FEAT_FUA;
 }
 
 static inline bool bdev_nowait(struct block_device *bdev)
-- 
cgit v1.2.3


From a377132154ab8404dafcc52e8bc0c73050a954c2 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Tue, 24 Sep 2024 05:57:31 -0600
Subject: io_uring/msg_ring: add support for sending a sync message

Normally MSG_RING requires both a source and a destination ring. But
some users don't always have a ring avilable to send a message from, yet
they still need to notify a target ring.

Add support for using io_uring_register(2) without having a source ring,
using a file descriptor of -1 for that. Internally those are called
blind registration opcodes. Implement IORING_REGISTER_SEND_MSG_RING as a
blind opcode, which simply takes an sqe that the application can put on
the stack and use the normal liburing helpers to initialize it. Then the
app can call:

io_uring_register(-1, IORING_REGISTER_SEND_MSG_RING, &sqe, 1);

and get the same behavior in terms of the target, where a CQE is posted
with the details given in the sqe.

For now this takes a single sqe pointer argument, and hence arg must
be set to that, and nr_args must be 1. Could easily be extended to take
an array of sqes, but for now let's keep it simple.

Link: https://lore.kernel.org/r/20240924115932.116167-3-axboe@kernel.dk
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/uapi/linux/io_uring.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 1fe79e750470..86cb385fe0b5 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -612,6 +612,9 @@ enum io_uring_register_op {
 	/* clone registered buffers from source ring to current ring */
 	IORING_REGISTER_CLONE_BUFFERS		= 30,
 
+	/* send MSG_RING without having a ring */
+	IORING_REGISTER_SEND_MSG_RING		= 31,
+
 	/* this goes last */
 	IORING_REGISTER_LAST,
 
-- 
cgit v1.2.3


From 085268829b07202cf7bf8ec1a8fb7fd9d8f6a41a Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Mon, 30 Sep 2024 14:22:36 -0600
Subject: io_uring/poll: get rid of unlocked cancel hash

io_uring maintains two hash lists of inflight requests:

1) ctx->cancel_table_locked. This is used when the caller has the
   ctx->uring_lock held already. This is only an issue side parameter,
   as removal or task_work will always have it held.

2) ctx->cancel_table. This is used when the issuer does NOT have the
   ctx->uring_lock held, and relies on the table spinlocks for access.

However, it's pretty trivial to simply grab the lock in the one spot
where we care about it, for insertion. With that, we can kill the
unlocked table (and get rid of the _locked postfix for the other one).

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/io_uring_types.h | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index 4b9ba523978d..d8ca27da1341 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -291,7 +291,7 @@ struct io_ring_ctx {
 
 		struct xarray		io_bl_xa;
 
-		struct io_hash_table	cancel_table_locked;
+		struct io_hash_table	cancel_table;
 		struct io_alloc_cache	apoll_cache;
 		struct io_alloc_cache	netmsg_cache;
 		struct io_alloc_cache	rw_cache;
@@ -342,7 +342,6 @@ struct io_ring_ctx {
 
 	struct list_head	io_buffers_comp;
 	struct list_head	cq_overflow_list;
-	struct io_hash_table	cancel_table;
 
 	struct hlist_head	waitid_list;
 
@@ -459,7 +458,6 @@ enum {
 	REQ_F_DOUBLE_POLL_BIT,
 	REQ_F_APOLL_MULTISHOT_BIT,
 	REQ_F_CLEAR_POLLIN_BIT,
-	REQ_F_HASH_LOCKED_BIT,
 	/* keep async read/write and isreg together and in order */
 	REQ_F_SUPPORT_NOWAIT_BIT,
 	REQ_F_ISREG_BIT,
@@ -534,8 +532,6 @@ enum {
 	REQ_F_APOLL_MULTISHOT	= IO_REQ_FLAG(REQ_F_APOLL_MULTISHOT_BIT),
 	/* recvmsg special flag, clear EPOLLIN */
 	REQ_F_CLEAR_POLLIN	= IO_REQ_FLAG(REQ_F_CLEAR_POLLIN_BIT),
-	/* hashed into ->cancel_hash_locked, protected by ->uring_lock */
-	REQ_F_HASH_LOCKED	= IO_REQ_FLAG(REQ_F_HASH_LOCKED_BIT),
 	/* don't use lazy poll wake for this request */
 	REQ_F_POLL_NO_LAZY	= IO_REQ_FLAG(REQ_F_POLL_NO_LAZY_BIT),
 	/* file is pollable */
-- 
cgit v1.2.3


From ba4366f57b117c2eab996642288e5c75646ccfc9 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Mon, 30 Sep 2024 14:29:06 -0600
Subject: io_uring/poll: get rid of per-hashtable bucket locks

Any access to the table is protected by ctx->uring_lock now anyway, the
per-bucket locking doesn't buy us anything.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/io_uring_types.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index d8ca27da1341..9c7e1d3f06e5 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -67,7 +67,6 @@ struct io_file_table {
 };
 
 struct io_hash_bucket {
-	spinlock_t		lock;
 	struct hlist_head	list;
 } ____cacheline_aligned_in_smp;
 
-- 
cgit v1.2.3


From 2946f08ae9ed650b94e0ffebcdfdda8de76bd926 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Fri, 18 Oct 2024 17:14:00 +0100
Subject: io_uring: clean up cqe trace points

We have too many helpers posting CQEs, instead of tracing completion
events before filling in a CQE and thus having to pass all the data,
set the CQE first, pass it to the tracing helper and let it extract
everything it needs.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/b83c1ca9ee5aed2df0f3bb743bf5ed699cce4c86.1729267437.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/io_uring_types.h  |  5 +++++
 include/trace/events/io_uring.h | 24 +++++++++---------------
 2 files changed, 14 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index 9c7e1d3f06e5..391087144666 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -662,4 +662,9 @@ struct io_overflow_cqe {
 	struct io_uring_cqe cqe;
 };
 
+static inline bool io_ctx_cqe32(struct io_ring_ctx *ctx)
+{
+	return ctx->flags & IORING_SETUP_CQE32;
+}
+
 #endif
diff --git a/include/trace/events/io_uring.h b/include/trace/events/io_uring.h
index 412c9c210a32..fb81c533b310 100644
--- a/include/trace/events/io_uring.h
+++ b/include/trace/events/io_uring.h
@@ -315,20 +315,14 @@ TRACE_EVENT(io_uring_fail_link,
  * io_uring_complete - called when completing an SQE
  *
  * @ctx:		pointer to a ring context structure
- * @req:		pointer to a submitted request
- * @user_data:		user data associated with the request
- * @res:		result of the request
- * @cflags:		completion flags
- * @extra1:		extra 64-bit data for CQE32
- * @extra2:		extra 64-bit data for CQE32
- *
+ * @req:		(optional) pointer to a submitted request
+ * @cqe:		pointer to the filled in CQE being posted
  */
 TRACE_EVENT(io_uring_complete,
 
-	TP_PROTO(void *ctx, void *req, u64 user_data, int res, unsigned cflags,
-		 u64 extra1, u64 extra2),
+TP_PROTO(struct io_ring_ctx *ctx, void *req, struct io_uring_cqe *cqe),
 
-	TP_ARGS(ctx, req, user_data, res, cflags, extra1, extra2),
+	TP_ARGS(ctx, req, cqe),
 
 	TP_STRUCT__entry (
 		__field(  void *,	ctx		)
@@ -343,11 +337,11 @@ TRACE_EVENT(io_uring_complete,
 	TP_fast_assign(
 		__entry->ctx		= ctx;
 		__entry->req		= req;
-		__entry->user_data	= user_data;
-		__entry->res		= res;
-		__entry->cflags		= cflags;
-		__entry->extra1		= extra1;
-		__entry->extra2		= extra2;
+		__entry->user_data	= cqe->user_data;
+		__entry->res		= cqe->res;
+		__entry->cflags		= cqe->flags;
+		__entry->extra1		= io_ctx_cqe32(ctx) ? cqe->big_cqe[0] : 0;
+		__entry->extra2		= io_ctx_cqe32(ctx) ? cqe->big_cqe[1] : 0;
 	),
 
 	TP_printk("ring %p, req %p, user_data 0x%llx, result %d, cflags 0x%x "
-- 
cgit v1.2.3


From e6d43739d0ee49a39505d696ba6a656f47c2bd39 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Wed, 16 Oct 2024 15:54:06 -0600
Subject: io_uring: kill 'imu' from struct io_kiocb

It's no longer being used, remove it.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/io_uring_types.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index 391087144666..6d3ee71bd832 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -613,9 +613,6 @@ struct io_kiocb {
 	struct task_struct		*task;
 
 	union {
-		/* store used ubuf, so we can prevent reloading */
-		struct io_mapped_ubuf	*imu;
-
 		/* stores selected buf, valid IFF REQ_F_BUFFER_SELECTED is set */
 		struct io_buffer	*kbuf;
 
-- 
cgit v1.2.3


From 79cfe9e59c2a12c3b3faeeefe38d23f3d8030972 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Mon, 21 Oct 2024 13:34:10 -0600
Subject: io_uring/register: add IORING_REGISTER_RESIZE_RINGS

Once a ring has been created, the size of the CQ and SQ rings are fixed.
Usually this isn't a problem on the SQ ring side, as it merely controls
the available number of requests that can be submitted in a single
system call, and there's rarely a need to change that.

For the CQ ring, it's a different story. For most efficient use of
io_uring, it's important that the CQ ring never overflows. This means
that applications must size it for the worst case scenario, which can
be wasteful.

Add IORING_REGISTER_RESIZE_RINGS, which allows an application to resize
the existing rings. It takes a struct io_uring_params argument, the same
one which is used to setup the ring initially, and resizes rings
according to the sizes given.

Certain properties are always inherited from the original ring setup,
like SQE128/CQE32 and other setup options. The implementation only
allows flag associated with how the CQ ring is sized and clamped.

Existing unconsumed SQE and CQE entries are copied as part of the
process. If either the SQ or CQ resized destination ring cannot hold the
entries already present in the source rings, then the operation is failed
with -EOVERFLOW. Any register op holds ->uring_lock, which prevents new
submissions, and the internal mapping holds the completion lock as well
across moving CQ ring state.

To prevent races between mmap and ring resizing, add a mutex that's
solely used to serialize ring resize and mmap. mmap_sem can't be used
here, as as fork'ed process may be doing mmaps on the ring as well.
The ctx->resize_lock is held across mmap operations, and the resize
will grab it before swapping out the already mapped new data.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/io_uring_types.h | 7 +++++++
 include/uapi/linux/io_uring.h  | 5 +++++
 2 files changed, 12 insertions(+)

(limited to 'include')

diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index 6d3ee71bd832..841579dcdae9 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -415,6 +415,13 @@ struct io_ring_ctx {
 	/* protected by ->completion_lock */
 	unsigned			evfd_last_cq_tail;
 
+	/*
+	 * Protection for resize vs mmap races - both the mmap and resize
+	 * side will need to grab this lock, to prevent either side from
+	 * being run concurrently with the other.
+	 */
+	struct mutex			resize_lock;
+
 	/*
 	 * If IORING_SETUP_NO_MMAP is used, then the below holds
 	 * the gup'ed pages for the two rings, and the sqes.
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 86cb385fe0b5..60b9c98595fa 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -615,6 +615,11 @@ enum io_uring_register_op {
 	/* send MSG_RING without having a ring */
 	IORING_REGISTER_SEND_MSG_RING		= 31,
 
+	/* 32 reserved for zc rx */
+
+	/* resize CQ ring */
+	IORING_REGISTER_RESIZE_RINGS		= 33,
+
 	/* this goes last */
 	IORING_REGISTER_LAST,
 
-- 
cgit v1.2.3


From aa00f67adc2c0d6439f81b5a81ff181377c47a7e Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Tue, 22 Oct 2024 13:47:00 -0600
Subject: io_uring: add support for fixed wait regions

Generally applications have 1 or a few waits of waiting, yet they pass
in a struct io_uring_getevents_arg every time. This needs to get copied
and, in turn, the timeout value needs to get copied.

Rather than do this for every invocation, allow the application to
register a fixed set of wait regions that can simply be indexed when
asking the kernel to wait on events.

At ring setup time, the application can register a number of these wait
regions and initialize region/index 0 upfront:

	struct io_uring_reg_wait *reg;

	reg = io_uring_setup_reg_wait(ring, nr_regions, &ret);

	/* set timeout and mark as set, sigmask/sigmask_sz as needed */
	reg->ts.tv_sec = 0;
	reg->ts.tv_nsec = 100000;
	reg->flags = IORING_REG_WAIT_TS;

where nr_regions >= 1 && nr_regions <= PAGE_SIZE / sizeof(*reg). The
above initializes index 0, but 63 other regions can be initialized,
if needed. Now, instead of doing:

	struct __kernel_timespec timeout = { .tv_nsec = 100000, };

	io_uring_submit_and_wait_timeout(ring, &cqe, nr, &t, NULL);

to wait for events for each submit_and_wait, or just wait, operation, it
can just reference the above region at offset 0 and do:

	io_uring_submit_and_wait_reg(ring, &cqe, nr, 0);

to achieve the same goal of waiting 100usec without needing to copy
both struct io_uring_getevents_arg (24b) and struct __kernel_timeout
(16b) for each invocation. Struct io_uring_reg_wait looks as follows:

struct io_uring_reg_wait {
	struct __kernel_timespec	ts;
	__u32				min_wait_usec;
	__u32				flags;
	__u64				sigmask;
	__u32				sigmask_sz;
	__u32				pad[3];
	__u64				pad2[2];
};

embedding the timeout itself in the region, rather than passing it as
a pointer as well. Note that the signal mask is still passed as a
pointer, both for compatability reasons, but also because there doesn't
seem to be a lot of high frequency waits scenarios that involve setting
and resetting the signal mask for each wait.

The application is free to modify any region before a wait call, or it
can use keep multiple regions with different settings to avoid needing to
modify the same one for wait calls. Up to a page size of regions is mapped
by default, allowing PAGE_SIZE / 64 available regions for use.

The registered region must fit within a page. On a 4kb page size system,
that allows for 64 wait regions if a full page is used, as the size of
struct io_uring_reg_wait is 64b. The region registered must be aligned
to io_uring_reg_wait in size. It's valid to register less than 64
entries.

In network performance testing with zero-copy, this reduced the time
spent waiting on the TX side from 3.12% to 0.3% and the RX side from 4.4%
to 0.3%.

Wait regions are fixed for the lifetime of the ring - once registered,
they are persistent until the ring is torn down. The regions support
minimum wait timeout as well as the regular waits.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/io_uring_types.h | 10 ++++++++++
 include/uapi/linux/io_uring.h  | 41 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 51 insertions(+)

(limited to 'include')

diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index 841579dcdae9..2f12828b22a4 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -327,6 +327,14 @@ struct io_ring_ctx {
 		atomic_t		cq_wait_nr;
 		atomic_t		cq_timeouts;
 		struct wait_queue_head	cq_wait;
+
+		/*
+		 * If registered with IORING_REGISTER_CQWAIT_REG, a single
+		 * page holds N entries, mapped in cq_wait_arg. cq_wait_index
+		 * is the maximum allowable index.
+		 */
+		struct io_uring_reg_wait	*cq_wait_arg;
+		unsigned char			cq_wait_index;
 	} ____cacheline_aligned_in_smp;
 
 	/* timeouts */
@@ -430,6 +438,8 @@ struct io_ring_ctx {
 	unsigned short			n_sqe_pages;
 	struct page			**ring_pages;
 	struct page			**sqe_pages;
+
+	struct page			**cq_wait_page;
 };
 
 struct io_tw_state {
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 60b9c98595fa..65b7417c1b05 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -518,6 +518,7 @@ struct io_cqring_offsets {
 #define IORING_ENTER_EXT_ARG		(1U << 3)
 #define IORING_ENTER_REGISTERED_RING	(1U << 4)
 #define IORING_ENTER_ABS_TIMER		(1U << 5)
+#define IORING_ENTER_EXT_ARG_REG	(1U << 6)
 
 /*
  * Passed in for io_uring_setup(2). Copied back with updated info on success
@@ -620,6 +621,9 @@ enum io_uring_register_op {
 	/* resize CQ ring */
 	IORING_REGISTER_RESIZE_RINGS		= 33,
 
+	/* register fixed io_uring_reg_wait arguments */
+	IORING_REGISTER_CQWAIT_REG		= 34,
+
 	/* this goes last */
 	IORING_REGISTER_LAST,
 
@@ -803,6 +807,43 @@ enum io_uring_register_restriction_op {
 	IORING_RESTRICTION_LAST
 };
 
+enum {
+	IORING_REG_WAIT_TS		= (1U << 0),
+};
+
+/*
+ * Argument for IORING_REGISTER_CQWAIT_REG, registering a region of
+ * struct io_uring_reg_wait that can be indexed when io_uring_enter(2) is
+ * called rather than pass in a wait argument structure separately.
+ */
+struct io_uring_cqwait_reg_arg {
+	__u32		flags;
+	__u32		struct_size;
+	__u32		nr_entries;
+	__u32		pad;
+	__u64		user_addr;
+	__u64		pad2[3];
+};
+
+/*
+ * Argument for io_uring_enter(2) with
+ * IORING_GETEVENTS | IORING_ENTER_EXT_ARG_REG set, where the actual argument
+ * is an index into a previously registered fixed wait region described by
+ * the below structure.
+ */
+struct io_uring_reg_wait {
+	struct __kernel_timespec	ts;
+	__u32				min_wait_usec;
+	__u32				flags;
+	__u64				sigmask;
+	__u32				sigmask_sz;
+	__u32				pad[3];
+	__u64				pad2[2];
+};
+
+/*
+ * Argument for io_uring_enter(2) with IORING_GETEVENTS | IORING_ENTER_EXT_ARG
+ */
 struct io_uring_getevents_arg {
 	__u64	sigmask;
 	__u32	sigmask_sz;
-- 
cgit v1.2.3


From a85f31052bce52111b4e9d5a536003481d0421d0 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Sun, 27 Oct 2024 08:59:10 -0600
Subject: io_uring/nop: add support for testing registered files and buffers

Useful for testing performance/efficiency impact of registered files
and buffers, vs (particularly) non-registered files.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/uapi/linux/io_uring.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 65b7417c1b05..024745283783 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -416,6 +416,9 @@ enum io_uring_msg_ring_flags {
  * IORING_NOP_INJECT_RESULT	Inject result from sqe->result
  */
 #define IORING_NOP_INJECT_RESULT	(1U << 0)
+#define IORING_NOP_FILE			(1U << 1)
+#define IORING_NOP_FIXED_FILE		(1U << 2)
+#define IORING_NOP_FIXED_BUFFER		(1U << 3)
 
 /*
  * IO completion data structure (Completion Queue Entry)
-- 
cgit v1.2.3


From ff1256b8f3c45f222bce19fbfc1e1bc498b31d03 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Fri, 25 Oct 2024 08:54:28 -0600
Subject: io_uring/rsrc: move struct io_fixed_file to rsrc.h header

There's no need for this internal structure to be visible, move it to
the private rsrc.h header instead.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/io_uring_types.h | 5 -----
 1 file changed, 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index 2f12828b22a4..d4ba4ae480d6 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -55,11 +55,6 @@ struct io_wq_work {
 	int cancel_seq;
 };
 
-struct io_fixed_file {
-	/* file * with additional FFS_* flags */
-	unsigned long file_ptr;
-};
-
 struct io_file_table {
 	struct io_fixed_file *files;
 	unsigned long *bitmap;
-- 
cgit v1.2.3


From 729ee76561fd3d0766ab70de92076d7f12f465b9 Mon Sep 17 00:00:00 2001
From: Nikunj Kela <quic_nkela@quicinc.com>
Date: Tue, 10 Sep 2024 10:15:32 -0700
Subject: dt-bindings: arm: qcom: add the SoC ID for SA8255P

Add the SoC ID entry for SA8255P.

Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Signed-off-by: Nikunj Kela <quic_nkela@quicinc.com>
Link: https://lore.kernel.org/r/20240910171534.2412263-2-quic_nkela@quicinc.com
Signed-off-by: Bjorn Andersson <andersson@kernel.org>
---
 include/dt-bindings/arm/qcom,ids.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/dt-bindings/arm/qcom,ids.h b/include/dt-bindings/arm/qcom,ids.h
index 1a53ca2df523..f27c96603f91 100644
--- a/include/dt-bindings/arm/qcom,ids.h
+++ b/include/dt-bindings/arm/qcom,ids.h
@@ -258,6 +258,7 @@
 #define QCOM_ID_SAR2130P		525
 #define QCOM_ID_SM8475			530
 #define QCOM_ID_SM8475P			531
+#define QCOM_ID_SA8255P			532
 #define QCOM_ID_SA8775P			534
 #define QCOM_ID_QRU1000			539
 #define QCOM_ID_SM8475_2		540
-- 
cgit v1.2.3


From 8cbebdfe3e794e5b4f3e08a7297a513bf14fec6e Mon Sep 17 00:00:00 2001
From: Tengfei Fan <quic_tengfan@quicinc.com>
Date: Wed, 11 Sep 2024 19:10:55 +0800
Subject: dt-bindings: arm: qcom,ids: add SoC ID for QCS9100

Add the ID for the Qualcomm QCS9100 SoC.

Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Signed-off-by: Tengfei Fan <quic_tengfan@quicinc.com>
Link: https://lore.kernel.org/r/20240911-add_qcs9100_support-v2-1-e43a71ceb017@quicinc.com
Signed-off-by: Bjorn Andersson <andersson@kernel.org>
---
 include/dt-bindings/arm/qcom,ids.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/dt-bindings/arm/qcom,ids.h b/include/dt-bindings/arm/qcom,ids.h
index f27c96603f91..b66f8aeb9f47 100644
--- a/include/dt-bindings/arm/qcom,ids.h
+++ b/include/dt-bindings/arm/qcom,ids.h
@@ -279,6 +279,7 @@
 #define QCOM_ID_QCM8550			604
 #define QCOM_ID_IPQ5300			624
 #define QCOM_ID_IPQ5321			650
+#define QCOM_ID_QCS9100			667
 #define QCOM_ID_QCS8300			674
 #define QCOM_ID_QCS8275			675
 #define QCOM_ID_QCS615			680
-- 
cgit v1.2.3


From b2473a359763e27567993e7d8f37de82f57a0829 Mon Sep 17 00:00:00 2001
From: Usama Arif <usamaarif642@gmail.com>
Date: Wed, 23 Oct 2024 18:14:26 +0100
Subject: of/fdt: add dt_phys arg to early_init_dt_scan and
 early_init_dt_verify

 __pa() is only intended to be used for linear map addresses and using
it for initial_boot_params which is in fixmap for arm64 will give an
incorrect value. Hence save the physical address when it is known at
boot time when calling early_init_dt_scan for arm64 and use it at kexec
time instead of converting the virtual address using __pa().

Note that arm64 doesn't need the FDT region reserved in the DT as the
kernel explicitly reserves the passed in FDT. Therefore, only a debug
warning is fixed with this change.

Reported-by: Breno Leitao <leitao@debian.org>
Suggested-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Usama Arif <usamaarif642@gmail.com>
Fixes: ac10be5cdbfa ("arm64: Use common of_kexec_alloc_and_setup_fdt()")
Link: https://lore.kernel.org/r/20241023171426.452688-1-usamaarif642@gmail.com
Signed-off-by: Rob Herring (Arm) <robh@kernel.org>
---
 include/linux/of_fdt.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h
index d69ad5bb1eb1..b8d6c0c20876 100644
--- a/include/linux/of_fdt.h
+++ b/include/linux/of_fdt.h
@@ -31,6 +31,7 @@ extern void *of_fdt_unflatten_tree(const unsigned long *blob,
 extern int __initdata dt_root_addr_cells;
 extern int __initdata dt_root_size_cells;
 extern void *initial_boot_params;
+extern phys_addr_t initial_boot_params_pa;
 
 extern char __dtb_start[];
 extern char __dtb_end[];
@@ -70,8 +71,8 @@ extern u64 dt_mem_next_cell(int s, const __be32 **cellp);
 /* Early flat tree scan hooks */
 extern int early_init_dt_scan_root(void);
 
-extern bool early_init_dt_scan(void *params);
-extern bool early_init_dt_verify(void *params);
+extern bool early_init_dt_scan(void *dt_virt, phys_addr_t dt_phys);
+extern bool early_init_dt_verify(void *dt_virt, phys_addr_t dt_phys);
 extern void early_init_dt_scan_nodes(void);
 
 extern const char *of_flat_dt_get_machine_name(void);
-- 
cgit v1.2.3


From 4bbd360a5084d8f890f814327e1d9fbb1f0f6fa1 Mon Sep 17 00:00:00 2001
From: Kuniyuki Iwashima <kuniyu@amazon.com>
Date: Thu, 24 Oct 2024 13:14:58 -0700
Subject: socket: Print pf->create() when it does not clear sock->sk on
 failure.

I suggested to put DEBUG_NET_WARN_ON_ONCE() in __sock_create() to
catch possible use-after-free.

But the warning itself was not useful because our interest is in
the callee than the caller.

Let's define DEBUG_NET_WARN_ONCE() and print the name of pf->create()
and the socket identifier.

While at it, we enclose DEBUG_NET_WARN_ON_ONCE() in parentheses too
to avoid a checkpatch error.

Note that %pf or %pF were obsoleted and will be removed later as per
comment in lib/vsprintf.c.

Link: https://lore.kernel.org/netdev/202410231427.633734b3-lkp@intel.com/
Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Link: https://patch.msgid.link/20241024201458.49412-1-kuniyu@amazon.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/net_debug.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/net_debug.h b/include/net/net_debug.h
index 1e74684cbbdb..9fecb1496be3 100644
--- a/include/net/net_debug.h
+++ b/include/net/net_debug.h
@@ -149,9 +149,11 @@ do {								\
 
 
 #if defined(CONFIG_DEBUG_NET)
-#define DEBUG_NET_WARN_ON_ONCE(cond) (void)WARN_ON_ONCE(cond)
+#define DEBUG_NET_WARN_ON_ONCE(cond) ((void)WARN_ON_ONCE(cond))
+#define DEBUG_NET_WARN_ONCE(cond, format...) ((void)WARN_ONCE(cond, format))
 #else
 #define DEBUG_NET_WARN_ON_ONCE(cond) BUILD_BUG_ON_INVALID(cond)
+#define DEBUG_NET_WARN_ONCE(cond, format...) BUILD_BUG_ON_INVALID(cond)
 #endif
 
 #endif	/* _LINUX_NET_DEBUG_H */
-- 
cgit v1.2.3


From 2a0df10434ddeb8b5b5aded90a5659aff3b106d7 Mon Sep 17 00:00:00 2001
From: Przemek Kitszel <przemyslaw.kitszel@intel.com>
Date: Wed, 23 Oct 2024 15:09:06 +0200
Subject: devlink: remove unused devlink_resource_occ_get_register() and
 _unregister()

Remove not used devlink_resource_occ_get_register() and
devlink_resource_occ_get_unregister() functions; current devlink resource
users are fine with devl_ variants of the two.

Reviewed-by: Wojciech Drewek <wojciech.drewek@intel.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Reviewed-by: Joe Damato <jdamato@fastly.com>
Signed-off-by: Przemek Kitszel <przemyslaw.kitszel@intel.com>
Link: https://patch.msgid.link/20241023131248.27192-7-przemyslaw.kitszel@intel.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/devlink.h | 7 -------
 1 file changed, 7 deletions(-)

(limited to 'include')

diff --git a/include/net/devlink.h b/include/net/devlink.h
index db5eff6cb60f..fdd6a0f9891d 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -1797,15 +1797,8 @@ void devl_resource_occ_get_register(struct devlink *devlink,
 				    u64 resource_id,
 				    devlink_resource_occ_get_t *occ_get,
 				    void *occ_get_priv);
-void devlink_resource_occ_get_register(struct devlink *devlink,
-				       u64 resource_id,
-				       devlink_resource_occ_get_t *occ_get,
-				       void *occ_get_priv);
 void devl_resource_occ_get_unregister(struct devlink *devlink,
 				      u64 resource_id);
-
-void devlink_resource_occ_get_unregister(struct devlink *devlink,
-					 u64 resource_id);
 int devl_params_register(struct devlink *devlink,
 			 const struct devlink_param *params,
 			 size_t params_count);
-- 
cgit v1.2.3


From e3302f9a503a632c125170dc3c72b2886a910b0a Mon Sep 17 00:00:00 2001
From: Przemek Kitszel <przemyslaw.kitszel@intel.com>
Date: Wed, 23 Oct 2024 15:09:07 +0200
Subject: devlink: remove unused devlink_resource_register()

Remove unused devlink_resource_register(); all the drivers use
devl_resource_register() variant instead.

Reviewed-by: Wojciech Drewek <wojciech.drewek@intel.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Reviewed-by: Joe Damato <jdamato@fastly.com>
Signed-off-by: Przemek Kitszel <przemyslaw.kitszel@intel.com>
Link: https://patch.msgid.link/20241023131248.27192-8-przemyslaw.kitszel@intel.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/devlink.h | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'include')

diff --git a/include/net/devlink.h b/include/net/devlink.h
index fdd6a0f9891d..fbb9a2668e24 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -1779,12 +1779,6 @@ int devl_resource_register(struct devlink *devlink,
 			   u64 resource_id,
 			   u64 parent_resource_id,
 			   const struct devlink_resource_size_params *size_params);
-int devlink_resource_register(struct devlink *devlink,
-			      const char *resource_name,
-			      u64 resource_size,
-			      u64 resource_id,
-			      u64 parent_resource_id,
-			      const struct devlink_resource_size_params *size_params);
 void devl_resources_unregister(struct devlink *devlink);
 void devlink_resources_unregister(struct devlink *devlink);
 int devl_resource_size_get(struct devlink *devlink,
-- 
cgit v1.2.3


From 90ee6ed776c06435a3fe79c7f5344761f52e1760 Mon Sep 17 00:00:00 2001
From: Christian Brauner <brauner@kernel.org>
Date: Mon, 7 Oct 2024 16:23:59 +0200
Subject: fs: port files to file_ref

Port files to rely on file_ref reference to improve scaling and gain
overflow protection.

- We continue to WARN during get_file() in case a file that is already
  marked dead is revived as get_file() is only valid if the caller
  already holds a reference to the file. This hasn't changed just the
  check changes.

- The semantics for epoll and ttm's dmabuf usage have changed. Both
  epoll and ttm synchronize with __fput() to prevent the underlying file
  from beeing freed.

  (1) epoll

      Explaining epoll is straightforward using a simple diagram.
      Essentially, the mutex of the epoll instance needs to be taken in both
      __fput() and around epi_fget() preventing the file from being freed
      while it is polled or preventing the file from being resurrected.

          CPU1                                   CPU2
          fput(file)
          -> __fput(file)
             -> eventpoll_release(file)
                -> eventpoll_release_file(file)
                                                 mutex_lock(&ep->mtx)
                                                 epi_item_poll()
                                                 -> epi_fget()
                                                    -> file_ref_get(file)
                                                 mutex_unlock(&ep->mtx)
                   mutex_lock(&ep->mtx);
                   __ep_remove()
                   mutex_unlock(&ep->mtx);
             -> kmem_cache_free(file)

  (2) ttm dmabuf

      This explanation is a bit more involved. A regular dmabuf file stashed
      the dmabuf in file->private_data and the file in dmabuf->file:

          file->private_data = dmabuf;
          dmabuf->file = file;

      The generic release method of a dmabuf file handles file specific
      things:

          f_op->release::dma_buf_file_release()

      while the generic dentry release method of a dmabuf handles dmabuf
      freeing including driver specific things:

          dentry->d_release::dma_buf_release()

      During ttm dmabuf initialization in ttm_object_device_init() the ttm
      driver copies the provided struct dma_buf_ops into a private location:

          struct ttm_object_device {
                  spinlock_t object_lock;
                  struct dma_buf_ops ops;
                  void (*dmabuf_release)(struct dma_buf *dma_buf);
                  struct idr idr;
          };

          ttm_object_device_init(const struct dma_buf_ops *ops)
          {
                  // copy original dma_buf_ops in private location
                  tdev->ops = *ops;

                  // stash the release method of the original struct dma_buf_ops
                  tdev->dmabuf_release = tdev->ops.release;

                  // override the release method in the copy of the struct dma_buf_ops
                  // with ttm's own dmabuf release method
                  tdev->ops.release = ttm_prime_dmabuf_release;
          }

      When a new dmabuf is created the struct dma_buf_ops with the overriden
      release method set to ttm_prime_dmabuf_release is passed in exp_info.ops:

          DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
          exp_info.ops = &tdev->ops;
          exp_info.size = prime->size;
          exp_info.flags = flags;
          exp_info.priv = prime;

      The call to dma_buf_export() then sets

          mutex_lock_interruptible(&prime->mutex);
          dma_buf = dma_buf_export(&exp_info)
          {
                  dmabuf->ops = exp_info->ops;
          }
          mutex_unlock(&prime->mutex);

      which creates a new dmabuf file and then install a file descriptor to
      it in the callers file descriptor table:

          ret = dma_buf_fd(dma_buf, flags);

      When that dmabuf file is closed we now get:

          fput(file)
          -> __fput(file)
             -> f_op->release::dma_buf_file_release()
             -> dput()
                -> d_op->d_release::dma_buf_release()
                   -> dmabuf->ops->release::ttm_prime_dmabuf_release()
                      mutex_lock(&prime->mutex);
                      if (prime->dma_buf == dma_buf)
                            prime->dma_buf = NULL;
                      mutex_unlock(&prime->mutex);

      Where we can see that prime->dma_buf is set to NULL. So when we have
      the following diagram:

          CPU1                                                          CPU2
          fput(file)
          -> __fput(file)
             -> f_op->release::dma_buf_file_release()
             -> dput()
                -> d_op->d_release::dma_buf_release()
                   -> dmabuf->ops->release::ttm_prime_dmabuf_release()
                                                                        ttm_prime_handle_to_fd()
                                                                        mutex_lock_interruptible(&prime->mutex)
                                                                        dma_buf = prime->dma_buf
                                                                        dma_buf && get_dma_buf_unless_doomed(dma_buf)
                                                                        -> file_ref_get(dma_buf->file)
                                                                        mutex_unlock(&prime->mutex);

                      mutex_lock(&prime->mutex);
                      if (prime->dma_buf == dma_buf)
                            prime->dma_buf = NULL;
                      mutex_unlock(&prime->mutex);
             -> kmem_cache_free(file)

      The logic of the mechanism is the same as for epoll: sync with
      __fput() preventing the file from being freed. Here the
      synchronization happens through the ttm instance's prime->mutex.
      Basically, the lifetime of the dma_buf and the file are tighly
      coupled.

  Both (1) and (2) used to call atomic_inc_not_zero() to check whether
  the file has already been marked dead and then refuse to revive it.

  This is only safe because both (1) and (2) sync with __fput() and thus
  prevent kmem_cache_free() on the file being called and thus prevent
  the file from being immediately recycled due to SLAB_TYPESAFE_BY_RCU.

  Both (1) and (2) have been ported from atomic_inc_not_zero() to
  file_ref_get(). That means a file that is already in the process of
  being marked as FILE_REF_DEAD:

  file_ref_put()
  cnt = atomic_long_dec_return()
  -> __file_ref_put(cnt)
     if (cnt == FIlE_REF_NOREF)
             atomic_long_try_cmpxchg_release(cnt, FILE_REF_DEAD)

  can be revived again:

  CPU1                                                             CPU2
  file_ref_put()
  cnt = atomic_long_dec_return()
  -> __file_ref_put(cnt)
     if (cnt == FIlE_REF_NOREF)
                                                                   file_ref_get()
                                                                   // Brings reference back to FILE_REF_ONEREF
                                                                   atomic_long_add_negative()
             atomic_long_try_cmpxchg_release(cnt, FILE_REF_DEAD)

  This is fine and inherent to the file_ref_get()/file_ref_put()
  semantics. For both (1) and (2) this is safe because __fput() is
  prevented from making progress if file_ref_get() fails due to the
  aforementioned synchronization mechanisms.

  Two cases need to be considered that affect both (1) epoll and (2) ttm
  dmabuf:

   (i) fput()'s file_ref_put() and marks the file as FILE_REF_NOREF but
       before that fput() can mark the file as FILE_REF_DEAD someone
       manages to sneak in a file_ref_get() and brings the refcount back
       from FILE_REF_NOREF to FILE_REF_ONEREF. In that case the original
       fput() doesn't call __fput(). For epoll the poll will finish and
       for ttm dmabuf the file can be used again. For ttm dambuf this is
       actually an advantage because it avoids immediately allocating
       a new dmabuf object.

       CPU1                                                             CPU2
       file_ref_put()
       cnt = atomic_long_dec_return()
       -> __file_ref_put(cnt)
          if (cnt == FIlE_REF_NOREF)
                                                                        file_ref_get()
                                                                        // Brings reference back to FILE_REF_ONEREF
                                                                        atomic_long_add_negative()
                  atomic_long_try_cmpxchg_release(cnt, FILE_REF_DEAD)

  (ii) fput()'s file_ref_put() marks the file FILE_REF_NOREF and
       also suceeds in actually marking it FILE_REF_DEAD and then calls
       into __fput() to free the file.

       When either (1) or (2) call file_ref_get() they fail as
       atomic_long_add_negative() will return true.

       At the same time, both (1) and (2) all file_ref_get() under
       mutexes that __fput() must also acquire preventing
       kmem_cache_free() from freeing the file.

  So while this might be treated as a change in semantics for (1) and
  (2) it really isn't. It if should end up causing issues this can be
  fixed by adding a helper that does something like:

  long cnt = atomic_long_read(&ref->refcnt);
  do {
          if (cnt < 0)
                  return false;
  } while (!atomic_long_try_cmpxchg(&ref->refcnt, &cnt, cnt + 1));
  return true;

  which would block FILE_REF_NOREF to FILE_REF_ONEREF transitions.

- Jann correctly pointed out that kmem_cache_zalloc() cannot be used
  anymore once files have been ported to file_ref_t.

  The kmem_cache_zalloc() call will memset() the whole struct file to
  zero when it is reallocated. This will also set file->f_ref to zero
  which mens that a concurrent file_ref_get() can return true:

  CPU1                            CPU2
                                  __get_file_rcu()
                                    rcu_dereference_raw()
  close()
    [frees file]
  alloc_empty_file()
    kmem_cache_zalloc()
      [reallocates same file]
      memset(..., 0, ...)
                                    file_ref_get()
                                      [increments 0->1, returns true]
    init_file()
      file_ref_init(..., 1)
        [sets to 0]
                                    rcu_dereference_raw()
                                    fput()
                                      file_ref_put()
                                        [decrements 0->FILE_REF_NOREF, frees file]
    [UAF]

   causing a concurrent __get_file_rcu() call to acquire a reference to
   the file that is about to be reallocated and immediately freeing it
   on realizing that it has been recycled. This causes a UAF for the
   task that reallocated/recycled the file.

   This is prevented by switching from kmem_cache_zalloc() to
   kmem_cache_alloc() and initializing the fields manually. With
   file->f_ref initialized last.

   Note that a memset() also isn't guaranteed to atomically update an
   unsigned long so it's theoretically possible to see torn and
   therefore bogus counter values.

Link: https://lore.kernel.org/r/20241007-brauner-file-rcuref-v2-3-387e24dc9163@kernel.org
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/linux/fs.h | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index e3c603d01337..c13f648a1c13 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -45,6 +45,7 @@
 #include <linux/slab.h>
 #include <linux/maple_tree.h>
 #include <linux/rw_hint.h>
+#include <linux/file_ref.h>
 
 #include <asm/byteorder.h>
 #include <uapi/linux/fs.h>
@@ -1005,7 +1006,7 @@ static inline int ra_has_index(struct file_ra_state *ra, pgoff_t index)
 
 /**
  * struct file - Represents a file
- * @f_count: reference count
+ * @f_ref: reference count
  * @f_lock: Protects f_ep, f_flags. Must not be taken from IRQ context.
  * @f_mode: FMODE_* flags often used in hotpaths
  * @f_op: file operations
@@ -1030,7 +1031,7 @@ static inline int ra_has_index(struct file_ra_state *ra, pgoff_t index)
  * @f_freeptr: Pointer used by SLAB_TYPESAFE_BY_RCU file cache (don't touch.)
  */
 struct file {
-	atomic_long_t			f_count;
+	file_ref_t			f_ref;
 	spinlock_t			f_lock;
 	fmode_t				f_mode;
 	const struct file_operations	*f_op;
@@ -1078,15 +1079,14 @@ struct file_handle {
 
 static inline struct file *get_file(struct file *f)
 {
-	long prior = atomic_long_fetch_inc_relaxed(&f->f_count);
-	WARN_ONCE(!prior, "struct file::f_count incremented from zero; use-after-free condition present!\n");
+	file_ref_inc(&f->f_ref);
 	return f;
 }
 
 struct file *get_file_rcu(struct file __rcu **f);
 struct file *get_file_active(struct file **f);
 
-#define file_count(x)	atomic_long_read(&(x)->f_count)
+#define file_count(f)	file_ref_read(&(f)->f_ref)
 
 #define	MAX_NON_LFS	((1UL<<31) - 1)
 
-- 
cgit v1.2.3


From 94b2a2c0e7cba3f163609dbd94120ee533ad2a07 Mon Sep 17 00:00:00 2001
From: Andrzej Kacprowski <Andrzej.Kacprowski@intel.com>
Date: Thu, 17 Oct 2024 16:58:09 +0200
Subject: accel/ivpu: Remove copy engine support

Copy engine was deprecated by the FW and is no longer supported.
Compute engine includes all copy engine functionality and should be used
instead.

This change does not affect user space as the copy engine was never
used outside of a couple of tests.

Signed-off-by: Andrzej Kacprowski <Andrzej.Kacprowski@intel.com>
Reviewed-by: Jacek Lawrynowicz <jacek.lawrynowicz@linux.intel.com>
Reviewed-by: Jeffrey Hugo <quic_jhugo@quicinc.com>
Signed-off-by: Jacek Lawrynowicz <jacek.lawrynowicz@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20241017145817.121590-4-jacek.lawrynowicz@linux.intel.com
---
 include/uapi/drm/ivpu_accel.h | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/uapi/drm/ivpu_accel.h b/include/uapi/drm/ivpu_accel.h
index 234664d35250..a35b97b097bf 100644
--- a/include/uapi/drm/ivpu_accel.h
+++ b/include/uapi/drm/ivpu_accel.h
@@ -258,7 +258,7 @@ struct drm_ivpu_bo_info {
 
 /* drm_ivpu_submit engines */
 #define DRM_IVPU_ENGINE_COMPUTE 0
-#define DRM_IVPU_ENGINE_COPY    1
+#define DRM_IVPU_ENGINE_COPY    1 /* Deprecated */
 
 /**
  * struct drm_ivpu_submit - Submit commands to the VPU
@@ -289,10 +289,6 @@ struct drm_ivpu_submit {
 	 * %DRM_IVPU_ENGINE_COMPUTE:
 	 *
 	 * Performs Deep Learning Neural Compute Inference Operations
-	 *
-	 * %DRM_IVPU_ENGINE_COPY:
-	 *
-	 * Performs memory copy operations to/from system memory allocated for VPU
 	 */
 	__u32 engine;
 
-- 
cgit v1.2.3


From 95b6235e36953e9d6c822270961e79751eee2276 Mon Sep 17 00:00:00 2001
From: Robin Murphy <robin.murphy@arm.com>
Date: Mon, 28 Oct 2024 17:58:38 +0000
Subject: iommu: Make bus_iommu_probe() static

With the last external caller of bus_iommu_probe() now gone, make it
internal as it really should be.

Signed-off-by: Robin Murphy <robin.murphy@arm.com>
Tested-by: H. Nikolaus Schaller <hns@goldelico.com>
Reviewed-by: Kevin Hilman <khilman@baylibre.com>
Tested-by: Kevin Hilman <khilman@baylibre.com>
Tested-by: Beleswar Padhi <b-padhi@ti.com>
Link: https://lore.kernel.org/r/a7511a034a27259aff4e14d80a861d3c40fbff1e.1730136799.git.robin.murphy@arm.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 include/linux/iommu.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index bd722f473635..84a6ed5e803a 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -784,7 +784,6 @@ static inline void iommu_iotlb_gather_init(struct iommu_iotlb_gather *gather)
 	};
 }
 
-extern int bus_iommu_probe(const struct bus_type *bus);
 extern bool iommu_present(const struct bus_type *bus);
 extern bool device_iommu_capable(struct device *dev, enum iommu_cap cap);
 extern bool iommu_group_has_isolated_msi(struct iommu_group *group);
-- 
cgit v1.2.3


From 386c2b877b97cde53c6e422f9081ae133492fd05 Mon Sep 17 00:00:00 2001
From: Jason Xing <kernelxing@tencent.com>
Date: Wed, 23 Oct 2024 16:14:51 +0800
Subject: tcp: add a common helper to debug the underlying issue

Following the commit c8770db2d544 ("tcp: check skb is non-NULL
in tcp_rto_delta_us()"), we decided to add a helper so that it's
easier to get verbose warning on either cases.

Link: https://lore.kernel.org/all/5632e043-bdba-4d75-bc7e-bf58014492fd@redhat.com/
Suggested-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Jason Xing <kernelxing@tencent.com>
Cc: Neal Cardwell <ncardwell@google.com>
Reviewed-by: David Ahern <dsahern@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h | 25 ++++++++++++++-----------
 1 file changed, 14 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 739a9fb83d0c..8b8d94bb1746 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -2430,6 +2430,19 @@ void tcp_plb_update_state(const struct sock *sk, struct tcp_plb_state *plb,
 void tcp_plb_check_rehash(struct sock *sk, struct tcp_plb_state *plb);
 void tcp_plb_update_state_upon_rto(struct sock *sk, struct tcp_plb_state *plb);
 
+static inline void tcp_warn_once(const struct sock *sk, bool cond, const char *str)
+{
+	WARN_ONCE(cond,
+		  "%sout:%u sacked:%u lost:%u retrans:%u tlp_high_seq:%u sk_state:%u ca_state:%u advmss:%u mss_cache:%u pmtu:%u\n",
+		  str,
+		  tcp_sk(sk)->packets_out, tcp_sk(sk)->sacked_out,
+		  tcp_sk(sk)->lost_out, tcp_sk(sk)->retrans_out,
+		  tcp_sk(sk)->tlp_high_seq, sk->sk_state,
+		  inet_csk(sk)->icsk_ca_state,
+		  tcp_sk(sk)->advmss, tcp_sk(sk)->mss_cache,
+		  inet_csk(sk)->icsk_pmtu_cookie);
+}
+
 /* At how many usecs into the future should the RTO fire? */
 static inline s64 tcp_rto_delta_us(const struct sock *sk)
 {
@@ -2441,17 +2454,7 @@ static inline s64 tcp_rto_delta_us(const struct sock *sk)
 
 		return rto_time_stamp_us - tcp_sk(sk)->tcp_mstamp;
 	} else {
-		WARN_ONCE(1,
-			"rtx queue empty: "
-			"out:%u sacked:%u lost:%u retrans:%u "
-			"tlp_high_seq:%u sk_state:%u ca_state:%u "
-			"advmss:%u mss_cache:%u pmtu:%u\n",
-			tcp_sk(sk)->packets_out, tcp_sk(sk)->sacked_out,
-			tcp_sk(sk)->lost_out, tcp_sk(sk)->retrans_out,
-			tcp_sk(sk)->tlp_high_seq, sk->sk_state,
-			inet_csk(sk)->icsk_ca_state,
-			tcp_sk(sk)->advmss, tcp_sk(sk)->mss_cache,
-			inet_csk(sk)->icsk_pmtu_cookie);
+		tcp_warn_once(sk, 1, "rtx queue empty: ");
 		return jiffies_to_usecs(rto);
 	}
 
-- 
cgit v1.2.3


From 668d663989c77fcb2a92748645e4c394b03d5988 Mon Sep 17 00:00:00 2001
From: Jason Xing <kernelxing@tencent.com>
Date: Wed, 23 Oct 2024 16:14:52 +0800
Subject: tcp: add more warn of socket in tcp_send_loss_probe()

Add two fields to print in the helper which here covers tcp_send_loss_probe().

Link: https://lore.kernel.org/all/5632e043-bdba-4d75-bc7e-bf58014492fd@redhat.com/
Suggested-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Jason Xing <kernelxing@tencent.com>
Cc: Neal Cardwell <ncardwell@google.com>
Reviewed-by: David Ahern <dsahern@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 8b8d94bb1746..e9b37b76e894 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -2433,8 +2433,9 @@ void tcp_plb_update_state_upon_rto(struct sock *sk, struct tcp_plb_state *plb);
 static inline void tcp_warn_once(const struct sock *sk, bool cond, const char *str)
 {
 	WARN_ONCE(cond,
-		  "%sout:%u sacked:%u lost:%u retrans:%u tlp_high_seq:%u sk_state:%u ca_state:%u advmss:%u mss_cache:%u pmtu:%u\n",
+		  "%scwn:%u out:%u sacked:%u lost:%u retrans:%u tlp_high_seq:%u sk_state:%u ca_state:%u advmss:%u mss_cache:%u pmtu:%u\n",
 		  str,
+		  tcp_snd_cwnd(tcp_sk(sk)),
 		  tcp_sk(sk)->packets_out, tcp_sk(sk)->sacked_out,
 		  tcp_sk(sk)->lost_out, tcp_sk(sk)->retrans_out,
 		  tcp_sk(sk)->tlp_high_seq, sk->sk_state,
-- 
cgit v1.2.3


From 133008e84b99e4f5f8cf3d8b768c995732df9406 Mon Sep 17 00:00:00 2001
From: Keith Busch <kbusch@kernel.org>
Date: Wed, 16 Oct 2024 13:13:09 -0700
Subject: blk-integrity: remove seed for user mapped buffers

The seed is only used for kernel generation and verification. That
doesn't happen for user buffers, so passing the seed around doesn't
accomplish anything.

Signed-off-by: Keith Busch <kbusch@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Anuj Gupta <anuj20.g@samsung.com>
Reviewed-by: Kanchan Joshi <joshi.k@samsung.com>
Link: https://lore.kernel.org/r/20241016201309.1090320-1-kbusch@meta.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/bio-integrity.h | 4 ++--
 include/linux/blk-integrity.h | 5 ++---
 2 files changed, 4 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/bio-integrity.h b/include/linux/bio-integrity.h
index dd831c269e99..dbf0f74c1529 100644
--- a/include/linux/bio-integrity.h
+++ b/include/linux/bio-integrity.h
@@ -72,7 +72,7 @@ struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio, gfp_t gfp,
 		unsigned int nr);
 int bio_integrity_add_page(struct bio *bio, struct page *page, unsigned int len,
 		unsigned int offset);
-int bio_integrity_map_user(struct bio *bio, void __user *ubuf, ssize_t len, u32 seed);
+int bio_integrity_map_user(struct bio *bio, void __user *ubuf, ssize_t len);
 void bio_integrity_unmap_user(struct bio *bio);
 bool bio_integrity_prep(struct bio *bio);
 void bio_integrity_advance(struct bio *bio, unsigned int bytes_done);
@@ -99,7 +99,7 @@ static inline void bioset_integrity_free(struct bio_set *bs)
 }
 
 static inline int bio_integrity_map_user(struct bio *bio, void __user *ubuf,
-					 ssize_t len, u32 seed)
+					 ssize_t len)
 {
 	return -EINVAL;
 }
diff --git a/include/linux/blk-integrity.h b/include/linux/blk-integrity.h
index 676f8f860c47..c7eae0bfb013 100644
--- a/include/linux/blk-integrity.h
+++ b/include/linux/blk-integrity.h
@@ -28,7 +28,7 @@ static inline bool queue_limits_stack_integrity_bdev(struct queue_limits *t,
 int blk_rq_map_integrity_sg(struct request *, struct scatterlist *);
 int blk_rq_count_integrity_sg(struct request_queue *, struct bio *);
 int blk_rq_integrity_map_user(struct request *rq, void __user *ubuf,
-			      ssize_t bytes, u32 seed);
+			      ssize_t bytes);
 
 static inline bool
 blk_integrity_queue_supports_integrity(struct request_queue *q)
@@ -104,8 +104,7 @@ static inline int blk_rq_map_integrity_sg(struct request *q,
 }
 static inline int blk_rq_integrity_map_user(struct request *rq,
 					    void __user *ubuf,
-					    ssize_t bytes,
-					    u32 seed)
+					    ssize_t bytes)
 {
 	return -EINVAL;
 }
-- 
cgit v1.2.3


From db71aae70e3e646d8ba4cb50e4bd4c281a91c804 Mon Sep 17 00:00:00 2001
From: Puranjay Mohan <puranjay@kernel.org>
Date: Sat, 26 Oct 2024 12:53:36 +0000
Subject: net: checksum: Move from32to16() to generic header
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

from32to16() is used by lib/checksum.c and also by
arch/parisc/lib/checksum.c. The next patch will use it in the
bpf_csum_diff helper.

Move from32to16() to the include/net/checksum.h as csum_from32to16() and
remove other implementations.

Signed-off-by: Puranjay Mohan <puranjay@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20241026125339.26459-2-puranjay@kernel.org
---
 include/net/checksum.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include')

diff --git a/include/net/checksum.h b/include/net/checksum.h
index 1338cb92c8e7..243f972267b8 100644
--- a/include/net/checksum.h
+++ b/include/net/checksum.h
@@ -151,6 +151,12 @@ static inline void csum_replace(__wsum *csum, __wsum old, __wsum new)
 	*csum = csum_add(csum_sub(*csum, old), new);
 }
 
+static inline unsigned short csum_from32to16(unsigned int sum)
+{
+	sum += (sum >> 16) | (sum << 16);
+	return (unsigned short)(sum >> 16);
+}
+
 struct sk_buff;
 void inet_proto_csum_replace4(__sum16 *sum, struct sk_buff *skb,
 			      __be32 from, __be32 to, bool pseudohdr);
-- 
cgit v1.2.3


From 899f44531fe6cac4b024710fec647ecc127724b8 Mon Sep 17 00:00:00 2001
From: Sibi Sankar <quic_sibis@quicinc.com>
Date: Wed, 30 Oct 2024 18:25:10 +0530
Subject: pmdomain: core: Add GENPD_FLAG_DEV_NAME_FW flag

Introduce GENPD_FLAG_DEV_NAME_FW flag which instructs genpd to generate
an unique device name using ida. It is aimed to be used by genpd providers
which derive their names directly from FW making them susceptible to
debugfs node creation failures.

Reported-by: Johan Hovold <johan+linaro@kernel.org>
Closes: https://lore.kernel.org/lkml/ZoQjAWse2YxwyRJv@hovoldconsulting.com/
Fixes: 718072ceb211 ("PM: domains: create debugfs nodes when adding power domains")
Suggested-by: Ulf Hansson <ulf.hansson@linaro.org>
Suggested-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Signed-off-by: Sibi Sankar <quic_sibis@quicinc.com>
Cc: stable@vger.kernel.org
Message-ID: <20241030125512.2884761-5-quic_sibis@quicinc.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 include/linux/pm_domain.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include')

diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h
index b637ec14025f..cf4b11be3709 100644
--- a/include/linux/pm_domain.h
+++ b/include/linux/pm_domain.h
@@ -92,6 +92,10 @@ struct dev_pm_domain_list {
  * GENPD_FLAG_OPP_TABLE_FW:	The genpd provider supports performance states,
  *				but its corresponding OPP tables are not
  *				described in DT, but are given directly by FW.
+ *
+ * GENPD_FLAG_DEV_NAME_FW:	Instructs genpd to generate an unique device name
+ *				using ida. It is used by genpd providers which
+ *				get their genpd-names directly from FW.
  */
 #define GENPD_FLAG_PM_CLK	 (1U << 0)
 #define GENPD_FLAG_IRQ_SAFE	 (1U << 1)
@@ -101,6 +105,7 @@ struct dev_pm_domain_list {
 #define GENPD_FLAG_RPM_ALWAYS_ON (1U << 5)
 #define GENPD_FLAG_MIN_RESIDENCY (1U << 6)
 #define GENPD_FLAG_OPP_TABLE_FW	 (1U << 7)
+#define GENPD_FLAG_DEV_NAME_FW	 (1U << 8)
 
 enum gpd_status {
 	GENPD_STATE_ON = 0,	/* PM domain is on */
@@ -163,6 +168,7 @@ struct generic_pm_domain {
 	atomic_t sd_count;	/* Number of subdomains with power "on" */
 	enum gpd_status status;	/* Current state of the domain */
 	unsigned int device_count;	/* Number of devices */
+	unsigned int device_id;		/* unique device id */
 	unsigned int suspended_count;	/* System suspend device counter */
 	unsigned int prepared_count;	/* Suspend counter of prepared devices */
 	unsigned int performance_state;	/* Aggregated max performance state */
-- 
cgit v1.2.3


From 750fd23926f1507cc826b5a4fdd4bfc7283e7723 Mon Sep 17 00:00:00 2001
From: Avadhut Naik <avadhut.naik@amd.com>
Date: Tue, 22 Oct 2024 19:36:27 +0000
Subject: x86/mce: Add wrapper for struct mce to export vendor specific info

Currently, exporting new additional machine check error information
involves adding new fields for the same at the end of the struct mce.
This additional information can then be consumed through mcelog or
tracepoint.

However, as new MSRs are being added (and will be added in the future)
by CPU vendors on their newer CPUs with additional machine check error
information to be exported, the size of struct mce will balloon on some
CPUs, unnecessarily, since those fields are vendor-specific. Moreover,
different CPU vendors may export the additional information in varying
sizes.

The problem particularly intensifies since struct mce is exposed to
userspace as part of UAPI. It's bloating through vendor-specific data
should be avoided to limit the information being sent out to userspace.

Add a new structure mce_hw_err to wrap the existing struct mce. The same
will prevent its ballooning since vendor-specifc data, if any, can now be
exported through a union within the wrapper structure and through
__dynamic_array in mce_record tracepoint.

Furthermore, new internal kernel fields can be added to the wrapper
struct without impacting the user space API.

  [ bp: Restore reverse x-mas tree order of function vars declarations. ]

Suggested-by: Borislav Petkov (AMD) <bp@alien8.de>
Signed-off-by: Avadhut Naik <avadhut.naik@amd.com>
Signed-off-by: Yazen Ghannam <yazen.ghannam@amd.com>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Reviewed-by: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
Link: https://lore.kernel.org/r/20241022194158.110073-2-avadhut.naik@amd.com
---
 include/trace/events/mce.h | 42 +++++++++++++++++++++---------------------
 1 file changed, 21 insertions(+), 21 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/mce.h b/include/trace/events/mce.h
index f0f7b3cb2041..65aba1afcd07 100644
--- a/include/trace/events/mce.h
+++ b/include/trace/events/mce.h
@@ -19,9 +19,9 @@
 
 TRACE_EVENT(mce_record,
 
-	TP_PROTO(struct mce *m),
+	TP_PROTO(struct mce_hw_err *err),
 
-	TP_ARGS(m),
+	TP_ARGS(err),
 
 	TP_STRUCT__entry(
 		__field(	u64,		mcgcap		)
@@ -46,25 +46,25 @@ TRACE_EVENT(mce_record,
 	),
 
 	TP_fast_assign(
-		__entry->mcgcap		= m->mcgcap;
-		__entry->mcgstatus	= m->mcgstatus;
-		__entry->status		= m->status;
-		__entry->addr		= m->addr;
-		__entry->misc		= m->misc;
-		__entry->synd		= m->synd;
-		__entry->ipid		= m->ipid;
-		__entry->ip		= m->ip;
-		__entry->tsc		= m->tsc;
-		__entry->ppin		= m->ppin;
-		__entry->walltime	= m->time;
-		__entry->cpu		= m->extcpu;
-		__entry->cpuid		= m->cpuid;
-		__entry->apicid		= m->apicid;
-		__entry->socketid	= m->socketid;
-		__entry->cs		= m->cs;
-		__entry->bank		= m->bank;
-		__entry->cpuvendor	= m->cpuvendor;
-		__entry->microcode	= m->microcode;
+		__entry->mcgcap		= err->m.mcgcap;
+		__entry->mcgstatus	= err->m.mcgstatus;
+		__entry->status		= err->m.status;
+		__entry->addr		= err->m.addr;
+		__entry->misc		= err->m.misc;
+		__entry->synd		= err->m.synd;
+		__entry->ipid		= err->m.ipid;
+		__entry->ip		= err->m.ip;
+		__entry->tsc		= err->m.tsc;
+		__entry->ppin		= err->m.ppin;
+		__entry->walltime	= err->m.time;
+		__entry->cpu		= err->m.extcpu;
+		__entry->cpuid		= err->m.cpuid;
+		__entry->apicid		= err->m.apicid;
+		__entry->socketid	= err->m.socketid;
+		__entry->cs		= err->m.cs;
+		__entry->bank		= err->m.bank;
+		__entry->cpuvendor	= err->m.cpuvendor;
+		__entry->microcode	= err->m.microcode;
 	),
 
 	TP_printk("CPU: %d, MCGc/s: %llx/%llx, MC%d: %016Lx, IPID: %016Lx, ADDR: %016Lx, MISC: %016Lx, SYND: %016Lx, RIP: %02x:<%016Lx>, TSC: %llx, PPIN: %llx, vendor: %u, CPUID: %x, time: %llu, socket: %u, APIC: %x, microcode: %x",
-- 
cgit v1.2.3


From e52750fb1458ae9ea5860a08ed7a149185bc5b97 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <rostedt@goodmis.org>
Date: Tue, 22 Oct 2024 19:36:28 +0000
Subject: tracing: Add __print_dynamic_array() helper

When printing a dynamic array in a trace event, the method is rather ugly.
It has the format of:

  __print_array(__get_dynamic_array(array),
            __get_dynmaic_array_len(array) / el_size, el_size)

Since dynamic arrays are known to the tracing infrastructure, create a
helper macro that does the above for you.

  __print_dynamic_array(array, el_size)

Which would expand to the same output.

Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
Signed-off-by: Avadhut Naik <avadhut.naik@amd.com>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Reviewed-by: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
Link: https://lore.kernel.org/r/20241022194158.110073-3-avadhut.naik@amd.com
---
 include/trace/stages/stage3_trace_output.h | 8 ++++++++
 include/trace/stages/stage7_class_define.h | 1 +
 2 files changed, 9 insertions(+)

(limited to 'include')

diff --git a/include/trace/stages/stage3_trace_output.h b/include/trace/stages/stage3_trace_output.h
index c1fb1355d309..1e7b0bef95f5 100644
--- a/include/trace/stages/stage3_trace_output.h
+++ b/include/trace/stages/stage3_trace_output.h
@@ -119,6 +119,14 @@
 		trace_print_array_seq(p, array, count, el_size);	\
 	})
 
+#undef __print_dynamic_array
+#define __print_dynamic_array(array, el_size)				\
+	({								\
+		__print_array(__get_dynamic_array(array),		\
+			      __get_dynamic_array_len(array) / (el_size), \
+			      (el_size));				\
+	})
+
 #undef __print_hex_dump
 #define __print_hex_dump(prefix_str, prefix_type,			\
 			 rowsize, groupsize, buf, len, ascii)		\
diff --git a/include/trace/stages/stage7_class_define.h b/include/trace/stages/stage7_class_define.h
index bcb960d16fc0..fcd564a590f4 100644
--- a/include/trace/stages/stage7_class_define.h
+++ b/include/trace/stages/stage7_class_define.h
@@ -22,6 +22,7 @@
 #undef __get_rel_cpumask
 #undef __get_rel_sockaddr
 #undef __print_array
+#undef __print_dynamic_array
 #undef __print_hex_dump
 #undef __get_buf
 
-- 
cgit v1.2.3


From b35108a51cf7bab58d7eace1267d7965978bcdb8 Mon Sep 17 00:00:00 2001
From: Easwar Hariharan <eahariha@linux.microsoft.com>
Date: Wed, 30 Oct 2024 17:47:35 +0000
Subject: jiffies: Define secs_to_jiffies()

secs_to_jiffies() is defined in hci_event.c and cannot be reused by
other call sites. Hoist it into the core code to allow conversion of the
~1150 usages of msecs_to_jiffies() that either:

 - use a multiplier value of 1000 or equivalently MSEC_PER_SEC, or
 - have timeouts that are denominated in seconds (i.e. end in 000)

It's implemented as a macro to allow usage in static initializers.

This will also allow conversion of yet more sites that use (sec * HZ)
directly, and improve their readability.

Suggested-by: Michael Kelley <mhklinux@outlook.com>
Signed-off-by: Easwar Hariharan <eahariha@linux.microsoft.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Link: https://lore.kernel.org/all/20241030-open-coded-timeouts-v3-1-9ba123facf88@linux.microsoft.com
---
 include/linux/jiffies.h | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'include')

diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h
index 5d21dacd62bc..ed945f42e064 100644
--- a/include/linux/jiffies.h
+++ b/include/linux/jiffies.h
@@ -526,6 +526,19 @@ static __always_inline unsigned long msecs_to_jiffies(const unsigned int m)
 	}
 }
 
+/**
+ * secs_to_jiffies: - convert seconds to jiffies
+ * @_secs: time in seconds
+ *
+ * Conversion is done by simple multiplication with HZ
+ *
+ * secs_to_jiffies() is defined as a macro rather than a static inline
+ * function so it can be used in static initializers.
+ *
+ * Return: jiffies value
+ */
+#define secs_to_jiffies(_secs) ((_secs) * HZ)
+
 extern unsigned long __usecs_to_jiffies(const unsigned int u);
 #if !(USEC_PER_SEC % HZ)
 static inline unsigned long _usecs_to_jiffies(const unsigned int u)
-- 
cgit v1.2.3


From d9d959c36bec59f11c0eb6b5308729e3c4901b5e Mon Sep 17 00:00:00 2001
From: Philipp Stanner <pstanner@redhat.com>
Date: Wed, 30 Oct 2024 12:27:34 +0100
Subject: PCI: Make pcim_request_all_regions() a public function
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In order to remove the deprecated function
pcim_iomap_regions_request_all(), a few drivers need an interface to
request all BARs a PCI device offers.

Make pcim_request_all_regions() a public interface.

Link: https://lore.kernel.org/r/20241030112743.104395-2-pstanner@redhat.com
Signed-off-by: Philipp Stanner <pstanner@redhat.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Damien Le Moal <dlemoal@kernel.org>
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
---
 include/linux/pci.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/pci.h b/include/linux/pci.h
index 573b4c4c2be6..3b151c8331e5 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -2293,6 +2293,7 @@ static inline void pci_fixup_device(enum pci_fixup_pass pass,
 				    struct pci_dev *dev) { }
 #endif
 
+int pcim_request_all_regions(struct pci_dev *pdev, const char *name);
 void __iomem *pcim_iomap(struct pci_dev *pdev, int bar, unsigned long maxlen);
 void __iomem *pcim_iomap_region(struct pci_dev *pdev, int bar,
 				const char *name);
-- 
cgit v1.2.3


From 6d9c59212523e719a63575222f1cd1b0aca3da4f Mon Sep 17 00:00:00 2001
From: Philipp Stanner <pstanner@redhat.com>
Date: Wed, 30 Oct 2024 12:27:43 +0100
Subject: PCI: Remove pcim_iomap_regions_request_all()

pcim_iomap_regions_request_all() have been deprecated in
commit e354bb84a4c1 ("PCI: Deprecate pcim_iomap_table(),
pcim_iomap_regions_request_all()").

All users of this function have been ported to other interfaces by now.

Remove pcim_iomap_regions_request_all().

Link: https://lore.kernel.org/r/20241030112743.104395-11-pstanner@redhat.com
Signed-off-by: Philipp Stanner <pstanner@redhat.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Damien Le Moal <dlemoal@kernel.org>
---
 include/linux/pci.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/pci.h b/include/linux/pci.h
index 3b151c8331e5..b59197635c5c 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -2301,8 +2301,6 @@ void pcim_iounmap(struct pci_dev *pdev, void __iomem *addr);
 void __iomem * const *pcim_iomap_table(struct pci_dev *pdev);
 int pcim_request_region(struct pci_dev *pdev, int bar, const char *name);
 int pcim_iomap_regions(struct pci_dev *pdev, int mask, const char *name);
-int pcim_iomap_regions_request_all(struct pci_dev *pdev, int mask,
-				   const char *name);
 void pcim_iounmap_regions(struct pci_dev *pdev, int mask);
 void __iomem *pcim_iomap_range(struct pci_dev *pdev, int bar,
 				unsigned long offset, unsigned long len);
-- 
cgit v1.2.3


From 4a6afd60733c75369680f4a40c82b7c8528f4a7a Mon Sep 17 00:00:00 2001
From: Philipp Stanner <pstanner@redhat.com>
Date: Wed, 16 Oct 2024 11:49:04 +0200
Subject: PCI: Make pcim_iounmap_region() a public function

The function pcim_iounmap_regions() is problematic because it uses a
bitmask mechanism to release / iounmap multiple BARs at once. It, thus,
prevents getting rid of the problematic iomap table mechanism which was
deprecated in commit e354bb84a4c1 ("PCI: Deprecate pcim_iomap_table(),
pcim_iomap_regions_request_all()").

pcim_iounmap_region() does not have that problem. Make it public as the
successor of pcim_iounmap_regions().

Link: https://lore.kernel.org/r/20241016094911.24818-3-pstanner@redhat.com
Signed-off-by: Philipp Stanner <pstanner@redhat.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 include/linux/pci.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/pci.h b/include/linux/pci.h
index b59197635c5c..b8d248c136ba 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -2297,6 +2297,7 @@ int pcim_request_all_regions(struct pci_dev *pdev, const char *name);
 void __iomem *pcim_iomap(struct pci_dev *pdev, int bar, unsigned long maxlen);
 void __iomem *pcim_iomap_region(struct pci_dev *pdev, int bar,
 				const char *name);
+void pcim_iounmap_region(struct pci_dev *pdev, int bar);
 void pcim_iounmap(struct pci_dev *pdev, void __iomem *addr);
 void __iomem * const *pcim_iomap_table(struct pci_dev *pdev);
 int pcim_request_region(struct pci_dev *pdev, int bar, const char *name);
-- 
cgit v1.2.3


From 3e7f43188ee227bcf0f07f60a00f1fd1aca10e6a Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Fri, 2 Aug 2024 13:01:36 -0700
Subject: KVM: Protect vCPU's "last run PID" with rwlock, not RCU

To avoid jitter on KVM_RUN due to synchronize_rcu(), use a rwlock instead
of RCU to protect vcpu->pid, a.k.a. the pid of the task last used to a
vCPU.  When userspace is doing M:N scheduling of tasks to vCPUs, e.g. to
run SEV migration helper vCPUs during post-copy, the synchronize_rcu()
needed to change the PID associated with the vCPU can stall for hundreds
of milliseconds, which is problematic for latency sensitive post-copy
operations.

In the directed yield path, do not acquire the lock if it's contended,
i.e. if the associated PID is changing, as that means the vCPU's task is
already running.

Reported-by: Steve Rutherford <srutherford@google.com>
Reviewed-by: Steve Rutherford <srutherford@google.com>
Acked-by: Oliver Upton <oliver.upton@linux.dev>
Link: https://lore.kernel.org/r/20240802200136.329973-3-seanjc@google.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 include/linux/kvm_host.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 02f0206fd2dc..18a1672ffcbf 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -334,7 +334,8 @@ struct kvm_vcpu {
 #ifndef __KVM_HAVE_ARCH_WQP
 	struct rcuwait wait;
 #endif
-	struct pid __rcu *pid;
+	struct pid *pid;
+	rwlock_t pid_lock;
 	int sigset_active;
 	sigset_t sigset;
 	unsigned int halt_poll_ns;
-- 
cgit v1.2.3


From 9e9af8bbb5f9b565b9faf691f96f661791e199b0 Mon Sep 17 00:00:00 2001
From: Kan Liang <kan.liang@linux.intel.com>
Date: Thu, 10 Oct 2024 07:26:04 -0700
Subject: perf/x86/rapl: Clean up cpumask and hotplug

The rapl pmu is die scope, which is supported by the generic perf_event
subsystem now.

Set the scope for the rapl PMU and remove all the cpumask and hotplug
codes.

Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Oliver Sang <oliver.sang@intel.com>
Tested-by: Dhananjay Ugwekar <dhananjay.ugwekar@amd.com>
Link: https://lore.kernel.org/r/20241010142604.770192-2-kan.liang@linux.intel.com
---
 include/linux/cpuhotplug.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 2361ed4d2b15..37a9afffb59e 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -208,7 +208,6 @@ enum cpuhp_state {
 	CPUHP_AP_PERF_X86_UNCORE_ONLINE,
 	CPUHP_AP_PERF_X86_AMD_UNCORE_ONLINE,
 	CPUHP_AP_PERF_X86_AMD_POWER_ONLINE,
-	CPUHP_AP_PERF_X86_RAPL_ONLINE,
 	CPUHP_AP_PERF_S390_CF_ONLINE,
 	CPUHP_AP_PERF_S390_SF_ONLINE,
 	CPUHP_AP_PERF_ARM_CCI_ONLINE,
-- 
cgit v1.2.3


From dd1a7567784e2b1f80258be04f57bcfa82c997eb Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Wed, 23 Oct 2024 21:41:59 -0700
Subject: uprobes: SRCU-protect uretprobe lifetime (with timeout)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Avoid taking refcount on uprobe in prepare_uretprobe(), instead take
uretprobe-specific SRCU lock and keep it active as kernel transfers
control back to user space.

Given we can't rely on user space returning from traced function within
reasonable time period, we need to make sure not to keep SRCU lock
active for too long, though. To that effect, we employ a timer callback
which is meant to terminate SRCU lock region after predefined timeout
(currently set to 100ms), and instead transfer underlying struct
uprobe's lifetime protection to refcounting.

This fallback to less scalable refcounting after 100ms is a fine
tradeoff from uretprobe's scalability and performance perspective,
because uretprobing *long running* user functions inherently doesn't run
into scalability issues (there is just not enough frequency to cause
noticeable issues with either performance or scalability).

The overall trick is in ensuring synchronization between current thread
and timer's callback fired on some other thread. To cope with that with
minimal logic complications, we add hprobe wrapper which is used to
contain all the synchronization related issues behind a small number of
basic helpers: hprobe_expire() for "downgrading" uprobe from SRCU-protected
state to refcounted state, and a hprobe_consume() and hprobe_finalize()
pair of single-use consuming helpers. Other than that, whatever current
thread's logic is there stays the same, as timer thread cannot modify
return_instance state (or add new/remove old return_instances). It only
takes care of SRCU unlock and uprobe refcounting, which is hidden from
the higher-level uretprobe handling logic.

We use atomic xchg() in hprobe_consume(), which is called from
performance critical handle_uretprobe_chain() function run in the
current context. When uncontended, this xchg() doesn't seem to hurt
performance as there are no other competing CPUs fighting for the same
cache line. We also mark struct return_instance as ____cacheline_aligned
to ensure no false sharing can happen.

Another technical moment. We need to make sure that the list of return
instances can be safely traversed under RCU from timer callback, so we
delay return_instance freeing with kfree_rcu() and make sure that list
modifications use RCU-aware operations.

Also, given SRCU lock survives transition from kernel to user space and
back we need to use lower-level __srcu_read_lock() and
__srcu_read_unlock() to avoid lockdep complaining.

Just to give an impression of a kind of performance improvements this
change brings, below are benchmarking results with and without these
SRCU changes, assuming other uprobe optimizations (mainly RCU Tasks
Trace for entry uprobes, lockless RB-tree lookup, and lockless VMA to
uprobe lookup) are left intact:

WITHOUT SRCU for uretprobes
===========================
uretprobe-nop         ( 1 cpus):    2.197 ± 0.002M/s  (  2.197M/s/cpu)
uretprobe-nop         ( 2 cpus):    3.325 ± 0.001M/s  (  1.662M/s/cpu)
uretprobe-nop         ( 3 cpus):    4.129 ± 0.002M/s  (  1.376M/s/cpu)
uretprobe-nop         ( 4 cpus):    6.180 ± 0.003M/s  (  1.545M/s/cpu)
uretprobe-nop         ( 8 cpus):    7.323 ± 0.005M/s  (  0.915M/s/cpu)
uretprobe-nop         (16 cpus):    6.943 ± 0.005M/s  (  0.434M/s/cpu)
uretprobe-nop         (32 cpus):    5.931 ± 0.014M/s  (  0.185M/s/cpu)
uretprobe-nop         (64 cpus):    5.145 ± 0.003M/s  (  0.080M/s/cpu)
uretprobe-nop         (80 cpus):    4.925 ± 0.005M/s  (  0.062M/s/cpu)

WITH SRCU for uretprobes
========================
uretprobe-nop         ( 1 cpus):    1.968 ± 0.001M/s  (  1.968M/s/cpu)
uretprobe-nop         ( 2 cpus):    3.739 ± 0.003M/s  (  1.869M/s/cpu)
uretprobe-nop         ( 3 cpus):    5.616 ± 0.003M/s  (  1.872M/s/cpu)
uretprobe-nop         ( 4 cpus):    7.286 ± 0.002M/s  (  1.822M/s/cpu)
uretprobe-nop         ( 8 cpus):   13.657 ± 0.007M/s  (  1.707M/s/cpu)
uretprobe-nop         (32 cpus):   45.305 ± 0.066M/s  (  1.416M/s/cpu)
uretprobe-nop         (64 cpus):   42.390 ± 0.922M/s  (  0.662M/s/cpu)
uretprobe-nop         (80 cpus):   47.554 ± 2.411M/s  (  0.594M/s/cpu)

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20241024044159.3156646-3-andrii@kernel.org
---
 include/linux/uprobes.h | 54 +++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 52 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h
index dbaf04189548..7a051b5d2edd 100644
--- a/include/linux/uprobes.h
+++ b/include/linux/uprobes.h
@@ -15,6 +15,7 @@
 #include <linux/rbtree.h>
 #include <linux/types.h>
 #include <linux/wait.h>
+#include <linux/timer.h>
 
 struct uprobe;
 struct vm_area_struct;
@@ -67,6 +68,53 @@ enum uprobe_task_state {
 	UTASK_SSTEP_TRAPPED,
 };
 
+/* The state of hybrid-lifetime uprobe inside struct return_instance */
+enum hprobe_state {
+	HPROBE_LEASED,		/* uretprobes_srcu-protected uprobe */
+	HPROBE_STABLE,		/* refcounted uprobe */
+	HPROBE_GONE,		/* NULL uprobe, SRCU expired, refcount failed */
+	HPROBE_CONSUMED,	/* uprobe "consumed" by uretprobe handler */
+};
+
+/*
+ * Hybrid lifetime uprobe. Represents a uprobe instance that could be either
+ * SRCU protected (with SRCU protection eventually potentially timing out),
+ * refcounted using uprobe->ref, or there could be no valid uprobe (NULL).
+ *
+ * hprobe's internal state is setup such that background timer thread can
+ * atomically "downgrade" temporarily RCU-protected uprobe into refcounted one
+ * (or no uprobe, if refcounting failed).
+ *
+ * *stable* pointer always point to the uprobe (or could be NULL if there is
+ * was no valid underlying uprobe to begin with).
+ *
+ * *leased* pointer is the key to achieving race-free atomic lifetime state
+ * transition and can have three possible states:
+ *   - either the same non-NULL value as *stable*, in which case uprobe is
+ *     SRCU-protected;
+ *   - NULL, in which case uprobe (if there is any) is refcounted;
+ *   - special __UPROBE_DEAD value, which represents an uprobe that was SRCU
+ *     protected initially, but SRCU period timed out and we attempted to
+ *     convert it to refcounted, but refcount_inc_not_zero() failed, because
+ *     uprobe effectively went away (the last consumer unsubscribed). In this
+ *     case it's important to know that *stable* pointer (which still has
+ *     non-NULL uprobe pointer) shouldn't be used, because lifetime of
+ *     underlying uprobe is not guaranteed anymore. __UPROBE_DEAD is just an
+ *     internal marker and is handled transparently by hprobe_fetch() helper.
+ *
+ * When uprobe is SRCU-protected, we also record srcu_idx value, necessary for
+ * SRCU unlocking.
+ *
+ * See hprobe_expire() and hprobe_fetch() for details of race-free uprobe
+ * state transitioning details. It all hinges on atomic xchg() over *leaded*
+ * pointer. *stable* pointer, once initially set, is not modified concurrently.
+ */
+struct hprobe {
+	enum hprobe_state state;
+	int srcu_idx;
+	struct uprobe *uprobe;
+};
+
 /*
  * uprobe_task: Metadata of a task while it singlesteps.
  */
@@ -86,6 +134,7 @@ struct uprobe_task {
 	};
 
 	struct uprobe			*active_uprobe;
+	struct timer_list		ri_timer;
 	unsigned long			xol_vaddr;
 
 	struct arch_uprobe              *auprobe;
@@ -100,7 +149,7 @@ struct return_consumer {
 };
 
 struct return_instance {
-	struct uprobe		*uprobe;
+	struct hprobe		hprobe;
 	unsigned long		func;
 	unsigned long		stack;		/* stack pointer */
 	unsigned long		orig_ret_vaddr; /* original return address */
@@ -108,9 +157,10 @@ struct return_instance {
 	int			consumers_cnt;
 
 	struct return_instance	*next;		/* keep as stack */
+	struct rcu_head		rcu;
 
 	struct return_consumer	consumers[] __counted_by(consumers_cnt);
-};
+} ____cacheline_aligned;
 
 enum rp_check {
 	RP_CHECK_CALL,
-- 
cgit v1.2.3


From f247fd22e9f27d919d81861e4dcc438e0b6d179b Mon Sep 17 00:00:00 2001
From: Sven Schnelle <svens@linux.ibm.com>
Date: Wed, 23 Oct 2024 08:56:00 +0200
Subject: s390/time: Add clocksource id to TOD clock

To allow specifying the clock source in the upcoming PtP driver,
add a clocksource ID to the s390 TOD clock.

Acked-by: Heiko Carstens <hca@linux.ibm.com>
Acked-by: Richard Cochran <richardcochran@gmail.com>
Signed-off-by: Sven Schnelle <svens@linux.ibm.com>
Link: https://patch.msgid.link/20241023065601.449586-2-svens@linux.ibm.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/clocksource_ids.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/clocksource_ids.h b/include/linux/clocksource_ids.h
index 2bb4d8c2f1b0..c4ef4ae2eded 100644
--- a/include/linux/clocksource_ids.h
+++ b/include/linux/clocksource_ids.h
@@ -6,6 +6,7 @@
 enum clocksource_ids {
 	CSID_GENERIC		= 0,
 	CSID_ARM_ARCH_COUNTER,
+	CSID_S390_TOD,
 	CSID_X86_TSC_EARLY,
 	CSID_X86_TSC,
 	CSID_X86_KVM_CLK,
-- 
cgit v1.2.3


From 2748697225c38a19666bba6a83afc6bf46ee16be Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Wed, 23 Oct 2024 16:52:46 +0300
Subject: net: sched: propagate "skip_sw" flag to struct
 flow_cls_common_offload

Background: switchdev ports offload the Linux bridge, and most of the
packets they handle will never see the CPU. The ports between which
there exists no hardware data path are considered 'foreign' to switchdev.
These can either be normal physical NICs without switchdev offload, or
incompatible switchdev ports, or virtual interfaces like veth/dummy/etc.

In some cases, an offloaded filter can only do half the work, and the
rest must be handled by software. Redirecting/mirroring from the ingress
of a switchdev port towards a foreign interface is one example of
combined hardware/software data path. The most that the switchdev port
can do is to extract the matching packets from its offloaded data path
and send them to the CPU. From there on, the software filter runs
(a second time, after the first run in hardware) on the packet and
performs the mirred action.

It makes sense for switchdev drivers which allow this kind of "half
offloading" to sense the "skip_sw" flag of the filter/action pair, and
deny attempts from the user to install a filter that does not run in
software, because that simply won't work.

In fact, a mirred action on a switchdev port towards a dummy interface
appears to be a valid way of (selectively) monitoring offloaded traffic
that flows through it. IFF_PROMISC was also discussed years ago, but
(despite initial disagreement) there seems to be consensus that this
flag should not affect the destination taken by packets, but merely
whether or not the NIC discards packets with unknown MAC DA for local
processing.

[1] https://lore.kernel.org/netdev/20190830092637.7f83d162@ceranb/
[2] https://lore.kernel.org/netdev/20191002233750.13566-1-olteanv@gmail.com/
Suggested-by: Ido Schimmel <idosch@nvidia.com>
Link: https://lore.kernel.org/netdev/ZxUo0Dc0M5Y6l9qF@shredder.mtl.com/
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Link: https://patch.msgid.link/20241023135251.1752488-2-vladimir.oltean@nxp.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/flow_offload.h | 1 +
 include/net/pkt_cls.h      | 1 +
 2 files changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h
index 292cd8f4b762..596ab9791e4d 100644
--- a/include/net/flow_offload.h
+++ b/include/net/flow_offload.h
@@ -685,6 +685,7 @@ struct flow_cls_common_offload {
 	u32 chain_index;
 	__be16 protocol;
 	u32 prio;
+	bool skip_sw;
 	struct netlink_ext_ack *extack;
 };
 
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index 4880b3a7aced..cf199af85c52 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -755,6 +755,7 @@ tc_cls_common_offload_init(struct flow_cls_common_offload *cls_common,
 	cls_common->chain_index = tp->chain->index;
 	cls_common->protocol = tp->protocol;
 	cls_common->prio = tp->prio >> 16;
+	cls_common->skip_sw = tc_skip_sw(flags);
 	if (tc_skip_sw(flags) || flags & TCA_CLS_FLAGS_VERBOSE)
 		cls_common->extack = extack;
 }
-- 
cgit v1.2.3


From 164b5e20cdf6038f1b38867d2f6252ec6f10c356 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Wed, 23 Oct 2024 15:53:14 +0100
Subject: KVM: arm64: nv: Handle CNTHCTL_EL2 specially

Accessing CNTHCTL_EL2 is fraught with danger if running with
HCR_EL2.E2H=1: half of the bits are held in CNTKCTL_EL1, and
thus can be changed behind our back, while the rest lives
in the CNTHCTL_EL2 shadow copy that is memory-based.

Yes, this is a lot of fun!

Make sure that we merge the two on read access, while we can
write to CNTKCTL_EL1 in a more straightforward manner.

Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20241023145345.1613824-7-maz@kernel.org
Signed-off-by: Oliver Upton <oliver.upton@linux.dev>
---
 include/kvm/arm_arch_timer.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h
index c819c5d16613..fd650a8789b9 100644
--- a/include/kvm/arm_arch_timer.h
+++ b/include/kvm/arm_arch_timer.h
@@ -147,6 +147,9 @@ u64 timer_get_cval(struct arch_timer_context *ctxt);
 void kvm_timer_cpu_up(void);
 void kvm_timer_cpu_down(void);
 
+/* CNTKCTL_EL1 valid bits as of DDI0487J.a */
+#define CNTKCTL_VALID_BITS	(BIT(17) | GENMASK_ULL(9, 0))
+
 static inline bool has_cntpoff(void)
 {
 	return (has_vhe() && cpus_have_final_cap(ARM64_HAS_ECV_CNTPOFF));
-- 
cgit v1.2.3


From d4fca1358ea9096f2f6ed942e2cb3a820073dfc1 Mon Sep 17 00:00:00 2001
From: Avadhut Naik <avadhut.naik@amd.com>
Date: Tue, 22 Oct 2024 19:36:29 +0000
Subject: x86/MCE/AMD: Add support for new MCA_SYND{1,2} registers

Starting with Zen4, AMD's Scalable MCA systems incorporate two new registers:
MCA_SYND1 and MCA_SYND2.

These registers will include supplemental error information in addition to the
existing MCA_SYND register. The data within these registers is considered
valid if MCA_STATUS[SyndV] is set.

Userspace error decoding tools like rasdaemon gather related hardware error
information through the tracepoints.

Therefore, export these two registers through the mce_record tracepoint so
that tools like rasdaemon can parse them and output the supplemental error
information like FRU text contained in them.

  [ bp: Massage. ]

Signed-off-by: Yazen Ghannam <yazen.ghannam@amd.com>
Signed-off-by: Avadhut Naik <avadhut.naik@amd.com>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Reviewed-by: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
Link: https://lore.kernel.org/r/20241022194158.110073-4-avadhut.naik@amd.com
---
 include/trace/events/mce.h | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/mce.h b/include/trace/events/mce.h
index 65aba1afcd07..c1c50df9ecfd 100644
--- a/include/trace/events/mce.h
+++ b/include/trace/events/mce.h
@@ -43,6 +43,7 @@ TRACE_EVENT(mce_record,
 		__field(	u8,		bank		)
 		__field(	u8,		cpuvendor	)
 		__field(	u32,		microcode	)
+		__dynamic_array(u8, v_data, sizeof(err->vendor))
 	),
 
 	TP_fast_assign(
@@ -65,9 +66,10 @@ TRACE_EVENT(mce_record,
 		__entry->bank		= err->m.bank;
 		__entry->cpuvendor	= err->m.cpuvendor;
 		__entry->microcode	= err->m.microcode;
+		memcpy(__get_dynamic_array(v_data), &err->vendor, sizeof(err->vendor));
 	),
 
-	TP_printk("CPU: %d, MCGc/s: %llx/%llx, MC%d: %016Lx, IPID: %016Lx, ADDR: %016Lx, MISC: %016Lx, SYND: %016Lx, RIP: %02x:<%016Lx>, TSC: %llx, PPIN: %llx, vendor: %u, CPUID: %x, time: %llu, socket: %u, APIC: %x, microcode: %x",
+	TP_printk("CPU: %d, MCGc/s: %llx/%llx, MC%d: %016llx, IPID: %016llx, ADDR: %016llx, MISC: %016llx, SYND: %016llx, RIP: %02x:<%016llx>, TSC: %llx, PPIN: %llx, vendor: %u, CPUID: %x, time: %llu, socket: %u, APIC: %x, microcode: %x, vendor data: %s",
 		__entry->cpu,
 		__entry->mcgcap, __entry->mcgstatus,
 		__entry->bank, __entry->status,
@@ -83,7 +85,8 @@ TRACE_EVENT(mce_record,
 		__entry->walltime,
 		__entry->socketid,
 		__entry->apicid,
-		__entry->microcode)
+		__entry->microcode,
+		__print_dynamic_array(v_data, sizeof(u8)))
 );
 
 #endif /* _TRACE_MCE_H */
-- 
cgit v1.2.3


From 3b1596a21fbf210f5b763fd3c0be280650475b52 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <frederic@kernel.org>
Date: Tue, 29 Oct 2024 13:54:43 +0100
Subject: clockevents: Shutdown and unregister current clockevents at
 CPUHP_AP_TICK_DYING

The way the clockevent devices are finally stopped while a CPU is
offlining is currently chaotic. The layout being by order:

1) tick_sched_timer_dying() stops the tick and the underlying clockevent
  but only for oneshot case. The periodic tick and its related
  clockevent still runs.

2) tick_broadcast_offline() detaches and stops the per-cpu oneshot
  broadcast and append it to the released list.

3) Some individual clockevent drivers stop the clockevents (a second time if
  the tick is oneshot)

4) Once the CPU is dead, a control CPU remotely detaches and stops
  (a 3rd time if oneshot mode) the CPU clockevent and adds it to the
  released list.

5) The released list containing the broadcast device released on step 2)
   and the remotely detached clockevent from step 4) are unregistered.

These random events can be factorized if the current clockevent is
detached and stopped by the dying CPU at the generic layer, that is
from the dying CPU:

a) Stop the tick
b) Stop/detach the underlying per-cpu oneshot broadcast clockevent
c) Stop/detach the underlying clockevent
d) Release / unregister the clockevents from b) and c)
e) Release / unregister the remaining clockevents from the dying CPU.
   This part could be performed by the dying CPU

This way the drivers and the tick layer don't need to care about
clockevent operations during cpuhotplug down. This also unifies the tick
behaviour on offline CPUs between oneshot and periodic modes, avoiding
offline ticks altogether for sanity.

Adopt the simplification.

[ tglx: Remove the WARN_ON() in clockevents_register_device() as that
  	is called from an upcoming CPU before the CPU is marked online ]

Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/all/20241029125451.54574-3-frederic@kernel.org
---
 include/linux/tick.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/tick.h b/include/linux/tick.h
index 72744638c5b0..b0c74bfe0600 100644
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -20,12 +20,10 @@ extern void __init tick_init(void);
 extern void tick_suspend_local(void);
 /* Should be core only, but XEN resume magic and ARM BL switcher require it */
 extern void tick_resume_local(void);
-extern void tick_cleanup_dead_cpu(int cpu);
 #else /* CONFIG_GENERIC_CLOCKEVENTS */
 static inline void tick_init(void) { }
 static inline void tick_suspend_local(void) { }
 static inline void tick_resume_local(void) { }
-static inline void tick_cleanup_dead_cpu(int cpu) { }
 #endif /* !CONFIG_GENERIC_CLOCKEVENTS */
 
 #if defined(CONFIG_GENERIC_CLOCKEVENTS) && defined(CONFIG_HOTPLUG_CPU)
-- 
cgit v1.2.3


From 697a4001d31a607a72c6297e4eb0f7918c6e6929 Mon Sep 17 00:00:00 2001
From: Andre Przywara <andre.przywara@arm.com>
Date: Mon, 7 Oct 2024 01:14:05 +0100
Subject: mfd: axp20x: Ensure relationship between IDs and model names

At the moment there is an implicit relationship between the AXP model
IDs and the order of the strings in the axp20x_model_names[] array.
This is fragile, and makes adding IDs in the middle error prone.

Make this relationship official by changing the ID type to the actual
enum used, and using indexed initialisers for the string list.

Signed-off-by: Andre Przywara <andre.przywara@arm.com>
Reviewed-by: Chen-Yu Tsai <wens@csie.org>
Link: https://lore.kernel.org/r/20241007001408.27249-3-andre.przywara@arm.com
Signed-off-by: Lee Jones <lee@kernel.org>
---
 include/linux/mfd/axp20x.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/mfd/axp20x.h b/include/linux/mfd/axp20x.h
index f4dfc1871a95..79ecaaaa2070 100644
--- a/include/linux/mfd/axp20x.h
+++ b/include/linux/mfd/axp20x.h
@@ -959,7 +959,7 @@ struct axp20x_dev {
 	unsigned long			irq_flags;
 	struct regmap			*regmap;
 	struct regmap_irq_chip_data	*regmap_irqc;
-	long				variant;
+	enum axp20x_variants		variant;
 	int                             nr_cells;
 	const struct mfd_cell           *cells;
 	const struct regmap_config	*regmap_cfg;
-- 
cgit v1.2.3


From 35fec94afe045856456faca4879b9c560e39d1e3 Mon Sep 17 00:00:00 2001
From: Andre Przywara <andre.przywara@arm.com>
Date: Mon, 7 Oct 2024 01:14:07 +0100
Subject: mfd: axp20x: Add support for AXP323

The X-Powers AXP323 is a very close sibling of the AXP313A. The only
difference seems to be the ability to dual-phase the first two DC/DC
converter, which adds another register.

Add the required boilerplate to introduce a new PMIC to the AXP MFD
driver. Where possible, this just maps into the existing structs defined
for the AXP313A, only deviating where needed.

Signed-off-by: Andre Przywara <andre.przywara@arm.com>
Reviewed-by: Chen-Yu Tsai <wens@csie.org>
Link: https://lore.kernel.org/r/20241007001408.27249-5-andre.przywara@arm.com
Signed-off-by: Lee Jones <lee@kernel.org>
---
 include/linux/mfd/axp20x.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/mfd/axp20x.h b/include/linux/mfd/axp20x.h
index 79ecaaaa2070..c3df0e615fbf 100644
--- a/include/linux/mfd/axp20x.h
+++ b/include/linux/mfd/axp20x.h
@@ -19,6 +19,7 @@ enum axp20x_variants {
 	AXP223_ID,
 	AXP288_ID,
 	AXP313A_ID,
+	AXP323_ID,
 	AXP717_ID,
 	AXP803_ID,
 	AXP806_ID,
@@ -113,6 +114,7 @@ enum axp20x_variants {
 #define AXP313A_SHUTDOWN_CTRL		0x1a
 #define AXP313A_IRQ_EN			0x20
 #define AXP313A_IRQ_STATE		0x21
+#define AXP323_DCDC_MODE_CTRL2		0x22
 
 #define AXP717_ON_INDICATE		0x00
 #define AXP717_PMU_STATUS_2		0x01
-- 
cgit v1.2.3


From 6e31bb8d3a63bb2c3efab2fb6bcfccac677a4581 Mon Sep 17 00:00:00 2001
From: Yassine Oudjana <y.oudjana@protonmail.com>
Date: Fri, 18 Oct 2024 11:10:46 +0300
Subject: mfd: mt6397: Add initial support for MT6328

The MT6328 PMIC is commonly used with the MT6735 SoC. Add initial
support for this PMIC.

Signed-off-by: Yassine Oudjana <y.oudjana@protonmail.com>
Reviewed-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
Link: https://lore.kernel.org/r/20241018081050.23592-5-y.oudjana@protonmail.com
Signed-off-by: Lee Jones <lee@kernel.org>
---
 include/linux/mfd/mt6328/core.h      |  53 +++
 include/linux/mfd/mt6328/registers.h | 822 +++++++++++++++++++++++++++++++++++
 include/linux/mfd/mt6397/core.h      |  11 +-
 3 files changed, 881 insertions(+), 5 deletions(-)
 create mode 100644 include/linux/mfd/mt6328/core.h
 create mode 100644 include/linux/mfd/mt6328/registers.h

(limited to 'include')

diff --git a/include/linux/mfd/mt6328/core.h b/include/linux/mfd/mt6328/core.h
new file mode 100644
index 000000000000..9a08aed72b9f
--- /dev/null
+++ b/include/linux/mfd/mt6328/core.h
@@ -0,0 +1,53 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2015 MediaTek Inc.
+ * Copyright (c) 2022 Yassine Oudjana <y.oudjana@protonmail.com>
+ */
+
+#ifndef __MFD_MT6328_CORE_H__
+#define __MFD_MT6328_CORE_H__
+
+enum mt6328_irq_status_numbers {
+	MT6328_IRQ_STATUS_PWRKEY = 0,
+	MT6328_IRQ_STATUS_HOMEKEY,
+	MT6328_IRQ_STATUS_PWRKEY_R,
+	MT6328_IRQ_STATUS_HOMEKEY_R,
+	MT6328_IRQ_STATUS_THR_H,
+	MT6328_IRQ_STATUS_THR_L,
+	MT6328_IRQ_STATUS_BAT_H,
+	MT6328_IRQ_STATUS_BAT_L,
+	MT6328_IRQ_STATUS_RTC,
+	MT6328_IRQ_STATUS_AUDIO,
+	MT6328_IRQ_STATUS_ACCDET,
+	MT6328_IRQ_STATUS_ACCDET_EINT,
+	MT6328_IRQ_STATUS_ACCDET_NEGV,
+	MT6328_IRQ_STATUS_NI_LBAT_INT,
+	MT6328_IRQ_STATUS_VPROC_OC = 16,
+	MT6328_IRQ_STATUS_VSYS_OC,
+	MT6328_IRQ_STATUS_VLTE_OC,
+	MT6328_IRQ_STATUS_VCORE_OC,
+	MT6328_IRQ_STATUS_VPA_OC,
+	MT6328_IRQ_STATUS_LDO_OC,
+	MT6328_IRQ_STATUS_BAT2_H,
+	MT6328_IRQ_STATUS_BAT2_L,
+	MT6328_IRQ_STATUS_VISMPS0_H,
+	MT6328_IRQ_STATUS_VISMPS0_L,
+	MT6328_IRQ_STATUS_AUXADC_IMP,
+	MT6328_IRQ_STATUS_OV = 32,
+	MT6328_IRQ_STATUS_BVALID_DET,
+	MT6328_IRQ_STATUS_VBATON_HV,
+	MT6328_IRQ_STATUS_VBATON_UNDET,
+	MT6328_IRQ_STATUS_WATCHDOG,
+	MT6328_IRQ_STATUS_PCHR_CM_VDEC,
+	MT6328_IRQ_STATUS_CHRDET,
+	MT6328_IRQ_STATUS_PCHR_CM_VINC,
+	MT6328_IRQ_STATUS_FG_BAT_H,
+	MT6328_IRQ_STATUS_FG_BAT_L,
+	MT6328_IRQ_STATUS_FG_CUR_H,
+	MT6328_IRQ_STATUS_FG_CUR_L,
+	MT6328_IRQ_STATUS_FG_ZCV,
+	MT6328_IRQ_STATUS_SPKL_D,
+	MT6328_IRQ_STATUS_SPKL_AB,
+};
+
+#endif /* __MFD_MT6323_CORE_H__ */
diff --git a/include/linux/mfd/mt6328/registers.h b/include/linux/mfd/mt6328/registers.h
new file mode 100644
index 000000000000..8199aaea27b9
--- /dev/null
+++ b/include/linux/mfd/mt6328/registers.h
@@ -0,0 +1,822 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2022 Yassine Oudjana <y.oudjana@protonmail.com>
+ */
+
+#ifndef __MFD_MT6328_REGISTERS_H__
+#define __MFD_MT6328_REGISTERS_H__
+
+/* PMIC Registers */
+#define MT6328_STRUP_CON0            0x0000
+#define MT6328_STRUP_CON2            0x0002
+#define MT6328_STRUP_CON3            0x0004
+#define MT6328_STRUP_CON4            0x0006
+#define MT6328_STRUP_CON5            0x0008
+#define MT6328_STRUP_CON6            0x000a
+#define MT6328_STRUP_CON7            0x000c
+#define MT6328_STRUP_CON8            0x000e
+#define MT6328_STRUP_CON9            0x0010
+#define MT6328_STRUP_CON10           0x0012
+#define MT6328_STRUP_CON11           0x0014
+#define MT6328_STRUP_CON12           0x0016
+#define MT6328_STRUP_CON13           0x0018
+#define MT6328_STRUP_CON14           0x001a
+#define MT6328_STRUP_CON15           0x001c
+#define MT6328_STRUP_CON16           0x001e
+#define MT6328_STRUP_CON17           0x0020
+#define MT6328_STRUP_CON18           0x0022
+#define MT6328_STRUP_CON19           0x0024
+#define MT6328_STRUP_CON20           0x0026
+#define MT6328_STRUP_CON21           0x0028
+#define MT6328_STRUP_CON22           0x002a
+#define MT6328_STRUP_CON23           0x002c
+#define MT6328_STRUP_CON24           0x002e
+#define MT6328_STRUP_CON25           0x0030
+#define MT6328_STRUP_CON26           0x0032
+#define MT6328_STRUP_CON27           0x0034
+#define MT6328_STRUP_CON28           0x0036
+#define MT6328_STRUP_CON29           0x0038
+#define MT6328_STRUP_CON30           0x003a
+#define MT6328_STRUP_CON31           0x003c
+#define MT6328_STRUP_CON32           0x003e
+#define MT6328_STRUP_ANA_CON0        0x0040
+#define MT6328_HWCID                 0x0200
+#define MT6328_SWCID                 0x0202
+#define MT6328_TOP_CON               0x0204
+#define MT6328_TEST_OUT              0x0206
+#define MT6328_TEST_CON0             0x0208
+#define MT6328_TEST_CON1             0x020a
+#define MT6328_TESTMODE_SW           0x020c
+#define MT6328_EN_STATUS0            0x020e
+#define MT6328_EN_STATUS1            0x0210
+#define MT6328_EN_STATUS2            0x0212
+#define MT6328_OCSTATUS0             0x0214
+#define MT6328_OCSTATUS1             0x0216
+#define MT6328_OCSTATUS2             0x0218
+#define MT6328_PGDEBSTATUS           0x021a
+#define MT6328_PGSTATUS              0x021c
+#define MT6328_THERMALSTATUS         0x021e
+#define MT6328_TOPSTATUS             0x0220
+#define MT6328_TDSEL_CON             0x0222
+#define MT6328_RDSEL_CON             0x0224
+#define MT6328_SMT_CON0              0x0226
+#define MT6328_SMT_CON1              0x0228
+#define MT6328_SMT_CON2              0x022a
+#define MT6328_DRV_CON0              0x022c
+#define MT6328_DRV_CON1              0x022e
+#define MT6328_DRV_CON2              0x0230
+#define MT6328_DRV_CON3              0x0232
+#define MT6328_TOP_STATUS            0x0234
+#define MT6328_TOP_STATUS_SET        0x0236
+#define MT6328_TOP_STATUS_CLR        0x0238
+#define MT6328_RGS_ANA_MON           0x023a
+#define MT6328_TOP_CKPDN_CON0        0x023c
+#define MT6328_TOP_CKPDN_CON0_SET    0x023e
+#define MT6328_TOP_CKPDN_CON0_CLR    0x0240
+#define MT6328_TOP_CKPDN_CON1        0x0242
+#define MT6328_TOP_CKPDN_CON1_SET    0x0244
+#define MT6328_TOP_CKPDN_CON1_CLR    0x0246
+#define MT6328_TOP_CKPDN_CON2        0x0248
+#define MT6328_TOP_CKPDN_CON2_SET    0x024a
+#define MT6328_TOP_CKPDN_CON2_CLR    0x024c
+#define MT6328_TOP_CKPDN_CON3        0x024e
+#define MT6328_TOP_CKPDN_CON3_SET    0x0250
+#define MT6328_TOP_CKPDN_CON3_CLR    0x0252
+#define MT6328_TOP_CKPDN_CON4        0x0254
+#define MT6328_TOP_CKPDN_CON4_SET    0x0256
+#define MT6328_TOP_CKPDN_CON4_CLR    0x0258
+#define MT6328_TOP_CKSEL_CON0        0x025a
+#define MT6328_TOP_CKSEL_CON0_SET    0x025c
+#define MT6328_TOP_CKSEL_CON0_CLR    0x025e
+#define MT6328_TOP_CKSEL_CON1        0x0260
+#define MT6328_TOP_CKSEL_CON1_SET    0x0262
+#define MT6328_TOP_CKSEL_CON1_CLR    0x0264
+#define MT6328_TOP_CKSEL_CON2        0x0266
+#define MT6328_TOP_CKSEL_CON2_SET    0x0268
+#define MT6328_TOP_CKSEL_CON2_CLR    0x026a
+#define MT6328_TOP_CKDIVSEL_CON0     0x026c
+#define MT6328_TOP_CKDIVSEL_CON0_SET 0x026e
+#define MT6328_TOP_CKDIVSEL_CON0_CLR 0x0270
+#define MT6328_TOP_CKDIVSEL_CON1     0x0272
+#define MT6328_TOP_CKDIVSEL_CON1_SET 0x0274
+#define MT6328_TOP_CKDIVSEL_CON1_CLR 0x0276
+#define MT6328_TOP_CKHWEN_CON0       0x0278
+#define MT6328_TOP_CKHWEN_CON0_SET   0x027a
+#define MT6328_TOP_CKHWEN_CON0_CLR   0x027c
+#define MT6328_TOP_CKHWEN_CON1       0x027e
+#define MT6328_TOP_CKHWEN_CON1_SET   0x0280
+#define MT6328_TOP_CKHWEN_CON1_CLR   0x0282
+#define MT6328_TOP_CKTST_CON0        0x0284
+#define MT6328_TOP_CKTST_CON1        0x0286
+#define MT6328_TOP_CKTST_CON2        0x0288
+#define MT6328_TOP_CLKSQ             0x028a
+#define MT6328_TOP_CLKSQ_SET         0x028c
+#define MT6328_TOP_CLKSQ_CLR         0x028e
+#define MT6328_TOP_CLKSQ_RTC         0x0290
+#define MT6328_TOP_CLKSQ_RTC_SET     0x0292
+#define MT6328_TOP_CLKSQ_RTC_CLR     0x0294
+#define MT6328_TOP_CLK_TRIM          0x0296
+#define MT6328_TOP_RST_CON0          0x0298
+#define MT6328_TOP_RST_CON0_SET      0x029a
+#define MT6328_TOP_RST_CON0_CLR      0x029c
+#define MT6328_TOP_RST_CON1          0x029e
+#define MT6328_TOP_RST_MISC          0x02a0
+#define MT6328_TOP_RST_MISC_SET      0x02a2
+#define MT6328_TOP_RST_MISC_CLR      0x02a4
+#define MT6328_TOP_RST_STATUS        0x02a6
+#define MT6328_TOP_RST_STATUS_SET    0x02a8
+#define MT6328_TOP_RST_STATUS_CLR    0x02aa
+#define MT6328_INT_CON0              0x02ac
+#define MT6328_INT_CON0_SET          0x02ae
+#define MT6328_INT_CON0_CLR          0x02b0
+#define MT6328_INT_CON1              0x02b2
+#define MT6328_INT_CON1_SET          0x02b4
+#define MT6328_INT_CON1_CLR          0x02b6
+#define MT6328_INT_CON2              0x02b8
+#define MT6328_INT_CON2_SET          0x02ba
+#define MT6328_INT_CON2_CLR          0x02bc
+#define MT6328_INT_MISC_CON          0x02be
+#define MT6328_INT_MISC_CON_SET      0x02c0
+#define MT6328_INT_MISC_CON_CLR      0x02c2
+#define MT6328_INT_STATUS0           0x02c4
+#define MT6328_INT_STATUS1           0x02c6
+#define MT6328_INT_STATUS2           0x02c8
+#define MT6328_OC_GEAR_0             0x02ca
+#define MT6328_FQMTR_CON0            0x02cc
+#define MT6328_FQMTR_CON1            0x02ce
+#define MT6328_FQMTR_CON2            0x02d0
+#define MT6328_RG_SPI_CON            0x02d2
+#define MT6328_DEW_DIO_EN            0x02d4
+#define MT6328_DEW_READ_TEST         0x02d6
+#define MT6328_DEW_WRITE_TEST        0x02d8
+#define MT6328_DEW_CRC_SWRST         0x02da
+#define MT6328_DEW_CRC_EN            0x02dc
+#define MT6328_DEW_CRC_VAL           0x02de
+#define MT6328_DEW_DBG_MON_SEL       0x02e0
+#define MT6328_DEW_CIPHER_KEY_SEL    0x02e2
+#define MT6328_DEW_CIPHER_IV_SEL     0x02e4
+#define MT6328_DEW_CIPHER_EN         0x02e6
+#define MT6328_DEW_CIPHER_RDY        0x02e8
+#define MT6328_DEW_CIPHER_MODE       0x02ea
+#define MT6328_DEW_CIPHER_SWRST      0x02ec
+#define MT6328_DEW_RDDMY_NO          0x02ee
+#define MT6328_INT_TYPE_CON0         0x02f0
+#define MT6328_INT_TYPE_CON0_SET     0x02f2
+#define MT6328_INT_TYPE_CON0_CLR     0x02f4
+#define MT6328_INT_TYPE_CON1         0x02f6
+#define MT6328_INT_TYPE_CON1_SET     0x02f8
+#define MT6328_INT_TYPE_CON1_CLR     0x02fa
+#define MT6328_INT_TYPE_CON2         0x02fc
+#define MT6328_INT_TYPE_CON2_SET     0x02fe
+#define MT6328_INT_TYPE_CON2_CLR     0x0300
+#define MT6328_INT_STA               0x0302
+#define MT6328_BUCK_ALL_CON0         0x0400
+#define MT6328_BUCK_ALL_CON1         0x0402
+#define MT6328_BUCK_ALL_CON2         0x0404
+#define MT6328_BUCK_ALL_CON3         0x0406
+#define MT6328_BUCK_ALL_CON4         0x0408
+#define MT6328_BUCK_ALL_CON5         0x040a
+#define MT6328_BUCK_ALL_CON6         0x040c
+#define MT6328_BUCK_ALL_CON9         0x040e
+#define MT6328_BUCK_ALL_CON12        0x0410
+#define MT6328_BUCK_ALL_CON13        0x0412
+#define MT6328_BUCK_ALL_CON14        0x0414
+#define MT6328_BUCK_ALL_CON16        0x0416
+#define MT6328_BUCK_ALL_CON18        0x0418
+#define MT6328_BUCK_ALL_CON19        0x041a
+#define MT6328_BUCK_ALL_CON20        0x041c
+#define MT6328_BUCK_ALL_CON21        0x041e
+#define MT6328_BUCK_ALL_CON22        0x0420
+#define MT6328_BUCK_ALL_CON23        0x0422
+#define MT6328_BUCK_ALL_CON24        0x0424
+#define MT6328_BUCK_ALL_CON25        0x0426
+#define MT6328_BUCK_ALL_CON26        0x0428
+#define MT6328_BUCK_ALL_CON27        0x042a
+#define MT6328_BUCK_ALL_CON28        0x042c
+#define MT6328_SMPS_TOP_ANA_CON0     0x042e
+#define MT6328_SMPS_TOP_ANA_CON1     0x0430
+#define MT6328_SMPS_TOP_ANA_CON2     0x0432
+#define MT6328_SMPS_TOP_ANA_CON3     0x0434
+#define MT6328_SMPS_TOP_ANA_CON4     0x0436
+#define MT6328_SMPS_TOP_ANA_CON5     0x0438
+#define MT6328_SMPS_TOP_ANA_CON6     0x043a
+#define MT6328_SMPS_TOP_ANA_CON7     0x043c
+#define MT6328_SMPS_TOP_ANA_CON8     0x043e
+#define MT6328_VCORE_ANA_CON0        0x0440
+#define MT6328_VCORE_ANA_CON1        0x0442
+#define MT6328_VCORE_ANA_CON2        0x0444
+#define MT6328_VCORE_ANA_CON3        0x0446
+#define MT6328_VCORE_ANA_CON4        0x0448
+#define MT6328_VSYS22_ANA_CON0       0x044a
+#define MT6328_VSYS22_ANA_CON1       0x044c
+#define MT6328_VSYS22_ANA_CON2       0x044e
+#define MT6328_VSYS22_ANA_CON3       0x0450
+#define MT6328_VSYS22_ANA_CON4       0x0452
+#define MT6328_VPROC_ANA_CON0        0x0454
+#define MT6328_VPROC_ANA_CON1        0x0456
+#define MT6328_VPROC_ANA_CON2        0x0458
+#define MT6328_VPROC_ANA_CON3        0x045a
+#define MT6328_VPROC_ANA_CON4        0x045c
+#define MT6328_OSC32_ANA_CON0        0x045e
+#define MT6328_OSC32_ANA_CON1        0x0460
+#define MT6328_VPA_ANA_CON0          0x0462
+#define MT6328_VPA_ANA_CON1          0x0464
+#define MT6328_VPA_ANA_CON2          0x0466
+#define MT6328_VPA_ANA_CON3          0x0468
+#define MT6328_VLTE_ANA_CON0         0x046a
+#define MT6328_VLTE_ANA_CON1         0x046c
+#define MT6328_VLTE_ANA_CON2         0x046e
+#define MT6328_VLTE_ANA_CON3         0x0470
+#define MT6328_VLTE_ANA_CON4         0x0472
+#define MT6328_VPROC_CON0            0x0474
+#define MT6328_VPROC_CON1            0x0476
+#define MT6328_VPROC_CON2            0x0478
+#define MT6328_VPROC_CON3            0x047a
+#define MT6328_VPROC_CON4            0x047c
+#define MT6328_VPROC_CON5            0x047e
+#define MT6328_VPROC_CON6            0x0480
+#define MT6328_VPROC_CON7            0x0482
+#define MT6328_VPROC_CON8            0x0484
+#define MT6328_VPROC_CON9            0x0486
+#define MT6328_VPROC_CON10           0x0488
+#define MT6328_VPROC_CON11           0x048a
+#define MT6328_VPROC_CON12           0x048c
+#define MT6328_VPROC_CON13           0x048e
+#define MT6328_VPROC_CON14           0x0490
+#define MT6328_VPROC_CON15           0x0492
+#define MT6328_VPROC_CON16           0x0494
+#define MT6328_VPROC_CON17           0x0496
+#define MT6328_VPROC_CON18           0x0498
+#define MT6328_VPROC_CON19           0x049a
+#define MT6328_VSRAM_CON0            0x049c
+#define MT6328_VSRAM_CON1            0x049e
+#define MT6328_VSRAM_CON2            0x04a0
+#define MT6328_VSRAM_CON3            0x04a2
+#define MT6328_VSRAM_CON4            0x04a4
+#define MT6328_VSRAM_CON5            0x04a6
+#define MT6328_VSRAM_CON6            0x04a8
+#define MT6328_VSRAM_CON7            0x04aa
+#define MT6328_VSRAM_CON8            0x04ac
+#define MT6328_VSRAM_CON9            0x04ae
+#define MT6328_VSRAM_CON10           0x04b0
+#define MT6328_VSRAM_CON11           0x04b2
+#define MT6328_VSRAM_CON12           0x04b4
+#define MT6328_VSRAM_CON13           0x04b6
+#define MT6328_VSRAM_CON14           0x04b8
+#define MT6328_VSRAM_CON15           0x04ba
+#define MT6328_VSRAM_CON16           0x04bc
+#define MT6328_VSRAM_CON17           0x04be
+#define MT6328_VSRAM_CON18           0x04c0
+#define MT6328_VSRAM_CON19           0x04c2
+#define MT6328_VLTE_CON0             0x04c4
+#define MT6328_VLTE_CON1             0x04c6
+#define MT6328_VLTE_CON2             0x04c8
+#define MT6328_VLTE_CON3             0x04ca
+#define MT6328_VLTE_CON4             0x04cc
+#define MT6328_VLTE_CON5             0x04ce
+#define MT6328_VLTE_CON6             0x04d0
+#define MT6328_VLTE_CON7             0x04d2
+#define MT6328_VLTE_CON8             0x04d4
+#define MT6328_VLTE_CON9             0x04d6
+#define MT6328_VLTE_CON10            0x04d8
+#define MT6328_VLTE_CON11            0x04da
+#define MT6328_VLTE_CON12            0x04dc
+#define MT6328_VLTE_CON13            0x04de
+#define MT6328_VLTE_CON14            0x04e0
+#define MT6328_VLTE_CON15            0x04e2
+#define MT6328_VLTE_CON16            0x04e4
+#define MT6328_VLTE_CON17            0x04e6
+#define MT6328_VLTE_CON18            0x04e8
+#define MT6328_VLTE_CON19            0x04ea
+#define MT6328_VCORE1_CON0           0x0600
+#define MT6328_VCORE1_CON1           0x0602
+#define MT6328_VCORE1_CON2           0x0604
+#define MT6328_VCORE1_CON3           0x0606
+#define MT6328_VCORE1_CON4           0x0608
+#define MT6328_VCORE1_CON5           0x060a
+#define MT6328_VCORE1_CON6           0x060c
+#define MT6328_VCORE1_CON7           0x060e
+#define MT6328_VCORE1_CON8           0x0610
+#define MT6328_VCORE1_CON9           0x0612
+#define MT6328_VCORE1_CON10          0x0614
+#define MT6328_VCORE1_CON11          0x0616
+#define MT6328_VCORE1_CON12          0x0618
+#define MT6328_VCORE1_CON13          0x061a
+#define MT6328_VCORE1_CON14          0x061c
+#define MT6328_VCORE1_CON15          0x061e
+#define MT6328_VCORE1_CON16          0x0620
+#define MT6328_VCORE1_CON17          0x0622
+#define MT6328_VCORE1_CON18          0x0624
+#define MT6328_VCORE1_CON19          0x0626
+#define MT6328_VSYS22_CON0           0x0628
+#define MT6328_VSYS22_CON1           0x062a
+#define MT6328_VSYS22_CON2           0x062c
+#define MT6328_VSYS22_CON3           0x062e
+#define MT6328_VSYS22_CON4           0x0630
+#define MT6328_VSYS22_CON5           0x0632
+#define MT6328_VSYS22_CON6           0x0634
+#define MT6328_VSYS22_CON7           0x0636
+#define MT6328_VSYS22_CON8           0x0638
+#define MT6328_VSYS22_CON9           0x063a
+#define MT6328_VSYS22_CON10          0x063c
+#define MT6328_VSYS22_CON11          0x063e
+#define MT6328_VSYS22_CON12          0x0640
+#define MT6328_VSYS22_CON13          0x0642
+#define MT6328_VSYS22_CON14          0x0644
+#define MT6328_VSYS22_CON15          0x0646
+#define MT6328_VSYS22_CON16          0x0648
+#define MT6328_VSYS22_CON17          0x064a
+#define MT6328_VSYS22_CON18          0x064c
+#define MT6328_VSYS22_CON19          0x064e
+#define MT6328_VPA_CON0              0x0650
+#define MT6328_VPA_CON1              0x0652
+#define MT6328_VPA_CON2              0x0654
+#define MT6328_VPA_CON3              0x0656
+#define MT6328_VPA_CON4              0x0658
+#define MT6328_VPA_CON5              0x065a
+#define MT6328_VPA_CON6              0x065c
+#define MT6328_VPA_CON7              0x065e
+#define MT6328_VPA_CON8              0x0660
+#define MT6328_VPA_CON9              0x0662
+#define MT6328_VPA_CON10             0x0664
+#define MT6328_VPA_CON11             0x0666
+#define MT6328_VPA_CON12             0x0668
+#define MT6328_VPA_CON13             0x066a
+#define MT6328_VPA_CON14             0x066c
+#define MT6328_VPA_CON15             0x066e
+#define MT6328_VPA_CON16             0x0670
+#define MT6328_VPA_CON17             0x0672
+#define MT6328_VPA_CON18             0x0674
+#define MT6328_VPA_CON19             0x0676
+#define MT6328_VPA_CON20             0x0678
+#define MT6328_VPA_CON21             0x067a
+#define MT6328_VPA_CON22             0x067c
+#define MT6328_VPA_CON23             0x067e
+#define MT6328_VPA_CON24             0x0680
+#define MT6328_BUCK_K_CON0           0x0682
+#define MT6328_BUCK_K_CON1           0x0684
+#define MT6328_BUCK_K_CON2           0x0686
+#define MT6328_BUCK_K_CON3           0x0688
+#define MT6328_ZCD_CON0              0x0800
+#define MT6328_ZCD_CON1              0x0802
+#define MT6328_ZCD_CON2              0x0804
+#define MT6328_ZCD_CON3              0x0806
+#define MT6328_ZCD_CON4              0x0808
+#define MT6328_ZCD_CON5              0x080a
+#define MT6328_ISINK0_CON0           0x080c
+#define MT6328_ISINK0_CON1           0x080e
+#define MT6328_ISINK0_CON2           0x0810
+#define MT6328_ISINK0_CON3           0x0812
+#define MT6328_ISINK1_CON0           0x0814
+#define MT6328_ISINK1_CON1           0x0816
+#define MT6328_ISINK1_CON2           0x0818
+#define MT6328_ISINK1_CON3           0x081a
+#define MT6328_ISINK2_CON1           0x081c
+#define MT6328_ISINK3_CON1           0x081e
+#define MT6328_ISINK_ANA0            0x0820
+#define MT6328_ISINK_ANA1            0x0822
+#define MT6328_ISINK_PHASE_DLY       0x0824
+#define MT6328_ISINK_SFSTR           0x0826
+#define MT6328_ISINK_EN_CTRL         0x0828
+#define MT6328_ISINK_MODE_CTRL       0x082a
+#define MT6328_VTCXO_0_CON0          0x0a00
+#define MT6328_VTCXO_1_CON0          0x0a02
+#define MT6328_VAUD28_CON0           0x0a04
+#define MT6328_VAUX18_CON0           0x0a06
+#define MT6328_VRF18_0_CON0          0x0a08
+#define MT6328_VRF18_0_CON1          0x0a0a
+#define MT6328_VCAMA_CON0            0x0a0c
+#define MT6328_VCN28_CON0            0x0a0e
+#define MT6328_VCN33_CON0            0x0a10
+#define MT6328_VCN33_CON1            0x0a12
+#define MT6328_VCN33_CON2            0x0a14
+#define MT6328_VRF18_1_CON0          0x0a16
+#define MT6328_VRF18_1_CON1          0x0a18
+#define MT6328_VUSB33_CON0           0x0a1a
+#define MT6328_VMCH_CON0             0x0a1c
+#define MT6328_VMCH_CON1             0x0a1e
+#define MT6328_VMC_CON0              0x0a20
+#define MT6328_VMC_CON1              0x0a22
+#define MT6328_VEMC_3V3_CON0         0x0a24
+#define MT6328_VEMC_3V3_CON1         0x0a26
+#define MT6328_VIO28_CON0            0x0a28
+#define MT6328_VCAMAF_CON0           0x0a2a
+#define MT6328_VGP1_CON0             0x0a2c
+#define MT6328_VGP1_CON1             0x0a2e
+#define MT6328_VEFUSE_CON0           0x0a30
+#define MT6328_VSIM1_CON0            0x0a32
+#define MT6328_VSIM2_CON0            0x0a34
+#define MT6328_VIO18_CON0            0x0a36
+#define MT6328_VIBR_CON0             0x0a38
+#define MT6328_VCN18_CON0            0x0a3a
+#define MT6328_VCAM_CON0             0x0a3c
+#define MT6328_VCAMIO_CON0           0x0a3e
+#define MT6328_LDO_VSRAM_CON0        0x0a40
+#define MT6328_LDO_VSRAM_CON1        0x0a42
+#define MT6328_VTREF_CON0            0x0a44
+#define MT6328_VM_CON0               0x0a46
+#define MT6328_VM_CON1               0x0a48
+#define MT6328_VRTC_CON0             0x0a4a
+#define MT6328_LDO_OCFB0             0x0a4c
+#define MT6328_ALDO_ANA_CON0         0x0a4e
+#define MT6328_ADLDO_ANA_CON1        0x0a50
+#define MT6328_ADLDO_ANA_CON2        0x0a52
+#define MT6328_ADLDO_ANA_CON3        0x0a54
+#define MT6328_ADLDO_ANA_CON4        0x0a56
+#define MT6328_ADLDO_ANA_CON5        0x0a58
+#define MT6328_ADLDO_ANA_CON6        0x0a5a
+#define MT6328_ADLDO_ANA_CON7        0x0a5c
+#define MT6328_ADLDO_ANA_CON8        0x0a5e
+#define MT6328_ADLDO_ANA_CON9        0x0a60
+#define MT6328_ADLDO_ANA_CON10       0x0a62
+#define MT6328_ADLDO_ANA_CON11       0x0a64
+#define MT6328_ADLDO_ANA_CON12       0x0a66
+#define MT6328_ADLDO_ANA_CON13       0x0a68
+#define MT6328_DLDO_ANA_CON0         0x0a6a
+#define MT6328_DLDO_ANA_CON1         0x0a6c
+#define MT6328_DLDO_ANA_CON2         0x0a6e
+#define MT6328_DLDO_ANA_CON3         0x0a70
+#define MT6328_DLDO_ANA_CON4         0x0a72
+#define MT6328_DLDO_ANA_CON5         0x0a74
+#define MT6328_SLDO_ANA_CON0         0x0a76
+#define MT6328_SLDO_ANA_CON1         0x0a78
+#define MT6328_SLDO_ANA_CON2         0x0a7a
+#define MT6328_SLDO_ANA_CON3         0x0a7c
+#define MT6328_SLDO_ANA_CON4         0x0a7e
+#define MT6328_SLDO_ANA_CON5         0x0a80
+#define MT6328_SLDO_ANA_CON6         0x0a82
+#define MT6328_SLDO_ANA_CON7         0x0a84
+#define MT6328_SLDO_ANA_CON8         0x0a86
+#define MT6328_SLDO_ANA_CON9         0x0a88
+#define MT6328_SLDO_ANA_CON10        0x0a8a
+#define MT6328_LDO_RSV_CON0          0x0a8c
+#define MT6328_LDO_RSV_CON1          0x0a8e
+#define MT6328_SPK_CON0              0x0a90
+#define MT6328_SPK_CON1              0x0a92
+#define MT6328_SPK_CON2              0x0a94
+#define MT6328_SPK_CON3              0x0a96
+#define MT6328_SPK_CON4              0x0a98
+#define MT6328_SPK_CON5              0x0a9a
+#define MT6328_SPK_CON6              0x0a9c
+#define MT6328_SPK_CON7              0x0a9e
+#define MT6328_SPK_CON8              0x0aa0
+#define MT6328_SPK_CON9              0x0aa2
+#define MT6328_SPK_CON10             0x0aa4
+#define MT6328_SPK_CON11             0x0aa6
+#define MT6328_SPK_CON12             0x0aa8
+#define MT6328_SPK_CON13             0x0aaa
+#define MT6328_SPK_CON14             0x0aac
+#define MT6328_SPK_CON15             0x0aae
+#define MT6328_SPK_CON16             0x0ab0
+#define MT6328_SPK_ANA_CON0          0x0ab2
+#define MT6328_SPK_ANA_CON1          0x0ab4
+#define MT6328_SPK_ANA_CON3          0x0ab6
+#define MT6328_OTP_CON0              0x0c00
+#define MT6328_OTP_CON1              0x0c02
+#define MT6328_OTP_CON2              0x0c04
+#define MT6328_OTP_CON3              0x0c06
+#define MT6328_OTP_CON4              0x0c08
+#define MT6328_OTP_CON5              0x0c0a
+#define MT6328_OTP_CON6              0x0c0c
+#define MT6328_OTP_CON7              0x0c0e
+#define MT6328_OTP_CON8              0x0c10
+#define MT6328_OTP_CON9              0x0c12
+#define MT6328_OTP_CON10             0x0c14
+#define MT6328_OTP_CON11             0x0c16
+#define MT6328_OTP_CON12             0x0c18
+#define MT6328_OTP_CON13             0x0c1a
+#define MT6328_OTP_CON14             0x0c1c
+#define MT6328_OTP_DOUT_0_15         0x0c1e
+#define MT6328_OTP_DOUT_16_31        0x0c20
+#define MT6328_OTP_DOUT_32_47        0x0c22
+#define MT6328_OTP_DOUT_48_63        0x0c24
+#define MT6328_OTP_DOUT_64_79        0x0c26
+#define MT6328_OTP_DOUT_80_95        0x0c28
+#define MT6328_OTP_DOUT_96_111       0x0c2a
+#define MT6328_OTP_DOUT_112_127      0x0c2c
+#define MT6328_OTP_DOUT_128_143      0x0c2e
+#define MT6328_OTP_DOUT_144_159      0x0c30
+#define MT6328_OTP_DOUT_160_175      0x0c32
+#define MT6328_OTP_DOUT_176_191      0x0c34
+#define MT6328_OTP_DOUT_192_207      0x0c36
+#define MT6328_OTP_DOUT_208_223      0x0c38
+#define MT6328_OTP_DOUT_224_239      0x0c3a
+#define MT6328_OTP_DOUT_240_255      0x0c3c
+#define MT6328_OTP_DOUT_256_271      0x0c3e
+#define MT6328_OTP_DOUT_272_287      0x0c40
+#define MT6328_OTP_DOUT_288_303      0x0c42
+#define MT6328_OTP_DOUT_304_319      0x0c44
+#define MT6328_OTP_DOUT_320_335      0x0c46
+#define MT6328_OTP_DOUT_336_351      0x0c48
+#define MT6328_OTP_DOUT_352_367      0x0c4a
+#define MT6328_OTP_DOUT_368_383      0x0c4c
+#define MT6328_OTP_DOUT_384_399      0x0c4e
+#define MT6328_OTP_DOUT_400_415      0x0c50
+#define MT6328_OTP_DOUT_416_431      0x0c52
+#define MT6328_OTP_DOUT_432_447      0x0c54
+#define MT6328_OTP_DOUT_448_463      0x0c56
+#define MT6328_OTP_DOUT_464_479      0x0c58
+#define MT6328_OTP_DOUT_480_495      0x0c5a
+#define MT6328_OTP_DOUT_496_511      0x0c5c
+#define MT6328_OTP_VAL_0_15          0x0c5e
+#define MT6328_OTP_VAL_16_31         0x0c60
+#define MT6328_OTP_VAL_32_47         0x0c62
+#define MT6328_OTP_VAL_48_63         0x0c64
+#define MT6328_OTP_VAL_64_79         0x0c66
+#define MT6328_OTP_VAL_80_95         0x0c68
+#define MT6328_OTP_VAL_96_111        0x0c6a
+#define MT6328_OTP_VAL_112_127       0x0c6c
+#define MT6328_OTP_VAL_128_143       0x0c6e
+#define MT6328_OTP_VAL_144_159       0x0c70
+#define MT6328_OTP_VAL_160_175       0x0c72
+#define MT6328_OTP_VAL_176_191       0x0c74
+#define MT6328_OTP_VAL_192_207       0x0c76
+#define MT6328_OTP_VAL_208_223       0x0c78
+#define MT6328_OTP_VAL_224_239       0x0c7a
+#define MT6328_OTP_VAL_240_255       0x0c7c
+#define MT6328_OTP_VAL_256_271       0x0c7e
+#define MT6328_OTP_VAL_272_287       0x0c80
+#define MT6328_OTP_VAL_288_303       0x0c82
+#define MT6328_OTP_VAL_304_319       0x0c84
+#define MT6328_OTP_VAL_320_335       0x0c86
+#define MT6328_OTP_VAL_336_351       0x0c88
+#define MT6328_OTP_VAL_352_367       0x0c8a
+#define MT6328_OTP_VAL_368_383       0x0c8c
+#define MT6328_OTP_VAL_384_399       0x0c8e
+#define MT6328_OTP_VAL_400_415       0x0c90
+#define MT6328_OTP_VAL_416_431       0x0c92
+#define MT6328_OTP_VAL_432_447       0x0c94
+#define MT6328_OTP_VAL_448_463       0x0c96
+#define MT6328_OTP_VAL_464_479       0x0c98
+#define MT6328_OTP_VAL_480_495       0x0c9a
+#define MT6328_OTP_VAL_496_511       0x0c9c
+#define MT6328_RTC_MIX_CON0          0x0c9e
+#define MT6328_RTC_MIX_CON1          0x0ca0
+#define MT6328_RTC_MIX_CON2          0x0ca2
+#define MT6328_FGADC_CON0            0x0ca4
+#define MT6328_FGADC_CON1            0x0ca6
+#define MT6328_FGADC_CON2            0x0ca8
+#define MT6328_FGADC_CON3            0x0caa
+#define MT6328_FGADC_CON4            0x0cac
+#define MT6328_FGADC_CON5            0x0cae
+#define MT6328_FGADC_CON6            0x0cb0
+#define MT6328_FGADC_CON7            0x0cb2
+#define MT6328_FGADC_CON8            0x0cb4
+#define MT6328_FGADC_CON9            0x0cb6
+#define MT6328_FGADC_CON10           0x0cb8
+#define MT6328_FGADC_CON11           0x0cba
+#define MT6328_FGADC_CON12           0x0cbc
+#define MT6328_FGADC_CON13           0x0cbe
+#define MT6328_FGADC_CON14           0x0cc0
+#define MT6328_FGADC_CON15           0x0cc2
+#define MT6328_FGADC_CON16           0x0cc4
+#define MT6328_FGADC_CON17           0x0cc6
+#define MT6328_FGADC_CON18           0x0cc8
+#define MT6328_FGADC_CON19           0x0cca
+#define MT6328_FGADC_CON20           0x0ccc
+#define MT6328_FGADC_CON21           0x0cce
+#define MT6328_FGADC_CON22           0x0cd0
+#define MT6328_FGADC_CON23           0x0cd2
+#define MT6328_FGADC_CON24           0x0cd4
+#define MT6328_FGADC_CON25           0x0cd6
+#define MT6328_FGADC_CON26           0x0cd8
+#define MT6328_FGADC_CON27           0x0cda
+#define MT6328_AUDDEC_ANA_CON0       0x0cdc
+#define MT6328_AUDDEC_ANA_CON1       0x0cde
+#define MT6328_AUDDEC_ANA_CON2       0x0ce0
+#define MT6328_AUDDEC_ANA_CON3       0x0ce2
+#define MT6328_AUDDEC_ANA_CON4       0x0ce4
+#define MT6328_AUDDEC_ANA_CON5       0x0ce6
+#define MT6328_AUDDEC_ANA_CON6       0x0ce8
+#define MT6328_AUDDEC_ANA_CON7       0x0cea
+#define MT6328_AUDDEC_ANA_CON8       0x0cec
+#define MT6328_AUDENC_ANA_CON0       0x0cee
+#define MT6328_AUDENC_ANA_CON1       0x0cf0
+#define MT6328_AUDENC_ANA_CON2       0x0cf2
+#define MT6328_AUDENC_ANA_CON3       0x0cf4
+#define MT6328_AUDENC_ANA_CON4       0x0cf6
+#define MT6328_AUDENC_ANA_CON5       0x0cf8
+#define MT6328_AUDENC_ANA_CON6       0x0cfa
+#define MT6328_AUDENC_ANA_CON7       0x0cfc
+#define MT6328_AUDENC_ANA_CON8       0x0cfe
+#define MT6328_AUDENC_ANA_CON9       0x0d00
+#define MT6328_AUDENC_ANA_CON10      0x0d02
+#define MT6328_AUDNCP_CLKDIV_CON0    0x0d04
+#define MT6328_AUDNCP_CLKDIV_CON1    0x0d06
+#define MT6328_AUDNCP_CLKDIV_CON2    0x0d08
+#define MT6328_AUDNCP_CLKDIV_CON3    0x0d0a
+#define MT6328_AUDNCP_CLKDIV_CON4    0x0d0c
+#define MT6328_AUXADC_ADC0           0x0e00
+#define MT6328_AUXADC_ADC1           0x0e02
+#define MT6328_AUXADC_ADC2           0x0e04
+#define MT6328_AUXADC_ADC3           0x0e06
+#define MT6328_AUXADC_ADC4           0x0e08
+#define MT6328_AUXADC_ADC5           0x0e0a
+#define MT6328_AUXADC_ADC6           0x0e0c
+#define MT6328_AUXADC_ADC7           0x0e0e
+#define MT6328_AUXADC_ADC8           0x0e10
+#define MT6328_AUXADC_ADC9           0x0e12
+#define MT6328_AUXADC_ADC10          0x0e14
+#define MT6328_AUXADC_ADC11          0x0e16
+#define MT6328_AUXADC_ADC12          0x0e18
+#define MT6328_AUXADC_ADC13          0x0e1a
+#define MT6328_AUXADC_ADC14          0x0e1c
+#define MT6328_AUXADC_ADC15          0x0e1e
+#define MT6328_AUXADC_ADC16          0x0e20
+#define MT6328_AUXADC_ADC17          0x0e22
+#define MT6328_AUXADC_ADC18          0x0e24
+#define MT6328_AUXADC_ADC19          0x0e26
+#define MT6328_AUXADC_ADC20          0x0e28
+#define MT6328_AUXADC_ADC21          0x0e2a
+#define MT6328_AUXADC_ADC22          0x0e2c
+#define MT6328_AUXADC_ADC23          0x0e2e
+#define MT6328_AUXADC_ADC24          0x0e30
+#define MT6328_AUXADC_ADC25          0x0e32
+#define MT6328_AUXADC_ADC26          0x0e34
+#define MT6328_AUXADC_ADC27          0x0e36
+#define MT6328_AUXADC_ADC28          0x0e38
+#define MT6328_AUXADC_ADC29          0x0e3a
+#define MT6328_AUXADC_ADC30          0x0e3c
+#define MT6328_AUXADC_ADC31          0x0e3e
+#define MT6328_AUXADC_ADC32          0x0e40
+#define MT6328_AUXADC_ADC33          0x0e42
+#define MT6328_AUXADC_BUF0           0x0e44
+#define MT6328_AUXADC_BUF1           0x0e46
+#define MT6328_AUXADC_BUF2           0x0e48
+#define MT6328_AUXADC_BUF3           0x0e4a
+#define MT6328_AUXADC_BUF4           0x0e4c
+#define MT6328_AUXADC_BUF5           0x0e4e
+#define MT6328_AUXADC_BUF6           0x0e50
+#define MT6328_AUXADC_BUF7           0x0e52
+#define MT6328_AUXADC_BUF8           0x0e54
+#define MT6328_AUXADC_BUF9           0x0e56
+#define MT6328_AUXADC_BUF10          0x0e58
+#define MT6328_AUXADC_BUF11          0x0e5a
+#define MT6328_AUXADC_BUF12          0x0e5c
+#define MT6328_AUXADC_BUF13          0x0e5e
+#define MT6328_AUXADC_BUF14          0x0e60
+#define MT6328_AUXADC_BUF15          0x0e62
+#define MT6328_AUXADC_BUF16          0x0e64
+#define MT6328_AUXADC_BUF17          0x0e66
+#define MT6328_AUXADC_BUF18          0x0e68
+#define MT6328_AUXADC_BUF19          0x0e6a
+#define MT6328_AUXADC_BUF20          0x0e6c
+#define MT6328_AUXADC_BUF21          0x0e6e
+#define MT6328_AUXADC_BUF22          0x0e70
+#define MT6328_AUXADC_BUF23          0x0e72
+#define MT6328_AUXADC_BUF24          0x0e74
+#define MT6328_AUXADC_BUF25          0x0e76
+#define MT6328_AUXADC_BUF26          0x0e78
+#define MT6328_AUXADC_BUF27          0x0e7a
+#define MT6328_AUXADC_BUF28          0x0e7c
+#define MT6328_AUXADC_BUF29          0x0e7e
+#define MT6328_AUXADC_BUF30          0x0e80
+#define MT6328_AUXADC_BUF31          0x0e82
+#define MT6328_AUXADC_STA0           0x0e84
+#define MT6328_AUXADC_STA1           0x0e86
+#define MT6328_AUXADC_RQST0          0x0e88
+#define MT6328_AUXADC_RQST0_SET      0x0e8a
+#define MT6328_AUXADC_RQST0_CLR      0x0e8c
+#define MT6328_AUXADC_RQST1          0x0e8e
+#define MT6328_AUXADC_RQST1_SET      0x0e90
+#define MT6328_AUXADC_RQST1_CLR      0x0e92
+#define MT6328_AUXADC_CON0           0x0e94
+#define MT6328_AUXADC_CON0_SET       0x0e96
+#define MT6328_AUXADC_CON0_CLR       0x0e98
+#define MT6328_AUXADC_CON1           0x0e9a
+#define MT6328_AUXADC_CON2           0x0e9c
+#define MT6328_AUXADC_CON3           0x0e9e
+#define MT6328_AUXADC_CON4           0x0ea0
+#define MT6328_AUXADC_CON5           0x0ea2
+#define MT6328_AUXADC_CON6           0x0ea4
+#define MT6328_AUXADC_CON7           0x0ea6
+#define MT6328_AUXADC_CON8           0x0ea8
+#define MT6328_AUXADC_CON9           0x0eaa
+#define MT6328_AUXADC_CON10          0x0eac
+#define MT6328_AUXADC_CON11          0x0eae
+#define MT6328_AUXADC_CON12          0x0eb0
+#define MT6328_AUXADC_CON13          0x0eb2
+#define MT6328_AUXADC_CON14          0x0eb4
+#define MT6328_AUXADC_CON15          0x0eb6
+#define MT6328_AUXADC_CON16          0x0eb8
+#define MT6328_AUXADC_AUTORPT0       0x0eba
+#define MT6328_AUXADC_LBAT0          0x0ebc
+#define MT6328_AUXADC_LBAT1          0x0ebe
+#define MT6328_AUXADC_LBAT2          0x0ec0
+#define MT6328_AUXADC_LBAT3          0x0ec2
+#define MT6328_AUXADC_LBAT4          0x0ec4
+#define MT6328_AUXADC_LBAT5          0x0ec6
+#define MT6328_AUXADC_LBAT6          0x0ec8
+#define MT6328_AUXADC_ACCDET         0x0eca
+#define MT6328_AUXADC_THR0           0x0ecc
+#define MT6328_AUXADC_THR1           0x0ece
+#define MT6328_AUXADC_THR2           0x0ed0
+#define MT6328_AUXADC_THR3           0x0ed2
+#define MT6328_AUXADC_THR4           0x0ed4
+#define MT6328_AUXADC_THR5           0x0ed6
+#define MT6328_AUXADC_THR6           0x0ed8
+#define MT6328_AUXADC_EFUSE0         0x0eda
+#define MT6328_AUXADC_EFUSE1         0x0edc
+#define MT6328_AUXADC_EFUSE2         0x0ede
+#define MT6328_AUXADC_EFUSE3         0x0ee0
+#define MT6328_AUXADC_EFUSE4         0x0ee2
+#define MT6328_AUXADC_EFUSE5         0x0ee4
+#define MT6328_AUXADC_DBG0           0x0ee6
+#define MT6328_AUXADC_IMP0           0x0ee8
+#define MT6328_AUXADC_IMP1           0x0eea
+#define MT6328_AUXADC_VISMPS0_1      0x0eec
+#define MT6328_AUXADC_VISMPS0_2      0x0eee
+#define MT6328_AUXADC_VISMPS0_3      0x0ef0
+#define MT6328_AUXADC_VISMPS0_4      0x0ef2
+#define MT6328_AUXADC_VISMPS0_5      0x0ef4
+#define MT6328_AUXADC_VISMPS0_6      0x0ef6
+#define MT6328_AUXADC_VISMPS0_7      0x0ef8
+#define MT6328_AUXADC_LBAT2_1        0x0efa
+#define MT6328_AUXADC_LBAT2_2        0x0efc
+#define MT6328_AUXADC_LBAT2_3        0x0efe
+#define MT6328_AUXADC_LBAT2_4        0x0f00
+#define MT6328_AUXADC_LBAT2_5        0x0f02
+#define MT6328_AUXADC_LBAT2_6        0x0f04
+#define MT6328_AUXADC_LBAT2_7        0x0f06
+#define MT6328_AUXADC_MDBG_0         0x0f08
+#define MT6328_AUXADC_MDBG_1         0x0f0a
+#define MT6328_AUXADC_MDBG_2         0x0f0c
+#define MT6328_AUXADC_MDRT_0         0x0f0e
+#define MT6328_AUXADC_MDRT_1         0x0f10
+#define MT6328_AUXADC_MDRT_2         0x0f12
+#define MT6328_ACCDET_CON0           0x0f14
+#define MT6328_ACCDET_CON1           0x0f16
+#define MT6328_ACCDET_CON2           0x0f18
+#define MT6328_ACCDET_CON3           0x0f1a
+#define MT6328_ACCDET_CON4           0x0f1c
+#define MT6328_ACCDET_CON5           0x0f1e
+#define MT6328_ACCDET_CON6           0x0f20
+#define MT6328_ACCDET_CON7           0x0f22
+#define MT6328_ACCDET_CON8           0x0f24
+#define MT6328_ACCDET_CON9           0x0f26
+#define MT6328_ACCDET_CON10          0x0f28
+#define MT6328_ACCDET_CON11          0x0f2a
+#define MT6328_ACCDET_CON12          0x0f2c
+#define MT6328_ACCDET_CON13          0x0f2e
+#define MT6328_ACCDET_CON14          0x0f30
+#define MT6328_ACCDET_CON15          0x0f32
+#define MT6328_ACCDET_CON16          0x0f34
+#define MT6328_ACCDET_CON17          0x0f36
+#define MT6328_ACCDET_CON18          0x0f38
+#define MT6328_ACCDET_CON19          0x0f3a
+#define MT6328_ACCDET_CON20          0x0f3c
+#define MT6328_ACCDET_CON21          0x0f3e
+#define MT6328_ACCDET_CON22          0x0f40
+#define MT6328_ACCDET_CON23          0x0f42
+#define MT6328_ACCDET_CON24          0x0f44
+#define MT6328_ACCDET_CON25          0x0f46
+#define MT6328_CHR_CON0              0x0f48
+#define MT6328_CHR_CON1              0x0f4a
+#define MT6328_CHR_CON2              0x0f4c
+#define MT6328_CHR_CON3              0x0f4e
+#define MT6328_CHR_CON4              0x0f50
+#define MT6328_CHR_CON5              0x0f52
+#define MT6328_CHR_CON6              0x0f54
+#define MT6328_CHR_CON7              0x0f56
+#define MT6328_CHR_CON8              0x0f58
+#define MT6328_CHR_CON9              0x0f5a
+#define MT6328_CHR_CON10             0x0f5c
+#define MT6328_CHR_CON11             0x0f5e
+#define MT6328_CHR_CON12             0x0f60
+#define MT6328_CHR_CON13             0x0f62
+#define MT6328_CHR_CON14             0x0f64
+#define MT6328_CHR_CON15             0x0f66
+#define MT6328_CHR_CON16             0x0f68
+#define MT6328_CHR_CON17             0x0f6a
+#define MT6328_CHR_CON18             0x0f6c
+#define MT6328_CHR_CON19             0x0f6e
+#define MT6328_CHR_CON20             0x0f70
+#define MT6328_CHR_CON21             0x0f72
+#define MT6328_CHR_CON22             0x0f74
+#define MT6328_CHR_CON23             0x0f76
+#define MT6328_CHR_CON24             0x0f78
+#define MT6328_CHR_CON25             0x0f7a
+#define MT6328_CHR_CON26             0x0f7c
+#define MT6328_CHR_CON27             0x0f7e
+#define MT6328_CHR_CON28             0x0f80
+#define MT6328_CHR_CON29             0x0f82
+#define MT6328_CHR_CON30             0x0f84
+#define MT6328_CHR_CON31             0x0f86
+#define MT6328_CHR_CON32             0x0f88
+#define MT6328_CHR_CON33             0x0f8a
+#define MT6328_CHR_CON34             0x0f8c
+#define MT6328_CHR_CON35             0x0f8e
+#define MT6328_CHR_CON36             0x0f90
+#define MT6328_CHR_CON37             0x0f92
+#define MT6328_CHR_CON38             0x0f94
+#define MT6328_CHR_CON39             0x0f96
+#define MT6328_CHR_CON40             0x0f98
+#define MT6328_CHR_CON41             0x0f9a
+#define MT6328_CHR_CON42             0x0f9c
+#define MT6328_BATON_CON0            0x0f9e
+#define MT6328_CHR_CON43             0x0fa0
+#define MT6328_EOSC_CALI_CON0        0x0faa
+#define MT6328_EOSC_CALI_CON1        0x0fac
+#define MT6328_VRTC_PWM_CON0         0x0fae
+
+#endif /* __MFD_MT6328_REGISTERS_H__ */
diff --git a/include/linux/mfd/mt6397/core.h b/include/linux/mfd/mt6397/core.h
index 627487e26287..b774c3a4bb62 100644
--- a/include/linux/mfd/mt6397/core.h
+++ b/include/linux/mfd/mt6397/core.h
@@ -12,6 +12,7 @@
 
 enum chip_id {
 	MT6323_CHIP_ID = 0x23,
+	MT6328_CHIP_ID = 0x30,
 	MT6331_CHIP_ID = 0x20,
 	MT6332_CHIP_ID = 0x20,
 	MT6357_CHIP_ID = 0x57,
@@ -65,11 +66,11 @@ struct mt6397_chip {
 	int irq;
 	struct irq_domain *irq_domain;
 	struct mutex irqlock;
-	u16 wake_mask[2];
-	u16 irq_masks_cur[2];
-	u16 irq_masks_cache[2];
-	u16 int_con[2];
-	u16 int_status[2];
+	u16 wake_mask[3];
+	u16 irq_masks_cur[3];
+	u16 irq_masks_cache[3];
+	u16 int_con[3];
+	u16 int_status[3];
 	u16 chip_id;
 	void *irq_data;
 };
-- 
cgit v1.2.3


From f187b9bf1a639090893c31030ddb60f9beae23f0 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 30 Oct 2024 06:18:52 +0100
Subject: block: remove bio_add_zone_append_page

This is only used by the nvmet zns passthrough code, which can trivially
just use bio_add_pc_page and do the sanity check for the max zone append
limit itself.

All future zoned file systems should follow the btrfs lead and let the
upper layers fill up bios unlimited by hardware constraints and split
them to the limits in the I/O submission handler.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
Link: https://lore.kernel.org/r/20241030051859.280923-3-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/bio.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/bio.h b/include/linux/bio.h
index faceadb040f9..4a1bf43ca53d 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -418,8 +418,6 @@ bool __must_check bio_add_folio(struct bio *bio, struct folio *folio,
 				size_t len, size_t off);
 extern int bio_add_pc_page(struct request_queue *, struct bio *, struct page *,
 			   unsigned int, unsigned int);
-int bio_add_zone_append_page(struct bio *bio, struct page *page,
-			     unsigned int len, unsigned int offset);
 void __bio_add_page(struct bio *bio, struct page *page,
 		unsigned int len, unsigned int off);
 void bio_add_folio_nofail(struct bio *bio, struct folio *folio, size_t len,
-- 
cgit v1.2.3


From 336afe0c832d6eb985d0e9dbc5a70929594e58d9 Mon Sep 17 00:00:00 2001
From: Oliver Upton <oliver.upton@linux.dev>
Date: Fri, 25 Oct 2024 18:23:45 +0000
Subject: KVM: arm64: nv: Describe trap behaviour of MDCR_EL2.HPMN

MDCR_EL2.HPMN splits the PMU event counters into two ranges: the first
range is accessible from all ELs, and the second range is accessible
only to EL2/3. Supposing the guest hypervisor allows direct access to
the PMU counters from the L2, KVM needs to locally handle those
accesses.

Add a new complex trap configuration for HPMN that checks if the counter
index is accessible to the current context. As written, the architecture
suggests HPMN only causes PMEVCNTR<n>_EL0 to trap, though intuition (and
the pseudocode) suggest that the trap applies to PMEVTYPER<n>_EL0 as
well.

Reviewed-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20241025182354.3364124-11-oliver.upton@linux.dev
Signed-off-by: Oliver Upton <oliver.upton@linux.dev>
---
 include/kvm/arm_pmu.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include')

diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h
index e08aeec5d936..feb5d1d35f0f 100644
--- a/include/kvm/arm_pmu.h
+++ b/include/kvm/arm_pmu.h
@@ -96,6 +96,7 @@ int kvm_arm_set_default_pmu(struct kvm *kvm);
 u8 kvm_arm_pmu_get_max_counters(struct kvm *kvm);
 
 u64 kvm_vcpu_read_pmcr(struct kvm_vcpu *vcpu);
+bool kvm_pmu_counter_is_hyp(struct kvm_vcpu *vcpu, unsigned int idx);
 #else
 struct kvm_pmu {
 };
@@ -187,6 +188,11 @@ static inline u64 kvm_vcpu_read_pmcr(struct kvm_vcpu *vcpu)
 	return 0;
 }
 
+static inline bool kvm_pmu_counter_is_hyp(struct kvm_vcpu *vcpu, unsigned int idx)
+{
+	return false;
+}
+
 #endif
 
 #endif
-- 
cgit v1.2.3


From a3034dab74fc12d6c0a589e31af9fafc436a4a0e Mon Sep 17 00:00:00 2001
From: Oliver Upton <oliver.upton@linux.dev>
Date: Fri, 25 Oct 2024 18:23:47 +0000
Subject: KVM: arm64: Rename kvm_pmu_valid_counter_mask()

Nested PMU support requires dynamically changing the visible range of
PMU counters based on the exception level and value of MDCR_EL2.HPMN. At
the same time, the PMU emulation code needs to know the absolute number
of implemented counters, regardless of context.

Rename the existing helper to make it obvious that it returns the number
of implemented counters and not anything else.

Reviewed-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20241025182354.3364124-13-oliver.upton@linux.dev
Signed-off-by: Oliver Upton <oliver.upton@linux.dev>
---
 include/kvm/arm_pmu.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h
index feb5d1d35f0f..019ace2bb914 100644
--- a/include/kvm/arm_pmu.h
+++ b/include/kvm/arm_pmu.h
@@ -47,7 +47,7 @@ static __always_inline bool kvm_arm_support_pmu_v3(void)
 #define kvm_arm_pmu_irq_initialized(v)	((v)->arch.pmu.irq_num >= VGIC_NR_SGIS)
 u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx);
 void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 val);
-u64 kvm_pmu_valid_counter_mask(struct kvm_vcpu *vcpu);
+u64 kvm_pmu_implemented_counter_mask(struct kvm_vcpu *vcpu);
 u64 kvm_pmu_get_pmceid(struct kvm_vcpu *vcpu, bool pmceid1);
 void kvm_pmu_vcpu_init(struct kvm_vcpu *vcpu);
 void kvm_pmu_vcpu_reset(struct kvm_vcpu *vcpu);
@@ -114,7 +114,7 @@ static inline u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu,
 }
 static inline void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu,
 					     u64 select_idx, u64 val) {}
-static inline u64 kvm_pmu_valid_counter_mask(struct kvm_vcpu *vcpu)
+static inline u64 kvm_pmu_implemented_counter_mask(struct kvm_vcpu *vcpu)
 {
 	return 0;
 }
-- 
cgit v1.2.3


From 9a1c58cfefb06974a804174f127de3fedc779394 Mon Sep 17 00:00:00 2001
From: Oliver Upton <oliver.upton@linux.dev>
Date: Fri, 25 Oct 2024 18:23:48 +0000
Subject: KVM: arm64: nv: Adjust range of accessible PMCs according to HPMN

The value of MDCR_EL2.HPMN controls the number of event counters made
visible to EL0 and EL1. This means it is possible for the guest
hypervisor to allow direct access to event counters to the L2.

Rework KVM's PMU register emulation to take the effects of HPMN into
account when handling a trap. For bitmask-style registers, writes only
affect accessible registers.

Reviewed-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20241025182354.3364124-14-oliver.upton@linux.dev
Signed-off-by: Oliver Upton <oliver.upton@linux.dev>
---
 include/kvm/arm_pmu.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include')

diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h
index 019ace2bb914..76244f0bd47a 100644
--- a/include/kvm/arm_pmu.h
+++ b/include/kvm/arm_pmu.h
@@ -48,6 +48,7 @@ static __always_inline bool kvm_arm_support_pmu_v3(void)
 u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx);
 void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 val);
 u64 kvm_pmu_implemented_counter_mask(struct kvm_vcpu *vcpu);
+u64 kvm_pmu_accessible_counter_mask(struct kvm_vcpu *vcpu);
 u64 kvm_pmu_get_pmceid(struct kvm_vcpu *vcpu, bool pmceid1);
 void kvm_pmu_vcpu_init(struct kvm_vcpu *vcpu);
 void kvm_pmu_vcpu_reset(struct kvm_vcpu *vcpu);
@@ -118,6 +119,10 @@ static inline u64 kvm_pmu_implemented_counter_mask(struct kvm_vcpu *vcpu)
 {
 	return 0;
 }
+static inline u64 kvm_pmu_accessible_counter_mask(struct kvm_vcpu *vcpu)
+{
+	return 0;
+}
 static inline void kvm_pmu_vcpu_init(struct kvm_vcpu *vcpu) {}
 static inline void kvm_pmu_vcpu_reset(struct kvm_vcpu *vcpu) {}
 static inline void kvm_pmu_vcpu_destroy(struct kvm_vcpu *vcpu) {}
-- 
cgit v1.2.3


From ae323e035801def145776dddf46c01ca1b90d21d Mon Sep 17 00:00:00 2001
From: Oliver Upton <oliver.upton@linux.dev>
Date: Fri, 25 Oct 2024 18:25:59 +0000
Subject: KVM: arm64: nv: Reprogram PMU events affected by nested transition

Start reprogramming PMU events at nested boundaries now that everything
is in place to handle the EL2 event filter. Only repaint events where
the filter differs between EL1 and EL2 as a slight optimization.

PMU now 'works' for nested VMs, albeit slow.

Reviewed-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20241025182559.3364829-1-oliver.upton@linux.dev
Signed-off-by: Oliver Upton <oliver.upton@linux.dev>
---
 include/kvm/arm_pmu.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h
index 76244f0bd47a..e61dd7dd2286 100644
--- a/include/kvm/arm_pmu.h
+++ b/include/kvm/arm_pmu.h
@@ -98,6 +98,7 @@ u8 kvm_arm_pmu_get_max_counters(struct kvm *kvm);
 
 u64 kvm_vcpu_read_pmcr(struct kvm_vcpu *vcpu);
 bool kvm_pmu_counter_is_hyp(struct kvm_vcpu *vcpu, unsigned int idx);
+void kvm_pmu_nested_transition(struct kvm_vcpu *vcpu);
 #else
 struct kvm_pmu {
 };
@@ -198,6 +199,8 @@ static inline bool kvm_pmu_counter_is_hyp(struct kvm_vcpu *vcpu, unsigned int id
 	return false;
 }
 
+static inline void kvm_pmu_nested_transition(struct kvm_vcpu *vcpu) {}
+
 #endif
 
 #endif
-- 
cgit v1.2.3


From 16aed0a6520ba01b7d22c32e193fc1ec674f92d4 Mon Sep 17 00:00:00 2001
From: Frank Li <Frank.Li@nxp.com>
Date: Mon, 21 Oct 2024 11:45:06 -0400
Subject: i3c: master: Replace hard code 2 with macro I3C_ADDR_SLOT_STATUS_BITS

Replace the hardcoded value 2, which indicates 2 bits for I3C address
status, with the predefined macro I3C_ADDR_SLOT_STATUS_BITS.

Improve maintainability and extensibility of the code.

Reviewed-by: Miquel Raynal <miquel.raynal@bootlin.com>
Signed-off-by: Frank Li <Frank.Li@nxp.com>
Link: https://lore.kernel.org/r/20241021-i3c_dts_assign-v8-1-4098b8bde01e@nxp.com
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
---
 include/linux/i3c/master.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/i3c/master.h b/include/linux/i3c/master.h
index 2a1ed05d5782..2100547b2d8d 100644
--- a/include/linux/i3c/master.h
+++ b/include/linux/i3c/master.h
@@ -313,6 +313,8 @@ enum i3c_addr_slot_status {
 	I3C_ADDR_SLOT_STATUS_MASK = 3,
 };
 
+#define I3C_ADDR_SLOT_STATUS_BITS 2
+
 /**
  * struct i3c_bus - I3C bus object
  * @cur_master: I3C master currently driving the bus. Since I3C is multi-master
@@ -354,7 +356,7 @@ enum i3c_addr_slot_status {
 struct i3c_bus {
 	struct i3c_dev_desc *cur_master;
 	int id;
-	unsigned long addrslots[((I2C_MAX_ADDR + 1) * 2) / BITS_PER_LONG];
+	unsigned long addrslots[((I2C_MAX_ADDR + 1) * I3C_ADDR_SLOT_STATUS_BITS) / BITS_PER_LONG];
 	enum i3c_bus_mode mode;
 	struct {
 		unsigned long i3c;
-- 
cgit v1.2.3


From 2f552fa280590e61bd3dbe66a7b54b99caa642a4 Mon Sep 17 00:00:00 2001
From: Frank Li <Frank.Li@nxp.com>
Date: Mon, 21 Oct 2024 11:45:07 -0400
Subject: i3c: master: Extend address status bit to 4 and add
 I3C_ADDR_SLOT_EXT_DESIRED
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Extend the address status bit to 4 and introduce the
I3C_ADDR_SLOT_EXT_DESIRED macro to indicate that a device prefers a
specific address. This is generally set by the 'assigned-address' in the
device tree source (dts) file.

 ┌────┬─────────────┬───┬─────────┬───┐
 │S/Sr│ 7'h7E RnW=0 │ACK│ ENTDAA  │ T ├────┐
 └────┴─────────────┴───┴─────────┴───┘    │
 ┌─────────────────────────────────────────┘
 │  ┌──┬─────────────┬───┬─────────────────┬────────────────┬───┬─────────┐
 └─►│Sr│7'h7E RnW=1  │ACK│48bit UID BCR DCR│Assign 7bit Addr│PAR│ ACK/NACK│
    └──┴─────────────┴───┴─────────────────┴────────────────┴───┴─────────┘

Some master controllers (such as HCI) need to prepare the entire above
transaction before sending it out to the I3C bus. This means that a 7-bit
dynamic address needs to be allocated before knowing the target device's
UID information.

However, some I3C targets may request specific addresses (called as
"init_dyn_addr"), which is typically specified by the DT-'s
assigned-address property. Lower addresses having higher IBI priority. If
it is available, i3c_bus_get_free_addr() preferably return a free address
that is not in the list of desired addresses (called as "init_dyn_addr").
This allows the device with the "init_dyn_addr" to switch to its
"init_dyn_addr" when it hot-joins the I3C bus. Otherwise, if the
"init_dyn_addr" is already in use by another I3C device, the target device
will not be able to switch to its desired address.

If the previous step fails, fallback returning one of the remaining
unassigned address, regardless of its state in the desired list.

Reviewed-by: Miquel Raynal <miquel.raynal@bootlin.com>
Signed-off-by: Frank Li <Frank.Li@nxp.com>
Link: https://lore.kernel.org/r/20241021-i3c_dts_assign-v8-2-4098b8bde01e@nxp.com
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
---
 include/linux/i3c/master.h | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/i3c/master.h b/include/linux/i3c/master.h
index 2100547b2d8d..6e5328c6c6af 100644
--- a/include/linux/i3c/master.h
+++ b/include/linux/i3c/master.h
@@ -298,7 +298,8 @@ enum i3c_open_drain_speed {
  * @I3C_ADDR_SLOT_I2C_DEV: address is assigned to an I2C device
  * @I3C_ADDR_SLOT_I3C_DEV: address is assigned to an I3C device
  * @I3C_ADDR_SLOT_STATUS_MASK: address slot mask
- *
+ * @I3C_ADDR_SLOT_EXT_DESIRED: the bitmask represents addresses that are preferred by some devices,
+ *			       such as the "assigned-address" property in a device tree source.
  * On an I3C bus, addresses are assigned dynamically, and we need to know which
  * addresses are free to use and which ones are already assigned.
  *
@@ -311,9 +312,11 @@ enum i3c_addr_slot_status {
 	I3C_ADDR_SLOT_I2C_DEV,
 	I3C_ADDR_SLOT_I3C_DEV,
 	I3C_ADDR_SLOT_STATUS_MASK = 3,
+	I3C_ADDR_SLOT_EXT_STATUS_MASK = 7,
+	I3C_ADDR_SLOT_EXT_DESIRED = BIT(2),
 };
 
-#define I3C_ADDR_SLOT_STATUS_BITS 2
+#define I3C_ADDR_SLOT_STATUS_BITS 4
 
 /**
  * struct i3c_bus - I3C bus object
-- 
cgit v1.2.3


From 128d333f0dff2fbe41c546581c6f151e9d68cd4c Mon Sep 17 00:00:00 2001
From: Daeho Jeong <daehojeong@google.com>
Date: Thu, 17 Oct 2024 10:31:53 -0700
Subject: f2fs: introduce device aliasing file

F2FS should understand how the device aliasing file works and support
deleting the file after use. A device aliasing file can be created by
mkfs.f2fs tool and it can map the whole device with an extent, not
using node blocks. The file space should be pinned and normally used for
read-only usages.

Signed-off-by: Daeho Jeong <daehojeong@google.com>
Signed-off-by: Chao Yu <chao@kernel.org>
Reviewed-by: Chao Yu <chao@kernel.org>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 include/uapi/linux/f2fs.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/uapi/linux/f2fs.h b/include/uapi/linux/f2fs.h
index 955d440be104..f7aaf8d23e20 100644
--- a/include/uapi/linux/f2fs.h
+++ b/include/uapi/linux/f2fs.h
@@ -43,6 +43,7 @@
 #define F2FS_IOC_DECOMPRESS_FILE	_IO(F2FS_IOCTL_MAGIC, 23)
 #define F2FS_IOC_COMPRESS_FILE		_IO(F2FS_IOCTL_MAGIC, 24)
 #define F2FS_IOC_START_ATOMIC_REPLACE	_IO(F2FS_IOCTL_MAGIC, 25)
+#define F2FS_IOC_GET_DEV_ALIAS_FILE	_IOR(F2FS_IOCTL_MAGIC, 26, __u32)
 
 /*
  * should be same as XFS_IOC_GOINGDOWN.
-- 
cgit v1.2.3


From d86c7a9162ae925dcb6632a4544c62eb0eb5c7cc Mon Sep 17 00:00:00 2001
From: George Guo <guodongtai@kylinos.cn>
Date: Mon, 28 Oct 2024 20:34:35 +0800
Subject: netlabel: document doi_remove field of struct netlbl_calipso_ops

Add documentation of doi_remove field to Kernel doc for struct netlbl_calipso_ops.

Flagged by ./scripts/kernel-doc -none.

Signed-off-by: George Guo <guodongtai@kylinos.cn>
Acked-by: Paul Moore <paul@paul-moore.com>
Link: https://patch.msgid.link/20241028123435.3495916-1-dongtai.guo@linux.dev
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/netlabel.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/net/netlabel.h b/include/net/netlabel.h
index 529160f76cac..4afd934b1238 100644
--- a/include/net/netlabel.h
+++ b/include/net/netlabel.h
@@ -208,6 +208,7 @@ struct netlbl_lsm_secattr {
  * struct netlbl_calipso_ops - NetLabel CALIPSO operations
  * @doi_add: add a CALIPSO DOI
  * @doi_free: free a CALIPSO DOI
+ * @doi_remove: remove a CALIPSO DOI
  * @doi_getdef: returns a reference to a DOI
  * @doi_putdef: releases a reference of a DOI
  * @doi_walk: enumerate the DOI list
-- 
cgit v1.2.3


From 4138e9ec00936c9fe7d0fe961e32f381b1e36926 Mon Sep 17 00:00:00 2001
From: Antonio Quartulli <antonio@openvpn.net>
Date: Tue, 29 Oct 2024 11:47:14 +0100
Subject: netlink: add NLA_POLICY_MAX_LEN macro

Similarly to NLA_POLICY_MIN_LEN, NLA_POLICY_MAX_LEN defines a policy
with a maximum length value.

The netlink generator for YAML specs has been extended accordingly.

Signed-off-by: Antonio Quartulli <antonio@openvpn.net>
Reviewed-by: Donald Hunter <donald.hunter@gmail.com>
Link: https://patch.msgid.link/20241029-b4-ovpn-v11-1-de4698c73a25@openvpn.net
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/netlink.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/net/netlink.h b/include/net/netlink.h
index db6af207287c..2dc671c977ff 100644
--- a/include/net/netlink.h
+++ b/include/net/netlink.h
@@ -469,6 +469,7 @@ struct nla_policy {
 	.max = _len						\
 }
 #define NLA_POLICY_MIN_LEN(_len)	NLA_POLICY_MIN(NLA_BINARY, _len)
+#define NLA_POLICY_MAX_LEN(_len)	NLA_POLICY_MAX(NLA_BINARY, _len)
 
 /**
  * struct nl_info - netlink source information
-- 
cgit v1.2.3


From a911bad094b010e276f072fe9a599b66e59ed5fe Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 29 Oct 2024 19:14:25 +0000
Subject: dql: annotate data-races around dql->last_obj_cnt

dql->last_obj_cnt is read/written from different contexts,
without any lock synchronization.

Use READ_ONCE()/WRITE_ONCE() to avoid load/store tearing.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Joe Damato <jdamato@fastly.com>
Link: https://patch.msgid.link/20241029191425.2519085-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/dynamic_queue_limits.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/dynamic_queue_limits.h b/include/linux/dynamic_queue_limits.h
index 281298e77a15..808b1a5102e7 100644
--- a/include/linux/dynamic_queue_limits.h
+++ b/include/linux/dynamic_queue_limits.h
@@ -127,7 +127,7 @@ static inline void dql_queued(struct dql *dql, unsigned int count)
 	if (WARN_ON_ONCE(count > DQL_MAX_OBJECT))
 		return;
 
-	dql->last_obj_cnt = count;
+	WRITE_ONCE(dql->last_obj_cnt, count);
 
 	/* We want to force a write first, so that cpu do not attempt
 	 * to get cache line containing last_obj_cnt, num_queued, adj_limit
-- 
cgit v1.2.3


From d3eeb1ac0b99cdcd99420fb270041da946d2d360 Mon Sep 17 00:00:00 2001
From: Angelo Dureghello <adureghello@baylibre.com>
Date: Mon, 28 Oct 2024 22:45:30 +0100
Subject: iio: backend: extend features

Extend backend features with new calls needed later on this
patchset from axi version of ad3552r.

The follwoing calls are added:

iio_backend_ddr_enable()
	enable ddr bus transfer
iio_backend_ddr_disable()
	disable ddr bus transfer
iio_backend_data_stream_enable()
	enable data stream over bus interface
iio_backend_data_stream_disable()
	disable data stream over bus interface
iio_backend_data_transfer_addr()
	define the target register address where the DAC sample
	will be written.

Reviewed-by: Nuno Sa <nuno.sa@analog.com>
Signed-off-by: Angelo Dureghello <adureghello@baylibre.com>
Reviewed-by: David Lechner <dlechner@baylibre.com>
Link: https://patch.msgid.link/20241028-wip-bl-ad3552r-axi-v0-iio-testing-v9-3-f6960b4f9719@kernel-space.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 include/linux/iio/backend.h | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

(limited to 'include')

diff --git a/include/linux/iio/backend.h b/include/linux/iio/backend.h
index 37d56914d485..10be00f3b120 100644
--- a/include/linux/iio/backend.h
+++ b/include/linux/iio/backend.h
@@ -14,12 +14,14 @@ struct iio_dev;
 enum iio_backend_data_type {
 	IIO_BACKEND_TWOS_COMPLEMENT,
 	IIO_BACKEND_OFFSET_BINARY,
+	IIO_BACKEND_DATA_UNSIGNED,
 	IIO_BACKEND_DATA_TYPE_MAX
 };
 
 enum iio_backend_data_source {
 	IIO_BACKEND_INTERNAL_CONTINUOUS_WAVE,
 	IIO_BACKEND_EXTERNAL,
+	IIO_BACKEND_INTERNAL_RAMP_16BIT,
 	IIO_BACKEND_DATA_SOURCE_MAX
 };
 
@@ -89,6 +91,11 @@ enum iio_backend_sample_trigger {
  * @read_raw: Read a channel attribute from a backend device
  * @debugfs_print_chan_status: Print channel status into a buffer.
  * @debugfs_reg_access: Read or write register value of backend.
+ * @ddr_enable: Enable interface DDR (Double Data Rate) mode.
+ * @ddr_disable: Disable interface DDR (Double Data Rate) mode.
+ * @data_stream_enable: Enable data stream.
+ * @data_stream_disable: Disable data stream.
+ * @data_transfer_addr: Set data address.
  **/
 struct iio_backend_ops {
 	int (*enable)(struct iio_backend *back);
@@ -129,6 +136,11 @@ struct iio_backend_ops {
 					 size_t len);
 	int (*debugfs_reg_access)(struct iio_backend *back, unsigned int reg,
 				  unsigned int writeval, unsigned int *readval);
+	int (*ddr_enable)(struct iio_backend *back);
+	int (*ddr_disable)(struct iio_backend *back);
+	int (*data_stream_enable)(struct iio_backend *back);
+	int (*data_stream_disable)(struct iio_backend *back);
+	int (*data_transfer_addr)(struct iio_backend *back, u32 address);
 };
 
 /**
@@ -164,6 +176,11 @@ int iio_backend_data_sample_trigger(struct iio_backend *back,
 int devm_iio_backend_request_buffer(struct device *dev,
 				    struct iio_backend *back,
 				    struct iio_dev *indio_dev);
+int iio_backend_ddr_enable(struct iio_backend *back);
+int iio_backend_ddr_disable(struct iio_backend *back);
+int iio_backend_data_stream_enable(struct iio_backend *back);
+int iio_backend_data_stream_disable(struct iio_backend *back);
+int iio_backend_data_transfer_addr(struct iio_backend *back, u32 address);
 ssize_t iio_backend_ext_info_set(struct iio_dev *indio_dev, uintptr_t private,
 				 const struct iio_chan_spec *chan,
 				 const char *buf, size_t len);
-- 
cgit v1.2.3


From 76c6217c31266e800b67a476bba59dfeb9858a90 Mon Sep 17 00:00:00 2001
From: Ryan Chen <ryan_chen@aspeedtech.com>
Date: Wed, 23 Oct 2024 17:01:51 +0800
Subject: dt-bindings: mfd: aspeed: Support for AST2700

Add reset, clk dt bindings headers, and update compatible
support for AST2700 clk, silicon-id in yaml.

Signed-off-by: Ryan Chen <ryan_chen@aspeedtech.com>
Reviewed-by: Krzysztof Kozlowski <krzk@kernel.org>
Link: https://lore.kernel.org/r/20241023090153.1395220-2-ryan_chen@aspeedtech.com
Signed-off-by: Lee Jones <lee@kernel.org>
---
 include/dt-bindings/clock/aspeed,ast2700-scu.h | 163 +++++++++++++++++++++++++
 include/dt-bindings/reset/aspeed,ast2700-scu.h | 124 +++++++++++++++++++
 2 files changed, 287 insertions(+)
 create mode 100644 include/dt-bindings/clock/aspeed,ast2700-scu.h
 create mode 100644 include/dt-bindings/reset/aspeed,ast2700-scu.h

(limited to 'include')

diff --git a/include/dt-bindings/clock/aspeed,ast2700-scu.h b/include/dt-bindings/clock/aspeed,ast2700-scu.h
new file mode 100644
index 000000000000..63021af3caf5
--- /dev/null
+++ b/include/dt-bindings/clock/aspeed,ast2700-scu.h
@@ -0,0 +1,163 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/*
+ * Device Tree binding constants for AST2700 clock controller.
+ *
+ * Copyright (c) 2024 Aspeed Technology Inc.
+ */
+
+#ifndef __DT_BINDINGS_CLOCK_AST2700_H
+#define __DT_BINDINGS_CLOCK_AST2700_H
+
+/* SOC0 clk */
+#define SCU0_CLKIN		0
+#define SCU0_CLK_24M		1
+#define SCU0_CLK_192M		2
+#define SCU0_CLK_UART		3
+#define SCU0_CLK_UART_DIV13	3
+#define SCU0_CLK_PSP		4
+#define SCU0_CLK_HPLL		5
+#define SCU0_CLK_HPLL_DIV2	6
+#define SCU0_CLK_HPLL_DIV4	7
+#define SCU0_CLK_HPLL_DIV_AHB	8
+#define SCU0_CLK_DPLL		9
+#define SCU0_CLK_MPLL		10
+#define SCU0_CLK_MPLL_DIV2	11
+#define SCU0_CLK_MPLL_DIV4	12
+#define SCU0_CLK_MPLL_DIV8	13
+#define SCU0_CLK_MPLL_DIV_AHB	14
+#define SCU0_CLK_D0		15
+#define SCU0_CLK_D1		16
+#define SCU0_CLK_CRT0		17
+#define SCU0_CLK_CRT1		18
+#define SCU0_CLK_MPHY		19
+#define SCU0_CLK_AXI0		20
+#define SCU0_CLK_AXI1		21
+#define SCU0_CLK_AHB		22
+#define SCU0_CLK_APB		23
+#define SCU0_CLK_UART4		24
+#define SCU0_CLK_EMMCMUX	25
+#define SCU0_CLK_EMMC		26
+#define SCU0_CLK_U2PHY_CLK12M	27
+#define SCU0_CLK_U2PHY_REFCLK	28
+
+/* SOC0 clk-gate */
+#define SCU0_CLK_GATE_MCLK	29
+#define SCU0_CLK_GATE_ECLK	30
+#define SCU0_CLK_GATE_2DCLK	31
+#define SCU0_CLK_GATE_VCLK	32
+#define SCU0_CLK_GATE_BCLK	33
+#define SCU0_CLK_GATE_VGA0CLK	34
+#define SCU0_CLK_GATE_REFCLK	35
+#define SCU0_CLK_GATE_PORTBUSB2CLK	36
+#define SCU0_CLK_GATE_UHCICLK	37
+#define SCU0_CLK_GATE_VGA1CLK	38
+#define SCU0_CLK_GATE_DDRPHYCLK	39
+#define SCU0_CLK_GATE_E2M0CLK	40
+#define SCU0_CLK_GATE_HACCLK	41
+#define SCU0_CLK_GATE_PORTAUSB2CLK	42
+#define SCU0_CLK_GATE_UART4CLK	43
+#define SCU0_CLK_GATE_SLICLK	44
+#define SCU0_CLK_GATE_DACCLK	45
+#define SCU0_CLK_GATE_DP	46
+#define SCU0_CLK_GATE_E2M1CLK	47
+#define SCU0_CLK_GATE_CRT0CLK	48
+#define SCU0_CLK_GATE_CRT1CLK	49
+#define SCU0_CLK_GATE_ECDSACLK	50
+#define SCU0_CLK_GATE_RSACLK	51
+#define SCU0_CLK_GATE_RVAS0CLK	52
+#define SCU0_CLK_GATE_UFSCLK	53
+#define SCU0_CLK_GATE_EMMCCLK	54
+#define SCU0_CLK_GATE_RVAS1CLK	55
+
+/* SOC1 clk */
+#define SCU1_CLKIN		0
+#define SCU1_CLK_HPLL		1
+#define SCU1_CLK_APLL		2
+#define SCU1_CLK_APLL_DIV2	3
+#define SCU1_CLK_APLL_DIV4	4
+#define SCU1_CLK_DPLL		5
+#define SCU1_CLK_UXCLK		6
+#define SCU1_CLK_HUXCLK		7
+#define SCU1_CLK_UARTX		8
+#define SCU1_CLK_HUARTX		9
+#define SCU1_CLK_AHB		10
+#define SCU1_CLK_APB		11
+#define SCU1_CLK_UART0		12
+#define SCU1_CLK_UART1		13
+#define SCU1_CLK_UART2		14
+#define SCU1_CLK_UART3		15
+#define SCU1_CLK_UART5		16
+#define SCU1_CLK_UART6		17
+#define SCU1_CLK_UART7		18
+#define SCU1_CLK_UART8		19
+#define SCU1_CLK_UART9		20
+#define SCU1_CLK_UART10		21
+#define SCU1_CLK_UART11		22
+#define SCU1_CLK_UART12		23
+#define SCU1_CLK_UART13		24
+#define SCU1_CLK_UART14		25
+#define SCU1_CLK_APLL_DIVN	26
+#define SCU1_CLK_SDMUX		27
+#define SCU1_CLK_SDCLK		28
+#define SCU1_CLK_RMII		29
+#define SCU1_CLK_RGMII		30
+#define SCU1_CLK_MACHCLK	31
+#define SCU1_CLK_MAC0RCLK	32
+#define SCU1_CLK_MAC1RCLK	33
+#define SCU1_CLK_CAN		34
+
+/* SOC1 clk gate */
+#define SCU1_CLK_GATE_LCLK0		35
+#define SCU1_CLK_GATE_LCLK1		36
+#define SCU1_CLK_GATE_ESPI0CLK		37
+#define SCU1_CLK_GATE_ESPI1CLK		38
+#define SCU1_CLK_GATE_SDCLK		39
+#define SCU1_CLK_GATE_IPEREFCLK		40
+#define SCU1_CLK_GATE_REFCLK		41
+#define SCU1_CLK_GATE_LPCHCLK		42
+#define SCU1_CLK_GATE_MAC0CLK		43
+#define SCU1_CLK_GATE_MAC1CLK		44
+#define SCU1_CLK_GATE_MAC2CLK		45
+#define SCU1_CLK_GATE_UART0CLK		46
+#define SCU1_CLK_GATE_UART1CLK		47
+#define SCU1_CLK_GATE_UART2CLK		48
+#define SCU1_CLK_GATE_UART3CLK		49
+#define SCU1_CLK_GATE_I2CCLK		50
+#define SCU1_CLK_GATE_I3C0CLK		51
+#define SCU1_CLK_GATE_I3C1CLK		52
+#define SCU1_CLK_GATE_I3C2CLK		53
+#define SCU1_CLK_GATE_I3C3CLK		54
+#define SCU1_CLK_GATE_I3C4CLK		55
+#define SCU1_CLK_GATE_I3C5CLK		56
+#define SCU1_CLK_GATE_I3C6CLK		57
+#define SCU1_CLK_GATE_I3C7CLK		58
+#define SCU1_CLK_GATE_I3C8CLK		59
+#define SCU1_CLK_GATE_I3C9CLK		60
+#define SCU1_CLK_GATE_I3C10CLK		61
+#define SCU1_CLK_GATE_I3C11CLK		62
+#define SCU1_CLK_GATE_I3C12CLK		63
+#define SCU1_CLK_GATE_I3C13CLK		64
+#define SCU1_CLK_GATE_I3C14CLK		65
+#define SCU1_CLK_GATE_I3C15CLK		66
+#define SCU1_CLK_GATE_UART5CLK		67
+#define SCU1_CLK_GATE_UART6CLK		68
+#define SCU1_CLK_GATE_UART7CLK		69
+#define SCU1_CLK_GATE_UART8CLK		70
+#define SCU1_CLK_GATE_UART9CLK		71
+#define SCU1_CLK_GATE_UART10CLK		72
+#define SCU1_CLK_GATE_UART11CLK		73
+#define SCU1_CLK_GATE_UART12CLK		74
+#define SCU1_CLK_GATE_FSICLK		75
+#define SCU1_CLK_GATE_LTPIPHYCLK	76
+#define SCU1_CLK_GATE_LTPICLK		77
+#define SCU1_CLK_GATE_VGALCLK		78
+#define SCU1_CLK_GATE_UHCICLK		79
+#define SCU1_CLK_GATE_CANCLK		80
+#define SCU1_CLK_GATE_PCICLK		81
+#define SCU1_CLK_GATE_SLICLK		82
+#define SCU1_CLK_GATE_E2MCLK		83
+#define SCU1_CLK_GATE_PORTCUSB2CLK	84
+#define SCU1_CLK_GATE_PORTDUSB2CLK	85
+#define SCU1_CLK_GATE_LTPI1TXCLK	86
+
+#endif
diff --git a/include/dt-bindings/reset/aspeed,ast2700-scu.h b/include/dt-bindings/reset/aspeed,ast2700-scu.h
new file mode 100644
index 000000000000..d53c719b7a66
--- /dev/null
+++ b/include/dt-bindings/reset/aspeed,ast2700-scu.h
@@ -0,0 +1,124 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/*
+ * Device Tree binding constants for AST2700 reset controller.
+ *
+ * Copyright (c) 2024 Aspeed Technology Inc.
+ */
+
+#ifndef _MACH_ASPEED_AST2700_RESET_H_
+#define _MACH_ASPEED_AST2700_RESET_H_
+
+/* SOC0 */
+#define SCU0_RESET_SDRAM	0
+#define SCU0_RESET_DDRPHY	1
+#define SCU0_RESET_RSA		2
+#define SCU0_RESET_SHA3		3
+#define SCU0_RESET_HACE		4
+#define SCU0_RESET_SOC		5
+#define SCU0_RESET_VIDEO	6
+#define SCU0_RESET_2D		7
+#define SCU0_RESET_PCIS		8
+#define SCU0_RESET_RVAS0	9
+#define SCU0_RESET_RVAS1	10
+#define SCU0_RESET_SM3		11
+#define SCU0_RESET_SM4		12
+#define SCU0_RESET_CRT0		13
+#define SCU0_RESET_ECC		14
+#define SCU0_RESET_DP_PCI	15
+#define SCU0_RESET_UFS		16
+#define SCU0_RESET_EMMC		17
+#define SCU0_RESET_PCIE1RST	18
+#define SCU0_RESET_PCIE1RSTOE	19
+#define SCU0_RESET_PCIE0RST	20
+#define SCU0_RESET_PCIE0RSTOE	21
+#define SCU0_RESET_JTAG		22
+#define SCU0_RESET_MCTP0	23
+#define SCU0_RESET_MCTP1	24
+#define SCU0_RESET_XDMA0	25
+#define SCU0_RESET_XDMA1	26
+#define SCU0_RESET_H2X1		27
+#define SCU0_RESET_DP		28
+#define SCU0_RESET_DP_MCU	29
+#define SCU0_RESET_SSP		30
+#define SCU0_RESET_H2X0		31
+#define SCU0_RESET_PORTA_VHUB	32
+#define SCU0_RESET_PORTA_PHY3	33
+#define SCU0_RESET_PORTA_XHCI	34
+#define SCU0_RESET_PORTB_VHUB	35
+#define SCU0_RESET_PORTB_PHY3	36
+#define SCU0_RESET_PORTB_XHCI	37
+#define SCU0_RESET_PORTA_VHUB_EHCI	38
+#define SCU0_RESET_PORTB_VHUB_EHCI	39
+#define SCU0_RESET_UHCI		40
+#define SCU0_RESET_TSP		41
+#define SCU0_RESET_E2M0		42
+#define SCU0_RESET_E2M1		43
+#define SCU0_RESET_VLINK	44
+
+/* SOC1 */
+#define SCU1_RESET_LPC0		0
+#define SCU1_RESET_LPC1		1
+#define SCU1_RESET_MII		2
+#define SCU1_RESET_PECI		3
+#define SCU1_RESET_PWM		4
+#define SCU1_RESET_MAC0		5
+#define SCU1_RESET_MAC1		6
+#define SCU1_RESET_MAC2		7
+#define SCU1_RESET_ADC		8
+#define SCU1_RESET_SD		9
+#define SCU1_RESET_ESPI0	10
+#define SCU1_RESET_ESPI1	11
+#define SCU1_RESET_JTAG1	12
+#define SCU1_RESET_SPI0		13
+#define SCU1_RESET_SPI1		14
+#define SCU1_RESET_SPI2		15
+#define SCU1_RESET_I3C0		16
+#define SCU1_RESET_I3C1		17
+#define SCU1_RESET_I3C2		18
+#define SCU1_RESET_I3C3		19
+#define SCU1_RESET_I3C4		20
+#define SCU1_RESET_I3C5		21
+#define SCU1_RESET_I3C6		22
+#define SCU1_RESET_I3C7		23
+#define SCU1_RESET_I3C8		24
+#define SCU1_RESET_I3C9		25
+#define SCU1_RESET_I3C10	26
+#define SCU1_RESET_I3C11	27
+#define SCU1_RESET_I3C12	28
+#define SCU1_RESET_I3C13	29
+#define SCU1_RESET_I3C14	30
+#define SCU1_RESET_I3C15	31
+#define SCU1_RESET_MCU0		32
+#define SCU1_RESET_MCU1		33
+#define SCU1_RESET_H2A_SPI1	34
+#define SCU1_RESET_H2A_SPI2	35
+#define SCU1_RESET_UART0	36
+#define SCU1_RESET_UART1	37
+#define SCU1_RESET_UART2	38
+#define SCU1_RESET_UART3	39
+#define SCU1_RESET_I2C_FILTER	40
+#define SCU1_RESET_CALIPTRA	41
+#define SCU1_RESET_XDMA		42
+#define SCU1_RESET_FSI		43
+#define SCU1_RESET_CAN		44
+#define SCU1_RESET_MCTP		45
+#define SCU1_RESET_I2C		46
+#define SCU1_RESET_UART6	47
+#define SCU1_RESET_UART7	48
+#define SCU1_RESET_UART8	49
+#define SCU1_RESET_UART9	50
+#define SCU1_RESET_LTPI0	51
+#define SCU1_RESET_VGAL		52
+#define SCU1_RESET_LTPI1	53
+#define SCU1_RESET_ACE		54
+#define SCU1_RESET_E2M		55
+#define SCU1_RESET_UHCI		56
+#define SCU1_RESET_PORTC_USB2UART	57
+#define SCU1_RESET_PORTC_VHUB_EHCI	58
+#define SCU1_RESET_PORTD_USB2UART	59
+#define SCU1_RESET_PORTD_VHUB_EHCI	60
+#define SCU1_RESET_H2X		61
+#define SCU1_RESET_I3CDMA	62
+#define SCU1_RESET_PCIE2RST	63
+
+#endif  /* _MACH_ASPEED_AST2700_RESET_H_ */
-- 
cgit v1.2.3


From e9f0a363473585a0f51b1b61a9bb15a63808d6ea Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Tue, 22 Oct 2024 13:01:12 +0200
Subject: tracing: Remove TRACE_FLAG_IRQS_NOSUPPORT

It was possible to enable tracing with no IRQ tracing support. The
tracing infrastructure would then record TRACE_FLAG_IRQS_NOSUPPORT as
the only tracing flag and show an 'X' in the output.

The last user of this feature was PPC32 which managed to implement it
during PowerPC merge in 2009. Since then, it was unused and the PPC32
dependency was finally removed in commit 0ea5ee035133a ("tracing: Remove
PPC32 wart from config TRACING_SUPPORT").
Since the PowerPC merge the code behind !CONFIG_TRACE_IRQFLAGS_SUPPORT
with TRACING enabled can no longer be selected used and the 'X' is not
displayed or recorded.

Remove the CONFIG_TRACE_IRQFLAGS_SUPPORT from the tracing code. Remove
TRACE_FLAG_IRQS_NOSUPPORT.

Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Link: https://lore.kernel.org/20241022110112.XJI8I9T2@linutronix.de
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 include/linux/trace_events.h | 13 -------------
 1 file changed, 13 deletions(-)

(limited to 'include')

diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index f8f2e52653df..016b29a56c87 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -184,7 +184,6 @@ unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status);
 
 enum trace_flag_type {
 	TRACE_FLAG_IRQS_OFF		= 0x01,
-	TRACE_FLAG_IRQS_NOSUPPORT	= 0x02,
 	TRACE_FLAG_NEED_RESCHED		= 0x04,
 	TRACE_FLAG_HARDIRQ		= 0x08,
 	TRACE_FLAG_SOFTIRQ		= 0x10,
@@ -193,7 +192,6 @@ enum trace_flag_type {
 	TRACE_FLAG_BH_OFF		= 0x80,
 };
 
-#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
 static inline unsigned int tracing_gen_ctx_flags(unsigned long irqflags)
 {
 	unsigned int irq_status = irqs_disabled_flags(irqflags) ?
@@ -207,17 +205,6 @@ static inline unsigned int tracing_gen_ctx(void)
 	local_save_flags(irqflags);
 	return tracing_gen_ctx_flags(irqflags);
 }
-#else
-
-static inline unsigned int tracing_gen_ctx_flags(unsigned long irqflags)
-{
-	return tracing_gen_ctx_irq_test(TRACE_FLAG_IRQS_NOSUPPORT);
-}
-static inline unsigned int tracing_gen_ctx(void)
-{
-	return tracing_gen_ctx_irq_test(TRACE_FLAG_IRQS_NOSUPPORT);
-}
-#endif
 
 static inline unsigned int tracing_gen_ctx_dec(void)
 {
-- 
cgit v1.2.3


From a9cfb8778c43fc473ae16cddb6e9611705721b31 Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Date: Thu, 31 Oct 2024 11:20:53 -0400
Subject: tracing: Introduce tracepoint extended structure

Shrink the struct tracepoint size from 80 bytes to 72 bytes on x86-64 by
moving the (typically NULL) regfunc/unregfunc pointers to an extended
structure.

Tested-by: Jordan Rife <jrife@google.com>
Cc: Michael Jeanson <mjeanson@efficios.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Yonghong Song <yhs@fb.com>
Cc: Paul E. McKenney <paulmck@kernel.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Andrii Nakryiko <andrii.nakryiko@gmail.com>
Cc: bpf@vger.kernel.org
Cc: Joel Fernandes <joel@joelfernandes.org>
Cc: Jordan Rife <jrife@google.com>
Cc: linux-trace-kernel@vger.kernel.org
Link: https://lore.kernel.org/20241031152056.744137-2-mathieu.desnoyers@efficios.com
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 include/linux/tracepoint-defs.h |  8 ++++++--
 include/linux/tracepoint.h      | 19 +++++++++++++------
 2 files changed, 19 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/linux/tracepoint-defs.h b/include/linux/tracepoint-defs.h
index 60a6e8314d4c..967c08d9da84 100644
--- a/include/linux/tracepoint-defs.h
+++ b/include/linux/tracepoint-defs.h
@@ -29,6 +29,11 @@ struct tracepoint_func {
 	int prio;
 };
 
+struct tracepoint_ext {
+	int (*regfunc)(void);
+	void (*unregfunc)(void);
+};
+
 struct tracepoint {
 	const char *name;		/* Tracepoint name */
 	struct static_key_false key;
@@ -36,9 +41,8 @@ struct tracepoint {
 	void *static_call_tramp;
 	void *iterator;
 	void *probestub;
-	int (*regfunc)(void);
-	void (*unregfunc)(void);
 	struct tracepoint_func __rcu *funcs;
+	struct tracepoint_ext *ext;
 };
 
 #ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS
diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index 0dc67fad706c..862ab49177a4 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -302,7 +302,7 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p)
  * structures, so we create an array of pointers that will be used for iteration
  * on the tracepoints.
  */
-#define DEFINE_TRACE_FN(_name, _reg, _unreg, proto, args)		\
+#define __DEFINE_TRACE_EXT(_name, _ext, proto, args)			\
 	static const char __tpstrtab_##_name[]				\
 	__section("__tracepoints_strings") = #_name;			\
 	extern struct static_call_key STATIC_CALL_KEY(tp_func_##_name);	\
@@ -316,9 +316,9 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p)
 		.static_call_tramp = STATIC_CALL_TRAMP_ADDR(tp_func_##_name), \
 		.iterator = &__traceiter_##_name,			\
 		.probestub = &__probestub_##_name,			\
-		.regfunc = _reg,					\
-		.unregfunc = _unreg,					\
-		.funcs = NULL };					\
+		.funcs = NULL,						\
+		.ext = _ext,						\
+	};								\
 	__TRACEPOINT_ENTRY(_name);					\
 	int __traceiter_##_name(void *__data, proto)			\
 	{								\
@@ -341,8 +341,15 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p)
 	}								\
 	DEFINE_STATIC_CALL(tp_func_##_name, __traceiter_##_name);
 
-#define DEFINE_TRACE(name, proto, args)		\
-	DEFINE_TRACE_FN(name, NULL, NULL, PARAMS(proto), PARAMS(args));
+#define DEFINE_TRACE_FN(_name, _reg, _unreg, _proto, _args)		\
+	static struct tracepoint_ext __tracepoint_ext_##_name = {	\
+		.regfunc = _reg,					\
+		.unregfunc = _unreg,					\
+	};								\
+	__DEFINE_TRACE_EXT(_name, &__tracepoint_ext_##_name, PARAMS(_proto), PARAMS(_args));
+
+#define DEFINE_TRACE(_name, _proto, _args)				\
+	__DEFINE_TRACE_EXT(_name, NULL, PARAMS(_proto), PARAMS(_args));
 
 #define EXPORT_TRACEPOINT_SYMBOL_GPL(name)				\
 	EXPORT_SYMBOL_GPL(__tracepoint_##name);				\
-- 
cgit v1.2.3


From 654ced4a13774e5aec4d9466c97d22df7cb1e60b Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Date: Thu, 31 Oct 2024 11:20:54 -0400
Subject: tracing: Introduce tracepoint_is_faultable()

Introduce a "faultable" flag within the extended structure to know
whether a tracepoint needs rcu tasks trace grace period before reclaim.
This can be queried using tracepoint_is_faultable().

Acked-by: Andrii Nakryiko <andrii@kernel.org>
Tested-by: Jordan Rife <jrife@google.com>
Cc: Michael Jeanson <mjeanson@efficios.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Yonghong Song <yhs@fb.com>
Cc: Paul E. McKenney <paulmck@kernel.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Andrii Nakryiko <andrii.nakryiko@gmail.com>
Cc: bpf@vger.kernel.org
Cc: Joel Fernandes <joel@joelfernandes.org>
Cc: Jordan Rife <jrife@google.com>
Cc: linux-trace-kernel@vger.kernel.org
Link: https://lore.kernel.org/20241031152056.744137-3-mathieu.desnoyers@efficios.com
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 include/linux/tracepoint-defs.h |  2 ++
 include/linux/tracepoint.h      | 24 ++++++++++++++++++++++++
 include/trace/define_trace.h    |  2 +-
 3 files changed, 27 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/tracepoint-defs.h b/include/linux/tracepoint-defs.h
index 967c08d9da84..aebf0571c736 100644
--- a/include/linux/tracepoint-defs.h
+++ b/include/linux/tracepoint-defs.h
@@ -32,6 +32,8 @@ struct tracepoint_func {
 struct tracepoint_ext {
 	int (*regfunc)(void);
 	void (*unregfunc)(void);
+	/* Flags. */
+	unsigned int faultable:1;
 };
 
 struct tracepoint {
diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index 862ab49177a4..906f3091d23d 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -104,6 +104,12 @@ void for_each_tracepoint_in_module(struct module *mod,
  * tracepoint_synchronize_unregister must be called between the last tracepoint
  * probe unregistration and the end of module exit to make sure there is no
  * caller executing a probe when it is freed.
+ *
+ * An alternative is to use the following for batch reclaim associated
+ * with a given tracepoint:
+ *
+ * - tracepoint_is_faultable() == false: call_rcu()
+ * - tracepoint_is_faultable() == true:  call_rcu_tasks_trace()
  */
 #ifdef CONFIG_TRACEPOINTS
 static inline void tracepoint_synchronize_unregister(void)
@@ -111,9 +117,17 @@ static inline void tracepoint_synchronize_unregister(void)
 	synchronize_rcu_tasks_trace();
 	synchronize_rcu();
 }
+static inline bool tracepoint_is_faultable(struct tracepoint *tp)
+{
+	return tp->ext && tp->ext->faultable;
+}
 #else
 static inline void tracepoint_synchronize_unregister(void)
 { }
+static inline bool tracepoint_is_faultable(struct tracepoint *tp)
+{
+	return false;
+}
 #endif
 
 #ifdef CONFIG_HAVE_SYSCALL_TRACEPOINTS
@@ -345,6 +359,15 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p)
 	static struct tracepoint_ext __tracepoint_ext_##_name = {	\
 		.regfunc = _reg,					\
 		.unregfunc = _unreg,					\
+		.faultable = false,					\
+	};								\
+	__DEFINE_TRACE_EXT(_name, &__tracepoint_ext_##_name, PARAMS(_proto), PARAMS(_args));
+
+#define DEFINE_TRACE_SYSCALL(_name, _reg, _unreg, _proto, _args)	\
+	static struct tracepoint_ext __tracepoint_ext_##_name = {	\
+		.regfunc = _reg,					\
+		.unregfunc = _unreg,					\
+		.faultable = true,					\
 	};								\
 	__DEFINE_TRACE_EXT(_name, &__tracepoint_ext_##_name, PARAMS(_proto), PARAMS(_args));
 
@@ -389,6 +412,7 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p)
 #define __DECLARE_TRACE_SYSCALL	__DECLARE_TRACE
 
 #define DEFINE_TRACE_FN(name, reg, unreg, proto, args)
+#define DEFINE_TRACE_SYSCALL(name, reg, unreg, proto, args)
 #define DEFINE_TRACE(name, proto, args)
 #define EXPORT_TRACEPOINT_SYMBOL_GPL(name)
 #define EXPORT_TRACEPOINT_SYMBOL(name)
diff --git a/include/trace/define_trace.h b/include/trace/define_trace.h
index ff5fa17a6259..63fea2218afa 100644
--- a/include/trace/define_trace.h
+++ b/include/trace/define_trace.h
@@ -48,7 +48,7 @@
 
 #undef TRACE_EVENT_SYSCALL
 #define TRACE_EVENT_SYSCALL(name, proto, args, struct, assign, print, reg, unreg) \
-	DEFINE_TRACE_FN(name, reg, unreg, PARAMS(proto), PARAMS(args))
+	DEFINE_TRACE_SYSCALL(name, reg, unreg, PARAMS(proto), PARAMS(args))
 
 #undef TRACE_EVENT_NOP
 #define TRACE_EVENT_NOP(name, proto, args, struct, assign, print)
-- 
cgit v1.2.3


From ee3685a98ea920369696d31e54d1fe6a939ed65f Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Date: Thu, 31 Oct 2024 11:20:56 -0400
Subject: tracing: Add might_fault() check in __DECLARE_TRACE_SYSCALL

Catch incorrect use of syscall tracepoints even if no probes are
registered by adding a might_fault() check in trace_##name()
emitted by __DECLARE_TRACE_SYSCALL.

Suggested-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Jordan Rife <jrife@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Michael Jeanson <mjeanson@efficios.com>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Yonghong Song <yhs@fb.com>
Cc: Paul E. McKenney <paulmck@kernel.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Andrii Nakryiko <andrii.nakryiko@gmail.com>
Cc: bpf@vger.kernel.org
Cc: Joel Fernandes <joel@joelfernandes.org>
Cc: Jordan Rife <jrife@google.com>
Cc: linux-trace-kernel@vger.kernel.org
Link: https://lore.kernel.org/20241031152056.744137-5-mathieu.desnoyers@efficios.com
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 include/linux/tracepoint.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index 906f3091d23d..425123e921ac 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -301,6 +301,7 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p)
 	__DECLARE_TRACE_COMMON(name, PARAMS(proto), PARAMS(args), cond, PARAMS(data_proto)) \
 	static inline void trace_##name(proto)				\
 	{								\
+		might_fault();						\
 		if (static_branch_unlikely(&__tracepoint_##name.key))	\
 			__DO_TRACE(name,				\
 				TP_ARGS(args),				\
-- 
cgit v1.2.3


From 61c6fefa92bb4ed7a34163b94f6ffac628237a29 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Fri, 1 Nov 2024 11:17:53 -0700
Subject: bpf: decouple BPF link/attach hook and BPF program sleepable
 semantics

BPF link's lifecycle protection scheme depends on both BPF hook and BPF
program. If *either* of those require RCU Tasks Trace GP, then we need
to go through a chain of GPs before putting BPF program refcount and
deallocating BPF link memory.

This patch adds bpf_link-specific sleepable flag, which can be set to
true even if underlying BPF program is not sleepable itself. If either
link->sleepable or link->prog->sleepable is true, we'll go through
a chain of RCU Tasks Trace GP and RCU GP before putting BPF program and
freeing memory.

This will be used to protect BPF link for sleepable (faultable) raw
tracepoints in the next patch.

Link: https://lore.kernel.org/20241101181754.782341-2-andrii@kernel.org
Tested-by: Jordan Rife <jrife@google.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 include/linux/bpf.h | 20 ++++++++++++++++++--
 1 file changed, 18 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 19d8ca8ac960..e7236facadd4 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1598,6 +1598,11 @@ struct bpf_link {
 	enum bpf_link_type type;
 	const struct bpf_link_ops *ops;
 	struct bpf_prog *prog;
+	/* whether BPF link itself has "sleepable" semantics, which can differ
+	 * from underlying BPF program having a "sleepable" semantics, as BPF
+	 * link's semantics is determined by target attach hook
+	 */
+	bool sleepable;
 	/* rcu is used before freeing, work can be used to schedule that
 	 * RCU-based freeing before that, so they never overlap
 	 */
@@ -1614,8 +1619,10 @@ struct bpf_link_ops {
 	 */
 	void (*dealloc)(struct bpf_link *link);
 	/* deallocate link resources callback, called after RCU grace period;
-	 * if underlying BPF program is sleepable we go through tasks trace
-	 * RCU GP and then "classic" RCU GP
+	 * if either the underlying BPF program is sleepable or BPF link's
+	 * target hook is sleepable, we'll go through tasks trace RCU GP and
+	 * then "classic" RCU GP; this need for chaining tasks trace and
+	 * classic RCU GPs is designated by setting bpf_link->sleepable flag
 	 */
 	void (*dealloc_deferred)(struct bpf_link *link);
 	int (*detach)(struct bpf_link *link);
@@ -2362,6 +2369,9 @@ int bpf_prog_new_fd(struct bpf_prog *prog);
 
 void bpf_link_init(struct bpf_link *link, enum bpf_link_type type,
 		   const struct bpf_link_ops *ops, struct bpf_prog *prog);
+void bpf_link_init_sleepable(struct bpf_link *link, enum bpf_link_type type,
+			     const struct bpf_link_ops *ops, struct bpf_prog *prog,
+			     bool sleepable);
 int bpf_link_prime(struct bpf_link *link, struct bpf_link_primer *primer);
 int bpf_link_settle(struct bpf_link_primer *primer);
 void bpf_link_cleanup(struct bpf_link_primer *primer);
@@ -2717,6 +2727,12 @@ static inline void bpf_link_init(struct bpf_link *link, enum bpf_link_type type,
 {
 }
 
+static inline void bpf_link_init_sleepable(struct bpf_link *link, enum bpf_link_type type,
+					   const struct bpf_link_ops *ops, struct bpf_prog *prog,
+					   bool sleepable)
+{
+}
+
 static inline int bpf_link_prime(struct bpf_link *link,
 				 struct bpf_link_primer *primer)
 {
-- 
cgit v1.2.3


From d44d26987bb3df6d76556827097fc9ce17565cb8 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 31 Oct 2024 13:04:07 +0100
Subject: timekeeping: Remove CONFIG_DEBUG_TIMEKEEPING

Since 135225a363ae timekeeping_cycles_to_ns() handles large offsets which
would lead to 64bit multiplication overflows correctly. It's also protected
against negative motion of the clocksource unconditionally, which was
exclusive to x86 before.

timekeeping_advance() handles large offsets already correctly.

That means the value of CONFIG_DEBUG_TIMEKEEPING which analyzed these cases
is very close to zero. Remove all of it.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: John Stultz <jstultz@google.com>
Link: https://lore.kernel.org/all/20241031120328.536010148@linutronix.de
---
 include/linux/timekeeper_internal.h | 16 ----------------
 1 file changed, 16 deletions(-)

(limited to 'include')

diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h
index a3b6380a7777..e39d4d563b19 100644
--- a/include/linux/timekeeper_internal.h
+++ b/include/linux/timekeeper_internal.h
@@ -76,9 +76,6 @@ struct tk_read_base {
  *				ntp shifted nano seconds.
  * @ntp_err_mult:		Multiplication factor for scaled math conversion
  * @skip_second_overflow:	Flag used to avoid updating NTP twice with same second
- * @last_warning:		Warning ratelimiter (DEBUG_TIMEKEEPING)
- * @underflow_seen:		Underflow warning flag (DEBUG_TIMEKEEPING)
- * @overflow_seen:		Overflow warning flag (DEBUG_TIMEKEEPING)
  *
  * Note: For timespec(64) based interfaces wall_to_monotonic is what
  * we need to add to xtime (or xtime corrected for sub jiffy times)
@@ -147,19 +144,6 @@ struct timekeeper {
 	u32			ntp_error_shift;
 	u32			ntp_err_mult;
 	u32			skip_second_overflow;
-
-#ifdef CONFIG_DEBUG_TIMEKEEPING
-	long			last_warning;
-	/*
-	 * These simple flag variables are managed
-	 * without locks, which is racy, but they are
-	 * ok since we don't really care about being
-	 * super precise about how many events were
-	 * seen, just that a problem was observed.
-	 */
-	int			underflow_seen;
-	int			overflow_seen;
-#endif
 };
 
 #ifdef CONFIG_GENERIC_TIME_VSYSCALL
-- 
cgit v1.2.3


From acb0ed843290f3b19ede1bd9328eb41dee58ce40 Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <linux@treblig.org>
Date: Tue, 22 Oct 2024 01:24:34 +0100
Subject: crypto: asymmetric_keys - Remove unused functions

encrypt_blob(), decrypt_blob() and create_signature() were some of the
functions added in 2018 by
commit 5a30771832aa ("KEYS: Provide missing asymmetric key subops for new
key type ops [ver #2]")
however, they've not been used.

Remove them.

Signed-off-by: Dr. David Alan Gilbert <linux@treblig.org>
Reviewed-by: Jarkko Sakkinen <jarkko@kernel.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/crypto/public_key.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include')

diff --git a/include/crypto/public_key.h b/include/crypto/public_key.h
index b7f308977c84..81098e00c08f 100644
--- a/include/crypto/public_key.h
+++ b/include/crypto/public_key.h
@@ -104,9 +104,6 @@ static inline int restrict_link_by_digsig(struct key *dest_keyring,
 extern int query_asymmetric_key(const struct kernel_pkey_params *,
 				struct kernel_pkey_query *);
 
-extern int encrypt_blob(struct kernel_pkey_params *, const void *, void *);
-extern int decrypt_blob(struct kernel_pkey_params *, const void *, void *);
-extern int create_signature(struct kernel_pkey_params *, const void *, void *);
 extern int verify_signature(const struct key *,
 			    const struct public_key_signature *);
 
-- 
cgit v1.2.3


From 2a69297eed87c1f3ad33b8a169025c06adde5dcf Mon Sep 17 00:00:00 2001
From: Qi Tao <taoqi10@huawei.com>
Date: Sat, 26 Oct 2024 17:46:51 +0800
Subject: crypto: hisilicon - support querying the capability register

Query the capability register status of accelerator devices
(SEC, HPRE and ZIP) through the debugfs interface, for example:
cat cap_regs. The purpose is to improve the robustness and
locability of hardware devices and drivers.

Signed-off-by: Qi Tao <taoqi10@huawei.com>
Signed-off-by: Chenghai Huang <huangchenghai2@huawei.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/linux/hisi_acc_qm.h | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

(limited to 'include')

diff --git a/include/linux/hisi_acc_qm.h b/include/linux/hisi_acc_qm.h
index 389e95754776..7272eac850e8 100644
--- a/include/linux/hisi_acc_qm.h
+++ b/include/linux/hisi_acc_qm.h
@@ -274,13 +274,25 @@ struct hisi_qm_cap_info {
 	u32 v3_val;
 };
 
+struct hisi_qm_cap_query_info {
+	u32 type;
+	const char *name;
+	u32 offset;
+	u32 v1_val;
+	u32 v2_val;
+	u32 v3_val;
+};
+
 struct hisi_qm_cap_record {
 	u32 type;
+	const char *name;
 	u32 cap_val;
 };
 
 struct hisi_qm_cap_tables {
+	u32 qm_cap_size;
 	struct hisi_qm_cap_record *qm_cap_table;
+	u32 dev_cap_size;
 	struct hisi_qm_cap_record *dev_cap_table;
 };
 
@@ -554,6 +566,9 @@ void hisi_qm_regs_dump(struct seq_file *s, struct debugfs_regset32 *regset);
 u32 hisi_qm_get_hw_info(struct hisi_qm *qm,
 			const struct hisi_qm_cap_info *info_table,
 			u32 index, bool is_read);
+u32 hisi_qm_get_cap_value(struct hisi_qm *qm,
+			const struct hisi_qm_cap_query_info *info_table,
+			u32 index, bool is_read);
 int hisi_qm_set_algs(struct hisi_qm *qm, u64 alg_msk, const struct qm_dev_alg *dev_algs,
 		     u32 dev_algs_size);
 
-- 
cgit v1.2.3


From c418ba6baca3ae10ffaf47b0803d2a9e6bf1af96 Mon Sep 17 00:00:00 2001
From: Weili Qian <qianweili@huawei.com>
Date: Sat, 26 Oct 2024 19:44:29 +0800
Subject: crypto: hisilicon/qm - disable same error report before resetting

If an error indicating that the device needs to be reset is reported,
disable the error reporting before device reset is complete,
enable the error reporting after the reset is complete to prevent
the same error from being reported repeatedly.

Fixes: eaebf4c3b103 ("crypto: hisilicon - Unify hardware error init/uninit into QM")
Signed-off-by: Weili Qian <qianweili@huawei.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/linux/hisi_acc_qm.h | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/hisi_acc_qm.h b/include/linux/hisi_acc_qm.h
index 7272eac850e8..6dbd0d49628f 100644
--- a/include/linux/hisi_acc_qm.h
+++ b/include/linux/hisi_acc_qm.h
@@ -229,6 +229,12 @@ struct hisi_qm_status {
 
 struct hisi_qm;
 
+enum acc_err_result {
+	ACC_ERR_NONE,
+	ACC_ERR_NEED_RESET,
+	ACC_ERR_RECOVERED,
+};
+
 struct hisi_qm_err_info {
 	char *acpi_rst;
 	u32 msi_wr_port;
@@ -257,9 +263,9 @@ struct hisi_qm_err_ini {
 	void (*close_axi_master_ooo)(struct hisi_qm *qm);
 	void (*open_sva_prefetch)(struct hisi_qm *qm);
 	void (*close_sva_prefetch)(struct hisi_qm *qm);
-	void (*log_dev_hw_err)(struct hisi_qm *qm, u32 err_sts);
 	void (*show_last_dfx_regs)(struct hisi_qm *qm);
 	void (*err_info_init)(struct hisi_qm *qm);
+	enum acc_err_result (*get_err_result)(struct hisi_qm *qm);
 };
 
 struct hisi_qm_cap_info {
-- 
cgit v1.2.3


From a812eee0b68645e2916d4a4399280fe5471cac67 Mon Sep 17 00:00:00 2001
From: Nam Cao <namcao@linutronix.de>
Date: Thu, 10 Oct 2024 09:01:30 +0200
Subject: vdso: Rename struct arch_vdso_data to arch_vdso_time_data

The struct arch_vdso_data is only about vdso time data. So rename it to
arch_vdso_time_data to make it obvious.
Non time-related data will be migrated out of these structs soon.

Signed-off-by: Nam Cao <namcao@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Heiko Carstens <hca@linux.ibm.com> # s390
Link: https://lore.kernel.org/all/20241010-vdso-generic-base-v1-28-b64f0842d512@linutronix.de
---
 include/vdso/datapage.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/vdso/datapage.h b/include/vdso/datapage.h
index b85f24cac3f5..d967baa0cd0c 100644
--- a/include/vdso/datapage.h
+++ b/include/vdso/datapage.h
@@ -19,10 +19,10 @@
 #include <vdso/time32.h>
 #include <vdso/time64.h>
 
-#ifdef CONFIG_ARCH_HAS_VDSO_DATA
-#include <asm/vdso/data.h>
+#ifdef CONFIG_ARCH_HAS_VDSO_TIME_DATA
+#include <asm/vdso/time_data.h>
 #else
-struct arch_vdso_data {};
+struct arch_vdso_time_data {};
 #endif
 
 #define VDSO_BASES	(CLOCK_TAI + 1)
@@ -114,7 +114,7 @@ struct vdso_data {
 	u32			hrtimer_res;
 	u32			__unused;
 
-	struct arch_vdso_data	arch_data;
+	struct arch_vdso_time_data arch_data;
 };
 
 /**
-- 
cgit v1.2.3


From f6ca73063754950bf3fbad753e3a9557e3aa85e3 Mon Sep 17 00:00:00 2001
From: Alexandre Belloni <alexandre.belloni@bootlin.com>
Date: Sat, 2 Nov 2024 14:28:41 +0100
Subject: i3c: Document I3C_ADDR_SLOT_EXT_STATUS_MASK

As the mask is part of the enum, document it.

Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Link: https://lore.kernel.org/r/20241102132841.2446176-1-alexandre.belloni@bootlin.com
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
---
 include/linux/i3c/master.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/i3c/master.h b/include/linux/i3c/master.h
index 6e5328c6c6af..12d532b012c5 100644
--- a/include/linux/i3c/master.h
+++ b/include/linux/i3c/master.h
@@ -298,6 +298,7 @@ enum i3c_open_drain_speed {
  * @I3C_ADDR_SLOT_I2C_DEV: address is assigned to an I2C device
  * @I3C_ADDR_SLOT_I3C_DEV: address is assigned to an I3C device
  * @I3C_ADDR_SLOT_STATUS_MASK: address slot mask
+ * @I3C_ADDR_SLOT_EXT_STATUS_MASK: address slot mask with extended information
  * @I3C_ADDR_SLOT_EXT_DESIRED: the bitmask represents addresses that are preferred by some devices,
  *			       such as the "assigned-address" property in a device tree source.
  * On an I3C bus, addresses are assigned dynamically, and we need to know which
-- 
cgit v1.2.3


From 7029acd8a950393ee3a3d8e1a7ee1a9b77808a3b Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Fri, 25 Oct 2024 19:27:39 -0600
Subject: io_uring/rsrc: get rid of per-ring io_rsrc_node list

Work in progress, but get rid of the per-ring serialization of resource
nodes, like registered buffers and files. Main issue here is that one
node can otherwise hold up a bunch of other nodes from getting freed,
which is especially a problem for file resource nodes and networked
workloads where some descriptors may not see activity in a long time.

As an example, instantiate an io_uring ring fd and create a sparse
registered file table. Even 2 will do. Then create a socket and register
it as fixed file 0, F0. The number of open files in the app is now 5,
with 0/1/2 being the usual stdin/out/err, 3 being the ring fd, and 4
being the socket. Register this socket (eg "the listener") in slot 0 of
the registered file table. Now add an operation on the socket that uses
slot 0. Finally, loop N times, where each loop creates a new socket,
registers said socket as a file, then unregisters the socket, and
finally closes the socket. This is roughly similar to what a basic
accept loop would look like.

At the end of this loop, it's not unreasonable to expect that there
would still be 5 open files. Each socket created and registered in the
loop is also unregistered and closed. But since the listener socket
registered first still has references to its resource node due to still
being active, each subsequent socket unregistration is stuck behind it
for reclaim. Hence 5 + N files are still open at that point, where N is
awaiting the final put held up by the listener socket.

Rewrite the io_rsrc_node handling to NOT rely on serialization. Struct
io_kiocb now gets explicit resource nodes assigned, with each holding a
reference to the parent node. A parent node is either of type FILE or
BUFFER, which are the two types of nodes that exist. A request can have
two nodes assigned, if it's using both registered files and buffers.
Since request issue and task_work completion is both under the ring
private lock, no atomics are needed to handle these references. It's a
simple unlocked inc/dec. As before, the registered buffer or file table
each hold a reference as well to the registered nodes. Final put of the
node will remove the node and free the underlying resource, eg unmap the
buffer or put the file.

Outside of removing the stall in resource reclaim described above, it
has the following advantages:

1) It's a lot simpler than the previous scheme, and easier to follow.
   No need to specific quiesce handling anymore.

2) There are no resource node allocations in the fast path, all of that
   happens at resource registration time.

3) The structs related to resource handling can all get simplified
   quite a bit, like io_rsrc_node and io_rsrc_data. io_rsrc_put can
   go away completely.

4) Handling of resource tags is much simpler, and doesn't require
   persistent storage as it can simply get assigned up front at
   registration time. Just copy them in one-by-one at registration time
   and assign to the resource node.

The only real downside is that a request is now explicitly limited to
pinning 2 resources, one file and one buffer, where before just
assigning a resource node to a request would pin all of them. The upside
is that it's easier to follow now, as an individual resource is
explicitly referenced and assigned to the request.

With this in place, the above mentioned example will be using exactly 5
files at the end of the loop, not N.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/io_uring_types.h | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index d4ba4ae480d6..42c5f2c992c4 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -56,7 +56,7 @@ struct io_wq_work {
 };
 
 struct io_file_table {
-	struct io_fixed_file *files;
+	struct io_rsrc_node **nodes;
 	unsigned long *bitmap;
 	unsigned int alloc_hint;
 };
@@ -264,7 +264,6 @@ struct io_ring_ctx {
 		 * Fixed resources fast path, should be accessed only under
 		 * uring_lock, and updated through io_uring_register(2)
 		 */
-		struct io_rsrc_node	*rsrc_node;
 		atomic_t		cancel_seq;
 
 		/*
@@ -277,7 +276,7 @@ struct io_ring_ctx {
 		struct io_wq_work_list	iopoll_list;
 
 		struct io_file_table	file_table;
-		struct io_mapped_ubuf	**user_bufs;
+		struct io_rsrc_node	**user_bufs;
 		unsigned		nr_user_files;
 		unsigned		nr_user_bufs;
 
@@ -372,10 +371,7 @@ struct io_ring_ctx {
 	struct io_rsrc_data		*buf_data;
 
 	/* protected by ->uring_lock */
-	struct list_head		rsrc_ref_list;
 	struct io_alloc_cache		rsrc_node_cache;
-	struct wait_queue_head		rsrc_quiesce_wq;
-	unsigned			rsrc_quiesce;
 
 	u32			pers_next;
 	struct xarray		personalities;
@@ -642,7 +638,7 @@ struct io_kiocb {
 		__poll_t apoll_events;
 	};
 
-	struct io_rsrc_node		*rsrc_node;
+	struct io_rsrc_node		*rsrc_nodes[2];
 
 	atomic_t			refs;
 	bool				cancel_seq_set;
-- 
cgit v1.2.3


From fbbb8e991d86bb7539de6161746b6c747f93f533 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Sat, 26 Oct 2024 06:43:44 -0600
Subject: io_uring/rsrc: get rid of io_rsrc_node allocation cache

It's not going to be needed in the fast path going forward, so kill it
off.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/io_uring_types.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index 42c5f2c992c4..696f2a05a98b 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -370,9 +370,6 @@ struct io_ring_ctx {
 	struct io_rsrc_data		*file_data;
 	struct io_rsrc_data		*buf_data;
 
-	/* protected by ->uring_lock */
-	struct io_alloc_cache		rsrc_node_cache;
-
 	u32			pers_next;
 	struct xarray		personalities;
 
-- 
cgit v1.2.3


From 3597f2786b687a7f26361ce00a805ea0af41b65f Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Sat, 26 Oct 2024 14:50:13 -0600
Subject: io_uring/rsrc: unify file and buffer resource tables

For files, there's nr_user_files/file_table/file_data, and buffers have
nr_user_bufs/user_bufs/buf_data. There's no reason why file_table and
file_data can't be the same thing, and ditto for the buffer side. That
gets rid of more io_ring_ctx state that's in two spots rather than just
being in one spot, as it should be. Put all the registered file data in
one locations, and ditto on the buffer front.

This also avoids having both io_rsrc_data->nodes being an allocated
array, and ->user_bufs[] or ->file_table.nodes. There's no reason to
have this information duplicated. Keep it in one spot, io_rsrc_data,
along with how many resources are available.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/io_uring_types.h | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index 696f2a05a98b..77fd508d043a 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -55,8 +55,13 @@ struct io_wq_work {
 	int cancel_seq;
 };
 
+struct io_rsrc_data {
+	unsigned int			nr;
+	struct io_rsrc_node		**nodes;
+};
+
 struct io_file_table {
-	struct io_rsrc_node **nodes;
+	struct io_rsrc_data data;
 	unsigned long *bitmap;
 	unsigned int alloc_hint;
 };
@@ -276,9 +281,7 @@ struct io_ring_ctx {
 		struct io_wq_work_list	iopoll_list;
 
 		struct io_file_table	file_table;
-		struct io_rsrc_node	**user_bufs;
-		unsigned		nr_user_files;
-		unsigned		nr_user_bufs;
+		struct io_rsrc_data	buf_table;
 
 		struct io_submit_state	submit_state;
 
@@ -366,10 +369,6 @@ struct io_ring_ctx {
 	struct wait_queue_head		poll_wq;
 	struct io_restriction		restrictions;
 
-	/* slow path rsrc auxilary data, used by update/register */
-	struct io_rsrc_data		*file_data;
-	struct io_rsrc_data		*buf_data;
-
 	u32			pers_next;
 	struct xarray		personalities;
 
-- 
cgit v1.2.3


From b16e920a1909da6799c43000db730d8fcdcae907 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Mon, 28 Oct 2024 18:43:13 -0600
Subject: io_uring/rsrc: allow cloning at an offset

Right now buffer cloning is an all-or-nothing kind of thing - either the
whole table is cloned from a source to a destination ring, or nothing at
all.

However, it's not always desired to clone the whole thing. Allow for
the application to specify a source and destination offset, and a
number of buffers to clone. If the destination offset is non-zero, then
allocate sparse nodes upfront.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/uapi/linux/io_uring.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 024745283783..cc8dbe78c126 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -719,7 +719,10 @@ enum {
 struct io_uring_clone_buffers {
 	__u32	src_fd;
 	__u32	flags;
-	__u32	pad[6];
+	__u32	src_off;
+	__u32	dst_off;
+	__u32	nr;
+	__u32	pad[3];
 };
 
 struct io_uring_buf {
-- 
cgit v1.2.3


From c1329532d5aabecf79788924941afb8a7b7c1024 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Tue, 29 Oct 2024 07:50:56 -0600
Subject: io_uring/rsrc: allow cloning with node replacements

Currently cloning a buffer table will fail if the destination already has
a table. But it should be possible to use it to replace existing elements.
Add a IORING_REGISTER_DST_REPLACE cloning flag, which if set, will allow
the destination to already having a buffer table. If that is the case,
then entries designated by offset + nr buffers will be replaced if they
already exist.

Note that it's allowed to use IORING_REGISTER_DST_REPLACE and not have
an existing table, in which case it'll work just like not having the
flag set and an empty table - it'll just assign the newly created table
for that case.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/uapi/linux/io_uring.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index cc8dbe78c126..ce58c4590de6 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -713,7 +713,8 @@ struct io_uring_clock_register {
 };
 
 enum {
-	IORING_REGISTER_SRC_REGISTERED = 1,
+	IORING_REGISTER_SRC_REGISTERED	= (1U << 0),
+	IORING_REGISTER_DST_REPLACE	= (1U << 1),
 };
 
 struct io_uring_clone_buffers {
-- 
cgit v1.2.3


From 01ee194d1aba1202f0926d5047a2a4cf84d0e45d Mon Sep 17 00:00:00 2001
From: hexue <xue01.he@samsung.com>
Date: Fri, 1 Nov 2024 17:19:57 +0800
Subject: io_uring: add support for hybrid IOPOLL

A new hybrid poll is implemented on the io_uring layer. Once an IO is
issued, it will not poll immediately, but rather block first and re-run
before IO complete, then poll to reap IO. While this poll method could
be a suboptimal solution when running on a single thread, it offers
performance lower than regular polling but higher than IRQ, and CPU
utilization is also lower than polling.

To use hybrid polling, the ring must be setup with both the
IORING_SETUP_IOPOLL and IORING_SETUP_HYBRID)IOPOLL flags set. Hybrid
polling has the same restrictions as IOPOLL, in that commands must
explicitly support it.

Signed-off-by: hexue <xue01.he@samsung.com>
Link: https://lore.kernel.org/r/20241101091957.564220-2-xue01.he@samsung.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/io_uring_types.h | 19 +++++++++++++++++--
 include/uapi/linux/io_uring.h  |  3 +++
 2 files changed, 20 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index 77fd508d043a..d52fec533c51 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -298,6 +298,11 @@ struct io_ring_ctx {
 		 * ->uring_cmd() by io_uring_cmd_insert_cancelable()
 		 */
 		struct hlist_head	cancelable_uring_cmd;
+		/*
+		 * For Hybrid IOPOLL, runtime in hybrid polling, without
+		 * scheduling time
+		 */
+		u64					hybrid_poll_time;
 	} ____cacheline_aligned_in_smp;
 
 	struct {
@@ -449,6 +454,7 @@ enum {
 	REQ_F_LINK_TIMEOUT_BIT,
 	REQ_F_NEED_CLEANUP_BIT,
 	REQ_F_POLLED_BIT,
+	REQ_F_HYBRID_IOPOLL_STATE_BIT,
 	REQ_F_BUFFER_SELECTED_BIT,
 	REQ_F_BUFFER_RING_BIT,
 	REQ_F_REISSUE_BIT,
@@ -507,6 +513,8 @@ enum {
 	REQ_F_NEED_CLEANUP	= IO_REQ_FLAG(REQ_F_NEED_CLEANUP_BIT),
 	/* already went through poll handler */
 	REQ_F_POLLED		= IO_REQ_FLAG(REQ_F_POLLED_BIT),
+	/* every req only blocks once in hybrid poll */
+	REQ_F_IOPOLL_STATE        = IO_REQ_FLAG(REQ_F_HYBRID_IOPOLL_STATE_BIT),
 	/* buffer already selected */
 	REQ_F_BUFFER_SELECTED	= IO_REQ_FLAG(REQ_F_BUFFER_SELECTED_BIT),
 	/* buffer selected from ring, needs commit */
@@ -639,8 +647,15 @@ struct io_kiocb {
 	atomic_t			refs;
 	bool				cancel_seq_set;
 	struct io_task_work		io_task_work;
-	/* for polled requests, i.e. IORING_OP_POLL_ADD and async armed poll */
-	struct hlist_node		hash_node;
+	union {
+		/*
+		 * for polled requests, i.e. IORING_OP_POLL_ADD and async armed
+		 * poll
+		 */
+		struct hlist_node	hash_node;
+		/* For IOPOLL setup queues, with hybrid polling */
+		u64                     iopoll_start;
+	};
 	/* internal polling, see IORING_FEAT_FAST_POLL */
 	struct async_poll		*apoll;
 	/* opcode allocated if it needs to store data for async defer */
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index ce58c4590de6..47977a5c65f5 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -200,6 +200,9 @@ enum io_uring_sqe_flags_bit {
  */
 #define IORING_SETUP_NO_SQARRAY		(1U << 16)
 
+/* Use hybrid poll in iopoll process */
+#define IORING_SETUP_HYBRID_IOPOLL	(1U << 17)
+
 enum io_uring_op {
 	IORING_OP_NOP,
 	IORING_OP_READV,
-- 
cgit v1.2.3


From 53c0a58beb60b76e105a61aae518fd780eec03d9 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 25 May 2024 23:32:20 -0400
Subject: net/socket.c: switch to CLASS(fd)

	The important part in sockfd_lookup_light() is avoiding needless
file refcount operations, not the marginal reduction of the register
pressure from not keeping a struct file pointer in the caller.

	Switch to use fdget()/fdpu(); with sane use of CLASS(fd) we can
get a better code generation...

	Would be nice if somebody tested it on networking test suites
(including benchmarks)...

	sockfd_lookup_light() does fdget(), uses sock_from_file() to
get the associated socket and returns the struct socket reference to
the caller, along with "do we need to fput()" flag.  No matching fdput(),
the caller does its equivalent manually, using the fact that sock->file
points to the struct file the socket has come from.

	Get rid of that - have the callers do fdget()/fdput() and
use sock_from_file() directly.  That kills sockfd_lookup_light()
and fput_light() (no users left).

	What's more, we can get rid of explicit fdget()/fdput() by
switching to CLASS(fd, ...) - code generation does not suffer, since
now fdput() inserted on "descriptor is not opened" failure exit
is recognized to be a no-op by compiler.

[folded a fix for braino in do_recvmmsg() caught by Simon Horman]

Reviewed-by: Christian Brauner <brauner@kernel.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/file.h | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/file.h b/include/linux/file.h
index f98de143245a..b49a92295b3f 100644
--- a/include/linux/file.h
+++ b/include/linux/file.h
@@ -30,12 +30,6 @@ extern struct file *alloc_file_pseudo_noaccount(struct inode *, struct vfsmount
 extern struct file *alloc_file_clone(struct file *, int flags,
 	const struct file_operations *);
 
-static inline void fput_light(struct file *file, int fput_needed)
-{
-	if (fput_needed)
-		fput(file);
-}
-
 /* either a reference to struct file + flags
  * (cloned vs. borrowed, pos locked), with
  * flags stored in lower bits of value,
-- 
cgit v1.2.3


From f302edb9d822804e72df3fa6ba270234050c678b Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 14 Jul 2024 21:49:04 -0400
Subject: switch netlink_getsockbyfilp() to taking descriptor

the only call site (in do_mq_notify()) obtains the argument
from an immediately preceding fdget() and it is immediately
followed by fdput(); might as well just replace it with
a variant that would take a descriptor instead of struct file *
and have file lookups handled inside that function.

Reviewed-by: Christian Brauner <brauner@kernel.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/netlink.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/netlink.h b/include/linux/netlink.h
index b332c2048c75..a48a30842d84 100644
--- a/include/linux/netlink.h
+++ b/include/linux/netlink.h
@@ -239,7 +239,7 @@ int netlink_register_notifier(struct notifier_block *nb);
 int netlink_unregister_notifier(struct notifier_block *nb);
 
 /* finegrained unicast helpers: */
-struct sock *netlink_getsockbyfilp(struct file *filp);
+struct sock *netlink_getsockbyfd(int fd);
 int netlink_attachskb(struct sock *sk, struct sk_buff *skb,
 		      long *timeo, struct sock *ssk);
 void netlink_detachskb(struct sock *sk, struct sk_buff *skb);
-- 
cgit v1.2.3


From d7a9616ce0348b9d945d5dff82e4b44c0fe75b39 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 31 May 2024 22:10:12 -0400
Subject: introduce "fd_pos" class, convert fdget_pos() users to it.

fdget_pos() for constructor, fdput_pos() for cleanup, all users of
fd..._pos() converted trivially.

Reviewed-by: Christian Brauner <brauner@kernel.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/file.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/file.h b/include/linux/file.h
index b49a92295b3f..4b09a8de2fd5 100644
--- a/include/linux/file.h
+++ b/include/linux/file.h
@@ -81,6 +81,7 @@ static inline void fdput_pos(struct fd f)
 
 DEFINE_CLASS(fd, struct fd, fdput(_T), fdget(fd), int fd)
 DEFINE_CLASS(fd_raw, struct fd, fdput(_T), fdget_raw(fd), int fd)
+DEFINE_CLASS(fd_pos, struct fd, fdput_pos(_T), fdget_pos(fd), int fd)
 
 extern int f_dupfd(unsigned int from, struct file *file, unsigned flags);
 extern int replace_fd(unsigned fd, struct file *file, unsigned flags);
-- 
cgit v1.2.3


From 38052c2dd71f5490f34bba21dc358e97fb205ee5 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 6 Jun 2024 19:29:04 -0400
Subject: deal with the last remaing boolean uses of fd_file()

Reviewed-by: Christian Brauner <brauner@kernel.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/cleanup.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/cleanup.h b/include/linux/cleanup.h
index 038b2d523bf8..875c998275c0 100644
--- a/include/linux/cleanup.h
+++ b/include/linux/cleanup.h
@@ -234,7 +234,7 @@ const volatile void * __must_check_fn(const volatile void *val)
  * DEFINE_CLASS(fdget, struct fd, fdput(_T), fdget(fd), int fd)
  *
  *	CLASS(fdget, f)(fd);
- *	if (!fd_file(f))
+ *	if (fd_empty(f))
  *		return -EBADF;
  *
  *	// use 'f' without concern
-- 
cgit v1.2.3


From cdfd5daf90af8363fb1f58e08c829a775b2e2fc5 Mon Sep 17 00:00:00 2001
From: Claudiu Beznea <claudiu.beznea.uj@bp.renesas.com>
Date: Fri, 1 Nov 2024 11:57:12 +0200
Subject: dt-bindings: clock: renesas,r9a08g045-vbattb: Document VBATTB

The VBATTB IP of the Renesas RZ/G3S SoC controls the clock for RTC,
the tamper detector and a small general usage memory of 128B.

The VBATTB controller controls the clock for the RTC on the Renesas
RZ/G3S. The HW block diagram for the clock logic is as follows:

           +----------+ XC   `\
RTXIN  --->|          |----->| \       +----+  VBATTCLK
           | 32K clock|      |  |----->|gate|----------->
           | osc      | XBYP |  |      +----+
RTXOUT --->|          |----->| /
           +----------+      ,/

One could connect as input to this HW block either a crystal or
an external clock device. This is board specific.

After discussions w/ Stephen Boyd the clock tree associated with this
hardware block was exported in Linux as:

input-xtal
  xbyp
  xc
     mux
        vbattclk

where:
- input-xtal is the input clock (connected to RTXIN, RTXOUT pins)
- xc, xbyp are mux inputs
- mux is the internal mux
- vbattclk is the gate clock that feeds in the end the RTC

to allow selecting the input of the MUX though assigned-clock DT
properties, using the already existing clock drivers and avoid adding
other DT properties.

This allows select the input of the mux based on the type of the
connected input clock:
- if the 32768 crystal is connected as input for the VBATTB,
  the input of the mux should be xc
- if an external clock device is connected as input for the VBATTB the
  input of the mux should be xbyp

Add bindings for the VBATTB controller.

Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Signed-off-by: Claudiu Beznea <claudiu.beznea.uj@bp.renesas.com>
Link: https://lore.kernel.org/20241101095720.2247815-2-claudiu.beznea.uj@bp.renesas.com
Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
---
 include/dt-bindings/clock/renesas,r9a08g045-vbattb.h | 13 +++++++++++++
 1 file changed, 13 insertions(+)
 create mode 100644 include/dt-bindings/clock/renesas,r9a08g045-vbattb.h

(limited to 'include')

diff --git a/include/dt-bindings/clock/renesas,r9a08g045-vbattb.h b/include/dt-bindings/clock/renesas,r9a08g045-vbattb.h
new file mode 100644
index 000000000000..67774eafad06
--- /dev/null
+++ b/include/dt-bindings/clock/renesas,r9a08g045-vbattb.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+ *
+ * Copyright (C) 2024 Renesas Electronics Corp.
+ */
+#ifndef __DT_BINDINGS_CLOCK_R9A08G045_VBATTB_H__
+#define __DT_BINDINGS_CLOCK_R9A08G045_VBATTB_H__
+
+#define VBATTB_XC		0
+#define VBATTB_XBYP		1
+#define VBATTB_MUX		2
+#define VBATTB_VBATTCLK		3
+
+#endif /* __DT_BINDINGS_CLOCK_R9A08G045_VBATTB_H__ */
-- 
cgit v1.2.3


From a1afb959add1fad43cb337448c244ed70bac3109 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@nvidia.com>
Date: Wed, 30 Oct 2024 09:11:56 +0100
Subject: dpll: add clock quality level attribute and op

In order to allow driver expose quality level of the clock it is
running, introduce a new netlink attr with enum to carry it to the
userspace. Also, introduce an op the dpll netlink code calls into the
driver to obtain the value.

Signed-off-by: Jiri Pirko <jiri@nvidia.com>
Link: https://patch.msgid.link/20241030081157.966604-2-jiri@resnulli.us
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/dpll.h      |  4 ++++
 include/uapi/linux/dpll.h | 24 ++++++++++++++++++++++++
 2 files changed, 28 insertions(+)

(limited to 'include')

diff --git a/include/linux/dpll.h b/include/linux/dpll.h
index 81f7b623d0ba..5e4f9ab1cf75 100644
--- a/include/linux/dpll.h
+++ b/include/linux/dpll.h
@@ -26,6 +26,10 @@ struct dpll_device_ops {
 			       struct netlink_ext_ack *extack);
 	int (*temp_get)(const struct dpll_device *dpll, void *dpll_priv,
 			s32 *temp, struct netlink_ext_ack *extack);
+	int (*clock_quality_level_get)(const struct dpll_device *dpll,
+				       void *dpll_priv,
+				       unsigned long *qls,
+				       struct netlink_ext_ack *extack);
 };
 
 struct dpll_pin_ops {
diff --git a/include/uapi/linux/dpll.h b/include/uapi/linux/dpll.h
index b0654ade7b7e..2b7ec2da4bcc 100644
--- a/include/uapi/linux/dpll.h
+++ b/include/uapi/linux/dpll.h
@@ -79,6 +79,29 @@ enum dpll_lock_status_error {
 	DPLL_LOCK_STATUS_ERROR_MAX = (__DPLL_LOCK_STATUS_ERROR_MAX - 1)
 };
 
+/**
+ * enum dpll_clock_quality_level - level of quality of a clock device. This
+ *   mainly applies when the dpll lock-status is DPLL_LOCK_STATUS_HOLDOVER. The
+ *   current list is defined according to the table 11-7 contained in ITU-T
+ *   G.8264/Y.1364 document. One may extend this list freely by other ITU-T
+ *   defined clock qualities, or different ones defined by another
+ *   standardization body (for those, please use different prefix).
+ */
+enum dpll_clock_quality_level {
+	DPLL_CLOCK_QUALITY_LEVEL_ITU_OPT1_PRC = 1,
+	DPLL_CLOCK_QUALITY_LEVEL_ITU_OPT1_SSU_A,
+	DPLL_CLOCK_QUALITY_LEVEL_ITU_OPT1_SSU_B,
+	DPLL_CLOCK_QUALITY_LEVEL_ITU_OPT1_EEC1,
+	DPLL_CLOCK_QUALITY_LEVEL_ITU_OPT1_PRTC,
+	DPLL_CLOCK_QUALITY_LEVEL_ITU_OPT1_EPRTC,
+	DPLL_CLOCK_QUALITY_LEVEL_ITU_OPT1_EEEC,
+	DPLL_CLOCK_QUALITY_LEVEL_ITU_OPT1_EPRC,
+
+	/* private: */
+	__DPLL_CLOCK_QUALITY_LEVEL_MAX,
+	DPLL_CLOCK_QUALITY_LEVEL_MAX = (__DPLL_CLOCK_QUALITY_LEVEL_MAX - 1)
+};
+
 #define DPLL_TEMP_DIVIDER	1000
 
 /**
@@ -180,6 +203,7 @@ enum dpll_a {
 	DPLL_A_TEMP,
 	DPLL_A_TYPE,
 	DPLL_A_LOCK_STATUS_ERROR,
+	DPLL_A_CLOCK_QUALITY_LEVEL,
 
 	__DPLL_A_MAX,
 	DPLL_A_MAX = (__DPLL_A_MAX - 1)
-- 
cgit v1.2.3


From a8f80673ca0daaad990882529a5b4dc5114071e7 Mon Sep 17 00:00:00 2001
From: Yafang Shao <laoar.shao@gmail.com>
Date: Thu, 24 Oct 2024 17:37:41 +0800
Subject: compiler_types: Add noinline_for_tracing annotation

Kernel functions that are not inlined can be easily hooked with BPF for
tracing. However, functions intended for tracing may still be inlined
unexpectedly. For example, in our case, after upgrading the compiler from
GCC 9 to GCC 11, the tcp_drop_reason() function was inlined, which broke
our monitoring tools. To prevent this, we need to ensure that the function
remains non-inlined.

The noinline_for_tracing annotation is introduced as a general solution for
preventing inlining of kernel functions that need to be traced. This
approach avoids the need for adding individual noinline comments to each
function and provides a more consistent way to maintain traceability.

Link: https://lore.kernel.org/netdev/CANn89iKvr44ipuRYFaPTpzwz=B_+pgA94jsggQ946mjwreV6Aw@mail.gmail.com/
Suggested-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Yafang Shao <laoar.shao@gmail.com>
Link: https://patch.msgid.link/20241024093742.87681-2-laoar.shao@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/compiler_types.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include')

diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h
index 1a957ea2f4fe..0c8b9601e603 100644
--- a/include/linux/compiler_types.h
+++ b/include/linux/compiler_types.h
@@ -265,6 +265,12 @@ struct ftrace_likely_data {
  */
 #define noinline_for_stack noinline
 
+/*
+ * Use noinline_for_tracing for functions that should not be inlined.
+ * For tracing reasons.
+ */
+#define noinline_for_tracing noinline
+
 /*
  * Sanitizer helper attributes: Because using __always_inline and
  * __no_sanitize_* conflict, provide helper attributes that will either expand
-- 
cgit v1.2.3


From 43d3487035e9a86fad952de4240a518614240d43 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Date: Tue, 29 Oct 2024 15:55:35 -0600
Subject: UAPI: ethtool: Use __struct_group() in struct ethtool_link_settings

Use the `__struct_group()` helper to create a new tagged
`struct ethtool_link_settings_hdr`. This structure groups together
all the members of the flexible `struct ethtool_link_settings`
except the flexible array. As a result, the array is effectively
separated from the rest of the members without modifying the memory
layout of the flexible structure.

This new tagged struct will be used to fix problematic declarations
of middle-flex-arrays in composite structs[1].

[1] https://git.kernel.org/linus/d88cabfd9abc

Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
Link: https://patch.msgid.link/9e9fb0bd72e5ba1e916acbb4995b1e358b86a689.1730238285.git.gustavoars@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/uapi/linux/ethtool.h | 33 ++++++++++++++++++---------------
 1 file changed, 18 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h
index c405ed63acfa..fc1f54b065f9 100644
--- a/include/uapi/linux/ethtool.h
+++ b/include/uapi/linux/ethtool.h
@@ -2511,21 +2511,24 @@ enum ethtool_reset_flags {
  *	autonegotiation; 0 if unknown or not applicable.  Read-only.
  */
 struct ethtool_link_settings {
-	__u32	cmd;
-	__u32	speed;
-	__u8	duplex;
-	__u8	port;
-	__u8	phy_address;
-	__u8	autoneg;
-	__u8	mdio_support;
-	__u8	eth_tp_mdix;
-	__u8	eth_tp_mdix_ctrl;
-	__s8	link_mode_masks_nwords;
-	__u8	transceiver;
-	__u8	master_slave_cfg;
-	__u8	master_slave_state;
-	__u8	rate_matching;
-	__u32	reserved[7];
+	/* New members MUST be added within the __struct_group() macro below. */
+	__struct_group(ethtool_link_settings_hdr, hdr, /* no attrs */,
+		__u32	cmd;
+		__u32	speed;
+		__u8	duplex;
+		__u8	port;
+		__u8	phy_address;
+		__u8	autoneg;
+		__u8	mdio_support;
+		__u8	eth_tp_mdix;
+		__u8	eth_tp_mdix_ctrl;
+		__s8	link_mode_masks_nwords;
+		__u8	transceiver;
+		__u8	master_slave_cfg;
+		__u8	master_slave_state;
+		__u8	rate_matching;
+		__u32	reserved[7];
+	);
 	__u32	link_mode_masks[];
 	/* layout of link_mode_masks fields:
 	 * __u32 map_supported[link_mode_masks_nwords];
-- 
cgit v1.2.3


From 3bd9b9abdf1563a22041b7255baea6d449902f1a Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Date: Tue, 29 Oct 2024 15:58:47 -0600
Subject: net: ethtool: Avoid thousands of -Wflex-array-member-not-at-end
 warnings

-Wflex-array-member-not-at-end was introduced in GCC-14, and we are
getting ready to enable it, globally.

Change the type of the middle struct member currently causing trouble from
`struct ethtool_link_settings` to `struct ethtool_link_settings_hdr`.

Additionally, update the type of some variables in various functions that
don't access the flexible-array member, changing them to the newly created
`struct ethtool_link_settings_hdr`. These changes are needed because the
type of the conflicting middle members changed. So, those instances that
expect the type to be `struct ethtool_link_settings` should be adjusted to
the newly created type `struct ethtool_link_settings_hdr`.

Also, adjust variable declarations to follow the reverse xmas tree
convention.

Fix 3338 of the following -Wflex-array-member-not-at-end warnings:

include/linux/ethtool.h:214:38: warning: structure containing a flexible array member is not at the end of another structure [-Wflex-array-member-not-at-end]

Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
Link: https://patch.msgid.link/0bc2809fe2a6c11dd4c8a9a10d9bd65cccdb559b.1730238285.git.gustavoars@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/ethtool.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index 12f6dc567598..1199e308c8dd 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -211,7 +211,7 @@ void ethtool_rxfh_context_lost(struct net_device *dev, u32 context_id);
  * fields, but they are allowed to overwrite them (will be ignored).
  */
 struct ethtool_link_ksettings {
-	struct ethtool_link_settings base;
+	struct ethtool_link_settings_hdr base;
 	struct {
 		__ETHTOOL_DECLARE_LINK_MODE_MASK(supported);
 		__ETHTOOL_DECLARE_LINK_MODE_MASK(advertising);
-- 
cgit v1.2.3


From 6b2d11e2d8fc130df4708be0b6b53fd3e6b54cf6 Mon Sep 17 00:00:00 2001
From: Dmitry Safonov <0x7f454c46@gmail.com>
Date: Wed, 30 Oct 2024 04:22:33 +0000
Subject: net/tcp: Add missing lockdep annotations for TCP-AO hlist traversals

Under CONFIG_PROVE_RCU_LIST + CONFIG_RCU_EXPERT
hlist_for_each_entry_rcu() provides very helpful splats, which help
to find possible issues. I missed CONFIG_RCU_EXPERT=y in my testing
config the same as described in
a3e4bf7f9675 ("configs/debug: make sure PROVE_RCU_LIST=y takes effect").

The fix itself is trivial: add the very same lockdep annotations
as were used to dereference ao_info from the socket.

Reported-by: Jakub Kicinski <kuba@kernel.org>
Closes: https://lore.kernel.org/netdev/20241028152645.35a8be66@kernel.org/
Signed-off-by: Dmitry Safonov <0x7f454c46@gmail.com>
Link: https://patch.msgid.link/20241030-tcp-ao-hlist-lockdep-annotate-v1-1-bf641a64d7c6@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/tcp_ao.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/tcp_ao.h b/include/net/tcp_ao.h
index 1d46460d0fef..df655ce6987d 100644
--- a/include/net/tcp_ao.h
+++ b/include/net/tcp_ao.h
@@ -183,7 +183,8 @@ int tcp_ao_hash_skb(unsigned short int family,
 		    const u8 *tkey, int hash_offset, u32 sne);
 int tcp_parse_ao(struct sock *sk, int cmd, unsigned short int family,
 		 sockptr_t optval, int optlen);
-struct tcp_ao_key *tcp_ao_established_key(struct tcp_ao_info *ao,
+struct tcp_ao_key *tcp_ao_established_key(const struct sock *sk,
+					  struct tcp_ao_info *ao,
 					  int sndid, int rcvid);
 int tcp_ao_copy_all_matching(const struct sock *sk, struct sock *newsk,
 			     struct request_sock *req, struct sk_buff *skb,
-- 
cgit v1.2.3


From 2ea25aab938a250bdf3148acd15359b56b91b40e Mon Sep 17 00:00:00 2001
From: David Lechner <dlechner@baylibre.com>
Date: Tue, 29 Oct 2024 16:18:49 -0500
Subject: pwm: core: export pwm_get_state_hw()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Export the pwm_get_state_hw() function. This is useful in cases where
we want to know what the hardware is actually doing, rather than what
what we requested it should do.

Locking had to be rearranged to ensure that the chip is still
operational before trying to access ops now that this can be called
from outside the pwm core.

Signed-off-by: David Lechner <dlechner@baylibre.com>
Link: https://lore.kernel.org/r/20241029-pwm-export-pwm_get_state_hw-v2-1-03ba063a3230@baylibre.com
[ukleinek: Add dummy for !CONFIG_PWM]
Signed-off-by: Uwe Kleine-König <ukleinek@kernel.org>
---
 include/linux/pwm.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include')

diff --git a/include/linux/pwm.h b/include/linux/pwm.h
index f1cb1e5b0a36..78827f312407 100644
--- a/include/linux/pwm.h
+++ b/include/linux/pwm.h
@@ -370,6 +370,7 @@ int pwm_get_waveform_might_sleep(struct pwm_device *pwm, struct pwm_waveform *wf
 int pwm_set_waveform_might_sleep(struct pwm_device *pwm, const struct pwm_waveform *wf, bool exact);
 int pwm_apply_might_sleep(struct pwm_device *pwm, const struct pwm_state *state);
 int pwm_apply_atomic(struct pwm_device *pwm, const struct pwm_state *state);
+int pwm_get_state_hw(struct pwm_device *pwm, struct pwm_state *state);
 int pwm_adjust_config(struct pwm_device *pwm);
 
 /**
@@ -494,6 +495,11 @@ static inline int pwm_apply_atomic(struct pwm_device *pwm,
 	return -EOPNOTSUPP;
 }
 
+static inline int pwm_get_state_hw(struct pwm_device *pwm, struct pwm_state *state)
+{
+	return -EOPNOTSUPP;
+}
+
 static inline int pwm_adjust_config(struct pwm_device *pwm)
 {
 	return -EOPNOTSUPP;
-- 
cgit v1.2.3


From 9a5a2483bc60c12d73ac6ca5ac5ab95361a895f4 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Fri, 1 Nov 2024 12:53:42 +0200
Subject: iio: Mark iio_dev::priv member with __private

The member is not supposed to be accessed directly, mark it with
__private to catch the misuses up.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://patch.msgid.link/20241101105342.3645018-1-andriy.shevchenko@linux.intel.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 include/linux/iio/iio.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h
index 445d6666a291..5c6682bd4cb9 100644
--- a/include/linux/iio/iio.h
+++ b/include/linux/iio/iio.h
@@ -624,7 +624,7 @@ struct iio_dev {
 	const struct iio_info		*info;
 	const struct iio_buffer_setup_ops	*setup_ops;
 
-	void				*priv;
+	void				*priv __private;
 };
 
 int iio_device_id(struct iio_dev *indio_dev);
@@ -785,7 +785,7 @@ struct iio_dev *iio_device_alloc(struct device *parent, int sizeof_priv);
 /* The information at the returned address is guaranteed to be cacheline aligned */
 static inline void *iio_priv(const struct iio_dev *indio_dev)
 {
-	return indio_dev->priv;
+	return ACCESS_PRIVATE(indio_dev, priv);
 }
 
 void iio_device_free(struct iio_dev *indio_dev);
-- 
cgit v1.2.3


From 6e6738398def256126185cd25e2e3cb68f1bc0a3 Mon Sep 17 00:00:00 2001
From: Ricardo Ribalda <ribalda@chromium.org>
Date: Fri, 1 Nov 2024 07:46:27 +0000
Subject: iio: hid-sensors: Add proximity and attention IDs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The HID Usage Table at https://usb.org/sites/default/files/hut1_5.pdf
reserves:

- 0x4b2 for Human Proximity Range
Distance between a human and the computer. Default unit of
measure is meters;
https://www.usb.org/sites/default/files/hutrr39b_0.pdf

- 0x4bd for Human Attention Detected
Human-Presence sensors detect the presence of humans in the sensor’s
field-of-view using diverse and evolving technologies. Some presence
sensors are implemented with low resolution video cameras, which can
additionally track a subject’s attention (i.e. if the
user is ‘looking’ at the system with the integrated sensor).
A Human-Presence sensor, providing a Host with the user’s attention
state, allows the Host to optimize its behavior. For example, to
brighten/dim the system display, based on the user’s attention to the
system (potentially prolonging battery life). Default unit is
true/false;
https://www.usb.org/sites/default/files/hutrr107-humanpresenceattention_1.pdf

Signed-off-by: Ricardo Ribalda <ribalda@chromium.org>
Link: https://patch.msgid.link/20241101-hpd-v3-1-e9c80b7c7164@chromium.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 include/linux/hid-sensor-ids.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/hid-sensor-ids.h b/include/linux/hid-sensor-ids.h
index 6730ee900ee1..8a03d9696b1c 100644
--- a/include/linux/hid-sensor-ids.h
+++ b/include/linux/hid-sensor-ids.h
@@ -30,6 +30,8 @@
 #define HID_USAGE_SENSOR_PROX                                   0x200011
 #define HID_USAGE_SENSOR_DATA_PRESENCE                          0x2004b0
 #define HID_USAGE_SENSOR_HUMAN_PRESENCE                         0x2004b1
+#define HID_USAGE_SENSOR_HUMAN_PROXIMITY                        0x2004b2
+#define HID_USAGE_SENSOR_HUMAN_ATTENTION                        0x2004bd
 
 /* Pressure (200031) */
 #define HID_USAGE_SENSOR_PRESSURE                               0x200031
-- 
cgit v1.2.3


From 9d2fe9cd02ca5f1e70a7eff0262fb3668a27db0c Mon Sep 17 00:00:00 2001
From: Ricardo Ribalda <ribalda@chromium.org>
Date: Fri, 1 Nov 2024 07:46:29 +0000
Subject: iio: Add channel type for attention

Add a new channel type representing if the user's attention state to the
the system. This usually means if the user is looking at the screen or
not.

Signed-off-by: Ricardo Ribalda <ribalda@chromium.org>
Link: https://patch.msgid.link/20241101-hpd-v3-3-e9c80b7c7164@chromium.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 include/uapi/linux/iio/types.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/uapi/linux/iio/types.h b/include/uapi/linux/iio/types.h
index f2e0b2d50e6b..12886d4465e4 100644
--- a/include/uapi/linux/iio/types.h
+++ b/include/uapi/linux/iio/types.h
@@ -51,6 +51,7 @@ enum iio_chan_type {
 	IIO_DELTA_VELOCITY,
 	IIO_COLORTEMP,
 	IIO_CHROMATICITY,
+	IIO_ATTENTION,
 };
 
 enum iio_modifier {
-- 
cgit v1.2.3


From b4b42f28a0df6b9d31f0003f7dea3bddf272eaa4 Mon Sep 17 00:00:00 2001
From: Julien Stephan <jstephan@baylibre.com>
Date: Thu, 31 Oct 2024 16:27:02 +0100
Subject: iio: fix write_event_config signature

write_event_config callback use an int for state, but it is actually a
boolean. iio_ev_state_store is actually using kstrtobool to check user
input, then gives the converted boolean value to write_event_config.

Fix signature and update all iio drivers to use the new signature.

This patch has been partially written using coccinelle with the
following script:

$ cat iio-bool.cocci
// Options: --all-includes

virtual patch

@c1@
identifier iioinfo;
identifier wecfunc;
@@
 static const struct iio_info iioinfo = {
        ...,
        .write_event_config =
(
 wecfunc
|
 &wecfunc
),
        ...,
 };

@@
identifier c1.wecfunc;
identifier indio_dev, chan, type, dir, state;
@@
 int wecfunc(struct iio_dev *indio_dev, const struct iio_chan_spec *chan, enum iio_event_type type, enum iio_event_direction dir,
-int
+bool
 state) {
  ...
 }

make coccicheck MODE=patch COCCI=iio-bool.cocci M=drivers/iio

Unfortunately, this script didn't match all files:
* all write_event_config callbacks using iio_device_claim_direct_scoped
  were not detected and not patched.
* all files that do not assign and declare the write_event_config
  callback in the same file.

iio.h was also manually updated.

The patch was build tested using allmodconfig config.

cc: Julia Lawall <julia.lawall@inria.fr>
Signed-off-by: Julien Stephan <jstephan@baylibre.com>
Link: https://patch.msgid.link/20241031-iio-fix-write-event-config-signature-v2-7-2bcacbb517a2@baylibre.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 include/linux/iio/iio.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h
index 5c6682bd4cb9..59c58f455311 100644
--- a/include/linux/iio/iio.h
+++ b/include/linux/iio/iio.h
@@ -514,7 +514,7 @@ struct iio_info {
 				  const struct iio_chan_spec *chan,
 				  enum iio_event_type type,
 				  enum iio_event_direction dir,
-				  int state);
+				  bool state);
 
 	int (*read_event_value)(struct iio_dev *indio_dev,
 				const struct iio_chan_spec *chan,
-- 
cgit v1.2.3


From 5d8173b8493151d32b99ec6732fb29c58256a7c8 Mon Sep 17 00:00:00 2001
From: Julien Stephan <jstephan@baylibre.com>
Date: Mon, 28 Oct 2024 17:38:11 +0100
Subject: iio: events.h: add event identifier macros for differential channel
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Currently, there are 3 helper macros in iio/events.h to create event
identifiers:
- IIO_EVENT_CODE : create generic event identifier for differential and non
  differential channels
- IIO_MOD_EVENT_CODE : create event identifier for modified (non
  differential) channels
- IIO_UNMOD_EVENT_CODE : create event identifier for unmodified (non
  differential) channels

For differential channels, drivers are expected to use IIO_EVENT_CODE.
However, only one driver in drivers/iio currently uses it correctly,
leading to inconsistent event identifiers for differential channels that
don’t match the intended attributes (such as max1363.c that supports
differential channels, but only uses IIO_UNMOD_EVENT_CODE).

To prevent such issues in future drivers, a new helper macro,
IIO_DIFF_EVENT_CODE, is introduced to specifically create event identifiers
for differential channels. Only one helper is needed for differential
channels since they cannot have modifiers.

Additionally, the descriptions for IIO_MOD_EVENT_CODE and
IIO_UNMOD_EVENT_CODE have been updated to clarify that they are intended
for non-differential channels,

Signed-off-by: Julien Stephan <jstephan@baylibre.com>
Reviewed-by: David Lechner <dlechner@baylibre.com>
Link: https://patch.msgid.link/20241028-iio-add-macro-for-even-identifier-for-differential-channels-v1-1-b452c90f7ea6@baylibre.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 include/linux/iio/events.h | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/iio/events.h b/include/linux/iio/events.h
index a4558c45a548..eeaba5e1525e 100644
--- a/include/linux/iio/events.h
+++ b/include/linux/iio/events.h
@@ -30,7 +30,8 @@
 
 
 /**
- * IIO_MOD_EVENT_CODE() - create event identifier for modified channels
+ * IIO_MOD_EVENT_CODE() - create event identifier for modified (non
+ * differential) channels
  * @chan_type:	Type of the channel. Should be one of enum iio_chan_type.
  * @number:	Channel number.
  * @modifier:	Modifier for the channel. Should be one of enum iio_modifier.
@@ -43,7 +44,8 @@
 	IIO_EVENT_CODE(chan_type, 0, modifier, direction, type, number, 0, 0)
 
 /**
- * IIO_UNMOD_EVENT_CODE() - create event identifier for unmodified channels
+ * IIO_UNMOD_EVENT_CODE() - create event identifier for unmodified (non
+ * differential) channels
  * @chan_type:	Type of the channel. Should be one of enum iio_chan_type.
  * @number:	Channel number.
  * @type:	Type of the event. Should be one of enum iio_event_type.
@@ -53,4 +55,16 @@
 #define IIO_UNMOD_EVENT_CODE(chan_type, number, type, direction)	\
 	IIO_EVENT_CODE(chan_type, 0, 0, direction, type, number, 0, 0)
 
+/**
+ * IIO_DIFF_EVENT_CODE() - create event identifier for differential channels
+ * @chan_type:	Type of the channel. Should be one of enum iio_chan_type.
+ * @chan1:	First channel number for differential channels.
+ * @chan2:	Second channel number for differential channels.
+ * @type:	Type of the event. Should be one of enum iio_event_type.
+ * @direction:	Direction of the event. One of enum iio_event_direction.
+ */
+
+#define IIO_DIFF_EVENT_CODE(chan_type, chan1, chan2, type, direction)	\
+	IIO_EVENT_CODE(chan_type, 1, 0, direction, type, 0, chan1, chan2)
+
 #endif
-- 
cgit v1.2.3


From 01f567d22152dfa8799e9fde5f18bbb5650d8681 Mon Sep 17 00:00:00 2001
From: David Lechner <dlechner@baylibre.com>
Date: Fri, 1 Nov 2024 17:17:10 -0500
Subject: iio: events: make IIO_EVENT_CODE macro private

Make IIO_EVENT_CODE "private" by adding a leading underscore.

There are no more users of this macro in the kernel so we can make it
"private" and encourage developers to use the specialized versions of
the macro instead.

Signed-off-by: David Lechner <dlechner@baylibre.com>
Link: https://patch.msgid.link/20241101-iio-fix-event-macro-use-v1-3-0000c5d09f6d@baylibre.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 include/linux/iio/events.h | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/iio/events.h b/include/linux/iio/events.h
index eeaba5e1525e..72062a0c7c87 100644
--- a/include/linux/iio/events.h
+++ b/include/linux/iio/events.h
@@ -10,7 +10,7 @@
 #include <uapi/linux/iio/events.h>
 
 /**
- * IIO_EVENT_CODE() - create event identifier
+ * _IIO_EVENT_CODE() - create event identifier
  * @chan_type:	Type of the channel. Should be one of enum iio_chan_type.
  * @diff:	Whether the event is for an differential channel or not.
  * @modifier:	Modifier for the channel. Should be one of enum iio_modifier.
@@ -19,10 +19,13 @@
  * @chan:	Channel number for non-differential channels.
  * @chan1:	First channel number for differential channels.
  * @chan2:	Second channel number for differential channels.
+ *
+ * Drivers should use the specialized macros below instead of using this one
+ * directly.
  */
 
-#define IIO_EVENT_CODE(chan_type, diff, modifier, direction,		\
-		       type, chan, chan1, chan2)			\
+#define _IIO_EVENT_CODE(chan_type, diff, modifier, direction,		\
+			type, chan, chan1, chan2)			\
 	(((u64)type << 56) | ((u64)diff << 55) |			\
 	 ((u64)direction << 48) | ((u64)modifier << 40) |		\
 	 ((u64)chan_type << 32) | (((u16)chan2) << 16) | ((u16)chan1) | \
@@ -41,7 +44,7 @@
 
 #define IIO_MOD_EVENT_CODE(chan_type, number, modifier,		\
 			   type, direction)				\
-	IIO_EVENT_CODE(chan_type, 0, modifier, direction, type, number, 0, 0)
+	_IIO_EVENT_CODE(chan_type, 0, modifier, direction, type, number, 0, 0)
 
 /**
  * IIO_UNMOD_EVENT_CODE() - create event identifier for unmodified (non
@@ -53,7 +56,7 @@
  */
 
 #define IIO_UNMOD_EVENT_CODE(chan_type, number, type, direction)	\
-	IIO_EVENT_CODE(chan_type, 0, 0, direction, type, number, 0, 0)
+	_IIO_EVENT_CODE(chan_type, 0, 0, direction, type, number, 0, 0)
 
 /**
  * IIO_DIFF_EVENT_CODE() - create event identifier for differential channels
@@ -65,6 +68,6 @@
  */
 
 #define IIO_DIFF_EVENT_CODE(chan_type, chan1, chan2, type, direction)	\
-	IIO_EVENT_CODE(chan_type, 1, 0, direction, type, 0, chan1, chan2)
+	_IIO_EVENT_CODE(chan_type, 1, 0, direction, type, 0, chan1, chan2)
 
 #endif
-- 
cgit v1.2.3


From a865276872ec4f129f8a582634be82dcc275dc2a Mon Sep 17 00:00:00 2001
From: Caleb Sander Mateos <csander@purestorage.com>
Date: Wed, 30 Oct 2024 18:23:25 -0600
Subject: dim: make dim_calc_stats() inputs const pointers

Make the start and end arguments to dim_calc_stats() const pointers
to clarify that the function does not modify their values.

Signed-off-by: Caleb Sander Mateos <csander@purestorage.com>
Reviewed-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <florian.fainelli@broadcom.com>
Reviewed-by: Arthur Kiyanovski <akiyano@amazon.com>
Link: https://patch.msgid.link/20241031002326.3426181-1-csander@purestorage.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/dim.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/dim.h b/include/linux/dim.h
index 1b581ff25a15..84579a50ae7f 100644
--- a/include/linux/dim.h
+++ b/include/linux/dim.h
@@ -351,7 +351,8 @@ void dim_park_tired(struct dim *dim);
  * Takes into consideration counter wrap-around.
  * Returned boolean indicates whether curr_stats are reliable.
  */
-bool dim_calc_stats(struct dim_sample *start, struct dim_sample *end,
+bool dim_calc_stats(const struct dim_sample *start,
+		    const struct dim_sample *end,
 		    struct dim_stats *curr_stats);
 
 /**
-- 
cgit v1.2.3


From 61bf0009a7657d394d942c8ee961b9ea5f2168fe Mon Sep 17 00:00:00 2001
From: Caleb Sander Mateos <csander@purestorage.com>
Date: Wed, 30 Oct 2024 18:23:26 -0600
Subject: dim: pass dim_sample to net_dim() by reference

net_dim() is currently passed a struct dim_sample argument by value.
struct dim_sample is 24 bytes. Since this is greater 16 bytes, x86-64
passes it on the stack. All callers have already initialized dim_sample
on the stack, so passing it by value requires pushing a duplicated copy
to the stack. Either witing to the stack and immediately reading it, or
perhaps dereferencing addresses relative to the stack pointer in a chain
of push instructions, seems to perform quite poorly.

In a heavy TCP workload, mlx5e_handle_rx_dim() consumes 3% of CPU time,
94% of which is attributed to the first push instruction to copy
dim_sample on the stack for the call to net_dim():
// Call ktime_get()
  0.26 |4ead2:   call   4ead7 <mlx5e_handle_rx_dim+0x47>
// Pass the address of struct dim in %rdi
       |4ead7:   lea    0x3d0(%rbx),%rdi
// Set dim_sample.pkt_ctr
       |4eade:   mov    %r13d,0x8(%rsp)
// Set dim_sample.byte_ctr
       |4eae3:   mov    %r12d,0xc(%rsp)
// Set dim_sample.event_ctr
  0.15 |4eae8:   mov    %bp,0x10(%rsp)
// Duplicate dim_sample on the stack
 94.16 |4eaed:   push   0x10(%rsp)
  2.79 |4eaf1:   push   0x10(%rsp)
  0.07 |4eaf5:   push   %rax
// Call net_dim()
  0.21 |4eaf6:   call   4eafb <mlx5e_handle_rx_dim+0x6b>

To allow the caller to reuse the struct dim_sample already on the stack,
pass the struct dim_sample by reference to net_dim().

Signed-off-by: Caleb Sander Mateos <csander@purestorage.com>
Reviewed-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Shannon Nelson <shannon.nelson@amd.com>
Reviewed-by: Florian Fainelli <florian.fainelli@broadcom.com>
Reviewed-by: Arthur Kiyanovski <akiyano@amazon.com>
Reviewed-by: Louis Peens <louis.peens@corigine.com>
Link: https://patch.msgid.link/20241031002326.3426181-2-csander@purestorage.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/dim.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/dim.h b/include/linux/dim.h
index 84579a50ae7f..06543fd40fcc 100644
--- a/include/linux/dim.h
+++ b/include/linux/dim.h
@@ -425,7 +425,7 @@ struct dim_cq_moder net_dim_get_def_tx_moderation(u8 cq_period_mode);
  * This is the main logic of the algorithm, where data is processed in order
  * to decide on next required action.
  */
-void net_dim(struct dim *dim, struct dim_sample end_sample);
+void net_dim(struct dim *dim, const struct dim_sample *end_sample);
 
 /* RDMA DIM */
 
-- 
cgit v1.2.3


From 1685f685ff8036c74c3b5e9006ec7ceda5bafefa Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Date: Sat, 26 Oct 2024 18:43:33 +0300
Subject: soc: qcom: llcc: add support for SAR2130P and SAR1130P

Implement necessary support for the LLCC control on the SAR1130P and
SAR2130P platforms. These two platforms use different ATTR1_MAX_CAP
shift and also require manual override for num_banks.

Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Reviewed-by: Konrad Dybcio <konrad.dybcio@oss.qualcomm.com>
Link: https://lore.kernel.org/r/20241026-sar2130p-llcc-v3-3-2a58fa1b4d12@linaro.org
Signed-off-by: Bjorn Andersson <andersson@kernel.org>
---
 include/linux/soc/qcom/llcc-qcom.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'include')

diff --git a/include/linux/soc/qcom/llcc-qcom.h b/include/linux/soc/qcom/llcc-qcom.h
index 9e9f528b1370..a79bfac230c6 100644
--- a/include/linux/soc/qcom/llcc-qcom.h
+++ b/include/linux/soc/qcom/llcc-qcom.h
@@ -54,7 +54,19 @@
 #define LLCC_CAMEXP4	 52
 #define LLCC_DISP_WB	 53
 #define LLCC_DISP_1	 54
+#define LLCC_VIEYE	 57
+#define LLCC_VIDPTH	 58
+#define LLCC_GPUMV	 59
+#define LLCC_EVA_LEFT	 60
+#define LLCC_EVA_RIGHT	 61
+#define LLCC_EVAGAIN	 62
+#define LLCC_VIPTH	 63
 #define LLCC_VIDVSP	 64
+#define LLCC_DISP_LEFT	 65
+#define LLCC_DISP_RIGHT	 66
+#define LLCC_EVCS_LEFT	 67
+#define LLCC_EVCS_RIGHT	 68
+#define LLCC_SPAD	 69
 
 /**
  * struct llcc_slice_desc - Cache slice descriptor
-- 
cgit v1.2.3


From 8ab3138a9b2dcb0ddf281240cf8cba414eb1224a Mon Sep 17 00:00:00 2001
From: Edward Srouji <edwards@nvidia.com>
Date: Tue, 3 Sep 2024 14:37:51 +0300
Subject: net/mlx5: Introduce data placement ordering bits

Introduce out-of-order (OOO) data placement (DP) IFC related bits to
support OOO DP QP.

Signed-off-by: Edward Srouji <edwards@nvidia.com>
Reviewed-by: Yishai Hadas <yishaih@nvidia.com>
Link: https://patch.msgid.link/f30e5cbb5459fd02f27f35909bb545cab346b58b.1725362773.git.leon@kernel.org
Signed-off-by: Leon Romanovsky <leon@kernel.org>
---
 include/linux/mlx5/mlx5_ifc.h | 23 +++++++++++++++++------
 1 file changed, 17 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 96d369112bfa..2a037843b117 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -1872,7 +1872,11 @@ struct mlx5_ifc_cmd_hca_cap_bits {
 	u8         reserved_at_328[0x2];
 	u8	   relaxed_ordering_read[0x1];
 	u8         log_max_pd[0x5];
-	u8         reserved_at_330[0x5];
+	u8         dp_ordering_ooo_all_ud[0x1];
+	u8         dp_ordering_ooo_all_uc[0x1];
+	u8         dp_ordering_ooo_all_xrc[0x1];
+	u8         dp_ordering_ooo_all_dc[0x1];
+	u8         dp_ordering_ooo_all_rc[0x1];
 	u8         pcie_reset_using_hotreset_method[0x1];
 	u8         pci_sync_for_fw_update_with_driver_unload[0x1];
 	u8         vnic_env_cnt_steering_fail[0x1];
@@ -2094,7 +2098,9 @@ struct mlx5_ifc_cmd_hca_cap_2_bits {
 	u8	   reserved_at_0[0x80];
 
 	u8         migratable[0x1];
-	u8         reserved_at_81[0x11];
+	u8         reserved_at_81[0x7];
+	u8         dp_ordering_force[0x1];
+	u8         reserved_at_89[0x9];
 	u8         query_vuid[0x1];
 	u8         reserved_at_93[0x5];
 	u8         umr_log_entity_size_5[0x1];
@@ -3524,7 +3530,8 @@ struct mlx5_ifc_qpc_bits {
 	u8         latency_sensitive[0x1];
 	u8         reserved_at_24[0x1];
 	u8         drain_sigerr[0x1];
-	u8         reserved_at_26[0x2];
+	u8         reserved_at_26[0x1];
+	u8         dp_ordering_force[0x1];
 	u8         pd[0x18];
 
 	u8         mtu[0x3];
@@ -3597,7 +3604,8 @@ struct mlx5_ifc_qpc_bits {
 	u8         rae[0x1];
 	u8         reserved_at_493[0x1];
 	u8         page_offset[0x6];
-	u8         reserved_at_49a[0x3];
+	u8         reserved_at_49a[0x2];
+	u8         dp_ordering_1[0x1];
 	u8         cd_slave_receive[0x1];
 	u8         cd_slave_send[0x1];
 	u8         cd_master[0x1];
@@ -4507,7 +4515,8 @@ struct mlx5_ifc_dctc_bits {
 	u8         state[0x4];
 	u8         reserved_at_8[0x18];
 
-	u8         reserved_at_20[0x8];
+	u8         reserved_at_20[0x7];
+	u8         dp_ordering_force[0x1];
 	u8         user_index[0x18];
 
 	u8         reserved_at_40[0x8];
@@ -4522,7 +4531,9 @@ struct mlx5_ifc_dctc_bits {
 	u8         latency_sensitive[0x1];
 	u8         rlky[0x1];
 	u8         free_ar[0x1];
-	u8         reserved_at_73[0xd];
+	u8         reserved_at_73[0x1];
+	u8         dp_ordering_1[0x1];
+	u8         reserved_at_75[0xb];
 
 	u8         reserved_at_80[0x8];
 	u8         cs_res[0x8];
-- 
cgit v1.2.3


From fe5ba6bf91b3e30118c59fe51048cda101de6542 Mon Sep 17 00:00:00 2001
From: Wei Fang <wei.fang@nxp.com>
Date: Wed, 30 Oct 2024 17:39:15 +0800
Subject: net: enetc: add initial netc-blk-ctrl driver support

The netc-blk-ctrl driver is used to configure Integrated Endpoint
Register Block (IERB) and Privileged Register Block (PRB) of NETC.
For i.MX platforms, it is also used to configure the NETCMIX block.

The IERB contains registers that are used for pre-boot initialization,
debug, and non-customer configuration. The PRB controls global reset
and global error handling for NETC. The NETCMIX block is mainly used
to set MII protocol and PCS protocol of the links, it also contains
settings for some other functions.

Note the IERB configuration registers can only be written after being
unlocked by PRB, otherwise, all write operations are inhibited. A warm
reset is performed when the IERB is unlocked, and it results in an FLR
to all NETC devices. Therefore, all NETC device drivers must be probed
or initialized after the warm reset is finished.

Signed-off-by: Wei Fang <wei.fang@nxp.com>
Reviewed-by: Frank Li <Frank.Li@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/fsl/netc_global.h | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)
 create mode 100644 include/linux/fsl/netc_global.h

(limited to 'include')

diff --git a/include/linux/fsl/netc_global.h b/include/linux/fsl/netc_global.h
new file mode 100644
index 000000000000..fdecca8c90f0
--- /dev/null
+++ b/include/linux/fsl/netc_global.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */
+/* Copyright 2024 NXP
+ */
+#ifndef __NETC_GLOBAL_H
+#define __NETC_GLOBAL_H
+
+#include <linux/io.h>
+
+static inline u32 netc_read(void __iomem *reg)
+{
+	return ioread32(reg);
+}
+
+static inline void netc_write(void __iomem *reg, u32 val)
+{
+	iowrite32(val, reg);
+}
+
+#endif
-- 
cgit v1.2.3


From 8b36f7c3c6618dc97697a6a20a13b29651f68ab6 Mon Sep 17 00:00:00 2001
From: Edward Srouji <edwards@nvidia.com>
Date: Tue, 3 Sep 2024 14:37:52 +0300
Subject: RDMA/mlx5: Support OOO RX WQE consumption

Support QP with out-of-order (OOO) capabilities enabled.
This allows WRs on the receiver side of the QP to be consumed OOO,
permitting the sender side to transmit messages without guaranteeing
arrival order on the receiver side.

When enabled, the completion ordering of WRs remains in-order,
regardless of the Receive WRs consumption order.
RDMA Read and RDMA Atomic operations on the responder side continue to
be executed in-order, while the ordering of data placement for RDMA
Write and Send operations is not guaranteed.

Atomic operations larger than 8 bytes are currently not supported.
Therefore, when this feature is enabled, the created QP restricts its
atomic support to 8 bytes at most.

In addition, when querying the device, a new flag is returned in
response to indicate that the Kernel supports OOO QP.

Signed-off-by: Edward Srouji <edwards@nvidia.com>
Reviewed-by: Yishai Hadas <yishaih@nvidia.com>
Link: https://patch.msgid.link/06ac609a5f358c8fb0a090d22c61a2f9329d82e6.1725362773.git.leon@kernel.org
Signed-off-by: Leon Romanovsky <leon@kernel.org>
---
 include/uapi/rdma/mlx5-abi.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include')

diff --git a/include/uapi/rdma/mlx5-abi.h b/include/uapi/rdma/mlx5-abi.h
index d4f6a36dffb0..8a6ad6c6841c 100644
--- a/include/uapi/rdma/mlx5-abi.h
+++ b/include/uapi/rdma/mlx5-abi.h
@@ -252,6 +252,7 @@ enum mlx5_ib_query_dev_resp_flags {
 	MLX5_IB_QUERY_DEV_RESP_FLAGS_CQE_128B_PAD  = 1 << 1,
 	MLX5_IB_QUERY_DEV_RESP_PACKET_BASED_CREDIT_MODE = 1 << 2,
 	MLX5_IB_QUERY_DEV_RESP_FLAGS_SCAT2CQE_DCT = 1 << 3,
+	MLX5_IB_QUERY_DEV_RESP_FLAGS_OOO_DP = 1 << 4,
 };
 
 enum mlx5_ib_tunnel_offloads {
@@ -439,6 +440,10 @@ struct mlx5_ib_burst_info {
 	__u16       reserved;
 };
 
+enum mlx5_ib_modify_qp_mask {
+	MLX5_IB_MODIFY_QP_OOO_DP = 1 << 0,
+};
+
 struct mlx5_ib_modify_qp {
 	__u32			   comp_mask;
 	struct mlx5_ib_burst_info  burst_info;
-- 
cgit v1.2.3


From af7a35bf6c36a77624d3abe46b3830b7c2a5f20c Mon Sep 17 00:00:00 2001
From: Chiara Meiohas <cmeiohas@nvidia.com>
Date: Thu, 31 Oct 2024 15:36:51 +0200
Subject: RDMA/core: Implement RoCE GID port rescan and export delete function

rdma_roce_rescan_port() scans all network devices in
the system and adds the gids if relevant to the RoCE device
port. When not in bonding mode it adds the GIDs of the
netdevice in this port. When in bonding mode it adds the
GIDs of both the port's netdevice and the bond master
netdevice.

Export roce_del_all_netdev_gids(), which  removes all GIDs
associated with a specific netdevice for a given port.

Signed-off-by: Chiara Meiohas <cmeiohas@nvidia.com>
Link: https://patch.msgid.link/674d498da4637a1503ff1367e28bd09ff942fd5e.1730381292.git.leon@kernel.org
Signed-off-by: Leon Romanovsky <leon@kernel.org>
---
 include/rdma/ib_verbs.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 9cb8b5fe7eee..67551133b522 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -4734,6 +4734,9 @@ ib_get_vector_affinity(struct ib_device *device, int comp_vector)
  * @device:         the rdma device
  */
 void rdma_roce_rescan_device(struct ib_device *ibdev);
+void rdma_roce_rescan_port(struct ib_device *ib_dev, u32 port);
+void roce_del_all_netdev_gids(struct ib_device *ib_dev,
+			      u32 port, struct net_device *ndev);
 
 struct ib_ucontext *ib_uverbs_get_ucontext_file(struct ib_uverbs_file *ufile);
 
-- 
cgit v1.2.3


From dc6be4418a1144cce422093cde0245c76cdcaff0 Mon Sep 17 00:00:00 2001
From: Patrisious Haddad <phaddad@nvidia.com>
Date: Thu, 31 Oct 2024 13:22:51 +0200
Subject: RDMA/core: Add device ufile cleanup operation

Add a driver operation to allow preemptive cleanup of ufile HW resources
before the standard ufile cleanup flow begins. Thus, expediting the
final cleanup phase which leads to fast teardown overall.

This allows the use of driver specific clean up procedures to make the
cleanup process more efficient.

Signed-off-by: Patrisious Haddad <phaddad@nvidia.com>
Link: https://patch.msgid.link/cabe00d75132b5732cb515944e3c500a01fb0b4a.1730373303.git.leon@kernel.org
Signed-off-by: Leon Romanovsky <leon@kernel.org>
---
 include/rdma/ib_verbs.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include')

diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 67551133b522..3417636da960 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -2675,6 +2675,12 @@ struct ib_device_ops {
 	 */
 	void (*del_sub_dev)(struct ib_device *sub_dev);
 
+	/**
+	 * ufile_cleanup - Attempt to cleanup ubojects HW resources inside
+	 * the ufile.
+	 */
+	void (*ufile_hw_cleanup)(struct ib_uverbs_file *ufile);
+
 	DECLARE_RDMA_OBJ_SIZE(ib_ah);
 	DECLARE_RDMA_OBJ_SIZE(ib_counters);
 	DECLARE_RDMA_OBJ_SIZE(ib_cq);
-- 
cgit v1.2.3


From 27ed2f00807c2328c99751f9500ce6478f16cf7b Mon Sep 17 00:00:00 2001
From: Patrisious Haddad <phaddad@nvidia.com>
Date: Thu, 31 Oct 2024 13:22:52 +0200
Subject: RDMA/core: Move ib_uverbs_file struct to uverbs_types.h

In light of the previous commit, make the ib_uverbs_file accessible to
drivers by moving its definition to uverbs_types.h, to allow drivers to
freely access the struct argument and create a personalized cleanup flow.

For the same reason expose uverbs_try_lock_object function to allow driver
to safely access the uverbs objects.

Signed-off-by: Patrisious Haddad <phaddad@nvidia.com>
Link: https://patch.msgid.link/29b718e0dca35daa5f496320a39284fc1f5a1722.1730373303.git.leon@kernel.org
Signed-off-by: Leon Romanovsky <leon@kernel.org>
---
 include/rdma/uverbs_types.h | 33 +++++++++++++++++++++++++++++++++
 1 file changed, 33 insertions(+)

(limited to 'include')

diff --git a/include/rdma/uverbs_types.h b/include/rdma/uverbs_types.h
index ccd11631c167..26ba919ac245 100644
--- a/include/rdma/uverbs_types.h
+++ b/include/rdma/uverbs_types.h
@@ -134,6 +134,8 @@ static inline void uverbs_uobject_get(struct ib_uobject *uobject)
 }
 void uverbs_uobject_put(struct ib_uobject *uobject);
 
+int uverbs_try_lock_object(struct ib_uobject *uobj, enum rdma_lookup_mode mode);
+
 struct uverbs_obj_fd_type {
 	/*
 	 * In fd based objects, uverbs_obj_type_ops points to generic
@@ -150,6 +152,37 @@ struct uverbs_obj_fd_type {
 	int				flags;
 };
 
+struct ib_uverbs_file {
+	struct kref				ref;
+	struct ib_uverbs_device		       *device;
+	struct mutex				ucontext_lock;
+	/*
+	 * ucontext must be accessed via ib_uverbs_get_ucontext() or with
+	 * ucontext_lock held
+	 */
+	struct ib_ucontext		       *ucontext;
+	struct ib_uverbs_async_event_file      *default_async_file;
+	struct list_head			list;
+
+	/*
+	 * To access the uobjects list hw_destroy_rwsem must be held for write
+	 * OR hw_destroy_rwsem held for read AND uobjects_lock held.
+	 * hw_destroy_rwsem should be called across any destruction of the HW
+	 * object of an associated uobject.
+	 */
+	struct rw_semaphore	hw_destroy_rwsem;
+	spinlock_t		uobjects_lock;
+	struct list_head	uobjects;
+
+	struct mutex umap_lock;
+	struct list_head umaps;
+	struct page *disassociate_page;
+
+	struct xarray		idr;
+
+	struct mutex disassociation_lock;
+};
+
 extern const struct uverbs_obj_type_class uverbs_idr_class;
 extern const struct uverbs_obj_type_class uverbs_fd_class;
 int uverbs_uobject_fd_release(struct inode *inode, struct file *filp);
-- 
cgit v1.2.3


From 7566752e4d7d7fc0186531aa800068a7243f95c1 Mon Sep 17 00:00:00 2001
From: Chiara Meiohas <cmeiohas@nvidia.com>
Date: Thu, 31 Oct 2024 11:31:14 +0200
Subject: RDMA/nldev: Add IB device and net device rename events

Implement event sending for IB device rename and IB device
port associated netdevice rename.

In iproute2, rdma monitor displays the IB device name, port
and the netdevice name when displaying event info. Since
users can modiy these names, we track and notify on renaming
events.

Note: In order to receive netdevice rename events, drivers
must use the ib_device_set_netdev() API when attaching net
devices to IB devices.

$ rdma monitor
$ rmmod mlx5_ib
[UNREGISTER]	dev 1  rocep8s0f1
[UNREGISTER]	dev 0  rocep8s0f0

$ modprobe mlx5_ib
[REGISTER]	dev 2  mlx5_0
[NETDEV_ATTACH]	dev 2  mlx5_0 port 1 netdev 4 eth2
[REGISTER]	dev 3  mlx5_1
[NETDEV_ATTACH]	dev 3  mlx5_1 port 1 netdev 5 eth3
[RENAME]	dev 2  rocep8s0f0
[RENAME]	dev 3  rocep8s0f1

$ devlink dev eswitch set pci/0000:08:00.0 mode switchdev
[UNREGISTER]	dev 2  rocep8s0f0
[REGISTER]	dev 4  mlx5_0
[NETDEV_ATTACH]	dev 4  mlx5_0 port 30 netdev 4 eth2
[RENAME]	dev 4  rdmap8s0f0

$ echo 4 > /sys/class/net/eth2/device/sriov_numvfs
[NETDEV_ATTACH]	dev 4  rdmap8s0f0 port 2 netdev 7 eth4
[NETDEV_ATTACH]	dev 4  rdmap8s0f0 port 3 netdev 8 eth5
[NETDEV_ATTACH]	dev 4  rdmap8s0f0 port 4 netdev 9 eth6
[NETDEV_ATTACH]	dev 4  rdmap8s0f0 port 5 netdev 10 eth7
[REGISTER]	dev 5  mlx5_0
[NETDEV_ATTACH]	dev 5  mlx5_0 port 1 netdev 11 eth8
[REGISTER]	dev 6  mlx5_1
[NETDEV_ATTACH]	dev 6  mlx5_1 port 1 netdev 12 eth9
[RENAME]	dev 5  rocep8s0f0v0
[RENAME]	dev 6  rocep8s0f0v1
[REGISTER]	dev 7  mlx5_0
[NETDEV_ATTACH]	dev 7  mlx5_0 port 1 netdev 13 eth10
[RENAME]	dev 7  rocep8s0f0v2
[REGISTER]	dev 8  mlx5_0
[NETDEV_ATTACH]	dev 8  mlx5_0 port 1 netdev 14 eth11
[RENAME]	dev 8  rocep8s0f0v3

$ ip link set eth2 name myeth2
[NETDEV_RENAME]	 netdev 4 myeth2

$ ip link set eth1 name myeth1

** no events received, because eth1 is not attached to
   an IB device **

Signed-off-by: Chiara Meiohas <cmeiohas@nvidia.com>
Link: https://patch.msgid.link/093c978ef2766fd3ab4ff8798eeb68f2f11582f6.1730367038.git.leon@kernel.org
Signed-off-by: Leon Romanovsky <leon@kernel.org>
---
 include/uapi/rdma/rdma_netlink.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h
index 39be09c0ffbb..9f9cf20c1cd8 100644
--- a/include/uapi/rdma/rdma_netlink.h
+++ b/include/uapi/rdma/rdma_netlink.h
@@ -638,6 +638,8 @@ enum rdma_nl_notify_event_type {
 	RDMA_UNREGISTER_EVENT,
 	RDMA_NETDEV_ATTACH_EVENT,
 	RDMA_NETDEV_DETACH_EVENT,
+	RDMA_RENAME_EVENT,
+	RDMA_NETDEV_RENAME_EVENT,
 };
 
 #endif /* _UAPI_RDMA_NETLINK_H */
-- 
cgit v1.2.3


From 2a8f6153e1c2db06a537a5c9d61102eb591776f1 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 4 Nov 2024 08:39:32 +0100
Subject: block: pre-calculate max_zone_append_sectors

max_zone_append_sectors differs from all other queue limits in that the
final value used is not stored in the queue_limits but needs to be
obtained using queue_limits_max_zone_append_sectors helper.  This not
only adds (tiny) extra overhead to the I/O path, but also can be easily
forgotten in file system code.

Add a new max_hw_zone_append_sectors value to queue_limits which is
set by the driver, and calculate max_zone_append_sectors from that and
the other inputs in blk_validate_zoned_limits, similar to how
max_sectors is calculated to fix this.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20241104073955.112324-3-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blkdev.h | 21 +++------------------
 1 file changed, 3 insertions(+), 18 deletions(-)

(limited to 'include')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 7bfc877e159e..6d1413bd69a5 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -375,6 +375,7 @@ struct queue_limits {
 	unsigned int		max_user_discard_sectors;
 	unsigned int		max_secure_erase_sectors;
 	unsigned int		max_write_zeroes_sectors;
+	unsigned int		max_hw_zone_append_sectors;
 	unsigned int		max_zone_append_sectors;
 	unsigned int		discard_granularity;
 	unsigned int		discard_alignment;
@@ -1204,25 +1205,9 @@ static inline unsigned int queue_max_segment_size(const struct request_queue *q)
 	return q->limits.max_segment_size;
 }
 
-static inline unsigned int
-queue_limits_max_zone_append_sectors(const struct queue_limits *l)
-{
-	unsigned int max_sectors = min(l->chunk_sectors, l->max_hw_sectors);
-
-	return min_not_zero(l->max_zone_append_sectors, max_sectors);
-}
-
-static inline unsigned int queue_max_zone_append_sectors(struct request_queue *q)
-{
-	if (!blk_queue_is_zoned(q))
-		return 0;
-
-	return queue_limits_max_zone_append_sectors(&q->limits);
-}
-
 static inline bool queue_emulates_zone_append(struct request_queue *q)
 {
-	return blk_queue_is_zoned(q) && !q->limits.max_zone_append_sectors;
+	return blk_queue_is_zoned(q) && !q->limits.max_hw_zone_append_sectors;
 }
 
 static inline bool bdev_emulates_zone_append(struct block_device *bdev)
@@ -1233,7 +1218,7 @@ static inline bool bdev_emulates_zone_append(struct block_device *bdev)
 static inline unsigned int
 bdev_max_zone_append_sectors(struct block_device *bdev)
 {
-	return queue_max_zone_append_sectors(bdev_get_queue(bdev));
+	return bdev_limits(bdev)->max_zone_append_sectors;
 }
 
 static inline unsigned int bdev_max_segments(struct block_device *bdev)
-- 
cgit v1.2.3


From 9a783139614fb837da4ccb2f8ec6f0ddc802b3d3 Mon Sep 17 00:00:00 2001
From: Alistair Francis <alistair23@gmail.com>
Date: Mon, 4 Nov 2024 16:03:00 +1000
Subject: bpf: Move btf_type_is_struct_ptr() under CONFIG_BPF_SYSCALL

The static inline btf_type_is_struct_ptr() function calls
btf_type_skip_modifiers() which is guarded by CONFIG_BPF_SYSCALL.
btf_type_is_struct_ptr() is also only called by CONFIG_BPF_SYSCALL
ifdef code, so let's only expose btf_type_is_struct_ptr() if
CONFIG_BPF_SYSCALL is defined.

Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
Link: https://lore.kernel.org/r/20241104060300.421403-1-alistair.francis@wdc.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/btf.h | 21 ++++++++++-----------
 1 file changed, 10 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/include/linux/btf.h b/include/linux/btf.h
index 631060e3ad14..4214e76c9168 100644
--- a/include/linux/btf.h
+++ b/include/linux/btf.h
@@ -582,6 +582,16 @@ int get_kern_ctx_btf_id(struct bpf_verifier_log *log, enum bpf_prog_type prog_ty
 bool btf_types_are_same(const struct btf *btf1, u32 id1,
 			const struct btf *btf2, u32 id2);
 int btf_check_iter_arg(struct btf *btf, const struct btf_type *func, int arg_idx);
+
+static inline bool btf_type_is_struct_ptr(struct btf *btf, const struct btf_type *t)
+{
+	if (!btf_type_is_ptr(t))
+		return false;
+
+	t = btf_type_skip_modifiers(btf, t->type, NULL);
+
+	return btf_type_is_struct(t);
+}
 #else
 static inline const struct btf_type *btf_type_by_id(const struct btf *btf,
 						    u32 type_id)
@@ -661,15 +671,4 @@ static inline int btf_check_iter_arg(struct btf *btf, const struct btf_type *fun
 	return -EOPNOTSUPP;
 }
 #endif
-
-static inline bool btf_type_is_struct_ptr(struct btf *btf, const struct btf_type *t)
-{
-	if (!btf_type_is_ptr(t))
-		return false;
-
-	t = btf_type_skip_modifiers(btf, t->type, NULL);
-
-	return btf_type_is_struct(t);
-}
-
 #endif
-- 
cgit v1.2.3


From cb4158ce8ec8a5bb528cc1693356a5eb8058094d Mon Sep 17 00:00:00 2001
From: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Date: Mon, 4 Nov 2024 09:19:57 -0800
Subject: bpf: Mark raw_tp arguments with PTR_MAYBE_NULL

Arguments to a raw tracepoint are tagged as trusted, which carries the
semantics that the pointer will be non-NULL.  However, in certain cases,
a raw tracepoint argument may end up being NULL. More context about this
issue is available in [0].

Thus, there is a discrepancy between the reality, that raw_tp arguments
can actually be NULL, and the verifier's knowledge, that they are never
NULL, causing explicit NULL checks to be deleted, and accesses to such
pointers potentially crashing the kernel.

To fix this, mark raw_tp arguments as PTR_MAYBE_NULL, and then special
case the dereference and pointer arithmetic to permit it, and allow
passing them into helpers/kfuncs; these exceptions are made for raw_tp
programs only. Ensure that we don't do this when ref_obj_id > 0, as in
that case this is an acquired object and doesn't need such adjustment.

The reason we do mask_raw_tp_trusted_reg logic is because other will
recheck in places whether the register is a trusted_reg, and then
consider our register as untrusted when detecting the presence of the
PTR_MAYBE_NULL flag.

To allow safe dereference, we enable PROBE_MEM marking when we see loads
into trusted pointers with PTR_MAYBE_NULL.

While trusted raw_tp arguments can also be passed into helpers or kfuncs
where such broken assumption may cause issues, a future patch set will
tackle their case separately, as PTR_TO_BTF_ID (without PTR_TRUSTED) can
already be passed into helpers and causes similar problems. Thus, they
are left alone for now.

It is possible that these checks also permit passing non-raw_tp args
that are trusted PTR_TO_BTF_ID with null marking. In such a case,
allowing dereference when pointer is NULL expands allowed behavior, so
won't regress existing programs, and the case of passing these into
helpers is the same as above and will be dealt with later.

Also update the failure case in tp_btf_nullable selftest to capture the
new behavior, as the verifier will no longer cause an error when
directly dereference a raw tracepoint argument marked as __nullable.

  [0]: https://lore.kernel.org/bpf/ZrCZS6nisraEqehw@jlelli-thinkpadt14gen4.remote.csb

Reviewed-by: Jiri Olsa <jolsa@kernel.org>
Reported-by: Juri Lelli <juri.lelli@redhat.com>
Tested-by: Juri Lelli <juri.lelli@redhat.com>
Fixes: 3f00c5239344 ("bpf: Allow trusted pointers to be passed to KF_TRUSTED_ARGS kfuncs")
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20241104171959.2938862-2-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/bpf.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index c3ba4d475174..1b84613b10ac 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -3495,4 +3495,10 @@ static inline bool bpf_is_subprog(const struct bpf_prog *prog)
 	return prog->aux->func_idx != 0;
 }
 
+static inline bool bpf_prog_is_raw_tp(const struct bpf_prog *prog)
+{
+	return prog->type == BPF_PROG_TYPE_TRACING &&
+	       prog->expected_attach_type == BPF_TRACE_RAW_TP;
+}
+
 #endif /* _LINUX_BPF_H */
-- 
cgit v1.2.3


From ad37bcd965fda43f34cf5cc051f5d310880bd1e7 Mon Sep 17 00:00:00 2001
From: Alice Ryhl <aliceryhl@google.com>
Date: Wed, 30 Oct 2024 16:04:25 +0000
Subject: rust: add tracepoint support

Make it possible to have Rust code call into tracepoints defined by C
code. It is still required that the tracepoint is declared in a C
header, and that this header is included in the input to bindgen.

Instead of calling __DO_TRACE directly, the exported rust_do_trace_
function calls an inline helper function. This is because the `cond`
argument does not exist at the callsite of DEFINE_RUST_DO_TRACE.

__DECLARE_TRACE always emits an inline static and an extern declaration
that is only used when CREATE_RUST_TRACE_POINTS is set. These should not
end up in the final binary so it is not a problem that they sometimes
are emitted without a user.

Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Josh Poimboeuf <jpoimboe@kernel.org>
Cc: Jason Baron <jbaron@akamai.com>
Cc: Ard Biesheuvel <ardb@kernel.org>
Cc: Miguel Ojeda <ojeda@kernel.org>
Cc: Alex Gaynor <alex.gaynor@gmail.com>
Cc: Wedson Almeida Filho <wedsonaf@gmail.com>
Cc: " =?utf-8?q?Bj=C3=B6rn_Roy_Baron?= " <bjorn3_gh@protonmail.com>
Cc: Benno Lossin <benno.lossin@proton.me>
Cc: Andreas Hindborg <a.hindborg@kernel.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Sean Christopherson <seanjc@google.com>
Cc: Uros Bizjak <ubizjak@gmail.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
Cc: Marc Zyngier <maz@kernel.org>
Cc: Oliver Upton <oliver.upton@linux.dev>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Cc: Fuad Tabba <tabba@google.com>
Cc: Paul Walmsley <paul.walmsley@sifive.com>
Cc: Palmer Dabbelt <palmer@dabbelt.com>
Cc: Albert Ou <aou@eecs.berkeley.edu>
Cc: Anup Patel <apatel@ventanamicro.com>
Cc: Andrew Jones <ajones@ventanamicro.com>
Cc: Alexandre Ghiti <alexghiti@rivosinc.com>
Cc: Conor Dooley <conor.dooley@microchip.com>
Cc: Samuel Holland <samuel.holland@sifive.com>
Cc: Huacai Chen <chenhuacai@kernel.org>
Cc: WANG Xuerui <kernel@xen0n.name>
Cc: Bibo Mao <maobibo@loongson.cn>
Cc: Tiezhu Yang <yangtiezhu@loongson.cn>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Tianrui Zhao <zhaotianrui@loongson.cn>
Link: https://lore.kernel.org/20241030-tracepoint-v12-2-eec7f0f8ad22@google.com
Reviewed-by: Carlos Llamas <cmllamas@google.com>
Reviewed-by: Gary Guo <gary@garyguo.net>
Reviewed-by: Boqun Feng <boqun.feng@gmail.com>
Signed-off-by: Alice Ryhl <aliceryhl@google.com>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 include/linux/tracepoint.h   | 28 +++++++++++++++++++++++++++-
 include/trace/define_trace.h | 12 ++++++++++++
 2 files changed, 39 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index 0dc67fad706c..84c4924e499f 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -225,6 +225,18 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p)
 			preempt_enable_notrace();			\
 	} while (0)
 
+/*
+ * Declare an exported function that Rust code can call to trigger this
+ * tracepoint. This function does not include the static branch; that is done
+ * in Rust to avoid a function call when the tracepoint is disabled.
+ */
+#define DEFINE_RUST_DO_TRACE(name, proto, args)
+#define __DEFINE_RUST_DO_TRACE(name, proto, args)			\
+	notrace void rust_do_trace_##name(proto)			\
+	{								\
+		__rust_do_trace_##name(args);				\
+	}
+
 /*
  * Make sure the alignment of the structure in the __tracepoints section will
  * not add unwanted padding between the beginning of the section and the
@@ -240,6 +252,7 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p)
 	extern int __traceiter_##name(data_proto);			\
 	DECLARE_STATIC_CALL(tp_func_##name, __traceiter_##name);	\
 	extern struct tracepoint __tracepoint_##name;			\
+	extern void rust_do_trace_##name(proto);			\
 	static inline int						\
 	register_trace_##name(void (*probe)(data_proto), void *data)	\
 	{								\
@@ -271,6 +284,12 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p)
 
 #define __DECLARE_TRACE(name, proto, args, cond, data_proto)		\
 	__DECLARE_TRACE_COMMON(name, PARAMS(proto), PARAMS(args), cond, PARAMS(data_proto)) \
+	static inline void __rust_do_trace_##name(proto)		\
+	{								\
+		__DO_TRACE(name,					\
+			TP_ARGS(args),					\
+			TP_CONDITION(cond), 0);				\
+	}								\
 	static inline void trace_##name(proto)				\
 	{								\
 		if (static_branch_unlikely(&__tracepoint_##name.key))	\
@@ -285,6 +304,12 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p)
 
 #define __DECLARE_TRACE_SYSCALL(name, proto, args, cond, data_proto)	\
 	__DECLARE_TRACE_COMMON(name, PARAMS(proto), PARAMS(args), cond, PARAMS(data_proto)) \
+	static inline void __rust_do_trace_##name(proto)		\
+	{								\
+		__DO_TRACE(name,					\
+			TP_ARGS(args),					\
+			TP_CONDITION(cond), 1);				\
+	}								\
 	static inline void trace_##name(proto)				\
 	{								\
 		if (static_branch_unlikely(&__tracepoint_##name.key))	\
@@ -339,7 +364,8 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p)
 	void __probestub_##_name(void *__data, proto)			\
 	{								\
 	}								\
-	DEFINE_STATIC_CALL(tp_func_##_name, __traceiter_##_name);
+	DEFINE_STATIC_CALL(tp_func_##_name, __traceiter_##_name);	\
+	DEFINE_RUST_DO_TRACE(_name, TP_PROTO(proto), TP_ARGS(args))
 
 #define DEFINE_TRACE(name, proto, args)		\
 	DEFINE_TRACE_FN(name, NULL, NULL, PARAMS(proto), PARAMS(args));
diff --git a/include/trace/define_trace.h b/include/trace/define_trace.h
index ff5fa17a6259..0557626b6f6a 100644
--- a/include/trace/define_trace.h
+++ b/include/trace/define_trace.h
@@ -76,6 +76,13 @@
 #define DECLARE_TRACE(name, proto, args)	\
 	DEFINE_TRACE(name, PARAMS(proto), PARAMS(args))
 
+/* If requested, create helpers for calling these tracepoints from Rust. */
+#ifdef CREATE_RUST_TRACE_POINTS
+#undef DEFINE_RUST_DO_TRACE
+#define DEFINE_RUST_DO_TRACE(name, proto, args)	\
+	__DEFINE_RUST_DO_TRACE(name, PARAMS(proto), PARAMS(args))
+#endif
+
 #undef TRACE_INCLUDE
 #undef __TRACE_INCLUDE
 
@@ -134,6 +141,11 @@
 # undef UNDEF_TRACE_INCLUDE_PATH
 #endif
 
+#ifdef CREATE_RUST_TRACE_POINTS
+# undef DEFINE_RUST_DO_TRACE
+# define DEFINE_RUST_DO_TRACE(name, proto, args)
+#endif
+
 /* We may be processing more files */
 #define CREATE_TRACE_POINTS
 
-- 
cgit v1.2.3


From 91d39024e1b02914cc5e2dbc137908e29b269ce4 Mon Sep 17 00:00:00 2001
From: Alice Ryhl <aliceryhl@google.com>
Date: Wed, 30 Oct 2024 16:04:26 +0000
Subject: rust: samples: add tracepoint to Rust sample

This updates the Rust printing sample to invoke a tracepoint. This
ensures that we have a user in-tree from the get-go even though the
patch is being merged before its real user.

Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Josh Poimboeuf <jpoimboe@kernel.org>
Cc: Jason Baron <jbaron@akamai.com>
Cc: Ard Biesheuvel <ardb@kernel.org>
Cc: Miguel Ojeda <ojeda@kernel.org>
Cc: Alex Gaynor <alex.gaynor@gmail.com>
Cc: Wedson Almeida Filho <wedsonaf@gmail.com>
Cc: Gary Guo <gary@garyguo.net>
Cc: " =?utf-8?q?Bj=C3=B6rn_Roy_Baron?= " <bjorn3_gh@protonmail.com>
Cc: Benno Lossin <benno.lossin@proton.me>
Cc: Andreas Hindborg <a.hindborg@kernel.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Sean Christopherson <seanjc@google.com>
Cc: Uros Bizjak <ubizjak@gmail.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
Cc: Marc Zyngier <maz@kernel.org>
Cc: Oliver Upton <oliver.upton@linux.dev>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Cc: Fuad Tabba <tabba@google.com>
Cc: Paul Walmsley <paul.walmsley@sifive.com>
Cc: Palmer Dabbelt <palmer@dabbelt.com>
Cc: Albert Ou <aou@eecs.berkeley.edu>
Cc: Anup Patel <apatel@ventanamicro.com>
Cc: Andrew Jones <ajones@ventanamicro.com>
Cc: Alexandre Ghiti <alexghiti@rivosinc.com>
Cc: Conor Dooley <conor.dooley@microchip.com>
Cc: Samuel Holland <samuel.holland@sifive.com>
Cc: Huacai Chen <chenhuacai@kernel.org>
Cc: WANG Xuerui <kernel@xen0n.name>
Cc: Bibo Mao <maobibo@loongson.cn>
Cc: Tiezhu Yang <yangtiezhu@loongson.cn>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Tianrui Zhao <zhaotianrui@loongson.cn>
Link: https://lore.kernel.org/20241030-tracepoint-v12-3-eec7f0f8ad22@google.com
Reviewed-by: Boqun Feng <boqun.feng@gmail.com>
Signed-off-by: Alice Ryhl <aliceryhl@google.com>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 include/trace/events/rust_sample.h | 31 +++++++++++++++++++++++++++++++
 1 file changed, 31 insertions(+)
 create mode 100644 include/trace/events/rust_sample.h

(limited to 'include')

diff --git a/include/trace/events/rust_sample.h b/include/trace/events/rust_sample.h
new file mode 100644
index 000000000000..dbc80ca2e465
--- /dev/null
+++ b/include/trace/events/rust_sample.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Tracepoints for `samples/rust/rust_print.rs`.
+ *
+ * Copyright (C) 2024 Google, Inc.
+ */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM rust_sample
+
+#if !defined(_RUST_SAMPLE_TRACE_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _RUST_SAMPLE_TRACE_H
+
+#include <linux/tracepoint.h>
+
+TRACE_EVENT(rust_sample_loaded,
+	TP_PROTO(int magic_number),
+	TP_ARGS(magic_number),
+	TP_STRUCT__entry(
+		__field(int, magic_number)
+	),
+	TP_fast_assign(
+		__entry->magic_number = magic_number;
+	),
+	TP_printk("magic=%d", __entry->magic_number)
+);
+
+#endif /* _RUST_SAMPLE_TRACE_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
-- 
cgit v1.2.3


From 5609296750afd6462a4d994b6803ccc5e8bf1d4e Mon Sep 17 00:00:00 2001
From: Lukasz Luba <lukasz.luba@arm.com>
Date: Wed, 30 Oct 2024 16:39:51 +0000
Subject: PM: EM: Add min/max available performance state limits

On some devices there are HW dependencies for shared frequency and voltage
between devices. It will impact Energy Aware Scheduler (EAS) decision,
where CPUs share the voltage & frequency domain with other CPUs or devices
e.g.
 - Mid CPUs + Big CPU
 - Little CPU + L3 cache in DSU
 - some other device + Little CPUs

Detailed explanation of one example:
When the L3 cache frequency is increased, the affected Little CPUs might
run at higher voltage and frequency. That higher voltage causes higher CPU
power and thus more energy is used for running the tasks. This is
important for background running tasks, which try to run on energy
efficient CPUs.

Therefore, add performance state limits which are applied for the device
(in this case CPU). This is important on SoCs with HW dependencies
mentioned above so that the Energy Aware Scheduler (EAS) does not use
performance states outside the valid min-max range for energy calculation.

Signed-off-by: Lukasz Luba <lukasz.luba@arm.com>
Link: https://patch.msgid.link/20241030164126.1263793-2-lukasz.luba@arm.com
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/energy_model.h | 29 +++++++++++++++++++++--------
 1 file changed, 21 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/linux/energy_model.h b/include/linux/energy_model.h
index 1ff52020cf75..752e0b297582 100644
--- a/include/linux/energy_model.h
+++ b/include/linux/energy_model.h
@@ -55,6 +55,8 @@ struct em_perf_table {
  * struct em_perf_domain - Performance domain
  * @em_table:		Pointer to the runtime modifiable em_perf_table
  * @nr_perf_states:	Number of performance states
+ * @min_perf_state:	Minimum allowed Performance State index
+ * @max_perf_state:	Maximum allowed Performance State index
  * @flags:		See "em_perf_domain flags"
  * @cpus:		Cpumask covering the CPUs of the domain. It's here
  *			for performance reasons to avoid potential cache
@@ -70,6 +72,8 @@ struct em_perf_table {
 struct em_perf_domain {
 	struct em_perf_table __rcu *em_table;
 	int nr_perf_states;
+	int min_perf_state;
+	int max_perf_state;
 	unsigned long flags;
 	unsigned long cpus[];
 };
@@ -173,13 +177,14 @@ void em_table_free(struct em_perf_table __rcu *table);
 int em_dev_compute_costs(struct device *dev, struct em_perf_state *table,
 			 int nr_states);
 int em_dev_update_chip_binning(struct device *dev);
+int em_update_performance_limits(struct em_perf_domain *pd,
+		unsigned long freq_min_khz, unsigned long freq_max_khz);
 
 /**
  * em_pd_get_efficient_state() - Get an efficient performance state from the EM
  * @table:		List of performance states, in ascending order
- * @nr_perf_states:	Number of performance states
+ * @pd:			performance domain for which this must be done
  * @max_util:		Max utilization to map with the EM
- * @pd_flags:		Performance Domain flags
  *
  * It is called from the scheduler code quite frequently and as a consequence
  * doesn't implement any check.
@@ -188,13 +193,16 @@ int em_dev_update_chip_binning(struct device *dev);
  * requirement.
  */
 static inline int
-em_pd_get_efficient_state(struct em_perf_state *table, int nr_perf_states,
-			  unsigned long max_util, unsigned long pd_flags)
+em_pd_get_efficient_state(struct em_perf_state *table,
+			  struct em_perf_domain *pd, unsigned long max_util)
 {
+	unsigned long pd_flags = pd->flags;
+	int min_ps = pd->min_perf_state;
+	int max_ps = pd->max_perf_state;
 	struct em_perf_state *ps;
 	int i;
 
-	for (i = 0; i < nr_perf_states; i++) {
+	for (i = min_ps; i <= max_ps; i++) {
 		ps = &table[i];
 		if (ps->performance >= max_util) {
 			if (pd_flags & EM_PERF_DOMAIN_SKIP_INEFFICIENCIES &&
@@ -204,7 +212,7 @@ em_pd_get_efficient_state(struct em_perf_state *table, int nr_perf_states,
 		}
 	}
 
-	return nr_perf_states - 1;
+	return max_ps;
 }
 
 /**
@@ -253,8 +261,7 @@ static inline unsigned long em_cpu_energy(struct em_perf_domain *pd,
 	 * requested performance.
 	 */
 	em_table = rcu_dereference(pd->em_table);
-	i = em_pd_get_efficient_state(em_table->state, pd->nr_perf_states,
-				      max_util, pd->flags);
+	i = em_pd_get_efficient_state(em_table->state, pd, max_util);
 	ps = &em_table->state[i];
 
 	/*
@@ -391,6 +398,12 @@ static inline int em_dev_update_chip_binning(struct device *dev)
 {
 	return -EINVAL;
 }
+static inline
+int em_update_performance_limits(struct em_perf_domain *pd,
+		unsigned long freq_min_khz, unsigned long freq_max_khz)
+{
+	return -EINVAL;
+}
 #endif
 
 #endif
-- 
cgit v1.2.3


From 9e0933c21c128d6d8ac4d8aae0babaf9a43100b8 Mon Sep 17 00:00:00 2001
From: John Garry <john.g.garry@oracle.com>
Date: Mon, 4 Nov 2024 16:14:02 -0800
Subject: fs: iomap: Atomic write support

Support direct I/O atomic writes by producing a single bio with REQ_ATOMIC
flag set.

Initially FSes (XFS) should only support writing a single FS block
atomically.

As with any atomic write, we should produce a single bio which covers the
complete write length.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: "Darrick J. Wong" <djwong@kernel.org>
Signed-off-by: John Garry <john.g.garry@oracle.com>
Reviewed-by: Ritesh Harjani (IBM) <ritesh.list@gmail.com>
[djwong: clarify a couple of things in the docs]
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
---
 include/linux/iomap.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 4ad12a3c8bae..c7644bdcfca3 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -178,6 +178,7 @@ struct iomap_folio_ops {
 #else
 #define IOMAP_DAX		0
 #endif /* CONFIG_FS_DAX */
+#define IOMAP_ATOMIC		(1 << 9)
 
 struct iomap_ops {
 	/*
-- 
cgit v1.2.3


From 0a2cdeeae9ddc14d752173be6af98bc9fb45c6ad Mon Sep 17 00:00:00 2001
From: Menglong Dong <menglong8.dong@gmail.com>
Date: Mon, 4 Nov 2024 15:00:41 +0800
Subject: net: tcp: replace the document for "lsndtime" in tcp_sock

Commit d5fed5addb2b ("tcp: reorganize tcp_sock fast path variables")
moved the fields around and misplaced the documentation for "lsndtime".
So, let's replace it in the proper place.

Signed-off-by: Menglong Dong <dongml2@chinatelecom.cn>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Link: https://patch.msgid.link/20241104070041.64302-1-dongml2@chinatelecom.cn
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/tcp.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 6a5e08b937b3..f88daaa76d83 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -200,7 +200,6 @@ struct tcp_sock {
 
 	/* TX read-mostly hotpath cache lines */
 	__cacheline_group_begin(tcp_sock_read_tx);
-	/* timestamp of last sent data packet (for restart window) */
 	u32	max_window;	/* Maximal window ever seen from peer	*/
 	u32	rcv_ssthresh;	/* Current window clamp			*/
 	u32	reordering;	/* Packet reordering metric.		*/
@@ -263,7 +262,7 @@ struct tcp_sock {
 	u32	chrono_stat[3];	/* Time in jiffies for chrono_stat stats */
 	u32	write_seq;	/* Tail(+1) of data held in tcp send buffer */
 	u32	pushed_seq;	/* Last pushed seq, required to talk to windows */
-	u32	lsndtime;
+	u32	lsndtime;	/* timestamp of last sent data packet (for restart window) */
 	u32	mdev_us;	/* medium deviation			*/
 	u32	rtt_seq;	/* sequence number to update rttvar	*/
 	u64	tcp_wstamp_ns;	/* departure time for next sent data packet */
-- 
cgit v1.2.3


From 690e50dd69ee48e43e0f7c42396487da1b81be14 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Sun, 3 Nov 2024 08:53:14 -0800
Subject: tools: ynl-gen: de-kdocify enums with no doc for entries

Sometimes the names of the enum entries are self-explanatory
or come from standards. Forcing authors to write trivial kdoc
for each of such entries seems unreasonable, but kdoc would
complain about undocumented entries.

Detect enums which only have documentation for the entire
type and no documentation for entries. Render their doc
as a plain comment.

Link: https://patch.msgid.link/20241103165314.1631237-1-kuba@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/uapi/linux/dpll.h | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/dpll.h b/include/uapi/linux/dpll.h
index 2b7ec2da4bcc..bf97d4b6d51f 100644
--- a/include/uapi/linux/dpll.h
+++ b/include/uapi/linux/dpll.h
@@ -79,13 +79,13 @@ enum dpll_lock_status_error {
 	DPLL_LOCK_STATUS_ERROR_MAX = (__DPLL_LOCK_STATUS_ERROR_MAX - 1)
 };
 
-/**
- * enum dpll_clock_quality_level - level of quality of a clock device. This
- *   mainly applies when the dpll lock-status is DPLL_LOCK_STATUS_HOLDOVER. The
- *   current list is defined according to the table 11-7 contained in ITU-T
- *   G.8264/Y.1364 document. One may extend this list freely by other ITU-T
- *   defined clock qualities, or different ones defined by another
- *   standardization body (for those, please use different prefix).
+/*
+ * level of quality of a clock device. This mainly applies when the dpll
+ * lock-status is DPLL_LOCK_STATUS_HOLDOVER. The current list is defined
+ * according to the table 11-7 contained in ITU-T G.8264/Y.1364 document. One
+ * may extend this list freely by other ITU-T defined clock qualities, or
+ * different ones defined by another standardization body (for those, please
+ * use different prefix).
  */
 enum dpll_clock_quality_level {
 	DPLL_CLOCK_QUALITY_LEVEL_ITU_OPT1_PRC = 1,
-- 
cgit v1.2.3


From a12fcef429e17cb3db47cde0692a185d3ca712a3 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Tue, 29 Oct 2024 18:43:15 +0200
Subject: soc: fsl_qbman: use be16_to_cpu() in qm_sg_entry_get_off()

struct qm_sg_entry :: offset is a 13-bit field, declared as __be16.

When using be32_to_cpu(), a wrong value will be calculated on little
endian systems (Arm), because type promotion from 16-bit to 32-bit,
which is done before the byte swap and always in the CPU native
endianness, changes the value of the scatter/gather list entry offset in
big-endian interpretation (adds two zero bytes in the LSB interpretation).
The result of the byte swap is ANDed with GENMASK(12, 0), so the result
is always zero, because only those bytes added by type promotion remain
after the application of the bit mask.

The impact of the bug is that scatter/gather frames with a non-zero
offset into the buffer are treated by the driver as if they had a zero
offset. This is all in theory, because in practice, qm_sg_entry_get_off()
has a single caller, where the bug is inconsequential, because at that
call site the buffer offset will always be zero, as will be explained in
the subsequent change.

Flagged by sparse:

warning: cast to restricted __be32
warning: cast from restricted __be16

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Breno Leitao <leitao@debian.org>
Acked-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Acked-by: Madalin Bucur <madalin.bucur@oss.nxp.com>
Link: https://patch.msgid.link/20241029164317.50182-2-vladimir.oltean@nxp.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/soc/fsl/qman.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/soc/fsl/qman.h b/include/soc/fsl/qman.h
index 0d3d6beb7fdb..7f7a4932d7f1 100644
--- a/include/soc/fsl/qman.h
+++ b/include/soc/fsl/qman.h
@@ -242,7 +242,7 @@ static inline void qm_sg_entry_set_f(struct qm_sg_entry *sg, int len)
 
 static inline int qm_sg_entry_get_off(const struct qm_sg_entry *sg)
 {
-	return be32_to_cpu(sg->offset) & QM_SG_OFF_MASK;
+	return be16_to_cpu(sg->offset) & QM_SG_OFF_MASK;
 }
 
 /* "Frame Dequeue Response" */
-- 
cgit v1.2.3


From f248ff14b7589306c8af922465aefedf9b10fa9e Mon Sep 17 00:00:00 2001
From: Desnes Nunes <desnesn@redhat.com>
Date: Thu, 31 Oct 2024 11:28:00 -0300
Subject: misc: rtsx: Cleanup on DRV_NAME cardreader variables

The rtsx_pci_ms memstick driver has been dropped, thus there is no more
need for DRV_NAME_RTSX_PCI_MS variable. Additionally, this also stand-
arizes DRV_NAME variables on alcor_pci and rtsx_usb drivers.

Fixes: d0f459259c13 ("memstick: rtsx_pci_ms: Remove Realtek PCI memstick driver")
Signed-off-by: Desnes Nunes <desnesn@redhat.com>
Link: https://lore.kernel.org/r/20241031142801.1141680-1-desnesn@redhat.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/alcor_pci.h   | 1 +
 include/linux/rtsx_common.h | 1 -
 include/linux/rtsx_usb.h    | 4 ++++
 3 files changed, 5 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/alcor_pci.h b/include/linux/alcor_pci.h
index c4a0b23846d8..dcb1d37dabc2 100644
--- a/include/linux/alcor_pci.h
+++ b/include/linux/alcor_pci.h
@@ -11,6 +11,7 @@
 #define ALCOR_SD_CARD 0
 #define ALCOR_MS_CARD 1
 
+#define DRV_NAME_ALCOR_PCI			"alcor_pci"
 #define DRV_NAME_ALCOR_PCI_SDMMC		"alcor_sdmmc"
 #define DRV_NAME_ALCOR_PCI_MS			"alcor_ms"
 
diff --git a/include/linux/rtsx_common.h b/include/linux/rtsx_common.h
index bf290ad14c57..da9c8c6b5d50 100644
--- a/include/linux/rtsx_common.h
+++ b/include/linux/rtsx_common.h
@@ -12,7 +12,6 @@
 
 #define DRV_NAME_RTSX_PCI		"rtsx_pci"
 #define DRV_NAME_RTSX_PCI_SDMMC		"rtsx_pci_sdmmc"
-#define DRV_NAME_RTSX_PCI_MS		"rtsx_pci_ms"
 
 #define RTSX_REG_PAIR(addr, val)	(((u32)(addr) << 16) | (u8)(val))
 
diff --git a/include/linux/rtsx_usb.h b/include/linux/rtsx_usb.h
index 3247ed8e9ff0..f267a06c6b1e 100644
--- a/include/linux/rtsx_usb.h
+++ b/include/linux/rtsx_usb.h
@@ -12,6 +12,10 @@
 
 #include <linux/usb.h>
 
+#define DRV_NAME_RTSX_USB		"rtsx_usb"
+#define DRV_NAME_RTSX_USB_SDMMC		"rtsx_usb_sdmmc"
+#define DRV_NAME_RTSX_USB_MS		"rtsx_usb_ms"
+
 /* related module names */
 #define RTSX_USB_SD_CARD	0
 #define RTSX_USB_MS_CARD	1
-- 
cgit v1.2.3


From 7fce207af5ec074a9a50e90eb866b17ca4a90f06 Mon Sep 17 00:00:00 2001
From: Joanne Koong <joannelkoong@gmail.com>
Date: Thu, 24 Oct 2024 10:18:07 -0700
Subject: mm/writeback: add folio_mark_dirty_lock()

Add a new convenience helper folio_mark_dirty_lock() that grabs the
folio lock before calling folio_mark_dirty().

Refactor set_page_dirty_lock() to directly use folio_mark_dirty_lock().

Signed-off-by: Joanne Koong <joannelkoong@gmail.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 include/linux/mm.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index ecf63d2b0582..446d7096c48f 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2539,6 +2539,7 @@ struct kvec;
 struct page *get_dump_page(unsigned long addr);
 
 bool folio_mark_dirty(struct folio *folio);
+bool folio_mark_dirty_lock(struct folio *folio);
 bool set_page_dirty(struct page *page);
 int set_page_dirty_lock(struct page *page);
 
-- 
cgit v1.2.3


From 35890f85573c2ebbbf3491dc66f7ee2ad63055af Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@nvidia.com>
Date: Wed, 30 Oct 2024 21:20:45 -0300
Subject: vfio: Remove VFIO_TYPE1_NESTING_IOMMU

This control causes the ARM SMMU drivers to choose a stage 2
implementation for the IO pagetable (vs the stage 1 usual default),
however this choice has no significant visible impact to the VFIO
user. Further qemu never implemented this and no other userspace user is
known.

The original description in commit f5c9ecebaf2a ("vfio/iommu_type1: add
new VFIO_TYPE1_NESTING_IOMMU IOMMU type") suggested this was to "provide
SMMU translation services to the guest operating system" however the rest
of the API to set the guest table pointer for the stage 1 and manage
invalidation was never completed, or at least never upstreamed, rendering
this part useless dead code.

Upstream has now settled on iommufd as the uAPI for controlling nested
translation. Choosing the stage 2 implementation should be done by through
the IOMMU_HWPT_ALLOC_NEST_PARENT flag during domain allocation.

Remove VFIO_TYPE1_NESTING_IOMMU and everything under it including the
enable_nesting iommu_domain_op.

Just in-case there is some userspace using this continue to treat
requesting it as a NOP, but do not advertise support any more.

Acked-by: Alex Williamson <alex.williamson@redhat.com>
Reviewed-by: Mostafa Saleh <smostafa@google.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Reviewed-by: Jerry Snitselaar <jsnitsel@redhat.com>
Reviewed-by: Donald Dutile <ddutile@redhat.com>
Tested-by: Nicolin Chen <nicolinc@nvidia.com>
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Link: https://lore.kernel.org/r/1-v4-9e99b76f3518+3a8-smmuv3_nesting_jgg@nvidia.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 include/linux/iommu.h     | 3 ---
 include/uapi/linux/vfio.h | 2 +-
 2 files changed, 1 insertion(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index bd722f473635..c88d18d2c928 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -635,7 +635,6 @@ struct iommu_ops {
  * @enforce_cache_coherency: Prevent any kind of DMA from bypassing IOMMU_CACHE,
  *                           including no-snoop TLPs on PCIe or other platform
  *                           specific mechanisms.
- * @enable_nesting: Enable nesting
  * @set_pgtable_quirks: Set io page table quirks (IO_PGTABLE_QUIRK_*)
  * @free: Release the domain after use.
  */
@@ -663,7 +662,6 @@ struct iommu_domain_ops {
 				    dma_addr_t iova);
 
 	bool (*enforce_cache_coherency)(struct iommu_domain *domain);
-	int (*enable_nesting)(struct iommu_domain *domain);
 	int (*set_pgtable_quirks)(struct iommu_domain *domain,
 				  unsigned long quirks);
 
@@ -844,7 +842,6 @@ extern void iommu_group_put(struct iommu_group *group);
 extern int iommu_group_id(struct iommu_group *group);
 extern struct iommu_domain *iommu_group_default_domain(struct iommu_group *);
 
-int iommu_enable_nesting(struct iommu_domain *domain);
 int iommu_set_pgtable_quirks(struct iommu_domain *domain,
 		unsigned long quirks);
 
diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index 2b68e6cdf190..c8dbf8219c4f 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -35,7 +35,7 @@
 #define VFIO_EEH			5
 
 /* Two-stage IOMMU */
-#define VFIO_TYPE1_NESTING_IOMMU	6	/* Implies v2 */
+#define __VFIO_RESERVED_TYPE1_NESTING_IOMMU	6	/* Implies v2 */
 
 #define VFIO_SPAPR_TCE_v2_IOMMU		7
 
-- 
cgit v1.2.3


From 1b8655bb8d977ca110436c1cd0ca957c19670c1e Mon Sep 17 00:00:00 2001
From: Nicolin Chen <nicolinc@nvidia.com>
Date: Wed, 30 Oct 2024 21:20:46 -0300
Subject: ACPICA: IORT: Update for revision E.f

ACPICA commit c4f5c083d24df9ddd71d5782c0988408cf0fc1ab

The IORT spec, Issue E.f (April 2024), adds a new CANWBS bit to the Memory
Access Flag field in the Memory Access Properties table, mainly for a PCI
Root Complex.

This CANWBS defines the coherency of memory accesses to be not marked IOWB
cacheable/shareable. Its value further implies the coherency impact from a
pair of mismatched memory attributes (e.g. in a nested translation case):
  0x0: Use of mismatched memory attributes for accesses made by this
       device may lead to a loss of coherency.
  0x1: Coherency of accesses made by this device to locations in
       Conventional memory are ensured as follows, even if the memory
       attributes for the accesses presented by the device or provided by
       the SMMU are different from Inner and Outer Write-back cacheable,
       Shareable.

Link: https://github.com/acpica/acpica/commit/c4f5c083
Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
Acked-by: Hanjun Guo <guohanjun@huawei.com>
Tested-by: Nicolin Chen <nicolinc@nvidia.com>
Reviewed-by: Jerry Snitselaar <jsnitsel@redhat.com>
Reviewed-by: Donald Dutile <ddutile@redhat.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Link: https://lore.kernel.org/r/2-v4-9e99b76f3518+3a8-smmuv3_nesting_jgg@nvidia.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 include/acpi/actbl2.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/acpi/actbl2.h b/include/acpi/actbl2.h
index d3858eebc255..2e917a8f8bca 100644
--- a/include/acpi/actbl2.h
+++ b/include/acpi/actbl2.h
@@ -453,7 +453,7 @@ struct acpi_table_ccel {
  * IORT - IO Remapping Table
  *
  * Conforms to "IO Remapping Table System Software on ARM Platforms",
- * Document number: ARM DEN 0049E.e, Sep 2022
+ * Document number: ARM DEN 0049E.f, Apr 2024
  *
  ******************************************************************************/
 
@@ -524,6 +524,7 @@ struct acpi_iort_memory_access {
 
 #define ACPI_IORT_MF_COHERENCY          (1)
 #define ACPI_IORT_MF_ATTRIBUTES         (1<<1)
+#define ACPI_IORT_MF_CANWBS             (1<<2)
 
 /*
  * IORT node specific subtables
-- 
cgit v1.2.3


From 807404d66fcf898d4bcc6a3e3edb07ffd5b88400 Mon Sep 17 00:00:00 2001
From: Nicolin Chen <nicolinc@nvidia.com>
Date: Wed, 30 Oct 2024 21:20:47 -0300
Subject: ACPI/IORT: Support CANWBS memory access flag

The IORT spec, Issue E.f (April 2024), adds a new CANWBS bit to the Memory
Access Flag field in the Memory Access Properties table, mainly for a PCI
Root Complex.

This CANWBS defines the coherency of memory accesses to be not marked IOWB
cacheable/shareable. Its value further implies the coherency impact from a
pair of mismatched memory attributes (e.g. in a nested translation case):
  0x0: Use of mismatched memory attributes for accesses made by this
       device may lead to a loss of coherency.
  0x1: Coherency of accesses made by this device to locations in
       Conventional memory are ensured as follows, even if the memory
       attributes for the accesses presented by the device or provided by
       the SMMU are different from Inner and Outer Write-back cacheable,
       Shareable.

Note that the loss of coherency on a CANWBS-unsupported HW typically could
occur to an SMMU that doesn't implement the S2FWB feature where additional
cache flush operations would be required to prevent that from happening.

Add a new ACPI_IORT_MF_CANWBS flag and set IOMMU_FWSPEC_PCI_RC_CANWBS upon
the presence of this new flag.

CANWBS and S2FWB are similar features, in that they both guarantee the VM
can not violate coherency, however S2FWB can be bypassed by PCI No Snoop
TLPs, while CANWBS cannot. Thus CANWBS meets the requirements to set
IOMMU_CAP_ENFORCE_CACHE_COHERENCY.

Architecturally ARM has expected that VFIO would disable No Snoop through
PCI Config space, if this is done then the two would have the same
protections.

Tested-by: Nicolin Chen <nicolinc@nvidia.com>
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Acked-by: Hanjun Guo <guohanjun@huawei.com>
Reviewed-by: Jerry Snitselaar <jsnitsel@redhat.com>
Reviewed-by: Donald Dutile <ddutile@redhat.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Link: https://lore.kernel.org/r/3-v4-9e99b76f3518+3a8-smmuv3_nesting_jgg@nvidia.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 include/linux/iommu.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index c88d18d2c928..4ad9b9ec6c9b 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -991,6 +991,8 @@ struct iommu_fwspec {
 
 /* ATS is supported */
 #define IOMMU_FWSPEC_PCI_RC_ATS			(1 << 0)
+/* CANWBS is supported */
+#define IOMMU_FWSPEC_PCI_RC_CANWBS		(1 << 1)
 
 /*
  * An iommu attach handle represents a relationship between an iommu domain
-- 
cgit v1.2.3


From 6912ec91828b8d7f21b393befad1c36dadbcd751 Mon Sep 17 00:00:00 2001
From: Nicolin Chen <nicolinc@nvidia.com>
Date: Wed, 30 Oct 2024 21:20:49 -0300
Subject: iommu/arm-smmu-v3: Support IOMMU_GET_HW_INFO via struct
 arm_smmu_hw_info

For virtualization cases the IDR/IIDR/AIDR values of the actual SMMU
instance need to be available to the VMM so it can construct an
appropriate vSMMUv3 that reflects the correct HW capabilities.

For userspace page tables these values are required to constrain the valid
values within the CD table and the IOPTEs.

The kernel does not sanitize these values. If building a VMM then
userspace is required to only forward bits into a VM that it knows it can
implement. Some bits will also require a VMM to detect if appropriate
kernel support is available such as for ATS and BTM.

Start a new file and kconfig for the advanced iommufd support. This lets
it be compiled out for kernels that are not intended to support
virtualization, and allows distros to leave it disabled until they are
shipping a matching qemu too.

Tested-by: Nicolin Chen <nicolinc@nvidia.com>
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Reviewed-by: Jerry Snitselaar <jsnitsel@redhat.com>
Reviewed-by: Donald Dutile <ddutile@redhat.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Link: https://lore.kernel.org/r/5-v4-9e99b76f3518+3a8-smmuv3_nesting_jgg@nvidia.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 include/uapi/linux/iommufd.h | 35 +++++++++++++++++++++++++++++++++++
 1 file changed, 35 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h
index 72010f71c5e4..b5c94fecb94c 100644
--- a/include/uapi/linux/iommufd.h
+++ b/include/uapi/linux/iommufd.h
@@ -484,15 +484,50 @@ struct iommu_hw_info_vtd {
 	__aligned_u64 ecap_reg;
 };
 
+/**
+ * struct iommu_hw_info_arm_smmuv3 - ARM SMMUv3 hardware information
+ *                                   (IOMMU_HW_INFO_TYPE_ARM_SMMUV3)
+ *
+ * @flags: Must be set to 0
+ * @__reserved: Must be 0
+ * @idr: Implemented features for ARM SMMU Non-secure programming interface
+ * @iidr: Information about the implementation and implementer of ARM SMMU,
+ *        and architecture version supported
+ * @aidr: ARM SMMU architecture version
+ *
+ * For the details of @idr, @iidr and @aidr, please refer to the chapters
+ * from 6.3.1 to 6.3.6 in the SMMUv3 Spec.
+ *
+ * User space should read the underlying ARM SMMUv3 hardware information for
+ * the list of supported features.
+ *
+ * Note that these values reflect the raw HW capability, without any insight if
+ * any required kernel driver support is present. Bits may be set indicating the
+ * HW has functionality that is lacking kernel software support, such as BTM. If
+ * a VMM is using this information to construct emulated copies of these
+ * registers it should only forward bits that it knows it can support.
+ *
+ * In future, presence of required kernel support will be indicated in flags.
+ */
+struct iommu_hw_info_arm_smmuv3 {
+	__u32 flags;
+	__u32 __reserved;
+	__u32 idr[6];
+	__u32 iidr;
+	__u32 aidr;
+};
+
 /**
  * enum iommu_hw_info_type - IOMMU Hardware Info Types
  * @IOMMU_HW_INFO_TYPE_NONE: Used by the drivers that do not report hardware
  *                           info
  * @IOMMU_HW_INFO_TYPE_INTEL_VTD: Intel VT-d iommu info type
+ * @IOMMU_HW_INFO_TYPE_ARM_SMMUV3: ARM SMMUv3 iommu info type
  */
 enum iommu_hw_info_type {
 	IOMMU_HW_INFO_TYPE_NONE = 0,
 	IOMMU_HW_INFO_TYPE_INTEL_VTD = 1,
+	IOMMU_HW_INFO_TYPE_ARM_SMMUV3 = 2,
 };
 
 /**
-- 
cgit v1.2.3


From 5205b63099507a84458075c3ca7e648407e6c8cc Mon Sep 17 00:00:00 2001
From: Liu Ying <victor.liu@nxp.com>
Date: Mon, 4 Nov 2024 11:27:57 +0800
Subject: media: uapi: Add MEDIA_BUS_FMT_RGB101010_1X7X5_{SPWG, JEIDA}

Add two media bus formats that identify 30-bit RGB pixels transmitted
by a LVDS link with five differential data pairs, serialized into 7
time slots, using standard SPWG/VESA or JEIDA data mapping.

Signed-off-by: Liu Ying <victor.liu@nxp.com>
Acked-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20241104032806.611890-5-victor.liu@nxp.com
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
---
 include/uapi/linux/media-bus-format.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/uapi/linux/media-bus-format.h b/include/uapi/linux/media-bus-format.h
index d4c1d991014b..ff62056feed5 100644
--- a/include/uapi/linux/media-bus-format.h
+++ b/include/uapi/linux/media-bus-format.h
@@ -34,7 +34,7 @@
 
 #define MEDIA_BUS_FMT_FIXED			0x0001
 
-/* RGB - next is	0x1026 */
+/* RGB - next is	0x1028 */
 #define MEDIA_BUS_FMT_RGB444_1X12		0x1016
 #define MEDIA_BUS_FMT_RGB444_2X8_PADHI_BE	0x1001
 #define MEDIA_BUS_FMT_RGB444_2X8_PADHI_LE	0x1002
@@ -68,6 +68,8 @@
 #define MEDIA_BUS_FMT_ARGB8888_1X32		0x100d
 #define MEDIA_BUS_FMT_RGB888_1X32_PADHI		0x100f
 #define MEDIA_BUS_FMT_RGB101010_1X30		0x1018
+#define MEDIA_BUS_FMT_RGB101010_1X7X5_SPWG	0x1026
+#define MEDIA_BUS_FMT_RGB101010_1X7X5_JEIDA	0x1027
 #define MEDIA_BUS_FMT_RGB666_1X36_CPADLO	0x1020
 #define MEDIA_BUS_FMT_RGB888_1X36_CPADLO	0x1021
 #define MEDIA_BUS_FMT_RGB121212_1X36		0x1019
-- 
cgit v1.2.3


From 606410292f54ef08632bdfd5c58974cf4ebc3cc9 Mon Sep 17 00:00:00 2001
From: Liu Ying <victor.liu@nxp.com>
Date: Mon, 4 Nov 2024 11:27:59 +0800
Subject: drm: of: Add drm_of_lvds_get_dual_link_pixel_order_sink()

drm_of_lvds_get_dual_link_pixel_order() gets LVDS dual-link source pixel
order.  Similar to it, add it's counterpart function
drm_of_lvds_get_dual_link_pixel_order_sink() to get LVDS dual-link sink
pixel order.

Suggested-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Signed-off-by: Liu Ying <victor.liu@nxp.com>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20241104032806.611890-7-victor.liu@nxp.com
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
---
 include/drm/drm_of.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include')

diff --git a/include/drm/drm_of.h b/include/drm/drm_of.h
index 02d1cdd7f798..7f0256dae3f1 100644
--- a/include/drm/drm_of.h
+++ b/include/drm/drm_of.h
@@ -52,6 +52,8 @@ int drm_of_find_panel_or_bridge(const struct device_node *np,
 				struct drm_bridge **bridge);
 int drm_of_lvds_get_dual_link_pixel_order(const struct device_node *port1,
 					  const struct device_node *port2);
+int drm_of_lvds_get_dual_link_pixel_order_sink(struct device_node *port1,
+					       struct device_node *port2);
 int drm_of_lvds_get_data_mapping(const struct device_node *port);
 int drm_of_get_data_lanes_count(const struct device_node *endpoint,
 				const unsigned int min, const unsigned int max);
@@ -109,6 +111,13 @@ drm_of_lvds_get_dual_link_pixel_order(const struct device_node *port1,
 	return -EINVAL;
 }
 
+static inline int
+drm_of_lvds_get_dual_link_pixel_order_sink(struct device_node *port1,
+					   struct device_node *port2)
+{
+	return -EINVAL;
+}
+
 static inline int
 drm_of_lvds_get_data_mapping(const struct device_node *port)
 {
-- 
cgit v1.2.3


From 5c1806c41ce0a0110db5dd4c483cf2dc28b3ddf0 Mon Sep 17 00:00:00 2001
From: Marco Elver <elver@google.com>
Date: Mon, 4 Nov 2024 16:43:07 +0100
Subject: kcsan, seqlock: Support seqcount_latch_t

While fuzzing an arm64 kernel, Alexander Potapenko reported:

| BUG: KCSAN: data-race in ktime_get_mono_fast_ns / timekeeping_update
|
| write to 0xffffffc082e74248 of 56 bytes by interrupt on cpu 0:
|  update_fast_timekeeper kernel/time/timekeeping.c:430 [inline]
|  timekeeping_update+0x1d8/0x2d8 kernel/time/timekeeping.c:768
|  timekeeping_advance+0x9e8/0xb78 kernel/time/timekeeping.c:2344
|  update_wall_time+0x18/0x38 kernel/time/timekeeping.c:2360
|  [...]
|
| read to 0xffffffc082e74258 of 8 bytes by task 5260 on cpu 1:
|  __ktime_get_fast_ns kernel/time/timekeeping.c:372 [inline]
|  ktime_get_mono_fast_ns+0x88/0x174 kernel/time/timekeeping.c:489
|  init_srcu_struct_fields+0x40c/0x530 kernel/rcu/srcutree.c:263
|  init_srcu_struct+0x14/0x20 kernel/rcu/srcutree.c:311
|  [...]
|
| value changed: 0x000002f875d33266 -> 0x000002f877416866
|
| Reported by Kernel Concurrency Sanitizer on:
| CPU: 1 UID: 0 PID: 5260 Comm: syz.2.7483 Not tainted 6.12.0-rc3-dirty #78

This is a false positive data race between a seqcount latch writer and a reader
accessing stale data. Since its introduction, KCSAN has never understood the
seqcount_latch interface (due to being unannotated).

Unlike the regular seqlock interface, the seqcount_latch interface for latch
writers never has had a well-defined critical section, making it difficult to
teach tooling where the critical section starts and ends.

Introduce an instrumentable (non-raw) seqcount_latch interface, with
which we can clearly denote writer critical sections. This both helps
readability and tooling like KCSAN to understand when the writer is done
updating all latch copies.

Fixes: 88ecd153be95 ("seqlock, kcsan: Add annotations for KCSAN")
Reported-by: Alexander Potapenko <glider@google.com>
Co-developed-by: "Peter Zijlstra (Intel)" <peterz@infradead.org>
Signed-off-by: "Peter Zijlstra (Intel)" <peterz@infradead.org>
Signed-off-by: Marco Elver <elver@google.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20241104161910.780003-4-elver@google.com
---
 include/linux/seqlock.h | 86 ++++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 71 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h
index fffeb754880f..45eee0e5dca0 100644
--- a/include/linux/seqlock.h
+++ b/include/linux/seqlock.h
@@ -621,6 +621,23 @@ static __always_inline unsigned raw_read_seqcount_latch(const seqcount_latch_t *
 	return READ_ONCE(s->seqcount.sequence);
 }
 
+/**
+ * read_seqcount_latch() - pick even/odd latch data copy
+ * @s: Pointer to seqcount_latch_t
+ *
+ * See write_seqcount_latch() for details and a full reader/writer usage
+ * example.
+ *
+ * Return: sequence counter raw value. Use the lowest bit as an index for
+ * picking which data copy to read. The full counter must then be checked
+ * with read_seqcount_latch_retry().
+ */
+static __always_inline unsigned read_seqcount_latch(const seqcount_latch_t *s)
+{
+	kcsan_atomic_next(KCSAN_SEQLOCK_REGION_MAX);
+	return raw_read_seqcount_latch(s);
+}
+
 /**
  * raw_read_seqcount_latch_retry() - end a seqcount_latch_t read section
  * @s:		Pointer to seqcount_latch_t
@@ -635,9 +652,34 @@ raw_read_seqcount_latch_retry(const seqcount_latch_t *s, unsigned start)
 	return unlikely(READ_ONCE(s->seqcount.sequence) != start);
 }
 
+/**
+ * read_seqcount_latch_retry() - end a seqcount_latch_t read section
+ * @s:		Pointer to seqcount_latch_t
+ * @start:	count, from read_seqcount_latch()
+ *
+ * Return: true if a read section retry is required, else false
+ */
+static __always_inline int
+read_seqcount_latch_retry(const seqcount_latch_t *s, unsigned start)
+{
+	kcsan_atomic_next(0);
+	return raw_read_seqcount_latch_retry(s, start);
+}
+
 /**
  * raw_write_seqcount_latch() - redirect latch readers to even/odd copy
  * @s: Pointer to seqcount_latch_t
+ */
+static __always_inline void raw_write_seqcount_latch(seqcount_latch_t *s)
+{
+	smp_wmb();	/* prior stores before incrementing "sequence" */
+	s->seqcount.sequence++;
+	smp_wmb();      /* increment "sequence" before following stores */
+}
+
+/**
+ * write_seqcount_latch_begin() - redirect latch readers to odd copy
+ * @s: Pointer to seqcount_latch_t
  *
  * The latch technique is a multiversion concurrency control method that allows
  * queries during non-atomic modifications. If you can guarantee queries never
@@ -665,17 +707,11 @@ raw_read_seqcount_latch_retry(const seqcount_latch_t *s, unsigned start)
  *
  *	void latch_modify(struct latch_struct *latch, ...)
  *	{
- *		smp_wmb();	// Ensure that the last data[1] update is visible
- *		latch->seq.sequence++;
- *		smp_wmb();	// Ensure that the seqcount update is visible
- *
+ *		write_seqcount_latch_begin(&latch->seq);
  *		modify(latch->data[0], ...);
- *
- *		smp_wmb();	// Ensure that the data[0] update is visible
- *		latch->seq.sequence++;
- *		smp_wmb();	// Ensure that the seqcount update is visible
- *
+ *		write_seqcount_latch(&latch->seq);
  *		modify(latch->data[1], ...);
+ *		write_seqcount_latch_end(&latch->seq);
  *	}
  *
  * The query will have a form like::
@@ -686,13 +722,13 @@ raw_read_seqcount_latch_retry(const seqcount_latch_t *s, unsigned start)
  *		unsigned seq, idx;
  *
  *		do {
- *			seq = raw_read_seqcount_latch(&latch->seq);
+ *			seq = read_seqcount_latch(&latch->seq);
  *
  *			idx = seq & 0x01;
  *			entry = data_query(latch->data[idx], ...);
  *
  *		// This includes needed smp_rmb()
- *		} while (raw_read_seqcount_latch_retry(&latch->seq, seq));
+ *		} while (read_seqcount_latch_retry(&latch->seq, seq));
  *
  *		return entry;
  *	}
@@ -716,11 +752,31 @@ raw_read_seqcount_latch_retry(const seqcount_latch_t *s, unsigned start)
  *	When data is a dynamic data structure; one should use regular RCU
  *	patterns to manage the lifetimes of the objects within.
  */
-static inline void raw_write_seqcount_latch(seqcount_latch_t *s)
+static __always_inline void write_seqcount_latch_begin(seqcount_latch_t *s)
 {
-	smp_wmb();	/* prior stores before incrementing "sequence" */
-	s->seqcount.sequence++;
-	smp_wmb();      /* increment "sequence" before following stores */
+	kcsan_nestable_atomic_begin();
+	raw_write_seqcount_latch(s);
+}
+
+/**
+ * write_seqcount_latch() - redirect latch readers to even copy
+ * @s: Pointer to seqcount_latch_t
+ */
+static __always_inline void write_seqcount_latch(seqcount_latch_t *s)
+{
+	raw_write_seqcount_latch(s);
+}
+
+/**
+ * write_seqcount_latch_end() - end a seqcount_latch_t write section
+ * @s:		Pointer to seqcount_latch_t
+ *
+ * Marks the end of a seqcount_latch_t writer section, after all copies of the
+ * latch-protected data have been updated.
+ */
+static __always_inline void write_seqcount_latch_end(seqcount_latch_t *s)
+{
+	kcsan_nestable_atomic_end();
 }
 
 #define __SEQLOCK_UNLOCKED(lockname)					\
-- 
cgit v1.2.3


From 93190bc35d6d4364a4d8c38ac8961dabecbff4ed Mon Sep 17 00:00:00 2001
From: Marco Elver <elver@google.com>
Date: Mon, 4 Nov 2024 16:43:08 +0100
Subject: seqlock, treewide: Switch to non-raw seqcount_latch interface

Switch all instrumentable users of the seqcount_latch interface over to
the non-raw interface.

Co-developed-by: "Peter Zijlstra (Intel)" <peterz@infradead.org>
Signed-off-by: "Peter Zijlstra (Intel)" <peterz@infradead.org>
Signed-off-by: Marco Elver <elver@google.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20241104161910.780003-5-elver@google.com
---
 include/linux/rbtree_latch.h | 20 +++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/include/linux/rbtree_latch.h b/include/linux/rbtree_latch.h
index 6a0999c26c7c..2f630eb8307e 100644
--- a/include/linux/rbtree_latch.h
+++ b/include/linux/rbtree_latch.h
@@ -14,7 +14,7 @@
  *
  * If we need to allow unconditional lookups (say as required for NMI context
  * usage) we need a more complex setup; this data structure provides this by
- * employing the latch technique -- see @raw_write_seqcount_latch -- to
+ * employing the latch technique -- see @write_seqcount_latch_begin -- to
  * implement a latched RB-tree which does allow for unconditional lookups by
  * virtue of always having (at least) one stable copy of the tree.
  *
@@ -132,7 +132,7 @@ __lt_find(void *key, struct latch_tree_root *ltr, int idx,
  * @ops: operators defining the node order
  *
  * It inserts @node into @root in an ordered fashion such that we can always
- * observe one complete tree. See the comment for raw_write_seqcount_latch().
+ * observe one complete tree. See the comment for write_seqcount_latch_begin().
  *
  * The inserts use rcu_assign_pointer() to publish the element such that the
  * tree structure is stored before we can observe the new @node.
@@ -145,10 +145,11 @@ latch_tree_insert(struct latch_tree_node *node,
 		  struct latch_tree_root *root,
 		  const struct latch_tree_ops *ops)
 {
-	raw_write_seqcount_latch(&root->seq);
+	write_seqcount_latch_begin(&root->seq);
 	__lt_insert(node, root, 0, ops->less);
-	raw_write_seqcount_latch(&root->seq);
+	write_seqcount_latch(&root->seq);
 	__lt_insert(node, root, 1, ops->less);
+	write_seqcount_latch_end(&root->seq);
 }
 
 /**
@@ -159,7 +160,7 @@ latch_tree_insert(struct latch_tree_node *node,
  *
  * Removes @node from the trees @root in an ordered fashion such that we can
  * always observe one complete tree. See the comment for
- * raw_write_seqcount_latch().
+ * write_seqcount_latch_begin().
  *
  * It is assumed that @node will observe one RCU quiescent state before being
  * reused of freed.
@@ -172,10 +173,11 @@ latch_tree_erase(struct latch_tree_node *node,
 		 struct latch_tree_root *root,
 		 const struct latch_tree_ops *ops)
 {
-	raw_write_seqcount_latch(&root->seq);
+	write_seqcount_latch_begin(&root->seq);
 	__lt_erase(node, root, 0);
-	raw_write_seqcount_latch(&root->seq);
+	write_seqcount_latch(&root->seq);
 	__lt_erase(node, root, 1);
+	write_seqcount_latch_end(&root->seq);
 }
 
 /**
@@ -204,9 +206,9 @@ latch_tree_find(void *key, struct latch_tree_root *root,
 	unsigned int seq;
 
 	do {
-		seq = raw_read_seqcount_latch(&root->seq);
+		seq = read_seqcount_latch(&root->seq);
 		node = __lt_find(key, root, seq & 1, ops->comp);
-	} while (raw_read_seqcount_latch_retry(&root->seq, seq));
+	} while (read_seqcount_latch_retry(&root->seq, seq));
 
 	return node;
 }
-- 
cgit v1.2.3


From 183ec5f26b2fc97a4a9871865bfe9b33c41fddb2 Mon Sep 17 00:00:00 2001
From: Marco Elver <elver@google.com>
Date: Mon, 4 Nov 2024 16:43:09 +0100
Subject: kcsan, seqlock: Fix incorrect assumption in read_seqbegin()

During testing of the preceding changes, I noticed that in some cases,
current->kcsan_ctx.in_flat_atomic remained true until task exit. This is
obviously wrong, because _all_ accesses for the given task will be
treated as atomic, resulting in false negatives i.e. missed data races.

Debugging led to fs/dcache.c, where we can see this usage of seqlock:

	struct dentry *d_lookup(const struct dentry *parent, const struct qstr *name)
	{
		struct dentry *dentry;
		unsigned seq;

		do {
			seq = read_seqbegin(&rename_lock);
			dentry = __d_lookup(parent, name);
			if (dentry)
				break;
		} while (read_seqretry(&rename_lock, seq));
	[...]

As can be seen, read_seqretry() is never called if dentry != NULL;
consequently, current->kcsan_ctx.in_flat_atomic will never be reset to
false by read_seqretry().

Give up on the wrong assumption of "assume closing read_seqretry()", and
rely on the already-present annotations in read_seqcount_begin/retry().

Fixes: 88ecd153be95 ("seqlock, kcsan: Add annotations for KCSAN")
Signed-off-by: Marco Elver <elver@google.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20241104161910.780003-6-elver@google.com
---
 include/linux/seqlock.h | 12 +-----------
 1 file changed, 1 insertion(+), 11 deletions(-)

(limited to 'include')

diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h
index 45eee0e5dca0..5298765d6ca4 100644
--- a/include/linux/seqlock.h
+++ b/include/linux/seqlock.h
@@ -810,11 +810,7 @@ static __always_inline void write_seqcount_latch_end(seqcount_latch_t *s)
  */
 static inline unsigned read_seqbegin(const seqlock_t *sl)
 {
-	unsigned ret = read_seqcount_begin(&sl->seqcount);
-
-	kcsan_atomic_next(0);  /* non-raw usage, assume closing read_seqretry() */
-	kcsan_flat_atomic_begin();
-	return ret;
+	return read_seqcount_begin(&sl->seqcount);
 }
 
 /**
@@ -830,12 +826,6 @@ static inline unsigned read_seqbegin(const seqlock_t *sl)
  */
 static inline unsigned read_seqretry(const seqlock_t *sl, unsigned start)
 {
-	/*
-	 * Assume not nested: read_seqretry() may be called multiple times when
-	 * completing read critical section.
-	 */
-	kcsan_flat_atomic_end();
-
 	return read_seqcount_retry(&sl->seqcount, start);
 }
 
-- 
cgit v1.2.3


From 0f0d1b8e5010bfe1feeb4d78d137e41946a5370d Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 28 Oct 2024 14:20:35 +0100
Subject: sched/ext: Remove sched_fork() hack

Instead of solving the underlying problem of the double invocation of
__sched_fork() for idle tasks, sched-ext decided to hack around the issue
by partially clearing out the entity struct to preserve the already
enqueued node. A provided analysis and solution has been ignored for four
months.

Now that someone else has taken care of cleaning it up, remove the
disgusting hack and clear out the full structure. Remove the comment in the
structure declaration as well, as there is no requirement for @node being
the last element anymore.

Fixes: f0e1a0643a59 ("sched_ext: Implement BPF extensible scheduler class")
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Tejun Heo <tj@kernel.org>
Link: https://lore.kernel.org/r/87ldy82wkc.ffs@tglx
---
 include/linux/sched/ext.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/sched/ext.h b/include/linux/sched/ext.h
index 1ddbde64a31b..2799e7284fff 100644
--- a/include/linux/sched/ext.h
+++ b/include/linux/sched/ext.h
@@ -199,7 +199,6 @@ struct sched_ext_entity {
 #ifdef CONFIG_EXT_GROUP_SCHED
 	struct cgroup		*cgrp_moving_from;
 #endif
-	/* must be the last field, see init_scx_entity() */
 	struct list_head	tasks_node;
 };
 
-- 
cgit v1.2.3


From 26baa1f1c4bdc34b8d698c1900b407d863ad0e69 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Fri, 4 Oct 2024 14:47:02 +0200
Subject: sched: Add TIF_NEED_RESCHED_LAZY infrastructure

Add the basic infrastructure to split the TIF_NEED_RESCHED bit in two.
Either bit will cause a resched on return-to-user, but only
TIF_NEED_RESCHED will drive IRQ preemption.

No behavioural change intended.

Suggested-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Link: https://lkml.kernel.org/r/20241007075055.219540785@infradead.org
---
 include/linux/entry-common.h |  3 ++-
 include/linux/entry-kvm.h    |  5 +++--
 include/linux/sched.h        |  3 ++-
 include/linux/thread_info.h  | 21 +++++++++++++++++----
 4 files changed, 24 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h
index 1e50cdb83ae5..fc61d0205c97 100644
--- a/include/linux/entry-common.h
+++ b/include/linux/entry-common.h
@@ -64,7 +64,8 @@
 
 #define EXIT_TO_USER_MODE_WORK						\
 	(_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_UPROBE |		\
-	 _TIF_NEED_RESCHED | _TIF_PATCH_PENDING | _TIF_NOTIFY_SIGNAL |	\
+	 _TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY |			\
+	 _TIF_PATCH_PENDING | _TIF_NOTIFY_SIGNAL |			\
 	 ARCH_EXIT_TO_USER_MODE_WORK)
 
 /**
diff --git a/include/linux/entry-kvm.h b/include/linux/entry-kvm.h
index 6813171afccb..16149f6625e4 100644
--- a/include/linux/entry-kvm.h
+++ b/include/linux/entry-kvm.h
@@ -17,8 +17,9 @@
 #endif
 
 #define XFER_TO_GUEST_MODE_WORK						\
-	(_TIF_NEED_RESCHED | _TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL |	\
-	 _TIF_NOTIFY_RESUME | ARCH_XFER_TO_GUEST_MODE_WORK)
+	(_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY | _TIF_SIGPENDING | \
+	 _TIF_NOTIFY_SIGNAL | _TIF_NOTIFY_RESUME |			\
+	 ARCH_XFER_TO_GUEST_MODE_WORK)
 
 struct kvm_vcpu;
 
diff --git a/include/linux/sched.h b/include/linux/sched.h
index a76e3d074a2a..1d5cc3e50884 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2002,7 +2002,8 @@ static inline void set_tsk_need_resched(struct task_struct *tsk)
 
 static inline void clear_tsk_need_resched(struct task_struct *tsk)
 {
-	clear_tsk_thread_flag(tsk,TIF_NEED_RESCHED);
+	atomic_long_andnot(_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY,
+			   (atomic_long_t *)&task_thread_info(tsk)->flags);
 }
 
 static inline int test_tsk_need_resched(struct task_struct *tsk)
diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h
index 9ea0b28068f4..cf2446c9c30d 100644
--- a/include/linux/thread_info.h
+++ b/include/linux/thread_info.h
@@ -59,6 +59,14 @@ enum syscall_work_bit {
 
 #include <asm/thread_info.h>
 
+#ifndef TIF_NEED_RESCHED_LAZY
+#ifdef CONFIG_ARCH_HAS_PREEMPT_LAZY
+#error Inconsistent PREEMPT_LAZY
+#endif
+#define TIF_NEED_RESCHED_LAZY TIF_NEED_RESCHED
+#define _TIF_NEED_RESCHED_LAZY _TIF_NEED_RESCHED
+#endif
+
 #ifdef __KERNEL__
 
 #ifndef arch_set_restart_data
@@ -179,22 +187,27 @@ static __always_inline unsigned long read_ti_thread_flags(struct thread_info *ti
 
 #ifdef _ASM_GENERIC_BITOPS_INSTRUMENTED_NON_ATOMIC_H
 
-static __always_inline bool tif_need_resched(void)
+static __always_inline bool tif_test_bit(int bit)
 {
-	return arch_test_bit(TIF_NEED_RESCHED,
+	return arch_test_bit(bit,
 			     (unsigned long *)(&current_thread_info()->flags));
 }
 
 #else
 
-static __always_inline bool tif_need_resched(void)
+static __always_inline bool tif_test_bit(int bit)
 {
-	return test_bit(TIF_NEED_RESCHED,
+	return test_bit(bit,
 			(unsigned long *)(&current_thread_info()->flags));
 }
 
 #endif /* _ASM_GENERIC_BITOPS_INSTRUMENTED_NON_ATOMIC_H */
 
+static __always_inline bool tif_need_resched(void)
+{
+	return tif_test_bit(TIF_NEED_RESCHED);
+}
+
 #ifndef CONFIG_HAVE_ARCH_WITHIN_STACK_FRAMES
 static inline int arch_within_stack_frames(const void * const stack,
 					   const void * const stackend,
-- 
cgit v1.2.3


From 7c70cb94d29cd325fabe4a818c18613e3b9919a1 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Fri, 4 Oct 2024 14:46:58 +0200
Subject: sched: Add Lazy preemption model

Change fair to use resched_curr_lazy(), which, when the lazy
preemption model is selected, will set TIF_NEED_RESCHED_LAZY.

This LAZY bit will be promoted to the full NEED_RESCHED bit on tick.
As such, the average delay between setting LAZY and actually
rescheduling will be TICK_NSEC/2.

In short, Lazy preemption will delay preemption for fair class but
will function as Full preemption for all the other classes, most
notably the realtime (RR/FIFO/DEADLINE) classes.

The goal is to bridge the performance gap with Voluntary, such that we
might eventually remove that option entirely.

Suggested-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Link: https://lkml.kernel.org/r/20241007075055.331243614@infradead.org
---
 include/linux/preempt.h | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/preempt.h b/include/linux/preempt.h
index ce76f1a45722..ca86235ac15c 100644
--- a/include/linux/preempt.h
+++ b/include/linux/preempt.h
@@ -486,6 +486,7 @@ DEFINE_LOCK_GUARD_0(migrate, migrate_disable(), migrate_enable())
 extern bool preempt_model_none(void);
 extern bool preempt_model_voluntary(void);
 extern bool preempt_model_full(void);
+extern bool preempt_model_lazy(void);
 
 #else
 
@@ -502,6 +503,11 @@ static inline bool preempt_model_full(void)
 	return IS_ENABLED(CONFIG_PREEMPT);
 }
 
+static inline bool preempt_model_lazy(void)
+{
+	return IS_ENABLED(CONFIG_PREEMPT_LAZY);
+}
+
 #endif
 
 static inline bool preempt_model_rt(void)
@@ -519,7 +525,7 @@ static inline bool preempt_model_rt(void)
  */
 static inline bool preempt_model_preemptible(void)
 {
-	return preempt_model_full() || preempt_model_rt();
+	return preempt_model_full() || preempt_model_lazy() || preempt_model_rt();
 }
 
 #endif /* __LINUX_PREEMPT_H */
-- 
cgit v1.2.3


From 18d92bb57c39504d9da11c6ef604f58eb1d5a117 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Tue, 22 Oct 2024 18:59:08 +0300
Subject: perf/core: Add aux_pause, aux_resume, aux_start_paused

Hardware traces, such as instruction traces, can produce a vast amount of
trace data, so being able to reduce tracing to more specific circumstances
can be useful.

The ability to pause or resume tracing when another event happens, can do
that.

Add ability for an event to "pause" or "resume" AUX area tracing.

Add aux_pause bit to perf_event_attr to indicate that, if the event
happens, the associated AUX area tracing should be paused. Ditto
aux_resume. Do not allow aux_pause and aux_resume to be set together.

Add aux_start_paused bit to perf_event_attr to indicate to an AUX area
event that it should start in a "paused" state.

Add aux_paused to struct hw_perf_event for AUX area events to keep track of
the "paused" state. aux_paused is initialized to aux_start_paused.

Add PERF_EF_PAUSE and PERF_EF_RESUME modes for ->stop() and ->start()
callbacks. Call as needed, during __perf_event_output(). Add
aux_in_pause_resume to struct perf_buffer to prevent races with the NMI
handler. Pause/resume in NMI context will miss out if it coincides with
another pause/resume.

To use aux_pause or aux_resume, an event must be in a group with the AUX
area event as the group leader.

Example (requires Intel PT and tools patches also):

 $ perf record --kcore -e intel_pt/aux-action=start-paused/k,syscalls:sys_enter_newuname/aux-action=resume/,syscalls:sys_exit_newuname/aux-action=pause/ uname
 Linux
 [ perf record: Woken up 1 times to write data ]
 [ perf record: Captured and wrote 0.043 MB perf.data ]
 $ perf script --call-trace
 uname   30805 [000] 24001.058782799: name: 0x7ffc9c1865b0
 uname   30805 [000] 24001.058784424:  psb offs: 0
 uname   30805 [000] 24001.058784424:  cbr: 39 freq: 3904 MHz (139%)
 uname   30805 [000] 24001.058784629: ([kernel.kallsyms])        debug_smp_processor_id
 uname   30805 [000] 24001.058784629: ([kernel.kallsyms])        __x64_sys_newuname
 uname   30805 [000] 24001.058784629: ([kernel.kallsyms])            down_read
 uname   30805 [000] 24001.058784629: ([kernel.kallsyms])                __cond_resched
 uname   30805 [000] 24001.058784629: ([kernel.kallsyms])                preempt_count_add
 uname   30805 [000] 24001.058784629: ([kernel.kallsyms])                    in_lock_functions
 uname   30805 [000] 24001.058784629: ([kernel.kallsyms])                preempt_count_sub
 uname   30805 [000] 24001.058784629: ([kernel.kallsyms])            up_read
 uname   30805 [000] 24001.058784629: ([kernel.kallsyms])                preempt_count_add
 uname   30805 [000] 24001.058784838: ([kernel.kallsyms])                    in_lock_functions
 uname   30805 [000] 24001.058784838: ([kernel.kallsyms])                preempt_count_sub
 uname   30805 [000] 24001.058784838: ([kernel.kallsyms])            _copy_to_user
 uname   30805 [000] 24001.058784838: ([kernel.kallsyms])        syscall_exit_to_user_mode
 uname   30805 [000] 24001.058784838: ([kernel.kallsyms])            syscall_exit_work
 uname   30805 [000] 24001.058784838: ([kernel.kallsyms])                perf_syscall_exit
 uname   30805 [000] 24001.058784838: ([kernel.kallsyms])                    debug_smp_processor_id
 uname   30805 [000] 24001.058785046: ([kernel.kallsyms])                    perf_trace_buf_alloc
 uname   30805 [000] 24001.058785046: ([kernel.kallsyms])                        perf_swevent_get_recursion_context
 uname   30805 [000] 24001.058785046: ([kernel.kallsyms])                            debug_smp_processor_id
 uname   30805 [000] 24001.058785046: ([kernel.kallsyms])                        debug_smp_processor_id
 uname   30805 [000] 24001.058785046: ([kernel.kallsyms])                    perf_tp_event
 uname   30805 [000] 24001.058785046: ([kernel.kallsyms])                        perf_trace_buf_update
 uname   30805 [000] 24001.058785046: ([kernel.kallsyms])                            tracing_gen_ctx_irq_test
 uname   30805 [000] 24001.058785046: ([kernel.kallsyms])                        perf_swevent_event
 uname   30805 [000] 24001.058785046: ([kernel.kallsyms])                            __perf_event_account_interrupt
 uname   30805 [000] 24001.058785046: ([kernel.kallsyms])                                __this_cpu_preempt_check
 uname   30805 [000] 24001.058785046: ([kernel.kallsyms])                            perf_event_output_forward
 uname   30805 [000] 24001.058785046: ([kernel.kallsyms])                                perf_event_aux_pause
 uname   30805 [000] 24001.058785046: ([kernel.kallsyms])                                    ring_buffer_get
 uname   30805 [000] 24001.058785046: ([kernel.kallsyms])                                        __rcu_read_lock
 uname   30805 [000] 24001.058785046: ([kernel.kallsyms])                                        __rcu_read_unlock
 uname   30805 [000] 24001.058785254: ([kernel.kallsyms])                                    pt_event_stop
 uname   30805 [000] 24001.058785254: ([kernel.kallsyms])                                        debug_smp_processor_id
 uname   30805 [000] 24001.058785254: ([kernel.kallsyms])                                        debug_smp_processor_id
 uname   30805 [000] 24001.058785254: ([kernel.kallsyms])                                        native_write_msr
 uname   30805 [000] 24001.058785463: ([kernel.kallsyms])                                        native_write_msr
 uname   30805 [000] 24001.058785639: 0x0

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: James Clark <james.clark@arm.com>
Link: https://lkml.kernel.org/r/20241022155920.17511-3-adrian.hunter@intel.com
---
 include/linux/perf_event.h      | 28 ++++++++++++++++++++++++++++
 include/uapi/linux/perf_event.h | 11 ++++++++++-
 2 files changed, 38 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index fb908843f209..91b310052a7c 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -170,6 +170,12 @@ struct hw_perf_event {
 		};
 		struct { /* aux / Intel-PT */
 			u64		aux_config;
+			/*
+			 * For AUX area events, aux_paused cannot be a state
+			 * flag because it can be updated asynchronously to
+			 * state.
+			 */
+			unsigned int	aux_paused;
 		};
 		struct { /* software */
 			struct hrtimer	hrtimer;
@@ -294,6 +300,7 @@ struct perf_event_pmu_context;
 #define PERF_PMU_CAP_NO_EXCLUDE			0x0040
 #define PERF_PMU_CAP_AUX_OUTPUT			0x0080
 #define PERF_PMU_CAP_EXTENDED_HW_TYPE		0x0100
+#define PERF_PMU_CAP_AUX_PAUSE			0x0200
 
 /**
  * pmu::scope
@@ -384,6 +391,8 @@ struct pmu {
 #define PERF_EF_START	0x01		/* start the counter when adding    */
 #define PERF_EF_RELOAD	0x02		/* reload the counter when starting */
 #define PERF_EF_UPDATE	0x04		/* update the counter when stopping */
+#define PERF_EF_PAUSE	0x08		/* AUX area event, pause tracing */
+#define PERF_EF_RESUME	0x10		/* AUX area event, resume tracing */
 
 	/*
 	 * Adds/Removes a counter to/from the PMU, can be done inside a
@@ -423,6 +432,18 @@ struct pmu {
 	 *
 	 * ->start() with PERF_EF_RELOAD will reprogram the counter
 	 *  value, must be preceded by a ->stop() with PERF_EF_UPDATE.
+	 *
+	 * ->stop() with PERF_EF_PAUSE will stop as simply as possible. Will not
+	 * overlap another ->stop() with PERF_EF_PAUSE nor ->start() with
+	 * PERF_EF_RESUME.
+	 *
+	 * ->start() with PERF_EF_RESUME will start as simply as possible but
+	 * only if the counter is not otherwise stopped. Will not overlap
+	 * another ->start() with PERF_EF_RESUME nor ->stop() with
+	 * PERF_EF_PAUSE.
+	 *
+	 * Notably, PERF_EF_PAUSE/PERF_EF_RESUME *can* be concurrent with other
+	 * ->stop()/->start() invocations, just not itself.
 	 */
 	void (*start)			(struct perf_event *event, int flags);
 	void (*stop)			(struct perf_event *event, int flags);
@@ -1679,6 +1700,13 @@ static inline bool has_aux(struct perf_event *event)
 	return event->pmu->setup_aux;
 }
 
+static inline bool has_aux_action(struct perf_event *event)
+{
+	return event->attr.aux_sample_size ||
+	       event->attr.aux_pause ||
+	       event->attr.aux_resume;
+}
+
 static inline bool is_write_backward(struct perf_event *event)
 {
 	return !!event->attr.write_backward;
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 4842c36fdf80..0524d541d4e3 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -511,7 +511,16 @@ struct perf_event_attr {
 	__u16	sample_max_stack;
 	__u16	__reserved_2;
 	__u32	aux_sample_size;
-	__u32	__reserved_3;
+
+	union {
+		__u32	aux_action;
+		struct {
+			__u32	aux_start_paused :  1, /* start AUX area tracing paused */
+				aux_pause        :  1, /* on overflow, pause AUX area tracing */
+				aux_resume       :  1, /* on overflow, resume AUX area tracing */
+				__reserved_3     : 29;
+		};
+	};
 
 	/*
 	 * User provided data if sigtrap=1, passed back to user via
-- 
cgit v1.2.3


From 95e2eaf5b91aae6c3a433cd7882733bd806fa3c8 Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <linux@treblig.org>
Date: Mon, 4 Nov 2024 09:40:30 +0800
Subject: iommu/vt-d: Remove unused dmar_msi_read

dmar_msi_read() has been unused since 2022 in
commit cf8e8658100d ("arch: Remove Itanium (IA-64) architecture")

Remove it.

(dmar_msi_write still exists and is used once).

Signed-off-by: Dr. David Alan Gilbert <linux@treblig.org>
Link: https://lore.kernel.org/r/20241022002702.302728-1-linux@treblig.org
Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 include/linux/dmar.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/dmar.h b/include/linux/dmar.h
index 499bb2c63483..692b2b445761 100644
--- a/include/linux/dmar.h
+++ b/include/linux/dmar.h
@@ -292,7 +292,6 @@ static inline void dmar_copy_shared_irte(struct irte *dst, struct irte *src)
 struct irq_data;
 extern void dmar_msi_unmask(struct irq_data *data);
 extern void dmar_msi_mask(struct irq_data *data);
-extern void dmar_msi_read(int irq, struct msi_msg *msg);
 extern void dmar_msi_write(int irq, struct msi_msg *msg);
 extern int dmar_set_interrupt(struct intel_iommu *iommu);
 extern irqreturn_t dmar_fault(int irq, void *dev_id);
-- 
cgit v1.2.3


From bebf29b18f34620e25f7e2bd9e4e4d8e34a8977d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= <linux@weissschuh.net>
Date: Sun, 3 Nov 2024 17:03:31 +0000
Subject: sysfs: introduce callback attribute_group::bin_size
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Several drivers need to dynamically calculate the size of an binary
attribute. Currently this is done by assigning attr->size from the
is_bin_visible() callback.

This has drawbacks:
* It is not documented.
* A single attribute can be instantiated multiple times, overwriting the
  shared size field.
* It prevents the structure to be moved to read-only memory.

Introduce a new dedicated callback to calculate the size of the
attribute.

Signed-off-by: Thomas Weißschuh <linux@weissschuh.net>
Acked-by: Krzysztof Wilczyński <kw@linux.com>
Link: https://lore.kernel.org/r/20241103-sysfs-const-bin_attr-v2-2-71110628844c@weissschuh.net
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/sysfs.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include')

diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h
index c4e64dc11206..4746cccb9589 100644
--- a/include/linux/sysfs.h
+++ b/include/linux/sysfs.h
@@ -87,6 +87,11 @@ do {							\
  *		SYSFS_GROUP_VISIBLE() when assigning this callback to
  *		specify separate _group_visible() and _attr_visible()
  *		handlers.
+ * @bin_size:
+ *		Optional: Function to return the size of a binary attribute
+ *		of the group. Will be called repeatedly for each binary
+ *		attribute in the group. Overwrites the size field embedded
+ *		inside the attribute itself.
  * @attrs:	Pointer to NULL terminated list of attributes.
  * @bin_attrs:	Pointer to NULL terminated list of binary attributes.
  *		Either attrs or bin_attrs or both must be provided.
@@ -97,6 +102,9 @@ struct attribute_group {
 					      struct attribute *, int);
 	umode_t			(*is_bin_visible)(struct kobject *,
 						  struct bin_attribute *, int);
+	size_t			(*bin_size)(struct kobject *,
+					    const struct bin_attribute *,
+					    int);
 	struct attribute	**attrs;
 	struct bin_attribute	**bin_attrs;
 };
-- 
cgit v1.2.3


From b626816fdd7f9beb841856ba049396cff46e99aa Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= <linux@weissschuh.net>
Date: Sun, 3 Nov 2024 17:03:34 +0000
Subject: sysfs: treewide: constify attribute callback of bin_is_visible()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The is_bin_visible() callbacks should not modify the struct
bin_attribute passed as argument.
Enforce this by marking the argument as const.

As there are not many callback implementers perform this change
throughout the tree at once.

Signed-off-by: Thomas Weißschuh <linux@weissschuh.net>
Acked-by: Martin K. Petersen <martin.petersen@oracle.com>
Acked-by: Jason Gunthorpe <jgg@nvidia.com>
Acked-by: Ira Weiny <ira.weiny@intel.com>
Acked-by: Krzysztof Wilczyński <kw@linux.com>
Link: https://lore.kernel.org/r/20241103-sysfs-const-bin_attr-v2-5-71110628844c@weissschuh.net
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/sysfs.h | 30 +++++++++++++++---------------
 1 file changed, 15 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h
index 4746cccb9589..d1b22d56198b 100644
--- a/include/linux/sysfs.h
+++ b/include/linux/sysfs.h
@@ -101,7 +101,7 @@ struct attribute_group {
 	umode_t			(*is_visible)(struct kobject *,
 					      struct attribute *, int);
 	umode_t			(*is_bin_visible)(struct kobject *,
-						  struct bin_attribute *, int);
+						  const struct bin_attribute *, int);
 	size_t			(*bin_size)(struct kobject *,
 					    const struct bin_attribute *,
 					    int);
@@ -199,22 +199,22 @@ struct attribute_group {
  * attributes, the group visibility is determined by the function
  * specified to is_visible() not is_bin_visible()
  */
-#define DEFINE_SYSFS_BIN_GROUP_VISIBLE(name)                             \
-	static inline umode_t sysfs_group_visible_##name(                \
-		struct kobject *kobj, struct bin_attribute *attr, int n) \
-	{                                                                \
-		if (n == 0 && !name##_group_visible(kobj))               \
-			return SYSFS_GROUP_INVISIBLE;                    \
-		return name##_attr_visible(kobj, attr, n);               \
+#define DEFINE_SYSFS_BIN_GROUP_VISIBLE(name)                                   \
+	static inline umode_t sysfs_group_visible_##name(                      \
+		struct kobject *kobj, const struct bin_attribute *attr, int n) \
+	{                                                                      \
+		if (n == 0 && !name##_group_visible(kobj))                     \
+			return SYSFS_GROUP_INVISIBLE;                          \
+		return name##_attr_visible(kobj, attr, n);                     \
 	}
 
-#define DEFINE_SIMPLE_SYSFS_BIN_GROUP_VISIBLE(name)                   \
-	static inline umode_t sysfs_group_visible_##name(             \
-		struct kobject *kobj, struct bin_attribute *a, int n) \
-	{                                                             \
-		if (n == 0 && !name##_group_visible(kobj))            \
-			return SYSFS_GROUP_INVISIBLE;                 \
-		return a->mode;                                       \
+#define DEFINE_SIMPLE_SYSFS_BIN_GROUP_VISIBLE(name)                         \
+	static inline umode_t sysfs_group_visible_##name(                   \
+		struct kobject *kobj, const struct bin_attribute *a, int n) \
+	{                                                                   \
+		if (n == 0 && !name##_group_visible(kobj))                  \
+			return SYSFS_GROUP_INVISIBLE;                       \
+		return a->mode;                                             \
 	}
 
 #define SYSFS_GROUP_VISIBLE(fn) sysfs_group_visible_##fn
-- 
cgit v1.2.3


From 94a20fb9af16417ab5fd17bcde3d906926f15ef6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= <linux@weissschuh.net>
Date: Sun, 3 Nov 2024 17:03:35 +0000
Subject: sysfs: treewide: constify attribute callback of bin_attribute::mmap()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The mmap() callbacks should not modify the struct
bin_attribute passed as argument.
Enforce this by marking the argument as const.

As there are not many callback implementers perform this change
throughout the tree at once.

Signed-off-by: Thomas Weißschuh <linux@weissschuh.net>
Acked-by: Andrew Donnellan <ajd@linux.ibm.com> # ocxl
Acked-by: Krzysztof Wilczyński <kw@linux.com>
Link: https://lore.kernel.org/r/20241103-sysfs-const-bin_attr-v2-6-71110628844c@weissschuh.net
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/sysfs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h
index d1b22d56198b..9fcdc8cd3118 100644
--- a/include/linux/sysfs.h
+++ b/include/linux/sysfs.h
@@ -309,7 +309,7 @@ struct bin_attribute {
 			 char *, loff_t, size_t);
 	loff_t (*llseek)(struct file *, struct kobject *, struct bin_attribute *,
 			 loff_t, int);
-	int (*mmap)(struct file *, struct kobject *, struct bin_attribute *attr,
+	int (*mmap)(struct file *, struct kobject *, const struct bin_attribute *attr,
 		    struct vm_area_struct *vma);
 };
 
-- 
cgit v1.2.3


From 699e7b85afb5d94b99b0a3edca7e9e93ea320c8f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= <linux@weissschuh.net>
Date: Sun, 3 Nov 2024 17:03:36 +0000
Subject: sysfs: treewide: constify attribute callback of
 bin_attribute::llseek()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The llseek() callbacks should not modify the struct
bin_attribute passed as argument.
Enforce this by marking the argument as const.

As there are not many callback implementers perform this change
throughout the tree at once.

Signed-off-by: Thomas Weißschuh <linux@weissschuh.net>
Acked-by: Krzysztof Wilczyński <kw@linux.com>
Link: https://lore.kernel.org/r/20241103-sysfs-const-bin_attr-v2-7-71110628844c@weissschuh.net
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/sysfs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h
index 9fcdc8cd3118..cb2a5e277c23 100644
--- a/include/linux/sysfs.h
+++ b/include/linux/sysfs.h
@@ -307,7 +307,7 @@ struct bin_attribute {
 			char *, loff_t, size_t);
 	ssize_t (*write)(struct file *, struct kobject *, struct bin_attribute *,
 			 char *, loff_t, size_t);
-	loff_t (*llseek)(struct file *, struct kobject *, struct bin_attribute *,
+	loff_t (*llseek)(struct file *, struct kobject *, const struct bin_attribute *,
 			 loff_t, int);
 	int (*mmap)(struct file *, struct kobject *, const struct bin_attribute *attr,
 		    struct vm_area_struct *vma);
-- 
cgit v1.2.3


From ae587a509903cca138e910445d8c21fe73b45c80 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= <linux@weissschuh.net>
Date: Sun, 3 Nov 2024 17:03:37 +0000
Subject: sysfs: implement all BIN_ATTR_* macros in terms of __BIN_ATTR()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The preparations for the upcoming constification of struct bin_attribute
requires some logic in the structure definition macros.
To avoid duplication of that logic in multiple macros, reimplement all
other macros in terms of __BIN_ATTR().

Signed-off-by: Thomas Weißschuh <linux@weissschuh.net>
Acked-by: Krzysztof Wilczyński <kw@linux.com>
Link: https://lore.kernel.org/r/20241103-sysfs-const-bin_attr-v2-8-71110628844c@weissschuh.net
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/sysfs.h | 27 ++++++++-------------------
 1 file changed, 8 insertions(+), 19 deletions(-)

(limited to 'include')

diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h
index cb2a5e277c23..d17c473c1ef2 100644
--- a/include/linux/sysfs.h
+++ b/include/linux/sysfs.h
@@ -333,17 +333,11 @@ struct bin_attribute {
 	.size	= _size,						\
 }
 
-#define __BIN_ATTR_RO(_name, _size) {					\
-	.attr	= { .name = __stringify(_name), .mode = 0444 },		\
-	.read	= _name##_read,						\
-	.size	= _size,						\
-}
+#define __BIN_ATTR_RO(_name, _size)					\
+	__BIN_ATTR(_name, 0444, _name##_read, NULL, _size)
 
-#define __BIN_ATTR_WO(_name, _size) {					\
-	.attr	= { .name = __stringify(_name), .mode = 0200 },		\
-	.write	= _name##_write,					\
-	.size	= _size,						\
-}
+#define __BIN_ATTR_WO(_name, _size)					\
+	__BIN_ATTR(_name, 0200, NULL, _name##_write, _size)
 
 #define __BIN_ATTR_RW(_name, _size)					\
 	__BIN_ATTR(_name, 0644, _name##_read, _name##_write, _size)
@@ -364,11 +358,8 @@ struct bin_attribute bin_attr_##_name = __BIN_ATTR_WO(_name, _size)
 struct bin_attribute bin_attr_##_name = __BIN_ATTR_RW(_name, _size)
 
 
-#define __BIN_ATTR_ADMIN_RO(_name, _size) {					\
-	.attr	= { .name = __stringify(_name), .mode = 0400 },		\
-	.read	= _name##_read,						\
-	.size	= _size,						\
-}
+#define __BIN_ATTR_ADMIN_RO(_name, _size)				\
+	__BIN_ATTR(_name, 0400, _name##_read, NULL, _size)
 
 #define __BIN_ATTR_ADMIN_RW(_name, _size)					\
 	__BIN_ATTR(_name, 0600, _name##_read, _name##_write, _size)
@@ -379,10 +370,8 @@ struct bin_attribute bin_attr_##_name = __BIN_ATTR_ADMIN_RO(_name, _size)
 #define BIN_ATTR_ADMIN_RW(_name, _size)					\
 struct bin_attribute bin_attr_##_name = __BIN_ATTR_ADMIN_RW(_name, _size)
 
-#define __BIN_ATTR_SIMPLE_RO(_name, _mode) {				\
-	.attr	= { .name = __stringify(_name), .mode = _mode },	\
-	.read	= sysfs_bin_attr_simple_read,				\
-}
+#define __BIN_ATTR_SIMPLE_RO(_name, _mode)				\
+	__BIN_ATTR(_name, _mode, sysfs_bin_attr_simple_read, NULL, 0)
 
 #define BIN_ATTR_SIMPLE_RO(_name)					\
 struct bin_attribute bin_attr_##_name = __BIN_ATTR_SIMPLE_RO(_name, 0444)
-- 
cgit v1.2.3


From eb2e6c3a8d66ff37b2ee26cd32334ae0e05fd596 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= <linux@weissschuh.net>
Date: Sun, 3 Nov 2024 17:03:38 +0000
Subject: sysfs: bin_attribute: add const read/write callback variants
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

To make it possible to put struct bin_attribute into read-only memory,
the sysfs core has to stop passing mutable pointers to the read() and
write() callbacks.
As there are numerous implementors of these callbacks throughout the
tree it's not possible to change all of them at once.
To enable a step-by-step transition, add new variants of the read() and
write() callbacks which differ only in the constness of the struct
bin_attribute argument.

As most binary attributes are defined through macros, extend these
macros to transparently handle both variants of callbacks to minimize
the churn during the transition.
As soon as all handlers are switch to the const variant, the non-const
one can be removed together with the transition machinery.

Signed-off-by: Thomas Weißschuh <linux@weissschuh.net>
Acked-by: Krzysztof Wilczyński <kw@linux.com>
Link: https://lore.kernel.org/r/20241103-sysfs-const-bin_attr-v2-9-71110628844c@weissschuh.net
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/sysfs.h | 25 +++++++++++++++++++++++--
 1 file changed, 23 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h
index d17c473c1ef2..d713a6445a62 100644
--- a/include/linux/sysfs.h
+++ b/include/linux/sysfs.h
@@ -305,8 +305,12 @@ struct bin_attribute {
 	struct address_space *(*f_mapping)(void);
 	ssize_t (*read)(struct file *, struct kobject *, struct bin_attribute *,
 			char *, loff_t, size_t);
+	ssize_t (*read_new)(struct file *, struct kobject *, const struct bin_attribute *,
+			    char *, loff_t, size_t);
 	ssize_t (*write)(struct file *, struct kobject *, struct bin_attribute *,
 			 char *, loff_t, size_t);
+	ssize_t (*write_new)(struct file *, struct kobject *,
+			     const struct bin_attribute *, char *, loff_t, size_t);
 	loff_t (*llseek)(struct file *, struct kobject *, const struct bin_attribute *,
 			 loff_t, int);
 	int (*mmap)(struct file *, struct kobject *, const struct bin_attribute *attr,
@@ -325,11 +329,28 @@ struct bin_attribute {
  */
 #define sysfs_bin_attr_init(bin_attr) sysfs_attr_init(&(bin_attr)->attr)
 
+typedef ssize_t __sysfs_bin_rw_handler_new(struct file *, struct kobject *,
+					   const struct bin_attribute *, char *, loff_t, size_t);
+
 /* macros to create static binary attributes easier */
 #define __BIN_ATTR(_name, _mode, _read, _write, _size) {		\
 	.attr = { .name = __stringify(_name), .mode = _mode },		\
-	.read	= _read,						\
-	.write	= _write,						\
+	.read = _Generic(_read,						\
+		__sysfs_bin_rw_handler_new * : NULL,			\
+		default : _read						\
+	),								\
+	.read_new = _Generic(_read,					\
+		__sysfs_bin_rw_handler_new * : _read,			\
+		default : NULL						\
+	),								\
+	.write = _Generic(_write,					\
+		__sysfs_bin_rw_handler_new * : NULL,			\
+		default : _write					\
+	),								\
+	.write_new = _Generic(_write,					\
+		__sysfs_bin_rw_handler_new * : _write,			\
+		default : NULL						\
+	),								\
 	.size	= _size,						\
 }
 
-- 
cgit v1.2.3


From b3e8f29d6b45e865bab9a9964709ff7413e33e85 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Mon, 4 Nov 2024 10:41:15 +0100
Subject: netfilter: nf_tables: avoid false-positive lockdep splats with
 flowtables

The transaction mutex prevents concurrent add/delete, its ok to iterate
those lists outside of rcu read side critical sections.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 91ae20cb7648..c1513bd14568 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -1463,7 +1463,8 @@ struct nft_flowtable {
 	struct nf_flowtable		data;
 };
 
-struct nft_flowtable *nft_flowtable_lookup(const struct nft_table *table,
+struct nft_flowtable *nft_flowtable_lookup(const struct net *net,
+					   const struct nft_table *table,
 					   const struct nlattr *nla,
 					   u8 genmask);
 
-- 
cgit v1.2.3


From 64c58d7c99343a910edf995e15d8037e19ec5777 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Sun, 3 Nov 2024 20:19:16 -0800
Subject: iomap: add a merge boundary flag

File systems might have boundaries over which merges aren't possible.
In fact these are very common, although most of the time some kind of
header at the beginning of this region (e.g. XFS alloation groups, ext4
block groups) automatically create a merge barrier.  But if that is
not present, say for a device purely used for data we need to manually
communicate that to iomap.

Add a IOMAP_F_BOUNDARY flag to never merge I/O into a previous mapping.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
---
 include/linux/iomap.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index f61407e3b121..9ecb8ea7714c 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -53,6 +53,9 @@ struct vm_fault;
  *
  * IOMAP_F_XATTR indicates that the iomap is for an extended attribute extent
  * rather than a file data extent.
+ *
+ * IOMAP_F_BOUNDARY indicates that I/O and I/O completions for this iomap must
+ * never be merged with the mapping before it.
  */
 #define IOMAP_F_NEW		(1U << 0)
 #define IOMAP_F_DIRTY		(1U << 1)
@@ -64,6 +67,7 @@ struct vm_fault;
 #define IOMAP_F_BUFFER_HEAD	0
 #endif /* CONFIG_BUFFER_HEAD */
 #define IOMAP_F_XATTR		(1U << 5)
+#define IOMAP_F_BOUNDARY	(1U << 6)
 
 /*
  * Flags set by the core iomap code during operations:
-- 
cgit v1.2.3


From 57a063632df8db6cb20d64ee52a06d4e2049235a Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Wed, 23 Oct 2024 11:45:24 -0700
Subject: Input: introduce notion of passive observers for input handlers

Sometimes it is useful to observe (and maybe modify) data coming from
an input device, but only do that if there are other users of such input
device. An example is touchpad switching functionality on Lenovo IdeaPad
Z570 where it is desirable to suppress events coming from the touchpad
if user toggles touchpad on/off button (on this laptop the firmware does
not stop the device).

Introduce notion of passive observers for input handlers to solve this
issue. An input handler marked as passive observer behaves exactly like
any other input handler or filter, but with one exception: it does not
open/start underlying input device when attaching to it.

Link: https://lore.kernel.org/r/ZxlEROX7bMo5cbZP@google.com
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 include/linux/input.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include')

diff --git a/include/linux/input.h b/include/linux/input.h
index 89a0be6ee0e2..6437c35f0796 100644
--- a/include/linux/input.h
+++ b/include/linux/input.h
@@ -286,6 +286,10 @@ struct input_handle;
  * @start: starts handler for given handle. This function is called by
  *	input core right after connect() method and also when a process
  *	that "grabbed" a device releases it
+ * @passive_observer: set to %true by drivers only interested in observing
+ *	data stream from devices if there are other users present. Such
+ *	drivers will not result in starting underlying hardware device
+ *	when input_open_device() is called for their handles
  * @legacy_minors: set to %true by drivers using legacy minor ranges
  * @minor: beginning of range of 32 legacy minors for devices this driver
  *	can provide
@@ -321,6 +325,7 @@ struct input_handler {
 	void (*disconnect)(struct input_handle *handle);
 	void (*start)(struct input_handle *handle);
 
+	bool passive_observer;
 	bool legacy_minors;
 	int minor;
 	const char *name;
-- 
cgit v1.2.3


From 3ee315537e941fb92cff31b259641d19e798b4e6 Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Date: Sun, 27 Oct 2024 03:24:41 +0200
Subject: dt-bindings: clock: qcom: document SAR2130P Global Clock Controller

Add bindings for the Global Clock Controller (GCC) present on the
Qualcomm SAR2130P platform.

Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Link: https://lore.kernel.org/r/20241027-sar2130p-clocks-v5-2-ecad2a1432ba@linaro.org
Signed-off-by: Bjorn Andersson <andersson@kernel.org>
---
 include/dt-bindings/clock/qcom,sar2130p-gcc.h | 185 ++++++++++++++++++++++++++
 1 file changed, 185 insertions(+)
 create mode 100644 include/dt-bindings/clock/qcom,sar2130p-gcc.h

(limited to 'include')

diff --git a/include/dt-bindings/clock/qcom,sar2130p-gcc.h b/include/dt-bindings/clock/qcom,sar2130p-gcc.h
new file mode 100644
index 000000000000..69d2dd2538a6
--- /dev/null
+++ b/include/dt-bindings/clock/qcom,sar2130p-gcc.h
@@ -0,0 +1,185 @@
+/* SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause */
+/*
+ * Copyright (c) 2021-2022, Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#ifndef _DT_BINDINGS_CLK_QCOM_GCC_SAR2130P_H
+#define _DT_BINDINGS_CLK_QCOM_GCC_SAR2130P_H
+
+/* GCC clocks */
+#define GCC_GPLL0						0
+#define GCC_GPLL0_OUT_EVEN					1
+#define GCC_GPLL1						2
+#define GCC_GPLL9						3
+#define GCC_GPLL9_OUT_EVEN					4
+#define GCC_AGGRE_NOC_PCIE_1_AXI_CLK				5
+#define GCC_AGGRE_USB3_PRIM_AXI_CLK				6
+#define GCC_BOOT_ROM_AHB_CLK					7
+#define GCC_CAMERA_AHB_CLK					8
+#define GCC_CAMERA_HF_AXI_CLK					9
+#define GCC_CAMERA_SF_AXI_CLK					10
+#define GCC_CAMERA_XO_CLK					11
+#define GCC_CFG_NOC_PCIE_ANOC_AHB_CLK				12
+#define GCC_CFG_NOC_USB3_PRIM_AXI_CLK				13
+#define GCC_DDRSS_GPU_AXI_CLK					14
+#define GCC_DDRSS_PCIE_SF_CLK					15
+#define GCC_DISP_AHB_CLK					16
+#define GCC_DISP_HF_AXI_CLK					17
+#define GCC_GP1_CLK						18
+#define GCC_GP1_CLK_SRC						19
+#define GCC_GP2_CLK						20
+#define GCC_GP2_CLK_SRC						21
+#define GCC_GP3_CLK						22
+#define GCC_GP3_CLK_SRC						23
+#define GCC_GPU_CFG_AHB_CLK					24
+#define GCC_GPU_GPLL0_CLK_SRC					25
+#define GCC_GPU_GPLL0_DIV_CLK_SRC				26
+#define GCC_GPU_MEMNOC_GFX_CLK					27
+#define GCC_GPU_SNOC_DVM_GFX_CLK				28
+#define GCC_IRIS_SS_HF_AXI1_CLK					29
+#define GCC_IRIS_SS_SPD_AXI1_CLK				30
+#define GCC_PCIE_0_AUX_CLK					31
+#define GCC_PCIE_0_AUX_CLK_SRC					32
+#define GCC_PCIE_0_CFG_AHB_CLK					33
+#define GCC_PCIE_0_MSTR_AXI_CLK					34
+#define GCC_PCIE_0_PHY_RCHNG_CLK				35
+#define GCC_PCIE_0_PHY_RCHNG_CLK_SRC				36
+#define GCC_PCIE_0_PIPE_CLK					37
+#define GCC_PCIE_0_PIPE_CLK_SRC					38
+#define GCC_PCIE_0_SLV_AXI_CLK					39
+#define GCC_PCIE_0_SLV_Q2A_AXI_CLK				40
+#define GCC_PCIE_1_AUX_CLK					41
+#define GCC_PCIE_1_AUX_CLK_SRC					42
+#define GCC_PCIE_1_CFG_AHB_CLK					43
+#define GCC_PCIE_1_MSTR_AXI_CLK					44
+#define GCC_PCIE_1_PHY_RCHNG_CLK				45
+#define GCC_PCIE_1_PHY_RCHNG_CLK_SRC				46
+#define GCC_PCIE_1_PIPE_CLK					47
+#define GCC_PCIE_1_PIPE_CLK_SRC					48
+#define GCC_PCIE_1_SLV_AXI_CLK					49
+#define GCC_PCIE_1_SLV_Q2A_AXI_CLK				50
+#define GCC_PDM2_CLK						51
+#define GCC_PDM2_CLK_SRC					52
+#define GCC_PDM_AHB_CLK						53
+#define GCC_PDM_XO4_CLK						54
+#define GCC_QMIP_CAMERA_NRT_AHB_CLK				55
+#define GCC_QMIP_CAMERA_RT_AHB_CLK				56
+#define GCC_QMIP_GPU_AHB_CLK					57
+#define GCC_QMIP_PCIE_AHB_CLK					58
+#define GCC_QMIP_VIDEO_CV_CPU_AHB_CLK				59
+#define GCC_QMIP_VIDEO_CVP_AHB_CLK				60
+#define GCC_QMIP_VIDEO_LSR_AHB_CLK				61
+#define GCC_QMIP_VIDEO_V_CPU_AHB_CLK				62
+#define GCC_QMIP_VIDEO_VCODEC_AHB_CLK				63
+#define GCC_QUPV3_WRAP0_CORE_2X_CLK				64
+#define GCC_QUPV3_WRAP0_CORE_CLK				65
+#define GCC_QUPV3_WRAP0_S0_CLK					66
+#define GCC_QUPV3_WRAP0_S0_CLK_SRC				67
+#define GCC_QUPV3_WRAP0_S1_CLK					68
+#define GCC_QUPV3_WRAP0_S1_CLK_SRC				69
+#define GCC_QUPV3_WRAP0_S2_CLK					70
+#define GCC_QUPV3_WRAP0_S2_CLK_SRC				71
+#define GCC_QUPV3_WRAP0_S3_CLK					72
+#define GCC_QUPV3_WRAP0_S3_CLK_SRC				73
+#define GCC_QUPV3_WRAP0_S4_CLK					74
+#define GCC_QUPV3_WRAP0_S4_CLK_SRC				75
+#define GCC_QUPV3_WRAP0_S5_CLK					76
+#define GCC_QUPV3_WRAP0_S5_CLK_SRC				77
+#define GCC_QUPV3_WRAP1_CORE_2X_CLK				78
+#define GCC_QUPV3_WRAP1_CORE_CLK				79
+#define GCC_QUPV3_WRAP1_S0_CLK					80
+#define GCC_QUPV3_WRAP1_S0_CLK_SRC				81
+#define GCC_QUPV3_WRAP1_S1_CLK					82
+#define GCC_QUPV3_WRAP1_S1_CLK_SRC				83
+#define GCC_QUPV3_WRAP1_S2_CLK					84
+#define GCC_QUPV3_WRAP1_S2_CLK_SRC				85
+#define GCC_QUPV3_WRAP1_S3_CLK					86
+#define GCC_QUPV3_WRAP1_S3_CLK_SRC				87
+#define GCC_QUPV3_WRAP1_S4_CLK					88
+#define GCC_QUPV3_WRAP1_S4_CLK_SRC				89
+#define GCC_QUPV3_WRAP1_S5_CLK					90
+#define GCC_QUPV3_WRAP1_S5_CLK_SRC				91
+#define GCC_QUPV3_WRAP_0_M_AHB_CLK				92
+#define GCC_QUPV3_WRAP_0_S_AHB_CLK				93
+#define GCC_QUPV3_WRAP_1_M_AHB_CLK				94
+#define GCC_QUPV3_WRAP_1_S_AHB_CLK				95
+#define GCC_SDCC1_AHB_CLK					96
+#define GCC_SDCC1_APPS_CLK					97
+#define GCC_SDCC1_APPS_CLK_SRC					98
+#define GCC_SDCC1_ICE_CORE_CLK					99
+#define GCC_SDCC1_ICE_CORE_CLK_SRC				100
+#define GCC_USB30_PRIM_MASTER_CLK				101
+#define GCC_USB30_PRIM_MASTER_CLK_SRC				102
+#define GCC_USB30_PRIM_MOCK_UTMI_CLK				103
+#define GCC_USB30_PRIM_MOCK_UTMI_CLK_SRC			104
+#define GCC_USB30_PRIM_MOCK_UTMI_POSTDIV_CLK_SRC		105
+#define GCC_USB30_PRIM_SLEEP_CLK				106
+#define GCC_USB3_PRIM_PHY_AUX_CLK				107
+#define GCC_USB3_PRIM_PHY_AUX_CLK_SRC				108
+#define GCC_USB3_PRIM_PHY_COM_AUX_CLK				109
+#define GCC_USB3_PRIM_PHY_PIPE_CLK				110
+#define GCC_USB3_PRIM_PHY_PIPE_CLK_SRC				111
+#define GCC_VIDEO_AHB_CLK					112
+#define GCC_VIDEO_AXI0_CLK					113
+#define GCC_VIDEO_AXI1_CLK					114
+#define GCC_VIDEO_XO_CLK					115
+#define GCC_GPLL4						116
+#define GCC_GPLL5						117
+#define GCC_GPLL7						118
+#define GCC_DDRSS_SPAD_CLK					119
+#define GCC_DDRSS_SPAD_CLK_SRC					120
+#define GCC_VIDEO_AXI0_SREG					121
+#define GCC_VIDEO_AXI1_SREG					122
+#define GCC_IRIS_SS_HF_AXI1_SREG				123
+#define GCC_IRIS_SS_SPD_AXI1_SREG				124
+
+/* GCC resets */
+#define GCC_CAMERA_BCR						0
+#define GCC_DISPLAY_BCR						1
+#define GCC_GPU_BCR						2
+#define GCC_PCIE_0_BCR						3
+#define GCC_PCIE_0_LINK_DOWN_BCR				4
+#define GCC_PCIE_0_NOCSR_COM_PHY_BCR				5
+#define GCC_PCIE_0_PHY_BCR					6
+#define GCC_PCIE_0_PHY_NOCSR_COM_PHY_BCR			7
+#define GCC_PCIE_1_BCR						8
+#define GCC_PCIE_1_LINK_DOWN_BCR				9
+#define GCC_PCIE_1_NOCSR_COM_PHY_BCR				10
+#define GCC_PCIE_1_PHY_BCR					11
+#define GCC_PCIE_1_PHY_NOCSR_COM_PHY_BCR			12
+#define GCC_PCIE_PHY_BCR					13
+#define GCC_PCIE_PHY_CFG_AHB_BCR				14
+#define GCC_PCIE_PHY_COM_BCR					15
+#define GCC_PDM_BCR						16
+#define GCC_QUPV3_WRAPPER_0_BCR					17
+#define GCC_QUPV3_WRAPPER_1_BCR					18
+#define GCC_QUSB2PHY_PRIM_BCR					19
+#define GCC_QUSB2PHY_SEC_BCR					20
+#define GCC_SDCC1_BCR						21
+#define GCC_USB30_PRIM_BCR					22
+#define GCC_USB3_DP_PHY_PRIM_BCR				23
+#define GCC_USB3_DP_PHY_SEC_BCR					24
+#define GCC_USB3_PHY_PRIM_BCR					25
+#define GCC_USB3_PHY_SEC_BCR					26
+#define GCC_USB3PHY_PHY_PRIM_BCR				27
+#define GCC_USB3PHY_PHY_SEC_BCR					28
+#define GCC_VIDEO_AXI0_CLK_ARES					29
+#define GCC_VIDEO_AXI1_CLK_ARES					30
+#define GCC_VIDEO_BCR						31
+#define GCC_IRIS_SS_HF_AXI_CLK_ARES				32
+#define GCC_IRIS_SS_SPD_AXI_CLK_ARES				33
+#define GCC_DDRSS_SPAD_CLK_ARES					34
+
+/* GCC power domains */
+#define PCIE_0_GDSC						0
+#define PCIE_0_PHY_GDSC						1
+#define PCIE_1_GDSC						2
+#define PCIE_1_PHY_GDSC						3
+#define USB30_PRIM_GDSC						4
+#define USB3_PHY_GDSC						5
+#define HLOS1_VOTE_MM_SNOC_MMU_TBU_HF0_GDSC			6
+#define HLOS1_VOTE_MM_SNOC_MMU_TBU_SF0_GDSC			7
+#define HLOS1_VOTE_TURING_MMU_TBU0_GDSC				8
+#define HLOS1_VOTE_TURING_MMU_TBU1_GDSC				9
+
+#endif
-- 
cgit v1.2.3


From 111481020aa599764790c807312465a4c94f4b5c Mon Sep 17 00:00:00 2001
From: Konrad Dybcio <konrad.dybcio@linaro.org>
Date: Sun, 27 Oct 2024 03:24:44 +0200
Subject: dt-bindings: clk: qcom,sm8450-gpucc: add SAR2130P compatibles

Expand qcom,sm8450-gpucc bindings to include SAR2130P.

Signed-off-by: Konrad Dybcio <konrad.dybcio@linaro.org>
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Link: https://lore.kernel.org/r/20241027-sar2130p-clocks-v5-5-ecad2a1432ba@linaro.org
Signed-off-by: Bjorn Andersson <andersson@kernel.org>
---
 include/dt-bindings/clock/qcom,sar2130p-gpucc.h | 33 +++++++++++++++++++++++++
 include/dt-bindings/reset/qcom,sar2130p-gpucc.h | 14 +++++++++++
 2 files changed, 47 insertions(+)
 create mode 100644 include/dt-bindings/clock/qcom,sar2130p-gpucc.h
 create mode 100644 include/dt-bindings/reset/qcom,sar2130p-gpucc.h

(limited to 'include')

diff --git a/include/dt-bindings/clock/qcom,sar2130p-gpucc.h b/include/dt-bindings/clock/qcom,sar2130p-gpucc.h
new file mode 100644
index 000000000000..a2204369110a
--- /dev/null
+++ b/include/dt-bindings/clock/qcom,sar2130p-gpucc.h
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/*
+ * Copyright (c) 2022, Qualcomm Innovation Center, Inc. All rights reserved
+ * Copyright (c) 2024, Linaro Limited
+ */
+
+#ifndef _DT_BINDINGS_CLK_QCOM_GPU_CC_SAR2130P_H
+#define _DT_BINDINGS_CLK_QCOM_GPU_CC_SAR2130P_H
+
+/* GPU_CC clocks */
+#define GPU_CC_AHB_CLK				0
+#define GPU_CC_CRC_AHB_CLK			1
+#define GPU_CC_CX_FF_CLK			2
+#define GPU_CC_CX_GMU_CLK			3
+#define GPU_CC_CXO_AON_CLK			4
+#define GPU_CC_CXO_CLK				5
+#define GPU_CC_FF_CLK_SRC			6
+#define GPU_CC_GMU_CLK_SRC			7
+#define GPU_CC_GX_GMU_CLK			8
+#define GPU_CC_HLOS1_VOTE_GPU_SMMU_CLK		9
+#define GPU_CC_HUB_AON_CLK			10
+#define GPU_CC_HUB_CLK_SRC			11
+#define GPU_CC_HUB_CX_INT_CLK			12
+#define GPU_CC_MEMNOC_GFX_CLK			13
+#define GPU_CC_PLL0				14
+#define GPU_CC_PLL1				15
+#define GPU_CC_SLEEP_CLK			16
+
+/* GDSCs */
+#define GPU_GX_GDSC				0
+#define GPU_CX_GDSC				1
+
+#endif
diff --git a/include/dt-bindings/reset/qcom,sar2130p-gpucc.h b/include/dt-bindings/reset/qcom,sar2130p-gpucc.h
new file mode 100644
index 000000000000..99ba5f092e2a
--- /dev/null
+++ b/include/dt-bindings/reset/qcom,sar2130p-gpucc.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/*
+ * Copyright (c) 2019, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2024, Linaro Limited
+ */
+
+#ifndef _DT_BINDINGS_RESET_QCOM_GPU_CC_SAR2130P_H
+#define _DT_BINDINGS_RESET_QCOM_GPU_CC_SAR2130P_H
+
+#define GPUCC_GPU_CC_GX_BCR			0
+#define GPUCC_GPU_CC_ACD_BCR			1
+#define GPUCC_GPU_CC_GX_ACD_IROOT_BCR		2
+
+#endif
-- 
cgit v1.2.3


From 03e525c66de2535dc1afd26be004621c7c5a253e Mon Sep 17 00:00:00 2001
From: Sricharan Ramabadhran <quic_srichara@quicinc.com>
Date: Mon, 28 Oct 2024 11:35:02 +0530
Subject: dt-bindings: clock: Add Qualcomm IPQ5424 GCC binding

Add binding for the Qualcomm IPQ5424 Global Clock Controller

Signed-off-by: Sricharan Ramabadhran <quic_srichara@quicinc.com>
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Link: https://lore.kernel.org/r/20241028060506.246606-3-quic_srichara@quicinc.com
Signed-off-by: Bjorn Andersson <andersson@kernel.org>
---
 include/dt-bindings/clock/qcom,ipq5424-gcc.h | 156 ++++++++++++++
 include/dt-bindings/reset/qcom,ipq5424-gcc.h | 310 +++++++++++++++++++++++++++
 2 files changed, 466 insertions(+)
 create mode 100644 include/dt-bindings/clock/qcom,ipq5424-gcc.h
 create mode 100644 include/dt-bindings/reset/qcom,ipq5424-gcc.h

(limited to 'include')

diff --git a/include/dt-bindings/clock/qcom,ipq5424-gcc.h b/include/dt-bindings/clock/qcom,ipq5424-gcc.h
new file mode 100644
index 000000000000..755ce7a71c7c
--- /dev/null
+++ b/include/dt-bindings/clock/qcom,ipq5424-gcc.h
@@ -0,0 +1,156 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/*
+ * Copyright (c) 2018,2020 The Linux Foundation. All rights reserved.
+ * Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#ifndef _DT_BINDINGS_CLOCK_IPQ_GCC_IPQ5424_H
+#define _DT_BINDINGS_CLOCK_IPQ_GCC_IPQ5424_H
+
+#define GPLL0					0
+#define GPLL4					1
+#define GPLL2					2
+#define GPLL2_OUT_MAIN                          3
+#define GCC_SLEEP_CLK_SRC			4
+#define GCC_APSS_DBG_CLK                        5
+#define GCC_USB0_EUD_AT_CLK			6
+#define GCC_PCIE0_AXI_M_CLK_SRC			7
+#define GCC_PCIE0_AXI_M_CLK			8
+#define GCC_PCIE1_AXI_M_CLK_SRC			9
+#define GCC_PCIE1_AXI_M_CLK			10
+#define GCC_PCIE2_AXI_M_CLK_SRC			11
+#define GCC_PCIE2_AXI_M_CLK			12
+#define GCC_PCIE3_AXI_M_CLK_SRC			13
+#define GCC_PCIE3_AXI_M_CLK			14
+#define GCC_PCIE0_AXI_S_CLK_SRC			15
+#define GCC_PCIE0_AXI_S_BRIDGE_CLK		16
+#define GCC_PCIE0_AXI_S_CLK			17
+#define GCC_PCIE1_AXI_S_CLK_SRC			18
+#define GCC_PCIE1_AXI_S_BRIDGE_CLK		19
+#define GCC_PCIE1_AXI_S_CLK			20
+#define GCC_PCIE2_AXI_S_CLK_SRC			21
+#define GCC_PCIE2_AXI_S_BRIDGE_CLK		22
+#define GCC_PCIE2_AXI_S_CLK			23
+#define GCC_PCIE3_AXI_S_CLK_SRC			24
+#define GCC_PCIE3_AXI_S_BRIDGE_CLK		25
+#define GCC_PCIE3_AXI_S_CLK			26
+#define GCC_PCIE0_PIPE_CLK_SRC			27
+#define GCC_PCIE0_PIPE_CLK			28
+#define GCC_PCIE1_PIPE_CLK_SRC			29
+#define GCC_PCIE1_PIPE_CLK			30
+#define GCC_PCIE2_PIPE_CLK_SRC			31
+#define GCC_PCIE2_PIPE_CLK			32
+#define GCC_PCIE3_PIPE_CLK_SRC			33
+#define GCC_PCIE3_PIPE_CLK			34
+#define GCC_PCIE_AUX_CLK_SRC			35
+#define GCC_PCIE0_AUX_CLK			36
+#define GCC_PCIE1_AUX_CLK			37
+#define GCC_PCIE2_AUX_CLK			38
+#define GCC_PCIE3_AUX_CLK			39
+#define GCC_PCIE0_AHB_CLK			40
+#define GCC_PCIE1_AHB_CLK			41
+#define GCC_PCIE2_AHB_CLK			42
+#define GCC_PCIE3_AHB_CLK			43
+#define GCC_USB0_AUX_CLK_SRC			44
+#define GCC_USB0_AUX_CLK			45
+#define GCC_USB0_MASTER_CLK			46
+#define GCC_USB0_MOCK_UTMI_CLK_SRC		47
+#define GCC_USB0_MOCK_UTMI_DIV_CLK_SRC		48
+#define GCC_USB0_MOCK_UTMI_CLK			49
+#define GCC_USB0_PIPE_CLK_SRC			50
+#define GCC_USB0_PIPE_CLK			51
+#define GCC_USB0_PHY_CFG_AHB_CLK		52
+#define GCC_USB0_SLEEP_CLK			53
+#define GCC_SDCC1_APPS_CLK_SRC			54
+#define GCC_SDCC1_APPS_CLK			55
+#define GCC_SDCC1_ICE_CORE_CLK_SRC		56
+#define GCC_SDCC1_ICE_CORE_CLK			57
+#define GCC_SDCC1_AHB_CLK			58
+#define GCC_PCNOC_BFDCD_CLK_SRC			59
+#define GCC_NSSCFG_CLK				60
+#define GCC_NSSNOC_NSSCC_CLK			61
+#define GCC_NSSCC_CLK				62
+#define GCC_NSSNOC_PCNOC_1_CLK			63
+#define GCC_QPIC_AHB_CLK			64
+#define GCC_QPIC_CLK				65
+#define GCC_MDIO_AHB_CLK			66
+#define GCC_PRNG_AHB_CLK			67
+#define GCC_UNIPHY0_AHB_CLK			68
+#define GCC_UNIPHY1_AHB_CLK			69
+#define GCC_UNIPHY2_AHB_CLK			70
+#define GCC_CMN_12GPLL_AHB_CLK			71
+#define GCC_SYSTEM_NOC_BFDCD_CLK_SRC		72
+#define GCC_NSSNOC_SNOC_CLK			73
+#define GCC_NSSNOC_SNOC_1_CLK			74
+#define GCC_WCSS_AHB_CLK_SRC			75
+#define GCC_QDSS_AT_CLK_SRC			76
+#define GCC_NSSNOC_ATB_CLK			77
+#define GCC_QDSS_AT_CLK				78
+#define GCC_QDSS_TSCTR_CLK_SRC			79
+#define GCC_NSS_TS_CLK				80
+#define GCC_QPIC_IO_MACRO_CLK_SRC		81
+#define GCC_QPIC_IO_MACRO_CLK			82
+#define GCC_LPASS_AXIM_CLK_SRC			83
+#define GCC_LPASS_CORE_AXIM_CLK			84
+#define GCC_LPASS_SWAY_CLK_SRC			85
+#define GCC_LPASS_SWAY_CLK			86
+#define GCC_CNOC_LPASS_CFG_CLK                  87
+#define GCC_SNOC_LPASS_CLK                      88
+#define GCC_ADSS_PWM_CLK_SRC			89
+#define GCC_ADSS_PWM_CLK			90
+#define GCC_XO_CLK_SRC				91
+#define GCC_NSSNOC_XO_DCD_CLK			92
+#define GCC_NSSNOC_QOSGEN_REF_CLK		93
+#define GCC_NSSNOC_TIMEOUT_REF_CLK		94
+#define GCC_UNIPHY0_SYS_CLK			95
+#define GCC_UNIPHY1_SYS_CLK			96
+#define GCC_UNIPHY2_SYS_CLK			97
+#define GCC_CMN_12GPLL_SYS_CLK			98
+#define GCC_UNIPHY_SYS_CLK_SRC			99
+#define GCC_NSS_TS_CLK_SRC			100
+#define GCC_ANOC_PCIE0_1LANE_M_CLK		101
+#define GCC_ANOC_PCIE1_1LANE_M_CLK		102
+#define GCC_ANOC_PCIE2_2LANE_M_CLK		103
+#define GCC_ANOC_PCIE3_2LANE_M_CLK		104
+#define GCC_CNOC_PCIE0_1LANE_S_CLK		105
+#define GCC_CNOC_PCIE1_1LANE_S_CLK		106
+#define GCC_CNOC_PCIE2_2LANE_S_CLK		107
+#define GCC_CNOC_PCIE3_2LANE_S_CLK		108
+#define GCC_CNOC_USB_CLK			109
+#define GCC_CNOC_WCSS_AHB_CLK			110
+#define GCC_QUPV3_AHB_MST_CLK			111
+#define GCC_QUPV3_AHB_SLV_CLK			112
+#define GCC_QUPV3_I2C0_CLK			113
+#define GCC_QUPV3_I2C1_CLK			114
+#define GCC_QUPV3_SPI0_CLK			115
+#define GCC_QUPV3_SPI1_CLK			116
+#define GCC_QUPV3_UART0_CLK			117
+#define GCC_QUPV3_UART1_CLK			118
+#define GCC_QPIC_CLK_SRC			119
+#define GCC_QUPV3_I2C0_CLK_SRC			120
+#define GCC_QUPV3_I2C1_CLK_SRC			121
+#define GCC_QUPV3_I2C0_DIV_CLK_SRC              122
+#define GCC_QUPV3_I2C1_DIV_CLK_SRC              123
+#define GCC_QUPV3_SPI0_CLK_SRC			124
+#define GCC_QUPV3_SPI1_CLK_SRC			125
+#define GCC_QUPV3_UART0_CLK_SRC			126
+#define GCC_QUPV3_UART1_CLK_SRC			127
+#define GCC_USB1_MASTER_CLK			128
+#define GCC_USB1_MOCK_UTMI_CLK_SRC		129
+#define GCC_USB1_MOCK_UTMI_DIV_CLK_SRC		130
+#define GCC_USB1_MOCK_UTMI_CLK			131
+#define GCC_USB1_SLEEP_CLK			132
+#define GCC_USB1_PHY_CFG_AHB_CLK		133
+#define GCC_USB0_MASTER_CLK_SRC			134
+#define GCC_QDSS_DAP_CLK			135
+#define GCC_PCIE0_RCHNG_CLK_SRC			136
+#define GCC_PCIE0_RCHNG_CLK			137
+#define GCC_PCIE1_RCHNG_CLK_SRC			138
+#define GCC_PCIE1_RCHNG_CLK			139
+#define GCC_PCIE2_RCHNG_CLK_SRC			140
+#define GCC_PCIE2_RCHNG_CLK			141
+#define GCC_PCIE3_RCHNG_CLK_SRC			142
+#define GCC_PCIE3_RCHNG_CLK			143
+#define GCC_IM_SLEEP_CLK			144
+
+#endif
diff --git a/include/dt-bindings/reset/qcom,ipq5424-gcc.h b/include/dt-bindings/reset/qcom,ipq5424-gcc.h
new file mode 100644
index 000000000000..16a72771c79a
--- /dev/null
+++ b/include/dt-bindings/reset/qcom,ipq5424-gcc.h
@@ -0,0 +1,310 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/*
+ * Copyright (c) 2018,2020 The Linux Foundation. All rights reserved.
+ * Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#ifndef _DT_BINDINGS_RESET_IPQ_GCC_IPQ5424_H
+#define _DT_BINDINGS_RESET_IPQ_GCC_IPQ5424_H
+
+#define GCC_QUPV3_BCR				0
+#define GCC_QUPV3_I2C0_BCR			1
+#define GCC_QUPV3_UART0_BCR			2
+#define GCC_QUPV3_I2C1_BCR			3
+#define GCC_QUPV3_UART1_BCR			4
+#define GCC_QUPV3_SPI0_BCR			5
+#define GCC_QUPV3_SPI1_BCR			6
+#define GCC_IMEM_BCR				7
+#define GCC_TME_BCR				8
+#define GCC_DDRSS_BCR				9
+#define GCC_PRNG_BCR				10
+#define GCC_BOOT_ROM_BCR			11
+#define GCC_NSS_BCR				12
+#define GCC_MDIO_BCR				13
+#define GCC_UNIPHY0_BCR				14
+#define GCC_UNIPHY1_BCR				15
+#define GCC_UNIPHY2_BCR				16
+#define GCC_WCSS_BCR				17
+#define GCC_SEC_CTRL_BCR			19
+#define GCC_TME_SEC_BUS_BCR			20
+#define GCC_ADSS_BCR				21
+#define GCC_LPASS_BCR				22
+#define GCC_PCIE0_BCR				23
+#define GCC_PCIE0_LINK_DOWN_BCR			24
+#define GCC_PCIE0PHY_PHY_BCR			25
+#define GCC_PCIE0_PHY_BCR			26
+#define GCC_PCIE1_BCR				27
+#define GCC_PCIE1_LINK_DOWN_BCR			28
+#define GCC_PCIE1PHY_PHY_BCR			29
+#define GCC_PCIE1_PHY_BCR			30
+#define GCC_PCIE2_BCR				31
+#define GCC_PCIE2_LINK_DOWN_BCR			32
+#define GCC_PCIE2PHY_PHY_BCR			33
+#define GCC_PCIE2_PHY_BCR			34
+#define GCC_PCIE3_BCR				35
+#define GCC_PCIE3_LINK_DOWN_BCR			36
+#define GCC_PCIE3PHY_PHY_BCR			37
+#define GCC_PCIE3_PHY_BCR			38
+#define GCC_USB_BCR				39
+#define GCC_QUSB2_0_PHY_BCR			40
+#define GCC_USB0_PHY_BCR			41
+#define GCC_USB3PHY_0_PHY_BCR			42
+#define GCC_QDSS_BCR				43
+#define GCC_SNOC_BCR				44
+#define GCC_ANOC_BCR				45
+#define GCC_PCNOC_BCR				46
+#define GCC_PCNOC_BUS_TIMEOUT0_BCR		47
+#define GCC_PCNOC_BUS_TIMEOUT1_BCR		48
+#define GCC_PCNOC_BUS_TIMEOUT2_BCR		49
+#define GCC_PCNOC_BUS_TIMEOUT3_BCR		50
+#define GCC_PCNOC_BUS_TIMEOUT4_BCR		51
+#define GCC_PCNOC_BUS_TIMEOUT5_BCR		52
+#define GCC_PCNOC_BUS_TIMEOUT6_BCR		53
+#define GCC_PCNOC_BUS_TIMEOUT7_BCR		54
+#define GCC_PCNOC_BUS_TIMEOUT8_BCR		55
+#define GCC_PCNOC_BUS_TIMEOUT9_BCR		56
+#define GCC_QPIC_BCR				57
+#define GCC_SDCC_BCR				58
+#define GCC_DCC_BCR				59
+#define GCC_SPDM_BCR				60
+#define GCC_MPM_BCR				61
+#define GCC_APC0_VOLTAGE_DROOP_DETECTOR_BCR	62
+#define GCC_RBCPR_BCR				63
+#define GCC_CMN_BLK_BCR				64
+#define GCC_TCSR_BCR				65
+#define GCC_TLMM_BCR				66
+#define GCC_QUPV3_AHB_MST_ARES			67
+#define GCC_QUPV3_CORE_ARES			68
+#define GCC_QUPV3_2X_CORE_ARES			69
+#define GCC_QUPV3_SLEEP_ARES			70
+#define GCC_QUPV3_AHB_SLV_ARES			71
+#define GCC_QUPV3_I2C0_ARES			72
+#define GCC_QUPV3_UART0_ARES			73
+#define GCC_QUPV3_I2C1_ARES			74
+#define GCC_QUPV3_UART1_ARES			75
+#define GCC_QUPV3_SPI0_ARES			76
+#define GCC_QUPV3_SPI1_ARES			77
+#define GCC_DEBUG_ARES				78
+#define GCC_GP1_ARES				79
+#define GCC_GP2_ARES				80
+#define GCC_GP3_ARES				81
+#define GCC_IMEM_AXI_ARES			82
+#define GCC_IMEM_CFG_AHB_ARES			83
+#define GCC_TME_ARES				84
+#define GCC_TME_TS_ARES				85
+#define GCC_TME_SLOW_ARES			86
+#define GCC_TME_RTC_TOGGLE_ARES			87
+#define GCC_TIC_ARES				88
+#define GCC_PRNG_AHB_ARES			89
+#define GCC_BOOT_ROM_AHB_ARES			90
+#define GCC_NSSNOC_ATB_ARES			91
+#define GCC_NSS_TS_ARES				92
+#define GCC_NSSNOC_QOSGEN_REF_ARES		93
+#define GCC_NSSNOC_TIMEOUT_REF_ARES		94
+#define GCC_NSSNOC_MEMNOC_ARES			95
+#define GCC_NSSNOC_SNOC_ARES			96
+#define GCC_NSSCFG_ARES				97
+#define GCC_NSSNOC_NSSCC_ARES			98
+#define GCC_NSSCC_ARES				99
+#define GCC_MDIO_AHB_ARES			100
+#define GCC_UNIPHY0_SYS_ARES			101
+#define GCC_UNIPHY0_AHB_ARES			102
+#define GCC_UNIPHY1_SYS_ARES			103
+#define GCC_UNIPHY1_AHB_ARES			104
+#define GCC_UNIPHY2_SYS_ARES			105
+#define GCC_UNIPHY2_AHB_ARES			106
+#define GCC_NSSNOC_XO_DCD_ARES			107
+#define GCC_NSSNOC_SNOC_1_ARES			108
+#define GCC_NSSNOC_PCNOC_1_ARES			109
+#define GCC_NSSNOC_MEMNOC_1_ARES		110
+#define GCC_DDRSS_ATB_ARES			111
+#define GCC_DDRSS_AHB_ARES			112
+#define GCC_GEMNOC_AHB_ARES			113
+#define GCC_GEMNOC_Q6_AXI_ARES			114
+#define GCC_GEMNOC_NSSNOC_ARES			115
+#define GCC_GEMNOC_SNOC_ARES			116
+#define GCC_GEMNOC_APSS_ARES			117
+#define GCC_GEMNOC_QOSGEN_EXTREF_ARES		118
+#define GCC_GEMNOC_TS_ARES			119
+#define GCC_DDRSS_SMS_SLOW_ARES			120
+#define GCC_GEMNOC_CNOC_ARES			121
+#define GCC_GEMNOC_XO_DBG_ARES			122
+#define GCC_GEMNOC_ANOC_ARES			123
+#define GCC_DDRSS_LLCC_ATB_ARES			124
+#define GCC_LLCC_TPDM_CFG_ARES			125
+#define GCC_TME_BUS_ARES			126
+#define GCC_SEC_CTRL_ACC_ARES			127
+#define GCC_SEC_CTRL_ARES			128
+#define GCC_SEC_CTRL_SENSE_ARES			129
+#define GCC_SEC_CTRL_AHB_ARES			130
+#define GCC_SEC_CTRL_BOOT_ROM_PATCH_ARES	131
+#define GCC_ADSS_PWM_ARES			132
+#define GCC_TME_ATB_ARES			133
+#define GCC_TME_DBGAPB_ARES			134
+#define GCC_TME_DEBUG_ARES			135
+#define GCC_TME_AT_ARES				136
+#define GCC_TME_APB_ARES			137
+#define GCC_TME_DMI_DBG_HS_ARES			138
+#define GCC_APSS_AHB_ARES			139
+#define GCC_APSS_AXI_ARES			140
+#define GCC_CPUSS_TRIG_ARES			141
+#define GCC_APSS_DBG_ARES			142
+#define GCC_APSS_TS_ARES			143
+#define GCC_APSS_ATB_ARES			144
+#define GCC_Q6_AXIM_ARES			145
+#define GCC_Q6_AXIS_ARES			146
+#define GCC_Q6_AHB_ARES				147
+#define GCC_Q6_AHB_S_ARES			148
+#define GCC_Q6SS_ATBM_ARES			149
+#define GCC_Q6_TSCTR_1TO2_ARES			150
+#define GCC_Q6SS_PCLKDBG_ARES			151
+#define GCC_Q6SS_TRIG_ARES			152
+#define GCC_Q6SS_BOOT_CBCR_ARES			153
+#define GCC_WCSS_DBG_IFC_APB_ARES		154
+#define GCC_WCSS_DBG_IFC_ATB_ARES		155
+#define GCC_WCSS_DBG_IFC_NTS_ARES		156
+#define GCC_WCSS_DBG_IFC_DAPBUS_ARES		157
+#define GCC_WCSS_DBG_IFC_APB_BDG_ARES		158
+#define GCC_WCSS_DBG_IFC_NTS_BDG_ARES		159
+#define GCC_WCSS_DBG_IFC_DAPBUS_BDG_ARES	160
+#define GCC_WCSS_ECAHB_ARES			161
+#define GCC_WCSS_ACMT_ARES			162
+#define GCC_WCSS_AHB_S_ARES			163
+#define GCC_WCSS_AXI_M_ARES			164
+#define GCC_PCNOC_WAPSS_ARES			165
+#define GCC_SNOC_WAPSS_ARES			166
+#define GCC_LPASS_SWAY_ARES			167
+#define GCC_LPASS_CORE_AXIM_ARES		168
+#define GCC_PCIE0_AHB_ARES			169
+#define GCC_PCIE0_AXI_M_ARES			170
+#define GCC_PCIE0_AXI_S_ARES			171
+#define GCC_PCIE0_AXI_S_BRIDGE_ARES		172
+#define GCC_PCIE0_PIPE_ARES			173
+#define GCC_PCIE0_AUX_ARES			174
+#define GCC_PCIE1_AHB_ARES			175
+#define GCC_PCIE1_AXI_M_ARES			176
+#define GCC_PCIE1_AXI_S_ARES			177
+#define GCC_PCIE1_AXI_S_BRIDGE_ARES		178
+#define GCC_PCIE1_PIPE_ARES			179
+#define GCC_PCIE1_AUX_ARES			180
+#define GCC_PCIE2_AHB_ARES			181
+#define GCC_PCIE2_AXI_M_ARES			182
+#define GCC_PCIE2_AXI_S_ARES			183
+#define GCC_PCIE2_AXI_S_BRIDGE_ARES		184
+#define GCC_PCIE2_PIPE_ARES			185
+#define GCC_PCIE2_AUX_ARES			186
+#define GCC_PCIE3_AHB_ARES			187
+#define GCC_PCIE3_AXI_M_ARES			188
+#define GCC_PCIE3_AXI_S_ARES			189
+#define GCC_PCIE3_AXI_S_BRIDGE_ARES		190
+#define GCC_PCIE3_PIPE_ARES			191
+#define GCC_PCIE3_AUX_ARES			192
+#define GCC_USB0_MASTER_ARES			193
+#define GCC_USB0_AUX_ARES			194
+#define GCC_USB0_MOCK_UTMI_ARES			195
+#define GCC_USB0_PIPE_ARES			196
+#define GCC_USB0_SLEEP_ARES			197
+#define GCC_USB0_PHY_CFG_AHB_ARES		198
+#define GCC_QDSS_AT_ARES			199
+#define GCC_QDSS_STM_ARES			200
+#define GCC_QDSS_TRACECLKIN_ARES		201
+#define GCC_QDSS_TSCTR_DIV2_ARES		202
+#define GCC_QDSS_TSCTR_DIV3_ARES		203
+#define GCC_QDSS_TSCTR_DIV4_ARES		204
+#define GCC_QDSS_TSCTR_DIV8_ARES		205
+#define GCC_QDSS_TSCTR_DIV16_ARES		206
+#define GCC_QDSS_DAP_ARES			207
+#define GCC_QDSS_APB2JTAG_ARES			208
+#define GCC_QDSS_ETR_USB_ARES			209
+#define GCC_QDSS_DAP_AHB_ARES			210
+#define GCC_QDSS_CFG_AHB_ARES			211
+#define GCC_QDSS_EUD_AT_ARES			212
+#define GCC_QDSS_TS_ARES			213
+#define GCC_QDSS_USB_ARES			214
+#define GCC_SYS_NOC_AXI_ARES			215
+#define GCC_SNOC_QOSGEN_EXTREF_ARES		216
+#define GCC_CNOC_LPASS_CFG_ARES			217
+#define GCC_SYS_NOC_AT_ARES			218
+#define GCC_SNOC_PCNOC_AHB_ARES			219
+#define GCC_SNOC_TME_ARES			220
+#define GCC_SNOC_XO_DCD_ARES			221
+#define GCC_SNOC_TS_ARES			222
+#define GCC_ANOC0_AXI_ARES			223
+#define GCC_ANOC_PCIE0_1LANE_M_ARES		224
+#define GCC_ANOC_PCIE2_2LANE_M_ARES		225
+#define GCC_ANOC_PCIE1_1LANE_M_ARES		226
+#define GCC_ANOC_PCIE3_2LANE_M_ARES		227
+#define GCC_ANOC_PCNOC_AHB_ARES			228
+#define GCC_ANOC_QOSGEN_EXTREF_ARES		229
+#define GCC_ANOC_XO_DCD_ARES			230
+#define GCC_SNOC_XO_DBG_ARES			231
+#define GCC_AGGRNOC_ATB_ARES			232
+#define GCC_AGGRNOC_TS_ARES			233
+#define GCC_USB0_EUD_AT_ARES			234
+#define GCC_PCNOC_TIC_ARES			235
+#define GCC_PCNOC_AHB_ARES			236
+#define GCC_PCNOC_XO_DBG_ARES			237
+#define GCC_SNOC_LPASS_ARES			238
+#define GCC_PCNOC_AT_ARES			239
+#define GCC_PCNOC_XO_DCD_ARES			240
+#define GCC_PCNOC_TS_ARES			241
+#define GCC_PCNOC_BUS_TIMEOUT0_AHB_ARES		242
+#define GCC_PCNOC_BUS_TIMEOUT1_AHB_ARES		243
+#define GCC_PCNOC_BUS_TIMEOUT2_AHB_ARES		244
+#define GCC_PCNOC_BUS_TIMEOUT3_AHB_ARES		245
+#define GCC_PCNOC_BUS_TIMEOUT4_AHB_ARES		246
+#define GCC_PCNOC_BUS_TIMEOUT5_AHB_ARES		247
+#define GCC_PCNOC_BUS_TIMEOUT6_AHB_ARES		248
+#define GCC_PCNOC_BUS_TIMEOUT7_AHB_ARES		249
+#define GCC_Q6_AXIM_RESET			250
+#define GCC_Q6_AXIS_RESET			251
+#define GCC_Q6_AHB_S_RESET			252
+#define GCC_Q6_AHB_RESET			253
+#define GCC_Q6SS_DBG_RESET			254
+#define GCC_WCSS_ECAHB_RESET			255
+#define GCC_WCSS_DBG_BDG_RESET			256
+#define GCC_WCSS_DBG_RESET			257
+#define GCC_WCSS_AXI_M_RESET			258
+#define GCC_WCSS_AHB_S_RESET			259
+#define GCC_WCSS_ACMT_RESET			260
+#define GCC_WCSSAON_RESET			261
+#define GCC_PCIE0_PIPE_RESET			262
+#define GCC_PCIE0_CORE_STICKY_RESET		263
+#define GCC_PCIE0_AXI_S_STICKY_RESET		264
+#define GCC_PCIE0_AXI_S_RESET			265
+#define GCC_PCIE0_AXI_M_STICKY_RESET		266
+#define GCC_PCIE0_AXI_M_RESET			267
+#define GCC_PCIE0_AUX_RESET			268
+#define GCC_PCIE0_AHB_RESET			269
+#define GCC_PCIE1_PIPE_RESET			270
+#define GCC_PCIE1_CORE_STICKY_RESET		271
+#define GCC_PCIE1_AXI_S_STICKY_RESET		272
+#define GCC_PCIE1_AXI_S_RESET			273
+#define GCC_PCIE1_AXI_M_STICKY_RESET		274
+#define GCC_PCIE1_AXI_M_RESET			275
+#define GCC_PCIE1_AUX_RESET			276
+#define GCC_PCIE1_AHB_RESET			277
+#define GCC_PCIE2_PIPE_RESET			278
+#define GCC_PCIE2_CORE_STICKY_RESET		279
+#define GCC_PCIE2_AXI_S_STICKY_RESET		280
+#define GCC_PCIE2_AXI_S_RESET			281
+#define GCC_PCIE2_AXI_M_STICKY_RESET		282
+#define GCC_PCIE2_AXI_M_RESET			283
+#define GCC_PCIE2_AUX_RESET			284
+#define GCC_PCIE2_AHB_RESET			285
+#define GCC_PCIE3_PIPE_RESET			286
+#define GCC_PCIE3_CORE_STICKY_RESET		287
+#define GCC_PCIE3_AXI_S_STICKY_RESET		288
+#define GCC_PCIE3_AXI_S_RESET			289
+#define GCC_PCIE3_AXI_M_STICKY_RESET		290
+#define GCC_PCIE3_AXI_M_RESET			291
+#define GCC_PCIE3_AUX_RESET			292
+#define GCC_PCIE3_AHB_RESET			293
+#define GCC_NSS_PARTIAL_RESET			294
+#define GCC_UNIPHY0_XPCS_ARES			295
+#define GCC_UNIPHY1_XPCS_ARES			296
+#define GCC_UNIPHY2_XPCS_ARES			297
+#define GCC_USB1_BCR				298
+#define GCC_QUSB2_1_PHY_BCR			299
+
+#endif
-- 
cgit v1.2.3


From c9cfca98998eb1cd14bdeccd607982ae818711e7 Mon Sep 17 00:00:00 2001
From: Manikanta Mylavarapu <quic_mmanikan@quicinc.com>
Date: Wed, 16 Oct 2024 20:45:26 +0530
Subject: dt-bindings: arm: qcom,ids: add SoC ID for IPQ5424/IPQ5404

Add the ID for Qualcomm IPQ5424/IPQ5404 SoC.

Signed-off-by: Manikanta Mylavarapu <quic_mmanikan@quicinc.com>
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Link: https://lore.kernel.org/r/20241016151528.2893599-2-quic_mmanikan@quicinc.com
Signed-off-by: Bjorn Andersson <andersson@kernel.org>
---
 include/dt-bindings/arm/qcom,ids.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/dt-bindings/arm/qcom,ids.h b/include/dt-bindings/arm/qcom,ids.h
index b66f8aeb9f47..e850dc3a1ad3 100644
--- a/include/dt-bindings/arm/qcom,ids.h
+++ b/include/dt-bindings/arm/qcom,ids.h
@@ -279,6 +279,8 @@
 #define QCOM_ID_QCM8550			604
 #define QCOM_ID_IPQ5300			624
 #define QCOM_ID_IPQ5321			650
+#define QCOM_ID_IPQ5424			651
+#define QCOM_ID_IPQ5404			671
 #define QCOM_ID_QCS9100			667
 #define QCOM_ID_QCS8300			674
 #define QCOM_ID_QCS8275			675
-- 
cgit v1.2.3


From d2d243df445a88c26e91eac02b041213c7a32e9e Mon Sep 17 00:00:00 2001
From: Baolin Wang <baolin.wang@linux.alibaba.com>
Date: Sun, 22 Sep 2024 12:32:13 +0800
Subject: mm: shmem: fix khugepaged activation policy for shmem

Shmem has a separate interface (different from anonymous pages) to control
huge page allocation, that means shmem THP can be enabled while anonymous
THP is disabled.  However, in this case, khugepaged will not start to
collapse shmem THP, which is unreasonable.

To fix this issue, we should call start_stop_khugepaged() to activate or
deactivate the khugepaged thread when setting shmem mTHP interfaces.
Moreover, add a new helper shmem_hpage_pmd_enabled() to help to check
whether shmem THP is enabled, which will determine if khugepaged should be
activated.

Link: https://lkml.kernel.org/r/9b9c6cbc4499bf44c6455367fd9e0f6036525680.1726978977.git.baolin.wang@linux.alibaba.com
Signed-off-by: Baolin Wang <baolin.wang@linux.alibaba.com>
Reported-by: Ryan Roberts <ryan.roberts@arm.com>
Reviewed-by: Ryan Roberts <ryan.roberts@arm.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Matthew Wilcox <willy@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/shmem_fs.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include')

diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
index 515a9a6a3c6f..ee6635052383 100644
--- a/include/linux/shmem_fs.h
+++ b/include/linux/shmem_fs.h
@@ -114,6 +114,7 @@ int shmem_unuse(unsigned int type);
 unsigned long shmem_allowable_huge_orders(struct inode *inode,
 				struct vm_area_struct *vma, pgoff_t index,
 				loff_t write_end, bool shmem_huge_force);
+bool shmem_hpage_pmd_enabled(void);
 #else
 static inline unsigned long shmem_allowable_huge_orders(struct inode *inode,
 				struct vm_area_struct *vma, pgoff_t index,
@@ -121,6 +122,11 @@ static inline unsigned long shmem_allowable_huge_orders(struct inode *inode,
 {
 	return 0;
 }
+
+static inline bool shmem_hpage_pmd_enabled(void)
+{
+	return false;
+}
 #endif
 
 #ifdef CONFIG_SHMEM
-- 
cgit v1.2.3


From 9e9e085effe9b7e342138fde3cf8577d22509932 Mon Sep 17 00:00:00 2001
From: Adrian Huang <ahuang12@lenovo.com>
Date: Sat, 27 Jul 2024 00:52:46 +0800
Subject: mm/vmalloc: combine all TLB flush operations of KASAN shadow virtual
 address into one operation

When compiling kernel source 'make -j $(nproc)' with the up-and-running
KASAN-enabled kernel on a 256-core machine, the following soft lockup is
shown:

watchdog: BUG: soft lockup - CPU#28 stuck for 22s! [kworker/28:1:1760]
CPU: 28 PID: 1760 Comm: kworker/28:1 Kdump: loaded Not tainted 6.10.0-rc5 #95
Workqueue: events drain_vmap_area_work
RIP: 0010:smp_call_function_many_cond+0x1d8/0xbb0
Code: 38 c8 7c 08 84 c9 0f 85 49 08 00 00 8b 45 08 a8 01 74 2e 48 89 f1 49 89 f7 48 c1 e9 03 41 83 e7 07 4c 01 e9 41 83 c7 03 f3 90 <0f> b6 01 41 38 c7 7c 08 84 c0 0f 85 d4 06 00 00 8b 45 08 a8 01 75
RSP: 0018:ffffc9000cb3fb60 EFLAGS: 00000202
RAX: 0000000000000011 RBX: ffff8883bc4469c0 RCX: ffffed10776e9949
RDX: 0000000000000002 RSI: ffff8883bb74ca48 RDI: ffffffff8434dc50
RBP: ffff8883bb74ca40 R08: ffff888103585dc0 R09: ffff8884533a1800
R10: 0000000000000004 R11: ffffffffffffffff R12: ffffed1077888d39
R13: dffffc0000000000 R14: ffffed1077888d38 R15: 0000000000000003
FS:  0000000000000000(0000) GS:ffff8883bc400000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00005577b5c8d158 CR3: 0000000004850000 CR4: 0000000000350ef0
Call Trace:
 <IRQ>
 ? watchdog_timer_fn+0x2cd/0x390
 ? __pfx_watchdog_timer_fn+0x10/0x10
 ? __hrtimer_run_queues+0x300/0x6d0
 ? sched_clock_cpu+0x69/0x4e0
 ? __pfx___hrtimer_run_queues+0x10/0x10
 ? srso_return_thunk+0x5/0x5f
 ? ktime_get_update_offsets_now+0x7f/0x2a0
 ? srso_return_thunk+0x5/0x5f
 ? srso_return_thunk+0x5/0x5f
 ? hrtimer_interrupt+0x2ca/0x760
 ? __sysvec_apic_timer_interrupt+0x8c/0x2b0
 ? sysvec_apic_timer_interrupt+0x6a/0x90
 </IRQ>
 <TASK>
 ? asm_sysvec_apic_timer_interrupt+0x16/0x20
 ? smp_call_function_many_cond+0x1d8/0xbb0
 ? __pfx_do_kernel_range_flush+0x10/0x10
 on_each_cpu_cond_mask+0x20/0x40
 flush_tlb_kernel_range+0x19b/0x250
 ? srso_return_thunk+0x5/0x5f
 ? kasan_release_vmalloc+0xa7/0xc0
 purge_vmap_node+0x357/0x820
 ? __pfx_purge_vmap_node+0x10/0x10
 __purge_vmap_area_lazy+0x5b8/0xa10
 drain_vmap_area_work+0x21/0x30
 process_one_work+0x661/0x10b0
 worker_thread+0x844/0x10e0
 ? srso_return_thunk+0x5/0x5f
 ? __kthread_parkme+0x82/0x140
 ? __pfx_worker_thread+0x10/0x10
 kthread+0x2a5/0x370
 ? __pfx_kthread+0x10/0x10
 ret_from_fork+0x30/0x70
 ? __pfx_kthread+0x10/0x10
 ret_from_fork_asm+0x1a/0x30
 </TASK>

Debugging Analysis:

  1. The following ftrace log shows that the lockup CPU spends too much
     time iterating vmap_nodes and flushing TLB when purging vm_area
     structures. (Some info is trimmed).

     kworker: funcgraph_entry:              |  drain_vmap_area_work() {
     kworker: funcgraph_entry:              |   mutex_lock() {
     kworker: funcgraph_entry:  1.092 us    |     __cond_resched();
     kworker: funcgraph_exit:   3.306 us    |   }
     ...                                        ...
     kworker: funcgraph_entry:              |    flush_tlb_kernel_range() {
     ...                                          ...
     kworker: funcgraph_exit: # 7533.649 us |    }
     ...                                         ...
     kworker: funcgraph_entry:  2.344 us    |   mutex_unlock();
     kworker: funcgraph_exit: $ 23871554 us | }

     The drain_vmap_area_work() spends over 23 seconds.

     There are 2805 flush_tlb_kernel_range() calls in the ftrace log.
       * One is called in __purge_vmap_area_lazy().
       * Others are called by purge_vmap_node->kasan_release_vmalloc.
         purge_vmap_node() iteratively releases kasan vmalloc
         allocations and flushes TLB for each vmap_area.
           - [Rough calculation] Each flush_tlb_kernel_range() runs
             about 7.5ms.
               -- 2804 * 7.5ms = 21.03 seconds.
               -- That's why a soft lock is triggered.

  2. Extending the soft lockup time can work around the issue (For example,
     # echo 60 > /proc/sys/kernel/watchdog_thresh). This confirms the
     above-mentioned speculation: drain_vmap_area_work() spends too much
     time.

If we combine all TLB flush operations of the KASAN shadow virtual
address into one operation in the call path
'purge_vmap_node()->kasan_release_vmalloc()', the running time of
drain_vmap_area_work() can be saved greatly. The idea is from the
flush_tlb_kernel_range() call in __purge_vmap_area_lazy(). And, the
soft lockup won't be triggered.

Here is the test result based on 6.10:

[6.10 wo/ the patch]
  1. ftrace latency profiling (record a trace if the latency > 20s).
     echo 20000000 > /sys/kernel/debug/tracing/tracing_thresh
     echo drain_vmap_area_work > /sys/kernel/debug/tracing/set_graph_function
     echo function_graph > /sys/kernel/debug/tracing/current_tracer
     echo 1 > /sys/kernel/debug/tracing/tracing_on

  2. Run `make -j $(nproc)` to compile the kernel source

  3. Once the soft lockup is reproduced, check the ftrace log:
     cat /sys/kernel/debug/tracing/trace
        # tracer: function_graph
        #
        # CPU  DURATION                  FUNCTION CALLS
        # |     |   |                     |   |   |   |
          76) $ 50412985 us |    } /* __purge_vmap_area_lazy */
          76) $ 50412997 us |  } /* drain_vmap_area_work */
          76) $ 29165911 us |    } /* __purge_vmap_area_lazy */
          76) $ 29165926 us |  } /* drain_vmap_area_work */
          91) $ 53629423 us |    } /* __purge_vmap_area_lazy */
          91) $ 53629434 us |  } /* drain_vmap_area_work */
          91) $ 28121014 us |    } /* __purge_vmap_area_lazy */
          91) $ 28121026 us |  } /* drain_vmap_area_work */

[6.10 w/ the patch]
  1. Repeat step 1-2 in "[6.10 wo/ the patch]"

  2. The soft lockup is not triggered and ftrace log is empty.
     cat /sys/kernel/debug/tracing/trace
     # tracer: function_graph
     #
     # CPU  DURATION                  FUNCTION CALLS
     # |     |   |                     |   |   |   |

  3. Setting 'tracing_thresh' to 10/5 seconds does not get any ftrace
     log.

  4. Setting 'tracing_thresh' to 1 second gets ftrace log.
     cat /sys/kernel/debug/tracing/trace
     # tracer: function_graph
     #
     # CPU  DURATION                  FUNCTION CALLS
     # |     |   |                     |   |   |   |
       23) $ 1074942 us  |    } /* __purge_vmap_area_lazy */
       23) $ 1074950 us  |  } /* drain_vmap_area_work */

  The worst execution time of drain_vmap_area_work() is about 1 second.

Link: https://lore.kernel.org/lkml/ZqFlawuVnOMY2k3E@pc638.lan/
Link: https://lkml.kernel.org/r/20240726165246.31326-1-ahuang12@lenovo.com
Fixes: 282631cb2447 ("mm: vmalloc: remove global purge_vmap_area_root rb-tree")
Signed-off-by: Adrian Huang <ahuang12@lenovo.com>
Co-developed-by: Uladzislau Rezki (Sony) <urezki@gmail.com>
Signed-off-by: Uladzislau Rezki (Sony) <urezki@gmail.com>
Tested-by: Jiwei Sun <sunjw10@lenovo.com>
Reviewed-by: Baoquan He <bhe@redhat.com>
Cc: Alexander Potapenko <glider@google.com>
Cc: Andrey Konovalov <andreyknvl@gmail.com>
Cc: Andrey Ryabinin <ryabinin.a.a@gmail.com>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Vincenzo Frascino <vincenzo.frascino@arm.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/kasan.h | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/kasan.h b/include/linux/kasan.h
index 00a3bf7c0d8f..6bbfc8aa42e8 100644
--- a/include/linux/kasan.h
+++ b/include/linux/kasan.h
@@ -29,6 +29,9 @@ typedef unsigned int __bitwise kasan_vmalloc_flags_t;
 #define KASAN_VMALLOC_VM_ALLOC		((__force kasan_vmalloc_flags_t)0x02u)
 #define KASAN_VMALLOC_PROT_NORMAL	((__force kasan_vmalloc_flags_t)0x04u)
 
+#define KASAN_VMALLOC_PAGE_RANGE 0x1 /* Apply exsiting page range */
+#define KASAN_VMALLOC_TLB_FLUSH  0x2 /* TLB flush */
+
 #if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)
 
 #include <linux/pgtable.h>
@@ -564,7 +567,8 @@ void kasan_populate_early_vm_area_shadow(void *start, unsigned long size);
 int kasan_populate_vmalloc(unsigned long addr, unsigned long size);
 void kasan_release_vmalloc(unsigned long start, unsigned long end,
 			   unsigned long free_region_start,
-			   unsigned long free_region_end);
+			   unsigned long free_region_end,
+			   unsigned long flags);
 
 #else /* CONFIG_KASAN_GENERIC || CONFIG_KASAN_SW_TAGS */
 
@@ -579,7 +583,8 @@ static inline int kasan_populate_vmalloc(unsigned long start,
 static inline void kasan_release_vmalloc(unsigned long start,
 					 unsigned long end,
 					 unsigned long free_region_start,
-					 unsigned long free_region_end) { }
+					 unsigned long free_region_end,
+					 unsigned long flags) { }
 
 #endif /* CONFIG_KASAN_GENERIC || CONFIG_KASAN_SW_TAGS */
 
@@ -614,7 +619,8 @@ static inline int kasan_populate_vmalloc(unsigned long start,
 static inline void kasan_release_vmalloc(unsigned long start,
 					 unsigned long end,
 					 unsigned long free_region_start,
-					 unsigned long free_region_end) { }
+					 unsigned long free_region_end,
+					 unsigned long flags) { }
 
 static inline void *kasan_unpoison_vmalloc(const void *start,
 					   unsigned long size,
-- 
cgit v1.2.3


From 1cd1a4e71b61eaf8cadd15372b67ccd60a2e1a99 Mon Sep 17 00:00:00 2001
From: Tanya Agarwal <tanyaagarwal25699@gmail.com>
Date: Fri, 27 Sep 2024 00:05:16 +0530
Subject: mm/mempolicy: fix comments for better documentation

Fix typo in mempolicy.h and Correct the number of allowed memory policy

Link: https://lkml.kernel.org/r/20240926183516.4034-2-tanyaagarwal25699@gmail.com
Signed-off-by: Tanya Agarwal <tanyaagarwal25699@gmail.com>
Reviewed-by: Shuah Khan <skhan@linuxfoundation.org>
Cc: Anup Sharma <anupnewsmail@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/mempolicy.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index 1add16f21612..ce9885e0178a 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -47,7 +47,7 @@ struct mempolicy {
 	atomic_t refcnt;
 	unsigned short mode; 	/* See MPOL_* above */
 	unsigned short flags;	/* See set_mempolicy() MPOL_F_* above */
-	nodemask_t nodes;	/* interleave/bind/perfer */
+	nodemask_t nodes;	/* interleave/bind/preferred/etc */
 	int home_node;		/* Home node to use for MPOL_BIND and MPOL_PREFERRED_MANY */
 
 	union {
-- 
cgit v1.2.3


From f2f484085ef1a2bb5aea861a06bc6b4dc50d2ab8 Mon Sep 17 00:00:00 2001
From: Nanyong Sun <sunnanyong@huawei.com>
Date: Thu, 26 Sep 2024 15:49:22 +0800
Subject: mm: move mm flags to mm_types.h

The types of mm flags are now far beyond the core dump related features.
This patch moves mm flags from linux/sched/coredump.h to linux/mm_types.h.
The linux/sched/coredump.h has include the mm_types.h, so the C files
related to coredump does not need to change head file inclusion.  In
addition, the inclusion of sched/coredump.h now can be deleted from the C
files that irrelevant to core dump.

Link: https://lkml.kernel.org/r/20240926074922.2721274-1-sunnanyong@huawei.com
Signed-off-by: Nanyong Sun <sunnanyong@huawei.com>
Cc: Kefeng Wang <wangkefeng.wang@huawei.com>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/huge_mm.h        |  1 -
 include/linux/khugepaged.h     |  2 -
 include/linux/ksm.h            |  1 -
 include/linux/mm_types.h       | 84 ++++++++++++++++++++++++++++++++++++++++++
 include/linux/oom.h            |  1 -
 include/linux/sched/coredump.h | 82 -----------------------------------------
 6 files changed, 84 insertions(+), 87 deletions(-)

(limited to 'include')

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index ef5b80e48599..8afe09a2cf03 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -2,7 +2,6 @@
 #ifndef _LINUX_HUGE_MM_H
 #define _LINUX_HUGE_MM_H
 
-#include <linux/sched/coredump.h>
 #include <linux/mm_types.h>
 
 #include <linux/fs.h> /* only for vma_is_dax() */
diff --git a/include/linux/khugepaged.h b/include/linux/khugepaged.h
index 30baae91b225..1f46046080f5 100644
--- a/include/linux/khugepaged.h
+++ b/include/linux/khugepaged.h
@@ -2,8 +2,6 @@
 #ifndef _LINUX_KHUGEPAGED_H
 #define _LINUX_KHUGEPAGED_H
 
-#include <linux/sched/coredump.h> /* MMF_VM_HUGEPAGE */
-
 extern unsigned int khugepaged_max_ptes_none __read_mostly;
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 extern struct attribute_group khugepaged_attr_group;
diff --git a/include/linux/ksm.h b/include/linux/ksm.h
index ec9c05044d4f..29022e71a074 100644
--- a/include/linux/ksm.h
+++ b/include/linux/ksm.h
@@ -13,7 +13,6 @@
 #include <linux/pagemap.h>
 #include <linux/rmap.h>
 #include <linux/sched.h>
-#include <linux/sched/coredump.h>
 
 #ifdef CONFIG_KSM
 int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 6e3bdf8e38bc..ff8627acbaa7 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -1499,4 +1499,88 @@ enum {
 	/* See also internal only FOLL flags in mm/internal.h */
 };
 
+/* mm flags */
+
+/*
+ * The first two bits represent core dump modes for set-user-ID,
+ * the modes are SUID_DUMP_* defined in linux/sched/coredump.h
+ */
+#define MMF_DUMPABLE_BITS 2
+#define MMF_DUMPABLE_MASK ((1 << MMF_DUMPABLE_BITS) - 1)
+/* coredump filter bits */
+#define MMF_DUMP_ANON_PRIVATE	2
+#define MMF_DUMP_ANON_SHARED	3
+#define MMF_DUMP_MAPPED_PRIVATE	4
+#define MMF_DUMP_MAPPED_SHARED	5
+#define MMF_DUMP_ELF_HEADERS	6
+#define MMF_DUMP_HUGETLB_PRIVATE 7
+#define MMF_DUMP_HUGETLB_SHARED  8
+#define MMF_DUMP_DAX_PRIVATE	9
+#define MMF_DUMP_DAX_SHARED	10
+
+#define MMF_DUMP_FILTER_SHIFT	MMF_DUMPABLE_BITS
+#define MMF_DUMP_FILTER_BITS	9
+#define MMF_DUMP_FILTER_MASK \
+	(((1 << MMF_DUMP_FILTER_BITS) - 1) << MMF_DUMP_FILTER_SHIFT)
+#define MMF_DUMP_FILTER_DEFAULT \
+	((1 << MMF_DUMP_ANON_PRIVATE) |	(1 << MMF_DUMP_ANON_SHARED) |\
+	 (1 << MMF_DUMP_HUGETLB_PRIVATE) | MMF_DUMP_MASK_DEFAULT_ELF)
+
+#ifdef CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS
+# define MMF_DUMP_MASK_DEFAULT_ELF	(1 << MMF_DUMP_ELF_HEADERS)
+#else
+# define MMF_DUMP_MASK_DEFAULT_ELF	0
+#endif
+					/* leave room for more dump flags */
+#define MMF_VM_MERGEABLE	16	/* KSM may merge identical pages */
+#define MMF_VM_HUGEPAGE		17	/* set when mm is available for khugepaged */
+
+/*
+ * This one-shot flag is dropped due to necessity of changing exe once again
+ * on NFS restore
+ */
+//#define MMF_EXE_FILE_CHANGED	18	/* see prctl_set_mm_exe_file() */
+
+#define MMF_HAS_UPROBES		19	/* has uprobes */
+#define MMF_RECALC_UPROBES	20	/* MMF_HAS_UPROBES can be wrong */
+#define MMF_OOM_SKIP		21	/* mm is of no interest for the OOM killer */
+#define MMF_UNSTABLE		22	/* mm is unstable for copy_from_user */
+#define MMF_HUGE_ZERO_PAGE	23      /* mm has ever used the global huge zero page */
+#define MMF_DISABLE_THP		24	/* disable THP for all VMAs */
+#define MMF_DISABLE_THP_MASK	(1 << MMF_DISABLE_THP)
+#define MMF_OOM_REAP_QUEUED	25	/* mm was queued for oom_reaper */
+#define MMF_MULTIPROCESS	26	/* mm is shared between processes */
+/*
+ * MMF_HAS_PINNED: Whether this mm has pinned any pages.  This can be either
+ * replaced in the future by mm.pinned_vm when it becomes stable, or grow into
+ * a counter on its own. We're aggresive on this bit for now: even if the
+ * pinned pages were unpinned later on, we'll still keep this bit set for the
+ * lifecycle of this mm, just for simplicity.
+ */
+#define MMF_HAS_PINNED		27	/* FOLL_PIN has run, never cleared */
+
+#define MMF_HAS_MDWE		28
+#define MMF_HAS_MDWE_MASK	(1 << MMF_HAS_MDWE)
+
+
+#define MMF_HAS_MDWE_NO_INHERIT	29
+
+#define MMF_VM_MERGE_ANY	30
+#define MMF_VM_MERGE_ANY_MASK	(1 << MMF_VM_MERGE_ANY)
+
+#define MMF_TOPDOWN		31	/* mm searches top down by default */
+#define MMF_TOPDOWN_MASK	(1 << MMF_TOPDOWN)
+
+#define MMF_INIT_MASK		(MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK |\
+				 MMF_DISABLE_THP_MASK | MMF_HAS_MDWE_MASK |\
+				 MMF_VM_MERGE_ANY_MASK | MMF_TOPDOWN_MASK)
+
+static inline unsigned long mmf_init_flags(unsigned long flags)
+{
+	if (flags & (1UL << MMF_HAS_MDWE_NO_INHERIT))
+		flags &= ~((1UL << MMF_HAS_MDWE) |
+			   (1UL << MMF_HAS_MDWE_NO_INHERIT));
+	return flags & MMF_INIT_MASK;
+}
+
 #endif /* _LINUX_MM_TYPES_H */
diff --git a/include/linux/oom.h b/include/linux/oom.h
index 7d0c9c48a0c5..1e0fc6931ce9 100644
--- a/include/linux/oom.h
+++ b/include/linux/oom.h
@@ -7,7 +7,6 @@
 #include <linux/types.h>
 #include <linux/nodemask.h>
 #include <uapi/linux/oom.h>
-#include <linux/sched/coredump.h> /* MMF_* */
 #include <linux/mm.h> /* VM_FAULT* */
 
 struct zonelist;
diff --git a/include/linux/sched/coredump.h b/include/linux/sched/coredump.h
index e62ff805cfc9..6eb65ceed213 100644
--- a/include/linux/sched/coredump.h
+++ b/include/linux/sched/coredump.h
@@ -8,12 +8,6 @@
 #define SUID_DUMP_USER		1	/* Dump as user of process */
 #define SUID_DUMP_ROOT		2	/* Dump as root */
 
-/* mm flags */
-
-/* for SUID_DUMP_* above */
-#define MMF_DUMPABLE_BITS 2
-#define MMF_DUMPABLE_MASK ((1 << MMF_DUMPABLE_BITS) - 1)
-
 extern void set_dumpable(struct mm_struct *mm, int value);
 /*
  * This returns the actual value of the suid_dumpable flag. For things
@@ -31,80 +25,4 @@ static inline int get_dumpable(struct mm_struct *mm)
 	return __get_dumpable(mm->flags);
 }
 
-/* coredump filter bits */
-#define MMF_DUMP_ANON_PRIVATE	2
-#define MMF_DUMP_ANON_SHARED	3
-#define MMF_DUMP_MAPPED_PRIVATE	4
-#define MMF_DUMP_MAPPED_SHARED	5
-#define MMF_DUMP_ELF_HEADERS	6
-#define MMF_DUMP_HUGETLB_PRIVATE 7
-#define MMF_DUMP_HUGETLB_SHARED  8
-#define MMF_DUMP_DAX_PRIVATE	9
-#define MMF_DUMP_DAX_SHARED	10
-
-#define MMF_DUMP_FILTER_SHIFT	MMF_DUMPABLE_BITS
-#define MMF_DUMP_FILTER_BITS	9
-#define MMF_DUMP_FILTER_MASK \
-	(((1 << MMF_DUMP_FILTER_BITS) - 1) << MMF_DUMP_FILTER_SHIFT)
-#define MMF_DUMP_FILTER_DEFAULT \
-	((1 << MMF_DUMP_ANON_PRIVATE) |	(1 << MMF_DUMP_ANON_SHARED) |\
-	 (1 << MMF_DUMP_HUGETLB_PRIVATE) | MMF_DUMP_MASK_DEFAULT_ELF)
-
-#ifdef CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS
-# define MMF_DUMP_MASK_DEFAULT_ELF	(1 << MMF_DUMP_ELF_HEADERS)
-#else
-# define MMF_DUMP_MASK_DEFAULT_ELF	0
-#endif
-					/* leave room for more dump flags */
-#define MMF_VM_MERGEABLE	16	/* KSM may merge identical pages */
-#define MMF_VM_HUGEPAGE		17	/* set when mm is available for
-					   khugepaged */
-/*
- * This one-shot flag is dropped due to necessity of changing exe once again
- * on NFS restore
- */
-//#define MMF_EXE_FILE_CHANGED	18	/* see prctl_set_mm_exe_file() */
-
-#define MMF_HAS_UPROBES		19	/* has uprobes */
-#define MMF_RECALC_UPROBES	20	/* MMF_HAS_UPROBES can be wrong */
-#define MMF_OOM_SKIP		21	/* mm is of no interest for the OOM killer */
-#define MMF_UNSTABLE		22	/* mm is unstable for copy_from_user */
-#define MMF_HUGE_ZERO_PAGE	23      /* mm has ever used the global huge zero page */
-#define MMF_DISABLE_THP		24	/* disable THP for all VMAs */
-#define MMF_DISABLE_THP_MASK	(1 << MMF_DISABLE_THP)
-#define MMF_OOM_REAP_QUEUED	25	/* mm was queued for oom_reaper */
-#define MMF_MULTIPROCESS	26	/* mm is shared between processes */
-/*
- * MMF_HAS_PINNED: Whether this mm has pinned any pages.  This can be either
- * replaced in the future by mm.pinned_vm when it becomes stable, or grow into
- * a counter on its own. We're aggresive on this bit for now: even if the
- * pinned pages were unpinned later on, we'll still keep this bit set for the
- * lifecycle of this mm, just for simplicity.
- */
-#define MMF_HAS_PINNED		27	/* FOLL_PIN has run, never cleared */
-
-#define MMF_HAS_MDWE		28
-#define MMF_HAS_MDWE_MASK	(1 << MMF_HAS_MDWE)
-
-
-#define MMF_HAS_MDWE_NO_INHERIT	29
-
-#define MMF_VM_MERGE_ANY	30
-#define MMF_VM_MERGE_ANY_MASK	(1 << MMF_VM_MERGE_ANY)
-
-#define MMF_TOPDOWN		31	/* mm searches top down by default */
-#define MMF_TOPDOWN_MASK	(1 << MMF_TOPDOWN)
-
-#define MMF_INIT_MASK		(MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK |\
-				 MMF_DISABLE_THP_MASK | MMF_HAS_MDWE_MASK |\
-				 MMF_VM_MERGE_ANY_MASK | MMF_TOPDOWN_MASK)
-
-static inline unsigned long mmf_init_flags(unsigned long flags)
-{
-	if (flags & (1UL << MMF_HAS_MDWE_NO_INHERIT))
-		flags &= ~((1UL << MMF_HAS_MDWE) |
-			   (1UL << MMF_HAS_MDWE_NO_INHERIT));
-	return flags & MMF_INIT_MASK;
-}
-
 #endif /* _LINUX_SCHED_COREDUMP_H */
-- 
cgit v1.2.3


From 66efef9b1a7d6cc725efa9395fb390483ad5b555 Mon Sep 17 00:00:00 2001
From: Qi Zheng <zhengqi.arch@bytedance.com>
Date: Thu, 26 Sep 2024 14:46:14 +0800
Subject: mm: pgtable: introduce pte_offset_map_{ro|rw}_nolock()

Patch series "introduce pte_offset_map_{ro|rw}_nolock()", v5.

As proposed by David Hildenbrand [1], this series introduces the following
two new helper functions to replace pte_offset_map_nolock().

1. pte_offset_map_ro_nolock()
2. pte_offset_map_rw_nolock()

As the name suggests, pte_offset_map_ro_nolock() is used for read-only
case.  In this case, only read-only operations will be performed on PTE
page after the PTL is held.  The RCU lock in pte_offset_map_nolock() will
ensure that the PTE page will not be freed, and there is no need to worry
about whether the pmd entry is modified.  Therefore
pte_offset_map_ro_nolock() is just a renamed version of
pte_offset_map_nolock().

pte_offset_map_rw_nolock() is used for may-write case.  In this case, the
pte or pmd entry may be modified after the PTL is held, so we need to
ensure that the pmd entry has not been modified concurrently.  So in
addition to the name change, it also outputs the pmdval when successful.
The users should make sure the page table is stable like checking
pte_same() or checking pmd_same() by using the output pmdval before
performing the write operations.

This series will convert all pte_offset_map_nolock() into the above two
helper functions one by one, and finally completely delete it.

This also a preparation for reclaiming the empty user PTE page table
pages.


This patch (of 13):

Currently, the usage of pte_offset_map_nolock() can be divided into the
following two cases:

1) After acquiring PTL, only read-only operations are performed on the PTE
   page. In this case, the RCU lock in pte_offset_map_nolock() will ensure
   that the PTE page will not be freed, and there is no need to worry
   about whether the pmd entry is modified.

2) After acquiring PTL, the pte or pmd entries may be modified. At this
   time, we need to ensure that the pmd entry has not been modified
   concurrently.

To more clearing distinguish between these two cases, this commit
introduces two new helper functions to replace pte_offset_map_nolock().
For 1), just rename it to pte_offset_map_ro_nolock().  For 2), in addition
to changing the name to pte_offset_map_rw_nolock(), it also outputs the
pmdval when successful.  It is applicable for may-write cases where any
modification operations to the page table may happen after the
corresponding spinlock is held afterwards.  But the users should make sure
the page table is stable like checking pte_same() or checking pmd_same()
by using the output pmdval before performing the write operations.

Note: "RO" / "RW" expresses the intended semantics, not that the *kmap*
will be read-only/read-write protected.

Subsequent commits will convert pte_offset_map_nolock() into the above
two functions one by one, and finally completely delete it.

Link: https://lkml.kernel.org/r/cover.1727332572.git.zhengqi.arch@bytedance.com
Link: https://lkml.kernel.org/r/5aeecfa131600a454b1f3a038a1a54282ca3b856.1727332572.git.zhengqi.arch@bytedance.com
Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com>
Reviewed-by: Muchun Song <muchun.song@linux.dev>
Acked-by: David Hildenbrand <david@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Mike Rapoport (Microsoft) <rppt@kernel.org>
Cc: Peter Xu <peterx@redhat.com>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Cc: Vishal Moola (Oracle) <vishal.moola@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/mm.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 61fff5d34ed5..0cf45d4b7286 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -3017,6 +3017,11 @@ static inline pte_t *pte_offset_map_lock(struct mm_struct *mm, pmd_t *pmd,
 
 pte_t *pte_offset_map_nolock(struct mm_struct *mm, pmd_t *pmd,
 			unsigned long addr, spinlock_t **ptlp);
+pte_t *pte_offset_map_ro_nolock(struct mm_struct *mm, pmd_t *pmd,
+				unsigned long addr, spinlock_t **ptlp);
+pte_t *pte_offset_map_rw_nolock(struct mm_struct *mm, pmd_t *pmd,
+				unsigned long addr, pmd_t *pmdvalp,
+				spinlock_t **ptlp);
 
 #define pte_unmap_unlock(pte, ptl)	do {		\
 	spin_unlock(ptl);				\
-- 
cgit v1.2.3


From 583e66debd1d5aa8c401aebe924c7406e15579a7 Mon Sep 17 00:00:00 2001
From: Qi Zheng <zhengqi.arch@bytedance.com>
Date: Thu, 26 Sep 2024 14:46:26 +0800
Subject: mm: pgtable: remove pte_offset_map_nolock()

Now no users are using the pte_offset_map_nolock(), remove it.

Link: https://lkml.kernel.org/r/d04f9bbbcde048fb6ffa6f2bdbc6f9b22d5286f9.1727332572.git.zhengqi.arch@bytedance.com
Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com>
Reviewed-by: Muchun Song <muchun.song@linux.dev>
Acked-by: David Hildenbrand <david@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Mike Rapoport (Microsoft) <rppt@kernel.org>
Cc: Peter Xu <peterx@redhat.com>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Cc: Vishal Moola (Oracle) <vishal.moola@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/mm.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 0cf45d4b7286..8f5394d75ce2 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -3015,8 +3015,6 @@ static inline pte_t *pte_offset_map_lock(struct mm_struct *mm, pmd_t *pmd,
 	return pte;
 }
 
-pte_t *pte_offset_map_nolock(struct mm_struct *mm, pmd_t *pmd,
-			unsigned long addr, spinlock_t **ptlp);
 pte_t *pte_offset_map_ro_nolock(struct mm_struct *mm, pmd_t *pmd,
 				unsigned long addr, spinlock_t **ptlp);
 pte_t *pte_offset_map_rw_nolock(struct mm_struct *mm, pmd_t *pmd,
-- 
cgit v1.2.3


From 473c371254d2c9906c286c939eaa99d0fac13e38 Mon Sep 17 00:00:00 2001
From: Zhaoyang Huang <zhaoyang.huang@unisoc.com>
Date: Thu, 26 Sep 2024 13:06:47 +0800
Subject: mm: migrate LRU_REFS_MASK bits in folio_migrate_flags

Bits of LRU_REFS_MASK are not inherited during migration which lead to new
folio start from tier0 when MGLRU enabled.  Try to bring as much bits of
folio->flags as possible since compaction and alloc_contig_range which
introduce migration do happen at times.

Link: https://lkml.kernel.org/r/20240926050647.5653-1-zhaoyang.huang@unisoc.com
Signed-off-by: Zhaoyang Huang <zhaoyang.huang@unisoc.com>
Suggested-by: Yu Zhao <yuzhao@google.com>
Acked-by: David Hildenbrand <david@redhat.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Yu Zhao <yuzhao@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/mm_inline.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include')

diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
index f4fe593c1400..6f801c7b36e2 100644
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
@@ -291,6 +291,12 @@ static inline bool lru_gen_del_folio(struct lruvec *lruvec, struct folio *folio,
 	return true;
 }
 
+static inline void folio_migrate_refs(struct folio *new, struct folio *old)
+{
+	unsigned long refs = READ_ONCE(old->flags) & LRU_REFS_MASK;
+
+	set_mask_bits(&new->flags, LRU_REFS_MASK, refs);
+}
 #else /* !CONFIG_LRU_GEN */
 
 static inline bool lru_gen_enabled(void)
@@ -313,6 +319,10 @@ static inline bool lru_gen_del_folio(struct lruvec *lruvec, struct folio *folio,
 	return false;
 }
 
+static inline void folio_migrate_refs(struct folio *new, struct folio *old)
+{
+
+}
 #endif /* CONFIG_LRU_GEN */
 
 static __always_inline
-- 
cgit v1.2.3


From 43b53bca61b032270395152ba93d6e9ead47f0de Mon Sep 17 00:00:00 2001
From: Imran Shaik <quic_imrashai@quicinc.com>
Date: Thu, 22 Aug 2024 16:57:18 +0530
Subject: dt-bindings: clock: qcom: Add GCC clocks for QCS8300

Add support for qcom global clock controller bindings for QCS8300 platform.

Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Signed-off-by: Imran Shaik <quic_imrashai@quicinc.com>
Link: https://lore.kernel.org/r/20240822-qcs8300-gcc-v2-1-b310dfa70ad8@quicinc.com
Signed-off-by: Bjorn Andersson <andersson@kernel.org>
---
 include/dt-bindings/clock/qcom,qcs8300-gcc.h | 234 +++++++++++++++++++++++++++
 1 file changed, 234 insertions(+)
 create mode 100644 include/dt-bindings/clock/qcom,qcs8300-gcc.h

(limited to 'include')

diff --git a/include/dt-bindings/clock/qcom,qcs8300-gcc.h b/include/dt-bindings/clock/qcom,qcs8300-gcc.h
new file mode 100644
index 000000000000..a0083b1d2126
--- /dev/null
+++ b/include/dt-bindings/clock/qcom,qcs8300-gcc.h
@@ -0,0 +1,234 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/*
+ * Copyright (c) 2024, Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#ifndef _DT_BINDINGS_CLK_QCOM_GCC_QCS8300_H
+#define _DT_BINDINGS_CLK_QCOM_GCC_QCS8300_H
+
+/* GCC clocks */
+#define GCC_GPLL0						0
+#define GCC_GPLL0_OUT_EVEN					1
+#define GCC_GPLL1						2
+#define GCC_GPLL4						3
+#define GCC_GPLL7						4
+#define GCC_GPLL9						5
+#define GCC_AGGRE_NOC_QUPV3_AXI_CLK				6
+#define GCC_AGGRE_UFS_PHY_AXI_CLK				7
+#define GCC_AGGRE_USB2_PRIM_AXI_CLK				8
+#define GCC_AGGRE_USB3_PRIM_AXI_CLK				9
+#define GCC_AHB2PHY0_CLK					10
+#define GCC_AHB2PHY2_CLK					11
+#define GCC_AHB2PHY3_CLK					12
+#define GCC_BOOT_ROM_AHB_CLK					13
+#define GCC_CAMERA_AHB_CLK					14
+#define GCC_CAMERA_HF_AXI_CLK					15
+#define GCC_CAMERA_SF_AXI_CLK					16
+#define GCC_CAMERA_THROTTLE_XO_CLK				17
+#define GCC_CAMERA_XO_CLK					18
+#define GCC_CFG_NOC_USB2_PRIM_AXI_CLK				19
+#define GCC_CFG_NOC_USB3_PRIM_AXI_CLK				20
+#define GCC_DDRSS_GPU_AXI_CLK					21
+#define GCC_DISP_AHB_CLK					22
+#define GCC_DISP_HF_AXI_CLK					23
+#define GCC_DISP_XO_CLK						24
+#define GCC_EDP_REF_CLKREF_EN					25
+#define GCC_EMAC0_AXI_CLK					26
+#define GCC_EMAC0_PHY_AUX_CLK					27
+#define GCC_EMAC0_PHY_AUX_CLK_SRC				28
+#define GCC_EMAC0_PTP_CLK					29
+#define GCC_EMAC0_PTP_CLK_SRC					30
+#define GCC_EMAC0_RGMII_CLK					31
+#define GCC_EMAC0_RGMII_CLK_SRC					32
+#define GCC_EMAC0_SLV_AHB_CLK					33
+#define GCC_GP1_CLK						34
+#define GCC_GP1_CLK_SRC						35
+#define GCC_GP2_CLK						36
+#define GCC_GP2_CLK_SRC						37
+#define GCC_GP3_CLK						38
+#define GCC_GP3_CLK_SRC						39
+#define GCC_GP4_CLK						40
+#define GCC_GP4_CLK_SRC						41
+#define GCC_GP5_CLK						42
+#define GCC_GP5_CLK_SRC						43
+#define GCC_GPU_CFG_AHB_CLK					44
+#define GCC_GPU_GPLL0_CLK_SRC					45
+#define GCC_GPU_GPLL0_DIV_CLK_SRC				46
+#define GCC_GPU_MEMNOC_GFX_CENTER_PIPELINE_CLK			47
+#define GCC_GPU_MEMNOC_GFX_CLK					48
+#define GCC_GPU_SNOC_DVM_GFX_CLK				49
+#define GCC_GPU_TCU_THROTTLE_AHB_CLK				50
+#define GCC_GPU_TCU_THROTTLE_CLK				51
+#define GCC_PCIE_0_AUX_CLK					52
+#define GCC_PCIE_0_AUX_CLK_SRC					53
+#define GCC_PCIE_0_CFG_AHB_CLK					54
+#define GCC_PCIE_0_MSTR_AXI_CLK					55
+#define GCC_PCIE_0_PHY_AUX_CLK					56
+#define GCC_PCIE_0_PHY_AUX_CLK_SRC				57
+#define GCC_PCIE_0_PHY_RCHNG_CLK				58
+#define GCC_PCIE_0_PHY_RCHNG_CLK_SRC				59
+#define GCC_PCIE_0_PIPE_CLK					60
+#define GCC_PCIE_0_PIPE_CLK_SRC					61
+#define GCC_PCIE_0_PIPE_DIV_CLK_SRC				62
+#define GCC_PCIE_0_PIPEDIV2_CLK					63
+#define GCC_PCIE_0_SLV_AXI_CLK					64
+#define GCC_PCIE_0_SLV_Q2A_AXI_CLK				65
+#define GCC_PCIE_1_AUX_CLK					66
+#define GCC_PCIE_1_AUX_CLK_SRC					67
+#define GCC_PCIE_1_CFG_AHB_CLK					68
+#define GCC_PCIE_1_MSTR_AXI_CLK					69
+#define GCC_PCIE_1_PHY_AUX_CLK					70
+#define GCC_PCIE_1_PHY_AUX_CLK_SRC				71
+#define GCC_PCIE_1_PHY_RCHNG_CLK				72
+#define GCC_PCIE_1_PHY_RCHNG_CLK_SRC				73
+#define GCC_PCIE_1_PIPE_CLK					74
+#define GCC_PCIE_1_PIPE_CLK_SRC					75
+#define GCC_PCIE_1_PIPE_DIV_CLK_SRC				76
+#define GCC_PCIE_1_PIPEDIV2_CLK					77
+#define GCC_PCIE_1_SLV_AXI_CLK					78
+#define GCC_PCIE_1_SLV_Q2A_AXI_CLK				79
+#define GCC_PCIE_CLKREF_EN					80
+#define GCC_PCIE_THROTTLE_CFG_CLK				81
+#define GCC_PDM2_CLK						82
+#define GCC_PDM2_CLK_SRC					83
+#define GCC_PDM_AHB_CLK						84
+#define GCC_PDM_XO4_CLK						85
+#define GCC_QMIP_CAMERA_NRT_AHB_CLK				86
+#define GCC_QMIP_CAMERA_RT_AHB_CLK				87
+#define GCC_QMIP_DISP_AHB_CLK					88
+#define GCC_QMIP_DISP_ROT_AHB_CLK				89
+#define GCC_QMIP_VIDEO_CVP_AHB_CLK				90
+#define GCC_QMIP_VIDEO_VCODEC_AHB_CLK				91
+#define GCC_QMIP_VIDEO_VCPU_AHB_CLK				92
+#define GCC_QUPV3_WRAP0_CORE_2X_CLK				93
+#define GCC_QUPV3_WRAP0_CORE_CLK				94
+#define GCC_QUPV3_WRAP0_S0_CLK					95
+#define GCC_QUPV3_WRAP0_S0_CLK_SRC				96
+#define GCC_QUPV3_WRAP0_S1_CLK					97
+#define GCC_QUPV3_WRAP0_S1_CLK_SRC				98
+#define GCC_QUPV3_WRAP0_S2_CLK					99
+#define GCC_QUPV3_WRAP0_S2_CLK_SRC				100
+#define GCC_QUPV3_WRAP0_S3_CLK					101
+#define GCC_QUPV3_WRAP0_S3_CLK_SRC				102
+#define GCC_QUPV3_WRAP0_S4_CLK					103
+#define GCC_QUPV3_WRAP0_S4_CLK_SRC				104
+#define GCC_QUPV3_WRAP0_S5_CLK					105
+#define GCC_QUPV3_WRAP0_S5_CLK_SRC				106
+#define GCC_QUPV3_WRAP0_S6_CLK					107
+#define GCC_QUPV3_WRAP0_S6_CLK_SRC				108
+#define GCC_QUPV3_WRAP0_S7_CLK					109
+#define GCC_QUPV3_WRAP0_S7_CLK_SRC				110
+#define GCC_QUPV3_WRAP1_CORE_2X_CLK				111
+#define GCC_QUPV3_WRAP1_CORE_CLK				112
+#define GCC_QUPV3_WRAP1_S0_CLK					113
+#define GCC_QUPV3_WRAP1_S0_CLK_SRC				114
+#define GCC_QUPV3_WRAP1_S1_CLK					115
+#define GCC_QUPV3_WRAP1_S1_CLK_SRC				116
+#define GCC_QUPV3_WRAP1_S2_CLK					117
+#define GCC_QUPV3_WRAP1_S2_CLK_SRC				118
+#define GCC_QUPV3_WRAP1_S3_CLK					119
+#define GCC_QUPV3_WRAP1_S3_CLK_SRC				120
+#define GCC_QUPV3_WRAP1_S4_CLK					121
+#define GCC_QUPV3_WRAP1_S4_CLK_SRC				122
+#define GCC_QUPV3_WRAP1_S5_CLK					123
+#define GCC_QUPV3_WRAP1_S5_CLK_SRC				124
+#define GCC_QUPV3_WRAP1_S6_CLK					125
+#define GCC_QUPV3_WRAP1_S6_CLK_SRC				126
+#define GCC_QUPV3_WRAP1_S7_CLK					127
+#define GCC_QUPV3_WRAP1_S7_CLK_SRC				128
+#define GCC_QUPV3_WRAP3_CORE_2X_CLK				129
+#define GCC_QUPV3_WRAP3_CORE_CLK				130
+#define GCC_QUPV3_WRAP3_QSPI_CLK				131
+#define GCC_QUPV3_WRAP3_S0_CLK					132
+#define GCC_QUPV3_WRAP3_S0_CLK_SRC				133
+#define GCC_QUPV3_WRAP3_S0_DIV_CLK_SRC				134
+#define GCC_QUPV3_WRAP_0_M_AHB_CLK				135
+#define GCC_QUPV3_WRAP_0_S_AHB_CLK				136
+#define GCC_QUPV3_WRAP_1_M_AHB_CLK				137
+#define GCC_QUPV3_WRAP_1_S_AHB_CLK				138
+#define GCC_QUPV3_WRAP_3_M_AHB_CLK				139
+#define GCC_QUPV3_WRAP_3_S_AHB_CLK				140
+#define GCC_SDCC1_AHB_CLK					141
+#define GCC_SDCC1_APPS_CLK					142
+#define GCC_SDCC1_APPS_CLK_SRC					143
+#define GCC_SDCC1_ICE_CORE_CLK					144
+#define GCC_SDCC1_ICE_CORE_CLK_SRC				145
+#define GCC_SGMI_CLKREF_EN					146
+#define GCC_UFS_PHY_AHB_CLK					147
+#define GCC_UFS_PHY_AXI_CLK					148
+#define GCC_UFS_PHY_AXI_CLK_SRC					149
+#define GCC_UFS_PHY_ICE_CORE_CLK				150
+#define GCC_UFS_PHY_ICE_CORE_CLK_SRC				151
+#define GCC_UFS_PHY_PHY_AUX_CLK					152
+#define GCC_UFS_PHY_PHY_AUX_CLK_SRC				153
+#define GCC_UFS_PHY_RX_SYMBOL_0_CLK				154
+#define GCC_UFS_PHY_RX_SYMBOL_0_CLK_SRC				155
+#define GCC_UFS_PHY_RX_SYMBOL_1_CLK				156
+#define GCC_UFS_PHY_RX_SYMBOL_1_CLK_SRC				157
+#define GCC_UFS_PHY_TX_SYMBOL_0_CLK				158
+#define GCC_UFS_PHY_TX_SYMBOL_0_CLK_SRC				159
+#define GCC_UFS_PHY_UNIPRO_CORE_CLK				160
+#define GCC_UFS_PHY_UNIPRO_CORE_CLK_SRC				161
+#define GCC_USB20_MASTER_CLK					162
+#define GCC_USB20_MASTER_CLK_SRC				163
+#define GCC_USB20_MOCK_UTMI_CLK					164
+#define GCC_USB20_MOCK_UTMI_CLK_SRC				165
+#define GCC_USB20_MOCK_UTMI_POSTDIV_CLK_SRC			166
+#define GCC_USB20_SLEEP_CLK					167
+#define GCC_USB30_PRIM_MASTER_CLK				168
+#define GCC_USB30_PRIM_MASTER_CLK_SRC				169
+#define GCC_USB30_PRIM_MOCK_UTMI_CLK				170
+#define GCC_USB30_PRIM_MOCK_UTMI_CLK_SRC			171
+#define GCC_USB30_PRIM_MOCK_UTMI_POSTDIV_CLK_SRC		172
+#define GCC_USB30_PRIM_SLEEP_CLK				173
+#define GCC_USB3_PRIM_PHY_AUX_CLK				174
+#define GCC_USB3_PRIM_PHY_AUX_CLK_SRC				175
+#define GCC_USB3_PRIM_PHY_COM_AUX_CLK				176
+#define GCC_USB3_PRIM_PHY_PIPE_CLK				177
+#define GCC_USB3_PRIM_PHY_PIPE_CLK_SRC				178
+#define GCC_USB_CLKREF_EN					179
+#define GCC_VIDEO_AHB_CLK					180
+#define GCC_VIDEO_AXI0_CLK					181
+#define GCC_VIDEO_AXI1_CLK					182
+#define GCC_VIDEO_XO_CLK					183
+
+/* GCC power domains */
+#define GCC_EMAC0_GDSC						0
+#define GCC_PCIE_0_GDSC						1
+#define GCC_PCIE_1_GDSC						2
+#define GCC_UFS_PHY_GDSC					3
+#define GCC_USB20_PRIM_GDSC					4
+#define GCC_USB30_PRIM_GDSC					5
+
+/* GCC resets */
+#define GCC_EMAC0_BCR						0
+#define GCC_PCIE_0_BCR						1
+#define GCC_PCIE_0_LINK_DOWN_BCR				2
+#define GCC_PCIE_0_NOCSR_COM_PHY_BCR				3
+#define GCC_PCIE_0_PHY_BCR					4
+#define GCC_PCIE_0_PHY_NOCSR_COM_PHY_BCR			5
+#define GCC_PCIE_1_BCR						6
+#define GCC_PCIE_1_LINK_DOWN_BCR				7
+#define GCC_PCIE_1_NOCSR_COM_PHY_BCR				8
+#define GCC_PCIE_1_PHY_BCR					9
+#define GCC_PCIE_1_PHY_NOCSR_COM_PHY_BCR			10
+#define GCC_SDCC1_BCR						11
+#define GCC_UFS_PHY_BCR						12
+#define GCC_USB20_PRIM_BCR					13
+#define GCC_USB2_PHY_PRIM_BCR					14
+#define GCC_USB2_PHY_SEC_BCR					15
+#define GCC_USB30_PRIM_BCR					16
+#define GCC_USB3_DP_PHY_PRIM_BCR				17
+#define GCC_USB3_PHY_PRIM_BCR					18
+#define GCC_USB3_PHY_TERT_BCR					19
+#define GCC_USB3_UNIPHY_MP0_BCR					20
+#define GCC_USB3_UNIPHY_MP1_BCR					21
+#define GCC_USB3PHY_PHY_PRIM_BCR				22
+#define GCC_USB3UNIPHY_PHY_MP0_BCR				23
+#define GCC_USB3UNIPHY_PHY_MP1_BCR				24
+#define GCC_USB_PHY_CFG_AHB2PHY_BCR				25
+#define GCC_VIDEO_BCR						26
+#define GCC_VIDEO_AXI0_CLK_ARES					27
+#define GCC_VIDEO_AXI1_CLK_ARES					28
+
+#endif
-- 
cgit v1.2.3


From 4cc0473d7754d387680bdf0728eb29f0ec8834bf Mon Sep 17 00:00:00 2001
From: Yafang Shao <laoar.shao@gmail.com>
Date: Mon, 7 Oct 2024 22:49:05 +0800
Subject: get rid of __get_task_comm()

Patch series "Improve the copy of task comm", v8.

Using {memcpy,strncpy,strcpy,kstrdup} to copy the task comm relies on the
length of task comm.  Changes in the task comm could result in a
destination string that is overflow.  Therefore, we should explicitly
ensure the destination string is always NUL-terminated, regardless of the
task comm.  This approach will facilitate future extensions to the task
comm.

As suggested by Linus [0], we can identify all relevant code with the
following git grep command:

  git grep 'memcpy.*->comm\>'
  git grep 'kstrdup.*->comm\>'
  git grep 'strncpy.*->comm\>'
  git grep 'strcpy.*->comm\>'

PATCH #2~#4:   memcpy
PATCH #5~#6:   kstrdup
PATCH #7:      strcpy

Please note that strncpy() is not included in this series as it is being
tracked by another effort. [1]


This patch (of 7):

We want to eliminate the use of __get_task_comm() for the following
reasons:

- The task_lock() is unnecessary
  Quoted from Linus [0]:
  : Since user space can randomly change their names anyway, using locking
  : was always wrong for readers (for writers it probably does make sense
  : to have some lock - although practically speaking nobody cares there
  : either, but at least for a writer some kind of race could have
  : long-term mixed results

Link: https://lkml.kernel.org/r/20241007144911.27693-1-laoar.shao@gmail.com
Link: https://lkml.kernel.org/r/20241007144911.27693-2-laoar.shao@gmail.com
Link: https://lore.kernel.org/all/CAHk-=wivfrF0_zvf+oj6==Sh=-npJooP8chLPEfaFV0oNYTTBA@mail.gmail.com [0]
Link: https://lore.kernel.org/all/CAHk-=whWtUC-AjmGJveAETKOMeMFSTwKwu99v7+b6AyHMmaDFA@mail.gmail.com/
Link: https://lore.kernel.org/all/CAHk-=wjAmmHUg6vho1KjzQi2=psR30+CogFd4aXrThr2gsiS4g@mail.gmail.com/ [0]
Link: https://github.com/KSPP/linux/issues/90 [1]
Signed-off-by: Yafang Shao <laoar.shao@gmail.com>
Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Christian Brauner <brauner@kernel.org>
Cc: Jan Kara <jack@suse.cz>
Cc: Eric Biederman <ebiederm@xmission.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Alexei Starovoitov <alexei.starovoitov@gmail.com>
Cc: Matus Jokay <matus.jokay@stuba.sk>
Cc: Alejandro Colomar <alx@kernel.org>
Cc: "Serge E. Hallyn" <serge@hallyn.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Justin Stitt <justinstitt@google.com>
Cc: Steven Rostedt (Google) <rostedt@goodmis.org>
Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Cc: Andy Shevchenko <andy.shevchenko@gmail.com>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: David Airlie <airlied@gmail.com>
Cc: Eric Paris <eparis@redhat.com>
Cc: James Morris <jmorris@namei.org>
Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Maxime Ripard <mripard@kernel.org>
Cc: Ondrej Mosnacek <omosnace@redhat.com>
Cc: Paul Moore <paul@paul-moore.com>
Cc: Quentin Monnet <qmo@kernel.org>
Cc: Simon Horman <horms@kernel.org>
Cc: Stephen Smalley <stephen.smalley.work@gmail.com>
Cc: Thomas Zimmermann <tzimmermann@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/sched.h | 28 ++++++++++++++++++++++------
 1 file changed, 22 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index bb343136ddd0..67718d5591dd 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1121,9 +1121,12 @@ struct task_struct {
 	/*
 	 * executable name, excluding path.
 	 *
-	 * - normally initialized setup_new_exec()
-	 * - access it with [gs]et_task_comm()
-	 * - lock it with task_lock()
+	 * - normally initialized begin_new_exec()
+	 * - set it with set_task_comm()
+	 *   - strscpy_pad() to ensure it is always NUL-terminated and
+	 *     zero-padded
+	 *   - task_lock() to ensure the operation is atomic and the name is
+	 *     fully updated.
 	 */
 	char				comm[TASK_COMM_LEN];
 
@@ -1938,10 +1941,23 @@ static inline void set_task_comm(struct task_struct *tsk, const char *from)
 	__set_task_comm(tsk, from, false);
 }
 
-extern char *__get_task_comm(char *to, size_t len, struct task_struct *tsk);
+/*
+ * - Why not use task_lock()?
+ *   User space can randomly change their names anyway, so locking for readers
+ *   doesn't make sense. For writers, locking is probably necessary, as a race
+ *   condition could lead to long-term mixed results.
+ *   The strscpy_pad() in __set_task_comm() can ensure that the task comm is
+ *   always NUL-terminated and zero-padded. Therefore the race condition between
+ *   reader and writer is not an issue.
+ *
+ * - BUILD_BUG_ON() can help prevent the buf from being truncated.
+ *   Since the callers don't perform any return value checks, this safeguard is
+ *   necessary.
+ */
 #define get_task_comm(buf, tsk) ({			\
-	BUILD_BUG_ON(sizeof(buf) != TASK_COMM_LEN);	\
-	__get_task_comm(buf, sizeof(buf), tsk);		\
+	BUILD_BUG_ON(sizeof(buf) < TASK_COMM_LEN);	\
+	strscpy_pad(buf, (tsk)->comm);			\
+	buf;						\
 })
 
 #ifdef CONFIG_SMP
-- 
cgit v1.2.3


From f2fa0fd4e7db8326a77618962714924b64f5f889 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= <linux@weissschuh.net>
Date: Sat, 12 Oct 2024 19:52:53 +0200
Subject: reboot: move reboot_notifier_list to kernel/reboot.c
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

All the functions related to the reboot notifier list are in
kernel/reboot.c.  Move the list itself, too.  As there are no direct users
anymore, make the declaration static.

Link: https://lkml.kernel.org/r/20241012-reboot_notifier_list-v1-1-6093bb9455ce@weissschuh.net
Signed-off-by: Thomas Weißschuh <linux@weissschuh.net>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/notifier.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/notifier.h b/include/linux/notifier.h
index 45702bdcbceb..b42e64734968 100644
--- a/include/linux/notifier.h
+++ b/include/linux/notifier.h
@@ -237,7 +237,5 @@ static inline int notifier_to_errno(int ret)
 #define KBD_KEYSYM		0x0004 /* Keyboard keysym */
 #define KBD_POST_KEYSYM		0x0005 /* Called after keyboard keysym interpretation */
 
-extern struct blocking_notifier_head reboot_notifier_list;
-
 #endif /* __KERNEL__ */
 #endif /* _LINUX_NOTIFIER_H */
-- 
cgit v1.2.3


From a9d38bcd7337f051912174ebfc500e1cef73982e Mon Sep 17 00:00:00 2001
From: Sui Jingfeng <sui.jingfeng@linux.dev>
Date: Sat, 12 Oct 2024 18:08:17 +0800
Subject: scatterlist: fix a typo

Replace the 'One' with 'On'.

Link: https://lkml.kernel.org/r/20241012100817.323007-1-sui.jingfeng@linux.dev
Fixes: af2880ec4402 ("scatterlist: add dedicated config for DMA flags")
Signed-off-by: Sui Jingfeng <sui.jingfeng@linux.dev>
Reviewed-by: Petr Tesarik <petr@tesarici.cz>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Michael Kelley <mhklinux@outlook.com>
Cc: Robin Murphy <robin.murphy@arm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/scatterlist.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h
index e61d164622db..c5e2239b550e 100644
--- a/include/linux/scatterlist.h
+++ b/include/linux/scatterlist.h
@@ -273,7 +273,7 @@ static inline void sg_unmark_end(struct scatterlist *sg)
 }
 
 /*
- * One 64-bit architectures there is a 4-byte padding in struct scatterlist
+ * On 64-bit architectures there is a 4-byte padding in struct scatterlist
  * (assuming also CONFIG_NEED_SG_DMA_LENGTH is set). Use this padding for DMA
  * flags bits to indicate when a specific dma address is a bus address or the
  * buffer may have been bounced via SWIOTLB.
-- 
cgit v1.2.3


From 74ef070e325465a1b364db6a5c6859785537f835 Mon Sep 17 00:00:00 2001
From: Uros Bizjak <ubizjak@gmail.com>
Date: Mon, 21 Oct 2024 10:07:36 +0200
Subject: percpu: merge VERIFY_PERCPU_PTR() into its only user

Merge VERIFY_PERCPU_PTR() into non-CONFIG_SMP per_cpu_ptr() to make macro
similar to CONFIG_SMP per_cpu_ptr().  This will allow a follow-up patch to
refactor common code to a macro.

No functional changes, non-CONFIG_SMP per_cpu_ptr() was the only user of
VERIFY_PERCPU_PTR().

Link: https://lkml.kernel.org/r/20241021080856.48746-1-ubizjak@gmail.com
Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
Acked-by: Christoph Lameter <cl@linux.com>
Cc: Dennis Zhou <dennis@kernel.org>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/percpu-defs.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h
index 8efce7414fad..7fa88c5f4b26 100644
--- a/include/linux/percpu-defs.h
+++ b/include/linux/percpu-defs.h
@@ -254,13 +254,13 @@ do {									\
 
 #else	/* CONFIG_SMP */
 
-#define VERIFY_PERCPU_PTR(__p)						\
+#define per_cpu_ptr(ptr, cpu)						\
 ({									\
-	__verify_pcpu_ptr(__p);						\
-	(typeof(*(__p)) __kernel __force *)(__p);			\
+	(void)(cpu);							\
+	__verify_pcpu_ptr(ptr);						\
+	(typeof(*(ptr)) __kernel __force *)(ptr);			\
 })
 
-#define per_cpu_ptr(ptr, cpu)	({ (void)(cpu); VERIFY_PERCPU_PTR(ptr); })
 #define raw_cpu_ptr(ptr)	per_cpu_ptr(ptr, 0)
 #define this_cpu_ptr(ptr)	raw_cpu_ptr(ptr)
 
-- 
cgit v1.2.3


From 001217defda86d0d6a5a9e6cf77a6b813857e7e3 Mon Sep 17 00:00:00 2001
From: Uros Bizjak <ubizjak@gmail.com>
Date: Mon, 21 Oct 2024 10:07:37 +0200
Subject: percpu: introduce PERCPU_PTR() macro

Introduce PERCPU_PTR() macro to cast the percpu pointer from the percpu
address space to a generic (kernel) address space.  Use it in
per_cpu_ptr() and related SHIFT_PERCPU_PTR() macros.

Also remove common knowledge from SHIFT_PERCPU_PTR() comment, "weird cast"
is just a standard way to inform sparse of a cast from the percpu address
space to a generic address space.

Link: https://lkml.kernel.org/r/20241021080856.48746-2-ubizjak@gmail.com
Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
Acked-by: Christoph Lameter <cl@linux.com>
Cc: Dennis Zhou <dennis@kernel.org>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/percpu-defs.h | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h
index 7fa88c5f4b26..e1cf7982424f 100644
--- a/include/linux/percpu-defs.h
+++ b/include/linux/percpu-defs.h
@@ -220,15 +220,17 @@ do {									\
 	(void)__vpp_verify;						\
 } while (0)
 
+#define PERCPU_PTR(__p)							\
+	(typeof(*(__p)) __force __kernel *)(__p);
+
 #ifdef CONFIG_SMP
 
 /*
- * Add an offset to a pointer but keep the pointer as-is.  Use RELOC_HIDE()
- * to prevent the compiler from making incorrect assumptions about the
- * pointer value.  The weird cast keeps both GCC and sparse happy.
+ * Add an offset to a pointer.  Use RELOC_HIDE() to prevent the compiler
+ * from making incorrect assumptions about the pointer value.
  */
 #define SHIFT_PERCPU_PTR(__p, __offset)					\
-	RELOC_HIDE((typeof(*(__p)) __kernel __force *)(__p), (__offset))
+	RELOC_HIDE(PERCPU_PTR(__p), (__offset))
 
 #define per_cpu_ptr(ptr, cpu)						\
 ({									\
@@ -258,7 +260,7 @@ do {									\
 ({									\
 	(void)(cpu);							\
 	__verify_pcpu_ptr(ptr);						\
-	(typeof(*(ptr)) __kernel __force *)(ptr);			\
+	PERCPU_PTR(ptr);						\
 })
 
 #define raw_cpu_ptr(ptr)	per_cpu_ptr(ptr, 0)
-- 
cgit v1.2.3


From dabddd687c9e1a06241d6b4d1f66b9f2b60b3ad1 Mon Sep 17 00:00:00 2001
From: Uros Bizjak <ubizjak@gmail.com>
Date: Mon, 21 Oct 2024 10:07:38 +0200
Subject: percpu: cast percpu pointer in PERCPU_PTR() via unsigned long

Cast pointer from percpu address space to generic (kernel) address space
in PERCPU_PTR() macro via unsigned long intermediate cast [1].  This
intermediate cast is also required to avoid build failure when GCC's
strict named address space checks for x86 targets [2] are enabled.

Found by GCC's named address space checks.

[1] https://sparse.docs.kernel.org/en/latest/annotations.html#address-space-name
[2] https://gcc.gnu.org/onlinedocs/gcc/Named-Address-Spaces.html#x86-Named-Address-Spaces

Link: https://lkml.kernel.org/r/20241021080856.48746-3-ubizjak@gmail.com
Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
Acked-by: Christoph Lameter <cl@linux.com>
Cc: Dennis Zhou <dennis@kernel.org>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/percpu-defs.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h
index e1cf7982424f..35842d1e3879 100644
--- a/include/linux/percpu-defs.h
+++ b/include/linux/percpu-defs.h
@@ -221,7 +221,10 @@ do {									\
 } while (0)
 
 #define PERCPU_PTR(__p)							\
-	(typeof(*(__p)) __force __kernel *)(__p);
+({									\
+	unsigned long __pcpu_ptr = (__force unsigned long)(__p);	\
+	(typeof(*(__p)) __force __kernel *)(__pcpu_ptr);		\
+})
 
 #ifdef CONFIG_SMP
 
-- 
cgit v1.2.3


From 92a8b224b833e82d286d2100432adbac8cf8a2a1 Mon Sep 17 00:00:00 2001
From: Kuan-Wei Chiu <visitorckw@gmail.com>
Date: Sun, 20 Oct 2024 12:01:51 +0800
Subject: lib/min_heap: introduce non-inline versions of min heap API functions

Patch series "Enhance min heap API with non-inline functions and
optimizations", v2.

Add non-inline versions of the min heap API functions in lib/min_heap.c
and updates all users outside of kernel/events/core.c to use these
non-inline versions.  To mitigate the performance impact of indirect
function calls caused by the non-inline versions of the swap and compare
functions, a builtin swap has been introduced that swaps elements based on
their size.  Additionally, it micro-optimizes the efficiency of the min
heap by pre-scaling the counter, following the same approach as in
lib/sort.c.  Documentation for the min heap API has also been added to the
core-api section.


This patch (of 10):

All current min heap API functions are marked with '__always_inline'.
However, as the number of users increases, inlining these functions
everywhere leads to a increase in kernel size.

In performance-critical paths, such as when perf events are enabled and
min heap functions are called on every context switch, it is important to
retain the inline versions for optimal performance.  To balance this, the
original inline functions are kept, and additional non-inline versions of
the functions have been added in lib/min_heap.c.

Link: https://lkml.kernel.org/r/20241020040200.939973-1-visitorckw@gmail.com
Link: https://lore.kernel.org/20240522161048.8d8bbc7b153b4ecd92c50666@linux-foundation.org
Link: https://lkml.kernel.org/r/20241020040200.939973-2-visitorckw@gmail.com
Signed-off-by: Kuan-Wei Chiu <visitorckw@gmail.com>
Suggested-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Ching-Chun (Jim) Huang <jserv@ccns.ncku.edu.tw>
Cc: Coly Li <colyli@suse.de>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Kent Overstreet <kent.overstreet@linux.dev>
Cc: Kuan-Wei Chiu <visitorckw@gmail.com>
Cc: "Liang, Kan" <kan.liang@linux.intel.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Matthew Sakai <msakai@redhat.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/min_heap.h | 129 +++++++++++++++++++++++++++++++----------------
 1 file changed, 86 insertions(+), 43 deletions(-)

(limited to 'include')

diff --git a/include/linux/min_heap.h b/include/linux/min_heap.h
index 43a7b9dcf15e..0abb21173979 100644
--- a/include/linux/min_heap.h
+++ b/include/linux/min_heap.h
@@ -40,7 +40,7 @@ struct min_heap_callbacks {
 
 /* Initialize a min-heap. */
 static __always_inline
-void __min_heap_init(min_heap_char *heap, void *data, int size)
+void __min_heap_init_inline(min_heap_char *heap, void *data, int size)
 {
 	heap->nr = 0;
 	heap->size = size;
@@ -50,33 +50,33 @@ void __min_heap_init(min_heap_char *heap, void *data, int size)
 		heap->data = heap->preallocated;
 }
 
-#define min_heap_init(_heap, _data, _size)	\
-	__min_heap_init((min_heap_char *)_heap, _data, _size)
+#define min_heap_init_inline(_heap, _data, _size)	\
+	__min_heap_init_inline((min_heap_char *)_heap, _data, _size)
 
 /* Get the minimum element from the heap. */
 static __always_inline
-void *__min_heap_peek(struct min_heap_char *heap)
+void *__min_heap_peek_inline(struct min_heap_char *heap)
 {
 	return heap->nr ? heap->data : NULL;
 }
 
-#define min_heap_peek(_heap)	\
-	(__minheap_cast(_heap) __min_heap_peek((min_heap_char *)_heap))
+#define min_heap_peek_inline(_heap)	\
+	(__minheap_cast(_heap) __min_heap_peek_inline((min_heap_char *)_heap))
 
 /* Check if the heap is full. */
 static __always_inline
-bool __min_heap_full(min_heap_char *heap)
+bool __min_heap_full_inline(min_heap_char *heap)
 {
 	return heap->nr == heap->size;
 }
 
-#define min_heap_full(_heap)	\
-	__min_heap_full((min_heap_char *)_heap)
+#define min_heap_full_inline(_heap)	\
+	__min_heap_full_inline((min_heap_char *)_heap)
 
 /* Sift the element at pos down the heap. */
 static __always_inline
-void __min_heap_sift_down(min_heap_char *heap, int pos, size_t elem_size,
-		const struct min_heap_callbacks *func, void *args)
+void __min_heap_sift_down_inline(min_heap_char *heap, int pos, size_t elem_size,
+				 const struct min_heap_callbacks *func, void *args)
 {
 	void *left, *right;
 	void *data = heap->data;
@@ -108,13 +108,14 @@ void __min_heap_sift_down(min_heap_char *heap, int pos, size_t elem_size,
 	}
 }
 
-#define min_heap_sift_down(_heap, _pos, _func, _args)	\
-	__min_heap_sift_down((min_heap_char *)_heap, _pos, __minheap_obj_size(_heap), _func, _args)
+#define min_heap_sift_down_inline(_heap, _pos, _func, _args)	\
+	__min_heap_sift_down_inline((min_heap_char *)_heap, _pos, __minheap_obj_size(_heap),	\
+				    _func, _args)
 
 /* Sift up ith element from the heap, O(log2(nr)). */
 static __always_inline
-void __min_heap_sift_up(min_heap_char *heap, size_t elem_size, size_t idx,
-		const struct min_heap_callbacks *func, void *args)
+void __min_heap_sift_up_inline(min_heap_char *heap, size_t elem_size, size_t idx,
+			       const struct min_heap_callbacks *func, void *args)
 {
 	void *data = heap->data;
 	size_t parent;
@@ -128,27 +129,28 @@ void __min_heap_sift_up(min_heap_char *heap, size_t elem_size, size_t idx,
 	}
 }
 
-#define min_heap_sift_up(_heap, _idx, _func, _args)	\
-	__min_heap_sift_up((min_heap_char *)_heap, __minheap_obj_size(_heap), _idx, _func, _args)
+#define min_heap_sift_up_inline(_heap, _idx, _func, _args)	\
+	__min_heap_sift_up_inline((min_heap_char *)_heap, __minheap_obj_size(_heap), _idx,	\
+				  _func, _args)
 
 /* Floyd's approach to heapification that is O(nr). */
 static __always_inline
-void __min_heapify_all(min_heap_char *heap, size_t elem_size,
-		const struct min_heap_callbacks *func, void *args)
+void __min_heapify_all_inline(min_heap_char *heap, size_t elem_size,
+			      const struct min_heap_callbacks *func, void *args)
 {
 	int i;
 
 	for (i = heap->nr / 2 - 1; i >= 0; i--)
-		__min_heap_sift_down(heap, i, elem_size, func, args);
+		__min_heap_sift_down_inline(heap, i, elem_size, func, args);
 }
 
-#define min_heapify_all(_heap, _func, _args)	\
-	__min_heapify_all((min_heap_char *)_heap, __minheap_obj_size(_heap), _func, _args)
+#define min_heapify_all_inline(_heap, _func, _args)	\
+	__min_heapify_all_inline((min_heap_char *)_heap, __minheap_obj_size(_heap), _func, _args)
 
 /* Remove minimum element from the heap, O(log2(nr)). */
 static __always_inline
-bool __min_heap_pop(min_heap_char *heap, size_t elem_size,
-		const struct min_heap_callbacks *func, void *args)
+bool __min_heap_pop_inline(min_heap_char *heap, size_t elem_size,
+			   const struct min_heap_callbacks *func, void *args)
 {
 	void *data = heap->data;
 
@@ -158,13 +160,13 @@ bool __min_heap_pop(min_heap_char *heap, size_t elem_size,
 	/* Place last element at the root (position 0) and then sift down. */
 	heap->nr--;
 	memcpy(data, data + (heap->nr * elem_size), elem_size);
-	__min_heap_sift_down(heap, 0, elem_size, func, args);
+	__min_heap_sift_down_inline(heap, 0, elem_size, func, args);
 
 	return true;
 }
 
-#define min_heap_pop(_heap, _func, _args)	\
-	__min_heap_pop((min_heap_char *)_heap, __minheap_obj_size(_heap), _func, _args)
+#define min_heap_pop_inline(_heap, _func, _args)	\
+	__min_heap_pop_inline((min_heap_char *)_heap, __minheap_obj_size(_heap), _func, _args)
 
 /*
  * Remove the minimum element and then push the given element. The
@@ -172,22 +174,21 @@ bool __min_heap_pop(min_heap_char *heap, size_t elem_size,
  * efficient than a pop followed by a push that does 2.
  */
 static __always_inline
-void __min_heap_pop_push(min_heap_char *heap,
-		const void *element, size_t elem_size,
-		const struct min_heap_callbacks *func,
-		void *args)
+void __min_heap_pop_push_inline(min_heap_char *heap, const void *element, size_t elem_size,
+				const struct min_heap_callbacks *func, void *args)
 {
 	memcpy(heap->data, element, elem_size);
-	__min_heap_sift_down(heap, 0, elem_size, func, args);
+	__min_heap_sift_down_inline(heap, 0, elem_size, func, args);
 }
 
-#define min_heap_pop_push(_heap, _element, _func, _args)	\
-	__min_heap_pop_push((min_heap_char *)_heap, _element, __minheap_obj_size(_heap), _func, _args)
+#define min_heap_pop_push_inline(_heap, _element, _func, _args)	\
+	__min_heap_pop_push_inline((min_heap_char *)_heap, _element, __minheap_obj_size(_heap),	\
+				   _func, _args)
 
 /* Push an element on to the heap, O(log2(nr)). */
 static __always_inline
-bool __min_heap_push(min_heap_char *heap, const void *element, size_t elem_size,
-		const struct min_heap_callbacks *func, void *args)
+bool __min_heap_push_inline(min_heap_char *heap, const void *element, size_t elem_size,
+			    const struct min_heap_callbacks *func, void *args)
 {
 	void *data = heap->data;
 	int pos;
@@ -201,18 +202,19 @@ bool __min_heap_push(min_heap_char *heap, const void *element, size_t elem_size,
 	heap->nr++;
 
 	/* Sift child at pos up. */
-	__min_heap_sift_up(heap, elem_size, pos, func, args);
+	__min_heap_sift_up_inline(heap, elem_size, pos, func, args);
 
 	return true;
 }
 
-#define min_heap_push(_heap, _element, _func, _args)	\
-	__min_heap_push((min_heap_char *)_heap, _element, __minheap_obj_size(_heap), _func, _args)
+#define min_heap_push_inline(_heap, _element, _func, _args)	\
+	__min_heap_push_inline((min_heap_char *)_heap, _element, __minheap_obj_size(_heap),	\
+			       _func, _args)
 
 /* Remove ith element from the heap, O(log2(nr)). */
 static __always_inline
-bool __min_heap_del(min_heap_char *heap, size_t elem_size, size_t idx,
-		const struct min_heap_callbacks *func, void *args)
+bool __min_heap_del_inline(min_heap_char *heap, size_t elem_size, size_t idx,
+			   const struct min_heap_callbacks *func, void *args)
 {
 	void *data = heap->data;
 
@@ -224,12 +226,53 @@ bool __min_heap_del(min_heap_char *heap, size_t elem_size, size_t idx,
 	if (idx == heap->nr)
 		return true;
 	func->swp(data + (idx * elem_size), data + (heap->nr * elem_size), args);
-	__min_heap_sift_up(heap, elem_size, idx, func, args);
-	__min_heap_sift_down(heap, idx, elem_size, func, args);
+	__min_heap_sift_up_inline(heap, elem_size, idx, func, args);
+	__min_heap_sift_down_inline(heap, idx, elem_size, func, args);
 
 	return true;
 }
 
+#define min_heap_del_inline(_heap, _idx, _func, _args)	\
+	__min_heap_del_inline((min_heap_char *)_heap, __minheap_obj_size(_heap), _idx,	\
+			      _func, _args)
+
+void __min_heap_init(min_heap_char *heap, void *data, int size);
+void *__min_heap_peek(struct min_heap_char *heap);
+bool __min_heap_full(min_heap_char *heap);
+void __min_heap_sift_down(min_heap_char *heap, int pos, size_t elem_size,
+			  const struct min_heap_callbacks *func, void *args);
+void __min_heap_sift_up(min_heap_char *heap, size_t elem_size, size_t idx,
+			const struct min_heap_callbacks *func, void *args);
+void __min_heapify_all(min_heap_char *heap, size_t elem_size,
+		       const struct min_heap_callbacks *func, void *args);
+bool __min_heap_pop(min_heap_char *heap, size_t elem_size,
+		    const struct min_heap_callbacks *func, void *args);
+void __min_heap_pop_push(min_heap_char *heap, const void *element, size_t elem_size,
+			 const struct min_heap_callbacks *func, void *args);
+bool __min_heap_push(min_heap_char *heap, const void *element, size_t elem_size,
+		     const struct min_heap_callbacks *func, void *args);
+bool __min_heap_del(min_heap_char *heap, size_t elem_size, size_t idx,
+		    const struct min_heap_callbacks *func, void *args);
+
+#define min_heap_init(_heap, _data, _size)	\
+	__min_heap_init((min_heap_char *)_heap, _data, _size)
+#define min_heap_peek(_heap)	\
+	(__minheap_cast(_heap) __min_heap_peek((min_heap_char *)_heap))
+#define min_heap_full(_heap)	\
+	__min_heap_full((min_heap_char *)_heap)
+#define min_heap_sift_down(_heap, _pos, _func, _args)	\
+	__min_heap_sift_down((min_heap_char *)_heap, _pos, __minheap_obj_size(_heap), _func, _args)
+#define min_heap_sift_up(_heap, _idx, _func, _args)	\
+	__min_heap_sift_up((min_heap_char *)_heap, __minheap_obj_size(_heap), _idx, _func, _args)
+#define min_heapify_all(_heap, _func, _args)	\
+	__min_heapify_all((min_heap_char *)_heap, __minheap_obj_size(_heap), _func, _args)
+#define min_heap_pop(_heap, _func, _args)	\
+	__min_heap_pop((min_heap_char *)_heap, __minheap_obj_size(_heap), _func, _args)
+#define min_heap_pop_push(_heap, _element, _func, _args)	\
+	__min_heap_pop_push((min_heap_char *)_heap, _element, __minheap_obj_size(_heap),	\
+			    _func, _args)
+#define min_heap_push(_heap, _element, _func, _args)	\
+	__min_heap_push((min_heap_char *)_heap, _element, __minheap_obj_size(_heap), _func, _args)
 #define min_heap_del(_heap, _idx, _func, _args)	\
 	__min_heap_del((min_heap_char *)_heap, __minheap_obj_size(_heap), _idx, _func, _args)
 
-- 
cgit v1.2.3


From aa5888afc2347ebb394c2c4b694fa3026775009e Mon Sep 17 00:00:00 2001
From: Kuan-Wei Chiu <visitorckw@gmail.com>
Date: Sun, 20 Oct 2024 12:01:52 +0800
Subject: lib min_heap: optimize min heap by prescaling counters for better
 performance

Improve the efficiency of the min heap by prescaling counters, eliminating
the need to repeatedly compute 'index * element_size' when accessing
elements.  By doing so, we avoid the overhead associated with
recalculating the byte offset for each heap operation.

However, with prescaling, the calculation for the parent element's
location is no longer as simple as '(i - 1) / 2'.  To address this, we
copy the parent function from 'lib/sort.c', which calculates the parent
offset in a branchless manner without using any division instructions.

This optimization should result in a more efficient heap implementation by
reducing the computational overhead of finding parent and child offsets.

Link: https://lkml.kernel.org/r/20241020040200.939973-3-visitorckw@gmail.com
Signed-off-by: Kuan-Wei Chiu <visitorckw@gmail.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Ching-Chun (Jim) Huang <jserv@ccns.ncku.edu.tw>
Cc: Coly Li <colyli@suse.de>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Kent Overstreet <kent.overstreet@linux.dev>
Cc: "Liang, Kan" <kan.liang@linux.intel.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Matthew Sakai <msakai@redhat.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/min_heap.h | 73 ++++++++++++++++++++++++++++++++----------------
 1 file changed, 49 insertions(+), 24 deletions(-)

(limited to 'include')

diff --git a/include/linux/min_heap.h b/include/linux/min_heap.h
index 0abb21173979..41b092a14238 100644
--- a/include/linux/min_heap.h
+++ b/include/linux/min_heap.h
@@ -38,6 +38,32 @@ struct min_heap_callbacks {
 	void (*swp)(void *lhs, void *rhs, void *args);
 };
 
+/**
+ * parent - given the offset of the child, find the offset of the parent.
+ * @i: the offset of the heap element whose parent is sought.  Non-zero.
+ * @lsbit: a precomputed 1-bit mask, equal to "size & -size"
+ * @size: size of each element
+ *
+ * In terms of array indexes, the parent of element j = @i/@size is simply
+ * (j-1)/2.  But when working in byte offsets, we can't use implicit
+ * truncation of integer divides.
+ *
+ * Fortunately, we only need one bit of the quotient, not the full divide.
+ * @size has a least significant bit.  That bit will be clear if @i is
+ * an even multiple of @size, and set if it's an odd multiple.
+ *
+ * Logically, we're doing "if (i & lsbit) i -= size;", but since the
+ * branch is unpredictable, it's done with a bit of clever branch-free
+ * code instead.
+ */
+__attribute_const__ __always_inline
+static size_t parent(size_t i, unsigned int lsbit, size_t size)
+{
+	i -= size;
+	i -= size & -(i & lsbit);
+	return i / 2;
+}
+
 /* Initialize a min-heap. */
 static __always_inline
 void __min_heap_init_inline(min_heap_char *heap, void *data, int size)
@@ -78,33 +104,30 @@ static __always_inline
 void __min_heap_sift_down_inline(min_heap_char *heap, int pos, size_t elem_size,
 				 const struct min_heap_callbacks *func, void *args)
 {
-	void *left, *right;
+	const unsigned long lsbit = elem_size & -elem_size;
 	void *data = heap->data;
-	void *root = data + pos * elem_size;
-	int i = pos, j;
+	/* pre-scale counters for performance */
+	size_t a = pos * elem_size;
+	size_t b, c, d;
+	size_t n = heap->nr * elem_size;
 
 	/* Find the sift-down path all the way to the leaves. */
-	for (;;) {
-		if (i * 2 + 2 >= heap->nr)
-			break;
-		left = data + (i * 2 + 1) * elem_size;
-		right = data + (i * 2 + 2) * elem_size;
-		i = func->less(left, right, args) ? i * 2 + 1 : i * 2 + 2;
-	}
+	for (b = a; c = 2 * b + elem_size, (d = c + elem_size) < n;)
+		b = func->less(data + c, data + d, args) ? c : d;
 
 	/* Special case for the last leaf with no sibling. */
-	if (i * 2 + 2 == heap->nr)
-		i = i * 2 + 1;
+	if (d == n)
+		b = c;
 
 	/* Backtrack to the correct location. */
-	while (i != pos && func->less(root, data + i * elem_size, args))
-		i = (i - 1) / 2;
+	while (b != a && func->less(data + a, data + b, args))
+		b = parent(b, lsbit, elem_size);
 
 	/* Shift the element into its correct place. */
-	j = i;
-	while (i != pos) {
-		i = (i - 1) / 2;
-		func->swp(data + i * elem_size, data + j * elem_size, args);
+	c = b;
+	while (b != a) {
+		b = parent(b, lsbit, elem_size);
+		func->swp(data + b, data + c, args);
 	}
 }
 
@@ -117,15 +140,17 @@ static __always_inline
 void __min_heap_sift_up_inline(min_heap_char *heap, size_t elem_size, size_t idx,
 			       const struct min_heap_callbacks *func, void *args)
 {
+	const unsigned long lsbit = elem_size & -elem_size;
 	void *data = heap->data;
-	size_t parent;
+	/* pre-scale counters for performance */
+	size_t a = idx * elem_size, b;
 
-	while (idx) {
-		parent = (idx - 1) / 2;
-		if (func->less(data + parent * elem_size, data + idx * elem_size, args))
+	while (a) {
+		b = parent(a, lsbit, elem_size);
+		if (func->less(data + b, data + a, args))
 			break;
-		func->swp(data + parent * elem_size, data + idx * elem_size, args);
-		idx = parent;
+		func->swp(data + a, data + b, args);
+		a = b;
 	}
 }
 
-- 
cgit v1.2.3


From 03ec56d084611b5a4dc06ffa74db0928616e4d7f Mon Sep 17 00:00:00 2001
From: Kuan-Wei Chiu <visitorckw@gmail.com>
Date: Sun, 20 Oct 2024 12:01:53 +0800
Subject: lib min_heap: avoid indirect function call by providing default swap

The non-inline min heap API can result in an indirect function call to the
custom swap function.  This becomes particularly costly when
CONFIG_MITIGATION_RETPOLINE is enabled, as indirect function calls are
expensive in this case.

To address this, copy the code from lib/sort.c and provide a default
builtin swap implementation that performs element swaps based on the
element size.  This change allows most users to avoid the overhead of
indirect function calls, improving efficiency.

Link: https://lkml.kernel.org/r/20241020040200.939973-4-visitorckw@gmail.com
Signed-off-by: Kuan-Wei Chiu <visitorckw@gmail.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Ching-Chun (Jim) Huang <jserv@ccns.ncku.edu.tw>
Cc: Coly Li <colyli@suse.de>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Kent Overstreet <kent.overstreet@linux.dev>
Cc: "Liang, Kan" <kan.liang@linux.intel.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Matthew Sakai <msakai@redhat.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/min_heap.h | 159 ++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 156 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/min_heap.h b/include/linux/min_heap.h
index 41b092a14238..e781727c8916 100644
--- a/include/linux/min_heap.h
+++ b/include/linux/min_heap.h
@@ -38,6 +38,147 @@ struct min_heap_callbacks {
 	void (*swp)(void *lhs, void *rhs, void *args);
 };
 
+/**
+ * is_aligned - is this pointer & size okay for word-wide copying?
+ * @base: pointer to data
+ * @size: size of each element
+ * @align: required alignment (typically 4 or 8)
+ *
+ * Returns true if elements can be copied using word loads and stores.
+ * The size must be a multiple of the alignment, and the base address must
+ * be if we do not have CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS.
+ *
+ * For some reason, gcc doesn't know to optimize "if (a & mask || b & mask)"
+ * to "if ((a | b) & mask)", so we do that by hand.
+ */
+__attribute_const__ __always_inline
+static bool is_aligned(const void *base, size_t size, unsigned char align)
+{
+	unsigned char lsbits = (unsigned char)size;
+
+	(void)base;
+#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+	lsbits |= (unsigned char)(uintptr_t)base;
+#endif
+	return (lsbits & (align - 1)) == 0;
+}
+
+/**
+ * swap_words_32 - swap two elements in 32-bit chunks
+ * @a: pointer to the first element to swap
+ * @b: pointer to the second element to swap
+ * @n: element size (must be a multiple of 4)
+ *
+ * Exchange the two objects in memory.  This exploits base+index addressing,
+ * which basically all CPUs have, to minimize loop overhead computations.
+ *
+ * For some reason, on x86 gcc 7.3.0 adds a redundant test of n at the
+ * bottom of the loop, even though the zero flag is still valid from the
+ * subtract (since the intervening mov instructions don't alter the flags).
+ * Gcc 8.1.0 doesn't have that problem.
+ */
+static __always_inline
+void swap_words_32(void *a, void *b, size_t n)
+{
+	do {
+		u32 t = *(u32 *)(a + (n -= 4));
+		*(u32 *)(a + n) = *(u32 *)(b + n);
+		*(u32 *)(b + n) = t;
+	} while (n);
+}
+
+/**
+ * swap_words_64 - swap two elements in 64-bit chunks
+ * @a: pointer to the first element to swap
+ * @b: pointer to the second element to swap
+ * @n: element size (must be a multiple of 8)
+ *
+ * Exchange the two objects in memory.  This exploits base+index
+ * addressing, which basically all CPUs have, to minimize loop overhead
+ * computations.
+ *
+ * We'd like to use 64-bit loads if possible.  If they're not, emulating
+ * one requires base+index+4 addressing which x86 has but most other
+ * processors do not.  If CONFIG_64BIT, we definitely have 64-bit loads,
+ * but it's possible to have 64-bit loads without 64-bit pointers (e.g.
+ * x32 ABI).  Are there any cases the kernel needs to worry about?
+ */
+static __always_inline
+void swap_words_64(void *a, void *b, size_t n)
+{
+	do {
+#ifdef CONFIG_64BIT
+		u64 t = *(u64 *)(a + (n -= 8));
+		*(u64 *)(a + n) = *(u64 *)(b + n);
+		*(u64 *)(b + n) = t;
+#else
+		/* Use two 32-bit transfers to avoid base+index+4 addressing */
+		u32 t = *(u32 *)(a + (n -= 4));
+		*(u32 *)(a + n) = *(u32 *)(b + n);
+		*(u32 *)(b + n) = t;
+
+		t = *(u32 *)(a + (n -= 4));
+		*(u32 *)(a + n) = *(u32 *)(b + n);
+		*(u32 *)(b + n) = t;
+#endif
+	} while (n);
+}
+
+/**
+ * swap_bytes - swap two elements a byte at a time
+ * @a: pointer to the first element to swap
+ * @b: pointer to the second element to swap
+ * @n: element size
+ *
+ * This is the fallback if alignment doesn't allow using larger chunks.
+ */
+static __always_inline
+void swap_bytes(void *a, void *b, size_t n)
+{
+	do {
+		char t = ((char *)a)[--n];
+		((char *)a)[n] = ((char *)b)[n];
+		((char *)b)[n] = t;
+	} while (n);
+}
+
+/*
+ * The values are arbitrary as long as they can't be confused with
+ * a pointer, but small integers make for the smallest compare
+ * instructions.
+ */
+#define SWAP_WORDS_64 ((void (*)(void *, void *, void *))0)
+#define SWAP_WORDS_32 ((void (*)(void *, void *, void *))1)
+#define SWAP_BYTES    ((void (*)(void *, void *, void *))2)
+
+/*
+ * Selects the appropriate swap function based on the element size.
+ */
+static __always_inline
+void *select_swap_func(const void *base, size_t size)
+{
+	if (is_aligned(base, size, 8))
+		return SWAP_WORDS_64;
+	else if (is_aligned(base, size, 4))
+		return SWAP_WORDS_32;
+	else
+		return SWAP_BYTES;
+}
+
+static __always_inline
+void do_swap(void *a, void *b, size_t size, void (*swap_func)(void *lhs, void *rhs, void *args),
+	     void *priv)
+{
+	if (swap_func == SWAP_WORDS_64)
+		swap_words_64(a, b, size);
+	else if (swap_func == SWAP_WORDS_32)
+		swap_words_32(a, b, size);
+	else if (swap_func == SWAP_BYTES)
+		swap_bytes(a, b, size);
+	else
+		swap_func(a, b, priv);
+}
+
 /**
  * parent - given the offset of the child, find the offset of the parent.
  * @i: the offset of the heap element whose parent is sought.  Non-zero.
@@ -106,11 +247,15 @@ void __min_heap_sift_down_inline(min_heap_char *heap, int pos, size_t elem_size,
 {
 	const unsigned long lsbit = elem_size & -elem_size;
 	void *data = heap->data;
+	void (*swp)(void *lhs, void *rhs, void *args) = func->swp;
 	/* pre-scale counters for performance */
 	size_t a = pos * elem_size;
 	size_t b, c, d;
 	size_t n = heap->nr * elem_size;
 
+	if (!swp)
+		swp = select_swap_func(data, elem_size);
+
 	/* Find the sift-down path all the way to the leaves. */
 	for (b = a; c = 2 * b + elem_size, (d = c + elem_size) < n;)
 		b = func->less(data + c, data + d, args) ? c : d;
@@ -127,7 +272,7 @@ void __min_heap_sift_down_inline(min_heap_char *heap, int pos, size_t elem_size,
 	c = b;
 	while (b != a) {
 		b = parent(b, lsbit, elem_size);
-		func->swp(data + b, data + c, args);
+		do_swap(data + b, data + c, elem_size, swp, args);
 	}
 }
 
@@ -142,14 +287,18 @@ void __min_heap_sift_up_inline(min_heap_char *heap, size_t elem_size, size_t idx
 {
 	const unsigned long lsbit = elem_size & -elem_size;
 	void *data = heap->data;
+	void (*swp)(void *lhs, void *rhs, void *args) = func->swp;
 	/* pre-scale counters for performance */
 	size_t a = idx * elem_size, b;
 
+	if (!swp)
+		swp = select_swap_func(data, elem_size);
+
 	while (a) {
 		b = parent(a, lsbit, elem_size);
 		if (func->less(data + b, data + a, args))
 			break;
-		func->swp(data + a, data + b, args);
+		do_swap(data + a, data + b, elem_size, swp, args);
 		a = b;
 	}
 }
@@ -242,15 +391,19 @@ bool __min_heap_del_inline(min_heap_char *heap, size_t elem_size, size_t idx,
 			   const struct min_heap_callbacks *func, void *args)
 {
 	void *data = heap->data;
+	void (*swp)(void *lhs, void *rhs, void *args) = func->swp;
 
 	if (WARN_ONCE(heap->nr <= 0, "Popping an empty heap"))
 		return false;
 
+	if (!swp)
+		swp = select_swap_func(data, elem_size);
+
 	/* Place last element at the root (position 0) and then sift down. */
 	heap->nr--;
 	if (idx == heap->nr)
 		return true;
-	func->swp(data + (idx * elem_size), data + (heap->nr * elem_size), args);
+	do_swap(data + (idx * elem_size), data + (heap->nr * elem_size), elem_size, swp, args);
 	__min_heap_sift_up_inline(heap, elem_size, idx, func, args);
 	__min_heap_sift_down_inline(heap, idx, elem_size, func, args);
 
-- 
cgit v1.2.3


From 84bfbfbbd32aee136afea4b6bf82581dce79c305 Mon Sep 17 00:00:00 2001
From: Maurice Lambert <mauricelambert434@gmail.com>
Date: Sun, 3 Nov 2024 23:39:50 +0100
Subject: netlink: typographical error in nlmsg_type constants definition

This commit fix a typographical error in netlink nlmsg_type constants definition in the include/uapi/linux/rtnetlink.h at line 177. The definition is RTM_NEWNVLAN RTM_NEWVLAN instead of RTM_NEWVLAN RTM_NEWVLAN.

Signed-off-by: Maurice Lambert <mauricelambert434@gmail.com>
Fixes: 8dcea187088b ("net: bridge: vlan: add rtm definitions and dump support")
Link: https://patch.msgid.link/20241103223950.230300-1-mauricelambert434@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/uapi/linux/rtnetlink.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h
index 3b687d20c9ed..db7254d52d93 100644
--- a/include/uapi/linux/rtnetlink.h
+++ b/include/uapi/linux/rtnetlink.h
@@ -174,7 +174,7 @@ enum {
 #define RTM_GETLINKPROP	RTM_GETLINKPROP
 
 	RTM_NEWVLAN = 112,
-#define RTM_NEWNVLAN	RTM_NEWVLAN
+#define RTM_NEWVLAN	RTM_NEWVLAN
 	RTM_DELVLAN,
 #define RTM_DELVLAN	RTM_DELVLAN
 	RTM_GETVLAN,
-- 
cgit v1.2.3


From e1ef630c56d36770e180f0d0bf7b61b5289f5c48 Mon Sep 17 00:00:00 2001
From: Claudiu Beznea <claudiu.beznea.uj@bp.renesas.com>
Date: Fri, 1 Nov 2024 11:57:13 +0200
Subject: clk: Add devm_clk_hw_register_gate_parent_hw()

Add devm_clk_hw_register_gate_parent_hw() macro to allow registering
devres managed gate clocks providing struct clk_hw object as parent.

Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Acked-by: Stephen Boyd <sboyd@kernel.org>
Signed-off-by: Claudiu Beznea <claudiu.beznea.uj@bp.renesas.com>
Link: https://lore.kernel.org/20241101095720.2247815-3-claudiu.beznea.uj@bp.renesas.com
Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
---
 include/linux/clk-provider.h | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

(limited to 'include')

diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h
index 7e43caabb54b..28cf7d103e92 100644
--- a/include/linux/clk-provider.h
+++ b/include/linux/clk-provider.h
@@ -622,6 +622,24 @@ struct clk *clk_register_gate(struct device *dev, const char *name,
 	__devm_clk_hw_register_gate((dev), NULL, (name), (parent_name), NULL, \
 			       NULL, (flags), (reg), (bit_idx),		      \
 			       (clk_gate_flags), (lock))
+/**
+ * devm_clk_hw_register_gate_parent_hw - register a gate clock with the clock
+ * framework
+ * @dev: device that is registering this clock
+ * @name: name of this clock
+ * @parent_hw: pointer to parent clk
+ * @flags: framework-specific flags for this clock
+ * @reg: register address to control gating of this clock
+ * @bit_idx: which bit in the register controls gating of this clock
+ * @clk_gate_flags: gate-specific flags for this clock
+ * @lock: shared register lock for this clock
+ */
+#define devm_clk_hw_register_gate_parent_hw(dev, name, parent_hw, flags,      \
+					    reg, bit_idx, clk_gate_flags,     \
+					    lock)			      \
+	__devm_clk_hw_register_gate((dev), NULL, (name), NULL, (parent_hw),   \
+				    NULL, (flags), (reg), (bit_idx),	      \
+				    (clk_gate_flags), (lock))
 /**
  * devm_clk_hw_register_gate_parent_data - register a gate clock with the
  * clock framework
-- 
cgit v1.2.3


From 32360bf6a5d4016669c3545e7b0ec939937f5331 Mon Sep 17 00:00:00 2001
From: Dmitry Rokosov <ddrokosov@salutedevices.com>
Date: Wed, 4 Sep 2024 01:39:30 +0300
Subject: leds: Introduce ordered workqueue for LEDs events instead of
 system_wq

This allows to setup ordered workqueue for LEDs events. This may be
useful, because default 'system_wq' does not guarantee execution order
of each work_struct, thus for several brightness update requests (for
multiple LEDs), real brightness switch could be in random order.

Yes, for sysfs-based LEDs we have flush_work() call inside
brightness_store() operation, but it's blocking call, so userspace
caller can be blocked at a long time, which means LEDs animation stream
can be broken.

Ordered workqueue has the same behaviour as system_wq + flush_work(),
but all scheduled works are async and userspace caller is not blocked,
which it better for userspace animation scheduling.

Signed-off-by: Alexey Romanov <avromanov@salutedevices.com>
Signed-off-by: Dmitry Rokosov <ddrokosov@salutedevices.com>
Link: https://lore.kernel.org/r/20240903223936.21292-1-ddrokosov@salutedevices.com
[Lee: Couple of style fix-ups]
Signed-off-by: Lee Jones <lee@kernel.org>
---
 include/linux/leds.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/leds.h b/include/linux/leds.h
index e5968c3ed4ae..9f34b9facaad 100644
--- a/include/linux/leds.h
+++ b/include/linux/leds.h
@@ -171,6 +171,7 @@ struct led_classdev {
 	int			 new_blink_brightness;
 	void			(*flash_resume)(struct led_classdev *led_cdev);
 
+	struct workqueue_struct *wq;			/* LED workqueue */
 	struct work_struct	set_brightness_work;
 	int			delayed_set_value;
 	unsigned long		delayed_delay_on;
-- 
cgit v1.2.3


From 4ca7cd938725a4050dcd62ae9472e931d603118d Mon Sep 17 00:00:00 2001
From: Mukesh Ojha <quic_mojha@quicinc.com>
Date: Sun, 3 Nov 2024 21:35:27 +0530
Subject: leds: class: Protect brightness_show() with led_cdev->led_access
 mutex

There is NULL pointer issue observed if from Process A where hid device
being added which results in adding a led_cdev addition and later a
another call to access of led_cdev attribute from Process B can result
in NULL pointer issue.

Use mutex led_cdev->led_access to protect access to led->cdev and its
attribute inside brightness_show() and max_brightness_show() and also
update the comment for mutex that it should be used to protect the led
class device fields.

	Process A 				Process B

 kthread+0x114
 worker_thread+0x244
 process_scheduled_works+0x248
 uhid_device_add_worker+0x24
 hid_add_device+0x120
 device_add+0x268
 bus_probe_device+0x94
 device_initial_probe+0x14
 __device_attach+0xfc
 bus_for_each_drv+0x10c
 __device_attach_driver+0x14c
 driver_probe_device+0x3c
 __driver_probe_device+0xa0
 really_probe+0x190
 hid_device_probe+0x130
 ps_probe+0x990
 ps_led_register+0x94
 devm_led_classdev_register_ext+0x58
 led_classdev_register_ext+0x1f8
 device_create_with_groups+0x48
 device_create_groups_vargs+0xc8
 device_add+0x244
 kobject_uevent+0x14
 kobject_uevent_env[jt]+0x224
 mutex_unlock[jt]+0xc4
 __mutex_unlock_slowpath+0xd4
 wake_up_q+0x70
 try_to_wake_up[jt]+0x48c
 preempt_schedule_common+0x28
 __schedule+0x628
 __switch_to+0x174
						el0t_64_sync+0x1a8/0x1ac
						el0t_64_sync_handler+0x68/0xbc
						el0_svc+0x38/0x68
						do_el0_svc+0x1c/0x28
						el0_svc_common+0x80/0xe0
						invoke_syscall+0x58/0x114
						__arm64_sys_read+0x1c/0x2c
						ksys_read+0x78/0xe8
						vfs_read+0x1e0/0x2c8
						kernfs_fop_read_iter+0x68/0x1b4
						seq_read_iter+0x158/0x4ec
						kernfs_seq_show+0x44/0x54
						sysfs_kf_seq_show+0xb4/0x130
						dev_attr_show+0x38/0x74
						brightness_show+0x20/0x4c
						dualshock4_led_get_brightness+0xc/0x74

[ 3313.874295][ T4013] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000060
[ 3313.874301][ T4013] Mem abort info:
[ 3313.874303][ T4013]   ESR = 0x0000000096000006
[ 3313.874305][ T4013]   EC = 0x25: DABT (current EL), IL = 32 bits
[ 3313.874307][ T4013]   SET = 0, FnV = 0
[ 3313.874309][ T4013]   EA = 0, S1PTW = 0
[ 3313.874311][ T4013]   FSC = 0x06: level 2 translation fault
[ 3313.874313][ T4013] Data abort info:
[ 3313.874314][ T4013]   ISV = 0, ISS = 0x00000006, ISS2 = 0x00000000
[ 3313.874316][ T4013]   CM = 0, WnR = 0, TnD = 0, TagAccess = 0
[ 3313.874318][ T4013]   GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0
[ 3313.874320][ T4013] user pgtable: 4k pages, 39-bit VAs, pgdp=00000008f2b0a000
..

[ 3313.874332][ T4013] Dumping ftrace buffer:
[ 3313.874334][ T4013]    (ftrace buffer empty)
..
..
[ dd3313.874639][ T4013] CPU: 6 PID: 4013 Comm: InputReader
[ 3313.874648][ T4013] pc : dualshock4_led_get_brightness+0xc/0x74
[ 3313.874653][ T4013] lr : led_update_brightness+0x38/0x60
[ 3313.874656][ T4013] sp : ffffffc0b910bbd0
..
..
[ 3313.874685][ T4013] Call trace:
[ 3313.874687][ T4013]  dualshock4_led_get_brightness+0xc/0x74
[ 3313.874690][ T4013]  brightness_show+0x20/0x4c
[ 3313.874692][ T4013]  dev_attr_show+0x38/0x74
[ 3313.874696][ T4013]  sysfs_kf_seq_show+0xb4/0x130
[ 3313.874700][ T4013]  kernfs_seq_show+0x44/0x54
[ 3313.874703][ T4013]  seq_read_iter+0x158/0x4ec
[ 3313.874705][ T4013]  kernfs_fop_read_iter+0x68/0x1b4
[ 3313.874708][ T4013]  vfs_read+0x1e0/0x2c8
[ 3313.874711][ T4013]  ksys_read+0x78/0xe8
[ 3313.874714][ T4013]  __arm64_sys_read+0x1c/0x2c
[ 3313.874718][ T4013]  invoke_syscall+0x58/0x114
[ 3313.874721][ T4013]  el0_svc_common+0x80/0xe0
[ 3313.874724][ T4013]  do_el0_svc+0x1c/0x28
[ 3313.874727][ T4013]  el0_svc+0x38/0x68
[ 3313.874730][ T4013]  el0t_64_sync_handler+0x68/0xbc
[ 3313.874732][ T4013]  el0t_64_sync+0x1a8/0x1ac

Signed-off-by: Mukesh Ojha <quic_mojha@quicinc.com>
Reviewed-by: Anish Kumar <yesanishhere@gmail.com>
Link: https://lore.kernel.org/r/20241103160527.82487-1-quic_mojha@quicinc.com
Signed-off-by: Lee Jones <lee@kernel.org>
---
 include/linux/leds.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/leds.h b/include/linux/leds.h
index 9f34b9facaad..98f9719c924c 100644
--- a/include/linux/leds.h
+++ b/include/linux/leds.h
@@ -239,7 +239,7 @@ struct led_classdev {
 	struct kernfs_node	*brightness_hw_changed_kn;
 #endif
 
-	/* Ensures consistent access to the LED Flash Class device */
+	/* Ensures consistent access to the LED class device */
 	struct mutex		led_access;
 };
 
-- 
cgit v1.2.3


From 33b091c08ed85e023c21376e6f787355fd46b440 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andr=C3=A9=20Almeida?= <andrealmeid@igalia.com>
Date: Fri, 1 Nov 2024 13:42:49 -0300
Subject: libfs: Fix kernel-doc warning in generic_ci_validate_strict_name
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fix the indentation of the return values from
generic_ci_validate_strict_name() to properly render the comment and to
address a `make htmldocs` warning:

Documentation/filesystems/api-summary:14: include/linux/fs.h:3504:
WARNING: Bullet list ends without a blank line; unexpected unindent.

Fixes: 0e152beb5aa1 ("libfs: Create the helper function generic_ci_validate_strict_name()")
Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Closes: https://lore.kernel.org/lkml/20241030162435.05425f60@canb.auug.org.au/
Signed-off-by: André Almeida <andrealmeid@igalia.com>
Link: https://lore.kernel.org/r/20241101164251.327884-2-andrealmeid@igalia.com
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/linux/fs.h | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index b277369672a1..001d580af862 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -3477,12 +3477,12 @@ int generic_ci_d_compare(const struct dentry *dentry, unsigned int len,
  * @name: name of the new file
  *
  * Return:
- * * True if the filename is suitable for this directory. It can be
- * true if a given name is not suitable for a strict encoding
- * directory, but the directory being used isn't strict
+ * * True: if the filename is suitable for this directory. It can be
+ *   true if a given name is not suitable for a strict encoding
+ *   directory, but the directory being used isn't strict
  * * False if the filename isn't suitable for this directory. This only
- * happens when a directory is casefolded and the filesystem is strict
- * about its encoding.
+ *   happens when a directory is casefolded and the filesystem is strict
+ *   about its encoding.
  */
 static inline bool generic_ci_validate_strict_name(struct inode *dir, struct qstr *name)
 {
-- 
cgit v1.2.3


From e57dfaa4b0a72f6a231a8eedb95d260045bbd8db Mon Sep 17 00:00:00 2001
From: Guillaume Nault <gnault@redhat.com>
Date: Thu, 31 Oct 2024 16:52:57 +0100
Subject: xfrm: Convert struct xfrm_dst_lookup_params -> tos to dscp_t.

Add type annotation to the "tos" field of struct xfrm_dst_lookup_params,
to ensure that the ECN bits aren't mistakenly taken into account when
doing route lookups. Rename that field (tos -> dscp) to make that
change explicit.

Signed-off-by: Guillaume Nault <gnault@redhat.com>
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 include/net/xfrm.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 2b87999bd5aa..32c09e85a64c 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -19,6 +19,7 @@
 
 #include <net/sock.h>
 #include <net/dst.h>
+#include <net/inet_dscp.h>
 #include <net/ip.h>
 #include <net/route.h>
 #include <net/ipv6.h>
@@ -354,7 +355,7 @@ void xfrm_if_unregister_cb(void);
 
 struct xfrm_dst_lookup_params {
 	struct net *net;
-	int tos;
+	dscp_t dscp;
 	int oif;
 	xfrm_address_t *saddr;
 	xfrm_address_t *daddr;
-- 
cgit v1.2.3


From 6140be90ec70c39fa844741ca3cc807dd0866394 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Christian=20G=C3=B6ttsche?= <cgzones@googlemail.com>
Date: Fri, 26 Apr 2024 18:20:14 +0200
Subject: fs/xattr: add *at family syscalls
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add the four syscalls setxattrat(), getxattrat(), listxattrat() and
removexattrat().  Those can be used to operate on extended attributes,
especially security related ones, either relative to a pinned directory
or on a file descriptor without read access, avoiding a
/proc/<pid>/fd/<fd> detour, requiring a mounted procfs.

One use case will be setfiles(8) setting SELinux file contexts
("security.selinux") without race conditions and without a file
descriptor opened with read access requiring SELinux read permission.

Use the do_{name}at() pattern from fs/open.c.

Pass the value of the extended attribute, its length, and for
setxattrat(2) the command (XATTR_CREATE or XATTR_REPLACE) via an added
struct xattr_args to not exceed six syscall arguments and not
merging the AT_* and XATTR_* flags.

[AV: fixes by Christian Brauner folded in, the entire thing rebased on
top of {filename,file}_...xattr() primitives, treatment of empty
pathnames regularized.  As the result, AT_EMPTY_PATH+NULL handling
is cheap, so f...(2) can use it]

Signed-off-by: Christian Göttsche <cgzones@googlemail.com>
Link: https://lore.kernel.org/r/20240426162042.191916-1-cgoettsche@seltendoof.de
Reviewed-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Christian Brauner <brauner@kernel.org>
CC: x86@kernel.org
CC: linux-alpha@vger.kernel.org
CC: linux-kernel@vger.kernel.org
CC: linux-arm-kernel@lists.infradead.org
CC: linux-ia64@vger.kernel.org
CC: linux-m68k@lists.linux-m68k.org
CC: linux-mips@vger.kernel.org
CC: linux-parisc@vger.kernel.org
CC: linuxppc-dev@lists.ozlabs.org
CC: linux-s390@vger.kernel.org
CC: linux-sh@vger.kernel.org
CC: sparclinux@vger.kernel.org
CC: linux-fsdevel@vger.kernel.org
CC: audit@vger.kernel.org
CC: linux-arch@vger.kernel.org
CC: linux-api@vger.kernel.org
CC: linux-security-module@vger.kernel.org
CC: selinux@vger.kernel.org
[brauner: slight tweaks]
Signed-off-by: Christian Brauner <brauner@kernel.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/asm-generic/audit_change_attr.h |  6 ++++++
 include/linux/syscalls.h                | 13 +++++++++++++
 include/linux/xattr.h                   |  4 ++++
 include/uapi/asm-generic/unistd.h       | 11 ++++++++++-
 include/uapi/linux/xattr.h              |  7 +++++++
 5 files changed, 40 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/asm-generic/audit_change_attr.h b/include/asm-generic/audit_change_attr.h
index 331670807cf0..cc840537885f 100644
--- a/include/asm-generic/audit_change_attr.h
+++ b/include/asm-generic/audit_change_attr.h
@@ -11,9 +11,15 @@ __NR_lchown,
 __NR_fchown,
 #endif
 __NR_setxattr,
+#ifdef __NR_setxattrat
+__NR_setxattrat,
+#endif
 __NR_lsetxattr,
 __NR_fsetxattr,
 __NR_removexattr,
+#ifdef __NR_removexattrat
+__NR_removexattrat,
+#endif
 __NR_lremovexattr,
 __NR_fremovexattr,
 #ifdef __NR_fchownat
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 5758104921e6..c6333204d451 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -77,6 +77,7 @@ struct cachestat_range;
 struct cachestat;
 struct statmount;
 struct mnt_id_req;
+struct xattr_args;
 
 #include <linux/types.h>
 #include <linux/aio_abi.h>
@@ -338,23 +339,35 @@ asmlinkage long sys_io_uring_register(unsigned int fd, unsigned int op,
 				void __user *arg, unsigned int nr_args);
 asmlinkage long sys_setxattr(const char __user *path, const char __user *name,
 			     const void __user *value, size_t size, int flags);
+asmlinkage long sys_setxattrat(int dfd, const char __user *path, unsigned int at_flags,
+			       const char __user *name,
+			       const struct xattr_args __user *args, size_t size);
 asmlinkage long sys_lsetxattr(const char __user *path, const char __user *name,
 			      const void __user *value, size_t size, int flags);
 asmlinkage long sys_fsetxattr(int fd, const char __user *name,
 			      const void __user *value, size_t size, int flags);
 asmlinkage long sys_getxattr(const char __user *path, const char __user *name,
 			     void __user *value, size_t size);
+asmlinkage long sys_getxattrat(int dfd, const char __user *path, unsigned int at_flags,
+			       const char __user *name,
+			       struct xattr_args __user *args, size_t size);
 asmlinkage long sys_lgetxattr(const char __user *path, const char __user *name,
 			      void __user *value, size_t size);
 asmlinkage long sys_fgetxattr(int fd, const char __user *name,
 			      void __user *value, size_t size);
 asmlinkage long sys_listxattr(const char __user *path, char __user *list,
 			      size_t size);
+asmlinkage long sys_listxattrat(int dfd, const char __user *path,
+				unsigned int at_flags,
+				char __user *list, size_t size);
 asmlinkage long sys_llistxattr(const char __user *path, char __user *list,
 			       size_t size);
 asmlinkage long sys_flistxattr(int fd, char __user *list, size_t size);
 asmlinkage long sys_removexattr(const char __user *path,
 				const char __user *name);
+asmlinkage long sys_removexattrat(int dfd, const char __user *path,
+				  unsigned int at_flags,
+				  const char __user *name);
 asmlinkage long sys_lremovexattr(const char __user *path,
 				 const char __user *name);
 asmlinkage long sys_fremovexattr(int fd, const char __user *name);
diff --git a/include/linux/xattr.h b/include/linux/xattr.h
index d20051865800..86b0d47984a1 100644
--- a/include/linux/xattr.h
+++ b/include/linux/xattr.h
@@ -19,6 +19,10 @@
 #include <linux/user_namespace.h>
 #include <uapi/linux/xattr.h>
 
+/* List of all open_how "versions". */
+#define XATTR_ARGS_SIZE_VER0	16 /* sizeof first published struct */
+#define XATTR_ARGS_SIZE_LATEST	XATTR_ARGS_SIZE_VER0
+
 struct inode;
 struct dentry;
 
diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
index 5bf6148cac2b..88dc393c2bca 100644
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -841,8 +841,17 @@ __SYSCALL(__NR_lsm_list_modules, sys_lsm_list_modules)
 #define __NR_mseal 462
 __SYSCALL(__NR_mseal, sys_mseal)
 
+#define __NR_setxattrat 463
+__SYSCALL(__NR_setxattrat, sys_setxattrat)
+#define __NR_getxattrat 464
+__SYSCALL(__NR_getxattrat, sys_getxattrat)
+#define __NR_listxattrat 465
+__SYSCALL(__NR_listxattrat, sys_listxattrat)
+#define __NR_removexattrat 466
+__SYSCALL(__NR_removexattrat, sys_removexattrat)
+
 #undef __NR_syscalls
-#define __NR_syscalls 463
+#define __NR_syscalls 467
 
 /*
  * 32 bit systems traditionally used different
diff --git a/include/uapi/linux/xattr.h b/include/uapi/linux/xattr.h
index 9463db2dfa9d..9854f9cff3c6 100644
--- a/include/uapi/linux/xattr.h
+++ b/include/uapi/linux/xattr.h
@@ -11,6 +11,7 @@
 */
 
 #include <linux/libc-compat.h>
+#include <linux/types.h>
 
 #ifndef _UAPI_LINUX_XATTR_H
 #define _UAPI_LINUX_XATTR_H
@@ -20,6 +21,12 @@
 
 #define XATTR_CREATE	0x1	/* set value, fail if attr already exists */
 #define XATTR_REPLACE	0x2	/* set value, fail if attr does not exist */
+
+struct xattr_args {
+	__aligned_u64 __user value;
+	__u32 size;
+	__u32 flags;
+};
 #endif
 
 /* Namespaces */
-- 
cgit v1.2.3


From 44d0469f79bd3d0b3433732877358df7dc6b17b1 Mon Sep 17 00:00:00 2001
From: Zijian Zhang <zijianzhang@bytedance.com>
Date: Wed, 6 Nov 2024 00:37:42 +0000
Subject: bpf: Add sk_is_inet and IS_ICSK check in tls_sw_has_ctx_tx/rx

As the introduction of the support for vsock and unix sockets in sockmap,
tls_sw_has_ctx_tx/rx cannot presume the socket passed in must be IS_ICSK.
vsock and af_unix sockets have vsock_sock and unix_sock instead of
inet_connection_sock. For these sockets, tls_get_ctx may return an invalid
pointer and cause page fault in function tls_sw_ctx_rx.

BUG: unable to handle page fault for address: 0000000000040030
Workqueue: vsock-loopback vsock_loopback_work
RIP: 0010:sk_psock_strp_data_ready+0x23/0x60
Call Trace:
 ? __die+0x81/0xc3
 ? no_context+0x194/0x350
 ? do_page_fault+0x30/0x110
 ? async_page_fault+0x3e/0x50
 ? sk_psock_strp_data_ready+0x23/0x60
 virtio_transport_recv_pkt+0x750/0x800
 ? update_load_avg+0x7e/0x620
 vsock_loopback_work+0xd0/0x100
 process_one_work+0x1a7/0x360
 worker_thread+0x30/0x390
 ? create_worker+0x1a0/0x1a0
 kthread+0x112/0x130
 ? __kthread_cancel_work+0x40/0x40
 ret_from_fork+0x1f/0x40

v2:
  - Add IS_ICSK check
v3:
  - Update the commits in Fixes

Fixes: 634f1a7110b4 ("vsock: support sockmap")
Fixes: 94531cfcbe79 ("af_unix: Add unix_stream_proto for sockmap")
Signed-off-by: Zijian Zhang <zijianzhang@bytedance.com>
Acked-by: Stanislav Fomichev <sdf@fomichev.me>
Acked-by: Jakub Kicinski <kuba@kernel.org>
Reviewed-by: Cong Wang <cong.wang@bytedance.com>
Acked-by: Stefano Garzarella <sgarzare@redhat.com>
Link: https://lore.kernel.org/r/20241106003742.399240-1-zijianzhang@bytedance.com
Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
---
 include/net/tls.h | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/net/tls.h b/include/net/tls.h
index 3a33924db2bc..61fef2880114 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -390,8 +390,12 @@ tls_offload_ctx_tx(const struct tls_context *tls_ctx)
 
 static inline bool tls_sw_has_ctx_tx(const struct sock *sk)
 {
-	struct tls_context *ctx = tls_get_ctx(sk);
+	struct tls_context *ctx;
+
+	if (!sk_is_inet(sk) || !inet_test_bit(IS_ICSK, sk))
+		return false;
 
+	ctx = tls_get_ctx(sk);
 	if (!ctx)
 		return false;
 	return !!tls_sw_ctx_tx(ctx);
@@ -399,8 +403,12 @@ static inline bool tls_sw_has_ctx_tx(const struct sock *sk)
 
 static inline bool tls_sw_has_ctx_rx(const struct sock *sk)
 {
-	struct tls_context *ctx = tls_get_ctx(sk);
+	struct tls_context *ctx;
+
+	if (!sk_is_inet(sk) || !inet_test_bit(IS_ICSK, sk))
+		return false;
 
+	ctx = tls_get_ctx(sk);
 	if (!ctx)
 		return false;
 	return !!tls_sw_ctx_rx(ctx);
-- 
cgit v1.2.3


From 6f94cbc29adacc15007c5a16295052e674099282 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Sun, 3 Nov 2024 08:46:07 -0700
Subject: io_uring/rsrc: split io_kiocb node type assignments

Currently the io_rsrc_node assignment in io_kiocb is an array of two
pointers, as two nodes may be assigned to a request - one file node,
and one buffer node. However, the buffer node can co-exist with the
provided buffers, as currently it's not supported to use both provided
and registered buffers at the same time.

This crucially brings struct io_kiocb down to 4 cache lines again, as
before it spilled into the 5th cacheline.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/io_uring_types.h | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index d52fec533c51..01e7fb9fcfe2 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -475,6 +475,7 @@ enum {
 	REQ_F_BL_EMPTY_BIT,
 	REQ_F_BL_NO_RECYCLE_BIT,
 	REQ_F_BUFFERS_COMMIT_BIT,
+	REQ_F_BUF_NODE_BIT,
 
 	/* not a real bit, just to check we're not overflowing the space */
 	__REQ_F_LAST_BIT,
@@ -553,6 +554,8 @@ enum {
 	REQ_F_BL_NO_RECYCLE	= IO_REQ_FLAG(REQ_F_BL_NO_RECYCLE_BIT),
 	/* buffer ring head needs incrementing on put */
 	REQ_F_BUFFERS_COMMIT	= IO_REQ_FLAG(REQ_F_BUFFERS_COMMIT_BIT),
+	/* buf node is valid */
+	REQ_F_BUF_NODE		= IO_REQ_FLAG(REQ_F_BUF_NODE_BIT),
 };
 
 typedef void (*io_req_tw_func_t)(struct io_kiocb *req, struct io_tw_state *ts);
@@ -633,6 +636,8 @@ struct io_kiocb {
 		 * REQ_F_BUFFER_RING is set.
 		 */
 		struct io_buffer_list	*buf_list;
+
+		struct io_rsrc_node	*buf_node;
 	};
 
 	union {
@@ -642,7 +647,7 @@ struct io_kiocb {
 		__poll_t apoll_events;
 	};
 
-	struct io_rsrc_node		*rsrc_nodes[2];
+	struct io_rsrc_node		*file_node;
 
 	atomic_t			refs;
 	bool				cancel_seq_set;
-- 
cgit v1.2.3


From b6f58a3f4aa8dba424356c7a69388a81f4459300 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Sun, 3 Nov 2024 10:23:38 -0700
Subject: io_uring: move struct io_kiocb from task_struct to io_uring_task

Rather than store the task_struct itself in struct io_kiocb, store
the io_uring specific task_struct. The life times are the same in terms
of io_uring, and this avoids doing some dereferences through the
task_struct. For the hot path of putting local task references, we can
deref req->tctx instead, which we'll need anyway in that function
regardless of whether it's local or remote references.

This is mostly straight forward, except the original task PF_EXITING
check needs a bit of tweaking. task_work is _always_ run from the
originating task, except in the fallback case, where it's run from a
kernel thread. Replace the potentially racy (in case of fallback work)
checks for req->task->flags with current->flags. It's either the still
the original task, in which case PF_EXITING will be sane, or it has
PF_KTHREAD set, in which case it's fallback work. Both cases should
prevent moving forward with the given request.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/io_uring/cmd.h   | 2 +-
 include/linux/io_uring_types.h | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/io_uring/cmd.h b/include/linux/io_uring/cmd.h
index c189d36ad55e..578a3fdf5c71 100644
--- a/include/linux/io_uring/cmd.h
+++ b/include/linux/io_uring/cmd.h
@@ -110,7 +110,7 @@ static inline void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd,
 
 static inline struct task_struct *io_uring_cmd_get_task(struct io_uring_cmd *cmd)
 {
-	return cmd_to_io_kiocb(cmd)->task;
+	return cmd_to_io_kiocb(cmd)->tctx->task;
 }
 
 #endif /* _LINUX_IO_URING_CMD_H */
diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index 01e7fb9fcfe2..fba2988accc3 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -84,6 +84,7 @@ struct io_uring_task {
 	/* submission side */
 	int				cached_refs;
 	const struct io_ring_ctx 	*last;
+	struct task_struct		*task;
 	struct io_wq			*io_wq;
 	struct file			*registered_rings[IO_RINGFD_REG_MAX];
 
@@ -625,7 +626,7 @@ struct io_kiocb {
 	struct io_cqe			cqe;
 
 	struct io_ring_ctx		*ctx;
-	struct task_struct		*task;
+	struct io_uring_task		*tctx;
 
 	union {
 		/* stores selected buf, valid IFF REQ_F_BUFFER_SELECTED is set */
-- 
cgit v1.2.3


From 6bf90bd8c58a305994948eb3409d91a7d8f2edae Mon Sep 17 00:00:00 2001
From: Olivier Langlois <olivier@trillion01.com>
Date: Sun, 13 Oct 2024 14:29:24 -0400
Subject: io_uring/napi: add static napi tracking strategy

Add the static napi tracking strategy. That allows the user to manually
manage the napi ids list for busy polling, and eliminate the overhead of
dynamically updating the list from the fast path.

Signed-off-by: Olivier Langlois <olivier@trillion01.com>
Link: https://lore.kernel.org/r/96943de14968c35a5c599352259ad98f3c0770ba.1728828877.git.olivier@trillion01.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/io_uring_types.h |  2 +-
 include/uapi/linux/io_uring.h  | 32 ++++++++++++++++++++++++++++++--
 2 files changed, 31 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index fba2988accc3..072e65e93105 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -408,7 +408,7 @@ struct io_ring_ctx {
 	/* napi busy poll default timeout */
 	ktime_t			napi_busy_poll_dt;
 	bool			napi_prefer_busy_poll;
-	bool			napi_enabled;
+	u8			napi_track_mode;
 
 	DECLARE_HASHTABLE(napi_ht, 4);
 #endif
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 47977a5c65f5..5d08435b95a8 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -790,12 +790,40 @@ struct io_uring_buf_status {
 	__u32	resv[8];
 };
 
+enum io_uring_napi_op {
+	/* register/ungister backward compatible opcode */
+	IO_URING_NAPI_REGISTER_OP = 0,
+
+	/* opcodes to update napi_list when static tracking is used */
+	IO_URING_NAPI_STATIC_ADD_ID = 1,
+	IO_URING_NAPI_STATIC_DEL_ID = 2
+};
+
+enum io_uring_napi_tracking_strategy {
+	/* value must be 0 for backward compatibility */
+	IO_URING_NAPI_TRACKING_DYNAMIC = 0,
+	IO_URING_NAPI_TRACKING_STATIC = 1,
+	IO_URING_NAPI_TRACKING_INACTIVE = 255
+};
+
 /* argument for IORING_(UN)REGISTER_NAPI */
 struct io_uring_napi {
 	__u32	busy_poll_to;
 	__u8	prefer_busy_poll;
-	__u8	pad[3];
-	__u64	resv;
+
+	/* a io_uring_napi_op value */
+	__u8	opcode;
+	__u8	pad[2];
+
+	/*
+	 * for IO_URING_NAPI_REGISTER_OP, it is a
+	 * io_uring_napi_tracking_strategy value.
+	 *
+	 * for IO_URING_NAPI_STATIC_ADD_ID/IO_URING_NAPI_STATIC_DEL_ID
+	 * it is the napi id to add/del from napi_list.
+	 */
+	__u32	op_param;
+	__u32	resv;
 };
 
 /*
-- 
cgit v1.2.3


From 3b96b895127b7c0aed63d82c974b46340e8466c1 Mon Sep 17 00:00:00 2001
From: Esther Shimanovich <eshimanovich@chromium.org>
Date: Tue, 10 Sep 2024 17:57:45 +0000
Subject: PCI: Detect and trust built-in Thunderbolt chips

Some computers with CPUs that lack Thunderbolt features use discrete
Thunderbolt chips to add Thunderbolt functionality. These Thunderbolt
chips are located within the chassis; between the Root Port labeled
ExternalFacingPort and the USB-C port.

These Thunderbolt PCIe devices should be labeled as fixed and trusted, as
they are built into the computer. Otherwise, security policies that rely on
those flags may have unintended results, such as preventing USB-C ports
from enumerating.

Detect the above scenario through the process of elimination.

  1) Integrated Thunderbolt host controllers already have Thunderbolt
     implemented, so anything outside their external facing Root Port is
     removable and untrusted.

     Detect them using the following properties:

       - Most integrated host controllers have the "usb4-host-interface"
         ACPI property, as described here:

         https://learn.microsoft.com/en-us/windows-hardware/drivers/pci/dsd-for-pcie-root-ports#mapping-native-protocols-pcie-displayport-tunneled-through-usb4-to-usb4-host-routers

       - Integrated Thunderbolt PCIe Root Ports before Alder Lake do not
         have the "usb4-host-interface" ACPI property. Identify those by
         their PCI IDs instead.

  2) If a Root Port does not have integrated Thunderbolt capabilities, but
     has the "ExternalFacingPort" ACPI property, that means the
     manufacturer has opted to use a discrete Thunderbolt host controller
     that is built into the computer.

     This host controller can be identified by virtue of being located
     directly below an external-facing Root Port that lacks integrated
     Thunderbolt. Label it as trusted and fixed.

     Everything downstream from it is untrusted and removable.

The "ExternalFacingPort" ACPI property is described here:
https://learn.microsoft.com/en-us/windows-hardware/drivers/pci/dsd-for-pcie-root-ports#identifying-externally-exposed-pcie-root-ports

Link: https://lore.kernel.org/r/20240910-trust-tbt-fix-v5-1-7a7a42a5f496@chromium.org
Suggested-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Signed-off-by: Esther Shimanovich <eshimanovich@chromium.org>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Tested-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Tested-by: Mario Limonciello <mario.limonciello@amd.com>
Reviewed-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Reviewed-by: Mario Limonciello <mario.limonciello@amd.com>
---
 include/linux/pci.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include')

diff --git a/include/linux/pci.h b/include/linux/pci.h
index 573b4c4c2be6..4e77c4230c0a 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -2609,6 +2609,12 @@ pci_host_bridge_acpi_msi_domain(struct pci_bus *bus) { return NULL; }
 static inline bool pci_pr3_present(struct pci_dev *pdev) { return false; }
 #endif
 
+#if defined(CONFIG_X86) && defined(CONFIG_ACPI)
+bool arch_pci_dev_is_removable(struct pci_dev *pdev);
+#else
+static inline bool arch_pci_dev_is_removable(struct pci_dev *pdev) { return false; }
+#endif
+
 #ifdef CONFIG_EEH
 static inline struct eeh_dev *pci_dev_to_eeh_dev(struct pci_dev *pdev)
 {
-- 
cgit v1.2.3


From 25caea955cc950507d179f3ef456404b475e8c23 Mon Sep 17 00:00:00 2001
From: Inochi Amaoto <inochiama@gmail.com>
Date: Thu, 31 Oct 2024 14:08:58 +0800
Subject: irqchip: Add T-HEAD C900 ACLINT SSWI driver

Add a driver for the T-HEAD C900 ACLINT SSWI device. This device allows
the system with T-HEAD cpus to send ipi via fast device interface.

Signed-off-by: Inochi Amaoto <inochiama@gmail.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/all/20241031060859.722258-3-inochiama@gmail.com
---
 include/linux/cpuhotplug.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 2361ed4d2b15..799052249c7b 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -147,6 +147,7 @@ enum cpuhp_state {
 	CPUHP_AP_IRQ_EIOINTC_STARTING,
 	CPUHP_AP_IRQ_AVECINTC_STARTING,
 	CPUHP_AP_IRQ_SIFIVE_PLIC_STARTING,
+	CPUHP_AP_IRQ_THEAD_ACLINT_SSWI_STARTING,
 	CPUHP_AP_IRQ_RISCV_IMSIC_STARTING,
 	CPUHP_AP_IRQ_RISCV_SBI_IPI_STARTING,
 	CPUHP_AP_ARM_MVEBU_COHERENCY,
-- 
cgit v1.2.3


From 513793bc6ab331b947111e8efaf8fcef33fb83e5 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 5 Nov 2024 09:14:31 +0100
Subject: posix-timers: Make signal delivery consistent

Signals of timers which are reprogammed, disarmed or deleted can deliver
signals related to the past. The POSIX spec is blury about this:

 - "The effect of disarming or resetting a timer with pending expiration
    notifications is unspecified."

 - "The disposition of pending signals for the deleted timer is
    unspecified."

In both cases it is reasonable to expect that pending signals are
discarded. Especially in the reprogramming case it does not make sense to
account for previous overruns or to deliver a signal for a timer which has
been disarmed. This makes the behaviour consistent and understandable.

Remove the si_sys_private check from the signal delivery code and invoke
posix_timer_deliver_signal() unconditionally for posix timer related
signals.

Change posix_timer_deliver_signal() so it controls the actual signal
delivery via the return value. It now instructs the signal code to drop the
signal when:

  1) The timer does not longer exist in the hash table

  2) The timer signal_seq value is not the same as the si_sys_private value
     which was set when the signal was queued.

This is also a preparatory change to embed the sigqueue into the k_itimer
structure, which in turn allows to remove the si_sys_private magic.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Frederic Weisbecker <frederic@kernel.org>
Link: https://lore.kernel.org/all/20241105064213.040348644@linutronix.de
---
 include/linux/posix-timers.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h
index 02afbb4da7f7..8c6d97412526 100644
--- a/include/linux/posix-timers.h
+++ b/include/linux/posix-timers.h
@@ -137,8 +137,6 @@ static inline void clear_posix_cputimers_work(struct task_struct *p) { }
 static inline void posix_cputimers_init_work(void) { }
 #endif
 
-#define REQUEUE_PENDING 1
-
 /**
  * struct k_itimer - POSIX.1b interval timer structure.
  * @list:		List head for binding the timer to signals->posix_timers
-- 
cgit v1.2.3


From bf635681c906ad056d1fda325de8d1c12c9f8201 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 5 Nov 2024 09:14:33 +0100
Subject: posix-cpu-timers: Cleanup the firing logic

The firing flag of a posix CPU timer is tristate:

  0: when the timer is not about to deliver a signal

  1: when the timer has expired, but the signal has not been delivered yet

 -1: when the timer was queued for signal delivery and a rearm operation
     raced against it and supressed the signal delivery.

This is a pointless exercise as this can be simply expressed with a
boolean. Only if set, the signal is delivered. This makes delete and rearm
consistent with the rest of the posix timers.

Convert firing to bool and fixup the usage sites accordingly and add
comments why the timer cannot be dequeued right away.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Frederic Weisbecker <frederic@kernel.org>
Link: https://lore.kernel.org/all/20241105064213.172848618@linutronix.de
---
 include/linux/posix-timers.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h
index 8c6d97412526..b1de21731a08 100644
--- a/include/linux/posix-timers.h
+++ b/include/linux/posix-timers.h
@@ -49,7 +49,7 @@ struct cpu_timer {
 	struct timerqueue_head		*head;
 	struct pid			*pid;
 	struct list_head		elist;
-	int				firing;
+	bool				firing;
 	struct task_struct __rcu	*handling;
 };
 
-- 
cgit v1.2.3


From 4cf7bf2a2f1a8ace4a49a1138c8123fdb5990093 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 5 Nov 2024 09:14:35 +0100
Subject: posix-cpu-timers: Use dedicated flag for CPU timer nanosleep

POSIX CPU timer nanosleep creates a k_itimer on stack and uses the sigq
pointer to detect the nanosleep case in the expiry function.

Prepare for embedding sigqueue into struct k_itimer by using a dedicated
flag for nanosleep.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Frederic Weisbecker <frederic@kernel.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/all/20241105064213.238550394@linutronix.de
---
 include/linux/posix-timers.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h
index b1de21731a08..bcd01208d795 100644
--- a/include/linux/posix-timers.h
+++ b/include/linux/posix-timers.h
@@ -42,6 +42,7 @@ static inline int clockid_to_fd(const clockid_t clk)
  * @pid:	Pointer to target task PID
  * @elist:	List head for the expiry list
  * @firing:	Timer is currently firing
+ * @nanosleep:	Timer is used for nanosleep and is not a regular posix-timer
  * @handling:	Pointer to the task which handles expiry
  */
 struct cpu_timer {
@@ -50,6 +51,7 @@ struct cpu_timer {
 	struct pid			*pid;
 	struct list_head		elist;
 	bool				firing;
+	bool				nanosleep;
 	struct task_struct __rcu	*handling;
 };
 
-- 
cgit v1.2.3


From 5d916a0988eed5217c103932ff4887c9ae83c89c Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 5 Nov 2024 09:14:36 +0100
Subject: posix-timers: Add a refcount to struct k_itimer

To cure the SIG_IGN handling for posix interval timers, the preallocated
sigqueue needs to be embedded into struct k_itimer to prevent life time
races of all sorts.

To make that work correctly it needs reference counting so that timer
deletion does not free the timer prematuraly when there is a signal queued
or delivered concurrently.

Add a rcuref to the posix timer part.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Frederic Weisbecker <frederic@kernel.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/all/20241105064213.304756440@linutronix.de
---
 include/linux/posix-timers.h | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

(limited to 'include')

diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h
index bcd01208d795..9740fd0c2933 100644
--- a/include/linux/posix-timers.h
+++ b/include/linux/posix-timers.h
@@ -6,11 +6,13 @@
 #include <linux/list.h>
 #include <linux/mutex.h>
 #include <linux/posix-timers_types.h>
+#include <linux/rcuref.h>
 #include <linux/spinlock.h>
 #include <linux/timerqueue.h>
 
 struct kernel_siginfo;
 struct task_struct;
+struct k_itimer;
 
 static inline clockid_t make_process_cpuclock(const unsigned int pid,
 		const clockid_t clock)
@@ -105,6 +107,7 @@ static inline void posix_cputimers_rt_watchdog(struct posix_cputimers *pct,
 
 void posixtimer_rearm_itimer(struct task_struct *p);
 bool posixtimer_deliver_signal(struct kernel_siginfo *info);
+void posixtimer_free_timer(struct k_itimer *timer);
 
 /* Init task static initializer */
 #define INIT_CPU_TIMERBASE(b) {						\
@@ -129,6 +132,7 @@ static inline void posix_cputimers_group_init(struct posix_cputimers *pct,
 					      u64 cpu_limit) { }
 static inline void posixtimer_rearm_itimer(struct task_struct *p) { }
 static inline bool posixtimer_deliver_signal(struct kernel_siginfo *info) { return false; }
+static inline void posixtimer_free_timer(struct k_itimer *timer) { }
 #endif
 
 #ifdef CONFIG_POSIX_CPU_TIMERS_TASK_WORK
@@ -156,6 +160,7 @@ static inline void posix_cputimers_init_work(void) { }
  * @it_signal:		Pointer to the creators signal struct
  * @it_pid:		The pid of the process/task targeted by the signal
  * @it_process:		The task to wakeup on clock_nanosleep (CPU timers)
+ * @rcuref:		Reference count for life time management
  * @sigq:		Pointer to preallocated sigqueue
  * @it:			Union representing the various posix timer type
  *			internals.
@@ -180,6 +185,7 @@ struct k_itimer {
 		struct task_struct	*it_process;
 	};
 	struct sigqueue		*sigq;
+	rcuref_t		rcuref;
 	union {
 		struct {
 			struct hrtimer	timer;
@@ -200,4 +206,12 @@ void set_process_cpu_timer(struct task_struct *task, unsigned int clock_idx,
 
 int update_rlimit_cpu(struct task_struct *task, unsigned long rlim_new);
 
+#ifdef CONFIG_POSIX_TIMERS
+static inline void posixtimer_putref(struct k_itimer *tmr)
+{
+	if (rcuref_put(&tmr->rcuref))
+		posixtimer_free_timer(tmr);
+}
+#endif /* !CONFIG_POSIX_TIMERS */
+
 #endif
-- 
cgit v1.2.3


From 54f1dd642fd088ba969206f09e7afffad7d9db2c Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 5 Nov 2024 09:14:39 +0100
Subject: signal: Provide posixtimer_sigqueue_init()

To cure the SIG_IGN handling for posix interval timers, the preallocated
sigqueue needs to be embedded into struct k_itimer to prevent life time
races of all sorts.

Provide a new function to initialize the embedded sigqueue to prepare for
that.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Frederic Weisbecker <frederic@kernel.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/all/20241105064213.450427515@linutronix.de
---
 include/linux/posix-timers.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h
index 9740fd0c2933..200098d27cc0 100644
--- a/include/linux/posix-timers.h
+++ b/include/linux/posix-timers.h
@@ -12,6 +12,7 @@
 
 struct kernel_siginfo;
 struct task_struct;
+struct sigqueue;
 struct k_itimer;
 
 static inline clockid_t make_process_cpuclock(const unsigned int pid,
@@ -106,6 +107,7 @@ static inline void posix_cputimers_rt_watchdog(struct posix_cputimers *pct,
 }
 
 void posixtimer_rearm_itimer(struct task_struct *p);
+bool posixtimer_init_sigqueue(struct sigqueue *q);
 bool posixtimer_deliver_signal(struct kernel_siginfo *info);
 void posixtimer_free_timer(struct k_itimer *timer);
 
-- 
cgit v1.2.3


From ef1c5bcd6daa674392bdf89b8ae889aafd73f956 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 5 Nov 2024 09:14:41 +0100
Subject: posix-timers: Store PID type in the timer

instead of re-evaluating the signal delivery mode everywhere.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Frederic Weisbecker <frederic@kernel.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/all/20241105064213.519086500@linutronix.de
---
 include/linux/posix-timers.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h
index 200098d27cc0..947176582de9 100644
--- a/include/linux/posix-timers.h
+++ b/include/linux/posix-timers.h
@@ -5,6 +5,7 @@
 #include <linux/alarmtimer.h>
 #include <linux/list.h>
 #include <linux/mutex.h>
+#include <linux/pid.h>
 #include <linux/posix-timers_types.h>
 #include <linux/rcuref.h>
 #include <linux/spinlock.h>
@@ -180,6 +181,7 @@ struct k_itimer {
 	s64			it_overrun_last;
 	unsigned int		it_signal_seq;
 	int			it_sigev_notify;
+	enum pid_type		it_pid_type;
 	ktime_t			it_interval;
 	struct signal_struct	*it_signal;
 	union {
-- 
cgit v1.2.3


From 0360ed14d9826678a50fa2b873e522a24cd3c018 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 5 Nov 2024 09:14:42 +0100
Subject: signal: Refactor send_sigqueue()

To handle posix timers which have their signal ignored via SIG_IGN properly
it is required to requeue a ignored signal for delivery when SIG_IGN is
lifted so the timer gets rearmed.

Split the required code out of send_sigqueue() so it can be reused in
context of sigaction().

While at it rename send_sigqueue() to posixtimer_send_sigqueue() so its
clear what this is about.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Frederic Weisbecker <frederic@kernel.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/all/20241105064213.586453412@linutronix.de
---
 include/linux/posix-timers.h | 1 +
 include/linux/sched/signal.h | 1 -
 2 files changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h
index 947176582de9..52611ea923b2 100644
--- a/include/linux/posix-timers.h
+++ b/include/linux/posix-timers.h
@@ -109,6 +109,7 @@ static inline void posix_cputimers_rt_watchdog(struct posix_cputimers *pct,
 
 void posixtimer_rearm_itimer(struct task_struct *p);
 bool posixtimer_init_sigqueue(struct sigqueue *q);
+int posixtimer_send_sigqueue(struct k_itimer *tmr);
 bool posixtimer_deliver_signal(struct kernel_siginfo *info);
 void posixtimer_free_timer(struct k_itimer *timer);
 
diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
index bd9f569231d9..36283c1c55e9 100644
--- a/include/linux/sched/signal.h
+++ b/include/linux/sched/signal.h
@@ -340,7 +340,6 @@ extern int send_sig(int, struct task_struct *, int);
 extern int zap_other_threads(struct task_struct *p);
 extern struct sigqueue *sigqueue_alloc(void);
 extern void sigqueue_free(struct sigqueue *);
-extern int send_sigqueue(struct sigqueue *, struct pid *, enum pid_type, int si_private);
 extern int do_sigaction(int, struct k_sigaction *, struct k_sigaction *);
 
 static inline void clear_notify_signal(void)
-- 
cgit v1.2.3


From 11629b9808e5900d675fd469d19932ea48060de3 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 5 Nov 2024 09:14:43 +0100
Subject: signal: Replace resched_timer logic

In preparation for handling ignored posix timer signals correctly and
embedding the sigqueue struct into struct k_itimer, hand down a pointer to
the sigqueue struct into posix_timer_deliver_signal() instead of just
having a boolean flag.

No functional change.

Suggested-by: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Frederic Weisbecker <frederic@kernel.org>
Acked-by: "Eric W. Biederman" <ebiederm@xmission.com>
Link: https://lore.kernel.org/all/20241105064213.652658158@linutronix.de
---
 include/linux/posix-timers.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h
index 52611ea923b2..39f1db76833a 100644
--- a/include/linux/posix-timers.h
+++ b/include/linux/posix-timers.h
@@ -110,7 +110,7 @@ static inline void posix_cputimers_rt_watchdog(struct posix_cputimers *pct,
 void posixtimer_rearm_itimer(struct task_struct *p);
 bool posixtimer_init_sigqueue(struct sigqueue *q);
 int posixtimer_send_sigqueue(struct k_itimer *tmr);
-bool posixtimer_deliver_signal(struct kernel_siginfo *info);
+bool posixtimer_deliver_signal(struct kernel_siginfo *info, struct sigqueue *timer_sigq);
 void posixtimer_free_timer(struct k_itimer *timer);
 
 /* Init task static initializer */
@@ -135,7 +135,8 @@ static inline void posix_cputimers_init(struct posix_cputimers *pct) { }
 static inline void posix_cputimers_group_init(struct posix_cputimers *pct,
 					      u64 cpu_limit) { }
 static inline void posixtimer_rearm_itimer(struct task_struct *p) { }
-static inline bool posixtimer_deliver_signal(struct kernel_siginfo *info) { return false; }
+static inline bool posixtimer_deliver_signal(struct kernel_siginfo *info,
+					     struct sigqueue *timer_sigq) { return false; }
 static inline void posixtimer_free_timer(struct k_itimer *timer) { }
 #endif
 
-- 
cgit v1.2.3


From 6017a158beb13b412e55a451379798aae5876514 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 5 Nov 2024 09:14:45 +0100
Subject: posix-timers: Embed sigqueue in struct k_itimer

To cure the SIG_IGN handling for posix interval timers, the preallocated
sigqueue needs to be embedded into struct k_itimer to prevent life time
races of all sorts.

Now that the prerequisites are in place, embed the sigqueue into struct
k_itimer and fixup the relevant usage sites.

Aside of preparing for proper SIG_IGN handling, this spares an extra
allocation.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Frederic Weisbecker <frederic@kernel.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/all/20241105064213.719695194@linutronix.de
---
 include/linux/posix-timers.h | 23 +++++++++++++++++++++--
 1 file changed, 21 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h
index 39f1db76833a..28c0a30e0853 100644
--- a/include/linux/posix-timers.h
+++ b/include/linux/posix-timers.h
@@ -39,6 +39,8 @@ static inline int clockid_to_fd(const clockid_t clk)
 
 #ifdef CONFIG_POSIX_TIMERS
 
+#include <linux/signal_types.h>
+
 /**
  * cpu_timer - Posix CPU timer representation for k_itimer
  * @node:	timerqueue node to queue in the task/sig
@@ -166,7 +168,7 @@ static inline void posix_cputimers_init_work(void) { }
  * @it_pid:		The pid of the process/task targeted by the signal
  * @it_process:		The task to wakeup on clock_nanosleep (CPU timers)
  * @rcuref:		Reference count for life time management
- * @sigq:		Pointer to preallocated sigqueue
+ * @sigq:		Embedded sigqueue
  * @it:			Union representing the various posix timer type
  *			internals.
  * @rcu:		RCU head for freeing the timer.
@@ -190,7 +192,7 @@ struct k_itimer {
 		struct pid		*it_pid;
 		struct task_struct	*it_process;
 	};
-	struct sigqueue		*sigq;
+	struct sigqueue		sigq;
 	rcuref_t		rcuref;
 	union {
 		struct {
@@ -218,6 +220,23 @@ static inline void posixtimer_putref(struct k_itimer *tmr)
 	if (rcuref_put(&tmr->rcuref))
 		posixtimer_free_timer(tmr);
 }
+
+static inline void posixtimer_sigqueue_getref(struct sigqueue *q)
+{
+	struct k_itimer *tmr = container_of(q, struct k_itimer, sigq);
+
+	WARN_ON_ONCE(!rcuref_get(&tmr->rcuref));
+}
+
+static inline void posixtimer_sigqueue_putref(struct sigqueue *q)
+{
+	struct k_itimer *tmr = container_of(q, struct k_itimer, sigq);
+
+	posixtimer_putref(tmr);
+}
+#else  /* CONFIG_POSIX_TIMERS */
+static inline void posixtimer_sigqueue_getref(struct sigqueue *q) { }
+static inline void posixtimer_sigqueue_putref(struct sigqueue *q) { }
 #endif /* !CONFIG_POSIX_TIMERS */
 
 #endif
-- 
cgit v1.2.3


From c2a4796a154bb952be1106911841aab2c8c17c4d Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 5 Nov 2024 09:14:46 +0100
Subject: signal: Cleanup unused posix-timer leftovers

Remove the leftovers of sigqueue preallocation as it's not longer used.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Frederic Weisbecker <frederic@kernel.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/all/20241105064213.786506636@linutronix.de
---
 include/linux/sched/signal.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
index 36283c1c55e9..02972fd41931 100644
--- a/include/linux/sched/signal.h
+++ b/include/linux/sched/signal.h
@@ -338,8 +338,6 @@ extern void force_fatal_sig(int);
 extern void force_exit_sig(int);
 extern int send_sig(int, struct task_struct *, int);
 extern int zap_other_threads(struct task_struct *p);
-extern struct sigqueue *sigqueue_alloc(void);
-extern void sigqueue_free(struct sigqueue *);
 extern int do_sigaction(int, struct k_sigaction *, struct k_sigaction *);
 
 static inline void clear_notify_signal(void)
-- 
cgit v1.2.3


From 647da5f709f112319c0d51e06f330d8afecb1940 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 5 Nov 2024 09:14:48 +0100
Subject: posix-timers: Move sequence logic into struct k_itimer

The posix timer signal handling uses siginfo::si_sys_private for handling
the sequence counter check. That indirection is not longer required and the
sequence count value at signal queueing time can be stored in struct
k_itimer itself.

This removes the requirement of treating siginfo::si_sys_private special as
it's now always zero as the kernel does not touch it anymore.

Suggested-by: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Frederic Weisbecker <frederic@kernel.org>
Acked-by: "Eric W. Biederman" <ebiederm@xmission.com>
Link: https://lore.kernel.org/all/20241105064213.852619866@linutronix.de
---
 include/linux/posix-timers.h       | 2 ++
 include/uapi/asm-generic/siginfo.h | 2 +-
 2 files changed, 3 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h
index 28c0a30e0853..49a89614d900 100644
--- a/include/linux/posix-timers.h
+++ b/include/linux/posix-timers.h
@@ -162,6 +162,7 @@ static inline void posix_cputimers_init_work(void) { }
  * @it_overrun:		The overrun counter for pending signals
  * @it_overrun_last:	The overrun at the time of the last delivered signal
  * @it_signal_seq:	Sequence count to control signal delivery
+ * @it_sigqueue_seq:	The sequence count at the point where the signal was queued
  * @it_sigev_notify:	The notify word of sigevent struct for signal delivery
  * @it_interval:	The interval for periodic timers
  * @it_signal:		Pointer to the creators signal struct
@@ -184,6 +185,7 @@ struct k_itimer {
 	s64			it_overrun;
 	s64			it_overrun_last;
 	unsigned int		it_signal_seq;
+	unsigned int		it_sigqueue_seq;
 	int			it_sigev_notify;
 	enum pid_type		it_pid_type;
 	ktime_t			it_interval;
diff --git a/include/uapi/asm-generic/siginfo.h b/include/uapi/asm-generic/siginfo.h
index b7bc545ec3b2..5a1ca43b5fc6 100644
--- a/include/uapi/asm-generic/siginfo.h
+++ b/include/uapi/asm-generic/siginfo.h
@@ -46,7 +46,7 @@ union __sifields {
 		__kernel_timer_t _tid;	/* timer id */
 		int _overrun;		/* overrun count */
 		sigval_t _sigval;	/* same as below */
-		int _sys_private;       /* not to be passed to user */
+		int _sys_private;       /* Not used by the kernel. Historic leftover. Always 0. */
 	} _timer;
 
 	/* POSIX.1b signals */
-- 
cgit v1.2.3


From 69f032c92cf883ea74a4b69ba3d91317aa6f174e Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 5 Nov 2024 09:14:49 +0100
Subject: signal: Provide ignored_posix_timers list

To prepare for handling posix timer signals on sigaction(SIG_IGN) properly,
add a list to task::signal.

This list will be used to queue posix timers so their signal can be
requeued when SIG_IGN is lifted later.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Frederic Weisbecker <frederic@kernel.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/all/20241105064213.920101900@linutronix.de
---
 include/linux/sched/signal.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
index 02972fd41931..d5d03d919df8 100644
--- a/include/linux/sched/signal.h
+++ b/include/linux/sched/signal.h
@@ -138,6 +138,7 @@ struct signal_struct {
 	/* POSIX.1b Interval Timers */
 	unsigned int		next_posix_timer_id;
 	struct hlist_head	posix_timers;
+	struct hlist_head	ignored_posix_timers;
 
 	/* ITIMER_REAL timer for the process */
 	struct hrtimer real_timer;
-- 
cgit v1.2.3


From 0e20cd33acc7a173b23900550331ee82a23e9f00 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 5 Nov 2024 09:14:51 +0100
Subject: posix-timers: Handle ignored list on delete and exit

To handle posix timer signals on sigaction(SIG_IGN) properly, the timers
will be queued on a separate ignored list.

Add the necessary cleanup code for timer_delete() and exit_itimers().

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Frederic Weisbecker <frederic@kernel.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/all/20241105064213.987530588@linutronix.de
---
 include/linux/posix-timers.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h
index 49a89614d900..1608b52a44d5 100644
--- a/include/linux/posix-timers.h
+++ b/include/linux/posix-timers.h
@@ -152,7 +152,8 @@ static inline void posix_cputimers_init_work(void) { }
 
 /**
  * struct k_itimer - POSIX.1b interval timer structure.
- * @list:		List head for binding the timer to signals->posix_timers
+ * @list:		List node for binding the timer to tsk::signal::posix_timers
+ * @ignored_list:	List node for tracking ignored timers in tsk::signal::ignored_posix_timers
  * @t_hash:		Entry in the posix timer hash table
  * @it_lock:		Lock protecting the timer
  * @kclock:		Pointer to the k_clock struct handling this timer
@@ -176,6 +177,7 @@ static inline void posix_cputimers_init_work(void) { }
  */
 struct k_itimer {
 	struct hlist_node	list;
+	struct hlist_node	ignored_list;
 	struct hlist_node	t_hash;
 	spinlock_t		it_lock;
 	const struct k_clock	*kclock;
-- 
cgit v1.2.3


From df7a996b4dab03c889fa86d849447b716f07b069 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 5 Nov 2024 09:14:54 +0100
Subject: signal: Queue ignored posixtimers on ignore list

Queue posixtimers which have their signal ignored on the ignored list:

   1) When the timer fires and the signal has SIG_IGN set

   2) When SIG_IGN is installed via sigaction() and a timer signal
      is already queued

This only happens when the signal is for a valid timer, which delivered the
signal in periodic mode. One-shot timer signals are correctly dropped.

Due to the lock order constraints (sighand::siglock nests inside
timer::lock) the signal code cannot access any of the timer fields which
are relevant to make this decision, e.g. timer::it_status.

This is addressed by establishing a protection scheme which requires to
lock both locks on the timer side for modifying decision fields in the
timer struct and therefore makes it possible for the signal delivery to
evaluate with only sighand:siglock being held:

  1) Move the NULLification of timer->it_signal into the sighand::siglock
     protected section of timer_delete() and check timer::it_signal in the
     code path which determines whether the signal is dropped or queued on
     the ignore list.

     This ensures that a deleted timer cannot be moved onto the ignore
     list, which would prevent it from being freed on exit() as it is not
     longer in the process' posix timer list.

     If the timer got moved to the ignored list before deletion then it is
     removed from the ignored list under sighand lock in timer_delete().

  2) Provide a new timer::it_sig_periodic flag, which gets set in the
     signal queue path with both timer and sighand locks held if the timer
     is actually in periodic mode at expiry time.

     The ignore list code checks this flag under sighand::siglock and drops
     the signal when it is not set.

     If it is set, then the signal is moved to the ignored list independent
     of the actual state of the timer.

     When the signal is un-ignored later then the signal is moved back to
     the signal queue. On signal delivery the posix timer side decides
     about dropping the signal if the timer was re-armed, dis-armed or
     deleted based on the signal sequence counter check.

     If the thread/process exits then not yet delivered signals are
     discarded which means the reference of the timer containing the
     sigqueue is dropped and frees the timer.

     This is way cheaper than requiring all code paths to lock
     sighand::siglock of the target thread/process on any modification of
     timer::it_status or going all the way and removing pending signals
     from the signal queues on every rearm, disarm or delete operation.

So the protection scheme here is that on the timer side both timer::lock
and sighand::siglock have to be held for modifying

   timer::it_signal
   timer::it_sig_periodic

which means that on the signal side holding sighand::siglock is enough to
evaluate these fields.

In posixtimer_deliver_signal() holding timer::lock is sufficient to do the
sequence validation against timer::it_signal_seq because a concurrent
expiry is waiting on timer::lock to be released.

This completes the SIG_IGN handling and such timers are not longer self
rearmed which avoids pointless wakeups.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Frederic Weisbecker <frederic@kernel.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/all/20241105064214.120756416@linutronix.de
---
 include/linux/posix-timers.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h
index 1608b52a44d5..43ea6e784a25 100644
--- a/include/linux/posix-timers.h
+++ b/include/linux/posix-timers.h
@@ -160,6 +160,7 @@ static inline void posix_cputimers_init_work(void) { }
  * @it_clock:		The posix timer clock id
  * @it_id:		The posix timer id for identifying the timer
  * @it_status:		The status of the timer
+ * @it_sig_periodic:	The periodic status at signal delivery
  * @it_overrun:		The overrun counter for pending signals
  * @it_overrun_last:	The overrun at the time of the last delivered signal
  * @it_signal_seq:	Sequence count to control signal delivery
@@ -184,6 +185,7 @@ struct k_itimer {
 	clockid_t		it_clock;
 	timer_t			it_id;
 	int			it_status;
+	bool			it_sig_periodic;
 	s64			it_overrun;
 	s64			it_overrun_last;
 	unsigned int		it_signal_seq;
-- 
cgit v1.2.3


From 7a66f72b09bb0762360274b1fb677b3433dbaa06 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 5 Nov 2024 09:14:55 +0100
Subject: posix-timers: Cleanup SIG_IGN workaround leftovers

Now that ignored posix timer signals are requeued and the timers are
rearmed on signal delivery the workaround to keep such timers alive and
self rearm them is not longer required.

Remove the relevant hacks and the not longer required return values from
the related functions. The alarm timer workarounds will be cleaned up in a
separate step.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Frederic Weisbecker <frederic@kernel.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/all/20241105064214.187239060@linutronix.de
---
 include/linux/posix-timers.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h
index 43ea6e784a25..f11f10c97bd9 100644
--- a/include/linux/posix-timers.h
+++ b/include/linux/posix-timers.h
@@ -111,7 +111,7 @@ static inline void posix_cputimers_rt_watchdog(struct posix_cputimers *pct,
 
 void posixtimer_rearm_itimer(struct task_struct *p);
 bool posixtimer_init_sigqueue(struct sigqueue *q);
-int posixtimer_send_sigqueue(struct k_itimer *tmr);
+void posixtimer_send_sigqueue(struct k_itimer *tmr);
 bool posixtimer_deliver_signal(struct kernel_siginfo *info, struct sigqueue *timer_sigq);
 void posixtimer_free_timer(struct k_itimer *timer);
 
-- 
cgit v1.2.3


From 2634303f8773b0c602069887565cd412440be15d Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 5 Nov 2024 09:14:58 +0100
Subject: alarmtimers: Remove return value from alarm functions

Now that the SIG_IGN problem is solved in the core code, the alarmtimer
callbacks do not require a return value anymore.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Frederic Weisbecker <frederic@kernel.org>
Link: https://lore.kernel.org/all/20241105064214.318837272@linutronix.de
---
 include/linux/alarmtimer.h | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/linux/alarmtimer.h b/include/linux/alarmtimer.h
index 05e758b8b894..3ffa5341dce2 100644
--- a/include/linux/alarmtimer.h
+++ b/include/linux/alarmtimer.h
@@ -20,12 +20,6 @@ enum alarmtimer_type {
 	ALARM_BOOTTIME_FREEZER,
 };
 
-enum alarmtimer_restart {
-	ALARMTIMER_NORESTART,
-	ALARMTIMER_RESTART,
-};
-
-
 #define ALARMTIMER_STATE_INACTIVE	0x00
 #define ALARMTIMER_STATE_ENQUEUED	0x01
 
@@ -42,14 +36,14 @@ enum alarmtimer_restart {
 struct alarm {
 	struct timerqueue_node	node;
 	struct hrtimer		timer;
-	enum alarmtimer_restart	(*function)(struct alarm *, ktime_t now);
+	void			(*function)(struct alarm *, ktime_t now);
 	enum alarmtimer_type	type;
 	int			state;
 	void			*data;
 };
 
 void alarm_init(struct alarm *alarm, enum alarmtimer_type type,
-		enum alarmtimer_restart (*function)(struct alarm *, ktime_t));
+		void (*function)(struct alarm *, ktime_t));
 void alarm_start(struct alarm *alarm, ktime_t start);
 void alarm_start_relative(struct alarm *alarm, ktime_t start);
 void alarm_restart(struct alarm *alarm);
-- 
cgit v1.2.3


From 1e4033b53db48cc77c436feac9c6d7d63db87b87 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 5 Nov 2024 17:43:57 +0000
Subject: net: skb_reset_mac_len() must check if mac_header was set

Recent discussions show that skb_reset_mac_len() should be more careful.

We expect the MAC header being set.

If not, clear skb->mac_len and fire a warning for CONFIG_DEBUG_NET=y builds.

If after investigations we find that not having a MAC header was okay,
we can remove the warning.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Link: https://lore.kernel.org/netdev/CANn89iJZGH+yEfJxfPWa3Hm7jxb-aeY2Up4HufmLMnVuQXt38A@mail.gmail.com/T/
Cc: En-Wei Wu <en-wei.wu@canonical.com>
Reviewed-by: Joe Damato <jdamato@fastly.com>
Reviewed-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
Link: https://patch.msgid.link/20241105174403.850330-2-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/skbuff.h | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 48f1e0fa2a13..5d8fefa71cac 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -2909,9 +2909,19 @@ static inline void skb_reset_inner_headers(struct sk_buff *skb)
 	skb->inner_transport_header = skb->transport_header;
 }
 
+static inline int skb_mac_header_was_set(const struct sk_buff *skb)
+{
+	return skb->mac_header != (typeof(skb->mac_header))~0U;
+}
+
 static inline void skb_reset_mac_len(struct sk_buff *skb)
 {
-	skb->mac_len = skb->network_header - skb->mac_header;
+	if (!skb_mac_header_was_set(skb)) {
+		DEBUG_NET_WARN_ON_ONCE(1);
+		skb->mac_len = 0;
+	} else {
+		skb->mac_len = skb->network_header - skb->mac_header;
+	}
 }
 
 static inline unsigned char *skb_inner_transport_header(const struct sk_buff
@@ -3014,11 +3024,6 @@ static inline void skb_set_network_header(struct sk_buff *skb, const int offset)
 	skb->network_header += offset;
 }
 
-static inline int skb_mac_header_was_set(const struct sk_buff *skb)
-{
-	return skb->mac_header != (typeof(skb->mac_header))~0U;
-}
-
 static inline unsigned char *skb_mac_header(const struct sk_buff *skb)
 {
 	DEBUG_NET_WARN_ON_ONCE(!skb_mac_header_was_set(skb));
-- 
cgit v1.2.3


From cfe8394e06f22847ecae0312bdd0b633b471b3f0 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 5 Nov 2024 17:43:58 +0000
Subject: net: add debug check in skb_reset_inner_transport_header()

Make sure (skb->data - skb->head) can fit in skb->inner_transport_header

This needs CONFIG_DEBUG_NET=y.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Joe Damato <jdamato@fastly.com>
Reviewed-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
Link: https://patch.msgid.link/20241105174403.850330-3-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/skbuff.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 5d8fefa71cac..75795568803c 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -2937,7 +2937,10 @@ static inline int skb_inner_transport_offset(const struct sk_buff *skb)
 
 static inline void skb_reset_inner_transport_header(struct sk_buff *skb)
 {
-	skb->inner_transport_header = skb->data - skb->head;
+	long offset = skb->data - skb->head;
+
+	DEBUG_NET_WARN_ON_ONCE(offset != (typeof(skb->inner_transport_header))offset);
+	skb->inner_transport_header = offset;
 }
 
 static inline void skb_set_inner_transport_header(struct sk_buff *skb,
-- 
cgit v1.2.3


From 1732e4bedb3e29986da6ea315e8bf8caf74e1e38 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 5 Nov 2024 17:43:59 +0000
Subject: net: add debug check in skb_reset_inner_network_header()

Make sure (skb->data - skb->head) can fit in skb->inner_network_header

This needs CONFIG_DEBUG_NET=y.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Joe Damato <jdamato@fastly.com>
Reviewed-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
Link: https://patch.msgid.link/20241105174403.850330-4-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/skbuff.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 75795568803c..8dafd1295a6e 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -2957,7 +2957,10 @@ static inline unsigned char *skb_inner_network_header(const struct sk_buff *skb)
 
 static inline void skb_reset_inner_network_header(struct sk_buff *skb)
 {
-	skb->inner_network_header = skb->data - skb->head;
+	long offset = skb->data - skb->head;
+
+	DEBUG_NET_WARN_ON_ONCE(offset != (typeof(skb->inner_network_header))offset);
+	skb->inner_network_header = offset;
 }
 
 static inline void skb_set_inner_network_header(struct sk_buff *skb,
-- 
cgit v1.2.3


From 78a0cb2f45dc9327f26956c242f7f4b450efff49 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 5 Nov 2024 17:44:00 +0000
Subject: net: add debug check in skb_reset_inner_mac_header()

Make sure (skb->data - skb->head) can fit in skb->inner_mac_header

This needs CONFIG_DEBUG_NET=y.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Joe Damato <jdamato@fastly.com>
Reviewed-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
Link: https://patch.msgid.link/20241105174403.850330-5-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/skbuff.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 8dafd1295a6e..32a7d8a65b9f 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -2982,7 +2982,10 @@ static inline unsigned char *skb_inner_mac_header(const struct sk_buff *skb)
 
 static inline void skb_reset_inner_mac_header(struct sk_buff *skb)
 {
-	skb->inner_mac_header = skb->data - skb->head;
+	long offset = skb->data - skb->head;
+
+	DEBUG_NET_WARN_ON_ONCE(offset != (typeof(skb->inner_mac_header))offset);
+	skb->inner_mac_header = offset;
 }
 
 static inline void skb_set_inner_mac_header(struct sk_buff *skb,
-- 
cgit v1.2.3


From ae50ea52bdd77eabd8f3c4630c1b03c3373f61a5 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 5 Nov 2024 17:44:01 +0000
Subject: net: add debug check in skb_reset_transport_header()

Make sure (skb->data - skb->head) can fit in skb->transport_header

This needs CONFIG_DEBUG_NET=y.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Joe Damato <jdamato@fastly.com>
Reviewed-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
Link: https://patch.msgid.link/20241105174403.850330-6-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/skbuff.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 32a7d8a65b9f..f76524844e7e 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -3007,7 +3007,10 @@ static inline unsigned char *skb_transport_header(const struct sk_buff *skb)
 
 static inline void skb_reset_transport_header(struct sk_buff *skb)
 {
-	skb->transport_header = skb->data - skb->head;
+	long offset = skb->data - skb->head;
+
+	DEBUG_NET_WARN_ON_ONCE(offset != (typeof(skb->transport_header))offset);
+	skb->transport_header = offset;
 }
 
 static inline void skb_set_transport_header(struct sk_buff *skb,
-- 
cgit v1.2.3


From 305ae87dafc1a9f35374a783119d9e6001d1b8e0 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 5 Nov 2024 17:44:02 +0000
Subject: net: add debug check in skb_reset_network_header()

Make sure (skb->data - skb->head) can fit in skb->network_header

This needs CONFIG_DEBUG_NET=y.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Joe Damato <jdamato@fastly.com>
Reviewed-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
Link: https://patch.msgid.link/20241105174403.850330-7-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/skbuff.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index f76524844e7e..f1e49a32880d 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -3027,7 +3027,10 @@ static inline unsigned char *skb_network_header(const struct sk_buff *skb)
 
 static inline void skb_reset_network_header(struct sk_buff *skb)
 {
-	skb->network_header = skb->data - skb->head;
+	long offset = skb->data - skb->head;
+
+	DEBUG_NET_WARN_ON_ONCE(offset != (typeof(skb->network_header))offset);
+	skb->network_header = offset;
 }
 
 static inline void skb_set_network_header(struct sk_buff *skb, const int offset)
-- 
cgit v1.2.3


From 3b6167e9bfc9eae4edad065c166c667f9a8e693a Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 5 Nov 2024 17:44:03 +0000
Subject: net: add debug check in skb_reset_mac_header()

Make sure (skb->data - skb->head) can fit in skb->mac_header

This needs CONFIG_DEBUG_NET=y.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Joe Damato <jdamato@fastly.com>
Reviewed-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
Link: https://patch.msgid.link/20241105174403.850330-8-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/skbuff.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index f1e49a32880d..e5095d75abba 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -3063,7 +3063,10 @@ static inline void skb_unset_mac_header(struct sk_buff *skb)
 
 static inline void skb_reset_mac_header(struct sk_buff *skb)
 {
-	skb->mac_header = skb->data - skb->head;
+	long offset = skb->data - skb->head;
+
+	DEBUG_NET_WARN_ON_ONCE(offset != (typeof(skb->mac_header))offset);
+	skb->mac_header = offset;
 }
 
 static inline void skb_set_mac_header(struct sk_buff *skb, const int offset)
-- 
cgit v1.2.3


From 7670e74ff31939acd792ff59fa83bc73d040dd8e Mon Sep 17 00:00:00 2001
From: Ed Tsai <ed.tsai@mediatek.com>
Date: Tue, 8 Oct 2024 14:59:42 +0800
Subject: scsi: ufs: ufs-mediatek: Configure individual LU queue flags

Previously, ufs vops config_scsi_dev was removed because there were no
users. ufs-mediatek needs it to configure the queue flags for each LU
individually. Therefore, bring it back and customize the queue flag as
required.

[mkp: fixed typo]

Signed-off-by: Ed Tsai <ed.tsai@mediatek.com>
Link: https://lore.kernel.org/r/20241008065950.23431-1-ed.tsai@mediatek.com
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 include/ufs/ufshcd.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/ufs/ufshcd.h b/include/ufs/ufshcd.h
index 9ea2a7411bb5..d7aca9e61684 100644
--- a/include/ufs/ufshcd.h
+++ b/include/ufs/ufshcd.h
@@ -386,6 +386,7 @@ struct ufs_hba_variant_ops {
 	int	(*get_outstanding_cqs)(struct ufs_hba *hba,
 				       unsigned long *ocqs);
 	int	(*config_esi)(struct ufs_hba *hba);
+	void	(*config_scsi_dev)(struct scsi_device *sdev);
 };
 
 /* clock gating state  */
-- 
cgit v1.2.3


From 49a17639508c3b35f90ca829e60dddeeeb750e74 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Wed, 6 Nov 2024 15:51:39 +0100
Subject: softirq: Use a dedicated thread for timer wakeups on PREEMPT_RT.

The timer and hrtimer soft interrupts are raised in hard interrupt
context. With threaded interrupts force enabled or on PREEMPT_RT this leads
to waking the ksoftirqd for the processing of the soft interrupt.

ksoftirqd runs as SCHED_OTHER task which means it will compete with other
tasks for CPU resources.  This can introduce long delays for timer
processing on heavy loaded systems and is not desired.

Split the TIMER_SOFTIRQ and HRTIMER_SOFTIRQ processing into a dedicated
timers thread and let it run at the lowest SCHED_FIFO priority.
Wake-ups for RT tasks happen from hardirq context so only timer_list timers
and hrtimers for "regular" tasks are processed here. The higher priority
ensures that wakeups are performed before scheduling SCHED_OTHER tasks.

Using a dedicated variable to store the pending softirq bits values ensure
that the timer are not accidentally picked up by ksoftirqd and other
threaded interrupts.

It shouldn't be picked up by ksoftirqd since it runs at lower priority.
However if ksoftirqd is already running while a timer fires, then ksoftird
will be PI-boosted due to the BH-lock to ktimer's priority.

The timer thread can pick up pending softirqs from ksoftirqd but only
if the softirq load is high. It is not be desired that the picked up
softirqs are processed at SCHED_FIFO priority under high softirq load
but this can already happen by a PI-boost by a force-threaded interrupt.

[ frederic@kernel.org: rcutorture.c fixes, storm fix by introduction of
  local_timers_pending() for tick_nohz_next_event() ]

[ junxiao.chang@intel.com: Ensure ktimersd gets woken up even if a
  softirq is currently served. ]

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Paul E. McKenney <paulmck@kernel.org> [rcutorture]
Reviewed-by: Frederic Weisbecker <frederic@kernel.org>
Link: https://lore.kernel.org/all/20241106150419.2593080-4-bigeasy@linutronix.de
---
 include/linux/interrupt.h | 47 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 47 insertions(+)

(limited to 'include')

diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 457151f9f263..8cd9327e4e78 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -616,6 +616,53 @@ extern void __raise_softirq_irqoff(unsigned int nr);
 extern void raise_softirq_irqoff(unsigned int nr);
 extern void raise_softirq(unsigned int nr);
 
+/*
+ * With forced-threaded interrupts enabled a raised softirq is deferred to
+ * ksoftirqd unless it can be handled within the threaded interrupt. This
+ * affects timer_list timers and hrtimers which are explicitly marked with
+ * HRTIMER_MODE_SOFT.
+ * With PREEMPT_RT enabled more hrtimers are moved to softirq for processing
+ * which includes all timers which are not explicitly marked HRTIMER_MODE_HARD.
+ * Userspace controlled timers (like the clock_nanosleep() interface) is divided
+ * into two categories: Tasks with elevated scheduling policy including
+ * SCHED_{FIFO|RR|DL} and the remaining scheduling policy. The tasks with the
+ * elevated scheduling policy are woken up directly from the HARDIRQ while all
+ * other wake ups are delayed to softirq and so to ksoftirqd.
+ *
+ * The ksoftirqd runs at SCHED_OTHER policy at which it should remain since it
+ * handles the softirq in an overloaded situation (not handled everything
+ * within its last run).
+ * If the timers are handled at SCHED_OTHER priority then they competes with all
+ * other SCHED_OTHER tasks for CPU resources are possibly delayed.
+ * Moving timers softirqs to a low priority SCHED_FIFO thread instead ensures
+ * that timer are performed before scheduling any SCHED_OTHER thread.
+ */
+DECLARE_PER_CPU(struct task_struct *, ktimerd);
+DECLARE_PER_CPU(unsigned long, pending_timer_softirq);
+void raise_ktimers_thread(unsigned int nr);
+
+static inline unsigned int local_timers_pending_force_th(void)
+{
+	return __this_cpu_read(pending_timer_softirq);
+}
+
+static inline void raise_timer_softirq(unsigned int nr)
+{
+	lockdep_assert_in_irq();
+	if (force_irqthreads())
+		raise_ktimers_thread(nr);
+	else
+		__raise_softirq_irqoff(nr);
+}
+
+static inline unsigned int local_timers_pending(void)
+{
+	if (force_irqthreads())
+		return local_timers_pending_force_th();
+	else
+		return local_softirq_pending();
+}
+
 DECLARE_PER_CPU(struct task_struct *, ksoftirqd);
 
 static inline struct task_struct *this_cpu_ksoftirqd(void)
-- 
cgit v1.2.3


From fbf920f255315974808ce91d934fe50198294d51 Mon Sep 17 00:00:00 2001
From: Nam Cao <namcao@linutronix.de>
Date: Thu, 31 Oct 2024 16:14:15 +0100
Subject: hrtimers: Add missing hrtimer_init() trace points

hrtimer_init*_on_stack() is not covered by tracing when
CONFIG_DEBUG_OBJECTS_TIMERS=y.

Rework the functions similar to hrtimer_init() and hrtimer_init_sleeper()
so that the hrtimer_init() tracepoint is unconditionally available.

The rework makes hrtimer_init_sleeper() unused. Delete it.

Signed-off-by: Nam Cao <namcao@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/all/74528e8abf2bb96e8bee85ffacbf14e15cf89f0d.1730386209.git.namcao@linutronix.de
---
 include/linux/hrtimer.h | 19 +------------------
 1 file changed, 1 insertion(+), 18 deletions(-)

(limited to 'include')

diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index aa1e65ccb615..5aa9d57528c4 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -228,32 +228,15 @@ static inline void hrtimer_cancel_wait_running(struct hrtimer *timer)
 /* Initialize timers: */
 extern void hrtimer_init(struct hrtimer *timer, clockid_t which_clock,
 			 enum hrtimer_mode mode);
-extern void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, clockid_t clock_id,
-				 enum hrtimer_mode mode);
-
-#ifdef CONFIG_DEBUG_OBJECTS_TIMERS
 extern void hrtimer_init_on_stack(struct hrtimer *timer, clockid_t which_clock,
 				  enum hrtimer_mode mode);
 extern void hrtimer_init_sleeper_on_stack(struct hrtimer_sleeper *sl,
 					  clockid_t clock_id,
 					  enum hrtimer_mode mode);
 
+#ifdef CONFIG_DEBUG_OBJECTS_TIMERS
 extern void destroy_hrtimer_on_stack(struct hrtimer *timer);
 #else
-static inline void hrtimer_init_on_stack(struct hrtimer *timer,
-					 clockid_t which_clock,
-					 enum hrtimer_mode mode)
-{
-	hrtimer_init(timer, which_clock, mode);
-}
-
-static inline void hrtimer_init_sleeper_on_stack(struct hrtimer_sleeper *sl,
-						 clockid_t clock_id,
-						 enum hrtimer_mode mode)
-{
-	hrtimer_init_sleeper(sl, clock_id, mode);
-}
-
 static inline void destroy_hrtimer_on_stack(struct hrtimer *timer) { }
 #endif
 
-- 
cgit v1.2.3


From 908a1d775422ba2e27a5e33d0c130b522419e121 Mon Sep 17 00:00:00 2001
From: Nam Cao <namcao@linutronix.de>
Date: Thu, 31 Oct 2024 16:14:20 +0100
Subject: hrtimers: Introduce hrtimer_setup() to replace hrtimer_init()

To initialize hrtimer, hrtimer_init() needs to be called and also
hrtimer::function must be set. This is error-prone and awkward to use.

Introduce hrtimer_setup() which does both of these things, so that users of
hrtimer can be simplified.

The new setup function also has a sanity check for the provided function
pointer. If NULL, a warning is emitted and a dummy callback installed.

hrtimer_init() will be removed as soon as all of its users have been
converted to the new function.

Signed-off-by: Nam Cao <namcao@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/all/5057c1ddbfd4b92033cd93d37fe38e6b069d5ba6.1730386209.git.namcao@linutronix.de
---
 include/linux/hrtimer.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index 5aa9d57528c4..bcc0715c59a8 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -228,6 +228,8 @@ static inline void hrtimer_cancel_wait_running(struct hrtimer *timer)
 /* Initialize timers: */
 extern void hrtimer_init(struct hrtimer *timer, clockid_t which_clock,
 			 enum hrtimer_mode mode);
+extern void hrtimer_setup(struct hrtimer *timer, enum hrtimer_restart (*function)(struct hrtimer *),
+			  clockid_t clock_id, enum hrtimer_mode mode);
 extern void hrtimer_init_on_stack(struct hrtimer *timer, clockid_t which_clock,
 				  enum hrtimer_mode mode);
 extern void hrtimer_init_sleeper_on_stack(struct hrtimer_sleeper *sl,
-- 
cgit v1.2.3


From 444cb7db4c9f9b5d96be17c38b3e989df7bfabd5 Mon Sep 17 00:00:00 2001
From: Nam Cao <namcao@linutronix.de>
Date: Thu, 31 Oct 2024 16:14:21 +0100
Subject: hrtimers: Introduce hrtimer_setup_on_stack()

To initialize hrtimer on stack, hrtimer_init_on_stack() needs to be called
and also hrtimer::function must be set. This is error-prone and awkward to
use.

Introduce hrtimer_setup_on_stack() which does both of these things, so that
users of hrtimer can be simplified.

The new setup function also has a sanity check for the provided function
pointer. If NULL, a warning is emitted and a dummy callback installed.

hrtimer_init_on_stack() will be removed as soon as all of its users have
been converted to the new function.

Signed-off-by: Nam Cao <namcao@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/all/4b05e2ab3a82c517adf67fabc0f0cd8fe118b97c.1730386209.git.namcao@linutronix.de
---
 include/linux/hrtimer.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index bcc0715c59a8..2da513f8d66a 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -232,6 +232,9 @@ extern void hrtimer_setup(struct hrtimer *timer, enum hrtimer_restart (*function
 			  clockid_t clock_id, enum hrtimer_mode mode);
 extern void hrtimer_init_on_stack(struct hrtimer *timer, clockid_t which_clock,
 				  enum hrtimer_mode mode);
+extern void hrtimer_setup_on_stack(struct hrtimer *timer,
+				   enum hrtimer_restart (*function)(struct hrtimer *),
+				   clockid_t clock_id, enum hrtimer_mode mode);
 extern void hrtimer_init_sleeper_on_stack(struct hrtimer_sleeper *sl,
 					  clockid_t clock_id,
 					  enum hrtimer_mode mode);
-- 
cgit v1.2.3


From c9bd83abfeb9a9b103e689b251ccff7a01be8366 Mon Sep 17 00:00:00 2001
From: Nam Cao <namcao@linutronix.de>
Date: Thu, 31 Oct 2024 16:14:22 +0100
Subject: hrtimers: Introduce hrtimer_setup_sleeper_on_stack()

The hrtimer_init*() API is replaced by hrtimer_setup*() variants to
initialize the timer including the callback function at once.

hrtimer_init_sleeper_on_stack() does not need user to setup the callback
function separately, so a new variant would not be strictly necessary.

Nonetheless, to keep the naming convention consistent, introduce
hrtimer_setup_sleeper_on_stack(). hrtimer_init_on_stack() will be removed
once all users are converted.

Signed-off-by: Nam Cao <namcao@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/all/7b5e18e6dd0ace9eaa211201528cb9dc23752454.1730386209.git.namcao@linutronix.de
---
 include/linux/hrtimer.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index 2da513f8d66a..48872a2b4071 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -238,6 +238,8 @@ extern void hrtimer_setup_on_stack(struct hrtimer *timer,
 extern void hrtimer_init_sleeper_on_stack(struct hrtimer_sleeper *sl,
 					  clockid_t clock_id,
 					  enum hrtimer_mode mode);
+extern void hrtimer_setup_sleeper_on_stack(struct hrtimer_sleeper *sl, clockid_t clock_id,
+					   enum hrtimer_mode mode);
 
 #ifdef CONFIG_DEBUG_OBJECTS_TIMERS
 extern void destroy_hrtimer_on_stack(struct hrtimer *timer);
-- 
cgit v1.2.3


From 8f02e3563bb5824eb01c94f2c75f1dcee2d05625 Mon Sep 17 00:00:00 2001
From: Nam Cao <namcao@linutronix.de>
Date: Thu, 31 Oct 2024 16:14:23 +0100
Subject: hrtimers: Introduce hrtimer_update_function()

Some users of hrtimer need to change the callback function after the
initial setup. They write to hrtimer::function directly.

That's not safe under all circumstances as the write is lockless and a
concurrent timer expiry might end up using the wrong function pointer.

Introduce hrtimer_update_function(), which also performs runtime checks
whether it is safe to modify the callback.

This allows to make hrtimer::function private once all users are converted.

Signed-off-by: Nam Cao <namcao@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/all/20a937b0ae09ad54b5b6d86eabead7c570f1b72e.1730386209.git.namcao@linutronix.de
---
 include/linux/hrtimer.h | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

(limited to 'include')

diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index 48872a2b4071..6e026730e803 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -327,6 +327,28 @@ static inline int hrtimer_callback_running(struct hrtimer *timer)
 	return timer->base->running == timer;
 }
 
+/**
+ * hrtimer_update_function - Update the timer's callback function
+ * @timer:	Timer to update
+ * @function:	New callback function
+ *
+ * Only safe to call if the timer is not enqueued. Can be called in the callback function if the
+ * timer is not enqueued at the same time (see the comments above HRTIMER_STATE_ENQUEUED).
+ */
+static inline void hrtimer_update_function(struct hrtimer *timer,
+					   enum hrtimer_restart (*function)(struct hrtimer *))
+{
+	guard(raw_spinlock_irqsave)(&timer->base->cpu_base->lock);
+
+	if (WARN_ON_ONCE(hrtimer_is_queued(timer)))
+		return;
+
+	if (WARN_ON_ONCE(!function))
+		return;
+
+	timer->function = function;
+}
+
 /* Forward a hrtimer so it expires after now: */
 extern u64
 hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval);
-- 
cgit v1.2.3


From 211647e5121e0e0da974bf69a8eb7c9fe57fa3bd Mon Sep 17 00:00:00 2001
From: Nam Cao <namcao@linutronix.de>
Date: Thu, 31 Oct 2024 16:14:28 +0100
Subject: wait: Switch to use hrtimer_setup_sleeper_on_stack()

hrtimer_setup_sleeper_on_stack() replaces hrtimer_init_sleeper_on_stack()
to keep the naming convention consistent.

Convert the usage site over to it.

Signed-off-by: Nam Cao <namcao@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/all/fc91182375df81120a88dbe0263267e24d1bf19e.1730386209.git.namcao@linutronix.de
---
 include/linux/wait.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/wait.h b/include/linux/wait.h
index 8aa3372f21a0..643b7c7bf376 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -541,8 +541,8 @@ do {										\
 	int __ret = 0;								\
 	struct hrtimer_sleeper __t;						\
 										\
-	hrtimer_init_sleeper_on_stack(&__t, CLOCK_MONOTONIC,			\
-				      HRTIMER_MODE_REL);			\
+	hrtimer_setup_sleeper_on_stack(&__t, CLOCK_MONOTONIC,			\
+				       HRTIMER_MODE_REL);			\
 	if ((timeout) != KTIME_MAX) {						\
 		hrtimer_set_expires_range_ns(&__t.timer, timeout,		\
 					current->timer_slack_ns);		\
-- 
cgit v1.2.3


From f3bef7aaa6c807b78e8fc6929c3226d3038fe505 Mon Sep 17 00:00:00 2001
From: Nam Cao <namcao@linutronix.de>
Date: Thu, 31 Oct 2024 16:14:29 +0100
Subject: hrtimers: Delete hrtimer_init_sleeper_on_stack()

hrtimer_init_sleeper_on_stack() is now unused. Delete it.

Signed-off-by: Nam Cao <namcao@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/all/52549846635c0b3a2abf82101f539efdabcd9778.1730386209.git.namcao@linutronix.de
---
 include/linux/hrtimer.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index 6e026730e803..4e4f04b3c0c2 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -235,9 +235,6 @@ extern void hrtimer_init_on_stack(struct hrtimer *timer, clockid_t which_clock,
 extern void hrtimer_setup_on_stack(struct hrtimer *timer,
 				   enum hrtimer_restart (*function)(struct hrtimer *),
 				   clockid_t clock_id, enum hrtimer_mode mode);
-extern void hrtimer_init_sleeper_on_stack(struct hrtimer_sleeper *sl,
-					  clockid_t clock_id,
-					  enum hrtimer_mode mode);
 extern void hrtimer_setup_sleeper_on_stack(struct hrtimer_sleeper *sl, clockid_t clock_id,
 					   enum hrtimer_mode mode);
 
-- 
cgit v1.2.3


From 3c2fb0152175f9f596b40763cdc1378297da60af Mon Sep 17 00:00:00 2001
From: Nam Cao <namcao@linutronix.de>
Date: Thu, 31 Oct 2024 16:14:33 +0100
Subject: hrtimers: Delete hrtimer_init_on_stack()

hrtimer_init_on_stack() is now unused. Delete it.

Signed-off-by: Nam Cao <namcao@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/all/510ce0d2944c4a382ea51e51d03dcfb73ba0f4f7.1730386209.git.namcao@linutronix.de
---
 include/linux/hrtimer.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index 4e4f04b3c0c2..7ef5f7ef31a9 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -230,8 +230,6 @@ extern void hrtimer_init(struct hrtimer *timer, clockid_t which_clock,
 			 enum hrtimer_mode mode);
 extern void hrtimer_setup(struct hrtimer *timer, enum hrtimer_restart (*function)(struct hrtimer *),
 			  clockid_t clock_id, enum hrtimer_mode mode);
-extern void hrtimer_init_on_stack(struct hrtimer *timer, clockid_t which_clock,
-				  enum hrtimer_mode mode);
 extern void hrtimer_setup_on_stack(struct hrtimer *timer,
 				   enum hrtimer_restart (*function)(struct hrtimer *),
 				   clockid_t clock_id, enum hrtimer_mode mode);
-- 
cgit v1.2.3


From b33cc96c7020b923085046e5cf2e934f41c530ec Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Wed, 2 Oct 2024 16:25:30 +0100
Subject: mm: add PageAnonNotKsm()

Check that this anonymous page is really anonymous, not anonymous-or-KSM.
This optimises the debug check, but its real purpose is to remove the last
two users of PageKsm().

[willy@infradead.org: fix assertions]
  Link: https://lkml.kernel.org/r/ZwApWPER7caIA_N3@casper.infradead.org
Link: https://lkml.kernel.org/r/20241002152533.1350629-5-willy@infradead.org
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Acked-by: David Hildenbrand <david@redhat.com>
Cc: Alex Shi <alexs@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/page-flags.h | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index cc839e4365c1..1fcef06a2d31 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -689,6 +689,13 @@ static __always_inline bool folio_test_anon(const struct folio *folio)
 	return ((unsigned long)folio->mapping & PAGE_MAPPING_ANON) != 0;
 }
 
+static __always_inline bool PageAnonNotKsm(const struct page *page)
+{
+	unsigned long flags = (unsigned long)page_folio(page)->mapping;
+
+	return (flags & PAGE_MAPPING_FLAGS) == PAGE_MAPPING_ANON;
+}
+
 static __always_inline bool PageAnon(const struct page *page)
 {
 	return folio_test_anon(page_folio(page));
@@ -1137,14 +1144,14 @@ static __always_inline int PageAnonExclusive(const struct page *page)
 
 static __always_inline void SetPageAnonExclusive(struct page *page)
 {
-	VM_BUG_ON_PGFLAGS(!PageAnon(page) || PageKsm(page), page);
+	VM_BUG_ON_PGFLAGS(!PageAnonNotKsm(page), page);
 	VM_BUG_ON_PGFLAGS(PageHuge(page) && !PageHead(page), page);
 	set_bit(PG_anon_exclusive, &PF_ANY(page, 1)->flags);
 }
 
 static __always_inline void ClearPageAnonExclusive(struct page *page)
 {
-	VM_BUG_ON_PGFLAGS(!PageAnon(page) || PageKsm(page), page);
+	VM_BUG_ON_PGFLAGS(!PageAnonNotKsm(page), page);
 	VM_BUG_ON_PGFLAGS(PageHuge(page) && !PageHead(page), page);
 	clear_bit(PG_anon_exclusive, &PF_ANY(page, 1)->flags);
 }
-- 
cgit v1.2.3


From b9a256352f3ba697396c26d2a74f4081335f8cef Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Wed, 2 Oct 2024 16:25:31 +0100
Subject: mm: remove PageKsm()

All callers have been converted to use folio_test_ksm() or
PageAnonNotKsm(), so we can remove this wrapper.

Link: https://lkml.kernel.org/r/20241002152533.1350629-6-willy@infradead.org
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Acked-by: David Hildenbrand <david@redhat.com>
Cc: Alex Shi <alexs@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/page-flags.h | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 1fcef06a2d31..e80665bc51fa 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -725,13 +725,8 @@ static __always_inline bool folio_test_ksm(const struct folio *folio)
 	return ((unsigned long)folio->mapping & PAGE_MAPPING_FLAGS) ==
 				PAGE_MAPPING_KSM;
 }
-
-static __always_inline bool PageKsm(const struct page *page)
-{
-	return folio_test_ksm(page_folio(page));
-}
 #else
-TESTPAGEFLAG_FALSE(Ksm, ksm)
+FOLIO_TEST_FLAG_FALSE(ksm)
 #endif
 
 u64 stable_page_flags(const struct page *page);
-- 
cgit v1.2.3


From d7d65b1039019e8789119b498d97cf2531d989a8 Mon Sep 17 00:00:00 2001
From: Anshuman Khandual <anshuman.khandual@arm.com>
Date: Thu, 3 Oct 2024 10:18:42 +0530
Subject: mm: move set_pxd_safe() helpers from generic to platform

set_pxd_safe() helpers that serve a specific purpose for both x86 and
riscv platforms, do not need to be in the common memory code.  Otherwise
they just unnecessarily make the common API more complicated.  This moves
the helpers from common code to platform instead.

Link: https://lkml.kernel.org/r/20241003044842.246016-1-anshuman.khandual@arm.com
Signed-off-by: Anshuman Khandual <anshuman.khandual@arm.com>
Suggested-by: David Hildenbrand <david@redhat.com>
Acked-by: Dave Hansen <dave.hansen@linux.intel.com>
Acked-by: David Hildenbrand <david@redhat.com>
Cc: Paul Walmsley <paul.walmsley@sifive.com>
Cc: Palmer Dabbelt <palmer@dabbelt.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: David Hildenbrand <david@redhat.com>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/pgtable.h | 38 --------------------------------------
 1 file changed, 38 deletions(-)

(limited to 'include')

diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
index e8b2ac6bd2ae..23aeffd89a4e 100644
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -1056,44 +1056,6 @@ static inline int pgd_same(pgd_t pgd_a, pgd_t pgd_b)
 }
 #endif
 
-/*
- * Use set_p*_safe(), and elide TLB flushing, when confident that *no*
- * TLB flush will be required as a result of the "set". For example, use
- * in scenarios where it is known ahead of time that the routine is
- * setting non-present entries, or re-setting an existing entry to the
- * same value. Otherwise, use the typical "set" helpers and flush the
- * TLB.
- */
-#define set_pte_safe(ptep, pte) \
-({ \
-	WARN_ON_ONCE(pte_present(*ptep) && !pte_same(*ptep, pte)); \
-	set_pte(ptep, pte); \
-})
-
-#define set_pmd_safe(pmdp, pmd) \
-({ \
-	WARN_ON_ONCE(pmd_present(*pmdp) && !pmd_same(*pmdp, pmd)); \
-	set_pmd(pmdp, pmd); \
-})
-
-#define set_pud_safe(pudp, pud) \
-({ \
-	WARN_ON_ONCE(pud_present(*pudp) && !pud_same(*pudp, pud)); \
-	set_pud(pudp, pud); \
-})
-
-#define set_p4d_safe(p4dp, p4d) \
-({ \
-	WARN_ON_ONCE(p4d_present(*p4dp) && !p4d_same(*p4dp, p4d)); \
-	set_p4d(p4dp, p4d); \
-})
-
-#define set_pgd_safe(pgdp, pgd) \
-({ \
-	WARN_ON_ONCE(pgd_present(*pgdp) && !pgd_same(*pgdp, pgd)); \
-	set_pgd(pgdp, pgd); \
-})
-
 #ifndef __HAVE_ARCH_DO_SWAP_PAGE
 static inline void arch_do_swap_page_nr(struct mm_struct *mm,
 				     struct vm_area_struct *vma,
-- 
cgit v1.2.3


From 7f24cbc9c4d42db8a3c8484d120cf9c1da557fab Mon Sep 17 00:00:00 2001
From: Oscar Salvador <osalvador@suse.de>
Date: Mon, 7 Oct 2024 09:50:29 +0200
Subject: mm/mmap: teach generic_get_unmapped_area{_topdown} to handle hugetlb
 mappings

Patch series "Unify hugetlb into arch_get_unmapped_area functions", v4.

This is an attempt to get rid of a fair amount of duplicated code wrt.
hugetlb and *get_unmapped_area* functions.

HugeTLB registers a .get_unmapped_area function which gets called from
__get_unmapped_area().
hugetlb_get_unmapped_area() is defined by a bunch of architectures and
it also has a generic definition for those that do not define it.
Short-long story is that there is a ton of duplicated code between
specific hugetlb *_get_unmapped_area_* functions and mm-core functions,
so we can do better by teaching arch_get_unmapped_area* functions how
to deal with hugetlb mappings.

Note that not a lot of things need to be taught though.
hugetlb_get_unmapped_area, that gets called for hugetlb mappings, runs
some sanity checks prior to calling mm_get_unmapped_area_vmflags(), so we
do not need to that down the road in the respective
{generic,arch}_get_unmapped_area* functions.

More information can be found in the respective patches.

LTP mmapstress hugetlb selftests were ran succesfully on:


This patch (of 9):

We want to stop special casing hugetlb mappings and make them go through
generic channels, so teach generic_get_unmapped_area{_topdown} to handle
those.  The main difference is that we set info.align_mask for huge
mappings.

Link: https://lkml.kernel.org/r/20241007075037.267650-1-osalvador@suse.de
Link: https://lkml.kernel.org/r/20241007075037.267650-2-osalvador@suse.de
Signed-off-by: Oscar Salvador <osalvador@suse.de>
Cc: David Hildenbrand <david@redhat.com>
Cc: Donet Tom <donettom@linux.ibm.com>
Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Peter Xu <peterx@redhat.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/hugetlb.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include')

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index e4697539b665..368d552e4860 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -1035,9 +1035,19 @@ void hugetlb_unregister_node(struct node *node);
  */
 bool is_raw_hwpoison_page_in_hugepage(struct page *page);
 
+static inline unsigned long huge_page_mask_align(struct file *file)
+{
+	return PAGE_MASK & ~huge_page_mask(hstate_file(file));
+}
+
 #else	/* CONFIG_HUGETLB_PAGE */
 struct hstate {};
 
+static inline unsigned long huge_page_mask_align(struct file *file)
+{
+	return 0;
+}
+
 static inline struct hugepage_subpool *hugetlb_folio_subpool(struct folio *folio)
 {
 	return NULL;
-- 
cgit v1.2.3


From 7bd3f1e1a9ae7f7508c88dd056755a0a4741ae88 Mon Sep 17 00:00:00 2001
From: Oscar Salvador <osalvador@suse.de>
Date: Mon, 7 Oct 2024 09:50:34 +0200
Subject: mm: make hugetlb mappings go through mm_get_unmapped_area_vmflags

Hugetlb mappings will no longer be special cased but rather go through the
generic mm_get_unmapped_area_vmflags function.  For that to happen, let us
remove the .get_unmapped_area from hugetlbfs_file_operations struct, and
hint __get_unmapped_area that it should not send hugetlb mappings through
thp_get_unmapped_area_vmflags but through mm_get_unmapped_area_vmflags.

Create also a function called hugetlb_mmap_check_and_align() where a
couple of safety checks are being done and the addr is aligned to the huge
page size.  Otherwise we will have to do this in every single function,
which duplicates quite a lot of code.

Link: https://lkml.kernel.org/r/20241007075037.267650-7-osalvador@suse.de
Signed-off-by: Oscar Salvador <osalvador@suse.de>
Cc: David Hildenbrand <david@redhat.com>
Cc: Donet Tom <donettom@linux.ibm.com>
Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Peter Xu <peterx@redhat.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/hugetlb.h | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 368d552e4860..3a81b6126f62 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -546,11 +546,10 @@ static inline struct hstate *hstate_inode(struct inode *i)
 }
 #endif /* !CONFIG_HUGETLBFS */
 
-#ifdef HAVE_ARCH_HUGETLB_UNMAPPED_AREA
-unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
-					unsigned long len, unsigned long pgoff,
-					unsigned long flags);
-#endif /* HAVE_ARCH_HUGETLB_UNMAPPED_AREA */
+unsigned long
+__generic_hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
+				    unsigned long len, unsigned long pgoff,
+				    unsigned long flags);
 
 unsigned long
 generic_hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
-- 
cgit v1.2.3


From cc92882ee218d62ef017fa545b3c8a2d1e060a5a Mon Sep 17 00:00:00 2001
From: Oscar Salvador <osalvador@suse.de>
Date: Mon, 7 Oct 2024 09:50:35 +0200
Subject: mm: drop hugetlb_get_unmapped_area{_*} functions

Hugetlb mappings are now handled through normal channels just like any
other mapping, so we no longer need hugetlb_get_unmapped_area* specific
functions.

Link: https://lkml.kernel.org/r/20241007075037.267650-8-osalvador@suse.de
Signed-off-by: Oscar Salvador <osalvador@suse.de>
Cc: David Hildenbrand <david@redhat.com>
Cc: Donet Tom <donettom@linux.ibm.com>
Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Peter Xu <peterx@redhat.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/hugetlb.h | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 3a81b6126f62..ae4fe8615bb6 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -547,15 +547,10 @@ static inline struct hstate *hstate_inode(struct inode *i)
 #endif /* !CONFIG_HUGETLBFS */
 
 unsigned long
-__generic_hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
+hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
 				    unsigned long len, unsigned long pgoff,
 				    unsigned long flags);
 
-unsigned long
-generic_hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
-				  unsigned long len, unsigned long pgoff,
-				  unsigned long flags);
-
 /*
  * huegtlb page specific state flags.  These flags are located in page.private
  * of the hugetlb head page.  Functions created via the below macros should be
-- 
cgit v1.2.3


From 5b2f650d593ed4d020228df8563e7ad23abc847f Mon Sep 17 00:00:00 2001
From: Oscar Salvador <osalvador@suse.de>
Date: Mon, 7 Oct 2024 09:50:36 +0200
Subject: arch/s390: clean up hugetlb definitions

s390 redefines functions that are already defined (and the same) in
include/asm-generic/hugetlb.h.

Do as the other architectures:
1) include include/asm-generic/hugetlb.h
2) drop the already defined functions in the generic hugetlb.h and
3) use the __HAVE_ARCH_HUGE_* macros to define our own.

This gets rid of quite some code.

Link: https://lkml.kernel.org/r/20241007075037.267650-9-osalvador@suse.de
Signed-off-by: Oscar Salvador <osalvador@suse.de>
Cc: David Hildenbrand <david@redhat.com>
Cc: Donet Tom <donettom@linux.ibm.com>
Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Peter Xu <peterx@redhat.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/asm-generic/hugetlb.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include')

diff --git a/include/asm-generic/hugetlb.h b/include/asm-generic/hugetlb.h
index 594d5905f615..67bbdafcfc22 100644
--- a/include/asm-generic/hugetlb.h
+++ b/include/asm-generic/hugetlb.h
@@ -42,20 +42,26 @@ static inline pte_t huge_pte_modify(pte_t pte, pgprot_t newprot)
 	return pte_modify(pte, newprot);
 }
 
+#ifndef __HAVE_ARCH_HUGE_PTE_MKUFFD_WP
 static inline pte_t huge_pte_mkuffd_wp(pte_t pte)
 {
 	return huge_pte_wrprotect(pte_mkuffd_wp(pte));
 }
+#endif
 
+#ifndef __HAVE_ARCH_HUGE_PTE_CLEAR_UFFD_WP
 static inline pte_t huge_pte_clear_uffd_wp(pte_t pte)
 {
 	return pte_clear_uffd_wp(pte);
 }
+#endif
 
+#ifndef __HAVE_ARCH_HUGE_PTE_UFFD_WP
 static inline int huge_pte_uffd_wp(pte_t pte)
 {
 	return pte_uffd_wp(pte);
 }
+#endif
 
 #ifndef __HAVE_ARCH_HUGE_PTE_CLEAR
 static inline void huge_pte_clear(struct mm_struct *mm, unsigned long addr,
@@ -106,10 +112,12 @@ static inline int huge_pte_none(pte_t pte)
 #endif
 
 /* Please refer to comments above pte_none_mostly() for the usage */
+#ifndef __HAVE_ARCH_HUGE_PTE_NONE_MOSTLY
 static inline int huge_pte_none_mostly(pte_t pte)
 {
 	return huge_pte_none(pte) || is_pte_marker(pte);
 }
+#endif
 
 #ifndef __HAVE_ARCH_PREPARE_HUGEPAGE_RANGE
 static inline int prepare_hugepage_range(struct file *file,
-- 
cgit v1.2.3


From bd40b053fabe27209cb240d205a0c817cbe5fb87 Mon Sep 17 00:00:00 2001
From: Oscar Salvador <osalvador@suse.de>
Date: Mon, 7 Oct 2024 09:50:37 +0200
Subject: mm: consolidate common checks in hugetlb_get_unmapped_area

prepare_hugepage_range() performs almost the same checks for all
architectures that define it, with the exception of mips and loongarch
that also check for overflows.

The rest checks for the addr and len to be properly aligned, so we can
move that to hugetlb_get_unmapped_area() and get rid of a fair amount of
duplicated code.

[akpm@linux-foundation.org: remove now-unused local]
 Link: https://lore.kernel.org/oe-kbuild-all/202410081210.uNLbf3Jk-lkp@intel.com/
Link: https://lkml.kernel.org/r/20241007075037.267650-10-osalvador@suse.de
Signed-off-by: Oscar Salvador <osalvador@suse.de>
Cc: David Hildenbrand <david@redhat.com>
Cc: Donet Tom <donettom@linux.ibm.com>
Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Peter Xu <peterx@redhat.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/asm-generic/hugetlb.h | 7 -------
 1 file changed, 7 deletions(-)

(limited to 'include')

diff --git a/include/asm-generic/hugetlb.h b/include/asm-generic/hugetlb.h
index 67bbdafcfc22..f42133dae68e 100644
--- a/include/asm-generic/hugetlb.h
+++ b/include/asm-generic/hugetlb.h
@@ -123,13 +123,6 @@ static inline int huge_pte_none_mostly(pte_t pte)
 static inline int prepare_hugepage_range(struct file *file,
 		unsigned long addr, unsigned long len)
 {
-	struct hstate *h = hstate_file(file);
-
-	if (len & ~huge_page_mask(h))
-		return -EINVAL;
-	if (addr & ~huge_page_mask(h))
-		return -EINVAL;
-
 	return 0;
 }
 #endif
-- 
cgit v1.2.3


From afe789b7367ad43ba8f079981d40851f8bd319ce Mon Sep 17 00:00:00 2001
From: John Hubbard <jhubbard@nvidia.com>
Date: Tue, 8 Oct 2024 19:50:24 -0700
Subject: kaslr: rename physmem_end and PHYSMEM_END to direct_map_physmem_end

For clarity.  It's increasingly hard to reason about the code, when KASLR
is moving around the boundaries.  In this case where KASLR is randomizing
the location of the kernel image within physical memory, the maximum
number of address bits for physical memory has not changed.

What has changed is the ending address of memory that is allowed to be
directly mapped by the kernel.

Let's name the variable, and the associated macro accordingly.

Also, enhance the comment above the direct_map_physmem_end definition,
to further clarify how this all works.

Link: https://lkml.kernel.org/r/20241009025024.89813-1-jhubbard@nvidia.com
Signed-off-by: John Hubbard <jhubbard@nvidia.com>
Reviewed-by: Pankaj Gupta <pankaj.gupta@amd.com>
Acked-by: David Hildenbrand <david@redhat.com>
Acked-by: Will Deacon <will@kernel.org>
Reviewed-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Alistair Popple <apopple@nvidia.com>
Cc: Jordan Niethe <jniethe@nvidia.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/mm.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 8f5394d75ce2..4570f33e2429 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -97,11 +97,11 @@ extern const int mmap_rnd_compat_bits_max;
 extern int mmap_rnd_compat_bits __read_mostly;
 #endif
 
-#ifndef PHYSMEM_END
+#ifndef DIRECT_MAP_PHYSMEM_END
 # ifdef MAX_PHYSMEM_BITS
-# define PHYSMEM_END	((1ULL << MAX_PHYSMEM_BITS) - 1)
+# define DIRECT_MAP_PHYSMEM_END	((1ULL << MAX_PHYSMEM_BITS) - 1)
 # else
-# define PHYSMEM_END	(((phys_addr_t)-1)&~(1ULL<<63))
+# define DIRECT_MAP_PHYSMEM_END	(((phys_addr_t)-1)&~(1ULL<<63))
 # endif
 #endif
 
-- 
cgit v1.2.3


From 6359c39c9de66dede8ff5ff257c9e117483dbc7c Mon Sep 17 00:00:00 2001
From: Kefeng Wang <wangkefeng.wang@huawei.com>
Date: Thu, 10 Oct 2024 14:15:56 +0800
Subject: mm: remove unused hugepage for vma_alloc_folio()

The hugepage parameter was deprecated since commit ddc1a5cbc05d
("mempolicy: alloc_pages_mpol() for NUMA policy without vma"), for
PMD-sized THP, it still tries only preferred node if possible in
vma_alloc_folio() by checking the order of the folio allocation.

Link: https://lkml.kernel.org/r/20241010061556.1846751-1-wangkefeng.wang@huawei.com
Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Acked-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Zi Yan <ziy@nvidia.com>
Reviewed-by: Barry Song <baohua@kernel.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/gfp.h     | 6 +++---
 include/linux/highmem.h | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index a951de920e20..b65724c3427d 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -306,7 +306,7 @@ struct folio *folio_alloc_noprof(gfp_t gfp, unsigned int order);
 struct folio *folio_alloc_mpol_noprof(gfp_t gfp, unsigned int order,
 		struct mempolicy *mpol, pgoff_t ilx, int nid);
 struct folio *vma_alloc_folio_noprof(gfp_t gfp, int order, struct vm_area_struct *vma,
-		unsigned long addr, bool hugepage);
+		unsigned long addr);
 #else
 static inline struct page *alloc_pages_noprof(gfp_t gfp_mask, unsigned int order)
 {
@@ -326,7 +326,7 @@ static inline struct folio *folio_alloc_mpol_noprof(gfp_t gfp, unsigned int orde
 {
 	return folio_alloc_noprof(gfp, order);
 }
-#define vma_alloc_folio_noprof(gfp, order, vma, addr, hugepage)		\
+#define vma_alloc_folio_noprof(gfp, order, vma, addr)		\
 	folio_alloc_noprof(gfp, order)
 #endif
 
@@ -341,7 +341,7 @@ static inline struct folio *folio_alloc_mpol_noprof(gfp_t gfp, unsigned int orde
 static inline struct page *alloc_page_vma_noprof(gfp_t gfp,
 		struct vm_area_struct *vma, unsigned long addr)
 {
-	struct folio *folio = vma_alloc_folio_noprof(gfp, 0, vma, addr, false);
+	struct folio *folio = vma_alloc_folio_noprof(gfp, 0, vma, addr);
 
 	return &folio->page;
 }
diff --git a/include/linux/highmem.h b/include/linux/highmem.h
index 930a591b9b61..bec9bd715acf 100644
--- a/include/linux/highmem.h
+++ b/include/linux/highmem.h
@@ -226,7 +226,7 @@ struct folio *vma_alloc_zeroed_movable_folio(struct vm_area_struct *vma,
 {
 	struct folio *folio;
 
-	folio = vma_alloc_folio(GFP_HIGHUSER_MOVABLE, 0, vma, vaddr, false);
+	folio = vma_alloc_folio(GFP_HIGHUSER_MOVABLE, 0, vma, vaddr);
 	if (folio)
 		clear_user_highpage(&folio->page, vaddr);
 
-- 
cgit v1.2.3


From 0aa3ef3637920799f1b2f67dfff0d698127444ac Mon Sep 17 00:00:00 2001
From: Shakeel Butt <shakeel.butt@linux.dev>
Date: Wed, 9 Oct 2024 17:35:50 -0700
Subject: memcg: add tracing for memcg stat updates

The memcg stats are maintained in rstat infrastructure which provides very
fast updates side and reasonable read side.  However memcg added plethora
of stats and made the read side, which is cgroup rstat flush, very slow.
To solve that, threshold was added in the memcg stats read side i.e.  no
need to flush the stats if updates are within the threshold.

This threshold based improvement worked for sometime but more stats were
added to memcg and also the read codepath was getting triggered in the
performance sensitive paths which made threshold based ratelimiting
ineffective.  We need more visibility into the hot and cold stats i.e.
stats with a lot of updates.  Let's add trace to get that visibility.

[shakeel.butt@linux.dev: use unsigned long type for memcg_rstat_events, per Yosry]
  Link: https://lkml.kernel.org/r/20241015213721.3804209-1-shakeel.butt@linux.dev
Link: https://lkml.kernel.org/r/20241010003550.3695245-1-shakeel.butt@linux.dev
Signed-off-by: Shakeel Butt <shakeel.butt@linux.dev>
Acked-by: Roman Gushchin <roman.gushchin@linux.dev>
Reviewed-by: Yosry Ahmed <yosryahmed@google.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Reviewed-by: T.J. Mercier <tjmercier@google.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Muchun Song <songmuchun@bytedance.com>
Cc: JP Kobryn <inwardvessel@gmail.com>
Cc: Steven Rostedt (Google) <rostedt@goodmis.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/trace/events/memcg.h | 81 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 81 insertions(+)
 create mode 100644 include/trace/events/memcg.h

(limited to 'include')

diff --git a/include/trace/events/memcg.h b/include/trace/events/memcg.h
new file mode 100644
index 000000000000..8667e57816d2
--- /dev/null
+++ b/include/trace/events/memcg.h
@@ -0,0 +1,81 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM memcg
+
+#if !defined(_TRACE_MEMCG_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_MEMCG_H
+
+#include <linux/memcontrol.h>
+#include <linux/tracepoint.h>
+
+
+DECLARE_EVENT_CLASS(memcg_rstat_stats,
+
+	TP_PROTO(struct mem_cgroup *memcg, int item, int val),
+
+	TP_ARGS(memcg, item, val),
+
+	TP_STRUCT__entry(
+		__field(u64, id)
+		__field(int, item)
+		__field(int, val)
+	),
+
+	TP_fast_assign(
+		__entry->id = cgroup_id(memcg->css.cgroup);
+		__entry->item = item;
+		__entry->val = val;
+	),
+
+	TP_printk("memcg_id=%llu item=%d val=%d",
+		  __entry->id, __entry->item, __entry->val)
+);
+
+DEFINE_EVENT(memcg_rstat_stats, mod_memcg_state,
+
+	TP_PROTO(struct mem_cgroup *memcg, int item, int val),
+
+	TP_ARGS(memcg, item, val)
+);
+
+DEFINE_EVENT(memcg_rstat_stats, mod_memcg_lruvec_state,
+
+	TP_PROTO(struct mem_cgroup *memcg, int item, int val),
+
+	TP_ARGS(memcg, item, val)
+);
+
+DECLARE_EVENT_CLASS(memcg_rstat_events,
+
+	TP_PROTO(struct mem_cgroup *memcg, int item, unsigned long val),
+
+	TP_ARGS(memcg, item, val),
+
+	TP_STRUCT__entry(
+		__field(u64, id)
+		__field(int, item)
+		__field(unsigned long, val)
+	),
+
+	TP_fast_assign(
+		__entry->id = cgroup_id(memcg->css.cgroup);
+		__entry->item = item;
+		__entry->val = val;
+	),
+
+	TP_printk("memcg_id=%llu item=%d val=%lu",
+		  __entry->id, __entry->item, __entry->val)
+);
+
+DEFINE_EVENT(memcg_rstat_events, count_memcg_events,
+
+	TP_PROTO(struct mem_cgroup *memcg, int item, unsigned long val),
+
+	TP_ARGS(memcg, item, val)
+);
+
+
+#endif /* _TRACE_MEMCG_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
-- 
cgit v1.2.3


From 5708d96da20b99b4665ad72395e3727016057f70 Mon Sep 17 00:00:00 2001
From: Zi Yan <ziy@nvidia.com>
Date: Fri, 11 Oct 2024 11:03:04 -0400
Subject: mm: avoid zeroing user movable page twice with init_on_alloc=1

Commit 6471384af2a6 ("mm: security: introduce init_on_alloc=1 and
init_on_free=1 boot options") forces allocated page to be zeroed in
post_alloc_hook() when init_on_alloc=1.

For order-0 folios, if arch does not define
vma_alloc_zeroed_movable_folio(), the default implementation again zeros
the page return from the buddy allocator.  So the page is zeroed twice.
Fix it by passing __GFP_ZERO instead to avoid double page zeroing.  At the
moment, s390,arm64,x86,alpha,m68k are not impacted since they define their
own vma_alloc_zeroed_movable_folio().

For >0 order folios (mTHP and PMD THP), folio_zero_user() is called to
zero the folio again.  Fix it by calling folio_zero_user() only if
init_on_alloc is set.  All arch are impacted.

Add alloc_zeroed() helper to encapsulate the init_on_alloc check.

[ziy@nvidia.com: comment fixes, per David]
  Link: https://lkml.kernel.org/r/97DB52E1-C594-49B5-9736-89AC302FAB01@nvidia.com
Link: https://lkml.kernel.org/r/20241011150304.709590-1-ziy@nvidia.com
Signed-off-by: Zi Yan <ziy@nvidia.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Acked-by: David Hildenbrand <david@redhat.com>
Cc: Alexander Potapenko <glider@google.com>
Cc: "Huang, Ying" <ying.huang@intel.com>
Cc: John Hubbard <jhubbard@nvidia.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Kefeng Wang <wangkefeng.wang@huawei.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Miaohe Lin <linmiaohe@huawei.com>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/highmem.h | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/linux/highmem.h b/include/linux/highmem.h
index bec9bd715acf..6e452bd8e7e3 100644
--- a/include/linux/highmem.h
+++ b/include/linux/highmem.h
@@ -224,13 +224,7 @@ static inline
 struct folio *vma_alloc_zeroed_movable_folio(struct vm_area_struct *vma,
 				   unsigned long vaddr)
 {
-	struct folio *folio;
-
-	folio = vma_alloc_folio(GFP_HIGHUSER_MOVABLE, 0, vma, vaddr);
-	if (folio)
-		clear_user_highpage(&folio->page, vaddr);
-
-	return folio;
+	return vma_alloc_folio(GFP_HIGHUSER_MOVABLE | __GFP_ZERO, 0, vma, vaddr);
 }
 #endif
 
-- 
cgit v1.2.3


From 1f2d03cc535138b7cdbed0122cdc0f9e9626c6bf Mon Sep 17 00:00:00 2001
From: Jaewon Kim <jaewon31.kim@samsung.com>
Date: Fri, 11 Oct 2024 21:49:28 +0900
Subject: vmscan: add a vmscan event for reclaim_pages

reclaim_folio_list uses a dummy reclaim_stat and is not being used.  To
know the memory stat, add a new trace event.  This is useful how how many
pages are not reclaimed or why.

This is an example:

mm_vmscan_reclaim_pages: nid=0 nr_scanned=112 nr_reclaimed=112 nr_dirty=0 nr_writeback=0 nr_congested=0 nr_immediate=0 nr_activate_anon=0 nr_activate_file=0 nr_ref_keep=0 nr_unmap_fail=0

Currently reclaim_folio_list is only called by reclaim_pages, and
reclaim_pages is used by damon and madvise.  In the latest Android,
reclaim_pages is also used by shmem to reclaim all pages in a
address_space.

[jaewon31.kim@samsung.com: use sc.nr_scanned rather than new counting]
  Link: https://lkml.kernel.org/r/20241016143227.961162-1-jaewon31.kim@samsung.com
Link: https://lkml.kernel.org/r/20241011124928.1224813-1-jaewon31.kim@samsung.com
Signed-off-by: Jaewon Kim <jaewon31.kim@samsung.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Jaewon Kim <jaewon31.kim@samsung.com>
Cc: Kalesh Singh <kaleshsingh@google.com>
Cc: Minchan Kim <minchan@kernel.org>
Cc: SeongJae Park <sj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/trace/events/vmscan.h | 45 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 45 insertions(+)

(limited to 'include')

diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h
index 1a488c30afa5..490958fa10de 100644
--- a/include/trace/events/vmscan.h
+++ b/include/trace/events/vmscan.h
@@ -346,6 +346,51 @@ TRACE_EVENT(mm_vmscan_write_folio,
 		show_reclaim_flags(__entry->reclaim_flags))
 );
 
+TRACE_EVENT(mm_vmscan_reclaim_pages,
+
+	TP_PROTO(int nid,
+		unsigned long nr_scanned, unsigned long nr_reclaimed,
+		struct reclaim_stat *stat),
+
+	TP_ARGS(nid, nr_scanned, nr_reclaimed, stat),
+
+	TP_STRUCT__entry(
+		__field(int, nid)
+		__field(unsigned long, nr_scanned)
+		__field(unsigned long, nr_reclaimed)
+		__field(unsigned long, nr_dirty)
+		__field(unsigned long, nr_writeback)
+		__field(unsigned long, nr_congested)
+		__field(unsigned long, nr_immediate)
+		__field(unsigned int, nr_activate0)
+		__field(unsigned int, nr_activate1)
+		__field(unsigned long, nr_ref_keep)
+		__field(unsigned long, nr_unmap_fail)
+	),
+
+	TP_fast_assign(
+		__entry->nid = nid;
+		__entry->nr_scanned = nr_scanned;
+		__entry->nr_reclaimed = nr_reclaimed;
+		__entry->nr_dirty = stat->nr_dirty;
+		__entry->nr_writeback = stat->nr_writeback;
+		__entry->nr_congested = stat->nr_congested;
+		__entry->nr_immediate = stat->nr_immediate;
+		__entry->nr_activate0 = stat->nr_activate[0];
+		__entry->nr_activate1 = stat->nr_activate[1];
+		__entry->nr_ref_keep = stat->nr_ref_keep;
+		__entry->nr_unmap_fail = stat->nr_unmap_fail;
+	),
+
+	TP_printk("nid=%d nr_scanned=%ld nr_reclaimed=%ld nr_dirty=%ld nr_writeback=%ld nr_congested=%ld nr_immediate=%ld nr_activate_anon=%d nr_activate_file=%d nr_ref_keep=%ld nr_unmap_fail=%ld",
+		__entry->nid,
+		__entry->nr_scanned, __entry->nr_reclaimed,
+		__entry->nr_dirty, __entry->nr_writeback,
+		__entry->nr_congested, __entry->nr_immediate,
+		__entry->nr_activate0, __entry->nr_activate1,
+		__entry->nr_ref_keep, __entry->nr_unmap_fail)
+);
+
 TRACE_EVENT(mm_vmscan_lru_shrink_inactive,
 
 	TP_PROTO(int nid,
-- 
cgit v1.2.3


From f1001f3d3b6868998cab73d10fda1a5c99ddf963 Mon Sep 17 00:00:00 2001
From: Wei Xu <weixugc@google.com>
Date: Thu, 17 Oct 2024 18:15:28 +0000
Subject: mm/mglru: reset page lru tier bits when activating

When a folio is activated, lru_gen_add_folio() moves the folio to the
youngest generation.  But unlike folio_update_gen()/folio_inc_gen(),
lru_gen_add_folio() doesn't reset the folio lru tier bits (LRU_REFS_MASK |
LRU_REFS_FLAGS).  This inconsistency can affect how pages are aged via
folio_mark_accessed() (e.g.  fd accesses), though no user visible impact
related to this has been detected yet.

Note that lru_gen_add_folio() cannot clear PG_workingset if the activation
is due to workingset refault, otherwise PSI accounting will be skipped.
So fix lru_gen_add_folio() to clear the lru tier bits other than
PG_workingset when activating a folio, and also clear all the lru tier
bits when a folio is activated via folio_activate() in
lru_gen_look_around().

Link: https://lkml.kernel.org/r/20241017181528.3358821-1-weixugc@google.com
Fixes: 018ee47f1489 ("mm: multi-gen LRU: exploit locality in rmap")
Signed-off-by: Wei Xu <weixugc@google.com>
Cc: Axel Rasmussen <axelrasmussen@google.com>
Cc: Brian Geffon <bgeffon@google.com>
Cc: Jan Alexander Steffens <heftig@archlinux.org>
Cc: Suleiman Souhlal <suleiman@google.com>
Cc: Yu Zhao <yuzhao@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/mm_inline.h | 15 ++++++++++++++-
 include/linux/mmzone.h    |  2 ++
 2 files changed, 16 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
index 6f801c7b36e2..355cf46a01a6 100644
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
@@ -155,6 +155,11 @@ static inline int folio_lru_refs(struct folio *folio)
 	return ((flags & LRU_REFS_MASK) >> LRU_REFS_PGOFF) + workingset;
 }
 
+static inline void folio_clear_lru_refs(struct folio *folio)
+{
+	set_mask_bits(&folio->flags, LRU_REFS_MASK | LRU_REFS_FLAGS, 0);
+}
+
 static inline int folio_lru_gen(struct folio *folio)
 {
 	unsigned long flags = READ_ONCE(folio->flags);
@@ -222,6 +227,7 @@ static inline bool lru_gen_add_folio(struct lruvec *lruvec, struct folio *folio,
 {
 	unsigned long seq;
 	unsigned long flags;
+	unsigned long mask;
 	int gen = folio_lru_gen(folio);
 	int type = folio_is_file_lru(folio);
 	int zone = folio_zonenum(folio);
@@ -257,7 +263,14 @@ static inline bool lru_gen_add_folio(struct lruvec *lruvec, struct folio *folio,
 	gen = lru_gen_from_seq(seq);
 	flags = (gen + 1UL) << LRU_GEN_PGOFF;
 	/* see the comment on MIN_NR_GENS about PG_active */
-	set_mask_bits(&folio->flags, LRU_GEN_MASK | BIT(PG_active), flags);
+	mask = LRU_GEN_MASK;
+	/*
+	 * Don't clear PG_workingset here because it can affect PSI accounting
+	 * if the activation is due to workingset refault.
+	 */
+	if (folio_test_active(folio))
+		mask |= LRU_REFS_MASK | BIT(PG_referenced) | BIT(PG_active);
+	set_mask_bits(&folio->flags, mask, flags);
 
 	lru_gen_update_size(lruvec, folio, -1, gen);
 	/* for folio_rotate_reclaimable() */
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 5b1c984daf45..2e8c4307c728 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -403,6 +403,8 @@ enum {
 	NR_LRU_GEN_CAPS
 };
 
+#define LRU_REFS_FLAGS		(BIT(PG_referenced) | BIT(PG_workingset))
+
 #define MIN_LRU_BATCH		BITS_PER_LONG
 #define MAX_LRU_BATCH		(MIN_LRU_BATCH * 64)
 
-- 
cgit v1.2.3


From 9884efd795cc2f71ef3b7f42df32420b0b7ce34f Mon Sep 17 00:00:00 2001
From: Kefeng Wang <wangkefeng.wang@huawei.com>
Date: Thu, 17 Oct 2024 22:14:56 +0800
Subject: mm: huge_memory: move file_thp_enabled() into huge_memory.c

file_thp_enabled() is only used in __thp_vma_allowable_orders(), so move
it into huge_memory.c, also check READ_ONLY_THP_FOR_FS ahead to avoid
unnecessary code if config disabled.

Link: https://lkml.kernel.org/r/20241017141457.1169092-1-wangkefeng.wang@huawei.com
Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Acked-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Baolin Wang <baolin.wang@linux.alibaba.com>
Cc: Barry Song <baohua@kernel.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/huge_mm.h | 13 -------------
 1 file changed, 13 deletions(-)

(limited to 'include')

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 8afe09a2cf03..006f730545c2 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -252,19 +252,6 @@ static inline unsigned long thp_vma_suitable_orders(struct vm_area_struct *vma,
 	return orders;
 }
 
-static inline bool file_thp_enabled(struct vm_area_struct *vma)
-{
-	struct inode *inode;
-
-	if (!vma->vm_file)
-		return false;
-
-	inode = vma->vm_file->f_inode;
-
-	return (IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS)) &&
-	       !inode_is_open_for_write(inode) && S_ISREG(inode->i_mode);
-}
-
 unsigned long __thp_vma_allowable_orders(struct vm_area_struct *vma,
 					 unsigned long vm_flags,
 					 unsigned long tva_flags,
-- 
cgit v1.2.3


From b7f058f827392022d8c689329f88c7b324d71dad Mon Sep 17 00:00:00 2001
From: Luoxi Li <kaixa@kiloview.com>
Date: Fri, 18 Oct 2024 17:22:35 +0800
Subject: mm: remove unused has_isolate_pageblock

has_isolate_pageblock() has been unused since commit 55612e80e722 ("mm:
page_alloc: close migratetype race between freeing and stealing")

Remove it.

Link: https://lkml.kernel.org/r/20241018092235.2764859-1-kaixa@kiloview.com
Signed-off-by: Luoxi Li <kaixa@kiloview.com>
Acked-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Muhammad Usama Anjum <usama.anjum@collabora.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Reviewed-by: Anshuman Khandual <anshuman.khandual@arm.com>
Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/page-isolation.h | 8 --------
 1 file changed, 8 deletions(-)

(limited to 'include')

diff --git a/include/linux/page-isolation.h b/include/linux/page-isolation.h
index c16db0067090..73dc2c1841ec 100644
--- a/include/linux/page-isolation.h
+++ b/include/linux/page-isolation.h
@@ -3,10 +3,6 @@
 #define __LINUX_PAGEISOLATION_H
 
 #ifdef CONFIG_MEMORY_ISOLATION
-static inline bool has_isolate_pageblock(struct zone *zone)
-{
-	return zone->nr_isolate_pageblock;
-}
 static inline bool is_migrate_isolate_page(struct page *page)
 {
 	return get_pageblock_migratetype(page) == MIGRATE_ISOLATE;
@@ -16,10 +12,6 @@ static inline bool is_migrate_isolate(int migratetype)
 	return migratetype == MIGRATE_ISOLATE;
 }
 #else
-static inline bool has_isolate_pageblock(struct zone *zone)
-{
-	return false;
-}
 static inline bool is_migrate_isolate_page(struct page *page)
 {
 	return false;
-- 
cgit v1.2.3


From 78c018e3942c5dfbab7e6edb4eb784943878504b Mon Sep 17 00:00:00 2001
From: Jann Horn <jannh@google.com>
Date: Mon, 7 Oct 2024 23:47:45 +0200
Subject: maple_tree: fix outdated flag name in comment

MAPLE_USE_RCU was renamed to MT_FLAGS_USE_RCU at some point, fix up the
comment.

Link: https://lkml.kernel.org/r/20241007-maple-tree-doc-fix-v1-1-6bbf89c1153d@google.com
Signed-off-by: Jann Horn <jannh@google.com>
Reviewed-by: Liam R. Howlett <Liam.Howlett@Oracle.com>
Reviewed-by: Wei Yang <richard.weiyang@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/maple_tree.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/maple_tree.h b/include/linux/maple_tree.h
index c2c11004085e..61c236850ca8 100644
--- a/include/linux/maple_tree.h
+++ b/include/linux/maple_tree.h
@@ -224,7 +224,7 @@ typedef struct { /* nothing */ } lockdep_map_p;
  * (set at tree creation time) and dynamic information set under the spinlock.
  *
  * Another use of flags are to indicate global states of the tree.  This is the
- * case with the MAPLE_USE_RCU flag, which indicates the tree is currently in
+ * case with the MT_FLAGS_USE_RCU flag, which indicates the tree is currently in
  * RCU mode.  This mode was added to allow the tree to reuse nodes instead of
  * re-allocating and RCU freeing nodes when there is a single user.
  */
-- 
cgit v1.2.3


From ed265529d39ac408396c031a4fd7e1ef922b80d0 Mon Sep 17 00:00:00 2001
From: Sourav Panda <souravpanda@google.com>
Date: Tue, 22 Oct 2024 23:24:40 +0000
Subject: mm/codetag: fix arg in pgalloc_tag_copy alloc_tag_sub

alloc_tag_sub() takes bytes as opposed to number of pages as argument.

Currently pgalloc_tag_copy() passes the number of pages.  This fix passes
the correct unit, which is the number of bytes allocated.

Link: https://lkml.kernel.org/r/20241022232440.334820-1-souravpanda@google.com
Fixes: e0a955bf7f61 ("mm/codetag: add pgalloc_tag_copy()")
Signed-off-by: Sourav Panda <souravpanda@google.com>
Acked-by: Suren Baghdasaryan <surenb@google.com>
Reviewed-by: Pasha Tatashin <pasha.tatashin@soleen.com>
Reviewed-by: Anshuman Khandual <anshuman.khandual@arm.com>
Cc: Wei Xu <weixugc@google.com>
Cc: Yu Zhao <yuzhao@google.com>
Cc: Kent Overstreet <kent.overstreet@linux.dev>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/mm.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 4570f33e2429..eb070c14e309 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -4207,7 +4207,7 @@ static inline void pgalloc_tag_copy(struct folio *new, struct folio *old)
 	/* Clear the old ref to the original allocation tag. */
 	clear_page_tag_ref(&old->page);
 	/* Decrement the counters of the tag on get_new_folio. */
-	alloc_tag_sub(ref, folio_nr_pages(new));
+	alloc_tag_sub(ref, folio_size(new));
 
 	__alloc_tag_ref_set(ref, tag);
 
-- 
cgit v1.2.3


From 628e1b8c4777941e119effc92cd395b4b02c2c5f Mon Sep 17 00:00:00 2001
From: James Houghton <jthoughton@google.com>
Date: Mon, 21 Oct 2024 16:02:12 +0000
Subject: mm: add missing mmu_notifier_clear_young for !MMU_NOTIFIER

Remove the now unnecessary ifdef in mm/damon/vaddr.c as well.

Link: https://lkml.kernel.org/r/20241021160212.9935-1-jthoughton@google.com
Signed-off-by: James Houghton <jthoughton@google.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Acked-by: David Hildenbrand <david@redhat.com>
Reviewed-by: SeongJae Park <sj@kernel.org>
Reviewed-by: Oscar Salvador <osalvador@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/mmu_notifier.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include')

diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h
index d39ebb10caeb..e2dd57ca368b 100644
--- a/include/linux/mmu_notifier.h
+++ b/include/linux/mmu_notifier.h
@@ -606,6 +606,13 @@ static inline int mmu_notifier_clear_flush_young(struct mm_struct *mm,
 	return 0;
 }
 
+static inline int mmu_notifier_clear_young(struct mm_struct *mm,
+					   unsigned long start,
+					   unsigned long end)
+{
+	return 0;
+}
+
 static inline int mmu_notifier_test_young(struct mm_struct *mm,
 					  unsigned long address)
 {
-- 
cgit v1.2.3


From 6b611388b626eaa59d202bf8f64d095ff80bcde6 Mon Sep 17 00:00:00 2001
From: Shakeel Butt <shakeel.butt@linux.dev>
Date: Thu, 24 Oct 2024 18:22:59 -0700
Subject: memcg-v1: remove charge move code

The memcg-v1 charge move feature has been deprecated completely and let's
remove the relevant code as well.

Link: https://lkml.kernel.org/r/20241025012304.2473312-3-shakeel.butt@linux.dev
Signed-off-by: Shakeel Butt <shakeel.butt@linux.dev>
Acked-by: Michal Hocko <mhocko@suse.com>
Reviewed-by: Roman Gushchin <roman.gushchin@linux.dev>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Yosry Ahmed <yosryahmed@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/memcontrol.h | 5 -----
 1 file changed, 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 34d2da05f2f1..0b113267b2de 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -299,11 +299,6 @@ struct mem_cgroup {
 	/* For oom notifier event fd */
 	struct list_head oom_notify;
 
-	/*
-	 * Should we move charges of a task when a task is moved into this
-	 * mem_cgroup ? And what type of charges should we move ?
-	 */
-	unsigned long move_charge_at_immigrate;
 	/* taken only while moving_account > 0 */
 	spinlock_t move_lock;
 	unsigned long move_lock_flags;
-- 
cgit v1.2.3


From a29c0e4b2e867f4e362a6740c430bfdc2efdd1d9 Mon Sep 17 00:00:00 2001
From: Shakeel Butt <shakeel.butt@linux.dev>
Date: Thu, 24 Oct 2024 18:23:03 -0700
Subject: memcg-v1: remove memcg move locking code

The memcg v1's charge move feature has been deprecated.  All the places
using the memcg move lock, have stopped using it as they don't need the
protection any more.  Let's proceed to remove all the locking code related
to charge moving.

Link: https://lkml.kernel.org/r/20241025012304.2473312-7-shakeel.butt@linux.dev
Signed-off-by: Shakeel Butt <shakeel.butt@linux.dev>
Acked-by: Michal Hocko <mhocko@suse.com>
Reviewed-by: Roman Gushchin <roman.gushchin@linux.dev>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Yosry Ahmed <yosryahmed@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/memcontrol.h | 54 ----------------------------------------------
 1 file changed, 54 deletions(-)

(limited to 'include')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 0b113267b2de..bb49e0d4b377 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -299,20 +299,10 @@ struct mem_cgroup {
 	/* For oom notifier event fd */
 	struct list_head oom_notify;
 
-	/* taken only while moving_account > 0 */
-	spinlock_t move_lock;
-	unsigned long move_lock_flags;
-
 	/* Legacy tcp memory accounting */
 	bool tcpmem_active;
 	int tcpmem_pressure;
 
-	/*
-	 * set > 0 if pages under this cgroup are moving to other cgroup.
-	 */
-	atomic_t moving_account;
-	struct task_struct *move_lock_task;
-
 	/* List of events which userspace want to receive */
 	struct list_head event_list;
 	spinlock_t event_list_lock;
@@ -428,9 +418,7 @@ static inline struct obj_cgroup *__folio_objcg(struct folio *folio)
  *
  * - the folio lock
  * - LRU isolation
- * - folio_memcg_lock()
  * - exclusive reference
- * - mem_cgroup_trylock_pages()
  *
  * For a kmem folio a caller should hold an rcu read lock to protect memcg
  * associated with a kmem folio from being released.
@@ -499,9 +487,7 @@ static inline struct mem_cgroup *folio_memcg_rcu(struct folio *folio)
  *
  * - the folio lock
  * - LRU isolation
- * - lock_folio_memcg()
  * - exclusive reference
- * - mem_cgroup_trylock_pages()
  *
  * For a kmem folio a caller should hold an rcu read lock to protect memcg
  * associated with a kmem folio from being released.
@@ -1867,26 +1853,6 @@ static inline bool task_in_memcg_oom(struct task_struct *p)
 	return p->memcg_in_oom;
 }
 
-void folio_memcg_lock(struct folio *folio);
-void folio_memcg_unlock(struct folio *folio);
-
-/* try to stablize folio_memcg() for all the pages in a memcg */
-static inline bool mem_cgroup_trylock_pages(struct mem_cgroup *memcg)
-{
-	rcu_read_lock();
-
-	if (mem_cgroup_disabled() || !atomic_read(&memcg->moving_account))
-		return true;
-
-	rcu_read_unlock();
-	return false;
-}
-
-static inline void mem_cgroup_unlock_pages(void)
-{
-	rcu_read_unlock();
-}
-
 static inline void mem_cgroup_enter_user_fault(void)
 {
 	WARN_ON(current->in_user_fault);
@@ -1908,26 +1874,6 @@ unsigned long memcg1_soft_limit_reclaim(pg_data_t *pgdat, int order,
 	return 0;
 }
 
-static inline void folio_memcg_lock(struct folio *folio)
-{
-}
-
-static inline void folio_memcg_unlock(struct folio *folio)
-{
-}
-
-static inline bool mem_cgroup_trylock_pages(struct mem_cgroup *memcg)
-{
-	/* to match folio_memcg_rcu() */
-	rcu_read_lock();
-	return true;
-}
-
-static inline void mem_cgroup_unlock_pages(void)
-{
-	rcu_read_unlock();
-}
-
 static inline bool task_in_memcg_oom(struct task_struct *p)
 {
 	return false;
-- 
cgit v1.2.3


From 906c38ff52e95575ddf3281bee531eded3dba150 Mon Sep 17 00:00:00 2001
From: Shakeel Butt <shakeel.butt@linux.dev>
Date: Sat, 26 Oct 2024 09:37:07 -0700
Subject: memcg: workingset: remove folio_memcg_rcu usage

The function workingset_activation() is called from folio_mark_accessed()
with the guarantee that the given folio can not be freed under us in
workingset_activation().  In addition, the association of the folio and
its memcg can not be broken here because charge migration is no more.
There is no need to use folio_memcg_rcu.  Simply use folio_memcg_charged()
because that is what this function cares about.

[akpm@linux-foundation.org: provide folio_memcg_charged stub for CONFIG_MEMCG=n]
Link: https://lkml.kernel.org/r/20241026163707.2479526-1-shakeel.butt@linux.dev
Signed-off-by: Shakeel Butt <shakeel.butt@linux.dev>
Suggested-by: Yu Zhao <yuzhao@google.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Roman Gushchin <roman.gushchin@linux.dev>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Muchun Song <songmuchun@bytedance.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Yosry Ahmed <yosryahmed@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/memcontrol.h | 34 ++--------------------------------
 1 file changed, 2 insertions(+), 32 deletions(-)

(limited to 'include')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index bb49e0d4b377..8e4608be811d 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -443,35 +443,6 @@ static inline bool folio_memcg_charged(struct folio *folio)
 	return __folio_memcg(folio) != NULL;
 }
 
-/**
- * folio_memcg_rcu - Locklessly get the memory cgroup associated with a folio.
- * @folio: Pointer to the folio.
- *
- * This function assumes that the folio is known to have a
- * proper memory cgroup pointer. It's not safe to call this function
- * against some type of folios, e.g. slab folios or ex-slab folios.
- *
- * Return: A pointer to the memory cgroup associated with the folio,
- * or NULL.
- */
-static inline struct mem_cgroup *folio_memcg_rcu(struct folio *folio)
-{
-	unsigned long memcg_data = READ_ONCE(folio->memcg_data);
-
-	VM_BUG_ON_FOLIO(folio_test_slab(folio), folio);
-
-	if (memcg_data & MEMCG_DATA_KMEM) {
-		struct obj_cgroup *objcg;
-
-		objcg = (void *)(memcg_data & ~OBJEXTS_FLAGS_MASK);
-		return obj_cgroup_memcg(objcg);
-	}
-
-	WARN_ON_ONCE(!rcu_read_lock_held());
-
-	return (struct mem_cgroup *)(memcg_data & ~OBJEXTS_FLAGS_MASK);
-}
-
 /*
  * folio_memcg_check - Get the memory cgroup associated with a folio.
  * @folio: Pointer to the folio.
@@ -1084,10 +1055,9 @@ static inline struct mem_cgroup *folio_memcg(struct folio *folio)
 	return NULL;
 }
 
-static inline struct mem_cgroup *folio_memcg_rcu(struct folio *folio)
+static inline bool folio_memcg_charged(struct folio *folio)
 {
-	WARN_ON_ONCE(!rcu_read_lock_held());
-	return NULL;
+	return false;
 }
 
 static inline struct mem_cgroup *folio_memcg_check(struct folio *folio)
-- 
cgit v1.2.3


From 28aeaeaca62ebb1b638582ef81f8974f520c0dcb Mon Sep 17 00:00:00 2001
From: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Date: Mon, 28 Oct 2024 22:44:41 +0200
Subject: media: i2c: mt9p031: Drop support for legacy platform data

No user of the mt9p031_platform_data legacy platform data for board
files exist in the kernel anymore. Drop support from the driver.

Signed-off-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Reviewed-by: Tarang Raval <tarang.raval@siliconsignals.io>
Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Signed-off-by: Hans Verkuil <hverkuil@xs4all.nl>
---
 include/media/i2c/mt9p031.h | 18 ------------------
 1 file changed, 18 deletions(-)
 delete mode 100644 include/media/i2c/mt9p031.h

(limited to 'include')

diff --git a/include/media/i2c/mt9p031.h b/include/media/i2c/mt9p031.h
deleted file mode 100644
index f933cd0be8e5..000000000000
--- a/include/media/i2c/mt9p031.h
+++ /dev/null
@@ -1,18 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef MT9P031_H
-#define MT9P031_H
-
-struct v4l2_subdev;
-
-/*
- * struct mt9p031_platform_data - MT9P031 platform data
- * @ext_freq: Input clock frequency
- * @target_freq: Pixel clock frequency
- */
-struct mt9p031_platform_data {
-	unsigned int pixclk_pol:1;
-	int ext_freq;
-	int target_freq;
-};
-
-#endif
-- 
cgit v1.2.3


From ff0f0353826d19d826ae07d941ebaf3ed241e577 Mon Sep 17 00:00:00 2001
From: Sakari Ailus <sakari.ailus@linux.intel.com>
Date: Sat, 19 Oct 2024 20:23:12 +0300
Subject: media: mc: Rename pad as origin in __media_pipeline_start()

Rename the pad field in __media_pipeline_start() to both better describe
what it is and avoid masking it during the loop.

Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Reviewed-by: Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
Signed-off-by: Hans Verkuil <hverkuil@xs4all.nl>
---
 include/media/media-entity.h | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/media/media-entity.h b/include/media/media-entity.h
index 0393b23129eb..64cf590b1134 100644
--- a/include/media/media-entity.h
+++ b/include/media/media-entity.h
@@ -1143,10 +1143,10 @@ struct media_entity *media_graph_walk_next(struct media_graph *graph);
 
 /**
  * media_pipeline_start - Mark a pipeline as streaming
- * @pad: Starting pad
+ * @origin: Starting pad
  * @pipe: Media pipeline to be assigned to all pads in the pipeline.
  *
- * Mark all pads connected to a given pad through enabled links, either
+ * Mark all pads connected to pad @origin through enabled links, either
  * directly or indirectly, as streaming. The given pipeline object is assigned
  * to every pad in the pipeline and stored in the media_pad pipe field.
  *
@@ -1155,17 +1155,17 @@ struct media_entity *media_graph_walk_next(struct media_graph *graph);
  * pipeline pointer must be identical for all nested calls to
  * media_pipeline_start().
  */
-__must_check int media_pipeline_start(struct media_pad *pad,
+__must_check int media_pipeline_start(struct media_pad *origin,
 				      struct media_pipeline *pipe);
 /**
  * __media_pipeline_start - Mark a pipeline as streaming
  *
- * @pad: Starting pad
+ * @origin: Starting pad
  * @pipe: Media pipeline to be assigned to all pads in the pipeline.
  *
  * ..note:: This is the non-locking version of media_pipeline_start()
  */
-__must_check int __media_pipeline_start(struct media_pad *pad,
+__must_check int __media_pipeline_start(struct media_pad *origin,
 					struct media_pipeline *pipe);
 
 /**
-- 
cgit v1.2.3


From 9907cda95fcbf44141b1292faab89cf8ec542f22 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Juraj=20=C5=A0arinay?= <juraj@sarinay.com>
Date: Sun, 3 Nov 2024 13:45:25 +0100
Subject: net: nfc: Propagate ISO14443 type A target ATS to userspace via
 netlink
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add a 20-byte field ats to struct nfc_target and expose it as
NFC_ATTR_TARGET_ATS via the netlink interface. The payload contains
'historical bytes' that help to distinguish cards from one another.
The information is commonly used to assemble an emulated ATR similar
to that reported by smart cards with contacts.

Add a 20-byte field target_ats to struct nci_dev to hold the payload
obtained in nci_rf_intf_activated_ntf_packet() and copy it to over to
nfc_target.ats in nci_activate_target(). The approach is similar
to the handling of 'general bytes' within ATR_RES.

Replace the hard-coded size of rats_res within struct
activation_params_nfca_poll_iso_dep by the equal constant NFC_ATS_MAXSIZE
now defined in nfc.h

Within NCI, the information corresponds to the 'RATS Response' activation
parameter that omits the initial length byte TL. This loses no
information and is consistent with our handling of SENSB_RES that
also drops the first (constant) byte.

Tested with nxp_nci_i2c on a few type A targets including an
ICAO 9303 compliant passport.

I refrain from the corresponding change to digital_in_recv_ats()
to have the few drivers based on digital.h fill nfc_target.ats,
as I have no way to test it. That class of drivers appear not to set
NFC_ATTR_TARGET_SENSB_RES either. Consider a separate patch to propagate
(all) the parameters.

Signed-off-by: Juraj Šarinay <juraj@sarinay.com>
Link: https://patch.msgid.link/20241103124525.8392-1-juraj@sarinay.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/net/nfc/nci.h      | 2 +-
 include/net/nfc/nci_core.h | 4 ++++
 include/net/nfc/nfc.h      | 4 ++++
 include/uapi/linux/nfc.h   | 3 +++
 4 files changed, 12 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/nfc/nci.h b/include/net/nfc/nci.h
index dc36519d16aa..09efcaed7c3f 100644
--- a/include/net/nfc/nci.h
+++ b/include/net/nfc/nci.h
@@ -475,7 +475,7 @@ struct nci_rf_discover_ntf {
 #define NCI_OP_RF_INTF_ACTIVATED_NTF	nci_opcode_pack(NCI_GID_RF_MGMT, 0x05)
 struct activation_params_nfca_poll_iso_dep {
 	__u8	rats_res_len;
-	__u8	rats_res[20];
+	__u8	rats_res[NFC_ATS_MAXSIZE];
 };
 
 struct activation_params_nfcb_poll_iso_dep {
diff --git a/include/net/nfc/nci_core.h b/include/net/nfc/nci_core.h
index ea8595651c38..e180bdf2f82b 100644
--- a/include/net/nfc/nci_core.h
+++ b/include/net/nfc/nci_core.h
@@ -265,6 +265,10 @@ struct nci_dev {
 	/* stored during intf_activated_ntf */
 	__u8 remote_gb[NFC_MAX_GT_LEN];
 	__u8 remote_gb_len;
+
+	/* stored during intf_activated_ntf */
+	__u8 target_ats[NFC_ATS_MAXSIZE];
+	__u8 target_ats_len;
 };
 
 /* ----- NCI Devices ----- */
diff --git a/include/net/nfc/nfc.h b/include/net/nfc/nfc.h
index 3a3781838c67..127e6c7d910d 100644
--- a/include/net/nfc/nfc.h
+++ b/include/net/nfc/nfc.h
@@ -86,6 +86,8 @@ struct nfc_ops {
  *	is a type A one. The %sens_res most significant byte must be byte 2
  *	as described by the NFC Forum digital specification (i.e. the platform
  *	configuration one) while %sens_res least significant byte is byte 1.
+ * @ats_len: length of Answer To Select in bytes
+ * @ats: Answer To Select returned by an ISO 14443 Type A target upon activation
  */
 struct nfc_target {
 	u32 idx;
@@ -105,6 +107,8 @@ struct nfc_target {
 	u8 is_iso15693;
 	u8 iso15693_dsfid;
 	u8 iso15693_uid[NFC_ISO15693_UID_MAXSIZE];
+	u8 ats_len;
+	u8 ats[NFC_ATS_MAXSIZE];
 };
 
 /**
diff --git a/include/uapi/linux/nfc.h b/include/uapi/linux/nfc.h
index 4fa4e979e948..2f5b4be25261 100644
--- a/include/uapi/linux/nfc.h
+++ b/include/uapi/linux/nfc.h
@@ -164,6 +164,7 @@ enum nfc_commands {
  * @NFC_ATTR_VENDOR_SUBCMD: Vendor specific sub command
  * @NFC_ATTR_VENDOR_DATA: Vendor specific data, to be optionally passed
  *	to a vendor specific command implementation
+ * @NFC_ATTR_TARGET_ATS: ISO 14443 type A target Answer To Select
  */
 enum nfc_attrs {
 	NFC_ATTR_UNSPEC,
@@ -198,6 +199,7 @@ enum nfc_attrs {
 	NFC_ATTR_VENDOR_ID,
 	NFC_ATTR_VENDOR_SUBCMD,
 	NFC_ATTR_VENDOR_DATA,
+	NFC_ATTR_TARGET_ATS,
 /* private: internal use only */
 	__NFC_ATTR_AFTER_LAST
 };
@@ -225,6 +227,7 @@ enum nfc_sdp_attr {
 #define NFC_GB_MAXSIZE			48
 #define NFC_FIRMWARE_NAME_MAXSIZE	32
 #define NFC_ISO15693_UID_MAXSIZE	8
+#define NFC_ATS_MAXSIZE			20
 
 /* NFC protocols */
 #define NFC_PROTO_JEWEL		1
-- 
cgit v1.2.3


From 495e7c8e9601245738a6ab53c992cc5aa0b13cb4 Mon Sep 17 00:00:00 2001
From: Jinjian Song <jinjian.song@fibocom.com>
Date: Mon, 4 Nov 2024 17:44:34 +0800
Subject: wwan: core: Add WWAN ADB and MIPC port type

Add new WWAN ports that connect to the device's ADB protocol interface
and MTK MIPC diagnostic interface.

Signed-off-by: Jinjian Song <jinjian.song@fibocom.com>
Reviewed-by: Sergey Ryazanov <ryazanov.s.a@gmail.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/linux/wwan.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/linux/wwan.h b/include/linux/wwan.h
index 170fdee6339c..79c781875c09 100644
--- a/include/linux/wwan.h
+++ b/include/linux/wwan.h
@@ -17,6 +17,8 @@
  * @WWAN_PORT_FIREHOSE: XML based command protocol
  * @WWAN_PORT_XMMRPC: Control protocol for Intel XMM modems
  * @WWAN_PORT_FASTBOOT: Fastboot protocol control
+ * @WWAN_PORT_ADB: ADB protocol control
+ * @WWAN_PORT_MIPC: MTK MIPC diagnostic interface
  *
  * @WWAN_PORT_MAX: Highest supported port types
  * @WWAN_PORT_UNKNOWN: Special value to indicate an unknown port type
@@ -30,6 +32,8 @@ enum wwan_port_type {
 	WWAN_PORT_FIREHOSE,
 	WWAN_PORT_XMMRPC,
 	WWAN_PORT_FASTBOOT,
+	WWAN_PORT_ADB,
+	WWAN_PORT_MIPC,
 
 	/* Add new port types above this line */
 
-- 
cgit v1.2.3


From ab9bc81c1cf0efc7fc5a3aa4e562aa88d09ada57 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Thu, 7 Nov 2024 05:45:34 -0700
Subject: Revert "block: pre-calculate max_zone_append_sectors"

This causes issue on, at least, nvme-mpath where my boot fails with:

WARNING: CPU: 354 PID: 2729 at block/blk-settings.c:75 blk_validate_limits+0x356/0x380
Modules linked in: tg3(+) nvme usbcore scsi_mod ptp i2c_piix4 libphy nvme_core crc32c_intel scsi_common usb_common pps_core i2c_smbus
CPU: 354 UID: 0 PID: 2729 Comm: kworker/u2061:1 Not tainted 6.12.0-rc6+ #181
Hardware name: Dell Inc. PowerEdge R7625/06444F, BIOS 1.8.3 04/02/2024
Workqueue: async async_run_entry_fn
RIP: 0010:blk_validate_limits+0x356/0x380
Code: f6 47 01 04 75 28 83 bf 94 00 00 00 00 75 39 83 bf 98 00 00 00 00 75 34 83 7f 68 00 75 32 31 c0 83 7f 5c 00 0f 84 9b fd ff ff <0f> 0b eb 13 0f 0b eb 0f 48 c7 c0 74 12 58 92 48 89 c7 e8 13 76 46
RSP: 0018:ffffa8a1dfb93b30 EFLAGS: 00010286
RAX: 0000000000000000 RBX: ffff9232829c8388 RCX: 0000000000000088
RDX: 0000000000000080 RSI: 0000000000000200 RDI: ffffa8a1dfb93c38
RBP: 000000000000000c R08: 00000000ffffffff R09: 000000000000ffff
R10: 0000000000000000 R11: 0000000000000000 R12: ffff9232829b9000
R13: ffff9232829b9010 R14: ffffa8a1dfb93c38 R15: ffffa8a1dfb93c38
FS:  0000000000000000(0000) GS:ffff923867c80000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 000055c1b92480a8 CR3: 0000002484ff0002 CR4: 0000000000370ef0
Call Trace:
 <TASK>
 ? __warn+0xca/0x1a0
 ? blk_validate_limits+0x356/0x380
 ? report_bug+0x11a/0x1a0
 ? handle_bug+0x5e/0x90
 ? exc_invalid_op+0x16/0x40
 ? asm_exc_invalid_op+0x16/0x20
 ? blk_validate_limits+0x356/0x380
 blk_alloc_queue+0x7a/0x250
 __blk_alloc_disk+0x39/0x80
 nvme_mpath_alloc_disk+0x13d/0x1b0 [nvme_core]
 nvme_scan_ns+0xcc7/0x1010 [nvme_core]
 async_run_entry_fn+0x27/0x120
 process_scheduled_works+0x1a0/0x360
 worker_thread+0x2bc/0x350
 ? pr_cont_work+0x1b0/0x1b0
 kthread+0x111/0x120
 ? kthread_unuse_mm+0x90/0x90
 ret_from_fork+0x30/0x40
 ? kthread_unuse_mm+0x90/0x90
 ret_from_fork_asm+0x11/0x20
 </TASK>
---[ end trace 0000000000000000 ]---

presumably due to max_zone_append_sectors not being cleared to zero,
resulting in blk_validate_zoned_limits() complaining and failing.

This reverts commit 2a8f6153e1c2db06a537a5c9d61102eb591776f1.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blkdev.h | 21 ++++++++++++++++++---
 1 file changed, 18 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 6d1413bd69a5..7bfc877e159e 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -375,7 +375,6 @@ struct queue_limits {
 	unsigned int		max_user_discard_sectors;
 	unsigned int		max_secure_erase_sectors;
 	unsigned int		max_write_zeroes_sectors;
-	unsigned int		max_hw_zone_append_sectors;
 	unsigned int		max_zone_append_sectors;
 	unsigned int		discard_granularity;
 	unsigned int		discard_alignment;
@@ -1205,9 +1204,25 @@ static inline unsigned int queue_max_segment_size(const struct request_queue *q)
 	return q->limits.max_segment_size;
 }
 
+static inline unsigned int
+queue_limits_max_zone_append_sectors(const struct queue_limits *l)
+{
+	unsigned int max_sectors = min(l->chunk_sectors, l->max_hw_sectors);
+
+	return min_not_zero(l->max_zone_append_sectors, max_sectors);
+}
+
+static inline unsigned int queue_max_zone_append_sectors(struct request_queue *q)
+{
+	if (!blk_queue_is_zoned(q))
+		return 0;
+
+	return queue_limits_max_zone_append_sectors(&q->limits);
+}
+
 static inline bool queue_emulates_zone_append(struct request_queue *q)
 {
-	return blk_queue_is_zoned(q) && !q->limits.max_hw_zone_append_sectors;
+	return blk_queue_is_zoned(q) && !q->limits.max_zone_append_sectors;
 }
 
 static inline bool bdev_emulates_zone_append(struct block_device *bdev)
@@ -1218,7 +1233,7 @@ static inline bool bdev_emulates_zone_append(struct block_device *bdev)
 static inline unsigned int
 bdev_max_zone_append_sectors(struct block_device *bdev)
 {
-	return bdev_limits(bdev)->max_zone_append_sectors;
+	return queue_max_zone_append_sectors(bdev_get_queue(bdev));
 }
 
 static inline unsigned int bdev_max_segments(struct block_device *bdev)
-- 
cgit v1.2.3


From df81366b484da9618cec12cfc30ec93f7d4b6ac7 Mon Sep 17 00:00:00 2001
From: Zong-Zhe Yang <kevin_yang@realtek.com>
Date: Fri, 1 Nov 2024 16:21:43 +0800
Subject: wifi: mac80211: fix description of ieee80211_set_active_links() for
 new sequence

The sequence of calls has changed, but the description is inconsistent.
So, fix the description.

Fixes: 188a1bf89432 ("wifi: mac80211: re-order assigning channel in activate links")
Signed-off-by: Zong-Zhe Yang <kevin_yang@realtek.com>
Link: https://patch.msgid.link/20241101082143.11138-1-kevin_yang@realtek.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/mac80211.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index b4f246cdcca4..a97c9f85ae9a 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -7672,12 +7672,12 @@ static inline bool ieee80211_is_tx_data(struct sk_buff *skb)
  *
  *  - change_vif_links(0x11)
  *  - unassign_vif_chanctx(link_id=0)
+ *  - assign_vif_chanctx(link_id=4)
  *  - change_sta_links(0x11) for each affected STA (the AP)
  *    (TDLS connections on now inactive links should be torn down)
  *  - remove group keys on the old link (link_id 0)
  *  - add new group keys (GTK/IGTK/BIGTK) on the new link (link_id 4)
  *  - change_sta_links(0x10) for each affected STA (the AP)
- *  - assign_vif_chanctx(link_id=4)
  *  - change_vif_links(0x10)
  *
  * Return: 0 on success. An error code otherwise.
-- 
cgit v1.2.3


From 1900e1a4495b7376cb9b4e58f1d846660f4c9c4b Mon Sep 17 00:00:00 2001
From: Guixin Liu <kanie@linux.alibaba.com>
Date: Wed, 6 Nov 2024 15:34:45 +0800
Subject: nvme: add reservation command's defines

This is a preparation patch for NVMeOF target reservation
commands implantation.
Add the defines of reservation command, such as reservation log
and sub operations.

Signed-off-by: Guixin Liu <kanie@linux.alibaba.com>
Tested-by: Chaitanya Kulkarni <kch@nvidia.com>
Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Keith Busch <kbusch@kernel.org>
---
 include/linux/nvme.h | 68 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 68 insertions(+)

(limited to 'include')

diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index b58d9405d65e..44d048d68503 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -2037,4 +2037,72 @@ struct nvme_completion {
 #define NVME_MINOR(ver)		(((ver) >> 8) & 0xff)
 #define NVME_TERTIARY(ver)	((ver) & 0xff)
 
+enum {
+	NVME_AEN_RESV_LOG_PAGE_AVALIABLE	= 0x00,
+};
+
+enum {
+	NVME_PR_LOG_EMPTY_LOG_PAGE			= 0x00,
+	NVME_PR_LOG_REGISTRATION_PREEMPTED		= 0x01,
+	NVME_PR_LOG_RESERVATION_RELEASED		= 0x02,
+	NVME_PR_LOG_RESERVATOPM_PREEMPTED		= 0x03,
+};
+
+enum {
+	NVME_PR_NOTIFY_BIT_REG_PREEMPTED		= 1,
+	NVME_PR_NOTIFY_BIT_RESV_RELEASED		= 2,
+	NVME_PR_NOTIFY_BIT_RESV_PREEMPTED		= 3,
+};
+
+struct nvme_pr_log {
+	__le64			count;
+	__u8			type;
+	__u8			nr_pages;
+	__u8			rsvd1[2];
+	__le32			nsid;
+	__u8			rsvd2[48];
+};
+
+struct nvmet_pr_register_data {
+	__le64	crkey;
+	__le64	nrkey;
+};
+
+struct nvmet_pr_acquire_data {
+	__le64	crkey;
+	__le64	prkey;
+};
+
+struct nvmet_pr_release_data {
+	__le64	crkey;
+};
+
+enum nvme_pr_capabilities {
+	NVME_PR_SUPPORT_PTPL				= 1,
+	NVME_PR_SUPPORT_WRITE_EXCLUSIVE			= 1 << 1,
+	NVME_PR_SUPPORT_EXCLUSIVE_ACCESS		= 1 << 2,
+	NVME_PR_SUPPORT_WRITE_EXCLUSIVE_REG_ONLY	= 1 << 3,
+	NVME_PR_SUPPORT_EXCLUSIVE_ACCESS_REG_ONLY	= 1 << 4,
+	NVME_PR_SUPPORT_WRITE_EXCLUSIVE_ALL_REGS	= 1 << 5,
+	NVME_PR_SUPPORT_EXCLUSIVE_ACCESS_ALL_REGS	= 1 << 6,
+	NVME_PR_SUPPORT_IEKEY_VER_1_3_DEF		= 1 << 7,
+};
+
+enum nvme_pr_register_action {
+	NVME_PR_REGISTER_ACT_REG		= 0,
+	NVME_PR_REGISTER_ACT_UNREG		= 1,
+	NVME_PR_REGISTER_ACT_REPLACE		= 1 << 1,
+};
+
+enum nvme_pr_acquire_action {
+	NVME_PR_ACQUIRE_ACT_ACQUIRE		= 0,
+	NVME_PR_ACQUIRE_ACT_PREEMPT		= 1,
+	NVME_PR_ACQUIRE_ACT_PREEMPT_AND_ABORT	= 1 << 1,
+};
+
+enum nvme_pr_release_action {
+	NVME_PR_RELEASE_ACT_RELEASE		= 0,
+	NVME_PR_RELEASE_ACT_CLEAR		= 1,
+};
+
 #endif /* _LINUX_NVME_H */
-- 
cgit v1.2.3


From beeb9220c7307fbb61a2cd6575907db52bde722f Mon Sep 17 00:00:00 2001
From: "Mike Rapoport (Microsoft)" <rppt@kernel.org>
Date: Wed, 23 Oct 2024 19:27:04 +0300
Subject: mm: vmalloc: group declarations depending on CONFIG_MMU together

Patch series "x86/module: use large ROX pages for text allocations", v7.

These patches add support for using large ROX pages for allocations of
executable memory on x86.

They address Andy's comments [1] about having executable mappings for code
that was not completely formed.

The approach taken is to allocate ROX memory along with writable but not
executable memory and use the writable copy to perform relocations and
alternatives patching.  After the module text gets into its final shape,
the contents of the writable memory is copied into the actual ROX location
using text poking.

The allocations of the ROX memory use vmalloc(VMAP_ALLOW_HUGE_MAP) to
allocate PMD aligned memory, fill that memory with invalid instructions
and in the end remap it as ROX.  Portions of these large pages are handed
out to execmem_alloc() callers without any changes to the permissions.
When the memory is freed with execmem_free() it is invalidated again so
that it won't contain stale instructions.

The module memory allocation, x86 code dealing with relocations and
alternatives patching take into account the existence of the two copies,
the writable memory and the ROX memory at the actual allocated virtual
address.

[1] https://lore.kernel.org/all/a17c65c6-863f-4026-9c6f-a04b659e9ab4@app.fastmail.com


This patch (of 8):

There are a couple of declarations that depend on CONFIG_MMU in
include/linux/vmalloc.h spread all over the file.

Group them all together to improve code readability.

No functional changes.

Link: https://lkml.kernel.org/r/20241023162711.2579610-1-rppt@kernel.org
Link: https://lkml.kernel.org/r/20241023162711.2579610-2-rppt@kernel.org
Signed-off-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Uladzislau Rezki (Sony) <urezki@gmail.com>
Reviewed-by: Luis Chamberlain <mcgrof@kernel.org>
Tested-by: kdevops <kdevops@lists.linux.dev>
Cc: Andreas Larsson <andreas@gaisler.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Ard Biesheuvel <ardb@kernel.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Borislav Petkov (AMD) <bp@alien8.de>
Cc: Brian Cain <bcain@quicinc.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Dinh Nguyen <dinguyen@kernel.org>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Guo Ren <guoren@kernel.org>
Cc: Helge Deller <deller@gmx.de>
Cc: Huacai Chen <chenhuacai@kernel.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Johannes Berg <johannes@sipsolutions.net>
Cc: John Paul Adrian Glaubitz <glaubitz@physik.fu-berlin.de>
Cc: Kent Overstreet <kent.overstreet@linux.dev>
Cc: Liam R. Howlett <Liam.Howlett@Oracle.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Cc: Matt Turner <mattst88@gmail.com>
Cc: Max Filippov <jcmvbkbc@gmail.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Michal Simek <monstr@monstr.eu>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Palmer Dabbelt <palmer@dabbelt.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Richard Weinberger <richard@nod.at>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Song Liu <song@kernel.org>
Cc: Stafford Horne <shorne@gmail.com>
Cc: Steven Rostedt (Google) <rostedt@goodmis.org>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vineet Gupta <vgupta@kernel.org>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/vmalloc.h | 60 ++++++++++++++++++++-----------------------------
 1 file changed, 24 insertions(+), 36 deletions(-)

(limited to 'include')

diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index ad2ce7a6ab7a..27408f21e501 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -134,12 +134,6 @@ extern void vm_unmap_ram(const void *mem, unsigned int count);
 extern void *vm_map_ram(struct page **pages, unsigned int count, int node);
 extern void vm_unmap_aliases(void);
 
-#ifdef CONFIG_MMU
-extern unsigned long vmalloc_nr_pages(void);
-#else
-static inline unsigned long vmalloc_nr_pages(void) { return 0; }
-#endif
-
 extern void *vmalloc_noprof(unsigned long size) __alloc_size(1);
 #define vmalloc(...)		alloc_hooks(vmalloc_noprof(__VA_ARGS__))
 
@@ -266,12 +260,29 @@ static inline bool is_vm_area_hugepages(const void *addr)
 #endif
 }
 
+/* for /proc/kcore */
+long vread_iter(struct iov_iter *iter, const char *addr, size_t count);
+
+/*
+ *	Internals.  Don't use..
+ */
+__init void vm_area_add_early(struct vm_struct *vm);
+__init void vm_area_register_early(struct vm_struct *vm, size_t align);
+
+int register_vmap_purge_notifier(struct notifier_block *nb);
+int unregister_vmap_purge_notifier(struct notifier_block *nb);
+
 #ifdef CONFIG_MMU
+#define VMALLOC_TOTAL (VMALLOC_END - VMALLOC_START)
+
+unsigned long vmalloc_nr_pages(void);
+
 int vm_area_map_pages(struct vm_struct *area, unsigned long start,
 		      unsigned long end, struct page **pages);
 void vm_area_unmap_pages(struct vm_struct *area, unsigned long start,
 			 unsigned long end);
 void vunmap_range(unsigned long addr, unsigned long end);
+
 static inline void set_vm_flush_reset_perms(void *addr)
 {
 	struct vm_struct *vm = find_vm_area(addr);
@@ -279,24 +290,14 @@ static inline void set_vm_flush_reset_perms(void *addr)
 	if (vm)
 		vm->flags |= VM_FLUSH_RESET_PERMS;
 }
+#else  /* !CONFIG_MMU */
+#define VMALLOC_TOTAL 0UL
 
-#else
-static inline void set_vm_flush_reset_perms(void *addr)
-{
-}
-#endif
-
-/* for /proc/kcore */
-extern long vread_iter(struct iov_iter *iter, const char *addr, size_t count);
-
-/*
- *	Internals.  Don't use..
- */
-extern __init void vm_area_add_early(struct vm_struct *vm);
-extern __init void vm_area_register_early(struct vm_struct *vm, size_t align);
+static inline unsigned long vmalloc_nr_pages(void) { return 0; }
+static inline void set_vm_flush_reset_perms(void *addr) {}
+#endif /* CONFIG_MMU */
 
-#ifdef CONFIG_SMP
-# ifdef CONFIG_MMU
+#if defined(CONFIG_MMU) && defined(CONFIG_SMP)
 struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets,
 				     const size_t *sizes, int nr_vms,
 				     size_t align);
@@ -311,22 +312,9 @@ pcpu_get_vm_areas(const unsigned long *offsets,
 	return NULL;
 }
 
-static inline void
-pcpu_free_vm_areas(struct vm_struct **vms, int nr_vms)
-{
-}
-# endif
-#endif
-
-#ifdef CONFIG_MMU
-#define VMALLOC_TOTAL (VMALLOC_END - VMALLOC_START)
-#else
-#define VMALLOC_TOTAL 0UL
+static inline void pcpu_free_vm_areas(struct vm_struct **vms, int nr_vms) {}
 #endif
 
-int register_vmap_purge_notifier(struct notifier_block *nb);
-int unregister_vmap_purge_notifier(struct notifier_block *nb);
-
 #if defined(CONFIG_MMU) && defined(CONFIG_PRINTK)
 bool vmalloc_dump_obj(void *object);
 #else
-- 
cgit v1.2.3


From 0c3beacf681ec897e0b36685a9b49d01f5cb2dfb Mon Sep 17 00:00:00 2001
From: "Mike Rapoport (Microsoft)" <rppt@kernel.org>
Date: Wed, 23 Oct 2024 19:27:06 +0300
Subject: asm-generic: introduce text-patching.h

Several architectures support text patching, but they name the header
files that declare patching functions differently.

Make all such headers consistently named text-patching.h and add an empty
header in asm-generic for architectures that do not support text patching.

Link: https://lkml.kernel.org/r/20241023162711.2579610-4-rppt@kernel.org
Signed-off-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Acked-by: Geert Uytterhoeven <geert@linux-m68k.org> # m68k
Acked-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Luis Chamberlain <mcgrof@kernel.org>
Tested-by: kdevops <kdevops@lists.linux.dev>
Cc: Andreas Larsson <andreas@gaisler.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Ard Biesheuvel <ardb@kernel.org>
Cc: Borislav Petkov (AMD) <bp@alien8.de>
Cc: Brian Cain <bcain@quicinc.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Dinh Nguyen <dinguyen@kernel.org>
Cc: Guo Ren <guoren@kernel.org>
Cc: Helge Deller <deller@gmx.de>
Cc: Huacai Chen <chenhuacai@kernel.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Johannes Berg <johannes@sipsolutions.net>
Cc: John Paul Adrian Glaubitz <glaubitz@physik.fu-berlin.de>
Cc: Kent Overstreet <kent.overstreet@linux.dev>
Cc: Liam R. Howlett <Liam.Howlett@Oracle.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Cc: Matt Turner <mattst88@gmail.com>
Cc: Max Filippov <jcmvbkbc@gmail.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Michal Simek <monstr@monstr.eu>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Palmer Dabbelt <palmer@dabbelt.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Richard Weinberger <richard@nod.at>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Song Liu <song@kernel.org>
Cc: Stafford Horne <shorne@gmail.com>
Cc: Steven Rostedt (Google) <rostedt@goodmis.org>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Uladzislau Rezki (Sony) <urezki@gmail.com>
Cc: Vineet Gupta <vgupta@kernel.org>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/asm-generic/text-patching.h |  5 +++++
 include/linux/text-patching.h       | 15 +++++++++++++++
 2 files changed, 20 insertions(+)
 create mode 100644 include/asm-generic/text-patching.h
 create mode 100644 include/linux/text-patching.h

(limited to 'include')

diff --git a/include/asm-generic/text-patching.h b/include/asm-generic/text-patching.h
new file mode 100644
index 000000000000..2245c641b741
--- /dev/null
+++ b/include/asm-generic/text-patching.h
@@ -0,0 +1,5 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_GENERIC_TEXT_PATCHING_H
+#define _ASM_GENERIC_TEXT_PATCHING_H
+
+#endif /* _ASM_GENERIC_TEXT_PATCHING_H */
diff --git a/include/linux/text-patching.h b/include/linux/text-patching.h
new file mode 100644
index 000000000000..ad5877ab0855
--- /dev/null
+++ b/include/linux/text-patching.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_TEXT_PATCHING_H
+#define _LINUX_TEXT_PATCHING_H
+
+#include <asm/text-patching.h>
+
+#ifndef text_poke_copy
+static inline void *text_poke_copy(void *dst, const void *src, size_t len)
+{
+	return memcpy(dst, src, len);
+}
+#define text_poke_copy text_poke_copy
+#endif
+
+#endif /* _LINUX_TEXT_PATCHING_H */
-- 
cgit v1.2.3


From 0c133b1e78cd34dd9d18da707dc6f46170e9129e Mon Sep 17 00:00:00 2001
From: "Mike Rapoport (Microsoft)" <rppt@kernel.org>
Date: Wed, 23 Oct 2024 19:27:07 +0300
Subject: module: prepare to handle ROX allocations for text

In order to support ROX allocations for module text, it is necessary to
handle modifications to the code, such as relocations and alternatives
patching, without write access to that memory.

One option is to use text patching, but this would make module loading
extremely slow and will expose executable code that is not finally formed.

A better way is to have memory allocated with ROX permissions contain
invalid instructions and keep a writable, but not executable copy of the
module text.  The relocations and alternative patches would be done on the
writable copy using the addresses of the ROX memory.  Once the module is
completely ready, the updated text will be copied to ROX memory using text
patching in one go and the writable copy will be freed.

Add support for that to module initialization code and provide necessary
interfaces in execmem.

Link: https://lkml.kernel.org/r/20241023162711.2579610-5-rppt@kernel.org
Signed-off-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
Reviewd-by: Luis Chamberlain <mcgrof@kernel.org>
Tested-by: kdevops <kdevops@lists.linux.dev>
Cc: Andreas Larsson <andreas@gaisler.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Ard Biesheuvel <ardb@kernel.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Borislav Petkov (AMD) <bp@alien8.de>
Cc: Brian Cain <bcain@quicinc.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Dinh Nguyen <dinguyen@kernel.org>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Guo Ren <guoren@kernel.org>
Cc: Helge Deller <deller@gmx.de>
Cc: Huacai Chen <chenhuacai@kernel.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Johannes Berg <johannes@sipsolutions.net>
Cc: John Paul Adrian Glaubitz <glaubitz@physik.fu-berlin.de>
Cc: Kent Overstreet <kent.overstreet@linux.dev>
Cc: Liam R. Howlett <Liam.Howlett@Oracle.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Cc: Matt Turner <mattst88@gmail.com>
Cc: Max Filippov <jcmvbkbc@gmail.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Michal Simek <monstr@monstr.eu>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Palmer Dabbelt <palmer@dabbelt.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Richard Weinberger <richard@nod.at>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Song Liu <song@kernel.org>
Cc: Stafford Horne <shorne@gmail.com>
Cc: Steven Rostedt (Google) <rostedt@goodmis.org>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Uladzislau Rezki (Sony) <urezki@gmail.com>
Cc: Vineet Gupta <vgupta@kernel.org>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/execmem.h      | 23 +++++++++++++++++++++++
 include/linux/module.h       | 16 ++++++++++++++++
 include/linux/moduleloader.h |  4 ++++
 3 files changed, 43 insertions(+)

(limited to 'include')

diff --git a/include/linux/execmem.h b/include/linux/execmem.h
index 32cef1144117..dfdf19f8a5e8 100644
--- a/include/linux/execmem.h
+++ b/include/linux/execmem.h
@@ -46,9 +46,11 @@ enum execmem_type {
 /**
  * enum execmem_range_flags - options for executable memory allocations
  * @EXECMEM_KASAN_SHADOW:	allocate kasan shadow
+ * @EXECMEM_ROX_CACHE:		allocations should use ROX cache of huge pages
  */
 enum execmem_range_flags {
 	EXECMEM_KASAN_SHADOW	= (1 << 0),
+	EXECMEM_ROX_CACHE	= (1 << 1),
 };
 
 /**
@@ -123,6 +125,27 @@ void *execmem_alloc(enum execmem_type type, size_t size);
  */
 void execmem_free(void *ptr);
 
+/**
+ * execmem_update_copy - copy an update to executable memory
+ * @dst:  destination address to update
+ * @src:  source address containing the data
+ * @size: how many bytes of memory shold be copied
+ *
+ * Copy @size bytes from @src to @dst using text poking if the memory at
+ * @dst is read-only.
+ *
+ * Return: a pointer to @dst or NULL on error
+ */
+void *execmem_update_copy(void *dst, const void *src, size_t size);
+
+/**
+ * execmem_is_rox - check if execmem is read-only
+ * @type - the execmem type to check
+ *
+ * Return: %true if the @type is read-only, %false if it's writable
+ */
+bool execmem_is_rox(enum execmem_type type);
+
 #if defined(CONFIG_EXECMEM) && !defined(CONFIG_ARCH_WANTS_EXECMEM_LATE)
 void execmem_init(void);
 #else
diff --git a/include/linux/module.h b/include/linux/module.h
index 88ecc5e9f523..2a9386cbdf85 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -367,6 +367,8 @@ enum mod_mem_type {
 
 struct module_memory {
 	void *base;
+	void *rw_copy;
+	bool is_rox;
 	unsigned int size;
 
 #ifdef CONFIG_MODULES_TREE_LOOKUP
@@ -767,6 +769,15 @@ static inline bool is_livepatch_module(struct module *mod)
 
 void set_module_sig_enforced(void);
 
+void *__module_writable_address(struct module *mod, void *loc);
+
+static inline void *module_writable_address(struct module *mod, void *loc)
+{
+	if (!IS_ENABLED(CONFIG_ARCH_HAS_EXECMEM_ROX) || !mod)
+		return loc;
+	return __module_writable_address(mod, loc);
+}
+
 #else /* !CONFIG_MODULES... */
 
 static inline struct module *__module_address(unsigned long addr)
@@ -874,6 +885,11 @@ static inline bool module_is_coming(struct module *mod)
 {
 	return false;
 }
+
+static inline void *module_writable_address(struct module *mod, void *loc)
+{
+	return loc;
+}
 #endif /* CONFIG_MODULES */
 
 #ifdef CONFIG_SYSFS
diff --git a/include/linux/moduleloader.h b/include/linux/moduleloader.h
index e395461d59e5..1f5507ba5a12 100644
--- a/include/linux/moduleloader.h
+++ b/include/linux/moduleloader.h
@@ -108,6 +108,10 @@ int module_finalize(const Elf_Ehdr *hdr,
 		    const Elf_Shdr *sechdrs,
 		    struct module *mod);
 
+int module_post_finalize(const Elf_Ehdr *hdr,
+			 const Elf_Shdr *sechdrs,
+			 struct module *mod);
+
 #ifdef CONFIG_MODULES
 void flush_module_init_free_work(void);
 #else
-- 
cgit v1.2.3


From 0c6378a71574daa6cd1534ad42a956e3262756c7 Mon Sep 17 00:00:00 2001
From: "Mike Rapoport (Microsoft)" <rppt@kernel.org>
Date: Wed, 23 Oct 2024 19:27:08 +0300
Subject: arch: introduce set_direct_map_valid_noflush()

Add an API that will allow updates of the direct/linear map for a set of
physically contiguous pages.

It will be used in the following patches.

Link: https://lkml.kernel.org/r/20241023162711.2579610-6-rppt@kernel.org
Signed-off-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Luis Chamberlain <mcgrof@kernel.org>
Tested-by: kdevops <kdevops@lists.linux.dev>
Cc: Andreas Larsson <andreas@gaisler.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Ard Biesheuvel <ardb@kernel.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Borislav Petkov (AMD) <bp@alien8.de>
Cc: Brian Cain <bcain@quicinc.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Dinh Nguyen <dinguyen@kernel.org>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Guo Ren <guoren@kernel.org>
Cc: Helge Deller <deller@gmx.de>
Cc: Huacai Chen <chenhuacai@kernel.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Johannes Berg <johannes@sipsolutions.net>
Cc: John Paul Adrian Glaubitz <glaubitz@physik.fu-berlin.de>
Cc: Kent Overstreet <kent.overstreet@linux.dev>
Cc: Liam R. Howlett <Liam.Howlett@Oracle.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Cc: Matt Turner <mattst88@gmail.com>
Cc: Max Filippov <jcmvbkbc@gmail.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Michal Simek <monstr@monstr.eu>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Palmer Dabbelt <palmer@dabbelt.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Richard Weinberger <richard@nod.at>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Song Liu <song@kernel.org>
Cc: Stafford Horne <shorne@gmail.com>
Cc: Steven Rostedt (Google) <rostedt@goodmis.org>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Uladzislau Rezki (Sony) <urezki@gmail.com>
Cc: Vineet Gupta <vgupta@kernel.org>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/set_memory.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include')

diff --git a/include/linux/set_memory.h b/include/linux/set_memory.h
index e7aec20fb44f..3030d9245f5a 100644
--- a/include/linux/set_memory.h
+++ b/include/linux/set_memory.h
@@ -34,6 +34,12 @@ static inline int set_direct_map_default_noflush(struct page *page)
 	return 0;
 }
 
+static inline int set_direct_map_valid_noflush(struct page *page,
+					       unsigned nr, bool valid)
+{
+	return 0;
+}
+
 static inline bool kernel_page_present(struct page *page)
 {
 	return true;
-- 
cgit v1.2.3


From 2e45474ab14f0f17c1091c503a13ff2fe2a84486 Mon Sep 17 00:00:00 2001
From: "Mike Rapoport (Microsoft)" <rppt@kernel.org>
Date: Wed, 23 Oct 2024 19:27:10 +0300
Subject: execmem: add support for cache of large ROX pages

Using large pages to map text areas reduces iTLB pressure and improves
performance.

Extend execmem_alloc() with an ability to use huge pages with ROX
permissions as a cache for smaller allocations.

To populate the cache, a writable large page is allocated from vmalloc
with VM_ALLOW_HUGE_VMAP, filled with invalid instructions and then
remapped as ROX.

The direct map alias of that large page is exculded from the direct map.

Portions of that large page are handed out to execmem_alloc() callers
without any changes to the permissions.

When the memory is freed with execmem_free() it is invalidated again so
that it won't contain stale instructions.

An architecture has to implement execmem_fill_trapping_insns() callback
and select ARCH_HAS_EXECMEM_ROX configuration option to be able to use the
ROX cache.

The cache is enabled on per-range basis when an architecture sets
EXECMEM_ROX_CACHE flag in definition of an execmem_range.

Link: https://lkml.kernel.org/r/20241023162711.2579610-8-rppt@kernel.org
Signed-off-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
Reviewed-by: Luis Chamberlain <mcgrof@kernel.org>
Tested-by: kdevops <kdevops@lists.linux.dev>
Cc: Andreas Larsson <andreas@gaisler.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Ard Biesheuvel <ardb@kernel.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Borislav Petkov (AMD) <bp@alien8.de>
Cc: Brian Cain <bcain@quicinc.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Dinh Nguyen <dinguyen@kernel.org>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Guo Ren <guoren@kernel.org>
Cc: Helge Deller <deller@gmx.de>
Cc: Huacai Chen <chenhuacai@kernel.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Johannes Berg <johannes@sipsolutions.net>
Cc: John Paul Adrian Glaubitz <glaubitz@physik.fu-berlin.de>
Cc: Kent Overstreet <kent.overstreet@linux.dev>
Cc: Liam R. Howlett <Liam.Howlett@Oracle.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Cc: Matt Turner <mattst88@gmail.com>
Cc: Max Filippov <jcmvbkbc@gmail.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Michal Simek <monstr@monstr.eu>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Palmer Dabbelt <palmer@dabbelt.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Richard Weinberger <richard@nod.at>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Song Liu <song@kernel.org>
Cc: Stafford Horne <shorne@gmail.com>
Cc: Steven Rostedt (Google) <rostedt@goodmis.org>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Uladzislau Rezki (Sony) <urezki@gmail.com>
Cc: Vineet Gupta <vgupta@kernel.org>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/execmem.h | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

(limited to 'include')

diff --git a/include/linux/execmem.h b/include/linux/execmem.h
index dfdf19f8a5e8..1517fa196bf7 100644
--- a/include/linux/execmem.h
+++ b/include/linux/execmem.h
@@ -53,6 +53,20 @@ enum execmem_range_flags {
 	EXECMEM_ROX_CACHE	= (1 << 1),
 };
 
+#ifdef CONFIG_ARCH_HAS_EXECMEM_ROX
+/**
+ * execmem_fill_trapping_insns - set memory to contain instructions that
+ *				 will trap
+ * @ptr:	pointer to memory to fill
+ * @size:	size of the range to fill
+ * @writable:	is the memory poited by @ptr is writable or ROX
+ *
+ * A hook for architecures to fill execmem ranges with invalid instructions.
+ * Architectures that use EXECMEM_ROX_CACHE must implement this.
+ */
+void execmem_fill_trapping_insns(void *ptr, size_t size, bool writable);
+#endif
+
 /**
  * struct execmem_range - definition of an address space suitable for code and
  *			  related data allocations
-- 
cgit v1.2.3


From 7c8c76e446ca0079692fad44a3993cb1d7666c21 Mon Sep 17 00:00:00 2001
From: Suren Baghdasaryan <surenb@google.com>
Date: Wed, 23 Oct 2024 10:07:54 -0700
Subject: maple_tree: add mas_for_each_rev() helper

Patch series "page allocation tag compression", v4.

This patchset implements several improvements:

1. Gracefully handles module unloading while there are used
   allocations allocated from that module;

2. Provides an option to store page allocation tag references in the
   page flags, removing dependency on page extensions and eliminating the
   memory overhead from storing page allocation references (~0.2% of total
   system memory).  This also improves page allocation performance when
   CONFIG_MEM_ALLOC_PROFILING is enabled by eliminating page extension
   lookup.  Page allocation performance overhead is reduced from 41% to
   5.5%.

Patch #1 introduces mas_for_each_rev() helper function.

Patch #2 introduces shutdown_mem_profiling() helper function to be used
when disabling memory allocation profiling.

Patch #3 copies module tags into virtually contiguous memory which
serves two purposes:

- Lets us deal with the situation when module is unloaded while there
  are still live allocations from that module.  Since we are using a copy
  version of the tags we can safely unload the module.  Space and gaps in
  this contiguous memory are managed using a maple tree.

- Enables simple indexing of the tags in the later patches.

Patch #4 changes the way we allocate virtually contiguous memory for
module tags to reserve only vitrual area and populate physical pages only
as needed at module load time.

Patch #5 abstracts page allocation tag reference to simplify later
changes.

Patch #6 adds compression option to the sysctl.vm.mem_profiling boot
parameter for storing page allocation tag references inside page flags if
they fit.  If the number of available page flag bits is insufficient to
address all kernel allocations, memory allocation profiling gets disabled
with an appropriate warning.


This patch (of 6):

Add mas_for_each_rev() function to iterate maple tree nodes in reverse
order.

Link: https://lkml.kernel.org/r/20241023170759.999909-1-surenb@google.com
Link: https://lkml.kernel.org/r/20241023170759.999909-2-surenb@google.com
Signed-off-by: Suren Baghdasaryan <surenb@google.com>
Suggested-by: Liam R. Howlett <Liam.Howlett@Oracle.com>
Reviewed-by: Liam R. Howlett <Liam.Howlett@Oracle.com>
Reviewed-by: Pasha Tatashin <pasha.tatashin@soleen.com>
Cc: Ard Biesheuvel <ardb@kernel.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Borislav Petkov (AMD) <bp@alien8.de>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Daniel Gomez <da.gomez@samsung.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: David Rientjes <rientjes@google.com>
Cc: Dennis Zhou <dennis@kernel.org>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: John Hubbard <jhubbard@nvidia.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Kalesh Singh <kaleshsingh@google.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Kent Overstreet <kent.overstreet@linux.dev>
Cc: Luis Chamberlain <mcgrof@kernel.org>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Rapoport (Microsoft) <rppt@kernel.org>
Cc: Minchan Kim <minchan@google.com>
Cc: Paul E. McKenney <paulmck@kernel.org>
Cc: Petr Pavlu <petr.pavlu@suse.com>
Cc: Roman Gushchin <roman.gushchin@linux.dev>
Cc: Sami Tolvanen <samitolvanen@google.com>
Cc: Sourav Panda <souravpanda@google.com>
Cc: Steven Rostedt (Google) <rostedt@goodmis.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Thomas Huth <thuth@redhat.com>
Cc: Uladzislau Rezki (Sony) <urezki@gmail.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Xiongwei Song <xiongwei.song@windriver.com>
Cc: Yu Zhao <yuzhao@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/maple_tree.h | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

(limited to 'include')

diff --git a/include/linux/maple_tree.h b/include/linux/maple_tree.h
index 61c236850ca8..cbbcd18d4186 100644
--- a/include/linux/maple_tree.h
+++ b/include/linux/maple_tree.h
@@ -592,6 +592,20 @@ static __always_inline void mas_reset(struct ma_state *mas)
 #define mas_for_each(__mas, __entry, __max) \
 	while (((__entry) = mas_find((__mas), (__max))) != NULL)
 
+/**
+ * mas_for_each_rev() - Iterate over a range of the maple tree in reverse order.
+ * @__mas: Maple Tree operation state (maple_state)
+ * @__entry: Entry retrieved from the tree
+ * @__min: minimum index to retrieve from the tree
+ *
+ * When returned, mas->index and mas->last will hold the entire range for the
+ * entry.
+ *
+ * Note: may return the zero entry.
+ */
+#define mas_for_each_rev(__mas, __entry, __min) \
+	while (((__entry) = mas_find_rev((__mas), (__min))) != NULL)
+
 #ifdef CONFIG_DEBUG_MAPLE_TREE
 enum mt_dump_format {
 	mt_dump_dec,
-- 
cgit v1.2.3


From 0db6f8d7820a4b788565dac8eed52bfc2c3216da Mon Sep 17 00:00:00 2001
From: Suren Baghdasaryan <surenb@google.com>
Date: Wed, 23 Oct 2024 10:07:56 -0700
Subject: alloc_tag: load module tags into separate contiguous memory

When a module gets unloaded there is a possibility that some of the
allocations it made are still used and therefore the allocation tags
corresponding to these allocations are still referenced.  As such, the
memory for these tags can't be freed.  This is currently handled as an
abnormal situation and module's data section is not being unloaded.  To
handle this situation without keeping module's data in memory, allow
codetags with longer lifespan than the module to be loaded into their own
separate memory.  The in-use memory areas and gaps after module unloading
in this separate memory are tracked using maple trees.  Allocation tags
arrange their separate memory so that it is virtually contiguous and that
will allow simple allocation tag indexing later on in this patchset.  The
size of this virtually contiguous memory is set to store up to 100000
allocation tags.

[surenb@google.com: fix empty codetag module section handling]
  Link: https://lkml.kernel.org/r/20241101000017.3856204-1-surenb@google.com
[akpm@linux-foundation.org: update comment, per Dan]
Link: https://lkml.kernel.org/r/20241023170759.999909-4-surenb@google.com
Signed-off-by: Suren Baghdasaryan <surenb@google.com>
Reviewed-by: Pasha Tatashin <pasha.tatashin@soleen.com>
Cc: Ard Biesheuvel <ardb@kernel.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Borislav Petkov (AMD) <bp@alien8.de>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Daniel Gomez <da.gomez@samsung.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: David Rientjes <rientjes@google.com>
Cc: Dennis Zhou <dennis@kernel.org>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: John Hubbard <jhubbard@nvidia.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Kalesh Singh <kaleshsingh@google.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Kent Overstreet <kent.overstreet@linux.dev>
Cc: Liam R. Howlett <Liam.Howlett@Oracle.com>
Cc: Luis Chamberlain <mcgrof@kernel.org>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Rapoport (Microsoft) <rppt@kernel.org>
Cc: Minchan Kim <minchan@google.com>
Cc: Paul E. McKenney <paulmck@kernel.org>
Cc: Petr Pavlu <petr.pavlu@suse.com>
Cc: Roman Gushchin <roman.gushchin@linux.dev>
Cc: Sami Tolvanen <samitolvanen@google.com>
Cc: Sourav Panda <souravpanda@google.com>
Cc: Steven Rostedt (Google) <rostedt@goodmis.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Thomas Huth <thuth@redhat.com>
Cc: Uladzislau Rezki (Sony) <urezki@gmail.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Xiongwei Song <xiongwei.song@windriver.com>
Cc: Yu Zhao <yuzhao@google.com>
Cc: Dan Carpenter <dan.carpenter@linaro.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/asm-generic/codetag.lds.h | 19 +++++++++++++++++++
 include/linux/alloc_tag.h         | 13 +++++++++++--
 include/linux/codetag.h           | 37 ++++++++++++++++++++++++++++++++-----
 3 files changed, 62 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/asm-generic/codetag.lds.h b/include/asm-generic/codetag.lds.h
index 64f536b80380..372c320c5043 100644
--- a/include/asm-generic/codetag.lds.h
+++ b/include/asm-generic/codetag.lds.h
@@ -11,4 +11,23 @@
 #define CODETAG_SECTIONS()		\
 	SECTION_WITH_BOUNDARIES(alloc_tags)
 
+/*
+ * Module codetags which aren't used after module unload, therefore have the
+ * same lifespan as the module and can be safely unloaded with the module.
+ */
+#define MOD_CODETAG_SECTIONS()
+
+#define MOD_SEPARATE_CODETAG_SECTION(_name)	\
+	.codetag.##_name : {			\
+		SECTION_WITH_BOUNDARIES(_name)	\
+	}
+
+/*
+ * For codetags which might be used after module unload, therefore might stay
+ * longer in memory. Each such codetag type has its own section so that we can
+ * unload them individually once unused.
+ */
+#define MOD_SEPARATE_CODETAG_SECTIONS()		\
+	MOD_SEPARATE_CODETAG_SECTION(alloc_tags)
+
 #endif /* __ASM_GENERIC_CODETAG_LDS_H */
diff --git a/include/linux/alloc_tag.h b/include/linux/alloc_tag.h
index 941deffc590d..55d30543c4c7 100644
--- a/include/linux/alloc_tag.h
+++ b/include/linux/alloc_tag.h
@@ -30,6 +30,13 @@ struct alloc_tag {
 	struct alloc_tag_counters __percpu	*counters;
 } __aligned(8);
 
+struct alloc_tag_module_section {
+	unsigned long start_addr;
+	unsigned long end_addr;
+	/* used size */
+	unsigned long size;
+};
+
 #ifdef CONFIG_MEM_ALLOC_PROFILING_DEBUG
 
 #define CODETAG_EMPTY	((void *)1)
@@ -54,6 +61,8 @@ static inline void set_codetag_empty(union codetag_ref *ref) {}
 
 #ifdef CONFIG_MEM_ALLOC_PROFILING
 
+#define ALLOC_TAG_SECTION_NAME	"alloc_tags"
+
 struct codetag_bytes {
 	struct codetag *ct;
 	s64 bytes;
@@ -76,7 +85,7 @@ DECLARE_PER_CPU(struct alloc_tag_counters, _shared_alloc_tag);
 
 #define DEFINE_ALLOC_TAG(_alloc_tag)						\
 	static struct alloc_tag _alloc_tag __used __aligned(8)			\
-	__section("alloc_tags") = {						\
+	__section(ALLOC_TAG_SECTION_NAME) = {					\
 		.ct = CODE_TAG_INIT,						\
 		.counters = &_shared_alloc_tag };
 
@@ -85,7 +94,7 @@ DECLARE_PER_CPU(struct alloc_tag_counters, _shared_alloc_tag);
 #define DEFINE_ALLOC_TAG(_alloc_tag)						\
 	static DEFINE_PER_CPU(struct alloc_tag_counters, _alloc_tag_cntr);	\
 	static struct alloc_tag _alloc_tag __used __aligned(8)			\
-	__section("alloc_tags") = {						\
+	__section(ALLOC_TAG_SECTION_NAME) = {					\
 		.ct = CODE_TAG_INIT,						\
 		.counters = &_alloc_tag_cntr };
 
diff --git a/include/linux/codetag.h b/include/linux/codetag.h
index c2a579ccd455..d10bd9810d32 100644
--- a/include/linux/codetag.h
+++ b/include/linux/codetag.h
@@ -35,8 +35,15 @@ struct codetag_type_desc {
 	size_t tag_size;
 	void (*module_load)(struct codetag_type *cttype,
 			    struct codetag_module *cmod);
-	bool (*module_unload)(struct codetag_type *cttype,
+	void (*module_unload)(struct codetag_type *cttype,
 			      struct codetag_module *cmod);
+#ifdef CONFIG_MODULES
+	void (*module_replaced)(struct module *mod, struct module *new_mod);
+	bool (*needs_section_mem)(struct module *mod, unsigned long size);
+	void *(*alloc_section_mem)(struct module *mod, unsigned long size,
+				   unsigned int prepend, unsigned long align);
+	void (*free_section_mem)(struct module *mod, bool used);
+#endif
 };
 
 struct codetag_iterator {
@@ -71,11 +78,31 @@ struct codetag_type *
 codetag_register_type(const struct codetag_type_desc *desc);
 
 #if defined(CONFIG_CODE_TAGGING) && defined(CONFIG_MODULES)
+
+bool codetag_needs_module_section(struct module *mod, const char *name,
+				  unsigned long size);
+void *codetag_alloc_module_section(struct module *mod, const char *name,
+				   unsigned long size, unsigned int prepend,
+				   unsigned long align);
+void codetag_free_module_sections(struct module *mod);
+void codetag_module_replaced(struct module *mod, struct module *new_mod);
 void codetag_load_module(struct module *mod);
-bool codetag_unload_module(struct module *mod);
-#else
+void codetag_unload_module(struct module *mod);
+
+#else /* defined(CONFIG_CODE_TAGGING) && defined(CONFIG_MODULES) */
+
+static inline bool
+codetag_needs_module_section(struct module *mod, const char *name,
+			     unsigned long size) { return false; }
+static inline void *
+codetag_alloc_module_section(struct module *mod, const char *name,
+			     unsigned long size, unsigned int prepend,
+			     unsigned long align) { return NULL; }
+static inline void codetag_free_module_sections(struct module *mod) {}
+static inline void codetag_module_replaced(struct module *mod, struct module *new_mod) {}
 static inline void codetag_load_module(struct module *mod) {}
-static inline bool codetag_unload_module(struct module *mod) { return true; }
-#endif
+static inline void codetag_unload_module(struct module *mod) {}
+
+#endif /* defined(CONFIG_CODE_TAGGING) && defined(CONFIG_MODULES) */
 
 #endif /* _LINUX_CODETAG_H */
-- 
cgit v1.2.3


From 0f9b685626daa2f8e19a9788625c9b624c223e45 Mon Sep 17 00:00:00 2001
From: Suren Baghdasaryan <surenb@google.com>
Date: Wed, 23 Oct 2024 10:07:57 -0700
Subject: alloc_tag: populate memory for module tags as needed

The memory reserved for module tags does not need to be backed by physical
pages until there are tags to store there.  Change the way we reserve this
memory to allocate only virtual area for the tags and populate it with
physical pages as needed when we load a module.

[surenb@google.com: avoid execmem_vmap() when !MMU]
  Link: https://lkml.kernel.org/r/20241031233611.3833002-1-surenb@google.com
Link: https://lkml.kernel.org/r/20241023170759.999909-5-surenb@google.com
Signed-off-by: Suren Baghdasaryan <surenb@google.com>
Reviewed-by: Pasha Tatashin <pasha.tatashin@soleen.com>
Cc: Ard Biesheuvel <ardb@kernel.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Borislav Petkov (AMD) <bp@alien8.de>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Daniel Gomez <da.gomez@samsung.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: David Rientjes <rientjes@google.com>
Cc: Dennis Zhou <dennis@kernel.org>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: John Hubbard <jhubbard@nvidia.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Kalesh Singh <kaleshsingh@google.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Kent Overstreet <kent.overstreet@linux.dev>
Cc: Liam R. Howlett <Liam.Howlett@Oracle.com>
Cc: Luis Chamberlain <mcgrof@kernel.org>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Rapoport (Microsoft) <rppt@kernel.org>
Cc: Minchan Kim <minchan@google.com>
Cc: Paul E. McKenney <paulmck@kernel.org>
Cc: Petr Pavlu <petr.pavlu@suse.com>
Cc: Roman Gushchin <roman.gushchin@linux.dev>
Cc: Sami Tolvanen <samitolvanen@google.com>
Cc: Sourav Panda <souravpanda@google.com>
Cc: Steven Rostedt (Google) <rostedt@goodmis.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Thomas Huth <thuth@redhat.com>
Cc: Uladzislau Rezki (Sony) <urezki@gmail.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Xiongwei Song <xiongwei.song@windriver.com>
Cc: Yu Zhao <yuzhao@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/execmem.h | 12 ++++++++++++
 include/linux/vmalloc.h |  3 +++
 2 files changed, 15 insertions(+)

(limited to 'include')

diff --git a/include/linux/execmem.h b/include/linux/execmem.h
index 1517fa196bf7..64130ae19690 100644
--- a/include/linux/execmem.h
+++ b/include/linux/execmem.h
@@ -139,6 +139,18 @@ void *execmem_alloc(enum execmem_type type, size_t size);
  */
 void execmem_free(void *ptr);
 
+#ifdef CONFIG_MMU
+/**
+ * execmem_vmap - create virtual mapping for EXECMEM_MODULE_DATA memory
+ * @size: size of the virtual mapping in bytes
+ *
+ * Maps virtually contiguous area in the range suitable for EXECMEM_MODULE_DATA.
+ *
+ * Return: the area descriptor on success or %NULL on failure.
+ */
+struct vm_struct *execmem_vmap(size_t size);
+#endif
+
 /**
  * execmem_update_copy - copy an update to executable memory
  * @dst:  destination address to update
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index 27408f21e501..31e9ffd936e3 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -202,6 +202,9 @@ extern int remap_vmalloc_range_partial(struct vm_area_struct *vma,
 extern int remap_vmalloc_range(struct vm_area_struct *vma, void *addr,
 							unsigned long pgoff);
 
+int vmap_pages_range(unsigned long addr, unsigned long end, pgprot_t prot,
+		     struct page **pages, unsigned int page_shift);
+
 /*
  * Architectures can set this mask to a combination of PGTBL_P?D_MODIFIED values
  * and let generic vmalloc and ioremap code know when arch_sync_kernel_mappings()
-- 
cgit v1.2.3


From 42895a86124418d8dd29a93812bc282e569ccfee Mon Sep 17 00:00:00 2001
From: Suren Baghdasaryan <surenb@google.com>
Date: Wed, 23 Oct 2024 10:07:58 -0700
Subject: alloc_tag: introduce pgtag_ref_handle to abstract page tag references

To simplify later changes to page tag references, introduce new
pgtag_ref_handle type.  This allows easy replacement of page_ext as a
storage of page allocation tags.

Link: https://lkml.kernel.org/r/20241023170759.999909-6-surenb@google.com
Signed-off-by: Suren Baghdasaryan <surenb@google.com>
Reviewed-by: Pasha Tatashin <pasha.tatashin@soleen.com>
Cc: Ard Biesheuvel <ardb@kernel.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Borislav Petkov (AMD) <bp@alien8.de>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Daniel Gomez <da.gomez@samsung.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: David Rientjes <rientjes@google.com>
Cc: Dennis Zhou <dennis@kernel.org>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: John Hubbard <jhubbard@nvidia.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Kalesh Singh <kaleshsingh@google.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Kent Overstreet <kent.overstreet@linux.dev>
Cc: Liam R. Howlett <Liam.Howlett@Oracle.com>
Cc: Luis Chamberlain <mcgrof@kernel.org>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Rapoport (Microsoft) <rppt@kernel.org>
Cc: Minchan Kim <minchan@google.com>
Cc: Paul E. McKenney <paulmck@kernel.org>
Cc: Petr Pavlu <petr.pavlu@suse.com>
Cc: Roman Gushchin <roman.gushchin@linux.dev>
Cc: Sami Tolvanen <samitolvanen@google.com>
Cc: Sourav Panda <souravpanda@google.com>
Cc: Steven Rostedt (Google) <rostedt@goodmis.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Thomas Huth <thuth@redhat.com>
Cc: Uladzislau Rezki (Sony) <urezki@gmail.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Xiongwei Song <xiongwei.song@windriver.com>
Cc: Yu Zhao <yuzhao@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/mm.h          | 25 ++++++------
 include/linux/pgalloc_tag.h | 92 ++++++++++++++++++++++++++-------------------
 2 files changed, 67 insertions(+), 50 deletions(-)

(limited to 'include')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index eb070c14e309..f9120ac6d901 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -4181,37 +4181,38 @@ static inline void pgalloc_tag_split(struct folio *folio, int old_order, int new
 		return;
 
 	for (i = nr_pages; i < (1 << old_order); i += nr_pages) {
-		union codetag_ref *ref = get_page_tag_ref(folio_page(folio, i));
+		union pgtag_ref_handle handle;
+		union codetag_ref ref;
 
-		if (ref) {
+		if (get_page_tag_ref(folio_page(folio, i), &ref, &handle)) {
 			/* Set new reference to point to the original tag */
-			alloc_tag_ref_set(ref, tag);
-			put_page_tag_ref(ref);
+			alloc_tag_ref_set(&ref, tag);
+			update_page_tag_ref(handle, &ref);
+			put_page_tag_ref(handle);
 		}
 	}
 }
 
 static inline void pgalloc_tag_copy(struct folio *new, struct folio *old)
 {
+	union pgtag_ref_handle handle;
+	union codetag_ref ref;
 	struct alloc_tag *tag;
-	union codetag_ref *ref;
 
 	tag = pgalloc_tag_get(&old->page);
 	if (!tag)
 		return;
 
-	ref = get_page_tag_ref(&new->page);
-	if (!ref)
+	if (!get_page_tag_ref(&new->page, &ref, &handle))
 		return;
 
 	/* Clear the old ref to the original allocation tag. */
 	clear_page_tag_ref(&old->page);
 	/* Decrement the counters of the tag on get_new_folio. */
-	alloc_tag_sub(ref, folio_size(new));
-
-	__alloc_tag_ref_set(ref, tag);
-
-	put_page_tag_ref(ref);
+	alloc_tag_sub(&ref, folio_size(new));
+	__alloc_tag_ref_set(&ref, tag);
+	update_page_tag_ref(handle, &ref);
+	put_page_tag_ref(handle);
 }
 #else /* !CONFIG_MEM_ALLOC_PROFILING */
 static inline void pgalloc_tag_split(struct folio *folio, int old_order, int new_order)
diff --git a/include/linux/pgalloc_tag.h b/include/linux/pgalloc_tag.h
index 59a3deb792a8..b13cd3313a88 100644
--- a/include/linux/pgalloc_tag.h
+++ b/include/linux/pgalloc_tag.h
@@ -11,46 +11,59 @@
 
 #include <linux/page_ext.h>
 
+union pgtag_ref_handle {
+	union codetag_ref *ref;	/* reference in page extension */
+};
+
 extern struct page_ext_operations page_alloc_tagging_ops;
 
-static inline union codetag_ref *codetag_ref_from_page_ext(struct page_ext *page_ext)
+/* Should be called only if mem_alloc_profiling_enabled() */
+static inline bool get_page_tag_ref(struct page *page, union codetag_ref *ref,
+				    union pgtag_ref_handle *handle)
 {
-	return (union codetag_ref *)page_ext_data(page_ext, &page_alloc_tagging_ops);
-}
+	struct page_ext *page_ext;
+	union codetag_ref *tmp;
 
-static inline struct page_ext *page_ext_from_codetag_ref(union codetag_ref *ref)
-{
-	return (void *)ref - page_alloc_tagging_ops.offset;
+	if (!page)
+		return false;
+
+	page_ext = page_ext_get(page);
+	if (!page_ext)
+		return false;
+
+	tmp = (union codetag_ref *)page_ext_data(page_ext, &page_alloc_tagging_ops);
+	ref->ct = tmp->ct;
+	handle->ref = tmp;
+	return true;
 }
 
-/* Should be called only if mem_alloc_profiling_enabled() */
-static inline union codetag_ref *get_page_tag_ref(struct page *page)
+static inline void put_page_tag_ref(union pgtag_ref_handle handle)
 {
-	if (page) {
-		struct page_ext *page_ext = page_ext_get(page);
+	if (WARN_ON(!handle.ref))
+		return;
 
-		if (page_ext)
-			return codetag_ref_from_page_ext(page_ext);
-	}
-	return NULL;
+	page_ext_put((void *)handle.ref - page_alloc_tagging_ops.offset);
 }
 
-static inline void put_page_tag_ref(union codetag_ref *ref)
+static inline void update_page_tag_ref(union pgtag_ref_handle handle,
+				       union codetag_ref *ref)
 {
-	if (WARN_ON(!ref))
+	if (WARN_ON(!handle.ref || !ref))
 		return;
 
-	page_ext_put(page_ext_from_codetag_ref(ref));
+	handle.ref->ct = ref->ct;
 }
 
 static inline void clear_page_tag_ref(struct page *page)
 {
 	if (mem_alloc_profiling_enabled()) {
-		union codetag_ref *ref = get_page_tag_ref(page);
+		union pgtag_ref_handle handle;
+		union codetag_ref ref;
 
-		if (ref) {
-			set_codetag_empty(ref);
-			put_page_tag_ref(ref);
+		if (get_page_tag_ref(page, &ref, &handle)) {
+			set_codetag_empty(&ref);
+			update_page_tag_ref(handle, &ref);
+			put_page_tag_ref(handle);
 		}
 	}
 }
@@ -59,11 +72,13 @@ static inline void pgalloc_tag_add(struct page *page, struct task_struct *task,
 				   unsigned int nr)
 {
 	if (mem_alloc_profiling_enabled()) {
-		union codetag_ref *ref = get_page_tag_ref(page);
+		union pgtag_ref_handle handle;
+		union codetag_ref ref;
 
-		if (ref) {
-			alloc_tag_add(ref, task->alloc_tag, PAGE_SIZE * nr);
-			put_page_tag_ref(ref);
+		if (get_page_tag_ref(page, &ref, &handle)) {
+			alloc_tag_add(&ref, task->alloc_tag, PAGE_SIZE * nr);
+			update_page_tag_ref(handle, &ref);
+			put_page_tag_ref(handle);
 		}
 	}
 }
@@ -71,11 +86,13 @@ static inline void pgalloc_tag_add(struct page *page, struct task_struct *task,
 static inline void pgalloc_tag_sub(struct page *page, unsigned int nr)
 {
 	if (mem_alloc_profiling_enabled()) {
-		union codetag_ref *ref = get_page_tag_ref(page);
+		union pgtag_ref_handle handle;
+		union codetag_ref ref;
 
-		if (ref) {
-			alloc_tag_sub(ref, PAGE_SIZE * nr);
-			put_page_tag_ref(ref);
+		if (get_page_tag_ref(page, &ref, &handle)) {
+			alloc_tag_sub(&ref, PAGE_SIZE * nr);
+			update_page_tag_ref(handle, &ref);
+			put_page_tag_ref(handle);
 		}
 	}
 }
@@ -85,13 +102,14 @@ static inline struct alloc_tag *pgalloc_tag_get(struct page *page)
 	struct alloc_tag *tag = NULL;
 
 	if (mem_alloc_profiling_enabled()) {
-		union codetag_ref *ref = get_page_tag_ref(page);
-
-		alloc_tag_sub_check(ref);
-		if (ref) {
-			if (ref->ct)
-				tag = ct_to_alloc_tag(ref->ct);
-			put_page_tag_ref(ref);
+		union pgtag_ref_handle handle;
+		union codetag_ref ref;
+
+		if (get_page_tag_ref(page, &ref, &handle)) {
+			alloc_tag_sub_check(&ref);
+			if (ref.ct)
+				tag = ct_to_alloc_tag(ref.ct);
+			put_page_tag_ref(handle);
 		}
 	}
 
@@ -106,8 +124,6 @@ static inline void pgalloc_tag_sub_pages(struct alloc_tag *tag, unsigned int nr)
 
 #else /* CONFIG_MEM_ALLOC_PROFILING */
 
-static inline union codetag_ref *get_page_tag_ref(struct page *page) { return NULL; }
-static inline void put_page_tag_ref(union codetag_ref *ref) {}
 static inline void clear_page_tag_ref(struct page *page) {}
 static inline void pgalloc_tag_add(struct page *page, struct task_struct *task,
 				   unsigned int nr) {}
-- 
cgit v1.2.3


From 4835f747d3ed181bf2c67930fe06b2c01a5d2323 Mon Sep 17 00:00:00 2001
From: Suren Baghdasaryan <surenb@google.com>
Date: Wed, 23 Oct 2024 10:07:59 -0700
Subject: alloc_tag: support for page allocation tag compression

Implement support for storing page allocation tag references directly in
the page flags instead of page extensions.  sysctl.vm.mem_profiling boot
parameter it extended to provide a way for a user to request this mode.
Enabling compression eliminates memory overhead caused by page_ext and
results in better performance for page allocations.  However this mode
will not work if the number of available page flag bits is insufficient to
address all kernel allocations.  Such condition can happen during boot or
when loading a module.  If this condition is detected, memory allocation
profiling gets disabled with an appropriate warning.  By default
compression mode is disabled.

Link: https://lkml.kernel.org/r/20241023170759.999909-7-surenb@google.com
Signed-off-by: Suren Baghdasaryan <surenb@google.com>
Reviewed-by: Pasha Tatashin <pasha.tatashin@soleen.com>
Cc: Ard Biesheuvel <ardb@kernel.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Borislav Petkov (AMD) <bp@alien8.de>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Daniel Gomez <da.gomez@samsung.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: David Rientjes <rientjes@google.com>
Cc: Dennis Zhou <dennis@kernel.org>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: John Hubbard <jhubbard@nvidia.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Kalesh Singh <kaleshsingh@google.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Kent Overstreet <kent.overstreet@linux.dev>
Cc: Liam R. Howlett <Liam.Howlett@Oracle.com>
Cc: Luis Chamberlain <mcgrof@kernel.org>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Rapoport (Microsoft) <rppt@kernel.org>
Cc: Minchan Kim <minchan@google.com>
Cc: Paul E. McKenney <paulmck@kernel.org>
Cc: Petr Pavlu <petr.pavlu@suse.com>
Cc: Roman Gushchin <roman.gushchin@linux.dev>
Cc: Sami Tolvanen <samitolvanen@google.com>
Cc: Sourav Panda <souravpanda@google.com>
Cc: Steven Rostedt (Google) <rostedt@goodmis.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Thomas Huth <thuth@redhat.com>
Cc: Uladzislau Rezki (Sony) <urezki@gmail.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Xiongwei Song <xiongwei.song@windriver.com>
Cc: Yu Zhao <yuzhao@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/alloc_tag.h         |  10 ++-
 include/linux/codetag.h           |   3 +
 include/linux/page-flags-layout.h |   7 ++
 include/linux/pgalloc_tag.h       | 145 +++++++++++++++++++++++++++++++++-----
 4 files changed, 147 insertions(+), 18 deletions(-)

(limited to 'include')

diff --git a/include/linux/alloc_tag.h b/include/linux/alloc_tag.h
index 55d30543c4c7..7c0786bdf9af 100644
--- a/include/linux/alloc_tag.h
+++ b/include/linux/alloc_tag.h
@@ -30,8 +30,16 @@ struct alloc_tag {
 	struct alloc_tag_counters __percpu	*counters;
 } __aligned(8);
 
+struct alloc_tag_kernel_section {
+	struct alloc_tag *first_tag;
+	unsigned long count;
+};
+
 struct alloc_tag_module_section {
-	unsigned long start_addr;
+	union {
+		unsigned long start_addr;
+		struct alloc_tag *first_tag;
+	};
 	unsigned long end_addr;
 	/* used size */
 	unsigned long size;
diff --git a/include/linux/codetag.h b/include/linux/codetag.h
index d10bd9810d32..d14dbd26b370 100644
--- a/include/linux/codetag.h
+++ b/include/linux/codetag.h
@@ -13,6 +13,9 @@ struct codetag_module;
 struct seq_buf;
 struct module;
 
+#define CODETAG_SECTION_START_PREFIX	"__start_"
+#define CODETAG_SECTION_STOP_PREFIX	"__stop_"
+
 /*
  * An instance of this structure is created in a special ELF section at every
  * code location being tagged.  At runtime, the special section is treated as
diff --git a/include/linux/page-flags-layout.h b/include/linux/page-flags-layout.h
index 7d79818dc065..4f5c9e979bb9 100644
--- a/include/linux/page-flags-layout.h
+++ b/include/linux/page-flags-layout.h
@@ -111,5 +111,12 @@
 			    ZONES_WIDTH - LRU_GEN_WIDTH - SECTIONS_WIDTH - \
 			    NODES_WIDTH - KASAN_TAG_WIDTH - LAST_CPUPID_WIDTH)
 
+#define NR_NON_PAGEFLAG_BITS	(SECTIONS_WIDTH + NODES_WIDTH + ZONES_WIDTH + \
+				LAST_CPUPID_SHIFT + KASAN_TAG_WIDTH + \
+				LRU_GEN_WIDTH + LRU_REFS_WIDTH)
+
+#define NR_UNUSED_PAGEFLAG_BITS	(BITS_PER_LONG - \
+				(NR_NON_PAGEFLAG_BITS + NR_PAGEFLAGS))
+
 #endif
 #endif /* _LINUX_PAGE_FLAGS_LAYOUT */
diff --git a/include/linux/pgalloc_tag.h b/include/linux/pgalloc_tag.h
index b13cd3313a88..1fe63b52e5e5 100644
--- a/include/linux/pgalloc_tag.h
+++ b/include/linux/pgalloc_tag.h
@@ -11,29 +11,118 @@
 
 #include <linux/page_ext.h>
 
+extern struct page_ext_operations page_alloc_tagging_ops;
+extern unsigned long alloc_tag_ref_mask;
+extern int alloc_tag_ref_offs;
+extern struct alloc_tag_kernel_section kernel_tags;
+
+DECLARE_STATIC_KEY_FALSE(mem_profiling_compressed);
+
+typedef u16	pgalloc_tag_idx;
+
 union pgtag_ref_handle {
 	union codetag_ref *ref;	/* reference in page extension */
+	struct page *page;	/* reference in page flags */
 };
 
-extern struct page_ext_operations page_alloc_tagging_ops;
+/* Reserved indexes */
+#define CODETAG_ID_NULL		0
+#define CODETAG_ID_EMPTY	1
+#define CODETAG_ID_FIRST	2
+
+#ifdef CONFIG_MODULES
+
+extern struct alloc_tag_module_section module_tags;
+
+static inline struct alloc_tag *module_idx_to_tag(pgalloc_tag_idx idx)
+{
+	return &module_tags.first_tag[idx - kernel_tags.count];
+}
+
+static inline pgalloc_tag_idx module_tag_to_idx(struct alloc_tag *tag)
+{
+	return CODETAG_ID_FIRST + kernel_tags.count + (tag - module_tags.first_tag);
+}
+
+#else /* CONFIG_MODULES */
+
+static inline struct alloc_tag *module_idx_to_tag(pgalloc_tag_idx idx)
+{
+	pr_warn("invalid page tag reference %lu\n", (unsigned long)idx);
+	return NULL;
+}
+
+static inline pgalloc_tag_idx module_tag_to_idx(struct alloc_tag *tag)
+{
+	pr_warn("invalid page tag 0x%lx\n", (unsigned long)tag);
+	return CODETAG_ID_NULL;
+}
+
+#endif /* CONFIG_MODULES */
+
+static inline void idx_to_ref(pgalloc_tag_idx idx, union codetag_ref *ref)
+{
+	switch (idx) {
+	case (CODETAG_ID_NULL):
+		ref->ct = NULL;
+		break;
+	case (CODETAG_ID_EMPTY):
+		set_codetag_empty(ref);
+		break;
+	default:
+		idx -= CODETAG_ID_FIRST;
+		ref->ct = idx < kernel_tags.count ?
+			&kernel_tags.first_tag[idx].ct :
+			&module_idx_to_tag(idx)->ct;
+		break;
+	}
+}
+
+static inline pgalloc_tag_idx ref_to_idx(union codetag_ref *ref)
+{
+	struct alloc_tag *tag;
+
+	if (!ref->ct)
+		return CODETAG_ID_NULL;
+
+	if (is_codetag_empty(ref))
+		return CODETAG_ID_EMPTY;
+
+	tag = ct_to_alloc_tag(ref->ct);
+	if (tag >= kernel_tags.first_tag && tag < kernel_tags.first_tag + kernel_tags.count)
+		return CODETAG_ID_FIRST + (tag - kernel_tags.first_tag);
+
+	return module_tag_to_idx(tag);
+}
+
+
 
 /* Should be called only if mem_alloc_profiling_enabled() */
 static inline bool get_page_tag_ref(struct page *page, union codetag_ref *ref,
 				    union pgtag_ref_handle *handle)
 {
-	struct page_ext *page_ext;
-	union codetag_ref *tmp;
-
 	if (!page)
 		return false;
 
-	page_ext = page_ext_get(page);
-	if (!page_ext)
-		return false;
+	if (static_key_enabled(&mem_profiling_compressed)) {
+		pgalloc_tag_idx idx;
+
+		idx = (page->flags >> alloc_tag_ref_offs) & alloc_tag_ref_mask;
+		idx_to_ref(idx, ref);
+		handle->page = page;
+	} else {
+		struct page_ext *page_ext;
+		union codetag_ref *tmp;
+
+		page_ext = page_ext_get(page);
+		if (!page_ext)
+			return false;
+
+		tmp = (union codetag_ref *)page_ext_data(page_ext, &page_alloc_tagging_ops);
+		ref->ct = tmp->ct;
+		handle->ref = tmp;
+	}
 
-	tmp = (union codetag_ref *)page_ext_data(page_ext, &page_alloc_tagging_ops);
-	ref->ct = tmp->ct;
-	handle->ref = tmp;
 	return true;
 }
 
@@ -42,16 +131,35 @@ static inline void put_page_tag_ref(union pgtag_ref_handle handle)
 	if (WARN_ON(!handle.ref))
 		return;
 
-	page_ext_put((void *)handle.ref - page_alloc_tagging_ops.offset);
+	if (!static_key_enabled(&mem_profiling_compressed))
+		page_ext_put((void *)handle.ref - page_alloc_tagging_ops.offset);
 }
 
-static inline void update_page_tag_ref(union pgtag_ref_handle handle,
-				       union codetag_ref *ref)
+static inline void update_page_tag_ref(union pgtag_ref_handle handle, union codetag_ref *ref)
 {
-	if (WARN_ON(!handle.ref || !ref))
-		return;
-
-	handle.ref->ct = ref->ct;
+	if (static_key_enabled(&mem_profiling_compressed)) {
+		struct page *page = handle.page;
+		unsigned long old_flags;
+		unsigned long flags;
+		unsigned long idx;
+
+		if (WARN_ON(!page || !ref))
+			return;
+
+		idx = (unsigned long)ref_to_idx(ref);
+		idx = (idx & alloc_tag_ref_mask) << alloc_tag_ref_offs;
+		do {
+			old_flags = READ_ONCE(page->flags);
+			flags = old_flags;
+			flags &= ~(alloc_tag_ref_mask << alloc_tag_ref_offs);
+			flags |= idx;
+		} while (unlikely(!try_cmpxchg(&page->flags, &old_flags, flags)));
+	} else {
+		if (WARN_ON(!handle.ref || !ref))
+			return;
+
+		handle.ref->ct = ref->ct;
+	}
 }
 
 static inline void clear_page_tag_ref(struct page *page)
@@ -122,6 +230,8 @@ static inline void pgalloc_tag_sub_pages(struct alloc_tag *tag, unsigned int nr)
 		this_cpu_sub(tag->counters->bytes, PAGE_SIZE * nr);
 }
 
+void __init alloc_tag_sec_init(void);
+
 #else /* CONFIG_MEM_ALLOC_PROFILING */
 
 static inline void clear_page_tag_ref(struct page *page) {}
@@ -130,6 +240,7 @@ static inline void pgalloc_tag_add(struct page *page, struct task_struct *task,
 static inline void pgalloc_tag_sub(struct page *page, unsigned int nr) {}
 static inline struct alloc_tag *pgalloc_tag_get(struct page *page) { return NULL; }
 static inline void pgalloc_tag_sub_pages(struct alloc_tag *tag, unsigned int nr) {}
+static inline void alloc_tag_sec_init(void) {}
 
 #endif /* CONFIG_MEM_ALLOC_PROFILING */
 
-- 
cgit v1.2.3


From b7fc16a16b0850e41b88eae0edfa4c085c012347 Mon Sep 17 00:00:00 2001
From: Suren Baghdasaryan <surenb@google.com>
Date: Thu, 24 Oct 2024 09:23:18 -0700
Subject: mm/codetag: uninline and move pgalloc_tag_copy and pgalloc_tag_split

pgalloc_tag_copy() and pgalloc_tag_split() are sizable and outside of any
performance-critical paths, so it should be fine to uninline them.  Also
move their declarations into pgalloc_tag.h which seems like a more
appropriate place for them.  No functional changes other than uninlining.

Link: https://lkml.kernel.org/r/20241024162318.1640781-1-surenb@google.com
Signed-off-by: Suren Baghdasaryan <surenb@google.com>
Suggested-by: Andrew Morton <akpm@linux-foundation.org>
Acked-by: Yu Zhao <yuzhao@google.com>
Cc: Kent Overstreet <kent.overstreet@linux.dev>
Cc: Pasha Tatashin <pasha.tatashin@soleen.com>
Cc: Sourav Panda <souravpanda@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/mm.h          | 58 ---------------------------------------------
 include/linux/pgalloc_tag.h |  5 ++++
 2 files changed, 5 insertions(+), 58 deletions(-)

(limited to 'include')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index f9120ac6d901..08e487e5850a 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -4166,62 +4166,4 @@ static inline int do_mseal(unsigned long start, size_t len_in, unsigned long fla
 }
 #endif
 
-#ifdef CONFIG_MEM_ALLOC_PROFILING
-static inline void pgalloc_tag_split(struct folio *folio, int old_order, int new_order)
-{
-	int i;
-	struct alloc_tag *tag;
-	unsigned int nr_pages = 1 << new_order;
-
-	if (!mem_alloc_profiling_enabled())
-		return;
-
-	tag = pgalloc_tag_get(&folio->page);
-	if (!tag)
-		return;
-
-	for (i = nr_pages; i < (1 << old_order); i += nr_pages) {
-		union pgtag_ref_handle handle;
-		union codetag_ref ref;
-
-		if (get_page_tag_ref(folio_page(folio, i), &ref, &handle)) {
-			/* Set new reference to point to the original tag */
-			alloc_tag_ref_set(&ref, tag);
-			update_page_tag_ref(handle, &ref);
-			put_page_tag_ref(handle);
-		}
-	}
-}
-
-static inline void pgalloc_tag_copy(struct folio *new, struct folio *old)
-{
-	union pgtag_ref_handle handle;
-	union codetag_ref ref;
-	struct alloc_tag *tag;
-
-	tag = pgalloc_tag_get(&old->page);
-	if (!tag)
-		return;
-
-	if (!get_page_tag_ref(&new->page, &ref, &handle))
-		return;
-
-	/* Clear the old ref to the original allocation tag. */
-	clear_page_tag_ref(&old->page);
-	/* Decrement the counters of the tag on get_new_folio. */
-	alloc_tag_sub(&ref, folio_size(new));
-	__alloc_tag_ref_set(&ref, tag);
-	update_page_tag_ref(handle, &ref);
-	put_page_tag_ref(handle);
-}
-#else /* !CONFIG_MEM_ALLOC_PROFILING */
-static inline void pgalloc_tag_split(struct folio *folio, int old_order, int new_order)
-{
-}
-
-static inline void pgalloc_tag_copy(struct folio *new, struct folio *old)
-{
-}
-#endif /* CONFIG_MEM_ALLOC_PROFILING */
-
 #endif /* _LINUX_MM_H */
diff --git a/include/linux/pgalloc_tag.h b/include/linux/pgalloc_tag.h
index 1fe63b52e5e5..0e43ab653ab6 100644
--- a/include/linux/pgalloc_tag.h
+++ b/include/linux/pgalloc_tag.h
@@ -230,6 +230,9 @@ static inline void pgalloc_tag_sub_pages(struct alloc_tag *tag, unsigned int nr)
 		this_cpu_sub(tag->counters->bytes, PAGE_SIZE * nr);
 }
 
+void pgalloc_tag_split(struct folio *folio, int old_order, int new_order);
+void pgalloc_tag_copy(struct folio *new, struct folio *old);
+
 void __init alloc_tag_sec_init(void);
 
 #else /* CONFIG_MEM_ALLOC_PROFILING */
@@ -241,6 +244,8 @@ static inline void pgalloc_tag_sub(struct page *page, unsigned int nr) {}
 static inline struct alloc_tag *pgalloc_tag_get(struct page *page) { return NULL; }
 static inline void pgalloc_tag_sub_pages(struct alloc_tag *tag, unsigned int nr) {}
 static inline void alloc_tag_sec_init(void) {}
+static inline void pgalloc_tag_split(struct folio *folio, int old_order, int new_order) {}
+static inline void pgalloc_tag_copy(struct folio *new, struct folio *old) {}
 
 #endif /* CONFIG_MEM_ALLOC_PROFILING */
 
-- 
cgit v1.2.3


From d7cb6d7414ea1b33536fa6d11805cb8dceec1f97 Mon Sep 17 00:00:00 2001
From: Damien Le Moal <dlemoal@kernel.org>
Date: Thu, 7 Nov 2024 15:42:59 +0900
Subject: block: RCU protect disk->conv_zones_bitmap

Ensure that a disk revalidation changing the conventional zones bitmap
of a disk does not cause invalid memory references when using the
disk_zone_is_conv() helper by RCU protecting the disk->conv_zones_bitmap
pointer.

disk_zone_is_conv() is modified to operate under the RCU read lock and
the function disk_set_conv_zones_bitmap() is added to update a disk
conv_zones_bitmap pointer using rcu_replace_pointer() with the disk
zone_wplugs_lock spinlock held.

disk_free_zone_resources() is modified to call
disk_update_zone_resources() with a NULL bitmap pointer to free the disk
conv_zones_bitmap. disk_set_conv_zones_bitmap() is also used in
disk_update_zone_resources() to set the new (revalidated) bitmap and
free the old one.

Signed-off-by: Damien Le Moal <dlemoal@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Link: https://lore.kernel.org/r/20241107064300.227731-2-dlemoal@kernel.org
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blkdev.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 7bfc877e159e..6d379803c777 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -195,7 +195,7 @@ struct gendisk {
 	unsigned int		nr_zones;
 	unsigned int		zone_capacity;
 	unsigned int		last_zone_capacity;
-	unsigned long		*conv_zones_bitmap;
+	unsigned long __rcu	*conv_zones_bitmap;
 	unsigned int            zone_wplugs_hash_bits;
 	spinlock_t              zone_wplugs_lock;
 	struct mempool_s	*zone_wplugs_pool;
-- 
cgit v1.2.3


From f3d9bf05140dd242cdc33c431489a853f2bc1b67 Mon Sep 17 00:00:00 2001
From: Damien Le Moal <dlemoal@kernel.org>
Date: Thu, 7 Nov 2024 15:43:00 +0900
Subject: block: Add a public bdev_zone_is_seq() helper

Turn the private disk_zone_is_conv() function in blk-zoned.c into a
public and documented bdev_zone_is_seq() helper with the inverse
polarity of the original function, also adding a check for non-zoned
devices so that all file systems can use the helper, even with a regular
block device.

Suggested-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Damien Le Moal <dlemoal@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Link: https://lore.kernel.org/r/20241107064300.227731-3-dlemoal@kernel.org
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blkdev.h | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

(limited to 'include')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 6d379803c777..93551772c1d6 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1395,6 +1395,33 @@ static inline bool bdev_is_zone_start(struct block_device *bdev,
 	return bdev_offset_from_zone_start(bdev, sector) == 0;
 }
 
+/**
+ * bdev_zone_is_seq - check if a sector belongs to a sequential write zone
+ * @bdev:	block device to check
+ * @sector:	sector number
+ *
+ * Check if @sector on @bdev is contained in a sequential write required zone.
+ */
+static inline bool bdev_zone_is_seq(struct block_device *bdev, sector_t sector)
+{
+	bool is_seq = false;
+
+#if IS_ENABLED(CONFIG_BLK_DEV_ZONED)
+	if (bdev_is_zoned(bdev)) {
+		struct gendisk *disk = bdev->bd_disk;
+		unsigned long *bitmap;
+
+		rcu_read_lock();
+		bitmap = rcu_dereference(disk->conv_zones_bitmap);
+		is_seq = !bitmap ||
+			!test_bit(disk_zone_no(disk, sector), bitmap);
+		rcu_read_unlock();
+	}
+#endif
+
+	return is_seq;
+}
+
 static inline int queue_dma_alignment(const struct request_queue *q)
 {
 	return q->limits.dma_alignment;
-- 
cgit v1.2.3


From f7470591f8db1a72ce9f7ab49cb13c2b21b92002 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Sat, 5 Oct 2024 21:01:12 +0100
Subject: mm: convert page_to_pgoff() to page_pgoff()

Patch series "page->index removals in mm", v2.

As part of shrinking struct page, we need to stop using page->index.  This
patchset gets rid of most of the remaining references to page->index in
mm, as well as increasing the number of functions which take a const
folio/page pointer.  It shrinks the text segment of mm by a few hundred
bytes in my test config, probably mostly from removing calls to
compound_head() in page_to_pgoff().


This patch (of 7):

Change the function signature to pass in the folio as all three callers
have it.  This removes a reference to page->index, which we're trying to
get rid of.  And add kernel-doc.

Link: https://lkml.kernel.org/r/20241005200121.3231142-1-willy@infradead.org
Link: https://lkml.kernel.org/r/20241005200121.3231142-2-willy@infradead.org
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/mm.h      |  2 +-
 include/linux/pagemap.h | 31 +++++++++++++++++--------------
 2 files changed, 18 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 08e487e5850a..78848fbefe94 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1895,7 +1895,7 @@ static inline unsigned long page_to_section(const struct page *page)
  *
  * Return: The Page Frame Number of the first page in the folio.
  */
-static inline unsigned long folio_pfn(struct folio *folio)
+static inline unsigned long folio_pfn(const struct folio *folio)
 {
 	return page_to_pfn(&folio->page);
 }
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 68a5f1ff3301..bcf0865a38ae 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -1011,22 +1011,25 @@ static inline struct folio *read_mapping_folio(struct address_space *mapping,
 	return read_cache_folio(mapping, index, NULL, file);
 }
 
-/*
- * Get the offset in PAGE_SIZE (even for hugetlb pages).
+/**
+ * page_pgoff - Calculate the logical page offset of this page.
+ * @folio: The folio containing this page.
+ * @page: The page which we need the offset of.
+ *
+ * For file pages, this is the offset from the beginning of the file
+ * in units of PAGE_SIZE.  For anonymous pages, this is the offset from
+ * the beginning of the anon_vma in units of PAGE_SIZE.  This will
+ * return nonsense for KSM pages.
+ *
+ * Context: Caller must have a reference on the folio or otherwise
+ * prevent it from being split or freed.
+ *
+ * Return: The offset in units of PAGE_SIZE.
  */
-static inline pgoff_t page_to_pgoff(struct page *page)
+static inline pgoff_t page_pgoff(const struct folio *folio,
+		const struct page *page)
 {
-	struct page *head;
-
-	if (likely(!PageTransTail(page)))
-		return page->index;
-
-	head = compound_head(page);
-	/*
-	 *  We don't initialize ->index for tail pages: calculate based on
-	 *  head page
-	 */
-	return head->index + page - head;
+	return folio->index + folio_page_idx(folio, page);
 }
 
 /*
-- 
cgit v1.2.3


From 713da0b33b3e9d16272b57f4c44dee5c052be9b7 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Sat, 5 Oct 2024 21:01:14 +0100
Subject: mm: renovate page_address_in_vma()

This function doesn't modify any of its arguments, so if we make a few
other functions take const pointers, we can make page_address_in_vma()
take const pointers too.  All of its callers have the containing folio
already, so pass that in as an argument instead of recalculating it.  Also
add kernel-doc

Link: https://lkml.kernel.org/r/20241005200121.3231142-4-willy@infradead.org
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/rmap.h | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index d5e93e44322e..78923015a2e8 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -728,11 +728,8 @@ page_vma_mapped_walk_restart(struct page_vma_mapped_walk *pvmw)
 }
 
 bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw);
-
-/*
- * Used by swapoff to help locate where page is expected in vma.
- */
-unsigned long page_address_in_vma(struct page *, struct vm_area_struct *);
+unsigned long page_address_in_vma(const struct folio *folio,
+		const struct page *, const struct vm_area_struct *);
 
 /*
  * Cleans the PTEs of shared mappings.
-- 
cgit v1.2.3


From 68158bfa3dbd4af8461ef75a91ffc03be942c8fe Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Sat, 5 Oct 2024 21:01:15 +0100
Subject: mm: mass constification of folio/page pointers

Now that page_pgoff() takes const pointers, we can constify the pointers
to a lot of functions.

Link: https://lkml.kernel.org/r/20241005200121.3231142-5-willy@infradead.org
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/ksm.h  |  7 ++++---
 include/linux/rmap.h | 10 +++++-----
 2 files changed, 9 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/linux/ksm.h b/include/linux/ksm.h
index 29022e71a074..6a53ac4885bb 100644
--- a/include/linux/ksm.h
+++ b/include/linux/ksm.h
@@ -90,7 +90,7 @@ struct folio *ksm_might_need_to_copy(struct folio *folio,
 
 void rmap_walk_ksm(struct folio *folio, struct rmap_walk_control *rwc);
 void folio_migrate_ksm(struct folio *newfolio, struct folio *folio);
-void collect_procs_ksm(struct folio *folio, struct page *page,
+void collect_procs_ksm(const struct folio *folio, const struct page *page,
 		struct list_head *to_kill, int force_early);
 long ksm_process_profit(struct mm_struct *);
 
@@ -122,8 +122,9 @@ static inline void ksm_might_unmap_zero_page(struct mm_struct *mm, pte_t pte)
 {
 }
 
-static inline void collect_procs_ksm(struct folio *folio, struct page *page,
-				     struct list_head *to_kill, int force_early)
+static inline void collect_procs_ksm(const struct folio *folio,
+		const struct page *page, struct list_head *to_kill,
+		int force_early)
 {
 }
 
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index 78923015a2e8..683a04088f3f 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -171,7 +171,7 @@ static inline void anon_vma_merge(struct vm_area_struct *vma,
 	unlink_anon_vmas(next);
 }
 
-struct anon_vma *folio_get_anon_vma(struct folio *folio);
+struct anon_vma *folio_get_anon_vma(const struct folio *folio);
 
 /* RMAP flags, currently only relevant for some anon rmap operations. */
 typedef int __bitwise rmap_t;
@@ -194,8 +194,8 @@ enum rmap_level {
 	RMAP_LEVEL_PMD,
 };
 
-static inline void __folio_rmap_sanity_checks(struct folio *folio,
-		struct page *page, int nr_pages, enum rmap_level level)
+static inline void __folio_rmap_sanity_checks(const struct folio *folio,
+		const struct page *page, int nr_pages, enum rmap_level level)
 {
 	/* hugetlb folios are handled separately. */
 	VM_WARN_ON_FOLIO(folio_test_hugetlb(folio), folio);
@@ -771,14 +771,14 @@ struct rmap_walk_control {
 	bool (*rmap_one)(struct folio *folio, struct vm_area_struct *vma,
 					unsigned long addr, void *arg);
 	int (*done)(struct folio *folio);
-	struct anon_vma *(*anon_lock)(struct folio *folio,
+	struct anon_vma *(*anon_lock)(const struct folio *folio,
 				      struct rmap_walk_control *rwc);
 	bool (*invalid_vma)(struct vm_area_struct *vma, void *arg);
 };
 
 void rmap_walk(struct folio *folio, struct rmap_walk_control *rwc);
 void rmap_walk_locked(struct folio *folio, struct rmap_walk_control *rwc);
-struct anon_vma *folio_lock_anon_vma_read(struct folio *folio,
+struct anon_vma *folio_lock_anon_vma_read(const struct folio *folio,
 					  struct rmap_walk_control *rwc);
 
 #else	/* !CONFIG_MMU */
-- 
cgit v1.2.3


From 0386aaa6e9c826bc494169a914e01a86befe6edf Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Sat, 5 Oct 2024 21:01:16 +0100
Subject: bootmem: stop using page->index

Encode the type into the bottom four bits of page->private and the info
into the remaining bits.  Also turn the bootmem type into a named enum.

[arnd@arndb.de: bootmem: add bootmem_type stub function]
  Link: https://lkml.kernel.org/r/20241015143802.577613-1-arnd@kernel.org
[akpm@linux-foundation.org: fix build with !CONFIG_HAVE_BOOTMEM_INFO_NODE]
 Link: https://lore.kernel.org/oe-kbuild-all/202410090311.eaqcL7IZ-lkp@intel.com/
Link: https://lkml.kernel.org/r/20241005200121.3231142-6-willy@infradead.org
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: kernel test robot <lkp@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/bootmem_info.h | 35 +++++++++++++++++++++++++++--------
 1 file changed, 27 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/linux/bootmem_info.h b/include/linux/bootmem_info.h
index cffa38a73618..d8a8d245824a 100644
--- a/include/linux/bootmem_info.h
+++ b/include/linux/bootmem_info.h
@@ -6,11 +6,10 @@
 #include <linux/kmemleak.h>
 
 /*
- * Types for free bootmem stored in page->lru.next. These have to be in
- * some random range in unsigned long space for debugging purposes.
+ * Types for free bootmem stored in the low bits of page->private.
  */
-enum {
-	MEMORY_HOTPLUG_MIN_BOOTMEM_TYPE = 12,
+enum bootmem_type {
+	MEMORY_HOTPLUG_MIN_BOOTMEM_TYPE = 1,
 	SECTION_INFO = MEMORY_HOTPLUG_MIN_BOOTMEM_TYPE,
 	MIX_SECTION_INFO,
 	NODE_INFO,
@@ -21,9 +20,19 @@ enum {
 void __init register_page_bootmem_info_node(struct pglist_data *pgdat);
 
 void get_page_bootmem(unsigned long info, struct page *page,
-		      unsigned long type);
+		enum bootmem_type type);
 void put_page_bootmem(struct page *page);
 
+static inline enum bootmem_type bootmem_type(const struct page *page)
+{
+	return (unsigned long)page->private & 0xf;
+}
+
+static inline unsigned long bootmem_info(const struct page *page)
+{
+	return (unsigned long)page->private >> 4;
+}
+
 /*
  * Any memory allocated via the memblock allocator and not via the
  * buddy will be marked reserved already in the memmap. For those
@@ -31,7 +40,7 @@ void put_page_bootmem(struct page *page);
  */
 static inline void free_bootmem_page(struct page *page)
 {
-	unsigned long magic = page->index;
+	enum bootmem_type type = bootmem_type(page);
 
 	/*
 	 * The reserve_bootmem_region sets the reserved flag on bootmem
@@ -39,7 +48,7 @@ static inline void free_bootmem_page(struct page *page)
 	 */
 	VM_BUG_ON_PAGE(page_ref_count(page) != 2, page);
 
-	if (magic == SECTION_INFO || magic == MIX_SECTION_INFO)
+	if (type == SECTION_INFO || type == MIX_SECTION_INFO)
 		put_page_bootmem(page);
 	else
 		VM_BUG_ON_PAGE(1, page);
@@ -53,8 +62,18 @@ static inline void put_page_bootmem(struct page *page)
 {
 }
 
+static inline enum bootmem_type bootmem_type(const struct page *page)
+{
+	return SECTION_INFO;
+}
+
+static inline unsigned long bootmem_info(const struct page *page)
+{
+	return 0;
+}
+
 static inline void get_page_bootmem(unsigned long info, struct page *page,
-				    unsigned long type)
+				    enum bootmem_type type)
 {
 }
 
-- 
cgit v1.2.3


From 6a78699838a0ddeed3620ddf50c1521f1fe1e811 Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@redhat.com>
Date: Thu, 31 Oct 2024 21:37:19 +0800
Subject: block: always verify unfreeze lock on the owner task

commit f1be1788a32e ("block: model freeze & enter queue as lock for
supporting lockdep") tries to apply lockdep for verifying freeze &
unfreeze. However, the verification is only done the outmost freeze and
unfreeze. This way is actually not correct because q->mq_freeze_depth
still may drop to zero on other task instead of the freeze owner task.

Fix this issue by always verifying the last unfreeze lock on the owner
task context, and make sure both the outmost freeze & unfreeze are
verified in the current task.

Fixes: f1be1788a32e ("block: model freeze & enter queue as lock for supporting lockdep")
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Link: https://lore.kernel.org/r/20241031133723.303835-4-ming.lei@redhat.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blkdev.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 93551772c1d6..1b51a7c92e9b 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -575,6 +575,10 @@ struct request_queue {
 	struct throtl_data *td;
 #endif
 	struct rcu_head		rcu_head;
+#ifdef CONFIG_LOCKDEP
+	struct task_struct	*mq_freeze_owner;
+	int			mq_freeze_owner_depth;
+#endif
 	wait_queue_head_t	mq_freeze_wq;
 	/*
 	 * Protect concurrent access to q_usage_counter by
-- 
cgit v1.2.3


From 9c477088b60dd23f3120b2b01b14d85047a033da Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Wed, 6 Nov 2024 21:19:59 +0100
Subject: net: phy: make genphy_c45_write_eee_adv() static

genphy_c45_write_eee_adv() isn't used outside phy-c45.c,
so make it static.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Florian Fainelli <florian.fainelli@broadcom.com>
Link: https://patch.msgid.link/d23bd784-44e6-4a15-af3a-b37379156521@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/phy.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/phy.h b/include/linux/phy.h
index bf0eb4e5d35c..bb09a5a128e1 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -1953,7 +1953,6 @@ int genphy_c45_ethtool_get_eee(struct phy_device *phydev,
 			       struct ethtool_keee *data);
 int genphy_c45_ethtool_set_eee(struct phy_device *phydev,
 			       struct ethtool_keee *data);
-int genphy_c45_write_eee_adv(struct phy_device *phydev, unsigned long *adv);
 int genphy_c45_an_config_eee_aneg(struct phy_device *phydev);
 int genphy_c45_read_eee_adv(struct phy_device *phydev, unsigned long *adv);
 
-- 
cgit v1.2.3


From db73835f54fc57bc267d43836905588f24ddac85 Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Wed, 6 Nov 2024 21:25:12 +0100
Subject: net: phy: remove genphy_config_eee_advert

bcm_config_lre_aneg() doesn't use genphy_config_eee_advert() any longer.
As this was the only user, we can remove genphy_config_eee_advert() now.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Florian Fainelli <florian.fainelli@broadcom.com>
Link: https://patch.msgid.link/37da7f3e-b883-4c07-9881-b8c0516822b7@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/phy.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/phy.h b/include/linux/phy.h
index bb09a5a128e1..1e4127c495c0 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -1885,7 +1885,6 @@ int genphy_read_abilities(struct phy_device *phydev);
 int genphy_setup_forced(struct phy_device *phydev);
 int genphy_restart_aneg(struct phy_device *phydev);
 int genphy_check_and_restart_aneg(struct phy_device *phydev, bool restart);
-int genphy_config_eee_advert(struct phy_device *phydev);
 int __genphy_config_aneg(struct phy_device *phydev, bool changed);
 int genphy_aneg_done(struct phy_device *phydev);
 int genphy_update_link(struct phy_device *phydev);
-- 
cgit v1.2.3


From 48171c65f61148b0025128b70837280123f1309d Mon Sep 17 00:00:00 2001
From: Guillaume Nault <gnault@redhat.com>
Date: Wed, 6 Nov 2024 22:37:32 +0100
Subject: ipv4: Prepare ip_route_output() to future .flowi4_tos conversion.

Convert the "tos" parameter of ip_route_output() to dscp_t. This way
we'll have a dscp_t value directly available when .flowi4_tos will
eventually be converted to dscp_t.

All ip_route_output() callers but one set this "tos" parameter to 0 and
therefore don't need to be adapted to the new prototype.

Only br_nf_pre_routing_finish() needs conversion. It can just use
ip4h_dscp() to get the DSCP field from the IPv4 header.

Signed-off-by: Guillaume Nault <gnault@redhat.com>
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Link: https://patch.msgid.link/0f10d031dd44c70aae9bc6e19391cb30d5c2fe71.1730928699.git.gnault@redhat.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/route.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/net/route.h b/include/net/route.h
index 586e59f7ed8a..0a690adfdff5 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -156,12 +156,12 @@ static inline struct rtable *ip_route_output_key(struct net *net, struct flowi4
  * structure is only partially set, it may bypass some fib-rules.
  */
 static inline struct rtable *ip_route_output(struct net *net, __be32 daddr,
-					     __be32 saddr, u8 tos, int oif,
-					     __u8 scope)
+					     __be32 saddr, dscp_t dscp,
+					     int oif, __u8 scope)
 {
 	struct flowi4 fl4 = {
 		.flowi4_oif = oif,
-		.flowi4_tos = tos,
+		.flowi4_tos = inet_dscp_to_dsfield(dscp),
 		.flowi4_scope = scope,
 		.daddr = daddr,
 		.saddr = saddr,
-- 
cgit v1.2.3


From b855f02427e995cbc905e134cc3a7f4e503c0455 Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hverkuil@xs4all.nl>
Date: Fri, 1 Nov 2024 10:12:03 +0100
Subject: media: replace obsolete hans.verkuil@cisco.com alias

The old hans.verkuil@cisco.com email address was discontinued years ago.

Replace it with the correct hansverk@cisco.com email.

Signed-off-by: Hans Verkuil <hverkuil@xs4all.nl>
---
 include/media/i2c/ths7303.h          | 2 +-
 include/media/media-request.h        | 2 +-
 include/uapi/linux/v4l2-dv-timings.h | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/media/i2c/ths7303.h b/include/media/i2c/ths7303.h
index fee2818c558d..fc937025cdb4 100644
--- a/include/media/i2c/ths7303.h
+++ b/include/media/i2c/ths7303.h
@@ -5,7 +5,7 @@
  * Copyright 2013 Cisco Systems, Inc. and/or its affiliates.
  *
  * Contributors:
- *     Hans Verkuil <hans.verkuil@cisco.com>
+ *     Hans Verkuil <hansverk@cisco.com>
  *     Lad, Prabhakar <prabhakar.lad@ti.com>
  *     Martin Bugge <marbugge@cisco.com>
  */
diff --git a/include/media/media-request.h b/include/media/media-request.h
index 3cd25a2717ce..d4ac557678a7 100644
--- a/include/media/media-request.h
+++ b/include/media/media-request.h
@@ -5,7 +5,7 @@
  * Copyright 2018 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
  * Copyright (C) 2018 Intel Corporation
  *
- * Author: Hans Verkuil <hans.verkuil@cisco.com>
+ * Author: Hans Verkuil <hansverk@cisco.com>
  * Author: Sakari Ailus <sakari.ailus@linux.intel.com>
  */
 
diff --git a/include/uapi/linux/v4l2-dv-timings.h b/include/uapi/linux/v4l2-dv-timings.h
index ef0128c7369c..44a16e0e5a12 100644
--- a/include/uapi/linux/v4l2-dv-timings.h
+++ b/include/uapi/linux/v4l2-dv-timings.h
@@ -2,7 +2,7 @@
 /*
  * V4L2 DV timings header.
  *
- * Copyright (C) 2012-2016  Hans Verkuil <hans.verkuil@cisco.com>
+ * Copyright (C) 2012-2016  Hans Verkuil <hansverk@cisco.com>
  */
 
 #ifndef _V4L2_DV_TIMINGS_H
-- 
cgit v1.2.3


From b45a3777ceabbe08ab7a6e97f258191c07cbab8d Mon Sep 17 00:00:00 2001
From: Yi Liu <yi.l.liu@intel.com>
Date: Fri, 8 Nov 2024 10:13:52 +0800
Subject: iommu: Pass old domain to set_dev_pasid op

To support domain replacement for pasid, the underlying iommu driver needs
to know the old domain hence be able to clean up the existing attachment.
It would be much convenient for iommu layer to pass down the old domain.
Otherwise, iommu drivers would need to track domain for pasids by
themselves, this would duplicate code among the iommu drivers. Or iommu
drivers would rely group->pasid_array to get domain, which may not always
the correct one.

Suggested-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Reviewed-by: Lu Baolu <baolu.lu@linux.intel.com>
Reviewed-by: Nicolin Chen <nicolinc@nvidia.com>
Reviewed-by: Vasant Hegde <vasant.hegde@amd.com>
Signed-off-by: Yi Liu <yi.l.liu@intel.com>
Link: https://lore.kernel.org/r/20241107122234.7424-2-yi.l.liu@intel.com
Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 include/linux/iommu.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index bd722f473635..32dce80aa7fd 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -642,7 +642,7 @@ struct iommu_ops {
 struct iommu_domain_ops {
 	int (*attach_dev)(struct iommu_domain *domain, struct device *dev);
 	int (*set_dev_pasid)(struct iommu_domain *domain, struct device *dev,
-			     ioasid_t pasid);
+			     ioasid_t pasid, struct iommu_domain *old);
 
 	int (*map_pages)(struct iommu_domain *domain, unsigned long iova,
 			 phys_addr_t paddr, size_t pgsize, size_t pgcount,
-- 
cgit v1.2.3


From 980e3016ebcc00021bf8190c64fd65ba23e90498 Mon Sep 17 00:00:00 2001
From: Yi Liu <yi.l.liu@intel.com>
Date: Fri, 8 Nov 2024 10:14:04 +0800
Subject: iommu: Make set_dev_pasid op support domain replacement

The iommu core is going to support domain replacement for pasid, it needs
to make the set_dev_pasid op support replacing domain and keep the old
domain config in the failure case.

AMD iommu driver does not support domain replacement for pasid yet, so it
would fail the set_dev_pasid op to keep the old config if the input @old
is non-NULL. Till now, all the set_dev_pasid callbacks can handle the old
parameter and can keep the old config when failed, so update the kdoc of
set_dev_pasid op.

Suggested-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Reviewed-by: Vasant Hegde <vasant.hegde@amd.com>
Signed-off-by: Yi Liu <yi.l.liu@intel.com>
Link: https://lore.kernel.org/r/20241107122234.7424-14-yi.l.liu@intel.com
Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 include/linux/iommu.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 32dce80aa7fd..27f923450a7c 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -616,7 +616,8 @@ struct iommu_ops {
  * * EBUSY	- device is attached to a domain and cannot be changed
  * * ENODEV	- device specific errors, not able to be attached
  * * <others>	- treated as ENODEV by the caller. Use is discouraged
- * @set_dev_pasid: set an iommu domain to a pasid of device
+ * @set_dev_pasid: set or replace an iommu domain to a pasid of device. The pasid of
+ *                 the device should be left in the old config in error case.
  * @map_pages: map a physically contiguous set of pages of the same size to
  *             an iommu domain.
  * @unmap_pages: unmap a number of pages of the same size from an iommu domain
-- 
cgit v1.2.3


From d1f4390dd28ba110f232615dc4610ac1bb2f39f2 Mon Sep 17 00:00:00 2001
From: Bartosz Golaszewski <brgl@bgdev.pl>
Date: Fri, 8 Nov 2024 16:07:37 +0200
Subject: regmap: provide regmap_assign_bits()

Add another bits helper to regmap API: this one sets given bits if value
is true and clears them if it's false.

Suggested-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Signed-off-by: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Signed-off-by: Tomi Valkeinen <tomi.valkeinen@ideasonboard.com>
Link: https://patch.msgid.link/20241108-assign-bits-v1-1-382790562d99@ideasonboard.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/linux/regmap.h | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

(limited to 'include')

diff --git a/include/linux/regmap.h b/include/linux/regmap.h
index da07584284ab..0d5b74f9bd32 100644
--- a/include/linux/regmap.h
+++ b/include/linux/regmap.h
@@ -1335,6 +1335,15 @@ static inline int regmap_clear_bits(struct regmap *map,
 	return regmap_update_bits_base(map, reg, bits, 0, NULL, false, false);
 }
 
+static inline int regmap_assign_bits(struct regmap *map, unsigned int reg,
+				     unsigned int bits, bool value)
+{
+	if (value)
+		return regmap_set_bits(map, reg, bits);
+	else
+		return regmap_clear_bits(map, reg, bits);
+}
+
 int regmap_test_bits(struct regmap *map, unsigned int reg, unsigned int bits);
 
 /**
@@ -1803,6 +1812,13 @@ static inline int regmap_clear_bits(struct regmap *map,
 	return -EINVAL;
 }
 
+static inline int regmap_assign_bits(struct regmap *map, unsigned int reg,
+				     unsigned int bits, bool value)
+{
+	WARN_ONCE(1, "regmap API is disabled");
+	return -EINVAL;
+}
+
 static inline int regmap_test_bits(struct regmap *map,
 				   unsigned int reg, unsigned int bits)
 {
-- 
cgit v1.2.3


From 06cf321aadef17c7b1578369e314193c0e1c7d8e Mon Sep 17 00:00:00 2001
From: Ira Weiny <ira.weiny@intel.com>
Date: Thu, 7 Nov 2024 14:58:19 -0600
Subject: range: Add range_overlaps()

Code to support CXL Dynamic Capacity devices will have extent ranges
which need to be compared for intersection not a subset as is being
checked in range_contains().

range_overlaps() is defined in btrfs with a different meaning from what
is required in the standard range code.  Dan Williams pointed this out
in [1].  Adjust the btrfs call according to his suggestion there.

Then add a generic range_overlaps().

Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Chris Mason <clm@fb.com>
Cc: Josef Bacik <josef@toxicpanda.com>
Cc: David Sterba <dsterba@suse.com>
Cc: linux-btrfs@vger.kernel.org
Link: https://lore.kernel.org/all/65949f79ef908_8dc68294f2@dwillia2-xfh.jf.intel.com.notmuch/ [1]
Acked-by: David Sterba <dsterba@suse.com>
Reviewed-by: Davidlohr Bueso <dave@stgolabs.net>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Reviewed-by: Fan Ni <fan.ni@samsung.com>
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Signed-off-by: Ira Weiny <ira.weiny@intel.com>
Link: https://patch.msgid.link/20241107-dcd-type2-upstream-v7-1-56a84e66bc36@intel.com
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
---
 include/linux/range.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include')

diff --git a/include/linux/range.h b/include/linux/range.h
index 6ad0b73cb7ad..876cd5355158 100644
--- a/include/linux/range.h
+++ b/include/linux/range.h
@@ -13,11 +13,19 @@ static inline u64 range_len(const struct range *range)
 	return range->end - range->start + 1;
 }
 
+/* True if r1 completely contains r2 */
 static inline bool range_contains(struct range *r1, struct range *r2)
 {
 	return r1->start <= r2->start && r1->end >= r2->end;
 }
 
+/* True if any part of r1 overlaps r2 */
+static inline bool range_overlaps(const struct range *r1,
+				  const struct range *r2)
+{
+	return r1->start <= r2->end && r1->end >= r2->start;
+}
+
 int add_range(struct range *range, int az, int nr_range,
 		u64 start, u64 end);
 
-- 
cgit v1.2.3


From d62e2ed065785c9a7837519067e1307e4a24d2c2 Mon Sep 17 00:00:00 2001
From: Ira Weiny <ira.weiny@intel.com>
Date: Thu, 7 Nov 2024 14:58:20 -0600
Subject: ACPI/CDAT: Add CDAT/DSMAS shared and read only flag values

The Coherent Device Attribute Table (CDAT) Device Scoped Memory Affinity
Structure (DSMAS) version 1.04 [1] defines flags to indicate if a DPA range
is read only and/or shared.

Add read only and shareable flag definitions.

This change was merged in ACPICA via PR 976.[2]

Link: https://uefi.org/sites/default/files/resources/Coherent%20Device%20Attribute%20Table_1.04%20published_0.pdf [1]
Link: https://github.com/acpica/acpica/pull/976 [2]
Cc: Robert Moore <robert.moore@intel.com>
Cc: Len Brown <lenb@kernel.org>
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: linux-acpi@vger.kernel.org
Cc: acpica-devel@lists.linux.dev
Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Ira Weiny <ira.weiny@intel.com>
Link: https://patch.msgid.link/20241107-dcd-type2-upstream-v7-2-56a84e66bc36@intel.com
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
---
 include/acpi/actbl1.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/acpi/actbl1.h b/include/acpi/actbl1.h
index 199afc2cd122..387fc821703a 100644
--- a/include/acpi/actbl1.h
+++ b/include/acpi/actbl1.h
@@ -403,6 +403,8 @@ struct acpi_cdat_dsmas {
 /* Flags for subtable above */
 
 #define ACPI_CDAT_DSMAS_NON_VOLATILE        (1 << 2)
+#define ACPI_CDAT_DSMAS_SHAREABLE           (1 << 3)
+#define ACPI_CDAT_DSMAS_READ_ONLY           (1 << 6)
 
 /* Subtable 1: Device scoped Latency and Bandwidth Information Structure (DSLBIS) */
 
-- 
cgit v1.2.3


From d1b3dad9de7962f7bea861f27e352bac17b68bed Mon Sep 17 00:00:00 2001
From: Nicolin Chen <nicolinc@nvidia.com>
Date: Tue, 5 Nov 2024 12:04:17 -0800
Subject: iommufd: Move struct iommufd_object to public iommufd header

Prepare for an embedded structure design for driver-level iommufd_viommu
objects:
    // include/linux/iommufd.h
    struct iommufd_viommu {
        struct iommufd_object obj;
        ....
    };

    // Some IOMMU driver
    struct iommu_driver_viommu {
        struct iommufd_viommu core;
        ....
    };

It has to expose struct iommufd_object and enum iommufd_object_type from
the core-level private header to the public iommufd header.

Link: https://patch.msgid.link/r/54a43b0768089d690104530754f499ca05ce0074.1730836219.git.nicolinc@nvidia.com
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 include/linux/iommufd.h | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

(limited to 'include')

diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h
index 30f832a60ccb..22948dd03d67 100644
--- a/include/linux/iommufd.h
+++ b/include/linux/iommufd.h
@@ -8,6 +8,7 @@
 
 #include <linux/err.h>
 #include <linux/errno.h>
+#include <linux/refcount.h>
 #include <linux/types.h>
 
 struct device;
@@ -18,6 +19,29 @@ struct iommufd_ctx;
 struct iommufd_device;
 struct page;
 
+enum iommufd_object_type {
+	IOMMUFD_OBJ_NONE,
+	IOMMUFD_OBJ_ANY = IOMMUFD_OBJ_NONE,
+	IOMMUFD_OBJ_DEVICE,
+	IOMMUFD_OBJ_HWPT_PAGING,
+	IOMMUFD_OBJ_HWPT_NESTED,
+	IOMMUFD_OBJ_IOAS,
+	IOMMUFD_OBJ_ACCESS,
+	IOMMUFD_OBJ_FAULT,
+#ifdef CONFIG_IOMMUFD_TEST
+	IOMMUFD_OBJ_SELFTEST,
+#endif
+	IOMMUFD_OBJ_MAX,
+};
+
+/* Base struct for all objects with a userspace ID handle. */
+struct iommufd_object {
+	refcount_t shortterm_users;
+	refcount_t users;
+	enum iommufd_object_type type;
+	unsigned int id;
+};
+
 struct iommufd_device *iommufd_device_bind(struct iommufd_ctx *ictx,
 					   struct device *dev, u32 *id);
 void iommufd_device_unbind(struct iommufd_device *idev);
-- 
cgit v1.2.3


From e8e26a0b09f5783be471b5ffb1e31822b1272c1d Mon Sep 17 00:00:00 2001
From: Thorsten Blum <thorsten.blum@linux.dev>
Date: Tue, 15 Oct 2024 22:27:31 +0200
Subject: nfs: Annotate struct pnfs_commit_array with __counted_by()

Add the __counted_by compiler attribute to the flexible array member
buckets to improve access bounds-checking via CONFIG_UBSAN_BOUNDS and
CONFIG_FORTIFY_SOURCE.

Compile-tested only.

Signed-off-by: Thorsten Blum <thorsten.blum@linux.dev>
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 include/linux/nfs_xdr.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 12d8e47bc5a3..559273a0f16d 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1336,7 +1336,7 @@ struct pnfs_commit_array {
 	struct rcu_head rcu;
 	refcount_t refcount;
 	unsigned int nbuckets;
-	struct pnfs_commit_bucket buckets[];
+	struct pnfs_commit_bucket buckets[] __counted_by(nbuckets);
 };
 
 struct pnfs_ds_commit_info {
-- 
cgit v1.2.3


From 93970b6a143bd9f184ebab37c5862a204fb5690e Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Tue, 29 Oct 2024 15:21:43 -0400
Subject: sunrpc: remove newlines from tracepoints

Tracepoint strings don't require newlines (and in fact, they are
undesirable).

Signed-off-by: Jeff Layton <jlayton@kernel.org>
Acked-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 include/trace/events/sunrpc.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h
index 5e8495216689..b13dc275ef4a 100644
--- a/include/trace/events/sunrpc.h
+++ b/include/trace/events/sunrpc.h
@@ -719,7 +719,7 @@ TRACE_EVENT(rpc_xdr_overflow,
 	),
 
 	TP_printk(SUNRPC_TRACE_TASK_SPECIFIER
-		  " %sv%d %s requested=%zu p=%p end=%p xdr=[%p,%zu]/%u/[%p,%zu]/%u\n",
+		  " %sv%d %s requested=%zu p=%p end=%p xdr=[%p,%zu]/%u/[%p,%zu]/%u",
 		__entry->task_id, __entry->client_id,
 		__get_str(progname), __entry->version, __get_str(procedure),
 		__entry->requested, __entry->p, __entry->end,
@@ -777,7 +777,7 @@ TRACE_EVENT(rpc_xdr_alignment,
 	),
 
 	TP_printk(SUNRPC_TRACE_TASK_SPECIFIER
-		  " %sv%d %s offset=%zu copied=%u xdr=[%p,%zu]/%u/[%p,%zu]/%u\n",
+		  " %sv%d %s offset=%zu copied=%u xdr=[%p,%zu]/%u/[%p,%zu]/%u",
 		__entry->task_id, __entry->client_id,
 		__get_str(progname), __entry->version, __get_str(procedure),
 		__entry->offset, __entry->copied,
-- 
cgit v1.2.3


From e32c260195e6ff72940ab7826e38e0a0066fc58f Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 5 Nov 2024 11:49:04 -1000
Subject: sched_ext: Enable the ops breather and eject BPF scheduler on
 softlockup

On 2 x Intel Sapphire Rapids machines with 224 logical CPUs, a poorly
behaving BPF scheduler can live-lock the system by making multiple CPUs bang
on the same DSQ to the point where soft-lockup detection triggers before
SCX's own watchdog can take action. It also seems possible that the machine
can be live-locked enough to prevent scx_ops_helper, which is an RT task,
from running in a timely manner.

Implement scx_softlockup() which is called when three quarters of
soft-lockup threshold has passed. The function immediately enables the ops
breather and triggers an ops error to initiate ejection of the BPF
scheduler.

The previous and this patch combined enable the kernel to reliably recover
the system from live-lock conditions that can be triggered by a poorly
behaving BPF scheduler on Intel dual socket systems.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Douglas Anderson <dianders@chromium.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/sched/ext.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/sched/ext.h b/include/linux/sched/ext.h
index 1ddbde64a31b..65bc0a489cd2 100644
--- a/include/linux/sched/ext.h
+++ b/include/linux/sched/ext.h
@@ -205,11 +205,13 @@ struct sched_ext_entity {
 
 void sched_ext_free(struct task_struct *p);
 void print_scx_info(const char *log_lvl, struct task_struct *p);
+void scx_softlockup(u32 dur_s);
 
 #else	/* !CONFIG_SCHED_CLASS_EXT */
 
 static inline void sched_ext_free(struct task_struct *p) {}
 static inline void print_scx_info(const char *log_lvl, struct task_struct *p) {}
+static inline void scx_softlockup(u32 dur_s) {}
 
 #endif	/* CONFIG_SCHED_CLASS_EXT */
 #endif	/* _LINUX_SCHED_EXT_H */
-- 
cgit v1.2.3


From 20fd1383cd616d61b2a79967da1221dc6cfb8430 Mon Sep 17 00:00:00 2001
From: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Date: Thu, 7 Nov 2024 18:52:33 +0000
Subject: iio: Move __private marking before struct element priv in struct
 iio_dev

This is to avoid tripping up kernel-doc which filters it out before
but not after the name.

Note the formatting is less than ideal as a result so we may revisit
in future.

Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 include/linux/iio/iio.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h
index 59c58f455311..ae65890d4567 100644
--- a/include/linux/iio/iio.h
+++ b/include/linux/iio/iio.h
@@ -624,7 +624,7 @@ struct iio_dev {
 	const struct iio_info		*info;
 	const struct iio_buffer_setup_ops	*setup_ops;
 
-	void				*priv __private;
+	void				*__private priv;
 };
 
 int iio_device_id(struct iio_dev *indio_dev);
-- 
cgit v1.2.3


From fffb9fff12250018a6f4d3e411f9d289210da329 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Mon, 4 Nov 2024 11:34:24 +0200
Subject: gpio: Get rid of GPIOF_ACTIVE_LOW

No more users.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Link: https://lore.kernel.org/r/20241104093609.156059-7-andriy.shevchenko@linux.intel.com
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 include/linux/gpio.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/gpio.h b/include/linux/gpio.h
index 2d105be7bbc3..6270150f4e29 100644
--- a/include/linux/gpio.h
+++ b/include/linux/gpio.h
@@ -21,9 +21,6 @@ struct device;
 #define GPIOF_OUT_INIT_LOW	((0 << 0) | (0 << 1))
 #define GPIOF_OUT_INIT_HIGH	((0 << 0) | (1 << 1))
 
-/* Gpio pin is active-low */
-#define GPIOF_ACTIVE_LOW        (1 << 2)
-
 /**
  * struct gpio - a structure describing a GPIO with configuration
  * @gpio:	the GPIO number
-- 
cgit v1.2.3


From 580db513b4a9d52f306580015a1872eea0a0894e Mon Sep 17 00:00:00 2001
From: Khang Nguyen <khangng@os.amperecomputing.com>
Date: Tue, 5 Nov 2024 14:19:15 +0700
Subject: net: mctp: Expose transport binding identifier via IFLA attribute

MCTP control protocol implementations are transport binding dependent.
Endpoint discovery is mandatory based on transport binding.
Message timing requirements are specified in each respective transport
binding specification.

However, we currently have no means to get this information from MCTP
links.

Add a IFLA_MCTP_PHYS_BINDING netlink link attribute, which represents
the transport type using the DMTF DSP0239-defined type numbers, returned
as part of RTM_GETLINK data.

We get an IFLA_MCTP_PHYS_BINDING attribute for each MCTP link, for
example:

- 0x00 (unspec) for loopback interface;
- 0x01 (SMBus/I2C) for mctpi2c%d interfaces; and
- 0x05 (serial) for mctpserial%d interfaces.

Signed-off-by: Khang Nguyen <khangng@os.amperecomputing.com>
Reviewed-by: Matt Johnston <matt@codeconstruct.com.au>
Link: https://patch.msgid.link/20241105071915.821871-1-khangng@os.amperecomputing.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/mctp.h           | 18 ++++++++++++++++++
 include/net/mctpdevice.h     |  4 +++-
 include/uapi/linux/if_link.h |  1 +
 3 files changed, 22 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/mctp.h b/include/net/mctp.h
index 28d59ae94ca3..1ecbff7116f6 100644
--- a/include/net/mctp.h
+++ b/include/net/mctp.h
@@ -298,4 +298,22 @@ void mctp_routes_exit(void);
 int mctp_device_init(void);
 void mctp_device_exit(void);
 
+/* MCTP IDs and Codes from DMTF specification
+ * "DSP0239 Management Component Transport Protocol (MCTP) IDs and Codes"
+ * https://www.dmtf.org/sites/default/files/standards/documents/DSP0239_1.11.1.pdf
+ */
+enum mctp_phys_binding {
+	MCTP_PHYS_BINDING_UNSPEC	= 0x00,
+	MCTP_PHYS_BINDING_SMBUS		= 0x01,
+	MCTP_PHYS_BINDING_PCIE_VDM	= 0x02,
+	MCTP_PHYS_BINDING_USB		= 0x03,
+	MCTP_PHYS_BINDING_KCS		= 0x04,
+	MCTP_PHYS_BINDING_SERIAL	= 0x05,
+	MCTP_PHYS_BINDING_I3C		= 0x06,
+	MCTP_PHYS_BINDING_MMBI		= 0x07,
+	MCTP_PHYS_BINDING_PCC		= 0x08,
+	MCTP_PHYS_BINDING_UCIE		= 0x09,
+	MCTP_PHYS_BINDING_VENDOR	= 0xFF,
+};
+
 #endif /* __NET_MCTP_H */
diff --git a/include/net/mctpdevice.h b/include/net/mctpdevice.h
index 5c0d04b5c12c..957d9ef924c5 100644
--- a/include/net/mctpdevice.h
+++ b/include/net/mctpdevice.h
@@ -22,6 +22,7 @@ struct mctp_dev {
 	refcount_t		refs;
 
 	unsigned int		net;
+	enum mctp_phys_binding	binding;
 
 	const struct mctp_netdev_ops *ops;
 
@@ -44,7 +45,8 @@ struct mctp_dev *mctp_dev_get_rtnl(const struct net_device *dev);
 struct mctp_dev *__mctp_dev_get(const struct net_device *dev);
 
 int mctp_register_netdev(struct net_device *dev,
-			 const struct mctp_netdev_ops *ops);
+			 const struct mctp_netdev_ops *ops,
+			 enum mctp_phys_binding binding);
 void mctp_unregister_netdev(struct net_device *dev);
 
 void mctp_dev_hold(struct mctp_dev *mdev);
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index 8516c1ccd57a..2575e0cd9b48 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -1958,6 +1958,7 @@ struct ifla_rmnet_flags {
 enum {
 	IFLA_MCTP_UNSPEC,
 	IFLA_MCTP_NET,
+	IFLA_MCTP_PHYS_BINDING,
 	__IFLA_MCTP_MAX,
 };
 
-- 
cgit v1.2.3


From 41b3caa7c0761141aa6d508924b9d23db57a17bc Mon Sep 17 00:00:00 2001
From: Gilad Naaman <gnaaman@drivenets.com>
Date: Thu, 7 Nov 2024 16:04:38 +0000
Subject: neighbour: Add hlist_node to struct neighbour

Add a doubly-linked node to neighbours, so that they
can be deleted without iterating the entire bucket they're in.

Signed-off-by: Gilad Naaman <gnaaman@drivenets.com>
Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Link: https://patch.msgid.link/20241107160444.2913124-2-gnaaman@drivenets.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/neighbour.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/net/neighbour.h b/include/net/neighbour.h
index 3887ed9e5026..0402447854c7 100644
--- a/include/net/neighbour.h
+++ b/include/net/neighbour.h
@@ -136,6 +136,7 @@ struct neigh_statistics {
 
 struct neighbour {
 	struct neighbour __rcu	*next;
+	struct hlist_node	hash;
 	struct neigh_table	*tbl;
 	struct neigh_parms	*parms;
 	unsigned long		confirmed;
@@ -191,6 +192,7 @@ struct pneigh_entry {
 
 struct neigh_hash_table {
 	struct neighbour __rcu	**hash_buckets;
+	struct hlist_head	*hash_heads;
 	unsigned int		hash_shift;
 	__u32			hash_rnd[NEIGH_NUM_HASH_RND];
 	struct rcu_head		rcu;
-- 
cgit v1.2.3


From d7ddee1a522ddf5b28e2a3f7093cf238c96f492a Mon Sep 17 00:00:00 2001
From: Gilad Naaman <gnaaman@drivenets.com>
Date: Thu, 7 Nov 2024 16:04:39 +0000
Subject: neighbour: Define neigh_for_each_in_bucket

Introduce neigh_for_each_in_bucket in neighbour.h, to help iterate over
the neighbour table more succinctly.

Signed-off-by: Gilad Naaman <gnaaman@drivenets.com>
Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Link: https://patch.msgid.link/20241107160444.2913124-3-gnaaman@drivenets.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/neighbour.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include')

diff --git a/include/net/neighbour.h b/include/net/neighbour.h
index 0402447854c7..4b9068c5e668 100644
--- a/include/net/neighbour.h
+++ b/include/net/neighbour.h
@@ -277,6 +277,12 @@ static inline void *neighbour_priv(const struct neighbour *n)
 
 extern const struct nla_policy nda_policy[];
 
+#define neigh_for_each_in_bucket(pos, head) hlist_for_each_entry(pos, head, hash)
+#define neigh_for_each_in_bucket_rcu(pos, head) \
+	hlist_for_each_entry_rcu(pos, head, hash)
+#define neigh_for_each_in_bucket_safe(pos, tmp, head) \
+	hlist_for_each_entry_safe(pos, tmp, head, hash)
+
 static inline bool neigh_key_eq32(const struct neighbour *n, const void *pkey)
 {
 	return *(const u32 *)n->primary_key == *(const u32 *)pkey;
-- 
cgit v1.2.3


From 0e3bcb0f78a0ca7cfdb7906dc79d922e19ba09b5 Mon Sep 17 00:00:00 2001
From: Gilad Naaman <gnaaman@drivenets.com>
Date: Thu, 7 Nov 2024 16:04:41 +0000
Subject: neighbour: Convert iteration to use hlist+macro

Remove all usage of the bare neighbour::next pointer,
replacing them with neighbour::hash and its for_each macro.

Signed-off-by: Gilad Naaman <gnaaman@drivenets.com>
Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Link: https://patch.msgid.link/20241107160444.2913124-5-gnaaman@drivenets.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/neighbour.h | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/net/neighbour.h b/include/net/neighbour.h
index 4b9068c5e668..94cf4f8c118f 100644
--- a/include/net/neighbour.h
+++ b/include/net/neighbour.h
@@ -311,12 +311,9 @@ static inline struct neighbour *___neigh_lookup_noref(
 	u32 hash_val;
 
 	hash_val = hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift);
-	for (n = rcu_dereference(nht->hash_buckets[hash_val]);
-	     n != NULL;
-	     n = rcu_dereference(n->next)) {
+	neigh_for_each_in_bucket_rcu(n, &nht->hash_heads[hash_val])
 		if (n->dev == dev && key_eq(n, pkey))
 			return n;
-	}
 
 	return NULL;
 }
-- 
cgit v1.2.3


From a01a67ab2fffa7458354f0a666a6d550fa2b82fc Mon Sep 17 00:00:00 2001
From: Gilad Naaman <gnaaman@drivenets.com>
Date: Thu, 7 Nov 2024 16:04:42 +0000
Subject: neighbour: Remove bare neighbour::next pointer

Remove the now-unused neighbour::next pointer, leaving struct neighbour
solely with the hlist_node implementation.

Signed-off-by: Gilad Naaman <gnaaman@drivenets.com>
Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Link: https://patch.msgid.link/20241107160444.2913124-6-gnaaman@drivenets.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/neighbour.h | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/net/neighbour.h b/include/net/neighbour.h
index 94cf4f8c118f..40aac1e24c68 100644
--- a/include/net/neighbour.h
+++ b/include/net/neighbour.h
@@ -135,7 +135,6 @@ struct neigh_statistics {
 #define NEIGH_CACHE_STAT_INC(tbl, field) this_cpu_inc((tbl)->stats->field)
 
 struct neighbour {
-	struct neighbour __rcu	*next;
 	struct hlist_node	hash;
 	struct neigh_table	*tbl;
 	struct neigh_parms	*parms;
@@ -191,7 +190,6 @@ struct pneigh_entry {
 #define NEIGH_NUM_HASH_RND	4
 
 struct neigh_hash_table {
-	struct neighbour __rcu	**hash_buckets;
 	struct hlist_head	*hash_heads;
 	unsigned int		hash_shift;
 	__u32			hash_rnd[NEIGH_NUM_HASH_RND];
@@ -354,7 +352,7 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb,
 int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, u32 flags,
 		 u32 nlmsg_pid);
 void __neigh_set_probe_once(struct neighbour *neigh);
-bool neigh_remove_one(struct neighbour *ndel, struct neigh_table *tbl);
+bool neigh_remove_one(struct neighbour *ndel);
 void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev);
 int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev);
 int neigh_carrier_down(struct neigh_table *tbl, struct net_device *dev);
-- 
cgit v1.2.3


From f7f52738637f4361c108cad36e23ee98959a9006 Mon Sep 17 00:00:00 2001
From: Gilad Naaman <gnaaman@drivenets.com>
Date: Thu, 7 Nov 2024 16:04:43 +0000
Subject: neighbour: Create netdev->neighbour association

Create a mapping between a netdev and its neighoburs,
allowing for much cheaper flushes.

Signed-off-by: Gilad Naaman <gnaaman@drivenets.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Link: https://patch.msgid.link/20241107160444.2913124-7-gnaaman@drivenets.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/netdevice.h      |  7 +++++++
 include/net/neighbour.h        |  9 ++-------
 include/net/neighbour_tables.h | 12 ++++++++++++
 3 files changed, 21 insertions(+), 7 deletions(-)
 create mode 100644 include/net/neighbour_tables.h

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 3c552b648b27..df4483598628 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -52,6 +52,7 @@
 #include <net/net_trackers.h>
 #include <net/net_debug.h>
 #include <net/dropreason-core.h>
+#include <net/neighbour_tables.h>
 
 struct netpoll_info;
 struct device;
@@ -2032,6 +2033,9 @@ enum netdev_reg_state {
  *	@napi_defer_hard_irqs:	If not zero, provides a counter that would
  *				allow to avoid NIC hard IRQ, on busy queues.
  *
+ *	@neighbours:	List heads pointing to this device's neighbours'
+ *			dev_list, one per address-family.
+ *
  *	FIXME: cleanup struct net_device such that network protocol info
  *	moves out.
  */
@@ -2440,6 +2444,9 @@ struct net_device {
 	 */
 	struct net_shaper_hierarchy *net_shaper_hierarchy;
 #endif
+
+	struct hlist_head neighbours[NEIGH_NR_TABLES];
+
 	u8			priv[] ____cacheline_aligned
 				       __counted_by(priv_len);
 } ____cacheline_aligned;
diff --git a/include/net/neighbour.h b/include/net/neighbour.h
index 40aac1e24c68..9a832cab5b1d 100644
--- a/include/net/neighbour.h
+++ b/include/net/neighbour.h
@@ -29,6 +29,7 @@
 #include <linux/sysctl.h>
 #include <linux/workqueue.h>
 #include <net/rtnetlink.h>
+#include <net/neighbour_tables.h>
 
 /*
  * NUD stands for "neighbor unreachability detection"
@@ -136,6 +137,7 @@ struct neigh_statistics {
 
 struct neighbour {
 	struct hlist_node	hash;
+	struct hlist_node	dev_list;
 	struct neigh_table	*tbl;
 	struct neigh_parms	*parms;
 	unsigned long		confirmed;
@@ -236,13 +238,6 @@ struct neigh_table {
 	struct pneigh_entry	**phash_buckets;
 };
 
-enum {
-	NEIGH_ARP_TABLE = 0,
-	NEIGH_ND_TABLE = 1,
-	NEIGH_NR_TABLES,
-	NEIGH_LINK_TABLE = NEIGH_NR_TABLES /* Pseudo table for neigh_xmit */
-};
-
 static inline int neigh_parms_family(struct neigh_parms *p)
 {
 	return p->tbl->family;
diff --git a/include/net/neighbour_tables.h b/include/net/neighbour_tables.h
new file mode 100644
index 000000000000..bcffbe8f7601
--- /dev/null
+++ b/include/net/neighbour_tables.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _NET_NEIGHBOUR_TABLES_H
+#define _NET_NEIGHBOUR_TABLES_H
+
+enum {
+	NEIGH_ARP_TABLE = 0,
+	NEIGH_ND_TABLE = 1,
+	NEIGH_NR_TABLES,
+	NEIGH_LINK_TABLE = NEIGH_NR_TABLES /* Pseudo table for neigh_xmit */
+};
+
+#endif
-- 
cgit v1.2.3


From 79bc0af904db647979c735563299c9b0d820e432 Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Thu, 10 Oct 2024 21:35:42 +0200
Subject: hwmon: Add static visibility member to struct hwmon_ops

Several drivers return the same static value in their is_visible
callback, what results in code duplication. Therefore add an option
for drivers to specify a static visibility directly.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Message-ID: <89690b81-2c73-47ae-9ae9-45c77b45ca0c@gmail.com>
groeck: Renamed hwmon_ops_is_visible -> hwmon_is_visible
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 include/linux/hwmon.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/hwmon.h b/include/linux/hwmon.h
index 5c6a421ad580..3a63dff62d03 100644
--- a/include/linux/hwmon.h
+++ b/include/linux/hwmon.h
@@ -368,7 +368,9 @@ enum hwmon_intrusion_attributes {
 
 /**
  * struct hwmon_ops - hwmon device operations
- * @is_visible: Callback to return attribute visibility. Mandatory.
+ * @visible:	Static visibility. If non-zero, 'is_visible' is ignored.
+ * @is_visible: Callback to return attribute visibility. Mandatory unless
+ *		'visible' is non-zero.
  *		Parameters are:
  *		@const void *drvdata:
  *			Pointer to driver-private data structure passed
@@ -412,6 +414,7 @@ enum hwmon_intrusion_attributes {
  *		The function returns 0 on success or a negative error number.
  */
 struct hwmon_ops {
+	umode_t visible;
 	umode_t (*is_visible)(const void *drvdata, enum hwmon_sensor_types type,
 			      u32 attr, int channel);
 	int (*read)(struct device *dev, enum hwmon_sensor_types type,
-- 
cgit v1.2.3


From 7506ebcd662b868780774d191a7c024c18c557a8 Mon Sep 17 00:00:00 2001
From: Naresh Solanki <naresh.solanki@9elements.com>
Date: Mon, 7 Oct 2024 14:34:24 +0530
Subject: hwmon: (max6639) : Configure based on DT property

Remove platform data & initialize with defaults
configuration & overwrite based on DT properties.

Signed-off-by: Naresh Solanki <naresh.solanki@9elements.com>
Message-ID: <20241007090426.811736-1-naresh.solanki@9elements.com>
[groeck: Dropped some unnecessary empty lines]
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 include/linux/platform_data/max6639.h | 15 ---------------
 1 file changed, 15 deletions(-)
 delete mode 100644 include/linux/platform_data/max6639.h

(limited to 'include')

diff --git a/include/linux/platform_data/max6639.h b/include/linux/platform_data/max6639.h
deleted file mode 100644
index 65bfdb4fdc15..000000000000
--- a/include/linux/platform_data/max6639.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _LINUX_MAX6639_H
-#define _LINUX_MAX6639_H
-
-#include <linux/types.h>
-
-/* platform data for the MAX6639 temperature sensor and fan control */
-
-struct max6639_platform_data {
-	bool pwm_polarity;	/* Polarity low (0) or high (1, default) */
-	int ppr;		/* Pulses per rotation 1..4 (default == 2) */
-	int rpm_range;		/* 2000, 4000 (default), 8000 or 16000 */
-};
-
-#endif /* _LINUX_MAX6639_H */
-- 
cgit v1.2.3


From e7ac4daeed91a25382091e73818ea0cddb1afd5e Mon Sep 17 00:00:00 2001
From: Barry Song <v-songbaohua@oppo.com>
Date: Thu, 7 Nov 2024 14:12:46 +1300
Subject: mm: count zeromap read and set for swapout and swapin
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When the proportion of folios from the zeromap is small, missing their
accounting may not significantly impact profiling.  However, it's easy to
construct a scenario where this becomes an issue—for example, allocating
1 GB of memory, writing zeros from userspace, followed by MADV_PAGEOUT,
and then swapping it back in.  In this case, the swap-out and swap-in
counts seem to vanish into a black hole, potentially causing semantic
ambiguity.

On the other hand, Usama reported that zero-filled pages can exceed 10% in
workloads utilizing zswap, while Hailong noted that some app in Android
have more than 6% zero-filled pages.  Before commit 0ca0c24e3211 ("mm:
store zero pages to be swapped out in a bitmap"), both zswap and zRAM
implemented similar optimizations, leading to these optimized-out pages
being counted in either zswap or zRAM counters (with pswpin/pswpout also
increasing for zRAM).  With zeromap functioning prior to both zswap and
zRAM, userspace will no longer detect these swap-out and swap-in actions.

We have three ways to address this:

1. Introduce a dedicated counter specifically for the zeromap.

2. Use pswpin/pswpout accounting, treating the zero map as a standard
   backend.  This approach aligns with zRAM's current handling of
   same-page fills at the device level.  However, it would mean losing the
   optimized-out page counters previously available in zRAM and would not
   align with systems using zswap.  Additionally, as noted by Nhat Pham,
   pswpin/pswpout counters apply only to I/O done directly to the backend
   device.

3. Count zeromap pages under zswap, aligning with system behavior when
   zswap is enabled.  However, this would not be consistent with zRAM, nor
   would it align with systems lacking both zswap and zRAM.

Given the complications with options 2 and 3, this patch selects
option 1.

We can find these counters from /proc/vmstat (counters for the whole
system) and memcg's memory.stat (counters for the interested memcg).

For example:

$ grep -E 'swpin_zero|swpout_zero' /proc/vmstat
swpin_zero 1648
swpout_zero 33536

$ grep -E 'swpin_zero|swpout_zero' /sys/fs/cgroup/system.slice/memory.stat
swpin_zero 3905
swpout_zero 3985

This patch does not address any specific zeromap bug, but the missing
swpout and swpin counts for zero-filled pages can be highly confusing and
may mislead user-space agents that rely on changes in these counters as
indicators.  Therefore, we add a Fixes tag to encourage the inclusion of
this counter in any kernel versions with zeromap.

Many thanks to Kanchana for the contribution of changing
count_objcg_event() to count_objcg_events() to support large folios[1],
which has now been incorporated into this patch.

[1] https://lkml.kernel.org/r/20241001053222.6944-5-kanchana.p.sridhar@intel.com

Link: https://lkml.kernel.org/r/20241107011246.59137-1-21cnbao@gmail.com
Fixes: 0ca0c24e3211 ("mm: store zero pages to be swapped out in a bitmap")
Co-developed-by: Kanchana P Sridhar <kanchana.p.sridhar@intel.com>
Signed-off-by: Barry Song <v-songbaohua@oppo.com>
Reviewed-by: Nhat Pham <nphamcs@gmail.com>
Reviewed-by: Chengming Zhou <chengming.zhou@linux.dev>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Usama Arif <usamaarif642@gmail.com>
Cc: Yosry Ahmed <yosryahmed@google.com>
Cc: Hailong Liu <hailong.liu@oppo.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Shakeel Butt <shakeel.butt@linux.dev>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
Cc: Chris Li <chrisl@kernel.org>
Cc: "Huang, Ying" <ying.huang@intel.com>
Cc: Kairui Song <kasong@tencent.com>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/memcontrol.h    | 12 +++++++-----
 include/linux/vm_event_item.h |  2 ++
 2 files changed, 9 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 34d2da05f2f1..e1b41554a5fb 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -1760,8 +1760,9 @@ static inline int memcg_kmem_id(struct mem_cgroup *memcg)
 
 struct mem_cgroup *mem_cgroup_from_slab_obj(void *p);
 
-static inline void count_objcg_event(struct obj_cgroup *objcg,
-				     enum vm_event_item idx)
+static inline void count_objcg_events(struct obj_cgroup *objcg,
+				      enum vm_event_item idx,
+				      unsigned long count)
 {
 	struct mem_cgroup *memcg;
 
@@ -1770,7 +1771,7 @@ static inline void count_objcg_event(struct obj_cgroup *objcg,
 
 	rcu_read_lock();
 	memcg = obj_cgroup_memcg(objcg);
-	count_memcg_events(memcg, idx, 1);
+	count_memcg_events(memcg, idx, count);
 	rcu_read_unlock();
 }
 
@@ -1825,8 +1826,9 @@ static inline struct mem_cgroup *mem_cgroup_from_slab_obj(void *p)
 	return NULL;
 }
 
-static inline void count_objcg_event(struct obj_cgroup *objcg,
-				     enum vm_event_item idx)
+static inline void count_objcg_events(struct obj_cgroup *objcg,
+				      enum vm_event_item idx,
+				      unsigned long count)
 {
 }
 
diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h
index aed952d04132..f70d0958095c 100644
--- a/include/linux/vm_event_item.h
+++ b/include/linux/vm_event_item.h
@@ -134,6 +134,8 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
 #ifdef CONFIG_SWAP
 		SWAP_RA,
 		SWAP_RA_HIT,
+		SWPIN_ZERO,
+		SWPOUT_ZERO,
 #ifdef CONFIG_KSM
 		KSM_SWPIN_COPY,
 #endif
-- 
cgit v1.2.3


From 69bad21551c9caea8c58800f96da48a704fd311e Mon Sep 17 00:00:00 2001
From: Kanchana P Sridhar <kanchana.p.sridhar@intel.com>
Date: Mon, 30 Sep 2024 22:32:16 -0700
Subject: mm: define obj_cgroup_get() if CONFIG_MEMCG is not defined

Patch series "mm: zswap swap-out of large folios", v10.

This patch series enables zswap_store() to accept and store large folios.
The most significant contribution in this series is from the earlier RFC
submitted by Ryan Roberts [1].  Ryan's original RFC has been migrated to
mm-unstable as of 9-30-2024 in patch 6 of this series, and adapted based
on code review comments received for the current patch-series.

[1]: [RFC PATCH v1] mm: zswap: Store large folios without splitting
     https://lore.kernel.org/linux-mm/20231019110543.3284654-1-ryan.roberts@arm.com/T/#u

The first few patches do the prep work for supporting large folios in
zswap_store.  Patch 6 provides the main functionality to swap-out large
folios in zswap.  Patch 7 adds sysfs per-order hugepages "zswpout"
counters that get incremented upon successful zswap_store of large folios,
and also updates the documentation for this:

/sys/kernel/mm/transparent_hugepage/hugepages-*kB/stats/zswpout

This patch series is a prerequisite for zswap compress batching of large
folio swap-out and decompress batching of swap-ins based on
swapin_readahead(), using Intel IAA hardware acceleration, which we would
like to submit in subsequent patch-series, with performance improvement
data.

Thanks to Ying Huang for pre-posting review feedback and suggestions!

Thanks also to Nhat, Yosry, Johannes, Barry, Chengming, Usama, Ying and
Matthew for their helpful feedback, code/data reviews and suggestions!

Co-development signoff request:
===============================
I would like to thank Ryan Roberts for his original RFC [1] and request
his co-developer signoff on patch 6 in this series. Thanks Ryan!


System setup for testing:
=========================
Testing of this patch series was done with mm-unstable as of 9-27-2024,
commit de2fbaa6d9c3576ec7133ed02a370ec9376bf000 (without this patch-series)
and mm-unstable 9-30-2024 commit c121617e3606be6575cdacfdb63cc8d67b46a568
(with this patch-series). Data was gathered on an Intel Sapphire Rapids
server, dual-socket 56 cores per socket, 4 IAA devices per socket, 503 GiB
RAM and 525G SSD disk partition swap. Core frequency was fixed at 2500MHz.

The vm-scalability "usemem" test was run in a cgroup whose memory.high
was fixed at 150G. The is no swap limit set for the cgroup. 30 usemem
processes were run, each allocating and writing 10G of memory, and sleeping
for 10 sec before exiting:

usemem --init-time -w -O -s 10 -n 30 10g

Other kernel configuration parameters:

    zswap compressors : zstd, deflate-iaa
    zswap allocator   : zsmalloc
    vm.page-cluster   : 2

In the experiments where "deflate-iaa" is used as the zswap compressor,
IAA "compression verification" is enabled by default
(cat /sys/bus/dsa/drivers/crypto/verify_compress). Hence each IAA
compression will be decompressed internally by the "iaa_crypto" driver, the
crc-s returned by the hardware will be compared and errors reported in case
of mismatches. Thus "deflate-iaa" helps ensure better data integrity as
compared to the software compressors, and the experimental data listed
below is with verify_compress set to "1".


Metrics reporting methodology:
==============================
Total and average throughput are derived from the individual 30 processes'
throughputs reported by usemem. elapsed/sys times are measured with perf.

All percentage changes are "new" vs. "old"; hence a positive value
denotes an increase in the metric, whether it is throughput or latency,
and a negative value denotes a reduction in the metric. Positive throughput
change percentages and negative latency change percentages denote improvements.

The vm stats and sysfs hugepages stats included with the performance data
provide details on the swapout activity to zswap/swap device.


Testing labels used in data summaries:
======================================
The data refers to these test configurations and the before/after
comparisons that they do:

 before-case1:
 -------------
 mm-unstable 9-27-2024, CONFIG_THP_SWAP=N (compares zswap 4K vs. zswap 64K)

 In this scenario, CONFIG_THP_SWAP=N results in 64K/2M folios to be split
 into 4K folios that get processed by zswap.

 before-case2:
 -------------
 mm-unstable 9-27-2024, CONFIG_THP_SWAP=Y (compares SSD swap large folios vs. zswap large folios)

 In this scenario, CONFIG_THP_SWAP=Y results in zswap rejecting large
 folios, which will then be stored by the SSD swap device.

 after:
 ------
 v10 of this patch-series, CONFIG_THP_SWAP=Y

 The "after" is CONFIG_THP_SWAP=Y and v10 of this patch-series, that results
 in 64K/2M folios to not be split, and to be processed by zswap_store.


Regression Testing:
===================
I ran vm-scalability usemem without large folios, i.e., only 4K folios with
mm-unstable and this patch-series. The main goal was to make sure that
there is no functional or performance regression wrt the earlier zswap
behavior for 4K folios, now that 4K folios will be processed by the new
zswap_store() code.

The data indicates there is no significant regression.

 -------------------------------------------------------------------------------
 4K folios:
 ==========

 zswap compressor                zstd          zstd        zstd       zstd v10
                         before-case1  before-case2       after      vs.     vs.
                                                                   case1   case2
 -------------------------------------------------------------------------------
 Total throughput (KB/s)    4,793,363     4,880,978   4,853,074       1%     -1%
 Average throughput (KB/s)    159,778       162,699     161,769       1%     -1%
 elapsed time (sec)            130.14        123.17      126.29      -3%      3%
 sys time (sec)              3,135.53      2,985.64    3,083.18      -2%      3%
 memcg_high                   446,826       444,626     452,930
 memcg_swap_fail                    0             0           0
 zswpout                   48,932,107    48,931,971  48,931,820
 zswpin                           383           386         397
 pswpout                            0             0           0
 pswpin                             0             0           0
 thp_swpout                         0             0           0
 thp_swpout_fallback                0             0           0
 64kB-mthp_swpout_fallback          0             0           0
 pgmajfault                     3,063         3,077       3,479
 swap_ra                           93            94          96
 swap_ra_hit                       47            47          50
 ZSWPOUT-64kB                     n/a           n/a           0
 SWPOUT-64kB                        0             0           0
 -------------------------------------------------------------------------------


Performance Testing:
====================

We list the data for 64K folios with before/after data per-compressor,
followed by the same for 2M pmd-mappable folios.


 -------------------------------------------------------------------------------
 64K folios: zstd:
 =================

 zswap compressor                zstd          zstd         zstd      zstd v10
                         before-case1  before-case2        after     vs.    vs.
                                                                    case1  case2
 -------------------------------------------------------------------------------
 Total throughput (KB/s)    5,222,213     1,076,611    6,159,776      18%   472%
 Average throughput (KB/s)    174,073        35,887      205,325      18%   472%
 elapsed time (sec)            120.50        347.16       108.33     -10%   -69%
 sys time (sec)              2,930.33        248.16     2,549.65     -13%   927%
 memcg_high                   416,773       552,200      465,874
 memcg_swap_fail            3,192,906         1,293        1,012
 zswpout                   48,931,583        20,903   48,931,218
 zswpin                           384           363          410
 pswpout                            0    40,778,448            0
 pswpin                             0            16            0
 thp_swpout                         0             0            0
 thp_swpout_fallback                0             0            0
 64kB-mthp_swpout_fallback  3,192,906         1,293        1,012
 pgmajfault                     3,452         3,072        3,061
 swap_ra                           90            87          107
 swap_ra_hit                       42            43           57
 ZSWPOUT-64kB                     n/a           n/a    3,057,173
 SWPOUT-64kB                        0     2,548,653            0
 -------------------------------------------------------------------------------


 -------------------------------------------------------------------------------
 64K folios: deflate-iaa:
 ========================

 zswap compressor         deflate-iaa   deflate-iaa  deflate-iaa deflate-iaa v10
                         before-case1  before-case2        after     vs.     vs.
                                                                   case1   case2
 -------------------------------------------------------------------------------
 Total throughput (KB/s)    5,652,608     1,089,180    7,189,778     27%    560%
 Average throughput (KB/s)    188,420        36,306      239,659     27%    560%
 elapsed time (sec)            102.90        343.35        87.05    -15%    -75%
 sys time (sec)              2,246.86        213.53     1,864.16    -17%    773%
 memcg_high                   576,104       502,907      642,083
 memcg_swap_fail            4,016,117         1,407        1,478
 zswpout                   61,163,423        22,444   57,798,716
 zswpin                           401           368          454
 pswpout                            0    40,862,080            0
 pswpin                             0            20            0
 thp_swpout                         0             0            0
 thp_swpout_fallback                0             0            0
 64kB-mthp_swpout_fallback  4,016,117         1,407        1,478
 pgmajfault                     3,063         3,153        3,122
 swap_ra                           96            93          156
 swap_ra_hit                       46            45           83
 ZSWPOUT-64kB                     n/a           n/a    3,611,032
 SWPOUT-64kB                        0     2,553,880            0
 -------------------------------------------------------------------------------


 -------------------------------------------------------------------------------
 2M folios: zstd:
 ================

 zswap compressor                zstd          zstd         zstd      zstd v10
                         before-case1  before-case2        after     vs.    vs.
                                                                   case1  case2
 -------------------------------------------------------------------------------
 Total throughput (KB/s)    5,895,500     1,109,694    6,484,224     10%    484%
 Average throughput (KB/s)    196,516        36,989      216,140     10%    484%
 elapsed time (sec)            108.77        334.28       106.33     -2%    -68%
 sys time (sec)              2,657.14         94.88     2,376.13    -11%   2404%
 memcg_high                    64,200        66,316       56,898
 memcg_swap_fail              101,182            70           27
 zswpout                   48,931,499        36,507   48,890,640
 zswpin                           380           379          377
 pswpout                            0    40,166,400            0
 pswpin                             0             0            0
 thp_swpout                         0        78,450            0
 thp_swpout_fallback          101,182            70           27
 2MB-mthp_swpout_fallback           0             0           27
 pgmajfault                     3,067         3,417        3,311
 swap_ra                           91            90          854
 swap_ra_hit                       45            45          810
 ZSWPOUT-2MB                      n/a           n/a       95,459
 SWPOUT-2MB                         0        78,450            0
 -------------------------------------------------------------------------------


 -------------------------------------------------------------------------------
 2M folios: deflate-iaa:
 =======================

 zswap compressor         deflate-iaa   deflate-iaa  deflate-iaa deflate-iaa v10
                         before-case1  before-case2        after     vs.     vs.
                                                                   case1   case2
 -------------------------------------------------------------------------------
 Total throughput (KB/s)   6,286,587      1,126,785    7,073,464     13%    528%
 Average throughput (KB/s)   209,552         37,559      235,782     13%    528%
 elapsed time (sec)            96.19         333.03        85.79    -11%    -74%
 sys time (sec)             2,141.44          99.96     1,826.67    -15%   1727%
 memcg_high                   99,253         64,666       79,718
 memcg_swap_fail             129,074             53          165
 zswpout                  61,312,794         28,321   56,045,120
 zswpin                          383            406          403
 pswpout                           0     40,048,128            0
 pswpin                            0              0            0
 thp_swpout                        0         78,219            0
 thp_swpout_fallback         129,074             53          165
 2MB-mthp_swpout_fallback          0              0          165
 pgmajfault                    3,430          3,077       31,468
 swap_ra                          91            103       84,373
 swap_ra_hit                      47             46       84,317
 ZSWPOUT-2MB                     n/a            n/a      109,229
 SWPOUT-2MB                        0         78,219            0
 -------------------------------------------------------------------------------


And finally, this is a comparison of deflate-iaa vs. zstd with v10 of this
patch-series:

 ---------------------------------------------
                  zswap_store large folios v10
                  Impr w/ deflate-iaa vs. zstd

                       64K folios    2M folios
 ---------------------------------------------
 Throughput (KB/s)            17%           9%
 elapsed time (sec)          -20%         -19%
 sys time (sec)              -27%         -23%
 ---------------------------------------------


Conclusions based on the performance results:
=============================================

 v10 wrt before-case1:
 ---------------------
 We see significant improvements in throughput, elapsed and sys time for
 zstd and deflate-iaa, when comparing before-case1 (THP_SWAP=N) vs. after
 (THP_SWAP=Y) with zswap_store large folios.

 v10 wrt before-case2:
 ---------------------
 We see even more significant improvements in throughput and elapsed time
 for zstd and deflate-iaa, when comparing before-case2 (large-folio-SSD)
 vs. after (large-folio-zswap). The sys time increases with
 large-folio-zswap as expected, due to the CPU compression time
 vs. asynchronous disk write times, as pointed out by Ying and Yosry.

 In before-case2, when zswap does not store large folios, only allocations
 and cgroup charging due to 4K folio zswap stores count towards the cgroup
 memory limit. However, in the after scenario, with the introduction of
 zswap_store() of large folios, there is an added component of the zswap
 compressed pool usage from large folio stores from potentially all 30
 processes, that gets counted towards the memory limit. As a result, we see
 higher swapout activity in the "after" data.


Summary:
========
The v10 data presented above shows that zswap_store of large folios
demonstrates good throughput/performance improvements compared to
conventional SSD swap of large folios with a sufficiently large 525G SSD
swap device. Hence, it seems reasonable for zswap_store to support large
folios, so that further performance improvements can be implemented.

In the experimental setup used in this patchset, we have enabled IAA
compress verification to ensure additional hardware data integrity CRC
checks not currently done by the software compressors. We see good
throughput/latency improvements with deflate-iaa vs. zstd with zswap_store
of large folios.

Some of the ideas for further reducing latency that have shown promise in
our experiments, are:

1) IAA compress/decompress batching.
2) Distributing compress jobs across all IAA devices on the socket.

The tests run for this patchset are using only 1 IAA device per core, that
avails of 2 compress engines on the device. In our experiments with IAA
batching, we distribute compress jobs from all cores to the 8 compress
engines available per socket. We further compress the pages in each folio
in parallel in the accelerator. As a result, we improve compress latency
and reclaim throughput.

In decompress batching, we use swapin_readahead to generate a prefetch
batch of 4K folios that we decompress in parallel in IAA.

 ------------------------------------------------------------------------------
                          IAA compress/decompress batching
              Further improvements wrt v10 zswap_store Sequential
                          subpage store using "deflate-iaa":

                      "deflate-iaa" Batching  "deflate-iaa-canned" [2] Batching
                          Additional Impr               Additional Impr
                     64K folios    2M folios     64K folios    2M folios
 ------------------------------------------------------------------------------
 Throughput (KB/s)          19%          43%           26%           55%
 elapsed time (sec)         -5%         -14%          -10%          -21%
 sys time (sec)              4%          -7%           -4%          -18%
 ------------------------------------------------------------------------------


With zswap IAA compress/decompress batching, we are able to demonstrate
significant performance improvements and memory savings in server
scalability experiments in highly contended system scenarios under
significant memory pressure; as compared to software compressors. We hope
to submit this work in subsequent patch series. The current patch-series is
a prequisite for these future submissions.

[1] https://lore.kernel.org/linux-mm/20231019110543.3284654-1-ryan.roberts@arm.com/T/#u
[2] https://patchwork.kernel.org/project/linux-crypto/cover/cover.1710969449.git.andre.glover@linux.intel.com/


This patch (of 6):

This resolves an issue with obj_cgroup_get() not being defined if
CONFIG_MEMCG is not defined.

Before this patch, we would see build errors if obj_cgroup_get() is called
from code that is agnostic of CONFIG_MEMCG.

The zswap_store() changes for large folios in subsequent commits will
require the use of obj_cgroup_get() in zswap code that falls into this
category.

Link: https://lkml.kernel.org/r/20241001053222.6944-1-kanchana.p.sridhar@intel.com
Link: https://lkml.kernel.org/r/20241001053222.6944-2-kanchana.p.sridhar@intel.com
Signed-off-by: Kanchana P Sridhar <kanchana.p.sridhar@intel.com>
Reviewed-by: Nhat Pham <nphamcs@gmail.com>
Reviewed-by: Yosry Ahmed <yosryahmed@google.com>
Reviewed-by: Chengming Zhou <chengming.zhou@linux.dev>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: "Huang, Ying" <ying.huang@intel.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Cc: Shakeel Butt <shakeel.butt@linux.dev>
Cc: Usama Arif <usamaarif642@gmail.com>
Cc: Wajdi Feghali <wajdi.k.feghali@intel.com>
Cc: "Zou, Nanhai" <nanhai.zou@intel.com>
Cc: Barry Song <21cnbao@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/memcontrol.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 0bd8f61a5597..5502aa8e138e 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -1233,6 +1233,10 @@ struct mem_cgroup *mem_cgroup_from_css(struct cgroup_subsys_state *css)
 	return NULL;
 }
 
+static inline void obj_cgroup_get(struct obj_cgroup *objcg)
+{
+}
+
 static inline void obj_cgroup_put(struct obj_cgroup *objcg)
 {
 }
-- 
cgit v1.2.3


From 6e1fa555ec772046ec3b903f507ff7fed5323796 Mon Sep 17 00:00:00 2001
From: Kanchana P Sridhar <kanchana.p.sridhar@intel.com>
Date: Mon, 30 Sep 2024 22:32:20 -0700
Subject: mm: zswap: modify zswap_stored_pages to be atomic_long_t

For zswap_store() to support large folios, we need to be able to do a
batch update of zswap_stored_pages upon successful store of all pages in
the folio.  For this, we need to add folio_nr_pages(), which returns a
long, to zswap_stored_pages.

Link: https://lkml.kernel.org/r/20241001053222.6944-6-kanchana.p.sridhar@intel.com
Signed-off-by: Kanchana P Sridhar <kanchana.p.sridhar@intel.com>
Acked-by: Yosry Ahmed <yosryahmed@google.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Reviewed-by: Nhat Pham <nphamcs@gmail.com>
Cc: Chengming Zhou <chengming.zhou@linux.dev>
Cc: "Huang, Ying" <ying.huang@intel.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Cc: Shakeel Butt <shakeel.butt@linux.dev>
Cc: Usama Arif <usamaarif642@gmail.com>
Cc: Wajdi Feghali <wajdi.k.feghali@intel.com>
Cc: "Zou, Nanhai" <nanhai.zou@intel.com>
Cc: Barry Song <21cnbao@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/zswap.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/zswap.h b/include/linux/zswap.h
index 9cd1beef0654..d961ead91bf1 100644
--- a/include/linux/zswap.h
+++ b/include/linux/zswap.h
@@ -7,7 +7,7 @@
 
 struct lruvec;
 
-extern atomic_t zswap_stored_pages;
+extern atomic_long_t zswap_stored_pages;
 
 #ifdef CONFIG_ZSWAP
 
-- 
cgit v1.2.3


From 0c560dd86040556a9e55d88229d9295672428c78 Mon Sep 17 00:00:00 2001
From: Kanchana P Sridhar <kanchana.p.sridhar@intel.com>
Date: Mon, 30 Sep 2024 22:32:22 -0700
Subject: mm: swap: count successful large folio zswap stores in hugepage
 zswpout stats

Added a new MTHP_STAT_ZSWPOUT entry to the sysfs transparent_hugepage
stats so that successful large folio zswap stores can be accounted under
the per-order sysfs "zswpout" stats:

/sys/kernel/mm/transparent_hugepage/hugepages-*kB/stats/zswpout

Other non-zswap swap device swap-out events will be counted under
the existing sysfs "swpout" stats:

/sys/kernel/mm/transparent_hugepage/hugepages-*kB/stats/swpout

Also, added documentation for the newly added sysfs per-order hugepage
"zswpout" stats. The documentation clarifies that only non-zswap swapouts
will be accounted in the existing "swpout" stats.

Link: https://lkml.kernel.org/r/20241001053222.6944-8-kanchana.p.sridhar@intel.com
Signed-off-by: Kanchana P Sridhar <kanchana.p.sridhar@intel.com>
Reviewed-by: Nhat Pham <nphamcs@gmail.com>
Cc: Chengming Zhou <chengming.zhou@linux.dev>
Cc: "Huang, Ying" <ying.huang@intel.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Cc: Shakeel Butt <shakeel.butt@linux.dev>
Cc: Usama Arif <usamaarif642@gmail.com>
Cc: Wajdi Feghali <wajdi.k.feghali@intel.com>
Cc: Yosry Ahmed <yosryahmed@google.com>
Cc: "Zou, Nanhai" <nanhai.zou@intel.com>
Cc: Barry Song <21cnbao@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/huge_mm.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 006f730545c2..c59e5aa9b081 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -119,6 +119,7 @@ enum mthp_stat_item {
 	MTHP_STAT_ANON_FAULT_ALLOC,
 	MTHP_STAT_ANON_FAULT_FALLBACK,
 	MTHP_STAT_ANON_FAULT_FALLBACK_CHARGE,
+	MTHP_STAT_ZSWPOUT,
 	MTHP_STAT_SWPOUT,
 	MTHP_STAT_SWPOUT_FALLBACK,
 	MTHP_STAT_SHMEM_ALLOC,
-- 
cgit v1.2.3


From aaf2914aec0fa67395574f6fa6b726168b049e60 Mon Sep 17 00:00:00 2001
From: Barry Song <v-songbaohua@oppo.com>
Date: Sat, 26 Oct 2024 21:24:23 +1300
Subject: mm: add per-order mTHP swpin counters

This helps profile the sizes of folios being swapped in. Currently,
only mTHP swap-out is being counted.
The new interface can be found at:
/sys/kernel/mm/transparent_hugepage/hugepages-<size>/stats
         swpin
For example,
cat /sys/kernel/mm/transparent_hugepage/hugepages-64kB/stats/swpin
12809
cat /sys/kernel/mm/transparent_hugepage/hugepages-32kB/stats/swpin
4763

[v-songbaohua@oppo.com: add a blank line in doc]
  Link: https://lkml.kernel.org/r/20241030233423.80759-1-21cnbao@gmail.com
Link: https://lkml.kernel.org/r/20241026082423.26298-1-21cnbao@gmail.com
Signed-off-by: Barry Song <v-songbaohua@oppo.com>
Reviewed-by: Baolin Wang <baolin.wang@linux.alibaba.com>
Acked-by: David Hildenbrand <david@redhat.com>
Cc: Chris Li <chrisl@kernel.org>
Cc: Yosry Ahmed <yosryahmed@google.com>
Cc: "Huang, Ying" <ying.huang@intel.com>
Cc: Kairui Song <kasong@tencent.com>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Cc: Kanchana P Sridhar <kanchana.p.sridhar@intel.com>
Cc: Usama Arif <usamaarif642@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/huge_mm.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index c59e5aa9b081..b94c2e8ee918 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -120,6 +120,7 @@ enum mthp_stat_item {
 	MTHP_STAT_ANON_FAULT_FALLBACK,
 	MTHP_STAT_ANON_FAULT_FALLBACK_CHARGE,
 	MTHP_STAT_ZSWPOUT,
+	MTHP_STAT_SWPIN,
 	MTHP_STAT_SWPOUT,
 	MTHP_STAT_SWPOUT_FALLBACK,
 	MTHP_STAT_SHMEM_ALLOC,
-- 
cgit v1.2.3


From 5f6170a469cd2c13ad4dffe42714cf777b132451 Mon Sep 17 00:00:00 2001
From: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Date: Mon, 28 Oct 2024 14:13:27 +0000
Subject: mm: pagewalk: add the ability to install PTEs

Patch series "implement lightweight guard pages", v4.

Userland library functions such as allocators and threading
implementations often require regions of memory to act as 'guard pages' -
mappings which, when accessed, result in a fatal signal being sent to the
accessing process.

The current means by which these are implemented is via a PROT_NONE mmap()
mapping, which provides the required semantics however incur an overhead
of a VMA for each such region.

With a great many processes and threads, this can rapidly add up and incur
a significant memory penalty.  It also has the added problem of preventing
merges that might otherwise be permitted.

This series takes a different approach - an idea suggested by Vlastimil
Babka (and before him David Hildenbrand and Jann Horn - perhaps more - the
provenance becomes a little tricky to ascertain after this - please
forgive any omissions!) - rather than locating the guard pages at the VMA
layer, instead placing them in page tables mapping the required ranges.

Early testing of the prototype version of this code suggests a 5 times
speed up in memory mapping invocations (in conjunction with use of
process_madvise()) and a 13% reduction in VMAs on an entirely idle android
system and unoptimised code.

We expect with optimisation and a loaded system with a larger number of
guard pages this could significantly increase, but in any case these
numbers are encouraging.

This way, rather than having separate VMAs specifying which parts of a
range are guard pages, instead we have a VMA spanning the entire range of
memory a user is permitted to access and including ranges which are to be
'guarded'.

After mapping this, a user can specify which parts of the range should
result in a fatal signal when accessed.

By restricting the ability to specify guard pages to memory mapped by
existing VMAs, we can rely on the mappings being torn down when the
mappings are ultimately unmapped and everything works simply as if the
memory were not faulted in, from the point of view of the containing VMAs.

This mechanism in effect poisons memory ranges similar to hardware memory
poisoning, only it is an entirely software-controlled form of poisoning.

The mechanism is implemented via madvise() behaviour - MADV_GUARD_INSTALL
which installs page table-level guard page markers - and MADV_GUARD_REMOVE
- which clears them.

Guard markers can be installed across multiple VMAs and any existing
mappings will be cleared, that is zapped, before installing the guard page
markers in the page tables.

There is no concept of 'nested' guard markers, multiple attempts to
install guard markers in a range will, after the first attempt, have no
effect.

Importantly, removing guard markers over a range that contains both guard
markers and ordinary backed memory has no effect on anything but the guard
markers (including leaving huge pages un-split), so a user can safely
remove guard markers over a range of memory leaving the rest intact.

The actual mechanism by which the page table entries are specified makes
use of existing logic - PTE markers, which are used for the userfaultfd
UFFDIO_POISON mechanism.

Unfortunately PTE_MARKER_POISONED is not suited for the guard page
mechanism as it results in VM_FAULT_HWPOISON semantics in the fault
handler, so we add our own specific PTE_MARKER_GUARD and adapt existing
logic to handle it.

We also extend the generic page walk mechanism to allow for installation
of PTEs (carefully restricted to memory management logic only to prevent
unwanted abuse).

We ensure that zapping performed by MADV_DONTNEED and MADV_FREE do not
remove guard markers, nor does forking (except when VM_WIPEONFORK is
specified for a VMA which implies a total removal of memory
characteristics).

It's important to note that the guard page implementation is emphatically
NOT a security feature, so a user can remove the markers if they wish.  We
simply implement it in such a way as to provide the least surprising
behaviour.

An extensive set of self-tests are provided which ensure behaviour is as
expected and additionally self-documents expected behaviour of guard
ranges.


This patch (of 5):

The existing generic pagewalk logic permits the walking of page tables,
invoking callbacks at individual page table levels via user-provided
mm_walk_ops callbacks.

This is useful for traversing existing page table entries, but precludes
the ability to establish new ones.

Existing mechanism for performing a walk which also installs page table
entries if necessary are heavily duplicated throughout the kernel, each
with semantic differences from one another and largely unavailable for use
elsewhere.

Rather than add yet another implementation, we extend the generic pagewalk
logic to enable the installation of page table entries by adding a new
install_pte() callback in mm_walk_ops.  If this is specified, then upon
encountering a missing page table entry, we allocate and install a new one
and continue the traversal.

If a THP huge page is encountered at either the PMD or PUD level we split
it only if there are ops->pte_entry() (or ops->pmd_entry at PUD level),
otherwise if there is only an ops->install_pte(), we avoid the unnecessary
split.

We do not support hugetlb at this stage.

If this function returns an error, or an allocation fails during the
operation, we abort the operation altogether.  It is up to the caller to
deal appropriately with partially populated page table ranges.

If install_pte() is defined, the semantics of pte_entry() change - this
callback is then only invoked if the entry already exists.  This is a
useful property, as it allows a caller to handle existing PTEs while
installing new ones where necessary in the specified range.

If install_pte() is not defined, then there is no functional difference to
this patch, so all existing logic will work precisely as it did before.

As we only permit the installation of PTEs where a mapping does not
already exist there is no need for TLB management, however we do invoke
update_mmu_cache() for architectures which require manual maintenance of
mappings for other CPUs.

We explicitly do not allow the existing page walk API to expose this
feature as it is dangerous and intended for internal mm use only.
Therefore we provide a new walk_page_range_mm() function exposed only to
mm/internal.h.

We take the opportunity to additionally clean up the page walker logic to
be a little easier to follow.

Link: https://lkml.kernel.org/r/cover.1730123433.git.lorenzo.stoakes@oracle.com
Link: https://lkml.kernel.org/r/51b432ebef013e3fdf9f92101533435de1bffadf.1730123433.git.lorenzo.stoakes@oracle.com
Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Reviewed-by: Jann Horn <jannh@google.com>
Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
Suggested-by: Vlastimil Babka <vbabka@suse.cz>
Suggested-by: Jann Horn <jannh@google.com>
Suggested-by: David Hildenbrand <david@redhat.com>
Cc: Arnd Bergmann <arnd@kernel.org>
Cc: Christian Brauner <brauner@kernel.org>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Chris Zankel <chris@zankel.net>
Cc: Helge Deller <deller@gmx.de>
Cc: James E.J. Bottomley <James.Bottomley@HansenPartnership.com>
Cc: Jeff Xu <jeffxu@chromium.org>
Cc: John Hubbard <jhubbard@nvidia.com>
Cc: Liam R. Howlett <Liam.Howlett@Oracle.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Matt Turner <mattst88@gmail.com>
Cc: Max Filippov <jcmvbkbc@gmail.com>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Paul E. McKenney <paulmck@kernel.org>
Cc: Richard Henderson <richard.henderson@linaro.org>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Sidhartha Kumar <sidhartha.kumar@oracle.com>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
Cc: Shuah Khan <skhan@linuxfoundation.org>
Cc: Vlastimil Babka <vbabkba@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/pagewalk.h | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/pagewalk.h b/include/linux/pagewalk.h
index f5eb5a32aeed..9700a29f8afb 100644
--- a/include/linux/pagewalk.h
+++ b/include/linux/pagewalk.h
@@ -25,12 +25,15 @@ enum page_walk_lock {
  *			this handler is required to be able to handle
  *			pmd_trans_huge() pmds.  They may simply choose to
  *			split_huge_page() instead of handling it explicitly.
- * @pte_entry:		if set, called for each PTE (lowest-level) entry,
- *			including empty ones
+ * @pte_entry:		if set, called for each PTE (lowest-level) entry
+ *			including empty ones, except if @install_pte is set.
+ *			If @install_pte is set, @pte_entry is called only for
+ *			existing PTEs.
  * @pte_hole:		if set, called for each hole at all levels,
  *			depth is -1 if not known, 0:PGD, 1:P4D, 2:PUD, 3:PMD.
  *			Any folded depths (where PTRS_PER_P?D is equal to 1)
- *			are skipped.
+ *			are skipped. If @install_pte is specified, this will
+ *			not trigger for any populated ranges.
  * @hugetlb_entry:	if set, called for each hugetlb entry. This hook
  *			function is called with the vma lock held, in order to
  *			protect against a concurrent freeing of the pte_t* or
@@ -51,6 +54,13 @@ enum page_walk_lock {
  * @pre_vma:            if set, called before starting walk on a non-null vma.
  * @post_vma:           if set, called after a walk on a non-null vma, provided
  *                      that @pre_vma and the vma walk succeeded.
+ * @install_pte:        if set, missing page table entries are installed and
+ *                      thus all levels are always walked in the specified
+ *                      range. This callback is then invoked at the PTE level
+ *                      (having split any THP pages prior), providing the PTE to
+ *                      install. If allocations fail, the walk is aborted. This
+ *                      operation is only available for userland memory. Not
+ *                      usable for hugetlb ranges.
  *
  * p?d_entry callbacks are called even if those levels are folded on a
  * particular architecture/configuration.
@@ -76,6 +86,8 @@ struct mm_walk_ops {
 	int (*pre_vma)(unsigned long start, unsigned long end,
 		       struct mm_walk *walk);
 	void (*post_vma)(struct mm_walk *walk);
+	int (*install_pte)(unsigned long addr, unsigned long next,
+			   pte_t *ptep, struct mm_walk *walk);
 	enum page_walk_lock walk_lock;
 };
 
-- 
cgit v1.2.3


From 7c53dfbdb024915f23f03f972b33744309d4608b Mon Sep 17 00:00:00 2001
From: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Date: Mon, 28 Oct 2024 14:13:28 +0000
Subject: mm: add PTE_MARKER_GUARD PTE marker

Add a new PTE marker that results in any access causing the accessing
process to segfault.

This is preferable to PTE_MARKER_POISONED, which results in the same
handling as hardware poisoned memory, and is thus undesirable for cases
where we simply wish to 'soft' poison a range.

This is in preparation for implementing the ability to specify guard pages
at the page table level, i.e.  ranges that, when accessed, should cause
process termination.

Additionally, rename zap_drop_file_uffd_wp() to zap_drop_markers() - the
function checks the ZAP_FLAG_DROP_MARKER flag so naming it for this single
purpose was simply incorrect.

We then reuse the same logic to determine whether a zap should clear a
guard entry - this should only be performed on teardown and never on
MADV_DONTNEED or MADV_FREE.

We additionally add a WARN_ON_ONCE() in hugetlb logic should a guard
marker be encountered there, as we explicitly do not support this
operation and this should not occur.

Link: https://lkml.kernel.org/r/f47f3d5acca2dcf9bbf655b6d33f3dc713e4a4a0.1730123433.git.lorenzo.stoakes@oracle.com
Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Acked-by: Vlastimil Babka <vbabkba@suse.cz>
Suggested-by: Vlastimil Babka <vbabka@suse.cz>
Suggested-by: Jann Horn <jannh@google.com>
Suggested-by: David Hildenbrand <david@redhat.com>
Cc: Arnd Bergmann <arnd@kernel.org>
Cc: Christian Brauner <brauner@kernel.org>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Chris Zankel <chris@zankel.net>
Cc: Helge Deller <deller@gmx.de>
Cc: James E.J. Bottomley <James.Bottomley@HansenPartnership.com>
Cc: Jeff Xu <jeffxu@chromium.org>
Cc: John Hubbard <jhubbard@nvidia.com>
Cc: Liam R. Howlett <Liam.Howlett@Oracle.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Matt Turner <mattst88@gmail.com>
Cc: Max Filippov <jcmvbkbc@gmail.com>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Paul E. McKenney <paulmck@kernel.org>
Cc: Richard Henderson <richard.henderson@linaro.org>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Shuah Khan <skhan@linuxfoundation.org>
Cc: Sidhartha Kumar <sidhartha.kumar@oracle.com>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/mm_inline.h |  2 +-
 include/linux/swapops.h   | 24 +++++++++++++++++++++++-
 2 files changed, 24 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
index 355cf46a01a6..1b6a917fffa4 100644
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
@@ -544,7 +544,7 @@ static inline pte_marker copy_pte_marker(
 {
 	pte_marker srcm = pte_marker_get(entry);
 	/* Always copy error entries. */
-	pte_marker dstm = srcm & PTE_MARKER_POISONED;
+	pte_marker dstm = srcm & (PTE_MARKER_POISONED | PTE_MARKER_GUARD);
 
 	/* Only copy PTE markers if UFFD register matches. */
 	if ((srcm & PTE_MARKER_UFFD_WP) && userfaultfd_wp(dst_vma))
diff --git a/include/linux/swapops.h b/include/linux/swapops.h
index cb468e418ea1..96f26e29fefe 100644
--- a/include/linux/swapops.h
+++ b/include/linux/swapops.h
@@ -426,9 +426,19 @@ typedef unsigned long pte_marker;
  * "Poisoned" here is meant in the very general sense of "future accesses are
  * invalid", instead of referring very specifically to hardware memory errors.
  * This marker is meant to represent any of various different causes of this.
+ *
+ * Note that, when encountered by the faulting logic, PTEs with this marker will
+ * result in VM_FAULT_HWPOISON and thus regardless trigger hardware memory error
+ * logic.
  */
 #define  PTE_MARKER_POISONED			BIT(1)
-#define  PTE_MARKER_MASK			(BIT(2) - 1)
+/*
+ * Indicates that, on fault, this PTE will case a SIGSEGV signal to be
+ * sent. This means guard markers behave in effect as if the region were mapped
+ * PROT_NONE, rather than if they were a memory hole or equivalent.
+ */
+#define  PTE_MARKER_GUARD			BIT(2)
+#define  PTE_MARKER_MASK			(BIT(3) - 1)
 
 static inline swp_entry_t make_pte_marker_entry(pte_marker marker)
 {
@@ -464,6 +474,18 @@ static inline int is_poisoned_swp_entry(swp_entry_t entry)
 {
 	return is_pte_marker_entry(entry) &&
 	    (pte_marker_get(entry) & PTE_MARKER_POISONED);
+
+}
+
+static inline swp_entry_t make_guard_swp_entry(void)
+{
+	return make_pte_marker_entry(PTE_MARKER_GUARD);
+}
+
+static inline int is_guard_swp_entry(swp_entry_t entry)
+{
+	return is_pte_marker_entry(entry) &&
+		(pte_marker_get(entry) & PTE_MARKER_GUARD);
 }
 
 /*
-- 
cgit v1.2.3


From 662df3e5c37666d6ed75c88098699e070a4b35b5 Mon Sep 17 00:00:00 2001
From: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Date: Mon, 28 Oct 2024 14:13:29 +0000
Subject: mm: madvise: implement lightweight guard page mechanism

Implement a new lightweight guard page feature, that is regions of
userland virtual memory that, when accessed, cause a fatal signal to
arise.

Currently users must establish PROT_NONE ranges to achieve this.

However this is very costly memory-wise - we need a VMA for each and every
one of these regions AND they become unmergeable with surrounding VMAs.

In addition repeated mmap() calls require repeated kernel context switches
and contention of the mmap lock to install these ranges, potentially also
having to unmap memory if installed over existing ranges.

The lightweight guard approach eliminates the VMA cost altogether - rather
than establishing a PROT_NONE VMA, it operates at the level of page table
entries - establishing PTE markers such that accesses to them cause a
fault followed by a SIGSGEV signal being raised.

This is achieved through the PTE marker mechanism, which we have already
extended to provide PTE_MARKER_GUARD, which we installed via the generic
page walking logic which we have extended for this purpose.

These guard ranges are established with MADV_GUARD_INSTALL.  If the range
in which they are installed contain any existing mappings, they will be
zapped, i.e.  free the range and unmap memory (thus mimicking the
behaviour of MADV_DONTNEED in this respect).

Any existing guard entries will be left untouched.  There is therefore no
nesting of guarded pages.

Guarded ranges are NOT cleared by MADV_DONTNEED nor MADV_FREE (in both
instances the memory range may be reused at which point a user would
expect guards to still be in place), but they are cleared via
MADV_GUARD_REMOVE, process teardown or unmapping of memory ranges.

The guard property can be removed from ranges via MADV_GUARD_REMOVE.  The
ranges over which this is applied, should they contain non-guard entries,
will be untouched, with only guard entries being cleared.

We permit this operation on anonymous memory only, and only VMAs which are
non-special, non-huge and not mlock()'d (if we permitted this we'd have to
drop locked pages which would be rather counterintuitive).

Racing page faults can cause repeated attempts to install guard pages that
are interrupted, result in a zap, and this process can end up being
repeated.  If this happens more than would be expected in normal
operation, we rescind locks and retry the whole thing, which avoids lock
contention in this scenario.

Link: https://lkml.kernel.org/r/6aafb5821bf209f277dfae0787abb2ef87a37542.1730123433.git.lorenzo.stoakes@oracle.com
Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Suggested-by: Vlastimil Babka <vbabka@suse.cz>
Suggested-by: Jann Horn <jannh@google.com>
Suggested-by: David Hildenbrand <david@redhat.com>
Suggested-by: Vlastimil Babka <vbabka@suse.cz>
Suggested-by: Jann Horn <jannh@google.com>
Suggested-by: David Hildenbrand <david@redhat.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Arnd Bergmann <arnd@kernel.org>
Cc: Christian Brauner <brauner@kernel.org>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Chris Zankel <chris@zankel.net>
Cc: Helge Deller <deller@gmx.de>
Cc: James E.J. Bottomley <James.Bottomley@HansenPartnership.com>
Cc: Jeff Xu <jeffxu@chromium.org>
Cc: John Hubbard <jhubbard@nvidia.com>
Cc: Liam R. Howlett <Liam.Howlett@Oracle.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Matt Turner <mattst88@gmail.com>
Cc: Max Filippov <jcmvbkbc@gmail.com>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Paul E. McKenney <paulmck@kernel.org>
Cc: Richard Henderson <richard.henderson@linaro.org>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Shuah Khan <skhan@linuxfoundation.org>
Cc: Sidhartha Kumar <sidhartha.kumar@oracle.com>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/uapi/asm-generic/mman-common.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/uapi/asm-generic/mman-common.h b/include/uapi/asm-generic/mman-common.h
index 6ce1f1ceb432..1ea2c4c33b86 100644
--- a/include/uapi/asm-generic/mman-common.h
+++ b/include/uapi/asm-generic/mman-common.h
@@ -79,6 +79,9 @@
 
 #define MADV_COLLAPSE	25		/* Synchronous hugepage collapse */
 
+#define MADV_GUARD_INSTALL 102		/* fatal signal on access to range */
+#define MADV_GUARD_REMOVE 103		/* unguard range */
+
 /* compatibility flags */
 #define MAP_FILE	0
 
-- 
cgit v1.2.3


From ab6e8e74e47362bd9d79dd4394a167b2afe0cc77 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hughd@google.com>
Date: Sun, 27 Oct 2024 13:14:42 -0700
Subject: mm: delete the unused put_pages_list()

The last user of put_pages_list() converted away from it in 6.10 commit
06c375053cef ("iommu/vt-d: add wrapper functions for page allocations"):
delete put_pages_list().

Link: https://lkml.kernel.org/r/d9985d6a-293e-176b-e63d-82fdfd28c139@google.com
Signed-off-by: Hugh Dickins <hughd@google.com>
Acked-by: Peter Xu <peterx@redhat.com>
Acked-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Yang Shi <shy828301@gmail.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Pasha Tatashin <pasha.tatashin@soleen.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/mm.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 78848fbefe94..32888a97ab44 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1286,8 +1286,6 @@ static inline struct folio *virt_to_folio(const void *x)
 
 void __folio_put(struct folio *folio);
 
-void put_pages_list(struct list_head *pages);
-
 void split_page(struct page *page, unsigned int order);
 void folio_copy(struct folio *dst, struct folio *src);
 int folio_mc_copy(struct folio *dst, struct folio *src);
-- 
cgit v1.2.3


From f914ac96ee8828368f5a24553e75216d76da0b42 Mon Sep 17 00:00:00 2001
From: JP Kobryn <inwardvessel@gmail.com>
Date: Mon, 28 Oct 2024 19:11:06 -0700
Subject: memcg: add flush tracepoint

This tracepoint gives visibility on how often the flushing of memcg stats
occurs and contains info on whether it was forced, skipped, and the value
of stats updated.  It can help with understanding how readers are affected
by having to perform the flush, and the effectiveness of the flush by
inspecting the number of stats updated.  Paired with the recently added
tracepoints for tracing rstat updates, it can also help show correlation
where stats exceed thresholds frequently.

Link: https://lkml.kernel.org/r/20241029021106.25587-3-inwardvessel@gmail.com
Signed-off-by: JP Kobryn <inwardvessel@gmail.com>
Reviewed-by: Yosry Ahmed <yosryahmed@google.com>
Acked-by: Shakeel Butt <shakeel.butt@linux.dev>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Steven Rostedt (Google) <rostedt@goodmis.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/trace/events/memcg.h | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

(limited to 'include')

diff --git a/include/trace/events/memcg.h b/include/trace/events/memcg.h
index 8667e57816d2..dfe2f51019b4 100644
--- a/include/trace/events/memcg.h
+++ b/include/trace/events/memcg.h
@@ -74,6 +74,31 @@ DEFINE_EVENT(memcg_rstat_events, count_memcg_events,
 	TP_ARGS(memcg, item, val)
 );
 
+TRACE_EVENT(memcg_flush_stats,
+
+	TP_PROTO(struct mem_cgroup *memcg, s64 stats_updates,
+		bool force, bool needs_flush),
+
+	TP_ARGS(memcg, stats_updates, force, needs_flush),
+
+	TP_STRUCT__entry(
+		__field(u64, id)
+		__field(s64, stats_updates)
+		__field(bool, force)
+		__field(bool, needs_flush)
+	),
+
+	TP_fast_assign(
+		__entry->id = cgroup_id(memcg->css.cgroup);
+		__entry->stats_updates = stats_updates;
+		__entry->force = force;
+		__entry->needs_flush = needs_flush;
+	),
+
+	TP_printk("memcg_id=%llu stats_updates=%lld force=%d needs_flush=%d",
+		__entry->id, __entry->stats_updates,
+		__entry->force, __entry->needs_flush)
+);
 
 #endif /* _TRACE_MEMCG_H */
 
-- 
cgit v1.2.3


From 48b50624aec454ce0fa8f78ef96e2f43bc0be495 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@redhat.com>
Date: Mon, 21 Oct 2024 12:33:38 +0200
Subject: backing-file: clean up the API

 - Pass iocb to ctx->end_write() instead of file + pos

 - Get rid of ctx->user_file, which is redundant most of the time

 - Instead pass iocb to backing_file_splice_read and
   backing_file_splice_write

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
Signed-off-by: Amir Goldstein <amir73il@gmail.com>
---
 include/linux/backing-file.h | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/backing-file.h b/include/linux/backing-file.h
index 2eed0ffb5e8f..1476a6ed1bfd 100644
--- a/include/linux/backing-file.h
+++ b/include/linux/backing-file.h
@@ -14,9 +14,8 @@
 
 struct backing_file_ctx {
 	const struct cred *cred;
-	struct file *user_file;
-	void (*accessed)(struct file *);
-	void (*end_write)(struct file *, loff_t, ssize_t);
+	void (*accessed)(struct file *file);
+	void (*end_write)(struct kiocb *iocb, ssize_t);
 };
 
 struct file *backing_file_open(const struct path *user_path, int flags,
@@ -31,13 +30,13 @@ ssize_t backing_file_read_iter(struct file *file, struct iov_iter *iter,
 ssize_t backing_file_write_iter(struct file *file, struct iov_iter *iter,
 				struct kiocb *iocb, int flags,
 				struct backing_file_ctx *ctx);
-ssize_t backing_file_splice_read(struct file *in, loff_t *ppos,
+ssize_t backing_file_splice_read(struct file *in, struct kiocb *iocb,
 				 struct pipe_inode_info *pipe, size_t len,
 				 unsigned int flags,
 				 struct backing_file_ctx *ctx);
 ssize_t backing_file_splice_write(struct pipe_inode_info *pipe,
-				  struct file *out, loff_t *ppos, size_t len,
-				  unsigned int flags,
+				  struct file *out, struct kiocb *iocb,
+				  size_t len, unsigned int flags,
 				  struct backing_file_ctx *ctx);
 int backing_file_mmap(struct file *file, struct vm_area_struct *vma,
 		      struct backing_file_ctx *ctx);
-- 
cgit v1.2.3


From 49dffdfde462c7823de6ed882f71ce233aaeba63 Mon Sep 17 00:00:00 2001
From: Vinicius Costa Gomes <vinicius.gomes@intel.com>
Date: Wed, 6 Nov 2024 16:57:17 -0800
Subject: cred: Add a light version of override/revert_creds()

Add a light version of override/revert_creds(), this should only be
used when the credentials in question will outlive the critical
section and the critical section doesn't change the ->usage of the
credentials.

Suggested-by: Christian Brauner <brauner@kernel.org>
Signed-off-by: Vinicius Costa Gomes <vinicius.gomes@intel.com>
Signed-off-by: Amir Goldstein <amir73il@gmail.com>
---
 include/linux/cred.h | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

(limited to 'include')

diff --git a/include/linux/cred.h b/include/linux/cred.h
index 2976f534a7a3..e4a3155fe409 100644
--- a/include/linux/cred.h
+++ b/include/linux/cred.h
@@ -172,6 +172,24 @@ static inline bool cap_ambient_invariant_ok(const struct cred *cred)
 					  cred->cap_inheritable));
 }
 
+/*
+ * Override creds without bumping reference count. Caller must ensure
+ * reference remains valid or has taken reference. Almost always not the
+ * interface you want. Use override_creds()/revert_creds() instead.
+ */
+static inline const struct cred *override_creds_light(const struct cred *override_cred)
+{
+	const struct cred *old = current->cred;
+
+	rcu_assign_pointer(current->cred, override_cred);
+	return old;
+}
+
+static inline void revert_creds_light(const struct cred *revert_cred)
+{
+	rcu_assign_pointer(current->cred, revert_cred);
+}
+
 /**
  * get_new_cred_many - Get references on a new set of credentials
  * @cred: The new credentials to reference
-- 
cgit v1.2.3


From 5c2e7736e20d9b348a44cafbfa639fe2653fbc34 Mon Sep 17 00:00:00 2001
From: Eder Zulian <ezulian@redhat.com>
Date: Thu, 7 Nov 2024 17:32:23 +0100
Subject: rust: helpers: Avoid raw_spin_lock initialization for PREEMPT_RT

When PREEMPT_RT=y, spin locks are mapped to rt_mutex types, so using
spinlock_check() + __raw_spin_lock_init() to initialize spin locks is
incorrect, and would cause build errors.

Introduce __spin_lock_init() to initialize a spin lock with lockdep
rquired information for PREEMPT_RT builds, and use it in the Rust
helper.

Fixes: d2d6422f8bd1 ("x86: Allow to enable PREEMPT_RT.")
Closes: https://lore.kernel.org/oe-kbuild-all/202409251238.vetlgXE9-lkp@intel.com/
Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Eder Zulian <ezulian@redhat.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Boqun Feng <boqun.feng@gmail.com>
Tested-by: Boqun Feng <boqun.feng@gmail.com>
Link: https://lore.kernel.org/r/20241107163223.2092690-2-ezulian@redhat.com
---
 include/linux/spinlock_rt.h | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/linux/spinlock_rt.h b/include/linux/spinlock_rt.h
index f9f14e135be7..f6499c37157d 100644
--- a/include/linux/spinlock_rt.h
+++ b/include/linux/spinlock_rt.h
@@ -16,22 +16,21 @@ static inline void __rt_spin_lock_init(spinlock_t *lock, const char *name,
 }
 #endif
 
-#define spin_lock_init(slock)					\
+#define __spin_lock_init(slock, name, key, percpu)		\
 do {								\
-	static struct lock_class_key __key;			\
-								\
 	rt_mutex_base_init(&(slock)->lock);			\
-	__rt_spin_lock_init(slock, #slock, &__key, false);	\
+	__rt_spin_lock_init(slock, name, key, percpu);		\
 } while (0)
 
-#define local_spin_lock_init(slock)				\
+#define _spin_lock_init(slock, percpu)				\
 do {								\
 	static struct lock_class_key __key;			\
-								\
-	rt_mutex_base_init(&(slock)->lock);			\
-	__rt_spin_lock_init(slock, #slock, &__key, true);	\
+	__spin_lock_init(slock, #slock, &__key, percpu);	\
 } while (0)
 
+#define spin_lock_init(slock)		_spin_lock_init(slock, false)
+#define local_spin_lock_init(slock)	_spin_lock_init(slock, true)
+
 extern void rt_spin_lock(spinlock_t *lock) __acquires(lock);
 extern void rt_spin_lock_nested(spinlock_t *lock, int subclass)	__acquires(lock);
 extern void rt_spin_lock_nest_lock(spinlock_t *lock, struct lockdep_map *nest_lock) __acquires(lock);
-- 
cgit v1.2.3


From c554aa9ca976480839af342204e05bb4ce8367d5 Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Fri, 1 Nov 2024 22:13:33 +0100
Subject: uprobes: Re-order struct uprobe_task to save some space

On x86_64, with allmodconfig, struct uprobe_task is 72 bytes long, with a
hole and some padding.

	/* size: 72, cachelines: 2, members: 7 */
	/* sum members: 64, holes: 1, sum holes: 4 */
	/* padding: 4 */
	/* forced alignments: 1, forced holes: 1, sum forced holes: 4 */
	/* last cacheline: 8 bytes */

Reorder the structure to fill the hole and avoid the padding.

This way, the whole structure fits in a single cacheline and some memory is
saved when it is allocated.

	/* size: 64, cachelines: 1, members: 7 */
	/* forced alignments: 1 */

Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: "Masami Hiramatsu (Google)" <mhiramat@kernel.org>
Link: https://lore.kernel.org/r/a9f541d0cedf421f765c77a1fb93d6a979778a88.1730495562.git.christophe.jaillet@wanadoo.fr
---
 include/linux/uprobes.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h
index 7a051b5d2edd..e0a4c2082245 100644
--- a/include/linux/uprobes.h
+++ b/include/linux/uprobes.h
@@ -121,6 +121,9 @@ struct hprobe {
 struct uprobe_task {
 	enum uprobe_task_state		state;
 
+	unsigned int			depth;
+	struct return_instance		*return_instances;
+
 	union {
 		struct {
 			struct arch_uprobe_task	autask;
@@ -138,9 +141,6 @@ struct uprobe_task {
 	unsigned long			xol_vaddr;
 
 	struct arch_uprobe              *auprobe;
-
-	struct return_instance		*return_instances;
-	unsigned int			depth;
 };
 
 struct return_consumer {
-- 
cgit v1.2.3


From ed76c07c6885b249ce8486dac22fb97151a83185 Mon Sep 17 00:00:00 2001
From: Marcos Paulo de Souza <mpdesouza@suse.com>
Date: Tue, 5 Nov 2024 16:45:08 -0300
Subject: printk: Introduce FORCE_CON flag

Introduce FORCE_CON flag to printk. The new flag will make it possible to
create a context where printk messages will never be suppressed.

This mechanism will be used in the next patch to create a force_con
context on sysrq handling, removing an existing workaround on the
loglevel global variable. The workaround existed to make sure that sysrq
header messages were sent to all consoles, but this doesn't work with
deferred messages because the loglevel might be restored to its original
value before a console flushes the messages.

Signed-off-by: Marcos Paulo de Souza <mpdesouza@suse.com>
Reviewed-by: John Ogness <john.ogness@linutronix.de>
Reviewed-by: Petr Mladek <pmladek@suse.com>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Link: https://lore.kernel.org/r/20241105-printk-loud-con-v2-1-bd3ecdf7b0e4@suse.com
Signed-off-by: Petr Mladek <pmladek@suse.com>
---
 include/linux/printk.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/linux/printk.h b/include/linux/printk.h
index eca9bb2ee637..232e5fd06701 100644
--- a/include/linux/printk.h
+++ b/include/linux/printk.h
@@ -166,6 +166,9 @@ __printf(1, 2) __cold int _printk_deferred(const char *fmt, ...);
 extern void __printk_deferred_enter(void);
 extern void __printk_deferred_exit(void);
 
+extern void printk_force_console_enter(void);
+extern void printk_force_console_exit(void);
+
 /*
  * The printk_deferred_enter/exit macros are available only as a hack for
  * some code paths that need to defer all printk console printing. Interrupts
-- 
cgit v1.2.3


From c28b97f53be7b598ae5fb47cc9dc80912019d7a8 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Tue, 24 Sep 2024 17:12:32 +0100
Subject: btrfs: qgroups: remove bytenr field from struct
 btrfs_qgroup_extent_record

Now that we track qgroup extent records in a xarray we don't need to have
a "bytenr" field in  struct btrfs_qgroup_extent_record, since we can get
it from the index of the record in the xarray.

So remove the field and grab the bytenr from either the index key or any
other place where it's available (delayed refs). This reduces the size of
struct btrfs_qgroup_extent_record from 40 bytes down to 32 bytes, meaning
that we now can store 128 instances of this structure instead of 102 per
4K page.

Reviewed-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 include/trace/events/btrfs.h | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h
index af6b3827fb1d..8d2ff32fb3b0 100644
--- a/include/trace/events/btrfs.h
+++ b/include/trace/events/btrfs.h
@@ -1706,9 +1706,10 @@ DEFINE_EVENT(btrfs__qgroup_rsv_data, btrfs_qgroup_release_data,
 
 DECLARE_EVENT_CLASS(btrfs_qgroup_extent,
 	TP_PROTO(const struct btrfs_fs_info *fs_info,
-		 const struct btrfs_qgroup_extent_record *rec),
+		 const struct btrfs_qgroup_extent_record *rec,
+		 u64 bytenr),
 
-	TP_ARGS(fs_info, rec),
+	TP_ARGS(fs_info, rec, bytenr),
 
 	TP_STRUCT__entry_btrfs(
 		__field(	u64,  bytenr		)
@@ -1716,7 +1717,7 @@ DECLARE_EVENT_CLASS(btrfs_qgroup_extent,
 	),
 
 	TP_fast_assign_btrfs(fs_info,
-		__entry->bytenr		= rec->bytenr;
+		__entry->bytenr		= bytenr;
 		__entry->num_bytes	= rec->num_bytes;
 	),
 
@@ -1727,17 +1728,19 @@ DECLARE_EVENT_CLASS(btrfs_qgroup_extent,
 DEFINE_EVENT(btrfs_qgroup_extent, btrfs_qgroup_account_extents,
 
 	TP_PROTO(const struct btrfs_fs_info *fs_info,
-		 const struct btrfs_qgroup_extent_record *rec),
+		 const struct btrfs_qgroup_extent_record *rec,
+		 u64 bytenr),
 
-	TP_ARGS(fs_info, rec)
+	TP_ARGS(fs_info, rec, bytenr)
 );
 
 DEFINE_EVENT(btrfs_qgroup_extent, btrfs_qgroup_trace_extent,
 
 	TP_PROTO(const struct btrfs_fs_info *fs_info,
-		 const struct btrfs_qgroup_extent_record *rec),
+		 const struct btrfs_qgroup_extent_record *rec,
+		 u64 bytenr),
 
-	TP_ARGS(fs_info, rec)
+	TP_ARGS(fs_info, rec, bytenr)
 );
 
 TRACE_EVENT(qgroup_num_dirty_extents,
-- 
cgit v1.2.3


From b628c139519ae0e5453e5327161a41bae966201d Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <linux@treblig.org>
Date: Thu, 3 Oct 2024 15:27:27 +0100
Subject: btrfs: remove unused btrfs_try_tree_write_lock()

btrfs_try_tree_write_lock() has been unused since commit
50b21d7a066f ("btrfs: submit a writeback bio per extent_buffer").
Remove it as we don't need it anymore.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: Dr. David Alan Gilbert <linux@treblig.org>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 include/trace/events/btrfs.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h
index 8d2ff32fb3b0..bd515415ea8b 100644
--- a/include/trace/events/btrfs.h
+++ b/include/trace/events/btrfs.h
@@ -2344,7 +2344,6 @@ DEFINE_BTRFS_LOCK_EVENT(btrfs_tree_read_unlock_blocking);
 DEFINE_BTRFS_LOCK_EVENT(btrfs_set_lock_blocking_read);
 DEFINE_BTRFS_LOCK_EVENT(btrfs_set_lock_blocking_write);
 DEFINE_BTRFS_LOCK_EVENT(btrfs_try_tree_read_lock);
-DEFINE_BTRFS_LOCK_EVENT(btrfs_try_tree_write_lock);
 DEFINE_BTRFS_LOCK_EVENT(btrfs_tree_read_lock_atomic);
 
 DECLARE_EVENT_CLASS(btrfs__space_info_update,
-- 
cgit v1.2.3


From 70a5f9e266cf1f544f33ce56f1eee634d79f3030 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Wed, 4 Sep 2024 17:03:43 +0100
Subject: btrfs: simplify tracking progress for the extent map shrinker

Now that the extent map shrinker can only be run by a single task (as a
work queue item) there is no need to keep the progress of the shrinker
protected by a spinlock and passing the progress to trace events as
parameters. So remove the lock and simplify the arguments for the trace
events.

Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 include/trace/events/btrfs.h | 21 ++++++++++-----------
 1 file changed, 10 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h
index bd515415ea8b..879016b4e391 100644
--- a/include/trace/events/btrfs.h
+++ b/include/trace/events/btrfs.h
@@ -2555,10 +2555,9 @@ TRACE_EVENT(btrfs_extent_map_shrinker_count,
 
 TRACE_EVENT(btrfs_extent_map_shrinker_scan_enter,
 
-	TP_PROTO(const struct btrfs_fs_info *fs_info, long nr_to_scan, long nr,
-		 u64 last_root_id, u64 last_ino),
+	TP_PROTO(const struct btrfs_fs_info *fs_info, long nr),
 
-	TP_ARGS(fs_info, nr_to_scan, nr, last_root_id, last_ino),
+	TP_ARGS(fs_info, nr),
 
 	TP_STRUCT__entry_btrfs(
 		__field(	long,	nr_to_scan	)
@@ -2568,10 +2567,11 @@ TRACE_EVENT(btrfs_extent_map_shrinker_scan_enter,
 	),
 
 	TP_fast_assign_btrfs(fs_info,
-		__entry->nr_to_scan	= nr_to_scan;
+		__entry->nr_to_scan	= \
+		     atomic64_read(&fs_info->extent_map_shrinker_nr_to_scan);
 		__entry->nr		= nr;
-		__entry->last_root_id	= last_root_id;
-		__entry->last_ino	= last_ino;
+		__entry->last_root_id	= fs_info->extent_map_shrinker_last_root;
+		__entry->last_ino	= fs_info->extent_map_shrinker_last_ino;
 	),
 
 	TP_printk_btrfs("nr_to_scan=%ld nr=%ld last_root=%llu(%s) last_ino=%llu",
@@ -2581,10 +2581,9 @@ TRACE_EVENT(btrfs_extent_map_shrinker_scan_enter,
 
 TRACE_EVENT(btrfs_extent_map_shrinker_scan_exit,
 
-	TP_PROTO(const struct btrfs_fs_info *fs_info, long nr_dropped, long nr,
-		 u64 last_root_id, u64 last_ino),
+	TP_PROTO(const struct btrfs_fs_info *fs_info, long nr_dropped, long nr),
 
-	TP_ARGS(fs_info, nr_dropped, nr, last_root_id, last_ino),
+	TP_ARGS(fs_info, nr_dropped, nr),
 
 	TP_STRUCT__entry_btrfs(
 		__field(	long,	nr_dropped	)
@@ -2596,8 +2595,8 @@ TRACE_EVENT(btrfs_extent_map_shrinker_scan_exit,
 	TP_fast_assign_btrfs(fs_info,
 		__entry->nr_dropped	= nr_dropped;
 		__entry->nr		= nr;
-		__entry->last_root_id	= last_root_id;
-		__entry->last_ino	= last_ino;
+		__entry->last_root_id	= fs_info->extent_map_shrinker_last_root;
+		__entry->last_ino	= fs_info->extent_map_shrinker_last_ino;
 	),
 
 	TP_printk_btrfs("nr_dropped=%ld nr=%ld last_root=%llu(%s) last_ino=%llu",
-- 
cgit v1.2.3


From e7fa845010f12bb98251a2bf9dcf39fd601f9424 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Thu, 5 Sep 2024 11:31:49 +0100
Subject: btrfs: rename extent map shrinker members from struct btrfs_fs_info

The names for the members of struct btrfs_fs_info related to the extent
map shrinker are a bit too long, so rename them to be shorter by replacing
the "extent_map_" prefix with the "em_" prefix.

Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 include/trace/events/btrfs.h | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h
index 879016b4e391..4df93ca9b7a8 100644
--- a/include/trace/events/btrfs.h
+++ b/include/trace/events/btrfs.h
@@ -2568,10 +2568,10 @@ TRACE_EVENT(btrfs_extent_map_shrinker_scan_enter,
 
 	TP_fast_assign_btrfs(fs_info,
 		__entry->nr_to_scan	= \
-		     atomic64_read(&fs_info->extent_map_shrinker_nr_to_scan);
+		     atomic64_read(&fs_info->em_shrinker_nr_to_scan);
 		__entry->nr		= nr;
-		__entry->last_root_id	= fs_info->extent_map_shrinker_last_root;
-		__entry->last_ino	= fs_info->extent_map_shrinker_last_ino;
+		__entry->last_root_id	= fs_info->em_shrinker_last_root;
+		__entry->last_ino	= fs_info->em_shrinker_last_ino;
 	),
 
 	TP_printk_btrfs("nr_to_scan=%ld nr=%ld last_root=%llu(%s) last_ino=%llu",
@@ -2595,8 +2595,8 @@ TRACE_EVENT(btrfs_extent_map_shrinker_scan_exit,
 	TP_fast_assign_btrfs(fs_info,
 		__entry->nr_dropped	= nr_dropped;
 		__entry->nr		= nr;
-		__entry->last_root_id	= fs_info->extent_map_shrinker_last_root;
-		__entry->last_ino	= fs_info->extent_map_shrinker_last_ino;
+		__entry->last_root_id	= fs_info->em_shrinker_last_root;
+		__entry->last_ino	= fs_info->em_shrinker_last_ino;
 	),
 
 	TP_printk_btrfs("nr_dropped=%ld nr=%ld last_root=%llu(%s) last_ino=%llu",
-- 
cgit v1.2.3


From df3b8ca604f224eb4cd51669416ad4d607682273 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Mon, 4 Nov 2024 16:12:04 +0000
Subject: io_uring/cmd: let cmds to know about dying task

When the taks that submitted a request is dying, a task work for that
request might get run by a kernel thread or even worse by a half
dismantled task. We can't just cancel the task work without running the
callback as the cmd might need to do some clean up, so pass a flag
instead. If set, it's not safe to access any task resources and the
callback is expected to cancel the cmd ASAP.

Reviewed-by: Jens Axboe <axboe@kernel.dk>
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 include/linux/io_uring_types.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index 4b9ba523978d..2ee5dc105b58 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -37,6 +37,7 @@ enum io_uring_cmd_flags {
 	/* set when uring wants to cancel a previously issued command */
 	IO_URING_F_CANCEL		= (1 << 11),
 	IO_URING_F_COMPAT		= (1 << 12),
+	IO_URING_F_TASK_DEAD		= (1 << 13),
 };
 
 struct io_wq_work_node {
-- 
cgit v1.2.3


From 6c83d153ed86eb17c46eafe4e78af4ce2071a052 Mon Sep 17 00:00:00 2001
From: David Sterba <dsterba@suse.com>
Date: Mon, 21 Oct 2024 21:28:26 +0200
Subject: btrfs: add new ioctl to wait for cleaned subvolumes

Add a new unprivileged ioctl that will let the command
'btrfs subvolume sync' work without the (privileged) SEARCH_TREE ioctl.

There are several modes of operation, where the most common ones are to
wait on a specific subvolume or all currently queued for cleaning. This
is utilized e.g. in backup applications that delete subvolumes and wait
until they're cleaned to check for remaining space.

The other modes are for flexibility, e.g. for monitoring or
checkpoints in the queue of deleted subvolumes, again without the need
to use SEARCH_TREE.

Notes:

- waiting is interruptible, the timeout is set to 1 second and is not
  configurable

- repeated calls to the ioctl see a different state, so this is
  inherently racy when using e.g. the count or peek next/last

Use cases:

- a subvolume A was deleted, wait for cleaning (WAIT_FOR_ONE)

- a bunch of subvolumes were deleted, wait for all (WAIT_FOR_QUEUED or
  PEEK_LAST + WAIT_FOR_ONE)

- count how many are queued (not blocking), for monitoring purposes

- report progress (PEEK_NEXT), may miss some if cleaning is quick

- own waiting in user space (PEEK_LAST until it's 0)

Signed-off-by: David Sterba <dsterba@suse.com>
---
 include/uapi/linux/btrfs.h | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h
index cdf6ad872149..d3b222d7af24 100644
--- a/include/uapi/linux/btrfs.h
+++ b/include/uapi/linux/btrfs.h
@@ -1049,6 +1049,29 @@ struct btrfs_ioctl_encoded_io_args {
 #define BTRFS_ENCODED_IO_ENCRYPTION_NONE 0
 #define BTRFS_ENCODED_IO_ENCRYPTION_TYPES 1
 
+/*
+ * Wait for subvolume cleaning process. This queries the kernel queue and it
+ * can change between the calls.
+ *
+ * - FOR_ONE	- specify the subvolid
+ * - FOR_QUEUED - wait for all currently queued
+ * - COUNT	- count number of queued
+ * - PEEK_FIRST - read which is the first in the queue (to be cleaned or being
+ * 		  cleaned already), or 0 if the queue is empty
+ * - PEEK_LAST  - read the last subvolid in the queue, or 0 if the queue is empty
+ */
+struct btrfs_ioctl_subvol_wait {
+	__u64 subvolid;
+	__u32 mode;
+	__u32 count;
+};
+
+#define BTRFS_SUBVOL_SYNC_WAIT_FOR_ONE		(0)
+#define BTRFS_SUBVOL_SYNC_WAIT_FOR_QUEUED	(1)
+#define BTRFS_SUBVOL_SYNC_COUNT			(2)
+#define BTRFS_SUBVOL_SYNC_PEEK_FIRST		(3)
+#define BTRFS_SUBVOL_SYNC_PEEK_LAST		(4)
+
 /* Error codes as returned by the kernel */
 enum btrfs_err_code {
 	BTRFS_ERROR_DEV_RAID1_MIN_NOT_MET = 1,
@@ -1181,6 +1204,8 @@ enum btrfs_err_code {
 				    struct btrfs_ioctl_encoded_io_args)
 #define BTRFS_IOC_ENCODED_WRITE _IOW(BTRFS_IOCTL_MAGIC, 64, \
 				     struct btrfs_ioctl_encoded_io_args)
+#define BTRFS_IOC_SUBVOL_SYNC_WAIT _IOW(BTRFS_IOCTL_MAGIC, 65, \
+					struct btrfs_ioctl_subvol_wait)
 
 #ifdef __cplusplus
 }
-- 
cgit v1.2.3


From cbe82e140bb76e1aa9f808cc841654a25b70d4e5 Mon Sep 17 00:00:00 2001
From: Guo Ren <guoren@linux.alibaba.com>
Date: Sun, 3 Nov 2024 15:51:49 +0100
Subject: asm-generic: ticket-lock: Reuse arch_spinlock_t of qspinlock

The arch_spinlock_t of qspinlock has contained the atomic_t val, which
satisfies the ticket-lock requirement. Thus, unify the arch_spinlock_t
into qspinlock_types.h. This is the preparation for the next combo
spinlock.

Reviewed-by: Leonardo Bras <leobras@redhat.com>
Suggested-by: Arnd Bergmann <arnd@arndb.de>
Link: https://lore.kernel.org/linux-riscv/CAK8P3a2rnz9mQqhN6-e0CGUUv9rntRELFdxt_weiD7FxH7fkfQ@mail.gmail.com/
Signed-off-by: Guo Ren <guoren@kernel.org>
Signed-off-by: Guo Ren <guoren@linux.alibaba.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Andrew Jones <ajones@ventanamicro.com>
Reviewed-by: Andrea Parri <parri.andrea@gmail.com>
Link: https://lore.kernel.org/r/20241103145153.105097-10-alexghiti@rivosinc.com
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
---
 include/asm-generic/spinlock.h       | 14 +++++++-------
 include/asm-generic/spinlock_types.h | 12 ++----------
 2 files changed, 9 insertions(+), 17 deletions(-)

(limited to 'include')

diff --git a/include/asm-generic/spinlock.h b/include/asm-generic/spinlock.h
index 90803a826ba0..4773334ee638 100644
--- a/include/asm-generic/spinlock.h
+++ b/include/asm-generic/spinlock.h
@@ -32,7 +32,7 @@
 
 static __always_inline void arch_spin_lock(arch_spinlock_t *lock)
 {
-	u32 val = atomic_fetch_add(1<<16, lock);
+	u32 val = atomic_fetch_add(1<<16, &lock->val);
 	u16 ticket = val >> 16;
 
 	if (ticket == (u16)val)
@@ -46,31 +46,31 @@ static __always_inline void arch_spin_lock(arch_spinlock_t *lock)
 	 * have no outstanding writes due to the atomic_fetch_add() the extra
 	 * orderings are free.
 	 */
-	atomic_cond_read_acquire(lock, ticket == (u16)VAL);
+	atomic_cond_read_acquire(&lock->val, ticket == (u16)VAL);
 	smp_mb();
 }
 
 static __always_inline bool arch_spin_trylock(arch_spinlock_t *lock)
 {
-	u32 old = atomic_read(lock);
+	u32 old = atomic_read(&lock->val);
 
 	if ((old >> 16) != (old & 0xffff))
 		return false;
 
-	return atomic_try_cmpxchg(lock, &old, old + (1<<16)); /* SC, for RCsc */
+	return atomic_try_cmpxchg(&lock->val, &old, old + (1<<16)); /* SC, for RCsc */
 }
 
 static __always_inline void arch_spin_unlock(arch_spinlock_t *lock)
 {
 	u16 *ptr = (u16 *)lock + IS_ENABLED(CONFIG_CPU_BIG_ENDIAN);
-	u32 val = atomic_read(lock);
+	u32 val = atomic_read(&lock->val);
 
 	smp_store_release(ptr, (u16)val + 1);
 }
 
 static __always_inline int arch_spin_value_unlocked(arch_spinlock_t lock)
 {
-	u32 val = lock.counter;
+	u32 val = lock.val.counter;
 
 	return ((val >> 16) == (val & 0xffff));
 }
@@ -84,7 +84,7 @@ static __always_inline int arch_spin_is_locked(arch_spinlock_t *lock)
 
 static __always_inline int arch_spin_is_contended(arch_spinlock_t *lock)
 {
-	u32 val = atomic_read(lock);
+	u32 val = atomic_read(&lock->val);
 
 	return (s16)((val >> 16) - (val & 0xffff)) > 1;
 }
diff --git a/include/asm-generic/spinlock_types.h b/include/asm-generic/spinlock_types.h
index 8962bb730945..f534aa5de394 100644
--- a/include/asm-generic/spinlock_types.h
+++ b/include/asm-generic/spinlock_types.h
@@ -3,15 +3,7 @@
 #ifndef __ASM_GENERIC_SPINLOCK_TYPES_H
 #define __ASM_GENERIC_SPINLOCK_TYPES_H
 
-#include <linux/types.h>
-typedef atomic_t arch_spinlock_t;
-
-/*
- * qrwlock_types depends on arch_spinlock_t, so we must typedef that before the
- * include.
- */
-#include <asm/qrwlock_types.h>
-
-#define __ARCH_SPIN_LOCK_UNLOCKED	ATOMIC_INIT(0)
+#include <asm-generic/qspinlock_types.h>
+#include <asm-generic/qrwlock_types.h>
 
 #endif /* __ASM_GENERIC_SPINLOCK_TYPES_H */
-- 
cgit v1.2.3


From 22c33321e260c8b4c1877b2cc0c4e26a0c74c23f Mon Sep 17 00:00:00 2001
From: Guo Ren <guoren@linux.alibaba.com>
Date: Sun, 3 Nov 2024 15:51:50 +0100
Subject: asm-generic: ticket-lock: Add separate ticket-lock.h

Add a separate ticket-lock.h to include multiple spinlock versions and
select one at compile time or runtime.

Reviewed-by: Leonardo Bras <leobras@redhat.com>
Suggested-by: Arnd Bergmann <arnd@arndb.de>
Link: https://lore.kernel.org/linux-riscv/CAK8P3a2rnz9mQqhN6-e0CGUUv9rntRELFdxt_weiD7FxH7fkfQ@mail.gmail.com/
Signed-off-by: Guo Ren <guoren@linux.alibaba.com>
Signed-off-by: Guo Ren <guoren@kernel.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Andrew Jones <ajones@ventanamicro.com>
Reviewed-by: Andrea Parri <parri.andrea@gmail.com>
Link: https://lore.kernel.org/r/20241103145153.105097-11-alexghiti@rivosinc.com
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
---
 include/asm-generic/spinlock.h        |  87 +---------------------------
 include/asm-generic/ticket_spinlock.h | 103 ++++++++++++++++++++++++++++++++++
 2 files changed, 104 insertions(+), 86 deletions(-)
 create mode 100644 include/asm-generic/ticket_spinlock.h

(limited to 'include')

diff --git a/include/asm-generic/spinlock.h b/include/asm-generic/spinlock.h
index 4773334ee638..970590baf61b 100644
--- a/include/asm-generic/spinlock.h
+++ b/include/asm-generic/spinlock.h
@@ -1,94 +1,9 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 
-/*
- * 'Generic' ticket-lock implementation.
- *
- * It relies on atomic_fetch_add() having well defined forward progress
- * guarantees under contention. If your architecture cannot provide this, stick
- * to a test-and-set lock.
- *
- * It also relies on atomic_fetch_add() being safe vs smp_store_release() on a
- * sub-word of the value. This is generally true for anything LL/SC although
- * you'd be hard pressed to find anything useful in architecture specifications
- * about this. If your architecture cannot do this you might be better off with
- * a test-and-set.
- *
- * It further assumes atomic_*_release() + atomic_*_acquire() is RCpc and hence
- * uses atomic_fetch_add() which is RCsc to create an RCsc hot path, along with
- * a full fence after the spin to upgrade the otherwise-RCpc
- * atomic_cond_read_acquire().
- *
- * The implementation uses smp_cond_load_acquire() to spin, so if the
- * architecture has WFE like instructions to sleep instead of poll for word
- * modifications be sure to implement that (see ARM64 for example).
- *
- */
-
 #ifndef __ASM_GENERIC_SPINLOCK_H
 #define __ASM_GENERIC_SPINLOCK_H
 
-#include <linux/atomic.h>
-#include <asm-generic/spinlock_types.h>
-
-static __always_inline void arch_spin_lock(arch_spinlock_t *lock)
-{
-	u32 val = atomic_fetch_add(1<<16, &lock->val);
-	u16 ticket = val >> 16;
-
-	if (ticket == (u16)val)
-		return;
-
-	/*
-	 * atomic_cond_read_acquire() is RCpc, but rather than defining a
-	 * custom cond_read_rcsc() here we just emit a full fence.  We only
-	 * need the prior reads before subsequent writes ordering from
-	 * smb_mb(), but as atomic_cond_read_acquire() just emits reads and we
-	 * have no outstanding writes due to the atomic_fetch_add() the extra
-	 * orderings are free.
-	 */
-	atomic_cond_read_acquire(&lock->val, ticket == (u16)VAL);
-	smp_mb();
-}
-
-static __always_inline bool arch_spin_trylock(arch_spinlock_t *lock)
-{
-	u32 old = atomic_read(&lock->val);
-
-	if ((old >> 16) != (old & 0xffff))
-		return false;
-
-	return atomic_try_cmpxchg(&lock->val, &old, old + (1<<16)); /* SC, for RCsc */
-}
-
-static __always_inline void arch_spin_unlock(arch_spinlock_t *lock)
-{
-	u16 *ptr = (u16 *)lock + IS_ENABLED(CONFIG_CPU_BIG_ENDIAN);
-	u32 val = atomic_read(&lock->val);
-
-	smp_store_release(ptr, (u16)val + 1);
-}
-
-static __always_inline int arch_spin_value_unlocked(arch_spinlock_t lock)
-{
-	u32 val = lock.val.counter;
-
-	return ((val >> 16) == (val & 0xffff));
-}
-
-static __always_inline int arch_spin_is_locked(arch_spinlock_t *lock)
-{
-	arch_spinlock_t val = READ_ONCE(*lock);
-
-	return !arch_spin_value_unlocked(val);
-}
-
-static __always_inline int arch_spin_is_contended(arch_spinlock_t *lock)
-{
-	u32 val = atomic_read(&lock->val);
-
-	return (s16)((val >> 16) - (val & 0xffff)) > 1;
-}
-
+#include <asm-generic/ticket_spinlock.h>
 #include <asm/qrwlock.h>
 
 #endif /* __ASM_GENERIC_SPINLOCK_H */
diff --git a/include/asm-generic/ticket_spinlock.h b/include/asm-generic/ticket_spinlock.h
new file mode 100644
index 000000000000..cfcff22b37b3
--- /dev/null
+++ b/include/asm-generic/ticket_spinlock.h
@@ -0,0 +1,103 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+/*
+ * 'Generic' ticket-lock implementation.
+ *
+ * It relies on atomic_fetch_add() having well defined forward progress
+ * guarantees under contention. If your architecture cannot provide this, stick
+ * to a test-and-set lock.
+ *
+ * It also relies on atomic_fetch_add() being safe vs smp_store_release() on a
+ * sub-word of the value. This is generally true for anything LL/SC although
+ * you'd be hard pressed to find anything useful in architecture specifications
+ * about this. If your architecture cannot do this you might be better off with
+ * a test-and-set.
+ *
+ * It further assumes atomic_*_release() + atomic_*_acquire() is RCpc and hence
+ * uses atomic_fetch_add() which is RCsc to create an RCsc hot path, along with
+ * a full fence after the spin to upgrade the otherwise-RCpc
+ * atomic_cond_read_acquire().
+ *
+ * The implementation uses smp_cond_load_acquire() to spin, so if the
+ * architecture has WFE like instructions to sleep instead of poll for word
+ * modifications be sure to implement that (see ARM64 for example).
+ *
+ */
+
+#ifndef __ASM_GENERIC_TICKET_SPINLOCK_H
+#define __ASM_GENERIC_TICKET_SPINLOCK_H
+
+#include <linux/atomic.h>
+#include <asm-generic/spinlock_types.h>
+
+static __always_inline void ticket_spin_lock(arch_spinlock_t *lock)
+{
+	u32 val = atomic_fetch_add(1<<16, &lock->val);
+	u16 ticket = val >> 16;
+
+	if (ticket == (u16)val)
+		return;
+
+	/*
+	 * atomic_cond_read_acquire() is RCpc, but rather than defining a
+	 * custom cond_read_rcsc() here we just emit a full fence.  We only
+	 * need the prior reads before subsequent writes ordering from
+	 * smb_mb(), but as atomic_cond_read_acquire() just emits reads and we
+	 * have no outstanding writes due to the atomic_fetch_add() the extra
+	 * orderings are free.
+	 */
+	atomic_cond_read_acquire(&lock->val, ticket == (u16)VAL);
+	smp_mb();
+}
+
+static __always_inline bool ticket_spin_trylock(arch_spinlock_t *lock)
+{
+	u32 old = atomic_read(&lock->val);
+
+	if ((old >> 16) != (old & 0xffff))
+		return false;
+
+	return atomic_try_cmpxchg(&lock->val, &old, old + (1<<16)); /* SC, for RCsc */
+}
+
+static __always_inline void ticket_spin_unlock(arch_spinlock_t *lock)
+{
+	u16 *ptr = (u16 *)lock + IS_ENABLED(CONFIG_CPU_BIG_ENDIAN);
+	u32 val = atomic_read(&lock->val);
+
+	smp_store_release(ptr, (u16)val + 1);
+}
+
+static __always_inline int ticket_spin_value_unlocked(arch_spinlock_t lock)
+{
+	u32 val = lock.val.counter;
+
+	return ((val >> 16) == (val & 0xffff));
+}
+
+static __always_inline int ticket_spin_is_locked(arch_spinlock_t *lock)
+{
+	arch_spinlock_t val = READ_ONCE(*lock);
+
+	return !ticket_spin_value_unlocked(val);
+}
+
+static __always_inline int ticket_spin_is_contended(arch_spinlock_t *lock)
+{
+	u32 val = atomic_read(&lock->val);
+
+	return (s16)((val >> 16) - (val & 0xffff)) > 1;
+}
+
+/*
+ * Remapping spinlock architecture specific functions to the corresponding
+ * ticket spinlock functions.
+ */
+#define arch_spin_is_locked(l)		ticket_spin_is_locked(l)
+#define arch_spin_is_contended(l)	ticket_spin_is_contended(l)
+#define arch_spin_value_unlocked(l)	ticket_spin_value_unlocked(l)
+#define arch_spin_lock(l)		ticket_spin_lock(l)
+#define arch_spin_trylock(l)		ticket_spin_trylock(l)
+#define arch_spin_unlock(l)		ticket_spin_unlock(l)
+
+#endif /* __ASM_GENERIC_TICKET_SPINLOCK_H */
-- 
cgit v1.2.3


From ab83647fadae2f1f723119dc066b39a461d6d288 Mon Sep 17 00:00:00 2001
From: Alexandre Ghiti <alexghiti@rivosinc.com>
Date: Sun, 3 Nov 2024 15:51:53 +0100
Subject: riscv: Add qspinlock support

In order to produce a generic kernel, a user can select
CONFIG_COMBO_SPINLOCKS which will fallback at runtime to the ticket
spinlock implementation if Zabha or Ziccrse are not present.

Note that we can't use alternatives here because the discovery of
extensions is done too late and we need to start with the qspinlock
implementation because the ticket spinlock implementation would pollute
the spinlock value, so let's use static keys.

This is largely based on Guo's work and Leonardo reviews at [1].

Link: https://lore.kernel.org/linux-riscv/20231225125847.2778638-1-guoren@kernel.org/ [1]
Signed-off-by: Guo Ren <guoren@kernel.org>
Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
Reviewed-by: Andrea Parri <parri.andrea@gmail.com>
Link: https://lore.kernel.org/r/20241103145153.105097-14-alexghiti@rivosinc.com
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
---
 include/asm-generic/qspinlock.h       | 2 ++
 include/asm-generic/ticket_spinlock.h | 2 ++
 2 files changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/asm-generic/qspinlock.h b/include/asm-generic/qspinlock.h
index 0655aa5b57b2..bf47cca2c375 100644
--- a/include/asm-generic/qspinlock.h
+++ b/include/asm-generic/qspinlock.h
@@ -136,6 +136,7 @@ static __always_inline bool virt_spin_lock(struct qspinlock *lock)
 }
 #endif
 
+#ifndef __no_arch_spinlock_redefine
 /*
  * Remapping spinlock architecture specific functions to the corresponding
  * queued spinlock functions.
@@ -146,5 +147,6 @@ static __always_inline bool virt_spin_lock(struct qspinlock *lock)
 #define arch_spin_lock(l)		queued_spin_lock(l)
 #define arch_spin_trylock(l)		queued_spin_trylock(l)
 #define arch_spin_unlock(l)		queued_spin_unlock(l)
+#endif
 
 #endif /* __ASM_GENERIC_QSPINLOCK_H */
diff --git a/include/asm-generic/ticket_spinlock.h b/include/asm-generic/ticket_spinlock.h
index cfcff22b37b3..325779970d8a 100644
--- a/include/asm-generic/ticket_spinlock.h
+++ b/include/asm-generic/ticket_spinlock.h
@@ -89,6 +89,7 @@ static __always_inline int ticket_spin_is_contended(arch_spinlock_t *lock)
 	return (s16)((val >> 16) - (val & 0xffff)) > 1;
 }
 
+#ifndef __no_arch_spinlock_redefine
 /*
  * Remapping spinlock architecture specific functions to the corresponding
  * ticket spinlock functions.
@@ -99,5 +100,6 @@ static __always_inline int ticket_spin_is_contended(arch_spinlock_t *lock)
 #define arch_spin_lock(l)		ticket_spin_lock(l)
 #define arch_spin_trylock(l)		ticket_spin_trylock(l)
 #define arch_spin_unlock(l)		ticket_spin_unlock(l)
+#endif
 
 #endif /* __ASM_GENERIC_TICKET_SPINLOCK_H */
-- 
cgit v1.2.3


From 0ef2b9e698dbf9ba78f67952a747f35eb7060470 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 4 Nov 2024 07:26:31 +0100
Subject: block: lift bio_is_zone_append to bio.h

Make bio_is_zone_append globally available, because file systems need
to use to check for a zone append bio in their end_io handlers to deal
with the block layer emulation.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Damien Le Moal <dlemoal@kernel.org>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Link: https://lore.kernel.org/r/20241104062647.91160-4-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/bio.h | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

(limited to 'include')

diff --git a/include/linux/bio.h b/include/linux/bio.h
index 4a1bf43ca53d..60830a6a5939 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -675,6 +675,23 @@ static inline void bio_clear_polled(struct bio *bio)
 	bio->bi_opf &= ~REQ_POLLED;
 }
 
+/**
+ * bio_is_zone_append - is this a zone append bio?
+ * @bio:	bio to check
+ *
+ * Check if @bio is a zone append operation.  Core block layer code and end_io
+ * handlers must use this instead of an open coded REQ_OP_ZONE_APPEND check
+ * because the block layer can rewrite REQ_OP_ZONE_APPEND to REQ_OP_WRITE if
+ * it is not natively supported.
+ */
+static inline bool bio_is_zone_append(struct bio *bio)
+{
+	if (!IS_ENABLED(CONFIG_BLK_DEV_ZONED))
+		return false;
+	return bio_op(bio) == REQ_OP_ZONE_APPEND ||
+		bio_flagged(bio, BIO_EMULATES_ZONE_APPEND);
+}
+
 struct bio *blk_next_bio(struct bio *bio, struct block_device *bdev,
 		unsigned int nr_pages, blk_opf_t opf, gfp_t gfp);
 struct bio *bio_chain_and_submit(struct bio *prev, struct bio *new);
-- 
cgit v1.2.3


From d920179b3d4842a0e27cae54fdddbe5ef3977e73 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Fri, 8 Nov 2024 14:45:34 +0100
Subject: bpf: Add support for uprobe multi session attach

Adding support to attach BPF program for entry and return probe
of the same function. This is common use case which at the moment
requires to create two uprobe multi links.

Adding new BPF_TRACE_UPROBE_SESSION attach type that instructs
kernel to attach single link program to both entry and exit probe.

It's possible to control execution of the BPF program on return
probe simply by returning zero or non zero from the entry BPF
program execution to execute or not the BPF program on return
probe respectively.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20241108134544.480660-4-jolsa@kernel.org
---
 include/uapi/linux/bpf.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index f28b6527e815..4162afc6b5d0 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -1116,6 +1116,7 @@ enum bpf_attach_type {
 	BPF_NETKIT_PRIMARY,
 	BPF_NETKIT_PEER,
 	BPF_TRACE_KPROBE_SESSION,
+	BPF_TRACE_UPROBE_SESSION,
 	__MAX_BPF_ATTACH_TYPE
 };
 
-- 
cgit v1.2.3


From f6b9a69a9e56b2083aca8a925fc1a28eb698e3ed Mon Sep 17 00:00:00 2001
From: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Date: Sat, 9 Nov 2024 15:14:29 -0800
Subject: bpf: Refactor active lock management

When bpf_spin_lock was introduced originally, there was deliberation on
whether to use an array of lock IDs, but since bpf_spin_lock is limited
to holding a single lock at any given time, we've been using a single ID
to identify the held lock.

In preparation for introducing spin locks that can be taken multiple
times, introduce support for acquiring multiple lock IDs. For this
purpose, reuse the acquired_refs array and store both lock and pointer
references. We tag the entry with REF_TYPE_PTR or REF_TYPE_LOCK to
disambiguate and find the relevant entry. The ptr field is used to track
the map_ptr or btf (for bpf_obj_new allocations) to ensure locks can be
matched with protected fields within the same "allocation", i.e.
bpf_obj_new object or map value.

The struct active_lock is changed to an int as the state is part of the
acquired_refs array, and we only need active_lock as a cheap way of
detecting lock presence.

Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20241109231430.2475236-2-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
---
 include/linux/bpf_verifier.h | 53 +++++++++++++++++++++-----------------------
 1 file changed, 25 insertions(+), 28 deletions(-)

(limited to 'include')

diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 4513372c5bc8..d84beed92ae4 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -48,22 +48,6 @@ enum bpf_reg_liveness {
 	REG_LIVE_DONE = 0x8, /* liveness won't be updating this register anymore */
 };
 
-/* For every reg representing a map value or allocated object pointer,
- * we consider the tuple of (ptr, id) for them to be unique in verifier
- * context and conside them to not alias each other for the purposes of
- * tracking lock state.
- */
-struct bpf_active_lock {
-	/* This can either be reg->map_ptr or reg->btf. If ptr is NULL,
-	 * there's no active lock held, and other fields have no
-	 * meaning. If non-NULL, it indicates that a lock is held and
-	 * id member has the reg->id of the register which can be >= 0.
-	 */
-	void *ptr;
-	/* This will be reg->id */
-	u32 id;
-};
-
 #define ITER_PREFIX "bpf_iter_"
 
 enum bpf_iter_state {
@@ -266,6 +250,13 @@ struct bpf_stack_state {
 };
 
 struct bpf_reference_state {
+	/* Each reference object has a type. Ensure REF_TYPE_PTR is zero to
+	 * default to pointer reference on zero initialization of a state.
+	 */
+	enum ref_state_type {
+		REF_TYPE_PTR = 0,
+		REF_TYPE_LOCK,
+	} type;
 	/* Track each reference created with a unique id, even if the same
 	 * instruction creates the reference multiple times (eg, via CALL).
 	 */
@@ -274,17 +265,23 @@ struct bpf_reference_state {
 	 * is used purely to inform the user of a reference leak.
 	 */
 	int insn_idx;
-	/* There can be a case like:
-	 * main (frame 0)
-	 *  cb (frame 1)
-	 *   func (frame 3)
-	 *    cb (frame 4)
-	 * Hence for frame 4, if callback_ref just stored boolean, it would be
-	 * impossible to distinguish nested callback refs. Hence store the
-	 * frameno and compare that to callback_ref in check_reference_leak when
-	 * exiting a callback function.
-	 */
-	int callback_ref;
+	union {
+		/* There can be a case like:
+		 * main (frame 0)
+		 *  cb (frame 1)
+		 *   func (frame 3)
+		 *    cb (frame 4)
+		 * Hence for frame 4, if callback_ref just stored boolean, it would be
+		 * impossible to distinguish nested callback refs. Hence store the
+		 * frameno and compare that to callback_ref in check_reference_leak when
+		 * exiting a callback function.
+		 */
+		int callback_ref;
+		/* Use to keep track of the source object of a lock, to ensure
+		 * it matches on unlock.
+		 */
+		void *ptr;
+	};
 };
 
 struct bpf_retval_range {
@@ -332,6 +329,7 @@ struct bpf_func_state {
 
 	/* The following fields should be last. See copy_func_state() */
 	int acquired_refs;
+	int active_locks;
 	struct bpf_reference_state *refs;
 	/* The state of the stack. Each element of the array describes BPF_REG_SIZE
 	 * (i.e. 8) bytes worth of stack memory.
@@ -434,7 +432,6 @@ struct bpf_verifier_state {
 	u32 insn_idx;
 	u32 curframe;
 
-	struct bpf_active_lock active_lock;
 	bool speculative;
 	bool active_rcu_lock;
 	u32 active_preempt_lock;
-- 
cgit v1.2.3


From ae6e3a273f590a2b64f14a9fab3546c3a8f44ed4 Mon Sep 17 00:00:00 2001
From: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Date: Sat, 9 Nov 2024 15:14:30 -0800
Subject: bpf: Drop special callback reference handling

Logic to prevent callbacks from acquiring new references for the program
(i.e. leaving acquired references), and releasing caller references
(i.e. those acquired in parent frames) was introduced in commit
9d9d00ac29d0 ("bpf: Fix reference state management for synchronous callbacks").

This was necessary because back then, the verifier simulated each
callback once (that could potentially be executed N times, where N can
be zero). This meant that callbacks that left lingering resources or
cleared caller resources could do it more than once, operating on
undefined state or leaking memory.

With the fixes to callback verification in commit
ab5cfac139ab ("bpf: verify callbacks as if they are called unknown number of times"),
all of this extra logic is no longer necessary. Hence, drop it as part
of this commit.

Cc: Eduard Zingerman <eddyz87@gmail.com>
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20241109231430.2475236-3-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
---
 include/linux/bpf_verifier.h | 21 ++++-----------------
 1 file changed, 4 insertions(+), 17 deletions(-)

(limited to 'include')

diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index d84beed92ae4..3a74033d49c4 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -265,23 +265,10 @@ struct bpf_reference_state {
 	 * is used purely to inform the user of a reference leak.
 	 */
 	int insn_idx;
-	union {
-		/* There can be a case like:
-		 * main (frame 0)
-		 *  cb (frame 1)
-		 *   func (frame 3)
-		 *    cb (frame 4)
-		 * Hence for frame 4, if callback_ref just stored boolean, it would be
-		 * impossible to distinguish nested callback refs. Hence store the
-		 * frameno and compare that to callback_ref in check_reference_leak when
-		 * exiting a callback function.
-		 */
-		int callback_ref;
-		/* Use to keep track of the source object of a lock, to ensure
-		 * it matches on unlock.
-		 */
-		void *ptr;
-	};
+	/* Use to keep track of the source object of a lock, to ensure
+	 * it matches on unlock.
+	 */
+	void *ptr;
 };
 
 struct bpf_retval_range {
-- 
cgit v1.2.3


From 559218d43ec9dde3d2847c7aa127e88d6ab1c9ed Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 8 Nov 2024 16:46:51 +0100
Subject: block: pre-calculate max_zone_append_sectors

max_zone_append_sectors differs from all other queue limits in that the
final value used is not stored in the queue_limits but needs to be
obtained using queue_limits_max_zone_append_sectors helper.  This not
only adds (tiny) extra overhead to the I/O path, but also can be easily
forgotten in file system code.

Add a new max_hw_zone_append_sectors value to queue_limits which is
set by the driver, and calculate max_zone_append_sectors from that and
the other inputs in blk_validate_zoned_limits, similar to how
max_sectors is calculated to fix this.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20241104073955.112324-3-hch@lst.de
Reviewed-by: Damien Le Moal <dlemoal@kernel.org>
Link: https://lore.kernel.org/r/20241108154657.845768-2-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blkdev.h | 21 +++------------------
 1 file changed, 3 insertions(+), 18 deletions(-)

(limited to 'include')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 1b51a7c92e9b..65f37ae70712 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -375,6 +375,7 @@ struct queue_limits {
 	unsigned int		max_user_discard_sectors;
 	unsigned int		max_secure_erase_sectors;
 	unsigned int		max_write_zeroes_sectors;
+	unsigned int		max_hw_zone_append_sectors;
 	unsigned int		max_zone_append_sectors;
 	unsigned int		discard_granularity;
 	unsigned int		discard_alignment;
@@ -1208,25 +1209,9 @@ static inline unsigned int queue_max_segment_size(const struct request_queue *q)
 	return q->limits.max_segment_size;
 }
 
-static inline unsigned int
-queue_limits_max_zone_append_sectors(const struct queue_limits *l)
-{
-	unsigned int max_sectors = min(l->chunk_sectors, l->max_hw_sectors);
-
-	return min_not_zero(l->max_zone_append_sectors, max_sectors);
-}
-
-static inline unsigned int queue_max_zone_append_sectors(struct request_queue *q)
-{
-	if (!blk_queue_is_zoned(q))
-		return 0;
-
-	return queue_limits_max_zone_append_sectors(&q->limits);
-}
-
 static inline bool queue_emulates_zone_append(struct request_queue *q)
 {
-	return blk_queue_is_zoned(q) && !q->limits.max_zone_append_sectors;
+	return blk_queue_is_zoned(q) && !q->limits.max_hw_zone_append_sectors;
 }
 
 static inline bool bdev_emulates_zone_append(struct block_device *bdev)
@@ -1237,7 +1222,7 @@ static inline bool bdev_emulates_zone_append(struct block_device *bdev)
 static inline unsigned int
 bdev_max_zone_append_sectors(struct block_device *bdev)
 {
-	return queue_max_zone_append_sectors(bdev_get_queue(bdev));
+	return bdev_limits(bdev)->max_zone_append_sectors;
 }
 
 static inline unsigned int bdev_max_segments(struct block_device *bdev)
-- 
cgit v1.2.3


From 377dda2cff59825079aee3906aa4904779747b0b Mon Sep 17 00:00:00 2001
From: Qiang Yu <qiang.yu@amd.com>
Date: Fri, 25 Oct 2024 11:23:29 +0800
Subject: drm/fourcc: add AMD_FMT_MOD_TILE_GFX9_4K_D_X
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This is used when radeonsi export small texture's modifier
to user with eglExportDMABUFImageQueryMESA().

mesa changes is available here:
https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/31658

Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Signed-off-by: Qiang Yu <qiang.yu@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 include/uapi/drm/drm_fourcc.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/uapi/drm/drm_fourcc.h b/include/uapi/drm/drm_fourcc.h
index 78abd819fd62..70f3b00b0681 100644
--- a/include/uapi/drm/drm_fourcc.h
+++ b/include/uapi/drm/drm_fourcc.h
@@ -1516,6 +1516,7 @@ drm_fourcc_canonicalize_nvidia_format_mod(__u64 modifier)
  * 64K_D_2D on GFX12 is identical to 64K_D on GFX11.
  */
 #define AMD_FMT_MOD_TILE_GFX9_64K_D 10
+#define AMD_FMT_MOD_TILE_GFX9_4K_D_X 22
 #define AMD_FMT_MOD_TILE_GFX9_64K_S_X 25
 #define AMD_FMT_MOD_TILE_GFX9_64K_D_X 26
 #define AMD_FMT_MOD_TILE_GFX9_64K_R_X 27
-- 
cgit v1.2.3


From fd0a5afb5455b4561bfc6dfb0c4b2d8226f9ccfe Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@linaro.org>
Date: Mon, 4 Nov 2024 20:16:30 +0300
Subject: kunit: skb: use "gfp" variable instead of hardcoding GFP_KERNEL

The intent here was clearly to use the gfp variable flags instead of
hardcoding GFP_KERNEL.  All the callers pass GFP_KERNEL as the gfp
flags so this doesn't affect runtime.

Fixes: b3231d353a51 ("kunit: add a convenience allocation wrapper for SKBs")
Signed-off-by: Dan Carpenter <dan.carpenter@linaro.org>
Reviewed-by: David Gow <davidgow@google.com>
Reviewed-by: Kuan-Wei Chiu <visitorckw@gmail.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 include/kunit/skbuff.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/kunit/skbuff.h b/include/kunit/skbuff.h
index 44d12370939a..345e1e8f0312 100644
--- a/include/kunit/skbuff.h
+++ b/include/kunit/skbuff.h
@@ -29,7 +29,7 @@ static void kunit_action_kfree_skb(void *p)
 static inline struct sk_buff *kunit_zalloc_skb(struct kunit *test, int len,
 					       gfp_t gfp)
 {
-	struct sk_buff *res = alloc_skb(len, GFP_KERNEL);
+	struct sk_buff *res = alloc_skb(len, gfp);
 
 	if (!res || skb_pad(res, len))
 		return NULL;
-- 
cgit v1.2.3


From 5a47c2080a7316f184107464e4f76737c0c05186 Mon Sep 17 00:00:00 2001
From: Guixin Liu <kanie@linux.alibaba.com>
Date: Wed, 6 Nov 2024 15:34:46 +0800
Subject: nvmet: support reservation feature

This patch implements the reservation feature, including:
  1. reservation register(register, unregister and replace).
  2. reservation acquire(acquire, preempt, preempt and abort).
  3. reservation release(release and clear).
  4. reservation report.
  5. set feature and get feature of reservation notify mask.
  6. get log page of reservation event.

Not supported:
  1. persistent reservation through power loss.

Test cases:
  Use nvme-cli and fio to test all implemented sub features:
  1. use nvme resv-register to register host a registrant or
     unregister or replace a new key.
  2. use nvme resv-acquire to set host to the holder, and use fio
     to send read and write io in all reservation type. And also
     test preempt and "preempt and abort".
  3. use nvme resv-report to show all registrants and reservation
     status.
  4. use nvme resv-release to release all registrants.
  5. use nvme get-log to get events generated by the preceding
     operations.

In addition, make reservation configurable, one can set ns to
support reservation before enable ns. The default of resv_enable
is false.

Signed-off-by: Guixin Liu <kanie@linux.alibaba.com>
Reviewed-by: Dmitry Bogdanov <d.bogdanov@yadro.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Tested-by: Chaitanya Kulkarni <kch@nvidia.com>
Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
Signed-off-by: Keith Busch <kbusch@kernel.org>
---
 include/linux/nvme.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 44d048d68503..0179bb6d502d 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -2045,7 +2045,7 @@ enum {
 	NVME_PR_LOG_EMPTY_LOG_PAGE			= 0x00,
 	NVME_PR_LOG_REGISTRATION_PREEMPTED		= 0x01,
 	NVME_PR_LOG_RESERVATION_RELEASED		= 0x02,
-	NVME_PR_LOG_RESERVATOPM_PREEMPTED		= 0x03,
+	NVME_PR_LOG_RESERVATOIN_PREEMPTED		= 0x03,
 };
 
 enum {
-- 
cgit v1.2.3


From 61c9967cd63448292a64f9ee9aeb6e2053e3a624 Mon Sep 17 00:00:00 2001
From: Keith Busch <kbusch@kernel.org>
Date: Mon, 4 Nov 2024 13:24:36 -0800
Subject: nvmet: implement active command set ns list
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This is required for nvme 2.1 for targets that support multiple command
sets. We support NVM and ZNS, so are required to support this
identification.

Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Matias Bjørling <matias.bjorling@wdc.com>
Signed-off-by: Keith Busch <kbusch@kernel.org>
---
 include/linux/nvme.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 0179bb6d502d..26de7c5c12be 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -522,6 +522,7 @@ enum {
 	NVME_ID_CNS_NS_DESC_LIST	= 0x03,
 	NVME_ID_CNS_CS_NS		= 0x05,
 	NVME_ID_CNS_CS_CTRL		= 0x06,
+	NVME_ID_CNS_NS_ACTIVE_LIST_CS	= 0x07,
 	NVME_ID_CNS_NS_CS_INDEP		= 0x08,
 	NVME_ID_CNS_NS_PRESENT_LIST	= 0x10,
 	NVME_ID_CNS_NS_PRESENT		= 0x11,
-- 
cgit v1.2.3


From 83acb24e6de7bbb5cb0df1ba0f47a92da9112061 Mon Sep 17 00:00:00 2001
From: Keith Busch <kbusch@kernel.org>
Date: Mon, 4 Nov 2024 14:00:14 -0800
Subject: nvmet: implement supported log pages
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This log is required for nvme 2.1.

Reviewed-by: Matias Bjørling <matias.bjorling@wdc.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Keith Busch <kbusch@kernel.org>
---
 include/linux/nvme.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include')

diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 26de7c5c12be..e9e508bca60f 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -1245,6 +1245,7 @@ enum {
 	NVME_FEAT_WRITE_PROTECT	= 0x84,
 	NVME_FEAT_VENDOR_START	= 0xC0,
 	NVME_FEAT_VENDOR_END	= 0xFF,
+	NVME_LOG_SUPPORTED	= 0x00,
 	NVME_LOG_ERROR		= 0x01,
 	NVME_LOG_SMART		= 0x02,
 	NVME_LOG_FW_SLOT	= 0x03,
@@ -1262,6 +1263,14 @@ enum {
 	NVME_FWACT_ACTV		= (2 << 3),
 };
 
+struct nvme_supported_log {
+	__le32	lids[256];
+};
+
+enum {
+	NVME_LIDS_LSUPP	= 1 << 0,
+};
+
 /* NVMe Namespace Write Protect State */
 enum {
 	NVME_NS_NO_WRITE_PROTECT = 0,
-- 
cgit v1.2.3


From e973c91727d49bb128c95210b3aa1960b9421d18 Mon Sep 17 00:00:00 2001
From: Keith Busch <kbusch@kernel.org>
Date: Mon, 4 Nov 2024 14:07:42 -0800
Subject: nvmet: implement supported features log
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This log is required for nvme 2.1.

Reviewed-by: Matias Bjørling <matias.bjorling@wdc.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Keith Busch <kbusch@kernel.org>
---
 include/linux/nvme.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'include')

diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index e9e508bca60f..31d7ec6d8b93 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -1256,6 +1256,7 @@ enum {
 	NVME_LOG_TELEMETRY_CTRL = 0x08,
 	NVME_LOG_ENDURANCE_GROUP = 0x09,
 	NVME_LOG_ANA		= 0x0c,
+	NVME_LOG_FEATURES	= 0x12,
 	NVME_LOG_DISC		= 0x70,
 	NVME_LOG_RESERVATION	= 0x80,
 	NVME_FWACT_REPL		= (0 << 3),
@@ -1271,6 +1272,16 @@ enum {
 	NVME_LIDS_LSUPP	= 1 << 0,
 };
 
+struct nvme_supported_features_log {
+	__le32	fis[256];
+};
+
+enum {
+	NVME_FIS_FSUPP	= 1 << 0,
+	NVME_FIS_NSCPE	= 1 << 20,
+	NVME_FIS_CSCPE	= 1 << 21,
+};
+
 /* NVMe Namespace Write Protect State */
 enum {
 	NVME_NS_NO_WRITE_PROTECT = 0,
-- 
cgit v1.2.3


From 266b652c65b44fb2ccfa17cdb54ce2ef723deb0a Mon Sep 17 00:00:00 2001
From: Keith Busch <kbusch@kernel.org>
Date: Fri, 1 Nov 2024 14:46:01 -0700
Subject: nvmet: implement endurance groups

Most of the returned information is just stubbed data. The target must
support these in order to report rotational media. Since this driver
doesn't know any better, each namespace is its own endurance group with
the engid value matching the nsid.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Keith Busch <kbusch@kernel.org>
---
 include/linux/nvme.h | 29 +++++++++++++++++++++++++++--
 1 file changed, 27 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 31d7ec6d8b93..6d5b4299a1b2 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -327,7 +327,8 @@ struct nvme_id_ctrl {
 	__le32			sanicap;
 	__le32			hmminds;
 	__le16			hmmaxd;
-	__u8			rsvd338[4];
+	__le16			nvmsetidmax;
+	__le16			endgidmax;
 	__u8			anatt;
 	__u8			anacap;
 	__le32			anagrpmax;
@@ -531,6 +532,7 @@ enum {
 	NVME_ID_CNS_SCNDRY_CTRL_LIST	= 0x15,
 	NVME_ID_CNS_NS_GRANULARITY	= 0x16,
 	NVME_ID_CNS_UUID_LIST		= 0x17,
+	NVME_ID_CNS_ENDGRP_LIST		= 0x19,
 };
 
 enum {
@@ -618,6 +620,28 @@ enum {
 	NVME_NIDT_CSI		= 0x04,
 };
 
+struct nvme_endurance_group_log {
+	__u8	egcw;
+	__u8	egfeat;
+	__u8	rsvd2;
+	__u8	avsp;
+	__u8	avspt;
+	__u8	pused;
+	__le16	did;
+	__u8	rsvd8[24];
+	__u8	ee[16];
+	__u8	dur[16];
+	__u8	duw[16];
+	__u8	muw[16];
+	__u8	hrc[16];
+	__u8	hwc[16];
+	__u8	mdie[16];
+	__u8	neile[16];
+	__u8	tegcap[16];
+	__u8	uegcap[16];
+	__u8	rsvd192[320];
+};
+
 struct nvme_smart_log {
 	__u8			critical_warning;
 	__u8			temperature[2];
@@ -1302,7 +1326,8 @@ struct nvme_identify {
 	__u8			cns;
 	__u8			rsvd3;
 	__le16			ctrlid;
-	__u8			rsvd11[3];
+	__le16			cnssid;
+	__u8			rsvd11;
 	__u8			csi;
 	__u32			rsvd12[4];
 };
-- 
cgit v1.2.3


From 5fd075cdaf3649000677d960fd9e45c08081b7e0 Mon Sep 17 00:00:00 2001
From: Keith Busch <kbusch@kernel.org>
Date: Fri, 1 Nov 2024 13:48:47 -0700
Subject: nvmet: implement rotational media information log

Most of the information is stubbed. Supporting these commands is a
requirement for supporting rotational media.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Keith Busch <kbusch@kernel.org>
---
 include/linux/nvme.h | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 6d5b4299a1b2..99cf0ee73714 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -642,6 +642,18 @@ struct nvme_endurance_group_log {
 	__u8	rsvd192[320];
 };
 
+struct nvme_rotational_media_log {
+	__le16	endgid;
+	__le16	numa;
+	__le16	nrs;
+	__u8	rsvd6[2];
+	__le32	spinc;
+	__le32	fspinc;
+	__le32	ldc;
+	__le32	fldc;
+	__u8	rsvd24[488];
+};
+
 struct nvme_smart_log {
 	__u8			critical_warning;
 	__u8			temperature[2];
@@ -1281,6 +1293,7 @@ enum {
 	NVME_LOG_ENDURANCE_GROUP = 0x09,
 	NVME_LOG_ANA		= 0x0c,
 	NVME_LOG_FEATURES	= 0x12,
+	NVME_LOG_RMI		= 0x16,
 	NVME_LOG_DISC		= 0x70,
 	NVME_LOG_RESERVATION	= 0x80,
 	NVME_FWACT_REPL		= (0 << 3),
@@ -1435,7 +1448,7 @@ struct nvme_get_log_page_command {
 	__u8			lsp; /* upper 4 bits reserved */
 	__le16			numdl;
 	__le16			numdu;
-	__u16			rsvd11;
+	__le16			lsi;
 	union {
 		struct {
 			__le32 lpol;
-- 
cgit v1.2.3


From e2758c76a0ab2032a0d11abc1c2ff08661fdf428 Mon Sep 17 00:00:00 2001
From: Keith Busch <kbusch@kernel.org>
Date: Fri, 1 Nov 2024 12:29:40 -0700
Subject: nvmet: support for csi identify ns

Implements reporting the I/O Command Set Independent Identify Namespace
command.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Keith Busch <kbusch@kernel.org>
---
 include/linux/nvme.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 99cf0ee73714..c136d64c7d73 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -563,6 +563,7 @@ enum {
 	NVME_NS_FLBAS_LBA_SHIFT	= 1,
 	NVME_NS_FLBAS_META_EXT	= 0x10,
 	NVME_NS_NMIC_SHARED	= 1 << 0,
+	NVME_NS_ROTATIONAL	= 1 << 4,
 	NVME_LBAF_RP_BEST	= 0,
 	NVME_LBAF_RP_BETTER	= 1,
 	NVME_LBAF_RP_GOOD	= 2,
-- 
cgit v1.2.3


From 8a825d22a70915bd80c811fa93538cf2540af29d Mon Sep 17 00:00:00 2001
From: Guixin Liu <kanie@linux.alibaba.com>
Date: Mon, 4 Nov 2024 16:55:00 +0800
Subject: nvme: check ns's volatile write cache not present

When the VWC of a namespace does not exist, the BLK_FEAT_WRITE_CACHE
flag should not be set when registering the block device, regardless
of whether the controller supports VWC.

Signed-off-by: Guixin Liu <kanie@linux.alibaba.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Keith Busch <kbusch@kernel.org>
---
 include/linux/nvme.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index c136d64c7d73..0a6e22038ce3 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -564,6 +564,7 @@ enum {
 	NVME_NS_FLBAS_META_EXT	= 0x10,
 	NVME_NS_NMIC_SHARED	= 1 << 0,
 	NVME_NS_ROTATIONAL	= 1 << 4,
+	NVME_NS_VWC_NOT_PRESENT = 1 << 5,
 	NVME_LBAF_RP_BEST	= 0,
 	NVME_LBAF_RP_BETTER	= 1,
 	NVME_LBAF_RP_GOOD	= 2,
-- 
cgit v1.2.3


From 1af75b2ad08bd5977c51c2d0fc11741a4c0a48d9 Mon Sep 17 00:00:00 2001
From: Bjorn Andersson <bjorn.andersson@oss.qualcomm.com>
Date: Sun, 10 Nov 2024 09:33:40 -0800
Subject: firmware: qcom: scm: Introduce CP_SMMU_APERTURE_ID

The QCOM_SCM_SVC_MP service provides QCOM_SCM_MP_CP_SMMU_APERTURE_ID,
which is used to trigger the mapping of register banks into the SMMU
context for per-processes page tables to function (in case this isn't
statically setup by firmware).

This is necessary on e.g. QCS6490 Rb3Gen2, in order to avoid "CP | AHB
bus error"-errors from the GPU.

Introduce a function to allow the msm driver to invoke this call.

Signed-off-by: Bjorn Andersson <bjorn.andersson@oss.qualcomm.com>
Reviewed-by: Rob Clark <robdclark@gmail.com>
Link: https://lore.kernel.org/r/20241110-adreno-smmu-aparture-v2-1-9b1fb2ee41d4@oss.qualcomm.com
Signed-off-by: Bjorn Andersson <andersson@kernel.org>
---
 include/linux/firmware/qcom/qcom_scm.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/firmware/qcom/qcom_scm.h b/include/linux/firmware/qcom/qcom_scm.h
index 9f14976399ab..4621aec0328c 100644
--- a/include/linux/firmware/qcom/qcom_scm.h
+++ b/include/linux/firmware/qcom/qcom_scm.h
@@ -85,6 +85,8 @@ int qcom_scm_io_writel(phys_addr_t addr, unsigned int val);
 
 bool qcom_scm_restore_sec_cfg_available(void);
 int qcom_scm_restore_sec_cfg(u32 device_id, u32 spare);
+int qcom_scm_set_gpu_smmu_aperture(unsigned int context_bank);
+bool qcom_scm_set_gpu_smmu_aperture_is_available(void);
 int qcom_scm_iommu_secure_ptbl_size(u32 spare, size_t *size);
 int qcom_scm_iommu_secure_ptbl_init(u64 addr, u32 size, u32 spare);
 int qcom_scm_iommu_set_cp_pool_size(u32 spare, u32 size);
-- 
cgit v1.2.3


From 7f4b3960e54faec72132a71da4a84a8e2a0b9037 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Fri, 8 Nov 2024 11:41:44 +0100
Subject: net: netlink: add nla_get_*_default() accessors
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

There are quite a number of places that use patterns
such as

  if (attr)
     val = nla_get_u16(attr);
  else
     val = DEFAULT;

Add nla_get_u16_default() and friends like that to
not have to type this out all the time.

Acked-by: Toke Høiland-Jørgensen <toke@kernel.org>
Acked-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Link: https://patch.msgid.link/20241108114145.acd2aadb03ac.I3df6aac71d38a5baa1c0a03d0c7e82d4395c030e@changeid
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/netlink.h | 262 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 262 insertions(+)

(limited to 'include')

diff --git a/include/net/netlink.h b/include/net/netlink.h
index 2dc671c977ff..39eaa6be6ca8 100644
--- a/include/net/netlink.h
+++ b/include/net/netlink.h
@@ -142,6 +142,8 @@
  *   nla_get_flag(nla)			return 1 if flag is true
  *   nla_get_msecs(nla)			get payload for a msecs attribute
  *
+ *   The same functions also exist with _default().
+ *
  * Attribute Misc:
  *   nla_memcpy(dest, nla, count)	copy attribute into memory
  *   nla_memcmp(nla, data, size)	compare attribute with memory area
@@ -1695,6 +1697,20 @@ static inline u32 nla_get_u32(const struct nlattr *nla)
 	return *(u32 *) nla_data(nla);
 }
 
+/**
+ * nla_get_u32_default - return payload of u32 attribute or default
+ * @nla: u32 netlink attribute, may be %NULL
+ * @defvalue: default value to use if @nla is %NULL
+ *
+ * Return: the value of the attribute, or the default value if not present
+ */
+static inline u32 nla_get_u32_default(const struct nlattr *nla, u32 defvalue)
+{
+	if (!nla)
+		return defvalue;
+	return nla_get_u32(nla);
+}
+
 /**
  * nla_get_be32 - return payload of __be32 attribute
  * @nla: __be32 netlink attribute
@@ -1704,6 +1720,21 @@ static inline __be32 nla_get_be32(const struct nlattr *nla)
 	return *(__be32 *) nla_data(nla);
 }
 
+/**
+ * nla_get_be32_default - return payload of be32 attribute or default
+ * @nla: __be32 netlink attribute, may be %NULL
+ * @defvalue: default value to use if @nla is %NULL
+ *
+ * Return: the value of the attribute, or the default value if not present
+ */
+static inline __be32 nla_get_be32_default(const struct nlattr *nla,
+					  __be32 defvalue)
+{
+	if (!nla)
+		return defvalue;
+	return nla_get_be32(nla);
+}
+
 /**
  * nla_get_le32 - return payload of __le32 attribute
  * @nla: __le32 netlink attribute
@@ -1713,6 +1744,21 @@ static inline __le32 nla_get_le32(const struct nlattr *nla)
 	return *(__le32 *) nla_data(nla);
 }
 
+/**
+ * nla_get_le32_default - return payload of le32 attribute or default
+ * @nla: __le32 netlink attribute, may be %NULL
+ * @defvalue: default value to use if @nla is %NULL
+ *
+ * Return: the value of the attribute, or the default value if not present
+ */
+static inline __le32 nla_get_le32_default(const struct nlattr *nla,
+					  __le32 defvalue)
+{
+	if (!nla)
+		return defvalue;
+	return nla_get_le32(nla);
+}
+
 /**
  * nla_get_u16 - return payload of u16 attribute
  * @nla: u16 netlink attribute
@@ -1722,6 +1768,20 @@ static inline u16 nla_get_u16(const struct nlattr *nla)
 	return *(u16 *) nla_data(nla);
 }
 
+/**
+ * nla_get_u16_default - return payload of u16 attribute or default
+ * @nla: u16 netlink attribute, may be %NULL
+ * @defvalue: default value to use if @nla is %NULL
+ *
+ * Return: the value of the attribute, or the default value if not present
+ */
+static inline u16 nla_get_u16_default(const struct nlattr *nla, u16 defvalue)
+{
+	if (!nla)
+		return defvalue;
+	return nla_get_u16(nla);
+}
+
 /**
  * nla_get_be16 - return payload of __be16 attribute
  * @nla: __be16 netlink attribute
@@ -1731,6 +1791,21 @@ static inline __be16 nla_get_be16(const struct nlattr *nla)
 	return *(__be16 *) nla_data(nla);
 }
 
+/**
+ * nla_get_be16_default - return payload of be16 attribute or default
+ * @nla: __be16 netlink attribute, may be %NULL
+ * @defvalue: default value to use if @nla is %NULL
+ *
+ * Return: the value of the attribute, or the default value if not present
+ */
+static inline __be16 nla_get_be16_default(const struct nlattr *nla,
+					  __be16 defvalue)
+{
+	if (!nla)
+		return defvalue;
+	return nla_get_be16(nla);
+}
+
 /**
  * nla_get_le16 - return payload of __le16 attribute
  * @nla: __le16 netlink attribute
@@ -1740,6 +1815,21 @@ static inline __le16 nla_get_le16(const struct nlattr *nla)
 	return *(__le16 *) nla_data(nla);
 }
 
+/**
+ * nla_get_le16_default - return payload of le16 attribute or default
+ * @nla: __le16 netlink attribute, may be %NULL
+ * @defvalue: default value to use if @nla is %NULL
+ *
+ * Return: the value of the attribute, or the default value if not present
+ */
+static inline __le16 nla_get_le16_default(const struct nlattr *nla,
+					  __le16 defvalue)
+{
+	if (!nla)
+		return defvalue;
+	return nla_get_le16(nla);
+}
+
 /**
  * nla_get_u8 - return payload of u8 attribute
  * @nla: u8 netlink attribute
@@ -1749,6 +1839,20 @@ static inline u8 nla_get_u8(const struct nlattr *nla)
 	return *(u8 *) nla_data(nla);
 }
 
+/**
+ * nla_get_u8_default - return payload of u8 attribute or default
+ * @nla: u8 netlink attribute, may be %NULL
+ * @defvalue: default value to use if @nla is %NULL
+ *
+ * Return: the value of the attribute, or the default value if not present
+ */
+static inline u8 nla_get_u8_default(const struct nlattr *nla, u8 defvalue)
+{
+	if (!nla)
+		return defvalue;
+	return nla_get_u8(nla);
+}
+
 /**
  * nla_get_u64 - return payload of u64 attribute
  * @nla: u64 netlink attribute
@@ -1762,6 +1866,20 @@ static inline u64 nla_get_u64(const struct nlattr *nla)
 	return tmp;
 }
 
+/**
+ * nla_get_u64_default - return payload of u64 attribute or default
+ * @nla: u64 netlink attribute, may be %NULL
+ * @defvalue: default value to use if @nla is %NULL
+ *
+ * Return: the value of the attribute, or the default value if not present
+ */
+static inline u64 nla_get_u64_default(const struct nlattr *nla, u64 defvalue)
+{
+	if (!nla)
+		return defvalue;
+	return nla_get_u64(nla);
+}
+
 /**
  * nla_get_uint - return payload of uint attribute
  * @nla: uint netlink attribute
@@ -1773,6 +1891,20 @@ static inline u64 nla_get_uint(const struct nlattr *nla)
 	return nla_get_u64(nla);
 }
 
+/**
+ * nla_get_uint_default - return payload of uint attribute or default
+ * @nla: uint netlink attribute, may be %NULL
+ * @defvalue: default value to use if @nla is %NULL
+ *
+ * Return: the value of the attribute, or the default value if not present
+ */
+static inline u64 nla_get_uint_default(const struct nlattr *nla, u64 defvalue)
+{
+	if (!nla)
+		return defvalue;
+	return nla_get_uint(nla);
+}
+
 /**
  * nla_get_be64 - return payload of __be64 attribute
  * @nla: __be64 netlink attribute
@@ -1786,6 +1918,21 @@ static inline __be64 nla_get_be64(const struct nlattr *nla)
 	return tmp;
 }
 
+/**
+ * nla_get_be64_default - return payload of be64 attribute or default
+ * @nla: __be64 netlink attribute, may be %NULL
+ * @defvalue: default value to use if @nla is %NULL
+ *
+ * Return: the value of the attribute, or the default value if not present
+ */
+static inline __be64 nla_get_be64_default(const struct nlattr *nla,
+					  __be64 defvalue)
+{
+	if (!nla)
+		return defvalue;
+	return nla_get_be64(nla);
+}
+
 /**
  * nla_get_le64 - return payload of __le64 attribute
  * @nla: __le64 netlink attribute
@@ -1795,6 +1942,21 @@ static inline __le64 nla_get_le64(const struct nlattr *nla)
 	return *(__le64 *) nla_data(nla);
 }
 
+/**
+ * nla_get_le64_default - return payload of le64 attribute or default
+ * @nla: __le64 netlink attribute, may be %NULL
+ * @defvalue: default value to use if @nla is %NULL
+ *
+ * Return: the value of the attribute, or the default value if not present
+ */
+static inline __le64 nla_get_le64_default(const struct nlattr *nla,
+					  __le64 defvalue)
+{
+	if (!nla)
+		return defvalue;
+	return nla_get_le64(nla);
+}
+
 /**
  * nla_get_s32 - return payload of s32 attribute
  * @nla: s32 netlink attribute
@@ -1804,6 +1966,20 @@ static inline s32 nla_get_s32(const struct nlattr *nla)
 	return *(s32 *) nla_data(nla);
 }
 
+/**
+ * nla_get_s32_default - return payload of s32 attribute or default
+ * @nla: s32 netlink attribute, may be %NULL
+ * @defvalue: default value to use if @nla is %NULL
+ *
+ * Return: the value of the attribute, or the default value if not present
+ */
+static inline s32 nla_get_s32_default(const struct nlattr *nla, s32 defvalue)
+{
+	if (!nla)
+		return defvalue;
+	return nla_get_s32(nla);
+}
+
 /**
  * nla_get_s16 - return payload of s16 attribute
  * @nla: s16 netlink attribute
@@ -1813,6 +1989,20 @@ static inline s16 nla_get_s16(const struct nlattr *nla)
 	return *(s16 *) nla_data(nla);
 }
 
+/**
+ * nla_get_s16_default - return payload of s16 attribute or default
+ * @nla: s16 netlink attribute, may be %NULL
+ * @defvalue: default value to use if @nla is %NULL
+ *
+ * Return: the value of the attribute, or the default value if not present
+ */
+static inline s16 nla_get_s16_default(const struct nlattr *nla, s16 defvalue)
+{
+	if (!nla)
+		return defvalue;
+	return nla_get_s16(nla);
+}
+
 /**
  * nla_get_s8 - return payload of s8 attribute
  * @nla: s8 netlink attribute
@@ -1822,6 +2012,20 @@ static inline s8 nla_get_s8(const struct nlattr *nla)
 	return *(s8 *) nla_data(nla);
 }
 
+/**
+ * nla_get_s8_default - return payload of s8 attribute or default
+ * @nla: s8 netlink attribute, may be %NULL
+ * @defvalue: default value to use if @nla is %NULL
+ *
+ * Return: the value of the attribute, or the default value if not present
+ */
+static inline s8 nla_get_s8_default(const struct nlattr *nla, s8 defvalue)
+{
+	if (!nla)
+		return defvalue;
+	return nla_get_s8(nla);
+}
+
 /**
  * nla_get_s64 - return payload of s64 attribute
  * @nla: s64 netlink attribute
@@ -1835,6 +2039,20 @@ static inline s64 nla_get_s64(const struct nlattr *nla)
 	return tmp;
 }
 
+/**
+ * nla_get_s64_default - return payload of s64 attribute or default
+ * @nla: s64 netlink attribute, may be %NULL
+ * @defvalue: default value to use if @nla is %NULL
+ *
+ * Return: the value of the attribute, or the default value if not present
+ */
+static inline s64 nla_get_s64_default(const struct nlattr *nla, s64 defvalue)
+{
+	if (!nla)
+		return defvalue;
+	return nla_get_s64(nla);
+}
+
 /**
  * nla_get_sint - return payload of uint attribute
  * @nla: uint netlink attribute
@@ -1846,6 +2064,20 @@ static inline s64 nla_get_sint(const struct nlattr *nla)
 	return nla_get_s64(nla);
 }
 
+/**
+ * nla_get_sint_default - return payload of sint attribute or default
+ * @nla: sint netlink attribute, may be %NULL
+ * @defvalue: default value to use if @nla is %NULL
+ *
+ * Return: the value of the attribute, or the default value if not present
+ */
+static inline s64 nla_get_sint_default(const struct nlattr *nla, s64 defvalue)
+{
+	if (!nla)
+		return defvalue;
+	return nla_get_sint(nla);
+}
+
 /**
  * nla_get_flag - return payload of flag attribute
  * @nla: flag netlink attribute
@@ -1868,6 +2100,21 @@ static inline unsigned long nla_get_msecs(const struct nlattr *nla)
 	return msecs_to_jiffies((unsigned long) msecs);
 }
 
+/**
+ * nla_get_msecs_default - return payload of msecs attribute or default
+ * @nla: msecs netlink attribute, may be %NULL
+ * @defvalue: default value to use if @nla is %NULL
+ *
+ * Return: the value of the attribute, or the default value if not present
+ */
+static inline unsigned long nla_get_msecs_default(const struct nlattr *nla,
+						  unsigned long defvalue)
+{
+	if (!nla)
+		return defvalue;
+	return nla_get_msecs(nla);
+}
+
 /**
  * nla_get_in_addr - return payload of IPv4 address attribute
  * @nla: IPv4 address netlink attribute
@@ -1877,6 +2124,21 @@ static inline __be32 nla_get_in_addr(const struct nlattr *nla)
 	return *(__be32 *) nla_data(nla);
 }
 
+/**
+ * nla_get_in_addr_default - return payload of be32 attribute or default
+ * @nla: IPv4 address netlink attribute, may be %NULL
+ * @defvalue: default value to use if @nla is %NULL
+ *
+ * Return: the value of the attribute, or the default value if not present
+ */
+static inline __be32 nla_get_in_addr_default(const struct nlattr *nla,
+					     __be32 defvalue)
+{
+	if (!nla)
+		return defvalue;
+	return nla_get_in_addr(nla);
+}
+
 /**
  * nla_get_in6_addr - return payload of IPv6 address attribute
  * @nla: IPv6 address netlink attribute
-- 
cgit v1.2.3


From b376d519bd142c65ba9bba35db12b6be95b46893 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Sun, 29 Sep 2024 20:50:16 -0400
Subject: xdrgen: Implement big-endian enums

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 include/linux/sunrpc/xdr.h | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

(limited to 'include')

diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h
index 5f775e104f9a..a2ab813a9800 100644
--- a/include/linux/sunrpc/xdr.h
+++ b/include/linux/sunrpc/xdr.h
@@ -680,6 +680,27 @@ xdr_stream_decode_u32(struct xdr_stream *xdr, __u32 *ptr)
 	return 0;
 }
 
+/**
+ * xdr_stream_decode_be32 - Decode a big-endian 32-bit integer
+ * @xdr: pointer to xdr_stream
+ * @ptr: location to store integer
+ *
+ * Return values:
+ *   %0 on success
+ *   %-EBADMSG on XDR buffer overflow
+ */
+static inline ssize_t
+xdr_stream_decode_be32(struct xdr_stream *xdr, __be32 *ptr)
+{
+	const size_t count = sizeof(*ptr);
+	__be32 *p = xdr_inline_decode(xdr, count);
+
+	if (unlikely(!p))
+		return -EBADMSG;
+	*ptr = *p;
+	return 0;
+}
+
 /**
  * xdr_stream_decode_u64 - Decode a 64-bit integer
  * @xdr: pointer to xdr_stream
-- 
cgit v1.2.3


From 631c2925bae41c11dcf3915a2ab5f3be9af54277 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Thu, 3 Oct 2024 14:54:33 -0400
Subject: xdrgen: Keep track of on-the-wire data type widths

The generic parts of the RPC layer need to know the widths (in
XDR_UNIT increments) of the XDR data types defined for each
protocol.

As a first step, add dictionaries to keep track of the symbolic and
actual maximum XDR width of XDR types.

This makes it straightforward to look up the width of a type by its
name. The built-in dictionaries are pre-loaded with the widths of
the built-in XDR types as defined in RFC 4506.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 include/linux/sunrpc/xdrgen/_defs.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include')

diff --git a/include/linux/sunrpc/xdrgen/_defs.h b/include/linux/sunrpc/xdrgen/_defs.h
index be9e62371758..20c7270aa64d 100644
--- a/include/linux/sunrpc/xdrgen/_defs.h
+++ b/include/linux/sunrpc/xdrgen/_defs.h
@@ -23,4 +23,13 @@ typedef struct {
 	u8 *data;
 } opaque;
 
+#define XDR_void		(0)
+#define XDR_bool		(1)
+#define XDR_int			(1)
+#define XDR_unsigned_int	(1)
+#define XDR_long		(1)
+#define XDR_unsigned_long	(1)
+#define XDR_hyper		(2)
+#define XDR_unsigned_hyper	(2)
+
 #endif /* _SUNRPC_XDRGEN__DEFS_H_ */
-- 
cgit v1.2.3


From 65941f10caf2c04781a7defa4ec0ab119dbd235a Mon Sep 17 00:00:00 2001
From: Yunsheng Lin <linyunsheng@huawei.com>
Date: Mon, 28 Oct 2024 19:53:37 +0800
Subject: mm: move the page fragment allocator from page_alloc into its own
 file

Inspired by [1], move the page fragment allocator from page_alloc
into its own c file and header file, as we are about to make more
change for it to replace another page_frag implementation in
sock.c

As this patchset is going to replace 'struct page_frag' with
'struct page_frag_cache' in sched.h, including page_frag_cache.h
in sched.h has a compiler error caused by interdependence between
mm_types.h and mm.h for asm-offsets.c, see [2]. So avoid the compiler
error by moving 'struct page_frag_cache' to mm_types_task.h as
suggested by Alexander, see [3].

1. https://lore.kernel.org/all/20230411160902.4134381-3-dhowells@redhat.com/
2. https://lore.kernel.org/all/15623dac-9358-4597-b3ee-3694a5956920@gmail.com/
3. https://lore.kernel.org/all/CAKgT0UdH1yD=LSCXFJ=YM_aiA4OomD-2wXykO42bizaWMt_HOA@mail.gmail.com/
CC: David Howells <dhowells@redhat.com>
CC: Linux-MM <linux-mm@kvack.org>
Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
Acked-by: Andrew Morton <akpm@linux-foundation.org>
Reviewed-by: Alexander Duyck <alexanderduyck@fb.com>
Link: https://patch.msgid.link/20241028115343.3405838-3-linyunsheng@huawei.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/gfp.h             | 22 ----------------------
 include/linux/mm_types.h        | 18 ------------------
 include/linux/mm_types_task.h   | 18 ++++++++++++++++++
 include/linux/page_frag_cache.h | 31 +++++++++++++++++++++++++++++++
 include/linux/skbuff.h          |  1 +
 5 files changed, 50 insertions(+), 40 deletions(-)
 create mode 100644 include/linux/page_frag_cache.h

(limited to 'include')

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index a951de920e20..a0a6d25f883f 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -371,28 +371,6 @@ __meminit void *alloc_pages_exact_nid_noprof(int nid, size_t size, gfp_t gfp_mas
 extern void __free_pages(struct page *page, unsigned int order);
 extern void free_pages(unsigned long addr, unsigned int order);
 
-struct page_frag_cache;
-void page_frag_cache_drain(struct page_frag_cache *nc);
-extern void __page_frag_cache_drain(struct page *page, unsigned int count);
-void *__page_frag_alloc_align(struct page_frag_cache *nc, unsigned int fragsz,
-			      gfp_t gfp_mask, unsigned int align_mask);
-
-static inline void *page_frag_alloc_align(struct page_frag_cache *nc,
-					  unsigned int fragsz, gfp_t gfp_mask,
-					  unsigned int align)
-{
-	WARN_ON_ONCE(!is_power_of_2(align));
-	return __page_frag_alloc_align(nc, fragsz, gfp_mask, -align);
-}
-
-static inline void *page_frag_alloc(struct page_frag_cache *nc,
-			     unsigned int fragsz, gfp_t gfp_mask)
-{
-	return __page_frag_alloc_align(nc, fragsz, gfp_mask, ~0u);
-}
-
-extern void page_frag_free(void *addr);
-
 #define __free_page(page) __free_pages((page), 0)
 #define free_page(addr) free_pages((addr), 0)
 
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 6e3bdf8e38bc..92314ef2d978 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -521,9 +521,6 @@ static_assert(sizeof(struct ptdesc) <= sizeof(struct page));
  */
 #define STRUCT_PAGE_MAX_SHIFT	(order_base_2(sizeof(struct page)))
 
-#define PAGE_FRAG_CACHE_MAX_SIZE	__ALIGN_MASK(32768, ~PAGE_MASK)
-#define PAGE_FRAG_CACHE_MAX_ORDER	get_order(PAGE_FRAG_CACHE_MAX_SIZE)
-
 /*
  * page_private can be used on tail pages.  However, PagePrivate is only
  * checked by the VM on the head page.  So page_private on the tail pages
@@ -542,21 +539,6 @@ static inline void *folio_get_private(struct folio *folio)
 	return folio->private;
 }
 
-struct page_frag_cache {
-	void * va;
-#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
-	__u16 offset;
-	__u16 size;
-#else
-	__u32 offset;
-#endif
-	/* we maintain a pagecount bias, so that we dont dirty cache line
-	 * containing page->_refcount every time we allocate a fragment.
-	 */
-	unsigned int		pagecnt_bias;
-	bool pfmemalloc;
-};
-
 typedef unsigned long vm_flags_t;
 
 /*
diff --git a/include/linux/mm_types_task.h b/include/linux/mm_types_task.h
index bff5706b76e1..0ac6daebdd5c 100644
--- a/include/linux/mm_types_task.h
+++ b/include/linux/mm_types_task.h
@@ -8,6 +8,7 @@
  * (These are defined separately to decouple sched.h from mm_types.h as much as possible.)
  */
 
+#include <linux/align.h>
 #include <linux/types.h>
 
 #include <asm/page.h>
@@ -43,6 +44,23 @@ struct page_frag {
 #endif
 };
 
+#define PAGE_FRAG_CACHE_MAX_SIZE	__ALIGN_MASK(32768, ~PAGE_MASK)
+#define PAGE_FRAG_CACHE_MAX_ORDER	get_order(PAGE_FRAG_CACHE_MAX_SIZE)
+struct page_frag_cache {
+	void *va;
+#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
+	__u16 offset;
+	__u16 size;
+#else
+	__u32 offset;
+#endif
+	/* we maintain a pagecount bias, so that we dont dirty cache line
+	 * containing page->_refcount every time we allocate a fragment.
+	 */
+	unsigned int		pagecnt_bias;
+	bool pfmemalloc;
+};
+
 /* Track pages that require TLB flushes */
 struct tlbflush_unmap_batch {
 #ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
diff --git a/include/linux/page_frag_cache.h b/include/linux/page_frag_cache.h
new file mode 100644
index 000000000000..67ac8626ed9b
--- /dev/null
+++ b/include/linux/page_frag_cache.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _LINUX_PAGE_FRAG_CACHE_H
+#define _LINUX_PAGE_FRAG_CACHE_H
+
+#include <linux/log2.h>
+#include <linux/mm_types_task.h>
+#include <linux/types.h>
+
+void page_frag_cache_drain(struct page_frag_cache *nc);
+void __page_frag_cache_drain(struct page *page, unsigned int count);
+void *__page_frag_alloc_align(struct page_frag_cache *nc, unsigned int fragsz,
+			      gfp_t gfp_mask, unsigned int align_mask);
+
+static inline void *page_frag_alloc_align(struct page_frag_cache *nc,
+					  unsigned int fragsz, gfp_t gfp_mask,
+					  unsigned int align)
+{
+	WARN_ON_ONCE(!is_power_of_2(align));
+	return __page_frag_alloc_align(nc, fragsz, gfp_mask, -align);
+}
+
+static inline void *page_frag_alloc(struct page_frag_cache *nc,
+				    unsigned int fragsz, gfp_t gfp_mask)
+{
+	return __page_frag_alloc_align(nc, fragsz, gfp_mask, ~0u);
+}
+
+void page_frag_free(void *addr);
+
+#endif
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index e5095d75abba..60535c706851 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -31,6 +31,7 @@
 #include <linux/in6.h>
 #include <linux/if_packet.h>
 #include <linux/llist.h>
+#include <linux/page_frag_cache.h>
 #include <net/flow.h>
 #if IS_ENABLED(CONFIG_NF_CONNTRACK)
 #include <linux/netfilter/nf_conntrack_common.h>
-- 
cgit v1.2.3


From 3d18dfe69ce46f106af327736d2261d7e3ee81c0 Mon Sep 17 00:00:00 2001
From: Yunsheng Lin <linyunsheng@huawei.com>
Date: Mon, 28 Oct 2024 19:53:39 +0800
Subject: mm: page_frag: avoid caller accessing 'page_frag_cache' directly

Use appropriate frag_page API instead of caller accessing
'page_frag_cache' directly.

CC: Andrew Morton <akpm@linux-foundation.org>
CC: Linux-MM <linux-mm@kvack.org>
Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
Reviewed-by: Alexander Duyck <alexanderduyck@fb.com>
Acked-by: Chuck Lever <chuck.lever@oracle.com>
Link: https://patch.msgid.link/20241028115343.3405838-5-linyunsheng@huawei.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/page_frag_cache.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include')

diff --git a/include/linux/page_frag_cache.h b/include/linux/page_frag_cache.h
index 67ac8626ed9b..0a52f7a179c8 100644
--- a/include/linux/page_frag_cache.h
+++ b/include/linux/page_frag_cache.h
@@ -7,6 +7,16 @@
 #include <linux/mm_types_task.h>
 #include <linux/types.h>
 
+static inline void page_frag_cache_init(struct page_frag_cache *nc)
+{
+	nc->va = NULL;
+}
+
+static inline bool page_frag_cache_is_pfmemalloc(struct page_frag_cache *nc)
+{
+	return !!nc->pfmemalloc;
+}
+
 void page_frag_cache_drain(struct page_frag_cache *nc);
 void __page_frag_cache_drain(struct page *page, unsigned int count);
 void *__page_frag_alloc_align(struct page_frag_cache *nc, unsigned int fragsz,
-- 
cgit v1.2.3


From 0c3ce2f50261cd2f654d931eeb933c370a3a7d7a Mon Sep 17 00:00:00 2001
From: Yunsheng Lin <linyunsheng@huawei.com>
Date: Mon, 28 Oct 2024 19:53:41 +0800
Subject: mm: page_frag: reuse existing space for 'size' and 'pfmemalloc'

Currently there is one 'struct page_frag' for every 'struct
sock' and 'struct task_struct', we are about to replace the
'struct page_frag' with 'struct page_frag_cache' for them.
Before begin the replacing, we need to ensure the size of
'struct page_frag_cache' is not bigger than the size of
'struct page_frag', as there may be tens of thousands of
'struct sock' and 'struct task_struct' instances in the
system.

By or'ing the page order & pfmemalloc with lower bits of
'va' instead of using 'u16' or 'u32' for page size and 'u8'
for pfmemalloc, we are able to avoid 3 or 5 bytes space waste.
And page address & pfmemalloc & order is unchanged for the
same page in the same 'page_frag_cache' instance, it makes
sense to fit them together.

After this patch, the size of 'struct page_frag_cache' should be
the same as the size of 'struct page_frag'.

CC: Andrew Morton <akpm@linux-foundation.org>
CC: Linux-MM <linux-mm@kvack.org>
Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
Reviewed-by: Alexander Duyck <alexanderduyck@fb.com>
Link: https://patch.msgid.link/20241028115343.3405838-7-linyunsheng@huawei.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/mm_types_task.h   | 19 +++++++++++--------
 include/linux/page_frag_cache.h | 24 ++++++++++++++++++++++--
 2 files changed, 33 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/include/linux/mm_types_task.h b/include/linux/mm_types_task.h
index 0ac6daebdd5c..a82aa80c0ba4 100644
--- a/include/linux/mm_types_task.h
+++ b/include/linux/mm_types_task.h
@@ -47,18 +47,21 @@ struct page_frag {
 #define PAGE_FRAG_CACHE_MAX_SIZE	__ALIGN_MASK(32768, ~PAGE_MASK)
 #define PAGE_FRAG_CACHE_MAX_ORDER	get_order(PAGE_FRAG_CACHE_MAX_SIZE)
 struct page_frag_cache {
-	void *va;
-#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
+	/* encoded_page consists of the virtual address, pfmemalloc bit and
+	 * order of a page.
+	 */
+	unsigned long encoded_page;
+
+	/* we maintain a pagecount bias, so that we dont dirty cache line
+	 * containing page->_refcount every time we allocate a fragment.
+	 */
+#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE) && (BITS_PER_LONG <= 32)
 	__u16 offset;
-	__u16 size;
+	__u16 pagecnt_bias;
 #else
 	__u32 offset;
+	__u32 pagecnt_bias;
 #endif
-	/* we maintain a pagecount bias, so that we dont dirty cache line
-	 * containing page->_refcount every time we allocate a fragment.
-	 */
-	unsigned int		pagecnt_bias;
-	bool pfmemalloc;
 };
 
 /* Track pages that require TLB flushes */
diff --git a/include/linux/page_frag_cache.h b/include/linux/page_frag_cache.h
index 0a52f7a179c8..41a91df82631 100644
--- a/include/linux/page_frag_cache.h
+++ b/include/linux/page_frag_cache.h
@@ -3,18 +3,38 @@
 #ifndef _LINUX_PAGE_FRAG_CACHE_H
 #define _LINUX_PAGE_FRAG_CACHE_H
 
+#include <linux/bits.h>
 #include <linux/log2.h>
 #include <linux/mm_types_task.h>
 #include <linux/types.h>
 
+#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
+/* Use a full byte here to enable assembler optimization as the shift
+ * operation is usually expecting a byte.
+ */
+#define PAGE_FRAG_CACHE_ORDER_MASK		GENMASK(7, 0)
+#else
+/* Compiler should be able to figure out we don't read things as any value
+ * ANDed with 0 is 0.
+ */
+#define PAGE_FRAG_CACHE_ORDER_MASK		0
+#endif
+
+#define PAGE_FRAG_CACHE_PFMEMALLOC_BIT		(PAGE_FRAG_CACHE_ORDER_MASK + 1)
+
+static inline bool encoded_page_decode_pfmemalloc(unsigned long encoded_page)
+{
+	return !!(encoded_page & PAGE_FRAG_CACHE_PFMEMALLOC_BIT);
+}
+
 static inline void page_frag_cache_init(struct page_frag_cache *nc)
 {
-	nc->va = NULL;
+	nc->encoded_page = 0;
 }
 
 static inline bool page_frag_cache_is_pfmemalloc(struct page_frag_cache *nc)
 {
-	return !!nc->pfmemalloc;
+	return encoded_page_decode_pfmemalloc(nc->encoded_page);
 }
 
 void page_frag_cache_drain(struct page_frag_cache *nc);
-- 
cgit v1.2.3


From d2bd39c0456b75be9dfc7d774b8d021355c26ae3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@linux.intel.com>
Date: Fri, 18 Oct 2024 17:47:49 +0300
Subject: PCI: Store all PCIe Supported Link Speeds
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The PCIe bandwidth controller added by a subsequent commit will require
selecting PCIe Link Speeds that are lower than the Maximum Link Speed.

The struct pci_bus only stores max_bus_speed. Even if PCIe r6.1 sec 8.2.1
currently disallows gaps in supported Link Speeds, the Implementation Note
in PCIe r6.1 sec 7.5.3.18, recommends determining supported Link Speeds
using the Supported Link Speeds Vector in the Link Capabilities 2 Register
(when available) to "avoid software being confused if a future
specification defines Links that do not require support for all slower
speeds."

Reuse code in pcie_get_speed_cap() to add pcie_get_supported_speeds() to
query the Supported Link Speeds Vector of a PCIe device. The value is taken
directly from the Supported Link Speeds Vector or synthesized from the Max
Link Speed in the Link Capabilities Register when the Link Capabilities 2
Register is not available.

The Supported Link Speeds Vector in the Link Capabilities Register 2
corresponds to the bus below on Root Ports and Downstream Ports, whereas it
corresponds to the bus above on Upstream Ports and Endpoints (PCIe r6.1 sec
7.5.3.18):

  Supported Link Speeds Vector - This field indicates the supported Link
  speed(s) of the associated Port.

Add supported_speeds into the struct pci_dev that caches the
Supported Link Speeds Vector.

supported_speeds contains a set of Link Speeds only in the case where PCIe
Link Speed can be determined. Root Complex Integrated Endpoints do not have
a well-defined Link Speed because they do not implement either of the Link
Capabilities Registers, which is allowed by PCIe r6.1 sec 7.5.3 (the same
limitation applies to determining cur_bus_speed and max_bus_speed that are
PCI_SPEED_UNKNOWN in such case). This is of no concern from PCIe bandwidth
controller point of view because such devices are not attached into a PCIe
Root Port that could be controlled.

The supported_speeds field keeps the extra reserved zero at the least
significant bit to match the Link Capabilities 2 Register layout.

An attempt was made to store supported_speeds field into the struct pci_bus
as an intersection of both ends of the Link, however, the subordinate
struct pci_bus is not available early enough. The Target Speed quirk (in
pcie_failed_link_retrain()) can run either during initial scan or later,
requiring it to use the API provided by the PCIe bandwidth controller to
set the Target Link Speed in order to co-exist with the bandwidth
controller. When the Target Speed quirk is calling the bandwidth controller
during initial scan, the struct pci_bus is not yet initialized. As such,
storing supported_speeds into the struct pci_bus is not viable.

Suggested-by: Lukas Wunner <lukas@wunner.de>
Link: https://lore.kernel.org/r/20241018144755.7875-4-ilpo.jarvinen@linux.intel.com
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
[bhelgaas: move pcie_get_supported_speeds() decl to drivers/pci/pci.h]
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 include/linux/pci.h           | 10 +++++++++-
 include/uapi/linux/pci_regs.h |  1 +
 2 files changed, 10 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/pci.h b/include/linux/pci.h
index be5ed534c39c..99c6fa30d25b 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -318,7 +318,14 @@ struct pci_sriov;
 struct pci_p2pdma;
 struct rcec_ea;
 
-/* The pci_dev structure describes PCI devices */
+/* struct pci_dev - describes a PCI device
+ *
+ * @supported_speeds:	PCIe Supported Link Speeds Vector (+ reserved 0 at
+ *			LSB). 0 when the supported speeds cannot be
+ *			determined (e.g., for Root Complex Integrated
+ *			Endpoints without the relevant Capability
+ *			Registers).
+ */
 struct pci_dev {
 	struct list_head bus_list;	/* Node in per-bus list */
 	struct pci_bus	*bus;		/* Bus this device is on */
@@ -522,6 +529,7 @@ struct pci_dev {
 	struct npem	*npem;		/* Native PCIe Enclosure Management */
 #endif
 	u16		acs_cap;	/* ACS Capability offset */
+	u8		supported_speeds; /* Supported Link Speeds Vector */
 	phys_addr_t	rom;		/* Physical address if not from BAR */
 	size_t		romlen;		/* Length if not from BAR */
 	/*
diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h
index 12323b3334a9..f3c9de0a497c 100644
--- a/include/uapi/linux/pci_regs.h
+++ b/include/uapi/linux/pci_regs.h
@@ -678,6 +678,7 @@
 #define PCI_EXP_DEVSTA2		0x2a	/* Device Status 2 */
 #define PCI_CAP_EXP_RC_ENDPOINT_SIZEOF_V2 0x2c	/* end of v2 EPs w/o link */
 #define PCI_EXP_LNKCAP2		0x2c	/* Link Capabilities 2 */
+#define  PCI_EXP_LNKCAP2_SLS		0x000000fe /* Supported Link Speeds Vector */
 #define  PCI_EXP_LNKCAP2_SLS_2_5GB	0x00000002 /* Supported Speed 2.5GT/s */
 #define  PCI_EXP_LNKCAP2_SLS_5_0GB	0x00000004 /* Supported Speed 5GT/s */
 #define  PCI_EXP_LNKCAP2_SLS_8_0GB	0x00000008 /* Supported Speed 8GT/s */
-- 
cgit v1.2.3


From 38a18dfe9035d5a02a53271824de1854129c61dc Mon Sep 17 00:00:00 2001
From: Keith Busch <kbusch@kernel.org>
Date: Tue, 22 Oct 2024 15:48:51 -0700
Subject: PCI: Unexport pci_walk_bus_locked()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

There's only one user of pci_walk_bus_locked(), and it's internal to the
PCI core.  Unexport it and make it private to drivers/pci/.

Link: https://lore.kernel.org/r/20241022224851.340648-6-kbusch@meta.com
Signed-off-by: Keith Busch <kbusch@kernel.org>
[bhelgaas: move decl to drivers/pci/pci.h]
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Reviewed-by: Davidlohr Bueso <dave@stgolabs.net>
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
---
 include/linux/pci.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/pci.h b/include/linux/pci.h
index 573b4c4c2be6..056290377273 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1623,8 +1623,6 @@ int pci_scan_bridge(struct pci_bus *bus, struct pci_dev *dev, int max,
 
 void pci_walk_bus(struct pci_bus *top, int (*cb)(struct pci_dev *, void *),
 		  void *userdata);
-void pci_walk_bus_locked(struct pci_bus *top, int (*cb)(struct pci_dev *, void *),
-			 void *userdata);
 int pci_cfg_space_size(struct pci_dev *dev);
 unsigned char pci_bus_max_busnr(struct pci_bus *bus);
 void pci_setup_bridge(struct pci_bus *bus);
-- 
cgit v1.2.3


From fb56007c9bc38550755a53f7afd71a287340b724 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 6 Nov 2024 06:10:19 +0000
Subject: vt_buffer.h: get rid of dead code in default scr_...() instances

Only 4 architectures define VT_BUF_HAVE_RW (alpha, mips, powerpc, sparc)
and all of them define VT_BUF_HAVE_MEM{SET,CPY,MOVE}W.  In other
words, the code under #ifdef VT_BUF_HAVE_RW in default scr_mem...w()
instances won't be compiled anyway.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 include/linux/vt_buffer.h | 24 ------------------------
 1 file changed, 24 deletions(-)

(limited to 'include')

diff --git a/include/linux/vt_buffer.h b/include/linux/vt_buffer.h
index 919d999a8c1d..b6eeb8cb6070 100644
--- a/include/linux/vt_buffer.h
+++ b/include/linux/vt_buffer.h
@@ -28,45 +28,21 @@
 #ifndef VT_BUF_HAVE_MEMSETW
 static inline void scr_memsetw(u16 *s, u16 c, unsigned int count)
 {
-#ifdef VT_BUF_HAVE_RW
-	count /= 2;
-	while (count--)
-		scr_writew(c, s++);
-#else
 	memset16(s, c, count / 2);
-#endif
 }
 #endif
 
 #ifndef VT_BUF_HAVE_MEMCPYW
 static inline void scr_memcpyw(u16 *d, const u16 *s, unsigned int count)
 {
-#ifdef VT_BUF_HAVE_RW
-	count /= 2;
-	while (count--)
-		scr_writew(scr_readw(s++), d++);
-#else
 	memcpy(d, s, count);
-#endif
 }
 #endif
 
 #ifndef VT_BUF_HAVE_MEMMOVEW
 static inline void scr_memmovew(u16 *d, const u16 *s, unsigned int count)
 {
-#ifdef VT_BUF_HAVE_RW
-	if (d < s)
-		scr_memcpyw(d, s, count);
-	else {
-		count /= 2;
-		d += count;
-		s += count;
-		while (count--)
-			scr_writew(scr_readw(--s), --d);
-	}
-#else
 	memmove(d, s, count);
-#endif
 }
 #endif
 
-- 
cgit v1.2.3


From 0af8e32343f8d0db31f593464fc140eaef25a281 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 6 Nov 2024 06:11:47 +0000
Subject: empty include/asm-generic/vga.h

all places that use anything defined in it (vgacon, mdacon and
vga16fb) are built only on architectures that have all that
stuff in their native asm/vga.h

allows to kill stub asm/vga.h on sh, while we are at it...

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 include/asm-generic/vga.h | 23 +----------------------
 1 file changed, 1 insertion(+), 22 deletions(-)

(limited to 'include')

diff --git a/include/asm-generic/vga.h b/include/asm-generic/vga.h
index adf91a783b5c..5dcaf4ae904a 100644
--- a/include/asm-generic/vga.h
+++ b/include/asm-generic/vga.h
@@ -1,25 +1,4 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/*
- *	Access to VGA videoram
- *
- *	(c) 1998 Martin Mares <mj@ucw.cz>
- */
 #ifndef __ASM_GENERIC_VGA_H
 #define __ASM_GENERIC_VGA_H
-
-/*
- *	On most architectures that support VGA, we can just
- *	recalculate addresses and then access the videoram
- *	directly without any black magic.
- *
- *	Everyone else needs to ioremap the address and use
- *	proper I/O accesses.
- */
-#ifndef VGA_MAP_MEM
-#define VGA_MAP_MEM(x, s) (unsigned long)phys_to_virt(x)
-#endif
-
-#define vga_readb(x) (*(x))
-#define vga_writeb(x, y) (*(y) = (x))
-
-#endif /* _ASM_GENERIC_VGA_H */
+#endif /* __ASM_GENERIC_VGA_H */
-- 
cgit v1.2.3


From 82ee16cfb290ae259d1cd6658a6988b430258e94 Mon Sep 17 00:00:00 2001
From: Karel Balej <balejk@matfyz.cz>
Date: Sat, 12 Oct 2024 21:31:39 +0200
Subject: rtc: add driver for Marvell 88PM886 PMIC RTC

RTC lives on the chip's base register page. Add the relevant register
definitions and implement a basic set/read time functionality. Tested
with the samsung,coreprimevelte smartphone which contains this PMIC and
whose vendor kernel tree has also served as the sole reference for this.

Signed-off-by: Karel Balej <balejk@matfyz.cz>
Acked-by: Lee Jones <lee@kernel.org>
Link: https://lore.kernel.org/r/20241012193345.18594-2-balejk@matfyz.cz
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
---
 include/linux/mfd/88pm886.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include')

diff --git a/include/linux/mfd/88pm886.h b/include/linux/mfd/88pm886.h
index 133aa302e492..85eca44f39ab 100644
--- a/include/linux/mfd/88pm886.h
+++ b/include/linux/mfd/88pm886.h
@@ -31,6 +31,15 @@
 #define PM886_INT_WC			BIT(1)
 #define PM886_INT_MASK_MODE		BIT(2)
 
+#define PM886_REG_RTC_CNT1		0xd1
+#define PM886_REG_RTC_CNT2		0xd2
+#define PM886_REG_RTC_CNT3		0xd3
+#define PM886_REG_RTC_CNT4		0xd4
+#define PM886_REG_RTC_SPARE1		0xea
+#define PM886_REG_RTC_SPARE2		0xeb
+#define PM886_REG_RTC_SPARE3		0xec
+#define PM886_REG_RTC_SPARE4		0xed
+#define PM886_REG_RTC_SPARE5		0xee
 #define PM886_REG_RTC_SPARE6		0xef
 
 #define PM886_REG_BUCK_EN		0x08
-- 
cgit v1.2.3


From 8b9a7bd4d6c83300e50bb1d7071c6032a07e2fed Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 6 Nov 2024 13:00:45 +0000
Subject: rxrpc: Add a tracepoint for aborts being proposed

Add a tracepoint to rxrpc to trace the proposal of an abort.  The abort is
performed asynchronously by the I/O thread.

Signed-off-by: David Howells <dhowells@redhat.com>
cc: Marc Dionne <marc.dionne@auristor.com>
cc: Simon Horman <horms@kernel.org>
cc: linux-afs@lists.infradead.org
Link: https://patch.msgid.link/726356.1730898045@warthog.procyon.org.uk
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/trace/events/rxrpc.h | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

(limited to 'include')

diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h
index cc22596c7250..d03e0bd8c028 100644
--- a/include/trace/events/rxrpc.h
+++ b/include/trace/events/rxrpc.h
@@ -773,6 +773,31 @@ TRACE_EVENT(rxrpc_rx_done,
 	    TP_printk("r=%d a=%d", __entry->result, __entry->abort_code)
 	    );
 
+TRACE_EVENT(rxrpc_abort_call,
+	    TP_PROTO(const struct rxrpc_call *call, int abort_code),
+
+	    TP_ARGS(call, abort_code),
+
+	    TP_STRUCT__entry(
+		    __field(unsigned int,		call_nr)
+		    __field(enum rxrpc_abort_reason,	why)
+		    __field(int,			abort_code)
+		    __field(int,			error)
+			     ),
+
+	    TP_fast_assign(
+		    __entry->call_nr	= call->debug_id;
+		    __entry->why	= call->send_abort_why;
+		    __entry->abort_code	= abort_code;
+		    __entry->error	= call->send_abort_err;
+			   ),
+
+	    TP_printk("c=%08x a=%d e=%d %s",
+		      __entry->call_nr,
+		      __entry->abort_code, __entry->error,
+		      __print_symbolic(__entry->why, rxrpc_abort_reasons))
+	    );
+
 TRACE_EVENT(rxrpc_abort,
 	    TP_PROTO(unsigned int call_nr, enum rxrpc_abort_reason why,
 		     u32 cid, u32 call_id, rxrpc_seq_t seq, int abort_code, int error),
-- 
cgit v1.2.3


From a7306f3c283bfe03611229bb6280987aae2af8f9 Mon Sep 17 00:00:00 2001
From: Nataniel Farzan <natanielfarzan@gmail.com>
Date: Mon, 4 Nov 2024 19:22:32 -0800
Subject: Improve consistency of '#error' directive messages

Remove the use of contractions and use proper punctuation in #error
directive messages that discourage the direct inclusion of header files.

Link: https://lkml.kernel.org/r/20241105032231.28833-1-natanielfarzan@gmail.com
Signed-off-by: Nataniel Farzan <natanielfarzan@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/acpi/platform/aclinux.h   | 2 +-
 include/linux/compiler-clang.h    | 2 +-
 include/linux/compiler-gcc.h      | 2 +-
 include/linux/pm_wakeup.h         | 2 +-
 include/linux/rwlock.h            | 2 +-
 include/linux/rwlock_api_smp.h    | 2 +-
 include/linux/spinlock_api_smp.h  | 2 +-
 include/linux/spinlock_types_up.h | 2 +-
 include/linux/spinlock_up.h       | 2 +-
 9 files changed, 9 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/include/acpi/platform/aclinux.h b/include/acpi/platform/aclinux.h
index 565341c826e3..f3249b7df5cb 100644
--- a/include/acpi/platform/aclinux.h
+++ b/include/acpi/platform/aclinux.h
@@ -15,7 +15,7 @@
 /* ACPICA external files should not include ACPICA headers directly. */
 
 #if !defined(BUILDING_ACPICA) && !defined(_LINUX_ACPI_H)
-#error "Please don't include <acpi/acpi.h> directly, include <linux/acpi.h> instead."
+#error "Please do not include <acpi/acpi.h> directly, include <linux/acpi.h> instead."
 #endif
 
 #endif
diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h
index 4c1a39dcb624..2e7c2c282f3a 100644
--- a/include/linux/compiler-clang.h
+++ b/include/linux/compiler-clang.h
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 #ifndef __LINUX_COMPILER_TYPES_H
-#error "Please don't include <linux/compiler-clang.h> directly, include <linux/compiler.h> instead."
+#error "Please do not include <linux/compiler-clang.h> directly, include <linux/compiler.h> instead."
 #endif
 
 /* Compiler specific definitions for Clang compiler */
diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index cd6f9aae311f..d0ed9583743f 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 #ifndef __LINUX_COMPILER_TYPES_H
-#error "Please don't include <linux/compiler-gcc.h> directly, include <linux/compiler.h> instead."
+#error "Please do not include <linux/compiler-gcc.h> directly, include <linux/compiler.h> instead."
 #endif
 
 /*
diff --git a/include/linux/pm_wakeup.h b/include/linux/pm_wakeup.h
index 76cd1f9f1365..222f7530806c 100644
--- a/include/linux/pm_wakeup.h
+++ b/include/linux/pm_wakeup.h
@@ -10,7 +10,7 @@
 #define _LINUX_PM_WAKEUP_H
 
 #ifndef _DEVICE_H_
-# error "please don't include this file directly"
+# error "Please do not include this file directly."
 #endif
 
 #include <linux/types.h>
diff --git a/include/linux/rwlock.h b/include/linux/rwlock.h
index c0ef596f340b..5b87c6f4a243 100644
--- a/include/linux/rwlock.h
+++ b/include/linux/rwlock.h
@@ -2,7 +2,7 @@
 #define __LINUX_RWLOCK_H
 
 #ifndef __LINUX_INSIDE_SPINLOCK_H
-# error "please don't include this file directly"
+# error "Please do not include this file directly."
 #endif
 
 /*
diff --git a/include/linux/rwlock_api_smp.h b/include/linux/rwlock_api_smp.h
index dceb0a59b692..31d3d1116323 100644
--- a/include/linux/rwlock_api_smp.h
+++ b/include/linux/rwlock_api_smp.h
@@ -2,7 +2,7 @@
 #define __LINUX_RWLOCK_API_SMP_H
 
 #ifndef __LINUX_SPINLOCK_API_SMP_H
-# error "please don't include this file directly"
+# error "Please do not include this file directly."
 #endif
 
 /*
diff --git a/include/linux/spinlock_api_smp.h b/include/linux/spinlock_api_smp.h
index 89eb6f4c659c..9ecb0ab504e3 100644
--- a/include/linux/spinlock_api_smp.h
+++ b/include/linux/spinlock_api_smp.h
@@ -2,7 +2,7 @@
 #define __LINUX_SPINLOCK_API_SMP_H
 
 #ifndef __LINUX_INSIDE_SPINLOCK_H
-# error "please don't include this file directly"
+# error "Please do not include this file directly."
 #endif
 
 /*
diff --git a/include/linux/spinlock_types_up.h b/include/linux/spinlock_types_up.h
index 7f86a2016ac5..fc4e2d017c20 100644
--- a/include/linux/spinlock_types_up.h
+++ b/include/linux/spinlock_types_up.h
@@ -2,7 +2,7 @@
 #define __LINUX_SPINLOCK_TYPES_UP_H
 
 #ifndef __LINUX_SPINLOCK_TYPES_RAW_H
-# error "please don't include this file directly"
+# error "Please do not include this file directly."
 #endif
 
 /*
diff --git a/include/linux/spinlock_up.h b/include/linux/spinlock_up.h
index c87204247592..1e84e71ca495 100644
--- a/include/linux/spinlock_up.h
+++ b/include/linux/spinlock_up.h
@@ -2,7 +2,7 @@
 #define __LINUX_SPINLOCK_UP_H
 
 #ifndef __LINUX_INSIDE_SPINLOCK_H
-# error "please don't include this file directly"
+# error "Please do not include this file directly."
 #endif
 
 #include <asm/processor.h>	/* for cpu_relax() */
-- 
cgit v1.2.3


From bc73b4186736341ab5cd2c199da82db6e1134e13 Mon Sep 17 00:00:00 2001
From: Alexandru Ardelean <aardelean@baylibre.com>
Date: Tue, 5 Nov 2024 16:54:05 +0200
Subject: util_macros.h: fix/rework find_closest() macros

A bug was found in the find_closest() (find_closest_descending() is also
affected after some testing), where for certain values with small
progressions, the rounding (done by averaging 2 values) causes an
incorrect index to be returned.  The rounding issues occur for
progressions of 1, 2 and 3.  It goes away when the progression/interval
between two values is 4 or larger.

It's particularly bad for progressions of 1.  For example if there's an
array of 'a = { 1, 2, 3 }', using 'find_closest(2, a ...)' would return 0
(the index of '1'), rather than returning 1 (the index of '2').  This
means that for exact values (with a progression of 1), find_closest() will
misbehave and return the index of the value smaller than the one we're
searching for.

For progressions of 2 and 3, the exact values are obtained correctly; but
values aren't approximated correctly (as one would expect).  Starting with
progressions of 4, all seems to be good (one gets what one would expect).

While one could argue that 'find_closest()' should not be used for arrays
with progressions of 1 (i.e. '{1, 2, 3, ...}', the macro should still
behave correctly.

The bug was found while testing the 'drivers/iio/adc/ad7606.c',
specifically the oversampling feature.
For reference, the oversampling values are listed as:
   static const unsigned int ad7606_oversampling_avail[7] = {
          1, 2, 4, 8, 16, 32, 64,
   };

When doing:
  1. $ echo 1 > /sys/bus/iio/devices/iio\:device0/oversampling_ratio
     $ cat /sys/bus/iio/devices/iio\:device0/oversampling_ratio
     1  # this is fine
  2. $ echo 2 > /sys/bus/iio/devices/iio\:device0/oversampling_ratio
     $ cat /sys/bus/iio/devices/iio\:device0/oversampling_ratio
     1  # this is wrong; 2 should be returned here
  3. $ echo 3 > /sys/bus/iio/devices/iio\:device0/oversampling_ratio
     $ cat /sys/bus/iio/devices/iio\:device0/oversampling_ratio
     2  # this is fine
  4. $ echo 4 > /sys/bus/iio/devices/iio\:device0/oversampling_ratio
     $ cat /sys/bus/iio/devices/iio\:device0/oversampling_ratio
     4  # this is fine
And from here-on, the values are as correct (one gets what one would
expect.)

While writing a kunit test for this bug, a peculiar issue was found for the
array in the 'drivers/hwmon/ina2xx.c' & 'drivers/iio/adc/ina2xx-adc.c'
drivers. While running the kunit test (for 'ina226_avg_tab' from these
drivers):
  * idx = find_closest([-1 to 2], ina226_avg_tab, ARRAY_SIZE(ina226_avg_tab));
    This returns idx == 0, so value.
  * idx = find_closest(3, ina226_avg_tab, ARRAY_SIZE(ina226_avg_tab));
    This returns idx == 0, value 1; and now one could argue whether 3 is
    closer to 4 or to 1. This quirk only appears for value '3' in this
    array, but it seems to be a another rounding issue.
  * And from 4 onwards the 'find_closest'() works fine (one gets what one
    would expect).

This change reworks the find_closest() macros to also check the difference
between the left and right elements when 'x'. If the distance to the right
is smaller (than the distance to the left), the index is incremented by 1.
This also makes redundant the need for using the DIV_ROUND_CLOSEST() macro.

In order to accommodate for any mix of negative + positive values, the
internal variables '__fc_x', '__fc_mid_x', '__fc_left' & '__fc_right' are
forced to 'long' type. This also addresses any potential bugs/issues with
'x' being of an unsigned type. In those situations any comparison between
signed & unsigned would be promoted to a comparison between 2 unsigned
numbers; this is especially annoying when '__fc_left' & '__fc_right'
underflow.

The find_closest_descending() macro was also reworked and duplicated from
the find_closest(), and it is being iterated in reverse. The main reason
for this is to get the same indices as 'find_closest()' (but in reverse).
The comparison for '__fc_right < __fc_left' favors going the array in
ascending order.
For example for array '{ 1024, 512, 256, 128, 64, 16, 4, 1 }' and x = 3, we
get:
    __fc_mid_x = 2
    __fc_left = -1
    __fc_right = -2
    Then '__fc_right < __fc_left' evaluates to true and '__fc_i++' becomes 7
    which is not quite incorrect, but 3 is closer to 4 than to 1.

This change has been validated with the kunit from the next patch.

Link: https://lkml.kernel.org/r/20241105145406.554365-1-aardelean@baylibre.com
Fixes: 95d119528b0b ("util_macros.h: add find_closest() macro")
Signed-off-by: Alexandru Ardelean <aardelean@baylibre.com>
Cc: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/util_macros.h | 56 ++++++++++++++++++++++++++++++++-------------
 1 file changed, 40 insertions(+), 16 deletions(-)

(limited to 'include')

diff --git a/include/linux/util_macros.h b/include/linux/util_macros.h
index 6bb460c3e818..825487fb66fa 100644
--- a/include/linux/util_macros.h
+++ b/include/linux/util_macros.h
@@ -4,19 +4,6 @@
 
 #include <linux/math.h>
 
-#define __find_closest(x, a, as, op)					\
-({									\
-	typeof(as) __fc_i, __fc_as = (as) - 1;				\
-	typeof(x) __fc_x = (x);						\
-	typeof(*a) const *__fc_a = (a);					\
-	for (__fc_i = 0; __fc_i < __fc_as; __fc_i++) {			\
-		if (__fc_x op DIV_ROUND_CLOSEST(__fc_a[__fc_i] +	\
-						__fc_a[__fc_i + 1], 2))	\
-			break;						\
-	}								\
-	(__fc_i);							\
-})
-
 /**
  * find_closest - locate the closest element in a sorted array
  * @x: The reference value.
@@ -25,8 +12,27 @@
  * @as: Size of 'a'.
  *
  * Returns the index of the element closest to 'x'.
+ * Note: If using an array of negative numbers (or mixed positive numbers),
+ *       then be sure that 'x' is of a signed-type to get good results.
  */
-#define find_closest(x, a, as) __find_closest(x, a, as, <=)
+#define find_closest(x, a, as)						\
+({									\
+	typeof(as) __fc_i, __fc_as = (as) - 1;				\
+	long __fc_mid_x, __fc_x = (x);					\
+	long __fc_left, __fc_right;					\
+	typeof(*a) const *__fc_a = (a);					\
+	for (__fc_i = 0; __fc_i < __fc_as; __fc_i++) {			\
+		__fc_mid_x = (__fc_a[__fc_i] + __fc_a[__fc_i + 1]) / 2;	\
+		if (__fc_x <= __fc_mid_x) {				\
+			__fc_left = __fc_x - __fc_a[__fc_i];		\
+			__fc_right = __fc_a[__fc_i + 1] - __fc_x;	\
+			if (__fc_right < __fc_left)			\
+				__fc_i++;				\
+			break;						\
+		}							\
+	}								\
+	(__fc_i);							\
+})
 
 /**
  * find_closest_descending - locate the closest element in a sorted array
@@ -36,9 +42,27 @@
  * @as: Size of 'a'.
  *
  * Similar to find_closest() but 'a' is expected to be sorted in descending
- * order.
+ * order. The iteration is done in reverse order, so that the comparison
+ * of '__fc_right' & '__fc_left' also works for unsigned numbers.
  */
-#define find_closest_descending(x, a, as) __find_closest(x, a, as, >=)
+#define find_closest_descending(x, a, as)				\
+({									\
+	typeof(as) __fc_i, __fc_as = (as) - 1;				\
+	long __fc_mid_x, __fc_x = (x);					\
+	long __fc_left, __fc_right;					\
+	typeof(*a) const *__fc_a = (a);					\
+	for (__fc_i = __fc_as; __fc_i >= 1; __fc_i--) {			\
+		__fc_mid_x = (__fc_a[__fc_i] + __fc_a[__fc_i - 1]) / 2;	\
+		if (__fc_x <= __fc_mid_x) {				\
+			__fc_left = __fc_x - __fc_a[__fc_i];		\
+			__fc_right = __fc_a[__fc_i - 1] - __fc_x;	\
+			if (__fc_right < __fc_left)			\
+				__fc_i--;				\
+			break;						\
+		}							\
+	}								\
+	(__fc_i);							\
+})
 
 /**
  * is_insidevar - check if the @ptr points inside the @var memory range.
-- 
cgit v1.2.3


From 3f28bbe56c7b77e73f1dd0515cad009cfdd64962 Mon Sep 17 00:00:00 2001
From: Kairui Song <kasong@tencent.com>
Date: Tue, 5 Nov 2024 01:52:52 +0800
Subject: mm/list_lru: don't pass unnecessary key parameters

Patch series "mm/list_lru: Split list_lru lock into per-cgroup scope".

When LOCKDEP is not enabled, lock_class_key is an empty struct that is
never used.  But the list_lru initialization function still takes a
placeholder pointer as parameter, and the compiler cannot optimize it
because the function is not static and exported.

Remove this parameter and move it inside the list_lru struct.  Only use it
when LOCKDEP is enabled.  Kernel builds with LOCKDEP will be slightly
larger, while !LOCKDEP builds without it will be slightly smaller (the
common case).

Link: https://lkml.kernel.org/r/20241104175257.60853-1-ryncsn@gmail.com
Link: https://lkml.kernel.org/r/20241104175257.60853-2-ryncsn@gmail.com
Signed-off-by: Kairui Song <kasong@tencent.com>
Acked-by: Shakeel Butt <shakeel.butt@linux.dev>
Cc: Chengming Zhou <zhouchengming@bytedance.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Qi Zheng <zhengqi.arch@bytedance.com>
Cc: Roman Gushchin <roman.gushchin@linux.dev>
Cc: Waiman Long <longman@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/list_lru.h | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/list_lru.h b/include/linux/list_lru.h
index 5099a8ccd5f4..eba93f6511f3 100644
--- a/include/linux/list_lru.h
+++ b/include/linux/list_lru.h
@@ -56,16 +56,28 @@ struct list_lru {
 	bool			memcg_aware;
 	struct xarray		xa;
 #endif
+#ifdef CONFIG_LOCKDEP
+	struct lock_class_key	*key;
+#endif
 };
 
 void list_lru_destroy(struct list_lru *lru);
 int __list_lru_init(struct list_lru *lru, bool memcg_aware,
-		    struct lock_class_key *key, struct shrinker *shrinker);
+		    struct shrinker *shrinker);
 
 #define list_lru_init(lru)				\
-	__list_lru_init((lru), false, NULL, NULL)
+	__list_lru_init((lru), false, NULL)
 #define list_lru_init_memcg(lru, shrinker)		\
-	__list_lru_init((lru), true, NULL, shrinker)
+	__list_lru_init((lru), true, shrinker)
+
+static inline int list_lru_init_memcg_key(struct list_lru *lru, struct shrinker *shrinker,
+					  struct lock_class_key *key)
+{
+#ifdef CONFIG_LOCKDEP
+	lru->key = key;
+#endif
+	return list_lru_init_memcg(lru, shrinker);
+}
 
 int memcg_list_lru_alloc(struct mem_cgroup *memcg, struct list_lru *lru,
 			 gfp_t gfp);
-- 
cgit v1.2.3


From fb56fdf8b9a2f7397f8a83dce50189f3f0cf71af Mon Sep 17 00:00:00 2001
From: Kairui Song <kasong@tencent.com>
Date: Tue, 5 Nov 2024 01:52:56 +0800
Subject: mm/list_lru: split the lock to per-cgroup scope

Currently, every list_lru has a per-node lock that protects adding,
deletion, isolation, and reparenting of all list_lru_one instances
belonging to this list_lru on this node.  This lock contention is heavy
when multiple cgroups modify the same list_lru.

This lock can be split into per-cgroup scope to reduce contention.

To achieve this, we need a stable list_lru_one for every cgroup.  This
commit adds a lock to each list_lru_one and introduced a helper function
lock_list_lru_of_memcg, making it possible to pin the list_lru of a memcg.
Then reworked the reparenting process.

Reparenting will switch the list_lru_one instances one by one.  By locking
each instance and marking it dead using the nr_items counter, reparenting
ensures that all items in the corresponding cgroup (on-list or not,
because items have a stable cgroup, see below) will see the list_lru_one
switch synchronously.

Objcg reparent is also moved after list_lru reparent so items will have a
stable mem cgroup until all list_lru_one instances are drained.

The only caller that doesn't work the *_obj interfaces are direct calls to
list_lru_{add,del}.  But it's only used by zswap and that's also based on
objcg, so it's fine.

This also changes the bahaviour of the isolation function when LRU_RETRY
or LRU_REMOVED_RETRY is returned, because now releasing the lock could
unblock reparenting and free the list_lru_one, isolation function will
have to return withoug re-lock the lru.

prepare() {
    mkdir /tmp/test-fs
    modprobe brd rd_nr=1 rd_size=33554432
    mkfs.xfs -f /dev/ram0
    mount -t xfs /dev/ram0 /tmp/test-fs
    for i in $(seq 1 512); do
        mkdir "/tmp/test-fs/$i"
        for j in $(seq 1 10240); do
            echo TEST-CONTENT > "/tmp/test-fs/$i/$j"
        done &
    done; wait
}

do_test() {
    read_worker() {
        sleep 1
        tar -cv "$1" &>/dev/null
    }
    read_in_all() {
        cd "/tmp/test-fs" && ls
        for i in $(seq 1 512); do
            (exec sh -c 'echo "$PPID"') > "/sys/fs/cgroup/benchmark/$i/cgroup.procs"
            read_worker "$i" &
        done; wait
    }
    for i in $(seq 1 512); do
        mkdir -p "/sys/fs/cgroup/benchmark/$i"
    done
    echo +memory > /sys/fs/cgroup/benchmark/cgroup.subtree_control
    echo 512M > /sys/fs/cgroup/benchmark/memory.max
    echo 3 > /proc/sys/vm/drop_caches
    time read_in_all
}

Above script simulates compression of small files in multiple cgroups
with memory pressure. Run prepare() then do_test for 6 times:

Before:
real      0m7.762s user      0m11.340s sys       3m11.224s
real      0m8.123s user      0m11.548s sys       3m2.549s
real      0m7.736s user      0m11.515s sys       3m11.171s
real      0m8.539s user      0m11.508s sys       3m7.618s
real      0m7.928s user      0m11.349s sys       3m13.063s
real      0m8.105s user      0m11.128s sys       3m14.313s

After this commit (about ~15% faster):
real      0m6.953s user      0m11.327s sys       2m42.912s
real      0m7.453s user      0m11.343s sys       2m51.942s
real      0m6.916s user      0m11.269s sys       2m43.957s
real      0m6.894s user      0m11.528s sys       2m45.346s
real      0m6.911s user      0m11.095s sys       2m43.168s
real      0m6.773s user      0m11.518s sys       2m40.774s

Link: https://lkml.kernel.org/r/20241104175257.60853-6-ryncsn@gmail.com
Signed-off-by: Kairui Song <kasong@tencent.com>
Cc: Chengming Zhou <zhouchengming@bytedance.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Qi Zheng <zhengqi.arch@bytedance.com>
Cc: Roman Gushchin <roman.gushchin@linux.dev>
Cc: Shakeel Butt <shakeel.butt@linux.dev>
Cc: Waiman Long <longman@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/list_lru.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/list_lru.h b/include/linux/list_lru.h
index eba93f6511f3..10ba9a54d42c 100644
--- a/include/linux/list_lru.h
+++ b/include/linux/list_lru.h
@@ -32,6 +32,8 @@ struct list_lru_one {
 	struct list_head	list;
 	/* may become negative during memcg reparenting */
 	long			nr_items;
+	/* protects all fields above */
+	spinlock_t		lock;
 };
 
 struct list_lru_memcg {
@@ -41,11 +43,9 @@ struct list_lru_memcg {
 };
 
 struct list_lru_node {
-	/* protects all lists on the node, including per cgroup */
-	spinlock_t		lock;
 	/* global list, used for the root cgroup in cgroup aware lrus */
 	struct list_lru_one	lru;
-	long			nr_items;
+	atomic_long_t		nr_items;
 } ____cacheline_aligned_in_smp;
 
 struct list_lru {
-- 
cgit v1.2.3


From da0c02516c501b43bd39ad4aca5779c86153d143 Mon Sep 17 00:00:00 2001
From: Kairui Song <kasong@tencent.com>
Date: Tue, 5 Nov 2024 01:52:57 +0800
Subject: mm/list_lru: simplify the list_lru walk callback function

Now isolation no longer takes the list_lru global node lock, only use the
per-cgroup lock instead.  And this lock is inside the list_lru_one being
walked, no longer needed to pass the lock explicitly.

Link: https://lkml.kernel.org/r/20241104175257.60853-7-ryncsn@gmail.com
Signed-off-by: Kairui Song <kasong@tencent.com>
Cc: Chengming Zhou <zhouchengming@bytedance.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Qi Zheng <zhengqi.arch@bytedance.com>
Cc: Roman Gushchin <roman.gushchin@linux.dev>
Cc: Shakeel Butt <shakeel.butt@linux.dev>
Cc: Waiman Long <longman@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/list_lru.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/list_lru.h b/include/linux/list_lru.h
index 10ba9a54d42c..05c166811f6b 100644
--- a/include/linux/list_lru.h
+++ b/include/linux/list_lru.h
@@ -184,7 +184,7 @@ void list_lru_isolate_move(struct list_lru_one *list, struct list_head *item,
 			   struct list_head *head);
 
 typedef enum lru_status (*list_lru_walk_cb)(struct list_head *item,
-		struct list_lru_one *list, spinlock_t *lock, void *cb_arg);
+		struct list_lru_one *list, void *cb_arg);
 
 /**
  * list_lru_walk_one: walk a @lru, isolating and disposing freeable items.
-- 
cgit v1.2.3


From 7591c127f3b17d5879f18819cad7058bf3a2e276 Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Mon, 4 Nov 2024 11:19:44 +0000
Subject: kmemleak: iommu/iova: fix transient kmemleak false positive

The introduction of iova_depot_pop() in 911aa1245da8 ("iommu/iova: Make
the rcache depot scale better") confused kmemleak by moving a struct
iova_magazine object from a singly linked list to rcache->depot and
resetting the 'next' pointer referencing it.  Unlike doubly linked lists,
the content of the object being referred is never changed on removal from
a singly linked list and the kmemleak checksum heuristics do not detect
such scenario.  This leads to false positives like:

unreferenced object 0xffff8881a5301000 (size 1024):
  comm "softirq", pid 0, jiffies 4306297099 (age 462.991s)
  hex dump (first 32 bytes):
    00 00 00 00 00 00 00 00 e7 7d 05 00 00 00 00 00  .........}......
    0f b4 05 00 00 00 00 00 b4 96 05 00 00 00 00 00  ................
  backtrace:
    [<ffffffff819f5f08>] __kmem_cache_alloc_node+0x1e8/0x320
    [<ffffffff818a239a>] kmalloc_trace+0x2a/0x60
    [<ffffffff8231d31e>] free_iova_fast+0x28e/0x4e0
    [<ffffffff82310860>] fq_ring_free_locked+0x1b0/0x310
    [<ffffffff8231225d>] fq_flush_timeout+0x19d/0x2e0
    [<ffffffff813e95ba>] call_timer_fn+0x19a/0x5c0
    [<ffffffff813ea16b>] __run_timers+0x78b/0xb80
    [<ffffffff813ea5bd>] run_timer_softirq+0x5d/0xd0
    [<ffffffff82f1d915>] __do_softirq+0x205/0x8b5

Introduce kmemleak_transient_leak() which resets the object checksum
requiring another scan pass before it is reported (if still unreferenced).
Call this new API in iova_depot_pop().

Link: https://lkml.kernel.org/r/20241104111944.2207155-1-catalin.marinas@arm.com
Link: https://lore.kernel.org/r/ZY1osaGLyT-sdKE8@shredder/
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Reported-by: Ido Schimmel <idosch@idosch.org>
Tested-by: Ido Schimmel <idosch@nvidia.com>
Acked-by: Robin Murphy <robin.murphy@arm.com>
Cc: Joerg Roedel <joro@8bytes.org>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/kmemleak.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/linux/kmemleak.h b/include/linux/kmemleak.h
index 6a3cd1bf4680..93a73c076d16 100644
--- a/include/linux/kmemleak.h
+++ b/include/linux/kmemleak.h
@@ -26,6 +26,7 @@ extern void kmemleak_free_part(const void *ptr, size_t size) __ref;
 extern void kmemleak_free_percpu(const void __percpu *ptr) __ref;
 extern void kmemleak_update_trace(const void *ptr) __ref;
 extern void kmemleak_not_leak(const void *ptr) __ref;
+extern void kmemleak_transient_leak(const void *ptr) __ref;
 extern void kmemleak_ignore(const void *ptr) __ref;
 extern void kmemleak_scan_area(const void *ptr, size_t size, gfp_t gfp) __ref;
 extern void kmemleak_no_scan(const void *ptr) __ref;
@@ -93,6 +94,9 @@ static inline void kmemleak_update_trace(const void *ptr)
 static inline void kmemleak_not_leak(const void *ptr)
 {
 }
+static inline void kmemleak_transient_leak(const void *ptr)
+{
+}
 static inline void kmemleak_ignore(const void *ptr)
 {
 }
-- 
cgit v1.2.3


From 7269ed4af344184ab9bdf318fe8864cf64849735 Mon Sep 17 00:00:00 2001
From: Bibo Mao <maobibo@loongson.cn>
Date: Mon, 4 Nov 2024 15:07:12 +0800
Subject: mm: define general function pXd_init()

pud_init(), pmd_init() and kernel_pte_init() are duplicated defined in
file kasan.c and sparse-vmemmap.c as weak functions.  Move them to generic
header file pgtable.h, architecture can redefine them.

Link: https://lkml.kernel.org/r/20241104070712.52902-1-maobibo@loongson.cn
Signed-off-by: Bibo Mao <maobibo@loongson.cn>
Reviewed-by: Huacai Chen <chenhuacai@loongson.cn>
Cc: Alexander Potapenko <glider@google.com>
Cc: Andrey Konovalov <andreyknvl@gmail.com>
Cc: Andrey Ryabinin <ryabinin.a.a@gmail.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
Cc: Vincenzo Frascino <vincenzo.frascino@arm.com>
Cc: WANG Xuerui <kernel@xen0n.name>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/mm.h      |  3 ---
 include/linux/pgtable.h | 21 +++++++++++++++++++++
 2 files changed, 21 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 32888a97ab44..5d6cd523c7c0 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -3819,9 +3819,6 @@ void *sparse_buffer_alloc(unsigned long size);
 struct page * __populate_section_memmap(unsigned long pfn,
 		unsigned long nr_pages, int nid, struct vmem_altmap *altmap,
 		struct dev_pagemap *pgmap);
-void pud_init(void *addr);
-void pmd_init(void *addr);
-void kernel_pte_init(void *addr);
 pgd_t *vmemmap_pgd_populate(unsigned long addr, int node);
 p4d_t *vmemmap_p4d_populate(pgd_t *pgd, unsigned long addr, int node);
 pud_t *vmemmap_pud_populate(p4d_t *p4d, unsigned long addr, int node);
diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
index 23aeffd89a4e..adef9d6e9b1b 100644
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -90,6 +90,27 @@ static inline unsigned long pud_index(unsigned long address)
 #define pgd_index(a)  (((a) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1))
 #endif
 
+#ifndef kernel_pte_init
+static inline void kernel_pte_init(void *addr)
+{
+}
+#define kernel_pte_init kernel_pte_init
+#endif
+
+#ifndef pmd_init
+static inline void pmd_init(void *addr)
+{
+}
+#define pmd_init pmd_init
+#endif
+
+#ifndef pud_init
+static inline void pud_init(void *addr)
+{
+}
+#define pud_init pud_init
+#endif
+
 #ifndef pte_offset_kernel
 static inline pte_t *pte_offset_kernel(pmd_t *pmd, unsigned long address)
 {
-- 
cgit v1.2.3


From 9b5c87d47949292ff21ee2fadd1e83820662a430 Mon Sep 17 00:00:00 2001
From: Vlastimil Babka <vbabka@suse.cz>
Date: Tue, 5 Nov 2024 12:34:57 +0100
Subject: mm: mmap_lock: check trace_mmap_lock_$type_enabled() instead of
 regcount

Since 7d6be67cfdd4 ("mm: mmap_lock: replace get_memcg_path_buf() with
on-stack buffer") we use trace_mmap_lock_reg()/unreg() only to maintain an
atomic reg_refcount which is checked to avoid performing
get_mm_memcg_path() in case none of the tracepoints using it is enabled.

This can be achieved directly by putting all the work needed for the
tracepoint behind the trace_mmap_lock_##type##_enabled(), as suggested by
Documentation/trace/tracepoints.rst and with the following advantages:

- uses the tracepoint's static key instead of evaluating a branch

- the check tracepoint specific, not shared by all of them

- we can get rid of trace_mmap_lock_reg()/unreg() completely

Thus use the trace_..._enabled() check and remove unnecessary code.

Link: https://lkml.kernel.org/r/20241105113456.95066-2-vbabka@suse.cz
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
Reviewed-by: Axel Rasmussen <axelrasmussen@google.com>
Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/trace/events/mmap_lock.h | 14 ++++----------
 1 file changed, 4 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/mmap_lock.h b/include/trace/events/mmap_lock.h
index f2827f98a44f..bc2e3ad787b3 100644
--- a/include/trace/events/mmap_lock.h
+++ b/include/trace/events/mmap_lock.h
@@ -10,9 +10,6 @@
 
 struct mm_struct;
 
-extern int trace_mmap_lock_reg(void);
-extern void trace_mmap_lock_unreg(void);
-
 DECLARE_EVENT_CLASS(mmap_lock,
 
 	TP_PROTO(struct mm_struct *mm, const char *memcg_path, bool write),
@@ -40,16 +37,15 @@ DECLARE_EVENT_CLASS(mmap_lock,
 );
 
 #define DEFINE_MMAP_LOCK_EVENT(name)                                    \
-	DEFINE_EVENT_FN(mmap_lock, name,                                \
+	DEFINE_EVENT(mmap_lock, name,                                   \
 		TP_PROTO(struct mm_struct *mm, const char *memcg_path,  \
 			bool write),                                    \
-		TP_ARGS(mm, memcg_path, write),                         \
-		trace_mmap_lock_reg, trace_mmap_lock_unreg)
+		TP_ARGS(mm, memcg_path, write))
 
 DEFINE_MMAP_LOCK_EVENT(mmap_lock_start_locking);
 DEFINE_MMAP_LOCK_EVENT(mmap_lock_released);
 
-TRACE_EVENT_FN(mmap_lock_acquire_returned,
+TRACE_EVENT(mmap_lock_acquire_returned,
 
 	TP_PROTO(struct mm_struct *mm, const char *memcg_path, bool write,
 		bool success),
@@ -76,9 +72,7 @@ TRACE_EVENT_FN(mmap_lock_acquire_returned,
 		__get_str(memcg_path),
 		__entry->write ? "true" : "false",
 		__entry->success ? "true" : "false"
-	),
-
-	trace_mmap_lock_reg, trace_mmap_lock_unreg
+	)
 );
 
 #endif /* _TRACE_MMAP_LOCK_H */
-- 
cgit v1.2.3


From d5ec8d91f82ef78405b506737952dec8af95a95b Mon Sep 17 00:00:00 2001
From: Kuniyuki Iwashima <kuniyu@amazon.com>
Date: Thu, 7 Nov 2024 16:48:14 -0800
Subject: rtnetlink: Remove __rtnl_link_unregister().

rtnl_link_unregister() holds RTNL and calls __rtnl_link_unregister(),
where we call synchronize_srcu() to wait inflight RTM_NEWLINK requests
for per-netns RTNL.

We put synchronize_srcu() in __rtnl_link_unregister() due to ifb.ko
and dummy.ko.

However, rtnl_newlink() will acquire SRCU before RTNL later in this
series.  Then, lockdep will detect the deadlock:

   rtnl_link_unregister()       rtnl_newlink()
   ----                         ----
   lock(rtnl_mutex);
                                lock(&ops->srcu);
                                lock(rtnl_mutex);
   sync(&ops->srcu);

To avoid the problem, we must call synchronize_srcu() before RTNL in
rtnl_link_unregister().

As a preparation, let's remove __rtnl_link_unregister().

Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Link: https://patch.msgid.link/20241108004823.29419-2-kuniyu@amazon.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/rtnetlink.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h
index b260c0cc9671..3ebfcc6e56fd 100644
--- a/include/net/rtnetlink.h
+++ b/include/net/rtnetlink.h
@@ -165,7 +165,6 @@ struct rtnl_link_ops {
 };
 
 int __rtnl_link_register(struct rtnl_link_ops *ops);
-void __rtnl_link_unregister(struct rtnl_link_ops *ops);
 
 int rtnl_link_register(struct rtnl_link_ops *ops);
 void rtnl_link_unregister(struct rtnl_link_ops *ops);
-- 
cgit v1.2.3


From 6b57ff21a3109b1dba2d286ff415463e6fb1fca3 Mon Sep 17 00:00:00 2001
From: Kuniyuki Iwashima <kuniyu@amazon.com>
Date: Thu, 7 Nov 2024 16:48:15 -0800
Subject: rtnetlink: Protect link_ops by mutex.

rtnl_link_unregister() holds RTNL and calls synchronize_srcu(),
but rtnl_newlink() will acquire SRCU frist and then RTNL.

Then, we need to unlink ops and call synchronize_srcu() outside
of RTNL to avoid the deadlock.

   rtnl_link_unregister()       rtnl_newlink()
   ----                         ----
   lock(rtnl_mutex);
                                lock(&ops->srcu);
                                lock(rtnl_mutex);
   sync(&ops->srcu);

Let's move as such and add a mutex to protect link_ops.

Now, link_ops is protected by its dedicated mutex and
rtnl_link_register() no longer needs to hold RTNL.

While at it, we move the initialisation of ops->dellink and
ops->srcu out of the mutex scope.

Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Link: https://patch.msgid.link/20241108004823.29419-3-kuniyu@amazon.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/rtnetlink.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h
index 3ebfcc6e56fd..7559020f760c 100644
--- a/include/net/rtnetlink.h
+++ b/include/net/rtnetlink.h
@@ -71,7 +71,7 @@ static inline int rtnl_msg_family(const struct nlmsghdr *nlh)
 /**
  *	struct rtnl_link_ops - rtnetlink link operations
  *
- *	@list: Used internally, protected by RTNL and SRCU
+ *	@list: Used internally, protected by link_ops_mutex and SRCU
  *	@srcu: Used internally
  *	@kind: Identifier
  *	@netns_refund: Physical device, move to init_net on netns exit
-- 
cgit v1.2.3


From 68297dbb967f87c3c92af9d2f652270f57c547c7 Mon Sep 17 00:00:00 2001
From: Kuniyuki Iwashima <kuniyu@amazon.com>
Date: Thu, 7 Nov 2024 16:48:16 -0800
Subject: rtnetlink: Remove __rtnl_link_register()

link_ops is protected by link_ops_mutex and no longer needs RTNL,
so we have no reason to have __rtnl_link_register() separately.

Let's remove it and call rtnl_link_register() from ifb.ko and
dummy.ko.

Note that both modules' init() work on init_net only, so we need
not export pernet_ops_rwsem and can use rtnl_net_lock() there.

Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Link: https://patch.msgid.link/20241108004823.29419-4-kuniyu@amazon.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/rtnetlink.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include')

diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h
index 7559020f760c..ef7c11f0d74c 100644
--- a/include/net/rtnetlink.h
+++ b/include/net/rtnetlink.h
@@ -164,8 +164,6 @@ struct rtnl_link_ops {
 						   int *prividx, int attr);
 };
 
-int __rtnl_link_register(struct rtnl_link_ops *ops);
-
 int rtnl_link_register(struct rtnl_link_ops *ops);
 void rtnl_link_unregister(struct rtnl_link_ops *ops);
 
-- 
cgit v1.2.3


From 28690e5361c05fd4ef0ca3a17d1c667cba790554 Mon Sep 17 00:00:00 2001
From: Kuniyuki Iwashima <kuniyu@amazon.com>
Date: Thu, 7 Nov 2024 16:48:18 -0800
Subject: rtnetlink: Add peer_type in struct rtnl_link_ops.

In ops->newlink(), veth, vxcan, and netkit call rtnl_link_get_net() with
a net pointer, which is the first argument of ->newlink().

rtnl_link_get_net() could return another netns based on IFLA_NET_NS_PID
and IFLA_NET_NS_FD in the peer device's attributes.

We want to get it and fill rtnl_nets->nets[] in advance in rtnl_newlink()
for per-netns RTNL.

All of the three get the peer netns in the same way:

  1. Call rtnl_nla_parse_ifinfomsg()
  2. Call ops->validate() (vxcan doesn't have)
  3. Call rtnl_link_get_net_tb()

Let's add a new field peer_type to struct rtnl_link_ops and prefetch
netns in the peer ifla to add it to rtnl_nets in rtnl_newlink().

Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Nikolay Aleksandrov <razor@blackwall.org>
Link: https://patch.msgid.link/20241108004823.29419-6-kuniyu@amazon.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/rtnetlink.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h
index ef7c11f0d74c..bef76abcff8d 100644
--- a/include/net/rtnetlink.h
+++ b/include/net/rtnetlink.h
@@ -75,6 +75,7 @@ static inline int rtnl_msg_family(const struct nlmsghdr *nlh)
  *	@srcu: Used internally
  *	@kind: Identifier
  *	@netns_refund: Physical device, move to init_net on netns exit
+ *	@peer_type: Peer device specific netlink attribute number (e.g. VETH_INFO_PEER)
  *	@maxtype: Highest device specific netlink attribute number
  *	@policy: Netlink policy for device specific attribute validation
  *	@validate: Optional validation function for netlink/changelink parameters
@@ -116,6 +117,7 @@ struct rtnl_link_ops {
 	void			(*setup)(struct net_device *dev);
 
 	bool			netns_refund;
+	const u16		peer_type;
 	unsigned int		maxtype;
 	const struct nla_policy	*policy;
 	int			(*validate)(struct nlattr *tb[],
-- 
cgit v1.2.3


From 636af13f213bf9b28a34254327934bc72a797754 Mon Sep 17 00:00:00 2001
From: Kuniyuki Iwashima <kuniyu@amazon.com>
Date: Thu, 7 Nov 2024 16:48:23 -0800
Subject: rtnetlink: Register rtnl_dellink() and rtnl_setlink() with
 RTNL_FLAG_DOIT_PERNET_WIP.

Currently, rtnl_setlink() and rtnl_dellink() cannot be fully converted
to per-netns RTNL due to a lack of handling peer/lower/upper devices in
different netns.

For example, when we change a device in rtnl_setlink() and need to
propagate that to its upper devices, we want to avoid acquiring all netns
locks, for which we do not know the upper limit.

The same situation happens when we remove a device.

rtnl_dellink() could be transformed to remove a single device in the
requested netns and delegate other devices to per-netns work, and
rtnl_setlink() might be ?

Until we come up with a better idea, let's use a new flag
RTNL_FLAG_DOIT_PERNET_WIP for rtnl_dellink() and rtnl_setlink().

This will unblock converting RTNL users where such devices are not related.

Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Reviewed-by: Nikolay Aleksandrov <razor@blackwall.org>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Link: https://patch.msgid.link/20241108004823.29419-11-kuniyu@amazon.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/rtnetlink.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h
index bef76abcff8d..bc0069a8b6ea 100644
--- a/include/net/rtnetlink.h
+++ b/include/net/rtnetlink.h
@@ -13,6 +13,7 @@ typedef int (*rtnl_dumpit_func)(struct sk_buff *, struct netlink_callback *);
 enum rtnl_link_flags {
 	RTNL_FLAG_DOIT_UNLOCKED		= BIT(0),
 #define RTNL_FLAG_DOIT_PERNET		RTNL_FLAG_DOIT_UNLOCKED
+#define RTNL_FLAG_DOIT_PERNET_WIP	RTNL_FLAG_DOIT_UNLOCKED
 	RTNL_FLAG_BULK_DEL_SUPPORTED	= BIT(1),
 	RTNL_FLAG_DUMP_UNLOCKED		= BIT(2),
 	RTNL_FLAG_DUMP_SPLIT_NLM_DONE	= BIT(3),	/* legacy behavior */
-- 
cgit v1.2.3


From 5dc51ec86df6e2214d8398079c1e31736593ab53 Mon Sep 17 00:00:00 2001
From: Martin Karsten <mkarsten@uwaterloo.ca>
Date: Sat, 9 Nov 2024 05:02:31 +0000
Subject: net: Add napi_struct parameter irq_suspend_timeout

Add a per-NAPI IRQ suspension parameter, which can be get/set with
netdev-genl.

This patch doesn't change any behavior but prepares the code for other
changes in the following commits which use irq_suspend_timeout as a
timeout for IRQ suspension.

Signed-off-by: Martin Karsten <mkarsten@uwaterloo.ca>
Co-developed-by: Joe Damato <jdamato@fastly.com>
Signed-off-by: Joe Damato <jdamato@fastly.com>
Tested-by: Joe Damato <jdamato@fastly.com>
Tested-by: Martin Karsten <mkarsten@uwaterloo.ca>
Acked-by: Stanislav Fomichev <sdf@fomichev.me>
Reviewed-by: Sridhar Samudrala <sridhar.samudrala@intel.com>
Link: https://patch.msgid.link/20241109050245.191288-2-jdamato@fastly.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/netdevice.h   | 2 ++
 include/uapi/linux/netdev.h | 1 +
 2 files changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index df4483598628..0aae346d919e 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -348,6 +348,7 @@ struct gro_list {
  */
 struct napi_config {
 	u64 gro_flush_timeout;
+	u64 irq_suspend_timeout;
 	u32 defer_hard_irqs;
 	unsigned int napi_id;
 };
@@ -384,6 +385,7 @@ struct napi_struct {
 	struct hrtimer		timer;
 	struct task_struct	*thread;
 	unsigned long		gro_flush_timeout;
+	unsigned long		irq_suspend_timeout;
 	u32			defer_hard_irqs;
 	/* control-path-only fields follow */
 	struct list_head	dev_list;
diff --git a/include/uapi/linux/netdev.h b/include/uapi/linux/netdev.h
index e3ebb49f60d2..e4be227d3ad6 100644
--- a/include/uapi/linux/netdev.h
+++ b/include/uapi/linux/netdev.h
@@ -124,6 +124,7 @@ enum {
 	NETDEV_A_NAPI_PID,
 	NETDEV_A_NAPI_DEFER_HARD_IRQS,
 	NETDEV_A_NAPI_GRO_FLUSH_TIMEOUT,
+	NETDEV_A_NAPI_IRQ_SUSPEND_TIMEOUT,
 
 	__NETDEV_A_NAPI_MAX,
 	NETDEV_A_NAPI_MAX = (__NETDEV_A_NAPI_MAX - 1)
-- 
cgit v1.2.3


From 3fcbecbdeb048dfd1bea824f4276717fed02d10e Mon Sep 17 00:00:00 2001
From: Martin Karsten <mkarsten@uwaterloo.ca>
Date: Sat, 9 Nov 2024 05:02:32 +0000
Subject: net: Add control functions for irq suspension

The napi_suspend_irqs routine bootstraps irq suspension by elongating
the defer timeout to irq_suspend_timeout.

The napi_resume_irqs routine effectively cancels irq suspension by
forcing the napi to be scheduled immediately.

Signed-off-by: Martin Karsten <mkarsten@uwaterloo.ca>
Co-developed-by: Joe Damato <jdamato@fastly.com>
Signed-off-by: Joe Damato <jdamato@fastly.com>
Tested-by: Joe Damato <jdamato@fastly.com>
Tested-by: Martin Karsten <mkarsten@uwaterloo.ca>
Acked-by: Stanislav Fomichev <sdf@fomichev.me>
Reviewed-by: Sridhar Samudrala <sridhar.samudrala@intel.com>
Link: https://patch.msgid.link/20241109050245.191288-3-jdamato@fastly.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/busy_poll.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h
index f03040baaefd..c858270141bc 100644
--- a/include/net/busy_poll.h
+++ b/include/net/busy_poll.h
@@ -52,6 +52,9 @@ void napi_busy_loop_rcu(unsigned int napi_id,
 			bool (*loop_end)(void *, unsigned long),
 			void *loop_end_arg, bool prefer_busy_poll, u16 budget);
 
+void napi_suspend_irqs(unsigned int napi_id);
+void napi_resume_irqs(unsigned int napi_id);
+
 #else /* CONFIG_NET_RX_BUSY_POLL */
 static inline unsigned long net_busy_loop_on(void)
 {
-- 
cgit v1.2.3


From 213a695297e1f0c2ed814488757d496b0d7f7267 Mon Sep 17 00:00:00 2001
From: Menglong Dong <menglong8.dong@gmail.com>
Date: Mon, 11 Nov 2024 20:49:11 +0800
Subject: bpf: Replace the document for PTR_TO_BTF_ID_OR_NULL

Commit c25b2ae13603 ("bpf: Replace PTR_TO_XXX_OR_NULL with PTR_TO_XXX |
PTR_MAYBE_NULL") moved the fields around and misplaced the
documentation for "PTR_TO_BTF_ID_OR_NULL". So, let's replace it in the
proper place.

Signed-off-by: Menglong Dong <dongml2@chinatelecom.cn>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20241111124911.1436911-1-dongml2@chinatelecom.cn
---
 include/linux/bpf.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 1b84613b10ac..7da41ae2eac8 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -932,10 +932,6 @@ enum bpf_reg_type {
 	 * additional context, assume the value is non-null.
 	 */
 	PTR_TO_BTF_ID,
-	/* PTR_TO_BTF_ID_OR_NULL points to a kernel struct that has not
-	 * been checked for null. Used primarily to inform the verifier
-	 * an explicit null check is required for this struct.
-	 */
 	PTR_TO_MEM,		 /* reg points to valid memory region */
 	PTR_TO_ARENA,
 	PTR_TO_BUF,		 /* reg points to a read/write buffer */
@@ -948,6 +944,10 @@ enum bpf_reg_type {
 	PTR_TO_SOCKET_OR_NULL		= PTR_MAYBE_NULL | PTR_TO_SOCKET,
 	PTR_TO_SOCK_COMMON_OR_NULL	= PTR_MAYBE_NULL | PTR_TO_SOCK_COMMON,
 	PTR_TO_TCP_SOCK_OR_NULL		= PTR_MAYBE_NULL | PTR_TO_TCP_SOCK,
+	/* PTR_TO_BTF_ID_OR_NULL points to a kernel struct that has not
+	 * been checked for null. Used primarily to inform the verifier
+	 * an explicit null check is required for this struct.
+	 */
 	PTR_TO_BTF_ID_OR_NULL		= PTR_MAYBE_NULL | PTR_TO_BTF_ID,
 
 	/* This must be the last entry. Its purpose is to ensure the enum is
-- 
cgit v1.2.3


From db0fc586edde83ff7ff65fea56c4f72dae511764 Mon Sep 17 00:00:00 2001
From: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Date: Mon, 28 Oct 2024 16:31:32 -0700
Subject: drm/i915/gsc: ARL-H and ARL-U need a newer GSC FW.

All MTL and ARL SKUs share the same GSC FW, but the newer platforms are
only supported in newer blobs. In particular, ARL-S is supported
starting from 102.0.10.1878 (which is already the minimum required
version for ARL in the code), while ARL-H and ARL-U are supported from
102.1.15.1926. Therefore, the driver needs to check which specific ARL
subplatform its running on when verifying that the GSC FW is new enough
for it.

Fixes: 2955ae8186c8 ("drm/i915: ARL requires a newer GSC firmware")
Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Cc: John Harrison <John.C.Harrison@Intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: John Harrison <John.C.Harrison@Intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20241028233132.149745-1-daniele.ceraolospurio@intel.com
(cherry picked from commit 3c1d5ced18db8a67251c8436cf9bdc061f972bdb)
Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 include/drm/intel/i915_pciids.h | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/drm/intel/i915_pciids.h b/include/drm/intel/i915_pciids.h
index 2bf03ebfcf73..f35534522d33 100644
--- a/include/drm/intel/i915_pciids.h
+++ b/include/drm/intel/i915_pciids.h
@@ -771,13 +771,24 @@
 	INTEL_ATS_M150_IDS(MACRO__, ## __VA_ARGS__), \
 	INTEL_ATS_M75_IDS(MACRO__, ## __VA_ARGS__)
 
-/* MTL */
-#define INTEL_ARL_IDS(MACRO__, ...) \
-	MACRO__(0x7D41, ## __VA_ARGS__), \
+/* ARL */
+#define INTEL_ARL_H_IDS(MACRO__, ...) \
 	MACRO__(0x7D51, ## __VA_ARGS__), \
-	MACRO__(0x7D67, ## __VA_ARGS__), \
 	MACRO__(0x7DD1, ## __VA_ARGS__)
 
+#define INTEL_ARL_U_IDS(MACRO__, ...) \
+	MACRO__(0x7D41, ## __VA_ARGS__) \
+
+#define INTEL_ARL_S_IDS(MACRO__, ...) \
+	MACRO__(0x7D67, ## __VA_ARGS__), \
+	MACRO__(0xB640, ## __VA_ARGS__)
+
+#define INTEL_ARL_IDS(MACRO__, ...) \
+	INTEL_ARL_H_IDS(MACRO__, ## __VA_ARGS__), \
+	INTEL_ARL_U_IDS(MACRO__, ## __VA_ARGS__), \
+	INTEL_ARL_S_IDS(MACRO__, ## __VA_ARGS__)
+
+/* MTL */
 #define INTEL_MTL_IDS(MACRO__, ...) \
 	INTEL_ARL_IDS(MACRO__, ## __VA_ARGS__), \
 	MACRO__(0x7D40, ## __VA_ARGS__), \
-- 
cgit v1.2.3


From 37653a0b8a6f5d6ab23daa8e585c5ed24a0fc500 Mon Sep 17 00:00:00 2001
From: Menglong Dong <menglong8.dong@gmail.com>
Date: Thu, 7 Nov 2024 20:55:53 +0800
Subject: net: ip: make fib_validate_source() support drop reasons

In this commit, we make fib_validate_source() and __fib_validate_source()
return -reason instead of errno on error.

The return value of fib_validate_source can be -errno, 0, and 1. It's hard
to make fib_validate_source() return drop reasons directly.

The fib_validate_source() will return 1 if the scope of the source(revert)
route is HOST. And the __mkroute_input() will mark the skb with
IPSKB_DOREDIRECT in this case (combine with some other conditions). And
then, a REDIRECT ICMP will be sent in ip_forward() if this flag exists. We
can't pass this information to __mkroute_input if we make
fib_validate_source() return drop reasons.

Therefore, we introduce the wrapper fib_validate_source_reason() for
fib_validate_source(), which will return the drop reasons on error.

In the origin logic, LINUX_MIB_IPRPFILTER will be counted if
fib_validate_source() return -EXDEV. And now, we need to adjust it by
checking "reason == SKB_DROP_REASON_IP_RPFILTER". However, this will take
effect only after the patch "net: ip: make ip_route_input_noref() return
drop reasons", as we can't pass the drop reasons from
fib_validate_source() to ip_rcv_finish_core() in this patch.

Following new drop reasons are added in this patch:

  SKB_DROP_REASON_IP_LOCAL_SOURCE
  SKB_DROP_REASON_IP_INVALID_SOURCE

Signed-off-by: Menglong Dong <dongml2@chinatelecom.cn>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/net/dropreason-core.h | 10 ++++++++++
 include/net/ip_fib.h          | 12 ++++++++++++
 2 files changed, 22 insertions(+)

(limited to 'include')

diff --git a/include/net/dropreason-core.h b/include/net/dropreason-core.h
index d59bb96c5a02..62a60be1db84 100644
--- a/include/net/dropreason-core.h
+++ b/include/net/dropreason-core.h
@@ -76,6 +76,8 @@
 	FN(INVALID_PROTO)		\
 	FN(IP_INADDRERRORS)		\
 	FN(IP_INNOROUTES)		\
+	FN(IP_LOCAL_SOURCE)		\
+	FN(IP_INVALID_SOURCE)		\
 	FN(PKT_TOO_BIG)			\
 	FN(DUP_FRAG)			\
 	FN(FRAG_REASM_TIMEOUT)		\
@@ -373,6 +375,14 @@ enum skb_drop_reason {
 	 * IPSTATS_MIB_INADDRERRORS
 	 */
 	SKB_DROP_REASON_IP_INNOROUTES,
+	/** @SKB_DROP_REASON_IP_LOCAL_SOURCE: the source ip is local */
+	SKB_DROP_REASON_IP_LOCAL_SOURCE,
+	/**
+	 * @SKB_DROP_REASON_IP_INVALID_SOURCE: the source ip is invalid:
+	 * 1) source ip is multicast or limited broadcast
+	 * 2) source ip is zero and not IGMP
+	 */
+	SKB_DROP_REASON_IP_INVALID_SOURCE,
 	/**
 	 * @SKB_DROP_REASON_PKT_TOO_BIG: packet size is too big (maybe exceed the
 	 * MTU)
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index b6e44f4eaa4c..a113c11ab56b 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -452,6 +452,18 @@ int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
 			dscp_t dscp, int oif, struct net_device *dev,
 			struct in_device *idev, u32 *itag);
 
+static inline enum skb_drop_reason
+fib_validate_source_reason(struct sk_buff *skb, __be32 src, __be32 dst,
+			   dscp_t dscp, int oif, struct net_device *dev,
+			   struct in_device *idev, u32 *itag)
+{
+	int err = fib_validate_source(skb, src, dst, dscp, oif, dev, idev,
+				      itag);
+	if (err < 0)
+		return -err;
+	return SKB_NOT_DROPPED_YET;
+}
+
 #ifdef CONFIG_IP_ROUTE_CLASSID
 static inline int fib_num_tclassid_users(struct net *net)
 {
-- 
cgit v1.2.3


From d46f827016d891dbc234cb05c406180f77fb3b2d Mon Sep 17 00:00:00 2001
From: Menglong Dong <menglong8.dong@gmail.com>
Date: Thu, 7 Nov 2024 20:55:55 +0800
Subject: net: ip: make ip_mc_validate_source() return drop reason

Make ip_mc_validate_source() return drop reason, and adjust the call of
it in ip_route_input_mc().

Another caller of it is ip_rcv_finish_core->udp_v4_early_demux, and the
errno is not checked in detail, so we don't do more adjustment for it.

The drop reason "SKB_DROP_REASON_IP_LOCALNET" is added in this commit.

Signed-off-by: Menglong Dong <dongml2@chinatelecom.cn>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/net/dropreason-core.h | 3 +++
 include/net/route.h           | 7 ++++---
 2 files changed, 7 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/net/dropreason-core.h b/include/net/dropreason-core.h
index 62a60be1db84..a2a1fb90e0e5 100644
--- a/include/net/dropreason-core.h
+++ b/include/net/dropreason-core.h
@@ -78,6 +78,7 @@
 	FN(IP_INNOROUTES)		\
 	FN(IP_LOCAL_SOURCE)		\
 	FN(IP_INVALID_SOURCE)		\
+	FN(IP_LOCALNET)			\
 	FN(PKT_TOO_BIG)			\
 	FN(DUP_FRAG)			\
 	FN(FRAG_REASM_TIMEOUT)		\
@@ -383,6 +384,8 @@ enum skb_drop_reason {
 	 * 2) source ip is zero and not IGMP
 	 */
 	SKB_DROP_REASON_IP_INVALID_SOURCE,
+	/** @SKB_DROP_REASON_IP_LOCALNET: source or dest ip is local net */
+	SKB_DROP_REASON_IP_LOCALNET,
 	/**
 	 * @SKB_DROP_REASON_PKT_TOO_BIG: packet size is too big (maybe exceed the
 	 * MTU)
diff --git a/include/net/route.h b/include/net/route.h
index 0a690adfdff5..e2e1922c58fb 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -199,9 +199,10 @@ static inline struct rtable *ip_route_output_gre(struct net *net, struct flowi4
 	return ip_route_output_key(net, fl4);
 }
 
-int ip_mc_validate_source(struct sk_buff *skb, __be32 daddr, __be32 saddr,
-			  dscp_t dscp, struct net_device *dev,
-			  struct in_device *in_dev, u32 *itag);
+enum skb_drop_reason
+ip_mc_validate_source(struct sk_buff *skb, __be32 daddr, __be32 saddr,
+		      dscp_t dscp, struct net_device *dev,
+		      struct in_device *in_dev, u32 *itag);
 int ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 			 dscp_t dscp, struct net_device *dev);
 int ip_route_use_hint(struct sk_buff *skb, __be32 daddr, __be32 saddr,
-- 
cgit v1.2.3


From 5b92112acd8e2ed84a4df653fc20575f4da6fa49 Mon Sep 17 00:00:00 2001
From: Menglong Dong <menglong8.dong@gmail.com>
Date: Thu, 7 Nov 2024 20:55:56 +0800
Subject: net: ip: make ip_route_input_slow() return drop reasons

In this commit, we make ip_route_input_slow() return skb drop reasons,
and following new skb drop reasons are added:

  SKB_DROP_REASON_IP_INVALID_DEST

The only caller of ip_route_input_slow() is ip_route_input_rcu(), and we
adjust it by making it return -EINVAL on error.

Signed-off-by: Menglong Dong <dongml2@chinatelecom.cn>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/net/dropreason-core.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include')

diff --git a/include/net/dropreason-core.h b/include/net/dropreason-core.h
index a2a1fb90e0e5..74624d369d48 100644
--- a/include/net/dropreason-core.h
+++ b/include/net/dropreason-core.h
@@ -79,6 +79,7 @@
 	FN(IP_LOCAL_SOURCE)		\
 	FN(IP_INVALID_SOURCE)		\
 	FN(IP_LOCALNET)			\
+	FN(IP_INVALID_DEST)		\
 	FN(PKT_TOO_BIG)			\
 	FN(DUP_FRAG)			\
 	FN(FRAG_REASM_TIMEOUT)		\
@@ -386,6 +387,11 @@ enum skb_drop_reason {
 	SKB_DROP_REASON_IP_INVALID_SOURCE,
 	/** @SKB_DROP_REASON_IP_LOCALNET: source or dest ip is local net */
 	SKB_DROP_REASON_IP_LOCALNET,
+	/**
+	 * @SKB_DROP_REASON_IP_INVALID_DEST: the dest ip is invalid:
+	 * 1) dest ip is 0
+	 */
+	SKB_DROP_REASON_IP_INVALID_DEST,
 	/**
 	 * @SKB_DROP_REASON_PKT_TOO_BIG: packet size is too big (maybe exceed the
 	 * MTU)
-- 
cgit v1.2.3


From 82d9983ebeb871cb5abd27c12a950c14c68772e1 Mon Sep 17 00:00:00 2001
From: Menglong Dong <menglong8.dong@gmail.com>
Date: Thu, 7 Nov 2024 20:55:58 +0800
Subject: net: ip: make ip_route_input_noref() return drop reasons

In this commit, we make ip_route_input_noref() return drop reasons, which
come from ip_route_input_rcu().

We need adjust the callers of ip_route_input_noref() to make sure the
return value of ip_route_input_noref() is used properly.

The errno that ip_route_input_noref() returns comes from ip_route_input
and bpf_lwt_input_reroute in the origin logic, and we make them return
-EINVAL on error instead. In the following patch, we will make
ip_route_input() returns drop reasons too.

Signed-off-by: Menglong Dong <dongml2@chinatelecom.cn>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/net/route.h | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/net/route.h b/include/net/route.h
index e2e1922c58fb..b85ffa3e042b 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -203,8 +203,9 @@ enum skb_drop_reason
 ip_mc_validate_source(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 		      dscp_t dscp, struct net_device *dev,
 		      struct in_device *in_dev, u32 *itag);
-int ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr,
-			 dscp_t dscp, struct net_device *dev);
+enum skb_drop_reason
+ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr,
+		     dscp_t dscp, struct net_device *dev);
 int ip_route_use_hint(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 		      dscp_t dscp, struct net_device *dev,
 		      const struct sk_buff *hint);
@@ -212,18 +213,18 @@ int ip_route_use_hint(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 static inline int ip_route_input(struct sk_buff *skb, __be32 dst, __be32 src,
 				 dscp_t dscp, struct net_device *devin)
 {
-	int err;
+	enum skb_drop_reason reason;
 
 	rcu_read_lock();
-	err = ip_route_input_noref(skb, dst, src, dscp, devin);
-	if (!err) {
+	reason = ip_route_input_noref(skb, dst, src, dscp, devin);
+	if (!reason) {
 		skb_dst_force(skb);
 		if (!skb_dst(skb))
-			err = -EINVAL;
+			reason = SKB_DROP_REASON_NOT_SPECIFIED;
 	}
 	rcu_read_unlock();
 
-	return err;
+	return reason ? -EINVAL : 0;
 }
 
 void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu, int oif,
-- 
cgit v1.2.3


From 50038bf38e6577a15d52b890d82c197cf3b163a0 Mon Sep 17 00:00:00 2001
From: Menglong Dong <menglong8.dong@gmail.com>
Date: Thu, 7 Nov 2024 20:55:59 +0800
Subject: net: ip: make ip_route_input() return drop reasons

In this commit, we make ip_route_input() return skb drop reasons that come
from ip_route_input_noref().

Meanwhile, adjust all the call to it.

Signed-off-by: Menglong Dong <dongml2@chinatelecom.cn>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/net/route.h | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/net/route.h b/include/net/route.h
index b85ffa3e042b..fb3433dc9c72 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -210,8 +210,9 @@ int ip_route_use_hint(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 		      dscp_t dscp, struct net_device *dev,
 		      const struct sk_buff *hint);
 
-static inline int ip_route_input(struct sk_buff *skb, __be32 dst, __be32 src,
-				 dscp_t dscp, struct net_device *devin)
+static inline enum skb_drop_reason
+ip_route_input(struct sk_buff *skb, __be32 dst, __be32 src, dscp_t dscp,
+	       struct net_device *devin)
 {
 	enum skb_drop_reason reason;
 
@@ -224,7 +225,7 @@ static inline int ip_route_input(struct sk_buff *skb, __be32 dst, __be32 src,
 	}
 	rcu_read_unlock();
 
-	return reason ? -EINVAL : 0;
+	return reason;
 }
 
 void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu, int oif,
-- 
cgit v1.2.3


From d9340d1e02779dbf83d53b0deb4068c7768b8261 Mon Sep 17 00:00:00 2001
From: Menglong Dong <menglong8.dong@gmail.com>
Date: Thu, 7 Nov 2024 20:56:00 +0800
Subject: net: ip: make ip_mkroute_input/__mkroute_input return drop reasons

In this commit, we make ip_mkroute_input() and __mkroute_input() return
drop reasons.

The drop reason "SKB_DROP_REASON_ARP_PVLAN_DISABLE" is introduced for
the case: the packet which is not IP is forwarded to the in_dev, and
the proxy_arp_pvlan is not enabled.

Signed-off-by: Menglong Dong <dongml2@chinatelecom.cn>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/net/dropreason-core.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include')

diff --git a/include/net/dropreason-core.h b/include/net/dropreason-core.h
index 74624d369d48..6c5a1ea209a2 100644
--- a/include/net/dropreason-core.h
+++ b/include/net/dropreason-core.h
@@ -104,6 +104,7 @@
 	FN(IP_TUNNEL_ECN)		\
 	FN(TUNNEL_TXINFO)		\
 	FN(LOCAL_MAC)			\
+	FN(ARP_PVLAN_DISABLE)		\
 	FNe(MAX)
 
 /**
@@ -477,6 +478,12 @@ enum skb_drop_reason {
 	 * the MAC address of the local netdev.
 	 */
 	SKB_DROP_REASON_LOCAL_MAC,
+	/**
+	 * @SKB_DROP_REASON_ARP_PVLAN_DISABLE: packet which is not IP is
+	 * forwarded to the in_dev, and the proxy_arp_pvlan is not
+	 * enabled.
+	 */
+	SKB_DROP_REASON_ARP_PVLAN_DISABLE,
 	/**
 	 * @SKB_DROP_REASON_MAX: the maximum of core drop reasons, which
 	 * shouldn't be used as a real 'reason' - only for tracing code gen
-- 
cgit v1.2.3


From 479aed04e84a5d66caa3b25bfc651292c153ef70 Mon Sep 17 00:00:00 2001
From: Menglong Dong <menglong8.dong@gmail.com>
Date: Thu, 7 Nov 2024 20:56:01 +0800
Subject: net: ip: make ip_route_use_hint() return drop reasons

In this commit, we make ip_route_use_hint() return drop reasons. The
drop reasons that we return are similar to what we do in
ip_route_input_slow(), and no drop reasons are added in this commit.

Signed-off-by: Menglong Dong <dongml2@chinatelecom.cn>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/net/route.h | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/net/route.h b/include/net/route.h
index fb3433dc9c72..84cb1e04f5cd 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -206,9 +206,10 @@ ip_mc_validate_source(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 enum skb_drop_reason
 ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 		     dscp_t dscp, struct net_device *dev);
-int ip_route_use_hint(struct sk_buff *skb, __be32 daddr, __be32 saddr,
-		      dscp_t dscp, struct net_device *dev,
-		      const struct sk_buff *hint);
+enum skb_drop_reason
+ip_route_use_hint(struct sk_buff *skb, __be32 daddr, __be32 saddr,
+		  dscp_t dscp, struct net_device *dev,
+		  const struct sk_buff *hint);
 
 static inline enum skb_drop_reason
 ip_route_input(struct sk_buff *skb, __be32 dst, __be32 src, dscp_t dscp,
-- 
cgit v1.2.3


From 12079a59ce52e72a342c49cfacf0281213fd6f32 Mon Sep 17 00:00:00 2001
From: Breno Leitao <leitao@debian.org>
Date: Thu, 7 Nov 2024 08:11:44 -0800
Subject: net: Implement fault injection forcing skb reallocation

Introduce a fault injection mechanism to force skb reallocation. The
primary goal is to catch bugs related to pointer invalidation after
potential skb reallocation.

The fault injection mechanism aims to identify scenarios where callers
retain pointers to various headers in the skb but fail to reload these
pointers after calling a function that may reallocate the data. This
type of bug can lead to memory corruption or crashes if the old,
now-invalid pointers are used.

By forcing reallocation through fault injection, we can stress-test code
paths and ensure proper pointer management after potential skb
reallocations.

Add a hook for fault injection in the following functions:

 * pskb_trim_rcsum()
 * pskb_may_pull_reason()
 * pskb_trim()

As the other fault injection mechanism, protect it under a debug Kconfig
called CONFIG_FAIL_SKB_REALLOC.

This patch was *heavily* inspired by Jakub's proposal from:
https://lore.kernel.org/all/20240719174140.47a868e6@kernel.org/

CC: Akinobu Mita <akinobu.mita@gmail.com>
Suggested-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Breno Leitao <leitao@debian.org>
Reviewed-by: Akinobu Mita <akinobu.mita@gmail.com>
Acked-by: Paolo Abeni <pabeni@redhat.com>
Acked-by: Guillaume Nault <gnault@redhat.com>
Link: https://patch.msgid.link/20241107-fault_v6-v6-1-1b82cb6ecacd@debian.org
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/linux/skbuff.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 60535c706851..58009fa66102 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -2682,6 +2682,12 @@ static inline void skb_assert_len(struct sk_buff *skb)
 #endif /* CONFIG_DEBUG_NET */
 }
 
+#if defined(CONFIG_FAIL_SKB_REALLOC)
+void skb_might_realloc(struct sk_buff *skb);
+#else
+static inline void skb_might_realloc(struct sk_buff *skb) {}
+#endif
+
 /*
  *	Add data to an sk_buff
  */
@@ -2782,6 +2788,7 @@ static inline enum skb_drop_reason
 pskb_may_pull_reason(struct sk_buff *skb, unsigned int len)
 {
 	DEBUG_NET_WARN_ON_ONCE(len > INT_MAX);
+	skb_might_realloc(skb);
 
 	if (likely(len <= skb_headlen(skb)))
 		return SKB_NOT_DROPPED_YET;
@@ -3240,6 +3247,7 @@ static inline int __pskb_trim(struct sk_buff *skb, unsigned int len)
 
 static inline int pskb_trim(struct sk_buff *skb, unsigned int len)
 {
+	skb_might_realloc(skb);
 	return (len < skb->len) ? __pskb_trim(skb, len) : 0;
 }
 
@@ -3994,6 +4002,7 @@ int pskb_trim_rcsum_slow(struct sk_buff *skb, unsigned int len);
 
 static inline int pskb_trim_rcsum(struct sk_buff *skb, unsigned int len)
 {
+	skb_might_realloc(skb);
 	if (likely(len >= skb->len))
 		return 0;
 	return pskb_trim_rcsum_slow(skb, len);
-- 
cgit v1.2.3


From 406c5548c661df0bff6bb6ee79bf9d49faf23e31 Mon Sep 17 00:00:00 2001
From: MeiChia Chiu <MeiChia.Chiu@mediatek.com>
Date: Tue, 12 Nov 2024 16:38:46 +0800
Subject: wifi: mac80211: Support EHT 1024 aggregation size in TX

Support EHT 1024 aggregation size in TX

The 1024 agg size for RX is supported but not for TX.
This patch adds this support and refactors common parsing logics for
addbaext in both process_addba_resp and process_addba_req into a
function.

Reviewed-by: Shayne Chen <shayne.chen@mediatek.com>
Reviewed-by: Money Wang <money.wang@mediatek.com>
Co-developed-by: Peter Chiu <chui-hao.chiu@mediatek.com>
Signed-off-by: Peter Chiu <chui-hao.chiu@mediatek.com>
Signed-off-by: MeiChia Chiu <MeiChia.Chiu@mediatek.com>
Link: https://patch.msgid.link/20241112083846.32063-1-MeiChia.Chiu@mediatek.com
[pass elems/len instead of mgmt/len/is_req]
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/linux/ieee80211.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 456bca45ff05..05dedc45505c 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -1445,6 +1445,8 @@ struct ieee80211_mgmt {
 					__le16 status;
 					__le16 capab;
 					__le16 timeout;
+					/* followed by BA Extension */
+					u8 variable[];
 				} __packed addba_resp;
 				struct{
 					u8 action_code;
-- 
cgit v1.2.3


From ed9d95f691c29748f21bc019de9566b698fdfab7 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Mon, 11 Nov 2024 10:09:56 -0500
Subject: fs: add the ability for statmount() to report the fs_subtype

/proc/self/mountinfo prints out the sb->s_subtype after the type. This
is particularly useful for disambiguating FUSE mounts (at least when the
userland driver bothers to set it). Add STATMOUNT_FS_SUBTYPE and claim
one of the __spare2 fields to point to the offset into the str[] array.

Reviewed-by: Jan Kara <jack@suse.cz>
Reviewed-by: Ian Kent <raven@themaw.net>
Signed-off-by: Jeff Layton <jlayton@kernel.org>
Link: https://lore.kernel.org/r/20241111-statmount-v4-2-2eaf35d07a80@kernel.org
Acked-by: Miklos Szeredi <mszeredi@redhat.com>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/uapi/linux/mount.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/uapi/linux/mount.h b/include/uapi/linux/mount.h
index 225bc366ffcb..2e939dddf9cb 100644
--- a/include/uapi/linux/mount.h
+++ b/include/uapi/linux/mount.h
@@ -173,7 +173,9 @@ struct statmount {
 	__u32 mnt_root;		/* [str] Root of mount relative to root of fs */
 	__u32 mnt_point;	/* [str] Mountpoint relative to current root */
 	__u64 mnt_ns_id;	/* ID of the mount namespace */
-	__u64 __spare2[49];
+	__u32 fs_subtype;	/* [str] Subtype of fs_type (if any) */
+	__u32 __spare1[1];
+	__u64 __spare2[48];
 	char str[];		/* Variable size part containing strings */
 };
 
@@ -207,6 +209,7 @@ struct mnt_id_req {
 #define STATMOUNT_FS_TYPE		0x00000020U	/* Want/got fs_type */
 #define STATMOUNT_MNT_NS_ID		0x00000040U	/* Want/got mnt_ns_id */
 #define STATMOUNT_MNT_OPTS		0x00000080U	/* Want/got mnt_opts */
+#define STATMOUNT_FS_SUBTYPE		0x00000100U	/* Want/got fs_subtype */
 
 /*
  * Special @mnt_id values that can be passed to listmount
-- 
cgit v1.2.3


From 8182a8b39aa227f5b99b8d4d18f296b82ce4b94c Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 12 Nov 2024 06:43:55 +0100
Subject: writeback: wbc_attach_fdatawrite_inode out of line

This allows exporting this high-level interface only while keeping
wbc_attach_and_unlock_inode private in fs-writeback.c and unexporting
__inode_attach_wb.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20241112054403.1470586-3-hch@lst.de
Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/linux/writeback.h | 28 ++--------------------------
 1 file changed, 2 insertions(+), 26 deletions(-)

(limited to 'include')

diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index d6db822e4bb3..aee3e1b4c50f 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -213,9 +213,6 @@ static inline void wait_on_inode(struct inode *inode)
 #include <linux/bio.h>
 
 void __inode_attach_wb(struct inode *inode, struct folio *folio);
-void wbc_attach_and_unlock_inode(struct writeback_control *wbc,
-				 struct inode *inode)
-	__releases(&inode->i_lock);
 void wbc_detach_inode(struct writeback_control *wbc);
 void wbc_account_cgroup_owner(struct writeback_control *wbc, struct page *page,
 			      size_t bytes);
@@ -254,22 +251,8 @@ static inline void inode_detach_wb(struct inode *inode)
 	}
 }
 
-/**
- * wbc_attach_fdatawrite_inode - associate wbc and inode for fdatawrite
- * @wbc: writeback_control of interest
- * @inode: target inode
- *
- * This function is to be used by __filemap_fdatawrite_range(), which is an
- * alternative entry point into writeback code, and first ensures @inode is
- * associated with a bdi_writeback and attaches it to @wbc.
- */
-static inline void wbc_attach_fdatawrite_inode(struct writeback_control *wbc,
-					       struct inode *inode)
-{
-	spin_lock(&inode->i_lock);
-	inode_attach_wb(inode, NULL);
-	wbc_attach_and_unlock_inode(wbc, inode);
-}
+void wbc_attach_fdatawrite_inode(struct writeback_control *wbc,
+		struct inode *inode);
 
 /**
  * wbc_init_bio - writeback specific initializtion of bio
@@ -303,13 +286,6 @@ static inline void inode_detach_wb(struct inode *inode)
 {
 }
 
-static inline void wbc_attach_and_unlock_inode(struct writeback_control *wbc,
-					       struct inode *inode)
-	__releases(&inode->i_lock)
-{
-	spin_unlock(&inode->i_lock);
-}
-
 static inline void wbc_attach_fdatawrite_inode(struct writeback_control *wbc,
 					       struct inode *inode)
 {
-- 
cgit v1.2.3


From 365f34483be33a9d0151c06ac39627d7927210d9 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Tue, 15 Oct 2024 09:11:00 -0700
Subject: srcu: Renaming in preparation for additional reader flavor

Currently, there are only two flavors of readers, normal and NMI-safe.
A number of fields, functions, and types reflect this restriction.
This renaming-only commit prepares for the addition of light-weight
(as in memory-barrier-free) readers.  OK, OK, there is also a drive-by
white-space fixeup!

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Andrii Nakryiko <andrii@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Kent Overstreet <kent.overstreet@linux.dev>
Cc: <bpf@vger.kernel.org>
Reviewed-by: Neeraj Upadhyay <Neeraj.Upadhyay@amd.com>
Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
---
 include/linux/srcu.h     | 21 ++++++++++-----------
 include/linux/srcutree.h |  2 +-
 2 files changed, 11 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/include/linux/srcu.h b/include/linux/srcu.h
index 835bbb2d1f88..06728ef6f32a 100644
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -181,10 +181,9 @@ static inline int srcu_read_lock_held(const struct srcu_struct *ssp)
 #define SRCU_NMI_SAFE		0x2
 
 #if defined(CONFIG_PROVE_RCU) && defined(CONFIG_TREE_SRCU)
-void srcu_check_nmi_safety(struct srcu_struct *ssp, bool nmi_safe);
+void srcu_check_read_flavor(struct srcu_struct *ssp, int read_flavor);
 #else
-static inline void srcu_check_nmi_safety(struct srcu_struct *ssp,
-					 bool nmi_safe) { }
+static inline void srcu_check_read_flavor(struct srcu_struct *ssp, int read_flavor) { }
 #endif
 
 
@@ -245,7 +244,7 @@ static inline int srcu_read_lock(struct srcu_struct *ssp) __acquires(ssp)
 {
 	int retval;
 
-	srcu_check_nmi_safety(ssp, false);
+	srcu_check_read_flavor(ssp, false);
 	retval = __srcu_read_lock(ssp);
 	srcu_lock_acquire(&ssp->dep_map);
 	return retval;
@@ -262,7 +261,7 @@ static inline int srcu_read_lock_nmisafe(struct srcu_struct *ssp) __acquires(ssp
 {
 	int retval;
 
-	srcu_check_nmi_safety(ssp, true);
+	srcu_check_read_flavor(ssp, true);
 	retval = __srcu_read_lock_nmisafe(ssp);
 	rcu_try_lock_acquire(&ssp->dep_map);
 	return retval;
@@ -274,7 +273,7 @@ srcu_read_lock_notrace(struct srcu_struct *ssp) __acquires(ssp)
 {
 	int retval;
 
-	srcu_check_nmi_safety(ssp, false);
+	srcu_check_read_flavor(ssp, false);
 	retval = __srcu_read_lock(ssp);
 	return retval;
 }
@@ -303,7 +302,7 @@ srcu_read_lock_notrace(struct srcu_struct *ssp) __acquires(ssp)
 static inline int srcu_down_read(struct srcu_struct *ssp) __acquires(ssp)
 {
 	WARN_ON_ONCE(in_nmi());
-	srcu_check_nmi_safety(ssp, false);
+	srcu_check_read_flavor(ssp, false);
 	return __srcu_read_lock(ssp);
 }
 
@@ -318,7 +317,7 @@ static inline void srcu_read_unlock(struct srcu_struct *ssp, int idx)
 	__releases(ssp)
 {
 	WARN_ON_ONCE(idx & ~0x1);
-	srcu_check_nmi_safety(ssp, false);
+	srcu_check_read_flavor(ssp, false);
 	srcu_lock_release(&ssp->dep_map);
 	__srcu_read_unlock(ssp, idx);
 }
@@ -334,7 +333,7 @@ static inline void srcu_read_unlock_nmisafe(struct srcu_struct *ssp, int idx)
 	__releases(ssp)
 {
 	WARN_ON_ONCE(idx & ~0x1);
-	srcu_check_nmi_safety(ssp, true);
+	srcu_check_read_flavor(ssp, true);
 	rcu_lock_release(&ssp->dep_map);
 	__srcu_read_unlock_nmisafe(ssp, idx);
 }
@@ -343,7 +342,7 @@ static inline void srcu_read_unlock_nmisafe(struct srcu_struct *ssp, int idx)
 static inline notrace void
 srcu_read_unlock_notrace(struct srcu_struct *ssp, int idx) __releases(ssp)
 {
-	srcu_check_nmi_safety(ssp, false);
+	srcu_check_read_flavor(ssp, false);
 	__srcu_read_unlock(ssp, idx);
 }
 
@@ -360,7 +359,7 @@ static inline void srcu_up_read(struct srcu_struct *ssp, int idx)
 {
 	WARN_ON_ONCE(idx & ~0x1);
 	WARN_ON_ONCE(in_nmi());
-	srcu_check_nmi_safety(ssp, false);
+	srcu_check_read_flavor(ssp, false);
 	__srcu_read_unlock(ssp, idx);
 }
 
diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h
index ed57598394de..ab7d8d215b84 100644
--- a/include/linux/srcutree.h
+++ b/include/linux/srcutree.h
@@ -25,7 +25,7 @@ struct srcu_data {
 	/* Read-side state. */
 	atomic_long_t srcu_lock_count[2];	/* Locks per CPU. */
 	atomic_long_t srcu_unlock_count[2];	/* Unlocks per CPU. */
-	int srcu_nmi_safety;			/* NMI-safe srcu_struct structure? */
+	int srcu_reader_flavor;			/* Reader flavor for srcu_struct structure? */
 
 	/* Update-side state. */
 	spinlock_t __private lock ____cacheline_internodealigned_in_smp;
-- 
cgit v1.2.3


From c071b8e5351453c2cb2d12f425d928f3a24ed2d3 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Tue, 15 Oct 2024 09:11:03 -0700
Subject: srcu: Improve srcu_read_lock{,_nmisafe}() comments

This commit adds some additional usage constraints to the kernel-doc
headers of srcu_read_lock() and srcu_read_lock_nmi_safe().

Suggested-by: Andrii Nakryiko <andrii.nakryiko@gmail.com>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Reviewed-by: Neeraj Upadhyay <Neeraj.Upadhyay@amd.com>
Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
---
 include/linux/srcu.h | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/srcu.h b/include/linux/srcu.h
index 06728ef6f32a..46fd06b212ba 100644
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -235,10 +235,13 @@ static inline void srcu_check_read_flavor(struct srcu_struct *ssp, int read_flav
  * a mutex that is held elsewhere while calling synchronize_srcu() or
  * synchronize_srcu_expedited().
  *
- * Note that srcu_read_lock() and the matching srcu_read_unlock() must
- * occur in the same context, for example, it is illegal to invoke
- * srcu_read_unlock() in an irq handler if the matching srcu_read_lock()
- * was invoked in process context.
+ * The return value from srcu_read_lock() must be passed unaltered
+ * to the matching srcu_read_unlock().  Note that srcu_read_lock() and
+ * the matching srcu_read_unlock() must occur in the same context, for
+ * example, it is illegal to invoke srcu_read_unlock() in an irq handler
+ * if the matching srcu_read_lock() was invoked in process context.  Or,
+ * for that matter to invoke srcu_read_unlock() from one task and the
+ * matching srcu_read_lock() from another.
  */
 static inline int srcu_read_lock(struct srcu_struct *ssp) __acquires(ssp)
 {
@@ -256,6 +259,10 @@ static inline int srcu_read_lock(struct srcu_struct *ssp) __acquires(ssp)
  *
  * Enter an SRCU read-side critical section, but in an NMI-safe manner.
  * See srcu_read_lock() for more information.
+ *
+ * If srcu_read_lock_nmisafe() is ever used on an srcu_struct structure,
+ * then none of the other flavors may be used, whether before, during,
+ * or after.
  */
 static inline int srcu_read_lock_nmisafe(struct srcu_struct *ssp) __acquires(ssp)
 {
-- 
cgit v1.2.3


From 05829be27fe6f64e0675dc3be3a12d43b52492e1 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Tue, 15 Oct 2024 09:11:04 -0700
Subject: srcu: Create CPP macros for normal and NMI-safe SRCU readers

This commit creates SRCU_READ_FLAVOR_NORMAL and SRCU_READ_FLAVOR_NMI
C-preprocessor macros for srcu_read_lock() and srcu_read_lock_nmisafe(),
respectively.  These replace the old true/false values that were
previously passed to srcu_check_read_flavor().  In addition, the
srcu_check_read_flavor() function itself requires a bit of rework to
handle bitmasks instead of true/false values.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Andrii Nakryiko <andrii@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Kent Overstreet <kent.overstreet@linux.dev>
Cc: <bpf@vger.kernel.org>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Reviewed-by: Neeraj Upadhyay <Neeraj.Upadhyay@amd.com>
Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
---
 include/linux/srcu.h     | 20 ++++++++------------
 include/linux/srcutree.h |  4 ++++
 2 files changed, 12 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/include/linux/srcu.h b/include/linux/srcu.h
index 46fd06b212ba..84daaa33ea0a 100644
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -176,10 +176,6 @@ static inline int srcu_read_lock_held(const struct srcu_struct *ssp)
 
 #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */
 
-#define SRCU_NMI_UNKNOWN	0x0
-#define SRCU_NMI_UNSAFE		0x1
-#define SRCU_NMI_SAFE		0x2
-
 #if defined(CONFIG_PROVE_RCU) && defined(CONFIG_TREE_SRCU)
 void srcu_check_read_flavor(struct srcu_struct *ssp, int read_flavor);
 #else
@@ -247,7 +243,7 @@ static inline int srcu_read_lock(struct srcu_struct *ssp) __acquires(ssp)
 {
 	int retval;
 
-	srcu_check_read_flavor(ssp, false);
+	srcu_check_read_flavor(ssp, SRCU_READ_FLAVOR_NORMAL);
 	retval = __srcu_read_lock(ssp);
 	srcu_lock_acquire(&ssp->dep_map);
 	return retval;
@@ -268,7 +264,7 @@ static inline int srcu_read_lock_nmisafe(struct srcu_struct *ssp) __acquires(ssp
 {
 	int retval;
 
-	srcu_check_read_flavor(ssp, true);
+	srcu_check_read_flavor(ssp, SRCU_READ_FLAVOR_NMI);
 	retval = __srcu_read_lock_nmisafe(ssp);
 	rcu_try_lock_acquire(&ssp->dep_map);
 	return retval;
@@ -280,7 +276,7 @@ srcu_read_lock_notrace(struct srcu_struct *ssp) __acquires(ssp)
 {
 	int retval;
 
-	srcu_check_read_flavor(ssp, false);
+	srcu_check_read_flavor(ssp, SRCU_READ_FLAVOR_NORMAL);
 	retval = __srcu_read_lock(ssp);
 	return retval;
 }
@@ -309,7 +305,7 @@ srcu_read_lock_notrace(struct srcu_struct *ssp) __acquires(ssp)
 static inline int srcu_down_read(struct srcu_struct *ssp) __acquires(ssp)
 {
 	WARN_ON_ONCE(in_nmi());
-	srcu_check_read_flavor(ssp, false);
+	srcu_check_read_flavor(ssp, SRCU_READ_FLAVOR_NORMAL);
 	return __srcu_read_lock(ssp);
 }
 
@@ -324,7 +320,7 @@ static inline void srcu_read_unlock(struct srcu_struct *ssp, int idx)
 	__releases(ssp)
 {
 	WARN_ON_ONCE(idx & ~0x1);
-	srcu_check_read_flavor(ssp, false);
+	srcu_check_read_flavor(ssp, SRCU_READ_FLAVOR_NORMAL);
 	srcu_lock_release(&ssp->dep_map);
 	__srcu_read_unlock(ssp, idx);
 }
@@ -340,7 +336,7 @@ static inline void srcu_read_unlock_nmisafe(struct srcu_struct *ssp, int idx)
 	__releases(ssp)
 {
 	WARN_ON_ONCE(idx & ~0x1);
-	srcu_check_read_flavor(ssp, true);
+	srcu_check_read_flavor(ssp, SRCU_READ_FLAVOR_NMI);
 	rcu_lock_release(&ssp->dep_map);
 	__srcu_read_unlock_nmisafe(ssp, idx);
 }
@@ -349,7 +345,7 @@ static inline void srcu_read_unlock_nmisafe(struct srcu_struct *ssp, int idx)
 static inline notrace void
 srcu_read_unlock_notrace(struct srcu_struct *ssp, int idx) __releases(ssp)
 {
-	srcu_check_read_flavor(ssp, false);
+	srcu_check_read_flavor(ssp, SRCU_READ_FLAVOR_NORMAL);
 	__srcu_read_unlock(ssp, idx);
 }
 
@@ -366,7 +362,7 @@ static inline void srcu_up_read(struct srcu_struct *ssp, int idx)
 {
 	WARN_ON_ONCE(idx & ~0x1);
 	WARN_ON_ONCE(in_nmi());
-	srcu_check_read_flavor(ssp, false);
+	srcu_check_read_flavor(ssp, SRCU_READ_FLAVOR_NORMAL);
 	__srcu_read_unlock(ssp, idx);
 }
 
diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h
index ab7d8d215b84..79ad809c7f03 100644
--- a/include/linux/srcutree.h
+++ b/include/linux/srcutree.h
@@ -43,6 +43,10 @@ struct srcu_data {
 	struct srcu_struct *ssp;
 };
 
+/* Values for ->srcu_reader_flavor. */
+#define SRCU_READ_FLAVOR_NORMAL	0x1		// srcu_read_lock().
+#define SRCU_READ_FLAVOR_NMI	0x2		// srcu_read_lock_nmisafe().
+
 /*
  * Node in SRCU combining tree, similar in function to rcu_data.
  */
-- 
cgit v1.2.3


From 6364dd8191d27230176ac4b1b4daaecaf4807399 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Tue, 15 Oct 2024 09:11:05 -0700
Subject: srcu: Add srcu_read_lock_lite() and srcu_read_unlock_lite()

This patch adds srcu_read_lock_lite() and srcu_read_unlock_lite(), which
dispense with the read-side smp_mb() but also are restricted to code
regions that RCU is watching.  If a given srcu_struct structure uses
srcu_read_lock_lite() and srcu_read_unlock_lite(), it is not permitted
to use any other SRCU read-side marker, before, during, or after.

Another price of light-weight readers is heavier weight grace periods.
Such readers mean that SRCU grace periods on srcu_struct structures
used by light-weight readers will incur at least two calls to
synchronize_rcu().  In addition, normal SRCU grace periods for
light-weight-reader srcu_struct structures never auto-expedite.
Note that expedited SRCU grace periods for light-weight-reader
srcu_struct structures still invoke synchronize_rcu(), not
synchronize_srcu_expedited().  Something about wishing to keep
the IPIs down to a dull roar.

The srcu_read_lock_lite() and srcu_read_unlock_lite() functions may not
(repeat, *not*) be used from NMI handlers, but if this is needed, an
additional flavor of SRCU reader can be added by some future commit.

[ paulmck: Apply Alexei Starovoitov expediting feedback. ]
[ paulmck: Apply kernel test robot feedback. ]

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Tested-by: kernel test robot <oliver.sang@intel.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Andrii Nakryiko <andrii@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Kent Overstreet <kent.overstreet@linux.dev>
Cc: <bpf@vger.kernel.org>
Reviewed-by: Neeraj Upadhyay <Neeraj.Upadhyay@amd.com>
Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
---
 include/linux/srcu.h     | 51 +++++++++++++++++++++++++++++++++++++++++++++++-
 include/linux/srcutree.h |  1 +
 2 files changed, 51 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/srcu.h b/include/linux/srcu.h
index 84daaa33ea0a..4ba96e2cfa40 100644
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -56,6 +56,13 @@ void call_srcu(struct srcu_struct *ssp, struct rcu_head *head,
 void cleanup_srcu_struct(struct srcu_struct *ssp);
 int __srcu_read_lock(struct srcu_struct *ssp) __acquires(ssp);
 void __srcu_read_unlock(struct srcu_struct *ssp, int idx) __releases(ssp);
+#ifdef CONFIG_TINY_SRCU
+#define __srcu_read_lock_lite __srcu_read_lock
+#define __srcu_read_unlock_lite __srcu_read_unlock
+#else // #ifdef CONFIG_TINY_SRCU
+int __srcu_read_lock_lite(struct srcu_struct *ssp) __acquires(ssp);
+void __srcu_read_unlock_lite(struct srcu_struct *ssp, int idx) __releases(ssp);
+#endif // #else // #ifdef CONFIG_TINY_SRCU
 void synchronize_srcu(struct srcu_struct *ssp);
 
 #define SRCU_GET_STATE_COMPLETED 0x1
@@ -179,7 +186,7 @@ static inline int srcu_read_lock_held(const struct srcu_struct *ssp)
 #if defined(CONFIG_PROVE_RCU) && defined(CONFIG_TREE_SRCU)
 void srcu_check_read_flavor(struct srcu_struct *ssp, int read_flavor);
 #else
-static inline void srcu_check_read_flavor(struct srcu_struct *ssp, int read_flavor) { }
+#define srcu_check_read_flavor(ssp, read_flavor) do { } while (0)
 #endif
 
 
@@ -249,6 +256,32 @@ static inline int srcu_read_lock(struct srcu_struct *ssp) __acquires(ssp)
 	return retval;
 }
 
+/**
+ * srcu_read_lock_lite - register a new reader for an SRCU-protected structure.
+ * @ssp: srcu_struct in which to register the new reader.
+ *
+ * Enter an SRCU read-side critical section, but for a light-weight
+ * smp_mb()-free reader.  See srcu_read_lock() for more information.
+ *
+ * If srcu_read_lock_lite() is ever used on an srcu_struct structure,
+ * then none of the other flavors may be used, whether before, during,
+ * or after.  Note that grace-period auto-expediting is disabled for _lite
+ * srcu_struct structures because auto-expedited grace periods invoke
+ * synchronize_rcu_expedited(), IPIs and all.
+ *
+ * Note that srcu_read_lock_lite() can be invoked only from those contexts
+ * where RCU is watching.  Otherwise, lockdep will complain.
+ */
+static inline int srcu_read_lock_lite(struct srcu_struct *ssp) __acquires(ssp)
+{
+	int retval;
+
+	srcu_check_read_flavor(ssp, SRCU_READ_FLAVOR_LITE);
+	retval = __srcu_read_lock_lite(ssp);
+	rcu_try_lock_acquire(&ssp->dep_map);
+	return retval;
+}
+
 /**
  * srcu_read_lock_nmisafe - register a new reader for an SRCU-protected structure.
  * @ssp: srcu_struct in which to register the new reader.
@@ -325,6 +358,22 @@ static inline void srcu_read_unlock(struct srcu_struct *ssp, int idx)
 	__srcu_read_unlock(ssp, idx);
 }
 
+/**
+ * srcu_read_unlock_lite - unregister a old reader from an SRCU-protected structure.
+ * @ssp: srcu_struct in which to unregister the old reader.
+ * @idx: return value from corresponding srcu_read_lock().
+ *
+ * Exit a light-weight SRCU read-side critical section.
+ */
+static inline void srcu_read_unlock_lite(struct srcu_struct *ssp, int idx)
+	__releases(ssp)
+{
+	WARN_ON_ONCE(idx & ~0x1);
+	srcu_check_read_flavor(ssp, SRCU_READ_FLAVOR_LITE);
+	srcu_lock_release(&ssp->dep_map);
+	__srcu_read_unlock(ssp, idx);
+}
+
 /**
  * srcu_read_unlock_nmisafe - unregister a old reader from an SRCU-protected structure.
  * @ssp: srcu_struct in which to unregister the old reader.
diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h
index 79ad809c7f03..8074138cbd62 100644
--- a/include/linux/srcutree.h
+++ b/include/linux/srcutree.h
@@ -46,6 +46,7 @@ struct srcu_data {
 /* Values for ->srcu_reader_flavor. */
 #define SRCU_READ_FLAVOR_NORMAL	0x1		// srcu_read_lock().
 #define SRCU_READ_FLAVOR_NMI	0x2		// srcu_read_lock_nmisafe().
+#define SRCU_READ_FLAVOR_LITE	0x4		// srcu_read_lock_lite().
 
 /*
  * Node in SRCU combining tree, similar in function to rcu_data.
-- 
cgit v1.2.3


From bb94b12e4503bdce003a74e95ee4214eba923f86 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Tue, 15 Oct 2024 09:11:06 -0700
Subject: srcu: Allow inlining of __srcu_read_{,un}lock_lite()

This commit moves __srcu_read_lock_lite() and __srcu_read_unlock_lite()
into include/linux/srcu.h and marks them "static inline" so that they
can be inlined into srcu_read_lock_lite() and srcu_read_unlock_lite(),
respectively.  They are not hand-inlined due to Tree SRCU and Tiny SRCU
having different implementations.

The earlier removal of smp_mb() combined with the inlining produce
significant single-percentage performance wins.

Link: https://lore.kernel.org/all/CAEf4BzYgiNmSb=ZKQ65tm6nJDi1UX2Gq26cdHSH1mPwXJYZj5g@mail.gmail.com/

Reported-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Andrii Nakryiko <andrii@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Kent Overstreet <kent.overstreet@linux.dev>
Cc: <bpf@vger.kernel.org>
Reviewed-by: Neeraj Upadhyay <Neeraj.Upadhyay@amd.com>
Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
---
 include/linux/srcutree.h | 39 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 39 insertions(+)

(limited to 'include')

diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h
index 8074138cbd62..778eb61542e1 100644
--- a/include/linux/srcutree.h
+++ b/include/linux/srcutree.h
@@ -209,4 +209,43 @@ void synchronize_srcu_expedited(struct srcu_struct *ssp);
 void srcu_barrier(struct srcu_struct *ssp);
 void srcu_torture_stats_print(struct srcu_struct *ssp, char *tt, char *tf);
 
+/*
+ * Counts the new reader in the appropriate per-CPU element of the
+ * srcu_struct.  Returns an index that must be passed to the matching
+ * srcu_read_unlock_lite().
+ *
+ * Note that this_cpu_inc() is an RCU read-side critical section either
+ * because it disables interrupts, because it is a single instruction,
+ * or because it is a read-modify-write atomic operation, depending on
+ * the whims of the architecture.
+ */
+static inline int __srcu_read_lock_lite(struct srcu_struct *ssp)
+{
+	int idx;
+
+	RCU_LOCKDEP_WARN(!rcu_is_watching(), "RCU must be watching srcu_read_lock_lite().");
+	idx = READ_ONCE(ssp->srcu_idx) & 0x1;
+	this_cpu_inc(ssp->sda->srcu_lock_count[idx].counter); /* Y */
+	barrier(); /* Avoid leaking the critical section. */
+	return idx;
+}
+
+/*
+ * Removes the count for the old reader from the appropriate
+ * per-CPU element of the srcu_struct.  Note that this may well be a
+ * different CPU than that which was incremented by the corresponding
+ * srcu_read_lock_lite(), but it must be within the same task.
+ *
+ * Note that this_cpu_inc() is an RCU read-side critical section either
+ * because it disables interrupts, because it is a single instruction,
+ * or because it is a read-modify-write atomic operation, depending on
+ * the whims of the architecture.
+ */
+static inline void __srcu_read_unlock_lite(struct srcu_struct *ssp, int idx)
+{
+	barrier();  /* Avoid leaking the critical section. */
+	this_cpu_inc(ssp->sda->srcu_unlock_count[idx].counter);  /* Z */
+	RCU_LOCKDEP_WARN(!rcu_is_watching(), "RCU must be watching srcu_read_unlock_lite().");
+}
+
 #endif
-- 
cgit v1.2.3


From 768b1f87098a4a586353898b074989808b1b27ad Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Tue, 15 Oct 2024 09:11:12 -0700
Subject: srcu: Improve srcu_read_lock_lite() kernel-doc comment

Where RCU is watching is where it is OK to invoke rcu_read_lock().

Reported-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Reviewed-by: Neeraj Upadhyay <Neeraj.Upadhyay@amd.com>
Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
---
 include/linux/srcu.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/srcu.h b/include/linux/srcu.h
index 4ba96e2cfa40..bab1dae3f69e 100644
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -270,7 +270,8 @@ static inline int srcu_read_lock(struct srcu_struct *ssp) __acquires(ssp)
  * synchronize_rcu_expedited(), IPIs and all.
  *
  * Note that srcu_read_lock_lite() can be invoked only from those contexts
- * where RCU is watching.  Otherwise, lockdep will complain.
+ * where RCU is watching, that is, from contexts where it would be legal
+ * to invoke rcu_read_lock().  Otherwise, lockdep will complain.
  */
 static inline int srcu_read_lock_lite(struct srcu_struct *ssp) __acquires(ssp)
 {
-- 
cgit v1.2.3


From 7d4f46c2372d5a850e28f63001013de50843b6e6 Mon Sep 17 00:00:00 2001
From: Nicolin Chen <nicolinc@nvidia.com>
Date: Tue, 5 Nov 2024 12:04:18 -0800
Subject: iommufd: Move _iommufd_object_alloc helper to a sharable file

The following patch will add a new vIOMMU allocator that will require this
_iommufd_object_alloc to be sharable with IOMMU drivers (and iommufd too).

Add a new driver.c file that will be built with CONFIG_IOMMUFD_DRIVER_CORE
selected by CONFIG_IOMMUFD, and put the CONFIG_DRIVER under that remaining
to be selectable for drivers to build the existing iova_bitmap.c file.

Link: https://patch.msgid.link/r/2f4f6e116dc49ffb67ff6c5e8a7a8e789ab9e98e.1730836219.git.nicolinc@nvidia.com
Suggested-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 include/linux/iommufd.h | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'include')

diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h
index 22948dd03d67..94522d4029ca 100644
--- a/include/linux/iommufd.h
+++ b/include/linux/iommufd.h
@@ -135,4 +135,17 @@ static inline int iommufd_vfio_compat_set_no_iommu(struct iommufd_ctx *ictx)
 	return -EOPNOTSUPP;
 }
 #endif /* CONFIG_IOMMUFD */
+
+#if IS_ENABLED(CONFIG_IOMMUFD_DRIVER_CORE)
+struct iommufd_object *_iommufd_object_alloc(struct iommufd_ctx *ictx,
+					     size_t size,
+					     enum iommufd_object_type type);
+#else /* !CONFIG_IOMMUFD_DRIVER_CORE */
+static inline struct iommufd_object *
+_iommufd_object_alloc(struct iommufd_ctx *ictx, size_t size,
+		      enum iommufd_object_type type)
+{
+	return ERR_PTR(-EOPNOTSUPP);
+}
+#endif /* CONFIG_IOMMUFD_DRIVER_CORE */
 #endif
-- 
cgit v1.2.3


From 6b22d562fcd6e3d1cc1c265b0596840946d16a09 Mon Sep 17 00:00:00 2001
From: Nicolin Chen <nicolinc@nvidia.com>
Date: Tue, 5 Nov 2024 12:04:19 -0800
Subject: iommufd: Introduce IOMMUFD_OBJ_VIOMMU and its related struct

Add a new IOMMUFD_OBJ_VIOMMU with an iommufd_viommu structure to represent
a slice of physical IOMMU device passed to or shared with a user space VM.
This slice, now a vIOMMU object, is a group of virtualization resources of
a physical IOMMU's, such as:
 - Security namespace for guest owned ID, e.g. guest-controlled cache tags
 - Non-device-affiliated event reporting, e.g. invalidation queue errors
 - Access to a sharable nesting parent pagetable across physical IOMMUs
 - Virtualization of various platforms IDs, e.g. RIDs and others
 - Delivery of paravirtualized invalidation
 - Direct assigned invalidation queues
 - Direct assigned interrupts

Add a new viommu_alloc op in iommu_ops, for drivers to allocate their own
vIOMMU structures. And this allocation also needs a free(), so add struct
iommufd_viommu_ops.

To simplify a vIOMMU allocation, provide a iommufd_viommu_alloc() helper.
It's suggested that a driver should embed a core-level viommu structure in
its driver-level viommu struct and call the iommufd_viommu_alloc() helper,
meanwhile the driver can also implement a viommu ops:
    struct my_driver_viommu {
        struct iommufd_viommu core;
        /* driver-owned properties/features */
        ....
    };

    static const struct iommufd_viommu_ops my_driver_viommu_ops = {
        .free = my_driver_viommu_free,
        /* future ops for virtualization features */
        ....
    };

    static struct iommufd_viommu my_driver_viommu_alloc(...)
    {
        struct my_driver_viommu *my_viommu =
                iommufd_viommu_alloc(ictx, my_driver_viommu, core,
                                     my_driver_viommu_ops);
        /* Init my_viommu and related HW feature */
        ....
        return &my_viommu->core;
    }

    static struct iommu_domain_ops my_driver_domain_ops = {
        ....
        .viommu_alloc = my_driver_viommu_alloc,
    };

Link: https://patch.msgid.link/r/64685e2b79dea0f1dc56f6ede04809b72d578935.1730836219.git.nicolinc@nvidia.com
Suggested-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 include/linux/iommu.h   | 14 ++++++++++++++
 include/linux/iommufd.h | 40 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 54 insertions(+)

(limited to 'include')

diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index bd722f473635..2574fc8abaf2 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -42,6 +42,8 @@ struct notifier_block;
 struct iommu_sva;
 struct iommu_dma_cookie;
 struct iommu_fault_param;
+struct iommufd_ctx;
+struct iommufd_viommu;
 
 #define IOMMU_FAULT_PERM_READ	(1 << 0) /* read */
 #define IOMMU_FAULT_PERM_WRITE	(1 << 1) /* write */
@@ -542,6 +544,14 @@ static inline int __iommu_copy_struct_from_user_array(
  * @remove_dev_pasid: Remove any translation configurations of a specific
  *                    pasid, so that any DMA transactions with this pasid
  *                    will be blocked by the hardware.
+ * @viommu_alloc: Allocate an iommufd_viommu on a physical IOMMU instance behind
+ *                the @dev, as the set of virtualization resources shared/passed
+ *                to user space IOMMU instance. And associate it with a nesting
+ *                @parent_domain. The @viommu_type must be defined in the header
+ *                include/uapi/linux/iommufd.h
+ *                It is required to call iommufd_viommu_alloc() helper for
+ *                a bundled allocation of the core and the driver structures,
+ *                using the given @ictx pointer.
  * @pgsize_bitmap: bitmap of all possible supported page sizes
  * @owner: Driver module providing these ops
  * @identity_domain: An always available, always attachable identity
@@ -591,6 +601,10 @@ struct iommu_ops {
 	void (*remove_dev_pasid)(struct device *dev, ioasid_t pasid,
 				 struct iommu_domain *domain);
 
+	struct iommufd_viommu *(*viommu_alloc)(
+		struct device *dev, struct iommu_domain *parent_domain,
+		struct iommufd_ctx *ictx, unsigned int viommu_type);
+
 	const struct iommu_domain_ops *default_domain_ops;
 	unsigned long pgsize_bitmap;
 	struct module *owner;
diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h
index 94522d4029ca..4fc2ce332f10 100644
--- a/include/linux/iommufd.h
+++ b/include/linux/iommufd.h
@@ -17,6 +17,7 @@ struct iommu_group;
 struct iommufd_access;
 struct iommufd_ctx;
 struct iommufd_device;
+struct iommufd_viommu_ops;
 struct page;
 
 enum iommufd_object_type {
@@ -28,6 +29,7 @@ enum iommufd_object_type {
 	IOMMUFD_OBJ_IOAS,
 	IOMMUFD_OBJ_ACCESS,
 	IOMMUFD_OBJ_FAULT,
+	IOMMUFD_OBJ_VIOMMU,
 #ifdef CONFIG_IOMMUFD_TEST
 	IOMMUFD_OBJ_SELFTEST,
 #endif
@@ -78,6 +80,26 @@ void iommufd_access_detach(struct iommufd_access *access);
 
 void iommufd_ctx_get(struct iommufd_ctx *ictx);
 
+struct iommufd_viommu {
+	struct iommufd_object obj;
+	struct iommufd_ctx *ictx;
+	struct iommu_device *iommu_dev;
+	struct iommufd_hwpt_paging *hwpt;
+
+	const struct iommufd_viommu_ops *ops;
+
+	unsigned int type;
+};
+
+/**
+ * struct iommufd_viommu_ops - vIOMMU specific operations
+ * @destroy: Clean up all driver-specific parts of an iommufd_viommu. The memory
+ *           of the vIOMMU will be free-ed by iommufd core after calling this op
+ */
+struct iommufd_viommu_ops {
+	void (*destroy)(struct iommufd_viommu *viommu);
+};
+
 #if IS_ENABLED(CONFIG_IOMMUFD)
 struct iommufd_ctx *iommufd_ctx_from_file(struct file *file);
 struct iommufd_ctx *iommufd_ctx_from_fd(int fd);
@@ -148,4 +170,22 @@ _iommufd_object_alloc(struct iommufd_ctx *ictx, size_t size,
 	return ERR_PTR(-EOPNOTSUPP);
 }
 #endif /* CONFIG_IOMMUFD_DRIVER_CORE */
+
+/*
+ * Helpers for IOMMU driver to allocate driver structures that will be freed by
+ * the iommufd core. The free op will be called prior to freeing the memory.
+ */
+#define iommufd_viommu_alloc(ictx, drv_struct, member, viommu_ops)             \
+	({                                                                     \
+		drv_struct *ret;                                               \
+									       \
+		static_assert(__same_type(struct iommufd_viommu,               \
+					  ((drv_struct *)NULL)->member));      \
+		static_assert(offsetof(drv_struct, member.obj) == 0);          \
+		ret = (drv_struct *)_iommufd_object_alloc(                     \
+			ictx, sizeof(drv_struct), IOMMUFD_OBJ_VIOMMU);         \
+		if (!IS_ERR(ret))                                              \
+			ret->member.ops = viommu_ops;                          \
+		ret;                                                           \
+	})
 #endif
-- 
cgit v1.2.3


From 4db97c21ed07a7d4081ed9820599fa36857083d6 Mon Sep 17 00:00:00 2001
From: Nicolin Chen <nicolinc@nvidia.com>
Date: Tue, 5 Nov 2024 12:04:21 -0800
Subject: iommufd/viommu: Add IOMMU_VIOMMU_ALLOC ioctl

Add a new ioctl for user space to do a vIOMMU allocation. It must be based
on a nesting parent HWPT, so take its refcount.

IOMMU driver wanting to support vIOMMUs must define its IOMMU_VIOMMU_TYPE_
in the uAPI header and implement a viommu_alloc op in its iommu_ops.

Link: https://patch.msgid.link/r/dc2b8ba9ac935007beff07c1761c31cd097ed780.1730836219.git.nicolinc@nvidia.com
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 include/uapi/linux/iommufd.h | 40 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 40 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h
index 41b1a01e9293..302844136b02 100644
--- a/include/uapi/linux/iommufd.h
+++ b/include/uapi/linux/iommufd.h
@@ -52,6 +52,7 @@ enum {
 	IOMMUFD_CMD_HWPT_INVALIDATE = 0x8d,
 	IOMMUFD_CMD_FAULT_QUEUE_ALLOC = 0x8e,
 	IOMMUFD_CMD_IOAS_MAP_FILE = 0x8f,
+	IOMMUFD_CMD_VIOMMU_ALLOC = 0x90,
 };
 
 /**
@@ -822,4 +823,43 @@ struct iommu_fault_alloc {
 	__u32 out_fault_fd;
 };
 #define IOMMU_FAULT_QUEUE_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_FAULT_QUEUE_ALLOC)
+
+/**
+ * enum iommu_viommu_type - Virtual IOMMU Type
+ * @IOMMU_VIOMMU_TYPE_DEFAULT: Reserved for future use
+ */
+enum iommu_viommu_type {
+	IOMMU_VIOMMU_TYPE_DEFAULT = 0,
+};
+
+/**
+ * struct iommu_viommu_alloc - ioctl(IOMMU_VIOMMU_ALLOC)
+ * @size: sizeof(struct iommu_viommu_alloc)
+ * @flags: Must be 0
+ * @type: Type of the virtual IOMMU. Must be defined in enum iommu_viommu_type
+ * @dev_id: The device's physical IOMMU will be used to back the virtual IOMMU
+ * @hwpt_id: ID of a nesting parent HWPT to associate to
+ * @out_viommu_id: Output virtual IOMMU ID for the allocated object
+ *
+ * Allocate a virtual IOMMU object, representing the underlying physical IOMMU's
+ * virtualization support that is a security-isolated slice of the real IOMMU HW
+ * that is unique to a specific VM. Operations global to the IOMMU are connected
+ * to the vIOMMU, such as:
+ * - Security namespace for guest owned ID, e.g. guest-controlled cache tags
+ * - Non-device-affiliated event reporting, e.g. invalidation queue errors
+ * - Access to a sharable nesting parent pagetable across physical IOMMUs
+ * - Virtualization of various platforms IDs, e.g. RIDs and others
+ * - Delivery of paravirtualized invalidation
+ * - Direct assigned invalidation queues
+ * - Direct assigned interrupts
+ */
+struct iommu_viommu_alloc {
+	__u32 size;
+	__u32 flags;
+	__u32 type;
+	__u32 dev_id;
+	__u32 hwpt_id;
+	__u32 out_viommu_id;
+};
+#define IOMMU_VIOMMU_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_VIOMMU_ALLOC)
 #endif
-- 
cgit v1.2.3


From 69d2689e57f5cb235c0609dd2f8fa10c1a832f87 Mon Sep 17 00:00:00 2001
From: Nicolin Chen <nicolinc@nvidia.com>
Date: Tue, 5 Nov 2024 12:04:22 -0800
Subject: iommufd: Add alloc_domain_nested op to iommufd_viommu_ops

Allow IOMMU driver to use a vIOMMU object that holds a nesting parent
hwpt/domain to allocate a nested domain.

Link: https://patch.msgid.link/r/2dcdb5e405dc0deb68230564530d989d285d959c.1730836219.git.nicolinc@nvidia.com
Suggested-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 include/linux/iommufd.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include')

diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h
index 4fc2ce332f10..de9b56265c9c 100644
--- a/include/linux/iommufd.h
+++ b/include/linux/iommufd.h
@@ -14,6 +14,7 @@
 struct device;
 struct file;
 struct iommu_group;
+struct iommu_user_data;
 struct iommufd_access;
 struct iommufd_ctx;
 struct iommufd_device;
@@ -95,9 +96,17 @@ struct iommufd_viommu {
  * struct iommufd_viommu_ops - vIOMMU specific operations
  * @destroy: Clean up all driver-specific parts of an iommufd_viommu. The memory
  *           of the vIOMMU will be free-ed by iommufd core after calling this op
+ * @alloc_domain_nested: Allocate a IOMMU_DOMAIN_NESTED on a vIOMMU that holds a
+ *                       nesting parent domain (IOMMU_DOMAIN_PAGING). @user_data
+ *                       must be defined in include/uapi/linux/iommufd.h.
+ *                       It must fully initialize the new iommu_domain before
+ *                       returning. Upon failure, ERR_PTR must be returned.
  */
 struct iommufd_viommu_ops {
 	void (*destroy)(struct iommufd_viommu *viommu);
+	struct iommu_domain *(*alloc_domain_nested)(
+		struct iommufd_viommu *viommu, u32 flags,
+		const struct iommu_user_data *user_data);
 };
 
 #if IS_ENABLED(CONFIG_IOMMUFD)
-- 
cgit v1.2.3


From 13a750180fc86d41695c8f64d8892412482a401d Mon Sep 17 00:00:00 2001
From: Nicolin Chen <nicolinc@nvidia.com>
Date: Tue, 5 Nov 2024 12:04:23 -0800
Subject: iommufd: Allow pt_id to carry viommu_id for IOMMU_HWPT_ALLOC

Now a vIOMMU holds a shareable nesting parent HWPT. So, it can act like
that nesting parent HWPT to allocate a nested HWPT.

Support that in the IOMMU_HWPT_ALLOC ioctl handler, and update its kdoc.

Also, add an iommufd_viommu_alloc_hwpt_nested helper to allocate a nested
HWPT for a vIOMMU object. Since a vIOMMU object holds the parent hwpt's
refcount already, increase the refcount of the vIOMMU only.

Link: https://patch.msgid.link/r/a0f24f32bfada8b448d17587adcaedeeb50a67ed.1730836219.git.nicolinc@nvidia.com
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 include/uapi/linux/iommufd.h | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h
index 302844136b02..a498d4838f9a 100644
--- a/include/uapi/linux/iommufd.h
+++ b/include/uapi/linux/iommufd.h
@@ -435,7 +435,7 @@ enum iommu_hwpt_data_type {
  * @size: sizeof(struct iommu_hwpt_alloc)
  * @flags: Combination of enum iommufd_hwpt_alloc_flags
  * @dev_id: The device to allocate this HWPT for
- * @pt_id: The IOAS or HWPT to connect this HWPT to
+ * @pt_id: The IOAS or HWPT or vIOMMU to connect this HWPT to
  * @out_hwpt_id: The ID of the new HWPT
  * @__reserved: Must be 0
  * @data_type: One of enum iommu_hwpt_data_type
@@ -454,11 +454,13 @@ enum iommu_hwpt_data_type {
  * IOMMU_HWPT_DATA_NONE. The HWPT can be allocated as a parent HWPT for a
  * nesting configuration by passing IOMMU_HWPT_ALLOC_NEST_PARENT via @flags.
  *
- * A user-managed nested HWPT will be created from a given parent HWPT via
- * @pt_id, in which the parent HWPT must be allocated previously via the
- * same ioctl from a given IOAS (@pt_id). In this case, the @data_type
- * must be set to a pre-defined type corresponding to an I/O page table
- * type supported by the underlying IOMMU hardware.
+ * A user-managed nested HWPT will be created from a given vIOMMU (wrapping a
+ * parent HWPT) or a parent HWPT via @pt_id, in which the parent HWPT must be
+ * allocated previously via the same ioctl from a given IOAS (@pt_id). In this
+ * case, the @data_type must be set to a pre-defined type corresponding to an
+ * I/O page table type supported by the underlying IOMMU hardware. The device
+ * via @dev_id and the vIOMMU via @pt_id must be associated to the same IOMMU
+ * instance.
  *
  * If the @data_type is set to IOMMU_HWPT_DATA_NONE, @data_len and
  * @data_uptr should be zero. Otherwise, both @data_len and @data_uptr
-- 
cgit v1.2.3


From 0ce5c2477af2e2284b9c70474e4dae85db211680 Mon Sep 17 00:00:00 2001
From: Nicolin Chen <nicolinc@nvidia.com>
Date: Tue, 5 Nov 2024 12:05:09 -0800
Subject: iommufd/viommu: Add IOMMUFD_OBJ_VDEVICE and IOMMU_VDEVICE_ALLOC ioctl

Introduce a new IOMMUFD_OBJ_VDEVICE to represent a physical device (struct
device) against a vIOMMU (struct iommufd_viommu) object in a VM.

This vDEVICE object (and its structure) holds all the infos and attributes
in the VM, regarding the device related to the vIOMMU.

As an initial patch, add a per-vIOMMU virtual ID. This can be:
 - Virtual StreamID on a nested ARM SMMUv3, an index to a Stream Table
 - Virtual DeviceID on a nested AMD IOMMU, an index to a Device Table
 - Virtual RID on a nested Intel VT-D IOMMU, an index to a Context Table
Potentially, this vDEVICE structure would hold some vData for Confidential
Compute Architecture (CCA). Use this virtual ID to index an "vdevs" xarray
that belongs to a vIOMMU object.

Add a new ioctl for vDEVICE allocations. Since a vDEVICE is a connection
of a device object and an iommufd_viommu object, take two refcounts in the
ioctl handler.

Link: https://patch.msgid.link/r/cda8fd2263166e61b8191a3b3207e0d2b08545bf.1730836308.git.nicolinc@nvidia.com
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 include/linux/iommufd.h      |  4 ++++
 include/uapi/linux/iommufd.h | 22 ++++++++++++++++++++++
 2 files changed, 26 insertions(+)

(limited to 'include')

diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h
index de9b56265c9c..71fa1e343023 100644
--- a/include/linux/iommufd.h
+++ b/include/linux/iommufd.h
@@ -10,6 +10,7 @@
 #include <linux/errno.h>
 #include <linux/refcount.h>
 #include <linux/types.h>
+#include <linux/xarray.h>
 
 struct device;
 struct file;
@@ -31,6 +32,7 @@ enum iommufd_object_type {
 	IOMMUFD_OBJ_ACCESS,
 	IOMMUFD_OBJ_FAULT,
 	IOMMUFD_OBJ_VIOMMU,
+	IOMMUFD_OBJ_VDEVICE,
 #ifdef CONFIG_IOMMUFD_TEST
 	IOMMUFD_OBJ_SELFTEST,
 #endif
@@ -89,6 +91,8 @@ struct iommufd_viommu {
 
 	const struct iommufd_viommu_ops *ops;
 
+	struct xarray vdevs;
+
 	unsigned int type;
 };
 
diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h
index a498d4838f9a..9b5236004b8e 100644
--- a/include/uapi/linux/iommufd.h
+++ b/include/uapi/linux/iommufd.h
@@ -53,6 +53,7 @@ enum {
 	IOMMUFD_CMD_FAULT_QUEUE_ALLOC = 0x8e,
 	IOMMUFD_CMD_IOAS_MAP_FILE = 0x8f,
 	IOMMUFD_CMD_VIOMMU_ALLOC = 0x90,
+	IOMMUFD_CMD_VDEVICE_ALLOC = 0x91,
 };
 
 /**
@@ -864,4 +865,25 @@ struct iommu_viommu_alloc {
 	__u32 out_viommu_id;
 };
 #define IOMMU_VIOMMU_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_VIOMMU_ALLOC)
+
+/**
+ * struct iommu_vdevice_alloc - ioctl(IOMMU_VDEVICE_ALLOC)
+ * @size: sizeof(struct iommu_vdevice_alloc)
+ * @viommu_id: vIOMMU ID to associate with the virtual device
+ * @dev_id: The physical device to allocate a virtual instance on the vIOMMU
+ * @out_vdevice_id: Object handle for the vDevice. Pass to IOMMU_DESTORY
+ * @virt_id: Virtual device ID per vIOMMU, e.g. vSID of ARM SMMUv3, vDeviceID
+ *           of AMD IOMMU, and vRID of a nested Intel VT-d to a Context Table
+ *
+ * Allocate a virtual device instance (for a physical device) against a vIOMMU.
+ * This instance holds the device's information (related to its vIOMMU) in a VM.
+ */
+struct iommu_vdevice_alloc {
+	__u32 size;
+	__u32 viommu_id;
+	__u32 dev_id;
+	__u32 out_vdevice_id;
+	__aligned_u64 virt_id;
+};
+#define IOMMU_VDEVICE_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_VDEVICE_ALLOC)
 #endif
-- 
cgit v1.2.3


From 67db79dc1a411335f8a7e53a5e206d96b237d9a8 Mon Sep 17 00:00:00 2001
From: Nicolin Chen <nicolinc@nvidia.com>
Date: Tue, 5 Nov 2024 12:05:11 -0800
Subject: iommu/viommu: Add cache_invalidate to iommufd_viommu_ops

This per-vIOMMU cache_invalidate op is like the cache_invalidate_user op
in struct iommu_domain_ops, but wider, supporting device cache (e.g. PCI
ATC invaldiations).

Link: https://patch.msgid.link/r/90138505850fa6b165135e78a87b4cc7022869a4.1730836308.git.nicolinc@nvidia.com
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 include/linux/iommufd.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include')

diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h
index 71fa1e343023..2bc735ff9511 100644
--- a/include/linux/iommufd.h
+++ b/include/linux/iommufd.h
@@ -16,6 +16,7 @@ struct device;
 struct file;
 struct iommu_group;
 struct iommu_user_data;
+struct iommu_user_data_array;
 struct iommufd_access;
 struct iommufd_ctx;
 struct iommufd_device;
@@ -105,12 +106,21 @@ struct iommufd_viommu {
  *                       must be defined in include/uapi/linux/iommufd.h.
  *                       It must fully initialize the new iommu_domain before
  *                       returning. Upon failure, ERR_PTR must be returned.
+ * @cache_invalidate: Flush hardware cache used by a vIOMMU. It can be used for
+ *                    any IOMMU hardware specific cache: TLB and device cache.
+ *                    The @array passes in the cache invalidation requests, in
+ *                    form of a driver data structure. A driver must update the
+ *                    array->entry_num to report the number of handled requests.
+ *                    The data structure of the array entry must be defined in
+ *                    include/uapi/linux/iommufd.h
  */
 struct iommufd_viommu_ops {
 	void (*destroy)(struct iommufd_viommu *viommu);
 	struct iommu_domain *(*alloc_domain_nested)(
 		struct iommufd_viommu *viommu, u32 flags,
 		const struct iommu_user_data *user_data);
+	int (*cache_invalidate)(struct iommufd_viommu *viommu,
+				struct iommu_user_data_array *array);
 };
 
 #if IS_ENABLED(CONFIG_IOMMUFD)
-- 
cgit v1.2.3


From 54ce69e36c71c88f258b1a322c54343d90954858 Mon Sep 17 00:00:00 2001
From: Nicolin Chen <nicolinc@nvidia.com>
Date: Tue, 5 Nov 2024 12:05:12 -0800
Subject: iommufd: Allow hwpt_id to carry viommu_id for IOMMU_HWPT_INVALIDATE

With a vIOMMU object, use space can flush any IOMMU related cache that can
be directed via a vIOMMU object. It is similar to the IOMMU_HWPT_INVALIDATE
uAPI, but can cover a wider range than IOTLB, e.g. device/desciprtor cache.

Allow hwpt_id of the iommu_hwpt_invalidate structure to carry a viommu_id,
and reuse the IOMMU_HWPT_INVALIDATE uAPI for vIOMMU invalidations. Drivers
can define different structures for vIOMMU invalidations v.s. HWPT ones.

Since both the HWPT-based and vIOMMU-based invalidation pathways check own
cache invalidation op, remove the WARN_ON_ONCE in the allocator.

Update the uAPI, kdoc, and selftest case accordingly.

Link: https://patch.msgid.link/r/b411e2245e303b8a964f39f49453a5dff280968f.1730836308.git.nicolinc@nvidia.com
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 include/uapi/linux/iommufd.h | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h
index 9b5236004b8e..badb41c5bfa4 100644
--- a/include/uapi/linux/iommufd.h
+++ b/include/uapi/linux/iommufd.h
@@ -700,7 +700,7 @@ struct iommu_hwpt_vtd_s1_invalidate {
 /**
  * struct iommu_hwpt_invalidate - ioctl(IOMMU_HWPT_INVALIDATE)
  * @size: sizeof(struct iommu_hwpt_invalidate)
- * @hwpt_id: ID of a nested HWPT for cache invalidation
+ * @hwpt_id: ID of a nested HWPT or a vIOMMU, for cache invalidation
  * @data_uptr: User pointer to an array of driver-specific cache invalidation
  *             data.
  * @data_type: One of enum iommu_hwpt_invalidate_data_type, defining the data
@@ -711,8 +711,11 @@ struct iommu_hwpt_vtd_s1_invalidate {
  *             Output the number of requests successfully handled by kernel.
  * @__reserved: Must be 0.
  *
- * Invalidate the iommu cache for user-managed page table. Modifications on a
- * user-managed page table should be followed by this operation to sync cache.
+ * Invalidate iommu cache for user-managed page table or vIOMMU. Modifications
+ * on a user-managed page table should be followed by this operation, if a HWPT
+ * is passed in via @hwpt_id. Other caches, such as device cache or descriptor
+ * cache can be flushed if a vIOMMU is passed in via the @hwpt_id field.
+ *
  * Each ioctl can support one or more cache invalidation requests in the array
  * that has a total size of @entry_len * @entry_num.
  *
-- 
cgit v1.2.3


From 4f2e59ccb69866c5165525bd7aee47cd5cd00acc Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@nvidia.com>
Date: Tue, 5 Nov 2024 12:05:13 -0800
Subject: iommu: Add iommu_copy_struct_from_full_user_array helper

The iommu_copy_struct_from_user_array helper can be used to copy a single
entry from a user array which might not be efficient if the array is big.

Add a new iommu_copy_struct_from_full_user_array to copy the entire user
array at once. Update the existing iommu_copy_struct_from_user_array kdoc
accordingly.

Link: https://patch.msgid.link/r/5cd773d9c26920c5807d232b21d415ea79172e49.1730836308.git.nicolinc@nvidia.com
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 include/linux/iommu.h | 48 +++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 47 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 2574fc8abaf2..11de66237eaa 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -493,7 +493,9 @@ static inline int __iommu_copy_struct_from_user_array(
  * @index: Index to the location in the array to copy user data from
  * @min_last: The last member of the data structure @kdst points in the
  *            initial version.
- * Return 0 for success, otherwise -error.
+ *
+ * Copy a single entry from a user array. Return 0 for success, otherwise
+ * -error.
  */
 #define iommu_copy_struct_from_user_array(kdst, user_array, data_type, index, \
 					  min_last)                           \
@@ -501,6 +503,50 @@ static inline int __iommu_copy_struct_from_user_array(
 		kdst, user_array, data_type, index, sizeof(*(kdst)),          \
 		offsetofend(typeof(*(kdst)), min_last))
 
+/**
+ * iommu_copy_struct_from_full_user_array - Copy iommu driver specific user
+ *         space data from an iommu_user_data_array
+ * @kdst: Pointer to an iommu driver specific user data that is defined in
+ *        include/uapi/linux/iommufd.h
+ * @kdst_entry_size: sizeof(*kdst)
+ * @user_array: Pointer to a struct iommu_user_data_array for a user space
+ *              array
+ * @data_type: The data type of the @kdst. Must match with @user_array->type
+ *
+ * Copy the entire user array. kdst must have room for kdst_entry_size *
+ * user_array->entry_num bytes. Return 0 for success, otherwise -error.
+ */
+static inline int
+iommu_copy_struct_from_full_user_array(void *kdst, size_t kdst_entry_size,
+				       struct iommu_user_data_array *user_array,
+				       unsigned int data_type)
+{
+	unsigned int i;
+	int ret;
+
+	if (user_array->type != data_type)
+		return -EINVAL;
+	if (!user_array->entry_num)
+		return -EINVAL;
+	if (likely(user_array->entry_len == kdst_entry_size)) {
+		if (copy_from_user(kdst, user_array->uptr,
+				   user_array->entry_num *
+					   user_array->entry_len))
+			return -EFAULT;
+	}
+
+	/* Copy item by item */
+	for (i = 0; i != user_array->entry_num; i++) {
+		ret = copy_struct_from_user(
+			kdst + kdst_entry_size * i, kdst_entry_size,
+			user_array->uptr + user_array->entry_len * i,
+			user_array->entry_len);
+		if (ret)
+			return ret;
+	}
+	return 0;
+}
+
 /**
  * struct iommu_ops - iommu ops and capabilities
  * @capable: check capability
-- 
cgit v1.2.3


From c747e67978ff473e6830afe0b1f3986dd1f444b5 Mon Sep 17 00:00:00 2001
From: Nicolin Chen <nicolinc@nvidia.com>
Date: Tue, 5 Nov 2024 12:05:14 -0800
Subject: iommufd/viommu: Add iommufd_viommu_find_dev helper

This avoids a bigger trouble of exposing struct iommufd_device and struct
iommufd_vdevice in the public header.

Link: https://patch.msgid.link/r/84fa7c624db4d4508067ccfdf42059533950180a.1730836308.git.nicolinc@nvidia.com
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 include/linux/iommufd.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include')

diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h
index 2bc735ff9511..11110c749200 100644
--- a/include/linux/iommufd.h
+++ b/include/linux/iommufd.h
@@ -185,6 +185,8 @@ static inline int iommufd_vfio_compat_set_no_iommu(struct iommufd_ctx *ictx)
 struct iommufd_object *_iommufd_object_alloc(struct iommufd_ctx *ictx,
 					     size_t size,
 					     enum iommufd_object_type type);
+struct device *iommufd_viommu_find_dev(struct iommufd_viommu *viommu,
+				       unsigned long vdev_id);
 #else /* !CONFIG_IOMMUFD_DRIVER_CORE */
 static inline struct iommufd_object *
 _iommufd_object_alloc(struct iommufd_ctx *ictx, size_t size,
@@ -192,6 +194,12 @@ _iommufd_object_alloc(struct iommufd_ctx *ictx, size_t size,
 {
 	return ERR_PTR(-EOPNOTSUPP);
 }
+
+static inline struct device *
+iommufd_viommu_find_dev(struct iommufd_viommu *viommu, unsigned long vdev_id)
+{
+	return NULL;
+}
 #endif /* CONFIG_IOMMUFD_DRIVER_CORE */
 
 /*
-- 
cgit v1.2.3


From ad8d1e323dd37f91d0c973e2a74c7b9054219adc Mon Sep 17 00:00:00 2001
From: Kunwu Chan <chentao@kylinos.cn>
Date: Mon, 2 Sep 2024 07:39:20 +0100
Subject: ARM: 9415/1: amba: Add dev_is_amba() function and export it for
 modules

Add dev_is_amba() function to determine
whether the device is a AMBA device.

Suggested-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Kunwu Chan <chentao@kylinos.cn>
Signed-off-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
---
 include/linux/amba/bus.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include')

diff --git a/include/linux/amba/bus.h b/include/linux/amba/bus.h
index dda2f3ea89cb..9946276aff73 100644
--- a/include/linux/amba/bus.h
+++ b/include/linux/amba/bus.h
@@ -121,6 +121,7 @@ extern const struct bus_type amba_bustype;
 #ifdef CONFIG_ARM_AMBA
 int __amba_driver_register(struct amba_driver *, struct module *);
 void amba_driver_unregister(struct amba_driver *);
+bool dev_is_amba(const struct device *dev);
 #else
 static inline int __amba_driver_register(struct amba_driver *drv,
 					 struct module *owner)
@@ -130,6 +131,10 @@ static inline int __amba_driver_register(struct amba_driver *drv,
 static inline void amba_driver_unregister(struct amba_driver *drv)
 {
 }
+static inline bool dev_is_amba(const struct device *dev)
+{
+	return false;
+}
 #endif
 
 struct amba_device *amba_device_alloc(const char *, resource_size_t, size_t);
-- 
cgit v1.2.3


From 69d9b312f38aa19f8c801e90bd23d70685be49f0 Mon Sep 17 00:00:00 2001
From: Nicolin Chen <nicolinc@nvidia.com>
Date: Wed, 30 Oct 2024 21:20:52 -0300
Subject: iommu/arm-smmu-v3: Support IOMMU_VIOMMU_ALLOC

Add a new driver-type for ARM SMMUv3 to enum iommu_viommu_type. Implement
an arm_vsmmu_alloc().

As an initial step, copy the VMID from s2_parent. A followup series is
required to give the VIOMMU object it's own VMID that will be used in all
nesting configurations.

Link: https://patch.msgid.link/r/8-v4-9e99b76f3518+3a8-smmuv3_nesting_jgg@nvidia.com
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
Tested-by: Nicolin Chen <nicolinc@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 include/uapi/linux/iommufd.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h
index f4f76759b738..7cb13a29969d 100644
--- a/include/uapi/linux/iommufd.h
+++ b/include/uapi/linux/iommufd.h
@@ -425,10 +425,12 @@ struct iommu_hwpt_vtd_s1 {
  * enum iommu_hwpt_data_type - IOMMU HWPT Data Type
  * @IOMMU_HWPT_DATA_NONE: no data
  * @IOMMU_HWPT_DATA_VTD_S1: Intel VT-d stage-1 page table
+ * @IOMMU_HWPT_DATA_ARM_SMMUV3: ARM SMMUv3 Context Descriptor Table
  */
 enum iommu_hwpt_data_type {
 	IOMMU_HWPT_DATA_NONE = 0,
 	IOMMU_HWPT_DATA_VTD_S1 = 1,
+	IOMMU_HWPT_DATA_ARM_SMMUV3 = 2,
 };
 
 /**
@@ -868,9 +870,11 @@ struct iommu_fault_alloc {
 /**
  * enum iommu_viommu_type - Virtual IOMMU Type
  * @IOMMU_VIOMMU_TYPE_DEFAULT: Reserved for future use
+ * @IOMMU_VIOMMU_TYPE_ARM_SMMUV3: ARM SMMUv3 driver specific type
  */
 enum iommu_viommu_type {
 	IOMMU_VIOMMU_TYPE_DEFAULT = 0,
+	IOMMU_VIOMMU_TYPE_ARM_SMMUV3 = 1,
 };
 
 /**
-- 
cgit v1.2.3


From 1e8be08d1c91d52a9b51d424db78ddbf88660bbb Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@nvidia.com>
Date: Wed, 30 Oct 2024 21:20:53 -0300
Subject: iommu/arm-smmu-v3: Support IOMMU_DOMAIN_NESTED

For SMMUv3 a IOMMU_DOMAIN_NESTED is composed of a S2 iommu_domain acting
as the parent and a user provided STE fragment that defines the CD table
and related data with addresses translated by the S2 iommu_domain.

The kernel only permits userspace to control certain allowed bits of the
STE that are safe for user/guest control.

IOTLB maintenance is a bit subtle here, the S1 implicitly includes the S2
translation, but there is no way of knowing which S1 entries refer to a
range of S2.

For the IOTLB we follow ARM's guidance and issue a CMDQ_OP_TLBI_NH_ALL to
flush all ASIDs from the VMID after flushing the S2 on any change to the
S2.

The IOMMU_DOMAIN_NESTED can only be created from inside a VIOMMU as the
invalidation path relies on the VIOMMU to translate virtual stream ID used
in the invalidation commands for the CD table and ATS.

Link: https://patch.msgid.link/r/9-v4-9e99b76f3518+3a8-smmuv3_nesting_jgg@nvidia.com
Reviewed-by: Nicolin Chen <nicolinc@nvidia.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Reviewed-by: Jerry Snitselaar <jsnitsel@redhat.com>
Reviewed-by: Donald Dutile <ddutile@redhat.com>
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
Tested-by: Nicolin Chen <nicolinc@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 include/uapi/linux/iommufd.h | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h
index 7cb13a29969d..b6baaa1e55b1 100644
--- a/include/uapi/linux/iommufd.h
+++ b/include/uapi/linux/iommufd.h
@@ -421,6 +421,26 @@ struct iommu_hwpt_vtd_s1 {
 	__u32 __reserved;
 };
 
+/**
+ * struct iommu_hwpt_arm_smmuv3 - ARM SMMUv3 nested STE
+ *                                (IOMMU_HWPT_DATA_ARM_SMMUV3)
+ *
+ * @ste: The first two double words of the user space Stream Table Entry for
+ *       the translation. Must be little-endian.
+ *       Allowed fields: (Refer to "5.2 Stream Table Entry" in SMMUv3 HW Spec)
+ *       - word-0: V, Cfg, S1Fmt, S1ContextPtr, S1CDMax
+ *       - word-1: S1DSS, S1CIR, S1COR, S1CSH, S1STALLD
+ *
+ * -EIO will be returned if @ste is not legal or contains any non-allowed field.
+ * Cfg can be used to select a S1, Bypass or Abort configuration. A Bypass
+ * nested domain will translate the same as the nesting parent. The S1 will
+ * install a Context Descriptor Table pointing at userspace memory translated
+ * by the nesting parent.
+ */
+struct iommu_hwpt_arm_smmuv3 {
+	__aligned_le64 ste[2];
+};
+
 /**
  * enum iommu_hwpt_data_type - IOMMU HWPT Data Type
  * @IOMMU_HWPT_DATA_NONE: no data
-- 
cgit v1.2.3


From 67e4fe3985138325c9b21193be52266750616182 Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@nvidia.com>
Date: Wed, 30 Oct 2024 21:20:54 -0300
Subject: iommu/arm-smmu-v3: Use S2FWB for NESTED domains

Force Write Back (FWB) changes how the S2 IOPTE's MemAttr field
works. When S2FWB is supported and enabled the IOPTE will force cachable
access to IOMMU_CACHE memory when nesting with a S1 and deny cachable
access when !IOMMU_CACHE.

When using a single stage of translation, a simple S2 domain, it doesn't
change things for PCI devices as it is just a different encoding for the
existing mapping of the IOMMU protection flags to cachability attributes.
For non-PCI it also changes the combining rules when incoming transactions
have inconsistent attributes.

However, when used with a nested S1, FWB has the effect of preventing the
guest from choosing a MemAttr in it's S1 that would cause ordinary DMA to
bypass the cache. Consistent with KVM we wish to deny the guest the
ability to become incoherent with cached memory the hypervisor believes is
cachable so we don't have to flush it.

Allow NESTED domains to be created if the SMMU has S2FWB support and use
S2FWB for NESTING_PARENTS. This is an additional option to CANWBS.

Link: https://patch.msgid.link/r/10-v4-9e99b76f3518+3a8-smmuv3_nesting_jgg@nvidia.com
Reviewed-by: Nicolin Chen <nicolinc@nvidia.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Reviewed-by: Jerry Snitselaar <jsnitsel@redhat.com>
Reviewed-by: Donald Dutile <ddutile@redhat.com>
Tested-by: Nicolin Chen <nicolinc@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 include/linux/io-pgtable.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h
index b1ecfc3cd5bc..ce86b09ae80f 100644
--- a/include/linux/io-pgtable.h
+++ b/include/linux/io-pgtable.h
@@ -87,6 +87,7 @@ struct io_pgtable_cfg {
 	 *	attributes set in the TCR for a non-coherent page-table walker.
 	 *
 	 * IO_PGTABLE_QUIRK_ARM_HD: Enables dirty tracking in stage 1 pagetable.
+	 * IO_PGTABLE_QUIRK_ARM_S2FWB: Use the FWB format for the MemAttrs bits
 	 */
 	#define IO_PGTABLE_QUIRK_ARM_NS			BIT(0)
 	#define IO_PGTABLE_QUIRK_NO_PERMS		BIT(1)
@@ -95,6 +96,7 @@ struct io_pgtable_cfg {
 	#define IO_PGTABLE_QUIRK_ARM_TTBR1		BIT(5)
 	#define IO_PGTABLE_QUIRK_ARM_OUTER_WBWA		BIT(6)
 	#define IO_PGTABLE_QUIRK_ARM_HD			BIT(7)
+	#define IO_PGTABLE_QUIRK_ARM_S2FWB		BIT(8)
 	unsigned long			quirks;
 	unsigned long			pgsize_bitmap;
 	unsigned int			ias;
-- 
cgit v1.2.3


From f27298a82ba09a1c8aecee8a209b2a312beac672 Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@nvidia.com>
Date: Wed, 30 Oct 2024 21:20:55 -0300
Subject: iommu/arm-smmu-v3: Allow ATS for IOMMU_DOMAIN_NESTED

The EATS flag needs to flow through the vSTE and into the pSTE, and ensure
physical ATS is enabled on the PCI device.

The physical ATS state must match the VM's idea of EATS as we rely on the
VM to issue the ATS invalidation commands. Thus ATS must remain off at the
device until EATS on a nesting domain turns it on. Attaching a nesting
domain is the point where the invalidation responsibility transfers to
userspace.

Update the ATS logic to track EATS for nesting domains and flush the
ATC whenever the S2 nesting parent changes.

Link: https://patch.msgid.link/r/11-v4-9e99b76f3518+3a8-smmuv3_nesting_jgg@nvidia.com
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
Tested-by: Nicolin Chen <nicolinc@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 include/uapi/linux/iommufd.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h
index b6baaa1e55b1..a66eb0384cd6 100644
--- a/include/uapi/linux/iommufd.h
+++ b/include/uapi/linux/iommufd.h
@@ -429,7 +429,7 @@ struct iommu_hwpt_vtd_s1 {
  *       the translation. Must be little-endian.
  *       Allowed fields: (Refer to "5.2 Stream Table Entry" in SMMUv3 HW Spec)
  *       - word-0: V, Cfg, S1Fmt, S1ContextPtr, S1CDMax
- *       - word-1: S1DSS, S1CIR, S1COR, S1CSH, S1STALLD
+ *       - word-1: EATS, S1DSS, S1CIR, S1COR, S1CSH, S1STALLD
  *
  * -EIO will be returned if @ste is not legal or contains any non-allowed field.
  * Cfg can be used to select a S1, Bypass or Abort configuration. A Bypass
-- 
cgit v1.2.3


From d68beb276ba26cec47350a6d468e967673ee0c56 Mon Sep 17 00:00:00 2001
From: Nicolin Chen <nicolinc@nvidia.com>
Date: Wed, 30 Oct 2024 21:20:56 -0300
Subject: iommu/arm-smmu-v3: Support IOMMU_HWPT_INVALIDATE using a VIOMMU
 object

Implement the vIOMMU's cache_invalidate op for user space to invalidate
the IOTLB entries, Device ATS and CD entries that are cached by hardware.

Add struct iommu_viommu_arm_smmuv3_invalidate defining invalidation
entries that are simply in the native format of a 128-bit TLBI
command. Scan those commands against the permitted command list and fix
their VMID/SID fields to match what is stored in the vIOMMU.

Link: https://patch.msgid.link/r/12-v4-9e99b76f3518+3a8-smmuv3_nesting_jgg@nvidia.com
Co-developed-by: Eric Auger <eric.auger@redhat.com>
Signed-off-by: Eric Auger <eric.auger@redhat.com>
Co-developed-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
Tested-by: Nicolin Chen <nicolinc@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 include/uapi/linux/iommufd.h | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h
index a66eb0384cd6..747d3d9baa3d 100644
--- a/include/uapi/linux/iommufd.h
+++ b/include/uapi/linux/iommufd.h
@@ -713,9 +713,11 @@ struct iommu_hwpt_get_dirty_bitmap {
  * enum iommu_hwpt_invalidate_data_type - IOMMU HWPT Cache Invalidation
  *                                        Data Type
  * @IOMMU_HWPT_INVALIDATE_DATA_VTD_S1: Invalidation data for VTD_S1
+ * @IOMMU_VIOMMU_INVALIDATE_DATA_ARM_SMMUV3: Invalidation data for ARM SMMUv3
  */
 enum iommu_hwpt_invalidate_data_type {
 	IOMMU_HWPT_INVALIDATE_DATA_VTD_S1 = 0,
+	IOMMU_VIOMMU_INVALIDATE_DATA_ARM_SMMUV3 = 1,
 };
 
 /**
@@ -754,6 +756,28 @@ struct iommu_hwpt_vtd_s1_invalidate {
 	__u32 __reserved;
 };
 
+/**
+ * struct iommu_viommu_arm_smmuv3_invalidate - ARM SMMUv3 cahce invalidation
+ *         (IOMMU_VIOMMU_INVALIDATE_DATA_ARM_SMMUV3)
+ * @cmd: 128-bit cache invalidation command that runs in SMMU CMDQ.
+ *       Must be little-endian.
+ *
+ * Supported command list only when passing in a vIOMMU via @hwpt_id:
+ *     CMDQ_OP_TLBI_NSNH_ALL
+ *     CMDQ_OP_TLBI_NH_VA
+ *     CMDQ_OP_TLBI_NH_VAA
+ *     CMDQ_OP_TLBI_NH_ALL
+ *     CMDQ_OP_TLBI_NH_ASID
+ *     CMDQ_OP_ATC_INV
+ *     CMDQ_OP_CFGI_CD
+ *     CMDQ_OP_CFGI_CD_ALL
+ *
+ * -EIO will be returned if the command is not supported.
+ */
+struct iommu_viommu_arm_smmuv3_invalidate {
+	__aligned_le64 cmd[2];
+};
+
 /**
  * struct iommu_hwpt_invalidate - ioctl(IOMMU_HWPT_INVALIDATE)
  * @size: sizeof(struct iommu_hwpt_invalidate)
-- 
cgit v1.2.3


From 134e9d035d830aabd1121bcda89f7ee9a476d3a3 Mon Sep 17 00:00:00 2001
From: Taniya Das <quic_tdas@quicinc.com>
Date: Mon, 11 Nov 2024 16:24:43 -0800
Subject: dt-bindings: power: qcom,rpmpd: document the SM8750 RPMh Power
 Domains

Document the RPMh Power Domains on the SM8750 Platform.

Signed-off-by: Taniya Das <quic_tdas@quicinc.com>
Signed-off-by: Jishnu Prakash <quic_jprakash@quicinc.com>
Signed-off-by: Melody Olvera <quic_molvera@quicinc.com>
Message-ID: <20241112002444.2802092-2-quic_molvera@quicinc.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 include/dt-bindings/power/qcom-rpmpd.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/dt-bindings/power/qcom-rpmpd.h b/include/dt-bindings/power/qcom-rpmpd.h
index 608087fb9a3d..df599bf46220 100644
--- a/include/dt-bindings/power/qcom-rpmpd.h
+++ b/include/dt-bindings/power/qcom-rpmpd.h
@@ -218,6 +218,7 @@
 /* SDM845 Power Domain performance levels */
 #define RPMH_REGULATOR_LEVEL_RETENTION		16
 #define RPMH_REGULATOR_LEVEL_MIN_SVS		48
+#define RPMH_REGULATOR_LEVEL_LOW_SVS_D3		50
 #define RPMH_REGULATOR_LEVEL_LOW_SVS_D2		52
 #define RPMH_REGULATOR_LEVEL_LOW_SVS_D1		56
 #define RPMH_REGULATOR_LEVEL_LOW_SVS_D0		60
@@ -238,6 +239,7 @@
 #define RPMH_REGULATOR_LEVEL_TURBO_L1		416
 #define RPMH_REGULATOR_LEVEL_TURBO_L2		432
 #define RPMH_REGULATOR_LEVEL_TURBO_L3		448
+#define RPMH_REGULATOR_LEVEL_TURBO_L4		452
 #define RPMH_REGULATOR_LEVEL_SUPER_TURBO 	464
 #define RPMH_REGULATOR_LEVEL_SUPER_TURBO_NO_CPR	480
 
-- 
cgit v1.2.3


From 61952bb73486fff0f5550bccdf4062d9dd0fb163 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 12 Nov 2024 18:00:38 +0100
Subject: block: remove the write_hint field from struct request

The write_hint is only used for read/write requests, which must have a
bio attached to them.  Just use the bio field instead.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Bart Van Assche <bvanassche@acm.org>
Link: https://lore.kernel.org/r/20241112170050.1612998-2-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blk-mq.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 2035fad3131f..2804fe181d9d 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -156,7 +156,6 @@ struct request {
 	struct blk_crypto_keyslot *crypt_keyslot;
 #endif
 
-	enum rw_hint write_hint;
 	unsigned short ioprio;
 
 	enum mq_rq_state state;
-- 
cgit v1.2.3


From 6975c1a486a40446b5bc77a89d9c520f8296fd08 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 12 Nov 2024 18:00:39 +0100
Subject: block: remove the ioprio field from struct request

The request ioprio is only initialized from the first attached bio,
so requests without a bio already never set it.  Directly use the
bio field instead.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Bart Van Assche <bvanassche@acm.org>
Link: https://lore.kernel.org/r/20241112170050.1612998-3-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blk-mq.h       | 7 +++----
 include/trace/events/block.h | 6 +++---
 2 files changed, 6 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 2804fe181d9d..a28264442948 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -156,8 +156,6 @@ struct request {
 	struct blk_crypto_keyslot *crypt_keyslot;
 #endif
 
-	unsigned short ioprio;
-
 	enum mq_rq_state state;
 	atomic_t ref;
 
@@ -221,7 +219,9 @@ static inline bool blk_rq_is_passthrough(struct request *rq)
 
 static inline unsigned short req_get_ioprio(struct request *req)
 {
-	return req->ioprio;
+	if (req->bio)
+		return req->bio->bi_ioprio;
+	return 0;
 }
 
 #define rq_data_dir(rq)		(op_is_write(req_op(rq)) ? WRITE : READ)
@@ -984,7 +984,6 @@ static inline void blk_rq_bio_prep(struct request *rq, struct bio *bio,
 	rq->nr_phys_segments = nr_segs;
 	rq->__data_len = bio->bi_iter.bi_size;
 	rq->bio = rq->biotail = bio;
-	rq->ioprio = bio_prio(bio);
 }
 
 void blk_mq_hctx_set_fq_lock_class(struct blk_mq_hw_ctx *hctx,
diff --git a/include/trace/events/block.h b/include/trace/events/block.h
index 1527d5d45e01..bd0ea07338eb 100644
--- a/include/trace/events/block.h
+++ b/include/trace/events/block.h
@@ -99,7 +99,7 @@ TRACE_EVENT(block_rq_requeue,
 		__entry->dev	   = rq->q->disk ? disk_devt(rq->q->disk) : 0;
 		__entry->sector    = blk_rq_trace_sector(rq);
 		__entry->nr_sector = blk_rq_trace_nr_sectors(rq);
-		__entry->ioprio    = rq->ioprio;
+		__entry->ioprio    = req_get_ioprio(rq);
 
 		blk_fill_rwbs(__entry->rwbs, rq->cmd_flags);
 		__get_str(cmd)[0] = '\0';
@@ -136,7 +136,7 @@ DECLARE_EVENT_CLASS(block_rq_completion,
 		__entry->sector    = blk_rq_pos(rq);
 		__entry->nr_sector = nr_bytes >> 9;
 		__entry->error     = blk_status_to_errno(error);
-		__entry->ioprio    = rq->ioprio;
+		__entry->ioprio    = req_get_ioprio(rq);
 
 		blk_fill_rwbs(__entry->rwbs, rq->cmd_flags);
 		__get_str(cmd)[0] = '\0';
@@ -209,7 +209,7 @@ DECLARE_EVENT_CLASS(block_rq,
 		__entry->sector    = blk_rq_trace_sector(rq);
 		__entry->nr_sector = blk_rq_trace_nr_sectors(rq);
 		__entry->bytes     = blk_rq_bytes(rq);
-		__entry->ioprio	   = rq->ioprio;
+		__entry->ioprio	   = req_get_ioprio(rq);
 
 		blk_fill_rwbs(__entry->rwbs, rq->cmd_flags);
 		__get_str(cmd)[0] = '\0';
-- 
cgit v1.2.3


From 174dd22a781b97cb355b1037f0931436a254d3be Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Mon, 11 Nov 2024 09:38:26 -0800
Subject: srcu: Remove smp_mb() from srcu_read_unlock_lite()

The srcu_read_unlock_lite() function invokes __srcu_read_unlock() instead
of __srcu_read_unlock_lite(), which means that it is doing an unnecessary
smp_mb().  This is harmless other than the performance degradation.

This commit therefore switches to __srcu_read_unlock_lite().

Reported-by: Neeraj Upadhyay <Neeraj.Upadhyay@amd.com>
Closes: https://lore.kernel.org/all/d07e8f4a-d5ff-4c8e-8e61-50db285c57e9@amd.com/
Fixes: c0f08d6b5a61 ("srcu: Add srcu_read_lock_lite() and srcu_read_unlock_lite()")
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Cc: Frederic Weisbecker <frederic@kernel.org>
Reviewed-by: Neeraj Upadhyay <Neeraj.Upadhyay@amd.com>
Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
---
 include/linux/srcu.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/srcu.h b/include/linux/srcu.h
index bab1dae3f69e..56f83237de4d 100644
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -372,7 +372,7 @@ static inline void srcu_read_unlock_lite(struct srcu_struct *ssp, int idx)
 	WARN_ON_ONCE(idx & ~0x1);
 	srcu_check_read_flavor(ssp, SRCU_READ_FLAVOR_LITE);
 	srcu_lock_release(&ssp->dep_map);
-	__srcu_read_unlock(ssp, idx);
+	__srcu_read_unlock_lite(ssp, idx);
 }
 
 /**
-- 
cgit v1.2.3


From a76ab5731e32d50ff5b1ae97e9dc4b23f41c23f5 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yonghong.song@linux.dev>
Date: Tue, 12 Nov 2024 08:39:07 -0800
Subject: bpf: Find eligible subprogs for private stack support

Private stack will be allocated with percpu allocator in jit time.
To avoid complexity at runtime, only one copy of private stack is
available per cpu per prog. So runtime recursion check is necessary
to avoid stack corruption.

Current private stack only supports kprobe/perf_event/tp/raw_tp
which has recursion check in the kernel, and prog types that use
bpf trampoline recursion check. For trampoline related prog types,
currently only tracing progs have recursion checking.

To avoid complexity, all async_cb subprogs use normal kernel stack
including those subprogs used by both main prog subtree and async_cb
subtree. Any prog having tail call also uses kernel stack.

To avoid jit penalty with private stack support, a subprog stack
size threshold is set such that only if the stack size is no less
than the threshold, private stack is supported. The current threshold
is 64 bytes. This avoids jit penality if the stack usage is small.

A useless 'continue' is also removed from a loop in func
check_max_stack_depth().

Signed-off-by: Yonghong Song <yonghong.song@linux.dev>
Link: https://lore.kernel.org/r/20241112163907.2223839-1-yonghong.song@linux.dev
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/bpf_verifier.h | 7 +++++++
 include/linux/filter.h       | 1 +
 2 files changed, 8 insertions(+)

(limited to 'include')

diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 3a74033d49c4..d62bb2ca1828 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -633,6 +633,12 @@ struct bpf_subprog_arg_info {
 	};
 };
 
+enum priv_stack_mode {
+	PRIV_STACK_UNKNOWN,
+	NO_PRIV_STACK,
+	PRIV_STACK_ADAPTIVE,
+};
+
 struct bpf_subprog_info {
 	/* 'start' has to be the first field otherwise find_subprog() won't work */
 	u32 start; /* insn idx of function entry point */
@@ -653,6 +659,7 @@ struct bpf_subprog_info {
 	/* true if bpf_fastcall stack region is used by functions that can't be inlined */
 	bool keep_fastcall_stack: 1;
 
+	enum priv_stack_mode priv_stack_mode;
 	u8 arg_cnt;
 	struct bpf_subprog_arg_info args[MAX_BPF_FUNC_REG_ARGS];
 };
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 7d7578a8eac1..3a21947f2fd4 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -1119,6 +1119,7 @@ bool bpf_jit_supports_exceptions(void);
 bool bpf_jit_supports_ptr_xchg(void);
 bool bpf_jit_supports_arena(void);
 bool bpf_jit_supports_insn(struct bpf_insn *insn, bool in_arena);
+bool bpf_jit_supports_private_stack(void);
 u64 bpf_arch_uaddress_limit(void);
 void arch_bpf_stack_walk(bool (*consume_fn)(void *cookie, u64 ip, u64 sp, u64 bp), void *cookie);
 bool bpf_helper_changes_pkt_data(void *func);
-- 
cgit v1.2.3


From e00931c02568dc6ac76f94b1ab471de05e6fdfe8 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yonghong.song@linux.dev>
Date: Tue, 12 Nov 2024 08:39:12 -0800
Subject: bpf: Enable private stack for eligible subprogs

If private stack is used by any subprog, set that subprog
prog->aux->jits_use_priv_stack to be true so later jit can allocate
private stack for that subprog properly.

Also set env->prog->aux->jits_use_priv_stack to be true if
any subprog uses private stack. This is a use case for a
single main prog (no subprogs) to use private stack, and
also a use case for later struct-ops progs where
env->prog->aux->jits_use_priv_stack will enable recursion
check if any subprog uses private stack.

Signed-off-by: Yonghong Song <yonghong.song@linux.dev>
Link: https://lore.kernel.org/r/20241112163912.2224007-1-yonghong.song@linux.dev
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/bpf.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 7da41ae2eac8..129b29e85cec 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1523,6 +1523,7 @@ struct bpf_prog_aux {
 	bool exception_cb;
 	bool exception_boundary;
 	bool is_extended; /* true if extended by freplace program */
+	bool jits_use_priv_stack;
 	u64 prog_array_member_cnt; /* counts how many times as member of prog_array */
 	struct mutex ext_mutex; /* mutex for is_extended and prog_array_member_cnt */
 	struct bpf_arena *arena;
-- 
cgit v1.2.3


From 7d1cd70d4b16ff0216a5f6c2ae7d0fa9fa978c07 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yonghong.song@linux.dev>
Date: Tue, 12 Nov 2024 08:39:22 -0800
Subject: bpf, x86: Support private stack in jit

Private stack is allocated in function bpf_int_jit_compile() with
alignment 8. Private stack allocation size includes the stack size
determined by verifier and additional space to protect stack overflow
and underflow. See below an illustration:
  ---> memory address increasing
  [8 bytes to protect overflow] [normal stack] [8 bytes to protect underflow]
If overflow/underflow is detected, kernel messages will be
emited in dmesg like
  BPF private stack overflow/underflow detected for prog Fx
  BPF Private stack overflow/underflow detected for prog bpf_prog_a41699c234a1567a_subprog1x
Those messages are generated when I made some changes to jitted code
to intentially cause overflow for some progs.

For the jited prog, The x86 register 9 (X86_REG_R9) is used to replace
bpf frame register (BPF_REG_10). The private stack is used per
subprog per cpu. The X86_REG_R9 is saved and restored around every
func call (not including tailcall) to maintain correctness of
X86_REG_R9.

Signed-off-by: Yonghong Song <yonghong.song@linux.dev>
Link: https://lore.kernel.org/r/20241112163922.2224385-1-yonghong.song@linux.dev
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/bpf.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 129b29e85cec..d32cc373dfd1 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1507,6 +1507,7 @@ struct bpf_prog_aux {
 	u32 max_rdwr_access;
 	struct btf *attach_btf;
 	const struct bpf_ctx_arg_aux *ctx_arg_info;
+	void __percpu *priv_stack_ptr;
 	struct mutex dst_mutex; /* protects dst_* pointers below, *after* prog becomes visible */
 	struct bpf_prog *dst_prog;
 	struct bpf_trampoline *dst_trampoline;
-- 
cgit v1.2.3


From 5bd36da1e37e7a78e8b38efd287de6e1394b7d6e Mon Sep 17 00:00:00 2001
From: Yonghong Song <yonghong.song@linux.dev>
Date: Tue, 12 Nov 2024 08:39:33 -0800
Subject: bpf: Support private stack for struct_ops progs

For struct_ops progs, whether a particular prog uses private stack
depends on prog->aux->priv_stack_requested setting before actual
insn-level verification for that prog. One particular implementation
is to piggyback on struct_ops->check_member(). The next patch has
an example for this. The struct_ops->check_member() sets
prog->aux->priv_stack_requested to be true which enables private stack
usage.

The struct_ops prog follows the same rule as kprobe/tracing progs after
function bpf_enable_priv_stack(). For example, even a struct_ops prog
requests private stack, it could still use normal kernel stack if
the stack size is small (< 64 bytes).

Similar to tracing progs, nested same cpu same prog run will be skipped.
A field (recursion_detected()) is added to bpf_prog_aux structure.
If bpf_prog->aux->recursion_detected is implemented by the struct_ops
subsystem and nested same cpu/prog happens, the function will be
triggered to report an error, collect related info, etc.

Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Yonghong Song <yonghong.song@linux.dev>
Link: https://lore.kernel.org/r/20241112163933.2224962-1-yonghong.song@linux.dev
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/bpf.h          | 2 ++
 include/linux/bpf_verifier.h | 1 +
 2 files changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index d32cc373dfd1..10945c8858ce 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1525,9 +1525,11 @@ struct bpf_prog_aux {
 	bool exception_boundary;
 	bool is_extended; /* true if extended by freplace program */
 	bool jits_use_priv_stack;
+	bool priv_stack_requested;
 	u64 prog_array_member_cnt; /* counts how many times as member of prog_array */
 	struct mutex ext_mutex; /* mutex for is_extended and prog_array_member_cnt */
 	struct bpf_arena *arena;
+	void (*recursion_detected)(struct bpf_prog *prog); /* callback if recursion is detected */
 	/* BTF_KIND_FUNC_PROTO for valid attach_btf_id */
 	const struct btf_type *attach_func_proto;
 	/* function name for valid attach_btf_id */
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index d62bb2ca1828..6b7c91629176 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -879,6 +879,7 @@ static inline bool bpf_prog_check_recur(const struct bpf_prog *prog)
 	case BPF_PROG_TYPE_TRACING:
 		return prog->expected_attach_type != BPF_TRACE_ITER;
 	case BPF_PROG_TYPE_STRUCT_OPS:
+		return prog->aux->jits_use_priv_stack;
 	case BPF_PROG_TYPE_LSM:
 		return false;
 	default:
-- 
cgit v1.2.3


From 7c8ce4ffb684676039b1ff9ff81c126794e8d88e Mon Sep 17 00:00:00 2001
From: Xu Kuohai <xukuohai@huawei.com>
Date: Tue, 12 Nov 2024 22:58:49 +0800
Subject: bpf: Add kernel symbol for struct_ops trampoline

Without kernel symbols for struct_ops trampoline, the unwinder may
produce unexpected stacktraces.

For example, the x86 ORC and FP unwinders check if an IP is in kernel
text by verifying the presence of the IP's kernel symbol. When a
struct_ops trampoline address is encountered, the unwinder stops due
to the absence of symbol, resulting in an incomplete stacktrace that
consists only of direct and indirect child functions called from the
trampoline.

The arm64 unwinder is another example. While the arm64 unwinder can
proceed across a struct_ops trampoline address, the corresponding
symbol name is displayed as "unknown", which is confusing.

Thus, add kernel symbol for struct_ops trampoline. The name is
bpf__<struct_ops_name>_<member_name>, where <struct_ops_name> is the
type name of the struct_ops, and <member_name> is the name of
the member that the trampoline is linked to.

Below is a comparison of stacktraces captured on x86 by perf record,
before and after this patch.

Before:
ffffffff8116545d __lock_acquire+0xad ([kernel.kallsyms])
ffffffff81167fcc lock_acquire+0xcc ([kernel.kallsyms])
ffffffff813088f4 __bpf_prog_enter+0x34 ([kernel.kallsyms])

After:
ffffffff811656bd __lock_acquire+0x30d ([kernel.kallsyms])
ffffffff81167fcc lock_acquire+0xcc ([kernel.kallsyms])
ffffffff81309024 __bpf_prog_enter+0x34 ([kernel.kallsyms])
ffffffffc000d7e9 bpf__tcp_congestion_ops_cong_avoid+0x3e ([kernel.kallsyms])
ffffffff81f250a5 tcp_ack+0x10d5 ([kernel.kallsyms])
ffffffff81f27c66 tcp_rcv_established+0x3b6 ([kernel.kallsyms])
ffffffff81f3ad03 tcp_v4_do_rcv+0x193 ([kernel.kallsyms])
ffffffff81d65a18 __release_sock+0xd8 ([kernel.kallsyms])
ffffffff81d65af4 release_sock+0x34 ([kernel.kallsyms])
ffffffff81f15c4b tcp_sendmsg+0x3b ([kernel.kallsyms])
ffffffff81f663d7 inet_sendmsg+0x47 ([kernel.kallsyms])
ffffffff81d5ab40 sock_write_iter+0x160 ([kernel.kallsyms])
ffffffff8149c67b vfs_write+0x3fb ([kernel.kallsyms])
ffffffff8149caf6 ksys_write+0xc6 ([kernel.kallsyms])
ffffffff8149cb5d __x64_sys_write+0x1d ([kernel.kallsyms])
ffffffff81009200 x64_sys_call+0x1d30 ([kernel.kallsyms])
ffffffff82232d28 do_syscall_64+0x68 ([kernel.kallsyms])
ffffffff8240012f entry_SYSCALL_64_after_hwframe+0x76 ([kernel.kallsyms])

Fixes: 85d33df357b6 ("bpf: Introduce BPF_MAP_TYPE_STRUCT_OPS")
Signed-off-by: Xu Kuohai <xukuohai@huawei.com>
Acked-by: Yonghong Song <yonghong.song@linux.dev>
Link: https://lore.kernel.org/r/20241112145849.3436772-4-xukuohai@huaweicloud.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/bpf.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 10945c8858ce..3ace0d6227e3 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1402,7 +1402,8 @@ int arch_prepare_bpf_dispatcher(void *image, void *buf, s64 *funcs, int num_func
 void bpf_dispatcher_change_prog(struct bpf_dispatcher *d, struct bpf_prog *from,
 				struct bpf_prog *to);
 /* Called only from JIT-enabled code, so there's no need for stubs. */
-void bpf_image_ksym_add(void *data, unsigned int size, struct bpf_ksym *ksym);
+void bpf_image_ksym_init(void *data, unsigned int size, struct bpf_ksym *ksym);
+void bpf_image_ksym_add(struct bpf_ksym *ksym);
 void bpf_image_ksym_del(struct bpf_ksym *ksym);
 void bpf_ksym_add(struct bpf_ksym *ksym);
 void bpf_ksym_del(struct bpf_ksym *ksym);
-- 
cgit v1.2.3


From 12bbabd3cab8a7dab0ddad8ed1e671f40c7cdeeb Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Tue, 12 Nov 2024 18:01:25 +0200
Subject: usb: cdns3: Synchronise PCI IDs via common data base

There are a few places in the kernel where PCI IDs for different Cadence
USB controllers are being used. Besides different naming, they duplicate
each other. Make this all in order by providing common definitions via
PCI IDs database and use in all users. While doing that, rename
definitions as Roger suggested.

Suggested-by: Roger Quadros <rogerq@kernel.org>
Suggested-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
Link: https://lore.kernel.org/r/20241112160125.2340972-1-andriy.shevchenko@linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/pci_ids.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 4cf6aaed5f35..eff9eccc52fc 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -121,6 +121,7 @@
 #define PCI_CLASS_SERIAL_USB_OHCI	0x0c0310
 #define PCI_CLASS_SERIAL_USB_EHCI	0x0c0320
 #define PCI_CLASS_SERIAL_USB_XHCI	0x0c0330
+#define PCI_CLASS_SERIAL_USB_CDNS	0x0c0380
 #define PCI_CLASS_SERIAL_USB_DEVICE	0x0c03fe
 #define PCI_CLASS_SERIAL_FIBER		0x0c04
 #define PCI_CLASS_SERIAL_SMBUS		0x0c05
@@ -2421,6 +2422,9 @@
 #define PCI_VENDOR_ID_QCOM		0x17cb
 
 #define PCI_VENDOR_ID_CDNS		0x17cd
+#define PCI_DEVICE_ID_CDNS_USBSS	0x0100
+#define PCI_DEVICE_ID_CDNS_USB		0x0120
+#define PCI_DEVICE_ID_CDNS_USBSSP	0x0200
 
 #define PCI_VENDOR_ID_ARECA		0x17d3
 #define PCI_DEVICE_ID_ARECA_1110	0x1110
-- 
cgit v1.2.3


From 948ccbd95016f50ce01df5eef9440eede3b8c713 Mon Sep 17 00:00:00 2001
From: Xianglai Li <lixianglai@loongson.cn>
Date: Wed, 13 Nov 2024 16:18:26 +0800
Subject: LoongArch: KVM: Add iocsr and mmio bus simulation in kernel

Add iocsr and mmio memory read and write simulation to the kernel. When
the VM accesses the device address space through iocsr instructions or
mmio, it does not need to return to the qemu user mode but can directly
completes the access in the kernel mode.

Signed-off-by: Tianrui Zhao <zhaotianrui@loongson.cn>
Signed-off-by: Xianglai Li <lixianglai@loongson.cn>
Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
---
 include/linux/kvm_host.h   |  1 +
 include/trace/events/kvm.h | 35 +++++++++++++++++++++++++++++++++++
 2 files changed, 36 insertions(+)

(limited to 'include')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 45be36e5285f..164d9725cb08 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -219,6 +219,7 @@ enum kvm_bus {
 	KVM_PIO_BUS,
 	KVM_VIRTIO_CCW_NOTIFY_BUS,
 	KVM_FAST_MMIO_BUS,
+	KVM_IOCSR_BUS,
 	KVM_NR_BUSES
 };
 
diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h
index 74e40d5d4af4..fc7d0f8ff078 100644
--- a/include/trace/events/kvm.h
+++ b/include/trace/events/kvm.h
@@ -236,6 +236,41 @@ TRACE_EVENT(kvm_mmio,
 		  __entry->len, __entry->gpa, __entry->val)
 );
 
+#define KVM_TRACE_IOCSR_READ_UNSATISFIED 0
+#define KVM_TRACE_IOCSR_READ 1
+#define KVM_TRACE_IOCSR_WRITE 2
+
+#define kvm_trace_symbol_iocsr \
+	{ KVM_TRACE_IOCSR_READ_UNSATISFIED, "unsatisfied-read" }, \
+	{ KVM_TRACE_IOCSR_READ, "read" }, \
+	{ KVM_TRACE_IOCSR_WRITE, "write" }
+
+TRACE_EVENT(kvm_iocsr,
+	TP_PROTO(int type, int len, u64 gpa, void *val),
+	TP_ARGS(type, len, gpa, val),
+
+	TP_STRUCT__entry(
+		__field(	u32,	type	)
+		__field(	u32,	len	)
+		__field(	u64,	gpa	)
+		__field(	u64,	val	)
+	),
+
+	TP_fast_assign(
+		__entry->type		= type;
+		__entry->len		= len;
+		__entry->gpa		= gpa;
+		__entry->val		= 0;
+		if (val)
+			memcpy(&__entry->val, val,
+			       min_t(u32, sizeof(__entry->val), len));
+	),
+
+	TP_printk("iocsr %s len %u gpa 0x%llx val 0x%llx",
+		  __print_symbolic(__entry->type, kvm_trace_symbol_iocsr),
+		  __entry->len, __entry->gpa, __entry->val)
+);
+
 #define kvm_fpu_load_symbol	\
 	{0, "unload"},		\
 	{1, "load"}
-- 
cgit v1.2.3


From c532de5a67a70f8533d495f8f2aaa9a0491c3ad0 Mon Sep 17 00:00:00 2001
From: Xianglai Li <lixianglai@loongson.cn>
Date: Wed, 13 Nov 2024 16:18:27 +0800
Subject: LoongArch: KVM: Add IPI device support

Add device model for IPI interrupt controller, implement basic create &
destroy interfaces, and register device model to kvm device table.

Signed-off-by: Tianrui Zhao <zhaotianrui@loongson.cn>
Signed-off-by: Xianglai Li <lixianglai@loongson.cn>
Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
---
 include/uapi/linux/kvm.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 637efc055145..9fff439c30ea 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -1158,7 +1158,11 @@ enum kvm_device_type {
 #define KVM_DEV_TYPE_ARM_PV_TIME	KVM_DEV_TYPE_ARM_PV_TIME
 	KVM_DEV_TYPE_RISCV_AIA,
 #define KVM_DEV_TYPE_RISCV_AIA		KVM_DEV_TYPE_RISCV_AIA
+	KVM_DEV_TYPE_LOONGARCH_IPI,
+#define KVM_DEV_TYPE_LOONGARCH_IPI	KVM_DEV_TYPE_LOONGARCH_IPI
+
 	KVM_DEV_TYPE_MAX,
+
 };
 
 struct kvm_vfio_spapr_tce {
-- 
cgit v1.2.3


From 2e8b9df82631e714cc2b7bf302772c8259673180 Mon Sep 17 00:00:00 2001
From: Xianglai Li <lixianglai@loongson.cn>
Date: Wed, 13 Nov 2024 16:18:27 +0800
Subject: LoongArch: KVM: Add EIOINTC device support

Add device model for EIOINTC interrupt controller, implement basic
create & destroy interfaces, and register device model to kvm device
table.

Signed-off-by: Tianrui Zhao <zhaotianrui@loongson.cn>
Signed-off-by: Xianglai Li <lixianglai@loongson.cn>
Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
---
 include/uapi/linux/kvm.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 9fff439c30ea..0ec5c631d9e9 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -1160,6 +1160,8 @@ enum kvm_device_type {
 #define KVM_DEV_TYPE_RISCV_AIA		KVM_DEV_TYPE_RISCV_AIA
 	KVM_DEV_TYPE_LOONGARCH_IPI,
 #define KVM_DEV_TYPE_LOONGARCH_IPI	KVM_DEV_TYPE_LOONGARCH_IPI
+	KVM_DEV_TYPE_LOONGARCH_EIOINTC,
+#define KVM_DEV_TYPE_LOONGARCH_EIOINTC	KVM_DEV_TYPE_LOONGARCH_EIOINTC
 
 	KVM_DEV_TYPE_MAX,
 
-- 
cgit v1.2.3


From e785dfacf7e7fe94370fa0e8e3ff1bc8fe179831 Mon Sep 17 00:00:00 2001
From: Xianglai Li <lixianglai@loongson.cn>
Date: Wed, 13 Nov 2024 16:18:27 +0800
Subject: LoongArch: KVM: Add PCHPIC device support

Add device model for PCHPIC interrupt controller, implemente basic
create & destroy interface, and register device model to kvm device
table.

Signed-off-by: Tianrui Zhao <zhaotianrui@loongson.cn>
Signed-off-by: Xianglai Li <lixianglai@loongson.cn>
Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
---
 include/uapi/linux/kvm.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 0ec5c631d9e9..502ea63b5d2e 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -1162,6 +1162,8 @@ enum kvm_device_type {
 #define KVM_DEV_TYPE_LOONGARCH_IPI	KVM_DEV_TYPE_LOONGARCH_IPI
 	KVM_DEV_TYPE_LOONGARCH_EIOINTC,
 #define KVM_DEV_TYPE_LOONGARCH_EIOINTC	KVM_DEV_TYPE_LOONGARCH_EIOINTC
+	KVM_DEV_TYPE_LOONGARCH_PCHPIC,
+#define KVM_DEV_TYPE_LOONGARCH_PCHPIC	KVM_DEV_TYPE_LOONGARCH_PCHPIC
 
 	KVM_DEV_TYPE_MAX,
 
-- 
cgit v1.2.3


From da115c4ee29f589bb72ec2e86eb5e196b6bbcb41 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 12 Nov 2024 15:29:15 +0100
Subject: printk: add dummy printk_force_console_enter/exit helpers

The newly added interface is broken when PRINTK is disabled:

drivers/tty/sysrq.c: In function '__handle_sysrq':
drivers/tty/sysrq.c:601:9: error: implicit declaration of function 'printk_force_console_enter' [-Wimplicit-function-declaration]
  601 |         printk_force_console_enter();
      |         ^~~~~~~~~~~~~~~~~~~~~~~~~~
drivers/tty/sysrq.c:611:25: error: implicit declaration of function 'printk_force_console_exit' [-Wimplicit-function-declaration]
  611 |                         printk_force_console_exit();
      |                         ^~~~~~~~~~~~~~~~~~~~~~~~~

Add empty stub functions for both.

Fixes: ed76c07c6885 ("printk: Introduce FORCE_CON flag")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reviewed-by: Marcos Paulo de Souza <mpdesouza@suse.com>
Tested-by: Marcos Paulo de Souza <mpdesouza@suse.com>
Link: https://lore.kernel.org/r/20241112142939.724093-1-arnd@kernel.org
Signed-off-by: Petr Mladek <pmladek@suse.com>
---
 include/linux/printk.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include')

diff --git a/include/linux/printk.h b/include/linux/printk.h
index 232e5fd06701..4217a9f412b2 100644
--- a/include/linux/printk.h
+++ b/include/linux/printk.h
@@ -232,6 +232,14 @@ static inline void printk_deferred_exit(void)
 {
 }
 
+static inline void printk_force_console_enter(void)
+{
+}
+
+static inline void printk_force_console_exit(void)
+{
+}
+
 static inline int printk_ratelimit(void)
 {
 	return 0;
-- 
cgit v1.2.3


From 1d58f7f3a1373734b2e86a246edcf1cd39359f3e Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <linux@treblig.org>
Date: Fri, 25 Oct 2024 21:31:01 +0100
Subject: clocksource/drivers/dw_apb: Remove unused dw_apb_clockevent functions

dw_apb_clockevent_pause(), dw_apb_clockevent_resume() and
dw_apb_clockevent_stop() have been unused since 2021's
commit 1b79fc4f2bfd ("x86/apb_timer: Remove driver for deprecated
platform")

Remove them.

(Some of the other clockevent functions are still called by
dw_apb_timer_of.c  so I guess it is still in use?)

Signed-off-by: Dr. David Alan Gilbert <linux@treblig.org>
Link: https://lore.kernel.org/r/20241025203101.241709-1-linux@treblig.org
Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
---
 include/linux/dw_apb_timer.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/dw_apb_timer.h b/include/linux/dw_apb_timer.h
index 82ebf9223948..f8811c46b89e 100644
--- a/include/linux/dw_apb_timer.h
+++ b/include/linux/dw_apb_timer.h
@@ -34,9 +34,6 @@ struct dw_apb_clocksource {
 };
 
 void dw_apb_clockevent_register(struct dw_apb_clock_event_device *dw_ced);
-void dw_apb_clockevent_pause(struct dw_apb_clock_event_device *dw_ced);
-void dw_apb_clockevent_resume(struct dw_apb_clock_event_device *dw_ced);
-void dw_apb_clockevent_stop(struct dw_apb_clock_event_device *dw_ced);
 
 struct dw_apb_clock_event_device *
 dw_apb_clockevent_init(int cpu, const char *name, unsigned rating,
-- 
cgit v1.2.3


From 44010543fc8bedad172aa5b6c43480e5d2124497 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Mon, 11 Nov 2024 10:09:57 -0500
Subject: fs: add the ability for statmount() to report the sb_source

/proc/self/mountinfo displays the source for the mount, but statmount()
doesn't yet have a way to return it. Add a new STATMOUNT_SB_SOURCE flag,
claim the 32-bit __spare1 field to hold the offset into the str[] array.

Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Jeff Layton <jlayton@kernel.org>
Link: https://lore.kernel.org/r/20241111-statmount-v4-3-2eaf35d07a80@kernel.org
Acked-by: Miklos Szeredi <mszeredi@redhat.com>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/uapi/linux/mount.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/uapi/linux/mount.h b/include/uapi/linux/mount.h
index 2e939dddf9cb..2b49e9131d77 100644
--- a/include/uapi/linux/mount.h
+++ b/include/uapi/linux/mount.h
@@ -174,7 +174,7 @@ struct statmount {
 	__u32 mnt_point;	/* [str] Mountpoint relative to current root */
 	__u64 mnt_ns_id;	/* ID of the mount namespace */
 	__u32 fs_subtype;	/* [str] Subtype of fs_type (if any) */
-	__u32 __spare1[1];
+	__u32 sb_source;	/* [str] Source string of the mount */
 	__u64 __spare2[48];
 	char str[];		/* Variable size part containing strings */
 };
@@ -210,6 +210,7 @@ struct mnt_id_req {
 #define STATMOUNT_MNT_NS_ID		0x00000040U	/* Want/got mnt_ns_id */
 #define STATMOUNT_MNT_OPTS		0x00000080U	/* Want/got mnt_opts */
 #define STATMOUNT_FS_SUBTYPE		0x00000100U	/* Want/got fs_subtype */
+#define STATMOUNT_SB_SOURCE		0x00000200U	/* Want/got sb_source */
 
 /*
  * Special @mnt_id values that can be passed to listmount
-- 
cgit v1.2.3


From 2f4d4503e9e5ab765a7948f98bc5deef7850f607 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@redhat.com>
Date: Tue, 12 Nov 2024 11:10:04 +0100
Subject: statmount: add flag to retrieve unescaped options

Filesystem options can be retrieved with STATMOUNT_MNT_OPTS, which
returns a string of comma separated options, where some characters are
escaped using the \OOO notation.

Add a new flag, STATMOUNT_OPT_ARRAY, which instead returns the raw
option values separated with '\0' charaters.

Since escaped charaters are rare, this inteface is preferable for
non-libmount users which likley don't want to deal with option
de-escaping.

Example code:

	if (st->mask & STATMOUNT_OPT_ARRAY) {
		const char *opt = st->str + st->opt_array;

		for (unsigned int i = 0; i < st->opt_num; i++) {
			printf("opt_array[%i]: <%s>\n", i, opt);
			opt += strlen(opt) + 1;
		}
	}

Example ouput:

(1) mnt_opts: <lowerdir+=/l\054w\054r,lowerdir+=/l\054w\054r1,upperdir=/upp\054r,workdir=/w\054rk,redirect_dir=nofollow,uuid=null>

(2) opt_array[0]: <lowerdir+=/l,w,r>
    opt_array[1]: <lowerdir+=/l,w,r1>
    opt_array[2]: <upperdir=/upp,r>
    opt_array[3]: <workdir=/w,rk>
    opt_array[4]: <redirect_dir=nofollow>
    opt_array[5]: <uuid=null>

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
Link: https://lore.kernel.org/r/20241112101006.30715-1-mszeredi@redhat.com
Acked-by: Jeff Layton <jlayton@kernel.org>
[brauner: tweak variable naming and parsing add example output]
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/uapi/linux/mount.h | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/mount.h b/include/uapi/linux/mount.h
index 2b49e9131d77..c0fda4604187 100644
--- a/include/uapi/linux/mount.h
+++ b/include/uapi/linux/mount.h
@@ -154,7 +154,7 @@ struct mount_attr {
  */
 struct statmount {
 	__u32 size;		/* Total size, including strings */
-	__u32 mnt_opts;		/* [str] Mount options of the mount */
+	__u32 mnt_opts;		/* [str] Options (comma separated, escaped) */
 	__u64 mask;		/* What results were written */
 	__u32 sb_dev_major;	/* Device ID */
 	__u32 sb_dev_minor;
@@ -175,7 +175,9 @@ struct statmount {
 	__u64 mnt_ns_id;	/* ID of the mount namespace */
 	__u32 fs_subtype;	/* [str] Subtype of fs_type (if any) */
 	__u32 sb_source;	/* [str] Source string of the mount */
-	__u64 __spare2[48];
+	__u32 opt_num;		/* Number of fs options */
+	__u32 opt_array;	/* [str] Array of nul terminated fs options */
+	__u64 __spare2[47];
 	char str[];		/* Variable size part containing strings */
 };
 
@@ -211,6 +213,7 @@ struct mnt_id_req {
 #define STATMOUNT_MNT_OPTS		0x00000080U	/* Want/got mnt_opts */
 #define STATMOUNT_FS_SUBTYPE		0x00000100U	/* Want/got fs_subtype */
 #define STATMOUNT_SB_SOURCE		0x00000200U	/* Want/got sb_source */
+#define STATMOUNT_OPT_ARRAY		0x00000400U	/* Want/got opt_... */
 
 /*
  * Special @mnt_id values that can be passed to listmount
-- 
cgit v1.2.3


From 95f567f81e43a1bcb5fbf0559e55b7505707300d Mon Sep 17 00:00:00 2001
From: Stefan Berger <stefanb@linux.ibm.com>
Date: Fri, 1 Nov 2024 15:37:03 -0400
Subject: fs: Simplify getattr interface function checking AT_GETATTR_NOSEC
 flag

Commit 8a924db2d7b5 ("fs: Pass AT_GETATTR_NOSEC flag to getattr interface
function")' introduced the AT_GETATTR_NOSEC flag to ensure that the
call paths only call vfs_getattr_nosec if it is set instead of vfs_getattr.
Now, simplify the getattr interface functions of filesystems where the flag
AT_GETATTR_NOSEC is checked.

There is only a single caller of inode_operations getattr function and it
is located in fs/stat.c in vfs_getattr_nosec. The caller there is the only
one from which the AT_GETATTR_NOSEC flag is passed from.

Two filesystems are checking this flag in .getattr and the flag is always
passed to them unconditionally from only vfs_getattr_nosec:

- ecryptfs:  Simplify by always calling vfs_getattr_nosec in
             ecryptfs_getattr. From there the flag is passed to no other
             function and this function is not called otherwise.

- overlayfs: Simplify by always calling vfs_getattr_nosec in
             ovl_getattr. From there the flag is passed to no other
             function and this function is not called otherwise.

The query_flags in vfs_getattr_nosec will mask-out AT_GETATTR_NOSEC from
any caller using AT_STATX_SYNC_TYPE as mask so that the flag is not
important inside this function. Also, since no filesystem is checking the
flag anymore, remove the flag entirely now, including the BUG_ON check that
never triggered.

The net change of the changes here combined with the original commit is
that ecryptfs and overlayfs do not call vfs_getattr but only
vfs_getattr_nosec.

Fixes: 8a924db2d7b5 ("fs: Pass AT_GETATTR_NOSEC flag to getattr interface function")
Reported-by: Al Viro <viro@zeniv.linux.org.uk>
Closes: https://lore.kernel.org/linux-fsdevel/20241101011724.GN1350452@ZenIV/T/#u
Cc: Tyler Hicks <code@tyhicks.com>
Cc: ecryptfs@vger.kernel.org
Cc: Miklos Szeredi <miklos@szeredi.hu>
Cc: Amir Goldstein <amir73il@gmail.com>
Cc: linux-unionfs@vger.kernel.org
Cc: Christian Brauner <brauner@kernel.org>
Cc: linux-fsdevel@vger.kernel.org
Reviewed-by: Christian Brauner <brauner@kernel.org>
Signed-off-by: Stefan Berger <stefanb@linux.ibm.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/uapi/linux/fcntl.h | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/fcntl.h b/include/uapi/linux/fcntl.h
index 87e2dec79fea..a40833bf2855 100644
--- a/include/uapi/linux/fcntl.h
+++ b/include/uapi/linux/fcntl.h
@@ -154,8 +154,4 @@
 					   usable with open_by_handle_at(2). */
 #define AT_HANDLE_MNT_ID_UNIQUE	0x001	/* Return the u64 unique mount ID. */
 
-#if defined(__KERNEL__)
-#define AT_GETATTR_NOSEC	0x80000000
-#endif
-
 #endif /* _UAPI_LINUX_FCNTL_H */
-- 
cgit v1.2.3


From 2bd9077b6261b8f1281d0fa74d51afe090319263 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Date: Fri, 1 Nov 2024 14:45:23 -0600
Subject: jbd2: avoid dozens of -Wflex-array-member-not-at-end warnings

-Wflex-array-member-not-at-end was introduced in GCC-14, and we
are getting ready to enable it, globally.

Use the `DEFINE_RAW_FLEX()` helper for an on-stack definition of
a flexible structure (`struct shash_desc`) where the size of the
flexible-array member (`__ctx`) is known at compile-time, and
refactor the rest of the code, accordingly.

So, with this, fix 77 of the following warnings:

include/linux/jbd2.h:1800:35: warning: structure containing a flexible array member is not at the end of another structure [-Wflex-array-member-not-at-end]

Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Link: https://patch.msgid.link/ZyU94w0IALVhc9Jy@kspp
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 include/linux/jbd2.h | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index 8aef9bb6ad57..50f7ea8714bf 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -1796,22 +1796,21 @@ static inline unsigned long jbd2_log_space_left(journal_t *journal)
 static inline u32 jbd2_chksum(journal_t *journal, u32 crc,
 			      const void *address, unsigned int length)
 {
-	struct {
-		struct shash_desc shash;
-		char ctx[JBD_MAX_CHECKSUM_SIZE];
-	} desc;
+	DEFINE_RAW_FLEX(struct shash_desc, desc, __ctx,
+		DIV_ROUND_UP(JBD_MAX_CHECKSUM_SIZE,
+			     sizeof(*((struct shash_desc *)0)->__ctx)));
 	int err;
 
 	BUG_ON(crypto_shash_descsize(journal->j_chksum_driver) >
 		JBD_MAX_CHECKSUM_SIZE);
 
-	desc.shash.tfm = journal->j_chksum_driver;
-	*(u32 *)desc.ctx = crc;
+	desc->tfm = journal->j_chksum_driver;
+	*(u32 *)desc->__ctx = crc;
 
-	err = crypto_shash_update(&desc.shash, address, length);
+	err = crypto_shash_update(desc, address, length);
 	BUG_ON(err);
 
-	return *(u32 *)desc.ctx;
+	return *(u32 *)desc->__ctx;
 }
 
 /* Return most recent uncommitted transaction */
-- 
cgit v1.2.3


From 470d2bc3a0bc19a849cc7478c02d3f5ecaa1233e Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 13 Nov 2024 09:45:35 +0100
Subject: block: export blk_validate_limits

While block drivers do the validation as part of committing them to the
queue, users that use the limit outside of a block device context have
to validate the limits and fill in the calculated values as well.

So far btrfs is the only user of queue limits without a block device,
and it has gotten away with that more or less by accident.  But with
commit 559218d43ec9 ("block: pre-calculate max_zone_append_sectors")
this became fatal for setups that have small max zone append size,
as it won't be limited now.

Export blk_validate_limits so that it can be called directly from btrfs.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Link: https://lore.kernel.org/r/20241113084541.34315-2-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blkdev.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 65f37ae70712..cd905afaf51a 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -948,6 +948,7 @@ queue_limits_start_update(struct request_queue *q)
 int queue_limits_commit_update(struct request_queue *q,
 		struct queue_limits *lim);
 int queue_limits_set(struct request_queue *q, struct queue_limits *lim);
+int blk_validate_limits(struct queue_limits *lim);
 
 /**
  * queue_limits_cancel_update - cancel an atomic update of queue limits
-- 
cgit v1.2.3


From f3dd9ae7f03aefa5bb12a4606f3d6cca87863622 Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Sun, 1 Sep 2024 17:17:09 +0200
Subject: dax: Remove an unused field in struct dax_operations

.dax_supported() was apparently removed by commit 7b0800d00dae ("dax:
remove dax_capable") on 2021-11.

Remove the now unused function pointer from the struct dax_operations.

Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Reviewed-by: Jan Kara <jack@suse.cz>
Link: https://patch.msgid.link/56b92b722ca0a6fd1387c871a6ec01bcb9bd525e.1725203804.git.christophe.jaillet@wanadoo.fr
Signed-off-by: Ira Weiny <ira.weiny@intel.com>
---
 include/linux/dax.h | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/dax.h b/include/linux/dax.h
index 9d3e3327af4c..df41a0017b31 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -27,12 +27,6 @@ struct dax_operations {
 	 */
 	long (*direct_access)(struct dax_device *, pgoff_t, long,
 			enum dax_access_mode, void **, pfn_t *);
-	/*
-	 * Validate whether this device is usable as an fsdax backing
-	 * device.
-	 */
-	bool (*dax_supported)(struct dax_device *, struct block_device *, int,
-			sector_t, sector_t);
 	/* zero_page_range: required operation. Zero page range   */
 	int (*zero_page_range)(struct dax_device *, pgoff_t, size_t);
 	/*
-- 
cgit v1.2.3


From e8225ab15006fbcdb14cef426a0a54475292fbbc Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 13 Nov 2024 16:20:43 +0100
Subject: block: remove rq_list_move

Unused now.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20241113152050.157179-4-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blk-mq.h | 17 -----------------
 1 file changed, 17 deletions(-)

(limited to 'include')

diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index a28264442948..ad26a41d13f9 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -268,23 +268,6 @@ static inline unsigned short req_get_ioprio(struct request *req)
 #define rq_list_next(rq)	(rq)->rq_next
 #define rq_list_empty(list)	((list) == (struct request *) NULL)
 
-/**
- * rq_list_move() - move a struct request from one list to another
- * @src: The source list @rq is currently in
- * @dst: The destination list that @rq will be appended to
- * @rq: The request to move
- * @prev: The request preceding @rq in @src (NULL if @rq is the head)
- */
-static inline void rq_list_move(struct request **src, struct request **dst,
-				struct request *rq, struct request *prev)
-{
-	if (prev)
-		prev->rq_next = rq->rq_next;
-	else
-		*src = rq->rq_next;
-	rq_list_add(dst, rq);
-}
-
 /**
  * enum blk_eh_timer_return - How the timeout handler should proceed
  * @BLK_EH_DONE: The block driver completed the command or will complete it at
-- 
cgit v1.2.3


From a3396b99990d8b4e5797e7b16fdeb64c15ae97bb Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 13 Nov 2024 16:20:44 +0100
Subject: block: add a rq_list type

Replace the semi-open coded request list helpers with a proper rq_list
type that mirrors the bio_list and has head and tail pointers.  Besides
better type safety this actually allows to insert at the tail of the
list, which will be useful soon.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20241113152050.157179-5-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blk-mq.h | 88 +++++++++++++++++++++++++++++---------------------
 include/linux/blkdev.h | 11 +++++--
 2 files changed, 60 insertions(+), 39 deletions(-)

(limited to 'include')

diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index ad26a41d13f9..c61e04365677 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -229,44 +229,60 @@ static inline unsigned short req_get_ioprio(struct request *req)
 #define rq_dma_dir(rq) \
 	(op_is_write(req_op(rq)) ? DMA_TO_DEVICE : DMA_FROM_DEVICE)
 
-#define rq_list_add(listptr, rq)	do {		\
-	(rq)->rq_next = *(listptr);			\
-	*(listptr) = rq;				\
-} while (0)
-
-#define rq_list_add_tail(lastpptr, rq)	do {		\
-	(rq)->rq_next = NULL;				\
-	**(lastpptr) = rq;				\
-	*(lastpptr) = &rq->rq_next;			\
-} while (0)
-
-#define rq_list_pop(listptr)				\
-({							\
-	struct request *__req = NULL;			\
-	if ((listptr) && *(listptr))	{		\
-		__req = *(listptr);			\
-		*(listptr) = __req->rq_next;		\
-	}						\
-	__req;						\
-})
+static inline int rq_list_empty(const struct rq_list *rl)
+{
+	return rl->head == NULL;
+}
 
-#define rq_list_peek(listptr)				\
-({							\
-	struct request *__req = NULL;			\
-	if ((listptr) && *(listptr))			\
-		__req = *(listptr);			\
-	__req;						\
-})
+static inline void rq_list_init(struct rq_list *rl)
+{
+	rl->head = NULL;
+	rl->tail = NULL;
+}
+
+static inline void rq_list_add_tail(struct rq_list *rl, struct request *rq)
+{
+	rq->rq_next = NULL;
+	if (rl->tail)
+		rl->tail->rq_next = rq;
+	else
+		rl->head = rq;
+	rl->tail = rq;
+}
+
+static inline void rq_list_add_head(struct rq_list *rl, struct request *rq)
+{
+	rq->rq_next = rl->head;
+	rl->head = rq;
+	if (!rl->tail)
+		rl->tail = rq;
+}
+
+static inline struct request *rq_list_pop(struct rq_list *rl)
+{
+	struct request *rq = rl->head;
+
+	if (rq) {
+		rl->head = rl->head->rq_next;
+		if (!rl->head)
+			rl->tail = NULL;
+		rq->rq_next = NULL;
+	}
+
+	return rq;
+}
 
-#define rq_list_for_each(listptr, pos)			\
-	for (pos = rq_list_peek((listptr)); pos; pos = rq_list_next(pos))
+static inline struct request *rq_list_peek(struct rq_list *rl)
+{
+	return rl->head;
+}
 
-#define rq_list_for_each_safe(listptr, pos, nxt)			\
-	for (pos = rq_list_peek((listptr)), nxt = rq_list_next(pos);	\
-		pos; pos = nxt, nxt = pos ? rq_list_next(pos) : NULL)
+#define rq_list_for_each(rl, pos)					\
+	for (pos = rq_list_peek((rl)); (pos); pos = pos->rq_next)
 
-#define rq_list_next(rq)	(rq)->rq_next
-#define rq_list_empty(list)	((list) == (struct request *) NULL)
+#define rq_list_for_each_safe(rl, pos, nxt)				\
+	for (pos = rq_list_peek((rl)), nxt = pos->rq_next;		\
+		pos; pos = nxt, nxt = pos ? pos->rq_next : NULL)
 
 /**
  * enum blk_eh_timer_return - How the timeout handler should proceed
@@ -559,7 +575,7 @@ struct blk_mq_ops {
 	 * empty the @rqlist completely, then the rest will be queued
 	 * individually by the block layer upon return.
 	 */
-	void (*queue_rqs)(struct request **rqlist);
+	void (*queue_rqs)(struct rq_list *rqlist);
 
 	/**
 	 * @get_budget: Reserve budget before queue request, once .queue_rq is
@@ -868,7 +884,7 @@ static inline bool blk_mq_add_to_batch(struct request *req,
 	else if (iob->complete != complete)
 		return false;
 	iob->need_ts |= blk_mq_need_time_stamp(req);
-	rq_list_add(&iob->req_list, req);
+	rq_list_add_head(&iob->req_list, req);
 	return true;
 }
 
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index cd905afaf51a..00212e96261a 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1007,6 +1007,11 @@ extern void blk_put_queue(struct request_queue *);
 void blk_mark_disk_dead(struct gendisk *disk);
 
 #ifdef CONFIG_BLOCK
+struct rq_list {
+	struct request *head;
+	struct request *tail;
+};
+
 /*
  * blk_plug permits building a queue of related requests by holding the I/O
  * fragments for a short period. This allows merging of sequential requests
@@ -1019,10 +1024,10 @@ void blk_mark_disk_dead(struct gendisk *disk);
  * blk_flush_plug() is called.
  */
 struct blk_plug {
-	struct request *mq_list; /* blk-mq requests */
+	struct rq_list mq_list; /* blk-mq requests */
 
 	/* if ios_left is > 1, we can batch tag/rq allocations */
-	struct request *cached_rq;
+	struct rq_list cached_rqs;
 	u64 cur_ktime;
 	unsigned short nr_ios;
 
@@ -1684,7 +1689,7 @@ int bdev_thaw(struct block_device *bdev);
 void bdev_fput(struct file *bdev_file);
 
 struct io_comp_batch {
-	struct request *req_list;
+	struct rq_list req_list;
 	bool need_ts;
 	void (*complete)(struct io_comp_batch *);
 };
-- 
cgit v1.2.3


From 00e8d290b55f2fa5c5a0500b4dccf9e090650447 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 13 Nov 2024 16:20:46 +0100
Subject: block: don't reorder requests in blk_mq_add_to_batch

LIFO ordering for batched completions is a bit unexpected and also
defeats some merging optimizations in e.g. the XFS buffered write
code.  Now that we can easily add the request to the tail of the list
do that.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20241113152050.157179-7-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blk-mq.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index c61e04365677..c596e0e4cb75 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -884,7 +884,7 @@ static inline bool blk_mq_add_to_batch(struct request *req,
 	else if (iob->complete != complete)
 		return false;
 	iob->need_ts |= blk_mq_need_time_stamp(req);
-	rq_list_add_head(&iob->req_list, req);
+	rq_list_add_tail(&iob->req_list, req);
 	return true;
 }
 
-- 
cgit v1.2.3


From 27184f8905ba680f22abf1707fbed24036a67119 Mon Sep 17 00:00:00 2001
From: Jarkko Sakkinen <jarkko@kernel.org>
Date: Wed, 13 Nov 2024 07:54:12 +0200
Subject: tpm: Opt-in in disable PCR integrity protection

The initial HMAC session feature added TPM bus encryption and/or integrity
protection to various in-kernel TPM operations. This can cause performance
bottlenecks with IMA, as it heavily utilizes PCR extend operations.

In order to mitigate this performance issue, introduce a kernel
command-line parameter to the TPM driver for disabling the integrity
protection for PCR extend operations (i.e. TPM2_PCR_Extend).

Cc: James Bottomley <James.Bottomley@HansenPartnership.com>
Link: https://lore.kernel.org/linux-integrity/20241015193916.59964-1-zohar@linux.ibm.com/
Fixes: 6519fea6fd37 ("tpm: add hmac checks to tpm2_pcr_extend()")
Tested-by: Mimi Zohar <zohar@linux.ibm.com>
Co-developed-by: Roberto Sassu <roberto.sassu@huawei.com>
Signed-off-by: Roberto Sassu <roberto.sassu@huawei.com>
Co-developed-by: Mimi Zohar <zohar@linux.ibm.com>
Signed-off-by: Mimi Zohar <zohar@linux.ibm.com>
Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
---
 include/linux/tpm.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/linux/tpm.h b/include/linux/tpm.h
index 587b96b4418e..20a40ade8030 100644
--- a/include/linux/tpm.h
+++ b/include/linux/tpm.h
@@ -421,6 +421,7 @@ void tpm_buf_append_u32(struct tpm_buf *buf, const u32 value);
 u8 tpm_buf_read_u8(struct tpm_buf *buf, off_t *offset);
 u16 tpm_buf_read_u16(struct tpm_buf *buf, off_t *offset);
 u32 tpm_buf_read_u32(struct tpm_buf *buf, off_t *offset);
+void tpm_buf_append_handle(struct tpm_chip *chip, struct tpm_buf *buf, u32 handle);
 
 /*
  * Check if TPM device is in the firmware upgrade mode.
@@ -505,6 +506,8 @@ void tpm_buf_append_name(struct tpm_chip *chip, struct tpm_buf *buf,
 void tpm_buf_append_hmac_session(struct tpm_chip *chip, struct tpm_buf *buf,
 				 u8 attributes, u8 *passphrase,
 				 int passphraselen);
+void tpm_buf_append_auth(struct tpm_chip *chip, struct tpm_buf *buf,
+			 u8 attributes, u8 *passphrase, int passphraselen);
 static inline void tpm_buf_append_hmac_session_opt(struct tpm_chip *chip,
 						   struct tpm_buf *buf,
 						   u8 attributes,
-- 
cgit v1.2.3


From 7c1ae151e81268db1fe8c8a473d922fc5ba47b72 Mon Sep 17 00:00:00 2001
From: Yishai Hadas <yishaih@nvidia.com>
Date: Wed, 13 Nov 2024 13:51:54 +0200
Subject: virtio_pci: Introduce device parts access commands

Introduce device parts access commands via the admin queue.

These commands and their structure adhere to the Virtio 1.4
specification.

Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Yishai Hadas <yishaih@nvidia.com>
Link: https://lore.kernel.org/r/20241113115200.209269-2-yishaih@nvidia.com
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 include/uapi/linux/virtio_pci.h | 131 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 131 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/virtio_pci.h b/include/uapi/linux/virtio_pci.h
index a8208492e822..1beb317df1b9 100644
--- a/include/uapi/linux/virtio_pci.h
+++ b/include/uapi/linux/virtio_pci.h
@@ -40,6 +40,7 @@
 #define _LINUX_VIRTIO_PCI_H
 
 #include <linux/types.h>
+#include <linux/kernel.h>
 
 #ifndef VIRTIO_PCI_NO_LEGACY
 
@@ -240,6 +241,17 @@ struct virtio_pci_cfg_cap {
 #define VIRTIO_ADMIN_CMD_LEGACY_DEV_CFG_READ		0x5
 #define VIRTIO_ADMIN_CMD_LEGACY_NOTIFY_INFO		0x6
 
+/* Device parts access commands. */
+#define VIRTIO_ADMIN_CMD_CAP_ID_LIST_QUERY		0x7
+#define VIRTIO_ADMIN_CMD_DEVICE_CAP_GET			0x8
+#define VIRTIO_ADMIN_CMD_DRIVER_CAP_SET			0x9
+#define VIRTIO_ADMIN_CMD_RESOURCE_OBJ_CREATE		0xa
+#define VIRTIO_ADMIN_CMD_RESOURCE_OBJ_DESTROY		0xd
+#define VIRTIO_ADMIN_CMD_DEV_PARTS_METADATA_GET		0xe
+#define VIRTIO_ADMIN_CMD_DEV_PARTS_GET			0xf
+#define VIRTIO_ADMIN_CMD_DEV_PARTS_SET			0x10
+#define VIRTIO_ADMIN_CMD_DEV_MODE_SET			0x11
+
 struct virtio_admin_cmd_hdr {
 	__le16 opcode;
 	/*
@@ -286,4 +298,123 @@ struct virtio_admin_cmd_notify_info_result {
 	struct virtio_admin_cmd_notify_info_data entries[VIRTIO_ADMIN_CMD_MAX_NOTIFY_INFO];
 };
 
+#define VIRTIO_DEV_PARTS_CAP 0x0000
+
+struct virtio_dev_parts_cap {
+	__u8 get_parts_resource_objects_limit;
+	__u8 set_parts_resource_objects_limit;
+};
+
+#define MAX_CAP_ID __KERNEL_DIV_ROUND_UP(VIRTIO_DEV_PARTS_CAP + 1, 64)
+
+struct virtio_admin_cmd_query_cap_id_result {
+	__le64 supported_caps[MAX_CAP_ID];
+};
+
+struct virtio_admin_cmd_cap_get_data {
+	__le16 id;
+	__u8 reserved[6];
+};
+
+struct virtio_admin_cmd_cap_set_data {
+	__le16 id;
+	__u8 reserved[6];
+	__u8 cap_specific_data[];
+};
+
+struct virtio_admin_cmd_resource_obj_cmd_hdr {
+	__le16 type;
+	__u8 reserved[2];
+	__le32 id; /* Indicates unique resource object id per resource object type */
+};
+
+struct virtio_admin_cmd_resource_obj_create_data {
+	struct virtio_admin_cmd_resource_obj_cmd_hdr hdr;
+	__le64 flags;
+	__u8 resource_obj_specific_data[];
+};
+
+#define VIRTIO_RESOURCE_OBJ_DEV_PARTS 0
+
+#define VIRTIO_RESOURCE_OBJ_DEV_PARTS_TYPE_GET 0
+#define VIRTIO_RESOURCE_OBJ_DEV_PARTS_TYPE_SET 1
+
+struct virtio_resource_obj_dev_parts {
+	__u8 type;
+	__u8 reserved[7];
+};
+
+#define VIRTIO_ADMIN_CMD_DEV_PARTS_METADATA_TYPE_SIZE 0
+#define VIRTIO_ADMIN_CMD_DEV_PARTS_METADATA_TYPE_COUNT 1
+#define VIRTIO_ADMIN_CMD_DEV_PARTS_METADATA_TYPE_LIST 2
+
+struct virtio_admin_cmd_dev_parts_metadata_data {
+	struct virtio_admin_cmd_resource_obj_cmd_hdr hdr;
+	__u8 type;
+	__u8 reserved[7];
+};
+
+#define VIRTIO_DEV_PART_F_OPTIONAL 0
+
+struct virtio_dev_part_hdr {
+	__le16 part_type;
+	__u8 flags;
+	__u8 reserved;
+	union {
+		struct {
+			__le32 offset;
+			__le32 reserved;
+		} pci_common_cfg;
+		struct {
+			__le16 index;
+			__u8 reserved[6];
+		} vq_index;
+	} selector;
+	__le32 length;
+};
+
+struct virtio_dev_part {
+	struct virtio_dev_part_hdr hdr;
+	__u8 value[];
+};
+
+struct virtio_admin_cmd_dev_parts_metadata_result {
+	union {
+		struct {
+			__le32 size;
+			__le32 reserved;
+		} parts_size;
+		struct {
+			__le32 count;
+			__le32 reserved;
+		} hdr_list_count;
+		struct {
+			__le32 count;
+			__le32 reserved;
+			struct virtio_dev_part_hdr hdrs[];
+		} hdr_list;
+	};
+};
+
+#define VIRTIO_ADMIN_CMD_DEV_PARTS_GET_TYPE_SELECTED 0
+#define VIRTIO_ADMIN_CMD_DEV_PARTS_GET_TYPE_ALL 1
+
+struct virtio_admin_cmd_dev_parts_get_data {
+	struct virtio_admin_cmd_resource_obj_cmd_hdr hdr;
+	__u8 type;
+	__u8 reserved[7];
+	struct virtio_dev_part_hdr hdr_list[];
+};
+
+struct virtio_admin_cmd_dev_parts_set_data {
+	struct virtio_admin_cmd_resource_obj_cmd_hdr hdr;
+	struct virtio_dev_part parts[];
+};
+
+#define VIRTIO_ADMIN_CMD_DEV_MODE_F_STOPPED 0
+
+struct virtio_admin_cmd_dev_mode_set_data {
+	__u8 flags;
+};
+
 #endif
-- 
cgit v1.2.3


From 704806ca400e5daa86c110f14bfdda9d28203bb7 Mon Sep 17 00:00:00 2001
From: Yishai Hadas <yishaih@nvidia.com>
Date: Wed, 13 Nov 2024 13:51:55 +0200
Subject: virtio: Extend the admin command to include the result size

Extend the admin command by incorporating a result size field.

This allows higher layers to determine the actual result size from the
backend when this information is not included in the result_sg.

The additional information introduced here will be used in subsequent
patches of this series.

Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Yishai Hadas <yishaih@nvidia.com>
Link: https://lore.kernel.org/r/20241113115200.209269-3-yishaih@nvidia.com
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 include/linux/virtio.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index 306137a15d07..b5f7a611715a 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -111,6 +111,7 @@ struct virtio_admin_cmd {
 	struct scatterlist *data_sg;
 	struct scatterlist *result_sg;
 	struct completion completion;
+	u32 result_sg_size;
 	int ret;
 };
 
-- 
cgit v1.2.3


From 52a22c0ed03ce23f20df81f79b23cb6637716fae Mon Sep 17 00:00:00 2001
From: Yishai Hadas <yishaih@nvidia.com>
Date: Wed, 13 Nov 2024 13:51:57 +0200
Subject: virtio-pci: Introduce APIs to execute device parts admin commands

Introduce APIs to handle the execution of device parts admin commands.

These APIs cover functionalities such as mode setting, object creation
and destruction, and operations like parts get/set and metadata
retrieval.

These APIs will be utilized in upcoming patches within this series.

Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Yishai Hadas <yishaih@nvidia.com>
Link: https://lore.kernel.org/r/20241113115200.209269-5-yishaih@nvidia.com
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 include/linux/virtio_pci_admin.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'include')

diff --git a/include/linux/virtio_pci_admin.h b/include/linux/virtio_pci_admin.h
index f4a100a0fe2e..dffc92c17ad2 100644
--- a/include/linux/virtio_pci_admin.h
+++ b/include/linux/virtio_pci_admin.h
@@ -20,4 +20,15 @@ int virtio_pci_admin_legacy_io_notify_info(struct pci_dev *pdev,
 					   u64 *bar_offset);
 #endif
 
+bool virtio_pci_admin_has_dev_parts(struct pci_dev *pdev);
+int virtio_pci_admin_mode_set(struct pci_dev *pdev, u8 mode);
+int virtio_pci_admin_obj_create(struct pci_dev *pdev, u16 obj_type, u8 operation_type,
+				u32 *obj_id);
+int virtio_pci_admin_obj_destroy(struct pci_dev *pdev, u16 obj_type, u32 id);
+int virtio_pci_admin_dev_parts_metadata_get(struct pci_dev *pdev, u16 obj_type,
+					    u32 id, u8 metadata_type, u32 *out);
+int virtio_pci_admin_dev_parts_get(struct pci_dev *pdev, u16 obj_type, u32 id,
+				   u8 get_type, struct scatterlist *res_sg, u32 *res_size);
+int virtio_pci_admin_dev_parts_set(struct pci_dev *pdev, struct scatterlist *data_sg);
+
 #endif /* _LINUX_VIRTIO_PCI_ADMIN_H */
-- 
cgit v1.2.3


From 6b998404c71ecff9ebeed343c1ce21f9df3ef131 Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Tue, 12 Nov 2024 21:36:29 +0100
Subject: net: simplify eeecfg_mac_can_tx_lpi

Simplify the function.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Reviewed-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
Link: https://patch.msgid.link/f9a4623b-b94c-466c-8733-62057c6d9a17@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/eee.h | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/net/eee.h b/include/net/eee.h
index 84837aba3cd9..cfab1b8bc46a 100644
--- a/include/net/eee.h
+++ b/include/net/eee.h
@@ -13,10 +13,7 @@ struct eee_config {
 static inline bool eeecfg_mac_can_tx_lpi(const struct eee_config *eeecfg)
 {
 	/* eee_enabled is the master on/off */
-	if (!eeecfg->eee_enabled || !eeecfg->tx_lpi_enabled)
-		return false;
-
-	return true;
+	return eeecfg->eee_enabled && eeecfg->tx_lpi_enabled;
 }
 
 static inline void eeecfg_to_eee(struct ethtool_keee *eee,
-- 
cgit v1.2.3


From eb94b7bb10109a14a5431a67e5d8e31cfa06b395 Mon Sep 17 00:00:00 2001
From: Michal Luczaj <mhal@rbox.co>
Date: Mon, 11 Nov 2024 00:17:34 +0100
Subject: net: Make copy_safe_from_sockptr() match documentation

copy_safe_from_sockptr()
  return copy_from_sockptr()
    return copy_from_sockptr_offset()
      return copy_from_user()

copy_from_user() does not return an error on fault. Instead, it returns a
number of bytes that were not copied. Have it handled.

Patch has a side effect: it un-breaks garbage input handling of
nfc_llcp_setsockopt() and mISDN's data_sock_setsockopt().

Fixes: 6309863b31dd ("net: add copy_safe_from_sockptr() helper")
Signed-off-by: Michal Luczaj <mhal@rbox.co>
Link: https://patch.msgid.link/20241111-sockptr-copy-ret-fix-v1-1-a520083a93fb@rbox.co
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/sockptr.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/sockptr.h b/include/linux/sockptr.h
index fc5a206c4043..195debe2b1db 100644
--- a/include/linux/sockptr.h
+++ b/include/linux/sockptr.h
@@ -77,7 +77,9 @@ static inline int copy_safe_from_sockptr(void *dst, size_t ksize,
 {
 	if (optlen < ksize)
 		return -EINVAL;
-	return copy_from_sockptr(dst, optval, ksize);
+	if (copy_from_sockptr(dst, optval, ksize))
+		return -EFAULT;
+	return 0;
 }
 
 static inline int copy_struct_from_sockptr(void *dst, size_t ksize,
-- 
cgit v1.2.3


From 16220cb315a0a10d2a003d9af534988686e675ea Mon Sep 17 00:00:00 2001
From: Tristram Ha <tristram.ha@microchip.com>
Date: Fri, 8 Nov 2024 17:57:05 -0800
Subject: net: dsa: microchip: Add LAN9646 switch support to KSZ DSA driver

LAN9646 switch is a 6-port switch with functions like KSZ9897.  It has
4 internal PHYs and 1 SGMII port.  The chip id read from hardware is
same as KSZ9477, so software driver needs to create a new chip id and
group allowable functions under its chip data structure to
differentiate the product.

Signed-off-by: Tristram Ha <tristram.ha@microchip.com>
Link: https://patch.msgid.link/20241109015705.82685-3-Tristram.Ha@microchip.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/platform_data/microchip-ksz.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/platform_data/microchip-ksz.h b/include/linux/platform_data/microchip-ksz.h
index 2ee1a679e592..0e0e8fe6975f 100644
--- a/include/linux/platform_data/microchip-ksz.h
+++ b/include/linux/platform_data/microchip-ksz.h
@@ -42,6 +42,7 @@ enum ksz_chip_id {
 	LAN9372_CHIP_ID = 0x00937200,
 	LAN9373_CHIP_ID = 0x00937300,
 	LAN9374_CHIP_ID = 0x00937400,
+	LAN9646_CHIP_ID = 0x00964600,
 };
 
 struct ksz_platform_data {
-- 
cgit v1.2.3


From e311b04db66aaed1819bdd479d42c4f338f105b9 Mon Sep 17 00:00:00 2001
From: Charles Keepax <ckeepax@opensource.cirrus.com>
Date: Tue, 12 Nov 2024 12:56:45 +0000
Subject: soundwire: Update the includes on the sdw.h header

There are quite a few things used in the sdw.h header that it relies on
the consumer to include. If something is used directly in the header it
should be included by the header. Update the includes to cover the
missing items, or add forward declarations for things that are only used
as pointers. Whilst making the change also alphabetise the list of
includes.

Signed-off-by: Charles Keepax <ckeepax@opensource.cirrus.com>
Link: https://lore.kernel.org/r/20241112125646.590240-1-ckeepax@opensource.cirrus.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 include/linux/soundwire/sdw.h | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/soundwire/sdw.h b/include/linux/soundwire/sdw.h
index 73f655334fe9..1fd4b126287f 100644
--- a/include/linux/soundwire/sdw.h
+++ b/include/linux/soundwire/sdw.h
@@ -4,12 +4,19 @@
 #ifndef __SOUNDWIRE_H
 #define __SOUNDWIRE_H
 
+#include <linux/bitfield.h>
 #include <linux/bug.h>
-#include <linux/lockdep_types.h>
+#include <linux/completion.h>
+#include <linux/device.h>
 #include <linux/irq.h>
 #include <linux/irqdomain.h>
+#include <linux/lockdep_types.h>
 #include <linux/mod_devicetable.h>
-#include <linux/bitfield.h>
+#include <linux/mutex.h>
+#include <linux/types.h>
+
+struct dentry;
+struct fwnode_handle;
 
 struct sdw_bus;
 struct sdw_slave;
-- 
cgit v1.2.3


From dd690b31de0ed46adc5856698880560b900386ba Mon Sep 17 00:00:00 2001
From: Charles Keepax <ckeepax@opensource.cirrus.com>
Date: Tue, 12 Nov 2024 12:56:46 +0000
Subject: soundwire: Minor formatting fixups in sdw.h header

Fixup some minor formatting and whitespace in the sdw.h header file.

Signed-off-by: Charles Keepax <ckeepax@opensource.cirrus.com>
Link: https://lore.kernel.org/r/20241112125646.590240-2-ckeepax@opensource.cirrus.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 include/linux/soundwire/sdw.h | 40 +++++++++++++++++-----------------------
 1 file changed, 17 insertions(+), 23 deletions(-)

(limited to 'include')

diff --git a/include/linux/soundwire/sdw.h b/include/linux/soundwire/sdw.h
index 1fd4b126287f..784656f740f6 100644
--- a/include/linux/soundwire/sdw.h
+++ b/include/linux/soundwire/sdw.h
@@ -616,7 +616,6 @@ struct sdw_slave_ops {
 	int (*clk_stop)(struct sdw_slave *slave,
 			enum sdw_clk_stop_mode mode,
 			enum sdw_clk_stop_type type);
-
 };
 
 /**
@@ -690,8 +689,7 @@ struct sdw_master_device {
 	container_of(d, struct sdw_master_device, dev)
 
 struct sdw_driver {
-	int (*probe)(struct sdw_slave *sdw,
-			const struct sdw_device_id *id);
+	int (*probe)(struct sdw_slave *sdw, const struct sdw_device_id *id);
 	int (*remove)(struct sdw_slave *sdw);
 	void (*shutdown)(struct sdw_slave *sdw);
 
@@ -710,7 +708,7 @@ struct sdw_driver {
 	SDW_SLAVE_ENTRY_EXT((_mfg_id), (_part_id), 0, 0, (_drv_data))
 
 int sdw_handle_slave_status(struct sdw_bus *bus,
-			enum sdw_slave_status status[]);
+			    enum sdw_slave_status status[]);
 
 /*
  * SDW master structures and APIs
@@ -792,15 +790,14 @@ struct sdw_enable_ch {
  */
 struct sdw_master_port_ops {
 	int (*dpn_set_port_params)(struct sdw_bus *bus,
-			struct sdw_port_params *port_params,
-			unsigned int bank);
+				   struct sdw_port_params *port_params,
+				   unsigned int bank);
 	int (*dpn_set_port_transport_params)(struct sdw_bus *bus,
-			struct sdw_transport_params *transport_params,
-			enum sdw_reg_bank bank);
-	int (*dpn_port_prep)(struct sdw_bus *bus,
-			struct sdw_prepare_ch *prepare_ch);
+					     struct sdw_transport_params *transport_params,
+					     enum sdw_reg_bank bank);
+	int (*dpn_port_prep)(struct sdw_bus *bus, struct sdw_prepare_ch *prepare_ch);
 	int (*dpn_port_enable_ch)(struct sdw_bus *bus,
-			struct sdw_enable_ch *enable_ch, unsigned int bank);
+				  struct sdw_enable_ch *enable_ch, unsigned int bank);
 };
 
 struct sdw_msg;
@@ -835,14 +832,11 @@ struct sdw_defer {
  */
 struct sdw_master_ops {
 	int (*read_prop)(struct sdw_bus *bus);
-	u64 (*override_adr)
-			(struct sdw_bus *bus, u64 addr);
-	enum sdw_command_response (*xfer_msg)
-			(struct sdw_bus *bus, struct sdw_msg *msg);
-	enum sdw_command_response (*xfer_msg_defer)
-			(struct sdw_bus *bus);
+	u64 (*override_adr)(struct sdw_bus *bus, u64 addr);
+	enum sdw_command_response (*xfer_msg)(struct sdw_bus *bus, struct sdw_msg *msg);
+	enum sdw_command_response (*xfer_msg_defer)(struct sdw_bus *bus);
 	int (*set_bus_conf)(struct sdw_bus *bus,
-			struct sdw_bus_params *params);
+			    struct sdw_bus_params *params);
 	int (*pre_bank_switch)(struct sdw_bus *bus);
 	int (*post_bank_switch)(struct sdw_bus *bus);
 	u32 (*read_ping_status)(struct sdw_bus *bus);
@@ -1019,12 +1013,12 @@ void sdw_release_stream(struct sdw_stream_runtime *stream);
 int sdw_compute_params(struct sdw_bus *bus);
 
 int sdw_stream_add_master(struct sdw_bus *bus,
-		struct sdw_stream_config *stream_config,
-		const struct sdw_port_config *port_config,
-		unsigned int num_ports,
-		struct sdw_stream_runtime *stream);
+			  struct sdw_stream_config *stream_config,
+			  const struct sdw_port_config *port_config,
+			  unsigned int num_ports,
+			  struct sdw_stream_runtime *stream);
 int sdw_stream_remove_master(struct sdw_bus *bus,
-		struct sdw_stream_runtime *stream);
+			     struct sdw_stream_runtime *stream);
 int sdw_startup_stream(void *sdw_substream);
 int sdw_prepare_stream(struct sdw_stream_runtime *stream);
 int sdw_enable_stream(struct sdw_stream_runtime *stream);
-- 
cgit v1.2.3


From 04782e63917dbcb60932fe93df52c4a4e3859d07 Mon Sep 17 00:00:00 2001
From: Colton Lewis <coltonlewis@google.com>
Date: Wed, 13 Nov 2024 19:01:52 +0000
Subject: perf/core: Hoist perf_instruction_pointer() and perf_misc_flags()

For clarity, rename the arch-specific definitions of these functions
to perf_arch_* to denote they are arch-specifc. Define the
generic-named functions in one place where they can call the
arch-specific ones as needed.

Signed-off-by: Colton Lewis <coltonlewis@google.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Reviewed-by: Oliver Upton <oliver.upton@linux.dev>
Acked-by: Thomas Richter <tmricht@linux.ibm.com>
Acked-by: Mark Rutland <mark.rutland@arm.com>
Acked-by: Madhavan Srinivasan <maddy@linux.ibm.com>
Acked-by: Kan Liang <kan.liang@linux.intel.com>
Link: https://lore.kernel.org/r/20241113190156.2145593-3-coltonlewis@google.com
---
 include/linux/perf_event.h | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 91b310052a7c..3b4bf5e329f6 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1676,10 +1676,13 @@ extern void perf_tp_event(u16 event_type, u64 count, void *record,
 			  struct task_struct *task);
 extern void perf_bp_event(struct perf_event *event, void *data);
 
-#ifndef perf_misc_flags
-# define perf_misc_flags(regs) \
+extern unsigned long perf_misc_flags(struct pt_regs *regs);
+extern unsigned long perf_instruction_pointer(struct pt_regs *regs);
+
+#ifndef perf_arch_misc_flags
+# define perf_arch_misc_flags(regs) \
 		(user_mode(regs) ? PERF_RECORD_MISC_USER : PERF_RECORD_MISC_KERNEL)
-# define perf_instruction_pointer(regs)	instruction_pointer(regs)
+# define perf_arch_instruction_pointer(regs)	instruction_pointer(regs)
 #endif
 #ifndef perf_arch_bpf_user_pt_regs
 # define perf_arch_bpf_user_pt_regs(regs) regs
-- 
cgit v1.2.3


From 2c47e7a74f445426d156278e339b7abb259e50de Mon Sep 17 00:00:00 2001
From: Colton Lewis <coltonlewis@google.com>
Date: Wed, 13 Nov 2024 19:01:55 +0000
Subject: perf/core: Correct perf sampling with guest VMs

Previously any PMU overflow interrupt that fired while a VCPU was
loaded was recorded as a guest event whether it truly was or not. This
resulted in nonsense perf recordings that did not honor
perf_event_attr.exclude_guest and recorded guest IPs where it should
have recorded host IPs.

Rework the sampling logic to only record guest samples for events with
exclude_guest = 0. This way any host-only events with exclude_guest
set will never see unexpected guest samples. The behaviour of events
with exclude_guest = 0 is unchanged.

Note that events configured to sample both host and guest may still
misattribute a PMI that arrived in the host as a guest event depending
on KVM arch and vendor behavior.

Signed-off-by: Colton Lewis <coltonlewis@google.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Reviewed-by: Oliver Upton <oliver.upton@linux.dev>
Acked-by: Mark Rutland <mark.rutland@arm.com>
Acked-by: Kan Liang <kan.liang@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Link: https://lore.kernel.org/r/20241113190156.2145593-6-coltonlewis@google.com
---
 include/linux/perf_event.h | 21 +++++++++++++++++++--
 1 file changed, 19 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 3b4bf5e329f6..cb99ec8c9e96 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1676,8 +1676,9 @@ extern void perf_tp_event(u16 event_type, u64 count, void *record,
 			  struct task_struct *task);
 extern void perf_bp_event(struct perf_event *event, void *data);
 
-extern unsigned long perf_misc_flags(struct pt_regs *regs);
-extern unsigned long perf_instruction_pointer(struct pt_regs *regs);
+extern unsigned long perf_misc_flags(struct perf_event *event, struct pt_regs *regs);
+extern unsigned long perf_instruction_pointer(struct perf_event *event,
+					      struct pt_regs *regs);
 
 #ifndef perf_arch_misc_flags
 # define perf_arch_misc_flags(regs) \
@@ -1688,6 +1689,22 @@ extern unsigned long perf_instruction_pointer(struct pt_regs *regs);
 # define perf_arch_bpf_user_pt_regs(regs) regs
 #endif
 
+#ifndef perf_arch_guest_misc_flags
+static inline unsigned long perf_arch_guest_misc_flags(struct pt_regs *regs)
+{
+	unsigned long guest_state = perf_guest_state();
+
+	if (!(guest_state & PERF_GUEST_ACTIVE))
+		return 0;
+
+	if (guest_state & PERF_GUEST_USER)
+		return PERF_RECORD_MISC_GUEST_USER;
+	else
+		return PERF_RECORD_MISC_GUEST_KERNEL;
+}
+# define perf_arch_guest_misc_flags(regs)	perf_arch_guest_misc_flags(regs)
+#endif
+
 static inline bool has_branch_stack(struct perf_event *event)
 {
 	return event->attr.sample_type & PERF_SAMPLE_BRANCH_STACK;
-- 
cgit v1.2.3


From 9fed2c0f2f0771b990d068ef0a2b32e770ae6d48 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Wed, 13 Nov 2024 09:17:51 -0500
Subject: fs: reduce pointer chasing in is_mgtime() test

The is_mgtime test checks whether the FS_MGTIME flag is set in the
fstype. To get there from the inode though, we have to dereference 3
pointers.

Add a new IOP_MGTIME flag, and have inode_init_always() set that flag
when the fstype flag is set. Then, make is_mgtime test for IOP_MGTIME
instead.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
Link: https://lore.kernel.org/r/20241113-mgtime-v1-1-84e256980e11@kernel.org
Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/linux/fs.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index b1a3bd07711b..a9c6c8cbda50 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -623,6 +623,7 @@ is_uncached_acl(struct posix_acl *acl)
 #define IOP_NOFOLLOW	0x0004
 #define IOP_XATTR	0x0008
 #define IOP_DEFAULT_READLINK	0x0010
+#define IOP_MGTIME	0x0020
 
 /*
  * Keep mostly read-only and often accessed (especially for
@@ -2581,7 +2582,7 @@ struct file_system_type {
  */
 static inline bool is_mgtime(const struct inode *inode)
 {
-	return inode->i_sb->s_type->fs_flags & FS_MGTIME;
+	return inode->i_opflags & IOP_MGTIME;
 }
 
 extern struct dentry *mount_bdev(struct file_system_type *fs_type,
-- 
cgit v1.2.3


From 8eb36164d1a6769a20ed43033510067ff3dab9ee Mon Sep 17 00:00:00 2001
From: Hangbin Liu <liuhangbin@gmail.com>
Date: Mon, 11 Nov 2024 10:16:49 +0000
Subject: bonding: add ns target multicast address to slave device
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Commit 4598380f9c54 ("bonding: fix ns validation on backup slaves")
tried to resolve the issue where backup slaves couldn't be brought up when
receiving IPv6 Neighbor Solicitation (NS) messages. However, this fix only
worked for drivers that receive all multicast messages, such as the veth
interface.

For standard drivers, the NS multicast message is silently dropped because
the slave device is not a member of the NS target multicast group.

To address this, we need to make the slave device join the NS target
multicast group, ensuring it can receive these IPv6 NS messages to validate
the slave’s status properly.

There are three policies before joining the multicast group:
1. All settings must be under active-backup mode (alb and tlb do not support
   arp_validate), with backup slaves and slaves supporting multicast.
2. We can add or remove multicast groups when arp_validate changes.
3. Other operations, such as enslaving, releasing, or setting NS targets,
   need to be guarded by arp_validate.

Fixes: 4e24be018eb9 ("bonding: add new parameter ns_targets")
Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
Reviewed-by: Nikolay Aleksandrov <razor@blackwall.org>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/net/bond_options.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/net/bond_options.h b/include/net/bond_options.h
index 473a0147769e..18687ccf0638 100644
--- a/include/net/bond_options.h
+++ b/include/net/bond_options.h
@@ -161,5 +161,7 @@ void bond_option_arp_ip_targets_clear(struct bonding *bond);
 #if IS_ENABLED(CONFIG_IPV6)
 void bond_option_ns_ip6_targets_clear(struct bonding *bond);
 #endif
+void bond_slave_ns_maddrs_add(struct bonding *bond, struct slave *slave);
+void bond_slave_ns_maddrs_del(struct bonding *bond, struct slave *slave);
 
 #endif /* _NET_BOND_OPTIONS_H */
-- 
cgit v1.2.3


From a8ee6b900c147d3bedced6c52ba6cb603226aaa3 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Wed, 13 Nov 2024 16:35:50 +0100
Subject: netfilter: nf_tables: prepare for multiple elements in nft_trans_elem
 structure

Add helpers to release the individual elements contained in the
trans_elem container structure.

No functional change intended.

Followup patch will add 'nelems' member and will turn 'priv' into
a flexible array.

These helpers can then loop over all elements.
Care needs to be taken to handle a mix of new elements and existing
elements that are being updated (e.g. timeout refresh).

Before this patch, NEWSETELEM transaction with update is released
early so nft_trans_set_elem_destroy() won't get called, so we need
to skip elements marked as update.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables.h | 21 +++++++++------------
 1 file changed, 9 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index f24278767bfd..37af0b174c39 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -1759,28 +1759,25 @@ enum nft_trans_elem_flags {
 	NFT_TRANS_UPD_EXPIRATION	= (1 << 1),
 };
 
-struct nft_trans_elem {
-	struct nft_trans		nft_trans;
-	struct nft_set			*set;
-	struct nft_elem_priv		*elem_priv;
+struct nft_trans_one_elem {
+	struct nft_elem_priv		*priv;
 	u64				timeout;
 	u64				expiration;
 	u8				update_flags;
+};
+
+struct nft_trans_elem {
+	struct nft_trans		nft_trans;
+	struct nft_set			*set;
 	bool				bound;
+	unsigned int			nelems;
+	struct nft_trans_one_elem	elems[] __counted_by(nelems);
 };
 
 #define nft_trans_container_elem(t)			\
 	container_of(t, struct nft_trans_elem, nft_trans)
 #define nft_trans_elem_set(trans)			\
 	nft_trans_container_elem(trans)->set
-#define nft_trans_elem_priv(trans)			\
-	nft_trans_container_elem(trans)->elem_priv
-#define nft_trans_elem_update_flags(trans)		\
-	nft_trans_container_elem(trans)->update_flags
-#define nft_trans_elem_timeout(trans)			\
-	nft_trans_container_elem(trans)->timeout
-#define nft_trans_elem_expiration(trans)		\
-	nft_trans_container_elem(trans)->expiration
 #define nft_trans_elem_set_bound(trans)			\
 	nft_trans_container_elem(trans)->bound
 
-- 
cgit v1.2.3


From 508180850b732c7a0e3a728460cf3f95f25e1fbd Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Wed, 13 Nov 2024 16:35:53 +0100
Subject: netfilter: nf_tables: allocate element update information dynamically

Move the timeout/expire/flag members from nft_trans_one_elem struct into
a dybamically allocated structure, only needed when timeout update was
requested.

This halves size of nft_trans_one_elem struct and allows to compact up to
124 elements in one transaction container rather than 62.

This halves memory requirements for a large flush or insert transaction,
where ->update remains NULL.

Care has to be taken to release the extra data in all spots, including
abort path.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables.h | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 37af0b174c39..80a537ac26cd 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -1759,11 +1759,15 @@ enum nft_trans_elem_flags {
 	NFT_TRANS_UPD_EXPIRATION	= (1 << 1),
 };
 
-struct nft_trans_one_elem {
-	struct nft_elem_priv		*priv;
+struct nft_elem_update {
 	u64				timeout;
 	u64				expiration;
-	u8				update_flags;
+	u8				flags;
+};
+
+struct nft_trans_one_elem {
+	struct nft_elem_priv		*priv;
+	struct nft_elem_update		*update;
 };
 
 struct nft_trans_elem {
-- 
cgit v1.2.3


From 0c32840763b1579c923b4216c18bb756ca4ba473 Mon Sep 17 00:00:00 2001
From: "Michael J. Ruhl" <michael.j.ruhl@intel.com>
Date: Thu, 14 Nov 2024 08:03:57 -0500
Subject: platform/x86/intel/pmt: allow user offset for PMT callbacks
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Usage of the telem sysfs file allows for partial reads at an offset.
The current callback method returns the buffer starting from offset 0
only.

Include the requested offset in the callback and update the necessary
address calculations with the offset.

Note: offset addition is moved from the caller to the local usage. For
non-callback usage this is unchanged behavior.

Fixes: e92affc74cd8 ("platform/x86/intel/vsec: Add PMT read callbacks")
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Michael J. Ruhl <michael.j.ruhl@intel.com>
Link: https://lore.kernel.org/r/20241114130358.2467787-2-michael.j.ruhl@intel.com
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
---
 include/linux/intel_vsec.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/intel_vsec.h b/include/linux/intel_vsec.h
index 11ee185566c3..b94beab64610 100644
--- a/include/linux/intel_vsec.h
+++ b/include/linux/intel_vsec.h
@@ -74,10 +74,11 @@ enum intel_vsec_quirks {
  * @pdev:  PCI device reference for the callback's use
  * @guid:  ID of data to acccss
  * @data:  buffer for the data to be copied
+ * @off:   offset into the requested buffer
  * @count: size of buffer
  */
 struct pmt_callbacks {
-	int (*read_telem)(struct pci_dev *pdev, u32 guid, u64 *data, u32 count);
+	int (*read_telem)(struct pci_dev *pdev, u32 guid, u64 *data, loff_t off, u32 count);
 };
 
 /**
-- 
cgit v1.2.3


From aefff51e1c2986e16f2780ca8e4c97b784800ab5 Mon Sep 17 00:00:00 2001
From: Christian Brauner <brauner@kernel.org>
Date: Thu, 14 Nov 2024 16:31:27 +0100
Subject: statmount: retrieve security mount options

Add the ability to retrieve security mount options. Keep them separate
from filesystem specific mount options so it's easy to tell them apart.
Also allow to retrieve them separate from other mount options as most of
the time users won't be interested in security specific mount options.

Link: https://lore.kernel.org/r/20241114-radtour-ofenrohr-ff34b567b40a@brauner
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/uapi/linux/mount.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/uapi/linux/mount.h b/include/uapi/linux/mount.h
index c0fda4604187..c07008816aca 100644
--- a/include/uapi/linux/mount.h
+++ b/include/uapi/linux/mount.h
@@ -177,7 +177,9 @@ struct statmount {
 	__u32 sb_source;	/* [str] Source string of the mount */
 	__u32 opt_num;		/* Number of fs options */
 	__u32 opt_array;	/* [str] Array of nul terminated fs options */
-	__u64 __spare2[47];
+	__u32 opt_sec_num;	/* Number of security options */
+	__u32 opt_sec_array;	/* [str] Array of nul terminated security options */
+	__u64 __spare2[46];
 	char str[];		/* Variable size part containing strings */
 };
 
@@ -214,6 +216,7 @@ struct mnt_id_req {
 #define STATMOUNT_FS_SUBTYPE		0x00000100U	/* Want/got fs_subtype */
 #define STATMOUNT_SB_SOURCE		0x00000200U	/* Want/got sb_source */
 #define STATMOUNT_OPT_ARRAY		0x00000400U	/* Want/got opt_... */
+#define STATMOUNT_OPT_SEC_ARRAY		0x00000800U	/* Want/got opt_sec... */
 
 /*
  * Special @mnt_id values that can be passed to listmount
-- 
cgit v1.2.3


From 0b3144da31f855fce652303f588416a60991bdef Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Wed, 13 Nov 2024 07:49:22 +0100
Subject: USB: make single lock for all usb dynamic id lists

There are a number of places where we accidentally pass in a constant
structure to later cast it off to a dynamic one, and then attempt to
grab a lock on it, which is not a good idea.  To help resolve this, move
the dynamic id lock out of the dynamic id structure for the driver and
into one single lock for all USB dynamic ids.  As this lock should never
have any real contention (it's only every accessed when a device is
added or removed, which is always serialized) there should not be any
difference except for some memory savings.

Note, this just converts the existing use of the dynamic id lock to the
new static lock, there is one place that is accessing the dynamic id
list without grabbing the lock, that will be fixed up in a follow-on
change.

Cc: Johan Hovold <johan@kernel.org>
Cc: Herve Codina <herve.codina@bootlin.com>
Cc: Rob Herring <robh@kernel.org>
Cc: Alan Stern <stern@rowland.harvard.edu>
Cc: Grant Grundler <grundler@chromium.org>
Cc: Oliver Neukum <oneukum@suse.com>
Cc: Yajun Deng <yajun.deng@linux.dev>
Cc: Douglas Anderson <dianders@chromium.org>
Cc: linux-usb@vger.kernel.org
Link: https://lore.kernel.org/r/2024111322-kindly-finalist-d247@gregkh
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/usb.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/usb.h b/include/linux/usb.h
index 672d8fc2abdb..b66b1af3e439 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -1129,8 +1129,8 @@ static inline int usb_make_path(struct usb_device *dev, char *buf, size_t size)
 /* ----------------------------------------------------------------------- */
 
 /* Stuff for dynamic usb ids */
+extern struct mutex usb_dynids_lock;
 struct usb_dynids {
-	spinlock_t lock;
 	struct list_head list;
 };
 
-- 
cgit v1.2.3


From 829ed626499c11c9d11c65e93febc1e0da7cd61b Mon Sep 17 00:00:00 2001
From: Steve Sistare <steven.sistare@oracle.com>
Date: Wed, 13 Nov 2024 11:51:36 -0800
Subject: iommufd: Add IOMMU_IOAS_CHANGE_PROCESS

Add an ioctl that updates all DMA mappings to reflect the current process,
Change the mm and transfer locked memory accounting from old to current mm.
This will be used for live update, allowing an old process to hand the
iommufd device descriptor to a new process.  The new process calls the
ioctl.

IOMMU_IOAS_CHANGE_PROCESS only supports DMA mappings created with
IOMMU_IOAS_MAP_FILE, because the kernel metadata for such mappings does
not depend on the userland VA of the pages (which is different in the new
process).
IOMMU_IOAS_CHANGE_PROCESS fails if other types of mappings are present.

This is a revised version of code originally provided by Jason.

Link: https://patch.msgid.link/r/1731527497-16091-4-git-send-email-steven.sistare@oracle.com
Suggested-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Steve Sistare <steven.sistare@oracle.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 include/uapi/linux/iommufd.h | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h
index 747d3d9baa3d..4ae8b1ee0444 100644
--- a/include/uapi/linux/iommufd.h
+++ b/include/uapi/linux/iommufd.h
@@ -54,6 +54,7 @@ enum {
 	IOMMUFD_CMD_IOAS_MAP_FILE = 0x8f,
 	IOMMUFD_CMD_VIOMMU_ALLOC = 0x90,
 	IOMMUFD_CMD_VDEVICE_ALLOC = 0x91,
+	IOMMUFD_CMD_IOAS_CHANGE_PROCESS = 0x92,
 };
 
 /**
@@ -972,4 +973,26 @@ struct iommu_vdevice_alloc {
 	__aligned_u64 virt_id;
 };
 #define IOMMU_VDEVICE_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_VDEVICE_ALLOC)
+
+/**
+ * struct iommu_ioas_change_process - ioctl(VFIO_IOAS_CHANGE_PROCESS)
+ * @size: sizeof(struct iommu_ioas_change_process)
+ * @__reserved: Must be 0
+ *
+ * This transfers pinned memory counts for every memory map in every IOAS
+ * in the context to the current process.  This only supports maps created
+ * with IOMMU_IOAS_MAP_FILE, and returns EINVAL if other maps are present.
+ * If the ioctl returns a failure status, then nothing is changed.
+ *
+ * This API is useful for transferring operation of a device from one process
+ * to another, such as during userland live update.
+ */
+struct iommu_ioas_change_process {
+	__u32 size;
+	__u32 __reserved;
+};
+
+#define IOMMU_IOAS_CHANGE_PROCESS \
+	_IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_CHANGE_PROCESS)
+
 #endif
-- 
cgit v1.2.3


From 2f3aab7aecb827ba93c6222646eb0faa8228d590 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Wed, 13 Nov 2024 15:04:40 +0100
Subject: USB: make to_usb_driver() use container_of_const()

Turns out that we have some const pointers being passed to
to_usb_driver() but were not catching this.  Change the macro to
properly propagate the const-ness of the pointer so that we will notice
when we try to write to memory that we shouldn't be writing to.

This requires fixing up the usb_match_dynamic_id() function as well,
because it can handle a const * to struct usb_driver.

Cc: Johan Hovold <johan@kernel.org>
Cc: Alan Stern <stern@rowland.harvard.edu>
Cc: Grant Grundler <grundler@chromium.org>
Cc: Yajun Deng <yajun.deng@linux.dev>
Cc: Oliver Neukum <oneukum@suse.com>
Cc: Douglas Anderson <dianders@chromium.org>
Cc: linux-usb@vger.kernel.org
Link: https://lore.kernel.org/r/2024111339-shaky-goldsmith-b233@gregkh
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/usb.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/usb.h b/include/linux/usb.h
index b66b1af3e439..7a9e96f9d886 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -1243,7 +1243,7 @@ struct usb_driver {
 	unsigned int disable_hub_initiated_lpm:1;
 	unsigned int soft_unbind:1;
 };
-#define	to_usb_driver(d) container_of(d, struct usb_driver, driver)
+#define	to_usb_driver(d) container_of_const(d, struct usb_driver, driver)
 
 /**
  * struct usb_device_driver - identifies USB device driver to usbcore
-- 
cgit v1.2.3


From d6fa15bbcf9604e3c14816410550d2cf22b955e4 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Wed, 13 Nov 2024 15:04:41 +0100
Subject: USB: make to_usb_device_driver() use container_of_const()

Turns out that we have some const pointers being passed to
to_usb_device_driver() but were not catching this.  Change the macro to
properly propagate the const-ness of the pointer so that we will notice
when we try to write to memory that we shouldn't be writing to.

This requires fixing up the usb_driver_applicable() function as well,
because it can handle a const * to struct usb_driver.

Cc: Johan Hovold <johan@kernel.org>
Cc: Alan Stern <stern@rowland.harvard.edu>
Cc: Grant Grundler <grundler@chromium.org>
Cc: Yajun Deng <yajun.deng@linux.dev>
Cc: Oliver Neukum <oneukum@suse.com>
Cc: Douglas Anderson <dianders@chromium.org>
Cc: linux-usb@vger.kernel.org
Link: https://lore.kernel.org/r/2024111342-lagoon-reapprove-5e49@gregkh
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/usb.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/usb.h b/include/linux/usb.h
index 7a9e96f9d886..cfa8005e24f9 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -1294,8 +1294,7 @@ struct usb_device_driver {
 	unsigned int supports_autosuspend:1;
 	unsigned int generic_subclass:1;
 };
-#define	to_usb_device_driver(d) container_of(d, struct usb_device_driver, \
-		driver)
+#define	to_usb_device_driver(d) container_of_const(d, struct usb_device_driver, driver)
 
 /**
  * struct usb_class_driver - identifies a USB driver that wants to use the USB major number
-- 
cgit v1.2.3


From d96c77bd4eeba469bddbbb14323d2191684da82a Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 8 Nov 2024 04:56:31 -0500
Subject: KVM: x86: switch hugepage recovery thread to vhost_task

kvm_vm_create_worker_thread() is meant to be used for kthreads that
can consume significant amounts of CPU time on behalf of a VM or in
response to how the VM behaves (for example how it accesses its memory).
Therefore it wants to charge the CPU time consumed by that work to
the VM's container.

However, because of these threads, cgroups which have kvm instances
inside never complete freezing.  This can be trivially reproduced:

  root@test ~# mkdir /sys/fs/cgroup/test
  root@test ~# echo $$ > /sys/fs/cgroup/test/cgroup.procs
  root@test ~# qemu-system-x86_64 -nographic -enable-kvm

and in another terminal:

  root@test ~# echo 1 > /sys/fs/cgroup/test/cgroup.freeze
  root@test ~# cat /sys/fs/cgroup/test/cgroup.events
  populated 1
  frozen 0

The cgroup freezing happens in the signal delivery path but
kvm_nx_huge_page_recovery_worker, while joining non-root cgroups, never
calls into the signal delivery path and thus never gets frozen. Because
the cgroup freezer determines whether a given cgroup is frozen by
comparing the number of frozen threads to the total number of threads
in the cgroup, the cgroup never becomes frozen and users waiting for
the state transition may hang indefinitely.

Since the worker kthread is tied to a user process, it's better if
it behaves similarly to user tasks as much as possible, including
being able to send SIGSTOP and SIGCONT.  In fact, vhost_task is all
that kvm_vm_create_worker_thread() wanted to be and more: not only it
inherits the userspace process's cgroups, it has other niceties like
being parented properly in the process tree.  Use it instead of the
homegrown alternative.

Incidentally, the new code is also better behaved when you flip recovery
back and forth to disabled and back to enabled.  If your recovery period
is 1 minute, it will run the next recovery after 1 minute independent
of how many times you flipped the parameter.

(Commit message based on emails from Tejun).

Reported-by: Tejun Heo <tj@kernel.org>
Reported-by: Luca Boccassi <bluca@debian.org>
Acked-by: Tejun Heo <tj@kernel.org>
Tested-by: Luca Boccassi <bluca@debian.org>
Cc: stable@vger.kernel.org
Reviewed-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/linux/kvm_host.h | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 03e4d26e3bcc..401439bb21e3 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -2425,12 +2425,6 @@ static inline int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu)
 }
 #endif /* CONFIG_HAVE_KVM_VCPU_RUN_PID_CHANGE */
 
-typedef int (*kvm_vm_thread_fn_t)(struct kvm *kvm, uintptr_t data);
-
-int kvm_vm_create_worker_thread(struct kvm *kvm, kvm_vm_thread_fn_t thread_fn,
-				uintptr_t data, const char *name,
-				struct task_struct **thread_ptr);
-
 #ifdef CONFIG_KVM_XFER_TO_GUEST_WORK
 static inline void kvm_handle_signal_exit(struct kvm_vcpu *vcpu)
 {
-- 
cgit v1.2.3


From 94464a7b71634037b13d54021e0dfd0fb0d8c1f0 Mon Sep 17 00:00:00 2001
From: Danil Pylaev <danstiv404@gmail.com>
Date: Mon, 21 Oct 2024 12:22:44 +0000
Subject: Bluetooth: Add new quirks for ATS2851

This adds quirks for broken extended create connection,
and write auth payload timeout.

Signed-off-by: Danil Pylaev <danstiv404@gmail.com>
Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
---
 include/net/bluetooth/hci.h      | 14 ++++++++++++++
 include/net/bluetooth/hci_core.h | 10 ++++++----
 2 files changed, 20 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index bab1e3d7452a..4f64066915be 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -300,6 +300,20 @@ enum {
 	 */
 	HCI_QUIRK_BROKEN_SET_RPA_TIMEOUT,
 
+	/*
+	 * When this quirk is set, the HCI_OP_LE_EXT_CREATE_CONN command is
+	 * disabled. This is required for the Actions Semiconductor ATS2851
+	 * based controllers, which erroneously claims to support it.
+	 */
+	HCI_QUIRK_BROKEN_EXT_CREATE_CONN,
+
+	/*
+	 * When this quirk is set, the command WRITE_AUTH_PAYLOAD_TIMEOUT is
+	 * skipped. This is required for the Actions Semiconductor ATS2851
+	 * based controllers, due to a race condition in pairing process.
+	 */
+	HCI_QUIRK_BROKEN_WRITE_AUTH_PAYLOAD_TIMEOUT,
+
 	/* When this quirk is set, MSFT extension monitor tracking by
 	 * address filter is supported. Since tracking quantity of each
 	 * pattern is limited, this feature supports tracking multiple
diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 88265d37aa72..94ddc8684973 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -1871,8 +1871,8 @@ void hci_conn_del_sysfs(struct hci_conn *conn);
 			   !test_bit(HCI_QUIRK_BROKEN_EXT_SCAN, &(dev)->quirks))
 
 /* Use ext create connection if command is supported */
-#define use_ext_conn(dev) ((dev)->commands[37] & 0x80)
-
+#define use_ext_conn(dev) (((dev)->commands[37] & 0x80) && \
+	!test_bit(HCI_QUIRK_BROKEN_EXT_CREATE_CONN, &(dev)->quirks))
 /* Extended advertising support */
 #define ext_adv_capable(dev) (((dev)->le_features[1] & HCI_LE_EXT_ADV))
 
@@ -1885,8 +1885,10 @@ void hci_conn_del_sysfs(struct hci_conn *conn);
  * C24: Mandatory if the LE Controller supports Connection State and either
  * LE Feature (LL Privacy) or LE Feature (Extended Advertising) is supported
  */
-#define use_enhanced_conn_complete(dev) (ll_privacy_capable(dev) || \
-					 ext_adv_capable(dev))
+#define use_enhanced_conn_complete(dev) ((ll_privacy_capable(dev) || \
+					 ext_adv_capable(dev)) && \
+					 !test_bit(HCI_QUIRK_BROKEN_EXT_CREATE_CONN, \
+						 &(dev)->quirks))
 
 /* Periodic advertising support */
 #define per_adv_capable(dev) (((dev)->le_features[1] & HCI_LE_PERIODIC_ADV))
-- 
cgit v1.2.3


From 4a5e0ba68676b3a77298cf646cd2b39c94fbd2f5 Mon Sep 17 00:00:00 2001
From: Iulia Tanasescu <iulia.tanasescu@nxp.com>
Date: Fri, 1 Nov 2024 10:23:36 +0200
Subject: Bluetooth: ISO: Do not emit LE PA Create Sync if previous is pending

The Bluetooth Core spec does not allow a LE PA Create sync command to be
sent to Controller if another one is pending (Vol 4, Part E, page 2493).

In order to avoid this issue, the HCI_CONN_CREATE_PA_SYNC was added
to mark that the LE PA Create Sync command has been sent for a hcon.
Once the PA Sync Established event is received, the hcon flag is
erased and the next pending hcon is handled.

Signed-off-by: Iulia Tanasescu <iulia.tanasescu@nxp.com>
Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
---
 include/net/bluetooth/hci.h      |  3 ++-
 include/net/bluetooth/hci_core.h | 34 ++++++++++++++++++++++++++++++++++
 2 files changed, 36 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index 4f64066915be..4becf201b063 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -1,7 +1,7 @@
 /*
    BlueZ - Bluetooth protocol stack for Linux
    Copyright (C) 2000-2001 Qualcomm Incorporated
-   Copyright 2023 NXP
+   Copyright 2023-2024 NXP
 
    Written 2000,2001 by Maxim Krasnyansky <maxk@qualcomm.com>
 
@@ -697,6 +697,7 @@ enum {
 #define HCI_RSSI_INVALID	127
 
 #define HCI_SYNC_HANDLE_INVALID	0xffff
+#define HCI_SID_INVALID		0xff
 
 #define HCI_ROLE_MASTER		0x00
 #define HCI_ROLE_SLAVE		0x01
diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 94ddc8684973..43474b751a50 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -668,6 +668,7 @@ struct hci_conn {
 	__u8		adv_instance;
 	__u16		handle;
 	__u16		sync_handle;
+	__u8		sid;
 	__u16		state;
 	__u16		mtu;
 	__u8		mode;
@@ -947,6 +948,7 @@ enum {
 	HCI_CONN_CREATE_CIS,
 	HCI_CONN_BIG_SYNC,
 	HCI_CONN_BIG_SYNC_FAILED,
+	HCI_CONN_CREATE_PA_SYNC,
 	HCI_CONN_PA_SYNC,
 	HCI_CONN_PA_SYNC_FAILED,
 };
@@ -1099,6 +1101,30 @@ static inline struct hci_conn *hci_conn_hash_lookup_bis(struct hci_dev *hdev,
 	return NULL;
 }
 
+static inline struct hci_conn *hci_conn_hash_lookup_sid(struct hci_dev *hdev,
+							__u8 sid,
+							bdaddr_t *dst,
+							__u8 dst_type)
+{
+	struct hci_conn_hash *h = &hdev->conn_hash;
+	struct hci_conn  *c;
+
+	rcu_read_lock();
+
+	list_for_each_entry_rcu(c, &h->list, list) {
+		if (c->type != ISO_LINK  || bacmp(&c->dst, dst) ||
+		    c->dst_type != dst_type || c->sid != sid)
+			continue;
+
+		rcu_read_unlock();
+		return c;
+	}
+
+	rcu_read_unlock();
+
+	return NULL;
+}
+
 static inline struct hci_conn *
 hci_conn_hash_lookup_per_adv_bis(struct hci_dev *hdev,
 				 bdaddr_t *ba,
@@ -1328,6 +1354,13 @@ hci_conn_hash_lookup_pa_sync_handle(struct hci_dev *hdev, __u16 sync_handle)
 		if (c->type != ISO_LINK)
 			continue;
 
+		/* Ignore the listen hcon, we are looking
+		 * for the child hcon that was created as
+		 * a result of the PA sync established event.
+		 */
+		if (c->state == BT_LISTEN)
+			continue;
+
 		if (c->sync_handle == sync_handle) {
 			rcu_read_unlock();
 			return c;
@@ -1445,6 +1478,7 @@ bool hci_setup_sync(struct hci_conn *conn, __u16 handle);
 void hci_sco_setup(struct hci_conn *conn, __u8 status);
 bool hci_iso_setup_path(struct hci_conn *conn);
 int hci_le_create_cis_pending(struct hci_dev *hdev);
+int hci_pa_create_sync_pending(struct hci_dev *hdev);
 int hci_conn_check_create_cis(struct hci_conn *conn);
 
 struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst,
-- 
cgit v1.2.3


From 42ecf1947135110ea08abeaca39741636f9a2285 Mon Sep 17 00:00:00 2001
From: Iulia Tanasescu <iulia.tanasescu@nxp.com>
Date: Fri, 1 Nov 2024 10:23:38 +0200
Subject: Bluetooth: ISO: Do not emit LE BIG Create Sync if previous is pending

The Bluetooth Core spec does not allow a LE BIG Create sync command to be
sent to Controller if another one is pending (Vol 4, Part E, page 2586).

In order to avoid this issue, the HCI_CONN_CREATE_BIG_SYNC was added
to mark that the LE BIG Create Sync command has been sent for a hcon.
Once the BIG Sync Established event is received, the hcon flag is
erased and the next pending hcon is handled.

Signed-off-by: Iulia Tanasescu <iulia.tanasescu@nxp.com>
Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
---
 include/net/bluetooth/hci.h      |  1 +
 include/net/bluetooth/hci_core.h | 29 +++++++++++++++++++++++++++++
 2 files changed, 30 insertions(+)

(limited to 'include')

diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index 4becf201b063..5bb4eaa52e14 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -29,6 +29,7 @@
 #define HCI_MAX_ACL_SIZE	1024
 #define HCI_MAX_SCO_SIZE	255
 #define HCI_MAX_ISO_SIZE	251
+#define HCI_MAX_ISO_BIS		31
 #define HCI_MAX_EVENT_SIZE	260
 #define HCI_MAX_FRAME_SIZE	(HCI_MAX_ACL_SIZE + 4)
 
diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 43474b751a50..c95f7e6ba255 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -711,6 +711,9 @@ struct hci_conn {
 	__s8		tx_power;
 	__s8		max_tx_power;
 	struct bt_iso_qos iso_qos;
+	__u8		num_bis;
+	__u8		bis[HCI_MAX_ISO_BIS];
+
 	unsigned long	flags;
 
 	enum conn_reasons conn_reason;
@@ -946,6 +949,7 @@ enum {
 	HCI_CONN_PER_ADV,
 	HCI_CONN_BIG_CREATED,
 	HCI_CONN_CREATE_CIS,
+	HCI_CONN_CREATE_BIG_SYNC,
 	HCI_CONN_BIG_SYNC,
 	HCI_CONN_BIG_SYNC_FAILED,
 	HCI_CONN_CREATE_PA_SYNC,
@@ -1295,6 +1299,30 @@ static inline struct hci_conn *hci_conn_hash_lookup_big(struct hci_dev *hdev,
 	return NULL;
 }
 
+static inline struct hci_conn *
+hci_conn_hash_lookup_big_sync_pend(struct hci_dev *hdev,
+				   __u8 handle, __u8 num_bis)
+{
+	struct hci_conn_hash *h = &hdev->conn_hash;
+	struct hci_conn  *c;
+
+	rcu_read_lock();
+
+	list_for_each_entry_rcu(c, &h->list, list) {
+		if (c->type != ISO_LINK)
+			continue;
+
+		if (handle == c->iso_qos.bcast.big && num_bis == c->num_bis) {
+			rcu_read_unlock();
+			return c;
+		}
+	}
+
+	rcu_read_unlock();
+
+	return NULL;
+}
+
 static inline struct hci_conn *
 hci_conn_hash_lookup_big_state(struct hci_dev *hdev, __u8 handle,  __u16 state)
 {
@@ -1479,6 +1507,7 @@ void hci_sco_setup(struct hci_conn *conn, __u8 status);
 bool hci_iso_setup_path(struct hci_conn *conn);
 int hci_le_create_cis_pending(struct hci_dev *hdev);
 int hci_pa_create_sync_pending(struct hci_dev *hdev);
+int hci_le_big_create_sync_pending(struct hci_dev *hdev);
 int hci_conn_check_create_cis(struct hci_conn *conn);
 
 struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst,
-- 
cgit v1.2.3


From 83d328a72eff3268ea4c19deb0a6cf4c7da15746 Mon Sep 17 00:00:00 2001
From: Iulia Tanasescu <iulia.tanasescu@nxp.com>
Date: Fri, 1 Nov 2024 10:23:39 +0200
Subject: Bluetooth: ISO: Update hci_conn_hash_lookup_big for Broadcast slave

Currently, hci_conn_hash_lookup_big only checks for BIS master connections,
by filtering out connections with the destination address set. This commit
updates this function to also consider BIS slave connections, since it is
also used for a Broadcast Receiver to set an available BIG handle before
issuing the LE BIG Create Sync command.

Signed-off-by: Iulia Tanasescu <iulia.tanasescu@nxp.com>
Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
---
 include/net/bluetooth/hci_core.h | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index c95f7e6ba255..ea798f07c5a2 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -1285,7 +1285,17 @@ static inline struct hci_conn *hci_conn_hash_lookup_big(struct hci_dev *hdev,
 	rcu_read_lock();
 
 	list_for_each_entry_rcu(c, &h->list, list) {
-		if (bacmp(&c->dst, BDADDR_ANY) || c->type != ISO_LINK)
+		if (c->type != ISO_LINK)
+			continue;
+
+		/* An ISO_LINK hcon with BDADDR_ANY as destination
+		 * address is a Broadcast connection. A Broadcast
+		 * slave connection is associated with a PA train,
+		 * so the sync_handle can be used to differentiate
+		 * from unicast.
+		 */
+		if (bacmp(&c->dst, BDADDR_ANY) &&
+		    c->sync_handle == HCI_SYNC_HANDLE_INVALID)
 			continue;
 
 		if (handle == c->iso_qos.bcast.big) {
-- 
cgit v1.2.3


From 96e7c4273560be4744ba8c444bc6969745315251 Mon Sep 17 00:00:00 2001
From: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Date: Wed, 6 Nov 2024 09:53:45 -0500
Subject: Bluetooth: HCI: Add IPC(11) bus type

Zephyr(1) has been using the same bus defines as Linux so tools likes of
btmon, etc, are able to decode the bus used by the driver to transport
HCI packets.

Link: https://github.com/zephyrproject-rtos/zephyr/pull/80808
Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
---
 include/net/bluetooth/hci.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index 5bb4eaa52e14..6203bd8663b7 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -68,6 +68,7 @@
 #define HCI_I2C		8
 #define HCI_SMD		9
 #define HCI_VIRTIO	10
+#define HCI_IPC		11
 
 /* HCI device quirks */
 enum {
-- 
cgit v1.2.3


From 827af4787e74e8df9e8e0677a69fbb15e0856d2f Mon Sep 17 00:00:00 2001
From: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Date: Wed, 23 Oct 2024 16:55:57 -0400
Subject: Bluetooth: MGMT: Add initial implementation of MGMT_OP_HCI_CMD_SYNC

This adds the initial implementation of MGMT_OP_HCI_CMD_SYNC as
documented in mgmt-api (BlueZ tree):

Send HCI command and wait for event Command
===========================================

	Command Code:		0x005B
	Controller Index:	<controller id>
	Command Parameters:	Opcode (2 Octets)
				Event (1 Octet)
				Timeout (1 Octet)
				Parameter Length (2 Octets)
				Parameter (variable)
	Return Parameters:	Response (1-variable Octets)

	This command may be used to send a HCI command and wait for an
	(optional) event.

	The HCI command is specified by the Opcode, any arbitrary is supported
	including vendor commands, but contrary to the like of
	Raw/User channel it is run as an HCI command send by the kernel
	since it uses its command synchronization thus it is possible to wait
	for a specific event as a response.

	Setting event to 0x00 will cause the command to wait for either
	HCI Command Status or HCI Command Complete.

	Timeout is specified in seconds, setting it to 0 will cause the
	default timeout to be used.

	Possible errors:	Failed
				Invalid Parameters

Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
---
 include/net/bluetooth/mgmt.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include')

diff --git a/include/net/bluetooth/mgmt.h b/include/net/bluetooth/mgmt.h
index d382679efd2b..affac861efdc 100644
--- a/include/net/bluetooth/mgmt.h
+++ b/include/net/bluetooth/mgmt.h
@@ -878,6 +878,16 @@ struct mgmt_cp_mesh_send_cancel {
 } __packed;
 #define MGMT_MESH_SEND_CANCEL_SIZE	1
 
+#define MGMT_OP_HCI_CMD_SYNC		0x005B
+struct mgmt_cp_hci_cmd_sync {
+	__le16 opcode;
+	__u8   event;
+	__u8   timeout;
+	__le16 params_len;
+	__u8   params[];
+} __packed;
+#define MGMT_HCI_CMD_SYNC_SIZE		6
+
 #define MGMT_EV_CMD_COMPLETE		0x0001
 struct mgmt_ev_cmd_complete {
 	__le16	opcode;
-- 
cgit v1.2.3


From a7479860bb4099a8eef64999e843c085838248e4 Mon Sep 17 00:00:00 2001
From: Yassine Oudjana <y.oudjana@protonmail.com>
Date: Wed, 6 Nov 2024 11:14:30 +0000
Subject: dt-bindings: clock: mediatek: Add bindings for MT6735 syscon clock
 and reset controllers

Add device tree bindings for syscon clock and reset controllers (IMGSYS,
MFGCFG, VDECSYS and VENCSYS).

Signed-off-by: Yassine Oudjana <y.oudjana@protonmail.com>
Reviewed-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
Acked-by: Conor Dooley <conor.dooley@microchip.com>
Link: https://lore.kernel.org/r/20241106111402.200940-2-y.oudjana@protonmail.com
Signed-off-by: Stephen Boyd <sboyd@kernel.org>
---
 include/dt-bindings/clock/mediatek,mt6735-imgsys.h  | 15 +++++++++++++++
 include/dt-bindings/clock/mediatek,mt6735-mfgcfg.h  |  8 ++++++++
 include/dt-bindings/clock/mediatek,mt6735-vdecsys.h |  9 +++++++++
 include/dt-bindings/clock/mediatek,mt6735-vencsys.h | 11 +++++++++++
 include/dt-bindings/reset/mediatek,mt6735-mfgcfg.h  |  9 +++++++++
 include/dt-bindings/reset/mediatek,mt6735-vdecsys.h |  9 +++++++++
 6 files changed, 61 insertions(+)
 create mode 100644 include/dt-bindings/clock/mediatek,mt6735-imgsys.h
 create mode 100644 include/dt-bindings/clock/mediatek,mt6735-mfgcfg.h
 create mode 100644 include/dt-bindings/clock/mediatek,mt6735-vdecsys.h
 create mode 100644 include/dt-bindings/clock/mediatek,mt6735-vencsys.h
 create mode 100644 include/dt-bindings/reset/mediatek,mt6735-mfgcfg.h
 create mode 100644 include/dt-bindings/reset/mediatek,mt6735-vdecsys.h

(limited to 'include')

diff --git a/include/dt-bindings/clock/mediatek,mt6735-imgsys.h b/include/dt-bindings/clock/mediatek,mt6735-imgsys.h
new file mode 100644
index 000000000000..f250c26c5eb4
--- /dev/null
+++ b/include/dt-bindings/clock/mediatek,mt6735-imgsys.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+
+#ifndef _DT_BINDINGS_CLK_MT6735_IMGSYS_H
+#define _DT_BINDINGS_CLK_MT6735_IMGSYS_H
+
+#define CLK_IMG_SMI_LARB2		0
+#define CLK_IMG_CAM_SMI			1
+#define CLK_IMG_CAM_CAM			2
+#define CLK_IMG_SEN_TG			3
+#define CLK_IMG_SEN_CAM			4
+#define CLK_IMG_CAM_SV			5
+#define CLK_IMG_SUFOD			6
+#define CLK_IMG_FD			7
+
+#endif /* _DT_BINDINGS_CLK_MT6735_IMGSYS_H */
diff --git a/include/dt-bindings/clock/mediatek,mt6735-mfgcfg.h b/include/dt-bindings/clock/mediatek,mt6735-mfgcfg.h
new file mode 100644
index 000000000000..d2d99a48348a
--- /dev/null
+++ b/include/dt-bindings/clock/mediatek,mt6735-mfgcfg.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+
+#ifndef _DT_BINDINGS_CLK_MT6735_MFGCFG_H
+#define _DT_BINDINGS_CLK_MT6735_MFGCFG_H
+
+#define CLK_MFG_BG3D			0
+
+#endif /* _DT_BINDINGS_CLK_MT6735_MFGCFG_H */
diff --git a/include/dt-bindings/clock/mediatek,mt6735-vdecsys.h b/include/dt-bindings/clock/mediatek,mt6735-vdecsys.h
new file mode 100644
index 000000000000..f94cec10c89f
--- /dev/null
+++ b/include/dt-bindings/clock/mediatek,mt6735-vdecsys.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+
+#ifndef _DT_BINDINGS_CLK_MT6735_VDECSYS_H
+#define _DT_BINDINGS_CLK_MT6735_VDECSYS_H
+
+#define CLK_VDEC_VDEC			0
+#define CLK_VDEC_SMI_LARB1		1
+
+#endif /* _DT_BINDINGS_CLK_MT6735_VDECSYS_H */
diff --git a/include/dt-bindings/clock/mediatek,mt6735-vencsys.h b/include/dt-bindings/clock/mediatek,mt6735-vencsys.h
new file mode 100644
index 000000000000..e5a9cb4f269f
--- /dev/null
+++ b/include/dt-bindings/clock/mediatek,mt6735-vencsys.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+
+#ifndef _DT_BINDINGS_CLK_MT6735_VENCSYS_H
+#define _DT_BINDINGS_CLK_MT6735_VENCSYS_H
+
+#define CLK_VENC_SMI_LARB3			0
+#define CLK_VENC_VENC			1
+#define CLK_VENC_JPGENC			2
+#define CLK_VENC_JPGDEC			3
+
+#endif /* _DT_BINDINGS_CLK_MT6735_VENCSYS_H */
diff --git a/include/dt-bindings/reset/mediatek,mt6735-mfgcfg.h b/include/dt-bindings/reset/mediatek,mt6735-mfgcfg.h
new file mode 100644
index 000000000000..c489242b226e
--- /dev/null
+++ b/include/dt-bindings/reset/mediatek,mt6735-mfgcfg.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+
+#ifndef _DT_BINDINGS_RESET_MT6735_MFGCFG_H
+#define _DT_BINDINGS_RESET_MT6735_MFGCFG_H
+
+#define MT6735_MFG_RST0_AXI		0
+#define MT6735_MFG_RST0_G3D		1
+
+#endif /* _DT_BINDINGS_RESET_MT6735_MFGCFG_H */
diff --git a/include/dt-bindings/reset/mediatek,mt6735-vdecsys.h b/include/dt-bindings/reset/mediatek,mt6735-vdecsys.h
new file mode 100644
index 000000000000..b6ae5d249192
--- /dev/null
+++ b/include/dt-bindings/reset/mediatek,mt6735-vdecsys.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+
+#ifndef _DT_BINDINGS_RESET_MT6735_VDECSYS_H
+#define _DT_BINDINGS_RESET_MT6735_VDECSYS_H
+
+#define MT6735_VDEC_RST0_VDEC			0
+#define MT6735_VDEC_RST1_SMI_LARB1		1
+
+#endif /* _DT_BINDINGS_RESET_MT6735_VDECSYS_H */
-- 
cgit v1.2.3


From f03b086624aa7cfbc6f3bf4ac728639602b4c478 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Duje=20Mihanovi=C4=87?= <duje.mihanovic@skole.hr>
Date: Mon, 4 Nov 2024 17:37:06 +0100
Subject: dt-bindings: clock: Add Marvell PXA1908 clock bindings
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add dt bindings and documentation for the Marvell PXA1908 clock
controller.

Reviewed-by: Conor Dooley <conor.dooley@microchip.com>
Reviewed-by: Stephen Boyd <sboyd@kernel.org>
Signed-off-by: Duje Mihanović <duje.mihanovic@skole.hr>
Link: https://lore.kernel.org/r/20241104-pxa1908-lkml-v13-4-e050609b8d6c@skole.hr
Signed-off-by: Stephen Boyd <sboyd@kernel.org>
---
 include/dt-bindings/clock/marvell,pxa1908.h | 88 +++++++++++++++++++++++++++++
 1 file changed, 88 insertions(+)
 create mode 100644 include/dt-bindings/clock/marvell,pxa1908.h

(limited to 'include')

diff --git a/include/dt-bindings/clock/marvell,pxa1908.h b/include/dt-bindings/clock/marvell,pxa1908.h
new file mode 100644
index 000000000000..fb15b0d0cd4c
--- /dev/null
+++ b/include/dt-bindings/clock/marvell,pxa1908.h
@@ -0,0 +1,88 @@
+/* SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause */
+#ifndef __DTS_MARVELL_PXA1908_CLOCK_H
+#define __DTS_MARVELL_PXA1908_CLOCK_H
+
+/* plls */
+#define PXA1908_CLK_CLK32		1
+#define PXA1908_CLK_VCTCXO		2
+#define PXA1908_CLK_PLL1_624		3
+#define PXA1908_CLK_PLL1_416		4
+#define PXA1908_CLK_PLL1_499		5
+#define PXA1908_CLK_PLL1_832		6
+#define PXA1908_CLK_PLL1_1248		7
+#define PXA1908_CLK_PLL1_D2		8
+#define PXA1908_CLK_PLL1_D4		9
+#define PXA1908_CLK_PLL1_D8		10
+#define PXA1908_CLK_PLL1_D16		11
+#define PXA1908_CLK_PLL1_D6		12
+#define PXA1908_CLK_PLL1_D12		13
+#define PXA1908_CLK_PLL1_D24		14
+#define PXA1908_CLK_PLL1_D48		15
+#define PXA1908_CLK_PLL1_D96		16
+#define PXA1908_CLK_PLL1_D13		17
+#define PXA1908_CLK_PLL1_32		18
+#define PXA1908_CLK_PLL1_208		19
+#define PXA1908_CLK_PLL1_117		20
+#define PXA1908_CLK_PLL1_416_GATE	21
+#define PXA1908_CLK_PLL1_624_GATE	22
+#define PXA1908_CLK_PLL1_832_GATE	23
+#define PXA1908_CLK_PLL1_1248_GATE	24
+#define PXA1908_CLK_PLL1_D2_GATE	25
+#define PXA1908_CLK_PLL1_499_EN		26
+#define PXA1908_CLK_PLL2VCO		27
+#define PXA1908_CLK_PLL2		28
+#define PXA1908_CLK_PLL2P		29
+#define PXA1908_CLK_PLL2VCODIV3		30
+#define PXA1908_CLK_PLL3VCO		31
+#define PXA1908_CLK_PLL3		32
+#define PXA1908_CLK_PLL3P		33
+#define PXA1908_CLK_PLL3VCODIV3		34
+#define PXA1908_CLK_PLL4VCO		35
+#define PXA1908_CLK_PLL4		36
+#define PXA1908_CLK_PLL4P		37
+#define PXA1908_CLK_PLL4VCODIV3		38
+
+/* apb (apbc) peripherals */
+#define PXA1908_CLK_UART0		1
+#define PXA1908_CLK_UART1		2
+#define PXA1908_CLK_GPIO		3
+#define PXA1908_CLK_PWM0		4
+#define PXA1908_CLK_PWM1		5
+#define PXA1908_CLK_PWM2		6
+#define PXA1908_CLK_PWM3		7
+#define PXA1908_CLK_SSP0		8
+#define PXA1908_CLK_SSP1		9
+#define PXA1908_CLK_IPC_RST		10
+#define PXA1908_CLK_RTC			11
+#define PXA1908_CLK_TWSI0		12
+#define PXA1908_CLK_KPC			13
+#define PXA1908_CLK_SWJTAG		14
+#define PXA1908_CLK_SSP2		15
+#define PXA1908_CLK_TWSI1		16
+#define PXA1908_CLK_THERMAL		17
+#define PXA1908_CLK_TWSI3		18
+
+/* apb (apbcp) peripherals */
+#define PXA1908_CLK_UART2		1
+#define PXA1908_CLK_TWSI2		2
+#define PXA1908_CLK_AICER		3
+
+/* axi (apmu) peripherals */
+#define PXA1908_CLK_CCIC1		1
+#define PXA1908_CLK_ISP			2
+#define PXA1908_CLK_DSI1		3
+#define PXA1908_CLK_DISP1		4
+#define PXA1908_CLK_CCIC0		5
+#define PXA1908_CLK_SDH0		6
+#define PXA1908_CLK_SDH1		7
+#define PXA1908_CLK_USB			8
+#define PXA1908_CLK_NF			9
+#define PXA1908_CLK_CORE_DEBUG		10
+#define PXA1908_CLK_VPU			11
+#define PXA1908_CLK_GC			12
+#define PXA1908_CLK_SDH2		13
+#define PXA1908_CLK_GC2D		14
+#define PXA1908_CLK_TRACE		15
+#define PXA1908_CLK_DVC_DFC_DEBUG	16
+
+#endif
-- 
cgit v1.2.3


From 6a46b75a91a4347e50b25fb4a150a8f34b026034 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Th=C3=A9o=20Lebrun?= <theo.lebrun@bootlin.com>
Date: Wed, 6 Nov 2024 17:03:53 +0100
Subject: dt-bindings: clock: eyeq: add more Mobileye EyeQ5/EyeQ6H clocks
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add #defines for Mobileye clock controller:

 - EyeQ5 core 0 thru 3 clocks. Internally:

      EQ5C_PLL_CPU:           already exposed
      └── EQ5C_CPU_OCC:       unexposed, no reason to do so
          ├── EQ5C_CPU_CORE0: new!
          ├── EQ5C_CPU_CORE1: new!
          ├── EQ5C_CPU_CORE2: new!
          └── EQ5C_CPU_CORE3: new!

 - EyeQ5 peripheral clocks. Internally:

      EQ5C_PLL_PER:          already exposed
      ├── EQ5C_PER_OCC:      new!
      │   ├── EQ5C_PER_SPI:  new!
      │   ├── EQ5C_PER_I2C:  new!
      │   ├── EQ5C_PER_GPIO: new!
      │   └── EQ5C_PER_UART: new!
      ├── EQ5C_PER_EMMC:     new!
      └── EQ5C_PER_OCC_PCI:  new!

 - EyeQ6H central OLB. Internally:

      EQ6HC_CENTRAL_PLL_CPU:     new!
      └── EQ6HC_CENTRAL_CPU_OCC: new!

 - EyeQ6H west OLB. Internally:

      EQ6HC_WEST_PLL_PER:          new!
      └── EQ6HC_WEST_PER_OCC:      new!
          └── EQ6HC_WEST_PER_UART: new!

Signed-off-by: Théo Lebrun <theo.lebrun@bootlin.com>
Link: https://lore.kernel.org/r/20241106-mbly-clk-v2-2-84cfefb3f485@bootlin.com
Acked-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Signed-off-by: Stephen Boyd <sboyd@kernel.org>
---
 include/dt-bindings/clock/mobileye,eyeq5-clk.h | 46 +++++++++++++++++++-------
 1 file changed, 34 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/include/dt-bindings/clock/mobileye,eyeq5-clk.h b/include/dt-bindings/clock/mobileye,eyeq5-clk.h
index b433c1772c28..f353c2988035 100644
--- a/include/dt-bindings/clock/mobileye,eyeq5-clk.h
+++ b/include/dt-bindings/clock/mobileye,eyeq5-clk.h
@@ -6,24 +6,46 @@
 #ifndef _DT_BINDINGS_CLOCK_MOBILEYE_EYEQ5_CLK_H
 #define _DT_BINDINGS_CLOCK_MOBILEYE_EYEQ5_CLK_H
 
-#define EQ5C_PLL_CPU	0
-#define EQ5C_PLL_VMP	1
-#define EQ5C_PLL_PMA	2
-#define EQ5C_PLL_VDI	3
-#define EQ5C_PLL_DDR0	4
-#define EQ5C_PLL_PCI	5
-#define EQ5C_PLL_PER	6
-#define EQ5C_PLL_PMAC	7
-#define EQ5C_PLL_MPC	8
-#define EQ5C_PLL_DDR1	9
-
-#define EQ5C_DIV_OSPI	10
+#define EQ5C_PLL_CPU		0
+#define EQ5C_PLL_VMP		1
+#define EQ5C_PLL_PMA		2
+#define EQ5C_PLL_VDI		3
+#define EQ5C_PLL_DDR0		4
+#define EQ5C_PLL_PCI		5
+#define EQ5C_PLL_PER		6
+#define EQ5C_PLL_PMAC		7
+#define EQ5C_PLL_MPC		8
+#define EQ5C_PLL_DDR1		9
+
+#define EQ5C_DIV_OSPI		10
+
+/* EQ5C_PLL_CPU children */
+#define EQ5C_CPU_CORE0		11
+#define EQ5C_CPU_CORE1		12
+#define EQ5C_CPU_CORE2		13
+#define EQ5C_CPU_CORE3		14
+
+/* EQ5C_PLL_PER children */
+#define EQ5C_PER_OCC		15
+#define EQ5C_PER_UART		16
+#define EQ5C_PER_SPI		17
+#define EQ5C_PER_I2C		18
+#define EQ5C_PER_GPIO		19
+#define EQ5C_PER_EMMC		20
+#define EQ5C_PER_OCC_PCI	21
 
 #define EQ6LC_PLL_DDR		0
 #define EQ6LC_PLL_CPU		1
 #define EQ6LC_PLL_PER		2
 #define EQ6LC_PLL_VDI		3
 
+#define EQ6HC_CENTRAL_PLL_CPU	0
+#define EQ6HC_CENTRAL_CPU_OCC	1
+
+#define EQ6HC_WEST_PLL_PER	0
+#define EQ6HC_WEST_PER_OCC	1
+#define EQ6HC_WEST_PER_UART	2
+
 #define EQ6HC_SOUTH_PLL_VDI		0
 #define EQ6HC_SOUTH_PLL_PCIE		1
 #define EQ6HC_SOUTH_PLL_PER		2
-- 
cgit v1.2.3


From 4eb5e9c6c4cdfca9fb0577f62580db46f5acd1a5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Th=C3=A9o=20Lebrun?= <theo.lebrun@bootlin.com>
Date: Wed, 6 Nov 2024 17:03:54 +0100
Subject: clk: fixed-factor: add clk_hw_register_fixed_factor_index() function
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add non-devres version of clk_hw_register_fixed_factor(), with parent
targeted using its index.

Signed-off-by: Théo Lebrun <theo.lebrun@bootlin.com>
Link: https://lore.kernel.org/r/20241106-mbly-clk-v2-3-84cfefb3f485@bootlin.com
Signed-off-by: Stephen Boyd <sboyd@kernel.org>
---
 include/linux/clk-provider.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h
index dbe793964c24..52b46c63a2bd 100644
--- a/include/linux/clk-provider.h
+++ b/include/linux/clk-provider.h
@@ -1142,6 +1142,9 @@ struct clk_hw *clk_hw_register_fixed_factor_with_accuracy_fwname(struct device *
 		struct device_node *np, const char *name, const char *fw_name,
 		unsigned long flags, unsigned int mult, unsigned int div,
 		unsigned long acc);
+struct clk_hw *clk_hw_register_fixed_factor_index(struct device *dev,
+		const char *name, unsigned int index, unsigned long flags,
+		unsigned int mult, unsigned int div);
 void clk_hw_unregister_fixed_factor(struct clk_hw *hw);
 struct clk_hw *devm_clk_hw_register_fixed_factor(struct device *dev,
 		const char *name, const char *parent_name, unsigned long flags,
-- 
cgit v1.2.3


From 721aa69e708b7432af83c4bb00a30e2b7c27da28 Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Fri, 8 Nov 2024 07:54:47 +0100
Subject: net: phy: convert eee_broken_modes to a linkmode bitmap

eee_broken_modes has a eee_cap1 register layout currently. This doen't
allow to flag e.g. 2.5Gbps or 5Gbps BaseT EEE as broken. To overcome
this limitation switch eee_broken_modes to a linkmode bitmap.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Link: https://patch.msgid.link/dfe0c9ff-84b0-4328-86d7-e917ebc084a1@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/phy.h | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/phy.h b/include/linux/phy.h
index 1e4127c495c0..a6622f873d03 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -721,16 +721,15 @@ struct phy_device {
 	/* used for eee validation and configuration*/
 	__ETHTOOL_DECLARE_LINK_MODE_MASK(supported_eee);
 	__ETHTOOL_DECLARE_LINK_MODE_MASK(advertising_eee);
+	/* Energy efficient ethernet modes which should be prohibited */
+	__ETHTOOL_DECLARE_LINK_MODE_MASK(eee_broken_modes);
 	bool eee_enabled;
+	bool enable_tx_lpi;
+	struct eee_config eee_cfg;
 
 	/* Host supported PHY interface types. Should be ignored if empty. */
 	DECLARE_PHY_INTERFACE_MASK(host_interfaces);
 
-	/* Energy efficient ethernet modes which should be prohibited */
-	u32 eee_broken_modes;
-	bool enable_tx_lpi;
-	struct eee_config eee_cfg;
-
 #ifdef CONFIG_LED_TRIGGER_PHY
 	struct phy_led_trigger *phy_led_triggers;
 	unsigned int phy_num_led_triggers;
-- 
cgit v1.2.3


From ed623fb8e38e2a241da12864778ec9c9cf930c65 Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Fri, 8 Nov 2024 08:07:10 +0100
Subject: net: phy: add phy_set_eee_broken

Add an accessor for eee_broken_modes, so that drivers
don't have to deal with phylib internals.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Link: https://patch.msgid.link/0f8ee279-d40d-4489-a3b0-d993472d744a@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/phy.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include')

diff --git a/include/linux/phy.h b/include/linux/phy.h
index a6622f873d03..b8346db42727 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -1263,6 +1263,16 @@ void of_set_phy_eee_broken(struct phy_device *phydev);
 void of_set_phy_timing_role(struct phy_device *phydev);
 int phy_speed_down_core(struct phy_device *phydev);
 
+/**
+ * phy_set_eee_broken - Mark an EEE mode as broken so that it isn't advertised.
+ * @phydev: The phy_device struct
+ * @link_mode: The broken EEE mode
+ */
+static inline void phy_set_eee_broken(struct phy_device *phydev, u32 link_mode)
+{
+	linkmode_set_bit(link_mode, phydev->eee_broken_modes);
+}
+
 /**
  * phy_is_started - Convenience function to check whether PHY is started
  * @phydev: The phy_device struct
-- 
cgit v1.2.3


From e7cb7cf43afb96a4fd8f68c2dfb9b2fbdd444654 Mon Sep 17 00:00:00 2001
From: Alistair Francis <alistair.francis@wdc.com>
Date: Tue, 12 Nov 2024 20:54:29 +1000
Subject: include: mdio: Remove mdio45_ethtool_gset()

mdio45_ethtool_gset() is never called, so let's remove it.

Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
Link: https://patch.msgid.link/20241112105430.438491-1-alistair@alistair23.me
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/mdio.h | 16 ----------------
 1 file changed, 16 deletions(-)

(limited to 'include')

diff --git a/include/linux/mdio.h b/include/linux/mdio.h
index efeca5bd7600..c63f43645d50 100644
--- a/include/linux/mdio.h
+++ b/include/linux/mdio.h
@@ -173,22 +173,6 @@ mdio45_ethtool_ksettings_get_npage(const struct mdio_if_info *mdio,
 				   struct ethtool_link_ksettings *cmd,
 				   u32 npage_adv, u32 npage_lpa);
 
-/**
- * mdio45_ethtool_gset - get settings for ETHTOOL_GSET
- * @mdio: MDIO interface
- * @ecmd: Ethtool request structure
- *
- * Since the CSRs for auto-negotiation using next pages are not fully
- * standardised, this function does not attempt to decode them.  Use
- * mdio45_ethtool_gset_npage() to specify advertisement bits from next
- * pages.
- */
-static inline void mdio45_ethtool_gset(const struct mdio_if_info *mdio,
-				       struct ethtool_cmd *ecmd)
-{
-	mdio45_ethtool_gset_npage(mdio, ecmd, 0, 0);
-}
-
 /**
  * mdio45_ethtool_ksettings_get - get settings for ETHTOOL_GLINKSETTINGS
  * @mdio: MDIO interface
-- 
cgit v1.2.3


From 575092a7f0ce4c6d5907cd908d28c0f69690a136 Mon Sep 17 00:00:00 2001
From: Alistair Francis <alistair.francis@wdc.com>
Date: Tue, 12 Nov 2024 20:54:30 +1000
Subject: mdio: Remove mdio45_ethtool_gset_npage()

The mdio45_ethtool_gset_npage() function isn't called, so let's remove
it.

Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
Link: https://patch.msgid.link/20241112105430.438491-2-alistair@alistair23.me
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/mdio.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/mdio.h b/include/linux/mdio.h
index c63f43645d50..3c3deac57894 100644
--- a/include/linux/mdio.h
+++ b/include/linux/mdio.h
@@ -165,9 +165,6 @@ extern int mdio_set_flag(const struct mdio_if_info *mdio,
 			 bool sense);
 extern int mdio45_links_ok(const struct mdio_if_info *mdio, u32 mmds);
 extern int mdio45_nway_restart(const struct mdio_if_info *mdio);
-extern void mdio45_ethtool_gset_npage(const struct mdio_if_info *mdio,
-				      struct ethtool_cmd *ecmd,
-				      u32 npage_adv, u32 npage_lpa);
 extern void
 mdio45_ethtool_ksettings_get_npage(const struct mdio_if_info *mdio,
 				   struct ethtool_link_ksettings *cmd,
-- 
cgit v1.2.3


From 9e43ad7a1edef268acac603e1975c8f50a20d02f Mon Sep 17 00:00:00 2001
From: Edward Cree <ecree.xilinx@gmail.com>
Date: Wed, 13 Nov 2024 12:13:09 +0000
Subject: net: ethtool: only allow set_rxnfc with rss + ring_cookie if driver
 opts in

Ethtool ntuple filters with FLOW_RSS were originally defined as adding
 the base queue ID (ring_cookie) to the value from the indirection table,
 so that the same table could distribute over more than one set of queues
 when used by different filters.
However, some drivers / hardware ignore the ring_cookie, and simply use
 the indirection table entries as queue IDs directly.  Thus, for drivers
 which have not opted in by setting ethtool_ops.cap_rss_rxnfc_adds to
 declare that they support the original (addition) semantics, reject in
 ethtool_set_rxnfc any filter which combines FLOW_RSS and a nonzero ring.
(For a ring_cookie of zero, both behaviours are equivalent.)
Set the cap bit in sfc, as it is known to support this feature.

Signed-off-by: Edward Cree <ecree.xilinx@gmail.com>
Reviewed-by: Martin Habets <habetsm.xilinx@gmail.com>
Link: https://patch.msgid.link/cc3da0844083b0e301a33092a6299e4042b65221.1731499022.git.ecree.xilinx@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/ethtool.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index 1199e308c8dd..299280c94d07 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -734,6 +734,9 @@ struct kernel_ethtool_ts_info {
  * @rxfh_per_ctx_key: device supports setting different RSS key for each
  *	additional context. Netlink API should report hfunc, key, and input_xfrm
  *	for every context, not just context 0.
+ * @cap_rss_rxnfc_adds: device supports nonzero ring_cookie in filters with
+ *	%FLOW_RSS flag; the queue ID from the filter is added to the value from
+ *	the indirection table to determine the delivery queue.
  * @rxfh_indir_space: max size of RSS indirection tables, if indirection table
  *	size as returned by @get_rxfh_indir_size may change during lifetime
  *	of the device. Leave as 0 if the table size is constant.
@@ -956,6 +959,7 @@ struct ethtool_ops {
 	u32     cap_rss_ctx_supported:1;
 	u32	cap_rss_sym_xor_supported:1;
 	u32	rxfh_per_ctx_key:1;
+	u32	cap_rss_rxnfc_adds:1;
 	u32	rxfh_indir_space;
 	u16	rxfh_key_space;
 	u16	rxfh_priv_size;
-- 
cgit v1.2.3


From fd7b4f9f46d46acbc7af3a439bb0d869efdc5c58 Mon Sep 17 00:00:00 2001
From: Qun-Wei Lin <qun-wei.lin@mediatek.com>
Date: Wed, 13 Nov 2024 12:25:43 +0800
Subject: sched/task_stack: fix object_is_on_stack() for KASAN tagged pointers

When CONFIG_KASAN_SW_TAGS and CONFIG_KASAN_STACK are enabled, the
object_is_on_stack() function may produce incorrect results due to the
presence of tags in the obj pointer, while the stack pointer does not have
tags.  This discrepancy can lead to incorrect stack object detection and
subsequently trigger warnings if CONFIG_DEBUG_OBJECTS is also enabled.

Example of the warning:

ODEBUG: object 3eff800082ea7bb0 is NOT on stack ffff800082ea0000, but annotated.
------------[ cut here ]------------
WARNING: CPU: 0 PID: 1 at lib/debugobjects.c:557 __debug_object_init+0x330/0x364
Modules linked in:
CPU: 0 UID: 0 PID: 1 Comm: swapper/0 Not tainted 6.12.0-rc5 #4
Hardware name: linux,dummy-virt (DT)
pstate: 600000c5 (nZCv daIF -PAN -UAO -TCO -DIT -SSBS BTYPE=--)
pc : __debug_object_init+0x330/0x364
lr : __debug_object_init+0x330/0x364
sp : ffff800082ea7b40
x29: ffff800082ea7b40 x28: 98ff0000c0164518 x27: 98ff0000c0164534
x26: ffff800082d93ec8 x25: 0000000000000001 x24: 1cff0000c00172a0
x23: 0000000000000000 x22: ffff800082d93ed0 x21: ffff800081a24418
x20: 3eff800082ea7bb0 x19: efff800000000000 x18: 0000000000000000
x17: 00000000000000ff x16: 0000000000000047 x15: 206b63617473206e
x14: 0000000000000018 x13: ffff800082ea7780 x12: 0ffff800082ea78e
x11: 0ffff800082ea790 x10: 0ffff800082ea79d x9 : 34d77febe173e800
x8 : 34d77febe173e800 x7 : 0000000000000001 x6 : 0000000000000001
x5 : feff800082ea74b8 x4 : ffff800082870a90 x3 : ffff80008018d3c4
x2 : 0000000000000001 x1 : ffff800082858810 x0 : 0000000000000050
Call trace:
 __debug_object_init+0x330/0x364
 debug_object_init_on_stack+0x30/0x3c
 schedule_hrtimeout_range_clock+0xac/0x26c
 schedule_hrtimeout+0x1c/0x30
 wait_task_inactive+0x1d4/0x25c
 kthread_bind_mask+0x28/0x98
 init_rescuer+0x1e8/0x280
 workqueue_init+0x1a0/0x3cc
 kernel_init_freeable+0x118/0x200
 kernel_init+0x28/0x1f0
 ret_from_fork+0x10/0x20
---[ end trace 0000000000000000 ]---
ODEBUG: object 3eff800082ea7bb0 is NOT on stack ffff800082ea0000, but annotated.
------------[ cut here ]------------

Link: https://lkml.kernel.org/r/20241113042544.19095-1-qun-wei.lin@mediatek.com
Signed-off-by: Qun-Wei Lin <qun-wei.lin@mediatek.com>
Cc: Andrew Yang <andrew.yang@mediatek.com>
Cc: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
Cc: Casper Li <casper.li@mediatek.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Chinwen Chang <chinwen.chang@mediatek.com>
Cc: Kent Overstreet <kent.overstreet@linux.dev>
Cc: Matthias Brugger <matthias.bgg@gmail.com>
Cc: Pasha Tatashin <pasha.tatashin@soleen.com>
Cc: Shakeel Butt <shakeel.butt@linux.dev>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/sched/task_stack.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/sched/task_stack.h b/include/linux/sched/task_stack.h
index bf10bdb487dd..6c2fef89a4fd 100644
--- a/include/linux/sched/task_stack.h
+++ b/include/linux/sched/task_stack.h
@@ -9,6 +9,7 @@
 #include <linux/sched.h>
 #include <linux/magic.h>
 #include <linux/refcount.h>
+#include <linux/kasan.h>
 
 #ifdef CONFIG_THREAD_INFO_IN_TASK
 
@@ -89,6 +90,7 @@ static inline int object_is_on_stack(const void *obj)
 {
 	void *stack = task_stack_page(current);
 
+	obj = kasan_reset_tag(obj);
 	return (obj >= stack) && (obj < (stack + THREAD_SIZE));
 }
 
-- 
cgit v1.2.3


From 05d4532b60e3e6e2a094ec56a88d1def50bd2430 Mon Sep 17 00:00:00 2001
From: Joshua Hahn <joshua.hahnjy@gmail.com>
Date: Fri, 1 Nov 2024 13:44:02 -0700
Subject: memcg/hugetlb: add hugeTLB counters to memcg
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch introduces a new counter to memory.stat that tracks hugeTLB
usage, only if hugeTLB accounting is done to memory.current.  This feature
is enabled the same way hugeTLB accounting is enabled, via the
memory_hugetlb_accounting mount flag for cgroupsv2.

1. Why is this patch necessary?
Currently, memcg hugeTLB accounting is an opt-in feature [1] that adds
hugeTLB usage to memory.current.  However, the metric is not reported in
memory.stat.  Given that users often interpret memory.stat as a breakdown
of the value reported in memory.current, the disparity between the two
reports can be confusing.  This patch solves this problem by including the
metric in memory.stat as well, but only if it is also reported in
memory.current (it would also be confusing if the value was reported in
memory.stat, but not in memory.current)

Aside from the consistency between the two files, we also see benefits in
observability.  Userspace might be interested in the hugeTLB footprint of
cgroups for many reasons.  For instance, system admins might want to
verify that hugeTLB usage is distributed as expected across tasks: i.e.
memory-intensive tasks are using more hugeTLB pages than tasks that don't
consume a lot of memory, or are seen to fault frequently.  Note that this
is separate from wanting to inspect the distribution for limiting purposes
(in which case, hugeTLB controller makes more sense).

2. We already have a hugeTLB controller. Why not use that?
It is true that hugeTLB tracks the exact value that we want.  In fact, by
enabling the hugeTLB controller, we get all of the observability benefits
that I mentioned above, and users can check the total hugeTLB usage,
verify if it is distributed as expected, etc.

With this said, there are 2 problems:
(a) They are still not reported in memory.stat, which means the
    disparity between the memcg reports are still there.
(b) We cannot reasonably expect users to enable the hugeTLB controller
    just for the sake of hugeTLB usage reporting, especially since
    they don't have any use for hugeTLB usage enforcing [2].

3. Implementation Details:
In the alloc / free hugetlb functions, we call lruvec_stat_mod_folio
regardless of whether memcg accounts hugetlb.  mem_cgroup_commit_charge
which is called from alloc_hugetlb_folio will set memcg for the folio only
if the CGRP_ROOT_MEMORY_HUGETLB_ACCOUNTING cgroup mount option is used, so
lruvec_stat_mod_folio accounts per-memcg hugetlb counters only if the
feature is enabled.  Regardless of whether memcg accounts for hugetlb, the
newly added global counter is updated and shown in /proc/vmstat.

The global counter is added because vmstats is the preferred framework for
cgroup stats.  It makes stat items consistent between global and cgroups.
It also provides a per-node breakdown, which is useful.  Because it does
not use cgroup-specific hooks, we also keep generic MM code separate from
memcg code.

[1] https://lore.kernel.org/all/20231006184629.155543-1-nphamcs@gmail.com/
[2] Of course, we can't make a new patch for every feature that can be
    duplicated. However, since the existing solution of enabling the
    hugeTLB controller is an imperfect solution that still leaves a
    discrepancy between memory.stat and memory.curent, I think that it
    is reasonable to isolate the feature in this case.

Link: https://lkml.kernel.org/r/20241101204402.1885383-1-joshua.hahnjy@gmail.com
Signed-off-by: Joshua Hahn <joshua.hahnjy@gmail.com>
Suggested-by: Nhat Pham <nphamcs@gmail.com>
Suggested-by: Shakeel Butt <shakeel.butt@linux.dev>
Suggested-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Shakeel Butt <shakeel.butt@linux.dev>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Chris Down <chris@chrisdown.name>
Acked-by: Michal Hocko <mhocko@suse.com>
Reviewed-by: Roman Gushchin <roman.gushchin@linux.dev>
Reviewed-by: Nhat Pham <nphamcs@gmail.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Michal Koutný <mkoutny@suse.com>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Zefan Li <lizefan.x@bytedance.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/mmzone.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 5e8f567753bd..b36124145a16 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -220,6 +220,9 @@ enum node_stat_item {
 	PGDEMOTE_KSWAPD,
 	PGDEMOTE_DIRECT,
 	PGDEMOTE_KHUGEPAGED,
+#ifdef CONFIG_HUGETLB_PAGE
+	NR_HUGETLB,
+#endif
 	NR_VM_NODE_STAT_ITEMS
 };
 
-- 
cgit v1.2.3


From a12143e6084c502fc3cfaa8b717bffc8c14cf806 Mon Sep 17 00:00:00 2001
From: Jeremy Sowden <jeremy@azazel.net>
Date: Thu, 14 Nov 2024 22:07:51 +0100
Subject: netfilter: bitwise: rename some boolean operation functions

In the next patch we add support for doing AND, OR and XOR operations
directly in the kernel, so rename some functions and an enum constant
related to mask-and-xor boolean operations.

Signed-off-by: Jeremy Sowden <jeremy@azazel.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/uapi/linux/netfilter/nf_tables.h | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index 9e9079321380..487542234ccd 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -564,16 +564,20 @@ enum nft_immediate_attributes {
 /**
  * enum nft_bitwise_ops - nf_tables bitwise operations
  *
- * @NFT_BITWISE_BOOL: mask-and-xor operation used to implement NOT, AND, OR and
- *                    XOR boolean operations
+ * @NFT_BITWISE_MASK_XOR: mask-and-xor operation used to implement NOT, AND, OR
+ *                        and XOR boolean operations
  * @NFT_BITWISE_LSHIFT: left-shift operation
  * @NFT_BITWISE_RSHIFT: right-shift operation
  */
 enum nft_bitwise_ops {
-	NFT_BITWISE_BOOL,
+	NFT_BITWISE_MASK_XOR,
 	NFT_BITWISE_LSHIFT,
 	NFT_BITWISE_RSHIFT,
 };
+/*
+ * Old name for NFT_BITWISE_MASK_XOR.  Retained for backwards-compatibility.
+ */
+#define NFT_BITWISE_BOOL NFT_BITWISE_MASK_XOR
 
 /**
  * enum nft_bitwise_attributes - nf_tables bitwise expression netlink attributes
-- 
cgit v1.2.3


From 4a530a7c751d27f9dbd70b7fc45670cd11713b13 Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Fri, 11 Oct 2024 11:00:21 +0200
Subject: fs: prepare for "explicit connectable" file handles

We would like to use the high 16bit of the handle_type field to encode
file handle traits, such as "connectable".

In preparation for this change, make sure that filesystems do not return
a handle_type value with upper bits set and that the open_by_handle_at(2)
syscall rejects these handle types.

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Link: https://lore.kernel.org/r/20241011090023.655623-2-amir73il@gmail.com
Fixes: 570df4e9c23f ("ceph: snapshot nfs re-export")
Acked-by:
Reviewed-by: Jan Kara <jack@suse.cz>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/linux/exportfs.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'include')

diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h
index 893a1d21dc1c..5e14d4500a75 100644
--- a/include/linux/exportfs.h
+++ b/include/linux/exportfs.h
@@ -160,6 +160,17 @@ struct fid {
 #define EXPORT_FH_FID		0x2 /* File handle may be non-decodeable */
 #define EXPORT_FH_DIR_ONLY	0x4 /* Only decode file handle for a directory */
 
+/*
+ * Filesystems use only lower 8 bits of file_handle type for fid_type.
+ * name_to_handle_at() uses upper 16 bits of type as user flags to be
+ * interpreted by open_by_handle_at().
+ */
+#define FILEID_USER_FLAGS_MASK	0xffff0000
+#define FILEID_USER_FLAGS(type) ((type) & FILEID_USER_FLAGS_MASK)
+
+/* Flags supported in encoded handle_type that is exported to user */
+#define FILEID_VALID_USER_FLAGS	(0)
+
 /**
  * struct export_operations - for nfsd to communicate with file systems
  * @encode_fh:      encode a file handle fragment from a dentry
-- 
cgit v1.2.3


From c374196b2b9f4b803fccd59ed82f0712041e21e1 Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Fri, 11 Oct 2024 11:00:22 +0200
Subject: fs: name_to_handle_at() support for "explicit connectable" file
 handles

nfsd encodes "connectable" file handles for the subtree_check feature,
which can be resolved to an open file with a connected path.
So far, userspace nfs server could not make use of this functionality.

Introduce a new flag AT_HANDLE_CONNECTABLE to name_to_handle_at(2).
When used, the encoded file handle is "explicitly connectable".

The "explicitly connectable" file handle sets bits in the high 16bit of
the handle_type field, so open_by_handle_at(2) will know that it needs
to open a file with a connected path.

old kernels will now recognize the handle_type with high bits set,
so "explicitly connectable" file handles cannot be decoded by
open_by_handle_at(2) on old kernels.

The flag AT_HANDLE_CONNECTABLE is not allowed together with either
AT_HANDLE_FID or AT_EMPTY_PATH.

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Link: https://lore.kernel.org/r/20241011090023.655623-3-amir73il@gmail.com
Fixes: 570df4e9c23f ("ceph: snapshot nfs re-export")
Acked-by:
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/linux/exportfs.h   | 2 ++
 include/uapi/linux/fcntl.h | 1 +
 2 files changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h
index 5e14d4500a75..4ee42b2cf4ab 100644
--- a/include/linux/exportfs.h
+++ b/include/linux/exportfs.h
@@ -169,6 +169,8 @@ struct fid {
 #define FILEID_USER_FLAGS(type) ((type) & FILEID_USER_FLAGS_MASK)
 
 /* Flags supported in encoded handle_type that is exported to user */
+#define FILEID_IS_CONNECTABLE	0x10000
+#define FILEID_IS_DIR		0x20000
 #define FILEID_VALID_USER_FLAGS	(0)
 
 /**
diff --git a/include/uapi/linux/fcntl.h b/include/uapi/linux/fcntl.h
index 87e2dec79fea..56ff2100e021 100644
--- a/include/uapi/linux/fcntl.h
+++ b/include/uapi/linux/fcntl.h
@@ -153,6 +153,7 @@
 					   object identity and may not be
 					   usable with open_by_handle_at(2). */
 #define AT_HANDLE_MNT_ID_UNIQUE	0x001	/* Return the u64 unique mount ID. */
+#define AT_HANDLE_CONNECTABLE	0x002	/* Request a connectable file handle */
 
 #if defined(__KERNEL__)
 #define AT_GETATTR_NOSEC	0x80000000
-- 
cgit v1.2.3


From a20853ab8296d4a8754482cb5e9adde8ab426a25 Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Fri, 11 Oct 2024 11:00:23 +0200
Subject: fs: open_by_handle_at() support for decoding "explicit connectable"
 file handles

Teach open_by_handle_at(2) about the type format of "explicit connectable"
file handles that were created using the AT_HANDLE_CONNECTABLE flag to
name_to_handle_at(2).

When decoding an "explicit connectable" file handles, name_to_handle_at(2)
should fail if it cannot open a "connected" fd with known path, which is
accessible (to capable user) from mount fd path.

Note that this does not check if the path is accessible to the calling
user, just that it is accessible wrt the mount namesapce, so if there
is no "connected" alias, or if parts of the path are hidden in the
mount namespace, open_by_handle_at(2) will return -ESTALE.

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Link: https://lore.kernel.org/r/20241011090023.655623-4-amir73il@gmail.com
Fixes: 570df4e9c23f ("ceph: snapshot nfs re-export")
Acked-by:
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/linux/exportfs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h
index 4ee42b2cf4ab..fcab6ab1d38a 100644
--- a/include/linux/exportfs.h
+++ b/include/linux/exportfs.h
@@ -171,7 +171,7 @@ struct fid {
 /* Flags supported in encoded handle_type that is exported to user */
 #define FILEID_IS_CONNECTABLE	0x10000
 #define FILEID_IS_DIR		0x20000
-#define FILEID_VALID_USER_FLAGS	(0)
+#define FILEID_VALID_USER_FLAGS	(FILEID_IS_CONNECTABLE | FILEID_IS_DIR)
 
 /**
  * struct export_operations - for nfsd to communicate with file systems
-- 
cgit v1.2.3


From b0ccf4f53d968e794a4ea579d5135cc1aaf1a53f Mon Sep 17 00:00:00 2001
From: Jeremy Sowden <jeremy@azazel.net>
Date: Thu, 14 Nov 2024 22:08:13 +0100
Subject: netfilter: bitwise: add support for doing AND, OR and XOR directly

Hitherto, these operations have been converted in user space to
mask-and-xor operations on one register and two immediate values, and it
is the latter which have been evaluated by the kernel.  We add support
for evaluating these operations directly in kernel space on one register
and either an immediate value or a second register.

Pablo made a few changes to the original patch:

- EINVAL if NFTA_BITWISE_SREG2 is used with fast version.
- Allow _AND,_OR,_XOR with _DATA != sizeof(u32)
- Dump _SREG2 or _DATA with _AND,_OR,_XOR

Signed-off-by: Jeremy Sowden <jeremy@azazel.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/uapi/linux/netfilter/nf_tables.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index 487542234ccd..49c944e78463 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -568,11 +568,17 @@ enum nft_immediate_attributes {
  *                        and XOR boolean operations
  * @NFT_BITWISE_LSHIFT: left-shift operation
  * @NFT_BITWISE_RSHIFT: right-shift operation
+ * @NFT_BITWISE_AND: and operation
+ * @NFT_BITWISE_OR: or operation
+ * @NFT_BITWISE_XOR: xor operation
  */
 enum nft_bitwise_ops {
 	NFT_BITWISE_MASK_XOR,
 	NFT_BITWISE_LSHIFT,
 	NFT_BITWISE_RSHIFT,
+	NFT_BITWISE_AND,
+	NFT_BITWISE_OR,
+	NFT_BITWISE_XOR,
 };
 /*
  * Old name for NFT_BITWISE_MASK_XOR.  Retained for backwards-compatibility.
@@ -590,6 +596,7 @@ enum nft_bitwise_ops {
  * @NFTA_BITWISE_OP: type of operation (NLA_U32: nft_bitwise_ops)
  * @NFTA_BITWISE_DATA: argument for non-boolean operations
  *                     (NLA_NESTED: nft_data_attributes)
+ * @NFTA_BITWISE_SREG2: second source register (NLA_U32: nft_registers)
  *
  * The bitwise expression supports boolean and shift operations.  It implements
  * the boolean operations by performing the following operation:
@@ -613,6 +620,7 @@ enum nft_bitwise_attributes {
 	NFTA_BITWISE_XOR,
 	NFTA_BITWISE_OP,
 	NFTA_BITWISE_DATA,
+	NFTA_BITWISE_SREG2,
 	__NFTA_BITWISE_MAX
 };
 #define NFTA_BITWISE_MAX	(__NFTA_BITWISE_MAX - 1)
-- 
cgit v1.2.3


From 957860cbc1dc89f79f2acc193470224e350dfd03 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Fri, 15 Nov 2024 07:14:03 -0700
Subject: block: make struct rq_list available for !CONFIG_BLOCK

A previous commit changed how requests are linked in the plug structure,
but unlike the previous method, it uses a new type for it rather than
struct request. The latter is available even for !CONFIG_BLOCK, while
struct rq_list is now. Move it outside CONFIG_BLOCK.

Reported-by: Nathan Chancellor <nathan@kernel.org>
Fixes: a3396b99990d ("block: add a rq_list type")
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blkdev.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 00212e96261a..a1fd0ddce5cf 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1006,12 +1006,12 @@ extern void blk_put_queue(struct request_queue *);
 
 void blk_mark_disk_dead(struct gendisk *disk);
 
-#ifdef CONFIG_BLOCK
 struct rq_list {
 	struct request *head;
 	struct request *tail;
 };
 
+#ifdef CONFIG_BLOCK
 /*
  * blk_plug permits building a queue of related requests by holding the I/O
  * fragments for a short period. This allows merging of sequential requests
-- 
cgit v1.2.3


From 7eb4e1dd71009472215bc1f8226ee9a041da8d37 Mon Sep 17 00:00:00 2001
From: Nicolas Saenz Julienne <nsaenz@amazon.com>
Date: Tue, 12 Nov 2024 18:52:16 +0000
Subject: x86/efi: Drop support for the EFI_PROPERTIES_TABLE

Drop support for the EFI_PROPERTIES_TABLE. It was a failed, short-lived
experiment that broke the boot both on Linux and Windows, and was
replaced by the EFI_MEMORY_ATTRIBUTES_TABLE shortly after.

Suggested-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Nicolas Saenz Julienne <nsaenz@amazon.com>
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
 include/linux/efi.h | 17 +++--------------
 1 file changed, 3 insertions(+), 14 deletions(-)

(limited to 'include')

diff --git a/include/linux/efi.h b/include/linux/efi.h
index e28d88066033..e5815867aba9 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -379,7 +379,6 @@ void efi_native_runtime_setup(void);
 #define EFI_SYSTEM_RESOURCE_TABLE_GUID		EFI_GUID(0xb122a263, 0x3661, 0x4f68,  0x99, 0x29, 0x78, 0xf8, 0xb0, 0xd6, 0x21, 0x80)
 #define EFI_FILE_SYSTEM_GUID			EFI_GUID(0x964e5b22, 0x6459, 0x11d2,  0x8e, 0x39, 0x00, 0xa0, 0xc9, 0x69, 0x72, 0x3b)
 #define DEVICE_TREE_GUID			EFI_GUID(0xb1b621d5, 0xf19c, 0x41a5,  0x83, 0x0b, 0xd9, 0x15, 0x2c, 0x69, 0xaa, 0xe0)
-#define EFI_PROPERTIES_TABLE_GUID		EFI_GUID(0x880aaca3, 0x4adc, 0x4a04,  0x90, 0x79, 0xb7, 0x47, 0x34, 0x08, 0x25, 0xe5)
 #define EFI_RNG_PROTOCOL_GUID			EFI_GUID(0x3152bca5, 0xeade, 0x433d,  0x86, 0x2e, 0xc0, 0x1c, 0xdc, 0x29, 0x1f, 0x44)
 #define EFI_RNG_ALGORITHM_RAW			EFI_GUID(0xe43176d7, 0xb6e8, 0x4827,  0xb7, 0x84, 0x7f, 0xfd, 0xc4, 0xb6, 0x85, 0x61)
 #define EFI_MEMORY_ATTRIBUTES_TABLE_GUID	EFI_GUID(0xdcfa911d, 0x26eb, 0x469f,  0xa2, 0x20, 0x38, 0xb7, 0xdc, 0x46, 0x12, 0x20)
@@ -580,15 +579,6 @@ struct efi_mem_range {
 	u64 attribute;
 };
 
-typedef struct {
-	u32 version;
-	u32 length;
-	u64 memory_protection_attribute;
-} efi_properties_table_t;
-
-#define EFI_PROPERTIES_TABLE_VERSION	0x00010000
-#define EFI_PROPERTIES_RUNTIME_MEMORY_PROTECTION_NON_EXECUTABLE_PE_DATA	0x1
-
 typedef struct {
 	u16 version;
 	u16 length;
@@ -871,10 +861,9 @@ static inline int efi_range_is_wc(unsigned long start, unsigned long len)
 #define EFI_PARAVIRT		6	/* Access is via a paravirt interface */
 #define EFI_ARCH_1		7	/* First arch-specific bit */
 #define EFI_DBG			8	/* Print additional debug info at runtime */
-#define EFI_NX_PE_DATA		9	/* Can runtime data regions be mapped non-executable? */
-#define EFI_MEM_ATTR		10	/* Did firmware publish an EFI_MEMORY_ATTRIBUTES table? */
-#define EFI_MEM_NO_SOFT_RESERVE	11	/* Is the kernel configured to ignore soft reservations? */
-#define EFI_PRESERVE_BS_REGIONS	12	/* Are EFI boot-services memory segments available? */
+#define EFI_MEM_ATTR		9	/* Did firmware publish an EFI_MEMORY_ATTRIBUTES table? */
+#define EFI_MEM_NO_SOFT_RESERVE	10	/* Is the kernel configured to ignore soft reservations? */
+#define EFI_PRESERVE_BS_REGIONS	11	/* Are EFI boot-services memory segments available? */
 
 #ifdef CONFIG_EFI
 /*
-- 
cgit v1.2.3


From 83e041522eb9c45479f4490b212687cf1e7e9999 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Fri, 15 Nov 2024 16:54:40 +0000
Subject: io_uring: temporarily disable registered waits

Disable wait argument registration as it'll be replaced with a more
generic feature. We'll still need IORING_ENTER_EXT_ARG_REG parsing
in a few commits so leave it be.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/70b1d1d218c41ba77a76d1789c8641dab0b0563e.1731689588.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/io_uring_types.h | 10 ----------
 include/uapi/linux/io_uring.h  |  3 ---
 2 files changed, 13 deletions(-)

(limited to 'include')

diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index 072e65e93105..52a5da99a205 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -330,14 +330,6 @@ struct io_ring_ctx {
 		atomic_t		cq_wait_nr;
 		atomic_t		cq_timeouts;
 		struct wait_queue_head	cq_wait;
-
-		/*
-		 * If registered with IORING_REGISTER_CQWAIT_REG, a single
-		 * page holds N entries, mapped in cq_wait_arg. cq_wait_index
-		 * is the maximum allowable index.
-		 */
-		struct io_uring_reg_wait	*cq_wait_arg;
-		unsigned char			cq_wait_index;
 	} ____cacheline_aligned_in_smp;
 
 	/* timeouts */
@@ -431,8 +423,6 @@ struct io_ring_ctx {
 	unsigned short			n_sqe_pages;
 	struct page			**ring_pages;
 	struct page			**sqe_pages;
-
-	struct page			**cq_wait_page;
 };
 
 struct io_tw_state {
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 5d08435b95a8..132f5db3d4e8 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -627,9 +627,6 @@ enum io_uring_register_op {
 	/* resize CQ ring */
 	IORING_REGISTER_RESIZE_RINGS		= 33,
 
-	/* register fixed io_uring_reg_wait arguments */
-	IORING_REGISTER_CQWAIT_REG		= 34,
-
 	/* this goes last */
 	IORING_REGISTER_LAST,
 
-- 
cgit v1.2.3


From dfbbfbf191878e8dd422768ce009858d8b5b761e Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Fri, 15 Nov 2024 16:54:41 +0000
Subject: io_uring: introduce concept of memory regions

We've got a good number of mappings we share with the userspace, that
includes the main rings, provided buffer rings, upcoming rings for
zerocopy rx and more. All of them duplicate user argument parsing and
some internal details as well (page pinnning, huge page optimisations,
mmap'ing, etc.)

Introduce a notion of regions. For userspace for now it's just a new
structure called struct io_uring_region_desc which is supposed to
parameterise all such mapping / queue creations. A region either
represents a user provided chunk of memory, in which case the user_addr
field should point to it, or a request for the kernel to allocate the
memory, in which case the user would need to mmap it after using the
offset returned in the mmap_offset field. With a uniform userspace API
we can avoid additional boiler plate code and apply future optimisation
to all of them at once.

Internally, there is a new structure struct io_mapped_region holding all
relevant runtime information and some helpers to work with it. This
patch limits it to user provided regions.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/0e6fe25818dfbaebd1bd90b870a6cac503fe1a24.1731689588.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/io_uring_types.h |  6 ++++++
 include/uapi/linux/io_uring.h  | 14 ++++++++++++++
 2 files changed, 20 insertions(+)

(limited to 'include')

diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index 52a5da99a205..1d3a37234ace 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -75,6 +75,12 @@ struct io_hash_table {
 	unsigned		hash_bits;
 };
 
+struct io_mapped_region {
+	struct page		**pages;
+	void			*vmap_ptr;
+	size_t			nr_pages;
+};
+
 /*
  * Arbitrary limit, can be raised if need be
  */
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 132f5db3d4e8..5cbfd330c688 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -647,6 +647,20 @@ struct io_uring_files_update {
 	__aligned_u64 /* __s32 * */ fds;
 };
 
+enum {
+	/* initialise with user provided memory pointed by user_addr */
+	IORING_MEM_REGION_TYPE_USER		= 1,
+};
+
+struct io_uring_region_desc {
+	__u64 user_addr;
+	__u64 size;
+	__u32 flags;
+	__u32 id;
+	__u64 mmap_offset;
+	__u64 __resv[4];
+};
+
 /*
  * Register a fully sparse file space, rather than pass in an array of all
  * -1 file descriptors.
-- 
cgit v1.2.3


From 93238e66185524aad925acefb2312203b9e26d63 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Fri, 15 Nov 2024 16:54:42 +0000
Subject: io_uring: add memory region registration

Regions will serve multiple purposes. First, with it we can decouple
ring/etc. object creation from registration / mapping of the memory they
will be placed in. We already have hacks that allow to put both SQ and
CQ into the same huge page, in the future we should be able to:

region = create_region(io_ring);
create_pbuf_ring(io_uring, region, offset=0);
create_pbuf_ring(io_uring, region, offset=N);

The second use case is efficiently passing parameters. The following
patch enables back on top of regions IORING_ENTER_EXT_ARG_REG, which
optimises wait arguments. It'll also be useful for request arguments
replacing iovecs, msghdr, etc. pointers. Eventually it would also be
handy for BPF as well if it comes to fruition.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/0798cf3a14fad19cfc96fc9feca5f3e11481691d.1731689588.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/io_uring_types.h | 3 +++
 include/uapi/linux/io_uring.h  | 8 ++++++++
 2 files changed, 11 insertions(+)

(limited to 'include')

diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index 1d3a37234ace..e1d69123e164 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -429,6 +429,9 @@ struct io_ring_ctx {
 	unsigned short			n_sqe_pages;
 	struct page			**ring_pages;
 	struct page			**sqe_pages;
+
+	/* used for optimised request parameter and wait argument passing  */
+	struct io_mapped_region		param_region;
 };
 
 struct io_tw_state {
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 5cbfd330c688..1ee35890125b 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -627,6 +627,8 @@ enum io_uring_register_op {
 	/* resize CQ ring */
 	IORING_REGISTER_RESIZE_RINGS		= 33,
 
+	IORING_REGISTER_MEM_REGION		= 34,
+
 	/* this goes last */
 	IORING_REGISTER_LAST,
 
@@ -661,6 +663,12 @@ struct io_uring_region_desc {
 	__u64 __resv[4];
 };
 
+struct io_uring_mem_region_reg {
+	__u64 region_uptr; /* struct io_uring_region_desc * */
+	__u64 flags;
+	__u64 __resv[2];
+};
+
 /*
  * Register a fully sparse file space, rather than pass in an array of all
  * -1 file descriptors.
-- 
cgit v1.2.3


From 96a30e469ca1d2b8cc7811b40911f8614b558241 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Thu, 14 Nov 2024 16:13:03 -0800
Subject: bpf: use common instruction history across all states

Instead of allocating and copying instruction history each time we
enqueue child verifier state, switch to a model where we use one common
dynamically sized array of instruction history entries across all states.

The key observation for proving this is correct is that instruction
history is only relevant while state is active, which means it either is
a current state (and thus we are actively modifying instruction history
and no other state can interfere with us) or we are checkpointed state
with some children still active (either enqueued or being current).

In the latter case our portion of instruction history is finalized and
won't change or grow, so as long as we keep it immutable until the state
is finalized, we are good.

Now, when state is finalized and is put into state hash for potentially
future pruning lookups, instruction history is not used anymore. This is
because instruction history is only used by precision marking logic, and
we never modify precision markings for finalized states.

So, instead of each state having its own small instruction history, we
keep a global dynamically-sized instruction history, where each state in
current DFS path from root to active state remembers its portion of
instruction history. Current state can append to this history, but
cannot modify any of its parent histories.

Async callback state enqueueing, while logically detached from parent
state, still is part of verification backtracking tree, so has to follow
the same schema as normal state checkpoints.

Because the insn_hist array can be grown through realloc, states don't
keep pointers, they instead maintain two indices, [start, end), into
global instruction history array. End is exclusive index, so
`start == end` means there is no relevant instruction history.

This eliminates a lot of allocations and minimizes overall memory usage.

For instance, running a worst-case test from [0] (but without the
heuristics-based fix [1]), it took 12.5 minutes until we get -ENOMEM.
With the changes in this patch the whole test succeeds in 10 minutes
(very slow, so heuristics from [1] is important, of course).

To further validate correctness, veristat-based comparison was performed for
Meta production BPF objects and BPF selftests objects. In both cases there
were no differences *at all* in terms of verdict or instruction and state
counts, providing a good confidence in the change.

Having this low-memory-overhead solution of keeping dynamic
per-instruction history cheaply opens up some new possibilities, like
keeping extra information for literally every single validated
instruction. This will be used for simplifying precision backpropagation
logic in follow up patches.

  [0] https://lore.kernel.org/bpf/20241029172641.1042523-2-eddyz87@gmail.com/
  [1] https://lore.kernel.org/bpf/20241029172641.1042523-1-eddyz87@gmail.com/

Acked-by: Eduard Zingerman <eddyz87@gmail.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/r/20241115001303.277272-1-andrii@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/bpf_verifier.h | 19 +++++++++++--------
 1 file changed, 11 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 6b7c91629176..f4290c179bee 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -334,7 +334,7 @@ struct bpf_func_state {
 
 #define MAX_CALL_FRAMES 8
 
-/* instruction history flags, used in bpf_jmp_history_entry.flags field */
+/* instruction history flags, used in bpf_insn_hist_entry.flags field */
 enum {
 	/* instruction references stack slot through PTR_TO_STACK register;
 	 * we also store stack's frame number in lower 3 bits (MAX_CALL_FRAMES is 8)
@@ -352,7 +352,7 @@ enum {
 static_assert(INSN_F_FRAMENO_MASK + 1 >= MAX_CALL_FRAMES);
 static_assert(INSN_F_SPI_MASK + 1 >= MAX_BPF_STACK / 8);
 
-struct bpf_jmp_history_entry {
+struct bpf_insn_hist_entry {
 	u32 idx;
 	/* insn idx can't be bigger than 1 million */
 	u32 prev_idx : 22;
@@ -442,13 +442,14 @@ struct bpf_verifier_state {
 	 * See get_loop_entry() for more information.
 	 */
 	struct bpf_verifier_state *loop_entry;
-	/* jmp history recorded from first to last.
-	 * backtracking is using it to go from last to first.
-	 * For most states jmp_history_cnt is [0-3].
+	/* Sub-range of env->insn_hist[] corresponding to this state's
+	 * instruction history.
+	 * Backtracking is using it to go from last to first.
+	 * For most states instruction history is short, 0-3 instructions.
 	 * For loops can go up to ~40.
 	 */
-	struct bpf_jmp_history_entry *jmp_history;
-	u32 jmp_history_cnt;
+	u32 insn_hist_start;
+	u32 insn_hist_end;
 	u32 dfs_depth;
 	u32 callback_unroll_depth;
 	u32 may_goto_depth;
@@ -738,7 +739,9 @@ struct bpf_verifier_env {
 		int cur_stack;
 	} cfg;
 	struct backtrack_state bt;
-	struct bpf_jmp_history_entry *cur_hist_ent;
+	struct bpf_insn_hist_entry *insn_hist;
+	struct bpf_insn_hist_entry *cur_hist_ent;
+	u32 insn_hist_cap;
 	u32 pass_cnt; /* number of times do_check() was called */
 	u32 subprog_cnt;
 	/* number of instructions analyzed by the verifier */
-- 
cgit v1.2.3


From 906c508afdca3487c4273bfeda8abedc8e21047b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= <linux@weissschuh.net>
Date: Fri, 15 Nov 2024 17:42:48 +0100
Subject: sysfs: attribute_group: allow registration of const bin_attribute
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

To be able to constify instances of struct bin_attribute it has to be
possible to add them to string attribute_group.
The current type of the bin_attrs member however is not compatible with
that.
Introduce a union that allows registration of both const and non-const
attributes to enable a piecewise transition.
As both union member types are compatible no logic needs to be adapted.

Technically it is now possible register a const struct
bin_attribute and receive it as mutable pointer in the callbacks.
This is a soundness issue.
But this same soundness issue already exists today in
sysfs_create_bin_file().
Also the struct definition and callback implementation are always
closely linked and are meant to be moved to const in lockstep.

Signed-off-by: Thomas Weißschuh <linux@weissschuh.net>
Link: https://lore.kernel.org/r/20241115-b4-sysfs-const-bin_attr-group-v1-1-2c9bb12dfc48@weissschuh.net
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/sysfs.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h
index d713a6445a62..0f2fcd244523 100644
--- a/include/linux/sysfs.h
+++ b/include/linux/sysfs.h
@@ -106,7 +106,10 @@ struct attribute_group {
 					    const struct bin_attribute *,
 					    int);
 	struct attribute	**attrs;
-	struct bin_attribute	**bin_attrs;
+	union {
+		struct bin_attribute		**bin_attrs;
+		const struct bin_attribute	*const *bin_attrs_new;
+	};
 };
 
 #define SYSFS_PREALLOC		010000
-- 
cgit v1.2.3


From d617b3147d54c42351eac63b5398d4ddf4f4011b Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Fri, 15 Nov 2024 16:54:43 +0000
Subject: io_uring: restore back registered wait arguments

Now we've got a more generic region registration API, place
IORING_ENTER_EXT_ARG_REG and re-enable it.

First, the user has to register a region with the
IORING_MEM_REGION_REG_WAIT_ARG flag set. It can only be done for a
ring in a disabled state, aka IORING_SETUP_R_DISABLED, to avoid races
with already running waiters. With that we should have stable constant
values for ctx->cq_wait_{size,arg} in io_get_ext_arg_reg() and hence no
READ_ONCE required.

The other API difference is that we're now passing byte offsets instead
of indexes. The user _must_ align all offsets / pointers to the native
word size, failing to do so might but not necessarily has to lead to a
failure usually returned as -EFAULT. liburing will be hiding this
details from users.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/81822c1b4ffbe8ad391b4f9ad1564def0d26d990.1731689588.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/io_uring_types.h | 3 +++
 include/uapi/linux/io_uring.h  | 5 +++++
 2 files changed, 8 insertions(+)

(limited to 'include')

diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index e1d69123e164..aa5f5ea98076 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -324,6 +324,9 @@ struct io_ring_ctx {
 		unsigned		cq_entries;
 		struct io_ev_fd	__rcu	*io_ev_fd;
 		unsigned		cq_extra;
+
+		void			*cq_wait_arg;
+		size_t			cq_wait_size;
 	} ____cacheline_aligned_in_smp;
 
 	/*
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 1ee35890125b..4418d0192959 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -663,6 +663,11 @@ struct io_uring_region_desc {
 	__u64 __resv[4];
 };
 
+enum {
+	/* expose the region as registered wait arguments */
+	IORING_MEM_REGION_REG_WAIT_ARG		= 1,
+};
+
 struct io_uring_mem_region_reg {
 	__u64 region_uptr; /* struct io_uring_region_desc * */
 	__u64 flags;
-- 
cgit v1.2.3


From 9407f5c3ec10c155aae61fc4496a7c17a96907b4 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Mon, 11 Nov 2024 16:28:51 -0800
Subject: srcu: Unconditionally record srcu_read_lock_lite() in
 ->srcu_reader_flavor

Currently, srcu_read_lock_lite() uses the SRCU_READ_FLAVOR_LITE bit in
->srcu_reader_flavor to communicate to the grace-period processing in
srcu_readers_active_idx_check() that the smp_mb() must be replaced by a
synchronize_rcu().  Unfortunately, ->srcu_reader_flavor is not updated
unless the kernel is built with CONFIG_PROVE_RCU=y.  Therefore in all
kernels built with CONFIG_PROVE_RCU=n, srcu_readers_active_idx_check()
incorrectly uses smp_mb() instead of synchronize_rcu() for srcu_struct
structures whose readers use srcu_read_lock_lite().

This commit therefore causes Tree SRCU srcu_read_lock_lite()
to unconditionally update ->srcu_reader_flavor so that
srcu_readers_active_idx_check() can make the correct choice.

Reported-by: Neeraj Upadhyay <Neeraj.Upadhyay@amd.com>
Closes: https://lore.kernel.org/all/d07e8f4a-d5ff-4c8e-8e61-50db285c57e9@amd.com/
Fixes: c0f08d6b5a61 ("srcu: Add srcu_read_lock_lite() and srcu_read_unlock_lite()")
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Cc: Frederic Weisbecker <frederic@kernel.org>
Reviewed-by: Neeraj Upadhyay <Neeraj.Upadhyay@amd.com>
Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
---
 include/linux/srcu.h     |  8 +-------
 include/linux/srcutiny.h |  3 +++
 include/linux/srcutree.h | 21 +++++++++++++++++++++
 3 files changed, 25 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/linux/srcu.h b/include/linux/srcu.h
index 56f83237de4d..08339eb8a01c 100644
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -183,12 +183,6 @@ static inline int srcu_read_lock_held(const struct srcu_struct *ssp)
 
 #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */
 
-#if defined(CONFIG_PROVE_RCU) && defined(CONFIG_TREE_SRCU)
-void srcu_check_read_flavor(struct srcu_struct *ssp, int read_flavor);
-#else
-#define srcu_check_read_flavor(ssp, read_flavor) do { } while (0)
-#endif
-
 
 /**
  * srcu_dereference_check - fetch SRCU-protected pointer for later dereferencing
@@ -277,7 +271,7 @@ static inline int srcu_read_lock_lite(struct srcu_struct *ssp) __acquires(ssp)
 {
 	int retval;
 
-	srcu_check_read_flavor(ssp, SRCU_READ_FLAVOR_LITE);
+	srcu_check_read_flavor_lite(ssp);
 	retval = __srcu_read_lock_lite(ssp);
 	rcu_try_lock_acquire(&ssp->dep_map);
 	return retval;
diff --git a/include/linux/srcutiny.h b/include/linux/srcutiny.h
index 4d96bbdb45f0..1321da803274 100644
--- a/include/linux/srcutiny.h
+++ b/include/linux/srcutiny.h
@@ -81,6 +81,9 @@ static inline void srcu_barrier(struct srcu_struct *ssp)
 	synchronize_srcu(ssp);
 }
 
+#define srcu_check_read_flavor(ssp, read_flavor) do { } while (0)
+#define srcu_check_read_flavor_lite(ssp) do { } while (0)
+
 /* Defined here to avoid size increase for non-torture kernels. */
 static inline void srcu_torture_stats_print(struct srcu_struct *ssp,
 					    char *tt, char *tf)
diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h
index 778eb61542e1..490aeecc6bb4 100644
--- a/include/linux/srcutree.h
+++ b/include/linux/srcutree.h
@@ -248,4 +248,25 @@ static inline void __srcu_read_unlock_lite(struct srcu_struct *ssp, int idx)
 	RCU_LOCKDEP_WARN(!rcu_is_watching(), "RCU must be watching srcu_read_unlock_lite().");
 }
 
+void __srcu_check_read_flavor(struct srcu_struct *ssp, int read_flavor);
+
+// Record _lite() usage even for CONFIG_PROVE_RCU=n kernels.
+static inline void srcu_check_read_flavor_lite(struct srcu_struct *ssp)
+{
+	struct srcu_data *sdp = raw_cpu_ptr(ssp->sda);
+
+	if (likely(READ_ONCE(sdp->srcu_reader_flavor) & SRCU_READ_FLAVOR_LITE))
+		return;
+
+	// Note that the cmpxchg() in srcu_check_read_flavor() is fully ordered.
+	__srcu_check_read_flavor(ssp, SRCU_READ_FLAVOR_LITE);
+}
+
+// Record non-_lite() usage only for CONFIG_PROVE_RCU=y kernels.
+static inline void srcu_check_read_flavor(struct srcu_struct *ssp, int read_flavor)
+{
+	if (IS_ENABLED(CONFIG_PROVE_RCU))
+		__srcu_check_read_flavor(ssp, read_flavor);
+}
+
 #endif
-- 
cgit v1.2.3


From 41ffcd95015f18178ddc53fed919c2842d52fe38 Mon Sep 17 00:00:00 2001
From: "Russell King (Oracle)" <rmk+kernel@armlinux.org.uk>
Date: Thu, 14 Nov 2024 10:33:27 +0000
Subject: net: phy: fix phylib's dual eee_enabled

phylib has two eee_enabled members. Some parts of the code are using
phydev->eee_enabled, other parts are using phydev->eee_cfg.eee_enabled.
This leads to incorrect behaviour as their state goes out of sync.
ethtool --show-eee shows incorrect information, and --set-eee sometimes
doesn't take effect.

Fix this by only having one eee_enabled member - that in eee_cfg.

Fixes: 49168d1980e2 ("net: phy: Add phy_support_eee() indicating MAC support EEE")
Signed-off-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
Reviewed-by: Heiner Kallweit <hkallweit1@gmail.com>
Link: https://patch.msgid.link/E1tBXAF-00341F-EQ@rmk-PC.armlinux.org.uk
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/phy.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/phy.h b/include/linux/phy.h
index a98bc91a0cde..44890cdf40a2 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -601,7 +601,6 @@ struct macsec_ops;
  * @adv_old: Saved advertised while power saving for WoL
  * @supported_eee: supported PHY EEE linkmodes
  * @advertising_eee: Currently advertised EEE linkmodes
- * @eee_enabled: Flag indicating whether the EEE feature is enabled
  * @enable_tx_lpi: When True, MAC should transmit LPI to PHY
  * @eee_cfg: User configuration of EEE
  * @lp_advertising: Current link partner advertised linkmodes
@@ -721,7 +720,6 @@ struct phy_device {
 	/* used for eee validation and configuration*/
 	__ETHTOOL_DECLARE_LINK_MODE_MASK(supported_eee);
 	__ETHTOOL_DECLARE_LINK_MODE_MASK(advertising_eee);
-	bool eee_enabled;
 
 	/* Host supported PHY interface types. Should be ignored if empty. */
 	DECLARE_PHY_INTERFACE_MASK(host_interfaces);
-- 
cgit v1.2.3


From 221a9c1df790fa711d65daf5ba05d0addc279153 Mon Sep 17 00:00:00 2001
From: Breno Leitao <leitao@debian.org>
Date: Thu, 14 Nov 2024 03:00:11 -0800
Subject: net: netpoll: Individualize the skb pool

The current implementation of the netpoll system uses a global skb
pool, which can lead to inefficient memory usage and
waste when targets are disabled or no longer in use.

This can result in a significant amount of memory being unnecessarily
allocated and retained, potentially causing performance issues and
limiting the availability of resources for other system components.

Modify the netpoll system to assign a skb pool to each target instead of
using a global one.

This approach allows for more fine-grained control over memory
allocation and deallocation, ensuring that resources are only allocated
and retained as needed.

Signed-off-by: Breno Leitao <leitao@debian.org>
Link: https://patch.msgid.link/20241114-skb_buffers_v2-v3-1-9be9f52a8b69@debian.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/netpoll.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h
index cd4e28db0cbd..77635b885c18 100644
--- a/include/linux/netpoll.h
+++ b/include/linux/netpoll.h
@@ -32,6 +32,7 @@ struct netpoll {
 	bool ipv6;
 	u16 local_port, remote_port;
 	u8 remote_mac[ETH_ALEN];
+	struct sk_buff_head skb_pool;
 };
 
 struct netpoll_info {
-- 
cgit v1.2.3


From 4b42fbc6bd8f73d9ded535d8c61ccaa837ff3bd4 Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@nvidia.com>
Date: Thu, 14 Nov 2024 15:09:53 +0100
Subject: ndo_fdb_add: Add a parameter to report whether notification was sent

Currently when FDB entries are added to or deleted from a VXLAN netdevice,
the VXLAN driver emits one notification, including the VXLAN-specific
attributes. The core however always sends a notification as well, a generic
one. Thus two notifications are unnecessarily sent for these operations. A
similar situation comes up with bridge driver, which also emits
notifications on its own:

 # ip link add name vx type vxlan id 1000 dstport 4789
 # bridge monitor fdb &
 [1] 1981693
 # bridge fdb add de:ad:be:ef:13:37 dev vx self dst 192.0.2.1
 de:ad:be:ef:13:37 dev vx dst 192.0.2.1 self permanent
 de:ad:be:ef:13:37 dev vx self permanent

In order to prevent this duplicity, add a paremeter to ndo_fdb_add,
bool *notified. The flag is primed to false, and if the callee sends a
notification on its own, it sets it to true, thus informing the core that
it should not generate another notification.

Signed-off-by: Petr Machata <petrm@nvidia.com>
Reviewed-by: Amit Cohen <amcohen@nvidia.com>
Reviewed-by: Nikolay Aleksandrov <razor@blackwall.org>
Link: https://patch.msgid.link/cbf6ae8195e85cbf922f8058ce4eba770f3b71ed.1731589511.git.petrm@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/netdevice.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 0aae346d919e..6a7fd191e1ee 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1248,8 +1248,10 @@ struct netdev_net_notifier {
  * int (*ndo_fdb_add)(struct ndmsg *ndm, struct nlattr *tb[],
  *		      struct net_device *dev,
  *		      const unsigned char *addr, u16 vid, u16 flags,
- *		      struct netlink_ext_ack *extack);
+ *		      bool *notified, struct netlink_ext_ack *extack);
  *	Adds an FDB entry to dev for addr.
+ *	Callee shall set *notified to true if it sent any appropriate
+ *	notification(s). Otherwise core will send a generic one.
  * int (*ndo_fdb_del)(struct ndmsg *ndm, struct nlattr *tb[],
  *		      struct net_device *dev,
  *		      const unsigned char *addr, u16 vid)
@@ -1525,6 +1527,7 @@ struct net_device_ops {
 					       const unsigned char *addr,
 					       u16 vid,
 					       u16 flags,
+					       bool *notified,
 					       struct netlink_ext_ack *extack);
 	int			(*ndo_fdb_del)(struct ndmsg *ndm,
 					       struct nlattr *tb[],
-- 
cgit v1.2.3


From 42575ad5aab932273475d1ec3e7881cb5a05420e Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@nvidia.com>
Date: Thu, 14 Nov 2024 15:09:54 +0100
Subject: ndo_fdb_del: Add a parameter to report whether notification was sent

In a similar fashion to ndo_fdb_add, which was covered in the previous
patch, add the bool *notified argument to ndo_fdb_del. Callees that send a
notification on their own set the flag to true.

Signed-off-by: Petr Machata <petrm@nvidia.com>
Reviewed-by: Amit Cohen <amcohen@nvidia.com>
Reviewed-by: Nikolay Aleksandrov <razor@blackwall.org>
Link: https://patch.msgid.link/06b1acf4953ef0a5ed153ef1f32d7292044f2be6.1731589511.git.petrm@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/netdevice.h | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 6a7fd191e1ee..ecc686409161 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1254,8 +1254,11 @@ struct netdev_net_notifier {
  *	notification(s). Otherwise core will send a generic one.
  * int (*ndo_fdb_del)(struct ndmsg *ndm, struct nlattr *tb[],
  *		      struct net_device *dev,
- *		      const unsigned char *addr, u16 vid)
+ *		      const unsigned char *addr, u16 vid
+ *		      bool *notified, struct netlink_ext_ack *extack);
  *	Deletes the FDB entry from dev corresponding to addr.
+ *	Callee shall set *notified to true if it sent any appropriate
+ *	notification(s). Otherwise core will send a generic one.
  * int (*ndo_fdb_del_bulk)(struct nlmsghdr *nlh, struct net_device *dev,
  *			   struct netlink_ext_ack *extack);
  * int (*ndo_fdb_dump)(struct sk_buff *skb, struct netlink_callback *cb,
@@ -1533,7 +1536,9 @@ struct net_device_ops {
 					       struct nlattr *tb[],
 					       struct net_device *dev,
 					       const unsigned char *addr,
-					       u16 vid, struct netlink_ext_ack *extack);
+					       u16 vid,
+					       bool *notified,
+					       struct netlink_ext_ack *extack);
 	int			(*ndo_fdb_del_bulk)(struct nlmsghdr *nlh,
 						    struct net_device *dev,
 						    struct netlink_ext_ack *extack);
-- 
cgit v1.2.3


From 3ef66af31feaf5ff5dd73e63b1327789822ed476 Mon Sep 17 00:00:00 2001
From: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
Date: Tue, 12 Nov 2024 09:29:20 +0800
Subject: virtio_ring: introduce add api for premapped

Two APIs are introduced to submit premapped per-buffers.

int virtqueue_add_inbuf_premapped(struct virtqueue *vq,
                                 struct scatterlist *sg, unsigned int num,
                                 void *data,
                                 void *ctx,
                                 gfp_t gfp);

int virtqueue_add_outbuf_premapped(struct virtqueue *vq,
                                  struct scatterlist *sg, unsigned int num,
                                  void *data,
                                  gfp_t gfp);

Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
Acked-by: Jason Wang <jasowang@redhat.com>
Link: https://patch.msgid.link/20241112012928.102478-6-xuanzhuo@linux.alibaba.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/virtio.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'include')

diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index 306137a15d07..13b3f55abca3 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -56,6 +56,17 @@ int virtqueue_add_inbuf_ctx(struct virtqueue *vq,
 			    void *ctx,
 			    gfp_t gfp);
 
+int virtqueue_add_inbuf_premapped(struct virtqueue *vq,
+				  struct scatterlist *sg, unsigned int num,
+				  void *data,
+				  void *ctx,
+				  gfp_t gfp);
+
+int virtqueue_add_outbuf_premapped(struct virtqueue *vq,
+				   struct scatterlist *sg, unsigned int num,
+				   void *data,
+				   gfp_t gfp);
+
 int virtqueue_add_sgs(struct virtqueue *vq,
 		      struct scatterlist *sgs[],
 		      unsigned int out_sgs,
-- 
cgit v1.2.3


From 880ebcbe06636c42cfb328039581e177c6cd6ba5 Mon Sep 17 00:00:00 2001
From: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
Date: Tue, 12 Nov 2024 09:29:22 +0800
Subject: virtio_ring: remove API virtqueue_set_dma_premapped

Now, this API is useless. remove it.

Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
Acked-by: Jason Wang <jasowang@redhat.com>
Link: https://patch.msgid.link/20241112012928.102478-8-xuanzhuo@linux.alibaba.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/virtio.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index 13b3f55abca3..338e0f5efb4b 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -93,8 +93,6 @@ bool virtqueue_enable_cb(struct virtqueue *vq);
 
 unsigned virtqueue_enable_cb_prepare(struct virtqueue *vq);
 
-int virtqueue_set_dma_premapped(struct virtqueue *_vq);
-
 bool virtqueue_poll(struct virtqueue *vq, unsigned);
 
 bool virtqueue_enable_cb_delayed(struct virtqueue *vq);
-- 
cgit v1.2.3


From 665745f274870c921020f610e2c99a3b1613519b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@linux.intel.com>
Date: Fri, 18 Oct 2024 17:47:52 +0300
Subject: PCI/bwctrl: Re-add BW notification portdrv as PCIe BW controller
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This mostly reverts the commit b4c7d2076b4e ("PCI/LINK: Remove bandwidth
notification"). An upcoming commit extends this driver building PCIe
bandwidth controller on top of it.

PCIe bandwidth notifications were first added in the commit e8303bb7a75c
("PCI/LINK: Report degraded links via link bandwidth notification") but
later had to be removed. The significant changes compared with the old
bandwidth notification driver include:

1) Don't print the notifications into kernel log, just keep the Link
   Speed cached in struct pci_bus updated. While somewhat unfortunate,
   the log spam was the source of complaints that eventually lead to
   the removal of the bandwidth notifications driver (see the links
   below for further information).

2) Besides the Link Bandwidth Management Interrupt, also enable Link
   Autonomous Bandwidth Interrupt to cover the other source of bandwidth
   changes.

3) Handle Link Speed updates robustly. Refresh the cached Link Speed
   when enabling Bandwidth Notification Interrupts, and solve the race
   between Link Speed read and LBMS/LABS update in
   pcie_bwnotif_irq_thread().

4) Use concurrency safe LNKCTL RMW operations.

5) The driver is now called PCIe bwctrl (bandwidth controller) instead
   of just bandwidth notifications because of increased scope and
   functionality within the driver.

6) Coexist with the Target Link Speed quirk in pcie_failed_link_retrain().
   Provide LBMS counting API for it.

7) Tweaks to variable/functions names for consistency and length reasons.

Bandwidth Notifications enable the cur_bus_speed in the struct pci_bus to
keep track PCIe Link Speed changes.

[bhelgaas: This is based on previous work by Alexandru Gagniuc
<mr.nuke.me@gmail.com>; see e8303bb7a75c ("PCI/LINK: Report degraded links
via link bandwidth notification")]

Link: https://lore.kernel.org/r/20241018144755.7875-7-ilpo.jarvinen@linux.intel.com
Link: https://lore.kernel.org/all/20190429185611.121751-1-helgaas@kernel.org/
Link: https://lore.kernel.org/linux-pci/20190501142942.26972-1-keith.busch@intel.com/
Link: https://lore.kernel.org/linux-pci/20200115221008.GA191037@google.com/
Suggested-by: Lukas Wunner <lukas@wunner.de> # Building bwctrl on top of bwnotif
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
[bhelgaas: squash fix to drop IRQF_ONESHOT and convert to hardirq handler:
https://lore.kernel.org/r/20241115165717.15233-1-ilpo.jarvinen@linux.intel.com]
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Tested-by: Stefan Wahren <wahrenst@gmx.net>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 include/linux/pci.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/pci.h b/include/linux/pci.h
index 99c6fa30d25b..579b28833429 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -313,6 +313,7 @@ struct pci_vpd {
 };
 
 struct irq_affinity;
+struct pcie_bwctrl_data;
 struct pcie_link_state;
 struct pci_sriov;
 struct pci_p2pdma;
@@ -502,6 +503,7 @@ struct pci_dev {
 	unsigned int	dpc_rp_extensions:1;
 	u8		dpc_rp_log_size;
 #endif
+	struct pcie_bwctrl_data		*link_bwctrl;
 #ifdef CONFIG_PCI_ATS
 	union {
 		struct pci_sriov	*sriov;		/* PF: SR-IOV info */
-- 
cgit v1.2.3


From de9a6c8d5dbfedb5eb3722c822da0490f6a59a45 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@linux.intel.com>
Date: Fri, 18 Oct 2024 17:47:53 +0300
Subject: PCI/bwctrl: Add pcie_set_target_speed() to set PCIe Link Speed
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Currently, PCIe Link Speeds are adjusted by custom code rather than in a
common function provided in PCI core. The PCIe bandwidth controller
(bwctrl) introduces an in-kernel API, pcie_set_target_speed(), to set PCIe
Link Speed.

Convert Target Speed quirk to use the new API. The Target Speed quirk runs
very early when bwctrl is not yet probed for a Port and can also run later
when bwctrl is already setup for the Port, which requires the per port
mutex (set_speed_mutex) to be only taken if the bwctrl setup is already
complete.

The new API is also intended to be used in an upcoming commit that adds a
thermal cooling device to throttle PCIe bandwidth when thermal thresholds
are reached.

The PCIe bandwidth control procedure is as follows. The highest speed
supported by the Port and the PCIe device which is not higher than the
requested speed is selected and written into the Target Link Speed in the
Link Control 2 Register. Then bandwidth controller retrains the PCIe Link.

Bandwidth Notifications enable the cur_bus_speed in the struct pci_bus to
keep track PCIe Link Speed changes. While Bandwidth Notifications should
also be generated when bandwidth controller alters the PCIe Link Speed, a
few platforms do not deliver LMBS interrupt after Link Training as
expected. Thus, after changing the Link Speed, bandwidth controller makes
additional read for the Link Status Register to ensure cur_bus_speed is
consistent with the new PCIe Link Speed.

Link: https://lore.kernel.org/r/20241018144755.7875-8-ilpo.jarvinen@linux.intel.com
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
[bhelgaas: squash devm_mutex_init() error checking from
https://lore.kernel.org/r/20241030163139.2111689-1-andriy.shevchenko@linux.intel.com,
drop export of pcie_set_target_speed()]
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 include/linux/pci.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include')

diff --git a/include/linux/pci.h b/include/linux/pci.h
index 579b28833429..4f02d9f91399 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1797,9 +1797,19 @@ static inline int pci_irqd_intx_xlate(struct irq_domain *d,
 #ifdef CONFIG_PCIEPORTBUS
 extern bool pcie_ports_disabled;
 extern bool pcie_ports_native;
+
+int pcie_set_target_speed(struct pci_dev *port, enum pci_bus_speed speed_req,
+			  bool use_lt);
 #else
 #define pcie_ports_disabled	true
 #define pcie_ports_native	false
+
+static inline int pcie_set_target_speed(struct pci_dev *port,
+					enum pci_bus_speed speed_req,
+					bool use_lt)
+{
+	return -EOPNOTSUPP;
+}
 #endif
 
 #define PCIE_LINK_STATE_L0S		(BIT(0) | BIT(1)) /* Upstr/dwnstr L0s */
-- 
cgit v1.2.3


From d278b098282d1327f6e1be82aacb18457a4d244d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@linux.intel.com>
Date: Fri, 18 Oct 2024 17:47:54 +0300
Subject: thermal: Add PCIe cooling driver
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add a thermal cooling driver to provide path to access PCIe bandwidth
controller using the usual thermal interfaces.

A cooling device is instantiated for controllable PCIe Ports from the
bwctrl service driver.

If registering the cooling device fails, allow bwctrl's probe to succeed
regardless. As cdev in that case contains IS_ERR() pseudo "pointer", clean
that up inside the probe function so the remove side doesn't need to
suddenly make an odd looking IS_ERR() check.

The thermal side state 0 means no throttling, i.e., maximum supported PCIe
Link Speed.

Link: https://lore.kernel.org/r/20241018144755.7875-9-ilpo.jarvinen@linux.intel.com
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
[bhelgaas: dropped data->cdev test per
https://lore.kernel.org/r/ZzRm1SJTwEMRsAr8@wunner.de]
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Acked-by: Rafael J. Wysocki <rafael@kernel.org> # From the cooling device interface perspective
---
 include/linux/pci-bwctrl.h | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)
 create mode 100644 include/linux/pci-bwctrl.h

(limited to 'include')

diff --git a/include/linux/pci-bwctrl.h b/include/linux/pci-bwctrl.h
new file mode 100644
index 000000000000..cee07127455b
--- /dev/null
+++ b/include/linux/pci-bwctrl.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * PCIe bandwidth controller
+ *
+ * Copyright (C) 2023-2024 Intel Corporation
+ */
+
+#ifndef LINUX_PCI_BWCTRL_H
+#define LINUX_PCI_BWCTRL_H
+
+#include <linux/pci.h>
+
+struct thermal_cooling_device;
+
+#ifdef CONFIG_PCIE_THERMAL
+struct thermal_cooling_device *pcie_cooling_device_register(struct pci_dev *port);
+void pcie_cooling_device_unregister(struct thermal_cooling_device *cdev);
+#else
+static inline struct thermal_cooling_device *pcie_cooling_device_register(struct pci_dev *port)
+{
+	return NULL;
+}
+static inline void pcie_cooling_device_unregister(struct thermal_cooling_device *cdev)
+{
+}
+#endif
+
+#endif
-- 
cgit v1.2.3


From d7a516c6eeae29144649f1c3f586fa580ec9e040 Mon Sep 17 00:00:00 2001
From: Philipp Reisner <philipp.reisner@linbit.com>
Date: Fri, 15 Nov 2024 21:46:02 +0100
Subject: compiler.h: Fix undefined BUILD_BUG_ON_ZERO()

<linux/compiler.h> defines __must_be_array() and __must_be_cstr() and
both expand to BUILD_BUG_ON_ZERO(), but <linux/build_bug.h> defines
BUILD_BUG_ON_ZERO(). Including <linux/build_bug.h> in
<linux/compiler.h> would create a cyclic dependency as
<linux/build_bug.h> already includes <linux/compiler.h>.

Fix that by defining __BUILD_BUG_ON_ZERO_MSG() in <linux/compiler.h>
and using that for __must_be_array() and __must_be_cstr().

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Link: https://lore.kernel.org/r/20241115204602.249590-1-philipp.reisner@linbit.com
Signed-off-by: Kees Cook <kees@kernel.org>
---
 include/linux/compiler.h | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 4d4e23b6e3e7..469a64dd6495 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -239,11 +239,18 @@ static inline void *offset_to_ptr(const int *off)
 
 #endif /* __ASSEMBLY__ */
 
+#ifdef __CHECKER__
+#define __BUILD_BUG_ON_ZERO_MSG(e, msg) (0)
+#else /* __CHECKER__ */
+#define __BUILD_BUG_ON_ZERO_MSG(e, msg) ((int)sizeof(struct {_Static_assert(!(e), msg);}))
+#endif /* __CHECKER__ */
+
 /* &a[0] degrades to a pointer: a different type from an array */
-#define __must_be_array(a)	BUILD_BUG_ON_ZERO(__same_type((a), &(a)[0]))
+#define __must_be_array(a)	__BUILD_BUG_ON_ZERO_MSG(__same_type((a), &(a)[0]), "must be array")
 
 /* Require C Strings (i.e. NUL-terminated) lack the "nonstring" attribute. */
-#define __must_be_cstr(p)	BUILD_BUG_ON_ZERO(__annotated(p, nonstring))
+#define __must_be_cstr(p) \
+	__BUILD_BUG_ON_ZERO_MSG(__annotated(p, nonstring), "must be cstr (NUL-terminated)")
 
 /*
  * This returns a constant expression while determining if an argument is
-- 
cgit v1.2.3


From accdd51dc74ff65b7b7be1961b11723d228fbbbd Mon Sep 17 00:00:00 2001
From: Philo Lu <lulie@linux.alibaba.com>
Date: Thu, 14 Nov 2024 18:52:04 +0800
Subject: net/udp: Add a new struct for hash2 slot

Preparing for udp 4-tuple hash (uhash4 for short).

To implement uhash4 without cache line missing when lookup, hslot2 is
used to record the number of hashed sockets in hslot4. Thus adding a new
struct udp_hslot_main with field hash4_cnt, which is used by hash2. The
new struct is used to avoid doubling the size of udp_hslot.

Before uhash4 lookup, firstly checking hash4_cnt to see if there are
hashed sks in hslot4. Because hslot2 is always used in lookup, there is
no cache line miss.

Related helpers are updated, and use the helpers as possible.

uhash4 is implemented in following patches.

Signed-off-by: Philo Lu <lulie@linux.alibaba.com>
Acked-by: Willem de Bruijn <willemb@google.com>
Acked-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/udp.h | 38 ++++++++++++++++++++++++++++++++++----
 1 file changed, 34 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/net/udp.h b/include/net/udp.h
index 61222545ab1c..62a7207e65f2 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -50,7 +50,7 @@ struct udp_skb_cb {
 #define UDP_SKB_CB(__skb)	((struct udp_skb_cb *)((__skb)->cb))
 
 /**
- *	struct udp_hslot - UDP hash slot
+ *	struct udp_hslot - UDP hash slot used by udp_table.hash
  *
  *	@head:	head of list of sockets
  *	@count:	number of sockets in 'head' list
@@ -60,7 +60,22 @@ struct udp_hslot {
 	struct hlist_head	head;
 	int			count;
 	spinlock_t		lock;
-} __attribute__((aligned(2 * sizeof(long))));
+} __aligned(2 * sizeof(long));
+
+/**
+ *	struct udp_hslot_main - UDP hash slot used by udp_table.hash2
+ *
+ *	@hslot:	basic hash slot
+ *	@hash4_cnt: number of sockets in hslot4 of the same
+ *		    (local port, local address)
+ */
+struct udp_hslot_main {
+	struct udp_hslot	hslot; /* must be the first member */
+#if !IS_ENABLED(CONFIG_BASE_SMALL)
+	u32			hash4_cnt;
+#endif
+} __aligned(2 * sizeof(long));
+#define UDP_HSLOT_MAIN(__hslot) ((struct udp_hslot_main *)(__hslot))
 
 /**
  *	struct udp_table - UDP table
@@ -72,7 +87,7 @@ struct udp_hslot {
  */
 struct udp_table {
 	struct udp_hslot	*hash;
-	struct udp_hslot	*hash2;
+	struct udp_hslot_main	*hash2;
 	unsigned int		mask;
 	unsigned int		log;
 };
@@ -84,6 +99,7 @@ static inline struct udp_hslot *udp_hashslot(struct udp_table *table,
 {
 	return &table->hash[udp_hashfn(net, num, table->mask)];
 }
+
 /*
  * For secondary hash, net_hash_mix() is performed before calling
  * udp_hashslot2(), this explains difference with udp_hashslot()
@@ -91,8 +107,22 @@ static inline struct udp_hslot *udp_hashslot(struct udp_table *table,
 static inline struct udp_hslot *udp_hashslot2(struct udp_table *table,
 					      unsigned int hash)
 {
-	return &table->hash2[hash & table->mask];
+	return &table->hash2[hash & table->mask].hslot;
+}
+
+#if IS_ENABLED(CONFIG_BASE_SMALL)
+static inline void udp_table_hash4_init(struct udp_table *table)
+{
+}
+#else /* !CONFIG_BASE_SMALL */
+
+/* Must be called with table->hash2 initialized */
+static inline void udp_table_hash4_init(struct udp_table *table)
+{
+	for (int i = 0; i <= table->mask; i++)
+		table->hash2[i].hash4_cnt = 0;
 }
+#endif /* CONFIG_BASE_SMALL */
 
 extern struct proto udp_prot;
 
-- 
cgit v1.2.3


From dab78a1745ab3c6001e1e4d50a9d09efef8e260d Mon Sep 17 00:00:00 2001
From: Philo Lu <lulie@linux.alibaba.com>
Date: Thu, 14 Nov 2024 18:52:05 +0800
Subject: net/udp: Add 4-tuple hash list basis

Add a new hash list, hash4, in udp table. It will be used to implement
4-tuple hash for connected udp sockets. This patch adds the hlist to
table, and implements helpers and the initialization. 4-tuple hash is
implemented in the following patch.

hash4 uses hlist_nulls to avoid moving wrongly onto another hlist due to
concurrent rehash, because rehash() can happen with lookup().

Co-developed-by: Cambda Zhu <cambda@linux.alibaba.com>
Signed-off-by: Cambda Zhu <cambda@linux.alibaba.com>
Co-developed-by: Fred Chen <fred.cc@alibaba-inc.com>
Signed-off-by: Fred Chen <fred.cc@alibaba-inc.com>
Co-developed-by: Yubing Qiu <yubing.qiuyubing@alibaba-inc.com>
Signed-off-by: Yubing Qiu <yubing.qiuyubing@alibaba-inc.com>
Signed-off-by: Philo Lu <lulie@linux.alibaba.com>
Acked-by: Willem de Bruijn <willemb@google.com>
Acked-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/udp.h | 11 +++++++
 include/net/udp.h   | 85 +++++++++++++++++++++++++++++++++++++++++++++++++++--
 2 files changed, 93 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/udp.h b/include/linux/udp.h
index 3eb3f2b9a2a0..0807e21cfec9 100644
--- a/include/linux/udp.h
+++ b/include/linux/udp.h
@@ -56,6 +56,12 @@ struct udp_sock {
 	int		 pending;	/* Any pending frames ? */
 	__u8		 encap_type;	/* Is this an Encapsulation socket? */
 
+#if !IS_ENABLED(CONFIG_BASE_SMALL)
+	/* For UDP 4-tuple hash */
+	__u16 udp_lrpa_hash;
+	struct hlist_nulls_node udp_lrpa_node;
+#endif
+
 	/*
 	 * Following member retains the information to create a UDP header
 	 * when the socket is uncorked.
@@ -206,6 +212,11 @@ static inline void udp_allow_gso(struct sock *sk)
 #define udp_portaddr_for_each_entry_rcu(__sk, list) \
 	hlist_for_each_entry_rcu(__sk, list, __sk_common.skc_portaddr_node)
 
+#if !IS_ENABLED(CONFIG_BASE_SMALL)
+#define udp_lrpa_for_each_entry_rcu(__up, node, list) \
+	hlist_nulls_for_each_entry_rcu(__up, node, list, udp_lrpa_node)
+#endif
+
 #define IS_UDPLITE(__sk) (__sk->sk_protocol == IPPROTO_UDPLITE)
 
 #endif	/* _LINUX_UDP_H */
diff --git a/include/net/udp.h b/include/net/udp.h
index 62a7207e65f2..edb669967130 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -50,14 +50,21 @@ struct udp_skb_cb {
 #define UDP_SKB_CB(__skb)	((struct udp_skb_cb *)((__skb)->cb))
 
 /**
- *	struct udp_hslot - UDP hash slot used by udp_table.hash
+ *	struct udp_hslot - UDP hash slot used by udp_table.hash/hash4
  *
  *	@head:	head of list of sockets
+ *	@nulls_head:	head of list of sockets, only used by hash4
  *	@count:	number of sockets in 'head' list
  *	@lock:	spinlock protecting changes to head/count
  */
 struct udp_hslot {
-	struct hlist_head	head;
+	union {
+		struct hlist_head	head;
+		/* hash4 uses hlist_nulls to avoid moving wrongly onto another
+		 * hlist, because rehash() can happen with lookup().
+		 */
+		struct hlist_nulls_head	nulls_head;
+	};
 	int			count;
 	spinlock_t		lock;
 } __aligned(2 * sizeof(long));
@@ -82,12 +89,17 @@ struct udp_hslot_main {
  *
  *	@hash:	hash table, sockets are hashed on (local port)
  *	@hash2:	hash table, sockets are hashed on (local port, local address)
+ *	@hash4:	hash table, connected sockets are hashed on
+ *		(local port, local address, remote port, remote address)
  *	@mask:	number of slots in hash tables, minus 1
  *	@log:	log2(number of slots in hash table)
  */
 struct udp_table {
 	struct udp_hslot	*hash;
 	struct udp_hslot_main	*hash2;
+#if !IS_ENABLED(CONFIG_BASE_SMALL)
+	struct udp_hslot	*hash4;
+#endif
 	unsigned int		mask;
 	unsigned int		log;
 };
@@ -114,13 +126,80 @@ static inline struct udp_hslot *udp_hashslot2(struct udp_table *table,
 static inline void udp_table_hash4_init(struct udp_table *table)
 {
 }
+
+static inline struct udp_hslot *udp_hashslot4(struct udp_table *table,
+					      unsigned int hash)
+{
+	BUILD_BUG();
+	return NULL;
+}
+
+static inline bool udp_hashed4(const struct sock *sk)
+{
+	return false;
+}
+
+static inline unsigned int udp_hash4_slot_size(void)
+{
+	return 0;
+}
+
+static inline bool udp_has_hash4(const struct udp_hslot *hslot2)
+{
+	return false;
+}
+
+static inline void udp_hash4_inc(struct udp_hslot *hslot2)
+{
+}
+
+static inline void udp_hash4_dec(struct udp_hslot *hslot2)
+{
+}
 #else /* !CONFIG_BASE_SMALL */
 
 /* Must be called with table->hash2 initialized */
 static inline void udp_table_hash4_init(struct udp_table *table)
 {
-	for (int i = 0; i <= table->mask; i++)
+	table->hash4 = (void *)(table->hash2 + (table->mask + 1));
+	for (int i = 0; i <= table->mask; i++) {
 		table->hash2[i].hash4_cnt = 0;
+
+		INIT_HLIST_NULLS_HEAD(&table->hash4[i].nulls_head, i);
+		table->hash4[i].count = 0;
+		spin_lock_init(&table->hash4[i].lock);
+	}
+}
+
+static inline struct udp_hslot *udp_hashslot4(struct udp_table *table,
+					      unsigned int hash)
+{
+	return &table->hash4[hash & table->mask];
+}
+
+static inline bool udp_hashed4(const struct sock *sk)
+{
+	return !hlist_nulls_unhashed(&udp_sk(sk)->udp_lrpa_node);
+}
+
+static inline unsigned int udp_hash4_slot_size(void)
+{
+	return sizeof(struct udp_hslot);
+}
+
+static inline bool udp_has_hash4(const struct udp_hslot *hslot2)
+{
+	return UDP_HSLOT_MAIN(hslot2)->hash4_cnt;
+}
+
+static inline void udp_hash4_inc(struct udp_hslot *hslot2)
+{
+	UDP_HSLOT_MAIN(hslot2)->hash4_cnt++;
+}
+
+static inline void udp_hash4_dec(struct udp_hslot *hslot2)
+{
+	UDP_HSLOT_MAIN(hslot2)->hash4_cnt--;
 }
 #endif /* CONFIG_BASE_SMALL */
 
-- 
cgit v1.2.3


From 78c91ae2c6deb5d236a5a93ff2995cdd05514380 Mon Sep 17 00:00:00 2001
From: Philo Lu <lulie@linux.alibaba.com>
Date: Thu, 14 Nov 2024 18:52:06 +0800
Subject: ipv4/udp: Add 4-tuple hash for connected socket

Currently, the udp_table has two hash table, the port hash and portaddr
hash. Usually for UDP servers, all sockets have the same local port and
addr, so they are all on the same hash slot within a reuseport group.

In some applications, UDP servers use connect() to manage clients. In
particular, when firstly receiving from an unseen 4 tuple, a new socket
is created and connect()ed to the remote addr:port, and then the fd is
used exclusively by the client.

Once there are connected sks in a reuseport group, udp has to score all
sks in the same hash2 slot to find the best match. This could be
inefficient with a large number of connections, resulting in high
softirq overhead.

To solve the problem, this patch implement 4-tuple hash for connected
udp sockets. During connect(), hash4 slot is updated, as well as a
corresponding counter, hash4_cnt, in hslot2. In __udp4_lib_lookup(),
hslot4 will be searched firstly if the counter is non-zero. Otherwise,
hslot2 is used like before. Note that only connected sockets enter this
hash4 path, while un-connected ones are not affected.

hlist_nulls is used for hash4, because we probably move to another hslot
wrongly when lookup with concurrent rehash. Then we check nulls at the
list end to see if we should restart lookup. Because udp does not use
SLAB_TYPESAFE_BY_RCU, we don't need to touch sk_refcnt when lookup.

Stress test results (with 1 cpu fully used) are shown below, in pps:
(1) _un-connected_ socket as server
    [a] w/o hash4: 1,825176
    [b] w/  hash4: 1,831750 (+0.36%)

(2) 500 _connected_ sockets as server
    [c] w/o hash4:   290860 (only 16% of [a])
    [d] w/  hash4: 1,889658 (+3.1% compared with [b])

With hash4, compute_score is skipped when lookup, so [d] is slightly
better than [b].

Co-developed-by: Cambda Zhu <cambda@linux.alibaba.com>
Signed-off-by: Cambda Zhu <cambda@linux.alibaba.com>
Co-developed-by: Fred Chen <fred.cc@alibaba-inc.com>
Signed-off-by: Fred Chen <fred.cc@alibaba-inc.com>
Co-developed-by: Yubing Qiu <yubing.qiuyubing@alibaba-inc.com>
Signed-off-by: Yubing Qiu <yubing.qiuyubing@alibaba-inc.com>
Signed-off-by: Philo Lu <lulie@linux.alibaba.com>
Acked-by: Willem de Bruijn <willemb@google.com>
Acked-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/udp.h | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/udp.h b/include/net/udp.h
index edb669967130..feb06c0e48fb 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -302,13 +302,27 @@ static inline int udp_lib_hash(struct sock *sk)
 }
 
 void udp_lib_unhash(struct sock *sk);
-void udp_lib_rehash(struct sock *sk, u16 new_hash);
+void udp_lib_rehash(struct sock *sk, u16 new_hash, u16 new_hash4);
 
 static inline void udp_lib_close(struct sock *sk, long timeout)
 {
 	sk_common_release(sk);
 }
 
+/* hash4 routines shared between UDPv4/6 */
+#if IS_ENABLED(CONFIG_BASE_SMALL)
+static inline void udp_lib_hash4(struct sock *sk, u16 hash)
+{
+}
+
+static inline void udp4_hash4(struct sock *sk)
+{
+}
+#else /* !CONFIG_BASE_SMALL */
+void udp_lib_hash4(struct sock *sk, u16 hash);
+void udp4_hash4(struct sock *sk);
+#endif /* CONFIG_BASE_SMALL */
+
 int udp_lib_get_port(struct sock *sk, unsigned short snum,
 		     unsigned int hash2_nulladdr);
 
-- 
cgit v1.2.3


From 1b29a730ef8b6fd3aa3e11c2f6d409cf201cd913 Mon Sep 17 00:00:00 2001
From: Philo Lu <lulie@linux.alibaba.com>
Date: Thu, 14 Nov 2024 18:52:07 +0800
Subject: ipv6/udp: Add 4-tuple hash for connected socket

Implement ipv6 udp hash4 like that in ipv4. The major difference is that
the hash value should be calculated with udp6_ehashfn(). Besides,
ipv4-mapped ipv6 address is handled before hash() and rehash(). Export
udp_ehashfn because now we use it in udpv6 rehash.

Core procedures of hash/unhash/rehash are same as ipv4, and udpv4 and
udpv6 share the same udptable, so some functions in ipv4 hash4 can also
be shared.

Co-developed-by: Cambda Zhu <cambda@linux.alibaba.com>
Signed-off-by: Cambda Zhu <cambda@linux.alibaba.com>
Co-developed-by: Fred Chen <fred.cc@alibaba-inc.com>
Signed-off-by: Fred Chen <fred.cc@alibaba-inc.com>
Co-developed-by: Yubing Qiu <yubing.qiuyubing@alibaba-inc.com>
Signed-off-by: Yubing Qiu <yubing.qiuyubing@alibaba-inc.com>
Signed-off-by: Philo Lu <lulie@linux.alibaba.com>
Acked-by: Willem de Bruijn <willemb@google.com>
Acked-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/udp.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/net/udp.h b/include/net/udp.h
index feb06c0e48fb..6e89520e100d 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -303,6 +303,8 @@ static inline int udp_lib_hash(struct sock *sk)
 
 void udp_lib_unhash(struct sock *sk);
 void udp_lib_rehash(struct sock *sk, u16 new_hash, u16 new_hash4);
+u32 udp_ehashfn(const struct net *net, const __be32 laddr, const __u16 lport,
+		const __be32 faddr, const __be16 fport);
 
 static inline void udp_lib_close(struct sock *sk, long timeout)
 {
-- 
cgit v1.2.3


From a06e4a93067cd8f55a74638d45146ddde76574f2 Mon Sep 17 00:00:00 2001
From: Finn Thain <fthain@linux-m68k.org>
Date: Wed, 13 Nov 2024 09:32:15 +1100
Subject: rtc: m48t59: Use platform_data struct for year offset value

Instead of hard-coded values and ifdefs, store the year offset in the
platform_data struct.

Tested-by: Daniel Palmer <daniel@0x0f.com>
Reviewed-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Finn Thain <fthain@linux-m68k.org>
Tested-by: Andreas Larsson <andreas@gaisler.com>
Acked-by: Andreas Larsson <andreas@gaisler.com>
Link: https://lore.kernel.org/r/665c3526184a8d0c4a6373297d8e7d9a12591d8b.1731450735.git.fthain@linux-m68k.org
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
---
 include/linux/rtc/m48t59.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/linux/rtc/m48t59.h b/include/linux/rtc/m48t59.h
index 9465d5405fe2..373ba77071c6 100644
--- a/include/linux/rtc/m48t59.h
+++ b/include/linux/rtc/m48t59.h
@@ -56,6 +56,9 @@ struct m48t59_plat_data {
 	void __iomem *ioaddr;
 	/* offset to RTC registers, automatically set according to the type */
 	unsigned int offset;
+
+	/* YY digits (in RTC) are offset, i.e. year is 1900 + yy_offset + YY */
+	int yy_offset;
 };
 
 #endif /* _LINUX_RTC_M48T59_H_ */
-- 
cgit v1.2.3


From c750629caeca01979da3403f4bebecda88713233 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Mon, 18 Nov 2024 15:14:34 +0000
Subject: io_uring: remove io_uring_cqwait_reg_arg

A separate wait argument registration API was removed, also delete
leftover uapi definitions.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/143b6a53591badac23632d3e6fa3e5db4b342ee2.1731942445.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/uapi/linux/io_uring.h | 14 --------------
 1 file changed, 14 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 4418d0192959..aac9a4f8fa9a 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -873,20 +873,6 @@ enum {
 	IORING_REG_WAIT_TS		= (1U << 0),
 };
 
-/*
- * Argument for IORING_REGISTER_CQWAIT_REG, registering a region of
- * struct io_uring_reg_wait that can be indexed when io_uring_enter(2) is
- * called rather than pass in a wait argument structure separately.
- */
-struct io_uring_cqwait_reg_arg {
-	__u32		flags;
-	__u32		struct_size;
-	__u32		nr_entries;
-	__u32		pad;
-	__u64		user_addr;
-	__u64		pad2[3];
-};
-
 /*
  * Argument for io_uring_enter(2) with
  * IORING_GETEVENTS | IORING_ENTER_EXT_ARG_REG set, where the actual argument
-- 
cgit v1.2.3


From 03854920c39c62b88c0b540c92cf35746d059af2 Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <linux@treblig.org>
Date: Sun, 6 Oct 2024 02:19:52 +0100
Subject: libceph: Remove unused ceph_pagelist functions

ceph_pagelist_truncate() and ceph_pagelist_set_cursor() have been unused
since commit
39be95e9c8c0 ("ceph: ceph_pagelist_append might sleep while atomic")

Remove them.

Signed-off-by: Dr. David Alan Gilbert <linux@treblig.org>
Reviewed-by: Ilya Dryomov <idryomov@gmail.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 include/linux/ceph/pagelist.h | 12 ------------
 1 file changed, 12 deletions(-)

(limited to 'include')

diff --git a/include/linux/ceph/pagelist.h b/include/linux/ceph/pagelist.h
index 5dead8486fd8..879bec0863aa 100644
--- a/include/linux/ceph/pagelist.h
+++ b/include/linux/ceph/pagelist.h
@@ -17,12 +17,6 @@ struct ceph_pagelist {
 	refcount_t refcnt;
 };
 
-struct ceph_pagelist_cursor {
-	struct ceph_pagelist *pl;   /* pagelist, for error checking */
-	struct list_head *page_lru; /* page in list */
-	size_t room;		    /* room remaining to reset to */
-};
-
 struct ceph_pagelist *ceph_pagelist_alloc(gfp_t gfp_flags);
 
 extern void ceph_pagelist_release(struct ceph_pagelist *pl);
@@ -33,12 +27,6 @@ extern int ceph_pagelist_reserve(struct ceph_pagelist *pl, size_t space);
 
 extern int ceph_pagelist_free_reserve(struct ceph_pagelist *pl);
 
-extern void ceph_pagelist_set_cursor(struct ceph_pagelist *pl,
-				     struct ceph_pagelist_cursor *c);
-
-extern int ceph_pagelist_truncate(struct ceph_pagelist *pl,
-				  struct ceph_pagelist_cursor *c);
-
 static inline int ceph_pagelist_encode_64(struct ceph_pagelist *pl, u64 v)
 {
 	__le64 ev = cpu_to_le64(v);
-- 
cgit v1.2.3


From ee1eb8ccaab8cc2ef4bda8e11a40409ee20f6405 Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <linux@treblig.org>
Date: Sun, 6 Oct 2024 02:19:53 +0100
Subject: libceph: Remove unused pagevec functions

ceph_copy_user_to_page_vector() has been unused since 2013's commit
e8344e668915 ("ceph: Implement writev/pwritev for sync operation.")

ceph_copy_to_page_vector() has been unused since 2012's commit
913d2fdcf605 ("rbd: always pass ops array to rbd_req_sync_op()")

Remove them.

Signed-off-by: Dr. David Alan Gilbert <linux@treblig.org>
Reviewed-by: Ilya Dryomov <idryomov@gmail.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 include/linux/ceph/libceph.h | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h
index 15fb566d3f46..733e7f93db66 100644
--- a/include/linux/ceph/libceph.h
+++ b/include/linux/ceph/libceph.h
@@ -317,12 +317,6 @@ extern void ceph_release_page_vector(struct page **pages, int num_pages);
 extern void ceph_put_page_vector(struct page **pages, int num_pages,
 				 bool dirty);
 extern struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags);
-extern int ceph_copy_user_to_page_vector(struct page **pages,
-					 const void __user *data,
-					 loff_t off, size_t len);
-extern void ceph_copy_to_page_vector(struct page **pages,
-				    const void *data,
-				    loff_t off, size_t len);
 extern void ceph_copy_from_page_vector(struct page **pages,
 				    void *data,
 				    loff_t off, size_t len);
-- 
cgit v1.2.3


From 32844fd72b879d02f1f6b4394025349d31a09fd3 Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <linux@treblig.org>
Date: Sun, 6 Oct 2024 02:19:54 +0100
Subject: libceph: Remove unused ceph_osdc_watch_check

ceph_osdc_watch_check() has been unused since it was added in commit
b07d3c4bd727 ("libceph: support for checking on status of watch")

Remove it.

Signed-off-by: Dr. David Alan Gilbert <linux@treblig.org>
Reviewed-by: Ilya Dryomov <idryomov@gmail.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 include/linux/ceph/osd_client.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
index d7941478158c..d55b30057a45 100644
--- a/include/linux/ceph/osd_client.h
+++ b/include/linux/ceph/osd_client.h
@@ -626,8 +626,6 @@ int ceph_osdc_notify(struct ceph_osd_client *osdc,
 		     u32 timeout,
 		     struct page ***preply_pages,
 		     size_t *preply_len);
-int ceph_osdc_watch_check(struct ceph_osd_client *osdc,
-			  struct ceph_osd_linger_request *lreq);
 int ceph_osdc_list_watchers(struct ceph_osd_client *osdc,
 			    struct ceph_object_id *oid,
 			    struct ceph_object_locator *oloc,
-- 
cgit v1.2.3


From 979c6342f9c0a48696a6420f14f9dd409591657f Mon Sep 17 00:00:00 2001
From: Keith Busch <kbusch@kernel.org>
Date: Fri, 15 Nov 2024 13:41:21 -0800
Subject: nvme-pci: add support for sgl metadata

Supporting this mode allows creating and merging multi-segment metadata
requests that wouldn't be possible otherwise. It also allows directly
using user space requests that straddle physically discontiguous pages.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Keith Busch <kbusch@kernel.org>
---
 include/linux/nvme.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 0a6e22038ce3..5873ce859cc8 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -389,6 +389,7 @@ enum {
 	NVME_CTRL_CTRATT_PREDICTABLE_LAT	= 1 << 5,
 	NVME_CTRL_CTRATT_NAMESPACE_GRANULARITY	= 1 << 7,
 	NVME_CTRL_CTRATT_UUID_LIST		= 1 << 9,
+	NVME_CTRL_SGLS_MSDS                     = 1 << 19,
 };
 
 struct nvme_lbaf {
-- 
cgit v1.2.3


From 6399a0db8cd61eedbfb4b7809a4f4699157a9bf8 Mon Sep 17 00:00:00 2001
From: Keith Busch <kbusch@kernel.org>
Date: Wed, 13 Nov 2024 13:57:04 -0800
Subject: nvme: define the remaining used sgls constants

This provides a little more context when reading the code than hardcoded
magic numbers.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Keith Busch <kbusch@kernel.org>
---
 include/linux/nvme.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 5873ce859cc8..2baf9a80b470 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -389,7 +389,11 @@ enum {
 	NVME_CTRL_CTRATT_PREDICTABLE_LAT	= 1 << 5,
 	NVME_CTRL_CTRATT_NAMESPACE_GRANULARITY	= 1 << 7,
 	NVME_CTRL_CTRATT_UUID_LIST		= 1 << 9,
+	NVME_CTRL_SGLS_BYTE_ALIGNED             = 1,
+	NVME_CTRL_SGLS_DWORD_ALIGNED            = 2,
+	NVME_CTRL_SGLS_KSDBDS			= 1 << 2,
 	NVME_CTRL_SGLS_MSDS                     = 1 << 19,
+	NVME_CTRL_SGLS_SAOS                     = 1 << 20,
 };
 
 struct nvme_lbaf {
-- 
cgit v1.2.3


From 28b6acd75e3cefbe746ec7402c7ff4fdb114f327 Mon Sep 17 00:00:00 2001
From: Rick Wertenbroek <rick.wertenbroek@gmail.com>
Date: Thu, 14 Nov 2024 17:10:32 +0100
Subject: PCI: endpoint: Fix pci_epc_map map_size kerneldoc string
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Because some endpoint controllers have requirements on the alignment of
the controller physical memory address that must be used to map a RC PCI
address region, the map PCI start address is not necessarily the desired
PCI base address to be mapped. This can result in map_pci_addr being
lower than pci_addr as documented. This results in map_size covering the
range map_pci_addr..pci_addr+pci_size.

The old text had the pci_addr twice instead of map_pci_addr..pci_addr,
so replace the erroneous kerneldoc string to reflect the actual range.

Link: https://lore.kernel.org/r/20241114161032.3046202-1-rick.wertenbroek@gmail.com
Signed-off-by: Rick Wertenbroek <rick.wertenbroek@gmail.com>
Signed-off-by: Krzysztof Wilczyński <kwilczynski@kernel.org>
---
 include/linux/pci-epc.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/pci-epc.h b/include/linux/pci-epc.h
index de8cc3658220..e818e3fdcded 100644
--- a/include/linux/pci-epc.h
+++ b/include/linux/pci-epc.h
@@ -40,7 +40,7 @@ pci_epc_interface_string(enum pci_epc_interface_type type)
  * @map_pci_addr: RC PCI address used as the first address mapped (may be lower
  *                than @pci_addr)
  * @map_size: size of the controller memory needed for mapping the RC PCI address
- *            range @pci_addr..@pci_addr+@pci_size
+ *            range @map_pci_addr..@pci_addr+@pci_size
  * @phys_base: base physical address of the allocated EPC memory for mapping the
  *             RC PCI address range
  * @phys_addr: physical address at which @pci_addr is mapped
-- 
cgit v1.2.3


From 2f746e40e9baae878f8193e09d8f6d601dce42bb Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Thu, 17 Oct 2024 09:36:27 -0400
Subject: lockd: Remove unused typedef

Clean up: Looks like the last usage of this typedef was removed by
commit 026fec7e7c47 ("sunrpc: properly type pc_decode callbacks") in
2017.

Reviewed-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: NeilBrown <neilb@suse.de>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 include/linux/lockd/xdr.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/lockd/xdr.h b/include/linux/lockd/xdr.h
index 80cca9426761..17d53165d9f2 100644
--- a/include/linux/lockd/xdr.h
+++ b/include/linux/lockd/xdr.h
@@ -73,8 +73,6 @@ struct nlm_args {
 	u32			fsm_mode;
 };
 
-typedef struct nlm_args nlm_args;
-
 /*
  * Generic lockd result
  */
-- 
cgit v1.2.3


From 8994a512e2598c0bf1b6e9ece1e8292976b0dd65 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Thu, 17 Oct 2024 09:36:30 -0400
Subject: lockd: Remove unused parameter to nlmsvc_testlock()

The nlm_cookie parameter has been unused since commit 09802fd2a8ca
("lockd: rip out deferred lock handling from testlock codepath").

Reviewed-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: NeilBrown <neilb@suse.de>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 include/linux/lockd/lockd.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index 61c4b9c41904..c8f0f9458f2c 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -278,9 +278,9 @@ __be32		  nlmsvc_lock(struct svc_rqst *, struct nlm_file *,
 			      struct nlm_host *, struct nlm_lock *, int,
 			      struct nlm_cookie *, int);
 __be32		  nlmsvc_unlock(struct net *net, struct nlm_file *, struct nlm_lock *);
-__be32		  nlmsvc_testlock(struct svc_rqst *, struct nlm_file *,
-			struct nlm_host *, struct nlm_lock *,
-			struct nlm_lock *, struct nlm_cookie *);
+__be32		  nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file,
+			struct nlm_host *host, struct nlm_lock *lock,
+			struct nlm_lock *conflock);
 __be32		  nlmsvc_cancel_blocked(struct net *net, struct nlm_file *, struct nlm_lock *);
 void		  nlmsvc_retry_blocked(struct svc_rqst *rqstp);
 void		  nlmsvc_traverse_blocks(struct nlm_host *, struct nlm_file *,
-- 
cgit v1.2.3


From c840b8e1f039e90f97ca55525667eb961422f86c Mon Sep 17 00:00:00 2001
From: Mike Snitzer <snitzer@kernel.org>
Date: Wed, 13 Nov 2024 22:59:38 -0500
Subject: nfs_common: must not hold RCU while calling nfsd_file_put_local

Move holding the RCU from nfs_to_nfsd_file_put_local to
nfs_to_nfsd_net_put.  It is the call to nfs_to->nfsd_serv_put that
requires the RCU anyway (the puts for nfsd_file and netns were
combined to avoid an extra indirect reference but that
micro-optimization isn't possible now).

This fixes xfstests generic/013 and it triggering:

"Voluntary context switch within RCU read-side critical section!"

[  143.545738] Call Trace:
[  143.546206]  <TASK>
[  143.546625]  ? show_regs+0x6d/0x80
[  143.547267]  ? __warn+0x91/0x140
[  143.547951]  ? rcu_note_context_switch+0x496/0x5d0
[  143.548856]  ? report_bug+0x193/0x1a0
[  143.549557]  ? handle_bug+0x63/0xa0
[  143.550214]  ? exc_invalid_op+0x1d/0x80
[  143.550938]  ? asm_exc_invalid_op+0x1f/0x30
[  143.551736]  ? rcu_note_context_switch+0x496/0x5d0
[  143.552634]  ? wakeup_preempt+0x62/0x70
[  143.553358]  __schedule+0xaa/0x1380
[  143.554025]  ? _raw_spin_unlock_irqrestore+0x12/0x40
[  143.554958]  ? try_to_wake_up+0x1fe/0x6b0
[  143.555715]  ? wake_up_process+0x19/0x20
[  143.556452]  schedule+0x2e/0x120
[  143.557066]  schedule_preempt_disabled+0x19/0x30
[  143.557933]  rwsem_down_read_slowpath+0x24d/0x4a0
[  143.558818]  ? xfs_efi_item_format+0x50/0xc0 [xfs]
[  143.559894]  down_read+0x4e/0xb0
[  143.560519]  xlog_cil_commit+0x1b2/0xbc0 [xfs]
[  143.561460]  ? _raw_spin_unlock+0x12/0x30
[  143.562212]  ? xfs_inode_item_precommit+0xc7/0x220 [xfs]
[  143.563309]  ? xfs_trans_run_precommits+0x69/0xd0 [xfs]
[  143.564394]  __xfs_trans_commit+0xb5/0x330 [xfs]
[  143.565367]  xfs_trans_roll+0x48/0xc0 [xfs]
[  143.566262]  xfs_defer_trans_roll+0x57/0x100 [xfs]
[  143.567278]  xfs_defer_finish_noroll+0x27a/0x490 [xfs]
[  143.568342]  xfs_defer_finish+0x1a/0x80 [xfs]
[  143.569267]  xfs_bunmapi_range+0x4d/0xb0 [xfs]
[  143.570208]  xfs_itruncate_extents_flags+0x13d/0x230 [xfs]
[  143.571353]  xfs_free_eofblocks+0x12e/0x190 [xfs]
[  143.572359]  xfs_file_release+0x12d/0x140 [xfs]
[  143.573324]  __fput+0xe8/0x2d0
[  143.573922]  __fput_sync+0x1d/0x30
[  143.574574]  nfsd_filp_close+0x33/0x60 [nfsd]
[  143.575430]  nfsd_file_free+0x96/0x150 [nfsd]
[  143.576274]  nfsd_file_put+0xf7/0x1a0 [nfsd]
[  143.577104]  nfsd_file_put_local+0x18/0x30 [nfsd]
[  143.578070]  nfs_close_local_fh+0x101/0x110 [nfs_localio]
[  143.579079]  __put_nfs_open_context+0xc9/0x180 [nfs]
[  143.580031]  nfs_file_clear_open_context+0x4a/0x60 [nfs]
[  143.581038]  nfs_file_release+0x3e/0x60 [nfs]
[  143.581879]  __fput+0xe8/0x2d0
[  143.582464]  __fput_sync+0x1d/0x30
[  143.583108]  __x64_sys_close+0x41/0x80
[  143.583823]  x64_sys_call+0x189a/0x20d0
[  143.584552]  do_syscall_64+0x64/0x170
[  143.585240]  entry_SYSCALL_64_after_hwframe+0x76/0x7e
[  143.586185] RIP: 0033:0x7f3c5153efd7

Fixes: 65f2a5c36635 ("nfs_common: fix race in NFS calls to nfsd_file_put_local() and nfsd_serv_put()")
Signed-off-by: Mike Snitzer <snitzer@kernel.org>
Reviewed-by: NeilBrown <neilb@suse.de>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 include/linux/nfslocalio.h | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/nfslocalio.h b/include/linux/nfslocalio.h
index 3982fea79919..9202f4b24343 100644
--- a/include/linux/nfslocalio.h
+++ b/include/linux/nfslocalio.h
@@ -55,7 +55,7 @@ struct nfsd_localio_operations {
 						const struct cred *,
 						const struct nfs_fh *,
 						const fmode_t);
-	void (*nfsd_file_put_local)(struct nfsd_file *);
+	struct net *(*nfsd_file_put_local)(struct nfsd_file *);
 	struct file *(*nfsd_file_file)(struct nfsd_file *);
 } ____cacheline_aligned;
 
@@ -66,7 +66,7 @@ struct nfsd_file *nfs_open_local_fh(nfs_uuid_t *,
 		   struct rpc_clnt *, const struct cred *,
 		   const struct nfs_fh *, const fmode_t);
 
-static inline void nfs_to_nfsd_file_put_local(struct nfsd_file *localio)
+static inline void nfs_to_nfsd_net_put(struct net *net)
 {
 	/*
 	 * Once reference to nfsd_serv is dropped, NFSD could be
@@ -74,10 +74,22 @@ static inline void nfs_to_nfsd_file_put_local(struct nfsd_file *localio)
 	 * by always taking RCU.
 	 */
 	rcu_read_lock();
-	nfs_to->nfsd_file_put_local(localio);
+	nfs_to->nfsd_serv_put(net);
 	rcu_read_unlock();
 }
 
+static inline void nfs_to_nfsd_file_put_local(struct nfsd_file *localio)
+{
+	/*
+	 * Must not hold RCU otherwise nfsd_file_put() can easily trigger:
+	 * "Voluntary context switch within RCU read-side critical section!"
+	 * by scheduling deep in underlying filesystem (e.g. XFS).
+	 */
+	struct net *net = nfs_to->nfsd_file_put_local(localio);
+
+	nfs_to_nfsd_net_put(net);
+}
+
 #else   /* CONFIG_NFS_LOCALIO */
 static inline void nfsd_localio_ops_init(void)
 {
-- 
cgit v1.2.3


From 1cfb5e57886aa69a992ff0ebd32e4651eb0fc995 Mon Sep 17 00:00:00 2001
From: Kees Cook <kees@kernel.org>
Date: Fri, 15 Nov 2024 12:43:03 -0800
Subject: Revert "net: ethtool: Avoid thousands of
 -Wflex-array-member-not-at-end warnings"

This reverts commit 3bd9b9abdf1563a22041b7255baea6d449902f1a. We cannot
use the new tagged struct group because it throws C++ errors even under
"extern C".

Signed-off-by: Kees Cook <kees@kernel.org>
Link: https://patch.msgid.link/20241115204308.3821419-1-kees@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/ethtool.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index 299280c94d07..b8b935b52603 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -211,7 +211,7 @@ void ethtool_rxfh_context_lost(struct net_device *dev, u32 context_id);
  * fields, but they are allowed to overwrite them (will be ignored).
  */
 struct ethtool_link_ksettings {
-	struct ethtool_link_settings_hdr base;
+	struct ethtool_link_settings base;
 	struct {
 		__ETHTOOL_DECLARE_LINK_MODE_MASK(supported);
 		__ETHTOOL_DECLARE_LINK_MODE_MASK(advertising);
-- 
cgit v1.2.3


From ebda123fe703f492d7d557a4da00888ddec4779e Mon Sep 17 00:00:00 2001
From: Kees Cook <kees@kernel.org>
Date: Fri, 15 Nov 2024 12:43:04 -0800
Subject: Revert "UAPI: ethtool: Use __struct_group() in struct
 ethtool_link_settings"

This reverts commit 43d3487035e9a86fad952de4240a518614240d43. We cannot
use tagged struct groups in UAPI because C++ will throw syntax errors
even under "extern C".

Signed-off-by: Kees Cook <kees@kernel.org>
Link: https://patch.msgid.link/20241115204308.3821419-2-kees@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/uapi/linux/ethtool.h | 33 +++++++++++++++------------------
 1 file changed, 15 insertions(+), 18 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h
index fc1f54b065f9..c405ed63acfa 100644
--- a/include/uapi/linux/ethtool.h
+++ b/include/uapi/linux/ethtool.h
@@ -2511,24 +2511,21 @@ enum ethtool_reset_flags {
  *	autonegotiation; 0 if unknown or not applicable.  Read-only.
  */
 struct ethtool_link_settings {
-	/* New members MUST be added within the __struct_group() macro below. */
-	__struct_group(ethtool_link_settings_hdr, hdr, /* no attrs */,
-		__u32	cmd;
-		__u32	speed;
-		__u8	duplex;
-		__u8	port;
-		__u8	phy_address;
-		__u8	autoneg;
-		__u8	mdio_support;
-		__u8	eth_tp_mdix;
-		__u8	eth_tp_mdix_ctrl;
-		__s8	link_mode_masks_nwords;
-		__u8	transceiver;
-		__u8	master_slave_cfg;
-		__u8	master_slave_state;
-		__u8	rate_matching;
-		__u32	reserved[7];
-	);
+	__u32	cmd;
+	__u32	speed;
+	__u8	duplex;
+	__u8	port;
+	__u8	phy_address;
+	__u8	autoneg;
+	__u8	mdio_support;
+	__u8	eth_tp_mdix;
+	__u8	eth_tp_mdix_ctrl;
+	__s8	link_mode_masks_nwords;
+	__u8	transceiver;
+	__u8	master_slave_cfg;
+	__u8	master_slave_state;
+	__u8	rate_matching;
+	__u32	reserved[7];
 	__u32	link_mode_masks[];
 	/* layout of link_mode_masks fields:
 	 * __u32 map_supported[link_mode_masks_nwords];
-- 
cgit v1.2.3


From 96c677fca54a28fcfea4dbab9c1f2530bd0a08d1 Mon Sep 17 00:00:00 2001
From: Kees Cook <kees@kernel.org>
Date: Fri, 15 Nov 2024 12:43:05 -0800
Subject: UAPI: ethtool: Avoid flex-array in struct ethtool_link_settings

struct ethtool_link_settings tends to be used as a header for other
structures that have trailing bytes[1], but has a trailing flexible array
itself. Using this overlapped with other structures leads to ambiguous
object sizing in the compiler, so we want to avoid such situations (which
have caused real bugs in the past). Detecting this can be done with
-Wflex-array-member-not-at-end, which will need to be enabled globally.

Using a tagged struct_group() to create a new ethtool_link_settings_hdr
structure isn't possible as it seems we cannot use the tagged variant of
struct_group() due to syntax issues from C++'s perspective (even within
"extern C")[2]. Instead, we can just leave the offending member defined
in UAPI and remove it from the kernel's view of the structure, as Linux
doesn't actually use this member at all. There is also no change in
size since it was already a flexible array that didn't contribute to
size returned by any use of sizeof().

Reported-by: Jakub Kicinski <kuba@kernel.org>
Closes: https://lore.kernel.org/lkml/20241109100213.262a2fa0@kernel.org/ [2]
Link: https://lore.kernel.org/lkml/0bc2809fe2a6c11dd4c8a9a10d9bd65cccdb559b.1730238285.git.gustavoars@kernel.org/ [1]
Signed-off-by: Kees Cook <kees@kernel.org>
Reviewed-by: Jakub Kicinski <kuba@kernel.org>
Link: https://patch.msgid.link/20241115204308.3821419-3-kees@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/uapi/linux/ethtool.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h
index c405ed63acfa..7e1b3820f91f 100644
--- a/include/uapi/linux/ethtool.h
+++ b/include/uapi/linux/ethtool.h
@@ -2526,12 +2526,19 @@ struct ethtool_link_settings {
 	__u8	master_slave_state;
 	__u8	rate_matching;
 	__u32	reserved[7];
+#ifndef __KERNEL__
+	/* Linux builds with -Wflex-array-member-not-at-end but does
+	 * not use the "link_mode_masks" member. Leave it defined for
+	 * userspace for now, and when userspace wants to start using
+	 * -Wfamnae, we'll need a new solution.
+	 */
 	__u32	link_mode_masks[];
 	/* layout of link_mode_masks fields:
 	 * __u32 map_supported[link_mode_masks_nwords];
 	 * __u32 map_advertising[link_mode_masks_nwords];
 	 * __u32 map_lp_advertising[link_mode_masks_nwords];
 	 */
+#endif
 };
 
 /**
-- 
cgit v1.2.3


From a57d5a72f8dec7db8a79d0016fb0a3bdecc82b56 Mon Sep 17 00:00:00 2001
From: Breno Leitao <leitao@debian.org>
Date: Mon, 18 Nov 2024 03:15:18 -0800
Subject: netpoll: Use rcu_access_pointer() in netpoll_poll_lock

The ndev->npinfo pointer in netpoll_poll_lock() is RCU-protected but is
being accessed directly for a NULL check. While no RCU read lock is held
in this context, we should still use proper RCU primitives for
consistency and correctness.

Replace the direct NULL check with rcu_access_pointer(), which is the
appropriate primitive when only checking for NULL without dereferencing
the pointer. This function provides the necessary ordering guarantees
without requiring RCU read-side protection.

Fixes: bea3348eef27 ("[NET]: Make NAPI polling independent of struct net_device objects.")
Signed-off-by: Breno Leitao <leitao@debian.org>
Reviewed-by: Michal Kubiak <michal.kubiak@intel.com>
Link: https://patch.msgid.link/20241118-netpoll_rcu-v1-2-a1888dcb4a02@debian.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/netpoll.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h
index cd4e28db0cbd..959a4daacea1 100644
--- a/include/linux/netpoll.h
+++ b/include/linux/netpoll.h
@@ -72,7 +72,7 @@ static inline void *netpoll_poll_lock(struct napi_struct *napi)
 {
 	struct net_device *dev = napi->dev;
 
-	if (dev && dev->npinfo) {
+	if (dev && rcu_access_pointer(dev->npinfo)) {
 		int owner = smp_processor_id();
 
 		while (cmpxchg(&napi->poll_owner, -1, owner) != -1)
-- 
cgit v1.2.3


From 50f42c489528566ea2d8a9561ee54748b0441d51 Mon Sep 17 00:00:00 2001
From: Patrick Donnelly <pdonnell@redhat.com>
Date: Wed, 26 Jun 2024 09:49:10 -0400
Subject: ceph: correct ceph_mds_cap_item field name

The issue_seq is sent with bulk cap releases, not the current sequence
number. See also ceph.git commit 655cddb7c9f3 ("include/ceph_fs: correct
ceph_mds_cap_item field name").

Link: https://tracker.ceph.com/issues/66704
Signed-off-by: Patrick Donnelly <pdonnell@redhat.com>
Reviewed-by: Ilya Dryomov <idryomov@gmail.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 include/linux/ceph/ceph_fs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h
index ee1d0e5f9789..4ff3ad5e9210 100644
--- a/include/linux/ceph/ceph_fs.h
+++ b/include/linux/ceph/ceph_fs.h
@@ -822,7 +822,7 @@ struct ceph_mds_cap_release {
 struct ceph_mds_cap_item {
 	__le64 ino;
 	__le64 cap_id;
-	__le32 migrate_seq, seq;
+	__le32 migrate_seq, issue_seq;
 } __attribute__ ((packed));
 
 #define CEPH_MDS_LEASE_REVOKE           1  /*    mds  -> client */
-- 
cgit v1.2.3


From 8b41ac43c7bb902786eae09cc23bcc9964ef2386 Mon Sep 17 00:00:00 2001
From: Patrick Donnelly <pdonnell@redhat.com>
Date: Wed, 26 Jun 2024 12:57:27 -0400
Subject: ceph: correct ceph_mds_cap_peer field name

The peer seq is used as the issue_seq. Use that name for consistency.
See also ceph.git commit 1da6ef237fc7 ("include/ceph_fs: correct
ceph_mds_cap_peer field name").

Link: https://tracker.ceph.com/issues/66704
Signed-off-by: Patrick Donnelly <pdonnell@redhat.com>
Reviewed-by: Ilya Dryomov <idryomov@gmail.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 include/linux/ceph/ceph_fs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h
index 4ff3ad5e9210..2d7d86f0290d 100644
--- a/include/linux/ceph/ceph_fs.h
+++ b/include/linux/ceph/ceph_fs.h
@@ -808,7 +808,7 @@ struct ceph_mds_caps {
 
 struct ceph_mds_cap_peer {
 	__le64 cap_id;
-	__le32 seq;
+	__le32 issue_seq;
 	__le32 mseq;
 	__le32 mds;
 	__u8   flags;
-- 
cgit v1.2.3


From 46fd48ab3ea3eb3bb215684bd66ea3d260b091a9 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 19 Nov 2024 08:26:02 +0100
Subject: block: return unsigned int from bdev_io_min

The underlying limit is defined as an unsigned int, so return that from
bdev_io_min as well.

Fixes: ac481c20ef8f ("block: Topology ioctls")
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Reviewed-by: John Garry <john.g.garry@oracle.com>
Link: https://lore.kernel.org/r/20241119072602.1059488-1-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blkdev.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index a1fd0ddce5cf..195db38fda16 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1261,7 +1261,7 @@ static inline unsigned int queue_io_min(const struct request_queue *q)
 	return q->limits.io_min;
 }
 
-static inline int bdev_io_min(struct block_device *bdev)
+static inline unsigned int bdev_io_min(struct block_device *bdev)
 {
 	return queue_io_min(bdev_get_queue(bdev));
 }
-- 
cgit v1.2.3


From f06e108a3dc53c0f5234d18de0bd224753db5019 Mon Sep 17 00:00:00 2001
From: Jan Hendrik Farr <kernel@jfarr.cc>
Date: Tue, 29 Oct 2024 15:00:36 +0100
Subject: Compiler Attributes: disable __counted_by for clang < 19.1.3

This patch disables __counted_by for clang versions < 19.1.3 because
of the two issues listed below. It does this by introducing
CONFIG_CC_HAS_COUNTED_BY.

1. clang < 19.1.2 has a bug that can lead to __bdos returning 0:
https://github.com/llvm/llvm-project/pull/110497

2. clang < 19.1.3 has a bug that can lead to __bdos being off by 4:
https://github.com/llvm/llvm-project/pull/112636

Fixes: c8248faf3ca2 ("Compiler Attributes: counted_by: Adjust name and identifier expansion")
Cc: stable@vger.kernel.org # 6.6.x: 16c31dd7fdf6: Compiler Attributes: counted_by: bump min gcc version
Cc: stable@vger.kernel.org # 6.6.x: 2993eb7a8d34: Compiler Attributes: counted_by: fixup clang URL
Cc: stable@vger.kernel.org # 6.6.x: 231dc3f0c936: lkdtm/bugs: Improve warning message for compilers without counted_by support
Cc: stable@vger.kernel.org # 6.6.x
Reported-by: Nathan Chancellor <nathan@kernel.org>
Closes: https://lore.kernel.org/all/20240913164630.GA4091534@thelio-3990X/
Reported-by: kernel test robot <oliver.sang@intel.com>
Closes: https://lore.kernel.org/oe-lkp/202409260949.a1254989-oliver.sang@intel.com
Link: https://lore.kernel.org/all/Zw8iawAF5W2uzGuh@archlinux/T/#m204c09f63c076586a02d194b87dffc7e81b8de7b
Suggested-by: Nathan Chancellor <nathan@kernel.org>
Signed-off-by: Jan Hendrik Farr <kernel@jfarr.cc>
Reviewed-by: Nathan Chancellor <nathan@kernel.org>
Tested-by: Nathan Chancellor <nathan@kernel.org>
Reviewed-by: Miguel Ojeda <ojeda@kernel.org>
Reviewed-by: Thorsten Blum <thorsten.blum@linux.dev>
Link: https://lore.kernel.org/r/20241029140036.577804-2-kernel@jfarr.cc
Signed-off-by: Kees Cook <kees@kernel.org>
---
 include/linux/compiler_attributes.h | 13 -------------
 include/linux/compiler_types.h      | 19 +++++++++++++++++++
 2 files changed, 19 insertions(+), 13 deletions(-)

(limited to 'include')

diff --git a/include/linux/compiler_attributes.h b/include/linux/compiler_attributes.h
index 32284cd26d52..c16d4199bf92 100644
--- a/include/linux/compiler_attributes.h
+++ b/include/linux/compiler_attributes.h
@@ -94,19 +94,6 @@
 # define __copy(symbol)
 #endif
 
-/*
- * Optional: only supported since gcc >= 15
- * Optional: only supported since clang >= 18
- *
- *   gcc: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108896
- * clang: https://github.com/llvm/llvm-project/pull/76348
- */
-#if __has_attribute(__counted_by__)
-# define __counted_by(member)		__attribute__((__counted_by__(member)))
-#else
-# define __counted_by(member)
-#endif
-
 /*
  * Optional: not supported by gcc
  * Optional: only supported since clang >= 14.0
diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h
index 1a957ea2f4fe..639be0f30b45 100644
--- a/include/linux/compiler_types.h
+++ b/include/linux/compiler_types.h
@@ -323,6 +323,25 @@ struct ftrace_likely_data {
 #define __no_sanitize_or_inline __always_inline
 #endif
 
+/*
+ * Optional: only supported since gcc >= 15
+ * Optional: only supported since clang >= 18
+ *
+ *   gcc: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108896
+ * clang: https://github.com/llvm/llvm-project/pull/76348
+ *
+ * __bdos on clang < 19.1.2 can erroneously return 0:
+ * https://github.com/llvm/llvm-project/pull/110497
+ *
+ * __bdos on clang < 19.1.3 can be off by 4:
+ * https://github.com/llvm/llvm-project/pull/112636
+ */
+#ifdef CONFIG_CC_HAS_COUNTED_BY
+# define __counted_by(member)		__attribute__((__counted_by__(member)))
+#else
+# define __counted_by(member)
+#endif
+
 /*
  * Apply __counted_by() when the Endianness matches to increase test coverage.
  */
-- 
cgit v1.2.3


From d7f36dc446e894e0f57b5f05c5628f03c5f9e2d2 Mon Sep 17 00:00:00 2001
From: John Garry <john.g.garry@oracle.com>
Date: Mon, 18 Nov 2024 10:50:15 +0000
Subject: block: Support atomic writes limits for stacked devices

Allow stacked devices to support atomic writes by aggregating the minimum
capability of all bottom devices.

Flag BLK_FEAT_ATOMIC_WRITES_STACKED is set for stacked devices which
have been enabled to support atomic writes.

Some things to note on the implementation:
- For simplicity, all bottom devices must have same atomic write boundary
  value (if any)
- The atomic write boundary must be a power-of-2 already, but this
  restriction could be relaxed. Furthermore, it is now required that the
  chunk sectors for a top device must be aligned with this boundary.
- If a bottom device atomic write unit min/max are not aligned with the
  top device chunk sectors, the top device atomic write unit min/max are
  reduced to a value which works for the chunk sectors.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: John Garry <john.g.garry@oracle.com>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Link: https://lore.kernel.org/r/20241118105018.1870052-3-john.g.garry@oracle.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blkdev.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 195db38fda16..31867de88213 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -333,6 +333,10 @@ typedef unsigned int __bitwise blk_features_t;
 #define BLK_FEAT_RAID_PARTIAL_STRIPES_EXPENSIVE \
 	((__force blk_features_t)(1u << 15))
 
+/* stacked device can/does support atomic writes */
+#define BLK_FEAT_ATOMIC_WRITES_STACKED \
+	((__force blk_features_t)(1u << 16))
+
 /*
  * Flags automatically inherited when stacking limits.
  */
-- 
cgit v1.2.3


From 9cf9f2e70bea4e66a2c8b8c4743489beb21258a8 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Fri, 15 Nov 2024 22:00:25 +0100
Subject: cpuidle: Change :enter_dead() driver callback return type to void

After a previous change, cpuidle_play_dead(), which is the only caller
of idle state :enter_dead() callbacks, ignores their return values, so
they may as well be void.

Suggested-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Reviewed-by: Gautham R. Shenoy <gautham.shenoy@amd.com>
Reviewed-by: Mario Limonciello <mario.limonciello@amd.com>
Link: https://patch.msgid.link/2285569.iZASKD2KPV@rjwysocki.net
---
 include/linux/cpuidle.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h
index 3183aeb7f5b4..a9ee4fe55dcf 100644
--- a/include/linux/cpuidle.h
+++ b/include/linux/cpuidle.h
@@ -61,7 +61,7 @@ struct cpuidle_state {
 			struct cpuidle_driver *drv,
 			int index);
 
-	int (*enter_dead) (struct cpuidle_device *dev, int index);
+	void (*enter_dead) (struct cpuidle_device *dev, int index);
 
 	/*
 	 * CPUs execute ->enter_s2idle with the local tick or entire timekeeping
-- 
cgit v1.2.3


From 5017ec667b48464a301d34db9b63fa5ab8765d49 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@linaro.org>
Date: Mon, 11 Nov 2024 13:54:09 +0300
Subject: kunit: skb: add gfp to kernel doc for kunit_zalloc_skb()

Kuan-Wei Chiu pointed out that the kernel doc for kunit_zalloc_skb()
needs to include the @gfp information.  Add it.

Reported-by: Kuan-Wei Chiu <visitorckw@gmail.com>
Closes: https://lore.kernel.org/all/Zy+VIXDPuU613fFd@visitorckw-System-Product-Name/
Signed-off-by: Dan Carpenter <dan.carpenter@linaro.org>
Reviewed-by: Kuan-Wei Chiu <visitorckw@gmail.com>
Reviewed-by: David Gow <davidgow@google.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 include/kunit/skbuff.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/kunit/skbuff.h b/include/kunit/skbuff.h
index 345e1e8f0312..07784694357c 100644
--- a/include/kunit/skbuff.h
+++ b/include/kunit/skbuff.h
@@ -20,8 +20,9 @@ static void kunit_action_kfree_skb(void *p)
  * kunit_zalloc_skb() - Allocate and initialize a resource managed skb.
  * @test: The test case to which the skb belongs
  * @len: size to allocate
+ * @gfp: allocation flags
  *
- * Allocate a new struct sk_buff with GFP_KERNEL, zero fill the give length
+ * Allocate a new struct sk_buff with gfp flags, zero fill the given length
  * and add it as a resource to the kunit test for automatic cleanup.
  *
  * Returns: newly allocated SKB, or %NULL on error
-- 
cgit v1.2.3


From 5a9d1b83e5334915c651604648c20a9fc64d47a3 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 19 Nov 2024 17:09:18 +0100
Subject: block: return unsigned int from bdev_io_opt

The underlying limit is defined as an unsigned int, so return that from
bdev_io_opt as well.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: John Garry <john.g.garry@oracle.com>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Link: https://lore.kernel.org/r/20241119160932.1327864-2-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blkdev.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 31867de88213..97c89c0c1398 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1275,7 +1275,7 @@ static inline unsigned int queue_io_opt(const struct request_queue *q)
 	return q->limits.io_opt;
 }
 
-static inline int bdev_io_opt(struct block_device *bdev)
+static inline unsigned int bdev_io_opt(struct block_device *bdev)
 {
 	return queue_io_opt(bdev_get_queue(bdev));
 }
-- 
cgit v1.2.3


From ed5db174cf39374215934f21b04639a7a1513023 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 19 Nov 2024 17:09:19 +0100
Subject: block: return unsigned int from queue_dma_alignment

The underlying limit is defined as an unsigned int, so return that from
queue_dma_alignment as well.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: John Garry <john.g.garry@oracle.com>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Link: https://lore.kernel.org/r/20241119160932.1327864-3-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blkdev.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 97c89c0c1398..d8de261c2b99 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1421,7 +1421,7 @@ static inline bool bdev_zone_is_seq(struct block_device *bdev, sector_t sector)
 	return is_seq;
 }
 
-static inline int queue_dma_alignment(const struct request_queue *q)
+static inline unsigned int queue_dma_alignment(const struct request_queue *q)
 {
 	return q->limits.dma_alignment;
 }
-- 
cgit v1.2.3


From e769489a54401d0c89555f7ad8672038b5c2b767 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 19 Nov 2024 17:09:20 +0100
Subject: block: return unsigned int from blk_lim_dma_alignment_and_pad

The underlying limits are defined as unsigned int, so return that from
blk_lim_dma_alignment_and_pad as well.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: John Garry <john.g.garry@oracle.com>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Link: https://lore.kernel.org/r/20241119160932.1327864-4-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blkdev.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index d8de261c2b99..d0d8190429c8 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1462,7 +1462,8 @@ static inline bool bdev_iter_is_aligned(struct block_device *bdev,
 				   bdev_logical_block_size(bdev) - 1);
 }
 
-static inline int blk_lim_dma_alignment_and_pad(struct queue_limits *lim)
+static inline unsigned int
+blk_lim_dma_alignment_and_pad(struct queue_limits *lim)
 {
 	return lim->dma_alignment | lim->dma_pad_mask;
 }
-- 
cgit v1.2.3


From da77d9b23700708d0d22a4407d32a8755a3596e8 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 19 Nov 2024 17:09:21 +0100
Subject: block: return bool from blk_rq_aligned

blk_rq_aligned returns a boolean condition, don't mascquerade it as int.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: John Garry <john.g.garry@oracle.com>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Link: https://lore.kernel.org/r/20241119160932.1327864-5-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blkdev.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index d0d8190429c8..5270117d54ac 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1468,7 +1468,7 @@ blk_lim_dma_alignment_and_pad(struct queue_limits *lim)
 	return lim->dma_alignment | lim->dma_pad_mask;
 }
 
-static inline int blk_rq_aligned(struct request_queue *q, unsigned long addr,
+static inline bool blk_rq_aligned(struct request_queue *q, unsigned long addr,
 				 unsigned int len)
 {
 	unsigned int alignment = blk_lim_dma_alignment_and_pad(&q->limits);
-- 
cgit v1.2.3


From e888810bc4f471f85989a0991aff28d2ac9f783b Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 19 Nov 2024 17:09:22 +0100
Subject: block: remove a duplicate definition for bdev_read_only

bdev_read_only is already defined as an inline function in blkdev.h.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: John Garry <john.g.garry@oracle.com>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Link: https://lore.kernel.org/r/20241119160932.1327864-6-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blkdev.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 5270117d54ac..8b4e4692e7fb 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1586,7 +1586,6 @@ static inline void bio_end_io_acct(struct bio *bio, unsigned long start_time)
 	return bio_end_io_acct_remapped(bio, start_time, bio->bi_bdev);
 }
 
-int bdev_read_only(struct block_device *bdev);
 int set_blocksize(struct file *file, int size);
 
 int lookup_bdev(const char *pathname, dev_t *dev);
-- 
cgit v1.2.3


From 766a71ef65bb217ed8bf1c068ac14c7d3c15d487 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 19 Nov 2024 17:09:23 +0100
Subject: block: return bool from get_disk_ro and bdev_read_only

get_disk_ro and bdev_read_only return boolean conditions,
don't masquerade them as int.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: John Garry <john.g.garry@oracle.com>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Link: https://lore.kernel.org/r/20241119160932.1327864-7-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blkdev.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 8b4e4692e7fb..08a727b40816 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -779,13 +779,13 @@ static inline void bdev_clear_flag(struct block_device *bdev, unsigned flag)
 	atomic_andnot(flag, &bdev->__bd_flags);
 }
 
-static inline int get_disk_ro(struct gendisk *disk)
+static inline bool get_disk_ro(struct gendisk *disk)
 {
 	return bdev_test_flag(disk->part0, BD_READ_ONLY) ||
 		test_bit(GD_READ_ONLY, &disk->state);
 }
 
-static inline int bdev_read_only(struct block_device *bdev)
+static inline bool bdev_read_only(struct block_device *bdev)
 {
 	return bdev_test_flag(bdev, BD_READ_ONLY) || get_disk_ro(bdev->bd_disk);
 }
-- 
cgit v1.2.3


From e7619f2a2f8f9b10feb784ec6b8ea5320ad3b97e Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Sun, 17 Nov 2024 16:57:56 +0000
Subject: KVM: arm64: vgic: Kill VGIC_MAX_PRIVATE definition

VGIC_MAX_PRIVATE is a pretty useless definition, and is better
replaced with VGIC_NR_PRIVATE_IRQS.

Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20241117165757.247686-4-maz@kernel.org
Signed-off-by: Oliver Upton <oliver.upton@linux.dev>
---
 include/kvm/arm_vgic.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index f5172549f9ba..3a8ccfda34d2 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -26,7 +26,6 @@
 #define VGIC_NR_SGIS		16
 #define VGIC_NR_PPIS		16
 #define VGIC_NR_PRIVATE_IRQS	(VGIC_NR_SGIS + VGIC_NR_PPIS)
-#define VGIC_MAX_PRIVATE	(VGIC_NR_PRIVATE_IRQS - 1)
 #define VGIC_MAX_SPI		1019
 #define VGIC_MAX_RESERVED	1023
 #define VGIC_MIN_LPI		8192
-- 
cgit v1.2.3


From f46b9cdb22f7a167c36b6bcddaef7e8aee2598fa Mon Sep 17 00:00:00 2001
From: David Wei <dw@davidwei.uk>
Date: Wed, 20 Nov 2024 14:14:52 -0800
Subject: io_uring: limit local tw done

Instead of eagerly running all available local tw, limit the amount of
local tw done to the max of IO_LOCAL_TW_DEFAULT_MAX (20) or wait_nr. The
value of 20 is chosen as a reasonable heuristic to allow enough work
batching but also keep latency down.

Add a retry_llist that maintains a list of local tw that couldn't be
done in time. No synchronisation is needed since it is only modified
within the task context.

Signed-off-by: David Wei <dw@davidwei.uk>
Link: https://lore.kernel.org/r/20241120221452.3762588-3-dw@davidwei.uk
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/io_uring_types.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index aa5f5ea98076..3e934feb3187 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -335,6 +335,7 @@ struct io_ring_ctx {
 	 */
 	struct {
 		struct llist_head	work_llist;
+		struct llist_head	retry_llist;
 		unsigned long		check_cq;
 		atomic_t		cq_wait_nr;
 		atomic_t		cq_timeouts;
-- 
cgit v1.2.3


From b88cbaaa6fa109c2eb455d8fe2a318de0d197ea2 Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bhelgaas@google.com>
Date: Fri, 15 Nov 2024 15:44:27 -0600
Subject: PCI/pwrctrl: Rename pwrctl files to pwrctrl
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

To slightly reduce confusion between "pwrctl" (the power controller and
power sequencing framework) and "bwctrl" (the bandwidth controller),
rename "pwrctl" to "pwrctrl" so they use the same "ctrl" suffix.

Rename drivers/pci/pwrctl/ to drivers/pci/pwrctrl/, including the related
MAINTAINERS, include file (include/linux/pci-pwrctl.h), Makefile, and
Kconfig changes.

This is the minimal rename of files only.  A subsequent commit will rename
functions and data structures.

Link: https://lore.kernel.org/r/20241115214428.2061153-2-helgaas@kernel.org
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Signed-off-by: Krzysztof Wilczyński <kwilczynski@kernel.org>
Acked-by: Krzysztof Wilczyński <kw@linux.com>
---
 include/linux/pci-pwrctl.h  | 54 ---------------------------------------------
 include/linux/pci-pwrctrl.h | 54 +++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 54 insertions(+), 54 deletions(-)
 delete mode 100644 include/linux/pci-pwrctl.h
 create mode 100644 include/linux/pci-pwrctrl.h

(limited to 'include')

diff --git a/include/linux/pci-pwrctl.h b/include/linux/pci-pwrctl.h
deleted file mode 100644
index 0d23dddf59ec..000000000000
--- a/include/linux/pci-pwrctl.h
+++ /dev/null
@@ -1,54 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright (C) 2024 Linaro Ltd.
- */
-
-#ifndef __PCI_PWRCTL_H__
-#define __PCI_PWRCTL_H__
-
-#include <linux/notifier.h>
-#include <linux/workqueue.h>
-
-struct device;
-struct device_link;
-
-/*
- * This is a simple framework for solving the issue of PCI devices that require
- * certain resources (regulators, GPIOs, clocks) to be enabled before the
- * device can actually be detected on the PCI bus.
- *
- * The idea is to reuse the platform bus to populate OF nodes describing the
- * PCI device and its resources, let these platform devices probe and enable
- * relevant resources and then trigger a rescan of the PCI bus allowing for the
- * same device (with a second associated struct device) to be registered with
- * the PCI subsystem.
- *
- * To preserve a correct hierarchy for PCI power management and device reset,
- * we create a device link between the power control platform device (parent)
- * and the supplied PCI device (child).
- */
-
-/**
- * struct pci_pwrctl - PCI device power control context.
- * @dev: Address of the power controlling device.
- *
- * An object of this type must be allocated by the PCI power control device and
- * passed to the pwrctl subsystem to trigger a bus rescan and setup a device
- * link with the device once it's up.
- */
-struct pci_pwrctl {
-	struct device *dev;
-
-	/* Private: don't use. */
-	struct notifier_block nb;
-	struct device_link *link;
-	struct work_struct work;
-};
-
-void pci_pwrctl_init(struct pci_pwrctl *pwrctl, struct device *dev);
-int pci_pwrctl_device_set_ready(struct pci_pwrctl *pwrctl);
-void pci_pwrctl_device_unset_ready(struct pci_pwrctl *pwrctl);
-int devm_pci_pwrctl_device_set_ready(struct device *dev,
-				     struct pci_pwrctl *pwrctl);
-
-#endif /* __PCI_PWRCTL_H__ */
diff --git a/include/linux/pci-pwrctrl.h b/include/linux/pci-pwrctrl.h
new file mode 100644
index 000000000000..0d23dddf59ec
--- /dev/null
+++ b/include/linux/pci-pwrctrl.h
@@ -0,0 +1,54 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2024 Linaro Ltd.
+ */
+
+#ifndef __PCI_PWRCTL_H__
+#define __PCI_PWRCTL_H__
+
+#include <linux/notifier.h>
+#include <linux/workqueue.h>
+
+struct device;
+struct device_link;
+
+/*
+ * This is a simple framework for solving the issue of PCI devices that require
+ * certain resources (regulators, GPIOs, clocks) to be enabled before the
+ * device can actually be detected on the PCI bus.
+ *
+ * The idea is to reuse the platform bus to populate OF nodes describing the
+ * PCI device and its resources, let these platform devices probe and enable
+ * relevant resources and then trigger a rescan of the PCI bus allowing for the
+ * same device (with a second associated struct device) to be registered with
+ * the PCI subsystem.
+ *
+ * To preserve a correct hierarchy for PCI power management and device reset,
+ * we create a device link between the power control platform device (parent)
+ * and the supplied PCI device (child).
+ */
+
+/**
+ * struct pci_pwrctl - PCI device power control context.
+ * @dev: Address of the power controlling device.
+ *
+ * An object of this type must be allocated by the PCI power control device and
+ * passed to the pwrctl subsystem to trigger a bus rescan and setup a device
+ * link with the device once it's up.
+ */
+struct pci_pwrctl {
+	struct device *dev;
+
+	/* Private: don't use. */
+	struct notifier_block nb;
+	struct device_link *link;
+	struct work_struct work;
+};
+
+void pci_pwrctl_init(struct pci_pwrctl *pwrctl, struct device *dev);
+int pci_pwrctl_device_set_ready(struct pci_pwrctl *pwrctl);
+void pci_pwrctl_device_unset_ready(struct pci_pwrctl *pwrctl);
+int devm_pci_pwrctl_device_set_ready(struct device *dev,
+				     struct pci_pwrctl *pwrctl);
+
+#endif /* __PCI_PWRCTL_H__ */
-- 
cgit v1.2.3


From 3f925cd6287401bbc9d568f56d796a69c8bd292a Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bhelgaas@google.com>
Date: Fri, 15 Nov 2024 15:44:28 -0600
Subject: PCI/pwrctrl: Rename pwrctrl functions and structures
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Rename pwrctrl functions and structures from "pwrctl" to "pwrctrl" to match
the similar file renames.

Link: https://lore.kernel.org/r/20241115214428.2061153-3-helgaas@kernel.org
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Signed-off-by: Krzysztof Wilczyński <kwilczynski@kernel.org>
Acked-by: Krzysztof Wilczyński <kw@linux.com>
---
 include/linux/pci-pwrctrl.h | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/include/linux/pci-pwrctrl.h b/include/linux/pci-pwrctrl.h
index 0d23dddf59ec..7d439b0675e9 100644
--- a/include/linux/pci-pwrctrl.h
+++ b/include/linux/pci-pwrctrl.h
@@ -3,8 +3,8 @@
  * Copyright (C) 2024 Linaro Ltd.
  */
 
-#ifndef __PCI_PWRCTL_H__
-#define __PCI_PWRCTL_H__
+#ifndef __PCI_PWRCTRL_H__
+#define __PCI_PWRCTRL_H__
 
 #include <linux/notifier.h>
 #include <linux/workqueue.h>
@@ -29,14 +29,14 @@ struct device_link;
  */
 
 /**
- * struct pci_pwrctl - PCI device power control context.
+ * struct pci_pwrctrl - PCI device power control context.
  * @dev: Address of the power controlling device.
  *
  * An object of this type must be allocated by the PCI power control device and
- * passed to the pwrctl subsystem to trigger a bus rescan and setup a device
+ * passed to the pwrctrl subsystem to trigger a bus rescan and setup a device
  * link with the device once it's up.
  */
-struct pci_pwrctl {
+struct pci_pwrctrl {
 	struct device *dev;
 
 	/* Private: don't use. */
@@ -45,10 +45,10 @@ struct pci_pwrctl {
 	struct work_struct work;
 };
 
-void pci_pwrctl_init(struct pci_pwrctl *pwrctl, struct device *dev);
-int pci_pwrctl_device_set_ready(struct pci_pwrctl *pwrctl);
-void pci_pwrctl_device_unset_ready(struct pci_pwrctl *pwrctl);
-int devm_pci_pwrctl_device_set_ready(struct device *dev,
-				     struct pci_pwrctl *pwrctl);
+void pci_pwrctrl_init(struct pci_pwrctrl *pwrctrl, struct device *dev);
+int pci_pwrctrl_device_set_ready(struct pci_pwrctrl *pwrctrl);
+void pci_pwrctrl_device_unset_ready(struct pci_pwrctrl *pwrctrl);
+int devm_pci_pwrctrl_device_set_ready(struct device *dev,
+				     struct pci_pwrctrl *pwrctrl);
 
-#endif /* __PCI_PWRCTL_H__ */
+#endif /* __PCI_PWRCTRL_H__ */
-- 
cgit v1.2.3


From 3273d8ad947dea925a65a78ca29e5351c960c801 Mon Sep 17 00:00:00 2001
From: Chao Yu <chao@kernel.org>
Date: Fri, 8 Nov 2024 09:25:54 +0800
Subject: f2fs: fix to do cast in F2FS_{BLK_TO_BYTES, BTYES_TO_BLK} to avoid
 overflow

It missed to cast variable to unsigned long long type before
bit shift, which will cause overflow, fix it.

Fixes: f7ef9b83b583 ("f2fs: introduce macros to convert bytes and blocks in f2fs")
Signed-off-by: Chao Yu <chao@kernel.org>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 include/linux/f2fs_fs.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h
index b0b821edfd97..3b2ad444c002 100644
--- a/include/linux/f2fs_fs.h
+++ b/include/linux/f2fs_fs.h
@@ -24,10 +24,10 @@
 #define NEW_ADDR		((block_t)-1)	/* used as block_t addresses */
 #define COMPRESS_ADDR		((block_t)-2)	/* used as compressed data flag */
 
-#define F2FS_BYTES_TO_BLK(bytes)	((bytes) >> F2FS_BLKSIZE_BITS)
-#define F2FS_BLK_TO_BYTES(blk)		((blk) << F2FS_BLKSIZE_BITS)
+#define F2FS_BYTES_TO_BLK(bytes)	((unsigned long long)(bytes) >> F2FS_BLKSIZE_BITS)
+#define F2FS_BLK_TO_BYTES(blk)		((unsigned long long)(blk) << F2FS_BLKSIZE_BITS)
 #define F2FS_BLK_END_BYTES(blk)		(F2FS_BLK_TO_BYTES(blk + 1) - 1)
-#define F2FS_BLK_ALIGN(x)			(F2FS_BYTES_TO_BLK((x) + F2FS_BLKSIZE - 1))
+#define F2FS_BLK_ALIGN(x)		(F2FS_BYTES_TO_BLK((x) + F2FS_BLKSIZE - 1))
 
 /* 0, 1(node nid), 2(meta nid) are reserved node id */
 #define F2FS_RESERVED_NODE_NUM		3
-- 
cgit v1.2.3


From 77569f785c8624fa4189795fb52e635a973672e5 Mon Sep 17 00:00:00 2001
From: Zhiguo Niu <zhiguo.niu@unisoc.com>
Date: Fri, 8 Nov 2024 09:25:56 +0800
Subject: f2fs: fix to adjust appropriate length for fiemap

If user give a file size as "length" parameter for fiemap
operations, but if this size is non-block size aligned,
it will show 2 segments fiemap results even this whole file
is contiguous on disk, such as the following results:

 ./f2fs_io fiemap 0 19034 ylog/analyzer.py
Fiemap: offset = 0 len = 19034
        logical addr.    physical addr.   length           flags
0       0000000000000000 0000000020baa000 0000000000004000 00001000
1       0000000000004000 0000000020bae000 0000000000001000 00001001

after this patch:
./f2fs_io fiemap 0 19034 ylog/analyzer.py
Fiemap: offset = 0 len = 19034
    logical addr.    physical addr.   length           flags
0    0000000000000000 00000000315f3000 0000000000005000 00001001

Signed-off-by: Zhiguo Niu <zhiguo.niu@unisoc.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 include/linux/f2fs_fs.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h
index 3b2ad444c002..c24f8bc01045 100644
--- a/include/linux/f2fs_fs.h
+++ b/include/linux/f2fs_fs.h
@@ -24,6 +24,7 @@
 #define NEW_ADDR		((block_t)-1)	/* used as block_t addresses */
 #define COMPRESS_ADDR		((block_t)-2)	/* used as compressed data flag */
 
+#define F2FS_BLKSIZE_MASK		(F2FS_BLKSIZE - 1)
 #define F2FS_BYTES_TO_BLK(bytes)	((unsigned long long)(bytes) >> F2FS_BLKSIZE_BITS)
 #define F2FS_BLK_TO_BYTES(blk)		((unsigned long long)(blk) << F2FS_BLKSIZE_BITS)
 #define F2FS_BLK_END_BYTES(blk)		(F2FS_BLK_TO_BYTES(blk + 1) - 1)
-- 
cgit v1.2.3


From 7d2f9f870f26798699585f96fa7a2a2cab38dc02 Mon Sep 17 00:00:00 2001
From: Guixin Liu <kanie@linux.alibaba.com>
Date: Wed, 20 Nov 2024 15:10:07 +0800
Subject: nvme: introduce change ptpl and iekey definition

This is for the next tuning pr code more readble patch, make
linux/nvme.h's changes separately.

Signed-off-by: Guixin Liu <kanie@linux.alibaba.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Keith Busch <kbusch@kernel.org>
---
 include/linux/nvme.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include')

diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 2baf9a80b470..13377dde4527 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -2171,4 +2171,13 @@ enum nvme_pr_release_action {
 	NVME_PR_RELEASE_ACT_CLEAR		= 1,
 };
 
+enum nvme_pr_change_ptpl {
+	NVME_PR_CPTPL_NO_CHANGE			= 0,
+	NVME_PR_CPTPL_RESV			= 1 << 30,
+	NVME_PR_CPTPL_CLEARED			= 2 << 30,
+	NVME_PR_CPTPL_PERSIST			= 3 << 30,
+};
+
+#define NVME_PR_IGNORE_KEY (1 << 3)
+
 #endif /* _LINUX_NVME_H */
-- 
cgit v1.2.3


From 64214c2b95364d26cdff045d8bbefd37380edbe1 Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@nvidia.com>
Date: Thu, 14 Nov 2024 15:55:30 -0400
Subject: iommu: Add ops->domain_alloc_nested()

It turns out all the drivers that are using this immediately call into
another function, so just make that function directly into the op. This
makes paging=NULL for domain_alloc_user and we can remove the argument in
the next patch.

The function mirrors the similar op in the viommu that allocates a nested
domain on top of the viommu's nesting parent. This version supports cases
where a viommu is not being used.

Link: https://patch.msgid.link/r/1-v1-c252ebdeb57b+329-iommu_paging_flags_jgg@nvidia.com
Reviewed-by: Lu Baolu <baolu.lu@linux.intel.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 include/linux/iommu.h | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index d6aaaec3caf4..0472cc124519 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -559,15 +559,13 @@ iommu_copy_struct_from_full_user_array(void *kdst, size_t kdst_entry_size,
  *                the caller iommu_domain_alloc() returns.
  * @domain_alloc_user: Allocate an iommu domain corresponding to the input
  *                     parameters as defined in include/uapi/linux/iommufd.h.
- *                     Upon success, if the @user_data is valid and the @parent
- *                     points to a kernel-managed domain, the new domain must be
- *                     IOMMU_DOMAIN_NESTED type; otherwise, the @parent must be
- *                     NULL while the @user_data can be optionally provided, the
+ *                     The @user_data can be optionally provided, the
  *                     new domain must support __IOMMU_DOMAIN_PAGING.
  *                     Upon failure, ERR_PTR must be returned.
  * @domain_alloc_paging: Allocate an iommu_domain that can be used for
  *                       UNMANAGED, DMA, and DMA_FQ domain types.
  * @domain_alloc_sva: Allocate an iommu_domain for Shared Virtual Addressing.
+ * @domain_alloc_nested: Allocate an iommu_domain for nested translation.
  * @probe_device: Add device to iommu driver handling
  * @release_device: Remove device from iommu driver handling
  * @probe_finalize: Do final setup work after the device is added to an IOMMU
@@ -622,6 +620,9 @@ struct iommu_ops {
 	struct iommu_domain *(*domain_alloc_paging)(struct device *dev);
 	struct iommu_domain *(*domain_alloc_sva)(struct device *dev,
 						 struct mm_struct *mm);
+	struct iommu_domain *(*domain_alloc_nested)(
+		struct device *dev, struct iommu_domain *parent, u32 flags,
+		const struct iommu_user_data *user_data);
 
 	struct iommu_device *(*probe_device)(struct device *dev);
 	void (*release_device)(struct device *dev);
-- 
cgit v1.2.3


From d53764723ecd639a0cc0c5ad24146847fc09f78d Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@nvidia.com>
Date: Thu, 14 Nov 2024 15:55:31 -0400
Subject: iommu: Rename ops->domain_alloc_user() to domain_alloc_paging_flags()

Now that the main domain allocating path is calling this function it
doesn't make sense to leave it named _user. Change the name to
alloc_paging_flags() to mirror the new iommu_paging_domain_alloc_flags()
function.

A driver should implement only one of ops->domain_alloc_paging() or
ops->domain_alloc_paging_flags(). The former is a simpler interface with
less boiler plate that the majority of drivers use. The latter is for
drivers with a greater feature set (PASID, multiple page table support,
advanced iommufd support, nesting, etc). Additional patches will be needed
to achieve this.

Link: https://patch.msgid.link/r/2-v1-c252ebdeb57b+329-iommu_paging_flags_jgg@nvidia.com
Reviewed-by: Lu Baolu <baolu.lu@linux.intel.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 include/linux/iommu.h | 20 ++++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 0472cc124519..1e3308e89996 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -557,13 +557,17 @@ iommu_copy_struct_from_full_user_array(void *kdst, size_t kdst_entry_size,
  * @domain_alloc: allocate and return an iommu domain if success. Otherwise
  *                NULL is returned. The domain is not fully initialized until
  *                the caller iommu_domain_alloc() returns.
- * @domain_alloc_user: Allocate an iommu domain corresponding to the input
- *                     parameters as defined in include/uapi/linux/iommufd.h.
- *                     The @user_data can be optionally provided, the
- *                     new domain must support __IOMMU_DOMAIN_PAGING.
- *                     Upon failure, ERR_PTR must be returned.
+ * @domain_alloc_paging_flags: Allocate an iommu domain corresponding to the
+ *                     input parameters as defined in
+ *                     include/uapi/linux/iommufd.h. The @user_data can be
+ *                     optionally provided, the new domain must support
+ *                     __IOMMU_DOMAIN_PAGING. Upon failure, ERR_PTR must be
+ *                     returned.
  * @domain_alloc_paging: Allocate an iommu_domain that can be used for
- *                       UNMANAGED, DMA, and DMA_FQ domain types.
+ *                       UNMANAGED, DMA, and DMA_FQ domain types. This is the
+ *                       same as invoking domain_alloc_paging_flags() with
+ *                       @flags=0, @user_data=NULL. A driver should implement
+ *                       only one of the two ops.
  * @domain_alloc_sva: Allocate an iommu_domain for Shared Virtual Addressing.
  * @domain_alloc_nested: Allocate an iommu_domain for nested translation.
  * @probe_device: Add device to iommu driver handling
@@ -614,8 +618,8 @@ struct iommu_ops {
 
 	/* Domain allocation and freeing by the iommu driver */
 	struct iommu_domain *(*domain_alloc)(unsigned iommu_domain_type);
-	struct iommu_domain *(*domain_alloc_user)(
-		struct device *dev, u32 flags, struct iommu_domain *parent,
+	struct iommu_domain *(*domain_alloc_paging_flags)(
+		struct device *dev, u32 flags,
 		const struct iommu_user_data *user_data);
 	struct iommu_domain *(*domain_alloc_paging)(struct device *dev);
 	struct iommu_domain *(*domain_alloc_sva)(struct device *dev,
-- 
cgit v1.2.3


From 9d8a2b033db179bef9b6b5bad492f611a0fe89b7 Mon Sep 17 00:00:00 2001
From: Miao Wang <shankerwangmiao@gmail.com>
Date: Thu, 21 Nov 2024 22:25:21 +0800
Subject: ACPI: introduce acpi_arch_init()

To avoid arch-specific code in general ACPI initialization flow,
introduce a weak symbol acpi_arch_init().

Currently, arm64 and riscv can utillize this to insert their
arch-specific flow.

In the future, other architectures can also have a chance to define
their own arch-specific ACPI initialization process if necessary.

Reviewed-by: Sunil V L <sunilvl@ventanamicro.com>
Reviewed-by: Sudeep Holla <sudeep.holla@arm.com>
Acked-by: Hanjun Guo <guohanjun@huawei.com>
Signed-off-by: Miao Wang <shankerwangmiao@gmail.com>
Link: https://patch.msgid.link/20241121-intro-acpi-arch-init-v4-1-b1fb517e7d8b@gmail.com
[ rjw: Subject and changelog edits ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/acpi.h | 12 +-----------
 1 file changed, 1 insertion(+), 11 deletions(-)

(limited to 'include')

diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 7dd24acd9ffe..05f39fbfa485 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -1530,17 +1530,7 @@ static inline int find_acpi_cpu_topology_hetero_id(unsigned int cpu)
 }
 #endif
 
-#ifdef CONFIG_ARM64
-void acpi_arm_init(void);
-#else
-static inline void acpi_arm_init(void) { }
-#endif
-
-#ifdef CONFIG_RISCV
-void acpi_riscv_init(void);
-#else
-static inline void acpi_riscv_init(void) { }
-#endif
+void acpi_arch_init(void);
 
 #ifdef CONFIG_ACPI_PCC
 void acpi_init_pcc(void);
-- 
cgit v1.2.3


From 0172afefbfbdd8987787c926b40b68400bd1c3d1 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 22 Nov 2024 21:28:49 +0100
Subject: tracing: Record task flag NEED_RESCHED_LAZY.

The scheduler added NEED_RESCHED_LAZY scheduling. Record this state as
part of trace flags and expose it in the need_resched field.

Record and expose NEED_RESCHED_LAZY.

[bigeasy: Commit description, documentation bits.]

Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Link: https://lore.kernel.org/20241122202849.7DfYpJR0@linutronix.de
Reviewed-by: Ankur Arora <ankur.a.arora@oracle.com>
Reviewed-by: Steven Rostedt (Google) <rostedt@goodmis.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 include/linux/trace_events.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index 016b29a56c87..2a5df5b62cfc 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -184,6 +184,7 @@ unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status);
 
 enum trace_flag_type {
 	TRACE_FLAG_IRQS_OFF		= 0x01,
+	TRACE_FLAG_NEED_RESCHED_LAZY	= 0x02,
 	TRACE_FLAG_NEED_RESCHED		= 0x04,
 	TRACE_FLAG_HARDIRQ		= 0x08,
 	TRACE_FLAG_SOFTIRQ		= 0x10,
-- 
cgit v1.2.3


From 306d40aa53b671ea72c3bf272f85ccb6c1b0bc65 Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Date: Sat, 23 Nov 2024 10:30:27 -0500
Subject: tracing: Move it_func[0] comment to the relevant context

When introducing __DO_TRACE_CALL(), the iteration over it_func moved
from __DO_TRACE() to __tracepoint_iter_##_name(), but the comment
relevant for this iterator was left in its original location.

Move the comment to the relevant context.

Fixes: d25e37d89dd2 ("tracepoint: Optimize using static_call()")
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Michael Jeanson <mjeanson@efficios.com>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Yonghong Song <yhs@fb.com>
Cc: Paul E. McKenney <paulmck@kernel.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Andrii Nakryiko <andrii.nakryiko@gmail.com>
Cc: bpf@vger.kernel.org
Cc: Joel Fernandes <joel@joelfernandes.org>
Cc: Jordan Rife <jrife@google.com>
Link: https://lore.kernel.org/20241123153031.2884933-2-mathieu.desnoyers@efficios.com
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 include/linux/tracepoint.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index 425123e921ac..d390e8cabf02 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -210,9 +210,6 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p)
 #endif /* CONFIG_HAVE_STATIC_CALL */
 
 /*
- * it_func[0] is never NULL because there is at least one element in the array
- * when the array itself is non NULL.
- *
  * With @syscall=0, the tracepoint callback array dereference is
  * protected by disabling preemption.
  * With @syscall=1, the tracepoint callback array dereference is
@@ -316,6 +313,9 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p)
  * We have no guarantee that gcc and the linker won't up-align the tracepoint
  * structures, so we create an array of pointers that will be used for iteration
  * on the tracepoints.
+ *
+ * it_func[0] is never NULL because there is at least one element in the array
+ * when the array itself is non NULL.
  */
 #define __DEFINE_TRACE_EXT(_name, _ext, proto, args)			\
 	static const char __tpstrtab_##_name[]				\
-- 
cgit v1.2.3


From 89c7e17f303e2d56d50a162bb65d786d3a43963e Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Date: Sat, 23 Nov 2024 10:30:28 -0500
Subject: tracing: Remove __idx variable from __DO_TRACE

Since the removal of SRCU-protected tracepoints, the __idx variable in
__DO_TRACE is unused. Remove this variable.

Fixes: 48bcda684823 ("tracing: Remove definition of trace_*_rcuidle()")
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Michael Jeanson <mjeanson@efficios.com>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Yonghong Song <yhs@fb.com>
Cc: Paul E. McKenney <paulmck@kernel.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Andrii Nakryiko <andrii.nakryiko@gmail.com>
Cc: bpf@vger.kernel.org
Cc: Joel Fernandes <joel@joelfernandes.org>
Cc: Jordan Rife <jrife@google.com>
Link: https://lore.kernel.org/20241123153031.2884933-3-mathieu.desnoyers@efficios.com
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 include/linux/tracepoint.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index d390e8cabf02..867f3c1ac7dc 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -218,8 +218,6 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p)
  */
 #define __DO_TRACE(name, args, cond, syscall)				\
 	do {								\
-		int __maybe_unused __idx = 0;				\
-									\
 		if (!(cond))						\
 			return;						\
 									\
-- 
cgit v1.2.3


From 7c565a4d4e437034755a06b7d61361add3b98882 Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Date: Sat, 23 Nov 2024 10:30:29 -0500
Subject: rcupdate_trace: Define rcu_tasks_trace lock guard

Define a rcu_tasks_trace lock guard for use by the syscall enter/exit
tracepoints.

Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Michael Jeanson <mjeanson@efficios.com>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Yonghong Song <yhs@fb.com>
Cc: Paul E. McKenney <paulmck@kernel.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Andrii Nakryiko <andrii.nakryiko@gmail.com>
Cc: bpf@vger.kernel.org
Cc: Joel Fernandes <joel@joelfernandes.org>
Cc: Jordan Rife <jrife@google.com>
Cc: linux-trace-kernel@vger.kernel.org
Link: https://lore.kernel.org/20241123153031.2884933-4-mathieu.desnoyers@efficios.com
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 include/linux/rcupdate_trace.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include')

diff --git a/include/linux/rcupdate_trace.h b/include/linux/rcupdate_trace.h
index eda493200663..e6c44eb428ab 100644
--- a/include/linux/rcupdate_trace.h
+++ b/include/linux/rcupdate_trace.h
@@ -10,6 +10,7 @@
 
 #include <linux/sched.h>
 #include <linux/rcupdate.h>
+#include <linux/cleanup.h>
 
 extern struct lockdep_map rcu_trace_lock_map;
 
@@ -98,4 +99,8 @@ static inline void rcu_read_lock_trace(void) { BUG(); }
 static inline void rcu_read_unlock_trace(void) { BUG(); }
 #endif /* #ifdef CONFIG_TASKS_TRACE_RCU */
 
+DEFINE_LOCK_GUARD_0(rcu_tasks_trace,
+	rcu_read_lock_trace(),
+	rcu_read_unlock_trace())
+
 #endif /* __LINUX_RCUPDATE_TRACE_H */
-- 
cgit v1.2.3


From 98bf0fbb65226809a8df4d1948c5b6cf0c91590f Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Date: Sat, 23 Nov 2024 10:30:30 -0500
Subject: tracing: Remove conditional locking from __DO_TRACE()

Remove conditional locking by moving the __DO_TRACE() code into
trace_##name().

When the faultable syscall tracepoints were implemented, __DO_TRACE()
had a rcuidle argument which selected between SRCU and preempt disable.
Therefore, the RCU tasks trace protection for faultable syscall
tracepoints was introduced using the same pattern.

At that point, it did not appear obvious that this feedback from Linus [1]
applied here as well, because the __DO_TRACE() modification was
extending a pre-existing pattern.

Shortly before pulling the faultable syscall tracepoints modifications,
Steven removed the rcuidle argument and SRCU protection scheme entirely
from tracepoint.h:

commit 48bcda684823 ("tracing: Remove definition of trace_*_rcuidle()")

This required a rebase of the faultable syscall tracepoints series,
which missed a perfect opportunity to integrate the prior recommendation
from Linus.

In response to the pull request, Linus pointed out [2] that he was not
pleased by the implementation, expecting this to be fixed in a follow up
patch series.

Move __DO_TRACE() code into trace_##name() within each of
__DECLARE_TRACE() and __DECLARE_TRACE_SYSCALL(). Use a scoped guard
to guard the preempt disable notrace and RCU tasks trace critical
sections.

Link: https://lore.kernel.org/all/CAHk-=wggDLDeTKbhb5hh--x=-DQd69v41137M72m6NOTmbD-cw@mail.gmail.com/ [1]
Link: https://lore.kernel.org/lkml/CAHk-=witPrLcu22dZ93VCyRQonS7+-dFYhQbna=KBa-TAhayMw@mail.gmail.com/ [2]

Fixes: a363d27cdbc2 ("tracing: Allow system call tracepoints to handle page faults")
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Michael Jeanson <mjeanson@efficios.com>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Yonghong Song <yhs@fb.com>
Cc: Paul E. McKenney <paulmck@kernel.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Andrii Nakryiko <andrii.nakryiko@gmail.com>
Cc: bpf@vger.kernel.org
Cc: Joel Fernandes <joel@joelfernandes.org>
Cc: Jordan Rife <jrife@google.com>
Cc: linux-trace-kernel@vger.kernel.org
Link: https://lore.kernel.org/20241123153031.2884933-5-mathieu.desnoyers@efficios.com
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 include/linux/tracepoint.h | 45 ++++++++++++---------------------------------
 1 file changed, 12 insertions(+), 33 deletions(-)

(limited to 'include')

diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index 867f3c1ac7dc..832f49b56b1f 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -209,31 +209,6 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p)
 #define __DO_TRACE_CALL(name, args)	__traceiter_##name(NULL, args)
 #endif /* CONFIG_HAVE_STATIC_CALL */
 
-/*
- * With @syscall=0, the tracepoint callback array dereference is
- * protected by disabling preemption.
- * With @syscall=1, the tracepoint callback array dereference is
- * protected by Tasks Trace RCU, which allows probes to handle page
- * faults.
- */
-#define __DO_TRACE(name, args, cond, syscall)				\
-	do {								\
-		if (!(cond))						\
-			return;						\
-									\
-		if (syscall)						\
-			rcu_read_lock_trace();				\
-		else							\
-			preempt_disable_notrace();			\
-									\
-		__DO_TRACE_CALL(name, TP_ARGS(args));			\
-									\
-		if (syscall)						\
-			rcu_read_unlock_trace();			\
-		else							\
-			preempt_enable_notrace();			\
-	} while (0)
-
 /*
  * Make sure the alignment of the structure in the __tracepoints section will
  * not add unwanted padding between the beginning of the section and the
@@ -282,10 +257,12 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p)
 	__DECLARE_TRACE_COMMON(name, PARAMS(proto), PARAMS(args), cond, PARAMS(data_proto)) \
 	static inline void trace_##name(proto)				\
 	{								\
-		if (static_branch_unlikely(&__tracepoint_##name.key))	\
-			__DO_TRACE(name,				\
-				TP_ARGS(args),				\
-				TP_CONDITION(cond), 0);			\
+		if (static_branch_unlikely(&__tracepoint_##name.key)) { \
+			if (cond) {					\
+				scoped_guard(preempt_notrace)		\
+					__DO_TRACE_CALL(name, TP_ARGS(args)); \
+			}						\
+		}							\
 		if (IS_ENABLED(CONFIG_LOCKDEP) && (cond)) {		\
 			WARN_ONCE(!rcu_is_watching(),			\
 				  "RCU not watching for tracepoint");	\
@@ -297,10 +274,12 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p)
 	static inline void trace_##name(proto)				\
 	{								\
 		might_fault();						\
-		if (static_branch_unlikely(&__tracepoint_##name.key))	\
-			__DO_TRACE(name,				\
-				TP_ARGS(args),				\
-				TP_CONDITION(cond), 1);			\
+		if (static_branch_unlikely(&__tracepoint_##name.key)) {	\
+			if (cond) {					\
+				scoped_guard(rcu_tasks_trace)		\
+					__DO_TRACE_CALL(name, TP_ARGS(args)); \
+			}						\
+		}							\
 		if (IS_ENABLED(CONFIG_LOCKDEP) && (cond)) {		\
 			WARN_ONCE(!rcu_is_watching(),			\
 				  "RCU not watching for tracepoint");	\
-- 
cgit v1.2.3


From ef0d4186083127d2f99ed04e051fd94ba061d253 Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Date: Sat, 23 Nov 2024 10:30:31 -0500
Subject: tracing: Remove cond argument from __DECLARE_TRACE_SYSCALL

Syscall tracepoints do not require a "cond" argument, because they are
meant to be used only for sys_enter and sys_exit instrumentation, which
don't require condition evaluation.

Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Michael Jeanson <mjeanson@efficios.com>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Yonghong Song <yhs@fb.com>
Cc: Paul E. McKenney <paulmck@kernel.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Andrii Nakryiko <andrii.nakryiko@gmail.com>
Cc: bpf@vger.kernel.org
Cc: Joel Fernandes <joel@joelfernandes.org>
Cc: Jordan Rife <jrife@google.com>
Cc: linux-trace-kernel@vger.kernel.org
Link: https://lore.kernel.org/20241123153031.2884933-6-mathieu.desnoyers@efficios.com
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 include/linux/tracepoint.h | 25 +++++++++++++------------
 1 file changed, 13 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index 832f49b56b1f..b2633a72e871 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -220,7 +220,7 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p)
  * site if it is not watching, as it will need to be active when the
  * tracepoint is enabled.
  */
-#define __DECLARE_TRACE_COMMON(name, proto, args, cond, data_proto)	\
+#define __DECLARE_TRACE_COMMON(name, proto, args, data_proto)		\
 	extern int __traceiter_##name(data_proto);			\
 	DECLARE_STATIC_CALL(tp_func_##name, __traceiter_##name);	\
 	extern struct tracepoint __tracepoint_##name;			\
@@ -254,7 +254,7 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p)
 	}
 
 #define __DECLARE_TRACE(name, proto, args, cond, data_proto)		\
-	__DECLARE_TRACE_COMMON(name, PARAMS(proto), PARAMS(args), cond, PARAMS(data_proto)) \
+	__DECLARE_TRACE_COMMON(name, PARAMS(proto), PARAMS(args), PARAMS(data_proto)) \
 	static inline void trace_##name(proto)				\
 	{								\
 		if (static_branch_unlikely(&__tracepoint_##name.key)) { \
@@ -269,18 +269,16 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p)
 		}							\
 	}
 
-#define __DECLARE_TRACE_SYSCALL(name, proto, args, cond, data_proto)	\
-	__DECLARE_TRACE_COMMON(name, PARAMS(proto), PARAMS(args), cond, PARAMS(data_proto)) \
+#define __DECLARE_TRACE_SYSCALL(name, proto, args, data_proto)		\
+	__DECLARE_TRACE_COMMON(name, PARAMS(proto), PARAMS(args), PARAMS(data_proto)) \
 	static inline void trace_##name(proto)				\
 	{								\
 		might_fault();						\
 		if (static_branch_unlikely(&__tracepoint_##name.key)) {	\
-			if (cond) {					\
-				scoped_guard(rcu_tasks_trace)		\
-					__DO_TRACE_CALL(name, TP_ARGS(args)); \
-			}						\
+			scoped_guard(rcu_tasks_trace)			\
+				__DO_TRACE_CALL(name, TP_ARGS(args));	\
 		}							\
-		if (IS_ENABLED(CONFIG_LOCKDEP) && (cond)) {		\
+		if (IS_ENABLED(CONFIG_LOCKDEP)) {			\
 			WARN_ONCE(!rcu_is_watching(),			\
 				  "RCU not watching for tracepoint");	\
 		}							\
@@ -363,7 +361,7 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p)
 
 
 #else /* !TRACEPOINTS_ENABLED */
-#define __DECLARE_TRACE(name, proto, args, cond, data_proto)		\
+#define __DECLARE_TRACE_COMMON(name, proto, args, data_proto)		\
 	static inline void trace_##name(proto)				\
 	{ }								\
 	static inline int						\
@@ -387,7 +385,11 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p)
 		return false;						\
 	}
 
-#define __DECLARE_TRACE_SYSCALL	__DECLARE_TRACE
+#define __DECLARE_TRACE(name, proto, args, cond, data_proto)		\
+	__DECLARE_TRACE_COMMON(name, PARAMS(proto), PARAMS(args), PARAMS(data_proto))
+
+#define __DECLARE_TRACE_SYSCALL(name, proto, args, data_proto)		\
+	__DECLARE_TRACE_COMMON(name, PARAMS(proto), PARAMS(args), PARAMS(data_proto))
 
 #define DEFINE_TRACE_FN(name, reg, unreg, proto, args)
 #define DEFINE_TRACE_SYSCALL(name, reg, unreg, proto, args)
@@ -453,7 +455,6 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p)
 
 #define DECLARE_TRACE_SYSCALL(name, proto, args)			\
 	__DECLARE_TRACE_SYSCALL(name, PARAMS(proto), PARAMS(args),	\
-				cpu_online(raw_smp_processor_id()),	\
 				PARAMS(void *__data, proto))
 
 #define TRACE_EVENT_FLAGS(event, flag)
-- 
cgit v1.2.3


From 7f9e19f207be0c534d517d65e01417ba968cdd34 Mon Sep 17 00:00:00 2001
From: Adam Young <admiyo@os.amperecomputing.com>
Date: Wed, 20 Nov 2024 14:02:14 -0500
Subject: mailbox: pcc: Check before sending MCTP PCC response ACK

Type 4 PCC channels have an option to send back a response
to the platform when they are done processing the request.
The flag to indicate whether or not to respond is inside
the message body, and thus is not available to the pcc
mailbox.

If the flag is not set, still set command completion
bit after processing message.

In order to read the flag, this patch maps the shared
buffer to virtual memory. To avoid duplication of mapping
the shared buffer is then made available to be used by
the driver that uses the mailbox.

Signed-off-by: Adam Young <admiyo@os.amperecomputing.com>
Cc: Sudeep Holla <sudeep.holla@arm.com>
Signed-off-by: Jassi Brar <jassisinghbrar@gmail.com>
---
 include/acpi/pcc.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include')

diff --git a/include/acpi/pcc.h b/include/acpi/pcc.h
index 9b373d172a77..699c1a37b8e7 100644
--- a/include/acpi/pcc.h
+++ b/include/acpi/pcc.h
@@ -12,6 +12,7 @@
 struct pcc_mbox_chan {
 	struct mbox_chan *mchan;
 	u64 shmem_base_addr;
+	void __iomem *shmem;
 	u64 shmem_size;
 	u32 latency;
 	u32 max_access_rate;
@@ -31,11 +32,13 @@ struct pcc_mbox_chan {
 #define PCC_CMD_COMPLETION_NOTIFY	BIT(0)
 
 #define MAX_PCC_SUBSPACES	256
+#define PCC_ACK_FLAG_MASK	0x1
 
 #ifdef CONFIG_PCC
 extern struct pcc_mbox_chan *
 pcc_mbox_request_channel(struct mbox_client *cl, int subspace_id);
 extern void pcc_mbox_free_channel(struct pcc_mbox_chan *chan);
+extern int pcc_mbox_ioremap(struct mbox_chan *chan);
 #else
 static inline struct pcc_mbox_chan *
 pcc_mbox_request_channel(struct mbox_client *cl, int subspace_id)
@@ -43,6 +46,10 @@ pcc_mbox_request_channel(struct mbox_client *cl, int subspace_id)
 	return ERR_PTR(-ENODEV);
 }
 static inline void pcc_mbox_free_channel(struct pcc_mbox_chan *chan) { }
+static inline int pcc_mbox_ioremap(struct mbox_chan *chan)
+{
+	return 0;
+};
 #endif
 
 #endif /* _PCC_H */
-- 
cgit v1.2.3


From 2bd9b57d04df417f31ef54448477c212fcdd14fc Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Date: Mon, 25 Nov 2024 09:25:14 -0500
Subject: tracing: Use guard() rather than scoped_guard()

Using scoped_guard() in the implementation of trace_##name() adds an
unnecessary level of indentation.

Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Michael Jeanson <mjeanson@efficios.com>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Yonghong Song <yhs@fb.com>
Cc: Paul E. McKenney <paulmck@kernel.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Andrii Nakryiko <andrii.nakryiko@gmail.com>
Cc: bpf@vger.kernel.org
Cc: Joel Fernandes <joel@joelfernandes.org>
Cc: Jordan Rife <jrife@google.com>
Link: https://lore.kernel.org/20241125142514.2897143-1-mathieu.desnoyers@efficios.com
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 include/linux/tracepoint.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index b2633a72e871..e398f6e43f61 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -259,8 +259,8 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p)
 	{								\
 		if (static_branch_unlikely(&__tracepoint_##name.key)) { \
 			if (cond) {					\
-				scoped_guard(preempt_notrace)		\
-					__DO_TRACE_CALL(name, TP_ARGS(args)); \
+				guard(preempt_notrace)();		\
+				__DO_TRACE_CALL(name, TP_ARGS(args));	\
 			}						\
 		}							\
 		if (IS_ENABLED(CONFIG_LOCKDEP) && (cond)) {		\
@@ -275,8 +275,8 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p)
 	{								\
 		might_fault();						\
 		if (static_branch_unlikely(&__tracepoint_##name.key)) {	\
-			scoped_guard(rcu_tasks_trace)			\
-				__DO_TRACE_CALL(name, TP_ARGS(args));	\
+			guard(rcu_tasks_trace)();			\
+			__DO_TRACE_CALL(name, TP_ARGS(args));		\
 		}							\
 		if (IS_ENABLED(CONFIG_LOCKDEP)) {			\
 			WARN_ONCE(!rcu_is_watching(),			\
-- 
cgit v1.2.3


From 222974c6ec9d901f7ad13bbe6e505ec1f1d822d4 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Mon, 25 Nov 2024 18:45:33 -0800
Subject: iommu: remove stale declaration left over by a merge conflict

The merge commit ae3325f752ef ("Merge branches 'arm/smmu', 'mediatek',
's390', 'ti/omap', 'riscv' and 'core' into next") left a stale
declaration of 'iommu_present()' even though the 'core' branch that was
merged had removed the function (and the declaration).

Remove it for real.

Reported-by: Dmitry Safonov <0x7f454c46@gmail.com>
Cc: Joerg Roedel <jroedel@suse.de>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/iommu.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index d6aaaec3caf4..301e97d745c1 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -841,7 +841,6 @@ static inline void iommu_iotlb_gather_init(struct iommu_iotlb_gather *gather)
 	};
 }
 
-extern bool iommu_present(const struct bus_type *bus);
 extern bool device_iommu_capable(struct device *dev, enum iommu_cap cap);
 extern bool iommu_group_has_isolated_msi(struct iommu_group *group);
 struct iommu_domain *iommu_paging_domain_alloc_flags(struct device *dev, unsigned int flags);
-- 
cgit v1.2.3


From 0043ecea2399ffc8bfd99ed9dbbe766e7c79293c Mon Sep 17 00:00:00 2001
From: Rong Xu <xur@google.com>
Date: Sat, 2 Nov 2024 10:51:10 -0700
Subject: vmlinux.lds.h: Adjust symbol ordering in text output section
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When the -ffunction-sections compiler option is enabled, each function
is placed in a separate section named .text.function_name rather than
putting all functions in a single .text section.

However, using -function-sections can cause problems with the
linker script. The comments included in include/asm-generic/vmlinux.lds.h
note these issues.:
  “TEXT_MAIN here will match .text.fixup and .text.unlikely if dead
   code elimination is enabled, so these sections should be converted
   to use ".." first.”

It is unclear whether there is a straightforward method for converting
a suffix to "..".

This patch modifies the order of subsections within the text output
section. Specifically, it changes current order:
  .text.hot, .text, .text_unlikely, .text.unknown, .text.asan
to the new order:
  .text.asan, .text.unknown, .text_unlikely, .text.hot, .text

Here is the rationale behind the new layout:

The majority of the code resides in three sections: .text.hot, .text,
and .text.unlikely, with .text.unknown containing a negligible amount.
.text.asan is only generated in ASAN builds.

The primary goal is to group code segments based on their execution
frequency (hotness).

First, we want to place .text.hot adjacent to .text. Since we cannot put
.text.hot after .text (Due to constraints with -ffunction-sections,
placing .text.hot after .text is problematic), we need to put
.text.hot before .text.

Then it comes to .text.unlikely, we cannot put it after .text (same
-ffunction-sections issue) . Therefore, we position .text.unlikely
before .text.hot.

.text.unknown and .tex.asan follow the same logic.

This revised ordering effectively reverses the original arrangement (for
.text.unlikely, .text.unknown, and .tex.asan), maintaining a similar level
of affinity between sections.

It also places .text.hot section at the beginning of a page to better
utilize the TLB entry.

Note that the limitation arises because the linker script employs glob
patterns instead of regular expressions for string matching. While there
is a method to maintain the current order using complex patterns, this
significantly complicates the pattern and increases the likelihood of
errors.

This patch also changes vmlinux.lds.S for the sparc64 architecture to
accommodate specific symbol placement requirements.

Co-developed-by: Han Shen <shenhan@google.com>
Signed-off-by: Han Shen <shenhan@google.com>
Signed-off-by: Rong Xu <xur@google.com>
Suggested-by: Sriraman Tallam <tmsriram@google.com>
Suggested-by: Krzysztof Pszeniczny <kpszeniczny@google.com>
Tested-by: Yonghong Song <yonghong.song@linux.dev>
Tested-by: Yabin Cui <yabinc@google.com>
Tested-by: Nathan Chancellor <nathan@kernel.org>
Reviewed-by: Kees Cook <kees@kernel.org>
Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
---
 include/asm-generic/vmlinux.lds.h | 19 ++++++++++++-------
 1 file changed, 12 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index eeadbaeccf88..fd901951549c 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -553,19 +553,24 @@
  * .text section. Map to function alignment to avoid address changes
  * during second ld run in second ld pass when generating System.map
  *
- * TEXT_MAIN here will match .text.fixup and .text.unlikely if dead
- * code elimination is enabled, so these sections should be converted
- * to use ".." first.
+ * TEXT_MAIN here will match symbols with a fixed pattern (for example,
+ * .text.hot or .text.unlikely) if dead code elimination or
+ * function-section is enabled. Match these symbols first before
+ * TEXT_MAIN to ensure they are grouped together.
+ *
+ * Also placing .text.hot section at the beginning of a page, this
+ * would help the TLB performance.
  */
 #define TEXT_TEXT							\
 		ALIGN_FUNCTION();					\
+		*(.text.asan.* .text.tsan.*)				\
+		*(.text.unknown .text.unknown.*)			\
+		*(.text.unlikely .text.unlikely.*)			\
+		. = ALIGN(PAGE_SIZE);					\
 		*(.text.hot .text.hot.*)				\
 		*(TEXT_MAIN .text.fixup)				\
-		*(.text.unlikely .text.unlikely.*)			\
-		*(.text.unknown .text.unknown.*)			\
 		NOINSTR_TEXT						\
-		*(.ref.text)						\
-		*(.text.asan.* .text.tsan.*)
+		*(.ref.text)
 
 
 /* sched.text is aling to function alignment to secure we have same
-- 
cgit v1.2.3


From db0b2991ae1aac5ca985ec6fd8ff9bd9b2126c9b Mon Sep 17 00:00:00 2001
From: Rong Xu <xur@google.com>
Date: Sat, 2 Nov 2024 10:51:11 -0700
Subject: vmlinux.lds.h: Add markers for text_unlikely and text_hot sections

Add markers like __hot_text_start, __hot_text_end, __unlikely_text_start,
and __unlikely_text_end which will be included in System.map. These markers
indicate how the compiler groups functions, providing valuable information
to developers about the layout and optimization of the code.

Co-developed-by: Han Shen <shenhan@google.com>
Signed-off-by: Han Shen <shenhan@google.com>
Signed-off-by: Rong Xu <xur@google.com>
Suggested-by: Sriraman Tallam <tmsriram@google.com>
Tested-by: Yonghong Song <yonghong.song@linux.dev>
Tested-by: Yabin Cui <yabinc@google.com>
Tested-by: Nathan Chancellor <nathan@kernel.org>
Reviewed-by: Kees Cook <kees@kernel.org>
Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
---
 include/asm-generic/vmlinux.lds.h | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index fd901951549c..e02973f3b418 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -549,6 +549,16 @@
 		__cpuidle_text_end = .;					\
 		__noinstr_text_end = .;
 
+#define TEXT_UNLIKELY							\
+		__unlikely_text_start = .;				\
+		*(.text.unlikely .text.unlikely.*)			\
+		__unlikely_text_end = .;
+
+#define TEXT_HOT							\
+		__hot_text_start = .;					\
+		*(.text.hot .text.hot.*)				\
+		__hot_text_end = .;
+
 /*
  * .text section. Map to function alignment to avoid address changes
  * during second ld run in second ld pass when generating System.map
@@ -565,9 +575,9 @@
 		ALIGN_FUNCTION();					\
 		*(.text.asan.* .text.tsan.*)				\
 		*(.text.unknown .text.unknown.*)			\
-		*(.text.unlikely .text.unlikely.*)			\
+		TEXT_UNLIKELY						\
 		. = ALIGN(PAGE_SIZE);					\
-		*(.text.hot .text.hot.*)				\
+		TEXT_HOT						\
 		*(TEXT_MAIN .text.fixup)				\
 		NOINSTR_TEXT						\
 		*(.ref.text)
-- 
cgit v1.2.3


From 0847420f5e499a7ab518942fff71482179290163 Mon Sep 17 00:00:00 2001
From: Rong Xu <xur@google.com>
Date: Sat, 2 Nov 2024 10:51:12 -0700
Subject: AutoFDO: Enable -ffunction-sections for the AutoFDO build

Enable -ffunction-sections by default for the AutoFDO build.

With -ffunction-sections, the compiler places each function in its own
section named .text.function_name instead of placing all functions in
the .text section. In the AutoFDO build, this allows the linker to
utilize profile information to reorganize functions for improved
utilization of iCache and iTLB.

Co-developed-by: Han Shen <shenhan@google.com>
Signed-off-by: Han Shen <shenhan@google.com>
Signed-off-by: Rong Xu <xur@google.com>
Suggested-by: Sriraman Tallam <tmsriram@google.com>
Tested-by: Yonghong Song <yonghong.song@linux.dev>
Tested-by: Yabin Cui <yabinc@google.com>
Tested-by: Nathan Chancellor <nathan@kernel.org>
Reviewed-by: Kees Cook <kees@kernel.org>
Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
---
 include/asm-generic/vmlinux.lds.h | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index e02973f3b418..bd64fdedabd2 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -95,18 +95,25 @@
  * With LTO_CLANG, the linker also splits sections by default, so we need
  * these macros to combine the sections during the final link.
  *
+ * With AUTOFDO_CLANG, by default, the linker splits text sections and
+ * regroups functions into subsections.
+ *
  * RODATA_MAIN is not used because existing code already defines .rodata.x
  * sections to be brought in with rodata.
  */
-#if defined(CONFIG_LD_DEAD_CODE_DATA_ELIMINATION) || defined(CONFIG_LTO_CLANG)
+#if defined(CONFIG_LD_DEAD_CODE_DATA_ELIMINATION) || defined(CONFIG_LTO_CLANG) || \
+defined(CONFIG_AUTOFDO_CLANG)
 #define TEXT_MAIN .text .text.[0-9a-zA-Z_]*
+#else
+#define TEXT_MAIN .text
+#endif
+#if defined(CONFIG_LD_DEAD_CODE_DATA_ELIMINATION) || defined(CONFIG_LTO_CLANG)
 #define DATA_MAIN .data .data.[0-9a-zA-Z_]* .data..L* .data..compoundliteral* .data.$__unnamed_* .data.$L*
 #define SDATA_MAIN .sdata .sdata.[0-9a-zA-Z_]*
 #define RODATA_MAIN .rodata .rodata.[0-9a-zA-Z_]* .rodata..L*
 #define BSS_MAIN .bss .bss.[0-9a-zA-Z_]* .bss..L* .bss..compoundliteral*
 #define SBSS_MAIN .sbss .sbss.[0-9a-zA-Z_]*
 #else
-#define TEXT_MAIN .text
 #define DATA_MAIN .data
 #define SDATA_MAIN .sdata
 #define RODATA_MAIN .rodata
-- 
cgit v1.2.3


From 2fd65f7afd5a73b685a1651cb651ade120b53e15 Mon Sep 17 00:00:00 2001
From: Rong Xu <xur@google.com>
Date: Sat, 2 Nov 2024 10:51:13 -0700
Subject: AutoFDO: Enable machine function split optimization for AutoFDO

Enable the machine function split optimization for AutoFDO in Clang.

Machine function split (MFS) is a pass in the Clang compiler that
splits a function into hot and cold parts. The linker groups all
cold blocks across functions together. This decreases hot code
fragmentation and improves iCache and iTLB utilization.

MFS requires a profile so this is enabled only for the AutoFDO builds.

Co-developed-by: Han Shen <shenhan@google.com>
Signed-off-by: Han Shen <shenhan@google.com>
Signed-off-by: Rong Xu <xur@google.com>
Suggested-by: Sriraman Tallam <tmsriram@google.com>
Suggested-by: Krzysztof Pszeniczny <kpszeniczny@google.com>
Tested-by: Yonghong Song <yonghong.song@linux.dev>
Tested-by: Yabin Cui <yabinc@google.com>
Tested-by: Nathan Chancellor <nathan@kernel.org>
Reviewed-by: Kees Cook <kees@kernel.org>
Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
---
 include/asm-generic/vmlinux.lds.h | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index bd64fdedabd2..8a0bb3946cf0 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -556,6 +556,11 @@ defined(CONFIG_AUTOFDO_CLANG)
 		__cpuidle_text_end = .;					\
 		__noinstr_text_end = .;
 
+#define TEXT_SPLIT							\
+		__split_text_start = .;					\
+		*(.text.split .text.split.[0-9a-zA-Z_]*)		\
+		__split_text_end = .;
+
 #define TEXT_UNLIKELY							\
 		__unlikely_text_start = .;				\
 		*(.text.unlikely .text.unlikely.*)			\
@@ -582,6 +587,7 @@ defined(CONFIG_AUTOFDO_CLANG)
 		ALIGN_FUNCTION();					\
 		*(.text.asan.* .text.tsan.*)				\
 		*(.text.unknown .text.unknown.*)			\
+		TEXT_SPLIT						\
 		TEXT_UNLIKELY						\
 		. = ALIGN(PAGE_SIZE);					\
 		TEXT_HOT						\
@@ -589,7 +595,6 @@ defined(CONFIG_AUTOFDO_CLANG)
 		NOINSTR_TEXT						\
 		*(.ref.text)
 
-
 /* sched.text is aling to function alignment to secure we have same
  * address even at second ld pass when generating System.map */
 #define SCHED_TEXT							\
-- 
cgit v1.2.3


From d5dc95836147f2e25b134c0ca3a0bc1a5867ea29 Mon Sep 17 00:00:00 2001
From: Rong Xu <xur@google.com>
Date: Sat, 2 Nov 2024 10:51:14 -0700
Subject: kbuild: Add Propeller configuration for kernel build
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add the build support for using Clang's Propeller optimizer. Like
AutoFDO, Propeller uses hardware sampling to gather information
about the frequency of execution of different code paths within a
binary. This information is then used to guide the compiler's
optimization decisions, resulting in a more efficient binary.

The support requires a Clang compiler LLVM 19 or later, and the
create_llvm_prof tool
(https://github.com/google/autofdo/releases/tag/v0.30.1). This
commit is limited to x86 platforms that support PMU features
like LBR on Intel machines and AMD Zen3 BRS.

Here is an example workflow for building an AutoFDO+Propeller
optimized kernel:

1) Build the kernel on the host machine, with AutoFDO and Propeller
   build config
      CONFIG_AUTOFDO_CLANG=y
      CONFIG_PROPELLER_CLANG=y
   then
      $ make LLVM=1 CLANG_AUTOFDO_PROFILE=<autofdo_profile>

“<autofdo_profile>” is the profile collected when doing a non-Propeller
AutoFDO build. This step builds a kernel that has the same optimization
level as AutoFDO, plus a metadata section that records basic block
information. This kernel image runs as fast as an AutoFDO optimized
kernel.

2) Install the kernel on test/production machines.

3) Run the load tests. The '-c' option in perf specifies the sample
   event period. We suggest using a suitable prime number,
   like 500009, for this purpose.
   For Intel platforms:
      $ perf record -e BR_INST_RETIRED.NEAR_TAKEN:k -a -N -b -c <count> \
        -o <perf_file> -- <loadtest>
   For AMD platforms:
      The supported system are: Zen3 with BRS, or Zen4 with amd_lbr_v2
      # To see if Zen3 support LBR:
      $ cat proc/cpuinfo | grep " brs"
      # To see if Zen4 support LBR:
      $ cat proc/cpuinfo | grep amd_lbr_v2
      # If the result is yes, then collect the profile using:
      $ perf record --pfm-events RETIRED_TAKEN_BRANCH_INSTRUCTIONS:k -a \
        -N -b -c <count> -o <perf_file> -- <loadtest>

4) (Optional) Download the raw perf file to the host machine.

5) Generate Propeller profile:
   $ create_llvm_prof --binary=<vmlinux> --profile=<perf_file> \
     --format=propeller --propeller_output_module_name \
     --out=<propeller_profile_prefix>_cc_profile.txt \
     --propeller_symorder=<propeller_profile_prefix>_ld_profile.txt

   “create_llvm_prof” is the profile conversion tool, and a prebuilt
   binary for linux can be found on
   https://github.com/google/autofdo/releases/tag/v0.30.1 (can also build
   from source).

   "<propeller_profile_prefix>" can be something like
   "/home/user/dir/any_string".

   This command generates a pair of Propeller profiles:
   "<propeller_profile_prefix>_cc_profile.txt" and
   "<propeller_profile_prefix>_ld_profile.txt".

6) Rebuild the kernel using the AutoFDO and Propeller profile files.
      CONFIG_AUTOFDO_CLANG=y
      CONFIG_PROPELLER_CLANG=y
   and
      $ make LLVM=1 CLANG_AUTOFDO_PROFILE=<autofdo_profile> \
        CLANG_PROPELLER_PROFILE_PREFIX=<propeller_profile_prefix>

Co-developed-by: Han Shen <shenhan@google.com>
Signed-off-by: Han Shen <shenhan@google.com>
Signed-off-by: Rong Xu <xur@google.com>
Suggested-by: Sriraman Tallam <tmsriram@google.com>
Suggested-by: Krzysztof Pszeniczny <kpszeniczny@google.com>
Suggested-by: Nick Desaulniers <ndesaulniers@google.com>
Suggested-by: Stephane Eranian <eranian@google.com>
Tested-by: Yonghong Song <yonghong.song@linux.dev>
Tested-by: Nathan Chancellor <nathan@kernel.org>
Reviewed-by: Kees Cook <kees@kernel.org>
Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
---
 include/asm-generic/vmlinux.lds.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 8a0bb3946cf0..c995474e4c64 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -95,14 +95,14 @@
  * With LTO_CLANG, the linker also splits sections by default, so we need
  * these macros to combine the sections during the final link.
  *
- * With AUTOFDO_CLANG, by default, the linker splits text sections and
- * regroups functions into subsections.
+ * With AUTOFDO_CLANG and PROPELLER_CLANG, by default, the linker splits
+ * text sections and regroups functions into subsections.
  *
  * RODATA_MAIN is not used because existing code already defines .rodata.x
  * sections to be brought in with rodata.
  */
 #if defined(CONFIG_LD_DEAD_CODE_DATA_ELIMINATION) || defined(CONFIG_LTO_CLANG) || \
-defined(CONFIG_AUTOFDO_CLANG)
+defined(CONFIG_AUTOFDO_CLANG) || defined(CONFIG_PROPELLER_CLANG)
 #define TEXT_MAIN .text .text.[0-9a-zA-Z_]*
 #else
 #define TEXT_MAIN .text
-- 
cgit v1.2.3


From bb43a59944f45e89aa158740b8a16ba8f0b0fa2b Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Thu, 7 Nov 2024 01:14:40 +0900
Subject: Rename .data.unlikely to .data..unlikely

Commit 7ccaba5314ca ("consolidate WARN_...ONCE() static variables")
was intended to collect all .data.unlikely sections into one chunk.
However, this has not worked when CONFIG_LD_DEAD_CODE_DATA_ELIMINATION
or CONFIG_LTO_CLANG is enabled, because .data.unlikely matches the
.data.[0-9a-zA-Z_]* pattern in the DATA_MAIN macro.

Commit cb87481ee89d ("kbuild: linker script do not match C names unless
LD_DEAD_CODE_DATA_ELIMINATION is configured") was introduced to suppress
the issue for the default CONFIG_LD_DEAD_CODE_DATA_ELIMINATION=n case,
providing a minimal fix for stable backporting. We were aware this did
not address the issue for CONFIG_LD_DEAD_CODE_DATA_ELIMINATION=y. The
plan was to apply correct fixes and then revert cb87481ee89d. [1]

Seven years have passed since then, yet the #ifdef workaround remains in
place.

Using a ".." separator in the section name fixes the issue for
CONFIG_LD_DEAD_CODE_DATA_ELIMINATION and CONFIG_LTO_CLANG.

[1]: https://lore.kernel.org/linux-kbuild/CAK7LNASck6BfdLnESxXUeECYL26yUDm0cwRZuM4gmaWUkxjL5g@mail.gmail.com/

Fixes: cb87481ee89d ("kbuild: linker script do not match C names unless LD_DEAD_CODE_DATA_ELIMINATION is configured")
Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
---
 include/asm-generic/vmlinux.lds.h | 2 +-
 include/linux/rcupdate.h          | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index c995474e4c64..3c9dc1fd094d 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -357,7 +357,7 @@ defined(CONFIG_AUTOFDO_CLANG) || defined(CONFIG_PROPELLER_CLANG)
 	*(.data..decrypted)						\
 	*(.ref.data)							\
 	*(.data..shared_aligned) /* percpu related */			\
-	*(.data.unlikely)						\
+	*(.data..unlikely)						\
 	__start_once = .;						\
 	*(.data.once)							\
 	__end_once = .;							\
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 58d84c59f3dd..48e5c03df1dd 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -401,7 +401,7 @@ static inline int debug_lockdep_rcu_enabled(void)
  */
 #define RCU_LOCKDEP_WARN(c, s)						\
 	do {								\
-		static bool __section(".data.unlikely") __warned;	\
+		static bool __section(".data..unlikely") __warned;	\
 		if (debug_lockdep_rcu_enabled() && (c) &&		\
 		    debug_lockdep_rcu_enabled() && !__warned) {		\
 			__warned = true;				\
-- 
cgit v1.2.3


From dbefa1f31a91670c9e7dac9b559625336206466f Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Thu, 7 Nov 2024 01:14:41 +0900
Subject: Rename .data.once to .data..once to fix resetting WARN*_ONCE

Commit b1fca27d384e ("kernel debug: support resetting WARN*_ONCE")
added support for clearing the state of once warnings. However,
it is not functional when CONFIG_LD_DEAD_CODE_DATA_ELIMINATION or
CONFIG_LTO_CLANG is enabled, because .data.once matches the
.data.[0-9a-zA-Z_]* pattern in the DATA_MAIN macro.

Commit cb87481ee89d ("kbuild: linker script do not match C names unless
LD_DEAD_CODE_DATA_ELIMINATION is configured") was introduced to suppress
the issue for the default CONFIG_LD_DEAD_CODE_DATA_ELIMINATION=n case,
providing a minimal fix for stable backporting. We were aware this did
not address the issue for CONFIG_LD_DEAD_CODE_DATA_ELIMINATION=y. The
plan was to apply correct fixes and then revert cb87481ee89d. [1]

Seven years have passed since then, yet the #ifdef workaround remains in
place. Meanwhile, commit b1fca27d384e introduced the .data.once section,
and commit dc5723b02e52 ("kbuild: add support for Clang LTO") extended
the #ifdef.

Using a ".." separator in the section name fixes the issue for
CONFIG_LD_DEAD_CODE_DATA_ELIMINATION and CONFIG_LTO_CLANG.

[1]: https://lore.kernel.org/linux-kbuild/CAK7LNASck6BfdLnESxXUeECYL26yUDm0cwRZuM4gmaWUkxjL5g@mail.gmail.com/

Fixes: b1fca27d384e ("kernel debug: support resetting WARN*_ONCE")
Fixes: dc5723b02e52 ("kbuild: add support for Clang LTO")
Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
---
 include/asm-generic/vmlinux.lds.h | 2 +-
 include/linux/mmdebug.h           | 6 +++---
 include/linux/once.h              | 4 ++--
 include/linux/once_lite.h         | 2 +-
 include/net/net_debug.h           | 2 +-
 5 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 3c9dc1fd094d..54504013c749 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -359,7 +359,7 @@ defined(CONFIG_AUTOFDO_CLANG) || defined(CONFIG_PROPELLER_CLANG)
 	*(.data..shared_aligned) /* percpu related */			\
 	*(.data..unlikely)						\
 	__start_once = .;						\
-	*(.data.once)							\
+	*(.data..once)							\
 	__end_once = .;							\
 	STRUCT_ALIGN();							\
 	*(__tracepoints)						\
diff --git a/include/linux/mmdebug.h b/include/linux/mmdebug.h
index 39a7714605a7..d7cb1e5ecbda 100644
--- a/include/linux/mmdebug.h
+++ b/include/linux/mmdebug.h
@@ -46,7 +46,7 @@ void vma_iter_dump_tree(const struct vma_iterator *vmi);
 		}							\
 	} while (0)
 #define VM_WARN_ON_ONCE_PAGE(cond, page)	({			\
-	static bool __section(".data.once") __warned;			\
+	static bool __section(".data..once") __warned;			\
 	int __ret_warn_once = !!(cond);					\
 									\
 	if (unlikely(__ret_warn_once && !__warned)) {			\
@@ -66,7 +66,7 @@ void vma_iter_dump_tree(const struct vma_iterator *vmi);
 	unlikely(__ret_warn);						\
 })
 #define VM_WARN_ON_ONCE_FOLIO(cond, folio)	({			\
-	static bool __section(".data.once") __warned;			\
+	static bool __section(".data..once") __warned;			\
 	int __ret_warn_once = !!(cond);					\
 									\
 	if (unlikely(__ret_warn_once && !__warned)) {			\
@@ -77,7 +77,7 @@ void vma_iter_dump_tree(const struct vma_iterator *vmi);
 	unlikely(__ret_warn_once);					\
 })
 #define VM_WARN_ON_ONCE_MM(cond, mm)		({			\
-	static bool __section(".data.once") __warned;			\
+	static bool __section(".data..once") __warned;			\
 	int __ret_warn_once = !!(cond);					\
 									\
 	if (unlikely(__ret_warn_once && !__warned)) {			\
diff --git a/include/linux/once.h b/include/linux/once.h
index bc714d414448..30346fcdc799 100644
--- a/include/linux/once.h
+++ b/include/linux/once.h
@@ -46,7 +46,7 @@ void __do_once_sleepable_done(bool *done, struct static_key_true *once_key,
 #define DO_ONCE(func, ...)						     \
 	({								     \
 		bool ___ret = false;					     \
-		static bool __section(".data.once") ___done = false;	     \
+		static bool __section(".data..once") ___done = false;	     \
 		static DEFINE_STATIC_KEY_TRUE(___once_key);		     \
 		if (static_branch_unlikely(&___once_key)) {		     \
 			unsigned long ___flags;				     \
@@ -64,7 +64,7 @@ void __do_once_sleepable_done(bool *done, struct static_key_true *once_key,
 #define DO_ONCE_SLEEPABLE(func, ...)						\
 	({									\
 		bool ___ret = false;						\
-		static bool __section(".data.once") ___done = false;		\
+		static bool __section(".data..once") ___done = false;		\
 		static DEFINE_STATIC_KEY_TRUE(___once_key);			\
 		if (static_branch_unlikely(&___once_key)) {			\
 			___ret = __do_once_sleepable_start(&___done);		\
diff --git a/include/linux/once_lite.h b/include/linux/once_lite.h
index b7bce4983638..27de7bc32a06 100644
--- a/include/linux/once_lite.h
+++ b/include/linux/once_lite.h
@@ -12,7 +12,7 @@
 
 #define __ONCE_LITE_IF(condition)					\
 	({								\
-		static bool __section(".data.once") __already_done;	\
+		static bool __section(".data..once") __already_done;	\
 		bool __ret_cond = !!(condition);			\
 		bool __ret_once = false;				\
 									\
diff --git a/include/net/net_debug.h b/include/net/net_debug.h
index 1e74684cbbdb..4a79204c8d30 100644
--- a/include/net/net_debug.h
+++ b/include/net/net_debug.h
@@ -27,7 +27,7 @@ void netdev_info(const struct net_device *dev, const char *format, ...);
 
 #define netdev_level_once(level, dev, fmt, ...)			\
 do {								\
-	static bool __section(".data.once") __print_once;	\
+	static bool __section(".data..once") __print_once;	\
 								\
 	if (!__print_once) {					\
 		__print_once = true;				\
-- 
cgit v1.2.3


From 81de291d86b704de1809cfb06672902d003cf3a3 Mon Sep 17 00:00:00 2001
From: Chen-Yu Tsai <wenst@chromium.org>
Date: Wed, 6 Nov 2024 17:33:28 +0800
Subject: of: dynamic: Add of_changeset_update_prop_string

Add a helper function to add string property updates to an OF changeset.
This is similar to of_changeset_add_prop_string(), but instead of adding
the property (and failing if it exists), it will update the property.

This shall be used later in the DT hardware prober.

Signed-off-by: Chen-Yu Tsai <wenst@chromium.org>
Reviewed-by: Rob Herring (Arm) <robh@kernel.org>
Reviewed-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
Signed-off-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
---
 include/linux/of.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/linux/of.h b/include/linux/of.h
index 086a60f3b8a6..d0307e3b093d 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -1651,6 +1651,10 @@ static inline int of_changeset_add_prop_u32(struct of_changeset *ocs,
 	return of_changeset_add_prop_u32_array(ocs, np, prop_name, &val, 1);
 }
 
+int of_changeset_update_prop_string(struct of_changeset *ocs,
+				    struct device_node *np,
+				    const char *prop_name, const char *str);
+
 int of_changeset_add_prop_bool(struct of_changeset *ocs, struct device_node *np,
 			       const char *prop_name);
 
-- 
cgit v1.2.3


From 1fcc67e3a354865775355eafec1fb061a755c971 Mon Sep 17 00:00:00 2001
From: Chen-Yu Tsai <wenst@chromium.org>
Date: Wed, 6 Nov 2024 17:33:29 +0800
Subject: of: base: Add for_each_child_of_node_with_prefix()

There are cases where drivers would go through child device nodes and
operate on only the ones whose node name starts with a given prefix.

Provide a helper for these users. This will mainly be used in a
subsequent patch that implements a hardware component prober for I2C
busses.

Signed-off-by: Chen-Yu Tsai <wenst@chromium.org>
Reviewed-by: Rob Herring (Arm) <robh@kernel.org>
Reviewed-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
Signed-off-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
---
 include/linux/of.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include')

diff --git a/include/linux/of.h b/include/linux/of.h
index d0307e3b093d..f921786cb8ac 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -289,6 +289,9 @@ extern struct device_node *of_get_parent(const struct device_node *node);
 extern struct device_node *of_get_next_parent(struct device_node *node);
 extern struct device_node *of_get_next_child(const struct device_node *node,
 					     struct device_node *prev);
+extern struct device_node *of_get_next_child_with_prefix(const struct device_node *node,
+							 struct device_node *prev,
+							 const char *prefix);
 extern struct device_node *of_get_next_available_child(
 	const struct device_node *node, struct device_node *prev);
 extern struct device_node *of_get_next_reserved_child(
@@ -1468,6 +1471,12 @@ static inline int of_property_read_s32(const struct device_node *np,
 	     child != NULL;						\
 	     child = of_get_next_child(parent, child))
 
+#define for_each_child_of_node_with_prefix(parent, child, prefix)	\
+	for (struct device_node *child __free(device_node) =		\
+	     of_get_next_child_with_prefix(parent, NULL, prefix);	\
+	     child != NULL;						\
+	     child = of_get_next_child_with_prefix(parent, child, prefix))
+
 #define for_each_available_child_of_node(parent, child) \
 	for (child = of_get_next_available_child(parent, NULL); child != NULL; \
 	     child = of_get_next_available_child(parent, child))
-- 
cgit v1.2.3


From 157ce8f381efe264933e9366db828d845bade3a1 Mon Sep 17 00:00:00 2001
From: Chen-Yu Tsai <wenst@chromium.org>
Date: Wed, 6 Nov 2024 17:33:30 +0800
Subject: i2c: Introduce OF component probe function

Some devices are designed and manufactured with some components having
multiple drop-in replacement options. These components are often
connected to the mainboard via ribbon cables, having the same signals
and pin assignments across all options. These may include the display
panel and touchscreen on laptops and tablets, and the trackpad on
laptops. Sometimes which component option is used in a particular device
can be detected by some firmware provided identifier, other times that
information is not available, and the kernel has to try to probe each
device.

This change attempts to make the "probe each device" case cleaner. The
current approach is to have all options added and enabled in the device
tree. The kernel would then bind each device and run each driver's probe
function. This works, but has been broken before due to the introduction
of asynchronous probing, causing multiple instances requesting "shared"
resources, such as pinmuxes, GPIO pins, interrupt lines, at the same
time, with only one instance succeeding. Work arounds for these include
moving the pinmux to the parent I2C controller, using GPIO hogs or
pinmux settings to keep the GPIO pins in some fixed configuration, and
requesting the interrupt line very late. Such configurations can be seen
on the MT8183 Krane Chromebook tablets, and the Qualcomm sc8280xp-based
Lenovo Thinkpad 13S.

Instead of this delicate dance between drivers and device tree quirks,
this change introduces a simple I2C component probe function. For a
given class of devices on the same I2C bus, it will go through all of
them, doing a simple I2C read transfer and see which one of them responds.
It will then enable the device that responds.

This requires some minor modifications in the existing device tree. The
status for all the device nodes for the component options must be set
to "fail-needs-probe". This makes it clear that some mechanism is
needed to enable one of them, and also prevents the prober and device
drivers running at the same time.

Signed-off-by: Chen-Yu Tsai <wenst@chromium.org>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reviewed-by: Douglas Anderson <dianders@chromium.org>
Reviewed-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
Signed-off-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
---
 include/linux/i2c-of-prober.h | 75 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 75 insertions(+)
 create mode 100644 include/linux/i2c-of-prober.h

(limited to 'include')

diff --git a/include/linux/i2c-of-prober.h b/include/linux/i2c-of-prober.h
new file mode 100644
index 000000000000..e7e052ac9e48
--- /dev/null
+++ b/include/linux/i2c-of-prober.h
@@ -0,0 +1,75 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Definitions for the Linux I2C OF component prober
+ *
+ * Copyright (C) 2024 Google LLC
+ */
+
+#ifndef _LINUX_I2C_OF_PROBER_H
+#define _LINUX_I2C_OF_PROBER_H
+
+#include <linux/kconfig.h>
+
+struct device;
+struct device_node;
+
+/**
+ * struct i2c_of_probe_ops - I2C OF component prober callbacks
+ *
+ * A set of callbacks to be used by i2c_of_probe_component().
+ *
+ * All callbacks are optional. Callbacks are called only once per run, and are
+ * used in the order they are defined in this structure.
+ *
+ * All callbacks that have return values shall return %0 on success,
+ * or a negative error number on failure.
+ *
+ * The @dev parameter passed to the callbacks is the same as @dev passed to
+ * i2c_of_probe_component(). It should only be used for dev_printk() calls
+ * and nothing else, especially not managed device resource (devres) APIs.
+ */
+struct i2c_of_probe_ops {
+	/**
+	 * @enable: Retrieve and enable resources so that the components respond to probes.
+	 *
+	 * It is OK for this callback to return -EPROBE_DEFER since the intended use includes
+	 * retrieving resources and enables them. Resources should be reverted to their initial
+	 * state and released before returning if this fails.
+	 */
+	int (*enable)(struct device *dev, struct device_node *bus_node, void *data);
+
+	/**
+	 * @cleanup_early: Release exclusive resources prior to calling probe() on a
+	 *		   detected component.
+	 *
+	 * Only called if a matching component is actually found. If none are found,
+	 * resources that would have been released in this callback should be released in
+	 * @free_resourcs_late instead.
+	 */
+	void (*cleanup_early)(struct device *dev, void *data);
+
+	/**
+	 * @cleanup: Opposite of @enable to balance refcounts and free resources after probing.
+	 *
+	 * Should check if resources were already freed by @cleanup_early.
+	 */
+	void (*cleanup)(struct device *dev, void *data);
+};
+
+/**
+ * struct i2c_of_probe_cfg - I2C OF component prober configuration
+ * @ops: Callbacks for the prober to use.
+ * @type: A string to match the device node name prefix to probe for.
+ */
+struct i2c_of_probe_cfg {
+	const struct i2c_of_probe_ops *ops;
+	const char *type;
+};
+
+#if IS_ENABLED(CONFIG_OF_DYNAMIC)
+
+int i2c_of_probe_component(struct device *dev, const struct i2c_of_probe_cfg *cfg, void *ctx);
+
+#endif /* IS_ENABLED(CONFIG_OF_DYNAMIC) */
+
+#endif /* _LINUX_I2C_OF_PROBER_H */
-- 
cgit v1.2.3


From 897261149d255d03fc90bec6782e3835cacbfdde Mon Sep 17 00:00:00 2001
From: Chen-Yu Tsai <wenst@chromium.org>
Date: Wed, 6 Nov 2024 17:33:31 +0800
Subject: i2c: of-prober: Add simple helpers for regulator support

Add helpers to do regulator management for the I2C OF component prober.
Components that the prober intends to probe likely require their
regulator supplies be enabled, and GPIOs be toggled to enable them or
bring them out of reset before they will respond to probe attempts.
GPIOs will be handled in the next patch.

The assumption is that the same class of components to be probed are
always connected in the same fashion with the same regulator supply
and GPIO. The names may vary due to binding differences, but the
physical layout does not change.

This set of helpers supports at most one regulator supply. The user
must specify the node from which the supply is retrieved. The supply
name and the amount of time to wait after the supply is enabled are
also given by the user.

Signed-off-by: Chen-Yu Tsai <wenst@chromium.org>
Reviewed-by: Douglas Anderson <dianders@chromium.org>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reviewed-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
Signed-off-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
---
 include/linux/i2c-of-prober.h | 44 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 44 insertions(+)

(limited to 'include')

diff --git a/include/linux/i2c-of-prober.h b/include/linux/i2c-of-prober.h
index e7e052ac9e48..df95aa6ad90e 100644
--- a/include/linux/i2c-of-prober.h
+++ b/include/linux/i2c-of-prober.h
@@ -70,6 +70,50 @@ struct i2c_of_probe_cfg {
 
 int i2c_of_probe_component(struct device *dev, const struct i2c_of_probe_cfg *cfg, void *ctx);
 
+/**
+ * DOC: I2C OF component prober simple helpers
+ *
+ * Components such as trackpads are commonly connected to a devices baseboard
+ * with a 6-pin ribbon cable. That gives at most one voltage supply and one
+ * GPIO (commonly a "enable" or "reset" line) besides the I2C bus, interrupt
+ * pin, and common ground. Touchscreens, while integrated into the display
+ * panel's connection, typically have the same set of connections.
+ *
+ * A simple set of helpers are provided here for use with the I2C OF component
+ * prober. This implementation targets such components, allowing for at most
+ * one regulator supply.
+ *
+ * The following helpers are provided:
+ * * i2c_of_probe_simple_enable()
+ * * i2c_of_probe_simple_cleanup()
+ */
+
+/**
+ * struct i2c_of_probe_simple_opts - Options for simple I2C component prober callbacks
+ * @res_node_compatible: Compatible string of device node to retrieve resources from.
+ * @supply_name: Name of regulator supply.
+ * @post_power_on_delay_ms: Delay after regulators are powered on. Passed to msleep().
+ */
+struct i2c_of_probe_simple_opts {
+	const char *res_node_compatible;
+	const char *supply_name;
+	unsigned int post_power_on_delay_ms;
+};
+
+struct regulator;
+
+struct i2c_of_probe_simple_ctx {
+	/* public: provided by user before helpers are used. */
+	const struct i2c_of_probe_simple_opts *opts;
+	/* private: internal fields for helpers. */
+	struct regulator *supply;
+};
+
+int i2c_of_probe_simple_enable(struct device *dev, struct device_node *bus_node, void *data);
+void i2c_of_probe_simple_cleanup(struct device *dev, void *data);
+
+extern struct i2c_of_probe_ops i2c_of_probe_simple_ops;
+
 #endif /* IS_ENABLED(CONFIG_OF_DYNAMIC) */
 
 #endif /* _LINUX_I2C_OF_PROBER_H */
-- 
cgit v1.2.3


From 39b415f84654892003cebb7c026b7daa3380610b Mon Sep 17 00:00:00 2001
From: Chen-Yu Tsai <wenst@chromium.org>
Date: Wed, 6 Nov 2024 17:33:32 +0800
Subject: i2c: of-prober: Add GPIO support to simple helpers

Add GPIO support to the simple helpers for the I2C OF component prober.
Components that the prober intends to probe likely require their
regulator supplies be enabled, and GPIOs be toggled to enable them or
bring them out of reset before they will respond to probe attempts.
Regulator supplies were handled in the previous patch.

The assumption is that the same class of components to be probed are
always connected in the same fashion with the same regulator supply
and GPIO. The names may vary due to binding differences, but the
physical layout does not change.

This supports at most one GPIO pin. The user must specify the GPIO name,
the polarity, and the amount of time to wait after the GPIO is toggled.
Devices with more than one GPIO pin likely require specific power
sequencing beyond what generic code can easily support.

Signed-off-by: Chen-Yu Tsai <wenst@chromium.org>
Reviewed-by: Douglas Anderson <dianders@chromium.org>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Tested-by: Andrey Skvortsov <andrej.skvortzov@gmail.com>
Reviewed-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
Signed-off-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
---
 include/linux/i2c-of-prober.h | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

(limited to 'include')

diff --git a/include/linux/i2c-of-prober.h b/include/linux/i2c-of-prober.h
index df95aa6ad90e..bb6d47f50ee5 100644
--- a/include/linux/i2c-of-prober.h
+++ b/include/linux/i2c-of-prober.h
@@ -9,6 +9,7 @@
 #define _LINUX_I2C_OF_PROBER_H
 
 #include <linux/kconfig.h>
+#include <linux/types.h>
 
 struct device;
 struct device_node;
@@ -85,6 +86,7 @@ int i2c_of_probe_component(struct device *dev, const struct i2c_of_probe_cfg *cf
  *
  * The following helpers are provided:
  * * i2c_of_probe_simple_enable()
+ * * i2c_of_probe_simple_cleanup_early()
  * * i2c_of_probe_simple_cleanup()
  */
 
@@ -92,14 +94,31 @@ int i2c_of_probe_component(struct device *dev, const struct i2c_of_probe_cfg *cf
  * struct i2c_of_probe_simple_opts - Options for simple I2C component prober callbacks
  * @res_node_compatible: Compatible string of device node to retrieve resources from.
  * @supply_name: Name of regulator supply.
+ * @gpio_name: Name of GPIO. NULL if no GPIO line is used. Empty string ("") if GPIO
+ *	       line is unnamed.
  * @post_power_on_delay_ms: Delay after regulators are powered on. Passed to msleep().
+ * @post_gpio_config_delay_ms: Delay after GPIO is configured. Passed to msleep().
+ * @gpio_assert_to_enable: %true if GPIO should be asserted, i.e. set to logical high,
+ *			   to enable the component.
+ *
+ * This describes power sequences common for the class of components supported by the
+ * simple component prober:
+ * * @gpio_name is configured to the non-active setting according to @gpio_assert_to_enable.
+ * * @supply_name regulator supply is enabled.
+ * * Wait for @post_power_on_delay_ms to pass.
+ * * @gpio_name is configured to the active setting according to @gpio_assert_to_enable.
+ * * Wait for @post_gpio_config_delay_ms to pass.
  */
 struct i2c_of_probe_simple_opts {
 	const char *res_node_compatible;
 	const char *supply_name;
+	const char *gpio_name;
 	unsigned int post_power_on_delay_ms;
+	unsigned int post_gpio_config_delay_ms;
+	bool gpio_assert_to_enable;
 };
 
+struct gpio_desc;
 struct regulator;
 
 struct i2c_of_probe_simple_ctx {
@@ -107,9 +126,11 @@ struct i2c_of_probe_simple_ctx {
 	const struct i2c_of_probe_simple_opts *opts;
 	/* private: internal fields for helpers. */
 	struct regulator *supply;
+	struct gpio_desc *gpiod;
 };
 
 int i2c_of_probe_simple_enable(struct device *dev, struct device_node *bus_node, void *data);
+void i2c_of_probe_simple_cleanup_early(struct device *dev, void *data);
 void i2c_of_probe_simple_cleanup(struct device *dev, void *data);
 
 extern struct i2c_of_probe_ops i2c_of_probe_simple_ops;
-- 
cgit v1.2.3


From 054a9cd395a79f4a5595f2cd24542e9bca8723c8 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Wed, 20 Nov 2024 08:56:52 +0900
Subject: modpost: rename alias symbol for MODULE_DEVICE_TABLE()

This commit renames the alias symbol, __mod_<type>__<name>_device_table
to __mod_device_table__<type>__<name>.

This change simplifies the code slightly, as there is no longer a need
to check both the prefix and suffix.

Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
---
 include/linux/module.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/module.h b/include/linux/module.h
index 88ecc5e9f523..3dd79a3d0cbf 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -247,7 +247,7 @@ extern void cleanup_module(void);
 #ifdef MODULE
 /* Creates an alias so file2alias.c can find device table. */
 #define MODULE_DEVICE_TABLE(type, name)					\
-extern typeof(name) __mod_##type##__##name##_device_table		\
+extern typeof(name) __mod_device_table__##type##__##name		\
   __attribute__ ((unused, alias(__stringify(name))))
 #else  /* !MODULE */
 #define MODULE_DEVICE_TABLE(type, name)
-- 
cgit v1.2.3


From 49b2b973325add467270e906d4d794aa6679b38b Mon Sep 17 00:00:00 2001
From: Michal Luczaj <mhal@rbox.co>
Date: Tue, 19 Nov 2024 14:31:43 +0100
Subject: net: Comment copy_from_sockptr() explaining its behaviour

copy_from_sockptr() has a history of misuse. Add a comment explaining that
the function follows API of copy_from_user(), i.e. returns 0 for success,
or number of bytes not copied on error.

Signed-off-by: Michal Luczaj <mhal@rbox.co>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/linux/sockptr.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/sockptr.h b/include/linux/sockptr.h
index 195debe2b1db..3e6c8e9d67ae 100644
--- a/include/linux/sockptr.h
+++ b/include/linux/sockptr.h
@@ -53,6 +53,8 @@ static inline int copy_from_sockptr_offset(void *dst, sockptr_t src,
 /* Deprecated.
  * This is unsafe, unless caller checked user provided optlen.
  * Prefer copy_safe_from_sockptr() instead.
+ *
+ * Returns 0 for success, or number of bytes not copied on error.
  */
 static inline int copy_from_sockptr(void *dst, sockptr_t src, size_t size)
 {
-- 
cgit v1.2.3


From e2668c34b7e1a2288ea0a97ccf3cd12e2870ca18 Mon Sep 17 00:00:00 2001
From: "Russell King (Oracle)" <rmk+kernel@armlinux.org.uk>
Date: Sat, 23 Nov 2024 14:50:12 +0000
Subject: net: phy: fix phy_ethtool_set_eee() incorrectly enabling LPI

When phy_ethtool_set_eee_noneg() detects a change in the LPI
parameters, it attempts to update phylib state and trigger the link
to cycle so the MAC sees the updated parameters.

However, in doing so, it sets phydev->enable_tx_lpi depending on
whether the EEE configuration allows the MAC to generate LPI without
taking into account the result of negotiation.

This can be demonstrated with a 1000base-T FD interface by:

 # ethtool --set-eee eno0 advertise 8   # cause EEE to be not negotiated
 # ethtool --set-eee eno0 tx-lpi off
 # ethtool --set-eee eno0 tx-lpi on

This results in being true, despite EEE not having been negotiated and:
 # ethtool --show-eee eno0
	EEE status: enabled - inactive
	Tx LPI: 250 (us)
	Supported EEE link modes:  100baseT/Full
	                           1000baseT/Full
	Advertised EEE link modes:  100baseT/Full
	                                         1000baseT/Full

Fix this by keeping track of whether EEE was negotiated via a new
eee_active member in struct phy_device, and include this state in
the decision whether phydev->enable_tx_lpi should be set.

Fixes: 3e43b903da04 ("net: phy: Immediately call adjust_link if only tx_lpi_enabled changes")
Signed-off-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
Link: https://patch.msgid.link/E1tErSe-005RhB-2R@rmk-PC.armlinux.org.uk
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/linux/phy.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/phy.h b/include/linux/phy.h
index 77c6d6451638..563c46205685 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -602,6 +602,7 @@ struct macsec_ops;
  * @supported_eee: supported PHY EEE linkmodes
  * @advertising_eee: Currently advertised EEE linkmodes
  * @enable_tx_lpi: When True, MAC should transmit LPI to PHY
+ * @eee_active: phylib private state, indicating that EEE has been negotiated
  * @eee_cfg: User configuration of EEE
  * @lp_advertising: Current link partner advertised linkmodes
  * @host_interfaces: PHY interface modes supported by host
@@ -723,6 +724,7 @@ struct phy_device {
 	/* Energy efficient ethernet modes which should be prohibited */
 	__ETHTOOL_DECLARE_LINK_MODE_MASK(eee_broken_modes);
 	bool enable_tx_lpi;
+	bool eee_active;
 	struct eee_config eee_cfg;
 
 	/* Host supported PHY interface types. Should be ignored if empty. */
-- 
cgit v1.2.3


From 4d17c25eaf5d8b95d70726e6946e8eb94619e139 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <frederic@kernel.org>
Date: Thu, 21 Nov 2024 16:29:31 +0100
Subject: delay: Fix ndelay() spuriously treated as udelay()

A recent rework on delay functions wrongly ended up calling __udelay()
instead of __ndelay() for nanosecond delays, increasing those by 1000.

As a result hangs have been observed on boot

Restore the right function calls.

Fixes: 19e2d91d8cb1 ("delay: Rework udelay and ndelay")
Reported-by: Chen-Yu Tsai <wenst@chromium.org>
Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Chen-Yu Tsai <wenst@chromium.org>
Reviewed-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
Reviewed-by: Chen-Yu Tsai <wenst@chromium.org>
Link: https://lore.kernel.org/all/20241121152931.51884-1-frederic@kernel.org
---
 include/asm-generic/delay.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/asm-generic/delay.h b/include/asm-generic/delay.h
index 76cf237b6e4c..03b0ec7afca6 100644
--- a/include/asm-generic/delay.h
+++ b/include/asm-generic/delay.h
@@ -75,11 +75,11 @@ static __always_inline void ndelay(unsigned long nsec)
 {
 	if (__builtin_constant_p(nsec)) {
 		if (nsec >= DELAY_CONST_MAX)
-			__bad_udelay();
+			__bad_ndelay();
 		else
 			__const_udelay(nsec * NDELAY_CONST_MULT);
 	} else {
-		__udelay(nsec);
+		__ndelay(nsec);
 	}
 }
 #define ndelay(x) ndelay(x)
-- 
cgit v1.2.3


From f69e63756f7822fcdad8a34f9967e8b243e883ee Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <linux@treblig.org>
Date: Wed, 2 Oct 2024 18:31:47 +0100
Subject: printf: Remove unused 'bprintf'

bprintf() is unused. Remove it. It was added in the commit 4370aa4aa753
("vsprintf: add binary printf") but as far as I can see was never used,
unlike the other two functions in that patch.

Link: https://lore.kernel.org/20241002173147.210107-1-linux@treblig.org
Reviewed-by: Andy Shevchenko <andy@kernel.org>
Acked-by: Petr Mladek <pmladek@suse.com>
Signed-off-by: Dr. David Alan Gilbert <linux@treblig.org>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 include/linux/string.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/string.h b/include/linux/string.h
index 0dd27afcfaf7..493ac4862c77 100644
--- a/include/linux/string.h
+++ b/include/linux/string.h
@@ -335,7 +335,6 @@ int __sysfs_match_string(const char * const *array, size_t n, const char *s);
 #ifdef CONFIG_BINARY_PRINTF
 int vbin_printf(u32 *bin_buf, size_t size, const char *fmt, va_list args);
 int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf);
-int bprintf(u32 *bin_buf, size_t size, const char *fmt, ...) __printf(3, 4);
 #endif
 
 extern ssize_t memory_read_from_buffer(void *to, size_t count, loff_t *ppos,
-- 
cgit v1.2.3